From 4e82c9a606da2b1c1c7ea7dfd8052626a4c6d5d6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Feb 2008 18:00:03 -0800 Subject: [SPARC64]: Move ramdisk discovery code out to seperate function. And add some comments explaining all of the quirks involved in the way the bootloader provides this information. Signed-off-by: David S. Miller --- arch/sparc64/mm/init.c | 57 +++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index f37078d9640..21e9267608c 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -883,6 +883,37 @@ static void __init trim_pavail(unsigned long *cur_size_p, } } +static void __init find_ramdisk(unsigned long phys_base) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (sparc_ramdisk_image || sparc_ramdisk_image64) { + unsigned long ramdisk_image; + + /* Older versions of the bootloader only supported a + * 32-bit physical address for the ramdisk image + * location, stored at sparc_ramdisk_image. Newer + * SILO versions set sparc_ramdisk_image to zero and + * provide a full 64-bit physical address at + * sparc_ramdisk_image64. + */ + ramdisk_image = sparc_ramdisk_image; + if (!ramdisk_image) + ramdisk_image = sparc_ramdisk_image64; + + /* Another bootloader quirk. The bootloader normalizes + * the physical address to KERNBASE, so we have to + * factor that back out and add in the lowest valid + * physical page address to get the true physical address. + */ + ramdisk_image -= KERNBASE; + ramdisk_image += phys_base; + + initrd_start = ramdisk_image; + initrd_end = ramdisk_image + sparc_ramdisk_size; + } +#endif +} + /* About pages_avail, this is the value we will use to calculate * the zholes_size[] argument given to free_area_init_node(). The * page allocator uses this to calculate nr_kernel_pages, @@ -912,30 +943,6 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail, bytes_avail += pavail[i].reg_size; } - /* Determine the location of the initial ramdisk before trying - * to honor the "mem=xxx" command line argument. We must know - * where the kernel image and the ramdisk image are so that we - * do not trim those two areas from the physical memory map. - */ - -#ifdef CONFIG_BLK_DEV_INITRD - /* Now have to check initial ramdisk, so that bootmap does not overwrite it */ - if (sparc_ramdisk_image || sparc_ramdisk_image64) { - unsigned long ramdisk_image = sparc_ramdisk_image ? - sparc_ramdisk_image : sparc_ramdisk_image64; - ramdisk_image -= KERNBASE; - initrd_start = ramdisk_image + phys_base; - initrd_end = initrd_start + sparc_ramdisk_size; - if (initrd_end > end_of_phys_memory) { - printk(KERN_CRIT "initrd extends beyond end of memory " - "(0x%016lx > 0x%016lx)\ndisabling initrd\n", - initrd_end, end_of_phys_memory); - initrd_start = 0; - initrd_end = 0; - } - } -#endif - if (cmdline_memory_size && bytes_avail > cmdline_memory_size) trim_pavail(&bytes_avail, @@ -1337,6 +1344,8 @@ void __init paging_init(void) for (i = 0; i < pavail_ents; i++) phys_base = min(phys_base, pavail[i].phys_addr); + find_ramdisk(phys_base); + set_bit(0, mmu_context_bmap); shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); -- cgit v1.2.3 From 3b2a7e23a9808e349bc5fb32327bacc5e81be79c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Feb 2008 18:13:20 -0800 Subject: [SPARC64]: Initialize LMB tables. Call lmb_add() on available regions, and call lmb_reserve() on the main kernel image and the ramdisk (if any). 
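For reference, the boot-memory bookkeeping this patch adds to paging_init() reduces to the call sequence sketched below. This is a condensed illustration, not the literal diff: the wrapper function name is invented for the sketch, and the header behind the stripped "+#include" line is presumed to be <linux/lmb.h>. All of the calls shown (lmb_init, lmb_add, lmb_reserve, lmb_analyze, lmb_dump_all, read_obp_memory, find_ramdisk) appear in the diff that follows.

#include <linux/lmb.h>  /* presumed: the include name was lost during page extraction */

/* Hypothetical wrapper for illustration; in the real patch these calls live in paging_init(). */
static void __init lmb_bootstrap_sketch(void)
{
        unsigned long phys_base = 0xffffffffffffffffUL;
        int i;

        lmb_init();

        /* Register every OBP "available" region with the LMB allocator,
         * tracking the lowest valid physical address as we go.
         */
        read_obp_memory("available", &pavail[0], &pavail_ents);
        for (i = 0; i < pavail_ents; i++) {
                phys_base = min(phys_base, pavail[i].phys_addr);
                lmb_add(pavail[i].phys_addr, pavail[i].reg_size);
        }

        /* Keep the allocator away from the kernel image and the initrd. */
        lmb_reserve(kern_base, kern_size);
        find_ramdisk(phys_base);        /* calls lmb_reserve() on the ramdisk, if any */

        lmb_analyze();
        lmb_dump_all();
}

Recording these reservations up front is what allows the later patches in this series to drop trim_pavail() and drive bootmem_init() directly from the lmb.memory and lmb.reserved tables. The sign-off and full diff for this change follow.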
Signed-off-by: David S. Miller --- arch/sparc64/mm/init.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 21e9267608c..6eb76243fa9 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -910,6 +911,8 @@ static void __init find_ramdisk(unsigned long phys_base) initrd_start = ramdisk_image; initrd_end = ramdisk_image + sparc_ramdisk_size; + + lmb_reserve(initrd_start, initrd_end); } #endif } @@ -1337,15 +1340,24 @@ void __init paging_init(void) sun4v_ktsb_init(); } + lmb_init(); + /* Find available physical memory... */ read_obp_memory("available", &pavail[0], &pavail_ents); phys_base = 0xffffffffffffffffUL; - for (i = 0; i < pavail_ents; i++) + for (i = 0; i < pavail_ents; i++) { phys_base = min(phys_base, pavail[i].phys_addr); + lmb_add(pavail[i].phys_addr, pavail[i].reg_size); + } + + lmb_reserve(kern_base, kern_size); find_ramdisk(phys_base); + lmb_analyze(); + lmb_dump_all(); + set_bit(0, mmu_context_bmap); shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE); -- cgit v1.2.3 From 25b0c659dfb94f1ddaeda7a8c88ef7043f57f419 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Feb 2008 18:20:14 -0800 Subject: [SPARC64]: Start using LMB information in bootmem_init(). This allows us to kill the incredibly complicated and stupid function trim_pavail(). Signed-off-by: David S. Miller --- arch/sparc64/mm/init.c | 132 +++---------------------------------------------- 1 file changed, 6 insertions(+), 126 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 6eb76243fa9..3e6c4ecb8fe 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -775,115 +775,6 @@ static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn, prom_halt(); } -static void __init trim_pavail(unsigned long *cur_size_p, - unsigned long *end_of_phys_p) -{ - unsigned long to_trim = *cur_size_p - cmdline_memory_size; - unsigned long avoid_start, avoid_end; - int i; - - to_trim = PAGE_ALIGN(to_trim); - - avoid_start = avoid_end = 0; -#ifdef CONFIG_BLK_DEV_INITRD - avoid_start = initrd_start; - avoid_end = PAGE_ALIGN(initrd_end); -#endif - - /* Trim some pavail[] entries in order to satisfy the - * requested "mem=xxx" kernel command line specification. - * - * We must not trim off the kernel image area nor the - * initial ramdisk range (if any). Also, we must not trim - * any pavail[] entry down to zero in order to preserve - * the invariant that all pavail[] entries have a non-zero - * size which is assumed by all of the code in here. 
- */ - for (i = 0; i < pavail_ents; i++) { - unsigned long start, end, kern_end; - unsigned long trim_low, trim_high, n; - - kern_end = PAGE_ALIGN(kern_base + kern_size); - - trim_low = start = pavail[i].phys_addr; - trim_high = end = start + pavail[i].reg_size; - - if (kern_base >= start && - kern_base < end) { - trim_low = kern_base; - if (kern_end >= end) - continue; - } - if (kern_end >= start && - kern_end < end) { - trim_high = kern_end; - } - if (avoid_start && - avoid_start >= start && - avoid_start < end) { - if (trim_low > avoid_start) - trim_low = avoid_start; - if (avoid_end >= end) - continue; - } - if (avoid_end && - avoid_end >= start && - avoid_end < end) { - if (trim_high < avoid_end) - trim_high = avoid_end; - } - - if (trim_high <= trim_low) - continue; - - if (trim_low == start && trim_high == end) { - /* Whole chunk is available for trimming. - * Trim all except one page, in order to keep - * entry non-empty. - */ - n = (end - start) - PAGE_SIZE; - if (n > to_trim) - n = to_trim; - - if (n) { - pavail[i].phys_addr += n; - pavail[i].reg_size -= n; - to_trim -= n; - } - } else { - n = (trim_low - start); - if (n > to_trim) - n = to_trim; - - if (n) { - pavail[i].phys_addr += n; - pavail[i].reg_size -= n; - to_trim -= n; - } - if (to_trim) { - n = end - trim_high; - if (n > to_trim) - n = to_trim; - if (n) { - pavail[i].reg_size -= n; - to_trim -= n; - } - } - } - - if (!to_trim) - break; - } - - /* Recalculate. */ - *cur_size_p = 0UL; - for (i = 0; i < pavail_ents; i++) { - *end_of_phys_p = pavail[i].phys_addr + - pavail[i].reg_size; - *cur_size_p += pavail[i].reg_size; - } -} - static void __init find_ramdisk(unsigned long phys_base) { #ifdef CONFIG_BLK_DEV_INITRD @@ -935,25 +826,11 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail, unsigned long phys_base) { unsigned long bootmap_size, end_pfn; - unsigned long end_of_phys_memory = 0UL; - unsigned long bootmap_pfn, bytes_avail, size; + unsigned long bootmap_pfn, size; int i; - bytes_avail = 0UL; - for (i = 0; i < pavail_ents; i++) { - end_of_phys_memory = pavail[i].phys_addr + - pavail[i].reg_size; - bytes_avail += pavail[i].reg_size; - } - - if (cmdline_memory_size && - bytes_avail > cmdline_memory_size) - trim_pavail(&bytes_avail, - &end_of_phys_memory); - - *pages_avail = bytes_avail >> PAGE_SHIFT; - - end_pfn = end_of_phys_memory >> PAGE_SHIFT; + *pages_avail = lmb_phys_mem_size() >> PAGE_SHIFT; + end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; /* Initialize the boot-time allocator. */ max_pfn = max_low_pfn = end_pfn; @@ -1355,6 +1232,9 @@ void __init paging_init(void) find_ramdisk(phys_base); + if (cmdline_memory_size) + lmb_enforce_memory_limit(phys_base + cmdline_memory_size); + lmb_analyze(); lmb_dump_all(); -- cgit v1.2.3 From 9422273ba7d139537720c8c47514925d9a621e0d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Feb 2008 18:31:41 -0800 Subject: [SPARC64]: Fully use LMB information in bootmem_init(). Signed-off-by: David S. 
Miller --- arch/sparc64/mm/init.c | 100 +++++++++---------------------------------------- 1 file changed, 18 insertions(+), 82 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 3e6c4ecb8fe..90e644a0e93 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -722,57 +722,12 @@ out: static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn, unsigned long end_pfn) { - unsigned long avoid_start, avoid_end, bootmap_size; - int i; + unsigned long bootmap_size; bootmap_size = bootmem_bootmap_pages(end_pfn - start_pfn); bootmap_size <<= PAGE_SHIFT; - avoid_start = avoid_end = 0; -#ifdef CONFIG_BLK_DEV_INITRD - avoid_start = initrd_start; - avoid_end = PAGE_ALIGN(initrd_end); -#endif - - for (i = 0; i < pavail_ents; i++) { - unsigned long start, end; - - start = pavail[i].phys_addr; - end = start + pavail[i].reg_size; - - while (start < end) { - if (start >= kern_base && - start < PAGE_ALIGN(kern_base + kern_size)) { - start = PAGE_ALIGN(kern_base + kern_size); - continue; - } - if (start >= avoid_start && start < avoid_end) { - start = avoid_end; - continue; - } - - if ((end - start) < bootmap_size) - break; - - if (start < kern_base && - (start + bootmap_size) > kern_base) { - start = PAGE_ALIGN(kern_base + kern_size); - continue; - } - - if (start < avoid_start && - (start + bootmap_size) > avoid_start) { - start = avoid_end; - continue; - } - - /* OK, it doesn't overlap anything, use it. */ - return start >> PAGE_SHIFT; - } - } - - prom_printf("Cannot find free area for bootmap, aborting.\n"); - prom_halt(); + return lmb_alloc(bootmap_size, PAGE_SIZE) >> PAGE_SHIFT; } static void __init find_ramdisk(unsigned long phys_base) @@ -825,8 +780,7 @@ static void __init find_ramdisk(unsigned long phys_base) static unsigned long __init bootmem_init(unsigned long *pages_avail, unsigned long phys_base) { - unsigned long bootmap_size, end_pfn; - unsigned long bootmap_pfn, size; + unsigned long end_pfn; int i; *pages_avail = lmb_phys_mem_size() >> PAGE_SHIFT; @@ -836,49 +790,31 @@ static unsigned long __init bootmem_init(unsigned long *pages_avail, max_pfn = max_low_pfn = end_pfn; min_low_pfn = (phys_base >> PAGE_SHIFT); - bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn); - - bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, - min_low_pfn, end_pfn); + init_bootmem_node(NODE_DATA(0), + choose_bootmap_pfn(min_low_pfn, end_pfn), + min_low_pfn, end_pfn); /* Now register the available physical memory with the * allocator. */ - for (i = 0; i < pavail_ents; i++) - free_bootmem(pavail[i].phys_addr, pavail[i].reg_size); + for (i = 0; i < lmb.memory.cnt; i++) + free_bootmem(lmb.memory.region[i].base, + lmb_size_bytes(&lmb.memory, i)); -#ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start) { - size = initrd_end - initrd_start; - - /* Reserve the initrd image area. */ - reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT); + for (i = 0; i < lmb.reserved.cnt; i++) + reserve_bootmem(lmb.reserved.region[i].base, + lmb_size_bytes(&lmb.reserved, i), + BOOTMEM_DEFAULT); - initrd_start += PAGE_OFFSET; - initrd_end += PAGE_OFFSET; - } -#endif - /* Reserve the kernel text/data/bss. */ - reserve_bootmem(kern_base, kern_size, BOOTMEM_DEFAULT); *pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT; - /* Add back in the initmem pages. 
*/ - size = ((unsigned long)(__init_end) & PAGE_MASK) - - PAGE_ALIGN((unsigned long)__init_begin); - *pages_avail += size >> PAGE_SHIFT; + for (i = 0; i < lmb.memory.cnt; ++i) { + unsigned long start_pfn, end_pfn, pages; - /* Reserve the bootmem map. We do not account for it - * in pages_avail because we will release that memory - * in free_all_bootmem. - */ - size = bootmap_size; - reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT); - - for (i = 0; i < pavail_ents; i++) { - unsigned long start_pfn, end_pfn; + pages = lmb_size_pages(&lmb.memory, i); + start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; + end_pfn = start_pfn + pages; - start_pfn = pavail[i].phys_addr >> PAGE_SHIFT; - end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT)); memory_present(0, start_pfn, end_pfn); } -- cgit v1.2.3 From b97094560b991af5c62391014e72bfa4c3a3701f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Feb 2008 19:20:45 -0800 Subject: [SPARC64]: Call real_setup_per_cpu_areas() earlier and use lmb_alloc(). We have to do it like this before we can move the PROM and MDESC device tree code over to using lmb_alloc(). Signed-off-by: David S. Miller --- arch/sparc64/kernel/smp.c | 11 ++++++++--- arch/sparc64/mm/init.c | 8 ++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 59f020d69d4..524b8892094 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include @@ -1431,7 +1431,7 @@ EXPORT_SYMBOL(__per_cpu_shift); void __init real_setup_per_cpu_areas(void) { - unsigned long goal, size, i; + unsigned long paddr, goal, size, i; char *ptr; /* Copy section for each CPU (we discard the original) */ @@ -1441,8 +1441,13 @@ void __init real_setup_per_cpu_areas(void) for (size = PAGE_SIZE; size < goal; size <<= 1UL) __per_cpu_shift++; - ptr = alloc_bootmem_pages(size * NR_CPUS); + paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE); + if (!paddr) { + prom_printf("Cannot allocate per-cpu memory.\n"); + prom_halt(); + } + ptr = __va(paddr); __per_cpu_base = ptr - __per_cpu_start; for (i = 0; i < NR_CPUS; i++, ptr += size) diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 90e644a0e93..658ec462ed4 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1208,6 +1208,12 @@ void __init paging_init(void) if (tlb_type == hypervisor) sun4v_ktsb_register(); + /* We must setup the per-cpu areas before we pull in the + * PROM and the MDESC. The code there fills in cpu and + * other information into per-cpu data structures. + */ + real_setup_per_cpu_areas(); + /* Setup bootmem... */ pages_avail = 0; last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base); @@ -1216,8 +1222,6 @@ void __init paging_init(void) kernel_physical_mapping_init(); - real_setup_per_cpu_areas(); - prom_build_devicetree(); if (tlb_type == hypervisor) -- cgit v1.2.3 From ad072004ca35a9918964ca7aee2bf00d79c8657f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Feb 2008 19:21:51 -0800 Subject: [SPARC64]: Use lmb_alloc() for PROM device tree. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/prom.c | 14 +++++++++----- arch/sparc64/mm/init.c | 4 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c index 68964ddcde1..ed03a18d3b3 100644 --- a/arch/sparc64/kernel/prom.c +++ b/arch/sparc64/kernel/prom.c @@ -19,8 +19,8 @@ #include #include #include -#include #include +#include #include #include @@ -122,16 +122,20 @@ int of_find_in_proplist(const char *list, const char *match, int len) } EXPORT_SYMBOL(of_find_in_proplist); -static unsigned int prom_early_allocated; +static unsigned int prom_early_allocated __initdata; static void * __init prom_early_alloc(unsigned long size) { + unsigned long paddr = lmb_alloc(size, SMP_CACHE_BYTES); void *ret; - ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); - if (ret != NULL) - memset(ret, 0, size); + if (!paddr) { + prom_printf("prom_early_alloc(%lu) failed\n"); + prom_halt(); + } + ret = __va(paddr); + memset(ret, 0, size); prom_early_allocated += size; return ret; diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 658ec462ed4..0abefc8ca40 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1214,6 +1214,8 @@ void __init paging_init(void) */ real_setup_per_cpu_areas(); + prom_build_devicetree(); + /* Setup bootmem... */ pages_avail = 0; last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base); @@ -1222,8 +1224,6 @@ void __init paging_init(void) kernel_physical_mapping_init(); - prom_build_devicetree(); - if (tlb_type == hypervisor) sun4v_mdesc_init(); -- cgit v1.2.3 From 4a28333984be123d9c063df23175c48749c4b4a0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Feb 2008 19:22:23 -0800 Subject: [SPARC64]: Initialize MDESC earlier and use lmb_alloc() Signed-off-by: David S. Miller --- arch/sparc64/kernel/mdesc.c | 28 ++++++++++++++++------------ arch/sparc64/mm/init.c | 6 +++--- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c index 91008358956..dde52bcf5c6 100644 --- a/arch/sparc64/kernel/mdesc.c +++ b/arch/sparc64/kernel/mdesc.c @@ -1,10 +1,10 @@ /* mdesc.c: Sun4V machine description handling. * - * Copyright (C) 2007 David S. Miller + * Copyright (C) 2007, 2008 David S. 
Miller */ #include #include -#include +#include #include #include #include @@ -84,24 +84,28 @@ static void mdesc_handle_init(struct mdesc_handle *hp, hp->handle_size = handle_size; } -static struct mdesc_handle * __init mdesc_bootmem_alloc(unsigned int mdesc_size) +static struct mdesc_handle * __init mdesc_lmb_alloc(unsigned int mdesc_size) { - struct mdesc_handle *hp; unsigned int handle_size, alloc_size; + struct mdesc_handle *hp; + unsigned long paddr; handle_size = (sizeof(struct mdesc_handle) - sizeof(struct mdesc_hdr) + mdesc_size); alloc_size = PAGE_ALIGN(handle_size); - hp = __alloc_bootmem(alloc_size, PAGE_SIZE, 0UL); - if (hp) - mdesc_handle_init(hp, handle_size, hp); + paddr = lmb_alloc(alloc_size, PAGE_SIZE); + hp = NULL; + if (paddr) { + hp = __va(paddr); + mdesc_handle_init(hp, handle_size, hp); + } return hp; } -static void mdesc_bootmem_free(struct mdesc_handle *hp) +static void mdesc_lmb_free(struct mdesc_handle *hp) { unsigned int alloc_size, handle_size = hp->handle_size; unsigned long start, end; @@ -124,9 +128,9 @@ static void mdesc_bootmem_free(struct mdesc_handle *hp) } } -static struct mdesc_mem_ops bootmem_mdesc_ops = { - .alloc = mdesc_bootmem_alloc, - .free = mdesc_bootmem_free, +static struct mdesc_mem_ops lmb_mdesc_ops = { + .alloc = mdesc_lmb_alloc, + .free = mdesc_lmb_free, }; static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) @@ -888,7 +892,7 @@ void __init sun4v_mdesc_init(void) printk("MDESC: Size is %lu bytes.\n", len); - hp = mdesc_alloc(len, &bootmem_mdesc_ops); + hp = mdesc_alloc(len, &lmb_mdesc_ops); if (hp == NULL) { prom_printf("MDESC: alloc of %lu bytes failed.\n", len); prom_halt(); diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 0abefc8ca40..8e0e8678712 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -1216,6 +1216,9 @@ void __init paging_init(void) prom_build_devicetree(); + if (tlb_type == hypervisor) + sun4v_mdesc_init(); + /* Setup bootmem... */ pages_avail = 0; last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base); @@ -1224,9 +1227,6 @@ void __init paging_init(void) kernel_physical_mapping_init(); - if (tlb_type == hypervisor) - sun4v_mdesc_init(); - { unsigned long zones_size[MAX_NR_ZONES]; unsigned long zholes_size[MAX_NR_ZONES]; -- cgit v1.2.3 From ce3b1d47a8605905b666649ea309b6471a75b4ed Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 Mar 2008 03:54:09 -0700 Subject: [SPARC64]: Once we have the boot cmdline, call parse_early_param() Signed-off-by: David S. Miller --- arch/sparc64/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 6acb4c51cfe..46bcf41097a 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -281,6 +281,7 @@ void __init setup_arch(char **cmdline_p) /* Initialize PROM console and command line. */ *cmdline_p = prom_getbootargs(); strcpy(boot_command_line, *cmdline_p); + parse_early_param(); boot_flags_init(*cmdline_p); register_console(&prom_early_console); -- cgit v1.2.3 From 0c49a573ea93f777fd27f26b7853e7bf5a20e1a3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 13 Feb 2008 22:15:46 -0800 Subject: [SPARC64]: Kill pci_iommu_table_init() declaration. No longer exists. Signed-off-by: David S. 
Miller --- arch/sparc64/kernel/pci_impl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/pci_impl.h b/arch/sparc64/kernel/pci_impl.h index 4a50da13ce4..f79c923fdd8 100644 --- a/arch/sparc64/kernel/pci_impl.h +++ b/arch/sparc64/kernel/pci_impl.h @@ -161,8 +161,6 @@ extern struct pci_pbm_info *pci_pbm_root; extern int pci_num_pbms; /* PCI bus scanning and fixup support. */ -extern void pci_iommu_table_init(struct iommu *iommu, int tsbsize, - u32 dma_offset, u32 dma_addr_mask); extern void pci_get_pbm_props(struct pci_pbm_info *pbm); extern struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm); extern void pci_determine_mem_io_space(struct pci_pbm_info *pbm); -- cgit v1.2.3 From c1b1a5f1f1b2612b69b67381b223bce9f8ec4da5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 Mar 2008 04:52:48 -0700 Subject: [SPARC64]: NUMA device infrastructure. Record and propagate NUMA information for devices. Signed-off-by: David S. Miller --- arch/sparc64/kernel/ebus.c | 1 + arch/sparc64/kernel/iommu.c | 33 +++++++++++++++++++++------------ arch/sparc64/kernel/isa.c | 1 + arch/sparc64/kernel/of_device.c | 12 +++++++++++- arch/sparc64/kernel/pci.c | 12 ++++++++++++ arch/sparc64/kernel/pci_fire.c | 5 ++++- arch/sparc64/kernel/pci_impl.h | 2 ++ arch/sparc64/kernel/pci_msi.c | 8 +++++++- arch/sparc64/kernel/pci_psycho.c | 5 ++++- arch/sparc64/kernel/pci_sabre.c | 4 +++- arch/sparc64/kernel/pci_schizo.c | 5 ++++- arch/sparc64/kernel/pci_sun4v.c | 13 ++++++++++--- arch/sparc64/kernel/sbus.c | 3 ++- 13 files changed, 82 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/ebus.c b/arch/sparc64/kernel/ebus.c index 04ab81cb4f4..bc263227484 100644 --- a/arch/sparc64/kernel/ebus.c +++ b/arch/sparc64/kernel/ebus.c @@ -396,6 +396,7 @@ static void __init fill_ebus_device(struct device_node *dp, struct linux_ebus_de sd->op = &dev->ofdev; sd->iommu = dev->bus->ofdev.dev.parent->archdata.iommu; sd->stc = dev->bus->ofdev.dev.parent->archdata.stc; + sd->numa_node = dev->bus->ofdev.dev.parent->archdata.numa_node; dev->ofdev.node = dp; dev->ofdev.dev.parent = &dev->bus->ofdev.dev; diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c index 756fa24eeef..2a37a6ca2a1 100644 --- a/arch/sparc64/kernel/iommu.c +++ b/arch/sparc64/kernel/iommu.c @@ -173,9 +173,11 @@ void iommu_range_free(struct iommu *iommu, dma_addr_t dma_addr, unsigned long np } int iommu_table_init(struct iommu *iommu, int tsbsize, - u32 dma_offset, u32 dma_addr_mask) + u32 dma_offset, u32 dma_addr_mask, + int numa_node) { - unsigned long i, tsbbase, order, sz, num_tsb_entries; + unsigned long i, order, sz, num_tsb_entries; + struct page *page; num_tsb_entries = tsbsize / sizeof(iopte_t); @@ -188,11 +190,12 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, /* Allocate and initialize the free area map. */ sz = num_tsb_entries / 8; sz = (sz + 7UL) & ~7UL; - iommu->arena.map = kzalloc(sz, GFP_KERNEL); + iommu->arena.map = kmalloc_node(sz, GFP_KERNEL, numa_node); if (!iommu->arena.map) { printk(KERN_ERR "IOMMU: Error, kmalloc(arena.map) failed.\n"); return -ENOMEM; } + memset(iommu->arena.map, 0, sz); iommu->arena.limit = num_tsb_entries; if (tlb_type != hypervisor) @@ -201,21 +204,23 @@ int iommu_table_init(struct iommu *iommu, int tsbsize, /* Allocate and initialize the dummy page which we * set inactive IO PTEs to point to. 
*/ - iommu->dummy_page = get_zeroed_page(GFP_KERNEL); - if (!iommu->dummy_page) { + page = alloc_pages_node(numa_node, GFP_KERNEL, 0); + if (!page) { printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n"); goto out_free_map; } + iommu->dummy_page = (unsigned long) page_address(page); + memset((void *)iommu->dummy_page, 0, PAGE_SIZE); iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page); /* Now allocate and setup the IOMMU page table itself. */ order = get_order(tsbsize); - tsbbase = __get_free_pages(GFP_KERNEL, order); - if (!tsbbase) { + page = alloc_pages_node(numa_node, GFP_KERNEL, order); + if (!page) { printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n"); goto out_free_dummy_page; } - iommu->page_table = (iopte_t *)tsbbase; + iommu->page_table = (iopte_t *)page_address(page); for (i = 0; i < num_tsb_entries; i++) iopte_make_dummy(iommu, &iommu->page_table[i]); @@ -276,20 +281,24 @@ static inline void iommu_free_ctx(struct iommu *iommu, int ctx) static void *dma_4u_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp) { + unsigned long flags, order, first_page; struct iommu *iommu; + struct page *page; + int npages, nid; iopte_t *iopte; - unsigned long flags, order, first_page; void *ret; - int npages; size = IO_PAGE_ALIGN(size); order = get_order(size); if (order >= 10) return NULL; - first_page = __get_free_pages(gfp, order); - if (first_page == 0UL) + nid = dev->archdata.numa_node; + page = alloc_pages_node(nid, gfp, order); + if (unlikely(!page)) return NULL; + + first_page = (unsigned long) page_address(page); memset((char *)first_page, 0, PAGE_SIZE << order); iommu = dev->archdata.iommu; diff --git a/arch/sparc64/kernel/isa.c b/arch/sparc64/kernel/isa.c index b5f7b354084..a2af5ed784c 100644 --- a/arch/sparc64/kernel/isa.c +++ b/arch/sparc64/kernel/isa.c @@ -92,6 +92,7 @@ static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br) sd->op = &isa_dev->ofdev; sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu; sd->stc = isa_br->ofdev.dev.parent->archdata.stc; + sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node; isa_dev->ofdev.node = dp; isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev; diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c index 0fd9db95b89..9e58e8cba1c 100644 --- a/arch/sparc64/kernel/of_device.c +++ b/arch/sparc64/kernel/of_device.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -660,6 +661,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op, struct device_node *dp = op->node; struct device_node *pp, *ip; unsigned int orig_irq = irq; + int nid; if (irq == 0xffffffff) return irq; @@ -672,7 +674,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op, printk("%s: direct translate %x --> %x\n", dp->full_name, orig_irq, irq); - return irq; + goto out; } /* Something more complicated. 
Walk up to the root, applying @@ -744,6 +746,14 @@ static unsigned int __init build_one_device_irq(struct of_device *op, printk("%s: Apply IRQ trans [%s] %x --> %x\n", op->node->full_name, ip->full_name, orig_irq, irq); +out: + nid = of_node_to_nid(dp); + if (nid != -1) { + cpumask_t numa_mask = node_to_cpumask(nid); + + irq_set_affinity(irq, numa_mask); + } + return irq; } diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 545356b00e2..49f91276651 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -369,10 +369,12 @@ struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, sd->host_controller = pbm; sd->prom_node = node; sd->op = of_find_device_by_node(node); + sd->numa_node = pbm->numa_node; sd = &sd->op->dev.archdata; sd->iommu = pbm->iommu; sd->stc = &pbm->stc; + sd->numa_node = pbm->numa_node; type = of_get_property(node, "device_type", NULL); if (type == NULL) @@ -1159,6 +1161,16 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return 0; } +#ifdef CONFIG_NUMA +int pcibus_to_node(struct pci_bus *pbus) +{ + struct pci_pbm_info *pbm = pbus->sysdata; + + return pbm->numa_node; +} +EXPORT_SYMBOL(pcibus_to_node); +#endif + /* Return the domain nuber for this pci bus */ int pci_domain_nr(struct pci_bus *pbus) diff --git a/arch/sparc64/kernel/pci_fire.c b/arch/sparc64/kernel/pci_fire.c index 7571ed56314..d23bb6f53cd 100644 --- a/arch/sparc64/kernel/pci_fire.c +++ b/arch/sparc64/kernel/pci_fire.c @@ -71,7 +71,8 @@ static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm) */ fire_write(iommu->iommu_flushinv, ~(u64)0); - err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); + err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask, + pbm->numa_node); if (err) return err; @@ -449,6 +450,8 @@ static int __init pci_fire_pbm_init(struct pci_controller_info *p, pbm->next = pci_pbm_root; pci_pbm_root = pbm; + pbm->numa_node = -1; + pbm->scan_bus = pci_fire_scan_bus; pbm->pci_ops = &sun4u_pci_ops; pbm->config_space_reg_bits = 12; diff --git a/arch/sparc64/kernel/pci_impl.h b/arch/sparc64/kernel/pci_impl.h index f79c923fdd8..218bac4ff79 100644 --- a/arch/sparc64/kernel/pci_impl.h +++ b/arch/sparc64/kernel/pci_impl.h @@ -148,6 +148,8 @@ struct pci_pbm_info { struct pci_bus *pci_bus; void (*scan_bus)(struct pci_pbm_info *); struct pci_ops *pci_ops; + + int numa_node; }; struct pci_controller_info { diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c index d6d64b44af6..db5e8fd8f67 100644 --- a/arch/sparc64/kernel/pci_msi.c +++ b/arch/sparc64/kernel/pci_msi.c @@ -279,11 +279,17 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, unsigned long devino) { int irq = ops->msiq_build_irq(pbm, msiqid, devino); - int err; + int err, nid; if (irq < 0) return irq; + nid = pbm->numa_node; + if (nid != -1) { + cpumask_t numa_mask = node_to_cpumask(nid); + + irq_set_affinity(irq, numa_mask); + } err = request_irq(irq, sparc64_msiq_interrupt, 0, "MSIQ", &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]); diff --git a/arch/sparc64/kernel/pci_psycho.c b/arch/sparc64/kernel/pci_psycho.c index 0bad96e5d18..994dbe0603d 100644 --- a/arch/sparc64/kernel/pci_psycho.c +++ b/arch/sparc64/kernel/pci_psycho.c @@ -848,7 +848,8 @@ static int psycho_iommu_init(struct pci_pbm_info *pbm) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ - err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff); + err = iommu_table_init(iommu, IO_TSB_SIZE, 0xc0000000, 0xffffffff, + 
pbm->numa_node); if (err) return err; @@ -979,6 +980,8 @@ static void __init psycho_pbm_init(struct pci_controller_info *p, pbm->next = pci_pbm_root; pci_pbm_root = pbm; + pbm->numa_node = -1; + pbm->scan_bus = psycho_scan_bus; pbm->pci_ops = &sun4u_pci_ops; pbm->config_space_reg_bits = 8; diff --git a/arch/sparc64/kernel/pci_sabre.c b/arch/sparc64/kernel/pci_sabre.c index 1c5f5fa2339..4c34195baf3 100644 --- a/arch/sparc64/kernel/pci_sabre.c +++ b/arch/sparc64/kernel/pci_sabre.c @@ -704,7 +704,7 @@ static int sabre_iommu_init(struct pci_pbm_info *pbm, * in pci_iommu.c */ err = iommu_table_init(iommu, tsbsize * 1024 * 8, - dvma_offset, dma_mask); + dvma_offset, dma_mask, pbm->numa_node); if (err) return err; @@ -737,6 +737,8 @@ static void __init sabre_pbm_init(struct pci_controller_info *p, pbm->name = dp->full_name; printk("%s: SABRE PCI Bus Module\n", pbm->name); + pbm->numa_node = -1; + pbm->scan_bus = sabre_scan_bus; pbm->pci_ops = &sun4u_pci_ops; pbm->config_space_reg_bits = 8; diff --git a/arch/sparc64/kernel/pci_schizo.c b/arch/sparc64/kernel/pci_schizo.c index e3060936232..615edd9c8e2 100644 --- a/arch/sparc64/kernel/pci_schizo.c +++ b/arch/sparc64/kernel/pci_schizo.c @@ -1220,7 +1220,8 @@ static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm) /* Leave diag mode enabled for full-flushing done * in pci_iommu.c */ - err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask); + err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask, + pbm->numa_node); if (err) return err; @@ -1379,6 +1380,8 @@ static int __init schizo_pbm_init(struct pci_controller_info *p, pbm->next = pci_pbm_root; pci_pbm_root = pbm; + pbm->numa_node = -1; + pbm->scan_bus = schizo_scan_bus; pbm->pci_ops = &sun4u_pci_ops; pbm->config_space_reg_bits = 8; diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c index 01839706bd5..e2bb9790039 100644 --- a/arch/sparc64/kernel/pci_sun4v.c +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -127,10 +127,12 @@ static inline long iommu_batch_end(void) static void *dma_4v_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp) { - struct iommu *iommu; unsigned long flags, order, first_page, npages, n; + struct iommu *iommu; + struct page *page; void *ret; long entry; + int nid; size = IO_PAGE_ALIGN(size); order = get_order(size); @@ -139,10 +141,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, npages = size >> IO_PAGE_SHIFT; - first_page = __get_free_pages(gfp, order); - if (unlikely(first_page == 0UL)) + nid = dev->archdata.numa_node; + page = alloc_pages_node(nid, gfp, order); + if (unlikely(!page)) return NULL; + first_page = (unsigned long) page_address(page); memset((char *)first_page, 0, PAGE_SIZE << order); iommu = dev->archdata.iommu; @@ -899,6 +903,8 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p, pbm->next = pci_pbm_root; pci_pbm_root = pbm; + pbm->numa_node = of_node_to_nid(dp); + pbm->scan_bus = pci_sun4v_scan_bus; pbm->pci_ops = &sun4v_pci_ops; pbm->config_space_reg_bits = 12; @@ -913,6 +919,7 @@ static void __init pci_sun4v_pbm_init(struct pci_controller_info *p, pbm->name = dp->full_name; printk("%s: SUN4V PCI Bus Module\n", pbm->name); + printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node); pci_determine_mem_io_space(pbm); diff --git a/arch/sparc64/kernel/sbus.c b/arch/sparc64/kernel/sbus.c index d1fb13ba02b..fa2827c4a3a 100644 --- a/arch/sparc64/kernel/sbus.c +++ b/arch/sparc64/kernel/sbus.c @@ -544,6 +544,7 @@ static void __init 
sbus_iommu_init(int __node, struct sbus_bus *sbus) sbus->ofdev.dev.archdata.iommu = iommu; sbus->ofdev.dev.archdata.stc = strbuf; + sbus->ofdev.dev.archdata.numa_node = -1; reg_base = regs + SYSIO_IOMMUREG_BASE; iommu->iommu_control = reg_base + IOMMU_CONTROL; @@ -575,7 +576,7 @@ static void __init sbus_iommu_init(int __node, struct sbus_bus *sbus) sbus->portid, regs); /* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */ - if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff)) + if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1)) goto fatal_memory_error; control = upa_readq(iommu->iommu_control); -- cgit v1.2.3 From 1f261ef53ba06658dfeb5a9c3007d0ad1b85cadf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 19 Mar 2008 04:53:58 -0700 Subject: [SPARC64]: Allocate TSB node-local. Signed-off-by: David S. Miller --- arch/sparc64/mm/tsb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index a3e6e4b635b..fe70c8a557b 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -321,7 +321,8 @@ retry_tsb_alloc: if (new_size > (PAGE_SIZE * 2)) gfp_flags = __GFP_NOWARN | __GFP_NORETRY; - new_tsb = kmem_cache_alloc(tsb_caches[new_cache_index], gfp_flags); + new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index], + gfp_flags, numa_node_id()); if (unlikely(!new_tsb)) { /* Not being able to fork due to a high-order TSB * allocation failure is very bad behavior. Just back -- cgit v1.2.3 From 919ee677b656c52c5f86d3d916786891220d5452 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Apr 2008 05:40:25 -0700 Subject: [SPARC64]: Add NUMA support. Currently there is only code to parse NUMA attributes on sun4v/niagara systems, but later on we will add such parsing for older systems. Signed-off-by: David S. Miller --- arch/sparc64/Kconfig | 20 ++ arch/sparc64/defconfig | 99 ++++-- arch/sparc64/kernel/sysfs.c | 12 + arch/sparc64/mm/init.c | 796 ++++++++++++++++++++++++++++++++++++++------ 4 files changed, 792 insertions(+), 135 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index df3eacb5ca1..8acc5cc3862 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -250,6 +250,26 @@ endchoice endmenu +config NUMA + bool "NUMA support" + +config NODES_SHIFT + int + default "4" + depends on NEED_MULTIPLE_NODES + +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. See memmap_init_zone() +# for details. 
+config NODES_SPAN_OTHER_NODES + def_bool y + depends on NEED_MULTIPLE_NODES + +config ARCH_POPULATES_NODE_MAP + def_bool y + config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig index e1835868ad3..92f79680f70 100644 --- a/arch/sparc64/defconfig +++ b/arch/sparc64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.25 -# Sun Apr 20 01:33:21 2008 +# Linux kernel version: 2.6.25-numa +# Wed Apr 23 04:49:08 2008 # CONFIG_SPARC=y CONFIG_SPARC64=y @@ -152,6 +152,8 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_HUGETLB_PAGE_SIZE_4MB=y # CONFIG_HUGETLB_PAGE_SIZE_512K is not set # CONFIG_HUGETLB_PAGE_SIZE_64K is not set +# CONFIG_NUMA is not set +CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y @@ -787,7 +789,6 @@ CONFIG_I2C_ALGOBIT=y # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCF8591 is not set -# CONFIG_TPS65010 is not set # CONFIG_SENSORS_MAX6875 is not set # CONFIG_SENSORS_TSL2550 is not set # CONFIG_I2C_DEBUG_CORE is not set @@ -869,6 +870,7 @@ CONFIG_SSB_POSSIBLE=y # Multifunction device drivers # # CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices @@ -1219,10 +1221,6 @@ CONFIG_USB_STORAGE=m # CONFIG_NEW_LEDS is not set # CONFIG_INFINIBAND is not set # CONFIG_RTC_CLASS is not set - -# -# Userspace I/O -# # CONFIG_UIO is not set # @@ -1399,6 +1397,7 @@ CONFIG_SCHEDSTATS=y CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_BOOT_PRINTK_DELAY is not set @@ -1425,53 +1424,82 @@ CONFIG_ASYNC_CORE=m CONFIG_ASYNC_MEMCPY=m CONFIG_ASYNC_XOR=m CONFIG_CRYPTO=y + +# +# Crypto core or helper +# CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_AEAD=y CONFIG_CRYPTO_BLKCIPHER=y -# CONFIG_CRYPTO_SEQIV is not set CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_GF128MUL=m +CONFIG_CRYPTO_NULL=m +# CONFIG_CRYPTO_CRYPTD is not set +CONFIG_CRYPTO_AUTHENC=y +CONFIG_CRYPTO_TEST=m + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_LRW=m +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_XTS=m + +# +# Hash modes +# CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=y -CONFIG_CRYPTO_NULL=m + +# +# Digest +# +CONFIG_CRYPTO_CRC32C=m CONFIG_CRYPTO_MD4=y CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_GF128MUL=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_XTS=m -# CONFIG_CRYPTO_CTR is not set -# CONFIG_CRYPTO_GCM is not set -# CONFIG_CRYPTO_CCM is not set -# CONFIG_CRYPTO_CRYPTD is not set -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_TWOFISH_COMMON=m -CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_WP512=m + +# +# Ciphers +# CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_DES=y +CONFIG_CRYPTO_FCRYPT=m 
CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_SEED=m # CONFIG_CRYPTO_SALSA20 is not set +CONFIG_CRYPTO_SEED=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_TEA=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_TWOFISH_COMMON=m + +# +# Compression +# CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_CRC32C=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_AUTHENC=y # CONFIG_CRYPTO_LZO is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set @@ -1492,3 +1520,4 @@ CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y +CONFIG_HAVE_LMB=y diff --git a/arch/sparc64/kernel/sysfs.c b/arch/sparc64/kernel/sysfs.c index 52816c7be0b..e885034a6b7 100644 --- a/arch/sparc64/kernel/sysfs.c +++ b/arch/sparc64/kernel/sysfs.c @@ -273,10 +273,22 @@ static void __init check_mmu_stats(void) mmu_stats_supported = 1; } +static void register_nodes(void) +{ +#ifdef CONFIG_NUMA + int i; + + for (i = 0; i < MAX_NUMNODES; i++) + register_one_node(i); +#endif +} + static int __init topology_init(void) { int cpu; + register_nodes(); + check_mmu_stats(); register_cpu_notifier(&sysfs_cpu_nb); diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 8e0e8678712..177d8aaeec4 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -73,9 +74,7 @@ extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES]; #define MAX_BANKS 32 static struct linux_prom64_registers pavail[MAX_BANKS] __initdata; -static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; static int pavail_ents __initdata; -static int pavail_rescan_ents __initdata; static int cmp_p64(const void *a, const void *b) { @@ -716,19 +715,28 @@ out: smp_new_mmu_context_version(); } -/* Find a free area for the bootmem map, avoiding the kernel image - * and the initial ramdisk. - */ -static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn, - unsigned long end_pfn) +static int numa_enabled = 1; +static int numa_debug; + +static int __init early_numa(char *p) { - unsigned long bootmap_size; + if (!p) + return 0; + + if (strstr(p, "off")) + numa_enabled = 0; - bootmap_size = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmap_size <<= PAGE_SHIFT; + if (strstr(p, "debug")) + numa_debug = 1; - return lmb_alloc(bootmap_size, PAGE_SIZE) >> PAGE_SHIFT; + return 0; } +early_param("numa", early_numa); + +#define numadbg(f, a...) \ +do { if (numa_debug) \ + printk(KERN_INFO f, ## a); \ +} while (0) static void __init find_ramdisk(unsigned long phys_base) { @@ -755,6 +763,9 @@ static void __init find_ramdisk(unsigned long phys_base) ramdisk_image -= KERNBASE; ramdisk_image += phys_base; + numadbg("Found ramdisk at physical address 0x%lx, size %u\n", + ramdisk_image, sparc_ramdisk_size); + initrd_start = ramdisk_image; initrd_end = ramdisk_image + sparc_ramdisk_size; @@ -763,60 +774,625 @@ static void __init find_ramdisk(unsigned long phys_base) #endif } -/* About pages_avail, this is the value we will use to calculate - * the zholes_size[] argument given to free_area_init_node(). The - * page allocator uses this to calculate nr_kernel_pages, - * nr_all_pages and zone->present_pages. On NUMA it is used - * to calculate zone->min_unmapped_pages and zone->min_slab_pages. - * - * So this number should really be set to what the page allocator - * actually ends up with. This means: - * 1) It should include bootmem map pages, we'll release those. 
- * 2) It should not include the kernel image, except for the - * __init sections which we will also release. - * 3) It should include the initrd image, since we'll release - * that too. +struct node_mem_mask { + unsigned long mask; + unsigned long val; + unsigned long bootmem_paddr; +}; +static struct node_mem_mask node_masks[MAX_NUMNODES]; +static int num_node_masks; + +int numa_cpu_lookup_table[NR_CPUS]; +cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; + +#ifdef CONFIG_NEED_MULTIPLE_NODES +static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; + +struct mdesc_mblock { + u64 base; + u64 size; + u64 offset; /* RA-to-PA */ +}; +static struct mdesc_mblock *mblocks; +static int num_mblocks; + +static unsigned long ra_to_pa(unsigned long addr) +{ + int i; + + for (i = 0; i < num_mblocks; i++) { + struct mdesc_mblock *m = &mblocks[i]; + + if (addr >= m->base && + addr < (m->base + m->size)) { + addr += m->offset; + break; + } + } + return addr; +} + +static int find_node(unsigned long addr) +{ + int i; + + addr = ra_to_pa(addr); + for (i = 0; i < num_node_masks; i++) { + struct node_mem_mask *p = &node_masks[i]; + + if ((addr & p->mask) == p->val) + return i; + } + return -1; +} + +static unsigned long nid_range(unsigned long start, unsigned long end, + int *nid) +{ + *nid = find_node(start); + start += PAGE_SIZE; + while (start < end) { + int n = find_node(start); + + if (n != *nid) + break; + start += PAGE_SIZE; + } + + return start; +} +#else +static unsigned long nid_range(unsigned long start, unsigned long end, + int *nid) +{ + *nid = 0; + return end; +} +#endif + +/* This must be invoked after performing all of the necessary + * add_active_range() calls for 'nid'. We need to be able to get + * correct data from get_pfn_range_for_nid(). */ -static unsigned long __init bootmem_init(unsigned long *pages_avail, - unsigned long phys_base) +static void __init allocate_node_data(int nid) +{ + unsigned long paddr, num_pages, start_pfn, end_pfn; + struct pglist_data *p; + +#ifdef CONFIG_NEED_MULTIPLE_NODES + paddr = lmb_alloc_nid(sizeof(struct pglist_data), + SMP_CACHE_BYTES, nid, nid_range); + if (!paddr) { + prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid); + prom_halt(); + } + NODE_DATA(nid) = __va(paddr); + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); + + NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; +#endif + + p = NODE_DATA(nid); + + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); + p->node_start_pfn = start_pfn; + p->node_spanned_pages = end_pfn - start_pfn; + + if (p->node_spanned_pages) { + num_pages = bootmem_bootmap_pages(p->node_spanned_pages); + + paddr = lmb_alloc_nid(num_pages << PAGE_SHIFT, PAGE_SIZE, nid, + nid_range); + if (!paddr) { + prom_printf("Cannot allocate bootmap for nid[%d]\n", + nid); + prom_halt(); + } + node_masks[nid].bootmem_paddr = paddr; + } +} + +static void init_node_masks_nonnuma(void) { - unsigned long end_pfn; int i; - *pages_avail = lmb_phys_mem_size() >> PAGE_SHIFT; - end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + numadbg("Initializing tables for non-numa.\n"); - /* Initialize the boot-time allocator. */ - max_pfn = max_low_pfn = end_pfn; - min_low_pfn = (phys_base >> PAGE_SHIFT); + node_masks[0].mask = node_masks[0].val = 0; + num_node_masks = 1; - init_bootmem_node(NODE_DATA(0), - choose_bootmap_pfn(min_low_pfn, end_pfn), - min_low_pfn, end_pfn); + for (i = 0; i < NR_CPUS; i++) + numa_cpu_lookup_table[i] = 0; - /* Now register the available physical memory with the - * allocator. 
- */ - for (i = 0; i < lmb.memory.cnt; i++) - free_bootmem(lmb.memory.region[i].base, - lmb_size_bytes(&lmb.memory, i)); + numa_cpumask_lookup_table[0] = CPU_MASK_ALL; +} + +#ifdef CONFIG_NEED_MULTIPLE_NODES +struct pglist_data *node_data[MAX_NUMNODES]; + +EXPORT_SYMBOL(numa_cpu_lookup_table); +EXPORT_SYMBOL(numa_cpumask_lookup_table); +EXPORT_SYMBOL(node_data); + +struct mdesc_mlgroup { + u64 node; + u64 latency; + u64 match; + u64 mask; +}; +static struct mdesc_mlgroup *mlgroups; +static int num_mlgroups; + +static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio, + u32 cfg_handle) +{ + u64 arc; + + mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + const u64 *val; + + val = mdesc_get_property(md, target, + "cfg-handle", NULL); + if (val && *val == cfg_handle) + return 0; + } + return -ENODEV; +} + +static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp, + u32 cfg_handle) +{ + u64 arc, candidate, best_latency = ~(u64)0; + + candidate = MDESC_NODE_NULL; + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + const char *name = mdesc_node_name(md, target); + const u64 *val; + + if (strcmp(name, "pio-latency-group")) + continue; + + val = mdesc_get_property(md, target, "latency", NULL); + if (!val) + continue; + + if (*val < best_latency) { + candidate = target; + best_latency = *val; + } + } + + if (candidate == MDESC_NODE_NULL) + return -ENODEV; + + return scan_pio_for_cfg_handle(md, candidate, cfg_handle); +} + +int of_node_to_nid(struct device_node *dp) +{ + const struct linux_prom64_registers *regs; + struct mdesc_handle *md; + u32 cfg_handle; + int count, nid; + u64 grp; + + if (!mlgroups) + return -1; + + regs = of_get_property(dp, "reg", NULL); + if (!regs) + return -1; + + cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff; + + md = mdesc_grab(); + + count = 0; + nid = -1; + mdesc_for_each_node_by_name(md, grp, "group") { + if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) { + nid = count; + break; + } + count++; + } + + mdesc_release(md); + + return nid; +} + +static void add_node_ranges(void) +{ + int i; + + for (i = 0; i < lmb.memory.cnt; i++) { + unsigned long size = lmb_size_bytes(&lmb.memory, i); + unsigned long start, end; + + start = lmb.memory.region[i].base; + end = start + size; + while (start < end) { + unsigned long this_end; + int nid; + + this_end = nid_range(start, end, &nid); + + numadbg("Adding active range nid[%d] " + "start[%lx] end[%lx]\n", + nid, start, this_end); + + add_active_range(nid, + start >> PAGE_SHIFT, + this_end >> PAGE_SHIFT); + + start = this_end; + } + } +} - for (i = 0; i < lmb.reserved.cnt; i++) - reserve_bootmem(lmb.reserved.region[i].base, - lmb_size_bytes(&lmb.reserved, i), - BOOTMEM_DEFAULT); +static int __init grab_mlgroups(struct mdesc_handle *md) +{ + unsigned long paddr; + int count = 0; + u64 node; + + mdesc_for_each_node_by_name(md, node, "memory-latency-group") + count++; + if (!count) + return -ENOENT; + + paddr = lmb_alloc(count * sizeof(struct mdesc_mlgroup), + SMP_CACHE_BYTES); + if (!paddr) + return -ENOMEM; + + mlgroups = __va(paddr); + num_mlgroups = count; + + count = 0; + mdesc_for_each_node_by_name(md, node, "memory-latency-group") { + struct mdesc_mlgroup *m = &mlgroups[count++]; + const u64 *val; + + m->node = node; + + val = mdesc_get_property(md, node, "latency", NULL); + m->latency = *val; + val = mdesc_get_property(md, node, "address-match", NULL); + m->match = *val; + val = mdesc_get_property(md, 
node, "address-mask", NULL); + m->mask = *val; + + numadbg("MLGROUP[%d]: node[%lx] latency[%lx] " + "match[%lx] mask[%lx]\n", + count - 1, m->node, m->latency, m->match, m->mask); + } - *pages_avail -= PAGE_ALIGN(kern_size) >> PAGE_SHIFT; + return 0; +} - for (i = 0; i < lmb.memory.cnt; ++i) { - unsigned long start_pfn, end_pfn, pages; +static int __init grab_mblocks(struct mdesc_handle *md) +{ + unsigned long paddr; + int count = 0; + u64 node; + + mdesc_for_each_node_by_name(md, node, "mblock") + count++; + if (!count) + return -ENOENT; + + paddr = lmb_alloc(count * sizeof(struct mdesc_mblock), + SMP_CACHE_BYTES); + if (!paddr) + return -ENOMEM; + + mblocks = __va(paddr); + num_mblocks = count; + + count = 0; + mdesc_for_each_node_by_name(md, node, "mblock") { + struct mdesc_mblock *m = &mblocks[count++]; + const u64 *val; + + val = mdesc_get_property(md, node, "base", NULL); + m->base = *val; + val = mdesc_get_property(md, node, "size", NULL); + m->size = *val; + val = mdesc_get_property(md, node, + "address-congruence-offset", NULL); + m->offset = *val; + + numadbg("MBLOCK[%d]: base[%lx] size[%lx] offset[%lx]\n", + count - 1, m->base, m->size, m->offset); + } + + return 0; +} + +static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md, + u64 grp, cpumask_t *mask) +{ + u64 arc; + + cpus_clear(*mask); + + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) { + u64 target = mdesc_arc_target(md, arc); + const char *name = mdesc_node_name(md, target); + const u64 *id; + + if (strcmp(name, "cpu")) + continue; + id = mdesc_get_property(md, target, "id", NULL); + if (*id < NR_CPUS) + cpu_set(*id, *mask); + } +} + +static struct mdesc_mlgroup * __init find_mlgroup(u64 node) +{ + int i; + + for (i = 0; i < num_mlgroups; i++) { + struct mdesc_mlgroup *m = &mlgroups[i]; + if (m->node == node) + return m; + } + return NULL; +} + +static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, + int index) +{ + struct mdesc_mlgroup *candidate = NULL; + u64 arc, best_latency = ~(u64)0; + struct node_mem_mask *n; + + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + if (!m) + continue; + if (m->latency < best_latency) { + candidate = m; + best_latency = m->latency; + } + } + if (!candidate) + return -ENOENT; + + if (num_node_masks != index) { + printk(KERN_ERR "Inconsistent NUMA state, " + "index[%d] != num_node_masks[%d]\n", + index, num_node_masks); + return -EINVAL; + } + + n = &node_masks[num_node_masks++]; + + n->mask = candidate->mask; + n->val = candidate->match; + + numadbg("NUMA NODE[%d]: mask[%lx] val[%lx] (latency[%lx])\n", + index, n->mask, n->val, candidate->latency); + + return 0; +} + +static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, + int index) +{ + cpumask_t mask; + int cpu; + + numa_parse_mdesc_group_cpus(md, grp, &mask); + + for_each_cpu_mask(cpu, mask) + numa_cpu_lookup_table[cpu] = index; + numa_cpumask_lookup_table[index] = mask; + + if (numa_debug) { + printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index); + for_each_cpu_mask(cpu, mask) + printk("%d ", cpu); + printk("]\n"); + } + + return numa_attach_mlgroup(md, grp, index); +} + +static int __init numa_parse_mdesc(void) +{ + struct mdesc_handle *md = mdesc_grab(); + int i, err, count; + u64 node; + + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); + if (node == MDESC_NODE_NULL) { + mdesc_release(md); + return -ENOENT; + } + + err = grab_mblocks(md); + if (err < 0) 
+ goto out; + + err = grab_mlgroups(md); + if (err < 0) + goto out; + + count = 0; + mdesc_for_each_node_by_name(md, node, "group") { + err = numa_parse_mdesc_group(md, node, count); + if (err < 0) + break; + count++; + } + + add_node_ranges(); + + for (i = 0; i < num_node_masks; i++) { + allocate_node_data(i); + node_set_online(i); + } + + err = 0; +out: + mdesc_release(md); + return err; +} + +static int __init numa_parse_sun4u(void) +{ + return -1; +} + +static int __init bootmem_init_numa(void) +{ + int err = -1; + + numadbg("bootmem_init_numa()\n"); + + if (numa_enabled) { + if (tlb_type == hypervisor) + err = numa_parse_mdesc(); + else + err = numa_parse_sun4u(); + } + return err; +} + +#else + +static int bootmem_init_numa(void) +{ + return -1; +} + +#endif + +static void __init bootmem_init_nonnuma(void) +{ + unsigned long top_of_ram = lmb_end_of_DRAM(); + unsigned long total_ram = lmb_phys_mem_size(); + unsigned int i; + + numadbg("bootmem_init_nonnuma()\n"); + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + + init_node_masks_nonnuma(); + + for (i = 0; i < lmb.memory.cnt; i++) { + unsigned long size = lmb_size_bytes(&lmb.memory, i); + unsigned long start_pfn, end_pfn; + + if (!size) + continue; - pages = lmb_size_pages(&lmb.memory, i); start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + pages; + end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); + add_active_range(0, start_pfn, end_pfn); + } - memory_present(0, start_pfn, end_pfn); + allocate_node_data(0); + + node_set_online(0); +} + +static void __init reserve_range_in_node(int nid, unsigned long start, + unsigned long end) +{ + numadbg(" reserve_range_in_node(nid[%d],start[%lx],end[%lx]\n", + nid, start, end); + while (start < end) { + unsigned long this_end; + int n; + + this_end = nid_range(start, end, &n); + if (n == nid) { + numadbg(" MATCH reserving range [%lx:%lx]\n", + start, this_end); + reserve_bootmem_node(NODE_DATA(nid), start, + (this_end - start), BOOTMEM_DEFAULT); + } else + numadbg(" NO MATCH, advancing start to %lx\n", + this_end); + + start = this_end; } +} + +static void __init trim_reserved_in_node(int nid) +{ + int i; + + numadbg(" trim_reserved_in_node(%d)\n", nid); + + for (i = 0; i < lmb.reserved.cnt; i++) { + unsigned long start = lmb.reserved.region[i].base; + unsigned long size = lmb_size_bytes(&lmb.reserved, i); + unsigned long end = start + size; + + reserve_range_in_node(nid, start, end); + } +} + +static void __init bootmem_init_one_node(int nid) +{ + struct pglist_data *p; + + numadbg("bootmem_init_one_node(%d)\n", nid); + + p = NODE_DATA(nid); + + if (p->node_spanned_pages) { + unsigned long paddr = node_masks[nid].bootmem_paddr; + unsigned long end_pfn; + + end_pfn = p->node_start_pfn + p->node_spanned_pages; + + numadbg(" init_bootmem_node(%d, %lx, %lx, %lx)\n", + nid, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn); + + init_bootmem_node(p, paddr >> PAGE_SHIFT, + p->node_start_pfn, end_pfn); + + numadbg(" free_bootmem_with_active_regions(%d, %lx)\n", + nid, end_pfn); + free_bootmem_with_active_regions(nid, end_pfn); + + trim_reserved_in_node(nid); + + numadbg(" sparse_memory_present_with_active_regions(%d)\n", + nid); + sparse_memory_present_with_active_regions(nid); + } +} + +static unsigned long __init bootmem_init(unsigned long phys_base) +{ + unsigned long end_pfn; + int nid; + + end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_pfn = 
max_low_pfn = end_pfn; + min_low_pfn = (phys_base >> PAGE_SHIFT); + + if (bootmem_init_numa() < 0) + bootmem_init_nonnuma(); + + /* XXX cpu notifier XXX */ + + for_each_online_node(nid) + bootmem_init_one_node(nid); sparse_init(); @@ -1112,7 +1688,7 @@ void __init setup_per_cpu_areas(void) void __init paging_init(void) { - unsigned long end_pfn, pages_avail, shift, phys_base; + unsigned long end_pfn, shift, phys_base; unsigned long real_end, i; /* These build time checkes make sure that the dcache_dirty_cpu() @@ -1220,27 +1796,21 @@ void __init paging_init(void) sun4v_mdesc_init(); /* Setup bootmem... */ - pages_avail = 0; - last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base); + last_valid_pfn = end_pfn = bootmem_init(phys_base); +#ifndef CONFIG_NEED_MULTIPLE_NODES max_mapnr = last_valid_pfn; - +#endif kernel_physical_mapping_init(); { - unsigned long zones_size[MAX_NR_ZONES]; - unsigned long zholes_size[MAX_NR_ZONES]; - int znum; + unsigned long max_zone_pfns[MAX_NR_ZONES]; - for (znum = 0; znum < MAX_NR_ZONES; znum++) - zones_size[znum] = zholes_size[znum] = 0; + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - zones_size[ZONE_NORMAL] = end_pfn; - zholes_size[ZONE_NORMAL] = end_pfn - pages_avail; + max_zone_pfns[ZONE_NORMAL] = end_pfn; - free_area_init_node(0, &contig_page_data, zones_size, - __pa(PAGE_OFFSET) >> PAGE_SHIFT, - zholes_size); + free_area_init_nodes(max_zone_pfns); } printk("Booting Linux...\n"); @@ -1249,21 +1819,52 @@ void __init paging_init(void) cpu_probe(); } -static void __init taint_real_pages(void) +int __init page_in_phys_avail(unsigned long paddr) +{ + int i; + + paddr &= PAGE_MASK; + + for (i = 0; i < pavail_ents; i++) { + unsigned long start, end; + + start = pavail[i].phys_addr; + end = start + pavail[i].reg_size; + + if (paddr >= start && paddr < end) + return 1; + } + if (paddr >= kern_base && paddr < (kern_base + kern_size)) + return 1; +#ifdef CONFIG_BLK_DEV_INITRD + if (paddr >= __pa(initrd_start) && + paddr < __pa(PAGE_ALIGN(initrd_end))) + return 1; +#endif + + return 0; +} + +static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata; +static int pavail_rescan_ents __initdata; + +/* Certain OBP calls, such as fetching "available" properties, can + * claim physical memory. So, along with initializing the valid + * address bitmap, what we do here is refetch the physical available + * memory list again, and make sure it provides at least as much + * memory as 'pavail' does. + */ +static void setup_valid_addr_bitmap_from_pavail(void) { int i; read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents); - /* Find changes discovered in the physmem available rescan and - * reserve the lost portions in the bootmem maps. 
- */ for (i = 0; i < pavail_ents; i++) { unsigned long old_start, old_end; old_start = pavail[i].phys_addr; - old_end = old_start + - pavail[i].reg_size; + old_end = old_start + pavail[i].reg_size; while (old_start < old_end) { int n; @@ -1281,7 +1882,16 @@ static void __init taint_real_pages(void) goto do_next_page; } } - reserve_bootmem(old_start, PAGE_SIZE, BOOTMEM_DEFAULT); + + prom_printf("mem_init: Lost memory in pavail\n"); + prom_printf("mem_init: OLD start[%lx] size[%lx]\n", + pavail[i].phys_addr, + pavail[i].reg_size); + prom_printf("mem_init: NEW start[%lx] size[%lx]\n", + pavail_rescan[i].phys_addr, + pavail_rescan[i].reg_size); + prom_printf("mem_init: Cannot continue, aborting.\n"); + prom_halt(); do_next_page: old_start += PAGE_SIZE; @@ -1289,32 +1899,6 @@ static void __init taint_real_pages(void) } } -int __init page_in_phys_avail(unsigned long paddr) -{ - int i; - - paddr &= PAGE_MASK; - - for (i = 0; i < pavail_rescan_ents; i++) { - unsigned long start, end; - - start = pavail_rescan[i].phys_addr; - end = start + pavail_rescan[i].reg_size; - - if (paddr >= start && paddr < end) - return 1; - } - if (paddr >= kern_base && paddr < (kern_base + kern_size)) - return 1; -#ifdef CONFIG_BLK_DEV_INITRD - if (paddr >= __pa(initrd_start) && - paddr < __pa(PAGE_ALIGN(initrd_end))) - return 1; -#endif - - return 0; -} - void __init mem_init(void) { unsigned long codepages, datapages, initpages; @@ -1337,14 +1921,26 @@ void __init mem_init(void) addr += PAGE_SIZE; } - taint_real_pages(); + setup_valid_addr_bitmap_from_pavail(); high_memory = __va(last_valid_pfn << PAGE_SHIFT); +#ifdef CONFIG_NEED_MULTIPLE_NODES + for_each_online_node(i) { + if (NODE_DATA(i)->node_spanned_pages != 0) { + totalram_pages += + free_all_bootmem_node(NODE_DATA(i)); + } + } +#else + totalram_pages = free_all_bootmem(); +#endif + /* We subtract one to account for the mem_map_zero page * allocated below. */ - totalram_pages = num_physpages = free_all_bootmem() - 1; + totalram_pages -= 1; + num_physpages = totalram_pages; /* * Set up the zero page, mark it reserved, so that page count -- cgit v1.2.3 From a5c564279854c971a27cc650be4bb32c290e9ae7 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 23 Apr 2008 19:15:34 -0700 Subject: sparc: cleanup after SunOS binary emulation removal The following cleanups are now possible: - arch/sparc/kernel/entry.S:ret_sys_call no longer has to be global - arch/sparc/kernel/signal.c:sys_sigpause() can be removed Signed-off-by: Adrian Bunk Signed-off-by: David S. 
Miller --- arch/sparc/kernel/entry.S | 1 - arch/sparc/kernel/signal.c | 5 ----- 2 files changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 135644f8add..484c83d23ee 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -1409,7 +1409,6 @@ syscall_is_too_hard: st %o0, [%sp + STACKFRAME_SZ + PT_I0] - .globl ret_sys_call ret_sys_call: ld [%curptr + TI_FLAGS], %l6 cmp %o0, -ERESTART_RESTARTBLOCK diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c index 1f730619a24..3e849e8e348 100644 --- a/arch/sparc/kernel/signal.c +++ b/arch/sparc/kernel/signal.c @@ -105,11 +105,6 @@ static int _sigpause_common(old_sigset_t set) return -ERESTARTNOHAND; } -asmlinkage int sys_sigpause(unsigned int set) -{ - return _sigpause_common(set); -} - asmlinkage int sys_sigsuspend(old_sigset_t set) { return _sigpause_common(set); -- cgit v1.2.3 From c6ca978370c57422cdcf814af1da96cfc7c24811 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 23 Apr 2008 19:15:59 -0700 Subject: sparc64: cleanup after SunOS/Solaris binary emulation removal The following cleanups are now possible: - arch/sparc64/kernel/entry.S:ret_sys_call no longer has to be global - arch/sparc64/kernel/sparc64_ksyms.c: remove no longer used prototypes Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 2 +- arch/sparc64/kernel/sparc64_ksyms.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index fb43c76bdc2..f54cd2dd55d 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -1559,7 +1559,7 @@ linux_sparc_syscall32: /* Linux native system calls enter here... */ .align 32 - .globl linux_sparc_syscall, ret_sys_call + .globl linux_sparc_syscall linux_sparc_syscall: /* Direct access to user regs, much faster. */ cmp %g1, NR_SYSCALLS ! IEU1 Group diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 38736460b8d..66336590e83 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -68,8 +68,6 @@ extern void *__memscan_zero(void *, size_t); extern void *__memscan_generic(void *, int, size_t); extern int __memcmp(const void *, const void *, __kernel_size_t); extern __kernel_size_t strlen(const char *); -extern void linux_sparc_syscall(void); -extern void rtrap(void); extern void show_regs(struct pt_regs *); extern void syscall_trace(struct pt_regs *, int); extern void sys_sigsuspend(void); -- cgit v1.2.3 From db9a7fb12c8d05104e8a541003593d62a42ade8f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Apr 2008 22:22:29 -0700 Subject: [SPARC64]: PROM debug console can be CON_ANYTIME. No per-cpu or similar resources need to be set up before we can use this console device. Signed-off-by: David S.
Miller --- arch/sparc64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 46bcf41097a..da5e6ee0c66 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -82,7 +82,7 @@ unsigned long cmdline_memory_size = 0; static struct console prom_early_console = { .name = "earlyprom", .write = prom_console_write, - .flags = CON_PRINTBUFFER | CON_BOOT, + .flags = CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME, .index = -1, }; -- cgit v1.2.3 From 8243e40acb087fcd9e7609333f0b0551391f49fc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 Apr 2008 22:52:13 -0700 Subject: [SPARC64]: Store magic cookie and trap type in pt_regs. This sets us up for several simplifications and facilities: 1) The magic cookie lets us identify trap frames more accurately in stack backtraces. 2) The trap type lets us simplify all of the "are we in a syscall" state management and checks. 3) We can now see if a task off the cpu is sleeping in a system call or not. In fact, we can see what trap it is sleeping in whatever the type. The utrace guys will use this. Based upon some discussions with Roland McGrath. Signed-off-by: David S. Miller --- arch/sparc64/kernel/etrap.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index 4b2bf9eb447..b49d3b60bc0 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -53,7 +53,11 @@ etrap_irq: stx %g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC] rd %y, %g3 stx %g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC] + rdpr %tt, %g1 st %g3, [%g2 + STACKFRAME_SZ + PT_V9_Y] + sethi %hi(PT_REGS_MAGIC), %g3 + or %g3, %g1, %g1 + st %g1, [%g2 + STACKFRAME_SZ + PT_V9_MAGIC] rdpr %cansave, %g1 brnz,pt %g1, etrap_save -- cgit v1.2.3 From 238468b2ac76020c192a7402c92df5097916bf4a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Apr 2008 03:01:48 -0700 Subject: [SPARC64]: Use trap type stored in pt_regs to handle syscall restart. Now that we can check the trap type directly, we don't need the funny restart_syscall indication from the trap return paths. Signed-off-by: David S. Miller --- arch/sparc64/kernel/signal.c | 22 +++++++++++++--------- arch/sparc64/kernel/signal32.c | 20 ++++++++------------ 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c index 1c47009eb5e..b959597201e 100644 --- a/arch/sparc64/kernel/signal.c +++ b/arch/sparc64/kernel/signal.c @@ -510,15 +510,20 @@ static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs, * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. 
*/ -static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_syscall) +static void do_signal(struct pt_regs *regs, unsigned long orig_i0) { - siginfo_t info; struct signal_deliver_cookie cookie; struct k_sigaction ka; - int signr; sigset_t *oldset; + siginfo_t info; + int signr, tt; - cookie.restart_syscall = restart_syscall; + tt = regs->magic & 0x1ff; + if (tt == 0x110 || tt == 0x111 || tt == 0x16d) { + regs->magic &= ~0x1ff; + cookie.restart_syscall = 1; + } else + cookie.restart_syscall = 0; cookie.orig_i0 = orig_i0; if (test_thread_flag(TIF_RESTORE_SIGMASK)) @@ -529,9 +534,8 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_s #ifdef CONFIG_SPARC32_COMPAT if (test_thread_flag(TIF_32BIT)) { extern void do_signal32(sigset_t *, struct pt_regs *, - unsigned long, int); - do_signal32(oldset, regs, orig_i0, - cookie.restart_syscall); + struct signal_deliver_cookie *); + do_signal32(oldset, regs, &cookie); return; } #endif @@ -539,7 +543,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0, int restart_s signr = get_signal_to_deliver(&info, &ka, regs, &cookie); if (signr > 0) { if (cookie.restart_syscall) - syscall_restart(orig_i0, regs, &ka.sa); + syscall_restart(cookie.orig_i0, regs, &ka.sa); handle_signal(signr, &ka, &info, oldset, regs); /* a signal was successfully delivered; the saved @@ -580,7 +584,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, int restart_s unsigned long thread_info_flags) { if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) - do_signal(regs, orig_i0, restart_syscall); + do_signal(regs, orig_i0); } void ptrace_signal_deliver(struct pt_regs *regs, void *cookie) diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index 74e0512f135..43cdec64d9c 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -982,20 +982,16 @@ static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs * mistake. */ void do_signal32(sigset_t *oldset, struct pt_regs * regs, - unsigned long orig_i0, int restart_syscall) + struct signal_deliver_cookie *cookie) { - siginfo_t info; - struct signal_deliver_cookie cookie; struct k_sigaction ka; + siginfo_t info; int signr; - cookie.restart_syscall = restart_syscall; - cookie.orig_i0 = orig_i0; - - signr = get_signal_to_deliver(&info, &ka, regs, &cookie); + signr = get_signal_to_deliver(&info, &ka, regs, cookie); if (signr > 0) { - if (cookie.restart_syscall) - syscall_restart32(orig_i0, regs, &ka.sa); + if (cookie->restart_syscall) + syscall_restart32(cookie->orig_i0, regs, &ka.sa); handle_signal32(signr, &ka, &info, oldset, regs); /* a signal was successfully delivered; the saved @@ -1007,16 +1003,16 @@ void do_signal32(sigset_t *oldset, struct pt_regs * regs, clear_thread_flag(TIF_RESTORE_SIGMASK); return; } - if (cookie.restart_syscall && + if (cookie->restart_syscall && (regs->u_regs[UREG_I0] == ERESTARTNOHAND || regs->u_regs[UREG_I0] == ERESTARTSYS || regs->u_regs[UREG_I0] == ERESTARTNOINTR)) { /* replay the system call when we are done */ - regs->u_regs[UREG_I0] = cookie.orig_i0; + regs->u_regs[UREG_I0] = cookie->orig_i0; regs->tpc -= 4; regs->tnpc -= 4; } - if (cookie.restart_syscall && + if (cookie->restart_syscall && regs->u_regs[UREG_I0] == ERESTART_RESTARTBLOCK) { regs->u_regs[UREG_G1] = __NR_restart_syscall; regs->tpc -= 4; -- cgit v1.2.3 From 7697daaa894ca2bc5cd652269c316bcdc3ec441b Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Thu, 24 Apr 2008 03:15:22 -0700 Subject: [SPARC64]: %l6 trap return handling no longer necessary. Now that we indicate the "restart system call" in the trap type field of pt_regs->magic, we don't need to set the %l6 boolean in all of the trap return paths. And we therefore don't need to pass it to do_notify_resume(). Signed-off-by: David S. Miller --- arch/sparc64/kernel/entry.S | 35 +++++++++++++++++------------------ arch/sparc64/kernel/entry.h | 1 - arch/sparc64/kernel/rtrap.S | 21 ++++----------------- arch/sparc64/kernel/signal.c | 3 +-- arch/sparc64/kernel/sun4v_tlb_miss.S | 16 ++++++++-------- arch/sparc64/kernel/tsb.S | 2 +- arch/sparc64/kernel/winfixup.S | 12 ++++++------ arch/sparc64/mm/ultra.S | 4 +--- 8 files changed, 38 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index f54cd2dd55d..fd06e937ae1 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -47,7 +47,7 @@ do_fpdis: ba,pt %xcc, etrap 109: or %g7, %lo(109b), %g7 add %g0, %g0, %g0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap 1: TRAP_LOAD_THREAD_REG(%g6, %g1) ldub [%g6 + TI_FPSAVED], %g5 @@ -226,7 +226,7 @@ fp_other_bounce: call do_fpother add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop .globl do_fpother_check_fitos .align 32 @@ -489,7 +489,7 @@ utrap_trap: /* %g3=handler,%g4=level */ call bad_trap add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop invoke_utrap: sllx %g3, 3, %g3 @@ -607,7 +607,7 @@ __spitfire_cee_trap_continue: call spitfire_access_error add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop /* This is the trap handler entry point for ECC correctable * errors. They are corrected, but we listen for the trap @@ -686,7 +686,7 @@ __spitfire_data_access_exception_tl1: call spitfire_data_access_exception_tl1 add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop __spitfire_data_access_exception: rdpr %pstate, %g4 @@ -705,7 +705,7 @@ __spitfire_data_access_exception: call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop .globl __spitfire_insn_access_exception .globl __spitfire_insn_access_exception_tl1 @@ -725,7 +725,7 @@ __spitfire_insn_access_exception_tl1: call spitfire_insn_access_exception_tl1 add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop __spitfire_insn_access_exception: rdpr %pstate, %g4 @@ -743,7 +743,7 @@ __spitfire_insn_access_exception: call spitfire_insn_access_exception add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop /* These get patched into the trap table at boot time * once we know we have a cheetah processor. @@ -937,7 +937,7 @@ do_dcpe_tl1_fatal: call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 ba,pt %xcc, rtrap - clr %l6 + nop do_icpe_tl1: rdpr %tl, %g1 ! 
Save original trap level @@ -979,7 +979,7 @@ do_icpe_tl1_fatal: call cheetah_plus_parity_error add %sp, PTREGS_OFF, %o1 ba,pt %xcc, rtrap - clr %l6 + nop dcpe_icpe_tl1_common: /* Flush D-cache, re-enable D/I caches in DCU and finally @@ -1281,7 +1281,7 @@ __do_privact: call do_privact add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop .globl do_mna do_mna: @@ -1308,7 +1308,7 @@ do_mna: call mem_address_unaligned add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop .globl do_lddfmna do_lddfmna: @@ -1326,7 +1326,7 @@ do_lddfmna: call handle_lddfmna add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop .globl do_stdfmna do_stdfmna: @@ -1344,7 +1344,7 @@ do_stdfmna: call handle_stdfmna add %sp, PTREGS_OFF, %o0 ba,pt %xcc, rtrap - clr %l6 + nop .globl breakpoint_trap breakpoint_trap: @@ -1424,13 +1424,13 @@ sys32_rt_sigreturn: 1: ldx [%curptr + TI_FLAGS], %l5 andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT), %g0 be,pt %icc, rtrap - clr %l6 + nop add %sp, PTREGS_OFF, %o0 call syscall_trace mov 1, %o1 ba,pt %xcc, rtrap - clr %l6 + nop /* This is how fork() was meant to be done, 8 instruction entry. * @@ -1605,7 +1605,7 @@ ret_sys_call: bne,pn %icc, linux_syscall_trace2 add %l1, 0x4, %l2 ! npc = npc+4 stx %l1, [%sp + PTREGS_OFF + PT_V9_TPC] - ba,pt %xcc, rtrap_clr_l6 + ba,pt %xcc, rtrap stx %l2, [%sp + PTREGS_OFF + PT_V9_TNPC] 1: @@ -1616,7 +1616,6 @@ ret_sys_call: sub %g0, %o0, %o0 or %g3, %g2, %g3 stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] - mov 1, %l6 stx %g3, [%sp + PTREGS_OFF + PT_V9_TSTATE] bne,pn %icc, linux_syscall_trace2 add %l1, 0x4, %l2 ! npc = npc+4 diff --git a/arch/sparc64/kernel/entry.h b/arch/sparc64/kernel/entry.h index 4a91e9c6d31..32fbab62085 100644 --- a/arch/sparc64/kernel/entry.h +++ b/arch/sparc64/kernel/entry.h @@ -20,7 +20,6 @@ extern void timer_interrupt(int irq, struct pt_regs *regs); extern void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, - int restart_syscall, unsigned long thread_info_flags); extern asmlinkage void syscall_trace(struct pt_regs *regs, diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index 079d18a11d2..ecf6753b204 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -18,12 +18,6 @@ #define RTRAP_PSTATE_IRQOFF (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV) #define RTRAP_PSTATE_AG_IRQOFF (PSTATE_RMO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG) - /* Register %l6 keeps track of whether we are returning - * from a system call or not. It is cleared if we call - * do_notify_resume, and it must not be otherwise modified - * until we fully commit to returning to userspace. - */ - .text .align 32 __handle_softirq: @@ -56,14 +50,12 @@ __handle_user_windows: be,pt %xcc, __handle_user_windows_continue nop mov %l5, %o1 - mov %l6, %o2 add %sp, PTREGS_OFF, %o0 - mov %l0, %o3 + mov %l0, %o2 call do_notify_resume wrpr %g0, RTRAP_PSTATE, %pstate wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate - clr %l6 /* Signal delivery can modify pt_regs tstate, so we must * reload it. */ @@ -99,14 +91,12 @@ __handle_perfctrs: be,pt %xcc, __handle_perfctrs_continue sethi %hi(TSTATE_PEF), %o0 mov %l5, %o1 - mov %l6, %o2 add %sp, PTREGS_OFF, %o0 - mov %l0, %o3 + mov %l0, %o2 call do_notify_resume wrpr %g0, RTRAP_PSTATE, %pstate wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate - clr %l6 /* Signal delivery can modify pt_regs tstate, so we must * reload it. 
*/ @@ -127,13 +117,11 @@ __handle_userfpu: __handle_signal: mov %l5, %o1 - mov %l6, %o2 add %sp, PTREGS_OFF, %o0 - mov %l0, %o3 + mov %l0, %o2 call do_notify_resume wrpr %g0, RTRAP_PSTATE, %pstate wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate - clr %l6 /* Signal delivery can modify pt_regs tstate, so we must * reload it. @@ -145,9 +133,8 @@ __handle_signal: andn %l1, %l4, %l1 .align 64 - .globl rtrap_irq, rtrap_clr_l6, rtrap, irqsz_patchme, rtrap_xcall + .globl rtrap_irq, rtrap, irqsz_patchme, rtrap_xcall rtrap_irq: -rtrap_clr_l6: clr %l6 rtrap: #ifndef CONFIG_SMP sethi %hi(per_cpu____cpu_data), %l0 diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c index b959597201e..77a3e8592cb 100644 --- a/arch/sparc64/kernel/signal.c +++ b/arch/sparc64/kernel/signal.c @@ -580,8 +580,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) } } -void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, int restart_syscall, - unsigned long thread_info_flags) +void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags) { if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs, orig_i0); diff --git a/arch/sparc64/kernel/sun4v_tlb_miss.S b/arch/sparc64/kernel/sun4v_tlb_miss.S index fd9430562e0..e1fbf8c7578 100644 --- a/arch/sparc64/kernel/sun4v_tlb_miss.S +++ b/arch/sparc64/kernel/sun4v_tlb_miss.S @@ -262,7 +262,7 @@ sun4v_iacc: mov %l5, %o2 call sun4v_insn_access_exception add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap /* Instruction Access Exception, tl1. */ sun4v_iacc_tl1: @@ -278,7 +278,7 @@ sun4v_iacc_tl1: mov %l5, %o2 call sun4v_insn_access_exception_tl1 add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap /* Data Access Exception, tl0. */ sun4v_dacc: @@ -294,7 +294,7 @@ sun4v_dacc: mov %l5, %o2 call sun4v_data_access_exception add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap /* Data Access Exception, tl1. */ sun4v_dacc_tl1: @@ -310,7 +310,7 @@ sun4v_dacc_tl1: mov %l5, %o2 call sun4v_data_access_exception_tl1 add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap /* Memory Address Unaligned. */ sun4v_mna: @@ -344,7 +344,7 @@ sun4v_mna: mov %l5, %o2 call sun4v_do_mna add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap /* Privileged Action. */ sun4v_privact: @@ -352,7 +352,7 @@ sun4v_privact: rd %pc, %g7 call do_privact add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap /* Unaligned ldd float, tl0. */ sun4v_lddfmna: @@ -368,7 +368,7 @@ sun4v_lddfmna: mov %l5, %o2 call handle_lddfmna add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap /* Unaligned std float, tl0. */ sun4v_stdfmna: @@ -384,7 +384,7 @@ sun4v_stdfmna: mov %l5, %o2 call handle_stdfmna add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap #define BRANCH_ALWAYS 0x10680000 #define NOP 0x01000000 diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index 10adb2fb8ff..c499214b501 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -275,7 +275,7 @@ sparc64_realfault_common: stx %l5, [%g6 + TI_FAULT_ADDR] ! Save fault address call do_sparc64_fault ! Call fault handler add %sp, PTREGS_OFF, %o0 ! Compute pt_regs arg - ba,pt %xcc, rtrap_clr_l6 ! Restore cpu state + ba,pt %xcc, rtrap ! Restore cpu state nop ! 
Delay slot (fill me) winfix_trampoline: diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index c4aa110a10e..a6b0863c27d 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -32,7 +32,7 @@ fill_fixup: rd %pc, %g7 call do_sparc64_fault add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap_clr_l6 + ba,pt %xcc, rtrap nop /* Be very careful about usage of the trap globals here. @@ -100,7 +100,7 @@ spill_fixup_dax: rd %pc, %g7 call do_sparc64_fault add %sp, PTREGS_OFF, %o0 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap winfix_mna: andn %g3, 0x7f, %g3 @@ -122,12 +122,12 @@ fill_fixup_mna: mov %l4, %o2 call sun4v_do_mna mov %l5, %o1 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap 1: mov %l4, %o1 mov %l5, %o2 call mem_address_unaligned nop - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap winfix_dax: andn %g3, 0x7f, %g3 @@ -150,7 +150,7 @@ fill_fixup_dax: add %sp, PTREGS_OFF, %o0 call sun4v_data_access_exception nop - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap 1: call spitfire_data_access_exception nop - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 2865c105b6a..e686a67561a 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -476,7 +476,6 @@ xcall_sync_tick: #endif call smp_synchronize_tick_client nop - clr %l6 b rtrap_xcall ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 @@ -511,7 +510,6 @@ xcall_report_regs: #endif call __show_regs add %sp, PTREGS_OFF, %o0 - clr %l6 /* Has to be a non-v9 branch due to the large distance. */ b rtrap_xcall ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 @@ -576,7 +574,7 @@ __hypervisor_tlb_xcall_error: mov %l4, %o0 call hypervisor_tlbop_error_xcall mov %l5, %o1 - ba,a,pt %xcc, rtrap_clr_l6 + ba,a,pt %xcc, rtrap .globl __hypervisor_xcall_flush_tlb_mm __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ -- cgit v1.2.3 From 77c664fa58624079f7a0fc29b46e8a32883633a5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Apr 2008 03:28:52 -0700 Subject: [SPARC64]: Detect trap frames in stack backtraces. Now that we have a magic cookie in the pt_regs, we can properly detect trap frames in stack backtraces. Signed-off-by: David S. Miller --- arch/sparc64/kernel/stacktrace.c | 16 +++++++++++++--- arch/sparc64/kernel/traps.c | 19 +++++++++++++++---- 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc64/kernel/stacktrace.c b/arch/sparc64/kernel/stacktrace.c index 84d39e873e8..01b52f561af 100644 --- a/arch/sparc64/kernel/stacktrace.c +++ b/arch/sparc64/kernel/stacktrace.c @@ -20,6 +20,8 @@ void save_stack_trace(struct stack_trace *trace) thread_base = (unsigned long) tp; do { struct reg_window *rw; + struct pt_regs *regs; + unsigned long pc; /* Bogus frame pointer?
*/ if (fp < (thread_base + sizeof(struct thread_info)) || @@ -27,11 +29,19 @@ void save_stack_trace(struct stack_trace *trace) break; rw = (struct reg_window *) fp; + regs = (struct pt_regs *) (rw + 1); + + if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) { + pc = regs->tpc; + fp = regs->u_regs[UREG_I6] + STACK_BIAS; + } else { + pc = rw->ins[7]; + fp = rw->ins[6] + STACK_BIAS; + } + if (trace->skip > 0) trace->skip--; else - trace->entries[trace->nr_entries++] = rw->ins[7]; - - fp = rw->ins[6] + STACK_BIAS; + trace->entries[trace->nr_entries++] = pc; } while (trace->nr_entries < trace->max_entries); } diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 96da847023f..d9b8d46707d 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -2091,9 +2091,8 @@ static void user_instruction_dump(unsigned int __user *pc) void show_stack(struct task_struct *tsk, unsigned long *_ksp) { - unsigned long pc, fp, thread_base, ksp; + unsigned long fp, thread_base, ksp; struct thread_info *tp; - struct reg_window *rw; int count = 0; ksp = (unsigned long) _ksp; @@ -2117,15 +2116,27 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) printk("\n"); #endif do { + struct reg_window *rw; + struct pt_regs *regs; + unsigned long pc; + /* Bogus frame pointer? */ if (fp < (thread_base + sizeof(struct thread_info)) || fp >= (thread_base + THREAD_SIZE)) break; rw = (struct reg_window *)fp; - pc = rw->ins[7]; + regs = (struct pt_regs *) (rw + 1); + + if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) { + pc = regs->tpc; + fp = regs->u_regs[UREG_I6] + STACK_BIAS; + } else { + pc = rw->ins[7]; + fp = rw->ins[6] + STACK_BIAS; + } + printk(" [%016lx] ", pc); print_symbol("%s\n", pc); - fp = rw->ins[6] + STACK_BIAS; } while (++count < 16); #ifndef CONFIG_KALLSYMS printk("\n"); -- cgit v1.2.3
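The trap-frame patches above all build on one encoding: pt_regs->magic carries a fixed cookie in its upper bits and the trap type in its low 9 bits, so a single field both marks a stack frame as a trap frame and records which trap produced it. The stand-alone C sketch below illustrates that encoding and the two checks the patches perform (the trap-frame test in the backtrace code and the syscall-restart test in do_signal()). It is an illustration only, not kernel code: the PT_REGS_MAGIC value, the simplified pt_regs_sketch structure and the helper names encode_magic(), is_trap_frame() and entered_via_syscall() are assumptions made for this example; only the 0x1ff mask and the 0x110/0x111/0x16d trap types come from the patches themselves.

/* Hedged sketch of the pt_regs->magic scheme.  The cookie value below is a
 * made-up placeholder whose low 9 bits are clear; it is not the kernel's
 * actual PT_REGS_MAGIC definition.
 */
#include <stdio.h>

#define PT_REGS_MAGIC   0x57ac6c00UL    /* placeholder cookie, low 9 bits clear */
#define TRAP_TYPE_MASK  0x1ffUL         /* trap type lives in the low 9 bits */

struct pt_regs_sketch {
        unsigned long tpc;      /* trap PC */
        unsigned long magic;    /* PT_REGS_MAGIC | trap type, written at trap entry */
};

/* What etrap.S does at trap entry: merge the cookie with the %tt value. */
static unsigned long encode_magic(unsigned long trap_type)
{
        return PT_REGS_MAGIC | (trap_type & TRAP_TYPE_MASK);
}

/* The trap-frame test added to show_stack() and save_stack_trace(). */
static int is_trap_frame(const struct pt_regs_sketch *regs)
{
        return (regs->magic & ~TRAP_TYPE_MASK) == PT_REGS_MAGIC;
}

/* The syscall-restart test from do_signal(): 0x110, 0x111 and 0x16d are the
 * trap types the signal.c patch treats as system-call entries.
 */
static int entered_via_syscall(const struct pt_regs_sketch *regs)
{
        unsigned long tt = regs->magic & TRAP_TYPE_MASK;

        return tt == 0x110 || tt == 0x111 || tt == 0x16d;
}

int main(void)
{
        struct pt_regs_sketch syscall_frame = { 0x100000UL, encode_magic(0x110) };
        struct pt_regs_sketch garbage       = { 0x200000UL, 0x12345678UL };

        printf("syscall_frame: trap frame=%d syscall=%d tt=0x%lx\n",
               is_trap_frame(&syscall_frame), entered_via_syscall(&syscall_frame),
               syscall_frame.magic & TRAP_TYPE_MASK);
        printf("garbage word:  trap frame=%d\n", is_trap_frame(&garbage));
        return 0;
}

The design choice the etrap.S hunk makes is that OR-ing %tt into the cookie costs one extra instruction at trap entry, yet lets every consumer answer "is this a trap frame?" by masking off the low bits and "which trap was it?" by keeping them, which is exactly how the backtrace and signal-delivery patches use it.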