From eda80228860641b7b0e963e6bd219b960c500af9 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 7 Jul 2005 17:56:00 -0700 Subject: [PATCH] uml: kill some useless vmalloc tlb flushing There is absolutely no reason to flush the kernel's VM area during a tlb_flush_mm. This results in a noticeable performance increase in the kernel build benchmark. Signed-off-by: Jeff Dike Cc: Paolo Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/skas/tlb.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index b8c5e71763d..18f9a7711de 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -76,7 +76,6 @@ void flush_tlb_mm_skas(struct mm_struct *mm) return; fix_range(mm, 0, host_task_size, 0); - flush_tlb_kernel_range_common(start_vm, end_vm); } void force_flush_all_skas(void) -- cgit v1.2.3 From c23a4e9649f80a9379d7df4a33bc63b365d5e7fc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 7 Jul 2005 17:56:02 -0700 Subject: [PATCH] iounmap debugging We get sporadic reports of `__iounmap: bad address' coming out. Add a dump_stack() to find the culprit. Try to identify which subsystem is having iounmap() problems. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/ioremap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index 6b25afc933b..f379b8d6755 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -228,7 +228,8 @@ EXPORT_SYMBOL(ioremap_nocache); void iounmap(volatile void __iomem *addr) { struct vm_struct *p; - if ((void __force *) addr <= high_memory) + + if ((void __force *)addr <= high_memory) return; /* @@ -241,9 +242,10 @@ void iounmap(volatile void __iomem *addr) return; write_lock(&vmlist_lock); - p = __remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); + p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr)); if (!p) { printk(KERN_WARNING "iounmap: bad address %p\n", addr); + dump_stack(); goto out_unlock; } -- cgit v1.2.3 From 2098eec22882e8a50a21eb214df4742b34927dae Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 7 Jul 2005 17:56:09 -0700 Subject: [PATCH] ppc64: vdso32: fix link errors after recent toolchain changes Patch from , http://sources.redhat.com/bugzilla/show_bug.cgi?id=1042 /usr/bin/ld: arch/ppc64/kernel/vdso32/vdso32.so: The first section in the PT_DYNAMIC segment is not the .dynamic section Signed-off-by: Olaf Hering Acked-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/vdso32/vdso32.lds.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/vdso32/vdso32.lds.S b/arch/ppc64/kernel/vdso32/vdso32.lds.S index 11290c902ba..6f87a916a39 100644 --- a/arch/ppc64/kernel/vdso32/vdso32.lds.S +++ b/arch/ppc64/kernel/vdso32/vdso32.lds.S @@ -40,9 +40,9 @@ SECTIONS .gcc_except_table : { *(.gcc_except_table) } .fixup : { *(.fixup) } - .got ALIGN(4) : { *(.got.plt) *(.got) } - .dynamic : { *(.dynamic) } :text :dynamic + .got : { *(.got) } + .plt : { *(.plt) } _end = .; __end = .; 
-- cgit v1.2.3 From 315a699851722a6bc31e35f91562f31f55d4c4a2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:11 -0700 Subject: [PATCH] ppc64: use c99 initialisers in cputable code Use c99 initialisers in the cputable code. Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/cputable.c | 365 +++++++++++++++++++++++++------------------ 1 file changed, 212 insertions(+), 153 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 1d162c7c59d..c301366176e 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -49,160 +49,219 @@ extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec); #endif struct cpu_spec cpu_specs[] = { - { /* Power3 */ - 0xffff0000, 0x00400000, "POWER3 (630)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Power3+ */ - 0xffff0000, 0x00410000, "POWER3 (630+)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Northstar */ - 0xffff0000, 0x00330000, "RS64-II (northstar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Pulsar */ - 0xffff0000, 0x00340000, "RS64-III (pulsar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* I-star */ - 0xffff0000, 0x00360000, "RS64-III (icestar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 
128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* S-star */ - 0xffff0000, 0x00370000, "RS64-IV (sstar)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_IABR | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power3, - COMMON_PPC64_FW - }, - { /* Power4 */ - 0xffff0000, 0x00350000, "POWER4 (gp)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* Power4+ */ - 0xffff0000, 0x00380000, "POWER4+ (gq)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* PPC970 */ - 0xffff0000, 0x00390000, "PPC970", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_ppc970, - COMMON_PPC64_FW - }, - { /* PPC970FX */ - 0xffff0000, 0x003c0000, "PPC970FX", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_ppc970, - COMMON_PPC64_FW - }, - { /* Power5 */ - 0xffff0000, 0x003a0000, "POWER5 (gr)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | - CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | - CPU_FTR_MMCRA_SIHV, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* Power5 */ - 0xffff0000, 0x003b0000, "POWER5 (gs)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | - 
CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | - CPU_FTR_MMCRA_SIHV, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - }, - { /* BE DD1.x */ - 0xffff0000, 0x00700000, "Broadband Engine", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | - CPU_FTR_SMT, - COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, - 128, 128, - __setup_cpu_be, - COMMON_PPC64_FW - }, - { /* default match */ - 0x00000000, 0x00000000, "POWER4 (compatible)", - CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | - CPU_FTR_PPCAS_ARCH_V2, - COMMON_USER_PPC64, - 128, 128, - __setup_cpu_power4, - COMMON_PPC64_FW - } + { /* Power3 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00400000, + .cpu_name = "POWER3 (630)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power3+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00410000, + .cpu_name = "POWER3 (630+)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Northstar */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00330000, + .cpu_name = "RS64-II (northstar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Pulsar */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00340000, + .cpu_name = "RS64-III (pulsar)", + .cpu_features = 
CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* I-star */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00360000, + .cpu_name = "RS64-III (icestar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* S-star */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00370000, + .cpu_name = "RS64-IV (sstar)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | + CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power4 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00350000, + .cpu_name = "POWER4 (gp)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power4+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00380000, + .cpu_name = "POWER4+ (gq)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* PPC970 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00390000, + .cpu_name = "PPC970", + 
.cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, + }, + { /* PPC970FX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003c0000, + .cpu_name = "PPC970FX", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_CAN_NAP | CPU_FTR_PMC8 | CPU_FTR_MMCRA, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power5 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003a0000, + .cpu_name = "POWER5 (gr)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | + CPU_FTR_MMCRA_SIHV, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* Power5 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003b0000, + .cpu_name = "POWER5 (gs)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_MMCRA | CPU_FTR_SMT | + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | + CPU_FTR_MMCRA_SIHV, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + }, + { /* BE DD1.x */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00700000, + .cpu_name = "Broadband Engine", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | 
CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_SMT, + .cpu_user_features = COMMON_USER_PPC64 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_be, + .firmware_features = COMMON_PPC64_FW, + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "POWER4 (compatible)", + .cpu_features = CPU_FTR_SPLIT_ID_CACHE | + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2, + .cpu_user_features = COMMON_USER_PPC64, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, + } }; firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + 
{FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, }; -- cgit v1.2.3 From a2f7a9ce2a5c3d21cc0eb37a03da603b44ba4b09 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:11 -0700 Subject: [PATCH] ppc64: Fix runlatch code to work on pseries machines Not all ppc64 CPUs have the CTRL SPR, so we need a cputable feature for it. Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/cputable.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index c301366176e..8d4c46f6f0b 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -81,7 +81,7 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "RS64-II (northstar)", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | - CPU_FTR_PMC8 | CPU_FTR_MMCRA, + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, .cpu_user_features = COMMON_USER_PPC64, .icache_bsize = 128, .dcache_bsize = 128, @@ -94,7 +94,7 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "RS64-III (pulsar)", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | - CPU_FTR_PMC8 | CPU_FTR_MMCRA, + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, .cpu_user_features = COMMON_USER_PPC64, .icache_bsize = 128, .dcache_bsize = 128, @@ -107,7 +107,7 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "RS64-III (icestar)", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | - CPU_FTR_PMC8 | CPU_FTR_MMCRA, + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, .cpu_user_features = COMMON_USER_PPC64, .icache_bsize = 128, .dcache_bsize = 128, @@ -120,7 +120,7 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "RS64-IV 
(sstar)", .cpu_features = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | - CPU_FTR_PMC8 | CPU_FTR_MMCRA, + CPU_FTR_PMC8 | CPU_FTR_MMCRA | CPU_FTR_CTRL, .cpu_user_features = COMMON_USER_PPC64, .icache_bsize = 128, .dcache_bsize = 128, -- cgit v1.2.3 From 8dc4fd87f229414fc38648508aad7def2275fe81 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:12 -0700 Subject: [PATCH] ppc64: Turn runlatch on in exception entry Enable the runlatch at the start of each exception. Unfortunately we are out of space in the 0x300 handler, so I added it a bit later. The SPR write is fairly expensive, perhaps we should cache the runlatch state in the paca and avoid the write when possible. We don't need to turn the runlatch off, we do that in the idle loop. Better to take the hit in the idle loop than for each exception exit. Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/head.S | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 675c2708588..93ebcac0d5a 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -308,6 +308,7 @@ exception_marker: label##_pSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) #define STD_EXCEPTION_ISERIES(n, label, area) \ @@ -315,6 +316,7 @@ label##_pSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_ISERIES_1(area); \ EXCEPTION_PROLOG_ISERIES_2; \ b label##_common @@ -324,6 +326,7 @@ label##_iSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ lbz r10,PACAPROCENABLED(r13); \ cmpwi 0,r10,0; \ @@ -393,6 +396,7 @@ __start_interrupts: _machine_check_pSeries: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + 
RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) . = 0x300 @@ -419,6 +423,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) data_access_slb_pSeries: HMT_MEDIUM mtspr SPRG1,r13 + RUNLATCH_ON(r13) mfspr r13,SPRG3 /* get paca address into r13 */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ std r10,PACA_EXSLB+EX_R10(r13) @@ -439,6 +444,7 @@ data_access_slb_pSeries: instruction_access_slb_pSeries: HMT_MEDIUM mtspr SPRG1,r13 + RUNLATCH_ON(r13) mfspr r13,SPRG3 /* get paca address into r13 */ std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ std r10,PACA_EXSLB+EX_R10(r13) @@ -464,6 +470,7 @@ instruction_access_slb_pSeries: .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM + RUNLATCH_ON(r9) mr r9,r13 mfmsr r10 mfspr r13,SPRG3 @@ -707,11 +714,13 @@ fwnmi_data_area: system_reset_fwnmi: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) .globl machine_check_fwnmi machine_check_fwnmi: HMT_MEDIUM mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) /* @@ -848,6 +857,7 @@ unrecov_fer: .align 7 .globl data_access_common data_access_common: + RUNLATCH_ON(r10) /* It wont fit in the 0x300 handler */ mfspr r10,DAR std r10,PACA_EXGEN+EX_DAR(r13) mfspr r10,DSISR -- cgit v1.2.3 From 4416f3968a23e25a257d679227a89710447760ab Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:14 -0700 Subject: [PATCH] ppc64: sys_ppc32.c cleanups Remove some unnecessary includes, an out of date comment and a prototype for sys_timer_create (which is now in syscalls.h) Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/sys_ppc32.c | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index 118436e8085..9bd16cef0ed 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ 
b/arch/ppc64/kernel/sys_ppc32.c @@ -30,47 +30,26 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include -#include -#include #include -#include #include -#include #include #include #include #include -#include #include #include #include -#include #include -#include -#include - #include #include #include #include #include #include -#include #include #include #include @@ -350,8 +329,6 @@ asmlinkage long sys32_adjtimex(struct timex32 __user *utp) return ret; } - -/* These are here just in case some old sparc32 binary calls it. */ asmlinkage long sys32_pause(void) { current->state = TASK_INTERRUPTIBLE; @@ -360,8 +337,6 @@ asmlinkage long sys32_pause(void) return -ERESTARTNOHAND; } - - static inline long get_ts32(struct timespec *o, struct compat_timeval __user *i) { long usec; @@ -1273,8 +1248,6 @@ long ppc32_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, (u64)len_high << 32 | len_low, advice); } -extern asmlinkage long sys_timer_create(clockid_t, sigevent_t __user *, timer_t __user *); - long ppc32_timer_create(clockid_t clock, struct compat_sigevent __user *ev32, timer_t __user *timer_id) -- cgit v1.2.3 From 79c2cc7b6d2cc31cff6a3d8e966a890f0a0d5f7a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:15 -0700 Subject: [PATCH] ppc64: add ioprio syscalls - Clean up sys32_getpriority comment. - Add ioprio syscalls, and sign extend 32bit versions. 
Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/misc.S | 6 +++++- arch/ppc64/kernel/sys_ppc32.c | 27 +++++++++++++++++---------- 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index f3dea0c5a88..59f4f997381 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -1124,9 +1124,11 @@ _GLOBAL(sys_call_table32) .llong .compat_sys_mq_getsetattr .llong .compat_sys_kexec_load .llong .sys32_add_key - .llong .sys32_request_key + .llong .sys32_request_key /* 270 */ .llong .compat_sys_keyctl .llong .compat_sys_waitid + .llong .sys32_ioprio_set + .llong .sys32_ioprio_get .balign 8 _GLOBAL(sys_call_table) @@ -1403,3 +1405,5 @@ _GLOBAL(sys_call_table) .llong .sys_request_key /* 270 */ .llong .sys_keyctl .llong .sys_waitid + .llong .sys_ioprio_set + .llong .sys_ioprio_get diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index 9bd16cef0ed..206619080e6 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -822,16 +822,6 @@ asmlinkage long sys32_getpgid(u32 pid) } -/* Note: it is necessary to treat which and who as unsigned ints, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. 
- */ -asmlinkage long sys32_getpriority(u32 which, u32 who) -{ - return sys_getpriority((int)which, (int)who); -} - /* Note: it is necessary to treat pid as an unsigned int, * with the corresponding cast to a signed int to insure that the @@ -1023,6 +1013,11 @@ asmlinkage long sys32_setpgid(u32 pid, u32 pgid) return sys_setpgid((int)pid, (int)pgid); } +long sys32_getpriority(u32 which, u32 who) +{ + /* sign extend which and who */ + return sys_getpriority((int)which, (int)who); +} long sys32_setpriority(u32 which, u32 who, u32 niceval) { @@ -1030,6 +1025,18 @@ long sys32_setpriority(u32 which, u32 who, u32 niceval) return sys_setpriority((int)which, (int)who, (int)niceval); } +long sys32_ioprio_get(u32 which, u32 who) +{ + /* sign extend which and who */ + return sys_ioprio_get((int)which, (int)who); +} + +long sys32_ioprio_set(u32 which, u32 who, u32 ioprio) +{ + /* sign extend which, who and ioprio */ + return sys_ioprio_set((int)which, (int)who, (int)ioprio); +} + /* Note: it is necessary to treat newmask as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) -- cgit v1.2.3 From d5ee257c3342185ba8ab642d125d192eb99ea8f2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 7 Jul 2005 17:56:24 -0700 Subject: [PATCH] hvc_console: Separate hvc_console and vio code Separate the console setup routines of the hvc_console and the vio layer. Remove the call to find_init_vty from hvc_console.c. Fail the setup routine if the console doesn't exist, but register the console again when the specified channel is instantiated. This scheme maintains the print buffer semantics while eliminating callout and call back for the console code. 
Signed-off-by: Milton Miller Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/hvconsole.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c index c72fb8ffe97..94fb06198ea 100644 --- a/arch/ppc64/kernel/hvconsole.c +++ b/arch/ppc64/kernel/hvconsole.c @@ -93,7 +93,7 @@ EXPORT_SYMBOL(hvc_put_chars); * We hope/assume that the first vty found corresponds to the first console * device. */ -int hvc_find_vtys(void) +static int hvc_find_vtys(void) { struct device_node *vty; int num_found = 0; @@ -119,3 +119,4 @@ int hvc_find_vtys(void) return num_found; } +console_initcall(hvc_find_vtys); -- cgit v1.2.3 From acad9559f1054487292eb10d7bb81f256e9d8f2d Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 7 Jul 2005 17:56:24 -0700 Subject: [PATCH] hvc_console: Separate hvc_console and vio code 2 Remove all the vio device driver code from hvc_console.c This will allow us to separate hvsi, hvc, and allow hvc_console to be used without the ppc64 vio layer. Signed-off-by: Milton Miller Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/hvconsole.c | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c index 94fb06198ea..9d8876d92eb 100644 --- a/arch/ppc64/kernel/hvconsole.c +++ b/arch/ppc64/kernel/hvconsole.c @@ -27,7 +27,6 @@ #include #include #include -#include /** * hvc_get_chars - retrieve characters from firmware for denoted vterm adatper @@ -88,35 +87,3 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) } EXPORT_SYMBOL(hvc_put_chars); - -/* - * We hope/assume that the first vty found corresponds to the first console - * device. 
- */ -static int hvc_find_vtys(void) -{ - struct device_node *vty; - int num_found = 0; - - for (vty = of_find_node_by_name(NULL, "vty"); vty != NULL; - vty = of_find_node_by_name(vty, "vty")) { - uint32_t *vtermno; - - /* We have statically defined space for only a certain number of - * console adapters. */ - if (num_found >= MAX_NR_HVC_CONSOLES) - break; - - vtermno = (uint32_t *)get_property(vty, "reg", NULL); - if (!vtermno) - continue; - - if (device_is_compatible(vty, "hvterm1")) { - hvc_instantiate(*vtermno, num_found); - ++num_found; - } - } - - return num_found; -} -console_initcall(hvc_find_vtys); -- cgit v1.2.3 From 70b234a40107596a713e9981c643f2717e31463f Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 7 Jul 2005 17:56:26 -0700 Subject: [PATCH] hvc_console: Separate the NUL character filtering from get_hvc_chars Separate the NUL character filtering from get_hvc_chars. Signed-off-by: Milton Miller Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/hvconsole.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/hvconsole.c b/arch/ppc64/kernel/hvconsole.c index 9d8876d92eb..138e128a388 100644 --- a/arch/ppc64/kernel/hvconsole.c +++ b/arch/ppc64/kernel/hvconsole.c @@ -41,29 +41,14 @@ int hvc_get_chars(uint32_t vtermno, char *buf, int count) unsigned long got; if (plpar_hcall(H_GET_TERM_CHAR, vtermno, 0, 0, 0, &got, - (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) { - /* - * Work around a HV bug where it gives us a null - * after every \r. 
-- paulus - */ - if (got > 0) { - int i; - for (i = 1; i < got; ++i) { - if (buf[i] == 0 && buf[i-1] == '\r') { - --got; - if (i < got) - memmove(&buf[i], &buf[i+1], - got - i); - } - } - } + (unsigned long *)buf, (unsigned long *)buf+1) == H_Success) return got; - } return 0; } EXPORT_SYMBOL(hvc_get_chars); + /** * hvc_put_chars: send characters to firmware for denoted vterm adapter * @vtermno: The vtermno or unit_address of the adapter from which the data -- cgit v1.2.3 From fd899c0cc725387992ccfc83fb6f70505c36cbeb Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2005 17:56:28 -0700 Subject: [PATCH] ppc64: Make idle_loop a ppc_md function This patch adds an idle member to the ppc_md structure and calls it from cpu_idle(). If a platform leaves ppc_md.idle as null it will get the default idle loop default_idle(). Signed-off-by: Michael Ellerman Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/idle.c | 8 +++++--- arch/ppc64/kernel/setup.c | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 08952c7e621..e270055e73e 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -33,6 +33,7 @@ #include #include #include +#include extern void power4_idle(void); @@ -122,7 +123,7 @@ static int iSeries_idle(void) #else -static int default_idle(void) +int default_idle(void) { long oldval; unsigned int cpu = smp_processor_id(); @@ -288,7 +289,7 @@ static int shared_idle(void) #endif /* CONFIG_PPC_PSERIES */ -static int native_idle(void) +int native_idle(void) { while(1) { /* check CPU type here */ @@ -308,7 +309,8 @@ static int native_idle(void) void cpu_idle(void) { - idle_loop(); + BUG_ON(NULL == ppc_md.idle_loop); + ppc_md.idle_loop(); } int powersave_nap; diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index d5e4866e9ac..a278998ecb4 100644 --- 
a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -96,7 +96,6 @@ extern void udbg_init_maple_realmode(void); extern unsigned long klimit; extern void mm_init_ppc64(void); -extern int idle_setup(void); extern void stab_initialize(unsigned long stab); extern void htab_initialize(void); extern void early_init_devtree(void *flat_dt); @@ -1081,8 +1080,9 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); - /* Select the correct idle loop for the platform. */ - idle_setup(); + /* Use the default idle loop if the platform hasn't provided one. */ + if (NULL == ppc_md.idle_loop) + ppc_md.idle_loop = default_idle; paging_init(); ppc64_boot_msg(0x15, "Setup Done"); -- cgit v1.2.3 From d200903e11f6867b91dffa81b2038e55be599f49 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2005 17:56:29 -0700 Subject: [PATCH] ppc64: Move iSeries_idle() into iSeries_setup.c Move iSeries_idle() into iSeries_setup.c, no one else needs to know about it. Signed-off-by: Michael Ellerman Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/iSeries_setup.c | 81 ++++++++++++++++++++++++++++++++++++ arch/ppc64/kernel/idle.c | 86 --------------------------------------- 2 files changed, 81 insertions(+), 86 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index b3f770f6d40..1139e27e171 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -834,6 +834,87 @@ static int __init iSeries_src_init(void) late_initcall(iSeries_src_init); +static unsigned long maxYieldTime = 0; +static unsigned long minYieldTime = 0xffffffffffffffffUL; + +static inline void process_iSeries_events(void) +{ + asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); +} + +static void yield_shared_processor(void) +{ + unsigned long tb; + unsigned long yieldTime; + + HvCall_setEnabledInterrupts(HvCall_MaskIPI | + HvCall_MaskLpEvent | + 
HvCall_MaskLpProd | + HvCall_MaskTimeout); + + tb = get_tb(); + /* Compute future tb value when yield should expire */ + HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); + + yieldTime = get_tb() - tb; + if (yieldTime > maxYieldTime) + maxYieldTime = yieldTime; + + if (yieldTime < minYieldTime) + minYieldTime = yieldTime; + + /* + * The decrementer stops during the yield. Force a fake decrementer + * here and let the timer_interrupt code sort out the actual time. + */ + get_paca()->lppaca.int_dword.fields.decr_int = 1; + process_iSeries_events(); +} + +static int iSeries_idle(void) +{ + struct paca_struct *lpaca; + long oldval; + + /* ensure iSeries run light will be out when idle */ + ppc64_runlatch_off(); + + lpaca = get_paca(); + + while (1) { + if (lpaca->lppaca.shared_proc) { + if (hvlpevent_is_pending()) + process_iSeries_events(); + if (!need_resched()) + yield_shared_processor(); + } else { + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + + while (!need_resched()) { + HMT_medium(); + if (hvlpevent_is_pending()) + process_iSeries_events(); + HMT_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + } + + ppc64_runlatch_on(); + schedule(); + ppc64_runlatch_off(); + } + + return 0; +} + #ifndef CONFIG_PCI void __init iSeries_init_IRQ(void) { } #endif diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index e270055e73e..22615246779 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -39,90 +39,6 @@ extern void power4_idle(void); static int (*idle_loop)(void); -#ifdef CONFIG_PPC_ISERIES -static unsigned long maxYieldTime = 0; -static unsigned long minYieldTime = 0xffffffffffffffffUL; - -static inline void process_iSeries_events(void) -{ - asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); -} - -static void yield_shared_processor(void) -{ - unsigned long tb; - unsigned long yieldTime; - - 
HvCall_setEnabledInterrupts(HvCall_MaskIPI | - HvCall_MaskLpEvent | - HvCall_MaskLpProd | - HvCall_MaskTimeout); - - tb = get_tb(); - /* Compute future tb value when yield should expire */ - HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); - - yieldTime = get_tb() - tb; - if (yieldTime > maxYieldTime) - maxYieldTime = yieldTime; - - if (yieldTime < minYieldTime) - minYieldTime = yieldTime; - - /* - * The decrementer stops during the yield. Force a fake decrementer - * here and let the timer_interrupt code sort out the actual time. - */ - get_paca()->lppaca.int_dword.fields.decr_int = 1; - process_iSeries_events(); -} - -static int iSeries_idle(void) -{ - struct paca_struct *lpaca; - long oldval; - - /* ensure iSeries run light will be out when idle */ - ppc64_runlatch_off(); - - lpaca = get_paca(); - - while (1) { - if (lpaca->lppaca.shared_proc) { - if (hvlpevent_is_pending()) - process_iSeries_events(); - if (!need_resched()) - yield_shared_processor(); - } else { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - - while (!need_resched()) { - HMT_medium(); - if (hvlpevent_is_pending()) - process_iSeries_events(); - HMT_low(); - } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - } - - ppc64_runlatch_on(); - schedule(); - ppc64_runlatch_off(); - } - - return 0; -} - -#else - int default_idle(void) { long oldval; @@ -305,8 +221,6 @@ int native_idle(void) return 0; } -#endif /* CONFIG_PPC_ISERIES */ - void cpu_idle(void) { BUG_ON(NULL == ppc_md.idle_loop); -- cgit v1.2.3 From c66d5dd6b5b62e1435b95c0fb42f6bcddeb395ea Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2005 17:56:29 -0700 Subject: [PATCH] ppc64: Move pSeries idle functions into pSeries_setup.c dedicated_idle() and shared_idle() are only used by pSeries, so move them into pSeries_setup.c Signed-off-by: Michael Ellerman Signed-off-by: Anton Blanchard 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/idle.c | 131 -------------------------------------- arch/ppc64/kernel/pSeries_setup.c | 127 ++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 131 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 22615246779..69b7c22bad5 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -74,137 +74,6 @@ int default_idle(void) return 0; } -#ifdef CONFIG_PPC_PSERIES - -DECLARE_PER_CPU(unsigned long, smt_snooze_delay); - -int dedicated_idle(void) -{ - long oldval; - struct paca_struct *lpaca = get_paca(), *ppaca; - unsigned long start_snooze; - unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - unsigned int cpu = smp_processor_id(); - - ppaca = &paca[cpu ^ 1]; - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; - - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - start_snooze = __get_tb() + - *smt_snooze_delay * tb_ticks_per_usec; - while (!need_resched() && !cpu_is_offline(cpu)) { - /* - * Go into low thread priority and possibly - * low power mode. - */ - HMT_low(); - HMT_very_low(); - - if (*smt_snooze_delay == 0 || - __get_tb() < start_snooze) - continue; - - HMT_medium(); - - if (!(ppaca->lppaca.idle)) { - local_irq_disable(); - - /* - * We are about to sleep the thread - * and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - - /* - * SMT dynamic mode. Cede will result - * in this thread going dormant, if the - * partner thread is still doing work. - * Thread wakes up if partner goes idle, - * an interrupt is presented, or a prod - * occurs. Returning from the cede - * enables external interrupts. 
- */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - } else { - /* - * Give the HV an opportunity at the - * processor, since we are not doing - * any work. - */ - poll_pending(); - } - } - - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } - - HMT_medium(); - lpaca->lppaca.idle = 0; - schedule(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } - return 0; -} - -static int shared_idle(void) -{ - struct paca_struct *lpaca = get_paca(); - unsigned int cpu = smp_processor_id(); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - lpaca->lppaca.idle = 1; - - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); - - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - * - * Check need_resched() again with interrupts disabled - * to avoid a race. 
- */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - } - - HMT_medium(); - lpaca->lppaca.idle = 0; - schedule(); - if (cpu_is_offline(smp_processor_id()) && - system_state == SYSTEM_RUNNING) - cpu_die(); - } - - return 0; -} - -#endif /* CONFIG_PPC_PSERIES */ - int native_idle(void) { while(1) { diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 44d9af72d22..849ed9ba785 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -418,6 +418,133 @@ static int __init pSeries_probe(int platform) return 1; } +DECLARE_PER_CPU(unsigned long, smt_snooze_delay); + +int dedicated_idle(void) +{ + long oldval; + struct paca_struct *lpaca = get_paca(), *ppaca; + unsigned long start_snooze; + unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); + unsigned int cpu = smp_processor_id(); + + ppaca = &paca[cpu ^ 1]; + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); + start_snooze = __get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + while (!need_resched() && !cpu_is_offline(cpu)) { + /* + * Go into low thread priority and possibly + * low power mode. + */ + HMT_low(); + HMT_very_low(); + + if (*smt_snooze_delay == 0 || + __get_tb() < start_snooze) + continue; + + HMT_medium(); + + if (!(ppaca->lppaca.idle)) { + local_irq_disable(); + + /* + * We are about to sleep the thread + * and so wont be polling any + * more. + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + + /* + * SMT dynamic mode. Cede will result + * in this thread going dormant, if the + * partner thread is still doing work. + * Thread wakes up if partner goes idle, + * an interrupt is presented, or a prod + * occurs. Returning from the cede + * enables external interrupts. 
+ */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } else { + /* + * Give the HV an opportunity at the + * processor, since we are not doing + * any work. + */ + poll_pending(); + } + } + + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); + } + + HMT_medium(); + lpaca->lppaca.idle = 0; + schedule(); + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) + cpu_die(); + } + return 0; +} + +static int shared_idle(void) +{ + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); + + while (1) { + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + lpaca->lppaca.idle = 1; + + while (!need_resched() && !cpu_is_offline(cpu)) { + local_irq_disable(); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + * + * Check need_resched() again with interrupts disabled + * to avoid a race. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } + + HMT_medium(); + lpaca->lppaca.idle = 0; + schedule(); + if (cpu_is_offline(smp_processor_id()) && + system_state == SYSTEM_RUNNING) + cpu_die(); + } + + return 0; +} + struct machdep_calls __initdata pSeries_md = { .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, -- cgit v1.2.3 From 62d60e9f0f890c31e5a83a7d8ecdfd1c7975fdb9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2005 17:56:30 -0700 Subject: [PATCH] ppc64: Fixup platforms for new ppc_md.idle This patch fixes up iSeries, pSeries, pmac and maple to set the correct idle function for each platform. 
Signed-off-by: Michael Ellerman Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/iSeries_setup.c | 1 + arch/ppc64/kernel/maple_setup.c | 3 +++ arch/ppc64/kernel/pSeries_setup.c | 18 ++++++++++++++++++ arch/ppc64/kernel/pmac_setup.c | 5 ++++- 4 files changed, 26 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index 1139e27e171..fae215ea54b 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -940,5 +940,6 @@ void __init iSeries_early_setup(void) ppc_md.get_rtc_time = iSeries_get_rtc_time; ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; + ppc_md.idle_loop = iSeries_idle; } diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c index da8900b51f4..bb55b5a5691 100644 --- a/arch/ppc64/kernel/maple_setup.c +++ b/arch/ppc64/kernel/maple_setup.c @@ -177,6 +177,8 @@ void __init maple_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + + printk(KERN_INFO "Using native/NAP idle loop\n"); } /* @@ -297,4 +299,5 @@ struct machdep_calls __initdata maple_md = { .get_rtc_time = maple_get_rtc_time, .calibrate_decr = generic_calibrate_decr, .progress = maple_progress, + .idle_loop = native_idle, }; diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 849ed9ba785..3f3be8ae935 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -19,6 +19,7 @@ #undef DEBUG #include +#include #include #include #include @@ -82,6 +83,9 @@ int fwnmi_active; /* TRUE if an FWNMI handler is present */ extern void pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); +static int shared_idle(void); +static int dedicated_idle(void); + static volatile void __iomem * chrp_int_ack_special; struct mpic 
*pSeries_mpic; @@ -229,6 +233,20 @@ static void __init pSeries_setup_arch(void) if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) vpa_init(boot_cpuid); + + /* Choose an idle loop */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (get_paca()->lppaca.shared_proc) { + printk(KERN_INFO "Using shared processor idle loop\n"); + ppc_md.idle_loop = shared_idle; + } else { + printk(KERN_INFO "Using dedicated idle loop\n"); + ppc_md.idle_loop = dedicated_idle; + } + } else { + printk(KERN_INFO "Using default idle loop\n"); + ppc_md.idle_loop = default_idle; + } } static int __init pSeries_init_panel(void) diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index 6cf03d387b9..3013cdb5f93 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -186,6 +186,8 @@ void __init pmac_setup_arch(void) #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif + + printk(KERN_INFO "Using native/NAP idle loop\n"); } #ifdef CONFIG_SCSI @@ -507,5 +509,6 @@ struct machdep_calls __initdata pmac_md = { .calibrate_decr = pmac_calibrate_decr, .feature_call = pmac_do_feature_call, .progress = pmac_progress, - .check_legacy_ioport = pmac_check_legacy_ioport + .check_legacy_ioport = pmac_check_legacy_ioport, + .idle_loop = native_idle, }; -- cgit v1.2.3 From 08d5e3eb4b2141e1031835c89a62ee3ddf896641 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2005 17:56:31 -0700 Subject: [PATCH] ppc64: Remove obsolete idle_setup() Now that the idle loop is configured by each platform we don't need idle_setup() anymore. 
Signed-off-by: Michael Ellerman Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/idle.c | 41 ----------------------------------------- 1 file changed, 41 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index 69b7c22bad5..b8cfb37e5f1 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -37,8 +37,6 @@ extern void power4_idle(void); -static int (*idle_loop)(void); - int default_idle(void) { long oldval; @@ -127,42 +125,3 @@ register_powersave_nap_sysctl(void) } __initcall(register_powersave_nap_sysctl); #endif - -int idle_setup(void) -{ - /* - * Move that junk to each platform specific file, eventually define - * a pSeries_idle for shared processor stuff - */ -#ifdef CONFIG_PPC_ISERIES - idle_loop = iSeries_idle; - return 1; -#else - idle_loop = default_idle; -#endif -#ifdef CONFIG_PPC_PSERIES - if (systemcfg->platform & PLATFORM_PSERIES) { - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { - if (get_paca()->lppaca.shared_proc) { - printk(KERN_INFO "Using shared processor idle loop\n"); - idle_loop = shared_idle; - } else { - printk(KERN_INFO "Using dedicated idle loop\n"); - idle_loop = dedicated_idle; - } - } else { - printk(KERN_INFO "Using default idle loop\n"); - idle_loop = default_idle; - } - } -#endif /* CONFIG_PPC_PSERIES */ -#ifndef CONFIG_PPC_ISERIES - if (systemcfg->platform == PLATFORM_POWERMAC || - systemcfg->platform == PLATFORM_MAPLE) { - printk(KERN_INFO "Using native/NAP idle loop\n"); - idle_loop = native_idle; - } -#endif /* CONFIG_PPC_ISERIES */ - - return 1; -} -- cgit v1.2.3 From 3c57bb9f454e8fc7b3d815b991b0dec43c766641 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:32 -0700 Subject: [PATCH] ppc64: iSeries idle fixups - remove min/max yield time, we dont use the values anywhere - separate shared and dedicated idle loops - check need_resched again with irqs off to avoid sleeping 
with pending work - continually set runlatch off in idle loop, this means we dont need to turn the runlatch off on exception exit and suffer that associated cost for all exceptions. (A future patch will turn the runlatch on at exception entry) Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/iSeries_setup.c | 84 ++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index fae215ea54b..b4c919e18fa 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -834,9 +834,6 @@ static int __init iSeries_src_init(void) late_initcall(iSeries_src_init); -static unsigned long maxYieldTime = 0; -static unsigned long minYieldTime = 0xffffffffffffffffUL; - static inline void process_iSeries_events(void) { asm volatile ("li 0,0x5555; sc" : : : "r0", "r3"); @@ -845,7 +842,6 @@ static inline void process_iSeries_events(void) static void yield_shared_processor(void) { unsigned long tb; - unsigned long yieldTime; HvCall_setEnabledInterrupts(HvCall_MaskIPI | HvCall_MaskLpEvent | @@ -856,13 +852,6 @@ static void yield_shared_processor(void) /* Compute future tb value when yield should expire */ HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy); - yieldTime = get_tb() - tb; - if (yieldTime > maxYieldTime) - maxYieldTime = yieldTime; - - if (yieldTime < minYieldTime) - minYieldTime = yieldTime; - /* * The decrementer stops during the yield. Force a fake decrementer * here and let the timer_interrupt code sort out the actual time. 
@@ -871,45 +860,62 @@ static void yield_shared_processor(void) process_iSeries_events(); } -static int iSeries_idle(void) +static int iseries_shared_idle(void) { - struct paca_struct *lpaca; - long oldval; + while (1) { + while (!need_resched() && !hvlpevent_is_pending()) { + local_irq_disable(); + ppc64_runlatch_off(); + + /* Recheck with irqs off */ + if (!need_resched() && !hvlpevent_is_pending()) + yield_shared_processor(); - /* ensure iSeries run light will be out when idle */ - ppc64_runlatch_off(); + HMT_medium(); + local_irq_enable(); + } + + ppc64_runlatch_on(); - lpaca = get_paca(); + if (hvlpevent_is_pending()) + process_iSeries_events(); + + schedule(); + } + + return 0; +} + +static int iseries_dedicated_idle(void) +{ + struct paca_struct *lpaca = get_paca(); + long oldval; while (1) { - if (lpaca->lppaca.shared_proc) { - if (hvlpevent_is_pending()) - process_iSeries_events(); - if (!need_resched()) - yield_shared_processor(); - } else { - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); + oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); + if (!oldval) { + set_thread_flag(TIF_POLLING_NRFLAG); - while (!need_resched()) { + while (!need_resched()) { + ppc64_runlatch_off(); + HMT_low(); + + if (hvlpevent_is_pending()) { HMT_medium(); - if (hvlpevent_is_pending()) - process_iSeries_events(); - HMT_low(); + ppc64_runlatch_on(); + process_iSeries_events(); } - - HMT_medium(); - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + } else { + set_need_resched(); } ppc64_runlatch_on(); schedule(); - ppc64_runlatch_off(); } return 0; @@ -940,6 +946,10 @@ void __init iSeries_early_setup(void) ppc_md.get_rtc_time = iSeries_get_rtc_time; ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; - ppc_md.idle_loop = iSeries_idle; + + if (get_paca()->lppaca.shared_proc) + ppc_md.idle_loop = 
iseries_shared_idle; + else + ppc_md.idle_loop = iseries_dedicated_idle; } -- cgit v1.2.3 From 050a09389e045f37e5bf08718cf36909766e20d1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:33 -0700 Subject: [PATCH] ppc64: pSeries idle fixups - separate out sleep logic in dedicated_idle, it was so far indented that it got squashed against the right side of the screen. - add runlatch support, looping on runlatch disable. Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/pSeries_setup.c | 113 +++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 3f3be8ae935..5bec956e44a 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -83,8 +83,8 @@ int fwnmi_active; /* TRUE if an FWNMI handler is present */ extern void pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); -static int shared_idle(void); -static int dedicated_idle(void); +static int pseries_shared_idle(void); +static int pseries_dedicated_idle(void); static volatile void __iomem * chrp_int_ack_special; struct mpic *pSeries_mpic; @@ -238,10 +238,10 @@ static void __init pSeries_setup_arch(void) if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { if (get_paca()->lppaca.shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); - ppc_md.idle_loop = shared_idle; + ppc_md.idle_loop = pseries_shared_idle; } else { printk(KERN_INFO "Using dedicated idle loop\n"); - ppc_md.idle_loop = dedicated_idle; + ppc_md.idle_loop = pseries_dedicated_idle; } } else { printk(KERN_INFO "Using default idle loop\n"); @@ -438,15 +438,47 @@ static int __init pSeries_probe(int platform) DECLARE_PER_CPU(unsigned long, smt_snooze_delay); -int dedicated_idle(void) +static inline void 
dedicated_idle_sleep(unsigned int cpu) +{ + struct paca_struct *ppaca = &paca[cpu ^ 1]; + + /* Only sleep if the other thread is not idle */ + if (!(ppaca->lppaca.idle)) { + local_irq_disable(); + + /* + * We are about to sleep the thread and so wont be polling any + * more. + */ + clear_thread_flag(TIF_POLLING_NRFLAG); + + /* + * SMT dynamic mode. Cede will result in this thread going + * dormant, if the partner thread is still doing work. Thread + * wakes up if partner goes idle, an interrupt is presented, or + * a prod occurs. Returning from the cede enables external + * interrupts. + */ + if (!need_resched()) + cede_processor(); + else + local_irq_enable(); + } else { + /* + * Give the HV an opportunity at the processor, since we are + * not doing any work. + */ + poll_pending(); + } +} + +static int pseries_dedicated_idle(void) { long oldval; - struct paca_struct *lpaca = get_paca(), *ppaca; + struct paca_struct *lpaca = get_paca(); + unsigned int cpu = smp_processor_id(); unsigned long start_snooze; unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - unsigned int cpu = smp_processor_id(); - - ppaca = &paca[cpu ^ 1]; while (1) { /* @@ -458,9 +490,13 @@ int dedicated_idle(void) oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); if (!oldval) { set_thread_flag(TIF_POLLING_NRFLAG); + start_snooze = __get_tb() + *smt_snooze_delay * tb_ticks_per_usec; + while (!need_resched() && !cpu_is_offline(cpu)) { + ppc64_runlatch_off(); + /* * Go into low thread priority and possibly * low power mode. @@ -468,60 +504,31 @@ int dedicated_idle(void) HMT_low(); HMT_very_low(); - if (*smt_snooze_delay == 0 || - __get_tb() < start_snooze) - continue; - - HMT_medium(); - - if (!(ppaca->lppaca.idle)) { - local_irq_disable(); - - /* - * We are about to sleep the thread - * and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - - /* - * SMT dynamic mode. 
Cede will result - * in this thread going dormant, if the - * partner thread is still doing work. - * Thread wakes up if partner goes idle, - * an interrupt is presented, or a prod - * occurs. Returning from the cede - * enables external interrupts. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - } else { - /* - * Give the HV an opportunity at the - * processor, since we are not doing - * any work. - */ - poll_pending(); + if (*smt_snooze_delay != 0 && + __get_tb() > start_snooze) { + HMT_medium(); + dedicated_idle_sleep(cpu); } + } + HMT_medium(); clear_thread_flag(TIF_POLLING_NRFLAG); } else { set_need_resched(); } - HMT_medium(); lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + schedule(); + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } - return 0; } -static int shared_idle(void) +static int pseries_shared_idle(void) { struct paca_struct *lpaca = get_paca(); unsigned int cpu = smp_processor_id(); @@ -535,6 +542,7 @@ static int shared_idle(void) while (!need_resched() && !cpu_is_offline(cpu)) { local_irq_disable(); + ppc64_runlatch_off(); /* * Yield the processor to the hypervisor. We return if @@ -550,13 +558,16 @@ static int shared_idle(void) cede_processor(); else local_irq_enable(); + + HMT_medium(); } - HMT_medium(); lpaca->lppaca.idle = 0; + ppc64_runlatch_on(); + schedule(); - if (cpu_is_offline(smp_processor_id()) && - system_state == SYSTEM_RUNNING) + + if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); } -- cgit v1.2.3 From 45e75dfb609df4391636c2218bec5ea04536601d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:33 -0700 Subject: [PATCH] ppc64: idle fixups - remove some unnecessary includes - add runlatch support - no need to use raw_smp_processor_id any more, current preempt debug logic checks for processes that are bound to one cpu. 
Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/idle.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c index b8cfb37e5f1..954395d4263 100644 --- a/arch/ppc64/kernel/idle.c +++ b/arch/ppc64/kernel/idle.c @@ -20,18 +20,12 @@ #include #include #include -#include #include -#include #include #include -#include #include #include -#include -#include -#include #include #include @@ -49,7 +43,8 @@ int default_idle(void) set_thread_flag(TIF_POLLING_NRFLAG); while (!need_resched() && !cpu_is_offline(cpu)) { - barrier(); + ppc64_runlatch_off(); + /* * Go into low thread priority and possibly * low power mode. @@ -64,6 +59,7 @@ int default_idle(void) set_need_resched(); } + ppc64_runlatch_on(); schedule(); if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) cpu_die(); @@ -74,17 +70,22 @@ int default_idle(void) int native_idle(void) { - while(1) { - /* check CPU type here */ + while (1) { + ppc64_runlatch_off(); + if (!need_resched()) power4_idle(); - if (need_resched()) + + if (need_resched()) { + ppc64_runlatch_on(); schedule(); + } - if (cpu_is_offline(raw_smp_processor_id()) && + if (cpu_is_offline(smp_processor_id()) && system_state == SYSTEM_RUNNING) cpu_die(); } + return 0; } -- cgit v1.2.3 From 10ca1e1ed58d6428924b5a44539334c341a6f485 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:34 -0700 Subject: [PATCH] ppc64: fix compile warning Fix a compile warning introduced by the previous patches. 
Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/iSeries_setup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index b4c919e18fa..32483dc16d4 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -888,7 +888,6 @@ static int iseries_shared_idle(void) static int iseries_dedicated_idle(void) { - struct paca_struct *lpaca = get_paca(); long oldval; while (1) { -- cgit v1.2.3 From b6bff397ea9c36d410212f785ee644103146102a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Jul 2005 17:56:35 -0700 Subject: [PATCH] ppc64: Be consistent about printing which idle loop we're using Not sure if we really need this, but it was handy to know which iSeries loop I was testing. Be consistent about printing which idle loop we're using, with this patch we cover all cases. Signed-off-by: Michael Ellerman Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/iSeries_setup.c | 7 +++++-- arch/ppc64/kernel/setup.c | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index 32483dc16d4..077c82fc9f3 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -946,9 +946,12 @@ void __init iSeries_early_setup(void) ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; - if (get_paca()->lppaca.shared_proc) + if (get_paca()->lppaca.shared_proc) { ppc_md.idle_loop = iseries_shared_idle; - else + printk(KERN_INFO "Using shared processor idle loop\n"); + } else { ppc_md.idle_loop = iseries_dedicated_idle; + printk(KERN_INFO "Using dedicated idle loop\n"); + } } diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index a278998ecb4..d1b33f0b26c 100644 --- 
a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -1081,8 +1081,10 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); /* Use the default idle loop if the platform hasn't provided one. */ - if (NULL == ppc_md.idle_loop) + if (NULL == ppc_md.idle_loop) { ppc_md.idle_loop = default_idle; + printk(KERN_INFO "Using default idle loop\n"); + } paging_init(); ppc64_boot_msg(0x15, "Setup Done"); -- cgit v1.2.3 From 059e277e5ba6486b5ef66deb336d4ef887f163ac Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 7 Jul 2005 17:56:36 -0700 Subject: [PATCH] ppc64: silence perfmon exception warnings We dont need to use the PERFMON exception on POWER5, in fact the firmware returns an error. Due to this just remove the warning. Also now that we have proper runlatch support we can remove the bootup hack. Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/sysfs.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index 2f704a2cafb..02b8ac4e016 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -112,7 +112,6 @@ void ppc64_enable_pmcs(void) unsigned long hid0; #ifdef CONFIG_PPC_PSERIES unsigned long set, reset; - int ret; #endif /* CONFIG_PPC_PSERIES */ /* Only need to enable them once */ @@ -145,11 +144,7 @@ void ppc64_enable_pmcs(void) case PLATFORM_PSERIES_LPAR: set = 1UL << 63; reset = 0; - ret = plpar_hcall_norets(H_PERFMON, set, reset); - if (ret) - printk(KERN_ERR "H_PERFMON call on cpu %u " - "returned %d\n", - smp_processor_id(), ret); + plpar_hcall_norets(H_PERFMON, set, reset); break; #endif /* CONFIG_PPC_PSERIES */ @@ -161,13 +156,6 @@ void ppc64_enable_pmcs(void) /* instruct hypervisor to maintain PMCs */ if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) get_paca()->lppaca.pmcregs_in_use = 1; - - /* - * On SMT machines we have to set the run latch in 
the ctrl register - * in order to make PMC6 spin. - */ - if (cpu_has_feature(CPU_FTR_SMT)) - ppc64_runlatch_on(); #endif /* CONFIG_PPC_PSERIES */ } -- cgit v1.2.3 From 01d299367fe868851a632cfbdb606845f57682aa Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 7 Jul 2005 17:56:36 -0700 Subject: [PATCH] FRV: Add defconfig This patch by Yoshihiro MATSUYAMA (already ACK'ed by David Howells) adds a defconfig for the frv arch. Signed-Off-By: Yoshihiro MATSUYAMA Signed-off-by: Adrian Bunk Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/defconfig | 627 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 627 insertions(+) create mode 100644 arch/frv/defconfig (limited to 'arch') diff --git a/arch/frv/defconfig b/arch/frv/defconfig new file mode 100644 index 00000000000..b6e4ca5efb5 --- /dev/null +++ b/arch/frv/defconfig @@ -0,0 +1,627 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.11.8 +# Fri May 13 17:16:03 2005 +# +CONFIG_FRV=y +CONFIG_UID16=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +# CONFIG_GENERIC_CALIBRATE_DELAY is not set +# CONFIG_GENERIC_HARDIRQS is not set + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +# CONFIG_HOTPLUG is not set +# CONFIG_KOBJECT_UEVENT is not set +# CONFIG_IKCONFIG is not set +CONFIG_EMBEDDED=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# 
CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 + +# +# Loadable module support +# +# CONFIG_MODULES is not set + +# +# Fujitsu FR-V system setup +# +CONFIG_MMU=y +CONFIG_FRV_OUTOFLINE_ATOMIC_OPS=y +CONFIG_HIGHMEM=y +CONFIG_HIGHPTE=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +# CONFIG_FRV_DEFL_CACHE_WBACK is not set +# CONFIG_FRV_DEFL_CACHE_WBEHIND is not set +CONFIG_FRV_DEFL_CACHE_WTHRU=y +# CONFIG_FRV_DEFL_CACHE_DISABLED is not set + +# +# CPU core support +# +CONFIG_CPU_FR451=y +CONFIG_CPU_FR451_COMPILE=y +CONFIG_FRV_L1_CACHE_SHIFT=5 +CONFIG_MB93091_VDK=y +# CONFIG_MB93093_PDK is not set +CONFIG_MB93090_MB00=y +# CONFIG_MB93091_NO_MB is not set +# CONFIG_GPREL_DATA_8 is not set +CONFIG_GPREL_DATA_4=y +# CONFIG_GPREL_DATA_NONE is not set +CONFIG_PCI=y +# CONFIG_PCI_LEGACY_PROC is not set +# CONFIG_PCI_NAMES is not set +# CONFIG_PCI_DEBUG is not set +# CONFIG_PCMCIA is not set + +# +# Power management options +# +# CONFIG_PM is not set + +# +# Executable formats +# +# CONFIG_BINFMT_ELF is not set +CONFIG_BINFMT_ELF_FDPIC=y +# CONFIG_BINFMT_MISC is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +# CONFIG_STANDALONE is not set +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set + +# +# Connector - unified userspace <-> kernelspace linker +# +# CONFIG_CONNECTOR is not set +# CONFIG_FORK_CONNECTOR is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Plug and Play support +# + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD 
is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_RAM is not set +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CDROM_PKTCDVD is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_SCSI is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# +# CONFIG_FUSION is not set + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_PNP=y +# CONFIG_IP_PNP_DHCP is not set +# CONFIG_IP_PNP_BOOTP is not set +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +# CONFIG_IP_TCPDIAG is not set +# CONFIG_IP_TCPDIAG_IPV6 is not set +# CONFIG_IPV6 is not set +# CONFIG_NETFILTER is not set + +# +# SCTP Configuration (EXPERIMENTAL) +# +# CONFIG_IP_SCTP is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_NET_DIVERT is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set + +# +# Network testing +# +# 
CONFIG_NET_PKTGEN is not set +# CONFIG_KGDBOE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +# CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_HAMRADIO is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_IEEE80211 is not set +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +# CONFIG_NET_VENDOR_3COM is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set +# CONFIG_HP100 is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_DGRS is not set +# CONFIG_EEPRO100 is not set +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +CONFIG_NE2K_PCI=y +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SKGE is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set + +# +# Ethernet (10000 Mbit) +# +# CONFIG_CHELSIO_T1 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# 
CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=1 +CONFIG_SERIAL_8250_EXTENDED=y +# CONFIG_SERIAL_8250_MANY_PORTS is not set +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_MULTIPORT is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_LEGACY_PTYS is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_DRM is not set +# CONFIG_RAW_DRIVER is not set + +# +# TPM devices +# +# CONFIG_TCG_TPM is not set + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Misc devices +# + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FB is not set + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +# CONFIG_USB is not set + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# InfiniBand support +# +# 
CONFIG_INFINIBAND is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_JBD is not set +# CONFIG_REISER4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set + +# +# XFS support +# +# CONFIG_XFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +# CONFIG_PROC_KCORE is not set +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVPTS_FS_XATTR is not set +CONFIG_TMPFS=y +# CONFIG_TMPFS_XATTR is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +# CONFIG_NFS_V3 is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +# CONFIG_NFSD is not set +CONFIG_ROOT_NFS=y +CONFIG_LOCKD=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set 
+CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +# CONFIG_NLS is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_DEBUG_KERNEL=y +# CONFIG_MAGIC_SYSRQ is not set +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_EARLY_PRINTK is not set +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_GDBSTUB is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Hardware crypto devices +# + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +CONFIG_CRC32=y +# CONFIG_LIBCRC32C is not set -- cgit v1.2.3 From 3b520b238e018ef0e9d11c9115d5e7d9419c4ef9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 7 Jul 2005 17:56:38 -0700 Subject: [PATCH] MTRR suspend/resume cleanup There has been some discuss about solving the SMP MTRR suspend/resume breakage, but I didn't find a patch for it. This is an intent for it. The basic idea is moving mtrr initializing into cpu_identify for all APs (so it works for cpu hotplug). For BP, restore_processor_state is responsible for restoring MTRR. 
Signed-off-by: Shaohua Li Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 5 +++ arch/i386/kernel/cpu/mtrr/generic.c | 22 +++++------ arch/i386/kernel/cpu/mtrr/main.c | 76 ++++++++++++++++++++++++++----------- arch/i386/kernel/cpu/mtrr/mtrr.h | 1 - arch/i386/power/cpu.c | 1 + arch/x86_64/kernel/setup.c | 4 ++ arch/x86_64/kernel/suspend.c | 1 + 7 files changed, 75 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 2203a9d2021..4553ffd94b1 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -435,6 +435,11 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) if (c == &boot_cpu_data) sysenter_setup(); enable_sep_cpu(); + + if (c == &boot_cpu_data) + mtrr_bp_init(); + else + mtrr_ap_init(); } #ifdef CONFIG_X86_HT diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index 64d91f73a0a..169ac8e0db6 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -67,13 +67,6 @@ void __init get_mtrr_state(void) mtrr_state.enabled = (lo & 0xc00) >> 10; } -/* Free resources associated with a struct mtrr_state */ -void __init finalize_mtrr_state(void) -{ - kfree(mtrr_state.var_ranges); - mtrr_state.var_ranges = NULL; -} - /* Some BIOS's are fucked and don't set all MTRRs the same! */ void __init mtrr_state_warn(void) { @@ -334,6 +327,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, */ { unsigned long flags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; local_irq_save(flags); prepare_set(); @@ -342,11 +338,15 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, /* The invalid bit is kept in the mask, so we simply clear the relevant mask register to disable a range. 
*/ mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); + memset(vr, 0, sizeof(struct mtrr_var_range)); } else { - mtrr_wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type, - (base & size_and_mask) >> (32 - PAGE_SHIFT)); - mtrr_wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800, - (-size & size_and_mask) >> (32 - PAGE_SHIFT)); + vr->base_lo = base << PAGE_SHIFT | type; + vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); + vr->mask_lo = -size << PAGE_SHIFT | 0x800; + vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + + mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); + mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); } post_set(); diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index d66b09e0c82..764cac64e21 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -332,6 +332,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = -EINVAL; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); /* Search for existing MTRR */ down(&main_lock); for (i = 0; i < num_var_ranges; ++i) { @@ -372,6 +374,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = i; out: up(&main_lock); + unlock_cpu_hotplug(); return error; } @@ -461,6 +464,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) return -ENXIO; max = num_var_ranges; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); down(&main_lock); if (reg < 0) { /* Search for existing MTRR */ @@ -501,6 +506,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) error = reg; out: up(&main_lock); + unlock_cpu_hotplug(); return error; } /** @@ -544,21 +550,9 @@ static void __init init_ifs(void) centaur_init_mtrr(); } -static void __init init_other_cpus(void) -{ - if (use_intel()) - get_mtrr_state(); - - /* bring up the other processors */ - set_mtrr(~0U,0,0,0); - - if (use_intel()) { - finalize_mtrr_state(); - 
mtrr_state_warn(); - } -} - - +/* The suspend/resume methods are only for CPU without MTRR. CPU using generic + * MTRR driver doesn't require this + */ struct mtrr_value { mtrr_type ltype; unsigned long lbase; @@ -611,13 +605,13 @@ static struct sysdev_driver mtrr_sysdev_driver = { /** - * mtrr_init - initialize mtrrs on the boot CPU + * mtrr_bp_init - initialize mtrrs on the boot CPU * * This needs to be called early; before any of the other CPUs are * initialized (i.e. before smp_init()). * */ -static int __init mtrr_init(void) +void __init mtrr_bp_init(void) { init_ifs(); @@ -674,12 +668,48 @@ static int __init mtrr_init(void) if (mtrr_if) { set_num_var_ranges(); init_table(); - init_other_cpus(); - - return sysdev_driver_register(&cpu_sysdev_class, - &mtrr_sysdev_driver); + if (use_intel()) + get_mtrr_state(); } - return -ENXIO; } -subsys_initcall(mtrr_init); +void mtrr_ap_init(void) +{ + unsigned long flags; + + if (!mtrr_if || !use_intel()) + return; + /* + * Ideally we should hold main_lock here to avoid mtrr entries changed, + * but this routine will be called in cpu boot time, holding the lock + * breaks it. This routine is called in two cases: 1.very earily time + * of software resume, when there absolutely isn't mtrr entry changes; + * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to + * prevent mtrr entry changes + */ + local_irq_save(flags); + + mtrr_if->set_all(); + + local_irq_restore(flags); +} + +static int __init mtrr_init_finialize(void) +{ + if (!mtrr_if) + return 0; + if (use_intel()) + mtrr_state_warn(); + else { + /* The CPUs haven't MTRR and seemes not support SMP. They have + * specific drivers, we use a tricky method to support + * suspend/resume for them. + * TBD: is there any system with such CPU which supports + * suspend/resume? if no, we should remove the code. 
+ */ + sysdev_driver_register(&cpu_sysdev_class, + &mtrr_sysdev_driver); + } + return 0; +} +subsys_initcall(mtrr_init_finialize); diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h index de135124559..99c9f268204 100644 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h @@ -91,7 +91,6 @@ extern struct mtrr_ops * mtrr_if; extern unsigned int num_var_ranges; -void finalize_mtrr_state(void); void mtrr_state_warn(void); char *mtrr_attrib_to_str(int x); void mtrr_wrmsr(unsigned, unsigned, unsigned); diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 0e6b45b6125..c547c1af6fa 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -137,6 +137,7 @@ void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); + mtrr_ap_init(); } void restore_processor_state(void) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index b02d921da4f..5fd03225058 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -1076,6 +1076,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_MCE mcheck_init(c); #endif + if (c == &boot_cpu_data) + mtrr_bp_init(); + else + mtrr_ap_init(); #ifdef CONFIG_NUMA if (c != &boot_cpu_data) numa_add_cpu(c - cpu_data); diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index 6c0f402e3a8..0612640d91b 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c @@ -119,6 +119,7 @@ void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); + mtrr_ap_init(); } void restore_processor_state(void) -- cgit v1.2.3 From e00d9967e3addea86dded46deefc5daec5d52e5a Mon Sep 17 00:00:00 2001 From: Bernard Blackham Date: Thu, 7 Jul 2005 17:56:42 -0700 Subject: [PATCH] pm: fix u32 vs. pm_message_t confusion in cpufreq Fix u32 vs pm_message_t confusion in cpufreq. 
Signed-off-by: Bernard Blackham Signed-off-by: Pavel Machek Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/platforms/pmac_cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ppc/platforms/pmac_cpufreq.c b/arch/ppc/platforms/pmac_cpufreq.c index 5fdd4f607a4..c0605244edd 100644 --- a/arch/ppc/platforms/pmac_cpufreq.c +++ b/arch/ppc/platforms/pmac_cpufreq.c @@ -452,7 +452,7 @@ static u32 __pmac read_gpio(struct device_node *np) return offset; } -static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, u32 state) +static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, pm_message_t pmsg) { /* Ok, this could be made a bit smarter, but let's be robust for now. We * always force a speed change to high speed before sleep, to make sure -- cgit v1.2.3 From d67b569f5f620c0fb95d5212642746b7ba9d29e4 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Thu, 7 Jul 2005 17:56:49 -0700 Subject: [PATCH] uml: skas0 - separate kernel address space on stock hosts UML has had two modes of operation - an insecure, slow mode (tt mode) in which the kernel is mapped into every process address space which requires no host kernel modifications, and a secure, faster mode (skas mode) in which the UML kernel is in a separate host address space, which requires a patch to the host kernel. This patch implements something very close to skas mode for hosts which don't support skas - I'm calling this skas0. It provides the security of the skas host patch, and some of the performance gains. The two main things that are provided by the skas patch, /proc/mm and PTRACE_FAULTINFO, are implemented in a way that require no host patch. For the remote address space changing stuff (mmap, munmap, and mprotect), we set aside two pages in the process above its stack, one of which contains a little bit of code which can call mmap et al. 
To update the address space, the system call information (system call number and arguments) is written to the stub page above the code. The %esp is set to the beginning of the data, the %eip is set to the start of the stub, and it repeatedly pops the information into its registers and makes the system call until it sees a system call number of zero. This is to amortize the cost of the context switch across multiple address space updates. When the updates are done, it SIGSTOPs itself, and the kernel process continues what it was doing. For a PTRACE_FAULTINFO replacement, we set up a SIGSEGV handler in the child, and let it handle segfaults rather than nullifying them. The handler is in the same page as the mmap stub. The second page is used as the stack. The handler reads cr2 and err from the sigcontext, sticks them at the base of the stack in a faultinfo struct, and SIGSTOPs itself. The kernel then reads the faultinfo and handles the fault. A complication on x86_64 is that this involves resetting the registers to the segfault values when the process is inside the kill system call. This breaks on x86_64 because %rcx will contain %rip because you tell SYSRET where to return to by putting the value in %rcx. So, this corrupts %rcx on return from the segfault. To work around this, I added an arch_finish_segv, which on x86 does nothing, but which on x86_64 ptraces the child back through the sigreturn. This causes %rcx to be restored by sigreturn and avoids the corruption. Ultimately, I think I will replace this with the trick of having it send itself a blocked signal which will be unblocked by the sigreturn. This will allow it to be stopped just after the sigreturn, and PTRACE_SYSCALLed without all the back-and-forth of PTRACE_SYSCALLing it through sigreturn. This runs on a stock host, so theoretically (and hopefully), tt mode isn't needed any more. We need to make sure that this is better in every way than tt mode, though. 
I'm concerned about the speed of address space updates and page fault handling, since they involve extra round-trips to the child. We can amortize the round-trip cost for large address space updates by writing all of the operations to the data page and having the child execute them all at the same time. This will help fork and exec, but not page faults, since they involve only one page. I can't think of any way to help page faults, except to add something like PTRACE_FAULTINFO to the host. There is PTRACE_SIGINFO, but UML doesn't use siginfo for SIGSEGV (or anything else) because there isn't enough information in the siginfo struct to handle page faults (the faulting operation type is missing). Adding that would make PTRACE_SIGINFO a usable equivalent to PTRACE_FAULTINFO. As for the code itself: - The system call stub is in arch/um/kernel/sys-$(SUBARCH)/stub.S. It is put in its own section of the binary along with stub_segv_handler in arch/um/kernel/skas/process.c. This is manipulated with run_syscall_stub in arch/um/kernel/skas/mem_user.c. syscall_stub will execute any system call at all, but it's only used for mmap, munmap, and mprotect. - The x86_64 stub calls sigreturn by hand rather than allowing the normal sigreturn to happen, because the normal sigreturn is a SA_RESTORER in UML's address space provided by libc. Needless to say, this is not available in the child's address space. Also, it does a couple of odd pops before that which restore the stack to the state it was in at the time the signal handler was called. - There is a new field in the arch mmu_context, which is now a union. This is the pid to be manipulated rather than the /proc/mm file descriptor. Code which deals with this now checks proc_mm to see whether it should use the usual skas code or the new code. - userspace_tramp is now used to create a new host process for every UML process, rather than one per UML processor. 
It checks proc_mm and ptrace_faultinfo to decide whether to map in the pages above its stack. - start_userspace now makes CLONE_VM conditional on proc_mm since we need separate address spaces now. - switch_mm_skas now just sets userspace_pid[0] to the new pid rather than PTRACE_SWITCH_MM. There is an addition to userspace which updates its idea of the pid being manipulated each time around the loop. This is important on exec, when the pid will change underneath userspace(). - The stub page has a pte, but it can't be mapped in using tlb_flush because it is part of tlb_flush. This is why it's required for it to be mapped in by userspace_tramp. Other random things: - The stub section in uml.lds.S is page aligned. This page is written out to the backing vm file in setup_physmem because it is mapped from there into user processes. - There's some confusion with TASK_SIZE now that there are a couple of extra pages that the process can't use. TASK_SIZE is considered by the elf code to be the usable process memory, which is reasonable, so it is decreased by two pages. This confuses the definition of USER_PGDS_IN_LAST_PML4, making it too small because of the rounding down of the uneven division. So we round it to the nearest PGDIR_SIZE rather than the lower one. - I added a missing PT_SYSCALL_ARG6_OFFSET macro. - um_mmu.h was made into a userspace-usable file. - proc_mm and ptrace_faultinfo are globals which say whether the host supports these features. - There is a bad interaction between the mm.nr_ptes check at the end of exit_mmap, stack randomization, and skas0. exit_mmap will stop freeing pages at the PGDIR_SIZE boundary after the last vma. If the stack isn't on the last page table page, the last pte page won't be freed, as it should be since the stub ptes are there, and exit_mmap will BUG because there is an unfreed page. To get around this, TASK_SIZE is set to the next lowest PGDIR_SIZE boundary and mm->nr_ptes is decremented after the calls to init_stub_pte. 
This ensures that we know the process stack (and all other process mappings) will be below the top page table page, and thus we know that mm->nr_ptes will be one too many, and can be decremented. Things that need fixing: - We may need better assurances that the stub code is PIC. - The stub pte is set up in init_new_context_skas. - alloc_pgdir is probably the right place. Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig_i386 | 12 ++ arch/um/Kconfig_x86_64 | 12 ++ arch/um/Makefile-i386 | 2 +- arch/um/Makefile-x86_64 | 2 +- arch/um/defconfig | 58 ++++++- arch/um/include/mem.h | 1 + arch/um/include/registers.h | 1 + arch/um/include/sysdep-i386/ptrace_user.h | 13 ++ arch/um/include/sysdep-i386/stub.h | 18 +++ arch/um/include/sysdep-x86_64/ptrace_user.h | 14 ++ arch/um/include/sysdep-x86_64/stub.h | 19 +++ arch/um/include/tlb.h | 30 ++-- arch/um/kernel/dyn.lds.S | 6 + arch/um/kernel/physmem.c | 8 + arch/um/kernel/process.c | 28 ++-- arch/um/kernel/skas/exec_kern.c | 2 +- arch/um/kernel/skas/include/mm_id.h | 17 +++ arch/um/kernel/skas/include/mmu-skas.h | 7 +- arch/um/kernel/skas/include/skas.h | 15 +- arch/um/kernel/skas/mem.c | 6 +- arch/um/kernel/skas/mem_user.c | 225 ++++++++++++++++++---------- arch/um/kernel/skas/mmu.c | 136 ++++++++++++++--- arch/um/kernel/skas/process.c | 153 +++++++++++++++---- arch/um/kernel/skas/process_kern.c | 33 +++- arch/um/kernel/skas/tlb.c | 28 ++-- arch/um/kernel/tlb.c | 132 ++++++++-------- arch/um/kernel/tt/tlb.c | 4 +- arch/um/kernel/uml.lds.S | 7 + arch/um/os-Linux/sys-i386/registers.c | 5 + arch/um/os-Linux/sys-x86_64/registers.c | 5 + arch/um/scripts/Makefile.rules | 5 + arch/um/sys-i386/Makefile | 12 +- arch/um/sys-i386/stub.S | 8 + arch/um/sys-i386/stub_segv.c | 30 ++++ arch/um/sys-x86_64/Makefile | 12 +- arch/um/sys-x86_64/stub.S | 15 ++ arch/um/sys-x86_64/stub_segv.c | 31 ++++ 37 files changed, 850 insertions(+), 262 deletions(-) create mode 100644 
arch/um/include/sysdep-i386/stub.h create mode 100644 arch/um/include/sysdep-x86_64/stub.h create mode 100644 arch/um/kernel/skas/include/mm_id.h create mode 100644 arch/um/sys-i386/stub.S create mode 100644 arch/um/sys-i386/stub_segv.c create mode 100644 arch/um/sys-x86_64/stub.S create mode 100644 arch/um/sys-x86_64/stub_segv.c (limited to 'arch') diff --git a/arch/um/Kconfig_i386 b/arch/um/Kconfig_i386 index e41f3748d30..27c18a8d9d1 100644 --- a/arch/um/Kconfig_i386 +++ b/arch/um/Kconfig_i386 @@ -19,6 +19,18 @@ config 3_LEVEL_PGTABLES memory. All the memory that can't be mapped directly will be treated as high memory. +config STUB_CODE + hex + default 0xbfffe000 + +config STUB_DATA + hex + default 0xbffff000 + +config STUB_START + hex + default STUB_CODE + config ARCH_HAS_SC_SIGNALS bool default y diff --git a/arch/um/Kconfig_x86_64 b/arch/um/Kconfig_x86_64 index f162f50f0b1..735a047c890 100644 --- a/arch/um/Kconfig_x86_64 +++ b/arch/um/Kconfig_x86_64 @@ -14,6 +14,18 @@ config 3_LEVEL_PGTABLES bool default y +config STUB_CODE + hex + default 0x7fbfffe000 + +config STUB_DATA + hex + default 0x7fbffff000 + +config STUB_START + hex + default STUB_CODE + config ARCH_HAS_SC_SIGNALS bool default n diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386 index 29e182d5a83..301059062a3 100644 --- a/arch/um/Makefile-i386 +++ b/arch/um/Makefile-i386 @@ -8,7 +8,7 @@ ifeq ($(CONFIG_MODE_SKAS),y) endif endif -CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) +CFLAGS += -U__$(SUBARCH)__ -U$(SUBARCH) $(STUB_CFLAGS) ARCH_USER_CFLAGS := ifneq ($(CONFIG_GPROF),y) diff --git a/arch/um/Makefile-x86_64 b/arch/um/Makefile-x86_64 index 32144562c27..d80bd0052e6 100644 --- a/arch/um/Makefile-x86_64 +++ b/arch/um/Makefile-x86_64 @@ -4,7 +4,7 @@ SUBARCH_LIBS := arch/um/sys-x86_64/ START := 0x60000000 -CFLAGS += -U__$(SUBARCH)__ -fno-builtin +CFLAGS += -U__$(SUBARCH)__ -fno-builtin $(STUB_CFLAGS) ARCH_USER_CFLAGS := -D__x86_64__ ELF_ARCH := i386:x86-64 diff --git a/arch/um/defconfig 
b/arch/um/defconfig index 4067c3aa5b6..80d30d19d75 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc3-skas3-v9-pre2 -# Sun Apr 24 19:46:10 2005 +# Linux kernel version: 2.6.12-rc6-mm1 +# Tue Jun 14 18:22:21 2005 # CONFIG_GENERIC_HARDIRQS=y CONFIG_UML=y @@ -13,23 +13,32 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y # # UML-specific options # -CONFIG_MODE_TT=y +# CONFIG_MODE_TT is not set +# CONFIG_STATIC_LINK is not set CONFIG_MODE_SKAS=y CONFIG_UML_X86=y # CONFIG_64BIT is not set CONFIG_TOP_ADDR=0xc0000000 # CONFIG_3_LEVEL_PGTABLES is not set +CONFIG_STUB_CODE=0xbfffe000 +CONFIG_STUB_DATA=0xbffff000 +CONFIG_STUB_START=0xbfffe000 CONFIG_ARCH_HAS_SC_SIGNALS=y CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y -CONFIG_LD_SCRIPT_STATIC=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_LD_SCRIPT_DYN=y CONFIG_NET=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m -CONFIG_HOSTFS=y +# CONFIG_HOSTFS is not set CONFIG_MCONSOLE=y # CONFIG_MAGIC_SYSRQ is not set # CONFIG_HOST_2G_2G is not set -# CONFIG_SMP is not set CONFIG_NEST_LEVEL=0 CONFIG_KERNEL_HALF_GIGS=1 # CONFIG_HIGHMEM is not set @@ -63,6 +72,8 @@ CONFIG_IKCONFIG_PROC=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_PRINTK=y +CONFIG_BUG=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y @@ -81,6 +92,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_KMOD=y @@ -115,6 +127,7 @@ CONFIG_UML_SOUND=m CONFIG_SOUND=m CONFIG_HOSTAUDIO=m CONFIG_UML_RANDOM=y +# CONFIG_MMAPPER is not set # # Block devices @@ -176,6 +189,17 @@ CONFIG_INET=y # CONFIG_INET_TUNNEL is not set CONFIG_IP_TCPDIAG=y # CONFIG_IP_TCPDIAG_IPV6 is not set + +# +# 
TCP congestion control +# +CONFIG_TCP_CONG_BIC=y +CONFIG_TCP_CONG_WESTWOOD=y +CONFIG_TCP_CONG_HTCP=y +# CONFIG_TCP_CONG_HSTCP is not set +# CONFIG_TCP_CONG_HYBLA is not set +# CONFIG_TCP_CONG_VEGAS is not set +# CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_IPV6 is not set # CONFIG_NETFILTER is not set @@ -206,11 +230,15 @@ CONFIG_IP_TCPDIAG=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_KGDBOE is not set # CONFIG_NETPOLL is not set +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set # CONFIG_NET_POLL_CONTROLLER is not set # CONFIG_HAMRADIO is not set # CONFIG_IRDA is not set # CONFIG_BT is not set +# CONFIG_IEEE80211 is not set CONFIG_DUMMY=m # CONFIG_BONDING is not set # CONFIG_EQUALIZER is not set @@ -227,6 +255,7 @@ CONFIG_PPP=m # CONFIG_PPP_SYNC_TTY is not set # CONFIG_PPP_DEFLATE is not set # CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPP_MPPE is not set # CONFIG_PPPOE is not set CONFIG_SLIP=m # CONFIG_SLIP_COMPRESSED is not set @@ -240,10 +269,12 @@ CONFIG_SLIP=m # CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set CONFIG_JBD=y # CONFIG_JBD_DEBUG is not set +# CONFIG_REISER4_FS is not set CONFIG_REISERFS_FS=y # CONFIG_REISERFS_CHECK is not set # CONFIG_REISERFS_PROC_INFO is not set @@ -256,6 +287,7 @@ CONFIG_REISERFS_FS=y # CONFIG_XFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y CONFIG_QUOTA=y # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set @@ -264,6 +296,12 @@ CONFIG_DNOTIFY=y CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m +# +# Caches +# +# CONFIG_FSCACHE is not set +# CONFIG_FUSE_FS is not set + # # CD-ROM/DVD Filesystems # @@ -291,6 +329,8 @@ CONFIG_TMPFS=y # CONFIG_TMPFS_XATTR is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y +# CONFIG_CONFIGFS_FS is not set +# CONFIG_RELAYFS_FS is not set # # Miscellaneous filesystems @@ -319,6 +359,7 @@ CONFIG_RAMFS=y # CONFIG_NCP_FS is not set # 
CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set +# CONFIG_9P_FS is not set # # Partition Types @@ -404,14 +445,15 @@ CONFIG_CRC32=m # CONFIG_PRINTK_TIME is not set CONFIG_DEBUG_KERNEL=y CONFIG_LOG_BUF_SHIFT=14 +CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set -# CONFIG_DEBUG_SLAB is not set +CONFIG_DEBUG_SLAB=y # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_KOBJECT is not set CONFIG_DEBUG_INFO=y # CONFIG_DEBUG_FS is not set CONFIG_FRAME_POINTER=y -CONFIG_PT_PROXY=y +# CONFIG_GPROF is not set # CONFIG_GCOV is not set # CONFIG_SYSCALL_DEBUG is not set diff --git a/arch/um/include/mem.h b/arch/um/include/mem.h index 10c46c38949..99d3ad4a03e 100644 --- a/arch/um/include/mem.h +++ b/arch/um/include/mem.h @@ -13,6 +13,7 @@ extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); extern int is_remapped(void *virt); extern int physmem_remove_mapping(void *virt); extern void physmem_forget_descriptor(int fd); +extern unsigned long to_phys(void *virt); #endif diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h index 8744abb5224..0a35e6d0baa 100644 --- a/arch/um/include/registers.h +++ b/arch/um/include/registers.h @@ -14,6 +14,7 @@ extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern void save_registers(int pid, union uml_pt_regs *regs); extern void restore_registers(int pid, union uml_pt_regs *regs); extern void init_registers(int pid); +extern void get_safe_registers(unsigned long * regs); #endif diff --git a/arch/um/include/sysdep-i386/ptrace_user.h b/arch/um/include/sysdep-i386/ptrace_user.h index eca8066e7a4..899aa4b2a78 100644 --- a/arch/um/include/sysdep-i386/ptrace_user.h +++ b/arch/um/include/sysdep-i386/ptrace_user.h @@ -20,11 +20,24 @@ #define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX) #define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI) #define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI) +#define PT_SYSCALL_ARG6_OFFSET PT_OFFSET(EBP) #define PT_SYSCALL_RET_OFFSET 
PT_OFFSET(EAX) +#define REGS_SYSCALL_NR EAX /* This is used before a system call */ +#define REGS_SYSCALL_ARG1 EBX +#define REGS_SYSCALL_ARG2 ECX +#define REGS_SYSCALL_ARG3 EDX +#define REGS_SYSCALL_ARG4 ESI +#define REGS_SYSCALL_ARG5 EDI +#define REGS_SYSCALL_ARG6 EBP + +#define REGS_IP_INDEX EIP +#define REGS_SP_INDEX UESP + #define PT_IP_OFFSET PT_OFFSET(EIP) #define PT_IP(regs) ((regs)[EIP]) +#define PT_SP_OFFSET PT_OFFSET(UESP) #define PT_SP(regs) ((regs)[UESP]) #ifndef FRAME_SIZE diff --git a/arch/um/include/sysdep-i386/stub.h b/arch/um/include/sysdep-i386/stub.h new file mode 100644 index 00000000000..fed9ff1cea5 --- /dev/null +++ b/arch/um/include/sysdep-i386/stub.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include +#include + +extern void stub_segv_handler(int sig); + +#define STUB_SYSCALL_RET EAX +#define STUB_MMAP_NR __NR_mmap2 +#define MMAP_OFFSET(o) ((o) >> PAGE_SHIFT) + +#endif diff --git a/arch/um/include/sysdep-x86_64/ptrace_user.h b/arch/um/include/sysdep-x86_64/ptrace_user.h index 31729973fb1..128faf02736 100644 --- a/arch/um/include/sysdep-x86_64/ptrace_user.h +++ b/arch/um/include/sysdep-x86_64/ptrace_user.h @@ -55,6 +55,20 @@ #define PTRACE_OLDSETOPTIONS 21 #endif +/* These are before the system call, so the system call number is RAX + * rather than ORIG_RAX, and arg4 is R10 rather than RCX + */ +#define REGS_SYSCALL_NR PT_INDEX(RAX) +#define REGS_SYSCALL_ARG1 PT_INDEX(RDI) +#define REGS_SYSCALL_ARG2 PT_INDEX(RSI) +#define REGS_SYSCALL_ARG3 PT_INDEX(RDX) +#define REGS_SYSCALL_ARG4 PT_INDEX(R10) +#define REGS_SYSCALL_ARG5 PT_INDEX(R8) +#define REGS_SYSCALL_ARG6 PT_INDEX(R9) + +#define REGS_IP_INDEX PT_INDEX(RIP) +#define REGS_SP_INDEX PT_INDEX(RSP) + #endif /* diff --git a/arch/um/include/sysdep-x86_64/stub.h b/arch/um/include/sysdep-x86_64/stub.h new file mode 100644 index 00000000000..6b5447ad590 --- /dev/null +++ 
b/arch/um/include/sysdep-x86_64/stub.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#ifndef __SYSDEP_STUB_H +#define __SYSDEP_STUB_H + +#include +#include +#include + +extern void stub_segv_handler(int sig); + +#define STUB_SYSCALL_RET PT_INDEX(RAX) +#define STUB_MMAP_NR __NR_mmap +#define MMAP_OFFSET(o) (o) + +#endif diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h index da1097285b8..c6f9628f39b 100644 --- a/arch/um/include/tlb.h +++ b/arch/um/include/tlb.h @@ -37,31 +37,25 @@ struct host_vm_op { extern void mprotect_kernel_vm(int w); extern void force_flush_all(void); extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + unsigned long end_addr, int force, + void (*do_ops)(union mm_context *, + struct host_vm_op *, int)); extern int flush_tlb_kernel_range_common(unsigned long start, unsigned long end); extern int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); extern int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, int index, int last_filled, - int data, void (*do_ops)(int, struct host_vm_op *, int)); + union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); extern int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)); + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)); #endif - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. 
- * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 715b0838a68..3942a5f245d 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -67,6 +67,12 @@ SECTIONS *(.stub .text.* .gnu.linkonce.t.*) /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . = ALIGN(4096); } =0x90909090 .fini : { KEEP (*(.fini)) diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c index 420e6d51fa0..a24e3b7f4bf 100644 --- a/arch/um/kernel/physmem.c +++ b/arch/um/kernel/physmem.c @@ -353,6 +353,8 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len, #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +extern int __syscall_stub_start, __binary_start; + void setup_physmem(unsigned long start, unsigned long reserve_end, unsigned long len, unsigned long highmem) { @@ -371,6 +373,12 @@ void setup_physmem(unsigned long start, unsigned long reserve_end, exit(1); } + /* Special kludge - This page will be mapped in to userspace processes + * from physmem_fd, so it needs to be written out there. 
+ */ + os_seek_file(physmem_fd, __pa(&__syscall_stub_start)); + os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE); + bootmap_size = init_bootmem(pfn, pfn + delta); free_bootmem(__pa(reserve_end) + bootmap_size, len - bootmap_size - reserve); diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 1b5ef3e96c7..c45a60e9c92 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -32,6 +32,7 @@ #include "uml-config.h" #include "choose-mode.h" #include "mode.h" +#include "tempfile.h" #ifdef UML_CONFIG_MODE_SKAS #include "skas.h" #include "skas_ptrace.h" @@ -358,11 +359,16 @@ void forward_pending_sigio(int target) kill(target, SIGIO); } +int ptrace_faultinfo = 0; +int proc_mm = 1; + +extern void *__syscall_stub_start, __syscall_stub_end; + #ifdef UML_CONFIG_MODE_SKAS -static inline int check_skas3_ptrace_support(void) +static inline void check_skas3_ptrace_support(void) { struct ptrace_faultinfo fi; - int pid, n, ret = 1; + int pid, n; printf("Checking for the skas3 patch in the host..."); pid = start_ptraced_child(); @@ -374,33 +380,31 @@ static inline int check_skas3_ptrace_support(void) else { perror("not found"); } - ret = 0; - } else { + } + else { + ptrace_faultinfo = 1; printf("found\n"); } init_registers(pid); stop_ptraced_child(pid, 1, 1); - - return(ret); } int can_do_skas(void) { - int ret = 1; - printf("Checking for /proc/mm..."); if (os_access("/proc/mm", OS_ACC_W_OK) < 0) { + proc_mm = 0; printf("not found\n"); - ret = 0; goto out; - } else { + } + else { printf("found\n"); } - ret = check_skas3_ptrace_support(); out: - return ret; + check_skas3_ptrace_support(); + return 1; } #else int can_do_skas(void) diff --git a/arch/um/kernel/skas/exec_kern.c b/arch/um/kernel/skas/exec_kern.c index c6b4d5dba78..77ed7bbab21 100644 --- a/arch/um/kernel/skas/exec_kern.c +++ b/arch/um/kernel/skas/exec_kern.c @@ -18,7 +18,7 @@ void flush_thread_skas(void) { force_flush_all(); - switch_mm_skas(current->mm->context.skas.mm_fd); + 
switch_mm_skas(&current->mm->context.skas.id); } void start_thread_skas(struct pt_regs *regs, unsigned long eip, diff --git a/arch/um/kernel/skas/include/mm_id.h b/arch/um/kernel/skas/include/mm_id.h new file mode 100644 index 00000000000..48dd0989dda --- /dev/null +++ b/arch/um/kernel/skas/include/mm_id.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __MM_ID_H +#define __MM_ID_H + +struct mm_id { + union { + int mm_fd; + int pid; + } u; + unsigned long stack; +}; + +#endif diff --git a/arch/um/kernel/skas/include/mmu-skas.h b/arch/um/kernel/skas/include/mmu-skas.h index 4cd60d7213f..278b72f1d9a 100644 --- a/arch/um/kernel/skas/include/mmu-skas.h +++ b/arch/um/kernel/skas/include/mmu-skas.h @@ -6,10 +6,15 @@ #ifndef __SKAS_MMU_H #define __SKAS_MMU_H +#include "mm_id.h" + struct mmu_context_skas { - int mm_fd; + struct mm_id id; + unsigned long last_page_table; }; +extern void switch_mm_skas(struct mm_id * mm_idp); + #endif /* diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index 96b51dba347..d91a60f3830 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -6,9 +6,11 @@ #ifndef __SKAS_H #define __SKAS_H +#include "mm_id.h" #include "sysdep/ptrace.h" extern int userspace_pid[]; +extern int proc_mm, ptrace_faultinfo; extern void switch_threads(void *me, void *next); extern void thread_wait(void *sw, void *fb); @@ -22,16 +24,17 @@ extern void new_thread_proc(void *stack, void (*handler)(int sig)); extern void remove_sigstack(void); extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); -extern void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset); -extern int unmap(int fd, void *addr, unsigned long len); -extern int protect(int fd, unsigned long addr, unsigned long len, - int r, int w, int x); +extern int map(struct mm_id * 
mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset); +extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len); +extern int protect(struct mm_id * mm_idp, unsigned long addr, + unsigned long len, int r, int w, int x); extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); -extern void start_userspace(int cpu); +extern int start_userspace(unsigned long stub_stack); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); +extern unsigned long current_stub_stack(void); #endif diff --git a/arch/um/kernel/skas/mem.c b/arch/um/kernel/skas/mem.c index 438db2f4345..147466d7ff4 100644 --- a/arch/um/kernel/skas/mem.c +++ b/arch/um/kernel/skas/mem.c @@ -5,7 +5,9 @@ #include "linux/config.h" #include "linux/mm.h" +#include "asm/pgtable.h" #include "mem_user.h" +#include "skas.h" unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, unsigned long *task_size_out) @@ -18,7 +20,9 @@ unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, *task_size_out = CONFIG_HOST_TASK_SIZE; #else *host_size_out = top; - *task_size_out = top; + if (proc_mm && ptrace_faultinfo) + *task_size_out = top; + else *task_size_out = CONFIG_STUB_START & PGDIR_MASK; #endif return(((unsigned long) set_task_sizes_skas) & ~0xffffff); } diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c index 1310bf1e88d..b0980ff3bd9 100644 --- a/arch/um/kernel/skas/mem_user.c +++ b/arch/um/kernel/skas/mem_user.c @@ -3,100 +3,171 @@ * Licensed under the GPL */ +#include #include #include +#include +#include +#include #include "mem_user.h" #include "mem.h" +#include "mm_id.h" #include "user.h" #include "os.h" #include "proc_mm.h" - -void map(int fd, unsigned long virt, unsigned long len, int r, int w, - int x, int phys_fd, unsigned long long offset) +#include "ptrace_user.h" +#include "user_util.h" 
+#include "kern_util.h" +#include "task.h" +#include "registers.h" +#include "uml-config.h" +#include "sysdep/ptrace.h" +#include "sysdep/stub.h" +#include "skas.h" + +extern unsigned long syscall_stub, __syscall_stub_start; + +extern void wait_stub_done(int pid, int sig, char * fname); + +static long run_syscall_stub(struct mm_id * mm_idp, int syscall, + unsigned long *args) { - struct proc_mm_op map; - int prot, n; - - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? PROT_EXEC : 0); - - map = ((struct proc_mm_op) { .op = MM_MMAP, - .u = - { .mmap = - { .addr = virt, - .len = len, - .prot = prot, - .flags = MAP_SHARED | - MAP_FIXED, - .fd = phys_fd, - .offset = offset - } } } ); - n = os_write_file(fd, &map, sizeof(map)); - if(n != sizeof(map)) - printk("map : /proc/mm map failed, err = %d\n", -n); + int n, pid = mm_idp->u.pid; + unsigned long regs[MAX_REG_NR]; + + get_safe_registers(regs); + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + ((unsigned long) &syscall_stub - + (unsigned long) &__syscall_stub_start); + /* XXX Don't have a define for starting a syscall */ + regs[REGS_SYSCALL_NR] = syscall; + regs[REGS_SYSCALL_ARG1] = args[0]; + regs[REGS_SYSCALL_ARG2] = args[1]; + regs[REGS_SYSCALL_ARG3] = args[2]; + regs[REGS_SYSCALL_ARG4] = args[3]; + regs[REGS_SYSCALL_ARG5] = args[4]; + regs[REGS_SYSCALL_ARG6] = args[5]; + n = ptrace_setregs(pid, regs); + if(n < 0){ + printk("run_syscall_stub : PTRACE_SETREGS failed, " + "errno = %d\n", n); + return(n); + } + + wait_stub_done(pid, 0, "run_syscall_stub"); + + return(*((unsigned long *) mm_idp->stack)); } -int unmap(int fd, void *addr, unsigned long len) +int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, + int r, int w, int x, int phys_fd, unsigned long long offset) { - struct proc_mm_op unmap; - int n; - - unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, - .u = - { .munmap = - { .addr = (unsigned long) addr, - .len = len } } } ); - n = os_write_file(fd, &unmap, sizeof(unmap)); - if(n != 
sizeof(unmap)) { - if(n < 0) - return(n); - else if(n > 0) - return(-EIO); - } - - return(0); + int prot, n; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + if(proc_mm){ + struct proc_mm_op map; + int fd = mm_idp->u.mm_fd; + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = + { .mmap = + { .addr = virt, + .len = len, + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, + .fd = phys_fd, + .offset= offset + } } } ); + n = os_write_file(fd, &map, sizeof(map)); + if(n != sizeof(map)) + printk("map : /proc/mm map failed, err = %d\n", -n); + } + else { + long res; + unsigned long args[] = { virt, len, prot, + MAP_SHARED | MAP_FIXED, phys_fd, + MMAP_OFFSET(offset) }; + + res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args); + if((void *) res == MAP_FAILED) + printk("mmap stub failed, errno = %d\n", res); + } + + return 0; } -int protect(int fd, unsigned long addr, unsigned long len, int r, int w, - int x, int must_succeed) +int unmap(struct mm_id *mm_idp, void *addr, unsigned long len) { - struct proc_mm_op protect; - int prot, n; - - prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | - (x ? 
PROT_EXEC : 0); - - protect = ((struct proc_mm_op) { .op = MM_MPROTECT, - .u = - { .mprotect = - { .addr = (unsigned long) addr, - .len = len, - .prot = prot } } } ); - - n = os_write_file(fd, &protect, sizeof(protect)); - if(n != sizeof(protect)) { - if(n == 0) return(0); - - if(must_succeed) - panic("protect failed, err = %d", -n); - - return(-EIO); - } + int n; + + if(proc_mm){ + struct proc_mm_op unmap; + int fd = mm_idp->u.mm_fd; + unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, + .u = + { .munmap = + { .addr = + (unsigned long) addr, + .len = len } } } ); + n = os_write_file(fd, &unmap, sizeof(unmap)); + if(n != sizeof(unmap)) { + if(n < 0) + return(n); + else if(n > 0) + return(-EIO); + } + } + else { + int res; + unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0, + 0 }; + + res = run_syscall_stub(mm_idp, __NR_munmap, args); + if(res < 0) + printk("munmap stub failed, errno = %d\n", res); + } + + return(0); +} - return(0); +int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len, + int r, int w, int x) +{ + struct proc_mm_op protect; + int prot, n; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + + if(proc_mm){ + int fd = mm_idp->u.mm_fd; + protect = ((struct proc_mm_op) { .op = MM_MPROTECT, + .u = + { .mprotect = + { .addr = + (unsigned long) addr, + .len = len, + .prot = prot } } } ); + + n = os_write_file(fd, &protect, sizeof(protect)); + if(n != sizeof(protect)) + panic("protect failed, err = %d", -n); + } + else { + int res; + unsigned long args[] = { addr, len, prot, 0, 0, 0 }; + + res = run_syscall_stub(mm_idp, __NR_mprotect, args); + if(res < 0) + panic("mprotect stub failed, errno = %d\n", res); + } + + return(0); } void before_mem_skas(unsigned long unused) { } - -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. 
This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 6cb9a6d028a..511a855c9ec 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -3,46 +3,138 @@ * Licensed under the GPL */ +#include "linux/config.h" #include "linux/sched.h" #include "linux/list.h" #include "linux/spinlock.h" #include "linux/slab.h" +#include "linux/errno.h" +#include "linux/mm.h" #include "asm/current.h" #include "asm/segment.h" #include "asm/mmu.h" +#include "asm/pgalloc.h" +#include "asm/pgtable.h" #include "os.h" #include "skas.h" +extern int __syscall_stub_start; + +static int init_stub_pte(struct mm_struct *mm, unsigned long proc, + unsigned long kernel) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + spin_lock(&mm->page_table_lock); + pgd = pgd_offset(mm, proc); + pud = pud_alloc(mm, pgd, proc); + if (!pud) + goto out; + + pmd = pmd_alloc(mm, pud, proc); + if (!pmd) + goto out_pmd; + + pte = pte_alloc_map(mm, pmd, proc); + if (!pte) + goto out_pte; + + /* There's an interaction between the skas0 stub pages, stack + * randomization, and the BUG at the end of exit_mmap. exit_mmap + * checks that the number of page tables freed is the same as had + * been allocated. If the stack is on the last page table page, + * then the stack pte page will be freed, and if not, it won't. To + * avoid having to know where the stack is, or if the process mapped + * something at the top of its address space for some other reason, + * we set TASK_SIZE to end at the start of the last page table. + * This keeps exit_mmap off the last page, but introduces a leak + * of that page. So, we hang onto it here and free it in + * destroy_context_skas. 
+ */ + + mm->context.skas.last_page_table = pmd_page_kernel(*pmd); + + *pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT)); + *pte = pte_mkexec(*pte); + *pte = pte_wrprotect(*pte); + spin_unlock(&mm->page_table_lock); + return(0); + + out_pmd: + pud_free(pud); + out_pte: + pmd_free(pmd); + out: + spin_unlock(&mm->page_table_lock); + return(-ENOMEM); +} + int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) { - int from; + struct mm_struct *cur_mm = current->mm; + struct mm_id *mm_id = &mm->context.skas.id; + unsigned long stack; + int from, ret; - if((current->mm != NULL) && (current->mm != &init_mm)) - from = current->mm->context.skas.mm_fd; - else from = -1; + if(proc_mm){ + if((cur_mm != NULL) && (cur_mm != &init_mm)) + from = cur_mm->context.skas.id.u.mm_fd; + else from = -1; - mm->context.skas.mm_fd = new_mm(from); - if(mm->context.skas.mm_fd < 0){ - printk("init_new_context_skas - new_mm failed, errno = %d\n", - mm->context.skas.mm_fd); - return(mm->context.skas.mm_fd); + ret = new_mm(from); + if(ret < 0){ + printk("init_new_context_skas - new_mm failed, " + "errno = %d\n", ret); + return ret; + } + mm_id->u.mm_fd = ret; } + else { + /* This zeros the entry that pgd_alloc didn't, needed since + * we are about to reinitialize it, and want mm.nr_ptes to + * be accurate. 
+ */ + mm->pgd[USER_PTRS_PER_PGD] = __pgd(0); - return(0); + ret = init_stub_pte(mm, CONFIG_STUB_CODE, + (unsigned long) &__syscall_stub_start); + if(ret) + goto out; + + ret = -ENOMEM; + stack = get_zeroed_page(GFP_KERNEL); + if(stack == 0) + goto out; + mm_id->stack = stack; + + ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack); + if(ret) + goto out_free; + + mm->nr_ptes--; + mm_id->u.pid = start_userspace(stack); + } + + return 0; + + out_free: + free_page(mm_id->stack); + out: + return ret; } void destroy_context_skas(struct mm_struct *mm) { - os_close_file(mm->context.skas.mm_fd); -} + struct mmu_context_skas *mmu = &mm->context.skas; -/* - * Overrides for Emacs so that we follow Linus's tabbing style. - * Emacs will notice this stuff at the end of the file and automatically - * adjust the settings for this buffer only. This must remain at the end - * of the file. - * --------------------------------------------------------------------------- - * Local variables: - * c-file-style: "linux" - * End: - */ + if(proc_mm) + os_close_file(mmu->id.u.mm_fd); + else { + os_kill_ptraced_process(mmu->id.u.pid, 1); + free_page(mmu->id.stack); + free_page(mmu->last_page_table); + } +} diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 773cd2b525f..1647abb0d1a 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com) * Licensed under the GPL */ @@ -14,6 +14,7 @@ #include #include #include +#include #include "user.h" #include "ptrace_user.h" #include "time_user.h" @@ -21,13 +22,17 @@ #include "user_util.h" #include "kern_util.h" #include "skas.h" +#include "mm_id.h" #include "sysdep/sigcontext.h" +#include "sysdep/stub.h" #include "os.h" #include "proc_mm.h" #include "skas_ptrace.h" #include "chan_user.h" #include "signal_user.h" #include "registers.h" +#include "mem.h" +#include "uml-config.h" 
#include "process.h" int is_skas_winch(int pid, int fd, void *data) @@ -39,20 +44,55 @@ int is_skas_winch(int pid, int fd, void *data) return(1); } -void get_skas_faultinfo(int pid, struct faultinfo * fi) +void wait_stub_done(int pid, int sig, char * fname) { - int err; - - err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); - if(err) - panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " - "errno = %d\n", errno); + int n, status, err; + + do { + if ( sig != -1 ) { + err = ptrace(PTRACE_CONT, pid, 0, sig); + if(err) + panic("%s : continue failed, errno = %d\n", + fname, errno); + } + sig = 0; + + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED)); + } while((n >= 0) && WIFSTOPPED(status) && + (WSTOPSIG(status) == SIGVTALRM)); + + if((n < 0) || !WIFSTOPPED(status) || + (WSTOPSIG(status) != SIGUSR1 && WSTOPSIG(status != SIGTRAP))){ + panic("%s : failed to wait for SIGUSR1/SIGTRAP, " + "pid = %d, n = %d, errno = %d, status = 0x%x\n", + fname, pid, n, errno, status); + } +} - /* Special handling for i386, which has different structs */ - if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) - memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, - sizeof(struct faultinfo) - - sizeof(struct ptrace_faultinfo)); +void get_skas_faultinfo(int pid, struct faultinfo * fi) +{ + int err; + + if(ptrace_faultinfo){ + err = ptrace(PTRACE_FAULTINFO, pid, 0, fi); + if(err) + panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, " + "errno = %d\n", errno); + + /* Special handling for i386, which has different structs */ + if (sizeof(struct ptrace_faultinfo) < sizeof(struct faultinfo)) + memset((char *)fi + sizeof(struct ptrace_faultinfo), 0, + sizeof(struct faultinfo) - + sizeof(struct ptrace_faultinfo)); + } + else { + wait_stub_done(pid, SIGSEGV, "get_skas_faultinfo"); + + /* faultinfo is prepared by the stub-segv-handler at start of + * the stub stack page. We just have to copy it. 
+ */ + memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); + } } static void handle_segv(int pid, union uml_pt_regs * regs) @@ -91,11 +131,56 @@ static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu handle_syscall(regs); } -static int userspace_tramp(void *arg) +extern int __syscall_stub_start; + +static int userspace_tramp(void *stack) { - init_new_thread_signals(0); - enable_timer(); + void *addr; + ptrace(PTRACE_TRACEME, 0, 0, 0); + + init_new_thread_signals(1); + enable_timer(); + + if(!proc_mm){ + /* This has a pte, but it can't be mapped in with the usual + * tlb_flush mechanism because this is part of that mechanism + */ + int fd; + __u64 offset; + + fd = phys_mapping(to_phys(&__syscall_stub_start), &offset); + addr = mmap64((void *) UML_CONFIG_STUB_CODE, page_size(), + PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping mmap stub failed, errno = %d\n", + errno); + exit(1); + } + + if(stack != NULL){ + fd = phys_mapping(to_phys(stack), &offset); + addr = mmap((void *) UML_CONFIG_STUB_DATA, page_size(), + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fd, offset); + if(addr == MAP_FAILED){ + printk("mapping segfault stack failed, " + "errno = %d\n", errno); + exit(1); + } + } + } + if(!ptrace_faultinfo && (stack != NULL)){ + unsigned long v = UML_CONFIG_STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) &__syscall_stub_start; + + set_sigstack((void *) UML_CONFIG_STUB_DATA, page_size()); + set_handler(SIGSEGV, (void *) v, SA_ONSTACK, + SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, + SIGUSR1, -1); + } + os_stop_process(os_getpid()); return(0); } @@ -105,11 +190,11 @@ static int userspace_tramp(void *arg) #define NR_CPUS 1 int userspace_pid[NR_CPUS]; -void start_userspace(int cpu) +int start_userspace(unsigned long stub_stack) { void *stack; unsigned long sp; - int pid, status, n; + int pid, status, n, flags; stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, 
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); @@ -117,8 +202,9 @@ void start_userspace(int cpu) panic("start_userspace : mmap failed, errno = %d", errno); sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); - pid = clone(userspace_tramp, (void *) sp, - CLONE_FILES | CLONE_VM | SIGCHLD, NULL); + flags = CLONE_FILES | SIGCHLD; + if(proc_mm) flags |= CLONE_VM; + pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack); if(pid < 0) panic("start_userspace : clone failed, errno = %d", errno); @@ -140,7 +226,7 @@ void start_userspace(int cpu) if(munmap(stack, PAGE_SIZE) < 0) panic("start_userspace : munmap failed, errno = %d\n", errno); - userspace_pid[cpu] = pid; + return(pid); } void userspace(union uml_pt_regs *regs) @@ -174,7 +260,9 @@ void userspace(union uml_pt_regs *regs) if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: - handle_segv(pid, regs); + if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo) + user_signal(SIGSEGV, regs, pid); + else handle_segv(pid, regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs, local_using_sysemu); @@ -194,6 +282,7 @@ void userspace(union uml_pt_regs *regs) printk("userspace - child stopped with signal " "%d\n", WSTOPSIG(status)); } + pid = userspace_pid[0]; interrupt_end(); /* Avoid -ERESTARTSYS handling in host */ @@ -334,21 +423,19 @@ void reboot_skas(void) siglongjmp(initial_jmpbuf, INIT_JMP_REBOOT); } -void switch_mm_skas(int mm_fd) +void switch_mm_skas(struct mm_id *mm_idp) { int err; #warning need cpu pid in switch_mm_skas - err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); - if(err) - panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", - errno); -} - -void kill_off_processes_skas(void) -{ -#warning need to loop over userspace_pids in kill_off_processes_skas - os_kill_ptraced_process(userspace_pid[0], 1); + if(proc_mm){ + err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, + mm_idp->u.mm_fd); + if(err) + panic("switch_mm_skas - PTRACE_SWITCH_MM failed, " + "errno = %d\n", errno); + 
} + else userspace_pid[0] = mm_idp->u.pid; } /* diff --git a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c index 0a7b8aa55db..cbabab104ac 100644 --- a/arch/um/kernel/skas/process_kern.c +++ b/arch/um/kernel/skas/process_kern.c @@ -175,9 +175,12 @@ static int start_kernel_proc(void *unused) return(0); } +extern int userspace_pid[]; + int start_uml_skas(void) { - start_userspace(0); + if(proc_mm) + userspace_pid[0] = start_userspace(0); init_new_thread_signals(1); @@ -199,3 +202,31 @@ int thread_pid_skas(struct task_struct *task) #warning Need to look up userspace_pid by cpu return(userspace_pid[0]); } + +void kill_off_processes_skas(void) +{ + if(proc_mm) +#warning need to loop over userspace_pids in kill_off_processes_skas + os_kill_ptraced_process(userspace_pid[0], 1); + else { + struct task_struct *p; + int pid, me; + + me = os_getpid(); + for_each_process(p){ + if(p->mm == NULL) + continue; + + pid = p->mm->context.skas.id.u.pid; + os_kill_ptraced_process(pid, 1); + } + } +} + +unsigned long current_stub_stack(void) +{ + if(current->mm == NULL) + return(0); + + return(current->mm->context.skas.id.stack); +} diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c index 18f9a7711de..6230999c672 100644 --- a/arch/um/kernel/skas/tlb.c +++ b/arch/um/kernel/skas/tlb.c @@ -6,6 +6,7 @@ #include "linux/stddef.h" #include "linux/sched.h" +#include "linux/config.h" #include "linux/mm.h" #include "asm/page.h" #include "asm/pgtable.h" @@ -17,7 +18,7 @@ #include "os.h" #include "tlb.h" -static void do_ops(int fd, struct host_vm_op *ops, int last) +static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) { struct host_vm_op *op; int i; @@ -26,18 +27,18 @@ static void do_ops(int fd, struct host_vm_op *ops, int last) op = &ops[i]; switch(op->type){ case MMAP: - map(fd, op->u.mmap.addr, op->u.mmap.len, + map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len, op->u.mmap.r, op->u.mmap.w, op->u.mmap.x, op->u.mmap.fd, 
op->u.mmap.offset); break; case MUNMAP: - unmap(fd, (void *) op->u.munmap.addr, + unmap(&mmu->skas.id, (void *) op->u.munmap.addr, op->u.munmap.len); break; case MPROTECT: - protect(fd, op->u.mprotect.addr, op->u.mprotect.len, - op->u.mprotect.r, op->u.mprotect.w, - op->u.mprotect.x); + protect(&mmu->skas.id, op->u.mprotect.addr, + op->u.mprotect.len, op->u.mprotect.r, + op->u.mprotect.w, op->u.mprotect.x); break; default: printk("Unknown op type %d in do_ops\n", op->type); @@ -46,12 +47,15 @@ static void do_ops(int fd, struct host_vm_op *ops, int last) } } +extern int proc_mm; + static void fix_range(struct mm_struct *mm, unsigned long start_addr, unsigned long end_addr, int force) { - int fd = mm->context.skas.mm_fd; + if(!proc_mm && (end_addr > CONFIG_STUB_START)) + end_addr = CONFIG_STUB_START; - fix_range_common(mm, start_addr, end_addr, force, fd, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } void __flush_tlb_one_skas(unsigned long addr) @@ -69,16 +73,20 @@ void flush_tlb_range_skas(struct vm_area_struct *vma, unsigned long start, void flush_tlb_mm_skas(struct mm_struct *mm) { + unsigned long end; + /* Don't bother flushing if this address space is about to be * destroyed. */ if(atomic_read(&mm->mm_users) == 0) return; - fix_range(mm, 0, host_task_size, 0); + end = proc_mm ? task_size : CONFIG_STUB_START; + fix_range(mm, 0, end, 0); } void force_flush_all_skas(void) { - fix_range(current->mm, 0, host_task_size, 1); + unsigned long end = proc_mm ? 
task_size : CONFIG_STUB_START; + fix_range(current->mm, 0, end, 1); } diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index eda477edfdf..83ec8d4747f 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -18,13 +18,15 @@ #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1)) void fix_range_common(struct mm_struct *mm, unsigned long start_addr, - unsigned long end_addr, int force, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + unsigned long end_addr, int force, + void (*do_ops)(union mm_context *, struct host_vm_op *, + int)) { pgd_t *npgd; pud_t *npud; pmd_t *npmd; pte_t *npte; + union mm_context *mmu = &mm->context; unsigned long addr, end; int r, w, x; struct host_vm_op ops[16]; @@ -40,7 +42,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pgd_newpage(*npgd)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pgd_mkuptodate(*npgd); } @@ -55,7 +57,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pud_newpage(*npud)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pud_mkuptodate(*npud); } @@ -70,7 +72,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, end = end_addr; if(force || pmd_newpage(*npmd)){ op_index = add_munmap(addr, end - addr, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); pmd_mkuptodate(*npmd); } @@ -93,21 +95,21 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, op_index = add_mmap(addr, pte_val(*npte) & PAGE_MASK, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); else op_index = add_munmap(addr, PAGE_SIZE, ops, - op_index, last_op, data, + op_index, last_op, mmu, do_ops); } else if(pte_newprot(*npte)) op_index = add_mprotect(addr, PAGE_SIZE, r, w, x, ops, - op_index, last_op, data, + op_index, 
last_op, mmu, do_ops); *npte = pte_mkuptodate(*npte); addr += PAGE_SIZE; } - (*do_ops)(data, ops, op_index); + (*do_ops)(mmu, ops, op_index); } int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) @@ -195,51 +197,6 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end) return(updated); } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -{ - address &= PAGE_MASK; - flush_tlb_range(vma, address, address + PAGE_SIZE); -} - -void flush_tlb_all(void) -{ - flush_tlb_mm(current->mm); -} - -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, - flush_tlb_kernel_range_common, start, end); -} - -void flush_tlb_kernel_vm(void) -{ - CHOOSE_MODE(flush_tlb_kernel_vm_tt(), - flush_tlb_kernel_range_common(start_vm, end_vm)); -} - -void __flush_tlb_one(unsigned long addr) -{ - CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); -} - -void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) -{ - CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, - end); -} - -void flush_tlb_mm(struct mm_struct *mm) -{ - CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); -} - -void force_flush_all(void) -{ - CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); -} - pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) { return(pgd_offset(mm, address)); @@ -270,9 +227,9 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr) } int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, - int r, int w, int x, struct host_vm_op *ops, int index, - int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + int r, int w, int x, struct host_vm_op *ops, int index, + int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { __u64 offset; struct host_vm_op *last; @@ -292,7 +249,7 @@ int 
add_mmap(unsigned long virt, unsigned long phys, unsigned long len, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -310,8 +267,8 @@ int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, } int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, - int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + int index, int last_filled, union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { struct host_vm_op *last; @@ -325,7 +282,7 @@ int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -337,8 +294,9 @@ int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *ops, } int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, - struct host_vm_op *ops, int index, int last_filled, int data, - void (*do_ops)(int, struct host_vm_op *, int)) + struct host_vm_op *ops, int index, int last_filled, + union mm_context *mmu, + void (*do_ops)(union mm_context *, struct host_vm_op *, int)) { struct host_vm_op *last; @@ -354,7 +312,7 @@ int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, } if(index == last_filled){ - (*do_ops)(data, ops, last_filled); + (*do_ops)(mmu, ops, last_filled); index = -1; } @@ -367,3 +325,49 @@ int add_mprotect(unsigned long addr, unsigned long len, int r, int w, int x, .x = x } } }); return(index); } + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) +{ + address &= PAGE_MASK; + flush_tlb_range(vma, address, address + PAGE_SIZE); +} + +void flush_tlb_all(void) +{ + flush_tlb_mm(current->mm); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt, + flush_tlb_kernel_range_common, start, end); +} + 
+void flush_tlb_kernel_vm(void) +{ + CHOOSE_MODE(flush_tlb_kernel_vm_tt(), + flush_tlb_kernel_range_common(start_vm, end_vm)); +} + +void __flush_tlb_one(unsigned long addr) +{ + CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start, + end); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); +} + +void force_flush_all(void) +{ + CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); +} + diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c index 203216ad86f..2eefb43bc9c 100644 --- a/arch/um/kernel/tt/tlb.c +++ b/arch/um/kernel/tt/tlb.c @@ -17,7 +17,7 @@ #include "os.h" #include "tlb.h" -static void do_ops(int unused, struct host_vm_op *ops, int last) +static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last) { struct host_vm_op *op; int i; @@ -55,7 +55,7 @@ static void fix_range(struct mm_struct *mm, unsigned long start_addr, panic("fix_range fixing wrong address space, current = 0x%p", current); - fix_range_common(mm, start_addr, end_addr, force, 0, do_ops); + fix_range_common(mm, start_addr, end_addr, force, do_ops); } atomic_t vmchange_seq = ATOMIC_INIT(1); diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 61dfd4fef75..163476a8cb1 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -30,6 +30,7 @@ SECTIONS _einittext = .; } . = ALIGN(4096); + .text : { *(.text) @@ -39,6 +40,12 @@ SECTIONS /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) *(.gnu.linkonce.t*) + + . = ALIGN(4096); + __syscall_stub_start = .; + *(.__syscall_stub*) + __syscall_stub_end = .; + . 
= ALIGN(4096); } #include "asm/common.lds.S" diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c index 9a0ad094d92..3125d320722 100644 --- a/arch/um/os-Linux/sys-i386/registers.c +++ b/arch/um/os-Linux/sys-i386/registers.c @@ -121,6 +121,11 @@ void init_registers(int pid) err); } +void get_safe_registers(unsigned long *regs) +{ + memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c index 6286c974bbe..44438d15c3d 100644 --- a/arch/um/os-Linux/sys-x86_64/registers.c +++ b/arch/um/os-Linux/sys-x86_64/registers.c @@ -69,6 +69,11 @@ void init_registers(int pid) err); } +void get_safe_registers(unsigned long *regs) +{ + memcpy(regs, exec_regs, HOST_FRAME_SIZE * sizeof(unsigned long)); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. 
* Emacs will notice this stuff at the end of the file and automatically diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index 7459d09c233..17f305b6bad 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -16,6 +16,11 @@ define unprofile endef +# The stubs and unmap.o can't try to call mcount or update basic block data +define unprofile + $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1))) +endef + quiet_cmd_make_link = SYMLINK $@ cmd_make_link = ln -sf $(srctree)/arch/$(SUBARCH)/$($(notdir $@)-dir)/$(notdir $@) $@ diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 095bcdb0b9c..77c3c4d29f5 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -1,6 +1,6 @@ obj-y = bitops.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ - ptrace_user.o semaphore.o signal.o sigcontext.o syscalls.o sysrq.o \ - sys_call_table.o + ptrace_user.o semaphore.o signal.o sigcontext.o stub.o stub_segv.o \ + syscalls.o sysrq.o sys_call_table.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_MODULES) += module.o @@ -16,6 +16,14 @@ semaphore.c-dir = kernel highmem.c-dir = mm module.c-dir = kernel +STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) + +# _cflags works with kernel files, not with userspace ones, but c_flags does, +# why ask why? 
+$(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) + +$(obj)/stub.o : a_flags = $(STUB_CFLAGS) + subdir- := util include arch/um/scripts/Makefile.unmap diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S new file mode 100644 index 00000000000..2f2c70a8f04 --- /dev/null +++ b/arch/um/sys-i386/stub.S @@ -0,0 +1,8 @@ +#include "uml-config.h" + + .globl syscall_stub +.section .__syscall_stub, "x" +syscall_stub: + int $0x80 + mov %eax, UML_CONFIG_STUB_DATA + int3 diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c new file mode 100644 index 00000000000..b251442ad0b --- /dev/null +++ b/arch/um/sys-i386/stub_segv.c @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include "uml-config.h" +#include "sysdep/sigcontext.h" +#include "sysdep/faultinfo.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig) +{ + struct sigcontext *sc = (struct sigcontext *) (&sig + 1); + + GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA), + sc); + + __asm__("movl %0, %%eax ; int $0x80": : "g" (__NR_getpid)); + __asm__("movl %%eax, %%ebx ; movl %0, %%eax ; movl %1, %%ecx ;" + "int $0x80": : "g" (__NR_kill), "g" (SIGUSR1)); + /* Pop the frame pointer and return address since we need to leave + * the stack in its original form when we do the sigreturn here, by + * hand. + */ + __asm__("popl %%eax ; popl %%eax ; popl %%eax ; movl %0, %%eax ; " + "int $0x80" : : "g" (__NR_sigreturn)); +} diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index 2bc6f684901..7488206ce6f 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -6,8 +6,8 @@ #XXX: why into lib-y? 
lib-y = bitops.o bugs.o csum-partial.o delay.o fault.o mem.o memcpy.o \ - ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o \ - syscalls.o sysrq.o thunk.o syscall_table.o + ptrace.o ptrace_user.o semaphore.o sigcontext.o signal.o stub.o \ + stub_segv.o syscalls.o syscall_table.o sysrq.o thunk.o obj-y := ksyms.o obj-$(CONFIG_MODULES) += module.o um_module.o @@ -28,6 +28,14 @@ semaphore.c-dir = kernel thunk.S-dir = lib module.c-dir = kernel +STUB_CFLAGS = -Wp,-MD,$(depfile) $(call unprofile,$(USER_CFLAGS)) + +# _cflags works with kernel files, not with userspace ones, but c_flags does, +# why ask why? +$(obj)/stub_segv.o : c_flags = $(STUB_CFLAGS) + +$(obj)/stub.o : a_flags = $(STUB_CFLAGS) + subdir- := util include arch/um/scripts/Makefile.unmap diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S new file mode 100644 index 00000000000..31c14925716 --- /dev/null +++ b/arch/um/sys-x86_64/stub.S @@ -0,0 +1,15 @@ +#include "uml-config.h" + + .globl syscall_stub +.section .__syscall_stub, "x" +syscall_stub: + syscall + /* We don't have 64-bit constants, so this constructs the address + * we need. 
+ */ + movq $(UML_CONFIG_STUB_DATA >> 32), %rbx + salq $32, %rbx + movq $(UML_CONFIG_STUB_DATA & 0xffffffff), %rcx + or %rcx, %rbx + movq %rax, (%rbx) + int3 diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c new file mode 100644 index 00000000000..161d1fe9c03 --- /dev/null +++ b/arch/um/sys-x86_64/stub_segv.c @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include "uml-config.h" +#include "sysdep/sigcontext.h" +#include "sysdep/faultinfo.h" + +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_segv_handler(int sig) +{ + struct ucontext *uc; + + __asm__("movq %%rdx, %0" : "=g" (uc) :); + GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA), + &uc->uc_mcontext); + + __asm__("movq %0, %%rax ; syscall": : "g" (__NR_getpid)); + __asm__("movq %%rax, %%rdi ; movq %0, %%rax ; movq %1, %%rsi ;" + "syscall": : "g" (__NR_kill), "g" (SIGUSR1)); + /* Two popqs to restore the stack to the state just before entering + * the handler, one pops the return address, the other pops the frame + * pointer. + */ + __asm__("popq %%rax ; popq %%rax ; movq %0, %%rax ; syscall" : : "g" + (__NR_rt_sigreturn)); +} -- cgit v1.2.3 From 9786a8f3cbc61f990266e23ffdb338ee3118b03d Mon Sep 17 00:00:00 2001 From: Bodo Stroesser Date: Thu, 7 Jul 2005 17:56:50 -0700 Subject: [PATCH] uml: Proper clone support for skas0 This patch implements the clone-stub mechanism, which allows skas0 to run with proc_mm==0, even if the clib in UML uses modify_ldt. Note: There is a bug in skas3.v7 host patch, that avoids UML-skas from running properly on a SMP-box. In full skas3, I never really saw problems, but in skas0 they showed up. More commentary by jdike - What this patch does is makes sure that the host parent of each new host process matches the UML parent of the corresponding UML process. This ensures that any changed LDTs are inherited. 
This is done by having clone actually called by the UML process from its stub, rather than by the kernel. We have special syscall stubs that are loaded onto the stub code page because that code must be completely self-contained. These stubs are given C interfaces, and used like normal C functions, but there are subtleties. Principally, we have to be careful about stack variables in stub_clone_handler after the clone. The code is written so that there aren't any - everything boils down to a fixed address. If there were any locals, references to them after the clone would be wrong because the stack just changed. Signed-off-by: Bodo Stroesser Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/sysdep-i386/stub.h | 47 ++++++++++++++++++++++++ arch/um/include/sysdep-x86_64/stub.h | 39 ++++++++++++++++++++ arch/um/include/time_user.h | 1 + arch/um/kernel/skas/Makefile | 7 ++-- arch/um/kernel/skas/clone.c | 44 +++++++++++++++++++++++ arch/um/kernel/skas/include/skas.h | 1 + arch/um/kernel/skas/include/stub-data.h | 18 ++++++++++ arch/um/kernel/skas/mmu.c | 7 +++- arch/um/kernel/skas/process.c | 63 +++++++++++++++++++++++++++++++++ arch/um/kernel/time.c | 7 ++++ 10 files changed, 231 insertions(+), 3 deletions(-) create mode 100644 arch/um/kernel/skas/clone.c create mode 100644 arch/um/kernel/skas/include/stub-data.h (limited to 'arch') diff --git a/arch/um/include/sysdep-i386/stub.h b/arch/um/include/sysdep-i386/stub.h index fed9ff1cea5..d3699fe1c61 100644 --- a/arch/um/include/sysdep-i386/stub.h +++ b/arch/um/include/sysdep-i386/stub.h @@ -10,9 +10,56 @@ #include extern void stub_segv_handler(int sig); +extern void stub_clone_handler(void); #define STUB_SYSCALL_RET EAX #define STUB_MMAP_NR __NR_mmap2 #define MMAP_OFFSET(o) ((o) >> PAGE_SHIFT) +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__("movl %0, %%ecx; " : : "g" (arg2) : "%ecx"); + __asm__("movl %0, %%ebx; " : : "g" 
(arg1) : "%ebx"); + __asm__("movl %0, %%eax; " : : "g" (syscall) : "%eax"); + __asm__("int $0x80;" : : : "%eax"); + __asm__ __volatile__("movl %%eax, %0; " : "=g" (ret) :); + return(ret); +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + __asm__("movl %0, %%edx; " : : "g" (arg3) : "%edx"); + return(stub_syscall2(syscall, arg1, arg2)); +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + __asm__("movl %0, %%esi; " : : "g" (arg4) : "%esi"); + return(stub_syscall3(syscall, arg1, arg2, arg3)); +} + +static inline long stub_syscall6(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6) +{ + long ret; + __asm__("movl %0, %%eax; " : : "g" (syscall) : "%eax"); + __asm__("movl %0, %%ebx; " : : "g" (arg1) : "%ebx"); + __asm__("movl %0, %%ecx; " : : "g" (arg2) : "%ecx"); + __asm__("movl %0, %%edx; " : : "g" (arg3) : "%edx"); + __asm__("movl %0, %%esi; " : : "g" (arg4) : "%esi"); + __asm__("movl %0, %%edi; " : : "g" (arg5) : "%edi"); + __asm__ __volatile__("pushl %%ebp ; movl %1, %%ebp; " + "int $0x80; popl %%ebp ; " + "movl %%eax, %0; " : "=g" (ret) : "g" (arg6) : "%eax"); + return(ret); +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + #endif diff --git a/arch/um/include/sysdep-x86_64/stub.h b/arch/um/include/sysdep-x86_64/stub.h index 6b5447ad590..f599058d826 100644 --- a/arch/um/include/sysdep-x86_64/stub.h +++ b/arch/um/include/sysdep-x86_64/stub.h @@ -11,9 +11,48 @@ #include extern void stub_segv_handler(int sig); +extern void stub_clone_handler(void); #define STUB_SYSCALL_RET PT_INDEX(RAX) #define STUB_MMAP_NR __NR_mmap #define MMAP_OFFSET(o) (o) +static inline long stub_syscall2(long syscall, long arg1, long arg2) +{ + long ret; + + __asm__("movq %0, %%rsi; " : : "g" (arg2) : "%rsi"); + __asm__("movq %0, %%rdi; " : : "g" (arg1) : "%rdi"); + __asm__("movq %0, %%rax; " : : "g" (syscall) : "%rax"); + __asm__("syscall;" : : : "%rax", 
"%r11", "%rcx"); + __asm__ __volatile__("movq %%rax, %0; " : "=g" (ret) :); + return(ret); +} + +static inline long stub_syscall3(long syscall, long arg1, long arg2, long arg3) +{ + __asm__("movq %0, %%rdx; " : : "g" (arg3) : "%rdx"); + return(stub_syscall2(syscall, arg1, arg2)); +} + +static inline long stub_syscall4(long syscall, long arg1, long arg2, long arg3, + long arg4) +{ + __asm__("movq %0, %%r10; " : : "g" (arg4) : "%r10"); + return(stub_syscall3(syscall, arg1, arg2, arg3)); +} + +static inline long stub_syscall6(long syscall, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6) +{ + __asm__("movq %0, %%r9; " : : "g" (arg6) : "%r9"); + __asm__("movq %0, %%r8; " : : "g" (arg5) : "%r8"); + return(stub_syscall4(syscall, arg1, arg2, arg3, arg4)); +} + +static inline void trap_myself(void) +{ + __asm("int3"); +} + #endif diff --git a/arch/um/include/time_user.h b/arch/um/include/time_user.h index f64ef77019a..17d7ef2141f 100644 --- a/arch/um/include/time_user.h +++ b/arch/um/include/time_user.h @@ -10,6 +10,7 @@ extern void timer(void); extern void switch_timers(int to_real); extern void idle_sleep(int secs); extern void enable_timer(void); +extern void prepare_timer(void * ptr); extern void disable_timer(void); extern unsigned long time_lock(void); extern void time_unlock(unsigned long); diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile index ff69c4b312c..d296d55ade4 100644 --- a/arch/um/kernel/skas/Makefile +++ b/arch/um/kernel/skas/Makefile @@ -3,11 +3,14 @@ # Licensed under the GPL # -obj-y := exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ +obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \ syscall_kern.o syscall_user.o tlb.o trap_user.o uaccess.o \ subdir- := util -USER_OBJS := process.o +USER_OBJS := process.o clone.o include arch/um/scripts/Makefile.rules + +# clone.o is in the stub, so it can't be built with profiling +$(obj)/clone.o : c_flags = -Wp,-MD,$(depfile) $(call 
unprofile,$(USER_CFLAGS)) diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c new file mode 100644 index 00000000000..4dc55f10cd1 --- /dev/null +++ b/arch/um/kernel/skas/clone.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include +#include "ptrace_user.h" +#include "skas.h" +#include "stub-data.h" +#include "uml-config.h" +#include "sysdep/stub.h" + +/* This is in a separate file because it needs to be compiled with any + * extraneous gcc flags (-pg, -fprofile-arcs, -ftest-coverage) disabled + */ +void __attribute__ ((__section__ (".__syscall_stub"))) +stub_clone_handler(void) +{ + long err; + struct stub_data *from = (struct stub_data *) UML_CONFIG_STUB_DATA; + + err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, + UML_CONFIG_STUB_DATA + PAGE_SIZE / 2 - + sizeof(void *)); + if(err != 0) + goto out; + + err = stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0); + if(err) + goto out; + + err = stub_syscall3(__NR_setitimer, ITIMER_VIRTUAL, + (long) &from->timer, 0); + if(err) + goto out; + + err = stub_syscall6(STUB_MMAP_NR, UML_CONFIG_STUB_DATA, PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, + from->fd, from->offset); + out: + /* save current result. 
Parent: pid; child: retcode of mmap */ + from->err = err; + trap_myself(); +} diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h index d91a60f3830..d983ea84254 100644 --- a/arch/um/kernel/skas/include/skas.h +++ b/arch/um/kernel/skas/include/skas.h @@ -32,6 +32,7 @@ extern int protect(struct mm_id * mm_idp, unsigned long addr, extern void user_signal(int sig, union uml_pt_regs *regs, int pid); extern int new_mm(int from); extern int start_userspace(unsigned long stub_stack); +extern int copy_context_skas0(unsigned long stack, int pid); extern void get_skas_faultinfo(int pid, struct faultinfo * fi); extern long execute_syscall_skas(void *r); extern unsigned long current_stub_stack(void); diff --git a/arch/um/kernel/skas/include/stub-data.h b/arch/um/kernel/skas/include/stub-data.h new file mode 100644 index 00000000000..f6ed92c3727 --- /dev/null +++ b/arch/um/kernel/skas/include/stub-data.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2005 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +#ifndef __STUB_DATA_H +#define __STUB_DATA_H + +#include + +struct stub_data { + long offset; + int fd; + struct itimerval timer; + long err; +}; + +#endif diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 511a855c9ec..d232daa42c3 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -75,6 +75,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) { struct mm_struct *cur_mm = current->mm; + struct mm_id *cur_mm_id = &cur_mm->context.skas.id; struct mm_id *mm_id = &mm->context.skas.id; unsigned long stack; int from, ret; @@ -115,7 +116,11 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) goto out_free; mm->nr_ptes--; - mm_id->u.pid = start_userspace(stack); + + if((cur_mm != NULL) && (cur_mm != &init_mm)) + mm_id->u.pid = copy_context_skas0(stack, + cur_mm_id->u.pid); + else 
mm_id->u.pid = start_userspace(stack); } return 0; diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 1647abb0d1a..ba671dab887 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "user.h" @@ -22,6 +23,7 @@ #include "user_util.h" #include "kern_util.h" #include "skas.h" +#include "stub-data.h" #include "mm_id.h" #include "sysdep/sigcontext.h" #include "sysdep/stub.h" @@ -296,6 +298,67 @@ void userspace(union uml_pt_regs *regs) #define INIT_JMP_HALT 3 #define INIT_JMP_REBOOT 4 + +int copy_context_skas0(unsigned long new_stack, int pid) +{ + int err; + unsigned long regs[MAX_REG_NR]; + unsigned long current_stack = current_stub_stack(); + struct stub_data *data = (struct stub_data *) current_stack; + struct stub_data *child_data = (struct stub_data *) new_stack; + __u64 new_offset; + int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset); + + /* prepare offset and fd of child's stack as argument for parent's + * and child's mmap2 calls + */ + *data = ((struct stub_data) { .offset = MMAP_OFFSET(new_offset), + .fd = new_fd, + .timer = ((struct itimerval) + { { 0, 1000000 / hz() }, + { 0, 1000000 / hz() }})}); + get_safe_registers(regs); + + /* Set parent's instruction pointer to start of clone-stub */ + regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE + + (unsigned long) stub_clone_handler - + (unsigned long) &__syscall_stub_start; + regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA + PAGE_SIZE - + sizeof(void *); + err = ptrace_setregs(pid, regs); + if(err < 0) + panic("copy_context_skas0 : PTRACE_SETREGS failed, " + "pid = %d, errno = %d\n", pid, errno); + + /* set a well known return code for detection of child write failure */ + child_data->err = 12345678; + + /* Wait, until parent has finished its work: read child's pid from + * parent's stack, and check, if bad result. 
+ */ + wait_stub_done(pid, 0, "copy_context_skas0"); + + pid = data->err; + if(pid < 0) + panic("copy_context_skas0 - stub-parent reports error %d\n", + pid); + + /* Wait, until child has finished too: read child's result from + * child's stack and check it. + */ + wait_stub_done(pid, -1, "copy_context_skas0"); + if (child_data->err != UML_CONFIG_STUB_DATA) + panic("copy_context_skas0 - stub-child reports error %d\n", + child_data->err); + + if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, + (void *)PTRACE_O_TRACESYSGOOD) < 0) + panic("copy_context_skas0 : PTRACE_SETOPTIONS failed, " + "errno = %d\n", errno); + + return pid; +} + void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, void (*handler)(int)) { diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index f829b309b63..c40b611e3d9 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -48,6 +48,13 @@ void enable_timer(void) set_interval(ITIMER_VIRTUAL); } +void prepare_timer(void * ptr) +{ + int usec = 1000000/hz(); + *(struct itimerval *)ptr = ((struct itimerval) { { 0, usec }, + { 0, usec }}); +} + void disable_timer(void) { struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }}); -- cgit v1.2.3 From 3f580470baa3afc423e38fdc6e19667446b5aac0 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Thu, 7 Jul 2005 17:56:51 -0700 Subject: [PATCH] uml: restore hppfs support Some time ago a trivial patch broke HPPFS (one var became a pointer, not all uses were updated). It wasn't fixed at that time because not very used, now it's been requested so I've fixed this, and it has been tested positively (at least partially). 
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 9469e77303e..6682c788364 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -128,7 +128,6 @@ config HOSTFS config HPPFS tristate "HoneyPot ProcFS (EXPERIMENTAL)" - depends on BROKEN help hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc entries to be overridden, removed, or fabricated from the host. @@ -141,8 +140,9 @@ config HPPFS You only need this if you are setting up a UML honeypot. Otherwise, it is safe to say 'N' here. - If you are actively using it, please ask for it to be fixed. In this - moment, it does not work on 2.6 (it works somehow on 2.4). + If you are actively using it, please report any problems, since it's + getting fixed. In this moment, it is experimental on 2.6 (it works on + 2.4). config MCONSOLE bool "Management console" -- cgit v1.2.3 From 605a69ac81249cca531cdc6b3e695f15dda63102 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Thu, 7 Jul 2005 17:56:52 -0700 Subject: [PATCH] uml: remove winch sem Replace a semaphore (winch_handler_sem) used in atomic code with a spinlock, and reduce as needed the amount of protected code to the bare minimum (for instance no kmalloc calls are needed). This fixes the last problems with spinlocking (in UP mode with DEBUG options); the semaphore, taken inside spinlocks, caused a "spin_lock was already locked" warning, without this patch. 
Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/drivers/line.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 2bb4c4f5dec..e0fdffa2d54 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -663,11 +663,15 @@ struct tty_driver *line_register_devfs(struct lines *set, return driver; } +static spinlock_t winch_handler_lock; +LIST_HEAD(winch_handlers); + void lines_init(struct line *lines, int nlines) { struct line *line; int i; + spin_lock_init(&winch_handler_lock); for(i = 0; i < nlines; i++){ line = &lines[i]; INIT_LIST_HEAD(&line->chan_list); @@ -724,31 +728,30 @@ irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) return IRQ_HANDLED; } -DECLARE_MUTEX(winch_handler_sem); -LIST_HEAD(winch_handlers); - void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty) { struct winch *winch; - down(&winch_handler_sem); winch = kmalloc(sizeof(*winch), GFP_KERNEL); if (winch == NULL) { printk("register_winch_irq - kmalloc failed\n"); - goto out; + return; } + *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), .fd = fd, .tty_fd = tty_fd, .pid = pid, .tty = tty }); + + spin_lock(&winch_handler_lock); list_add(&winch->list, &winch_handlers); + spin_unlock(&winch_handler_lock); + if(um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, "winch", winch) < 0) printk("register_winch_irq - failed to register IRQ\n"); - out: - up(&winch_handler_sem); } static void unregister_winch(struct tty_struct *tty) @@ -756,7 +759,7 @@ static void unregister_winch(struct tty_struct *tty) struct list_head *ele; struct winch *winch, *found = NULL; - down(&winch_handler_sem); + spin_lock(&winch_handler_lock); list_for_each(ele, &winch_handlers){ winch = list_entry(ele, struct 
winch, list); if(winch->tty == tty){ @@ -764,20 +767,25 @@ static void unregister_winch(struct tty_struct *tty) break; } } - if(found == NULL) - goto out; + goto err; + + list_del(&winch->list); + spin_unlock(&winch_handler_lock); if(winch->pid != -1) os_kill_process(winch->pid, 1); free_irq(WINCH_IRQ, winch); - list_del(&winch->list); kfree(winch); - out: - up(&winch_handler_sem); + + return; +err: + spin_unlock(&winch_handler_lock); } +/* XXX: No lock as it's an exitcall... is this valid? Depending on cleanup + * order... are we sure that nothing else is done on the list? */ static void winch_cleanup(void) { struct list_head *ele; @@ -786,6 +794,9 @@ static void winch_cleanup(void) list_for_each(ele, &winch_handlers){ winch = list_entry(ele, struct winch, list); if(winch->fd != -1){ + /* Why is this different from the above free_irq(), + * which deactivates SIGIO? This searches the FD + * somewhere else and removes it from the list... */ deactivate_fd(winch->fd, WINCH_IRQ); os_close_file(winch->fd); } -- cgit v1.2.3 From 8759145114f72857bcaeed338db21620a6619b26 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Jul 2005 17:56:53 -0700 Subject: [PATCH] xtensa: remove old syscalls xtensa is now in -rc1, with the obsolete syscalls still in there, so I guess this is about the last chance to correct the ABI. Applying the patch obviously breaks all sorts of user space binaries and probably also requires the appropriate changes to be made to libc. On the other hand, if a decision is made to keep the broken interface, it should at least be a conscious one instead of an oversight.
Signed-off-by: Arnd Bergmann Cc: Chris Zankel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/xtensa/kernel/syscalls.c | 152 +----------------------------------------- arch/xtensa/kernel/syscalls.h | 57 ++++++++-------- 2 files changed, 30 insertions(+), 179 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/syscalls.c b/arch/xtensa/kernel/syscalls.c index abc8ed6c702..3540d8b119f 100644 --- a/arch/xtensa/kernel/syscalls.c +++ b/arch/xtensa/kernel/syscalls.c @@ -46,8 +46,6 @@ extern void do_syscall_trace(void); typedef int (*syscall_t)(void *a0,...); -extern int (*do_syscalls)(struct pt_regs *regs, syscall_t fun, - int narg); extern syscall_t sys_call_table[]; extern unsigned char sys_narg_table[]; @@ -72,10 +70,8 @@ int sys_pipe(int __user *userfds) /* * Common code for old and new mmaps. */ - -static inline long do_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, + unsigned long flags, unsigned long fd, unsigned long pgoff) { int error = -EBADF; struct file * file = NULL; @@ -97,29 +93,6 @@ out: return error; } -unsigned long old_mmap(unsigned long addr, size_t len, int prot, - int flags, int fd, off_t offset) -{ - return do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); -} - -long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long pgoff) -{ - return do_mmap2(addr, len, prot, flags, fd, pgoff); -} - -int sys_fork(struct pt_regs *regs) -{ - return do_fork(SIGCHLD, regs->areg[1], regs, 0, NULL, NULL); -} - -int sys_vfork(struct pt_regs *regs) -{ - return do_fork(CLONE_VFORK|CLONE_VM|SIGCHLD, regs->areg[1], - regs, 0, NULL, NULL); -} - int sys_clone(struct pt_regs *regs) { unsigned long clone_flags; @@ -162,30 +135,6 @@ int sys_uname(struct old_utsname * name) return -EFAULT; } -int 
sys_olduname(struct oldold_utsname * name) -{ - int error; - - if (!name) - return -EFAULT; - if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) - return -EFAULT; - - error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN); - error -= __put_user(0,name->sysname+__OLD_UTS_LEN); - error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN); - error -= __put_user(0,name->nodename+__OLD_UTS_LEN); - error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN); - error -= __put_user(0,name->release+__OLD_UTS_LEN); - error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN); - error -= __put_user(0,name->version+__OLD_UTS_LEN); - error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN); - error -= __put_user(0,name->machine+__OLD_UTS_LEN); - - return error ? -EFAULT : 0; -} - - /* * Build the string table for the builtin "poor man's strace". */ @@ -319,100 +268,3 @@ void system_call (struct pt_regs *regs) regs->areg[2] = res; do_syscall_trace(); } - -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. 
- */ - -int sys_ipc (uint call, int first, int second, - int third, void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - ret = -ENOSYS; - - switch (call) { - case SEMOP: - ret = sys_semtimedop (first, (struct sembuf __user *)ptr, - second, NULL); - break; - - case SEMTIMEDOP: - ret = sys_semtimedop (first, (struct sembuf __user *)ptr, - second, (const struct timespec *) fifth); - break; - - case SEMGET: - ret = sys_semget (first, second, third); - break; - - case SEMCTL: { - union semun fourth; - - if (ptr && !get_user(fourth.__pad, (void *__user *) ptr)) - ret = sys_semctl (first, second, third, fourth); - break; - } - - case MSGSND: - ret = sys_msgsnd (first, (struct msgbuf __user*) ptr, - second, third); - break; - - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - - if (ptr && !copy_from_user(&tmp, - (struct ipc_kludge *) ptr, - sizeof (tmp))) - ret = sys_msgrcv (first, tmp.msgp, second, - tmp.msgtyp, third); - break; - } - - default: - ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, - second, 0, third); - break; - } - break; - - case MSGGET: - ret = sys_msgget ((key_t) first, second); - break; - - case MSGCTL: - ret = sys_msgctl (first, second, (struct msqid_ds __user*) ptr); - break; - - case SHMAT: { - ulong raddr; - ret = do_shmat (first, (char __user *) ptr, second, &raddr); - - if (!ret) - ret = put_user (raddr, (ulong __user *) third); - - break; - } - - case SHMDT: - ret = sys_shmdt ((char __user *)ptr); - break; - - case SHMGET: - ret = sys_shmget (first, second, third); - break; - - case SHMCTL: - ret = sys_shmctl (first, second, (struct shmid_ds __user*) ptr); - break; - } - return ret; -} - diff --git a/arch/xtensa/kernel/syscalls.h b/arch/xtensa/kernel/syscalls.h index 5b3f75f50fe..07580696b60 100644 --- a/arch/xtensa/kernel/syscalls.h +++ b/arch/xtensa/kernel/syscalls.h @@ -25,20 +25,19 @@ */ SYSCALL(0, 0) /* 00 */ - SYSCALL(sys_exit, 
1) -SYSCALL(sys_fork, 0) +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_read, 3) SYSCALL(sys_write, 3) SYSCALL(sys_open, 3) /* 05 */ SYSCALL(sys_close, 1) -SYSCALL(sys_waitpid, 3) +SYSCALL(sys_ni_syscall, 3) SYSCALL(sys_creat, 2) SYSCALL(sys_link, 2) SYSCALL(sys_unlink, 1) /* 10 */ SYSCALL(sys_execve, 0) SYSCALL(sys_chdir, 1) -SYSCALL(sys_time, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_mknod, 3) SYSCALL(sys_chmod, 2) /* 15 */ SYSCALL(sys_lchown, 3) @@ -47,19 +46,19 @@ SYSCALL(sys_stat, 2) SYSCALL(sys_lseek, 3) SYSCALL(sys_getpid, 0) /* 20 */ SYSCALL(sys_mount, 5) -SYSCALL(sys_oldumount, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_setuid, 1) SYSCALL(sys_getuid, 0) -SYSCALL(sys_stime, 1) /* 25 */ +SYSCALL(sys_ni_syscall, 1) /* 25 */ SYSCALL(sys_ptrace, 4) -SYSCALL(sys_alarm, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_fstat, 2) -SYSCALL(sys_pause, 0) +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_utime, 2) /* 30 */ SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_access, 2) -SYSCALL(sys_nice, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_ni_syscall, 0) /* 35 */ SYSCALL(sys_sync, 0) SYSCALL(sys_kill, 2) @@ -73,7 +72,7 @@ SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_brk, 1) /* 45 */ SYSCALL(sys_setgid, 1) SYSCALL(sys_getgid, 0) -SYSCALL(sys_ni_syscall, 0) /* was signal(2) */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_geteuid, 0) SYSCALL(sys_getegid, 0) /* 50 */ SYSCALL(sys_acct, 1) @@ -84,21 +83,21 @@ SYSCALL(sys_fcntl, 3) /* 55 */ SYSCALL(sys_ni_syscall, 2) SYSCALL(sys_setpgid, 2) SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_olduname, 1) +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_umask, 1) /* 60 */ SYSCALL(sys_chroot, 1) SYSCALL(sys_ustat, 2) SYSCALL(sys_dup2, 2) SYSCALL(sys_getppid, 0) -SYSCALL(sys_getpgrp, 0) /* 65 */ +SYSCALL(sys_ni_syscall, 0) /* 65 */ SYSCALL(sys_setsid, 0) SYSCALL(sys_sigaction, 3) -SYSCALL(sys_sgetmask, 0) -SYSCALL(sys_ssetmask, 1) +SYSCALL(sys_ni_syscall, 0) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_setreuid, 2) /* 70 */ SYSCALL(sys_setregid, 2) 
SYSCALL(sys_sigsuspend, 0) -SYSCALL(sys_sigpending, 1) +SYSCALL(sys_ni_syscall, 1) SYSCALL(sys_sethostname, 2) SYSCALL(sys_setrlimit, 2) /* 75 */ SYSCALL(sys_getrlimit, 2) @@ -107,15 +106,15 @@ SYSCALL(sys_gettimeofday, 2) SYSCALL(sys_settimeofday, 2) SYSCALL(sys_getgroups, 2) /* 80 */ SYSCALL(sys_setgroups, 2) -SYSCALL(sys_ni_syscall, 0) /* old_select */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_symlink, 2) SYSCALL(sys_lstat, 2) SYSCALL(sys_readlink, 3) /* 85 */ SYSCALL(sys_uselib, 1) SYSCALL(sys_swapon, 2) SYSCALL(sys_reboot, 3) -SYSCALL(old_readdir, 3) -SYSCALL(old_mmap, 6) /* 90 */ +SYSCALL(sys_ni_syscall, 3) +SYSCALL(sys_ni_syscall, 6) /* 90 */ SYSCALL(sys_munmap, 2) SYSCALL(sys_truncate, 2) SYSCALL(sys_ftruncate, 2) @@ -127,7 +126,7 @@ SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_statfs, 2) SYSCALL(sys_fstatfs, 2) /* 100 */ SYSCALL(sys_ni_syscall, 3) -SYSCALL(sys_socketcall, 2) +SYSCALL(sys_ni_syscall, 2) SYSCALL(sys_syslog, 3) SYSCALL(sys_setitimer, 3) SYSCALL(sys_getitimer, 2) /* 105 */ @@ -137,32 +136,32 @@ SYSCALL(sys_newfstat, 2) SYSCALL(sys_uname, 1) SYSCALL(sys_ni_syscall, 0) /* 110 */ SYSCALL(sys_vhangup, 0) -SYSCALL(sys_ni_syscall, 0) /* was sys_idle() */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_wait4, 4) SYSCALL(sys_swapoff, 1) /* 115 */ SYSCALL(sys_sysinfo, 1) -SYSCALL(sys_ipc, 5) /* 6 really, but glibc uses only 5) */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_fsync, 1) SYSCALL(sys_sigreturn, 0) SYSCALL(sys_clone, 0) /* 120 */ SYSCALL(sys_setdomainname, 2) SYSCALL(sys_newuname, 1) -SYSCALL(sys_ni_syscall, 0) /* sys_modify_ldt */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_adjtimex, 1) SYSCALL(sys_mprotect, 3) /* 125 */ -SYSCALL(sys_sigprocmask, 3) -SYSCALL(sys_ni_syscall, 2) /* old sys_create_module */ +SYSCALL(sys_ni_syscall, 3) +SYSCALL(sys_ni_syscall, 2) SYSCALL(sys_init_module, 2) SYSCALL(sys_delete_module, 1) -SYSCALL(sys_ni_syscall, 1) /* old sys_get_kernel_sysm */ /* 130 */ +SYSCALL(sys_ni_syscall, 1) /* 130 */ 
SYSCALL(sys_quotactl, 0) SYSCALL(sys_getpgid, 1) SYSCALL(sys_fchdir, 1) SYSCALL(sys_bdflush, 2) SYSCALL(sys_sysfs, 3) /* 135 */ SYSCALL(sys_personality, 1) -SYSCALL(sys_ni_syscall, 0) /* for afs_syscall */ +SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_setfsuid, 1) SYSCALL(sys_setfsgid, 1) SYSCALL(sys_llseek, 5) /* 140 */ @@ -212,7 +211,7 @@ SYSCALL(sys_socket, 3) SYSCALL(sys_socketpair, 4) SYSCALL(sys_setresuid, 3) /* 185 */ SYSCALL(sys_getresuid, 3) -SYSCALL(sys_ni_syscall, 5) /* old sys_query_module */ +SYSCALL(sys_ni_syscall, 5) SYSCALL(sys_poll, 3) SYSCALL(sys_nfsservctl, 3) SYSCALL(sys_setresgid, 3) /* 190 */ @@ -235,7 +234,7 @@ SYSCALL(sys_sigaltstack, 0) SYSCALL(sys_sendfile, 4) SYSCALL(sys_ni_syscall, 0) SYSCALL(sys_ni_syscall, 0) -SYSCALL(sys_mmap2, 6) /* 210 */ +SYSCALL(sys_mmap, 6) /* 210 */ SYSCALL(sys_truncate64, 2) SYSCALL(sys_ftruncate64, 2) SYSCALL(sys_stat64, 2) @@ -245,4 +244,4 @@ SYSCALL(sys_pivot_root, 2) SYSCALL(sys_mincore, 3) SYSCALL(sys_madvise, 3) SYSCALL(sys_getdents64, 3) -SYSCALL(sys_vfork, 0) /* 220 */ +SYSCALL(sys_ni_syscall, 0) /* 220 */ -- cgit v1.2.3 From 6c036527a630720063b67d9a65455e8caca2c8fa Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 7 Jul 2005 17:56:59 -0700 Subject: [PATCH] mostly_read data section Add a new section called ".data.read_mostly" for data items that are read frequently and rarely written to like cpumaps etc. If these maps are placed in the .data section then these frequently read items may end up in cachelines with data that is frequently updated. In that case all processors in an SMP system must needlessly reload the cachelines again and again containing elements of those frequently used variables. The ability to share these cachelines will allow each cpu in an SMP system to keep local copies of those shared cachelines thereby optimizing performance.
Signed-off-by: Alok N Kataria Signed-off-by: Shobhit Dayal Signed-off-by: Christoph Lameter Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel.c | 2 +- arch/i386/kernel/smpboot.c | 18 +++++++++--------- arch/i386/kernel/time.c | 2 +- arch/i386/kernel/timers/timer_hpet.c | 4 ++-- arch/i386/kernel/vmlinux.lds.S | 3 +++ arch/x86_64/kernel/vmlinux.lds.S | 4 ++++ 6 files changed, 20 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 96a75d04583..a2c33c1a46c 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -25,7 +25,7 @@ extern int trap_init_f00f_bug(void); /* * Alignment at which movsl is preferred for bulk memory copies. */ -struct movsl_mask movsl_mask; +struct movsl_mask movsl_mask __read_mostly; #endif void __devinit early_intel_workaround(struct cpuinfo_x86 *c) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index d66bf489a2e..8ac8e9fd561 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -68,21 +68,21 @@ EXPORT_SYMBOL(smp_num_siblings); #endif /* Package ID of each logical CPU */ -int phys_proc_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(phys_proc_id); /* Core ID of each logical CPU */ -int cpu_core_id[NR_CPUS] = {[0 ... NR_CPUS-1] = BAD_APICID}; +int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... 
NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); -cpumask_t cpu_sibling_map[NR_CPUS]; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); -cpumask_t cpu_core_map[NR_CPUS]; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* bitmap of online cpus */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; @@ -100,7 +100,7 @@ static int __devinitdata tsc_sync_disabled; struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_data); -u8 x86_cpu_to_apicid[NR_CPUS] = +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0xff }; EXPORT_SYMBOL(x86_cpu_to_apicid); @@ -550,10 +550,10 @@ extern struct { #ifdef CONFIG_NUMA /* which logical CPUs are on which nodes */ -cpumask_t node_2_cpu_mask[MAX_NUMNODES] = +cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly = { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; /* which node each logical CPU is on */ -int cpu_2_node[NR_CPUS] = { [0 ... NR_CPUS-1] = 0 }; +int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; EXPORT_SYMBOL(cpu_2_node); /* set up a mapping between cpu and node. */ @@ -581,7 +581,7 @@ static inline void unmap_cpu_to_node(int cpu) #endif /* CONFIG_NUMA */ -u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = BAD_APICID }; static void map_cpu_to_logical_apicid(void) { diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 2854c357377..0ee9dee8af0 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); -struct timer_opts *cur_timer = &timer_none; +struct timer_opts *cur_timer __read_mostly = &timer_none; /* * This is a special lock that is owned by the CPU and holds the index diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d766e0963ac..ef8dac5dd33 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -18,7 +18,7 @@ #include "mach_timer.h" #include -static unsigned long hpet_usec_quotient; /* convert hpet clks to usec */ +static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */ static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ static unsigned long hpet_last; /* hpet counter value at last tick*/ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ @@ -180,7 +180,7 @@ static int __init init_hpet(char* override) /************************************************************/ /* tsc timer_opts struct */ -static struct timer_opts timer_hpet = { +static struct timer_opts timer_hpet __read_mostly = { .name = "hpet", .mark_offset = mark_offset_hpet, .get_offset = get_offset_hpet, diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 7e01a528a83..761972f8cb6 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -57,6 +57,9 @@ SECTIONS *(.data.cacheline_aligned) } + /* rarely changed data like cpu maps */ + . = ALIGN(32); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) } _edata = .; /* End of data section */ . 
= ALIGN(THREAD_SIZE); /* init_task */ diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 73389f51c4e..61c12758ca7 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -56,6 +56,10 @@ SECTIONS .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + } #define VSYSCALL_ADDR (-10*1024*1024) #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) -- cgit v1.2.3 From 316240f66a64c95e373d52dc401d882d77a594ee Mon Sep 17 00:00:00 2001 From: Hirokazu Takata Date: Thu, 7 Jul 2005 17:59:32 -0700 Subject: [PATCH] m32r: framebuffer device support This patch is for supporting Epson s1d13xxx framebuffer device for m32r. # Sorry, a little bigger. The Epson s1d13806 is already supported by 2.6.12 kernel, and its driver is placed as drivers/video/s1d13xxxfb.c. For the m32r, a header file include/asm-m32r/s1d13806.h was prepared for several m32r target platforms. It was originally generated by an Epson tool S1D13806CFG.EXE, and modified manually for the m32r platforms. Signed-off-by: Hayato Fujiwara Signed-off-by: Hirokazu Takata Cc: "Antonino A. 
Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/setup_m32700ut.c | 55 ++++++++++++++++++++++++++++++++++++--- arch/m32r/kernel/setup_mappi.c | 51 ++++++++++++++++++++++++++++++++++-- arch/m32r/kernel/setup_mappi2.c | 4 +-- arch/m32r/kernel/setup_mappi3.c | 52 ++++++++++++++++++++++++++++++++++-- arch/m32r/kernel/setup_oaks32r.c | 5 ++-- arch/m32r/kernel/setup_opsput.c | 53 ++++++++++++++++++++++++++++++++++--- 6 files changed, 204 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/m32r/kernel/setup_m32700ut.c b/arch/m32r/kernel/setup_m32700ut.c index b014e2c1e52..a146b24a556 100644 --- a/arch/m32r/kernel/setup_m32700ut.c +++ b/arch/m32r/kernel/setup_m32700ut.c @@ -3,8 +3,8 @@ * * Setup routines for Renesas M32700UT Board * - * Copyright (c) 2002 Hiroyuki Kondo, Hirokazu Takata, - * Hitoshi Yamamoto, Takeo Takahashi + * Copyright (c) 2002-2005 Hiroyuki Kondo, Hirokazu Takata, + * Hitoshi Yamamoto, Takeo Takahashi * * This file is subject to the terms and conditions of the GNU General * Public License. See the file "COPYING" in the main directory of this @@ -435,7 +435,7 @@ void __init init_IRQ(void) icu_data[M32R_IRQ_INT2].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD01; enable_m32700ut_irq(M32R_IRQ_INT2); -//#if defined(CONFIG_VIDEO_M32R_AR) +#if defined(CONFIG_VIDEO_M32R_AR) /* * INT3# is used for AR */ @@ -445,9 +445,11 @@ void __init init_IRQ(void) irq_desc[M32R_IRQ_INT3].depth = 1; icu_data[M32R_IRQ_INT3].icucr = M32R_ICUCR_IEN|M32R_ICUCR_ISMOD10; disable_m32700ut_irq(M32R_IRQ_INT3); -//#endif /* CONFIG_VIDEO_M32R_AR */ +#endif /* CONFIG_VIDEO_M32R_AR */ } +#if defined(CONFIG_SMC91X) + #define LAN_IOSTART 0x300 #define LAN_IOEND 0x320 static struct resource smc91x_resources[] = { @@ -469,10 +471,55 @@ static struct platform_device smc91x_device = { .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, }; +#endif + +#if defined(CONFIG_FB_S1D13XXX) + +#include