From 5f954c3426190f7ae432a09abd62164d5d14c709 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jul 2009 12:39:49 +0200 Subject: [S390] hibernation: fix lowcore handling Our swsusp_arch_suspend() backend implementation disables prefixing by setting the contents of the prefix register to 0. However afterwards common code functions are called which might access percpu data structures. Since the lowcore contains e.g. the percpu base pointer this isn't a good idea. So fix this by copying the hibernating cpu's lowcore to absolute address zero. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 4 ++-- arch/s390/power/swsusp_asm64.S | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index f9b144049dc..8d15314381e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -210,7 +210,7 @@ static noinline __init void detect_machine_type(void) machine_flags |= MACHINE_FLAG_VM; } -static void early_pgm_check_handler(void) +static __init void early_pgm_check_handler(void) { unsigned long addr; const struct exception_table_entry *fixup; @@ -222,7 +222,7 @@ static void early_pgm_check_handler(void) S390_lowcore.program_old_psw.addr = fixup->fixup | PSW_ADDR_AMODE; } -void setup_lowcore_early(void) +static noinline __init void setup_lowcore_early(void) { psw_t psw; diff --git a/arch/s390/power/swsusp_asm64.S b/arch/s390/power/swsusp_asm64.S index 76d688da32f..e27bd316489 100644 --- a/arch/s390/power/swsusp_asm64.S +++ b/arch/s390/power/swsusp_asm64.S @@ -40,11 +40,11 @@ swsusp_arch_suspend: /* Store prefix register on stack */ stpx __SF_EMPTY(%r15) - /* Setup base register for lowcore (absolute 0) */ - llgf %r1,__SF_EMPTY(%r15) + /* Save prefix register contents for lowcore */ + llgf %r4,__SF_EMPTY(%r15) /* Get pointer to save area */ - aghi %r1,0x1000 + lghi %r1,0x1000 /* Store registers */ mvc 
0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */ @@ -79,8 +79,11 @@ swsusp_arch_suspend: xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) spx __SF_EMPTY(%r15) - /* Setup lowcore */ - brasl %r14,setup_lowcore_early + lghi %r2,0 + lghi %r3,2*PAGE_SIZE + lghi %r5,2*PAGE_SIZE +1: mvcle %r2,%r4,0 + jo 1b /* Save image */ brasl %r14,swsusp_save -- cgit v1.2.3 From c63b196afcf22405527abe4c2c57926a5bbd6fc9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jul 2009 12:39:50 +0200 Subject: [S390] hibernation: fix register corruption on machine checks swsusp_arch_suspend() actually saves all cpu register contents on hibernation. Machine checks must be disabled since swsusp_arch_suspend() stores register contents to their lowcore save areas. That's the same place where register contents on machine checks would be saved. To avoid register corruption disable machine checks. We must also disable machine checks in the new psw mask for program checks, since swsusp_arch_suspend() may generate program checks. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/power/swsusp.c | 36 ++++++++++++++++++++++++------------ arch/s390/power/swsusp_asm64.S | 22 +--------------------- 2 files changed, 25 insertions(+), 33 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/power/swsusp.c b/arch/s390/power/swsusp.c index e6a4fe9f5f2..bd1f5c6b0b8 100644 --- a/arch/s390/power/swsusp.c +++ b/arch/s390/power/swsusp.c @@ -7,24 +7,36 @@ * */ +#include -/* - * save CPU registers before creating a hibernation image and before - * restoring the memory state from it - */ void save_processor_state(void) { - /* implentation contained in the - * swsusp_arch_suspend function + /* swsusp_arch_suspend() actually saves all cpu register contents. + * Machine checks must be disabled since swsusp_arch_suspend() stores + * register contents to their lowcore save areas. That's the same + * place where register contents on machine checks would be saved. 
+ * To avoid register corruption disable machine checks. + * We must also disable machine checks in the new psw mask for + * program checks, since swsusp_arch_suspend() may generate program + * checks. Disabling machine checks for all other new psw masks is + * just paranoia. */ + local_mcck_disable(); + /* Disable lowcore protection */ + __ctl_clear_bit(0,28); + S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK; } -/* - * restore the contents of CPU registers - */ void restore_processor_state(void) { - /* implentation contained in the - * swsusp_arch_resume function - */ + S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK; + S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK; + /* Enable lowcore protection */ + __ctl_set_bit(0,28); + local_mcck_enable(); } diff --git a/arch/s390/power/swsusp_asm64.S b/arch/s390/power/swsusp_asm64.S index e27bd316489..b26df5c5933 100644 --- a/arch/s390/power/swsusp_asm64.S +++ b/arch/s390/power/swsusp_asm64.S @@ -32,11 +32,6 @@ swsusp_arch_suspend: /* Deactivate DAT */ stnsm __SF_EMPTY(%r15),0xfb - /* Switch off lowcore protection */ - stctg %c0,%c0,__SF_EMPTY(%r15) - ni __SF_EMPTY+4(%r15),0xef - lctlg %c0,%c0,__SF_EMPTY(%r15) - /* Store prefix register on stack */ stpx __SF_EMPTY(%r15) @@ -88,11 +83,6 @@ swsusp_arch_suspend: /* Save image */ brasl %r14,swsusp_save - /* Switch on lowcore protection */ - stctg %c0,%c0,__SF_EMPTY(%r15) - oi __SF_EMPTY+4(%r15),0x10 - lctlg %c0,%c0,__SF_EMPTY(%r15) - /* Restore prefix register and return */ lghi %r1,0x1000 spx 0x318(%r1) @@ -120,11 +110,6 @@ swsusp_arch_resume: /* Deactivate DAT */ stnsm __SF_EMPTY(%r15),0xfb - /* Switch off lowcore protection */ - stctg %c0,%c0,__SF_EMPTY(%r15) - ni __SF_EMPTY+4(%r15),0xef - lctlg 
%c0,%c0,__SF_EMPTY(%r15) - /* Set prefix page to zero */ xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15) spx __SF_EMPTY(%r15) @@ -178,7 +163,7 @@ swsusp_arch_resume: /* Load old stack */ lg %r15,0x2f8(%r13) - /* Pointer to save arae */ + /* Pointer to save area */ lghi %r13,0x1000 #ifdef CONFIG_SMP @@ -190,11 +175,6 @@ swsusp_arch_resume: /* Restore prefix register */ spx 0x318(%r13) - /* Switch on lowcore protection */ - stctg %c0,%c0,__SF_EMPTY(%r15) - oi __SF_EMPTY+4(%r15),0x10 - lctlg %c0,%c0,__SF_EMPTY(%r15) - /* Activate DAT */ stosm __SF_EMPTY(%r15),0x04 -- cgit v1.2.3 From 3a6ba4600d6fb913ddb0dd08843ad75405795883 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jul 2009 12:39:51 +0200 Subject: [S390] vdso: fix per cpu area allocation vdso per cpu area allocation in smp_prepare_cpus() happens with GFP_KERNEL but irqs disabled. Triggers this one: Badness at kernel/lockdep.c:2280 Modules linked in: CPU: 0 Not tainted 2.6.30 #2 Process swapper (pid: 1, task: 000000003fe88000, ksp: 000000003fe87eb8) Krnl PSW : 0400c00180000000 0000000000083360 (lockdep_trace_alloc+0xec/0xf8) [...] Call Trace: ([<00000000000832b6>] lockdep_trace_alloc+0x42/0xf8) [<00000000000b1880>] __alloc_pages_internal+0x3e8/0x5c4 [<00000000000b1b4a>] __get_free_pages+0x3a/0xb0 [<0000000000026546>] vdso_alloc_per_cpu+0x6a/0x18c [<00000000005eff82>] smp_prepare_cpus+0x322/0x594 [<00000000005e8232>] kernel_init+0x76/0x398 [<000000000001bb1e>] kernel_thread_starter+0x6/0xc [<000000000001bb18>] kernel_thread_starter+0x0/0xc Fix this by moving the allocation out of the irqs disabled section. 
Reported-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2270730f535..be2cae08340 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -687,13 +687,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) lowcore->extended_save_area_addr = (u32) save_area; -#else - if (vdso_alloc_per_cpu(smp_processor_id(), lowcore)) - BUG(); #endif set_prefix((u32)(unsigned long) lowcore); local_mcck_enable(); local_irq_enable(); +#ifdef CONFIG_64BIT + if (vdso_alloc_per_cpu(smp_processor_id(), &S390_lowcore)) + BUG(); +#endif for_each_possible_cpu(cpu) if (cpu != smp_processor_id()) smp_create_idle(cpu); -- cgit v1.2.3 From 1277580fe5dfb5aef84854bdb7983657df00b920 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Jul 2009 12:39:52 +0200 Subject: [S390] vdso: clock_gettime of CLOCK_THREAD_CPUTIME_ID with noexec=on The combination of noexec=on and a clock_gettime call with clock id CLOCK_THREAD_CPUTIME_ID is broken. The vdso code switches to the access register mode to get access to the per-cpu data structure to execute the magic ectg instruction. After the ectg instruction the code always switches back to the primary mode but for noexec=on the correct mode is the secondary mode. The effect of the bug is that the user space program loses access to all mappings without PROT_EXEC, e.g. the stack. The problem is fixed by restoring the mode that has been active before the switch to the access register mode.
Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/vdso64/clock_gettime.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 79dbfee831e..49106c6e6f8 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -88,10 +88,17 @@ __kernel_clock_gettime: llilh %r4,0x0100 sar %a4,%r4 lghi %r4,0 + epsw %r5,0 sacf 512 /* Magic ectg instruction */ .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 - sacf 0 - sar %a4,%r2 + tml %r5,0x4000 + jo 11f + tml %r5,0x8000 + jno 10f + sacf 256 + j 11f +10: sacf 0 +11: sar %a4,%r2 algr %r1,%r0 /* r1 = cputime as TOD value */ mghi %r1,1000 /* convert to nanoseconds */ srlg %r1,%r1,12 /* r1 = cputime in nanosec */ -- cgit v1.2.3 From 9e1b32caa525cb236e80e9c671e179bcecccc657 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 22 Jul 2009 15:44:28 +1000 Subject: mm: Pass virtual address to [__]p{te,ud,md}_free_tlb() mm: Pass virtual address to [__]p{te,ud,md}_free_tlb() Upcoming patches to support the new 64-bit "BookE" powerpc architecture will need to have the virtual address corresponding to PTE page when freeing it, due to the way the HW table walker works. Basically, the TLB can be loaded with "large" pages that cover the whole virtual space (well, sort-of, half of it actually) represented by a PTE page, and which contain an "indirect" bit indicating that this TLB entry RPN points to an array of PTEs from which the TLB can then create direct entries. Thus, in order to invalidate those when PTE pages are deleted, we need the virtual address to pass to tlbilx or tlbivax instructions. The old trick of sticking it somewhere in the PTE page struct page sucks too much, the address is almost readily available in all call sites and almost everybody implements these as macros, so we may as well add the argument everywhere.
I added it to the pmd and pud variants for consistency. Signed-off-by: Benjamin Herrenschmidt Acked-by: David Howells [MN10300 & FRV] Acked-by: Nick Piggin Acked-by: Martin Schwidefsky [s390] Signed-off-by: Linus Torvalds --- arch/s390/include/asm/tlb.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 3d8a96d39d9..81150b05368 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -96,7 +96,8 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) * pte_free_tlb frees a pte table and clears the CRSTE for the * page table from the tlb. */ -static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte) +static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, + unsigned long address) { if (!tlb->fullmm) { tlb->array[tlb->nr_ptes++] = pte; @@ -113,7 +114,8 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte) * as the pgd. pmd_free_tlb checks the asce_limit against 2GB * to avoid the double free of the pmd in this case. */ -static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long address) { #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 31)) @@ -134,7 +136,8 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) * as the pgd. pud_free_tlb checks the asce_limit against 4TB * to avoid the double free of the pud in this case. 
*/ -static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) +static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, + unsigned long address) { #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 42)) -- cgit v1.2.3 From d3bc2f91b4761a8d9f96bea167fef2f8c00dea54 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 16 Jul 2009 17:17:37 +0200 Subject: KVM: s390: fix wait_queue handling There are two waitqueues in kvm for wait handling: vcpu->wq for virt/kvm/kvm_main.c and vcpu->arch.local_int.wq for the s390 specific wait code. The wait handling in kvm_s390_handle_wait was broken by using different wait_queues for add_wait_queue and remove_wait_queue. There are two options to fix the problem: o move all the s390 specific code to vcpu->wq and remove vcpu->arch.local_int.wq o move all the s390 specific code to vcpu->arch.local_int.wq This patch chooses the 2nd variant for two reasons: o s390 does not use kvm_vcpu_block but implements its own enabled wait handling.
Having a separate wait_queue makes it clear that our wait mechanism is different o the patch is much smaller Report-by: Julia Lawall Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/s390/kvm/interrupt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f04f5301b1b..4d613415c43 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -386,7 +386,7 @@ no_timer: } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->wq, &wait); + remove_wait_queue(&vcpu->arch.local_int.wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); -- cgit v1.2.3 From 677c1dd706d9cc384730cbd52baf821923d8be9b Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 7 Aug 2009 10:39:24 +0200 Subject: [S390] kernel: Storing machine flags early in lowcore Currently, the machine_flags are stored late in the startup initialization which results in failing machine type checks (e.g. for MACHINE_IS_VM). To allow these checks, store the machine flags in the lowcore when the machine type has been detected.
Moving the machine_flags to the lowcore has been introduced with git commit 25097bf153391f7be4c591d47061b3dc4990dac2 Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 8d15314381e..cae14c49951 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -208,6 +208,9 @@ static noinline __init void detect_machine_type(void) machine_flags |= MACHINE_FLAG_KVM; else machine_flags |= MACHINE_FLAG_VM; + + /* Store machine flags for setting up lowcore early */ + S390_lowcore.machine_flags = machine_flags; } static __init void early_pgm_check_handler(void) -- cgit v1.2.3 From 53cb780adbe69df90c8dc23e992ce40455e687c3 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 7 Aug 2009 10:39:25 +0200 Subject: [S390] KVM: Read buffer overflow Check whether index is within bounds before testing the element. Signed-off-by: Roel Kluin Signed-off-by: Martin Schwidefsky --- arch/s390/kvm/sigp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 36678835034..0ef81d6776e 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -169,7 +169,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, unsigned long *reg) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; - struct kvm_s390_local_interrupt *li; + struct kvm_s390_local_interrupt *li = NULL; struct kvm_s390_interrupt_info *inti; int rc; u8 tmp; @@ -189,9 +189,10 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, return 2; /* busy */ spin_lock(&fi->lock); - li = fi->local_int[cpu_addr]; + if (cpu_addr < KVM_MAX_VCPUS) + li = fi->local_int[cpu_addr]; - if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { + if (li == NULL) { rc = 1; /* incorrect state */ *reg &= 
SIGP_STAT_INCORRECT_STATE; kfree(inti); -- cgit v1.2.3 From 637952ca689013339b977558061fa4ca8e07e1c1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Sun, 23 Aug 2009 18:09:06 +0200 Subject: [S390] set preferred console based on conmode setup_arch() unconditionally sets the preferred console to ttyS. This breaks the use of 3270 devices as the console. Provide a new function to set the default preferred console for s390. The preferred console depends on the conmode parameter that is used to switch between 3270 and 3215 terminal/console mode. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9717717c6fe..cbb897bc50b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -154,6 +154,20 @@ static int __init condev_setup(char *str) __setup("condev=", condev_setup); +static void __init set_preferred_console(void) +{ + if (MACHINE_IS_KVM) { + add_preferred_console("hvc", 0, NULL); + s390_virtio_console_init(); + return; + } + + if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + add_preferred_console("ttyS", 0, NULL); + if (CONSOLE_IS_3270) + add_preferred_console("tty3270", 0, NULL); +} + static int __init conmode_setup(char *str) { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) @@ -168,6 +182,7 @@ static int __init conmode_setup(char *str) if (strncmp(str, "3270", 5) == 0) SET_CONSOLE_3270; #endif + set_preferred_console(); return 1; } @@ -780,9 +795,6 @@ static void __init setup_hwcaps(void) void __init setup_arch(char **cmdline_p) { - /* set up preferred console */ - add_preferred_console("ttyS", 0, NULL); - /* * print what head.S has found out about the machine */ @@ -802,11 +814,9 @@ setup_arch(char **cmdline_p) if (MACHINE_IS_VM) pr_info("Linux is running as a z/VM " "guest operating system in 64-bit mode\n"); 
- else if (MACHINE_IS_KVM) { + else if (MACHINE_IS_KVM) pr_info("Linux is running under KVM in 64-bit mode\n"); - add_preferred_console("hvc", 0, NULL); - s390_virtio_console_init(); - } else + else pr_info("Linux is running natively in 64-bit mode\n"); #endif /* CONFIG_64BIT */ @@ -851,6 +861,7 @@ setup_arch(char **cmdline_p) /* Setup default console */ conmode_default(); + set_preferred_console(); /* Setup zfcpdump support */ setup_zfcpdump(console_devno); -- cgit v1.2.3