From b8cee18cc75d7b9dbe6c6526dfae9ab49e84fa95 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 21 May 2008 13:37:16 +0200 Subject: KVM: s390: use yield instead of schedule to implement diag 0x44 diag 0x44 is the common way on s390 to yield the cpu to the hypervisor. It is called by the guest in cpu_relax and in the spinlock code to yield to other guest cpus. This semantic is similar to yield. Lets replace the call to schedule with yield to make sure that current is really yielding. Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index f639a152869..a0775e1f08d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -20,7 +20,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); vcpu->stat.diagnose_44++; vcpu_put(vcpu); - schedule(); + yield(); vcpu_load(vcpu); return 0; } -- cgit v1.2.3 From 74b6b522ec83f9c44fc7743f2adcb24664aa8f45 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 21 May 2008 13:37:29 +0200 Subject: KVM: s390: fix locking order problem in enable_sie There are potential locking problem in enable_sie. We take the task_lock and the mmap_sem. As exit_mm uses the same locks vice versa, this triggers a lockdep warning. The second problem is that dup_mm and mmput might sleep, so we must not hold the task_lock at that moment. The solution is to dup the mm unconditional and use the task_lock before and afterwards to check if we can use the new mm. dup_mm and mmput are called outside the task_lock, but we run update_mm while holding the task_lock, protection us against ptrace. Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Acked-by: Martin Schwidefsky Signed-off-by: Avi Kivity --- arch/s390/mm/pgtable.c | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5c1aea97cd1..3d98ba82ea6 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -254,36 +254,46 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) int s390_enable_sie(void) { struct task_struct *tsk = current; - struct mm_struct *mm; - int rc; + struct mm_struct *mm, *old_mm; - task_lock(tsk); - - rc = 0; + /* Do we have pgstes? if yes, we are done */ if (tsk->mm->context.pgstes) - goto unlock; + return 0; - rc = -EINVAL; + /* lets check if we are allowed to replace the mm */ + task_lock(tsk); if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || - tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) - goto unlock; + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { + task_unlock(tsk); + return -EINVAL; + } + task_unlock(tsk); - tsk->mm->context.pgstes = 1; /* dirty little tricks .. */ + /* we copy the mm with pgstes enabled */ + tsk->mm->context.pgstes = 1; mm = dup_mm(tsk); tsk->mm->context.pgstes = 0; - - rc = -ENOMEM; if (!mm) - goto unlock; - mmput(tsk->mm); + return -ENOMEM; + + /* Now lets check again if somebody attached ptrace etc */ + task_lock(tsk); + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) { + mmput(mm); + task_unlock(tsk); + return -EINVAL; + } + + /* ok, we are alone. No ptrace, no threads, etc. */ + old_mm = tsk->mm; tsk->mm = tsk->active_mm = mm; preempt_disable(); update_mm(mm, tsk); cpu_set(smp_processor_id(), mm->cpu_vm_mask); preempt_enable(); - rc = 0; -unlock: task_unlock(tsk); - return rc; + mmput(old_mm); + return 0; } EXPORT_SYMBOL_GPL(s390_enable_sie); -- cgit v1.2.3 From 71cde5879f10b639506bc0b9f29a89f58b42a17e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 21 May 2008 13:37:34 +0200 Subject: KVM: s390: handle machine checks when guest is running The low-level interrupt handler on s390 checks for _TIF_WORK_INT and exits the guest context, if work is pending. TIF_WORK_INT is defined as_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_MCCK_PENDING. Currently the sie loop checks for signals and reschedule, but it does not check for machine checks. That means that we exit the guest context if a machine check is pending, but we do not handle the machine check. Signed-off-by: Christian Borntraeger CC: Heiko Carstens Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0ac36a649eb..40e4f2de732 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -423,6 +423,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return -EINVAL; /* not implemented yet */ } +extern void s390_handle_mcck(void); + static void __vcpu_run(struct kvm_vcpu *vcpu) { memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); @@ -430,6 +432,9 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) if (need_resched()) schedule(); + if (test_thread_flag(TIF_MCCK_PENDING)) + s390_handle_mcck(); + vcpu->arch.sie_block->icptcode = 0; local_irq_disable(); kvm_guest_enter(); -- cgit v1.2.3 From 0ff318674503ce3787ef62d84f4d948db204b268 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 21 May 2008 13:37:37 +0200 Subject: KVM: s390: fix interrupt delivery The current code delivers pending interrupts before it checks for need_resched. On a busy host, this can lead to a longer interrupt latency if the interrupt is injected while the process is scheduled away. This patch moves delivering the interrupt _after_ schedule(), which makes more sense. Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 40e4f2de732..ded27c7777c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -435,6 +435,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) if (test_thread_flag(TIF_MCCK_PENDING)) s390_handle_mcck(); + kvm_s390_deliver_pending_interrupts(vcpu); + vcpu->arch.sie_block->icptcode = 0; local_irq_disable(); kvm_guest_enter(); @@ -480,7 +482,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) might_sleep(); do { - kvm_s390_deliver_pending_interrupts(vcpu); __vcpu_run(vcpu); rc = kvm_handle_sie_intercept(vcpu); } while (!signal_pending(current) && !rc); -- cgit v1.2.3 From 1f0d0f094df9a570dfc26d5eb825986b7e165e1d Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 21 May 2008 13:37:40 +0200 Subject: KVM: s390: Send program check on access error If the guest accesses non-existing memory, the sie64a function returns -EFAULT. We must check the return value and send a program check to the guest if the sie instruction faulted, otherwise the guest will loop at the faulting code. Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ded27c7777c..6558b09ff57 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -443,7 +443,10 @@ static void __vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); VCPU_EVENT(vcpu, 6, "entering sie flags %x", atomic_read(&vcpu->arch.sie_block->cpuflags)); - sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs); + if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + } VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); local_irq_disable(); -- cgit v1.2.3 From e52b2af541bcb299212a63cfa3e3231618a415be Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 21 May 2008 13:37:44 +0200 Subject: KVM: s390: Fix race condition in kvm_s390_handle_wait The call to add_timer was issued before local_int.lock was taken and before timer_due was set to 0. If the timer expires before the lock is being taken, the timer function will set timer_due to 1 and exit before the vcpu falls asleep. Depending on other external events, the vcpu might sleep forever. This fix pulls setting timer_due to the beginning of the function before add_timer, which ensures correct behavior. Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/interrupt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fcd1ed8015c..84a7fed4cd4 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -339,6 +339,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) if (kvm_cpu_has_interrupt(vcpu)) return 0; + __set_cpu_idle(vcpu); + spin_lock_bh(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 0; + spin_unlock_bh(&vcpu->arch.local_int.lock); + if (psw_interrupts_disabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); __unset_cpu_idle(vcpu); @@ -366,8 +371,6 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) no_timer: spin_lock_bh(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); - __set_cpu_idle(vcpu); - vcpu->arch.local_int.timer_due = 0; add_wait_queue(&vcpu->arch.local_int.wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && list_empty(&vcpu->arch.local_int.float_int->list) && -- cgit v1.2.3 From 148f1678f0ba7a5e79e44ff23064d4326fa145a4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 10 Jun 2008 10:03:18 +0200 Subject: [S390] sparsemem: use SPARSEMEM_STATIC if !64BIT. In case of !64BIT kernel we end up with a zero sized mem_section array. This happens because NR_MEM_SECTIONS is smaller than SECTIONS_PER_ROOT but we have: #define NR_SECTION_ROOTS (NR_MEM_SECTIONS / SECTIONS_PER_ROOT) and struct mem_section *mem_section[NR_SECTION_ROOTS]; So fix this by selecting SPARSEMEM_STATIC which makes sure that SECTIONS_PER_ROOT is 1. Cc: Gerald Schaefer Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 93acb3c1859..107e492cb47 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -304,6 +304,7 @@ config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_VMEMMAP_ENABLE select SPARSEMEM_VMEMMAP + select SPARSEMEM_STATIC if !64BIT config ARCH_SPARSEMEM_DEFAULT def_bool y -- cgit v1.2.3 From ee0ddadd086e25503f81be551c43f66472300acd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 10 Jun 2008 10:03:20 +0200 Subject: [S390] vmemmap: fix off-by-one bug. If a memory range is supposed to be added to the 1:1 mapping and it ends just below the maximum supported physical address it won't succeed. This is because a test doesn't consider that the end address is 1 smaller than start + size. Fix the comparison. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/vmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index f591188fa2c..e4868bfc672 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -236,7 +236,7 @@ static int insert_memory_segment(struct memory_segment *seg) { struct memory_segment *tmp; - if (seg->start + seg->size >= VMEM_MAX_PHYS || + if (seg->start + seg->size > VMEM_MAX_PHYS || seg->start + seg->size < seg->start) return -ERANGE; -- cgit v1.2.3 From 24d3e210c18bfedafe986ec489575cf91ac39d22 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Tue, 10 Jun 2008 10:03:23 +0200 Subject: [S390] Fix build failure in __cpu_up() The first argument to __ctl_store() should be the array to store stuff in, not just the first element of that array. With the current code in __cpu_up(), mainline GCC dies with an internal compiler error. I didn't diagnose that further, but just fixed the kernel bug. Signed-off-by: Segher Boessenkool Signed-off-by: Martin Schwidefsky Cc: Heiko Carstens --- arch/s390/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 42b1d12ebb1..5d4fa4b1c74 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -711,7 +711,7 @@ int __cpuinit __cpu_up(unsigned int cpu) memset(sf, 0, sizeof(struct stack_frame)); sf->gprs[9] = (unsigned long) sf; cpu_lowcore->save_area[15] = (unsigned long) sf; - __ctl_store(cpu_lowcore->cregs_save_area[0], 0, 15); + __ctl_store(cpu_lowcore->cregs_save_area, 0, 15); asm volatile( " stam 0,15,0(%0)" : : "a" (&cpu_lowcore->access_regs_save_area) : "memory"); -- cgit v1.2.3