From f20d2752980c144c82649eb18746ef0c29f508dd Mon Sep 17 00:00:00 2001
From: Jes Sorensen <jes@sgi.com>
Date: Tue, 20 May 2008 13:13:50 +0200
Subject: KVM: ia64: fix zero extending for mmio ld1/2/4 emulation in KVM

Only copy in the data actually requested by the instruction emulation
and zero pad the destination register first. This avoids the problem
where emulated mmio access got garbled data from ld2.acq instructions
in the vga console driver.

Signed-off-by: Jes Sorensen <jes@sgi.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/ia64/kvm/mmio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 351bf70da46..7f1a858bc69 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -159,7 +159,8 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
 
 	if (p->u.ioreq.state == STATE_IORESP_READY) {
 		if (dir == IOREQ_READ)
-			*dest = p->u.ioreq.data;
+			/* it's necessary to ensure zero extending */
+			*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
 	} else
 		panic_vm(vcpu);
 out:
-- 
cgit v1.2.3


From 33e3885de25148e00595c4dd808d6eb15db2edcf Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Wed, 21 May 2008 15:34:25 +0300
Subject: KVM: x86 emulator: fix hypercall return value on AMD

The hypercall instructions on Intel and AMD are different.  KVM allows the
guest to choose one or the other (the default is Intel), and if the guest
chooses incorrectly, KVM will patch it at runtime to select the correct
instruction.  This allows live migration between Intel and AMD machines.

This patching occurs in the x86 emulator.  The current code also executes
the hypercall.  Unfortunately, the tail end of the x86 emulator code also
executes, overwriting the return value of the hypercall with the original
contents of rax (which happens to be the hypercall number).

Fix not by executing the hypercall in the emulator context; instead let the
guest reissue the patched instruction and execute the hypercall via the
normal path.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86_emulate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 8a96320ab07..932f216d890 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1727,7 +1727,8 @@ twobyte_insn:
 			if (rc)
 				goto done;
 
-			kvm_emulate_hypercall(ctxt->vcpu);
+			/* Let the processor re-execute the fixed hypercall */
+			c->eip = ctxt->vcpu->arch.rip;
 			/* Disable writeback. */
 			c->dst.type = OP_NONE;
 			break;
-- 
cgit v1.2.3


From b8cee18cc75d7b9dbe6c6526dfae9ab49e84fa95 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 21 May 2008 13:37:16 +0200
Subject: KVM: s390: use yield instead of schedule to implement diag 0x44

diag 0x44 is the common way on s390 to yield the cpu to the hypervisor.
It is called by the guest in cpu_relax and in the spinlock code to
yield to other guest cpus.

This semantic is similar to yield. Lets replace the call to schedule with
yield to make sure that current is really yielding.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/kvm/diag.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index f639a152869..a0775e1f08d 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -20,7 +20,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
 	vcpu->stat.diagnose_44++;
 	vcpu_put(vcpu);
-	schedule();
+	yield();
 	vcpu_load(vcpu);
 	return 0;
 }
-- 
cgit v1.2.3


From 74b6b522ec83f9c44fc7743f2adcb24664aa8f45 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 21 May 2008 13:37:29 +0200
Subject: KVM: s390: fix locking order problem in enable_sie

There are potential locking problem in enable_sie. We take the task_lock
and the mmap_sem. As exit_mm uses the same locks vice versa, this triggers
a lockdep warning.
The second problem is that dup_mm and mmput might sleep, so we must not
hold the task_lock at that moment.

The solution is to dup the mm unconditional and use the task_lock before and
afterwards to check  if we can use the new mm. dup_mm and mmput are called
outside the task_lock, but we run update_mm while holding the task_lock,
protection us against ptrace.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/mm/pgtable.c | 44 +++++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 17 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5c1aea97cd1..3d98ba82ea6 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -254,36 +254,46 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 int s390_enable_sie(void)
 {
 	struct task_struct *tsk = current;
-	struct mm_struct *mm;
-	int rc;
+	struct mm_struct *mm, *old_mm;
 
-	task_lock(tsk);
-
-	rc = 0;
+	/* Do we have pgstes? if yes, we are done */
 	if (tsk->mm->context.pgstes)
-		goto unlock;
+		return 0;
 
-	rc = -EINVAL;
+	/* lets check if we are allowed to replace the mm */
+	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
-		goto unlock;
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+		task_unlock(tsk);
+		return -EINVAL;
+	}
+	task_unlock(tsk);
 
-	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	/* we copy the mm with pgstes enabled */
+	tsk->mm->context.pgstes = 1;
 	mm = dup_mm(tsk);
 	tsk->mm->context.pgstes = 0;
-
-	rc = -ENOMEM;
 	if (!mm)
-		goto unlock;
-	mmput(tsk->mm);
+		return -ENOMEM;
+
+	/* Now lets check again if somebody attached ptrace etc */
+	task_lock(tsk);
+	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+		mmput(mm);
+		task_unlock(tsk);
+		return -EINVAL;
+	}
+
+	/* ok, we are alone. No ptrace, no threads, etc. */
+	old_mm = tsk->mm;
 	tsk->mm = tsk->active_mm = mm;
 	preempt_disable();
 	update_mm(mm, tsk);
 	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
 	preempt_enable();
-	rc = 0;
-unlock:
 	task_unlock(tsk);
-	return rc;
+	mmput(old_mm);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
-- 
cgit v1.2.3


From 71cde5879f10b639506bc0b9f29a89f58b42a17e Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Wed, 21 May 2008 13:37:34 +0200
Subject: KVM: s390: handle machine checks when guest is running

The low-level interrupt handler on s390 checks for _TIF_WORK_INT and
exits the guest context, if work is pending.
TIF_WORK_INT is defined as_TIF_SIGPENDING | _TIF_NEED_RESCHED |
 _TIF_MCCK_PENDING. Currently the sie loop checks for signals and
reschedule, but it does not check for machine checks. That means that
we exit the guest context if a machine check is pending, but we do not
handle the machine check.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
CC: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/kvm/kvm-s390.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0ac36a649eb..40e4f2de732 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -423,6 +423,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }
 
+extern void s390_handle_mcck(void);
+
 static void __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
@@ -430,6 +432,9 @@ static void __vcpu_run(struct kvm_vcpu *vcpu)
 	if (need_resched())
 		schedule();
 
+	if (test_thread_flag(TIF_MCCK_PENDING))
+		s390_handle_mcck();
+
 	vcpu->arch.sie_block->icptcode = 0;
 	local_irq_disable();
 	kvm_guest_enter();
-- 
cgit v1.2.3


From 0ff318674503ce3787ef62d84f4d948db204b268 Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Wed, 21 May 2008 13:37:37 +0200
Subject: KVM: s390: fix interrupt delivery

The current code delivers pending interrupts before it checks for
need_resched. On a busy host, this can lead to a longer interrupt
latency if the interrupt is injected while the process is scheduled
away. This patch moves delivering the interrupt _after_ schedule(),
which makes more sense.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/kvm/kvm-s390.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 40e4f2de732..ded27c7777c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -435,6 +435,8 @@ static void __vcpu_run(struct kvm_vcpu *vcpu)
 	if (test_thread_flag(TIF_MCCK_PENDING))
 		s390_handle_mcck();
 
+	kvm_s390_deliver_pending_interrupts(vcpu);
+
 	vcpu->arch.sie_block->icptcode = 0;
 	local_irq_disable();
 	kvm_guest_enter();
@@ -480,7 +482,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	might_sleep();
 
 	do {
-		kvm_s390_deliver_pending_interrupts(vcpu);
 		__vcpu_run(vcpu);
 		rc = kvm_handle_sie_intercept(vcpu);
 	} while (!signal_pending(current) && !rc);
-- 
cgit v1.2.3


From 1f0d0f094df9a570dfc26d5eb825986b7e165e1d Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Wed, 21 May 2008 13:37:40 +0200
Subject: KVM: s390: Send program check on access error

If the guest accesses non-existing memory, the sie64a function returns
-EFAULT. We must check the return value and send a program check to the
guest if the sie instruction faulted, otherwise the guest will loop at
the faulting code.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/kvm/kvm-s390.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ded27c7777c..6558b09ff57 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -443,7 +443,10 @@ static void __vcpu_run(struct kvm_vcpu *vcpu)
 	local_irq_enable();
 	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
 		   atomic_read(&vcpu->arch.sie_block->cpuflags));
-	sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
+	if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
+		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	}
 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
 		   vcpu->arch.sie_block->icptcode);
 	local_irq_disable();
-- 
cgit v1.2.3


From e52b2af541bcb299212a63cfa3e3231618a415be Mon Sep 17 00:00:00 2001
From: Carsten Otte <cotte@de.ibm.com>
Date: Wed, 21 May 2008 13:37:44 +0200
Subject: KVM: s390: Fix race condition in kvm_s390_handle_wait

The call to add_timer was issued before local_int.lock was taken and before
timer_due was set to 0. If the timer expires before the lock is being taken,
the timer function will set timer_due to 1 and exit before the vcpu falls
asleep. Depending on other external events, the vcpu might sleep forever.
This fix pulls setting timer_due to the beginning of the function before
add_timer, which ensures correct behavior.

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/s390/kvm/interrupt.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fcd1ed8015c..84a7fed4cd4 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -339,6 +339,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 	if (kvm_cpu_has_interrupt(vcpu))
 		return 0;
 
+	__set_cpu_idle(vcpu);
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	vcpu->arch.local_int.timer_due = 0;
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+
 	if (psw_interrupts_disabled(vcpu)) {
 		VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
 		__unset_cpu_idle(vcpu);
@@ -366,8 +371,6 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 no_timer:
 	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
 	spin_lock_bh(&vcpu->arch.local_int.lock);
-	__set_cpu_idle(vcpu);
-	vcpu->arch.local_int.timer_due = 0;
 	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
 	while (list_empty(&vcpu->arch.local_int.list) &&
 		list_empty(&vcpu->arch.local_int.float_int->list) &&
-- 
cgit v1.2.3


From ce263d70e509287ee761f9bba519342f57b121ca Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 21 May 2008 18:22:51 -0500
Subject: KVM: ppc: Remove duplicate function

This was left behind from some code movement.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/powerpc/kvm/booke_guest.c | 33 ---------------------------------
 1 file changed, 33 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
index 712d89a28c4..9c8ad850c6e 100644
--- a/arch/powerpc/kvm/booke_guest.c
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -227,39 +227,6 @@ void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
 	}
 }
 
-static int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
-{
-	enum emulation_result er;
-	int r;
-
-	er = kvmppc_emulate_instruction(run, vcpu);
-	switch (er) {
-	case EMULATE_DONE:
-		/* Future optimization: only reload non-volatiles if they were
-		 * actually modified. */
-		r = RESUME_GUEST_NV;
-		break;
-	case EMULATE_DO_MMIO:
-		run->exit_reason = KVM_EXIT_MMIO;
-		/* We must reload nonvolatiles because "update" load/store
-		 * instructions modify register state. */
-		/* Future optimization: only reload non-volatiles if they were
-		 * actually modified. */
-		r = RESUME_HOST_NV;
-		break;
-	case EMULATE_FAIL:
-		/* XXX Deliver Program interrupt to guest. */
-		printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
-		       vcpu->arch.last_inst);
-		r = RESUME_HOST;
-		break;
-	default:
-		BUG();
-	}
-
-	return r;
-}
-
 /**
  * kvmppc_handle_exit
  *
-- 
cgit v1.2.3


From ac3cd34e4eb9e3dccaec8e586c073ba2660b322f Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 21 May 2008 18:22:52 -0500
Subject: KVM: ppc: add lwzx/stwz emulation

Somehow these load/store instructions got missed before, but weren't used by
the guest so didn't break anything.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/powerpc/kvm/emulate.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index a03fe0c8069..00009746128 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -246,6 +246,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	case 31:
 		switch (get_xop(inst)) {
 
+		case 23:                                        /* lwzx */
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+			break;
+
 		case 83:                                        /* mfmsr */
 			rt = get_rt(inst);
 			vcpu->arch.gpr[rt] = vcpu->arch.msr;
@@ -267,6 +272,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
 			break;
 
+		case 151:                                       /* stwx */
+			rs = get_rs(inst);
+			emulated = kvmppc_handle_store(run, vcpu,
+			                               vcpu->arch.gpr[rs],
+			                               4, 1);
+			break;
+
 		case 163:                                       /* wrteei */
 			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
 			                 | (inst & MSR_EE);
-- 
cgit v1.2.3


From 52435b7c7a29f7dd7947c8c204494d7f52f14813 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 21 May 2008 18:22:53 -0500
Subject: KVM: ppc: Remove unmatched kunmap() call

We're not calling kmap() now, so we shouldn't call kunmap() either. This has no
practical effect in the non-highmem case, which is why it hasn't caused more
obvious problems.

Pointed out by Anthony Liguori.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/powerpc/kvm/44x_tlb.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index f5d7a5eab96..aa649c7db99 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -116,8 +116,6 @@ static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
 	struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
 	struct page *page = vcpu->arch.shadow_pages[index];
 
-	kunmap(vcpu->arch.shadow_pages[index]);
-
 	if (get_tlb_v(stlbe)) {
 		if (kvmppc_44x_tlbe_is_writable(stlbe))
 			kvm_release_page_dirty(page);
-- 
cgit v1.2.3


From 905fa4b9d6e2c9fd1c9ad84e3abe83021f498f53 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 21 May 2008 18:22:54 -0500
Subject: KVM: ppc: Use a read lock around MMU operations, and release it on
 error

gfn_to_page() and kvm_release_page_clean() are called from other contexts with
mmap_sem locked only for reading.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/powerpc/kvm/44x_tlb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index aa649c7db99..1c48d6164bd 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -142,18 +142,19 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	stlbe = &vcpu->arch.shadow_tlb[victim];
 
 	/* Get reference to new page. */
-	down_write(&current->mm->mmap_sem);
+	down_read(&current->mm->mmap_sem);
 	new_page = gfn_to_page(vcpu->kvm, gfn);
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page!\n");
 		kvm_release_page_clean(new_page);
+		up_read(&current->mm->mmap_sem);
 		return;
 	}
 	hpaddr = page_to_phys(new_page);
 
 	/* Drop reference to old page. */
 	kvmppc_44x_shadow_release(vcpu, victim);
-	up_write(&current->mm->mmap_sem);
+	up_read(&current->mm->mmap_sem);
 
 	vcpu->arch.shadow_pages[victim] = new_page;
 
-- 
cgit v1.2.3


From 9dcb40e1aa5bfe7d6ffc729f3c2b6c8f1392d2d3 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard <hollisb@us.ibm.com>
Date: Wed, 21 May 2008 18:22:55 -0500
Subject: KVM: ppc: Report bad GFNs

This code shouldn't be hit anyways, but when it is, it's useful to have a
little more information about the failure.

Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/powerpc/kvm/44x_tlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 1c48d6164bd..75dff7cfa81 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -145,7 +145,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
 	down_read(&current->mm->mmap_sem);
 	new_page = gfn_to_page(vcpu->kvm, gfn);
 	if (is_error_page(new_page)) {
-		printk(KERN_ERR "Couldn't get guest page!\n");
+		printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
 		kvm_release_page_clean(new_page);
 		up_read(&current->mm->mmap_sem);
 		return;
-- 
cgit v1.2.3


From 2f5997140f22f68f6390c49941150d3fa8a95cb7 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 27 May 2008 12:10:20 -0300
Subject: KVM: migrate PIT timer

Migrate the PIT timer to the physical CPU which vcpu0 is scheduled on,
similarly to what is done for the LAPIC timers, otherwise PIT interrupts
will be delayed until an unrelated event causes an exit.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c | 14 +++++++++++++-
 arch/x86/kvm/irq.c   |  6 ++++++
 arch/x86/kvm/irq.h   |  2 ++
 arch/x86/kvm/svm.c   |  2 +-
 arch/x86/kvm/vmx.c   |  2 +-
 arch/x86/kvm/x86.c   |  2 +-
 6 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 7c077a9d977..f2f5d260874 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,7 +200,6 @@ int __pit_timer_fn(struct kvm_kpit_state *ps)
 
 	atomic_inc(&pt->pending);
 	smp_mb__after_atomic_inc();
-	/* FIXME: handle case where the guest is in guest mode */
 	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
 		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		wake_up_interruptible(&vcpu0->wq);
@@ -237,6 +236,19 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 		return HRTIMER_NORESTART;
 }
 
+void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+	struct hrtimer *timer;
+
+	if (vcpu->vcpu_id != 0 || !pit)
+		return;
+
+	timer = &pit->pit_state.pit_timer.timer;
+	if (hrtimer_cancel(timer))
+		hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+}
+
 static void destroy_pit_timer(struct kvm_kpit_timer *pt)
 {
 	pr_debug("pit: execute del timer!\n");
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index ce1f583459b..76d736b5f66 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -94,3 +94,9 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 	/* TODO: PIT, RTC etc. */
 }
 EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
+
+void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
+{
+	__kvm_migrate_apic_timer(vcpu);
+	__kvm_migrate_pit_timer(vcpu);
+}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 1802134b836..2a15be2275c 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -84,6 +84,8 @@ void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
+void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
+void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
 
 int pit_has_pending_timer(struct kvm_vcpu *vcpu);
 int apic_has_pending_timer(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ab22615eee8..6b0d5fa5bab 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -688,7 +688,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		delta = vcpu->arch.host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
-		kvm_migrate_apic_timer(vcpu);
+		kvm_migrate_timers(vcpu);
 	}
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe4db11989..96445f34151 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -608,7 +608,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	if (vcpu->cpu != cpu) {
 		vcpu_clear(vmx);
-		kvm_migrate_apic_timer(vcpu);
+		kvm_migrate_timers(vcpu);
 		vpid_sync_vcpu_all(vmx);
 	}
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21338bdb28f..00acf1301a1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2758,7 +2758,7 @@ again:
 
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
-			__kvm_migrate_apic_timer(vcpu);
+			__kvm_migrate_timers(vcpu);
 		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
 				       &vcpu->requests)) {
 			kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
-- 
cgit v1.2.3


From e693d71b46e64536581bf4884434fc1b8797e96f Mon Sep 17 00:00:00 2001
From: Eli Collins <ecollins@vmware.com>
Date: Sun, 1 Jun 2008 20:24:40 -0700
Subject: KVM: VMX: Clear CR4.VMXE in hardware_disable

Clear CR4.VMXE in hardware_disable. There's no reason to leave it set
after doing a VMXOFF.

VMware Workstation 6.5 checks CR4.VMXE as a proxy for whether the CPU is
in VMX mode, so leaving VMXE set means we'll refuse to power on. With this
change the user can power on after unloading the kvm-intel module. I
tested on kvm-67 and kvm-69.

Signed-off-by: Eli Collins <ecollins@vmware.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/vmx.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 96445f34151..02efbe75f31 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1036,6 +1036,7 @@ static void hardware_enable(void *garbage)
 static void hardware_disable(void *garbage)
 {
 	asm volatile (ASM_VMX_VMXOFF : : : "cc");
+	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
-- 
cgit v1.2.3


From 8d2d73b9a5c35f2c6abf427afba7888cfc4cc65d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Wed, 4 Jun 2008 18:42:24 +0300
Subject: KVM: MMU: reschedule during shadow teardown

Shadows for large guests can take a long time to tear down, so reschedule
occasionally to avoid softlockup warnings.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7246b60afb9..c2fd6a4a58c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1858,6 +1858,7 @@ static void free_mmu_pages(struct kvm_vcpu *vcpu)
 		sp = container_of(vcpu->kvm->arch.active_mmu_pages.next,
 				  struct kvm_mmu_page, link);
 		kvm_mmu_zap_page(vcpu->kvm, sp);
+		cond_resched();
 	}
 	free_page((unsigned long)vcpu->arch.mmu.pae_root);
 }
-- 
cgit v1.2.3


From ebb0e6264c7a65c51feb3575e9edb58eab0cf469 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 20 May 2008 16:21:58 +0300
Subject: KVM: MMU: Fix printk() format string

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/paging_tmpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 156fe10288a..934c7b61939 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -418,7 +418,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 
 	/* mmio */
 	if (is_error_pfn(pfn)) {
-		pgprintk("gfn %x is mmio\n", walker.gfn);
+		pgprintk("gfn %lx is mmio\n", walker.gfn);
 		kvm_release_pfn_clean(pfn);
 		return 1;
 	}
-- 
cgit v1.2.3


From 3c9155106d589584f67b026ec444e69c4a68d7dc Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Tue, 20 May 2008 16:21:13 +0300
Subject: KVM: MMU: Fix is_empty_shadow_page() check

The check is only looking at one of two possible empty ptes.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c2fd6a4a58c..ee3f53098f0 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -658,7 +658,7 @@ static int is_empty_shadow_page(u64 *spt)
 	u64 *end;
 
 	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
-		if (*pos != shadow_trap_nonpresent_pte) {
+		if (is_shadow_present_pte(*pos)) {
 			printk(KERN_ERR "%s: %p %llx\n", __func__,
 			       pos, *pos);
 			return 0;
-- 
cgit v1.2.3