diff options
Diffstat (limited to 'drivers/kvm/x86.c')
-rw-r--r-- | drivers/kvm/x86.c | 427 |
1 files changed, 427 insertions, 0 deletions
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c index ef1661f10b4..394da660536 100644 --- a/drivers/kvm/x86.c +++ b/drivers/kvm/x86.c @@ -1787,6 +1787,433 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); /* + * Check if userspace requested an interrupt window, and that the + * interrupt window is open. + * + * No need to exit to userspace if we already have an interrupt queued. + */ +static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return (!vcpu->irq_summary && + kvm_run->request_interrupt_window && + vcpu->interrupt_window_open && + (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF)); +} + +static void post_kvm_run_save(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + kvm_run->cr8 = get_cr8(vcpu); + kvm_run->apic_base = kvm_get_apic_base(vcpu); + if (irqchip_in_kernel(vcpu->kvm)) + kvm_run->ready_for_interrupt_injection = 1; + else + kvm_run->ready_for_interrupt_injection = + (vcpu->interrupt_window_open && + vcpu->irq_summary == 0); +} + +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + + if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { + pr_debug("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, vcpu->sipi_vector); + kvm_lapic_reset(vcpu); + r = kvm_x86_ops->vcpu_reset(vcpu); + if (r) + return r; + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; + } + +preempted: + if (vcpu->guest_debug.enabled) + kvm_x86_ops->guest_debug_pre(vcpu); + +again: + r = kvm_mmu_reload(vcpu); + if (unlikely(r)) + goto out; + + kvm_inject_pending_timer_irqs(vcpu); + + preempt_disable(); + + kvm_x86_ops->prepare_guest_switch(vcpu); + kvm_load_guest_fpu(vcpu); + + local_irq_disable(); + + if (signal_pending(current)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; + goto out; + } + + if (irqchip_in_kernel(vcpu->kvm)) + kvm_x86_ops->inject_pending_irq(vcpu); + else if (!vcpu->mmio_read_completed) + kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); + + vcpu->guest_mode = 1; + kvm_guest_enter(); + + if (vcpu->requests) + if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + kvm_x86_ops->tlb_flush(vcpu); + + kvm_x86_ops->run(vcpu, kvm_run); + + vcpu->guest_mode = 0; + local_irq_enable(); + + ++vcpu->stat.exits; + + /* + * We must have an instruction between local_irq_enable() and + * kvm_guest_exit(), so the timer interrupt isn't delayed by + * the interrupt shadow. The stat.exits increment will do nicely. + * But we need to prevent reordering, hence this barrier(): + */ + barrier(); + + kvm_guest_exit(); + + preempt_enable(); + + /* + * Profile KVM exit RIPs: + */ + if (unlikely(prof_on == KVM_PROFILING)) { + kvm_x86_ops->cache_regs(vcpu); + profile_hit(KVM_PROFILING, (void *)vcpu->rip); + } + + r = kvm_x86_ops->handle_exit(kvm_run, vcpu); + + if (r > 0) { + if (dm_request_for_irq_injection(vcpu, kvm_run)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.request_irq_exits; + goto out; + } + if (!need_resched()) { + ++vcpu->stat.light_exits; + goto again; + } + } + +out: + if (r > 0) { + kvm_resched(vcpu); + goto preempted; + } + + post_kvm_run_save(vcpu, kvm_run); + + return r; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { + kvm_vcpu_block(vcpu); + vcpu_put(vcpu); + return -EAGAIN; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + /* re-sync apic's tpr */ + if (!irqchip_in_kernel(vcpu->kvm)) + set_cr8(vcpu, kvm_run->cr8); + + if (vcpu->pio.cur_count) { + r = complete_pio(vcpu); + if (r) + goto out; + } +#if CONFIG_HAS_IOMEM + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + r = emulate_instruction(vcpu, kvm_run, + vcpu->mmio_fault_cr2, 0, 1); + if (r == EMULATE_DO_MMIO) { + /* + * Read-modify-write. Back to userspace. + */ + r = 0; + goto out; + } + } +#endif + if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { + kvm_x86_ops->cache_regs(vcpu); + vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; + kvm_x86_ops->decache_regs(vcpu); + } + + r = __vcpu_run(vcpu, kvm_run); + +out: + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + return r; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + + kvm_x86_ops->cache_regs(vcpu); + + regs->rax = vcpu->regs[VCPU_REGS_RAX]; + regs->rbx = vcpu->regs[VCPU_REGS_RBX]; + regs->rcx = vcpu->regs[VCPU_REGS_RCX]; + regs->rdx = vcpu->regs[VCPU_REGS_RDX]; + regs->rsi = vcpu->regs[VCPU_REGS_RSI]; + regs->rdi = vcpu->regs[VCPU_REGS_RDI]; + regs->rsp = vcpu->regs[VCPU_REGS_RSP]; + regs->rbp = vcpu->regs[VCPU_REGS_RBP]; +#ifdef CONFIG_X86_64 + regs->r8 = vcpu->regs[VCPU_REGS_R8]; + regs->r9 = vcpu->regs[VCPU_REGS_R9]; + regs->r10 = vcpu->regs[VCPU_REGS_R10]; + regs->r11 = vcpu->regs[VCPU_REGS_R11]; + regs->r12 = vcpu->regs[VCPU_REGS_R12]; + regs->r13 = vcpu->regs[VCPU_REGS_R13]; + regs->r14 = vcpu->regs[VCPU_REGS_R14]; + regs->r15 = vcpu->regs[VCPU_REGS_R15]; +#endif + + regs->rip = vcpu->rip; + regs->rflags = kvm_x86_ops->get_rflags(vcpu); + + /* + * Don't leak debug flags in case they were set for guest debugging + */ + if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep) + regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + + vcpu_put(vcpu); + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + + vcpu->regs[VCPU_REGS_RAX] = regs->rax; + vcpu->regs[VCPU_REGS_RBX] = regs->rbx; + vcpu->regs[VCPU_REGS_RCX] = regs->rcx; + vcpu->regs[VCPU_REGS_RDX] = regs->rdx; + vcpu->regs[VCPU_REGS_RSI] = regs->rsi; + vcpu->regs[VCPU_REGS_RDI] = regs->rdi; + vcpu->regs[VCPU_REGS_RSP] = regs->rsp; + vcpu->regs[VCPU_REGS_RBP] = regs->rbp; +#ifdef CONFIG_X86_64 + vcpu->regs[VCPU_REGS_R8] = regs->r8; + vcpu->regs[VCPU_REGS_R9] = regs->r9; + vcpu->regs[VCPU_REGS_R10] = regs->r10; + vcpu->regs[VCPU_REGS_R11] = regs->r11; + vcpu->regs[VCPU_REGS_R12] = regs->r12; + vcpu->regs[VCPU_REGS_R13] = regs->r13; + vcpu->regs[VCPU_REGS_R14] = regs->r14; + vcpu->regs[VCPU_REGS_R15] = regs->r15; +#endif + + vcpu->rip = regs->rip; + kvm_x86_ops->set_rflags(vcpu, regs->rflags); + + kvm_x86_ops->decache_regs(vcpu); + + vcpu_put(vcpu); + + return 0; +} + +static void get_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + return kvm_x86_ops->get_segment(vcpu, var, seg); +} + +void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) +{ + struct kvm_segment cs; + + get_segment(vcpu, &cs, VCPU_SREG_CS); + *db = cs.db; + *l = cs.l; +} +EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + struct descriptor_table dt; + int pending_vec; + + vcpu_load(vcpu); + + get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + get_segment(vcpu, &sregs->es, VCPU_SREG_ES); + get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); + get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); + get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); + + get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); + get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); + + kvm_x86_ops->get_idt(vcpu, &dt); + sregs->idt.limit = dt.limit; + sregs->idt.base = dt.base; + kvm_x86_ops->get_gdt(vcpu, &dt); + sregs->gdt.limit = dt.limit; + sregs->gdt.base = dt.base; + + kvm_x86_ops->decache_cr4_guest_bits(vcpu); + sregs->cr0 = vcpu->cr0; + sregs->cr2 = vcpu->cr2; + sregs->cr3 = vcpu->cr3; + sregs->cr4 = vcpu->cr4; + sregs->cr8 = get_cr8(vcpu); + sregs->efer = vcpu->shadow_efer; + sregs->apic_base = kvm_get_apic_base(vcpu); + + if (irqchip_in_kernel(vcpu->kvm)) { + memset(sregs->interrupt_bitmap, 0, + sizeof sregs->interrupt_bitmap); + pending_vec = kvm_x86_ops->get_irq(vcpu); + if (pending_vec >= 0) + set_bit(pending_vec, + (unsigned long *)sregs->interrupt_bitmap); + } else + memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, + sizeof sregs->interrupt_bitmap); + + vcpu_put(vcpu); + + return 0; +} + +static void set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + return kvm_x86_ops->set_segment(vcpu, var, seg); +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + int mmu_reset_needed = 0; + int i, pending_vec, max_bits; + struct descriptor_table dt; + + vcpu_load(vcpu); + + dt.limit = sregs->idt.limit; + dt.base = sregs->idt.base; + kvm_x86_ops->set_idt(vcpu, &dt); + dt.limit = sregs->gdt.limit; + dt.base = sregs->gdt.base; + kvm_x86_ops->set_gdt(vcpu, &dt); + + vcpu->cr2 = sregs->cr2; + mmu_reset_needed |= vcpu->cr3 != sregs->cr3; + vcpu->cr3 = sregs->cr3; + + set_cr8(vcpu, sregs->cr8); + + mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; +#ifdef CONFIG_X86_64 + kvm_x86_ops->set_efer(vcpu, sregs->efer); +#endif + kvm_set_apic_base(vcpu, sregs->apic_base); + + kvm_x86_ops->decache_cr4_guest_bits(vcpu); + + mmu_reset_needed |= vcpu->cr0 != sregs->cr0; + vcpu->cr0 = sregs->cr0; + kvm_x86_ops->set_cr0(vcpu, sregs->cr0); + + mmu_reset_needed |= vcpu->cr4 != sregs->cr4; + kvm_x86_ops->set_cr4(vcpu, sregs->cr4); + if (!is_long_mode(vcpu) && is_pae(vcpu)) + load_pdptrs(vcpu, vcpu->cr3); + + if (mmu_reset_needed) + kvm_mmu_reset_context(vcpu); + + if (!irqchip_in_kernel(vcpu->kvm)) { + memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, + sizeof vcpu->irq_pending); + vcpu->irq_summary = 0; + for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i) + if (vcpu->irq_pending[i]) + __set_bit(i, &vcpu->irq_summary); + } else { + max_bits = (sizeof sregs->interrupt_bitmap) << 3; + pending_vec = find_first_bit( + (const unsigned long *)sregs->interrupt_bitmap, + max_bits); + /* Only pending external irq is handled here */ + if (pending_vec < max_bits) { + kvm_x86_ops->set_irq(vcpu, pending_vec); + pr_debug("Set back pending irq %d\n", + pending_vec); + } + } + + set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); + set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); + set_segment(vcpu, &sregs->es, VCPU_SREG_ES); + set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); + set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); + set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); + + set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); + set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); + + vcpu_put(vcpu); + + return 0; +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + int r; + + vcpu_load(vcpu); + + r = kvm_x86_ops->set_guest_debug(vcpu, dbg); + + vcpu_put(vcpu); + + return r; +} + +/* * fxsave fpu state. Taken from x86_64/processor.h. To be killed when * we have asm/x86/processor.h */ |