diff options
Diffstat (limited to 'drivers/kvm')
-rw-r--r-- | drivers/kvm/kvm.h | 1 | ||||
-rw-r--r-- | drivers/kvm/kvm_main.c | 19 | ||||
-rw-r--r-- | drivers/kvm/mmu.c | 16 | ||||
-rw-r--r-- | drivers/kvm/paging_tmpl.h | 79 | ||||
-rw-r--r-- | drivers/kvm/svm.c | 28 | ||||
-rw-r--r-- | drivers/kvm/vmx.c | 5 | ||||
-rw-r--r-- | drivers/kvm/x86_emulate.c | 98 |
7 files changed, 151 insertions, 95 deletions
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 91e0c75aca8..2db1ca4c680 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -242,6 +242,7 @@ struct kvm_vcpu { u64 pdptrs[4]; /* pae */ u64 shadow_efer; u64 apic_base; + u64 ia32_misc_enable_msr; int nmsrs; struct vmx_msr_entry *guest_msrs; struct vmx_msr_entry *host_msrs; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 67c1154960f..b10972ed0c9 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -272,7 +272,9 @@ static void kvm_free_physmem(struct kvm *kvm) static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { + vcpu_load(vcpu->kvm, vcpu_slot(vcpu)); kvm_mmu_destroy(vcpu); + vcpu_put(vcpu); kvm_arch_ops->vcpu_free(vcpu); } @@ -1224,6 +1226,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_APICBASE: data = vcpu->apic_base; break; + case MSR_IA32_MISC_ENABLE: + data = vcpu->ia32_misc_enable_msr; + break; #ifdef CONFIG_X86_64 case MSR_EFER: data = vcpu->shadow_efer; @@ -1295,6 +1300,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case MSR_IA32_APICBASE: vcpu->apic_base = data; break; + case MSR_IA32_MISC_ENABLE: + vcpu->ia32_misc_enable_msr = data; + break; default: printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr); return 1; @@ -1598,6 +1606,10 @@ static u32 msrs_to_save[] = { static unsigned num_msrs_to_save; +static u32 emulated_msrs[] = { + MSR_IA32_MISC_ENABLE, +}; + static __init void kvm_init_msr_list(void) { u32 dummy[2]; @@ -1923,7 +1935,7 @@ static long kvm_dev_ioctl(struct file *filp, if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list)) goto out; n = msr_list.nmsrs; - msr_list.nmsrs = num_msrs_to_save; + msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs); if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) goto out; r = -E2BIG; @@ -1933,6 +1945,11 @@ static long kvm_dev_ioctl(struct file *filp, if (copy_to_user(user_msr_list->indices, &msrs_to_save, num_msrs_to_save * sizeof(u32))) goto out; + if (copy_to_user(user_msr_list->indices + + num_msrs_to_save * sizeof(u32), + &emulated_msrs, + ARRAY_SIZE(emulated_msrs) * sizeof(u32))) + goto out; r = 0; break; } diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index c6f972914f0..22c426cd8cb 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -143,6 +143,7 @@ static int dbg = 1; #define PFERR_PRESENT_MASK (1U << 0) #define PFERR_WRITE_MASK (1U << 1) #define PFERR_USER_MASK (1U << 2) +#define PFERR_FETCH_MASK (1U << 4) #define PT64_ROOT_LEVEL 4 #define PT32_ROOT_LEVEL 2 @@ -168,6 +169,11 @@ static int is_cpuid_PSE36(void) return 1; } +static int is_nx(struct kvm_vcpu *vcpu) +{ + return vcpu->shadow_efer & EFER_NX; +} + static int is_present_pte(unsigned long pte) { return pte & PT_PRESENT_MASK; @@ -992,16 +998,6 @@ static inline int fix_read_pf(u64 *shadow_ent) return 0; } -static int may_access(u64 pte, int write, int user) -{ - - if (user && !(pte & PT_USER_MASK)) - return 0; - if (write && !(pte & PT_WRITABLE_MASK)) - return 0; - return 1; -} - static void paging_free(struct kvm_vcpu *vcpu) { nonpaging_free(vcpu); diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 2dbf4307ed9..149fa45fd9a 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -63,13 +63,15 @@ struct guest_walker { pt_element_t *ptep; pt_element_t inherited_ar; gfn_t gfn; + u32 error_code; }; /* * Fetch a guest pte for a guest virtual address */ -static void FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr) +static int FNAME(walk_addr)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, gva_t addr, + int write_fault, int user_fault, int fetch_fault) { hpa_t hpa; struct kvm_memory_slot *slot; @@ -86,7 +88,7 @@ static void FNAME(walk_addr)(struct guest_walker *walker, walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; root = *walker->ptep; if (!(root & PT_PRESENT_MASK)) - return; + goto not_present; --walker->level; } #endif @@ -111,11 +113,23 @@ static void FNAME(walk_addr)(struct guest_walker *walker, ASSERT(((unsigned long)walker->table & PAGE_MASK) == ((unsigned long)ptep & PAGE_MASK)); - if (is_present_pte(*ptep) && !(*ptep & PT_ACCESSED_MASK)) - *ptep |= PT_ACCESSED_MASK; - if (!is_present_pte(*ptep)) - break; + goto not_present; + + if (write_fault && !is_writeble_pte(*ptep)) + if (user_fault || is_write_protection(vcpu)) + goto access_error; + + if (user_fault && !(*ptep & PT_USER_MASK)) + goto access_error; + +#if PTTYPE == 64 + if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK)) + goto access_error; +#endif + + if (!(*ptep & PT_ACCESSED_MASK)) + *ptep |= PT_ACCESSED_MASK; /* avoid rmw */ if (walker->level == PT_PAGE_TABLE_LEVEL) { walker->gfn = (*ptep & PT_BASE_ADDR_MASK) @@ -146,6 +160,23 @@ static void FNAME(walk_addr)(struct guest_walker *walker, } walker->ptep = ptep; pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep); + return 1; + +not_present: + walker->error_code = 0; + goto err; + +access_error: + walker->error_code = PFERR_PRESENT_MASK; + +err: + if (write_fault) + walker->error_code |= PFERR_WRITE_MASK; + if (user_fault) + walker->error_code |= PFERR_USER_MASK; + if (fetch_fault) + walker->error_code |= PFERR_FETCH_MASK; + return 0; } static void FNAME(release_walker)(struct guest_walker *walker) @@ -274,7 +305,7 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page; if (is_writeble_pte(*shadow_ent)) - return 0; + return !user || (*shadow_ent & PT_USER_MASK); writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK; if (user) { @@ -347,8 +378,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code) { int write_fault = error_code & PFERR_WRITE_MASK; - int pte_present = error_code & PFERR_PRESENT_MASK; int user_fault = error_code & PFERR_USER_MASK; + int fetch_fault = error_code & PFERR_FETCH_MASK; struct guest_walker walker; u64 *shadow_pte; int fixed; @@ -365,19 +396,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, /* * Look up the shadow pte for the faulting address. */ - FNAME(walk_addr)(&walker, vcpu, addr); - shadow_pte = FNAME(fetch)(vcpu, addr, &walker); + r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, + fetch_fault); /* * The page is not mapped by the guest. Let the guest handle it. */ - if (!shadow_pte) { - pgprintk("%s: not mapped\n", __FUNCTION__); - inject_page_fault(vcpu, addr, error_code); + if (!r) { + pgprintk("%s: guest page fault\n", __FUNCTION__); + inject_page_fault(vcpu, addr, walker.error_code); FNAME(release_walker)(&walker); return 0; } + shadow_pte = FNAME(fetch)(vcpu, addr, &walker); pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__, shadow_pte, *shadow_pte); @@ -399,22 +431,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, * mmio: emulate if accessible, otherwise its a guest fault. */ if (is_io_pte(*shadow_pte)) { - if (may_access(*shadow_pte, write_fault, user_fault)) - return 1; - pgprintk("%s: io work, no access\n", __FUNCTION__); - inject_page_fault(vcpu, addr, - error_code | PFERR_PRESENT_MASK); - kvm_mmu_audit(vcpu, "post page fault (io)"); - return 0; - } - - /* - * pte not present, guest page fault. - */ - if (pte_present && !fixed && !write_pt) { - inject_page_fault(vcpu, addr, error_code); - kvm_mmu_audit(vcpu, "post page fault (guest)"); - return 0; + return 1; } ++kvm_stat.pf_fixed; @@ -429,7 +446,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) pt_element_t guest_pte; gpa_t gpa; - FNAME(walk_addr)(&walker, vcpu, vaddr); + FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0); guest_pte = *walker.ptep; FNAME(release_walker)(&walker); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 714f6a7841c..c79df79307e 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -502,6 +502,7 @@ static void init_vmcb(struct vmcb *vmcb) (1ULL << INTERCEPT_IOIO_PROT) | (1ULL << INTERCEPT_MSR_PROT) | (1ULL << INTERCEPT_TASK_SWITCH) | + (1ULL << INTERCEPT_SHUTDOWN) | (1ULL << INTERCEPT_VMRUN) | (1ULL << INTERCEPT_VMMCALL) | (1ULL << INTERCEPT_VMLOAD) | @@ -680,14 +681,14 @@ static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - dt->limit = vcpu->svm->vmcb->save.ldtr.limit; - dt->base = vcpu->svm->vmcb->save.ldtr.base; + dt->limit = vcpu->svm->vmcb->save.idtr.limit; + dt->base = vcpu->svm->vmcb->save.idtr.base; } static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - vcpu->svm->vmcb->save.ldtr.limit = dt->limit; - vcpu->svm->vmcb->save.ldtr.base = dt->base ; + vcpu->svm->vmcb->save.idtr.limit = dt->limit; + vcpu->svm->vmcb->save.idtr.base = dt->base ; } static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) @@ -892,6 +893,19 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } +static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + /* + * VMCB is undefined after a SHUTDOWN intercept + * so reinitialize it. + */ + memset(vcpu->svm->vmcb, 0, PAGE_SIZE); + init_vmcb(vcpu->svm->vmcb); + + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + return 0; +} + static int io_get_override(struct kvm_vcpu *vcpu, struct vmcb_seg **seg, int *addr_override) @@ -1149,7 +1163,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_K6_STAR: vcpu->svm->vmcb->save.star = data; break; -#ifdef CONFIG_X86_64_ +#ifdef CONFIG_X86_64 case MSR_LSTAR: vcpu->svm->vmcb->save.lstar = data; break; @@ -1249,6 +1263,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, [SVM_EXIT_IOIO] = io_interception, [SVM_EXIT_MSR] = msr_interception, [SVM_EXIT_TASK_SWITCH] = task_switch_interception, + [SVM_EXIT_SHUTDOWN] = shutdown_interception, [SVM_EXIT_VMRUN] = invalid_op_interception, [SVM_EXIT_VMMCALL] = invalid_op_interception, [SVM_EXIT_VMLOAD] = invalid_op_interception, @@ -1407,7 +1422,8 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; again: - do_interrupt_requests(vcpu, kvm_run); + if (!vcpu->mmio_read_completed) + do_interrupt_requests(vcpu, kvm_run); clgi(); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 0aa2659f6ae..54c35c0b318 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1116,6 +1116,8 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) if (rdmsr_safe(index, &data_low, &data_high) < 0) continue; + if (wrmsr_safe(index, data_low, data_high) < 0) + continue; data = data_low | ((u64)data_high << 32); vcpu->host_msrs[j].index = index; vcpu->host_msrs[j].reserved = 0; @@ -1717,7 +1719,8 @@ again: vmcs_writel(HOST_GS_BASE, segment_base(gs_sel)); #endif - do_interrupt_requests(vcpu, kvm_run); + if (!vcpu->mmio_read_completed) + do_interrupt_requests(vcpu, kvm_run); if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index be70795b482..7513cddb929 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -61,6 +61,7 @@ #define ModRM (1<<6) /* Destination is only written; never read. */ #define Mov (1<<7) +#define BitOp (1<<8) static u8 opcode_table[256] = { /* 0x00 - 0x07 */ @@ -148,7 +149,7 @@ static u8 opcode_table[256] = { 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM }; -static u8 twobyte_table[256] = { +static u16 twobyte_table[256] = { /* 0x00 - 0x0F */ 0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, @@ -180,16 +181,16 @@ static u8 twobyte_table[256] = { /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ - 0, 0, 0, DstMem | SrcReg | ModRM, 0, 0, 0, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, /* 0xA8 - 0xAF */ - 0, 0, 0, DstMem | SrcReg | ModRM, 0, 0, 0, 0, + 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, /* 0xB0 - 0xB7 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, - DstMem | SrcReg | ModRM, + DstMem | SrcReg | ModRM | BitOp, 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem16 | ModRM | Mov, /* 0xB8 - 0xBF */ - 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM, + 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp, 0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem16 | ModRM | Mov, /* 0xC0 - 0xCF */ @@ -469,7 +470,8 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, int x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { - u8 b, d, sib, twobyte = 0, rex_prefix = 0; + unsigned d; + u8 b, sib, twobyte = 0, rex_prefix = 0; u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0; unsigned long *override_base = NULL; unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i; @@ -726,46 +728,6 @@ done_prefixes: ; } - /* Decode and fetch the destination operand: register or memory. */ - switch (d & DstMask) { - case ImplicitOps: - /* Special instructions do their own operand decoding. */ - goto special_insn; - case DstReg: - dst.type = OP_REG; - if ((d & ByteOp) - && !(twobyte_table && (b == 0xb6 || b == 0xb7))) { - dst.ptr = decode_register(modrm_reg, _regs, - (rex_prefix == 0)); - dst.val = *(u8 *) dst.ptr; - dst.bytes = 1; - } else { - dst.ptr = decode_register(modrm_reg, _regs, 0); - switch ((dst.bytes = op_bytes)) { - case 2: - dst.val = *(u16 *)dst.ptr; - break; - case 4: - dst.val = *(u32 *)dst.ptr; - break; - case 8: - dst.val = *(u64 *)dst.ptr; - break; - } - } - break; - case DstMem: - dst.type = OP_MEM; - dst.ptr = (unsigned long *)cr2; - dst.bytes = (d & ByteOp) ? 1 : op_bytes; - if (!(d & Mov) && /* optimisation - avoid slow emulated read */ - ((rc = ops->read_emulated((unsigned long)dst.ptr, - &dst.val, dst.bytes, ctxt)) != 0)) - goto done; - break; - } - dst.orig_val = dst.val; - /* * Decode and fetch the source operand: register, memory * or immediate. @@ -838,6 +800,50 @@ done_prefixes: break; } + /* Decode and fetch the destination operand: register or memory. */ + switch (d & DstMask) { + case ImplicitOps: + /* Special instructions do their own operand decoding. */ + goto special_insn; + case DstReg: + dst.type = OP_REG; + if ((d & ByteOp) + && !(twobyte_table && (b == 0xb6 || b == 0xb7))) { + dst.ptr = decode_register(modrm_reg, _regs, + (rex_prefix == 0)); + dst.val = *(u8 *) dst.ptr; + dst.bytes = 1; + } else { + dst.ptr = decode_register(modrm_reg, _regs, 0); + switch ((dst.bytes = op_bytes)) { + case 2: + dst.val = *(u16 *)dst.ptr; + break; + case 4: + dst.val = *(u32 *)dst.ptr; + break; + case 8: + dst.val = *(u64 *)dst.ptr; + break; + } + } + break; + case DstMem: + dst.type = OP_MEM; + dst.ptr = (unsigned long *)cr2; + dst.bytes = (d & ByteOp) ? 1 : op_bytes; + if (d & BitOp) { + dst.ptr += src.val / BITS_PER_LONG; + dst.bytes = sizeof(long); + } + if (!(d & Mov) && /* optimisation - avoid slow emulated read */ + ((rc = ops->read_emulated((unsigned long)dst.ptr, + &dst.val, dst.bytes, ctxt)) != 0)) + goto done; + break; + } + dst.orig_val = dst.val; + if (twobyte) goto twobyte_insn; |