From 81fe96bde7db24c02adf245604f073ea9e8d941a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 27 Sep 2007 10:07:04 +0200 Subject: i386: Expose IOAPIC register definitions even if CONFIG_X86_IO_APIC is not set KVM reuses the IOAPIC register definitions, and needs them even if the host is not compiled with IOAPIC support. Move the #ifdef below so that only the IOAPIC variables and functions are protected, and the register definitions are available to all. Signed-off-by: Avi Kivity --- include/asm-x86/io_apic_32.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/include/asm-x86/io_apic_32.h b/include/asm-x86/io_apic_32.h index dbe734ddf2a..3f087883ea4 100644 --- a/include/asm-x86/io_apic_32.h +++ b/include/asm-x86/io_apic_32.h @@ -11,8 +11,6 @@ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar */ -#ifdef CONFIG_X86_IO_APIC - /* * The structure of the IO-APIC: */ @@ -55,12 +53,6 @@ union IO_APIC_reg_03 { } __attribute__ ((packed)) bits; }; -/* - * # of IO-APICs and # of IRQ routing registers - */ -extern int nr_ioapics; -extern int nr_ioapic_registers[MAX_IO_APICS]; - enum ioapic_irq_destination_types { dest_Fixed = 0, dest_LowestPrio = 1, @@ -100,6 +92,14 @@ struct IO_APIC_route_entry { } __attribute__ ((packed)); +#ifdef CONFIG_X86_IO_APIC + +/* + * # of IO-APICs and # of IRQ routing registers + */ +extern int nr_ioapics; +extern int nr_ioapic_registers[MAX_IO_APICS]; + /* * MP-BIOS irq configuration table structures: */ -- cgit v1.2.3 From cd0d91379776cb6850c7b11c0a8843ca75967558 Mon Sep 17 00:00:00 2001 From: Nguyen Anh Quynh Date: Wed, 11 Jul 2007 14:30:54 +0300 Subject: KVM: Fix *nopage() in kvm_main.c *nopage() in kvm_main.c should only store the type of mmap() fault if the pointers are not NULL. This patch fixes the problem. 
Signed-off-by: Nguyen Anh Quynh Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index cd0557954e5..36a458f13ec 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2285,7 +2285,6 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, unsigned long pgoff; struct page *page; - *type = VM_FAULT_MINOR; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; if (pgoff == 0) page = virt_to_page(vcpu->run); @@ -2294,6 +2293,9 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma, else return NOPAGE_SIGBUS; get_page(page); + if (type != NULL) + *type = VM_FAULT_MINOR; + return page; } @@ -2768,12 +2770,14 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma, unsigned long pgoff; struct page *page; - *type = VM_FAULT_MINOR; pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; page = gfn_to_page(kvm, pgoff); if (!page) return NOPAGE_SIGBUS; get_page(page); + if (type != NULL) + *type = VM_FAULT_MINOR; + return page; } -- cgit v1.2.3 From dad3795d2baa4e02cbfd161d9089c73dea16b4ba Mon Sep 17 00:00:00 2001 From: Qing He Date: Thu, 12 Jul 2007 12:33:56 +0300 Subject: KVM: SMP: Add vcpu_id field in struct vcpu This patch adds a `vcpu_id' field in `struct vcpu', so we can differentiate BSP and APs without pointer comparison or arithmetic. 
Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 1 + drivers/kvm/svm.c | 2 +- drivers/kvm/vmx.c | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 336be86c6f5..b629a83eb82 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -328,6 +328,7 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_vcpu { struct kvm *kvm; + int vcpu_id; union { struct vmcs *vmcs; struct vcpu_svm *svm; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 36a458f13ec..df9c05e9b34 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2355,6 +2355,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) goto out; vcpu = &kvm->vcpus[n]; + vcpu->vcpu_id = n; mutex_lock(&vcpu->mutex); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index bc818cc126e..52a11ccdf0c 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -591,7 +591,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) fx_init(vcpu); vcpu->fpu_active = 1; vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; - if (vcpu == &vcpu->kvm->vcpus[0]) + if (vcpu->vcpu_id == 0) vcpu->apic_base |= MSR_IA32_APICBASE_BSP; return 0; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 80628f69916..7fa62c780ce 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1253,7 +1253,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); vcpu->cr8 = 0; vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; - if (vcpu == &vcpu->kvm->vcpus[0]) + if (vcpu->vcpu_id == 0) vcpu->apic_base |= MSR_IA32_APICBASE_BSP; fx_init(vcpu); -- cgit v1.2.3 From 24cbc7e9cb0488095e4e144a762276c85ff55f9b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 17 Jul 2007 11:45:55 +0300 Subject: KVM: Future-proof the exit information union ABI Note that as the size of struct kvm_run is not part of the ABI, we can add things at the end. 
Signed-off-by: Avi Kivity --- include/linux/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index e6edca81ab8..b9a4b7c436f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -111,6 +111,8 @@ struct kvm_run { __u32 longmode; __u32 pad; } hypercall; + /* Fix the size of the union. */ + char padding[256]; }; }; -- cgit v1.2.3 From 65619eb5a88dae3dadbb1050f957ed357aa54a50 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Tue, 17 Jul 2007 11:52:33 +0300 Subject: KVM: In-kernel string pio write support Add string pio write support to support some version of Windows. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 48 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index df9c05e9b34..1be510b657f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1760,18 +1760,35 @@ static int complete_pio(struct kvm_vcpu *vcpu) return 0; } -void kernel_pio(struct kvm_io_device *pio_dev, struct kvm_vcpu *vcpu) +static void kernel_pio(struct kvm_io_device *pio_dev, + struct kvm_vcpu *vcpu, + void *pd) { /* TODO: String I/O for in kernel device */ if (vcpu->pio.in) kvm_iodevice_read(pio_dev, vcpu->pio.port, vcpu->pio.size, - vcpu->pio_data); + pd); else kvm_iodevice_write(pio_dev, vcpu->pio.port, vcpu->pio.size, - vcpu->pio_data); + pd); +} + +static void pio_string_write(struct kvm_io_device *pio_dev, + struct kvm_vcpu *vcpu) +{ + struct kvm_pio_request *io = &vcpu->pio; + void *pd = vcpu->pio_data; + int i; + + for (i = 0; i < io->cur_count; i++) { + kvm_iodevice_write(pio_dev, io->port, + io->size, + pd); + pd += io->size; + } } int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, @@ -1779,7 +1796,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, gva_t address, int rep, unsigned port) { unsigned now, in_page; - int i; + 
int i, ret = 0; int nr_pages = 1; struct page *page; struct kvm_io_device *pio_dev; @@ -1806,15 +1823,12 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); kvm_arch_ops->decache_regs(vcpu); if (pio_dev) { - kernel_pio(pio_dev, vcpu); + kernel_pio(pio_dev, vcpu, vcpu->pio_data); complete_pio(vcpu); return 1; } return 0; } - /* TODO: String I/O for in kernel device */ - if (pio_dev) - printk(KERN_ERR "kvm_setup_pio: no string io support\n"); if (!count) { kvm_arch_ops->skip_emulated_instruction(vcpu); @@ -1862,9 +1876,21 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } } - if (!vcpu->pio.in) - return pio_copy_data(vcpu); - return 0; + if (!vcpu->pio.in) { + /* string PIO write */ + ret = pio_copy_data(vcpu); + if (ret >= 0 && pio_dev) { + pio_string_write(pio_dev, vcpu); + complete_pio(vcpu); + if (vcpu->pio.count == 0) + ret = 1; + } + } else if (pio_dev) + printk(KERN_ERR "no string pio read support yet, " + "port %x size %d count %ld\n", + port, size, count); + + return ret; } EXPORT_SYMBOL_GPL(kvm_setup_pio); -- cgit v1.2.3 From dea8caee7b6971ae90e9d303b5d98dbf2dafed53 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 23:12:26 +1000 Subject: KVM: Trivial: /dev/kvm interface is no longer experimental. KVM interface is no longer experimental. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- include/linux/kvm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index b9a4b7c436f..68ecced4011 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -4,8 +4,7 @@ /* * Userspace interface for /dev/kvm - kernel based virtual machine * - * Note: this interface is considered experimental and may change without - * notice. + * Note: you must update KVM_API_VERSION if you change this interface. 
*/ #include -- cgit v1.2.3 From 5eb549a085c3500f2b9d8b48d40393b6e50b68a9 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 23:15:29 +1000 Subject: KVM: Trivial: Remove unused struct cpu_user_regs declaration Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h index ea3407d7fee..2847d67abfd 100644 --- a/drivers/kvm/x86_emulate.h +++ b/drivers/kvm/x86_emulate.h @@ -112,8 +112,6 @@ struct x86_emulate_ops { }; -struct cpu_user_regs; - struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; -- cgit v1.2.3 From 1e3c5cb0d5a63b7169708614bfba0c7f25aa493e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 23:16:11 +1000 Subject: KVM: Trivial: Make decode_register() static I have shied away from touching x86_emulate.c (it could definitely use some love, but it is forked from the Xen code, and it would be more productive to cross-merge fixes). Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 9 +++++++-- drivers/kvm/x86_emulate.h | 8 -------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 4b8a0cc9665..f5e4644e243 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -443,8 +443,13 @@ struct operand { (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \ } while (0) -void *decode_register(u8 modrm_reg, unsigned long *regs, - int highbyte_regs) +/* + * Given the 'reg' portion of a ModRM byte, and a register block, return a + * pointer into the block that addresses the relevant register. + * @highbyte_regs specifies whether to decode AH,CH,DH,BH. 
+ */ +static void *decode_register(u8 modrm_reg, unsigned long *regs, + int highbyte_regs) { void *p; diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h index 2847d67abfd..574cca70b22 100644 --- a/drivers/kvm/x86_emulate.h +++ b/drivers/kvm/x86_emulate.h @@ -152,12 +152,4 @@ struct x86_emulate_ctxt { int x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); -/* - * Given the 'reg' portion of a ModRM byte, and a register block, return a - * pointer into the block that addresses the relevant register. - * @highbyte_regs specifies whether to decode AH,CH,DH,BH. - */ -void *decode_register(u8 modrm_reg, unsigned long *regs, - int highbyte_regs); - #endif /* __X86_EMULATE_H__ */ -- cgit v1.2.3 From dcc0766b22e165bcb80518bf367e86b81fcf4351 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 23:16:56 +1000 Subject: KVM: Trivial: Comment spelling may escape grep Speling error in comment. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index f5e4644e243..db9f9553487 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -6,7 +6,7 @@ * Copyright (c) 2005 Keir Fraser * * Linux coding style, mod r/m decoder, segment base fixes, real-mode - * privieged instructions: + * privileged instructions: * * Copyright (C) 2006 Qumranet * -- cgit v1.2.3 From 9a2b85c620b9779360c7726de4caeda78cac38d4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 23:17:55 +1000 Subject: KVM: Trivial: Avoid hardware_disable predeclaration Don't pre-declare hardware_disable: shuffle the reboot hook down. 
Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1be510b657f..326fa79fbeb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -54,8 +54,6 @@ static cpumask_t cpus_hardware_enabled; struct kvm_arch_ops *kvm_arch_ops; -static void hardware_disable(void *ignored); - #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) static struct kvm_stats_debugfs_item { @@ -2924,25 +2922,6 @@ static struct miscdevice kvm_dev = { &kvm_chardev_ops, }; -static int kvm_reboot(struct notifier_block *notifier, unsigned long val, - void *v) -{ - if (val == SYS_RESTART) { - /* - * Some (well, at least mine) BIOSes hang on reboot if - * in vmx root mode. - */ - printk(KERN_INFO "kvm: exiting hardware virtualization\n"); - on_each_cpu(hardware_disable, NULL, 0, 1); - } - return NOTIFY_OK; -} - -static struct notifier_block kvm_reboot_notifier = { - .notifier_call = kvm_reboot, - .priority = 0, -}; - /* * Make sure that a cpu that is being hot-unplugged does not have any vcpus * cached on it. @@ -3025,6 +3004,25 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } +static int kvm_reboot(struct notifier_block *notifier, unsigned long val, + void *v) +{ + if (val == SYS_RESTART) { + /* + * Some (well, at least mine) BIOSes hang on reboot if + * in vmx root mode. 
+ */ + printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + on_each_cpu(hardware_disable, NULL, 0, 1); + } + return NOTIFY_OK; +} + +static struct notifier_block kvm_reboot_notifier = { + .notifier_call = kvm_reboot, + .priority = 0, +}; + void kvm_io_bus_init(struct kvm_io_bus *bus) { memset(bus, 0, sizeof(*bus)); -- cgit v1.2.3 From 707d92fa72b425bc919a84670c01402e81505c58 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 23:19:08 +1000 Subject: KVM: Trivial: Use standard CR0 flags macros from asm/cpu-features.h The kernel now has asm/cpu-features.h: use those macros instead of inventing our own. Also spell out definition of CR0_RESEVED_BITS (no code change) and fix typo. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 19 +++++-------------- drivers/kvm/kvm_main.c | 15 +++++++++------ drivers/kvm/mmu.c | 2 +- drivers/kvm/svm.c | 20 ++++++++++---------- drivers/kvm/vmx.c | 22 +++++++++++----------- 5 files changed, 36 insertions(+), 42 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index b629a83eb82..7117c3b3cca 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -19,15 +19,6 @@ #include #include -#define CR0_PE_MASK (1ULL << 0) -#define CR0_MP_MASK (1ULL << 1) -#define CR0_TS_MASK (1ULL << 3) -#define CR0_NE_MASK (1ULL << 5) -#define CR0_WP_MASK (1ULL << 16) -#define CR0_NW_MASK (1ULL << 29) -#define CR0_CD_MASK (1ULL << 30) -#define CR0_PG_MASK (1ULL << 31) - #define CR3_WPT_MASK (1ULL << 3) #define CR3_PCD_MASK (1ULL << 4) @@ -42,11 +33,11 @@ #define CR4_VMXE_MASK (1ULL << 13) #define KVM_GUEST_CR0_MASK \ - (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \ - | CR0_NW_MASK | CR0_CD_MASK) + (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \ + | X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON \ - (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK \ - | CR0_MP_MASK) + (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \ + | X86_CR0_MP) 
#define KVM_GUEST_CR4_MASK \ (CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK) #define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK) @@ -667,7 +658,7 @@ static inline int is_pse(struct kvm_vcpu *vcpu) static inline int is_paging(struct kvm_vcpu *vcpu) { - return vcpu->cr0 & CR0_PG_MASK; + return vcpu->cr0 & X86_CR0_PG; } static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 326fa79fbeb..5d8febe580d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -82,7 +82,10 @@ static struct dentry *debugfs_dir; #define MAX_IO_MSRS 256 -#define CR0_RESEVED_BITS 0xffffffff1ffaffc0ULL +#define CR0_RESERVED_BITS \ + (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ + | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ + | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) #define LMSW_GUEST_MASK 0x0eULL #define CR4_RESEVED_BITS (~((1ULL << 11) - 1)) #define CR8_RESEVED_BITS (~0x0fULL) @@ -466,27 +469,27 @@ out: void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { - if (cr0 & CR0_RESEVED_BITS) { + if (cr0 & CR0_RESERVED_BITS) { printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", cr0, vcpu->cr0); inject_gp(vcpu); return; } - if ((cr0 & CR0_NW_MASK) && !(cr0 & CR0_CD_MASK)) { + if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); inject_gp(vcpu); return; } - if ((cr0 & CR0_PG_MASK) && !(cr0 & CR0_PE_MASK)) { + if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { printk(KERN_DEBUG "set_cr0: #GP, set PG flag " "and a clear PE flag\n"); inject_gp(vcpu); return; } - if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { + if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { #ifdef CONFIG_X86_64 if ((vcpu->shadow_efer & EFER_LME)) { int cs_db, cs_l; @@ -1158,7 +1161,7 @@ int emulate_clts(struct kvm_vcpu *vcpu) { unsigned long cr0; - cr0 = vcpu->cr0 & ~CR0_TS_MASK; + cr0 = vcpu->cr0 & 
~X86_CR0_TS; kvm_arch_ops->set_cr0(vcpu, cr0); return X86EMUL_CONTINUE; } diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 23965aa5ee7..75faef4fb08 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -158,7 +158,7 @@ static struct kmem_cache *mmu_page_header_cache; static int is_write_protection(struct kvm_vcpu *vcpu) { - return vcpu->cr0 & CR0_WP_MASK; + return vcpu->cr0 & X86_CR0_WP; } static int is_cpuid_PSE36(void) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 52a11ccdf0c..e920c2269af 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -99,7 +99,7 @@ static unsigned get_addr_size(struct kvm_vcpu *vcpu) struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; u16 cs_attrib; - if (!(sa->cr0 & CR0_PE_MASK) || (sa->rflags & X86_EFLAGS_VM)) + if (!(sa->cr0 & X86_CR0_PE) || (sa->rflags & X86_EFLAGS_VM)) return 2; cs_attrib = sa->cs.attrib; @@ -563,7 +563,7 @@ static void init_vmcb(struct vmcb *vmcb) * cr0 val on cpu init should be 0x60000010, we enable cpu * cache by default. the orderly way is to enable cache in bios. */ - save->cr0 = 0x00000010 | CR0_PG_MASK | CR0_WP_MASK; + save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; save->cr4 = CR4_PAE_MASK; /* rdx = ?? 
*/ } @@ -756,25 +756,25 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { #ifdef CONFIG_X86_64 if (vcpu->shadow_efer & KVM_EFER_LME) { - if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) { + if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->shadow_efer |= KVM_EFER_LMA; vcpu->svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; } - if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK) ) { + if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) { vcpu->shadow_efer &= ~KVM_EFER_LMA; vcpu->svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); } } #endif - if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) { + if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); vcpu->fpu_active = 1; } vcpu->cr0 = cr0; - cr0 |= CR0_PG_MASK | CR0_WP_MASK; - cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); + cr0 |= X86_CR0_PG | X86_CR0_WP; + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); vcpu->svm->vmcb->save.cr0 = cr0; } @@ -945,8 +945,8 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); - if (!(vcpu->cr0 & CR0_TS_MASK)) - vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK; + if (!(vcpu->cr0 & X86_CR0_TS)) + vcpu->svm->vmcb->save.cr0 &= ~X86_CR0_TS; vcpu->fpu_active = 1; return 1; @@ -1702,7 +1702,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) if (vcpu->fpu_active) { vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); - vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK; + vcpu->svm->vmcb->save.cr0 |= X86_CR0_TS; vcpu->fpu_active = 0; } } diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 7fa62c780ce..ebd93b4775a 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -436,9 +436,9 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu) if (vcpu->fpu_active) return; vcpu->fpu_active = 1; - vmcs_clear_bits(GUEST_CR0, CR0_TS_MASK); - if 
(vcpu->cr0 & CR0_TS_MASK) - vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); + vmcs_clear_bits(GUEST_CR0, X86_CR0_TS); + if (vcpu->cr0 & X86_CR0_TS) + vmcs_set_bits(GUEST_CR0, X86_CR0_TS); update_exception_bitmap(vcpu); } @@ -447,7 +447,7 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) if (!vcpu->fpu_active) return; vcpu->fpu_active = 0; - vmcs_set_bits(GUEST_CR0, CR0_TS_MASK); + vmcs_set_bits(GUEST_CR0, X86_CR0_TS); update_exception_bitmap(vcpu); } @@ -1002,17 +1002,17 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { vmx_fpu_deactivate(vcpu); - if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) + if (vcpu->rmode.active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); - if (!vcpu->rmode.active && !(cr0 & CR0_PE_MASK)) + if (!vcpu->rmode.active && !(cr0 & X86_CR0_PE)) enter_rmode(vcpu); #ifdef CONFIG_X86_64 if (vcpu->shadow_efer & EFER_LME) { - if (!is_paging(vcpu) && (cr0 & CR0_PG_MASK)) + if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) enter_lmode(vcpu); - if (is_paging(vcpu) && !(cr0 & CR0_PG_MASK)) + if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) exit_lmode(vcpu); } #endif @@ -1022,14 +1022,14 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON); vcpu->cr0 = cr0; - if (!(cr0 & CR0_TS_MASK) || !(cr0 & CR0_PE_MASK)) + if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE)) vmx_fpu_activate(vcpu); } static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { vmcs_writel(GUEST_CR3, cr3); - if (vcpu->cr0 & CR0_PE_MASK) + if (vcpu->cr0 & X86_CR0_PE) vmx_fpu_deactivate(vcpu); } @@ -1778,7 +1778,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) case 2: /* clts */ vcpu_load_rsp_rip(vcpu); vmx_fpu_deactivate(vcpu); - vcpu->cr0 &= ~CR0_TS_MASK; + vcpu->cr0 &= ~X86_CR0_TS; vmcs_writel(CR0_READ_SHADOW, vcpu->cr0); vmx_fpu_activate(vcpu); skip_emulated_instruction(vcpu); -- cgit v1.2.3 From f802a307cb2cabdd0c6b48067dbe901d6fe27246 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: 
Tue, 17 Jul 2007 23:32:55 +1000 Subject: KVM: Use standard CR3 flags, tighten checking The kernel now has asm/cpu-features.h: use those macros instead of inventing our own. Also spell out definition of CR3_RESEVED_BITS, fix spelling and tighten it for the non-PAE case. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 9 +++------ drivers/kvm/kvm_main.c | 33 +++++++++++++++++++++------------ drivers/kvm/paging_tmpl.h | 2 +- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 7117c3b3cca..983c33f3837 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -19,12 +19,9 @@ #include #include -#define CR3_WPT_MASK (1ULL << 3) -#define CR3_PCD_MASK (1ULL << 4) - -#define CR3_RESEVED_BITS 0x07ULL -#define CR3_L_MODE_RESEVED_BITS (~((1ULL << 40) - 1) | 0x0fe7ULL) -#define CR3_FLAGS_MASK ((1ULL << 5) - 1) +#define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) +#define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) +#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS|0xFFFFFF0000000000ULL) #define CR4_VME_MASK (1ULL << 0) #define CR4_PSE_MASK (1ULL << 4) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 5d8febe580d..34a571dee51 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -571,23 +571,32 @@ EXPORT_SYMBOL_GPL(set_cr4); void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { if (is_long_mode(vcpu)) { - if (cr3 & CR3_L_MODE_RESEVED_BITS) { + if (cr3 & CR3_L_MODE_RESERVED_BITS) { printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); inject_gp(vcpu); return; } } else { - if (cr3 & CR3_RESEVED_BITS) { - printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); - inject_gp(vcpu); - return; - } - if (is_paging(vcpu) && is_pae(vcpu) && - !load_pdptrs(vcpu, cr3)) { - printk(KERN_DEBUG "set_cr3: #GP, pdptrs " - "reserved bits\n"); - inject_gp(vcpu); - return; + if (is_pae(vcpu)) { + if (cr3 & CR3_PAE_RESERVED_BITS) { + 
printk(KERN_DEBUG + "set_cr3: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } + if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { + printk(KERN_DEBUG "set_cr3: #GP, pdptrs " + "reserved bits\n"); + inject_gp(vcpu); + return; + } + } else { + if (cr3 & CR3_NONPAE_RESERVED_BITS) { + printk(KERN_DEBUG + "set_cr3: #GP, reserved bits\n"); + inject_gp(vcpu); + return; + } } } diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 4b5391c717f..01901ec3fe8 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -99,7 +99,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0); ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || - (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0); + (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; -- cgit v1.2.3 From 66aee91aaab8f998d28a61ed7733be17ad8e6d8f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 23:34:16 +1000 Subject: KVM: Use standard CR4 flags, tighten checking On this machine (Intel), writing to the CR4 bits 0x00000800 and 0x00001000 causes a GPF. The Intel manual is a little unclear, but AFAICT they're reserved, too. Also fix spelling of CR4_RESEVED_BITS.
Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 16 +++++----------- drivers/kvm/kvm_main.c | 16 ++++++++++------ drivers/kvm/svm.c | 7 +++---- drivers/kvm/vmx.c | 8 ++++---- drivers/kvm/vmx.h | 2 -- 5 files changed, 22 insertions(+), 27 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 983c33f3837..25439a5968f 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -23,12 +23,6 @@ #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS|0xFFFFFF0000000000ULL) -#define CR4_VME_MASK (1ULL << 0) -#define CR4_PSE_MASK (1ULL << 4) -#define CR4_PAE_MASK (1ULL << 5) -#define CR4_PGE_MASK (1ULL << 7) -#define CR4_VMXE_MASK (1ULL << 13) - #define KVM_GUEST_CR0_MASK \ (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE \ | X86_CR0_NW | X86_CR0_CD) @@ -36,9 +30,9 @@ (X86_CR0_PG | X86_CR0_PE | X86_CR0_WP | X86_CR0_NE | X86_CR0_TS \ | X86_CR0_MP) #define KVM_GUEST_CR4_MASK \ - (CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK) -#define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK) -#define KVM_RMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK | CR4_VME_MASK) + (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE) +#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) +#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) #define INVALID_PAGE (~(hpa_t)0) #define UNMAPPED_GVA (~(gpa_t)0) @@ -645,12 +639,12 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu) static inline int is_pae(struct kvm_vcpu *vcpu) { - return vcpu->cr4 & CR4_PAE_MASK; + return vcpu->cr4 & X86_CR4_PAE; } static inline int is_pse(struct kvm_vcpu *vcpu) { - return vcpu->cr4 & CR4_PSE_MASK; + return vcpu->cr4 & X86_CR4_PSE; } static inline int is_paging(struct kvm_vcpu *vcpu) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 34a571dee51..af02320012c 100644 --- 
a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -86,8 +86,12 @@ static struct dentry *debugfs_dir; (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) -#define LMSW_GUEST_MASK 0x0eULL -#define CR4_RESEVED_BITS (~((1ULL << 11) - 1)) +#define CR4_RESERVED_BITS \ + (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ + | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ + | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) + #define CR8_RESEVED_BITS (~0x0fULL) #define EFER_RESERVED_BITS 0xfffffffffffff2fe @@ -537,26 +541,26 @@ EXPORT_SYMBOL_GPL(lmsw); void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - if (cr4 & CR4_RESEVED_BITS) { + if (cr4 & CR4_RESERVED_BITS) { printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); inject_gp(vcpu); return; } if (is_long_mode(vcpu)) { - if (!(cr4 & CR4_PAE_MASK)) { + if (!(cr4 & X86_CR4_PAE)) { printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " "in long mode\n"); inject_gp(vcpu); return; } - } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) + } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE) && !load_pdptrs(vcpu, vcpu->cr3)) { printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); inject_gp(vcpu); } - if (cr4 & CR4_VMXE_MASK) { + if (cr4 & X86_CR4_VMXE) { printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); inject_gp(vcpu); return; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index e920c2269af..5c058fa1c8a 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -38,7 +38,6 @@ MODULE_LICENSE("GPL"); #define DR7_GD_MASK (1 << 13) #define DR6_BD_MASK (1 << 13) -#define CR4_DE_MASK (1UL << 3) #define SEG_TYPE_LDT 2 #define SEG_TYPE_BUSY_TSS16 3 @@ -564,7 +563,7 @@ static void init_vmcb(struct vmcb *vmcb) * cache by default. the orderly way is to enable cache in bios. 
*/ save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; - save->cr4 = CR4_PAE_MASK; + save->cr4 = X86_CR4_PAE; /* rdx = ?? */ } @@ -781,7 +780,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { vcpu->cr4 = cr4; - vcpu->svm->vmcb->save.cr4 = cr4 | CR4_PAE_MASK; + vcpu->svm->vmcb->save.cr4 = cr4 | X86_CR4_PAE; } static void svm_set_segment(struct kvm_vcpu *vcpu, @@ -877,7 +876,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, vcpu->svm->db_regs[dr] = value; return; case 4 ... 5: - if (vcpu->cr4 & CR4_DE_MASK) { + if (vcpu->cr4 & X86_CR4_DE) { *exception = UD_VECTOR; return; } diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index ebd93b4775a..f3e78187e89 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -764,7 +764,7 @@ static void hardware_enable(void *garbage) if ((old & 5) != 5) /* enable and lock */ wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 5); - write_cr4(read_cr4() | CR4_VMXE); /* FIXME: not cpu hotplug safe */ + write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) : "memory", "cc"); } @@ -879,8 +879,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) flags |= (vcpu->rmode.save_iopl << IOPL_SHIFT); vmcs_writel(GUEST_RFLAGS, flags); - vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~CR4_VME_MASK) | - (vmcs_readl(CR4_READ_SHADOW) & CR4_VME_MASK)); + vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | + (vmcs_readl(CR4_READ_SHADOW) & X86_CR4_VME)); update_exception_bitmap(vcpu); @@ -937,7 +937,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) flags |= IOPL_MASK | X86_EFLAGS_VM; vmcs_writel(GUEST_RFLAGS, flags); - vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | CR4_VME_MASK); + vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); update_exception_bitmap(vcpu); vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4); diff --git 
a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h index d0dc93df411..76ad7933cde 100644 --- a/drivers/kvm/vmx.h +++ b/drivers/kvm/vmx.h @@ -285,8 +285,6 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define CR4_VMXE 0x2000 - #define MSR_IA32_VMX_BASIC 0x480 #define MSR_IA32_FEATURE_CONTROL 0x03a #define MSR_IA32_VMX_PINBASED_CTLS 0x481 -- cgit v1.2.3 From 9eb829ced8c6f2e43a3a644ddf1279ffeee38a33 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 18 Jul 2007 13:05:58 +1000 Subject: KVM: Trivial: Use standard BITMAP macros, open-code userspace-exposed header Creating one's own BITMAP macro seems suboptimal: if we use manual arithmetic in the one place exposed to userspace, we can use standard macros elsewhere. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 3 +-- drivers/kvm/kvm_main.c | 2 +- include/linux/kvm.h | 10 ++-------- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 25439a5968f..cec5f057f3b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -324,8 +324,7 @@ struct kvm_vcpu { int guest_mode; unsigned long requests; unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ -#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) - unsigned long irq_pending[NR_IRQ_WORDS]; + DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */ unsigned long rip; /* needs vcpu_load_rsp_rip() */ diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index af02320012c..fc63de25d9b 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2130,7 +2130,7 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, sizeof vcpu->irq_pending); vcpu->irq_summary = 0; - for (i = 0; i < NR_IRQ_WORDS; ++i) + for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i) if (vcpu->irq_pending[i]) __set_bit(i, &vcpu->irq_summary); diff --git 
a/include/linux/kvm.h b/include/linux/kvm.h index 68ecced4011..8db01a91e1a 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -12,14 +12,8 @@ #define KVM_API_VERSION 12 -/* - * Architectural interrupt line count, and the size of the bitmap needed - * to hold them. - */ +/* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 -#define KVM_IRQ_BITMAP_SIZE_BYTES ((KVM_NR_INTERRUPTS + 7) / 8) -#define KVM_IRQ_BITMAP_SIZE(type) (KVM_IRQ_BITMAP_SIZE_BYTES / sizeof(type)) - /* for KVM_CREATE_MEMORY_REGION */ struct kvm_memory_region { @@ -165,7 +159,7 @@ struct kvm_sregs { __u64 cr0, cr2, cr3, cr4, cr8; __u64 efer; __u64 apic_base; - __u64 interrupt_bitmap[KVM_IRQ_BITMAP_SIZE(__u64)]; + __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; }; struct kvm_msr_entry { -- cgit v1.2.3 From 8fc0d085f521a2a76418f8f569cf1cd27f0e43d4 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Tue, 17 Jul 2007 12:26:59 -0400 Subject: KVM: Set exit_reason to KVM_EXIT_MMIO where run->mmio is initialized. Signed-off-by: Jeff Dike Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 2 +- drivers/kvm/svm.c | 1 - drivers/kvm/vmx.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index fc63de25d9b..193197fb399 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1276,6 +1276,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); if ((r || vcpu->mmio_is_write) && run) { + run->exit_reason = KVM_EXIT_MMIO; run->mmio.phys_addr = vcpu->mmio_phys_addr; memcpy(run->mmio.data, vcpu->mmio_data, 8); run->mmio.len = vcpu->mmio_size; @@ -1937,7 +1938,6 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* * Read-modify-write. Back to userspace. 
*/ - kvm_run->exit_reason = KVM_EXIT_MMIO; r = 0; goto out; } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 5c058fa1c8a..850a1b1d86c 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -928,7 +928,6 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; case EMULATE_DO_MMIO: ++vcpu->stat.mmio_exits; - kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; case EMULATE_FAIL: vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index f3e78187e89..2c4f01bea1f 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1610,7 +1610,6 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; case EMULATE_DO_MMIO: ++vcpu->stat.mmio_exits; - kvm_run->exit_reason = KVM_EXIT_MMIO; return 0; case EMULATE_FAIL: vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); -- cgit v1.2.3 From 7075bc816cfad142da92207ed5a6f3da55b143ef Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Jul 2007 23:37:17 +1000 Subject: KVM: Use standard CR8 flags, and fix TPR definition Intel manual (and KVM definition) say the TPR is 4 bits wide. Also fix CR8_RESEVED_BITS typo. Signed-off-by: Rusty Russell Acked-by: H. 
Peter Anvin Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 4 ++-- include/asm-x86/processor-flags.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 193197fb399..f0fc8d9e71e 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -92,7 +92,7 @@ static struct dentry *debugfs_dir; | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) -#define CR8_RESEVED_BITS (~0x0fULL) +#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) #define EFER_RESERVED_BITS 0xfffffffffffff2fe #ifdef CONFIG_X86_64 @@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(set_cr3); void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { - if ( cr8 & CR8_RESEVED_BITS) { + if (cr8 & CR8_RESERVED_BITS) { printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); inject_gp(vcpu); return; diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 5404e90edd5..199cab107d8 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -63,7 +63,7 @@ /* * x86-64 Task Priority Register, CR8 */ -#define X86_CR8_TPR 0x00000007 /* task priority register */ +#define X86_CR8_TPR 0x0000000F /* task priority register */ /* * AMD and Transmeta use MSRs for configuration; see -- cgit v1.2.3 From e3243452f4f35ed5f79d575100521bf257504b81 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 20 Jul 2007 12:30:58 +0300 Subject: KVM: x86 emulator: fix cmov for writeback changes The writeback fixes (02c03a326a5df825cc01de426f72e160db2b9538) broke cmov emulation. Fix. Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index db9f9553487..82b4ea62c98 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -1235,40 +1235,40 @@ twobyte_insn: break; case 0x40 ... 
0x4f: /* cmov */ dst.val = dst.orig_val = src.val; - d &= ~Mov; /* default to no move */ + no_wb = 1; /* * First, assume we're decoding an even cmov opcode * (lsb == 0). */ switch ((b & 15) >> 1) { case 0: /* cmovo */ - d |= (_eflags & EFLG_OF) ? Mov : 0; + no_wb = (_eflags & EFLG_OF) ? 0 : 1; break; case 1: /* cmovb/cmovc/cmovnae */ - d |= (_eflags & EFLG_CF) ? Mov : 0; + no_wb = (_eflags & EFLG_CF) ? 0 : 1; break; case 2: /* cmovz/cmove */ - d |= (_eflags & EFLG_ZF) ? Mov : 0; + no_wb = (_eflags & EFLG_ZF) ? 0 : 1; break; case 3: /* cmovbe/cmovna */ - d |= (_eflags & (EFLG_CF | EFLG_ZF)) ? Mov : 0; + no_wb = (_eflags & (EFLG_CF | EFLG_ZF)) ? 0 : 1; break; case 4: /* cmovs */ - d |= (_eflags & EFLG_SF) ? Mov : 0; + no_wb = (_eflags & EFLG_SF) ? 0 : 1; break; case 5: /* cmovp/cmovpe */ - d |= (_eflags & EFLG_PF) ? Mov : 0; + no_wb = (_eflags & EFLG_PF) ? 0 : 1; break; case 7: /* cmovle/cmovng */ - d |= (_eflags & EFLG_ZF) ? Mov : 0; + no_wb = (_eflags & EFLG_ZF) ? 0 : 1; /* fall through */ case 6: /* cmovl/cmovnge */ - d |= (!(_eflags & EFLG_SF) != - !(_eflags & EFLG_OF)) ? Mov : 0; + no_wb &= (!(_eflags & EFLG_SF) != + !(_eflags & EFLG_OF)) ? 0 : 1; break; } /* Odd cmov opcodes (lsb == 1) have inverted sense. */ - d ^= (b & 1) ? Mov : 0; + no_wb ^= b & 1; break; case 0xb0 ... 0xb1: /* cmpxchg */ /* -- cgit v1.2.3 From 394b6e5944865a558fe25f0c5903b34c434038ee Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 22 Jul 2007 15:51:58 +0300 Subject: KVM: x86 emulator: fix faulty check for two-byte opcode Right now, the bug is harmless as we never emulate one-byte 0xb6 or 0xb7. But things may change. Noted by the mysterious Gabriel C. 
Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 82b4ea62c98..ef7518a2d7a 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -819,7 +819,7 @@ done_prefixes: case DstReg: dst.type = OP_REG; if ((d & ByteOp) - && !(twobyte_table && (b == 0xb6 || b == 0xb7))) { + && !(twobyte && (b == 0xb6 || b == 0xb7))) { dst.ptr = decode_register(modrm_reg, _regs, (rex_prefix == 0)); dst.val = *(u8 *) dst.ptr; -- cgit v1.2.3 From 310bc76c2b6829cd280def4927b7ccf8b8c795df Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 23 Jul 2007 17:11:02 +1000 Subject: KVM: Return if the pdptrs are invalid when the guest turns on PAE. Don't fall through and turn on PAE in this case. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index f0fc8d9e71e..093cea36194 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -558,6 +558,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) && !load_pdptrs(vcpu, vcpu->cr3)) { printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); inject_gp(vcpu); + return; } if (cr4 & X86_CR4_VMXE) { -- cgit v1.2.3 From 9ae0448f53324b3c476f68bd134d97ac4ec27e0c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 23 Jul 2007 14:51:32 +0800 Subject: KVM: Hoist kvm_mmu_reload() out of the critical section vmx_cpu_run doesn't handle errors correctly and kvm_mmu_reload might sleep with the mutex changes, so I move it above. 
Signed-off-by: Shaohua Li Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 2c4f01bea1f..79674a7a92b 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1999,16 +1999,16 @@ preempted: kvm_guest_debug_pre(vcpu); again: + r = kvm_mmu_reload(vcpu); + if (unlikely(r)) + goto out; + if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); vmx_save_host_state(vcpu); kvm_load_guest_fpu(vcpu); - r = kvm_mmu_reload(vcpu); - if (unlikely(r)) - goto out; - /* * Loading guest fpu may have cleared host cr0.ts */ -- cgit v1.2.3 From fe5518819463d57ed032bc12458ed681bc790609 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 23 Jul 2007 14:51:39 +0800 Subject: KVM: Move gfn_to_page out of kmap/unmap pairs gfn_to_page might sleep with swap support. Move it out of the kmap calls. Signed-off-by: Shaohua Li Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 +- drivers/kvm/kvm_main.c | 7 ++--- drivers/kvm/mmu.c | 2 +- drivers/kvm/paging_tmpl.h | 80 ++++++++++++++++++++++++++++------------------- 4 files changed, 52 insertions(+), 39 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index cec5f057f3b..57504ae93db 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -599,7 +599,7 @@ int kvm_write_guest(struct kvm_vcpu *vcpu, unsigned long segment_base(u16 selector); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *old, const u8 *new, int bytes); + const u8 *new, int bytes); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 093cea36194..80ee427754d 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1076,7 +1076,6 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, { struct page *page; void *virt; - 
unsigned offset = offset_in_page(gpa); if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT)) return 0; @@ -1085,7 +1084,7 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, return 0; mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); virt = kmap_atomic(page, KM_USER0); - kvm_mmu_pte_write(vcpu, gpa, virt + offset, val, bytes); + kvm_mmu_pte_write(vcpu, gpa, val, bytes); memcpy(virt + offset_in_page(gpa), val, bytes); kunmap_atomic(virt, KM_USER0); return 1; @@ -1455,7 +1454,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT); para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT); - para_state = kmap_atomic(para_state_page, KM_USER0); + para_state = kmap(para_state_page); printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version); printk(KERN_DEBUG ".... size: %d\n", para_state->size); @@ -1491,7 +1490,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) para_state->ret = 0; err_kunmap_skip: - kunmap_atomic(para_state, KM_USER0); + kunmap(para_state_page); return 0; err_gp: return 1; diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 75faef4fb08..5437de2aa2d 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1124,7 +1124,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *old, const u8 *new, int bytes) + const u8 *new, int bytes) { gfn_t gfn = gpa >> PAGE_SHIFT; struct kvm_mmu_page *page; diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 01901ec3fe8..660243b39d8 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -58,7 +58,10 @@ struct guest_walker { int level; gfn_t table_gfn[PT_MAX_FULL_LEVELS]; pt_element_t *table; + pt_element_t pte; pt_element_t *ptep; + struct page *page; + int index; pt_element_t inherited_ar; gfn_t gfn; u32 error_code; @@ -80,11 +83,14 @@ static int 
FNAME(walk_addr)(struct guest_walker *walker, pgprintk("%s: addr %lx\n", __FUNCTION__, addr); walker->level = vcpu->mmu.root_level; walker->table = NULL; + walker->page = NULL; + walker->ptep = NULL; root = vcpu->cr3; #if PTTYPE == 64 if (!is_long_mode(vcpu)) { walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3]; root = *walker->ptep; + walker->pte = root; if (!(root & PT_PRESENT_MASK)) goto not_present; --walker->level; @@ -96,7 +102,8 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walker->level - 1, table_gfn); slot = gfn_to_memslot(vcpu->kvm, table_gfn); hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK); - walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0); + walker->page = pfn_to_page(hpa >> PAGE_SHIFT); + walker->table = kmap_atomic(walker->page, KM_USER0); ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0); @@ -108,6 +115,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, hpa_t paddr; ptep = &walker->table[index]; + walker->index = index; ASSERT(((unsigned long)walker->table & PAGE_MASK) == ((unsigned long)ptep & PAGE_MASK)); @@ -148,16 +156,20 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walker->inherited_ar &= walker->table[index]; table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; - paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK); kunmap_atomic(walker->table, KM_USER0); - walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT), - KM_USER0); + paddr = safe_gpa_to_hpa(vcpu, table_gfn << PAGE_SHIFT); + walker->page = pfn_to_page(paddr >> PAGE_SHIFT); + walker->table = kmap_atomic(walker->page, KM_USER0); --walker->level; walker->table_gfn[walker->level - 1 ] = table_gfn; pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__, walker->level - 1, table_gfn); } - walker->ptep = ptep; + walker->pte = *ptep; + if (walker->page) + walker->ptep = NULL; + if (walker->table) + kunmap_atomic(walker->table, KM_USER0); pgprintk("%s: pte %llx\n", __FUNCTION__, 
(u64)*ptep); return 1; @@ -175,13 +187,9 @@ err: walker->error_code |= PFERR_USER_MASK; if (fetch_fault) walker->error_code |= PFERR_FETCH_MASK; - return 0; -} - -static void FNAME(release_walker)(struct guest_walker *walker) -{ if (walker->table) kunmap_atomic(walker->table, KM_USER0); + return 0; } static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, @@ -193,7 +201,7 @@ static void FNAME(mark_pagetable_dirty)(struct kvm *kvm, static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, u64 *shadow_pte, gpa_t gaddr, - pt_element_t *gpte, + pt_element_t gpte, u64 access_bits, int user_fault, int write_fault, @@ -202,23 +210,34 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, gfn_t gfn) { hpa_t paddr; - int dirty = *gpte & PT_DIRTY_MASK; + int dirty = gpte & PT_DIRTY_MASK; u64 spte = *shadow_pte; int was_rmapped = is_rmap_pte(spte); pgprintk("%s: spte %llx gpte %llx access %llx write_fault %d" " user_fault %d gfn %lx\n", - __FUNCTION__, spte, (u64)*gpte, access_bits, + __FUNCTION__, spte, (u64)gpte, access_bits, write_fault, user_fault, gfn); if (write_fault && !dirty) { - *gpte |= PT_DIRTY_MASK; + pt_element_t *guest_ent, *tmp = NULL; + + if (walker->ptep) + guest_ent = walker->ptep; + else { + tmp = kmap_atomic(walker->page, KM_USER0); + guest_ent = &tmp[walker->index]; + } + + *guest_ent |= PT_DIRTY_MASK; + if (!walker->ptep) + kunmap_atomic(tmp, KM_USER0); dirty = 1; FNAME(mark_pagetable_dirty)(vcpu->kvm, walker); } spte |= PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK; - spte |= *gpte & PT64_NX_MASK; + spte |= gpte & PT64_NX_MASK; if (!dirty) access_bits &= ~PT_WRITABLE_MASK; @@ -273,13 +292,13 @@ unshadowed: rmap_add(vcpu, shadow_pte); } -static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t *gpte, +static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte, u64 *shadow_pte, u64 access_bits, int user_fault, int write_fault, int *ptwrite, struct guest_walker *walker, gfn_t gfn) { - access_bits &= *gpte; - 
FNAME(set_pte_common)(vcpu, shadow_pte, *gpte & PT_BASE_ADDR_MASK, + access_bits &= gpte; + FNAME(set_pte_common)(vcpu, shadow_pte, gpte & PT_BASE_ADDR_MASK, gpte, access_bits, user_fault, write_fault, ptwrite, walker, gfn); } @@ -295,22 +314,22 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) return; pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte); - FNAME(set_pte)(vcpu, &gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, + FNAME(set_pte)(vcpu, gpte, spte, PT_USER_MASK | PT_WRITABLE_MASK, 0, 0, NULL, NULL, (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT); } -static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t *gpde, +static void FNAME(set_pde)(struct kvm_vcpu *vcpu, pt_element_t gpde, u64 *shadow_pte, u64 access_bits, int user_fault, int write_fault, int *ptwrite, struct guest_walker *walker, gfn_t gfn) { gpa_t gaddr; - access_bits &= *gpde; + access_bits &= gpde; gaddr = (gpa_t)gfn << PAGE_SHIFT; if (PTTYPE == 32 && is_cpuid_PSE36()) - gaddr |= (*gpde & PT32_DIR_PSE36_MASK) << + gaddr |= (gpde & PT32_DIR_PSE36_MASK) << (32 - PT32_DIR_PSE36_SHIFT); FNAME(set_pte_common)(vcpu, shadow_pte, gaddr, gpde, access_bits, user_fault, write_fault, @@ -328,9 +347,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, int level; u64 *shadow_ent; u64 *prev_shadow_ent = NULL; - pt_element_t *guest_ent = walker->ptep; - if (!is_present_pte(*guest_ent)) + if (!is_present_pte(walker->pte)) return NULL; shadow_addr = vcpu->mmu.root_hpa; @@ -364,12 +382,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (level - 1 == PT_PAGE_TABLE_LEVEL && walker->level == PT_DIRECTORY_LEVEL) { metaphysical = 1; - hugepage_access = *guest_ent; + hugepage_access = walker->pte; hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK; - if (*guest_ent & PT64_NX_MASK) + if (walker->pte & PT64_NX_MASK) hugepage_access |= (1 << 2); hugepage_access >>= PT_WRITABLE_SHIFT; - 
table_gfn = (*guest_ent & PT_BASE_ADDR_MASK) + table_gfn = (walker->pte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT; } else { metaphysical = 0; @@ -386,12 +404,12 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, } if (walker->level == PT_DIRECTORY_LEVEL) { - FNAME(set_pde)(vcpu, guest_ent, shadow_ent, + FNAME(set_pde)(vcpu, walker->pte, shadow_ent, walker->inherited_ar, user_fault, write_fault, ptwrite, walker, walker->gfn); } else { ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); - FNAME(set_pte)(vcpu, guest_ent, shadow_ent, + FNAME(set_pte)(vcpu, walker->pte, shadow_ent, walker->inherited_ar, user_fault, write_fault, ptwrite, walker, walker->gfn); } @@ -442,7 +460,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, if (!r) { pgprintk("%s: guest page fault\n", __FUNCTION__); inject_page_fault(vcpu, addr, walker.error_code); - FNAME(release_walker)(&walker); vcpu->last_pt_write_count = 0; /* reset fork detector */ return 0; } @@ -452,8 +469,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__, shadow_pte, *shadow_pte, write_pt); - FNAME(release_walker)(&walker); - if (!write_pt) vcpu->last_pt_write_count = 0; /* reset fork detector */ @@ -482,7 +497,6 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) gpa |= vaddr & ~PAGE_MASK; } - FNAME(release_walker)(&walker); return gpa; } -- cgit v1.2.3 From 62b3ffb8b357a791491726cff8d395027e5245b7 Mon Sep 17 00:00:00 2001 From: "Yang, Sheng" Date: Wed, 25 Jul 2007 12:17:06 +0300 Subject: KVM: VMX: Import some constants of vmcs from IA32 SDM This patch mainly imports some constants and renames two existing constants of vmcs according to IA32 SDM. It also adds two constants to indicate Lock bit and Enable bit in MSR_IA32_FEATURE_CONTROL, and replaces the hardcoded _5_ with these two bits. 
Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 16 +++++++++---- drivers/kvm/vmx.h | 69 ++++++++++++++++++++++++++++++++++--------------------- 2 files changed, 55 insertions(+), 30 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 79674a7a92b..dac2f93d1a0 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -751,7 +751,10 @@ static __init int vmx_disabled_by_bios(void) u64 msr; rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); - return (msr & 5) == 1; /* locked but not enabled */ + return (msr & (MSR_IA32_FEATURE_CONTROL_LOCKED | + MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) + == MSR_IA32_FEATURE_CONTROL_LOCKED; + /* locked but not enabled */ } static void hardware_enable(void *garbage) @@ -761,9 +764,14 @@ static void hardware_enable(void *garbage) u64 old; rdmsrl(MSR_IA32_FEATURE_CONTROL, old); - if ((old & 5) != 5) + if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | + MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) + != (MSR_IA32_FEATURE_CONTROL_LOCKED | + MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) /* enable and lock */ - wrmsrl(MSR_IA32_FEATURE_CONTROL, old | 5); + wrmsrl(MSR_IA32_FEATURE_CONTROL, old | + MSR_IA32_FEATURE_CONTROL_LOCKED | + MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) : "memory", "cc"); @@ -1326,7 +1334,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) CPU_BASED_HLT_EXITING /* 20.6.2 */ | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ - | CPU_BASED_ACTIVATE_IO_BITMAP /* 20.6.2 */ + | CPU_BASED_USE_IO_BITMAPS /* 20.6.2 */ | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ ); diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h index 76ad7933cde..7e4dc1208dd 100644 --- a/drivers/kvm/vmx.h +++ b/drivers/kvm/vmx.h @@ -25,29 +25,36 @@ * */ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 
0x00000008 -#define CPU_BASED_HLT_EXITING 0x00000080 -#define CPU_BASED_INVDPG_EXITING 0x00000200 -#define CPU_BASED_MWAIT_EXITING 0x00000400 -#define CPU_BASED_RDPMC_EXITING 0x00000800 -#define CPU_BASED_RDTSC_EXITING 0x00001000 -#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 -#define CPU_BASED_CR8_STORE_EXITING 0x00100000 -#define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_MOV_DR_EXITING 0x00800000 -#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 -#define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000 -#define CPU_BASED_MSR_BITMAPS 0x10000000 -#define CPU_BASED_MONITOR_EXITING 0x20000000 -#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 -#define PIN_BASED_EXT_INTR_MASK 0x1 -#define PIN_BASED_NMI_EXITING 0x8 +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 -#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 -#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 + +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 /* 
VMCS Encodings */ enum vmcs_field { @@ -285,11 +292,21 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define MSR_IA32_VMX_BASIC 0x480 -#define MSR_IA32_FEATURE_CONTROL 0x03a -#define MSR_IA32_VMX_PINBASED_CTLS 0x481 -#define MSR_IA32_VMX_PROCBASED_CTLS 0x482 -#define MSR_IA32_VMX_EXIT_CTLS 0x483 -#define MSR_IA32_VMX_ENTRY_CTLS 0x484 +#define MSR_IA32_VMX_BASIC 0x480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x482 +#define MSR_IA32_VMX_EXIT_CTLS 0x483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x484 +#define MSR_IA32_VMX_MISC 0x485 +#define MSR_IA32_VMX_CR0_FIXED0 0x486 +#define MSR_IA32_VMX_CR0_FIXED1 0x487 +#define MSR_IA32_VMX_CR4_FIXED0 0x488 +#define MSR_IA32_VMX_CR4_FIXED1 0x489 +#define MSR_IA32_VMX_VMCS_ENUM 0x48a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b + +#define MSR_IA32_FEATURE_CONTROL 0x3a +#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 +#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 #endif -- cgit v1.2.3 From 3ccb8827fb3bd389ed15320da83543d016a94822 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Wed, 25 Jul 2007 11:41:57 +0200 Subject: KVM: Remove dead code in the cmpxchg instruction emulation The writeback fixes (02c03a326a5df825cc01de426f72e160db2b9538) left some dead code in the cmpxchg instruction emulation. Remove it. Signed-off-by: Aurelien Jarno Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index ef7518a2d7a..2136da5d697 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -1278,8 +1278,6 @@ twobyte_insn: src.orig_val = src.val; src.val = _regs[VCPU_REGS_RAX]; emulate_2op_SrcV("cmp", src, dst, _eflags); - /* Always write back. The question is: where to? */ - d |= Mov; if (_eflags & EFLG_ZF) { /* Success: write back to memory. 
*/ dst.val = src.orig_val; -- cgit v1.2.3 From c820c2aa27bb5b6069aa708b0a0b44b59a16bfa7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 25 Jul 2007 13:29:51 +1000 Subject: KVM: load_pdptrs() cleanups load_pdptrs can be handed an invalid cr3, and it should not oops. This can happen because we injected #gp in set_cr3() after we set vcpu->cr3 to the invalid value, or from kvm_vcpu_ioctl_set_sregs(), or memory configuration changes after the guest did set_cr3(). We should also copy the pdpte array once, before checking and assigning, otherwise an SMP guest can potentially alter the values between the check and the set. Finally one nitpick: ret = 1 should be done as late as possible: this allows GCC to check for unset "ret" should the function change in future. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 80ee427754d..65c9a31f1d9 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -442,30 +442,32 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2; int i; - u64 pdpte; u64 *pdpt; int ret; struct page *page; + u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)]; spin_lock(&vcpu->kvm->lock); page = gfn_to_page(vcpu->kvm, pdpt_gfn); - /* FIXME: !page - emulate? 0xff? 
*/ + if (!page) { + ret = 0; + goto out; + } + pdpt = kmap_atomic(page, KM_USER0); + memcpy(pdpte, pdpt+offset, sizeof(pdpte)); + kunmap_atomic(pdpt, KM_USER0); - ret = 1; - for (i = 0; i < 4; ++i) { - pdpte = pdpt[offset + i]; - if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) { + for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { + if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { ret = 0; goto out; } } + ret = 1; - for (i = 0; i < 4; ++i) - vcpu->pdptrs[i] = pdpt[offset + i]; - + memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs)); out: - kunmap_atomic(pdpt, KM_USER0); spin_unlock(&vcpu->kvm->lock); return ret; -- cgit v1.2.3 From a2fa3e9f52d875f7d4ca98434603b8756be71ba8 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Fri, 27 Jul 2007 08:13:10 -0400 Subject: KVM: Remove arch specific components from the general code struct kvm_vcpu has vmx-specific members; remove them to a private structure. Signed-off-by: Gregory Haskins Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 31 +--- drivers/kvm/kvm_main.c | 26 +--- drivers/kvm/kvm_svm.h | 3 + drivers/kvm/svm.c | 394 ++++++++++++++++++++++++++++--------------------- drivers/kvm/vmx.c | 249 +++++++++++++++++++------------ 5 files changed, 397 insertions(+), 306 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 57504ae93db..954a1408960 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -15,7 +15,6 @@ #include #include -#include "vmx.h" #include #include @@ -140,14 +139,6 @@ struct kvm_mmu_page { }; }; -struct vmcs { - u32 revision_id; - u32 abort; - char data[0]; -}; - -#define vmx_msr_entry kvm_msr_entry - struct kvm_vcpu; /* @@ -309,15 +300,12 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev); struct kvm_vcpu { + int valid; struct kvm *kvm; int vcpu_id; - union { - struct vmcs *vmcs; - struct vcpu_svm *svm; - }; + void *_priv; struct mutex mutex; int cpu; - int launched; u64 host_tsc; struct kvm_run *run; int 
interrupt_window_open; @@ -340,14 +328,6 @@ struct kvm_vcpu { u64 shadow_efer; u64 apic_base; u64 ia32_misc_enable_msr; - int nmsrs; - int save_nmsrs; - int msr_offset_efer; -#ifdef CONFIG_X86_64 - int msr_offset_kernel_gs_base; -#endif - struct vmx_msr_entry *guest_msrs; - struct vmx_msr_entry *host_msrs; struct kvm_mmu mmu; @@ -366,11 +346,6 @@ struct kvm_vcpu { char *guest_fx_image; int fpu_active; int guest_fpu_loaded; - struct vmx_host_state { - int loaded; - u16 fs_sel, gs_sel, ldt_sel; - int fs_gs_ldt_reload_needed; - } vmx_host_state; int mmio_needed; int mmio_read_completed; @@ -579,8 +554,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); void fx_init(struct kvm_vcpu *vcpu); -void load_msrs(struct vmx_msr_entry *e, int n); -void save_msrs(struct vmx_msr_entry *e, int n); void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 65c9a31f1d9..bf8b8f03019 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -367,7 +367,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - if (!vcpu->vmcs) + if (!vcpu->valid) return; vcpu_load(vcpu); @@ -377,7 +377,7 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) static void kvm_free_vcpu(struct kvm_vcpu *vcpu) { - if (!vcpu->vmcs) + if (!vcpu->valid) return; vcpu_load(vcpu); @@ -1645,24 +1645,6 @@ void kvm_resched(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_resched); -void load_msrs(struct vmx_msr_entry *e, int n) -{ - int i; - - for (i = 0; i < n; ++i) - wrmsrl(e[i].index, e[i].data); -} -EXPORT_SYMBOL_GPL(load_msrs); - -void save_msrs(struct vmx_msr_entry *e, int n) -{ - int i; - - for (i = 0; i < n; ++i) - rdmsrl(e[i].index, e[i].data); -} -EXPORT_SYMBOL_GPL(save_msrs); - void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { int i; @@ -2401,7 +2383,7 @@ static int 
kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) mutex_lock(&vcpu->mutex); - if (vcpu->vmcs) { + if (vcpu->valid) { mutex_unlock(&vcpu->mutex); return -EEXIST; } @@ -2449,6 +2431,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) kvm->nvcpus = n + 1; spin_unlock(&kvm_lock); + vcpu->valid = 1; + return r; out_free_vcpus: diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h index a869983d683..82e5d77acbb 100644 --- a/drivers/kvm/kvm_svm.h +++ b/drivers/kvm/kvm_svm.h @@ -20,7 +20,10 @@ static const u32 host_save_user_msrs[] = { #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) #define NUM_DB_REGS 4 +struct kvm_vcpu; + struct vcpu_svm { + struct kvm_vcpu *vcpu; struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 850a1b1d86c..32481876d98 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -49,6 +49,11 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_LBRV (1 << 1) #define SVM_DEATURE_SVML (1 << 2) +static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) +{ + return (struct vcpu_svm*)vcpu->_priv; +} + unsigned long iopm_base; unsigned long msrpm_base; @@ -95,7 +100,7 @@ static inline u32 svm_has(u32 feat) static unsigned get_addr_size(struct kvm_vcpu *vcpu) { - struct vmcb_save_area *sa = &vcpu->svm->vmcb->save; + struct vmcb_save_area *sa = &to_svm(vcpu)->vmcb->save; u16 cs_attrib; if (!(sa->cr0 & X86_CR0_PE) || (sa->rflags & X86_EFLAGS_VM)) @@ -181,7 +186,7 @@ static inline void write_dr7(unsigned long val) static inline void force_new_asid(struct kvm_vcpu *vcpu) { - vcpu->svm->asid_generation--; + to_svm(vcpu)->asid_generation--; } static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) @@ -194,22 +199,24 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (!(efer & KVM_EFER_LMA)) efer &= ~KVM_EFER_LME; - vcpu->svm->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; + to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; vcpu->shadow_efer 
= efer; } static void svm_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) { - vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_VALID_ERR | SVM_EVTINJ_TYPE_EXEPT | GP_VECTOR; - vcpu->svm->vmcb->control.event_inj_err = error_code; + svm->vmcb->control.event_inj_err = error_code; } static void inject_ud(struct kvm_vcpu *vcpu) { - vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + to_svm(vcpu)->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT | UD_VECTOR; } @@ -228,19 +235,21 @@ static int is_external_interrupt(u32 info) static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { - if (!vcpu->svm->next_rip) { + struct vcpu_svm *svm = to_svm(vcpu); + + if (!svm->next_rip) { printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); return; } - if (vcpu->svm->next_rip - vcpu->svm->vmcb->save.rip > 15) { + if (svm->next_rip - svm->vmcb->save.rip > 15) { printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", __FUNCTION__, - vcpu->svm->vmcb->save.rip, - vcpu->svm->next_rip); + svm->vmcb->save.rip, + svm->next_rip); } - vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip; - vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; + vcpu->rip = svm->vmcb->save.rip = svm->next_rip; + svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; vcpu->interrupt_window_open = 1; } @@ -569,23 +578,27 @@ static void init_vmcb(struct vmcb *vmcb) static int svm_create_vcpu(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm; struct page *page; int r; r = -ENOMEM; - vcpu->svm = kzalloc(sizeof *vcpu->svm, GFP_KERNEL); - if (!vcpu->svm) + svm = kzalloc(sizeof *svm, GFP_KERNEL); + if (!svm) goto out1; page = alloc_page(GFP_KERNEL); if (!page) goto out2; - vcpu->svm->vmcb = page_address(page); - clear_page(vcpu->svm->vmcb); - vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; - vcpu->svm->asid_generation = 0; - memset(vcpu->svm->db_regs, 0, 
sizeof(vcpu->svm->db_regs)); - init_vmcb(vcpu->svm->vmcb); + svm->vmcb = page_address(page); + clear_page(svm->vmcb); + svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; + svm->asid_generation = 0; + memset(svm->db_regs, 0, sizeof(svm->db_regs)); + init_vmcb(svm->vmcb); + + svm->vcpu = vcpu; + vcpu->_priv = svm; fx_init(vcpu); vcpu->fpu_active = 1; @@ -596,22 +609,26 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) return 0; out2: - kfree(vcpu->svm); + kfree(svm); out1: return r; } static void svm_free_vcpu(struct kvm_vcpu *vcpu) { - if (!vcpu->svm) + struct vcpu_svm *svm = to_svm(vcpu); + + if (!svm) return; - if (vcpu->svm->vmcb) - __free_page(pfn_to_page(vcpu->svm->vmcb_pa >> PAGE_SHIFT)); - kfree(vcpu->svm); + if (svm->vmcb) + __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + kfree(svm); + vcpu->_priv = NULL; } static void svm_vcpu_load(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); int cpu, i; cpu = get_cpu(); @@ -624,20 +641,21 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu) */ rdtscll(tsc_this); delta = vcpu->host_tsc - tsc_this; - vcpu->svm->vmcb->control.tsc_offset += delta; + svm->vmcb->control.tsc_offset += delta; vcpu->cpu = cpu; } for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - rdmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); + rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); int i; for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) - wrmsrl(host_save_user_msrs[i], vcpu->svm->host_user_msrs[i]); + wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); rdtscll(vcpu->host_tsc); put_cpu(); @@ -649,31 +667,34 @@ static void svm_vcpu_decache(struct kvm_vcpu *vcpu) static void svm_cache_regs(struct kvm_vcpu *vcpu) { - vcpu->regs[VCPU_REGS_RAX] = vcpu->svm->vmcb->save.rax; - vcpu->regs[VCPU_REGS_RSP] = vcpu->svm->vmcb->save.rsp; - vcpu->rip = vcpu->svm->vmcb->save.rip; + struct vcpu_svm *svm = to_svm(vcpu); + + 
vcpu->regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; + vcpu->regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; + vcpu->rip = svm->vmcb->save.rip; } static void svm_decache_regs(struct kvm_vcpu *vcpu) { - vcpu->svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; - vcpu->svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; - vcpu->svm->vmcb->save.rip = vcpu->rip; + struct vcpu_svm *svm = to_svm(vcpu); + svm->vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX]; + svm->vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP]; + svm->vmcb->save.rip = vcpu->rip; } static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu) { - return vcpu->svm->vmcb->save.rflags; + return to_svm(vcpu)->vmcb->save.rflags; } static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { - vcpu->svm->vmcb->save.rflags = rflags; + to_svm(vcpu)->vmcb->save.rflags = rflags; } static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) { - struct vmcb_save_area *save = &vcpu->svm->vmcb->save; + struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; switch (seg) { case VCPU_SREG_CS: return &save->cs; @@ -725,26 +746,34 @@ static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - dt->limit = vcpu->svm->vmcb->save.idtr.limit; - dt->base = vcpu->svm->vmcb->save.idtr.base; + struct vcpu_svm *svm = to_svm(vcpu); + + dt->limit = svm->vmcb->save.idtr.limit; + dt->base = svm->vmcb->save.idtr.base; } static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - vcpu->svm->vmcb->save.idtr.limit = dt->limit; - vcpu->svm->vmcb->save.idtr.base = dt->base ; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.idtr.limit = dt->limit; + svm->vmcb->save.idtr.base = dt->base ; } static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - dt->limit = vcpu->svm->vmcb->save.gdtr.limit; - dt->base = vcpu->svm->vmcb->save.gdtr.base; + struct vcpu_svm *svm = to_svm(vcpu); + + dt->limit = 
svm->vmcb->save.gdtr.limit; + dt->base = svm->vmcb->save.gdtr.base; } static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - vcpu->svm->vmcb->save.gdtr.limit = dt->limit; - vcpu->svm->vmcb->save.gdtr.base = dt->base ; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.gdtr.limit = dt->limit; + svm->vmcb->save.gdtr.base = dt->base ; } static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) @@ -753,39 +782,42 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { + struct vcpu_svm *svm = to_svm(vcpu); + #ifdef CONFIG_X86_64 if (vcpu->shadow_efer & KVM_EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->shadow_efer |= KVM_EFER_LMA; - vcpu->svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; + svm->vmcb->save.efer |= KVM_EFER_LMA | KVM_EFER_LME; } if (is_paging(vcpu) && !(cr0 & X86_CR0_PG) ) { vcpu->shadow_efer &= ~KVM_EFER_LMA; - vcpu->svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); + svm->vmcb->save.efer &= ~(KVM_EFER_LMA | KVM_EFER_LME); } } #endif if ((vcpu->cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { - vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); + svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); vcpu->fpu_active = 1; } vcpu->cr0 = cr0; cr0 |= X86_CR0_PG | X86_CR0_WP; cr0 &= ~(X86_CR0_CD | X86_CR0_NW); - vcpu->svm->vmcb->save.cr0 = cr0; + svm->vmcb->save.cr0 = cr0; } static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { vcpu->cr4 = cr4; - vcpu->svm->vmcb->save.cr4 = cr4 | X86_CR4_PAE; + to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; } static void svm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { + struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_seg *s = svm_seg(vcpu, seg); s->base = var->base; @@ -804,16 +836,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; } if (seg == VCPU_SREG_CS) - 
vcpu->svm->vmcb->save.cpl - = (vcpu->svm->vmcb->save.cs.attrib + svm->vmcb->save.cpl + = (svm->vmcb->save.cs.attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; } /* FIXME: - vcpu->svm->vmcb->control.int_ctl &= ~V_TPR_MASK; - vcpu->svm->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); + svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK; + svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK); */ @@ -825,55 +857,59 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) static void load_host_msrs(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); + wrmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); #endif } static void save_host_msrs(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 - rdmsrl(MSR_GS_BASE, vcpu->svm->host_gs_base); + rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host_gs_base); #endif } static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) { + struct vcpu_svm *svm = to_svm(vcpu); + if (svm_data->next_asid > svm_data->max_asid) { ++svm_data->asid_generation; svm_data->next_asid = 1; - vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; + svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } vcpu->cpu = svm_data->cpu; - vcpu->svm->asid_generation = svm_data->asid_generation; - vcpu->svm->vmcb->control.asid = svm_data->next_asid++; + svm->asid_generation = svm_data->asid_generation; + svm->vmcb->control.asid = svm_data->next_asid++; } static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address) { - invlpga(address, vcpu->svm->vmcb->control.asid); // is needed? + invlpga(address, to_svm(vcpu)->vmcb->control.asid); // is needed? 
} static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) { - return vcpu->svm->db_regs[dr]; + return to_svm(vcpu)->db_regs[dr]; } static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, int *exception) { + struct vcpu_svm *svm = to_svm(vcpu); + *exception = 0; - if (vcpu->svm->vmcb->save.dr7 & DR7_GD_MASK) { - vcpu->svm->vmcb->save.dr7 &= ~DR7_GD_MASK; - vcpu->svm->vmcb->save.dr6 |= DR6_BD_MASK; + if (svm->vmcb->save.dr7 & DR7_GD_MASK) { + svm->vmcb->save.dr7 &= ~DR7_GD_MASK; + svm->vmcb->save.dr6 |= DR6_BD_MASK; *exception = DB_VECTOR; return; } switch (dr) { case 0 ... 3: - vcpu->svm->db_regs[dr] = value; + svm->db_regs[dr] = value; return; case 4 ... 5: if (vcpu->cr4 & X86_CR4_DE) { @@ -885,7 +921,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, *exception = GP_VECTOR; return; } - vcpu->svm->vmcb->save.dr7 = value; + svm->vmcb->save.dr7 = value; return; } default: @@ -898,7 +934,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 exit_int_info = vcpu->svm->vmcb->control.exit_int_info; + struct vcpu_svm *svm = to_svm(vcpu); + u32 exit_int_info = svm->vmcb->control.exit_int_info; u64 fault_address; u32 error_code; enum emulation_result er; @@ -909,8 +946,8 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) spin_lock(&vcpu->kvm->lock); - fault_address = vcpu->svm->vmcb->control.exit_info_2; - error_code = vcpu->svm->vmcb->control.exit_info_1; + fault_address = svm->vmcb->control.exit_info_2; + error_code = svm->vmcb->control.exit_info_1; r = kvm_mmu_page_fault(vcpu, fault_address, error_code); if (r < 0) { spin_unlock(&vcpu->kvm->lock); @@ -942,22 +979,25 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << 
NM_VECTOR); - if (!(vcpu->cr0 & X86_CR0_TS)) - vcpu->svm->vmcb->save.cr0 &= ~X86_CR0_TS; - vcpu->fpu_active = 1; + struct vcpu_svm *svm = to_svm(vcpu); - return 1; + svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); + if (!(vcpu->cr0 & X86_CR0_TS)) + svm->vmcb->save.cr0 &= ~X86_CR0_TS; + vcpu->fpu_active = 1; + + return 1; } static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. */ - clear_page(vcpu->svm->vmcb); - init_vmcb(vcpu->svm->vmcb); + clear_page(svm->vmcb); + init_vmcb(svm->vmcb); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; @@ -967,23 +1007,24 @@ static int io_get_override(struct kvm_vcpu *vcpu, struct vmcb_seg **seg, int *addr_override) { + struct vcpu_svm *svm = to_svm(vcpu); u8 inst[MAX_INST_SIZE]; unsigned ins_length; gva_t rip; int i; - rip = vcpu->svm->vmcb->save.rip; - ins_length = vcpu->svm->next_rip - rip; - rip += vcpu->svm->vmcb->save.cs.base; + rip = svm->vmcb->save.rip; + ins_length = svm->next_rip - rip; + rip += svm->vmcb->save.cs.base; if (ins_length > MAX_INST_SIZE) printk(KERN_DEBUG "%s: inst length err, cs base 0x%llx rip 0x%llx " "next rip 0x%llx ins_length %u\n", __FUNCTION__, - vcpu->svm->vmcb->save.cs.base, - vcpu->svm->vmcb->save.rip, - vcpu->svm->vmcb->control.exit_info_2, + svm->vmcb->save.cs.base, + svm->vmcb->save.rip, + svm->vmcb->control.exit_info_2, ins_length); if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length) @@ -1003,22 +1044,22 @@ static int io_get_override(struct kvm_vcpu *vcpu, *addr_override = 1; continue; case 0x2e: - *seg = &vcpu->svm->vmcb->save.cs; + *seg = &svm->vmcb->save.cs; continue; case 0x36: - *seg = &vcpu->svm->vmcb->save.ss; + *seg = &svm->vmcb->save.ss; continue; case 0x3e: - *seg = &vcpu->svm->vmcb->save.ds; + *seg = &svm->vmcb->save.ds; continue; case 0x26: - *seg = &vcpu->svm->vmcb->save.es; + *seg = &svm->vmcb->save.es; continue; 
case 0x64: - *seg = &vcpu->svm->vmcb->save.fs; + *seg = &svm->vmcb->save.fs; continue; case 0x65: - *seg = &vcpu->svm->vmcb->save.gs; + *seg = &svm->vmcb->save.gs; continue; default: return 1; @@ -1033,7 +1074,8 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) unsigned long *reg; struct vmcb_seg *seg; int addr_override; - struct vmcb_save_area *save_area = &vcpu->svm->vmcb->save; + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_save_area *save_area = &svm->vmcb->save; u16 cs_attrib = save_area->cs.attrib; unsigned addr_size = get_addr_size(vcpu); @@ -1045,16 +1087,16 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) if (ins) { reg = &vcpu->regs[VCPU_REGS_RDI]; - seg = &vcpu->svm->vmcb->save.es; + seg = &svm->vmcb->save.es; } else { reg = &vcpu->regs[VCPU_REGS_RSI]; - seg = (seg) ? seg : &vcpu->svm->vmcb->save.ds; + seg = (seg) ? seg : &svm->vmcb->save.ds; } addr_mask = ~0ULL >> (64 - (addr_size * 8)); if ((cs_attrib & SVM_SELECTOR_L_MASK) && - !(vcpu->svm->vmcb->save.rflags & X86_EFLAGS_VM)) { + !(svm->vmcb->save.rflags & X86_EFLAGS_VM)) { *address = (*reg & addr_mask); return addr_mask; } @@ -1070,7 +1112,8 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug? + struct vcpu_svm *svm = to_svm(vcpu); + u32 io_info = svm->vmcb->control.exit_info_1; //address size bug? 
int size, down, in, string, rep; unsigned port; unsigned long count; @@ -1078,7 +1121,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ++vcpu->stat.io_exits; - vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2; + svm->next_rip = svm->vmcb->control.exit_info_2; in = (io_info & SVM_IOIO_TYPE_MASK) != 0; port = io_info >> 16; @@ -1086,7 +1129,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) string = (io_info & SVM_IOIO_STR_MASK) != 0; rep = (io_info & SVM_IOIO_REP_MASK) != 0; count = 1; - down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; + down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; if (string) { unsigned addr_mask; @@ -1112,14 +1155,18 @@ static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->next_rip = svm->vmcb->save.rip + 1; skip_emulated_instruction(vcpu); return kvm_emulate_halt(vcpu); } static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 3; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->next_rip = svm->vmcb->save.rip + 3; skip_emulated_instruction(vcpu); return kvm_hypercall(vcpu, kvm_run); } @@ -1139,7 +1186,9 @@ static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_r static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->next_rip = svm->vmcb->save.rip + 2; kvm_emulate_cpuid(vcpu); return 1; } @@ -1153,39 +1202,41 @@ static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_ru static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) { + struct vcpu_svm *svm = to_svm(vcpu); + switch (ecx) { case 
MSR_IA32_TIME_STAMP_COUNTER: { u64 tsc; rdtscll(tsc); - *data = vcpu->svm->vmcb->control.tsc_offset + tsc; + *data = svm->vmcb->control.tsc_offset + tsc; break; } case MSR_K6_STAR: - *data = vcpu->svm->vmcb->save.star; + *data = svm->vmcb->save.star; break; #ifdef CONFIG_X86_64 case MSR_LSTAR: - *data = vcpu->svm->vmcb->save.lstar; + *data = svm->vmcb->save.lstar; break; case MSR_CSTAR: - *data = vcpu->svm->vmcb->save.cstar; + *data = svm->vmcb->save.cstar; break; case MSR_KERNEL_GS_BASE: - *data = vcpu->svm->vmcb->save.kernel_gs_base; + *data = svm->vmcb->save.kernel_gs_base; break; case MSR_SYSCALL_MASK: - *data = vcpu->svm->vmcb->save.sfmask; + *data = svm->vmcb->save.sfmask; break; #endif case MSR_IA32_SYSENTER_CS: - *data = vcpu->svm->vmcb->save.sysenter_cs; + *data = svm->vmcb->save.sysenter_cs; break; case MSR_IA32_SYSENTER_EIP: - *data = vcpu->svm->vmcb->save.sysenter_eip; + *data = svm->vmcb->save.sysenter_eip; break; case MSR_IA32_SYSENTER_ESP: - *data = vcpu->svm->vmcb->save.sysenter_esp; + *data = svm->vmcb->save.sysenter_esp; break; default: return kvm_get_msr_common(vcpu, ecx, data); @@ -1195,15 +1246,16 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); u32 ecx = vcpu->regs[VCPU_REGS_RCX]; u64 data; if (svm_get_msr(vcpu, ecx, &data)) svm_inject_gp(vcpu, 0); else { - vcpu->svm->vmcb->save.rax = data & 0xffffffff; + svm->vmcb->save.rax = data & 0xffffffff; vcpu->regs[VCPU_REGS_RDX] = data >> 32; - vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; + svm->next_rip = svm->vmcb->save.rip + 2; skip_emulated_instruction(vcpu); } return 1; @@ -1211,39 +1263,41 @@ static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) { + struct vcpu_svm *svm = to_svm(vcpu); + switch (ecx) { case MSR_IA32_TIME_STAMP_COUNTER: { u64 tsc; 
rdtscll(tsc); - vcpu->svm->vmcb->control.tsc_offset = data - tsc; + svm->vmcb->control.tsc_offset = data - tsc; break; } case MSR_K6_STAR: - vcpu->svm->vmcb->save.star = data; + svm->vmcb->save.star = data; break; #ifdef CONFIG_X86_64 case MSR_LSTAR: - vcpu->svm->vmcb->save.lstar = data; + svm->vmcb->save.lstar = data; break; case MSR_CSTAR: - vcpu->svm->vmcb->save.cstar = data; + svm->vmcb->save.cstar = data; break; case MSR_KERNEL_GS_BASE: - vcpu->svm->vmcb->save.kernel_gs_base = data; + svm->vmcb->save.kernel_gs_base = data; break; case MSR_SYSCALL_MASK: - vcpu->svm->vmcb->save.sfmask = data; + svm->vmcb->save.sfmask = data; break; #endif case MSR_IA32_SYSENTER_CS: - vcpu->svm->vmcb->save.sysenter_cs = data; + svm->vmcb->save.sysenter_cs = data; break; case MSR_IA32_SYSENTER_EIP: - vcpu->svm->vmcb->save.sysenter_eip = data; + svm->vmcb->save.sysenter_eip = data; break; case MSR_IA32_SYSENTER_ESP: - vcpu->svm->vmcb->save.sysenter_esp = data; + svm->vmcb->save.sysenter_esp = data; break; default: return kvm_set_msr_common(vcpu, ecx, data); @@ -1253,10 +1307,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); u32 ecx = vcpu->regs[VCPU_REGS_RCX]; - u64 data = (vcpu->svm->vmcb->save.rax & -1u) + u64 data = (svm->vmcb->save.rax & -1u) | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); - vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 2; + svm->next_rip = svm->vmcb->save.rip + 2; if (svm_set_msr(vcpu, ecx, data)) svm_inject_gp(vcpu, 0); else @@ -1266,7 +1321,7 @@ static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - if (vcpu->svm->vmcb->control.exit_info_1) + if (to_svm(vcpu)->vmcb->control.exit_info_1) return wrmsr_interception(vcpu, kvm_run); else return rdmsr_interception(vcpu, kvm_run); @@ -1338,13 +1393,14 @@ static int 
(*svm_exit_handlers[])(struct kvm_vcpu *vcpu, static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u32 exit_code = vcpu->svm->vmcb->control.exit_code; + struct vcpu_svm *svm = to_svm(vcpu); + u32 exit_code = svm->vmcb->control.exit_code; - if (is_external_interrupt(vcpu->svm->vmcb->control.exit_int_info) && + if (is_external_interrupt(svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " "exit_code 0x%x\n", - __FUNCTION__, vcpu->svm->vmcb->control.exit_int_info, + __FUNCTION__, svm->vmcb->control.exit_int_info, exit_code); if (exit_code >= ARRAY_SIZE(svm_exit_handlers) @@ -1368,13 +1424,14 @@ static void reload_tss(struct kvm_vcpu *vcpu) static void pre_svm_run(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); int cpu = raw_smp_processor_id(); struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); - vcpu->svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; + svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; if (vcpu->cpu != cpu || - vcpu->svm->asid_generation != svm_data->asid_generation) + svm->asid_generation != svm_data->asid_generation) new_asid(vcpu, svm_data); } @@ -1383,7 +1440,7 @@ static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { struct vmcb_control_area *control; - control = &vcpu->svm->vmcb->control; + control = &to_svm(vcpu)->vmcb->control; control->int_vector = pop_irq(vcpu); control->int_ctl &= ~V_INTR_PRIO_MASK; control->int_ctl |= V_IRQ_MASK | @@ -1392,7 +1449,7 @@ static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu) static void kvm_reput_irq(struct kvm_vcpu *vcpu) { - struct vmcb_control_area *control = &vcpu->svm->vmcb->control; + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; if (control->int_ctl & V_IRQ_MASK) { control->int_ctl &= ~V_IRQ_MASK; @@ -1406,11 +1463,12 @@ static void kvm_reput_irq(struct kvm_vcpu *vcpu) static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run 
*kvm_run) { - struct vmcb_control_area *control = &vcpu->svm->vmcb->control; + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; vcpu->interrupt_window_open = (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && - (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); + (svm->vmcb->save.rflags & X86_EFLAGS_IF)); if (vcpu->interrupt_window_open && vcpu->irq_summary) /* @@ -1431,9 +1489,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu, static void post_kvm_run_save(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); + kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && vcpu->irq_summary == 0); - kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; + kvm_run->if_flag = (svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; kvm_run->cr8 = vcpu->cr8; kvm_run->apic_base = vcpu->apic_base; } @@ -1450,7 +1510,7 @@ static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, return (!vcpu->irq_summary && kvm_run->request_interrupt_window && vcpu->interrupt_window_open && - (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)); + (to_svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF)); } static void save_db_regs(unsigned long *db_regs) @@ -1476,6 +1536,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu) static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); u16 fs_selector; u16 gs_selector; u16 ldt_selector; @@ -1502,15 +1563,15 @@ again: fs_selector = read_fs(); gs_selector = read_gs(); ldt_selector = read_ldt(); - vcpu->svm->host_cr2 = kvm_read_cr2(); - vcpu->svm->host_dr6 = read_dr6(); - vcpu->svm->host_dr7 = read_dr7(); - vcpu->svm->vmcb->save.cr2 = vcpu->cr2; + svm->host_cr2 = kvm_read_cr2(); + svm->host_dr6 = read_dr6(); + svm->host_dr7 = read_dr7(); + svm->vmcb->save.cr2 = vcpu->cr2; - if (vcpu->svm->vmcb->save.dr7 & 0xff) { + if (svm->vmcb->save.dr7 & 0xff) { write_dr7(0); - 
save_db_regs(vcpu->svm->host_db_regs); - load_db_regs(vcpu->svm->db_regs); + save_db_regs(svm->host_db_regs); + load_db_regs(svm->db_regs); } if (vcpu->fpu_active) { @@ -1607,7 +1668,7 @@ again: #endif : : [vcpu]"a"(vcpu), - [svm]"i"(offsetof(struct kvm_vcpu, svm)), + [svm]"i"(offsetof(struct kvm_vcpu, _priv)), [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), @@ -1634,14 +1695,14 @@ again: fx_restore(vcpu->host_fx_image); } - if ((vcpu->svm->vmcb->save.dr7 & 0xff)) - load_db_regs(vcpu->svm->host_db_regs); + if ((svm->vmcb->save.dr7 & 0xff)) + load_db_regs(svm->host_db_regs); - vcpu->cr2 = vcpu->svm->vmcb->save.cr2; + vcpu->cr2 = svm->vmcb->save.cr2; - write_dr6(vcpu->svm->host_dr6); - write_dr7(vcpu->svm->host_dr7); - kvm_write_cr2(vcpu->svm->host_cr2); + write_dr6(svm->host_dr6); + write_dr7(svm->host_dr7); + kvm_write_cr2(svm->host_cr2); load_fs(fs_selector); load_gs(gs_selector); @@ -1655,18 +1716,18 @@ again: */ if (unlikely(prof_on == KVM_PROFILING)) profile_hit(KVM_PROFILING, - (void *)(unsigned long)vcpu->svm->vmcb->save.rip); + (void *)(unsigned long)svm->vmcb->save.rip); stgi(); kvm_reput_irq(vcpu); - vcpu->svm->next_rip = 0; + svm->next_rip = 0; - if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { + if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason - = vcpu->svm->vmcb->control.exit_code; + = svm->vmcb->control.exit_code; post_kvm_run_save(vcpu, kvm_run); return 0; } @@ -1695,12 +1756,14 @@ again: static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { - vcpu->svm->vmcb->save.cr3 = root; + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.cr3 = root; force_new_asid(vcpu); if (vcpu->fpu_active) { - vcpu->svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); - vcpu->svm->vmcb->save.cr0 |= X86_CR0_TS; + 
svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); + svm->vmcb->save.cr0 |= X86_CR0_TS; vcpu->fpu_active = 0; } } @@ -1709,26 +1772,27 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, uint32_t err_code) { - uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info; + struct vcpu_svm *svm = to_svm(vcpu); + uint32_t exit_int_info = svm->vmcb->control.exit_int_info; ++vcpu->stat.pf_guest; if (is_page_fault(exit_int_info)) { - vcpu->svm->vmcb->control.event_inj_err = 0; - vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | - SVM_EVTINJ_VALID_ERR | - SVM_EVTINJ_TYPE_EXEPT | - DF_VECTOR; + svm->vmcb->control.event_inj_err = 0; + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + SVM_EVTINJ_VALID_ERR | + SVM_EVTINJ_TYPE_EXEPT | + DF_VECTOR; return; } vcpu->cr2 = addr; - vcpu->svm->vmcb->save.cr2 = addr; - vcpu->svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | - SVM_EVTINJ_VALID_ERR | - SVM_EVTINJ_TYPE_EXEPT | - PF_VECTOR; - vcpu->svm->vmcb->control.event_inj_err = err_code; + svm->vmcb->save.cr2 = addr; + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | + SVM_EVTINJ_VALID_ERR | + SVM_EVTINJ_TYPE_EXEPT | + PF_VECTOR; + svm->vmcb->control.event_inj_err = err_code; } diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index dac2f93d1a0..96837d6ed50 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -32,6 +32,37 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +struct vmcs { + u32 revision_id; + u32 abort; + char data[0]; +}; + +struct vcpu_vmx { + struct kvm_vcpu *vcpu; + int launched; + struct kvm_msr_entry *guest_msrs; + struct kvm_msr_entry *host_msrs; + int nmsrs; + int save_nmsrs; + int msr_offset_efer; +#ifdef CONFIG_X86_64 + int msr_offset_kernel_gs_base; +#endif + struct vmcs *vmcs; + struct { + int loaded; + u16 fs_sel, gs_sel, ldt_sel; + int fs_gs_ldt_reload_needed; + }host_state; + +}; + +static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) +{ + return (struct vcpu_vmx*)vcpu->_priv; +} + 
static int init_rmode_tss(struct kvm *kvm); static DEFINE_PER_CPU(struct vmcs *, vmxarea); @@ -89,16 +120,33 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) -static inline u64 msr_efer_save_restore_bits(struct vmx_msr_entry msr) +static void load_msrs(struct kvm_msr_entry *e, int n) +{ + int i; + + for (i = 0; i < n; ++i) + wrmsrl(e[i].index, e[i].data); +} + +static void save_msrs(struct kvm_msr_entry *e, int n) +{ + int i; + + for (i = 0; i < n; ++i) + rdmsrl(e[i].index, e[i].data); +} + +static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr) { return (u64)msr.data & EFER_SAVE_RESTORE_BITS; } static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) { - int efer_offset = vcpu->msr_offset_efer; - return msr_efer_save_restore_bits(vcpu->host_msrs[efer_offset]) != - msr_efer_save_restore_bits(vcpu->guest_msrs[efer_offset]); + struct vcpu_vmx *vmx = to_vmx(vcpu); + int efer_offset = vmx->msr_offset_efer; + return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) != + msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]); } static inline int is_page_fault(u32 intr_info) @@ -123,21 +171,23 @@ static inline int is_external_interrupt(u32 intr_info) static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) { + struct vcpu_vmx *vmx = to_vmx(vcpu); int i; - for (i = 0; i < vcpu->nmsrs; ++i) - if (vcpu->guest_msrs[i].index == msr) + for (i = 0; i < vmx->nmsrs; ++i) + if (vmx->guest_msrs[i].index == msr) return i; return -1; } -static struct vmx_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) +static struct kvm_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) { + struct vcpu_vmx *vmx = to_vmx(vcpu); int i; i = __find_msr_index(vcpu, msr); if (i >= 0) - return &vcpu->guest_msrs[i]; + return &vmx->guest_msrs[i]; return NULL; } @@ -157,11 +207,12 @@ static void vmcs_clear(struct vmcs *vmcs) static void __vcpu_clear(void *arg) { struct kvm_vcpu *vcpu = arg; + struct vcpu_vmx *vmx = 
to_vmx(vcpu); int cpu = raw_smp_processor_id(); if (vcpu->cpu == cpu) - vmcs_clear(vcpu->vmcs); - if (per_cpu(current_vmcs, cpu) == vcpu->vmcs) + vmcs_clear(vmx->vmcs); + if (per_cpu(current_vmcs, cpu) == vmx->vmcs) per_cpu(current_vmcs, cpu) = NULL; rdtscll(vcpu->host_tsc); } @@ -172,7 +223,7 @@ static void vcpu_clear(struct kvm_vcpu *vcpu) smp_call_function_single(vcpu->cpu, __vcpu_clear, vcpu, 0, 1); else __vcpu_clear(vcpu); - vcpu->launched = 0; + to_vmx(vcpu)->launched = 0; } static unsigned long vmcs_readl(unsigned long field) @@ -285,80 +336,81 @@ static void reload_tss(void) static void load_transition_efer(struct kvm_vcpu *vcpu) { u64 trans_efer; - int efer_offset = vcpu->msr_offset_efer; + struct vcpu_vmx *vmx = to_vmx(vcpu); + int efer_offset = vmx->msr_offset_efer; - trans_efer = vcpu->host_msrs[efer_offset].data; + trans_efer = vmx->host_msrs[efer_offset].data; trans_efer &= ~EFER_SAVE_RESTORE_BITS; - trans_efer |= msr_efer_save_restore_bits( - vcpu->guest_msrs[efer_offset]); + trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]); wrmsrl(MSR_EFER, trans_efer); vcpu->stat.efer_reload++; } static void vmx_save_host_state(struct kvm_vcpu *vcpu) { - struct vmx_host_state *hs = &vcpu->vmx_host_state; + struct vcpu_vmx *vmx = to_vmx(vcpu); - if (hs->loaded) + if (vmx->host_state.loaded) return; - hs->loaded = 1; + vmx->host_state.loaded = 1; /* * Set host fs and gs selectors. Unfortunately, 22.2.3 does not * allow segment selectors with cpl > 0 or ti == 1. 
*/ - hs->ldt_sel = read_ldt(); - hs->fs_gs_ldt_reload_needed = hs->ldt_sel; - hs->fs_sel = read_fs(); - if (!(hs->fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, hs->fs_sel); + vmx->host_state.ldt_sel = read_ldt(); + vmx->host_state.fs_gs_ldt_reload_needed = vmx->host_state.ldt_sel; + vmx->host_state.fs_sel = read_fs(); + if (!(vmx->host_state.fs_sel & 7)) + vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); else { vmcs_write16(HOST_FS_SELECTOR, 0); - hs->fs_gs_ldt_reload_needed = 1; + vmx->host_state.fs_gs_ldt_reload_needed = 1; } - hs->gs_sel = read_gs(); - if (!(hs->gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, hs->gs_sel); + vmx->host_state.gs_sel = read_gs(); + if (!(vmx->host_state.gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); else { vmcs_write16(HOST_GS_SELECTOR, 0); - hs->fs_gs_ldt_reload_needed = 1; + vmx->host_state.fs_gs_ldt_reload_needed = 1; } #ifdef CONFIG_X86_64 vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); #else - vmcs_writel(HOST_FS_BASE, segment_base(hs->fs_sel)); - vmcs_writel(HOST_GS_BASE, segment_base(hs->gs_sel)); + vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); + vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); #endif #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { - save_msrs(vcpu->host_msrs + vcpu->msr_offset_kernel_gs_base, 1); + save_msrs(vmx->host_msrs + + vmx->msr_offset_kernel_gs_base, 1); } #endif - load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + load_msrs(vmx->guest_msrs, vmx->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) load_transition_efer(vcpu); } static void vmx_load_host_state(struct kvm_vcpu *vcpu) { - struct vmx_host_state *hs = &vcpu->vmx_host_state; + struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!hs->loaded) + if (!vmx->host_state.loaded) return; - hs->loaded = 0; - if (hs->fs_gs_ldt_reload_needed) { - load_ldt(hs->ldt_sel); - load_fs(hs->fs_sel); + vmx->host_state.loaded = 0; + if 
(vmx->host_state.fs_gs_ldt_reload_needed) { + load_ldt(vmx->host_state.ldt_sel); + load_fs(vmx->host_state.fs_sel); /* * If we have to reload gs, we must take care to * preserve our gs base. */ local_irq_disable(); - load_gs(hs->gs_sel); + load_gs(vmx->host_state.gs_sel); #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); #endif @@ -366,10 +418,10 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) reload_tss(); } - save_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); - load_msrs(vcpu->host_msrs, vcpu->save_nmsrs); + save_msrs(vmx->guest_msrs, vmx->save_nmsrs); + load_msrs(vmx->host_msrs, vmx->save_nmsrs); if (msr_efer_need_save_restore(vcpu)) - load_msrs(vcpu->host_msrs + vcpu->msr_offset_efer, 1); + load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); } /* @@ -378,7 +430,8 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) */ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) { - u64 phys_addr = __pa(vcpu->vmcs); + struct vcpu_vmx *vmx = to_vmx(vcpu); + u64 phys_addr = __pa(vmx->vmcs); int cpu; u64 tsc_this, delta; @@ -387,16 +440,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu) if (vcpu->cpu != cpu) vcpu_clear(vcpu); - if (per_cpu(current_vmcs, cpu) != vcpu->vmcs) { + if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { u8 error; - per_cpu(current_vmcs, cpu) = vcpu->vmcs; + per_cpu(current_vmcs, cpu) = vmx->vmcs; asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc"); if (error) printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n", - vcpu->vmcs, phys_addr); + vmx->vmcs, phys_addr); } if (vcpu->cpu != cpu) { @@ -503,13 +556,15 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) */ void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) { - struct vmx_msr_entry tmp; - tmp = vcpu->guest_msrs[to]; - vcpu->guest_msrs[to] = vcpu->guest_msrs[from]; - vcpu->guest_msrs[from] = tmp; - tmp = vcpu->host_msrs[to]; - vcpu->host_msrs[to] = vcpu->host_msrs[from]; - vcpu->host_msrs[from] = tmp; 
+ struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_msr_entry tmp; + + tmp = vmx->guest_msrs[to]; + vmx->guest_msrs[to] = vmx->guest_msrs[from]; + vmx->guest_msrs[from] = tmp; + tmp = vmx->host_msrs[to]; + vmx->host_msrs[to] = vmx->host_msrs[from]; + vmx->host_msrs[from] = tmp; } /* @@ -519,6 +574,7 @@ void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) */ static void setup_msrs(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); int save_nmsrs; save_nmsrs = 0; @@ -547,13 +603,13 @@ static void setup_msrs(struct kvm_vcpu *vcpu) move_msr_up(vcpu, index, save_nmsrs++); } #endif - vcpu->save_nmsrs = save_nmsrs; + vmx->save_nmsrs = save_nmsrs; #ifdef CONFIG_X86_64 - vcpu->msr_offset_kernel_gs_base = + vmx->msr_offset_kernel_gs_base = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); #endif - vcpu->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); + vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); } /* @@ -589,7 +645,7 @@ static void guest_write_tsc(u64 guest_tsc) static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { u64 data; - struct vmx_msr_entry *msr; + struct kvm_msr_entry *msr; if (!pdata) { printk(KERN_ERR "BUG: get_msr called with NULL pdata\n"); @@ -639,14 +695,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) */ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { - struct vmx_msr_entry *msr; + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_msr_entry *msr; int ret = 0; switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_index, data); - if (vcpu->vmx_host_state.loaded) + if (vmx->host_state.loaded) load_transition_efer(vcpu); break; case MSR_FS_BASE: @@ -672,8 +729,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vcpu, msr_index); if (msr) { msr->data = data; - if (vcpu->vmx_host_state.loaded) - load_msrs(vcpu->guest_msrs, vcpu->save_nmsrs); + if (vmx->host_state.loaded) + 
load_msrs(vmx->guest_msrs, vmx->save_nmsrs); break; } ret = kvm_set_msr_common(vcpu, msr_index, data); @@ -1053,7 +1110,7 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) { - struct vmx_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER); + struct kvm_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER); vcpu->shadow_efer = efer; if (efer & EFER_LMA) { @@ -1244,6 +1301,7 @@ static void seg_setup(int seg) */ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); u32 host_sysenter_cs; u32 junk; unsigned long a; @@ -1385,18 +1443,18 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) u32 index = vmx_msr_index[i]; u32 data_low, data_high; u64 data; - int j = vcpu->nmsrs; + int j = vmx->nmsrs; if (rdmsr_safe(index, &data_low, &data_high) < 0) continue; if (wrmsr_safe(index, data_low, data_high) < 0) continue; data = data_low | ((u64)data_high << 32); - vcpu->host_msrs[j].index = index; - vcpu->host_msrs[j].reserved = 0; - vcpu->host_msrs[j].data = data; - vcpu->guest_msrs[j] = vcpu->host_msrs[j]; - ++vcpu->nmsrs; + vmx->host_msrs[j].index = index; + vmx->host_msrs[j].reserved = 0; + vmx->host_msrs[j].data = data; + vmx->guest_msrs[j] = vmx->host_msrs[j]; + ++vmx->nmsrs; } setup_msrs(vcpu); @@ -1999,6 +2057,7 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_vmx *vmx = to_vmx(vcpu); u8 fail; int r; @@ -2123,7 +2182,7 @@ again: #endif "setbe %0 \n\t" : "=q" (fail) - : "r"(vcpu->launched), "d"((unsigned long)HOST_RSP), + : "r"(vmx->launched), "d"((unsigned long)HOST_RSP), "c"(vcpu), [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])), [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), @@ -2167,7 +2226,7 @@ again: if (unlikely(prof_on == KVM_PROFILING)) profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); - vcpu->launched = 1; + vmx->launched = 1; r = 
kvm_handle_exit(kvm_run, vcpu); if (r > 0) { /* Give scheduler a change to reschedule. */ @@ -2232,10 +2291,12 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, static void vmx_free_vmcs(struct kvm_vcpu *vcpu) { - if (vcpu->vmcs) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->vmcs) { on_each_cpu(__vcpu_clear, vcpu, 0, 1); - free_vmcs(vcpu->vmcs); - vcpu->vmcs = NULL; + free_vmcs(vmx->vmcs); + vmx->vmcs = NULL; } } @@ -2246,33 +2307,39 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { - struct vmcs *vmcs; + struct vcpu_vmx *vmx; - vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->guest_msrs) + vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + if (!vmx) return -ENOMEM; - vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vcpu->host_msrs) - goto out_free_guest_msrs; + vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vmx->guest_msrs) + goto out_free; - vmcs = alloc_vmcs(); - if (!vmcs) - goto out_free_msrs; + vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vmx->host_msrs) + goto out_free; - vmcs_clear(vmcs); - vcpu->vmcs = vmcs; - vcpu->launched = 0; + vmx->vmcs = alloc_vmcs(); + if (!vmx->vmcs) + goto out_free; + + vmcs_clear(vmx->vmcs); + + vmx->vcpu = vcpu; + vcpu->_priv = vmx; return 0; -out_free_msrs: - kfree(vcpu->host_msrs); - vcpu->host_msrs = NULL; +out_free: + if (vmx->host_msrs) + kfree(vmx->host_msrs); + + if (vmx->guest_msrs) + kfree(vmx->guest_msrs); -out_free_guest_msrs: - kfree(vcpu->guest_msrs); - vcpu->guest_msrs = NULL; + kfree(vmx); return -ENOMEM; } -- cgit v1.2.3 From fb3f0f51d92d1496f9628ca6f0fb06a48dc9ed2a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 27 Jul 2007 17:16:56 +1000 Subject: KVM: Dynamically allocate vcpus This patch converts the vcpus array in "struct kvm" to a pointer array, and changes the "vcpu_create" and "vcpu_setup" hooks into one "vcpu_create" call which does the allocation and initialization of the vcpu (calling back 
into the kvm_vcpu_init core helper). Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 12 +-- drivers/kvm/kvm_main.c | 198 +++++++++++++++++++++++++------------------------ drivers/kvm/kvm_svm.h | 2 +- drivers/kvm/svm.c | 177 +++++++++++++++++++++---------------------- drivers/kvm/vmx.c | 65 +++++++++------- 5 files changed, 236 insertions(+), 218 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 954a1408960..e92c84b04c1 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -300,10 +300,8 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev); struct kvm_vcpu { - int valid; struct kvm *kvm; int vcpu_id; - void *_priv; struct mutex mutex; int cpu; u64 host_tsc; @@ -404,8 +402,7 @@ struct kvm { struct list_head active_mmu_pages; int n_free_mmu_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; - int nvcpus; - struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; int memory_config_version; int busy; unsigned long rmap_overflow; @@ -428,7 +425,8 @@ struct kvm_arch_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ - int (*vcpu_create)(struct kvm_vcpu *vcpu); + /* Create, but do not attach this VCPU */ + struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu); @@ -470,7 +468,6 @@ struct kvm_arch_ops { void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code); int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); - int (*vcpu_setup)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); @@ -481,6 +478,9 @@ extern struct kvm_arch_ops *kvm_arch_ops; #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt) #define vcpu_printf(vcpu, fmt...) 
kvm_printf(vcpu->kvm, fmt) +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); + int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); void kvm_exit_arch(void); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index bf8b8f03019..69d9ab4e7cb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -266,8 +266,10 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) atomic_set(&completed, 0); cpus_clear(cpus); needed = 0; - for (i = 0; i < kvm->nvcpus; ++i) { - vcpu = &kvm->vcpus[i]; + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; @@ -291,10 +293,61 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) } } +int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) +{ + struct page *page; + int r; + + mutex_init(&vcpu->mutex); + vcpu->cpu = -1; + vcpu->mmu.root_hpa = INVALID_PAGE; + vcpu->kvm = kvm; + vcpu->vcpu_id = id; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail; + } + vcpu->run = page_address(page); + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + r = -ENOMEM; + goto fail_free_run; + } + vcpu->pio_data = page_address(page); + + vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, + FX_IMAGE_ALIGN); + vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; + + r = kvm_mmu_create(vcpu); + if (r < 0) + goto fail_free_pio_data; + + return 0; + +fail_free_pio_data: + free_page((unsigned long)vcpu->pio_data); +fail_free_run: + free_page((unsigned long)vcpu->run); +fail: + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_init); + +void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mmu_destroy(vcpu); + free_page((unsigned long)vcpu->pio_data); + free_page((unsigned long)vcpu->run); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); + static struct kvm *kvm_create_vm(void) { struct kvm *kvm = 
kzalloc(sizeof(struct kvm), GFP_KERNEL); - int i; if (!kvm) return ERR_PTR(-ENOMEM); @@ -303,14 +356,6 @@ static struct kvm *kvm_create_vm(void) spin_lock_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); kvm_io_bus_init(&kvm->mmio_bus); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - struct kvm_vcpu *vcpu = &kvm->vcpus[i]; - - mutex_init(&vcpu->mutex); - vcpu->cpu = -1; - vcpu->kvm = kvm; - vcpu->mmu.root_hpa = INVALID_PAGE; - } spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); @@ -367,30 +412,11 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - if (!vcpu->valid) - return; - vcpu_load(vcpu); kvm_mmu_unload(vcpu); vcpu_put(vcpu); } -static void kvm_free_vcpu(struct kvm_vcpu *vcpu) -{ - if (!vcpu->valid) - return; - - vcpu_load(vcpu); - kvm_mmu_destroy(vcpu); - vcpu_put(vcpu); - kvm_arch_ops->vcpu_free(vcpu); - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; - free_page((unsigned long)vcpu->pio_data); - vcpu->pio_data = NULL; - free_pio_guest_pages(vcpu); -} - static void kvm_free_vcpus(struct kvm *kvm) { unsigned int i; @@ -399,9 +425,15 @@ static void kvm_free_vcpus(struct kvm *kvm) * Unpin any mmu pages first. 
*/ for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_unload_vcpu_mmu(&kvm->vcpus[i]); - for (i = 0; i < KVM_MAX_VCPUS; ++i) - kvm_free_vcpu(&kvm->vcpus[i]); + if (kvm->vcpus[i]) + kvm_unload_vcpu_mmu(kvm->vcpus[i]); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_ops->vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + } static int kvm_dev_release(struct inode *inode, struct file *filp) @@ -2372,77 +2404,47 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) { int r; struct kvm_vcpu *vcpu; - struct page *page; - r = -EINVAL; if (!valid_vcpu(n)) - goto out; - - vcpu = &kvm->vcpus[n]; - vcpu->vcpu_id = n; - - mutex_lock(&vcpu->mutex); - - if (vcpu->valid) { - mutex_unlock(&vcpu->mutex); - return -EEXIST; - } - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_unlock; - vcpu->run = page_address(page); - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - r = -ENOMEM; - if (!page) - goto out_free_run; - vcpu->pio_data = page_address(page); - - vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, - FX_IMAGE_ALIGN); - vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; - vcpu->cr0 = 0x10; - - r = kvm_arch_ops->vcpu_create(vcpu); - if (r < 0) - goto out_free_vcpus; + return -EINVAL; - r = kvm_mmu_create(vcpu); - if (r < 0) - goto out_free_vcpus; + vcpu = kvm_arch_ops->vcpu_create(kvm, n); + if (IS_ERR(vcpu)) + return PTR_ERR(vcpu); - kvm_arch_ops->vcpu_load(vcpu); + vcpu_load(vcpu); r = kvm_mmu_setup(vcpu); - if (r >= 0) - r = kvm_arch_ops->vcpu_setup(vcpu); vcpu_put(vcpu); - if (r < 0) - goto out_free_vcpus; + goto free_vcpu; + spin_lock(&kvm->lock); + if (kvm->vcpus[n]) { + r = -EEXIST; + spin_unlock(&kvm->lock); + goto mmu_unload; + } + kvm->vcpus[n] = vcpu; + spin_unlock(&kvm->lock); + + /* Now it's all set up, let userspace reach it */ r = create_vcpu_fd(vcpu); if (r < 0) - goto out_free_vcpus; - - spin_lock(&kvm_lock); - if (n >= kvm->nvcpus) - kvm->nvcpus = n + 1; - 
spin_unlock(&kvm_lock); + goto unlink; + return r; - vcpu->valid = 1; +unlink: + spin_lock(&kvm->lock); + kvm->vcpus[n] = NULL; + spin_unlock(&kvm->lock); - return r; +mmu_unload: + vcpu_load(vcpu); + kvm_mmu_unload(vcpu); + vcpu_put(vcpu); -out_free_vcpus: - kvm_free_vcpu(vcpu); -out_free_run: - free_page((unsigned long)vcpu->run); - vcpu->run = NULL; -out_unlock: - mutex_unlock(&vcpu->mutex); -out: +free_vcpu: + kvm_arch_ops->vcpu_free(vcpu); return r; } @@ -2935,9 +2937,12 @@ static void decache_vcpus_on_cpu(int cpu) int i; spin_lock(&kvm_lock); - list_for_each_entry(vm, &vm_list, vm_list) + list_for_each_entry(vm, &vm_list, vm_list) { + spin_lock(&vm->lock); for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &vm->vcpus[i]; + vcpu = vm->vcpus[i]; + if (!vcpu) + continue; /* * If the vcpu is locked, then it is running on some * other cpu and therefore it is not cached on the @@ -2954,6 +2959,8 @@ static void decache_vcpus_on_cpu(int cpu) mutex_unlock(&vcpu->mutex); } } + spin_unlock(&vm->lock); + } spin_unlock(&kvm_lock); } @@ -3078,8 +3085,9 @@ static u64 stat_get(void *_offset) spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = &kvm->vcpus[i]; - total += *(u32 *)((void *)vcpu + offset); + vcpu = kvm->vcpus[i]; + if (vcpu) + total += *(u32 *)((void *)vcpu + offset); } spin_unlock(&kvm_lock); return total; diff --git a/drivers/kvm/kvm_svm.h b/drivers/kvm/kvm_svm.h index 82e5d77acbb..a0e415daef5 100644 --- a/drivers/kvm/kvm_svm.h +++ b/drivers/kvm/kvm_svm.h @@ -23,7 +23,7 @@ static const u32 host_save_user_msrs[] = { struct kvm_vcpu; struct vcpu_svm { - struct kvm_vcpu *vcpu; + struct kvm_vcpu vcpu; struct vmcb *vmcb; unsigned long vmcb_pa; struct svm_cpu_data *svm_data; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 32481876d98..0feec855859 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -51,7 +51,7 @@ MODULE_LICENSE("GPL"); static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { 
- return (struct vcpu_svm*)vcpu->_priv; + return container_of(vcpu, struct vcpu_svm, vcpu); } unsigned long iopm_base; @@ -466,11 +466,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static int svm_vcpu_setup(struct kvm_vcpu *vcpu) -{ - return 0; -} - static void init_vmcb(struct vmcb *vmcb) { struct vmcb_control_area *control = &vmcb->control; @@ -576,19 +571,27 @@ static void init_vmcb(struct vmcb *vmcb) /* rdx = ?? */ } -static int svm_create_vcpu(struct kvm_vcpu *vcpu) +static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) { struct vcpu_svm *svm; struct page *page; - int r; + int err; - r = -ENOMEM; svm = kzalloc(sizeof *svm, GFP_KERNEL); - if (!svm) - goto out1; + if (!svm) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(&svm->vcpu, kvm, id); + if (err) + goto free_svm; + page = alloc_page(GFP_KERNEL); - if (!page) - goto out2; + if (!page) { + err = -ENOMEM; + goto uninit; + } svm->vmcb = page_address(page); clear_page(svm->vmcb); @@ -597,33 +600,29 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) memset(svm->db_regs, 0, sizeof(svm->db_regs)); init_vmcb(svm->vmcb); - svm->vcpu = vcpu; - vcpu->_priv = svm; + fx_init(&svm->vcpu); + svm->vcpu.fpu_active = 1; + svm->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (svm->vcpu.vcpu_id == 0) + svm->vcpu.apic_base |= MSR_IA32_APICBASE_BSP; - fx_init(vcpu); - vcpu->fpu_active = 1; - vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; - if (vcpu->vcpu_id == 0) - vcpu->apic_base |= MSR_IA32_APICBASE_BSP; + return &svm->vcpu; - return 0; - -out2: +uninit: + kvm_vcpu_uninit(&svm->vcpu); +free_svm: kfree(svm); -out1: - return r; +out: + return ERR_PTR(err); } static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (!svm) - return; - if (svm->vmcb) - __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + kvm_vcpu_uninit(vcpu); kfree(svm); - 
vcpu->_priv = NULL; } static void svm_vcpu_load(struct kvm_vcpu *vcpu) @@ -1591,34 +1590,33 @@ again: #endif #ifdef CONFIG_X86_64 - "mov %c[rbx](%[vcpu]), %%rbx \n\t" - "mov %c[rcx](%[vcpu]), %%rcx \n\t" - "mov %c[rdx](%[vcpu]), %%rdx \n\t" - "mov %c[rsi](%[vcpu]), %%rsi \n\t" - "mov %c[rdi](%[vcpu]), %%rdi \n\t" - "mov %c[rbp](%[vcpu]), %%rbp \n\t" - "mov %c[r8](%[vcpu]), %%r8 \n\t" - "mov %c[r9](%[vcpu]), %%r9 \n\t" - "mov %c[r10](%[vcpu]), %%r10 \n\t" - "mov %c[r11](%[vcpu]), %%r11 \n\t" - "mov %c[r12](%[vcpu]), %%r12 \n\t" - "mov %c[r13](%[vcpu]), %%r13 \n\t" - "mov %c[r14](%[vcpu]), %%r14 \n\t" - "mov %c[r15](%[vcpu]), %%r15 \n\t" + "mov %c[rbx](%[svm]), %%rbx \n\t" + "mov %c[rcx](%[svm]), %%rcx \n\t" + "mov %c[rdx](%[svm]), %%rdx \n\t" + "mov %c[rsi](%[svm]), %%rsi \n\t" + "mov %c[rdi](%[svm]), %%rdi \n\t" + "mov %c[rbp](%[svm]), %%rbp \n\t" + "mov %c[r8](%[svm]), %%r8 \n\t" + "mov %c[r9](%[svm]), %%r9 \n\t" + "mov %c[r10](%[svm]), %%r10 \n\t" + "mov %c[r11](%[svm]), %%r11 \n\t" + "mov %c[r12](%[svm]), %%r12 \n\t" + "mov %c[r13](%[svm]), %%r13 \n\t" + "mov %c[r14](%[svm]), %%r14 \n\t" + "mov %c[r15](%[svm]), %%r15 \n\t" #else - "mov %c[rbx](%[vcpu]), %%ebx \n\t" - "mov %c[rcx](%[vcpu]), %%ecx \n\t" - "mov %c[rdx](%[vcpu]), %%edx \n\t" - "mov %c[rsi](%[vcpu]), %%esi \n\t" - "mov %c[rdi](%[vcpu]), %%edi \n\t" - "mov %c[rbp](%[vcpu]), %%ebp \n\t" + "mov %c[rbx](%[svm]), %%ebx \n\t" + "mov %c[rcx](%[svm]), %%ecx \n\t" + "mov %c[rdx](%[svm]), %%edx \n\t" + "mov %c[rsi](%[svm]), %%esi \n\t" + "mov %c[rdi](%[svm]), %%edi \n\t" + "mov %c[rbp](%[svm]), %%ebp \n\t" #endif #ifdef CONFIG_X86_64 /* Enter guest mode */ "push %%rax \n\t" - "mov %c[svm](%[vcpu]), %%rax \n\t" - "mov %c[vmcb](%%rax), %%rax \n\t" + "mov %c[vmcb](%[svm]), %%rax \n\t" SVM_VMLOAD "\n\t" SVM_VMRUN "\n\t" SVM_VMSAVE "\n\t" @@ -1626,8 +1624,7 @@ again: #else /* Enter guest mode */ "push %%eax \n\t" - "mov %c[svm](%[vcpu]), %%eax \n\t" - "mov %c[vmcb](%%eax), %%eax \n\t" + "mov %c[vmcb](%[svm]), %%eax 
\n\t" SVM_VMLOAD "\n\t" SVM_VMRUN "\n\t" SVM_VMSAVE "\n\t" @@ -1636,55 +1633,54 @@ again: /* Save guest registers, load host registers */ #ifdef CONFIG_X86_64 - "mov %%rbx, %c[rbx](%[vcpu]) \n\t" - "mov %%rcx, %c[rcx](%[vcpu]) \n\t" - "mov %%rdx, %c[rdx](%[vcpu]) \n\t" - "mov %%rsi, %c[rsi](%[vcpu]) \n\t" - "mov %%rdi, %c[rdi](%[vcpu]) \n\t" - "mov %%rbp, %c[rbp](%[vcpu]) \n\t" - "mov %%r8, %c[r8](%[vcpu]) \n\t" - "mov %%r9, %c[r9](%[vcpu]) \n\t" - "mov %%r10, %c[r10](%[vcpu]) \n\t" - "mov %%r11, %c[r11](%[vcpu]) \n\t" - "mov %%r12, %c[r12](%[vcpu]) \n\t" - "mov %%r13, %c[r13](%[vcpu]) \n\t" - "mov %%r14, %c[r14](%[vcpu]) \n\t" - "mov %%r15, %c[r15](%[vcpu]) \n\t" + "mov %%rbx, %c[rbx](%[svm]) \n\t" + "mov %%rcx, %c[rcx](%[svm]) \n\t" + "mov %%rdx, %c[rdx](%[svm]) \n\t" + "mov %%rsi, %c[rsi](%[svm]) \n\t" + "mov %%rdi, %c[rdi](%[svm]) \n\t" + "mov %%rbp, %c[rbp](%[svm]) \n\t" + "mov %%r8, %c[r8](%[svm]) \n\t" + "mov %%r9, %c[r9](%[svm]) \n\t" + "mov %%r10, %c[r10](%[svm]) \n\t" + "mov %%r11, %c[r11](%[svm]) \n\t" + "mov %%r12, %c[r12](%[svm]) \n\t" + "mov %%r13, %c[r13](%[svm]) \n\t" + "mov %%r14, %c[r14](%[svm]) \n\t" + "mov %%r15, %c[r15](%[svm]) \n\t" "pop %%r15; pop %%r14; pop %%r13; pop %%r12;" "pop %%r11; pop %%r10; pop %%r9; pop %%r8;" "pop %%rbp; pop %%rdi; pop %%rsi;" "pop %%rdx; pop %%rcx; pop %%rbx; \n\t" #else - "mov %%ebx, %c[rbx](%[vcpu]) \n\t" - "mov %%ecx, %c[rcx](%[vcpu]) \n\t" - "mov %%edx, %c[rdx](%[vcpu]) \n\t" - "mov %%esi, %c[rsi](%[vcpu]) \n\t" - "mov %%edi, %c[rdi](%[vcpu]) \n\t" - "mov %%ebp, %c[rbp](%[vcpu]) \n\t" + "mov %%ebx, %c[rbx](%[svm]) \n\t" + "mov %%ecx, %c[rcx](%[svm]) \n\t" + "mov %%edx, %c[rdx](%[svm]) \n\t" + "mov %%esi, %c[rsi](%[svm]) \n\t" + "mov %%edi, %c[rdi](%[svm]) \n\t" + "mov %%ebp, %c[rbp](%[svm]) \n\t" "pop %%ebp; pop %%edi; pop %%esi;" "pop %%edx; pop %%ecx; pop %%ebx; \n\t" #endif : - : [vcpu]"a"(vcpu), - [svm]"i"(offsetof(struct kvm_vcpu, _priv)), + : [svm]"a"(svm), [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), 
- [rbx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBX])), - [rcx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RCX])), - [rdx]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDX])), - [rsi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RSI])), - [rdi]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RDI])), - [rbp]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RBP])) + [rbx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBX])), + [rcx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RCX])), + [rdx]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDX])), + [rsi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RSI])), + [rdi]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RDI])), + [rbp]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_RBP])) #ifdef CONFIG_X86_64 - ,[r8 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R8 ])), - [r9 ]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R9 ])), - [r10]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R10])), - [r11]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R11])), - [r12]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R12])), - [r13]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R13])), - [r14]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R14])), - [r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])) + ,[r8 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R8])), + [r9 ]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R9 ])), + [r10]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R10])), + [r11]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R11])), + [r12]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R12])), + [r13]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R13])), + [r14]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R14])), + [r15]"i"(offsetof(struct vcpu_svm,vcpu.regs[VCPU_REGS_R15])) #endif : "cc", "memory" ); @@ -1865,7 +1861,6 @@ static struct kvm_arch_ops svm_arch_ops = { .run = svm_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, - .vcpu_setup = svm_vcpu_setup, .patch_hypercall = svm_patch_hypercall, }; 
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 96837d6ed50..df578782330 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -39,7 +39,7 @@ struct vmcs { }; struct vcpu_vmx { - struct kvm_vcpu *vcpu; + struct kvm_vcpu vcpu; int launched; struct kvm_msr_entry *guest_msrs; struct kvm_msr_entry *host_msrs; @@ -60,7 +60,7 @@ struct vcpu_vmx { static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { - return (struct vcpu_vmx*)vcpu->_priv; + return container_of(vcpu, struct vcpu_vmx, vcpu); } static int init_rmode_tss(struct kvm *kvm); @@ -2302,46 +2302,62 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu) static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + vmx_free_vmcs(vcpu); + kfree(vmx->host_msrs); + kfree(vmx->guest_msrs); + kvm_vcpu_uninit(vcpu); + kfree(vmx); } -static int vmx_create_vcpu(struct kvm_vcpu *vcpu) +static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { - struct vcpu_vmx *vmx; + int err; + struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); - vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); if (!vmx) - return -ENOMEM; + return ERR_PTR(-ENOMEM); + + err = kvm_vcpu_init(&vmx->vcpu, kvm, id); + if (err) + goto free_vcpu; vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!vmx->guest_msrs) - goto out_free; + if (!vmx->guest_msrs) { + err = -ENOMEM; + goto uninit_vcpu; + } vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!vmx->host_msrs) - goto out_free; + goto free_guest_msrs; vmx->vmcs = alloc_vmcs(); if (!vmx->vmcs) - goto out_free; + goto free_msrs; vmcs_clear(vmx->vmcs); - vmx->vcpu = vcpu; - vcpu->_priv = vmx; - - return 0; - -out_free: - if (vmx->host_msrs) - kfree(vmx->host_msrs); - - if (vmx->guest_msrs) - kfree(vmx->guest_msrs); - + vmx_vcpu_load(&vmx->vcpu); + err = vmx_vcpu_setup(&vmx->vcpu); + vmx_vcpu_put(&vmx->vcpu); + if (err) + goto free_vmcs; + + return &vmx->vcpu; + +free_vmcs: + free_vmcs(vmx->vmcs); +free_msrs: + kfree(vmx->host_msrs); 
+free_guest_msrs: + kfree(vmx->guest_msrs); +uninit_vcpu: + kvm_vcpu_uninit(&vmx->vcpu); +free_vcpu: kfree(vmx); - - return -ENOMEM; + return ERR_PTR(err); } static struct kvm_arch_ops vmx_arch_ops = { @@ -2389,7 +2405,6 @@ static struct kvm_arch_ops vmx_arch_ops = { .run = vmx_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, - .vcpu_setup = vmx_vcpu_setup, .patch_hypercall = vmx_patch_hypercall, }; -- cgit v1.2.3 From 1c3d14fe0ab75337a3f6c06b6bc18bcbc2b3d0bc Mon Sep 17 00:00:00 2001 From: "Yang, Sheng" Date: Sun, 29 Jul 2007 11:07:42 +0300 Subject: KVM: VMX: Improve the method of writing vmcs control Put the cpu feature detection part in hardware_setup, and store the detected vmcs configuration in a global variable for later use. [glommer: fix for some i386-only machines not supporting CR8 load/store exiting] Signed-off-by: Sheng Yang Signed-off-by: Glauber de Oliveira Costa Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 147 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 102 insertions(+), 45 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index df578782330..18f9b0b3fb1 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -71,18 +71,17 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static struct page *vmx_io_bitmap_a; static struct page *vmx_io_bitmap_b; -#ifdef CONFIG_X86_64 -#define HOST_IS_64 1 -#else -#define HOST_IS_64 0 -#endif #define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE) -static struct vmcs_descriptor { +static struct vmcs_config { int size; int order; u32 revision_id; -} vmcs_descriptor; + u32 pin_based_exec_ctrl; + u32 cpu_based_exec_ctrl; + u32 vmexit_ctrl; + u32 vmentry_ctrl; +} vmcs_config; #define VMX_SEGMENT_FIELD(seg) \ [VCPU_SREG_##seg] = { \ @@ -839,14 +838,93 @@ static void hardware_disable(void *garbage) asm volatile (ASM_VMX_VMXOFF : : : "cc"); } -static __init void setup_vmcs_descriptor(void) +static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, + u32 msr, u32* result) +{ 
+ u32 vmx_msr_low, vmx_msr_high; + u32 ctl = ctl_min | ctl_opt; + + rdmsr(msr, vmx_msr_low, vmx_msr_high); + + ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */ + ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */ + + /* Ensure minimum (required) set of control bits are supported. */ + if (ctl_min & ~ctl) + return -1; + + *result = ctl; + return 0; +} + +static __init int setup_vmcs_config(void) { u32 vmx_msr_low, vmx_msr_high; + u32 min, opt; + u32 _pin_based_exec_control = 0; + u32 _cpu_based_exec_control = 0; + u32 _vmexit_control = 0; + u32 _vmentry_control = 0; + + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + opt = 0; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, + &_pin_based_exec_control) < 0) + return -1; + + min = CPU_BASED_HLT_EXITING | +#ifdef CONFIG_X86_64 + CPU_BASED_CR8_LOAD_EXITING | + CPU_BASED_CR8_STORE_EXITING | +#endif + CPU_BASED_USE_IO_BITMAPS | + CPU_BASED_MOV_DR_EXITING | + CPU_BASED_USE_TSC_OFFSETING; + opt = 0; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, + &_cpu_based_exec_control) < 0) + return -1; + + min = 0; +#ifdef CONFIG_X86_64 + min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; +#endif + opt = 0; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, + &_vmexit_control) < 0) + return -1; + + min = opt = 0; + if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, + &_vmentry_control) < 0) + return -1; rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); - vmcs_descriptor.size = vmx_msr_high & 0x1fff; - vmcs_descriptor.order = get_order(vmcs_descriptor.size); - vmcs_descriptor.revision_id = vmx_msr_low; + + /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ + if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) + return -1; + +#ifdef CONFIG_X86_64 + /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ + if (vmx_msr_high & (1u<<16)) + return -1; +#endif + + /* Require Write-Back (WB) memory type for VMCS accesses. 
*/ + if (((vmx_msr_high >> 18) & 15) != 6) + return -1; + + vmcs_config.size = vmx_msr_high & 0x1fff; + vmcs_config.order = get_order(vmcs_config.size); + vmcs_config.revision_id = vmx_msr_low; + + vmcs_config.pin_based_exec_ctrl = _pin_based_exec_control; + vmcs_config.cpu_based_exec_ctrl = _cpu_based_exec_control; + vmcs_config.vmexit_ctrl = _vmexit_control; + vmcs_config.vmentry_ctrl = _vmentry_control; + + return 0; } static struct vmcs *alloc_vmcs_cpu(int cpu) @@ -855,12 +933,12 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) struct page *pages; struct vmcs *vmcs; - pages = alloc_pages_node(node, GFP_KERNEL, vmcs_descriptor.order); + pages = alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); if (!pages) return NULL; vmcs = page_address(pages); - memset(vmcs, 0, vmcs_descriptor.size); - vmcs->revision_id = vmcs_descriptor.revision_id; /* vmcs revision id */ + memset(vmcs, 0, vmcs_config.size); + vmcs->revision_id = vmcs_config.revision_id; /* vmcs revision id */ return vmcs; } @@ -871,7 +949,7 @@ static struct vmcs *alloc_vmcs(void) static void free_vmcs(struct vmcs *vmcs) { - free_pages((unsigned long)vmcs, vmcs_descriptor.order); + free_pages((unsigned long)vmcs, vmcs_config.order); } static void free_kvm_area(void) @@ -904,7 +982,8 @@ static __init int alloc_kvm_area(void) static __init int hardware_setup(void) { - setup_vmcs_descriptor(); + if (setup_vmcs_config() < 0) + return -1; return alloc_kvm_area(); } @@ -1275,17 +1354,6 @@ static int init_rmode_tss(struct kvm* kvm) return 1; } -static void vmcs_write32_fixedbits(u32 msr, u32 vmcs_field, u32 val) -{ - u32 msr_high, msr_low; - - rdmsr(msr, msr_low, msr_high); - - val &= msr_high; - val |= msr_low; - vmcs_write32(vmcs_field, val); -} - static void seg_setup(int seg) { struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; @@ -1382,20 +1450,10 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); /* Control */ - 
vmcs_write32_fixedbits(MSR_IA32_VMX_PINBASED_CTLS, - PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_EXT_INTR_MASK /* 20.6.1 */ - | PIN_BASED_NMI_EXITING /* 20.6.1 */ - ); - vmcs_write32_fixedbits(MSR_IA32_VMX_PROCBASED_CTLS, - CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_HLT_EXITING /* 20.6.2 */ - | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ - | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ - | CPU_BASED_USE_IO_BITMAPS /* 20.6.2 */ - | CPU_BASED_MOV_DR_EXITING - | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ - ); + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, + vmcs_config.pin_based_exec_ctrl); + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, + vmcs_config.cpu_based_exec_ctrl); vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); @@ -1459,12 +1517,11 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) setup_msrs(vcpu); - vmcs_write32_fixedbits(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_CONTROLS, - (HOST_IS_64 << 9)); /* 22.2,1, 20.7.1 */ + vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); /* 22.2.1, 20.8.1 */ - vmcs_write32_fixedbits(MSR_IA32_VMX_ENTRY_CTLS, - VM_ENTRY_CONTROLS, 0); + vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 519ef35341b4f360f072ea74e398b70a5a2fc270 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 16 Jul 2007 15:24:47 -0400 Subject: KVM: add hypercall nr to kvm_run Add the hypercall number to kvm_run and initialize it. This changes the ABI, but as this particular ABI was unusable before this no users are affected. 
Signed-off-by: Jeff Dike Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 1 + include/linux/kvm.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 69d9ab4e7cb..20947462f40 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1378,6 +1378,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) } switch (nr) { default: + run->hypercall.nr = nr; run->hypercall.args[0] = a0; run->hypercall.args[1] = a1; run->hypercall.args[2] = a2; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8db01a91e1a..91a446f450b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -99,6 +99,7 @@ struct kvm_run { } mmio; /* KVM_EXIT_HYPERCALL */ struct { + __u64 nr; __u64 args[6]; __u64 ret; __u32 longmode; -- cgit v1.2.3 From 15ad71460d75fd7ca41bb248a2310f3f39b302ba Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 11 Jul 2007 18:17:21 +0300 Subject: KVM: Use the scheduler preemption notifiers to make kvm preemptible Current kvm disables preemption while the new virtualization registers are in use. This of course is not very good for latency sensitive workloads (one use of virtualization is to offload user interface and other latency insensitive stuff to a container, so that it is easier to analyze the remaining workload). This patch re-enables preemption for kvm; preemption is now only disabled when switching the registers in and out, and during the switch to guest mode and back. Contains fixes from Shaohua Li . 
Signed-off-by: Avi Kivity --- drivers/kvm/Kconfig | 1 + drivers/kvm/kvm.h | 4 +++- drivers/kvm/kvm_main.c | 43 +++++++++++++++++++++++++++++++++++++------ drivers/kvm/mmu.c | 2 -- drivers/kvm/svm.c | 6 ++---- drivers/kvm/vmx.c | 22 +++++++++++++--------- 6 files changed, 56 insertions(+), 22 deletions(-) diff --git a/drivers/kvm/Kconfig b/drivers/kvm/Kconfig index 0a419a0de60..8749fa4ffce 100644 --- a/drivers/kvm/Kconfig +++ b/drivers/kvm/Kconfig @@ -17,6 +17,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on X86 && EXPERIMENTAL + select PREEMPT_NOTIFIERS select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index e92c84b04c1..0667183ecbe 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -301,6 +302,7 @@ void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_vcpu { struct kvm *kvm; + struct preempt_notifier preempt_notifier; int vcpu_id; struct mutex mutex; int cpu; @@ -429,7 +431,7 @@ struct kvm_arch_ops { struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); - void (*vcpu_load)(struct kvm_vcpu *vcpu); + void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); void (*vcpu_decache)(struct kvm_vcpu *vcpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 20947462f40..6035e6d3541 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -54,6 +54,8 @@ static cpumask_t cpus_hardware_enabled; struct kvm_arch_ops *kvm_arch_ops; +static __read_mostly struct preempt_ops kvm_preempt_ops; + #define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x) static struct kvm_stats_debugfs_item { @@ -239,13 +241,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); */ static void vcpu_load(struct kvm_vcpu *vcpu) { + int cpu; + mutex_lock(&vcpu->mutex); - 
kvm_arch_ops->vcpu_load(vcpu); + cpu = get_cpu(); + preempt_notifier_register(&vcpu->preempt_notifier); + kvm_arch_ops->vcpu_load(vcpu, cpu); + put_cpu(); } static void vcpu_put(struct kvm_vcpu *vcpu) { + preempt_disable(); kvm_arch_ops->vcpu_put(vcpu); + preempt_notifier_unregister(&vcpu->preempt_notifier); + preempt_enable(); mutex_unlock(&vcpu->mutex); } @@ -1672,9 +1682,7 @@ void kvm_resched(struct kvm_vcpu *vcpu) { if (!need_resched()) return; - vcpu_put(vcpu); cond_resched(); - vcpu_load(vcpu); } EXPORT_SYMBOL_GPL(kvm_resched); @@ -1722,11 +1730,9 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) unsigned bytes; int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1; - kvm_arch_ops->vcpu_put(vcpu); q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE, PAGE_KERNEL); if (!q) { - kvm_arch_ops->vcpu_load(vcpu); free_pio_guest_pages(vcpu); return -ENOMEM; } @@ -1738,7 +1744,6 @@ static int pio_copy_data(struct kvm_vcpu *vcpu) memcpy(p, q, bytes); q -= vcpu->pio.guest_page_offset; vunmap(q); - kvm_arch_ops->vcpu_load(vcpu); free_pio_guest_pages(vcpu); return 0; } @@ -2413,6 +2418,8 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) if (IS_ERR(vcpu)) return PTR_ERR(vcpu); + preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); + vcpu_load(vcpu); r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); @@ -3145,6 +3152,27 @@ static struct sys_device kvm_sysdev = { hpa_t bad_page_address; +static inline +struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) +{ + return container_of(pn, struct kvm_vcpu, preempt_notifier); +} + +static void kvm_sched_in(struct preempt_notifier *pn, int cpu) +{ + struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + + kvm_arch_ops->vcpu_load(vcpu, cpu); +} + +static void kvm_sched_out(struct preempt_notifier *pn, + struct task_struct *next) +{ + struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + + kvm_arch_ops->vcpu_put(vcpu); +} + int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) { int r; 
@@ -3191,6 +3219,9 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) goto out_free; } + kvm_preempt_ops.sched_in = kvm_sched_in; + kvm_preempt_ops.sched_out = kvm_sched_out; + return r; out_free: diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 5437de2aa2d..396c736e546 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -276,9 +276,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) kvm_mmu_free_some_pages(vcpu); if (r < 0) { spin_unlock(&vcpu->kvm->lock); - kvm_arch_ops->vcpu_put(vcpu); r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); - kvm_arch_ops->vcpu_load(vcpu); spin_lock(&vcpu->kvm->lock); kvm_mmu_free_some_pages(vcpu); } diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 0feec855859..3997bbd78fb 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -625,12 +625,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) kfree(svm); } -static void svm_vcpu_load(struct kvm_vcpu *vcpu) +static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_svm *svm = to_svm(vcpu); - int cpu, i; + int i; - cpu = get_cpu(); if (unlikely(cpu != vcpu->cpu)) { u64 tsc_this, delta; @@ -657,7 +656,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); rdtscll(vcpu->host_tsc); - put_cpu(); } static void svm_vcpu_decache(struct kvm_vcpu *vcpu) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 18f9b0b3fb1..8c87d20f8e3 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -396,6 +396,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) static void vmx_load_host_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long flags; if (!vmx->host_state.loaded) return; @@ -408,12 +409,12 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) * If we have to reload gs, we must take care to * preserve our gs base. 
*/ - local_irq_disable(); + local_irq_save(flags); load_gs(vmx->host_state.gs_sel); #ifdef CONFIG_X86_64 wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); #endif - local_irq_enable(); + local_irq_restore(flags); reload_tss(); } @@ -427,15 +428,12 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. */ -static void vmx_vcpu_load(struct kvm_vcpu *vcpu) +static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 phys_addr = __pa(vmx->vmcs); - int cpu; u64 tsc_this, delta; - cpu = get_cpu(); - if (vcpu->cpu != cpu) vcpu_clear(vcpu); @@ -480,7 +478,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { vmx_load_host_state(vcpu); kvm_put_guest_fpu(vcpu); - put_cpu(); } static void vmx_fpu_activate(struct kvm_vcpu *vcpu) @@ -2127,6 +2124,8 @@ again: if (unlikely(r)) goto out; + preempt_disable(); + if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); @@ -2269,6 +2268,9 @@ again: vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); + vmx->launched = 1; + + preempt_enable(); if (unlikely(fail)) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; @@ -2283,7 +2285,6 @@ again: if (unlikely(prof_on == KVM_PROFILING)) profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); - vmx->launched = 1; r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { /* Give scheduler a change to reschedule. 
*/ @@ -2372,6 +2373,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + int cpu; if (!vmx) return ERR_PTR(-ENOMEM); @@ -2396,9 +2398,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmcs_clear(vmx->vmcs); - vmx_vcpu_load(&vmx->vcpu); + cpu = get_cpu(); + vmx_vcpu_load(&vmx->vcpu, cpu); err = vmx_vcpu_setup(&vmx->vcpu); vmx_vcpu_put(&vmx->vcpu); + put_cpu(); if (err) goto free_vmcs; -- cgit v1.2.3 From 11ec2804711896546ee3c945f3786c7f9fdd175a Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 23 Jul 2007 14:51:37 +0800 Subject: KVM: Convert vm lock to a mutex This allows the kvm mmu to perform sleepy operations, such as memory allocation. Signed-off-by: Shaohua Li Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 +- drivers/kvm/kvm_main.c | 69 ++++++++++++++++++++++++-------------------------- drivers/kvm/mmu.c | 9 +++---- drivers/kvm/svm.c | 8 +++--- drivers/kvm/vmx.c | 8 +++--- 5 files changed, 46 insertions(+), 50 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 0667183ecbe..1072c8322d4 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -393,7 +393,7 @@ struct kvm_memory_slot { }; struct kvm { - spinlock_t lock; /* protects everything except vcpus */ + struct mutex lock; /* protects everything except vcpus */ int naliases; struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS]; int nmemslots; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 6035e6d3541..7aeaaba79c5 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -363,7 +363,7 @@ static struct kvm *kvm_create_vm(void) return ERR_PTR(-ENOMEM); kvm_io_bus_init(&kvm->pio_bus); - spin_lock_init(&kvm->lock); + mutex_init(&kvm->lock); INIT_LIST_HEAD(&kvm->active_mmu_pages); kvm_io_bus_init(&kvm->mmio_bus); spin_lock(&kvm_lock); @@ -489,7 +489,7 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) struct page *page; u64 
pdpte[ARRAY_SIZE(vcpu->pdptrs)]; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); page = gfn_to_page(vcpu->kvm, pdpt_gfn); if (!page) { ret = 0; @@ -510,7 +510,7 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs)); out: - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return ret; } @@ -570,9 +570,9 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_arch_ops->set_cr0(vcpu, cr0); vcpu->cr0 = cr0; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); kvm_mmu_reset_context(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return; } EXPORT_SYMBOL_GPL(set_cr0); @@ -611,9 +611,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return; } kvm_arch_ops->set_cr4(vcpu, cr4); - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); kvm_mmu_reset_context(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); } EXPORT_SYMBOL_GPL(set_cr4); @@ -650,7 +650,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) } vcpu->cr3 = cr3; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); /* * Does the new cr3 value map to physical memory? (Note, we * catch an invalid cr3 even in real-mode, because it would @@ -664,7 +664,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) inject_gp(vcpu); else vcpu->mmu.new_cr3(vcpu); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); } EXPORT_SYMBOL_GPL(set_cr3); @@ -741,7 +741,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; raced: - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); memory_config_version = kvm->memory_config_version; new = old = *memslot; @@ -770,7 +770,7 @@ raced: * Do memory allocations outside lock. memory_config_version will * detect any races. 
*/ - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); /* Deallocate if slot is being removed */ if (!npages) @@ -809,10 +809,10 @@ raced: memset(new.dirty_bitmap, 0, dirty_bytes); } - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); if (memory_config_version != kvm->memory_config_version) { - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); kvm_free_physmem_slot(&new, &old); goto raced; } @@ -830,13 +830,13 @@ raced: kvm_mmu_slot_remove_write_access(kvm, mem->slot); kvm_flush_remote_tlbs(kvm); - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); kvm_free_physmem_slot(&old, &new); return 0; out_unlock: - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); out_free: kvm_free_physmem_slot(&new, &old); out: @@ -854,14 +854,14 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, int n; unsigned long any = 0; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); /* * Prevent changes to guest memory configuration even while the lock * is not taken. */ ++kvm->busy; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); r = -EINVAL; if (log->slot >= KVM_MEMORY_SLOTS) goto out; @@ -880,18 +880,18 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) goto out; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); kvm_mmu_slot_remove_write_access(kvm, log->slot); kvm_flush_remote_tlbs(kvm); memset(memslot->dirty_bitmap, 0, n); - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); r = 0; out: - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); --kvm->busy; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); return r; } @@ -921,7 +921,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, < alias->target_phys_addr) goto out; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); p = &kvm->aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; @@ -935,7 +935,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, kvm_mmu_zap_all(kvm); - spin_unlock(&kvm->lock); + 
mutex_unlock(&kvm->lock); return 0; @@ -1900,12 +1900,12 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->pio.cur_count = now; for (i = 0; i < nr_pages; ++i) { - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); if (page) get_page(page); vcpu->pio.guest_pages[i] = page; - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); if (!page) { inject_gp(vcpu); free_pio_guest_pages(vcpu); @@ -2298,13 +2298,13 @@ static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, gpa_t gpa; vcpu_load(vcpu); - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; tr->usermode = 0; - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); vcpu_put(vcpu); return 0; @@ -2426,14 +2426,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) if (r < 0) goto free_vcpu; - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); if (kvm->vcpus[n]) { r = -EEXIST; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); goto mmu_unload; } kvm->vcpus[n] = vcpu; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); /* Now it's all set up, let userspace reach it */ r = create_vcpu_fd(vcpu); @@ -2442,9 +2442,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) return r; unlink: - spin_lock(&kvm->lock); + mutex_lock(&kvm->lock); kvm->vcpus[n] = NULL; - spin_unlock(&kvm->lock); + mutex_unlock(&kvm->lock); mmu_unload: vcpu_load(vcpu); @@ -2945,8 +2945,7 @@ static void decache_vcpus_on_cpu(int cpu) int i; spin_lock(&kvm_lock); - list_for_each_entry(vm, &vm_list, vm_list) { - spin_lock(&vm->lock); + list_for_each_entry(vm, &vm_list, vm_list) for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = vm->vcpus[i]; if (!vcpu) @@ -2967,8 +2966,6 @@ static void decache_vcpus_on_cpu(int cpu) mutex_unlock(&vcpu->mutex); } } - spin_unlock(&vm->lock); - } 
spin_unlock(&kvm_lock); } diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 396c736e546..e303b4137bf 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -275,10 +275,9 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT); kvm_mmu_free_some_pages(vcpu); if (r < 0) { - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); - spin_lock(&vcpu->kvm->lock); - kvm_mmu_free_some_pages(vcpu); + mutex_lock(&vcpu->kvm->lock); } return r; } @@ -1069,7 +1068,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) { int r; - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); r = mmu_topup_memory_caches(vcpu); if (r) goto out; @@ -1077,7 +1076,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); kvm_mmu_flush_tlb(vcpu); out: - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return r; } EXPORT_SYMBOL_GPL(kvm_mmu_load); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 3997bbd78fb..9a840e08b20 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -941,21 +941,21 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (is_external_interrupt(exit_int_info)) push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); fault_address = svm->vmcb->control.exit_info_2; error_code = svm->vmcb->control.exit_info_1; r = kvm_mmu_page_fault(vcpu, fault_address, error_code); if (r < 0) { - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return r; } if (!r) { - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return 1; } er = emulate_instruction(vcpu, kvm_run, fault_address, error_code); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); switch (er) { case EMULATE_DONE: diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 8c87d20f8e3..5b77d9b7b1a 100644 --- 
a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1711,19 +1711,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (is_page_fault(intr_info)) { cr2 = vmcs_readl(EXIT_QUALIFICATION); - spin_lock(&vcpu->kvm->lock); + mutex_lock(&vcpu->kvm->lock); r = kvm_mmu_page_fault(vcpu, cr2, error_code); if (r < 0) { - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return r; } if (!r) { - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); return 1; } er = emulate_instruction(vcpu, kvm_run, cr2, error_code); - spin_unlock(&vcpu->kvm->lock); + mutex_unlock(&vcpu->kvm->lock); switch (er) { case EMULATE_DONE: -- cgit v1.2.3 From 9bd01506ee551689b90ba5822c28ef55207146af Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Jul 2007 16:29:56 +1000 Subject: KVM: fx_init() needs preemption disabled while it plays with the FPU state Now that kvm generally runs with preemption enabled, we need to protect the fpu initialization sequence. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 7aeaaba79c5..5b42731beba 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -693,10 +693,13 @@ void fx_init(struct kvm_vcpu *vcpu) } *fx_image; + /* Initialize guest FPU by resetting ours and saving into guest's */ + preempt_disable(); fx_save(vcpu->host_fx_image); fpu_init(); fx_save(vcpu->guest_fx_image); fx_restore(vcpu->host_fx_image); + preempt_enable(); fx_image = (struct fx_image_s *)vcpu->guest_fx_image; fx_image->mxcsr = 0x1f80; -- cgit v1.2.3 From 8b9cf98cc7ea7354d6d4cbc4ffdb18a26a1129d3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Jul 2007 16:31:43 +1000 Subject: KVM: VMX: pass vcpu_vmx internally container_of is wonderful, but not casting at all is better. 
This patch changes vmx.c's internal functions to pass "struct vcpu_vmx" instead of "struct kvm_vcpu" and using container_of. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 140 +++++++++++++++++++++++++----------------------------- 1 file changed, 65 insertions(+), 75 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 5b77d9b7b1a..cc7ee3d484f 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -140,9 +140,8 @@ static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr) return (u64)msr.data & EFER_SAVE_RESTORE_BITS; } -static inline int msr_efer_need_save_restore(struct kvm_vcpu *vcpu) +static inline int msr_efer_need_save_restore(struct vcpu_vmx *vmx) { - struct vcpu_vmx *vmx = to_vmx(vcpu); int efer_offset = vmx->msr_offset_efer; return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) != msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]); @@ -168,9 +167,8 @@ static inline int is_external_interrupt(u32 intr_info) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } -static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) +static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) { - struct vcpu_vmx *vmx = to_vmx(vcpu); int i; for (i = 0; i < vmx->nmsrs; ++i) @@ -179,12 +177,11 @@ static int __find_msr_index(struct kvm_vcpu *vcpu, u32 msr) return -1; } -static struct kvm_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr) +static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr) { - struct vcpu_vmx *vmx = to_vmx(vcpu); int i; - i = __find_msr_index(vcpu, msr); + i = __find_msr_index(vmx, msr); if (i >= 0) return &vmx->guest_msrs[i]; return NULL; @@ -205,24 +202,24 @@ static void vmcs_clear(struct vmcs *vmcs) static void __vcpu_clear(void *arg) { - struct kvm_vcpu *vcpu = arg; - struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vcpu_vmx *vmx = arg; int cpu = raw_smp_processor_id(); - if (vcpu->cpu == cpu) + if (vmx->vcpu.cpu == cpu) vmcs_clear(vmx->vmcs); if 
(per_cpu(current_vmcs, cpu) == vmx->vmcs) per_cpu(current_vmcs, cpu) = NULL; - rdtscll(vcpu->host_tsc); + rdtscll(vmx->vcpu.host_tsc); } -static void vcpu_clear(struct kvm_vcpu *vcpu) +static void vcpu_clear(struct vcpu_vmx *vmx) { - if (vcpu->cpu != raw_smp_processor_id() && vcpu->cpu != -1) - smp_call_function_single(vcpu->cpu, __vcpu_clear, vcpu, 0, 1); + if (vmx->vcpu.cpu != raw_smp_processor_id() && vmx->vcpu.cpu != -1) + smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, + vmx, 0, 1); else - __vcpu_clear(vcpu); - to_vmx(vcpu)->launched = 0; + __vcpu_clear(vmx); + vmx->launched = 0; } static unsigned long vmcs_readl(unsigned long field) @@ -332,23 +329,20 @@ static void reload_tss(void) #endif } -static void load_transition_efer(struct kvm_vcpu *vcpu) +static void load_transition_efer(struct vcpu_vmx *vmx) { u64 trans_efer; - struct vcpu_vmx *vmx = to_vmx(vcpu); int efer_offset = vmx->msr_offset_efer; trans_efer = vmx->host_msrs[efer_offset].data; trans_efer &= ~EFER_SAVE_RESTORE_BITS; trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]); wrmsrl(MSR_EFER, trans_efer); - vcpu->stat.efer_reload++; + vmx->vcpu.stat.efer_reload++; } -static void vmx_save_host_state(struct kvm_vcpu *vcpu) +static void vmx_save_host_state(struct vcpu_vmx *vmx) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - if (vmx->host_state.loaded) return; @@ -383,19 +377,18 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #endif #ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { + if (is_long_mode(&vmx->vcpu)) { save_msrs(vmx->host_msrs + vmx->msr_offset_kernel_gs_base, 1); } #endif load_msrs(vmx->guest_msrs, vmx->save_nmsrs); - if (msr_efer_need_save_restore(vcpu)) - load_transition_efer(vcpu); + if (msr_efer_need_save_restore(vmx)) + load_transition_efer(vmx); } -static void vmx_load_host_state(struct kvm_vcpu *vcpu) +static void vmx_load_host_state(struct vcpu_vmx *vmx) { - struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long flags; if (!vmx->host_state.loaded) @@ -420,7 
+413,7 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu) } save_msrs(vmx->guest_msrs, vmx->save_nmsrs); load_msrs(vmx->host_msrs, vmx->save_nmsrs); - if (msr_efer_need_save_restore(vcpu)) + if (msr_efer_need_save_restore(vmx)) load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1); } @@ -435,7 +428,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) u64 tsc_this, delta; if (vcpu->cpu != cpu) - vcpu_clear(vcpu); + vcpu_clear(vmx); if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { u8 error; @@ -476,7 +469,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { - vmx_load_host_state(vcpu); + vmx_load_host_state(to_vmx(vcpu)); kvm_put_guest_fpu(vcpu); } @@ -502,7 +495,7 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) { - vcpu_clear(vcpu); + vcpu_clear(to_vmx(vcpu)); } static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) @@ -550,9 +543,8 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) /* * Swap MSR entry in host/guest MSR entry array. */ -void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) +static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) { - struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_msr_entry tmp; tmp = vmx->guest_msrs[to]; @@ -568,44 +560,43 @@ void move_msr_up(struct kvm_vcpu *vcpu, int from, int to) * msrs. Don't touch the 64-bit msrs if the guest is in legacy * mode, as fiddling with msrs is very expensive. 
*/ -static void setup_msrs(struct kvm_vcpu *vcpu) +static void setup_msrs(struct vcpu_vmx *vmx) { - struct vcpu_vmx *vmx = to_vmx(vcpu); int save_nmsrs; save_nmsrs = 0; #ifdef CONFIG_X86_64 - if (is_long_mode(vcpu)) { + if (is_long_mode(&vmx->vcpu)) { int index; - index = __find_msr_index(vcpu, MSR_SYSCALL_MASK); + index = __find_msr_index(vmx, MSR_SYSCALL_MASK); if (index >= 0) - move_msr_up(vcpu, index, save_nmsrs++); - index = __find_msr_index(vcpu, MSR_LSTAR); + move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_LSTAR); if (index >= 0) - move_msr_up(vcpu, index, save_nmsrs++); - index = __find_msr_index(vcpu, MSR_CSTAR); + move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_CSTAR); if (index >= 0) - move_msr_up(vcpu, index, save_nmsrs++); - index = __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); + move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE); if (index >= 0) - move_msr_up(vcpu, index, save_nmsrs++); + move_msr_up(vmx, index, save_nmsrs++); /* * MSR_K6_STAR is only needed on long mode guests, and only * if efer.sce is enabled. 
*/ - index = __find_msr_index(vcpu, MSR_K6_STAR); - if ((index >= 0) && (vcpu->shadow_efer & EFER_SCE)) - move_msr_up(vcpu, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_K6_STAR); + if ((index >= 0) && (vmx->vcpu.shadow_efer & EFER_SCE)) + move_msr_up(vmx, index, save_nmsrs++); } #endif vmx->save_nmsrs = save_nmsrs; #ifdef CONFIG_X86_64 vmx->msr_offset_kernel_gs_base = - __find_msr_index(vcpu, MSR_KERNEL_GS_BASE); + __find_msr_index(vmx, MSR_KERNEL_GS_BASE); #endif - vmx->msr_offset_efer = __find_msr_index(vcpu, MSR_EFER); + vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); } /* @@ -672,7 +663,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) data = vmcs_readl(GUEST_SYSENTER_ESP); break; default: - msr = find_msr_entry(vcpu, msr_index); + msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { data = msr->data; break; @@ -700,7 +691,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_EFER: ret = kvm_set_msr_common(vcpu, msr_index, data); if (vmx->host_state.loaded) - load_transition_efer(vcpu); + load_transition_efer(vmx); break; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); @@ -722,7 +713,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) guest_write_tsc(data); break; default: - msr = find_msr_entry(vcpu, msr_index); + msr = find_msr_entry(vmx, msr_index); if (msr) { msr->data = data; if (vmx->host_state.loaded) @@ -1116,7 +1107,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) vcpu->shadow_efer |= EFER_LMA; - find_msr_entry(vcpu, MSR_EFER)->data |= EFER_LMA | EFER_LME; + find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME; vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) | VM_ENTRY_CONTROLS_IA32E_MASK); @@ -1186,7 +1177,8 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) { - struct kvm_msr_entry *msr = find_msr_entry(vcpu, MSR_EFER); + struct 
vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER); vcpu->shadow_efer = efer; if (efer & EFER_LMA) { @@ -1202,7 +1194,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) msr->data = efer & ~EFER_LME; } - setup_msrs(vcpu); + setup_msrs(vmx); } #endif @@ -1364,9 +1356,8 @@ static void seg_setup(int seg) /* * Sets up the vmcs for emulated real mode. */ -static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) +static int vmx_vcpu_setup(struct vcpu_vmx *vmx) { - struct vcpu_vmx *vmx = to_vmx(vcpu); u32 host_sysenter_cs; u32 junk; unsigned long a; @@ -1375,19 +1366,18 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) int ret = 0; unsigned long kvm_vmx_return; - if (!init_rmode_tss(vcpu->kvm)) { + if (!init_rmode_tss(vmx->vcpu.kvm)) { ret = -ENOMEM; goto out; } - memset(vcpu->regs, 0, sizeof(vcpu->regs)); - vcpu->regs[VCPU_REGS_RDX] = get_rdx_init_val(); - vcpu->cr8 = 0; - vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; - if (vcpu->vcpu_id == 0) - vcpu->apic_base |= MSR_IA32_APICBASE_BSP; + vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val(); + vmx->vcpu.cr8 = 0; + vmx->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + if (vmx->vcpu.vcpu_id == 0) + vmx->vcpu.apic_base |= MSR_IA32_APICBASE_BSP; - fx_init(vcpu); + fx_init(&vmx->vcpu); /* * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode @@ -1512,7 +1502,7 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) ++vmx->nmsrs; } - setup_msrs(vcpu); + setup_msrs(vmx); vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); @@ -1529,14 +1519,14 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK); - vcpu->cr0 = 0x60000010; - vmx_set_cr0(vcpu, vcpu->cr0); // enter rmode - vmx_set_cr4(vcpu, 0); + vmx->vcpu.cr0 = 0x60000010; + vmx_set_cr0(&vmx->vcpu, vmx->vcpu.cr0); // enter rmode + vmx_set_cr4(&vmx->vcpu, 0); #ifdef CONFIG_X86_64 - vmx_set_efer(vcpu, 0); + 
vmx_set_efer(&vmx->vcpu, 0); #endif - vmx_fpu_activate(vcpu); - update_exception_bitmap(vcpu); + vmx_fpu_activate(&vmx->vcpu); + update_exception_bitmap(&vmx->vcpu); return 0; @@ -2129,7 +2119,7 @@ again: if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); - vmx_save_host_state(vcpu); + vmx_save_host_state(vmx); kvm_load_guest_fpu(vcpu); /* @@ -2352,7 +2342,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); if (vmx->vmcs) { - on_each_cpu(__vcpu_clear, vcpu, 0, 1); + on_each_cpu(__vcpu_clear, vmx, 0, 1); free_vmcs(vmx->vmcs); vmx->vmcs = NULL; } @@ -2400,7 +2390,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); - err = vmx_vcpu_setup(&vmx->vcpu); + err = vmx_vcpu_setup(vmx); vmx_vcpu_put(&vmx->vcpu); put_cpu(); if (err) -- cgit v1.2.3 From 3077c4513c46f66537c1205acc464e49c9847dc0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Jul 2007 16:41:57 +1000 Subject: KVM: Remove three magic numbers There are several places where hardcoded numbers are used in place of the easily-available constant, which is poor form. 
Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 2 +- drivers/kvm/svm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 5b42731beba..6ad1b04f309 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -413,7 +413,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu) { int i; - for (i = 0; i < 2; ++i) + for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i) if (vcpu->pio.guest_pages[i]) { __free_page(vcpu->pio.guest_pages[i]); vcpu->pio.guest_pages[i] = NULL; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 9a840e08b20..c18f0b2d3d3 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -241,7 +241,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__); return; } - if (svm->next_rip - svm->vmcb->save.rip > 15) { + if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE) { printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n", __FUNCTION__, svm->vmcb->save.rip, -- cgit v1.2.3 From e756fc626d7d8a220864dd6bc6634d9d933650b0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Jul 2007 20:07:08 +1000 Subject: KVM: SVM: de-containization container_of is wonderful, but not casting at all is better. This patch changes svm.c's internal functions to pass "struct vcpu_svm" instead of "struct kvm_vcpu" and using container_of. 
Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 232 +++++++++++++++++++++++++----------------------------- 1 file changed, 108 insertions(+), 124 deletions(-) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index c18f0b2d3d3..504fb50662d 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -98,9 +98,9 @@ static inline u32 svm_has(u32 feat) return svm_features & feat; } -static unsigned get_addr_size(struct kvm_vcpu *vcpu) +static unsigned get_addr_size(struct vcpu_svm *svm) { - struct vmcb_save_area *sa = &to_svm(vcpu)->vmcb->save; + struct vmcb_save_area *sa = &svm->vmcb->save; u16 cs_attrib; if (!(sa->cr0 & X86_CR0_PE) || (sa->rflags & X86_EFLAGS_VM)) @@ -865,17 +865,15 @@ static void save_host_msrs(struct kvm_vcpu *vcpu) #endif } -static void new_asid(struct kvm_vcpu *vcpu, struct svm_cpu_data *svm_data) +static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) { - struct vcpu_svm *svm = to_svm(vcpu); - if (svm_data->next_asid > svm_data->max_asid) { ++svm_data->asid_generation; svm_data->next_asid = 1; svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } - vcpu->cpu = svm_data->cpu; + svm->vcpu.cpu = svm_data->cpu; svm->asid_generation = svm_data->asid_generation; svm->vmcb->control.asid = svm_data->next_asid++; } @@ -929,42 +927,43 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, } } -static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); u32 exit_int_info = svm->vmcb->control.exit_int_info; + struct kvm *kvm = svm->vcpu.kvm; u64 fault_address; u32 error_code; enum emulation_result er; int r; if (is_external_interrupt(exit_int_info)) - push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); + push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); - mutex_lock(&vcpu->kvm->lock); + mutex_lock(&kvm->lock); fault_address = 
svm->vmcb->control.exit_info_2; error_code = svm->vmcb->control.exit_info_1; - r = kvm_mmu_page_fault(vcpu, fault_address, error_code); + r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); if (r < 0) { - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&kvm->lock); return r; } if (!r) { - mutex_unlock(&vcpu->kvm->lock); + mutex_unlock(&kvm->lock); return 1; } - er = emulate_instruction(vcpu, kvm_run, fault_address, error_code); - mutex_unlock(&vcpu->kvm->lock); + er = emulate_instruction(&svm->vcpu, kvm_run, fault_address, + error_code); + mutex_unlock(&kvm->lock); switch (er) { case EMULATE_DONE: return 1; case EMULATE_DO_MMIO: - ++vcpu->stat.mmio_exits; + ++svm->vcpu.stat.mmio_exits; return 0; case EMULATE_FAIL: - vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); + vcpu_printf(&svm->vcpu, "%s: emulate fail\n", __FUNCTION__); break; default: BUG(); @@ -974,21 +973,18 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } -static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); - if (!(vcpu->cr0 & X86_CR0_TS)) + if (!(svm->vcpu.cr0 & X86_CR0_TS)) svm->vmcb->save.cr0 &= ~X86_CR0_TS; - vcpu->fpu_active = 1; + svm->vcpu.fpu_active = 1; return 1; } -static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); /* * VMCB is undefined after a SHUTDOWN intercept * so reinitialize it. 
@@ -1000,11 +996,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } -static int io_get_override(struct kvm_vcpu *vcpu, +static int io_get_override(struct vcpu_svm *svm, struct vmcb_seg **seg, int *addr_override) { - struct vcpu_svm *svm = to_svm(vcpu); u8 inst[MAX_INST_SIZE]; unsigned ins_length; gva_t rip; @@ -1024,7 +1019,7 @@ static int io_get_override(struct kvm_vcpu *vcpu, svm->vmcb->control.exit_info_2, ins_length); - if (kvm_read_guest(vcpu, rip, ins_length, inst) != ins_length) + if (kvm_read_guest(&svm->vcpu, rip, ins_length, inst) != ins_length) /* #PF */ return 0; @@ -1065,28 +1060,27 @@ static int io_get_override(struct kvm_vcpu *vcpu, return 0; } -static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) +static unsigned long io_adress(struct vcpu_svm *svm, int ins, gva_t *address) { unsigned long addr_mask; unsigned long *reg; struct vmcb_seg *seg; int addr_override; - struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_save_area *save_area = &svm->vmcb->save; u16 cs_attrib = save_area->cs.attrib; - unsigned addr_size = get_addr_size(vcpu); + unsigned addr_size = get_addr_size(svm); - if (!io_get_override(vcpu, &seg, &addr_override)) + if (!io_get_override(svm, &seg, &addr_override)) return 0; if (addr_override) addr_size = (addr_size == 2) ? 4: (addr_size >> 1); if (ins) { - reg = &vcpu->regs[VCPU_REGS_RDI]; + reg = &svm->vcpu.regs[VCPU_REGS_RDI]; seg = &svm->vmcb->save.es; } else { - reg = &vcpu->regs[VCPU_REGS_RSI]; + reg = &svm->vcpu.regs[VCPU_REGS_RSI]; seg = (seg) ? 
seg : &svm->vmcb->save.ds; } @@ -1099,7 +1093,7 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) } if (!(seg->attrib & SVM_SELECTOR_P_SHIFT)) { - svm_inject_gp(vcpu, 0); + svm_inject_gp(&svm->vcpu, 0); return 0; } @@ -1107,16 +1101,15 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address) return addr_mask; } -static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); u32 io_info = svm->vmcb->control.exit_info_1; //address size bug? int size, down, in, string, rep; unsigned port; unsigned long count; gva_t address = 0; - ++vcpu->stat.io_exits; + ++svm->vcpu.stat.io_exits; svm->next_rip = svm->vmcb->control.exit_info_2; @@ -1131,7 +1124,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (string) { unsigned addr_mask; - addr_mask = io_adress(vcpu, in, &address); + addr_mask = io_adress(svm, in, &address); if (!addr_mask) { printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__); @@ -1139,60 +1132,57 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if (rep) - count = vcpu->regs[VCPU_REGS_RCX] & addr_mask; + count = svm->vcpu.regs[VCPU_REGS_RCX] & addr_mask; } - return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down, - address, rep, port); + return kvm_setup_pio(&svm->vcpu, kvm_run, in, size, count, string, + down, address, rep, port); } -static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { return 1; } -static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int halt_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); - svm->next_rip = svm->vmcb->save.rip + 1; - skip_emulated_instruction(vcpu); - return 
kvm_emulate_halt(vcpu); + skip_emulated_instruction(&svm->vcpu); + return kvm_emulate_halt(&svm->vcpu); } -static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); - svm->next_rip = svm->vmcb->save.rip + 3; - skip_emulated_instruction(vcpu); - return kvm_hypercall(vcpu, kvm_run); + skip_emulated_instruction(&svm->vcpu); + return kvm_hypercall(&svm->vcpu, kvm_run); } -static int invalid_op_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int invalid_op_interception(struct vcpu_svm *svm, + struct kvm_run *kvm_run) { - inject_ud(vcpu); + inject_ud(&svm->vcpu); return 1; } -static int task_switch_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int task_switch_interception(struct vcpu_svm *svm, + struct kvm_run *kvm_run) { printk(KERN_DEBUG "%s: task swiche is unsupported\n", __FUNCTION__); kvm_run->exit_reason = KVM_EXIT_UNKNOWN; return 0; } -static int cpuid_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); - svm->next_rip = svm->vmcb->save.rip + 2; - kvm_emulate_cpuid(vcpu); + kvm_emulate_cpuid(&svm->vcpu); return 1; } -static int emulate_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int emulate_on_interception(struct vcpu_svm *svm, + struct kvm_run *kvm_run) { - if (emulate_instruction(vcpu, NULL, 0, 0) != EMULATE_DONE) + if (emulate_instruction(&svm->vcpu, NULL, 0, 0) != EMULATE_DONE) printk(KERN_ERR "%s: failed\n", __FUNCTION__); return 1; } @@ -1241,19 +1231,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) return 0; } -static int rdmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = 
to_svm(vcpu); - u32 ecx = vcpu->regs[VCPU_REGS_RCX]; + u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX]; u64 data; - if (svm_get_msr(vcpu, ecx, &data)) - svm_inject_gp(vcpu, 0); + if (svm_get_msr(&svm->vcpu, ecx, &data)) + svm_inject_gp(&svm->vcpu, 0); else { svm->vmcb->save.rax = data & 0xffffffff; - vcpu->regs[VCPU_REGS_RDX] = data >> 32; + svm->vcpu.regs[VCPU_REGS_RDX] = data >> 32; svm->next_rip = svm->vmcb->save.rip + 2; - skip_emulated_instruction(vcpu); + skip_emulated_instruction(&svm->vcpu); } return 1; } @@ -1302,29 +1291,28 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) return 0; } -static int wrmsr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); - u32 ecx = vcpu->regs[VCPU_REGS_RCX]; + u32 ecx = svm->vcpu.regs[VCPU_REGS_RCX]; u64 data = (svm->vmcb->save.rax & -1u) - | ((u64)(vcpu->regs[VCPU_REGS_RDX] & -1u) << 32); + | ((u64)(svm->vcpu.regs[VCPU_REGS_RDX] & -1u) << 32); svm->next_rip = svm->vmcb->save.rip + 2; - if (svm_set_msr(vcpu, ecx, data)) - svm_inject_gp(vcpu, 0); + if (svm_set_msr(&svm->vcpu, ecx, data)) + svm_inject_gp(&svm->vcpu, 0); else - skip_emulated_instruction(vcpu); + skip_emulated_instruction(&svm->vcpu); return 1; } -static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - if (to_svm(vcpu)->vmcb->control.exit_info_1) - return wrmsr_interception(vcpu, kvm_run); + if (svm->vmcb->control.exit_info_1) + return wrmsr_interception(svm, kvm_run); else - return rdmsr_interception(vcpu, kvm_run); + return rdmsr_interception(svm, kvm_run); } -static int interrupt_window_interception(struct kvm_vcpu *vcpu, +static int interrupt_window_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { /* @@ -1332,8 +1320,8 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu, * possible */ if 
(kvm_run->request_interrupt_window && - !vcpu->irq_summary) { - ++vcpu->stat.irq_window_exits; + !svm->vcpu.irq_summary) { + ++svm->vcpu.stat.irq_window_exits; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; } @@ -1341,7 +1329,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu, return 1; } -static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, +static int (*svm_exit_handlers[])(struct vcpu_svm *svm, struct kvm_run *kvm_run) = { [SVM_EXIT_READ_CR0] = emulate_on_interception, [SVM_EXIT_READ_CR3] = emulate_on_interception, @@ -1388,9 +1376,8 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, }; -static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int handle_exit(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); u32 exit_code = svm->vmcb->control.exit_code; if (is_external_interrupt(svm->vmcb->control.exit_int_info) && @@ -1407,7 +1394,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } - return svm_exit_handlers[exit_code](vcpu, kvm_run); + return svm_exit_handlers[exit_code](svm, kvm_run); } static void reload_tss(struct kvm_vcpu *vcpu) @@ -1419,80 +1406,77 @@ static void reload_tss(struct kvm_vcpu *vcpu) load_TR_desc(); } -static void pre_svm_run(struct kvm_vcpu *vcpu) +static void pre_svm_run(struct vcpu_svm *svm) { - struct vcpu_svm *svm = to_svm(vcpu); int cpu = raw_smp_processor_id(); struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; - if (vcpu->cpu != cpu || + if (svm->vcpu.cpu != cpu || svm->asid_generation != svm_data->asid_generation) - new_asid(vcpu, svm_data); + new_asid(svm, svm_data); } -static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu) +static inline void kvm_do_inject_irq(struct vcpu_svm *svm) { struct vmcb_control_area *control; - control = &to_svm(vcpu)->vmcb->control; - control->int_vector = pop_irq(vcpu); + control = &svm->vmcb->control; + 
control->int_vector = pop_irq(&svm->vcpu); control->int_ctl &= ~V_INTR_PRIO_MASK; control->int_ctl |= V_IRQ_MASK | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); } -static void kvm_reput_irq(struct kvm_vcpu *vcpu) +static void kvm_reput_irq(struct vcpu_svm *svm) { - struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + struct vmcb_control_area *control = &svm->vmcb->control; if (control->int_ctl & V_IRQ_MASK) { control->int_ctl &= ~V_IRQ_MASK; - push_irq(vcpu, control->int_vector); + push_irq(&svm->vcpu, control->int_vector); } - vcpu->interrupt_window_open = + svm->vcpu.interrupt_window_open = !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); } -static void do_interrupt_requests(struct kvm_vcpu *vcpu, +static void do_interrupt_requests(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; - vcpu->interrupt_window_open = + svm->vcpu.interrupt_window_open = (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && (svm->vmcb->save.rflags & X86_EFLAGS_IF)); - if (vcpu->interrupt_window_open && vcpu->irq_summary) + if (svm->vcpu.interrupt_window_open && svm->vcpu.irq_summary) /* * If interrupts enabled, and not blocked by sti or mov ss. Good. */ - kvm_do_inject_irq(vcpu); + kvm_do_inject_irq(svm); /* * Interrupts blocked. Wait for unblock. 
*/ - if (!vcpu->interrupt_window_open && - (vcpu->irq_summary || kvm_run->request_interrupt_window)) { + if (!svm->vcpu.interrupt_window_open && + (svm->vcpu.irq_summary || kvm_run->request_interrupt_window)) { control->intercept |= 1ULL << INTERCEPT_VINTR; } else control->intercept &= ~(1ULL << INTERCEPT_VINTR); } -static void post_kvm_run_save(struct kvm_vcpu *vcpu, +static void post_kvm_run_save(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - struct vcpu_svm *svm = to_svm(vcpu); - - kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && - vcpu->irq_summary == 0); + kvm_run->ready_for_interrupt_injection + = (svm->vcpu.interrupt_window_open && + svm->vcpu.irq_summary == 0); kvm_run->if_flag = (svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; - kvm_run->cr8 = vcpu->cr8; - kvm_run->apic_base = vcpu->apic_base; + kvm_run->cr8 = svm->vcpu.cr8; + kvm_run->apic_base = svm->vcpu.apic_base; } /* @@ -1501,13 +1485,13 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, * * No need to exit to userspace if we already have an interrupt queued. 
*/ -static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, +static int dm_request_for_irq_injection(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - return (!vcpu->irq_summary && + return (!svm->vcpu.irq_summary && kvm_run->request_interrupt_window && - vcpu->interrupt_window_open && - (to_svm(vcpu)->vmcb->save.rflags & X86_EFLAGS_IF)); + svm->vcpu.interrupt_window_open && + (svm->vmcb->save.rflags & X86_EFLAGS_IF)); } static void save_db_regs(unsigned long *db_regs) @@ -1545,7 +1529,7 @@ again: return r; if (!vcpu->mmio_read_completed) - do_interrupt_requests(vcpu, kvm_run); + do_interrupt_requests(svm, kvm_run); clgi(); @@ -1554,7 +1538,7 @@ again: if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) svm_flush_tlb(vcpu); - pre_svm_run(vcpu); + pre_svm_run(svm); save_host_msrs(vcpu); fs_selector = read_fs(); @@ -1714,7 +1698,7 @@ again: stgi(); - kvm_reput_irq(vcpu); + kvm_reput_irq(svm); svm->next_rip = 0; @@ -1722,29 +1706,29 @@ again: kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason = svm->vmcb->control.exit_code; - post_kvm_run_save(vcpu, kvm_run); + post_kvm_run_save(svm, kvm_run); return 0; } - r = handle_exit(vcpu, kvm_run); + r = handle_exit(svm, kvm_run); if (r > 0) { if (signal_pending(current)) { ++vcpu->stat.signal_exits; - post_kvm_run_save(vcpu, kvm_run); + post_kvm_run_save(svm, kvm_run); kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } - if (dm_request_for_irq_injection(vcpu, kvm_run)) { + if (dm_request_for_irq_injection(svm, kvm_run)) { ++vcpu->stat.request_irq_exits; - post_kvm_run_save(vcpu, kvm_run); + post_kvm_run_save(svm, kvm_run); kvm_run->exit_reason = KVM_EXIT_INTR; return -EINTR; } kvm_resched(vcpu); goto again; } - post_kvm_run_save(vcpu, kvm_run); + post_kvm_run_save(svm, kvm_run); return r; } -- cgit v1.2.3 From 0e5017d4ae981b0311a3ec1ca04806a4ae7d7446 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Jul 2007 20:08:05 +1000 Subject: KVM: SVM: internal function 
name cleanup Changes some svm.c internal function names: 1) io_adress -> io_address (de-germanify the spelling) 2) kvm_reput_irq -> reput_irq (it's not a generic kvm function) 3) kvm_do_inject_irq -> inject_irq (it's not a generic kvm function) Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 504fb50662d..cd966739970 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1060,7 +1060,7 @@ static int io_get_override(struct vcpu_svm *svm, return 0; } -static unsigned long io_adress(struct vcpu_svm *svm, int ins, gva_t *address) +static unsigned long io_address(struct vcpu_svm *svm, int ins, gva_t *address) { unsigned long addr_mask; unsigned long *reg; @@ -1124,7 +1124,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) if (string) { unsigned addr_mask; - addr_mask = io_adress(svm, in, &address); + addr_mask = io_address(svm, in, &address); if (!addr_mask) { printk(KERN_DEBUG "%s: get io address failed\n", __FUNCTION__); @@ -1419,7 +1419,7 @@ static void pre_svm_run(struct vcpu_svm *svm) } -static inline void kvm_do_inject_irq(struct vcpu_svm *svm) +static inline void inject_irq(struct vcpu_svm *svm) { struct vmcb_control_area *control; @@ -1430,7 +1430,7 @@ static inline void kvm_do_inject_irq(struct vcpu_svm *svm) ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); } -static void kvm_reput_irq(struct vcpu_svm *svm) +static void reput_irq(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1456,7 +1456,7 @@ static void do_interrupt_requests(struct vcpu_svm *svm, /* * If interrupts enabled, and not blocked by sti or mov ss. Good. */ - kvm_do_inject_irq(svm); + inject_irq(svm); /* * Interrupts blocked. Wait for unblock. 
@@ -1698,7 +1698,7 @@ again: stgi(); - kvm_reput_irq(svm); + reput_irq(svm); svm->next_rip = 0; -- cgit v1.2.3 From cebff02b11b02a81d21b6cc8390938dc9bdf0a12 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Mon, 30 Jul 2007 13:35:24 +0300 Subject: KVM: Change the emulator_{read,write,cmpxchg}_* functions to take a vcpu ... instead of a x86_emulate_ctxt, so that other callers can use it easily. Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 25 +++++++++++-------------- drivers/kvm/x86_emulate.c | 35 ++++++++++++++++++++--------------- drivers/kvm/x86_emulate.h | 10 +++++----- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 6ad1b04f309..a65a145f305 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1020,9 +1020,8 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) static int emulator_read_std(unsigned long addr, void *val, unsigned int bytes, - struct x86_emulate_ctxt *ctxt) + struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = ctxt->vcpu; void *data = val; while (bytes) { @@ -1056,7 +1055,7 @@ static int emulator_read_std(unsigned long addr, static int emulator_write_std(unsigned long addr, const void *val, unsigned int bytes, - struct x86_emulate_ctxt *ctxt) + struct kvm_vcpu *vcpu) { printk(KERN_ERR "emulator_write_std: addr %lx n %d\n", addr, bytes); @@ -1083,9 +1082,8 @@ static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, static int emulator_read_emulated(unsigned long addr, void *val, unsigned int bytes, - struct x86_emulate_ctxt *ctxt) + struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = ctxt->vcpu; struct kvm_io_device *mmio_dev; gpa_t gpa; @@ -1093,7 +1091,7 @@ static int emulator_read_emulated(unsigned long addr, memcpy(val, vcpu->mmio_data, bytes); vcpu->mmio_read_completed = 0; return X86EMUL_CONTINUE; - } else if (emulator_read_std(addr, val, bytes, ctxt) + } else if (emulator_read_std(addr, val, bytes, vcpu) == 
X86EMUL_CONTINUE) return X86EMUL_CONTINUE; @@ -1140,9 +1138,8 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, static int emulator_write_emulated_onepage(unsigned long addr, const void *val, unsigned int bytes, - struct x86_emulate_ctxt *ctxt) + struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = ctxt->vcpu; struct kvm_io_device *mmio_dev; gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); @@ -1175,28 +1172,28 @@ static int emulator_write_emulated_onepage(unsigned long addr, static int emulator_write_emulated(unsigned long addr, const void *val, unsigned int bytes, - struct x86_emulate_ctxt *ctxt) + struct kvm_vcpu *vcpu) { /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { int rc, now; now = -addr & ~PAGE_MASK; - rc = emulator_write_emulated_onepage(addr, val, now, ctxt); + rc = emulator_write_emulated_onepage(addr, val, now, vcpu); if (rc != X86EMUL_CONTINUE) return rc; addr += now; val += now; bytes -= now; } - return emulator_write_emulated_onepage(addr, val, bytes, ctxt); + return emulator_write_emulated_onepage(addr, val, bytes, vcpu); } static int emulator_cmpxchg_emulated(unsigned long addr, const void *old, const void *new, unsigned int bytes, - struct x86_emulate_ctxt *ctxt) + struct kvm_vcpu *vcpu) { static int reported; @@ -1204,7 +1201,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, reported = 1; printk(KERN_WARNING "kvm: emulating exchange as write\n"); } - return emulator_write_emulated(addr, new, bytes, ctxt); + return emulator_write_emulated(addr, new, bytes, vcpu); } static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) @@ -1266,7 +1263,7 @@ static void report_emulation_failure(struct x86_emulate_ctxt *ctxt) if (reported) return; - emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt); + emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt->vcpu); printk(KERN_ERR "emulation failed but !mmio_needed?" 
" rip %lx %02x %02x %02x %02x\n", diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 2136da5d697..44eb28d3149 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -420,7 +420,7 @@ struct operand { #define insn_fetch(_type, _size, _eip) \ ({ unsigned long _x; \ rc = ops->read_std((unsigned long)(_eip) + ctxt->cs_base, &_x, \ - (_size), ctxt); \ + (_size), ctxt->vcpu); \ if ( rc != 0 ) \ goto done; \ (_eip) += (_size); \ @@ -469,10 +469,12 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, if (op_bytes == 2) op_bytes = 3; *address = 0; - rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, ctxt); + rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, + ctxt->vcpu); if (rc) return rc; - rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, ctxt); + rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, + ctxt->vcpu); return rc; } @@ -780,7 +782,7 @@ done_prefixes: src.type = OP_MEM; src.ptr = (unsigned long *)cr2; if ((rc = ops->read_emulated((unsigned long)src.ptr, - &src.val, src.bytes, ctxt)) != 0) + &src.val, src.bytes, ctxt->vcpu)) != 0) goto done; src.orig_val = src.val; break; @@ -850,7 +852,7 @@ done_prefixes: } if (!(d & Mov) && /* optimisation - avoid slow emulated read */ ((rc = ops->read_emulated((unsigned long)dst.ptr, - &dst.val, dst.bytes, ctxt)) != 0)) + &dst.val, dst.bytes, ctxt->vcpu)) != 0)) goto done; break; } @@ -963,7 +965,7 @@ done_prefixes: dst.bytes = 8; if ((rc = ops->read_std(register_address(ctxt->ss_base, _regs[VCPU_REGS_RSP]), - &dst.val, dst.bytes, ctxt)) != 0) + &dst.val, dst.bytes, ctxt->vcpu)) != 0) goto done; register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes); break; @@ -1048,7 +1050,7 @@ done_prefixes: dst.bytes = 8; if ((rc = ops->read_std((unsigned long)dst.ptr, &dst.val, 8, - ctxt)) != 0) + ctxt->vcpu)) != 0) goto done; } register_address_increment(_regs[VCPU_REGS_RSP], @@ -1056,7 +1058,7 @@ done_prefixes: if ((rc = 
ops->write_std( register_address(ctxt->ss_base, _regs[VCPU_REGS_RSP]), - &dst.val, dst.bytes, ctxt)) != 0) + &dst.val, dst.bytes, ctxt->vcpu)) != 0) goto done; no_wb = 1; break; @@ -1091,11 +1093,11 @@ writeback: rc = ops->cmpxchg_emulated((unsigned long)dst. ptr, &dst.orig_val, &dst.val, dst.bytes, - ctxt); + ctxt->vcpu); else rc = ops->write_emulated((unsigned long)dst.ptr, &dst.val, dst.bytes, - ctxt); + ctxt->vcpu); if (rc != 0) goto done; default: @@ -1130,7 +1132,7 @@ special_insn: _regs[VCPU_REGS_RDI]); if ((rc = ops->read_emulated(register_address( override_base ? *override_base : ctxt->ds_base, - _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt)) != 0) + _regs[VCPU_REGS_RSI]), &dst.val, dst.bytes, ctxt->vcpu)) != 0) goto done; register_address_increment(_regs[VCPU_REGS_RSI], (_eflags & EFLG_DF) ? -dst.bytes : dst.bytes); @@ -1152,7 +1154,8 @@ special_insn: dst.type = OP_REG; dst.bytes = (d & ByteOp) ? 1 : op_bytes; dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; - if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes, ctxt)) != 0) + if ((rc = ops->read_emulated(cr2, &dst.val, dst.bytes, + ctxt->vcpu)) != 0) goto done; register_address_increment(_regs[VCPU_REGS_RSI], (_eflags & EFLG_DF) ? 
-dst.bytes : dst.bytes); @@ -1171,7 +1174,8 @@ special_insn: pop_instruction: if ((rc = ops->read_std(register_address(ctxt->ss_base, - _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt)) != 0) + _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt->vcpu)) + != 0) goto done; register_address_increment(_regs[VCPU_REGS_RSP], op_bytes); @@ -1378,7 +1382,8 @@ twobyte_special_insn: case 0xc7: /* Grp9 (cmpxchg8b) */ { u64 old, new; - if ((rc = ops->read_emulated(cr2, &old, 8, ctxt)) != 0) + if ((rc = ops->read_emulated(cr2, &old, 8, ctxt->vcpu)) + != 0) goto done; if (((u32) (old >> 0) != (u32) _regs[VCPU_REGS_RAX]) || ((u32) (old >> 32) != (u32) _regs[VCPU_REGS_RDX])) { @@ -1389,7 +1394,7 @@ twobyte_special_insn: new = ((u64)_regs[VCPU_REGS_RCX] << 32) | (u32) _regs[VCPU_REGS_RBX]; if ((rc = ops->cmpxchg_emulated(cr2, &old, - &new, 8, ctxt)) != 0) + &new, 8, ctxt->vcpu)) != 0) goto done; _eflags |= EFLG_ZF; } diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h index 574cca70b22..92c73aa7f9a 100644 --- a/drivers/kvm/x86_emulate.h +++ b/drivers/kvm/x86_emulate.h @@ -60,7 +60,7 @@ struct x86_emulate_ops { * @bytes: [IN ] Number of bytes to read from memory. */ int (*read_std)(unsigned long addr, void *val, - unsigned int bytes, struct x86_emulate_ctxt * ctxt); + unsigned int bytes, struct kvm_vcpu *vcpu); /* * write_std: Write bytes of standard (non-emulated/special) memory. @@ -71,7 +71,7 @@ struct x86_emulate_ops { * @bytes: [IN ] Number of bytes to write to memory. */ int (*write_std)(unsigned long addr, const void *val, - unsigned int bytes, struct x86_emulate_ctxt * ctxt); + unsigned int bytes, struct kvm_vcpu *vcpu); /* * read_emulated: Read bytes from emulated/special memory area. @@ -82,7 +82,7 @@ struct x86_emulate_ops { int (*read_emulated) (unsigned long addr, void *val, unsigned int bytes, - struct x86_emulate_ctxt * ctxt); + struct kvm_vcpu *vcpu); /* * write_emulated: Read bytes from emulated/special memory area. 
@@ -94,7 +94,7 @@ struct x86_emulate_ops { int (*write_emulated) (unsigned long addr, const void *val, unsigned int bytes, - struct x86_emulate_ctxt * ctxt); + struct kvm_vcpu *vcpu); /* * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an @@ -108,7 +108,7 @@ struct x86_emulate_ops { const void *old, const void *new, unsigned int bytes, - struct x86_emulate_ctxt * ctxt); + struct kvm_vcpu *vcpu); }; -- cgit v1.2.3 From e7d5d76cae970117affe07f809faf0f18bbac675 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Mon, 30 Jul 2007 13:41:19 +0300 Subject: KVM: Remove kvm_{read,write}_guest() ... in favor of the more general emulator_{read,write}_*. Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 17 ++++++------ drivers/kvm/kvm_main.c | 74 +++----------------------------------------------- drivers/kvm/svm.c | 3 +- drivers/kvm/vmx.c | 19 +++++++------ 4 files changed, 25 insertions(+), 88 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 1072c8322d4..030b93bcdf1 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -561,15 +561,14 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); -int kvm_read_guest(struct kvm_vcpu *vcpu, - gva_t addr, - unsigned long size, - void *dest); - -int kvm_write_guest(struct kvm_vcpu *vcpu, - gva_t addr, - unsigned long size, - void *data); +int emulator_read_std(unsigned long addr, + void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); +int emulator_write_emulated(unsigned long addr, + const void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu); unsigned long segment_base(u16 selector); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index a65a145f305..4bbd89e0332 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -146,74 +146,6 @@ static inline int valid_vcpu(int n) return likely(n >= 0 && n < KVM_MAX_VCPUS); } -int kvm_read_guest(struct 
kvm_vcpu *vcpu, gva_t addr, unsigned long size, - void *dest) -{ - unsigned char *host_buf = dest; - unsigned long req_size = size; - - while (size) { - hpa_t paddr; - unsigned now; - unsigned offset; - hva_t guest_buf; - - paddr = gva_to_hpa(vcpu, addr); - - if (is_error_hpa(paddr)) - break; - - guest_buf = (hva_t)kmap_atomic( - pfn_to_page(paddr >> PAGE_SHIFT), - KM_USER0); - offset = addr & ~PAGE_MASK; - guest_buf |= offset; - now = min(size, PAGE_SIZE - offset); - memcpy(host_buf, (void*)guest_buf, now); - host_buf += now; - addr += now; - size -= now; - kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0); - } - return req_size - size; -} -EXPORT_SYMBOL_GPL(kvm_read_guest); - -int kvm_write_guest(struct kvm_vcpu *vcpu, gva_t addr, unsigned long size, - void *data) -{ - unsigned char *host_buf = data; - unsigned long req_size = size; - - while (size) { - hpa_t paddr; - unsigned now; - unsigned offset; - hva_t guest_buf; - gfn_t gfn; - - paddr = gva_to_hpa(vcpu, addr); - - if (is_error_hpa(paddr)) - break; - - gfn = vcpu->mmu.gva_to_gpa(vcpu, addr) >> PAGE_SHIFT; - mark_page_dirty(vcpu->kvm, gfn); - guest_buf = (hva_t)kmap_atomic( - pfn_to_page(paddr >> PAGE_SHIFT), KM_USER0); - offset = addr & ~PAGE_MASK; - guest_buf |= offset; - now = min(size, PAGE_SIZE - offset); - memcpy((void*)guest_buf, host_buf, now); - host_buf += now; - addr += now; - size -= now; - kunmap_atomic((void *)(guest_buf & PAGE_MASK), KM_USER0); - } - return req_size - size; -} -EXPORT_SYMBOL_GPL(kvm_write_guest); - void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { if (!vcpu->fpu_active || vcpu->guest_fpu_loaded) @@ -1017,7 +949,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) } } -static int emulator_read_std(unsigned long addr, +int emulator_read_std(unsigned long addr, void *val, unsigned int bytes, struct kvm_vcpu *vcpu) @@ -1051,6 +983,7 @@ static int emulator_read_std(unsigned long addr, return X86EMUL_CONTINUE; } +EXPORT_SYMBOL_GPL(emulator_read_std); static int 
emulator_write_std(unsigned long addr, const void *val, @@ -1169,7 +1102,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, return X86EMUL_CONTINUE; } -static int emulator_write_emulated(unsigned long addr, +int emulator_write_emulated(unsigned long addr, const void *val, unsigned int bytes, struct kvm_vcpu *vcpu) @@ -1188,6 +1121,7 @@ static int emulator_write_emulated(unsigned long addr, } return emulator_write_emulated_onepage(addr, val, bytes, vcpu); } +EXPORT_SYMBOL_GPL(emulator_write_emulated); static int emulator_cmpxchg_emulated(unsigned long addr, const void *old, diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index cd966739970..b25f4e117e7 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1019,7 +1019,8 @@ static int io_get_override(struct vcpu_svm *svm, svm->vmcb->control.exit_info_2, ins_length); - if (kvm_read_guest(&svm->vcpu, rip, ins_length, inst) != ins_length) + if (emulator_read_std(rip, inst, ins_length, &svm->vcpu) + != X86EMUL_CONTINUE) /* #PF */ return 0; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index cc7ee3d484f..f770f55d46c 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -16,6 +16,7 @@ */ #include "kvm.h" +#include "x86_emulate.h" #include "vmx.h" #include "segment_descriptor.h" @@ -1553,8 +1554,8 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) return; } - if (kvm_read_guest(vcpu, irq * sizeof(ent), sizeof(ent), &ent) != - sizeof(ent)) { + if (emulator_read_std(irq * sizeof(ent), &ent, sizeof(ent), vcpu) != + X86EMUL_CONTINUE) { vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__); return; } @@ -1564,9 +1565,9 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) ip = vmcs_readl(GUEST_RIP); - if (kvm_write_guest(vcpu, ss_base + sp - 2, 2, &flags) != 2 || - kvm_write_guest(vcpu, ss_base + sp - 4, 2, &cs) != 2 || - kvm_write_guest(vcpu, ss_base + sp - 6, 2, &ip) != 2) { + if (emulator_write_emulated(ss_base + sp - 2, &flags, 2, vcpu) != X86EMUL_CONTINUE || + 
emulator_write_emulated(ss_base + sp - 4, &cs, 2, vcpu) != X86EMUL_CONTINUE || + emulator_write_emulated(ss_base + sp - 6, &ip, 2, vcpu) != X86EMUL_CONTINUE) { vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__); return; } @@ -1767,7 +1768,7 @@ static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) u64 inst; gva_t rip; int countr_size; - int i, n; + int i; if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) { countr_size = 2; @@ -1782,9 +1783,11 @@ static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) if (countr_size != 8) rip += vmcs_readl(GUEST_CS_BASE); - n = kvm_read_guest(vcpu, rip, sizeof(inst), &inst); + if (emulator_read_std(rip, &inst, sizeof(inst), vcpu) != + X86EMUL_CONTINUE) + return 0; - for (i = 0; i < n; i++) { + for (i = 0; i < sizeof(inst); i++) { switch (((u8*)&inst)[i]) { case 0xf0: case 0xf2: -- cgit v1.2.3 From c16f862d0257349607b7a9be7b4a4b7ed419a3ab Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Jul 2007 21:12:19 +1000 Subject: KVM: Use kmem cache for allocating vcpus Avi wants the allocations of vcpus centralized again. The easiest way is to add a "size" arg to kvm_init_arch, and expose the thus-prepared cache to the modules. 
Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 4 +++- drivers/kvm/kvm_main.c | 16 +++++++++++++++- drivers/kvm/svm.c | 5 +++-- drivers/kvm/vmx.c | 4 ++-- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 030b93bcdf1..b362e8f8f83 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -141,6 +141,7 @@ struct kvm_mmu_page { }; struct kvm_vcpu; +extern struct kmem_cache *kvm_vcpu_cache; /* * x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level @@ -483,7 +484,8 @@ extern struct kvm_arch_ops *kvm_arch_ops; int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); -int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module); +int kvm_init_arch(struct kvm_arch_ops *ops, unsigned int vcpu_size, + struct module *module); void kvm_exit_arch(void); int kvm_mmu_module_init(void); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 4bbd89e0332..4166a08ce50 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -53,6 +53,8 @@ static LIST_HEAD(vm_list); static cpumask_t cpus_hardware_enabled; struct kvm_arch_ops *kvm_arch_ops; +struct kmem_cache *kvm_vcpu_cache; +EXPORT_SYMBOL_GPL(kvm_vcpu_cache); static __read_mostly struct preempt_ops kvm_preempt_ops; @@ -3104,7 +3106,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, kvm_arch_ops->vcpu_put(vcpu); } -int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) +int kvm_init_arch(struct kvm_arch_ops *ops, unsigned int vcpu_size, + struct module *module) { int r; @@ -3142,6 +3145,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) if (r) goto out_free_3; + /* A kmem cache lets us meet the alignment requirements of fx_save. 
*/ + kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, + __alignof__(struct kvm_vcpu), 0, 0); + if (!kvm_vcpu_cache) { + r = -ENOMEM; + goto out_free_4; + } + kvm_chardev_ops.owner = module; r = misc_register(&kvm_dev); @@ -3156,6 +3167,8 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module) return r; out_free: + kmem_cache_destroy(kvm_vcpu_cache); +out_free_4: sysdev_unregister(&kvm_sysdev); out_free_3: sysdev_class_unregister(&kvm_sysdev_class); @@ -3173,6 +3186,7 @@ out: void kvm_exit_arch(void) { misc_deregister(&kvm_dev); + kmem_cache_destroy(kvm_vcpu_cache); sysdev_unregister(&kvm_sysdev); sysdev_class_unregister(&kvm_sysdev_class); unregister_reboot_notifier(&kvm_reboot_notifier); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index b25f4e117e7..8193651dd81 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -577,7 +577,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct page *page; int err; - svm = kzalloc(sizeof *svm, GFP_KERNEL); + svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!svm) { err = -ENOMEM; goto out; @@ -1849,7 +1849,8 @@ static struct kvm_arch_ops svm_arch_ops = { static int __init svm_init(void) { - return kvm_init_arch(&svm_arch_ops, THIS_MODULE); + return kvm_init_arch(&svm_arch_ops, sizeof(struct vcpu_svm), + THIS_MODULE); } static void __exit svm_exit(void) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index f770f55d46c..2b30274656f 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -2365,7 +2365,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; - struct vcpu_vmx *vmx = kzalloc(sizeof(*vmx), GFP_KERNEL); + struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); int cpu; if (!vmx) @@ -2490,7 +2490,7 @@ static int __init vmx_init(void) memset(iova, 0xff, PAGE_SIZE); kunmap(vmx_io_bitmap_b); - r = kvm_init_arch(&vmx_arch_ops, THIS_MODULE); + r = 
kvm_init_arch(&vmx_arch_ops, sizeof(struct vcpu_vmx), THIS_MODULE); if (r) goto out1; -- cgit v1.2.3 From b114b0804df7131cb6764b948c1c530c834fa3c0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Jul 2007 21:13:43 +1000 Subject: KVM: Use alignment properties of vcpu to simplify FPU ops Now we use a kmem cache for allocating vcpus, we can get the 16-byte alignment required by fxsave & fxrstor instructions, and avoid manually aligning the buffer. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 13 ++++--------- drivers/kvm/kvm_main.c | 45 +++++++++++++++++---------------------------- drivers/kvm/svm.c | 8 ++++---- 3 files changed, 25 insertions(+), 41 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index b362e8f8f83..7a34706f42b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -45,10 +45,6 @@ #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 -#define FX_IMAGE_SIZE 512 -#define FX_IMAGE_ALIGN 16 -#define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) - #define DE_VECTOR 0 #define NM_VECTOR 7 #define DF_VECTOR 8 @@ -342,9 +338,8 @@ struct kvm_vcpu { struct kvm_guest_debug guest_debug; - char fx_buf[FX_BUF_SIZE]; - char *host_fx_image; - char *guest_fx_image; + struct i387_fxsave_struct host_fx_image; + struct i387_fxsave_struct guest_fx_image; int fpu_active; int guest_fpu_loaded; @@ -704,12 +699,12 @@ static inline unsigned long read_msr(unsigned long msr) } #endif -static inline void fx_save(void *image) +static inline void fx_save(struct i387_fxsave_struct *image) { asm ("fxsave (%0)":: "r" (image)); } -static inline void fx_restore(void *image) +static inline void fx_restore(struct i387_fxsave_struct *image) { asm ("fxrstor (%0)":: "r" (image)); } diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 4166a08ce50..bfb1b6de058 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -154,8 +154,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) return; 
vcpu->guest_fpu_loaded = 1; - fx_save(vcpu->host_fx_image); - fx_restore(vcpu->guest_fx_image); + fx_save(&vcpu->host_fx_image); + fx_restore(&vcpu->guest_fx_image); } EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); @@ -165,8 +165,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 0; - fx_save(vcpu->guest_fx_image); - fx_restore(vcpu->host_fx_image); + fx_save(&vcpu->guest_fx_image); + fx_restore(&vcpu->host_fx_image); } EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); @@ -262,10 +262,6 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) } vcpu->pio_data = page_address(page); - vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf, - FX_IMAGE_ALIGN); - vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; - r = kvm_mmu_create(vcpu); if (r < 0) goto fail_free_pio_data; @@ -615,30 +611,20 @@ EXPORT_SYMBOL_GPL(set_cr8); void fx_init(struct kvm_vcpu *vcpu) { - struct __attribute__ ((__packed__)) fx_image_s { - u16 control; //fcw - u16 status; //fsw - u16 tag; // ftw - u16 opcode; //fop - u64 ip; // fpu ip - u64 operand;// fpu dp - u32 mxcsr; - u32 mxcsr_mask; - - } *fx_image; + unsigned after_mxcsr_mask; /* Initialize guest FPU by resetting ours and saving into guest's */ preempt_disable(); - fx_save(vcpu->host_fx_image); + fx_save(&vcpu->host_fx_image); fpu_init(); - fx_save(vcpu->guest_fx_image); - fx_restore(vcpu->host_fx_image); + fx_save(&vcpu->guest_fx_image); + fx_restore(&vcpu->host_fx_image); preempt_enable(); - fx_image = (struct fx_image_s *)vcpu->guest_fx_image; - fx_image->mxcsr = 0x1f80; - memset(vcpu->guest_fx_image + sizeof(struct fx_image_s), - 0, FX_IMAGE_SIZE - sizeof(struct fx_image_s)); + after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); + vcpu->guest_fx_image.mxcsr = 0x1f80; + memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask, + 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask); } EXPORT_SYMBOL_GPL(fx_init); @@ -2356,6 +2342,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm 
*kvm, int n) preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); + /* We do fxsave: this must be aligned. */ + BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF); + vcpu_load(vcpu); r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); @@ -2468,7 +2457,7 @@ struct fxsave { static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; + struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image; vcpu_load(vcpu); @@ -2488,7 +2477,7 @@ static int kvm_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct fxsave *fxsave = (struct fxsave *)vcpu->guest_fx_image; + struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image; vcpu_load(vcpu); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 8193651dd81..5277084f3a3 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1557,8 +1557,8 @@ again: } if (vcpu->fpu_active) { - fx_save(vcpu->host_fx_image); - fx_restore(vcpu->guest_fx_image); + fx_save(&vcpu->host_fx_image); + fx_restore(&vcpu->guest_fx_image); } asm volatile ( @@ -1670,8 +1670,8 @@ again: vcpu->guest_mode = 0; if (vcpu->fpu_active) { - fx_save(vcpu->guest_fx_image); - fx_restore(vcpu->host_fx_image); + fx_save(&vcpu->guest_fx_image); + fx_restore(&vcpu->host_fx_image); } if ((svm->vmcb->save.dr7 & 0xff)) -- cgit v1.2.3 From 39214915f50f6ac2350355f2db63910d968fa790 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 31 Jul 2007 19:57:47 +1000 Subject: KVM: kvm_vm_ioctl_get_dirty_log restore "nothing dirty" optimization kvm_vm_ioctl_get_dirty_log scans bitmap to see if it's all zero, but doesn't use that information. Avi says: Looks like it was used to guard kvm_mmu_slot_remove_write_access(); optimizing the case where the guest just leaves the screen alone (which it usually does, especially in benchmarks). I'd rather reinstate that optimization. 
See 90cb0529dd230548a7f0d6b315997be854caea1b where the damage was done. It's pretty simple: if the bitmap is all zero, we don't need to do anything to clean it. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index bfb1b6de058..5dee3024579 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -803,11 +803,14 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n)) goto out; - mutex_lock(&kvm->lock); - kvm_mmu_slot_remove_write_access(kvm, log->slot); - kvm_flush_remote_tlbs(kvm); - memset(memslot->dirty_bitmap, 0, n); - mutex_unlock(&kvm->lock); + /* If nothing is dirty, don't bother messing with page tables. */ + if (any) { + mutex_lock(&kvm->lock); + kvm_mmu_slot_remove_write_access(kvm, log->slot); + kvm_flush_remote_tlbs(kvm); + memset(memslot->dirty_bitmap, 0, n); + mutex_unlock(&kvm->lock); + } r = 0; -- cgit v1.2.3 From 002c7f7c32a6123f0894d7d579ffae8e98911830 Mon Sep 17 00:00:00 2001 From: "Yang, Sheng" Date: Tue, 31 Jul 2007 14:23:01 +0300 Subject: KVM: VMX: Add cpu consistency check All the physical CPUs on the board should support the same VMX feature set. Add check_processor_compatibility to kvm_arch_ops for the consistency check. 
Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 10 ++++++++++ drivers/kvm/svm.c | 6 ++++++ drivers/kvm/vmx.c | 51 ++++++++++++++++++++++++++++++++------------------ 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 7a34706f42b..cfda3abff89 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -420,6 +420,7 @@ struct kvm_arch_ops { int (*disabled_by_bios)(void); /* __init */ void (*hardware_enable)(void *dummy); /* __init */ void (*hardware_disable)(void *dummy); + void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 5dee3024579..2be6b1ca1a0 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -3102,6 +3102,7 @@ int kvm_init_arch(struct kvm_arch_ops *ops, unsigned int vcpu_size, struct module *module) { int r; + int cpu; if (kvm_arch_ops) { printk(KERN_ERR "kvm: already loaded the other module\n"); @@ -3123,6 +3124,14 @@ int kvm_init_arch(struct kvm_arch_ops *ops, unsigned int vcpu_size, if (r < 0) goto out; + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, + kvm_arch_ops->check_processor_compatibility, + &r, 0, 1); + if (r < 0) + goto out_free_0; + } + on_each_cpu(hardware_enable, NULL, 0, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) @@ -3169,6 +3178,7 @@ out_free_2: unregister_cpu_notifier(&kvm_cpu_notifier); out_free_1: on_each_cpu(hardware_disable, NULL, 0, 1); +out_free_0: kvm_arch_ops->hardware_unsetup(); out: kvm_arch_ops = NULL; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 5277084f3a3..827bc2774e7 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1798,11 +1798,17 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[3] = 0xc3; } +static void svm_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + 
static struct kvm_arch_ops svm_arch_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, .hardware_setup = svm_hardware_setup, .hardware_unsetup = svm_hardware_unsetup, + .check_processor_compatibility = svm_check_processor_compat, .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 2b30274656f..c4b8bfea441 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -840,13 +840,13 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, /* Ensure minimum (required) set of control bits are supported. */ if (ctl_min & ~ctl) - return -1; + return -EIO; *result = ctl; return 0; } -static __init int setup_vmcs_config(void) +static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) { u32 vmx_msr_low, vmx_msr_high; u32 min, opt; @@ -859,7 +859,7 @@ static __init int setup_vmcs_config(void) opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, &_pin_based_exec_control) < 0) - return -1; + return -EIO; min = CPU_BASED_HLT_EXITING | #ifdef CONFIG_X86_64 @@ -872,7 +872,7 @@ static __init int setup_vmcs_config(void) opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, &_cpu_based_exec_control) < 0) - return -1; + return -EIO; min = 0; #ifdef CONFIG_X86_64 @@ -881,37 +881,37 @@ static __init int setup_vmcs_config(void) opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, &_vmexit_control) < 0) - return -1; + return -EIO; min = opt = 0; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, &_vmentry_control) < 0) - return -1; + return -EIO; rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) - return -1; + return -EIO; #ifdef CONFIG_X86_64 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. 
*/ if (vmx_msr_high & (1u<<16)) - return -1; + return -EIO; #endif /* Require Write-Back (WB) memory type for VMCS accesses. */ if (((vmx_msr_high >> 18) & 15) != 6) - return -1; + return -EIO; - vmcs_config.size = vmx_msr_high & 0x1fff; - vmcs_config.order = get_order(vmcs_config.size); - vmcs_config.revision_id = vmx_msr_low; + vmcs_conf->size = vmx_msr_high & 0x1fff; + vmcs_conf->order = get_order(vmcs_config.size); + vmcs_conf->revision_id = vmx_msr_low; - vmcs_config.pin_based_exec_ctrl = _pin_based_exec_control; - vmcs_config.cpu_based_exec_ctrl = _cpu_based_exec_control; - vmcs_config.vmexit_ctrl = _vmexit_control; - vmcs_config.vmentry_ctrl = _vmentry_control; + vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; + vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; + vmcs_conf->vmexit_ctrl = _vmexit_control; + vmcs_conf->vmentry_ctrl = _vmentry_control; return 0; } @@ -971,8 +971,8 @@ static __init int alloc_kvm_area(void) static __init int hardware_setup(void) { - if (setup_vmcs_config() < 0) - return -1; + if (setup_vmcs_config(&vmcs_config) < 0) + return -EIO; return alloc_kvm_area(); } @@ -2414,11 +2414,26 @@ free_vcpu: return ERR_PTR(err); } +static void __init vmx_check_processor_compat(void *rtn) +{ + struct vmcs_config vmcs_conf; + + *(int *)rtn = 0; + if (setup_vmcs_config(&vmcs_conf) < 0) + *(int *)rtn = -EIO; + if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { + printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", + smp_processor_id()); + *(int *)rtn = -EIO; + } +} + static struct kvm_arch_ops vmx_arch_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, .hardware_setup = hardware_setup, .hardware_unsetup = hardware_unsetup, + .check_processor_compatibility = vmx_check_processor_compat, .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, -- cgit v1.2.3 From fb764416492a1204c20fdf4507019012d27bd1a2 Mon Sep 17 00:00:00 2001 From: Rusty Russell 
Date: Tue, 31 Jul 2007 20:45:03 +1000 Subject: KVM: Don't assign vcpu->cr3 if it's invalid: check first, set last Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 2be6b1ca1a0..5dc60170278 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -579,7 +579,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) } } - vcpu->cr3 = cr3; mutex_lock(&vcpu->kvm->lock); /* * Does the new cr3 value map to physical memory? (Note, we @@ -592,8 +591,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) */ if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT))) inject_gp(vcpu); - else + else { + vcpu->cr3 = cr3; vcpu->mmu.new_cr3(vcpu); + } mutex_unlock(&vcpu->kvm->lock); } EXPORT_SYMBOL_GPL(set_cr3); -- cgit v1.2.3 From 7e9d619d2aa28b71d547edfa15c66d6ab9a3a39c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 31 Jul 2007 20:41:14 +1000 Subject: KVM: Cleanup mark_page_dirty For some reason, mark_page_dirty open-codes __gfn_to_memslot(). Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 5dc60170278..55639aceca6 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -916,28 +916,18 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_page); +/* WARNING: Does not work on aliased pages.
*/ void mark_page_dirty(struct kvm *kvm, gfn_t gfn) { - int i; struct kvm_memory_slot *memslot; - unsigned long rel_gfn; - for (i = 0; i < kvm->nmemslots; ++i) { - memslot = &kvm->memslots[i]; + memslot = __gfn_to_memslot(kvm, gfn); + if (memslot && memslot->dirty_bitmap) { + unsigned long rel_gfn = gfn - memslot->base_gfn; - if (gfn >= memslot->base_gfn - && gfn < memslot->base_gfn + memslot->npages) { - - if (!memslot->dirty_bitmap) - return; - - rel_gfn = gfn - memslot->base_gfn; - - /* avoid RMW */ - if (!test_bit(rel_gfn, memslot->dirty_bitmap)) - set_bit(rel_gfn, memslot->dirty_bitmap); - return; - } + /* avoid RMW */ + if (!test_bit(rel_gfn, memslot->dirty_bitmap)) + set_bit(rel_gfn, memslot->dirty_bitmap); } } -- cgit v1.2.3 From bfc733a7a32612fe213a7492c385f2b03f592d7f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 31 Jul 2007 20:42:42 +1000 Subject: KVM: SVM: Make set_msr_interception more reliable set_msr_interception() is used by svm to set up which MSRs should be intercepted. It can only fail if someone has changed the code to try to intercept an MSR without updating the array of ranges. The return value is ignored anyway: it should just BUG() if it doesn't work. (A build-time failure would be better, but that's tricky). Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 827bc2774e7..7beaff1789b 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -359,8 +359,8 @@ err_1: } -static int set_msr_interception(u32 *msrpm, unsigned msr, - int read, int write) +static void set_msr_interception(u32 *msrpm, unsigned msr, + int read, int write) { int i; @@ -375,11 +375,10 @@ static int set_msr_interception(u32 *msrpm, unsigned msr, u32 mask = ((write) ? 0 : 2) | ((read) ? 
0 : 1); *base = (*base & ~(0x3 << msr_shift)) | (mask << msr_shift); - return 1; + return; } } - printk(KERN_DEBUG "%s: not found 0x%x\n", __FUNCTION__, msr); - return 0; + BUG(); } static __init int svm_hardware_setup(void) -- cgit v1.2.3 From 37c00051b53861929a910309c1823b415d55cfdf Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 31 Jul 2007 20:46:12 +1000 Subject: KVM: Remove redundant alloc_vmcs_cpu declaration alloc_vmcs_cpu is already declared (static) above, no need to redeclare. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c4b8bfea441..a94eb205cec 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -949,8 +949,6 @@ static void free_kvm_area(void) free_vmcs(per_cpu(vmxarea, cpu)); } -extern struct vmcs *alloc_vmcs_cpu(int cpu); - static __init int alloc_kvm_area(void) { int cpu; -- cgit v1.2.3 From 54e11fa1f87771df0fc1bb42a75be0740c3babb9 Mon Sep 17 00:00:00 2001 From: Gabriel C Date: Wed, 1 Aug 2007 16:23:10 +0200 Subject: KVM: Fix defined but not used warning in drivers/kvm/vmx.c move_msr_up() is used only on X86_64 and generates a warning on !X86_64 Signed-off-by: Gabriel Craciunescu Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a94eb205cec..3bf36508c84 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -544,6 +544,7 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) /* * Swap MSR entry in host/guest MSR entry array. 
*/ +#ifdef CONFIG_X86_64 static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) { struct kvm_msr_entry tmp; @@ -555,6 +556,7 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->host_msrs[to] = vmx->host_msrs[from]; vmx->host_msrs[from] = tmp; } +#endif /* * Set up the vmcs to automatically save and restore system -- cgit v1.2.3 From 3dea7ca7160f80dd6d31c0bbeb2d871e51b567b6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 1 Aug 2007 10:12:22 +1000 Subject: KVM: Remove stat_set from debugfs We shouldn't define stat_set on the debug attributes, since that will cause silent failure on writing: without a set argument, userspace will get -EACCES. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 55639aceca6..25d76a5a999 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -3017,11 +3017,7 @@ static u64 stat_get(void *_offset) return total; } -static void stat_set(void *offset, u64 val) -{ -} - -DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, NULL, "%llu\n"); static __init void kvm_init_debug(void) { -- cgit v1.2.3 From 33830b4f5bdf2bc21d0c10d38beffdce8edcded7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 1 Aug 2007 10:17:06 +1000 Subject: KVM: Remove unneeded kvm_dev_open and kvm_dev_release functions. Devices don't need open or release functions.
Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 25d76a5a999..9de3b1aba20 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -302,11 +302,6 @@ static struct kvm *kvm_create_vm(void) return kvm; } -static int kvm_dev_open(struct inode *inode, struct file *filp) -{ - return 0; -} - /* * Free any memory in @free but not in @dont. */ @@ -376,11 +371,6 @@ static void kvm_free_vcpus(struct kvm *kvm) } -static int kvm_dev_release(struct inode *inode, struct file *filp) -{ - return 0; -} - static void kvm_destroy_vm(struct kvm *kvm) { spin_lock(&kvm_lock); @@ -2841,8 +2831,6 @@ out: } static struct file_operations kvm_chardev_ops = { - .open = kvm_dev_open, - .release = kvm_dev_release, .unlocked_ioctl = kvm_dev_ioctl, .compat_ioctl = kvm_dev_ioctl, }; -- cgit v1.2.3 From f02424785ab83bab8283ad33044284f749c08db8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 1 Aug 2007 10:48:02 +1000 Subject: KVM: Add and use pr_unimpl for standard formatting of unimplemented features All guest-invokable printks should be ratelimited to prevent malicious guests from flooding logs. This is a start. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 8 ++++++++ drivers/kvm/kvm_main.c | 18 ++++++++---------- drivers/kvm/svm.c | 4 ++-- drivers/kvm/vmx.c | 2 +- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index cfda3abff89..6d258261891 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -474,6 +474,14 @@ struct kvm_arch_ops { extern struct kvm_arch_ops *kvm_arch_ops; +/* The guest did something we don't support. */ +#define pr_unimpl(vcpu, fmt, ...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_ERR "kvm: %i: cpu%i " fmt, \ + current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \ + } while(0) + #define kvm_printf(kvm, fmt ...) 
printk(KERN_DEBUG fmt) #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 9de3b1aba20..1b86ab0d42f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -962,8 +962,7 @@ static int emulator_write_std(unsigned long addr, unsigned int bytes, struct kvm_vcpu *vcpu) { - printk(KERN_ERR "emulator_write_std: addr %lx n %d\n", - addr, bytes); + pr_unimpl(vcpu, "emulator_write_std: addr %lx n %d\n", addr, bytes); return X86EMUL_UNHANDLEABLE; } @@ -1138,8 +1137,7 @@ int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest) *dest = kvm_arch_ops->get_dr(vcpu, dr); return X86EMUL_CONTINUE; default: - printk(KERN_DEBUG "%s: unexpected dr %u\n", - __FUNCTION__, dr); + pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); return X86EMUL_UNHANDLEABLE; } } @@ -1488,7 +1486,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) break; #endif default: - printk(KERN_ERR "kvm: unhandled rdmsr: 0x%x\n", msr); + pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); return 1; } *pdata = data; @@ -1543,11 +1541,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; #endif case MSR_IA32_MC0_STATUS: - printk(KERN_WARNING "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", + pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n", __FUNCTION__, data); break; case MSR_IA32_MCG_STATUS: - printk(KERN_WARNING "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", + pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n", __FUNCTION__, data); break; case MSR_IA32_UCODE_REV: @@ -1567,7 +1565,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) return vcpu_register_para(vcpu, data); default: - printk(KERN_ERR "kvm: unhandled wrmsr: 0x%x\n", msr); + pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr); return 1; } return 0; @@ -1798,7 +1796,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, /* * String I/O in reverse. Yuck. 
Kill the guest, fix later. */ - printk(KERN_ERR "kvm: guest string pio down\n"); + pr_unimpl(vcpu, "guest string pio down\n"); inject_gp(vcpu); return 1; } @@ -1829,7 +1827,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, ret = 1; } } else if (pio_dev) - printk(KERN_ERR "no string pio read support yet, " + pr_unimpl(vcpu, "no string pio read support yet, " "port %x size %d count %ld\n", port, size, count); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 7beaff1789b..3ec30d7c9b9 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1167,7 +1167,7 @@ static int invalid_op_interception(struct vcpu_svm *svm, static int task_switch_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - printk(KERN_DEBUG "%s: task swiche is unsupported\n", __FUNCTION__); + pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__); kvm_run->exit_reason = KVM_EXIT_UNKNOWN; return 0; } @@ -1183,7 +1183,7 @@ static int emulate_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { if (emulate_instruction(&svm->vcpu, NULL, 0, 0) != EMULATE_DONE) - printk(KERN_ERR "%s: failed\n", __FUNCTION__); + pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__); return 1; } diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 3bf36508c84..ae54d9a24ce 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1920,7 +1920,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) break; } kvm_run->exit_reason = 0; - printk(KERN_ERR "kvm: unhandled control register: op %d cr %d\n", + pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n", (int)(exit_qualification >> 4) & 3, cr); return 0; } -- cgit v1.2.3 From a477034750ce59df7fc17823f085df5a30e316f0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 1 Aug 2007 14:46:11 +1000 Subject: KVM: Use kmem_cache_free for kmem_cache_zalloc'ed objects We use kfree in svm.c and vmx.c, and this works, but it could break at any time. 
kfree() is supposed to match up with kmalloc(). Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 4 ++-- drivers/kvm/vmx.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 3ec30d7c9b9..436bdff9b0b 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -610,7 +610,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: - kfree(svm); + kmem_cache_free(kvm_vcpu_cache, svm); out: return ERR_PTR(err); } @@ -621,7 +621,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); kvm_vcpu_uninit(vcpu); - kfree(svm); + kmem_cache_free(kvm_vcpu_cache, svm); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index ae54d9a24ce..708055a5052 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -2359,7 +2359,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) kfree(vmx->host_msrs); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); - kfree(vmx); + kmem_cache_free(kvm_vcpu_cache, vmx); } static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) @@ -2410,7 +2410,7 @@ free_guest_msrs: uninit_vcpu: kvm_vcpu_uninit(&vmx->vcpu); free_vcpu: - kfree(vmx); + kmem_cache_free(kvm_vcpu_cache, vmx); return ERR_PTR(err); } -- cgit v1.2.3 From 1e4e6e00136b82a5595de903c28912afee1178cb Mon Sep 17 00:00:00 2001 From: "Li, Xin B" Date: Wed, 1 Aug 2007 21:49:10 +0300 Subject: KVM: VMX: Remove a duplicated ia32e mode vm entry control Remove a duplicated ia32e mode VM Entry control definition and use the proper one. 
Signed-off-by: Xin Li Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 8 ++++---- drivers/kvm/vmx.h | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 708055a5052..30c627d3b21 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1111,7 +1111,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) find_msr_entry(to_vmx(vcpu), MSR_EFER)->data |= EFER_LMA | EFER_LME; vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) - | VM_ENTRY_CONTROLS_IA32E_MASK); + | VM_ENTRY_IA32E_MODE); } static void exit_lmode(struct kvm_vcpu *vcpu) @@ -1120,7 +1120,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) - & ~VM_ENTRY_CONTROLS_IA32E_MASK); + & ~VM_ENTRY_IA32E_MODE); } #endif @@ -1185,13 +1185,13 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) if (efer & EFER_LMA) { vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) | - VM_ENTRY_CONTROLS_IA32E_MASK); + VM_ENTRY_IA32E_MODE); msr->data = efer; } else { vmcs_write32(VM_ENTRY_CONTROLS, vmcs_read32(VM_ENTRY_CONTROLS) & - ~VM_ENTRY_CONTROLS_IA32E_MASK); + ~VM_ENTRY_IA32E_MODE); msr->data = efer & ~EFER_LME; } diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h index 7e4dc1208dd..35d0b58c0a0 100644 --- a/drivers/kvm/vmx.h +++ b/drivers/kvm/vmx.h @@ -268,9 +268,6 @@ enum vmcs_field { /* segment AR */ #define SEGMENT_AR_L_MASK (1 << 13) -/* entry controls */ -#define VM_ENTRY_CONTROLS_IA32E_MASK (1 << 9) - #define AR_TYPE_ACCESSES_MASK 1 #define AR_TYPE_READABLE_MASK (1 << 1) #define AR_TYPE_WRITEABLE_MASK (1 << 2) -- cgit v1.2.3 From 9fdaaac38e8c8a63c6383b807b91fea2d51da95d Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 1 Aug 2007 21:51:09 +0300 Subject: KVM: Remove useless assignment Line 1809 of kvm_main.c is useless, value is overwritten in line 1815: 1809 now = min(count, PAGE_SIZE / size); 1810 1811 if (!down) 1812 in_page = PAGE_SIZE - offset_in_page(address); 
1813 else 1814 in_page = offset_in_page(address) + size; 1815 now = min(count, (unsigned long)in_page / size); 1816 if (!now) { Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 1b86ab0d42f..62adaeedfdb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1776,8 +1776,6 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, return 1; } - now = min(count, PAGE_SIZE / size); - if (!down) in_page = PAGE_SIZE - offset_in_page(address); else -- cgit v1.2.3 From e70669abd4e60dfea3ac1639848e20e2b8dd1255 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sun, 5 Aug 2007 10:36:40 +0300 Subject: KVM: Cleanup string I/O instruction emulation Both vmx and svm decode the I/O instructions, and both botch the job, requiring the instruction prefixes to be fetched in order to completely decode the instruction. So, if we see a string I/O instruction, use the x86 emulator to decode it, as it already has all the prefix decoding machinery. This patch defines ins/outs opcodes in x86_emulate.c and calls emulate_instruction() from io_interception() (svm.c) and from handle_io() (vmx.c). 
It removes all vmx/svm prefix instruction decoders (get_addr_size(), io_get_override(), io_address(), get_io_count()) Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 3 + drivers/kvm/svm.c | 149 ++++------------------------------------------ drivers/kvm/vmx.c | 76 ++++------------------- drivers/kvm/x86_emulate.c | 49 +++++++++++++-- 4 files changed, 69 insertions(+), 208 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 62adaeedfdb..661d065fd86 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1221,7 +1221,10 @@ int emulate_instruction(struct kvm_vcpu *vcpu, emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS); vcpu->mmio_is_write = 0; + vcpu->pio.string = 0; r = x86_emulate_memop(&emulate_ctxt, &emulate_ops); + if (vcpu->pio.string) + return EMULATE_DO_MMIO; if ((r || vcpu->mmio_is_write) && run) { run->exit_reason = KVM_EXIT_MMIO; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 436bdff9b0b..a83ff01bb01 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -98,20 +98,6 @@ static inline u32 svm_has(u32 feat) return svm_features & feat; } -static unsigned get_addr_size(struct vcpu_svm *svm) -{ - struct vmcb_save_area *sa = &svm->vmcb->save; - u16 cs_attrib; - - if (!(sa->cr0 & X86_CR0_PE) || (sa->rflags & X86_EFLAGS_VM)) - return 2; - - cs_attrib = sa->cs.attrib; - - return (cs_attrib & SVM_SELECTOR_L_MASK) ? 8 : - (cs_attrib & SVM_SELECTOR_DB_MASK) ? 
4 : 2; -} - static inline u8 pop_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->irq_summary); @@ -995,147 +981,32 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 0; } -static int io_get_override(struct vcpu_svm *svm, - struct vmcb_seg **seg, - int *addr_override) -{ - u8 inst[MAX_INST_SIZE]; - unsigned ins_length; - gva_t rip; - int i; - - rip = svm->vmcb->save.rip; - ins_length = svm->next_rip - rip; - rip += svm->vmcb->save.cs.base; - - if (ins_length > MAX_INST_SIZE) - printk(KERN_DEBUG - "%s: inst length err, cs base 0x%llx rip 0x%llx " - "next rip 0x%llx ins_length %u\n", - __FUNCTION__, - svm->vmcb->save.cs.base, - svm->vmcb->save.rip, - svm->vmcb->control.exit_info_2, - ins_length); - - if (emulator_read_std(rip, inst, ins_length, &svm->vcpu) - != X86EMUL_CONTINUE) - /* #PF */ - return 0; - - *addr_override = 0; - *seg = NULL; - for (i = 0; i < ins_length; i++) - switch (inst[i]) { - case 0xf0: - case 0xf2: - case 0xf3: - case 0x66: - continue; - case 0x67: - *addr_override = 1; - continue; - case 0x2e: - *seg = &svm->vmcb->save.cs; - continue; - case 0x36: - *seg = &svm->vmcb->save.ss; - continue; - case 0x3e: - *seg = &svm->vmcb->save.ds; - continue; - case 0x26: - *seg = &svm->vmcb->save.es; - continue; - case 0x64: - *seg = &svm->vmcb->save.fs; - continue; - case 0x65: - *seg = &svm->vmcb->save.gs; - continue; - default: - return 1; - } - printk(KERN_DEBUG "%s: unexpected\n", __FUNCTION__); - return 0; -} - -static unsigned long io_address(struct vcpu_svm *svm, int ins, gva_t *address) -{ - unsigned long addr_mask; - unsigned long *reg; - struct vmcb_seg *seg; - int addr_override; - struct vmcb_save_area *save_area = &svm->vmcb->save; - u16 cs_attrib = save_area->cs.attrib; - unsigned addr_size = get_addr_size(svm); - - if (!io_get_override(svm, &seg, &addr_override)) - return 0; - - if (addr_override) - addr_size = (addr_size == 2) ? 
4: (addr_size >> 1); - - if (ins) { - reg = &svm->vcpu.regs[VCPU_REGS_RDI]; - seg = &svm->vmcb->save.es; - } else { - reg = &svm->vcpu.regs[VCPU_REGS_RSI]; - seg = (seg) ? seg : &svm->vmcb->save.ds; - } - - addr_mask = ~0ULL >> (64 - (addr_size * 8)); - - if ((cs_attrib & SVM_SELECTOR_L_MASK) && - !(svm->vmcb->save.rflags & X86_EFLAGS_VM)) { - *address = (*reg & addr_mask); - return addr_mask; - } - - if (!(seg->attrib & SVM_SELECTOR_P_SHIFT)) { - svm_inject_gp(&svm->vcpu, 0); - return 0; - } - - *address = (*reg & addr_mask) + seg->base; - return addr_mask; -} - static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { u32 io_info = svm->vmcb->control.exit_info_1; //address size bug? int size, down, in, string, rep; unsigned port; - unsigned long count; - gva_t address = 0; ++svm->vcpu.stat.io_exits; svm->next_rip = svm->vmcb->control.exit_info_2; + string = (io_info & SVM_IOIO_STR_MASK) != 0; + + if (string) { + if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO) + return 0; + return 1; + } + in = (io_info & SVM_IOIO_TYPE_MASK) != 0; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; - string = (io_info & SVM_IOIO_STR_MASK) != 0; rep = (io_info & SVM_IOIO_REP_MASK) != 0; - count = 1; down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; - if (string) { - unsigned addr_mask; - - addr_mask = io_address(svm, in, &address); - if (!addr_mask) { - printk(KERN_DEBUG "%s: get io address failed\n", - __FUNCTION__); - return 1; - } - - if (rep) - count = svm->vcpu.regs[VCPU_REGS_RCX] & addr_mask; - } - return kvm_setup_pio(&svm->vcpu, kvm_run, in, size, count, string, - down, address, rep, port); + return kvm_setup_pio(&svm->vcpu, kvm_run, in, size, 1, 0, + down, 0, rep, port); } static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 30c627d3b21..044722bc1a7 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1763,82 
+1763,30 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } -static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count) -{ - u64 inst; - gva_t rip; - int countr_size; - int i; - - if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_VM)) { - countr_size = 2; - } else { - u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES); - - countr_size = (cs_ar & AR_L_MASK) ? 8: - (cs_ar & AR_DB_MASK) ? 4: 2; - } - - rip = vmcs_readl(GUEST_RIP); - if (countr_size != 8) - rip += vmcs_readl(GUEST_CS_BASE); - - if (emulator_read_std(rip, &inst, sizeof(inst), vcpu) != - X86EMUL_CONTINUE) - return 0; - - for (i = 0; i < sizeof(inst); i++) { - switch (((u8*)&inst)[i]) { - case 0xf0: - case 0xf2: - case 0xf3: - case 0x2e: - case 0x36: - case 0x3e: - case 0x26: - case 0x64: - case 0x65: - case 0x66: - break; - case 0x67: - countr_size = (countr_size == 2) ? 4: (countr_size >> 1); - default: - goto done; - } - } - return 0; -done: - countr_size *= 8; - *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size)); - //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]); - return 1; -} - static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 exit_qualification; int size, down, in, string, rep; unsigned port; - unsigned long count; - gva_t address; ++vcpu->stat.io_exits; exit_qualification = vmcs_read64(EXIT_QUALIFICATION); - in = (exit_qualification & 8) != 0; - size = (exit_qualification & 7) + 1; string = (exit_qualification & 16) != 0; + + if (string) { + if (emulate_instruction(vcpu, kvm_run, 0, 0) == EMULATE_DO_MMIO) + return 0; + return 1; + } + + size = (exit_qualification & 7) + 1; + in = (exit_qualification & 8) != 0; down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0; - count = 1; rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; - address = 0; - if (string) { - if (rep && !get_io_count(vcpu, &count)) - return 1; - address = vmcs_readl(GUEST_LINEAR_ADDRESS); - } - return kvm_setup_pio(vcpu, kvm_run, 
in, size, count, string, down, - address, rep, port); + + return kvm_setup_pio(vcpu, kvm_run, in, size, 1, 0, down, + 0, rep, port); } static void diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 44eb28d3149..d553719fc4c 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -103,9 +103,12 @@ static u8 opcode_table[256] = { /* 0x58 - 0x5F */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x60 - 0x6F */ + /* 0x60 - 0x6B */ 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x6C - 0x6F */ + SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ + SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x87 */ @@ -428,10 +431,11 @@ struct operand { }) /* Access/update address held in a register, based on addressing mode. */ +#define address_mask(reg) \ + ((ad_bytes == sizeof(unsigned long)) ? \ + (reg) : ((reg) & ((1UL << (ad_bytes << 3)) - 1))) #define register_address(base, reg) \ - ((base) + ((ad_bytes == sizeof(unsigned long)) ? (reg) : \ - ((reg) & ((1UL << (ad_bytes << 3)) - 1)))) - + ((base) + address_mask(reg)) #define register_address_increment(reg, inc) \ do { \ /* signed type ensures sign extension to long */ \ @@ -1116,6 +1120,41 @@ done: special_insn: if (twobyte) goto twobyte_special_insn; + switch(b) { + case 0x6c: /* insb */ + case 0x6d: /* insw/insd */ + if (kvm_setup_pio(ctxt->vcpu, NULL, + 1, /* in */ + (d & ByteOp) ? 1 : op_bytes, /* size */ + rep_prefix ? 
+ address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */ + 1, /* strings */ + (_eflags & EFLG_DF), /* down */ + register_address(ctxt->es_base, + _regs[VCPU_REGS_RDI]), /* address */ + rep_prefix, + _regs[VCPU_REGS_RDX] /* port */ + ) == 0) + return -1; + return 0; + case 0x6e: /* outsb */ + case 0x6f: /* outsw/outsd */ + if (kvm_setup_pio(ctxt->vcpu, NULL, + 0, /* in */ + (d & ByteOp) ? 1 : op_bytes, /* size */ + rep_prefix ? + address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */ + 1, /* strings */ + (_eflags & EFLG_DF), /* down */ + register_address(override_base ? + *override_base : ctxt->ds_base, + _regs[VCPU_REGS_RSI]), /* address */ + rep_prefix, + _regs[VCPU_REGS_RDX] /* port */ + ) == 0) + return -1; + return 0; + } if (rep_prefix) { if (_regs[VCPU_REGS_RCX] == 0) { ctxt->vcpu->rip = _eip; -- cgit v1.2.3 From 3090dd7377c7eb5cbe229e2a538f9dc7e5b06814 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sun, 5 Aug 2007 10:43:32 +0300 Subject: KVM: Clean up kvm_setup_pio() Split kvm_setup_pio() into two functions, one to setup in/out pio (kvm_emulate_pio()) and one to setup ins/outs pio (kvm_emulate_pio_string()). 
Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 8 +++--- drivers/kvm/kvm_main.c | 63 +++++++++++++++++++++++++++++------------------ drivers/kvm/svm.c | 3 +-- drivers/kvm/vmx.c | 3 +-- drivers/kvm/x86_emulate.c | 6 ++--- 5 files changed, 48 insertions(+), 35 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 6d258261891..2245baeeb02 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -539,9 +539,11 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; -int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, - int size, unsigned long count, int string, int down, - gva_t address, int rep, unsigned port); +int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned port); +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int down, + gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 661d065fd86..d154487b772 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1735,8 +1735,39 @@ static void pio_string_write(struct kvm_io_device *pio_dev, } } -int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, - int size, unsigned long count, int string, int down, +int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned port) +{ + struct kvm_io_device *pio_dev; + + vcpu->run->exit_reason = KVM_EXIT_IO; + vcpu->run->io.direction = in ? 
KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; + vcpu->run->io.size = vcpu->pio.size = size; + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; + vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = 1; + vcpu->run->io.port = vcpu->pio.port = port; + vcpu->pio.in = in; + vcpu->pio.string = 0; + vcpu->pio.down = 0; + vcpu->pio.guest_page_offset = 0; + vcpu->pio.rep = 0; + + kvm_arch_ops->cache_regs(vcpu); + memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); + kvm_arch_ops->decache_regs(vcpu); + + pio_dev = vcpu_find_pio_dev(vcpu, port); + if (pio_dev) { + kernel_pio(pio_dev, vcpu, vcpu->pio_data); + complete_pio(vcpu); + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(kvm_emulate_pio); + +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, + int size, unsigned long count, int down, gva_t address, int rep, unsigned port) { unsigned now, in_page; @@ -1747,33 +1778,16 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->run->exit_reason = KVM_EXIT_IO; vcpu->run->io.direction = in ? 
KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; - vcpu->run->io.size = size; + vcpu->run->io.size = vcpu->pio.size = size; vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; - vcpu->run->io.count = count; - vcpu->run->io.port = port; - vcpu->pio.count = count; - vcpu->pio.cur_count = count; - vcpu->pio.size = size; + vcpu->run->io.count = vcpu->pio.count = vcpu->pio.cur_count = count; + vcpu->run->io.port = vcpu->pio.port = port; vcpu->pio.in = in; - vcpu->pio.port = port; - vcpu->pio.string = string; + vcpu->pio.string = 1; vcpu->pio.down = down; vcpu->pio.guest_page_offset = offset_in_page(address); vcpu->pio.rep = rep; - pio_dev = vcpu_find_pio_dev(vcpu, port); - if (!string) { - kvm_arch_ops->cache_regs(vcpu); - memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); - kvm_arch_ops->decache_regs(vcpu); - if (pio_dev) { - kernel_pio(pio_dev, vcpu, vcpu->pio_data); - complete_pio(vcpu); - return 1; - } - return 0; - } - if (!count) { kvm_arch_ops->skip_emulated_instruction(vcpu); return 1; @@ -1818,6 +1832,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } } + pio_dev = vcpu_find_pio_dev(vcpu, port); if (!vcpu->pio.in) { /* string PIO write */ ret = pio_copy_data(vcpu); @@ -1834,7 +1849,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, return ret; } -EXPORT_SYMBOL_GPL(kvm_setup_pio); +EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index a83ff01bb01..e3c6d891326 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1005,8 +1005,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) rep = (io_info & SVM_IOIO_REP_MASK) != 0; down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; - return kvm_setup_pio(&svm->vcpu, kvm_run, in, size, 1, 0, - down, 0, rep, port); + return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); } static int nop_on_interception(struct 
vcpu_svm *svm, struct kvm_run *kvm_run) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 044722bc1a7..906d4fa13d1 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1785,8 +1785,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rep = (exit_qualification & 32) != 0; port = exit_qualification >> 16; - return kvm_setup_pio(vcpu, kvm_run, in, size, 1, 0, down, - 0, rep, port); + return kvm_emulate_pio(vcpu, kvm_run, in, size, port); } static void diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index d553719fc4c..b4f439cfc66 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -1123,12 +1123,11 @@ special_insn: switch(b) { case 0x6c: /* insb */ case 0x6d: /* insw/insd */ - if (kvm_setup_pio(ctxt->vcpu, NULL, + if (kvm_emulate_pio_string(ctxt->vcpu, NULL, 1, /* in */ (d & ByteOp) ? 1 : op_bytes, /* size */ rep_prefix ? address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */ - 1, /* strings */ (_eflags & EFLG_DF), /* down */ register_address(ctxt->es_base, _regs[VCPU_REGS_RDI]), /* address */ @@ -1139,12 +1138,11 @@ special_insn: return 0; case 0x6e: /* outsb */ case 0x6f: /* outsw/outsd */ - if (kvm_setup_pio(ctxt->vcpu, NULL, + if (kvm_emulate_pio_string(ctxt->vcpu, NULL, 0, /* in */ (d & ByteOp) ? 1 : op_bytes, /* size */ rep_prefix ? address_mask(_regs[VCPU_REGS_RCX]) : 1, /* count */ - 1, /* strings */ (_eflags & EFLG_DF), /* down */ register_address(override_base ? *override_base : ctxt->ds_base, -- cgit v1.2.3 From 7e66f350cfc853043bfa71b281581dd6f92fa347 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 15 Aug 2007 15:23:34 +0300 Subject: KVM: Close minor race in signal handling We need to check for signals inside the critical section, otherwise a signal can be sent which we will not notice. Also move the check before entry, so that if the signal happens before the first entry, we exit immediately instead of waiting for something to happen to the guest. 
Signed-off-by: Avi Kivity --- drivers/kvm/svm.c | 19 ++++++++++--------- drivers/kvm/vmx.c | 23 ++++++++++++----------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index e3c6d891326..cc674bfd31d 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1398,11 +1398,19 @@ again: if (unlikely(r)) return r; + clgi(); + + if (signal_pending(current)) { + stgi(); + ++vcpu->stat.signal_exits; + post_kvm_run_save(svm, kvm_run); + kvm_run->exit_reason = KVM_EXIT_INTR; + return -EINTR; + } + if (!vcpu->mmio_read_completed) do_interrupt_requests(svm, kvm_run); - clgi(); - vcpu->guest_mode = 1; if (vcpu->requests) if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) @@ -1582,13 +1590,6 @@ again: r = handle_exit(svm, kvm_run); if (r > 0) { - if (signal_pending(current)) { - ++vcpu->stat.signal_exits; - post_kvm_run_save(svm, kvm_run); - kvm_run->exit_reason = KVM_EXIT_INTR; - return -EINTR; - } - if (dm_request_for_irq_injection(svm, kvm_run)) { ++vcpu->stat.request_irq_exits; post_kvm_run_save(svm, kvm_run); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 906d4fa13d1..a9b4cb51dec 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -2066,9 +2066,6 @@ again: preempt_disable(); - if (!vcpu->mmio_read_completed) - do_interrupt_requests(vcpu, kvm_run); - vmx_save_host_state(vmx); kvm_load_guest_fpu(vcpu); @@ -2079,6 +2076,18 @@ again: local_irq_disable(); + if (signal_pending(current)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; + goto out; + } + + if (!vcpu->mmio_read_completed) + do_interrupt_requests(vcpu, kvm_run); + vcpu->guest_mode = 1; if (vcpu->requests) if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) @@ -2227,14 +2236,6 @@ again: r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { - /* Give scheduler a change to reschedule. 
*/ - if (signal_pending(current)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - goto out; - } - if (dm_request_for_irq_injection(vcpu, kvm_run)) { r = -EINTR; kvm_run->exit_reason = KVM_EXIT_INTR; -- cgit v1.2.3 From 253abdee5ec2edd0a7f6dc2358bef42e3fdf1f39 Mon Sep 17 00:00:00 2001 From: "Yang, Sheng" Date: Thu, 16 Aug 2007 13:01:00 +0300 Subject: KVM: Communicate cr8 changes to userspace This allows running 64-bit Windows. Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 3 ++- include/linux/kvm.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index a9b4cb51dec..cd999c0a24c 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1831,7 +1831,8 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load_rsp_rip(vcpu); set_cr8(vcpu, vcpu->regs[reg]); skip_emulated_instruction(vcpu); - return 1; + kvm_run->exit_reason = KVM_EXIT_SET_TPR; + return 0; }; break; case 2: /* clts */ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 91a446f450b..1d5a49cdda3 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -46,6 +46,7 @@ enum kvm_exit_reason { KVM_EXIT_SHUTDOWN = 8, KVM_EXIT_FAIL_ENTRY = 9, KVM_EXIT_INTR = 10, + KVM_EXIT_SET_TPR = 11 }; /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ -- cgit v1.2.3 From 19eb938e0115693414a83b6bde2b67896bd9953a Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Fri, 17 Aug 2007 15:17:41 +0300 Subject: KVM: x86 emulator: implement 'and $imm, %{al|ax|eax}' Implement emulation of instruction and al imm8 (opcode 0x24) and ax/eax imm16/imm32 (opcode 0x25) Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index b4f439cfc66..2b94d16e9d2 100644 --- a/drivers/kvm/x86_emulate.c +++ 
b/drivers/kvm/x86_emulate.c @@ -83,7 +83,7 @@ static u8 opcode_table[256] = { /* 0x20 - 0x27 */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, - 0, 0, 0, 0, + SrcImmByte, SrcImm, 0, 0, /* 0x28 - 0x2F */ ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, @@ -882,10 +882,27 @@ done_prefixes: sbb: /* sbb */ emulate_2op_SrcV("sbb", src, dst, _eflags); break; - case 0x20 ... 0x25: + case 0x20 ... 0x23: and: /* and */ emulate_2op_SrcV("and", src, dst, _eflags); break; + case 0x24: /* and al imm8 */ + dst.type = OP_REG; + dst.ptr = &_regs[VCPU_REGS_RAX]; + dst.val = *(u8 *)dst.ptr; + dst.bytes = 1; + dst.orig_val = dst.val; + goto and; + case 0x25: /* and ax imm16, or eax imm32 */ + dst.type = OP_REG; + dst.bytes = op_bytes; + dst.ptr = &_regs[VCPU_REGS_RAX]; + if (op_bytes == 2) + dst.val = *(u16 *)dst.ptr; + else + dst.val = *(u32 *)dst.ptr; + dst.orig_val = dst.val; + goto and; case 0x28 ... 
0x2d: sub: /* sub */ emulate_2op_SrcV("sub", src, dst, _eflags); -- cgit v1.2.3 From 098c937ba30acc5b7dcb6a4ad7cc8d63c7117546 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sun, 19 Aug 2007 11:00:36 +0300 Subject: KVM: x86 emulator: implement 'jmp rel' instruction (opcode 0xe9) Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 2b94d16e9d2..fa7aa278956 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -145,8 +145,10 @@ static u8 opcode_table[256] = { 0, 0, 0, 0, /* 0xD8 - 0xDF */ 0, 0, 0, 0, 0, 0, 0, 0, - /* 0xE0 - 0xEF */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE0 - 0xE7 */ + 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xE8 - 0xEF */ + 0, SrcImm|ImplicitOps, 0, 0, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, ImplicitOps, 0, @@ -447,6 +449,12 @@ struct operand { (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \ } while (0) +#define JMP_REL(rel) \ + do { \ + _eip += (int)(rel); \ + _eip = ((op_bytes == 2) ? (uint16_t)_eip : (uint32_t)_eip); \ + } while (0) + /* * Given the 'reg' portion of a ModRM byte, and a register block, return a * pointer into the block that addresses the relevant register. @@ -1023,6 +1031,10 @@ done_prefixes: case 0xd2 ... 0xd3: /* Grp2 */ src.val = _regs[VCPU_REGS_RCX]; goto grp2; + case 0xe9: /* jmp rel */ + JMP_REL(src.val); + no_wb = 1; /* Disable writeback. */ + break; case 0xf6 ... 0xf7: /* Grp3 */ switch (modrm_reg) { case 0 ... 
1: /* test */ -- cgit v1.2.3 From c53ce170a9e8531f293e402c7d8c65e38452a551 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sun, 19 Aug 2007 11:03:13 +0300 Subject: KVM: x86 emulator: Implement 'jmp rel short' instruction (opcode 0xeb) Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index fa7aa278956..1036e0224aa 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -148,7 +148,7 @@ static u8 opcode_table[256] = { /* 0xE0 - 0xE7 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE8 - 0xEF */ - 0, SrcImm|ImplicitOps, 0, 0, 0, 0, 0, 0, + 0, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, ImplicitOps, 0, @@ -1032,6 +1032,7 @@ done_prefixes: src.val = _regs[VCPU_REGS_RCX]; goto grp2; case 0xe9: /* jmp rel */ + case 0xeb: /* jmp rel short */ JMP_REL(src.val); no_wb = 1; /* Disable writeback. 
*/ break; -- cgit v1.2.3 From 7e778161fb4612d2ceac9604f10e1061f5f4cf48 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sun, 19 Aug 2007 11:07:06 +0300 Subject: KVM: x86 emulator: implement 'push reg' (opcodes 0x50-0x57) Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 1036e0224aa..cf895aab51d 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -99,7 +99,8 @@ static u8 opcode_table[256] = { /* 0x40 - 0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50 - 0x57 */ - 0, 0, 0, 0, 0, 0, 0, 0, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0x58 - 0x5F */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, @@ -1151,6 +1152,19 @@ special_insn: if (twobyte) goto twobyte_special_insn; switch(b) { + case 0x50 ... 
0x57: /* push reg */ + if (op_bytes == 2) + src.val = (u16) _regs[b & 0x7]; + else + src.val = (u32) _regs[b & 0x7]; + dst.type = OP_MEM; + dst.bytes = op_bytes; + dst.val = src.val; + register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes); + dst.ptr = (void *) register_address( + ctxt->ss_base, _regs[VCPU_REGS_RSP]); + no_wb = 1; /* force writeback */ + break; case 0x6c: /* insb */ case 0x6d: /* insw/insd */ if (kvm_emulate_pio_string(ctxt->vcpu, NULL, -- cgit v1.2.3 From 33f5fa1664046208b890e9231a47e377fdfc7762 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Sun, 19 Aug 2007 22:24:58 +0300 Subject: KVM: VMX: allow rmode_tss_base() to work with >2G of guest memory Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index cd999c0a24c..b40066854c1 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1030,7 +1030,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); } -static int rmode_tss_base(struct kvm* kvm) +static gva_t rmode_tss_base(struct kvm* kvm) { gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 3; return base_gfn << PAGE_SHIFT; -- cgit v1.2.3 From 2e2c618dad6d5768da4a891ff71fc1ca0cbd3fe0 Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Mon, 20 Aug 2007 18:11:00 +0300 Subject: KVM: Support more memory slots Needed for mapping memory at 4GB. 
Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 2245baeeb02..a42a6f314a8 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -39,7 +39,7 @@ #define KVM_MAX_VCPUS 4 #define KVM_ALIAS_SLOTS 4 -#define KVM_MEMORY_SLOTS 4 +#define KVM_MEMORY_SLOTS 8 #define KVM_NUM_MMU_PAGES 1024 #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 -- cgit v1.2.3 From d39dba54ce71ab3234c387219b175dc36d37f85a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 22 Aug 2007 18:09:29 +0300 Subject: KVM: X86 emulator: fix 'push reg' writeback Pointed out by Rusty Russell. Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index cf895aab51d..7439b3422ec 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -1163,7 +1163,6 @@ special_insn: register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes); dst.ptr = (void *) register_address( ctxt->ss_base, _regs[VCPU_REGS_RSP]); - no_wb = 1; /* force writeback */ break; case 0x6c: /* insb */ case 0x6d: /* insw/insd */ -- cgit v1.2.3 From 152d3f2f246ce3c2a0cf2fc6c2214663cd99aa83 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 23 Aug 2007 16:33:11 +0200 Subject: KVM: VMX: Split segments reload in vmx_load_host_state() vmx_load_host_state() bundles fs, gs, ldt, and tss reloading into one in the hope that it is infrequent. With smp guests, fs reloading is frequent due to fs being used by threads. Unbundle the reloads to reduce expensive gs reloads. 
Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index b40066854c1..d63e82e5dbf 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -54,7 +54,8 @@ struct vcpu_vmx { struct { int loaded; u16 fs_sel, gs_sel, ldt_sel; - int fs_gs_ldt_reload_needed; + int gs_ldt_reload_needed; + int fs_reload_needed; }host_state; }; @@ -353,20 +354,21 @@ static void vmx_save_host_state(struct vcpu_vmx *vmx) * allow segment selectors with cpl > 0 or ti == 1. */ vmx->host_state.ldt_sel = read_ldt(); - vmx->host_state.fs_gs_ldt_reload_needed = vmx->host_state.ldt_sel; + vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; vmx->host_state.fs_sel = read_fs(); - if (!(vmx->host_state.fs_sel & 7)) + if (!(vmx->host_state.fs_sel & 7)) { vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); - else { + vmx->host_state.fs_reload_needed = 0; + } else { vmcs_write16(HOST_FS_SELECTOR, 0); - vmx->host_state.fs_gs_ldt_reload_needed = 1; + vmx->host_state.fs_reload_needed = 1; } vmx->host_state.gs_sel = read_gs(); if (!(vmx->host_state.gs_sel & 7)) vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); else { vmcs_write16(HOST_GS_SELECTOR, 0); - vmx->host_state.fs_gs_ldt_reload_needed = 1; + vmx->host_state.gs_ldt_reload_needed = 1; } #ifdef CONFIG_X86_64 @@ -396,9 +398,10 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) return; vmx->host_state.loaded = 0; - if (vmx->host_state.fs_gs_ldt_reload_needed) { - load_ldt(vmx->host_state.ldt_sel); + if (vmx->host_state.fs_reload_needed) load_fs(vmx->host_state.fs_sel); + if (vmx->host_state.gs_ldt_reload_needed) { + load_ldt(vmx->host_state.ldt_sel); /* * If we have to reload gs, we must take care to * preserve our gs base. 
@@ -409,9 +412,8 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); #endif local_irq_restore(flags); - - reload_tss(); } + reload_tss(); save_msrs(vmx->guest_msrs, vmx->save_nmsrs); load_msrs(vmx->host_msrs, vmx->save_nmsrs); if (msr_efer_need_save_restore(vmx)) -- cgit v1.2.3 From 85f455f7ddbed403b34b4d54b1eaf0e14126a126 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Fri, 6 Jul 2007 12:20:49 +0300 Subject: KVM: Add support for in-kernel PIC emulation Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/Makefile | 2 +- drivers/kvm/i8259.c | 442 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/irq.c | 61 +++++++ drivers/kvm/irq.h | 64 +++++++ drivers/kvm/kvm.h | 11 ++ drivers/kvm/kvm_main.c | 46 ++++- drivers/kvm/svm.c | 69 +++++++- drivers/kvm/vmx.c | 80 ++++++++- include/linux/kvm.h | 19 +++ 9 files changed, 770 insertions(+), 24 deletions(-) create mode 100644 drivers/kvm/i8259.c create mode 100644 drivers/kvm/irq.c create mode 100644 drivers/kvm/irq.h diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile index c0a789fa9d6..952dff38eb6 100644 --- a/drivers/kvm/Makefile +++ b/drivers/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -kvm-objs := kvm_main.o mmu.o x86_emulate.o +kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c new file mode 100644 index 00000000000..40ad1046223 --- /dev/null +++ b/drivers/kvm/i8259.c @@ -0,0 +1,442 @@ +/* + * 8259 interrupt controller emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without 
limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * Authors: + * Yaozu (Eddie) Dong + * Port from Qemu. + */ +#include +#include "irq.h" + +/* + * set irq level. If an edge is detected, then the IRR is set to 1 + */ +static inline void pic_set_irq1(struct kvm_kpic_state *s, int irq, int level) +{ + int mask; + mask = 1 << irq; + if (s->elcr & mask) /* level triggered */ + if (level) { + s->irr |= mask; + s->last_irr |= mask; + } else { + s->irr &= ~mask; + s->last_irr &= ~mask; + } + else /* edge triggered */ + if (level) { + if ((s->last_irr & mask) == 0) + s->irr |= mask; + s->last_irr |= mask; + } else + s->last_irr &= ~mask; +} + +/* + * return the highest priority found in mask (highest = smallest + * number). Return 8 if no irq + */ +static inline int get_priority(struct kvm_kpic_state *s, int mask) +{ + int priority; + if (mask == 0) + return 8; + priority = 0; + while ((mask & (1 << ((priority + s->priority_add) & 7))) == 0) + priority++; + return priority; +} + +/* + * return the pic wanted interrupt. 
return -1 if none + */ +static int pic_get_irq(struct kvm_kpic_state *s) +{ + int mask, cur_priority, priority; + + mask = s->irr & ~s->imr; + priority = get_priority(s, mask); + if (priority == 8) + return -1; + /* + * compute current priority. If special fully nested mode on the + * master, the IRQ coming from the slave is not taken into account + * for the priority computation. + */ + mask = s->isr; + if (s->special_fully_nested_mode && s == &s->pics_state->pics[0]) + mask &= ~(1 << 2); + cur_priority = get_priority(s, mask); + if (priority < cur_priority) + /* + * higher priority found: an irq should be generated + */ + return (priority + s->priority_add) & 7; + else + return -1; +} + +/* + * raise irq to CPU if necessary. must be called every time the active + * irq may change + */ +static void pic_update_irq(struct kvm_pic *s) +{ + int irq2, irq; + + irq2 = pic_get_irq(&s->pics[1]); + if (irq2 >= 0) { + /* + * if irq request by slave pic, signal master PIC + */ + pic_set_irq1(&s->pics[0], 2, 1); + pic_set_irq1(&s->pics[0], 2, 0); + } + irq = pic_get_irq(&s->pics[0]); + if (irq >= 0) + s->irq_request(s->irq_request_opaque, 1); + else + s->irq_request(s->irq_request_opaque, 0); +} + +void kvm_pic_set_irq(void *opaque, int irq, int level) +{ + struct kvm_pic *s = opaque; + + pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); + pic_update_irq(s); +} + +/* + * acknowledge interrupt 'irq' + */ +static inline void pic_intack(struct kvm_kpic_state *s, int irq) +{ + if (s->auto_eoi) { + if (s->rotate_on_auto_eoi) + s->priority_add = (irq + 1) & 7; + } else + s->isr |= (1 << irq); + /* + * We don't clear a level sensitive interrupt here + */ + if (!(s->elcr & (1 << irq))) + s->irr &= ~(1 << irq); +} + +int kvm_pic_read_irq(struct kvm_pic *s) +{ + int irq, irq2, intno; + + irq = pic_get_irq(&s->pics[0]); + if (irq >= 0) { + pic_intack(&s->pics[0], irq); + if (irq == 2) { + irq2 = pic_get_irq(&s->pics[1]); + if (irq2 >= 0) + pic_intack(&s->pics[1], irq2); + else + /* + * 
spurious IRQ on slave controller + */ + irq2 = 7; + intno = s->pics[1].irq_base + irq2; + irq = irq2 + 8; + } else + intno = s->pics[0].irq_base + irq; + } else { + /* + * spurious IRQ on host controller + */ + irq = 7; + intno = s->pics[0].irq_base + irq; + } + pic_update_irq(s); + + return intno; +} + +static void pic_reset(void *opaque) +{ + struct kvm_kpic_state *s = opaque; + + s->last_irr = 0; + s->irr = 0; + s->imr = 0; + s->isr = 0; + s->priority_add = 0; + s->irq_base = 0; + s->read_reg_select = 0; + s->poll = 0; + s->special_mask = 0; + s->init_state = 0; + s->auto_eoi = 0; + s->rotate_on_auto_eoi = 0; + s->special_fully_nested_mode = 0; + s->init4 = 0; +} + +static void pic_ioport_write(void *opaque, u32 addr, u32 val) +{ + struct kvm_kpic_state *s = opaque; + int priority, cmd, irq; + + addr &= 1; + if (addr == 0) { + if (val & 0x10) { + pic_reset(s); /* init */ + /* + * deassert a pending interrupt + */ + s->pics_state->irq_request(s->pics_state-> + irq_request_opaque, 0); + s->init_state = 1; + s->init4 = val & 1; + if (val & 0x02) + printk(KERN_ERR "single mode not supported"); + if (val & 0x08) + printk(KERN_ERR + "level sensitive irq not supported"); + } else if (val & 0x08) { + if (val & 0x04) + s->poll = 1; + if (val & 0x02) + s->read_reg_select = val & 1; + if (val & 0x40) + s->special_mask = (val >> 5) & 1; + } else { + cmd = val >> 5; + switch (cmd) { + case 0: + case 4: + s->rotate_on_auto_eoi = cmd >> 2; + break; + case 1: /* end of interrupt */ + case 5: + priority = get_priority(s, s->isr); + if (priority != 8) { + irq = (priority + s->priority_add) & 7; + s->isr &= ~(1 << irq); + if (cmd == 5) + s->priority_add = (irq + 1) & 7; + pic_update_irq(s->pics_state); + } + break; + case 3: + irq = val & 7; + s->isr &= ~(1 << irq); + pic_update_irq(s->pics_state); + break; + case 6: + s->priority_add = (val + 1) & 7; + pic_update_irq(s->pics_state); + break; + case 7: + irq = val & 7; + s->isr &= ~(1 << irq); + s->priority_add = (irq + 1) & 7; + 
pic_update_irq(s->pics_state); + break; + default: + break; /* no operation */ + } + } + } else + switch (s->init_state) { + case 0: /* normal mode */ + s->imr = val; + pic_update_irq(s->pics_state); + break; + case 1: + s->irq_base = val & 0xf8; + s->init_state = 2; + break; + case 2: + if (s->init4) + s->init_state = 3; + else + s->init_state = 0; + break; + case 3: + s->special_fully_nested_mode = (val >> 4) & 1; + s->auto_eoi = (val >> 1) & 1; + s->init_state = 0; + break; + } +} + +static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) +{ + int ret; + + ret = pic_get_irq(s); + if (ret >= 0) { + if (addr1 >> 7) { + s->pics_state->pics[0].isr &= ~(1 << 2); + s->pics_state->pics[0].irr &= ~(1 << 2); + } + s->irr &= ~(1 << ret); + s->isr &= ~(1 << ret); + if (addr1 >> 7 || ret != 2) + pic_update_irq(s->pics_state); + } else { + ret = 0x07; + pic_update_irq(s->pics_state); + } + + return ret; +} + +static u32 pic_ioport_read(void *opaque, u32 addr1) +{ + struct kvm_kpic_state *s = opaque; + unsigned int addr; + int ret; + + addr = addr1; + addr &= 1; + if (s->poll) { + ret = pic_poll_read(s, addr1); + s->poll = 0; + } else + if (addr == 0) + if (s->read_reg_select) + ret = s->isr; + else + ret = s->irr; + else + ret = s->imr; + return ret; +} + +static void elcr_ioport_write(void *opaque, u32 addr, u32 val) +{ + struct kvm_kpic_state *s = opaque; + s->elcr = val & s->elcr_mask; +} + +static u32 elcr_ioport_read(void *opaque, u32 addr1) +{ + struct kvm_kpic_state *s = opaque; + return s->elcr; +} + +static int picdev_in_range(struct kvm_io_device *this, gpa_t addr) +{ + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + case 0x4d0: + case 0x4d1: + return 1; + default: + return 0; + } +} + +static void picdev_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + struct kvm_pic *s = this->private; + unsigned char data = *(unsigned char *)val; + + if (len != 1) { + if (printk_ratelimit()) + printk(KERN_ERR "PIC: non 
byte write\n"); + return; + } + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + pic_ioport_write(&s->pics[addr >> 7], addr, data); + break; + case 0x4d0: + case 0x4d1: + elcr_ioport_write(&s->pics[addr & 1], addr, data); + break; + } +} + +static void picdev_read(struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + struct kvm_pic *s = this->private; + unsigned char data = 0; + + if (len != 1) { + if (printk_ratelimit()) + printk(KERN_ERR "PIC: non byte read\n"); + return; + } + switch (addr) { + case 0x20: + case 0x21: + case 0xa0: + case 0xa1: + data = pic_ioport_read(&s->pics[addr >> 7], addr); + break; + case 0x4d0: + case 0x4d1: + data = elcr_ioport_read(&s->pics[addr & 1], addr); + break; + } + *(unsigned char *)val = data; +} + +/* + * callback when PIC0 irq status changed + */ +static void pic_irq_request(void *opaque, int level) +{ + struct kvm *kvm = opaque; + + pic_irqchip(kvm)->output = level; +} + +struct kvm_pic *kvm_create_pic(struct kvm *kvm) +{ + struct kvm_pic *s; + s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); + if (!s) + return NULL; + s->pics[0].elcr_mask = 0xf8; + s->pics[1].elcr_mask = 0xde; + s->irq_request = pic_irq_request; + s->irq_request_opaque = kvm; + s->pics[0].pics_state = s; + s->pics[1].pics_state = s; + + /* + * Initialize PIO device + */ + s->dev.read = picdev_read; + s->dev.write = picdev_write; + s->dev.in_range = picdev_in_range; + s->dev.private = s; + kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev); + return s; +} diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c new file mode 100644 index 00000000000..b08005ca705 --- /dev/null +++ b/drivers/kvm/irq.c @@ -0,0 +1,61 @@ +/* + * irq.c: API for in kernel interrupt controller + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong + * + */ + +#include + +#include "kvm.h" +#include "irq.h" + +/* + * check if there is pending interrupt without + * intack. + */ +int kvm_cpu_has_interrupt(struct kvm_vcpu *v) +{ + struct kvm_pic *s = pic_irqchip(v->kvm); + + if (s->output) /* PIC */ + return 1; + /* + * TODO: APIC + */ + return 0; +} +EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); + +/* + * Read pending interrupt vector and intack. + */ +int kvm_cpu_get_interrupt(struct kvm_vcpu *v) +{ + struct kvm_pic *s = pic_irqchip(v->kvm); + int vector; + + s->output = 0; + vector = kvm_pic_read_irq(s); + if (vector != -1) + return vector; + /* + * TODO: APIC + */ + return -1; +} +EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h new file mode 100644 index 00000000000..bdb2fc34804 --- /dev/null +++ b/drivers/kvm/irq.h @@ -0,0 +1,64 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong + * + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include "kvm.h" + +typedef void irq_request_func(void *opaque, int level); + +struct kvm_pic; +struct kvm_kpic_state { + u8 last_irr; /* edge detection */ + u8 irr; /* interrupt request register */ + u8 imr; /* interrupt mask register */ + u8 isr; /* interrupt service register */ + u8 priority_add; /* highest irq priority */ + u8 irq_base; + u8 read_reg_select; + u8 poll; + u8 special_mask; + u8 init_state; + u8 auto_eoi; + u8 rotate_on_auto_eoi; + u8 special_fully_nested_mode; + u8 init4; /* true if 4 byte init */ + u8 elcr; /* PIIX edge/trigger selection */ + u8 elcr_mask; + struct kvm_pic *pics_state; +}; + +struct kvm_pic { + struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ + irq_request_func *irq_request; + void *irq_request_opaque; + int output; /* intr from master PIC */ + struct kvm_io_device dev; +}; + +struct kvm_pic *kvm_create_pic(struct kvm *kvm); +void kvm_pic_set_irq(void *opaque, int irq, int level); +int kvm_pic_read_irq(struct kvm_pic *s); +int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +int kvm_cpu_has_interrupt(struct kvm_vcpu *v); + +#endif diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index a42a6f314a8..d71712ddebe 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -408,8 +408,19 @@ struct kvm { struct file *filp; struct kvm_io_bus mmio_bus; struct kvm_io_bus pio_bus; + struct kvm_pic *vpic; }; +static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) +{ + return kvm->vpic; +} + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + return pic_irqchip(kvm) != 0; +} + struct descriptor_table { u16 limit; unsigned long base; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 
d154487b772..09a04bc9541 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -18,6 +18,7 @@ #include "kvm.h" #include "x86_emulate.h" #include "segment_descriptor.h" +#include "irq.h" #include #include @@ -378,6 +379,7 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); + kfree(kvm->vpic); kvm_free_vcpus(kvm); kvm_free_physmem(kvm); kfree(kvm); @@ -1258,7 +1260,8 @@ EXPORT_SYMBOL_GPL(emulate_instruction); int kvm_emulate_halt(struct kvm_vcpu *vcpu) { - if (vcpu->irq_summary) + if (vcpu->irq_summary || + (irqchip_in_kernel(vcpu->kvm) && kvm_cpu_has_interrupt(vcpu))) return 1; vcpu->run->exit_reason = KVM_EXIT_HLT; @@ -2715,6 +2718,30 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } + case KVM_CREATE_IRQCHIP: + r = -ENOMEM; + kvm->vpic = kvm_create_pic(kvm); + if (kvm->vpic) + r = 0; + else + goto out; + break; + case KVM_IRQ_LINE: { + struct kvm_irq_level irq_event; + + r = -EFAULT; + if (copy_from_user(&irq_event, argp, sizeof irq_event)) + goto out; + if (irqchip_in_kernel(kvm)) { + if (irq_event.irq < 16) + kvm_pic_set_irq(pic_irqchip(kvm), + irq_event.irq, + irq_event.level); + /* TODO: IOAPIC */ + r = 0; + } + break; + } default: ; } @@ -2825,12 +2852,19 @@ static long kvm_dev_ioctl(struct file *filp, r = 0; break; } - case KVM_CHECK_EXTENSION: - /* - * No extensions defined at present. 
- */ - r = 0; + case KVM_CHECK_EXTENSION: { + int ext = (long)argp; + + switch (ext) { + case KVM_CAP_IRQCHIP: + r = 1; + break; + default: + r = 0; + break; + } break; + } case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; if (arg) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index cc674bfd31d..2237a594a8e 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -16,6 +16,7 @@ #include "kvm_svm.h" #include "x86_emulate.h" +#include "irq.h" #include #include @@ -921,7 +922,8 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) enum emulation_result er; int r; - if (is_external_interrupt(exit_int_info)) + if (!irqchip_in_kernel(kvm) && + is_external_interrupt(exit_int_info)) push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); mutex_lock(&kvm->lock); @@ -1185,6 +1187,8 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) static int interrupt_window_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { + svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); + svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; /* * If the user space waits to inject interrupts, exit as soon as * possible @@ -1289,22 +1293,56 @@ static void pre_svm_run(struct vcpu_svm *svm) } -static inline void inject_irq(struct vcpu_svm *svm) +static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) { struct vmcb_control_area *control; control = &svm->vmcb->control; - control->int_vector = pop_irq(&svm->vcpu); + control->int_vector = irq; control->int_ctl &= ~V_INTR_PRIO_MASK; control->int_ctl |= V_IRQ_MASK | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); } -static void reput_irq(struct vcpu_svm *svm) +static void svm_intr_assist(struct vcpu_svm *svm) { + struct vmcb *vmcb = svm->vmcb; + int intr_vector = -1; + + if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && + ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) { + intr_vector = vmcb->control.exit_int_info & + SVM_EVTINJ_VEC_MASK; + 
vmcb->control.exit_int_info = 0; + svm_inject_irq(svm, intr_vector); + return; + } + + if (vmcb->control.int_ctl & V_IRQ_MASK) + return; + + if (!kvm_cpu_has_interrupt(&svm->vcpu)) + return; + + if (!(vmcb->save.rflags & X86_EFLAGS_IF) || + (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || + (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { + /* unable to deliver irq, set pending irq */ + vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR); + svm_inject_irq(svm, 0x0); + return; + } + /* Okay, we can deliver the interrupt: grab it and update PIC state. */ + intr_vector = kvm_cpu_get_interrupt(&svm->vcpu); + svm_inject_irq(svm, intr_vector); +} + +static void kvm_reput_irq(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; struct vmcb_control_area *control = &svm->vmcb->control; - if (control->int_ctl & V_IRQ_MASK) { + if ((control->int_ctl & V_IRQ_MASK) && !irqchip_in_kernel(vcpu->kvm)) { control->int_ctl &= ~V_IRQ_MASK; push_irq(&svm->vcpu, control->int_vector); } @@ -1313,6 +1351,19 @@ static void reput_irq(struct vcpu_svm *svm) !(control->int_state & SVM_INTERRUPT_SHADOW_MASK); } +static void svm_do_inject_vector(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + int word_index = __ffs(vcpu->irq_summary); + int bit_index = __ffs(vcpu->irq_pending[word_index]); + int irq = word_index * BITS_PER_LONG + bit_index; + + clear_bit(bit_index, &vcpu->irq_pending[word_index]); + if (!vcpu->irq_pending[word_index]) + clear_bit(word_index, &vcpu->irq_summary); + svm_inject_irq(svm, irq); +} + static void do_interrupt_requests(struct vcpu_svm *svm, struct kvm_run *kvm_run) { @@ -1326,7 +1377,7 @@ static void do_interrupt_requests(struct vcpu_svm *svm, /* * If interrupts enabled, and not blocked by sti or mov ss. Good. */ - inject_irq(svm); + svm_do_inject_vector(svm); /* * Interrupts blocked. Wait for unblock. 
@@ -1408,7 +1459,9 @@ again: return -EINTR; } - if (!vcpu->mmio_read_completed) + if (irqchip_in_kernel(vcpu->kvm)) + svm_intr_assist(svm); + else if (!vcpu->mmio_read_completed) do_interrupt_requests(svm, kvm_run); vcpu->guest_mode = 1; @@ -1576,7 +1629,7 @@ again: stgi(); - reput_irq(svm); + kvm_reput_irq(svm); svm->next_rip = 0; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index d63e82e5dbf..f1e80a95b69 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -17,6 +17,7 @@ #include "kvm.h" #include "x86_emulate.h" +#include "irq.h" #include "vmx.h" #include "segment_descriptor.h" @@ -1582,6 +1583,16 @@ static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6)); } +static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) +{ + if (vcpu->rmode.active) { + inject_rmode_irq(vcpu, irq); + return; + } + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); +} + static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) { int word_index = __ffs(vcpu->irq_summary); @@ -1591,13 +1602,7 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) clear_bit(bit_index, &vcpu->irq_pending[word_index]); if (!vcpu->irq_pending[word_index]) clear_bit(word_index, &vcpu->irq_summary); - - if (vcpu->rmode.active) { - inject_rmode_irq(vcpu, irq); - return; - } - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); + vmx_inject_irq(vcpu, irq); } @@ -1681,7 +1686,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info); } - if (is_external_interrupt(vect_info)) { + if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { int irq = vect_info & VECTORING_INFO_VECTOR_MASK; set_bit(irq, vcpu->irq_pending); set_bit(irq / BITS_PER_LONG, &vcpu->irq_summary); @@ -1961,6 +1966,12 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, static int 
handle_interrupt_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + u32 cpu_based_vm_exec_control; + + /* clear pending irq */ + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); /* * If the user space waits to inject interrupts, exit as soon as * possible @@ -2052,6 +2063,55 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) { } +static void enable_irq_window(struct kvm_vcpu *vcpu) +{ + u32 cpu_based_vm_exec_control; + + cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); + cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); +} + +static void vmx_intr_assist(struct kvm_vcpu *vcpu) +{ + u32 idtv_info_field, intr_info_field; + int has_ext_irq, interrupt_window_open; + + has_ext_irq = kvm_cpu_has_interrupt(vcpu); + intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); + idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD); + if (intr_info_field & INTR_INFO_VALID_MASK) { + if (idtv_info_field & INTR_INFO_VALID_MASK) { + /* TODO: fault when IDT_Vectoring */ + printk(KERN_ERR "Fault when IDT_Vectoring\n"); + } + if (has_ext_irq) + enable_irq_window(vcpu); + return; + } + if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, + vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); + + if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK)) + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, + vmcs_read32(IDT_VECTORING_ERROR_CODE)); + if (unlikely(has_ext_irq)) + enable_irq_window(vcpu); + return; + } + if (!has_ext_irq) + return; + interrupt_window_open = + ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); + if (interrupt_window_open) + vmx_inject_irq(vcpu, 
kvm_cpu_get_interrupt(vcpu)); + else + enable_irq_window(vcpu); +} + static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2088,7 +2148,9 @@ again: goto out; } - if (!vcpu->mmio_read_completed) + if (irqchip_in_kernel(vcpu->kvm)) + vmx_intr_assist(vcpu); + else if (!vcpu->mmio_read_completed) do_interrupt_requests(vcpu, kvm_run); vcpu->guest_mode = 1; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 1d5a49cdda3..bfe742b771f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -34,6 +34,17 @@ struct kvm_memory_alias { __u64 target_phys_addr; }; +/* for KVM_IRQ_LINE */ +struct kvm_irq_level { + /* + * ACPI gsi notion of irq. + * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. + * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. + */ + __u32 irq; + __u32 level; +}; + enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, KVM_EXIT_EXCEPTION = 1, @@ -268,6 +279,11 @@ struct kvm_signal_mask { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ +/* + * Extension capability list. + */ +#define KVM_CAP_IRQCHIP 0 + /* * ioctls for VM fds */ @@ -279,6 +295,9 @@ struct kvm_signal_mask { #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) +/* Device model IOC */ +#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) +#define KVM_IRQ_LINE _IO(KVMIO, 0x61) /* * ioctls for vcpu fds -- cgit v1.2.3 From 7017fc3d1a12e30ea7df4992152978a188433457 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Wed, 18 Jul 2007 11:34:57 +0300 Subject: KVM: Define and use cr8 access functions This patch wraps APIC base register and CR8 accesses in functions, providing a single API for both the userspace irqchip and the kernel irqchip. This is preparation for merging the lapic/ioapic patch. 
Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 3 +++ drivers/kvm/kvm_main.c | 32 +++++++++++++++++++++++++------- drivers/kvm/svm.c | 8 ++++---- drivers/kvm/vmx.c | 14 ++++++++------ 4 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index d71712ddebe..e0a2f13faf8 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -568,6 +568,9 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); +unsigned long get_cr8(struct kvm_vcpu *vcpu); +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); +void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 09a04bc9541..f879efbefcd 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -602,6 +602,24 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) } EXPORT_SYMBOL_GPL(set_cr8); +unsigned long get_cr8(struct kvm_vcpu *vcpu) +{ + return vcpu->cr8; +} +EXPORT_SYMBOL_GPL(get_cr8); + +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) +{ + return vcpu->apic_base; +} +EXPORT_SYMBOL_GPL(kvm_get_apic_base); + +void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) +{ + vcpu->apic_base = data; +} +EXPORT_SYMBOL_GPL(kvm_set_apic_base); + void fx_init(struct kvm_vcpu *vcpu) { unsigned after_mxcsr_mask; @@ -1481,7 +1499,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) data = 3; break; case MSR_IA32_APICBASE: - data = vcpu->apic_base; + data = kvm_get_apic_base(vcpu); break; case MSR_IA32_MISC_ENABLE: data = vcpu->ia32_misc_enable_msr; @@ -1559,7 +1577,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) case 0x200 ... 
0x2ff: /* MTRRs */ break; case MSR_IA32_APICBASE: - vcpu->apic_base = data; + kvm_set_apic_base(vcpu, data); break; case MSR_IA32_MISC_ENABLE: vcpu->ia32_misc_enable_msr = data; @@ -1865,7 +1883,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); /* re-sync apic's tpr */ - vcpu->cr8 = kvm_run->cr8; + set_cr8(vcpu, kvm_run->cr8); if (vcpu->pio.cur_count) { r = complete_pio(vcpu); @@ -2013,9 +2031,9 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->cr2 = vcpu->cr2; sregs->cr3 = vcpu->cr3; sregs->cr4 = vcpu->cr4; - sregs->cr8 = vcpu->cr8; + sregs->cr8 = get_cr8(vcpu); sregs->efer = vcpu->shadow_efer; - sregs->apic_base = vcpu->apic_base; + sregs->apic_base = kvm_get_apic_base(vcpu); memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, sizeof sregs->interrupt_bitmap); @@ -2051,13 +2069,13 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= vcpu->cr3 != sregs->cr3; vcpu->cr3 = sregs->cr3; - vcpu->cr8 = sregs->cr8; + set_cr8(vcpu, sregs->cr8); mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; #ifdef CONFIG_X86_64 kvm_arch_ops->set_efer(vcpu, sregs->efer); #endif - vcpu->apic_base = sregs->apic_base; + kvm_set_apic_base(vcpu, sregs->apic_base); kvm_arch_ops->decache_cr4_guest_bits(vcpu); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 2237a594a8e..57525e7ed28 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1339,10 +1339,10 @@ static void svm_intr_assist(struct vcpu_svm *svm) static void kvm_reput_irq(struct vcpu_svm *svm) { - struct kvm_vcpu *vcpu = &svm->vcpu; struct vmcb_control_area *control = &svm->vmcb->control; - if ((control->int_ctl & V_IRQ_MASK) && !irqchip_in_kernel(vcpu->kvm)) { + if ((control->int_ctl & V_IRQ_MASK) + && !irqchip_in_kernel(svm->vcpu.kvm)) { control->int_ctl &= ~V_IRQ_MASK; push_irq(&svm->vcpu, control->int_vector); } @@ -1396,8 +1396,8 @@ static void post_kvm_run_save(struct vcpu_svm *svm, = 
(svm->vcpu.interrupt_window_open && svm->vcpu.irq_summary == 0); kvm_run->if_flag = (svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; - kvm_run->cr8 = svm->vcpu.cr8; - kvm_run->apic_base = svm->vcpu.apic_base; + kvm_run->cr8 = get_cr8(&svm->vcpu); + kvm_run->apic_base = kvm_get_apic_base(&svm->vcpu); } /* diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index f1e80a95b69..19676b5a671 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1369,6 +1369,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) int i; int ret = 0; unsigned long kvm_vmx_return; + u64 msr; if (!init_rmode_tss(vmx->vcpu.kvm)) { ret = -ENOMEM; @@ -1376,10 +1377,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) } vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val(); - vmx->vcpu.cr8 = 0; - vmx->vcpu.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; + set_cr8(&vmx->vcpu, 0); + msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; if (vmx->vcpu.vcpu_id == 0) - vmx->vcpu.apic_base |= MSR_IA32_APICBASE_BSP; + msr |= MSR_IA32_APICBASE_BSP; + kvm_set_apic_base(&vmx->vcpu, msr); fx_init(&vmx->vcpu); @@ -1860,7 +1862,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; case 8: vcpu_load_rsp_rip(vcpu); - vcpu->regs[reg] = vcpu->cr8; + vcpu->regs[reg] = get_cr8(vcpu); vcpu_put_rsp_rip(vcpu); skip_emulated_instruction(vcpu); return 1; @@ -1957,8 +1959,8 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0; - kvm_run->cr8 = vcpu->cr8; - kvm_run->apic_base = vcpu->apic_base; + kvm_run->cr8 = get_cr8(vcpu); + kvm_run->apic_base = kvm_get_apic_base(vcpu); kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && vcpu->irq_summary == 0); } -- cgit v1.2.3 From 97222cc8316328965851ed28d23f6b64b4c912d2 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Wed, 12 Sep 2007 10:58:04 +0300 Subject: KVM: Emulate local APIC in kernel Because lightweight exits (exits which don't involve 
userspace) are many times faster than heavyweight exits, it makes sense to emulate high usage devices in the kernel. The local APIC is one such device, especially for Windows and for SMP, so we add an APIC model to kvm. It also allows in-kernel host-side drivers to inject interrupts without going through userspace. [compile fix on i386 from Jindrich Makovicka] Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/Makefile | 2 +- drivers/kvm/irq.c | 53 ++- drivers/kvm/irq.h | 41 ++- drivers/kvm/kvm.h | 3 +- drivers/kvm/kvm_main.c | 52 ++- drivers/kvm/lapic.c | 933 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/svm.c | 6 + drivers/kvm/vmx.c | 6 + include/linux/kvm.h | 4 +- 9 files changed, 1067 insertions(+), 33 deletions(-) create mode 100644 drivers/kvm/lapic.c diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile index 952dff38eb6..3bf7276b032 100644 --- a/drivers/kvm/Makefile +++ b/drivers/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o +kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c index b08005ca705..0b4430a0cae 100644 --- a/drivers/kvm/irq.c +++ b/drivers/kvm/irq.c @@ -30,14 +30,13 @@ */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - struct kvm_pic *s = pic_irqchip(v->kvm); - - if (s->output) /* PIC */ - return 1; - /* - * TODO: APIC - */ - return 0; + struct kvm_pic *s; + + if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ + s = pic_irqchip(v->kvm); /* PIC */ + return s->output; + } + return 1; } EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); @@ -46,16 +45,36 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { - struct kvm_pic *s = pic_irqchip(v->kvm); + struct kvm_pic *s; int vector; - s->output = 0; - vector 
= kvm_pic_read_irq(s); - if (vector != -1) - return vector; - /* - * TODO: APIC - */ - return -1; + vector = kvm_get_apic_interrupt(v); /* APIC */ + if (vector == -1) { + s = pic_irqchip(v->kvm); + s->output = 0; /* PIC */ + vector = kvm_pic_read_irq(s); + } + return vector; } EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); + +static void vcpu_kick_intr(void *info) +{ +#ifdef DEBUG + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; + printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); +#endif +} + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int ipi_pcpu = vcpu->cpu; + + if (vcpu->guest_mode) + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); +} + +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) +{ + /* TODO: for kernel IOAPIC */ +} diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index bdb2fc34804..57e23bdac53 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -26,7 +26,6 @@ typedef void irq_request_func(void *opaque, int level); -struct kvm_pic; struct kvm_kpic_state { u8 last_irr; /* edge detection */ u8 irr; /* interrupt request register */ @@ -61,4 +60,44 @@ int kvm_pic_read_irq(struct kvm_pic *s); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); +struct kvm_lapic { + unsigned long base_address; + struct kvm_io_device dev; + struct { + atomic_t pending; + s64 period; /* unit: ns */ + u32 divide_count; + ktime_t last_update; + struct hrtimer dev; + } timer; + struct kvm_vcpu *vcpu; + struct page *regs_page; + void *regs; +}; + +#ifdef DEBUG +#define ASSERT(x) \ +do { \ + if (!(x)) { \ + printk(KERN_EMERG "assertion failed %s: %d: %s\n", \ + __FILE__, __LINE__, #x); \ + BUG(); \ + } \ +} while (0) +#else +#define ASSERT(x) do { } while (0) +#endif + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu); +int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); +int kvm_create_lapic(struct kvm_vcpu *vcpu); +void kvm_free_apic(struct kvm_lapic *apic); +u64 
kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); +void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); +void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); +u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); +void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector); + #endif diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index e0a2f13faf8..a5790cb21ff 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -324,6 +324,7 @@ struct kvm_vcpu { u64 pdptrs[4]; /* pae */ u64 shadow_efer; u64 apic_base; + struct kvm_lapic *apic; /* kernel irqchip context */ u64 ia32_misc_enable_msr; struct kvm_mmu mmu; @@ -569,8 +570,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0); void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); unsigned long get_cr8(struct kvm_vcpu *vcpu); -u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); -void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index f879efbefcd..401e3cdc460 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -281,6 +281,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) { kvm_mmu_destroy(vcpu); + kvm_free_apic(vcpu->apic); free_page((unsigned long)vcpu->pio_data); free_page((unsigned long)vcpu->run); } @@ -598,25 +599,38 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) inject_gp(vcpu); return; } - vcpu->cr8 = cr8; + if (irqchip_in_kernel(vcpu->kvm)) + kvm_lapic_set_tpr(vcpu, cr8); + else + vcpu->cr8 = cr8; } EXPORT_SYMBOL_GPL(set_cr8); unsigned long get_cr8(struct kvm_vcpu *vcpu) { - return vcpu->cr8; + if (irqchip_in_kernel(vcpu->kvm)) + return kvm_lapic_get_cr8(vcpu); + else + return vcpu->cr8; } EXPORT_SYMBOL_GPL(get_cr8); u64 kvm_get_apic_base(struct 
kvm_vcpu *vcpu) { - return vcpu->apic_base; + if (irqchip_in_kernel(vcpu->kvm)) + return vcpu->apic_base; + else + return vcpu->apic_base; } EXPORT_SYMBOL_GPL(kvm_get_apic_base); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) { - vcpu->apic_base = data; + /* TODO: reserve bits check */ + if (irqchip_in_kernel(vcpu->kvm)) + kvm_lapic_set_base(vcpu, data); + else + vcpu->apic_base = data; } EXPORT_SYMBOL_GPL(kvm_set_apic_base); @@ -986,15 +1000,31 @@ static int emulator_write_std(unsigned long addr, return X86EMUL_UNHANDLEABLE; } +/* + * Only apic need an MMIO device hook, so shortcut now.. + */ +static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + struct kvm_io_device *dev; + + if (vcpu->apic) { + dev = &vcpu->apic->dev; + if (dev->in_range(dev, addr)) + return dev; + } + return NULL; +} + static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, gpa_t addr) { - /* - * Note that its important to have this wrapper function because - * in the very near future we will be checking for MMIOs against - * the LAPIC as well as the general MMIO bus - */ - return kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + struct kvm_io_device *dev; + + dev = vcpu_find_pervcpu_dev(vcpu, addr); + if (dev == NULL) + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + return dev; } static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, @@ -2256,6 +2286,8 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, { if (irq->irq < 0 || irq->irq >= 256) return -EINVAL; + if (irqchip_in_kernel(vcpu->kvm)) + return -ENXIO; vcpu_load(vcpu); set_bit(irq->irq, vcpu->irq_pending); diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c new file mode 100644 index 00000000000..4b5c77d8900 --- /dev/null +++ b/drivers/kvm/lapic.c @@ -0,0 +1,933 @@ + +/* + * Local APIC virtualization + * + * Copyright (C) 2006 Qumranet, Inc. 
+ * Copyright (C) 2007 Novell + * Copyright (C) 2007 Intel + * + * Authors: + * Dor Laor + * Gregory Haskins + * Yaozu (Eddie) Dong + * + * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include "kvm.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "irq.h" + +#define PRId64 "d" +#define PRIx64 "llx" +#define PRIu64 "u" +#define PRIo64 "o" + +#define APIC_BUS_CYCLE_NS 1 + +/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ +#define apic_debug(fmt, arg...) + +#define APIC_LVT_NUM 6 +/* 14 is the version for Xeon and Pentium 8.4.8*/ +#define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16)) +#define LAPIC_MMIO_LENGTH (1 << 12) +/* followed define is not in apicdef.h */ +#define APIC_SHORT_MASK 0xc0000 +#define APIC_DEST_NOSHORT 0x0 +#define APIC_DEST_MASK 0x800 +#define MAX_APIC_VECTOR 256 + +#define VEC_POS(v) ((v) & (32 - 1)) +#define REG_POS(v) (((v) >> 5) << 4) +static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) +{ + return *((u32 *) (apic->regs + reg_off)); +} + +static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) +{ + *((u32 *) (apic->regs + reg_off)) = val; +} + +static inline int apic_test_and_set_vector(int vec, void *bitmap) +{ + return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + +static inline int apic_test_and_clear_vector(int vec, void *bitmap) +{ + return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + +static inline void apic_set_vector(int vec, void *bitmap) +{ + set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + +static inline void apic_clear_vector(int vec, void *bitmap) +{ + clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + +static inline int apic_hw_enabled(struct kvm_lapic *apic) +{ + return 
(apic)->vcpu->apic_base & MSR_IA32_APICBASE_ENABLE; +} + +static inline int apic_sw_enabled(struct kvm_lapic *apic) +{ + return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; +} + +static inline int apic_enabled(struct kvm_lapic *apic) +{ + return apic_sw_enabled(apic) && apic_hw_enabled(apic); +} + +#define LVT_MASK \ + (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK) + +#define LINT_MASK \ + (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ + APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) + +static inline int kvm_apic_id(struct kvm_lapic *apic) +{ + return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff; +} + +static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) +{ + return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); +} + +static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) +{ + return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; +} + +static inline int apic_lvtt_period(struct kvm_lapic *apic) +{ + return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; +} + +static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { + LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ + LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ + LVT_MASK | APIC_MODE_MASK, /* LVTPC */ + LINT_MASK, LINT_MASK, /* LVT0-1 */ + LVT_MASK /* LVTERR */ +}; + +static int find_highest_vector(void *bitmap) +{ + u32 *word = bitmap; + int word_offset = MAX_APIC_VECTOR >> 5; + + while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) + continue; + + if (likely(!word_offset && !word[0])) + return -1; + else + return fls(word[word_offset << 2]) - 1 + (word_offset << 5); +} + +static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic) +{ + return apic_test_and_set_vector(vec, apic->regs + APIC_IRR); +} + +static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) +{ + apic_clear_vector(vec, apic->regs + APIC_IRR); +} + +static inline int apic_find_highest_irr(struct kvm_lapic *apic) +{ + int result; + + result = 
find_highest_vector(apic->regs + APIC_IRR); + ASSERT(result == -1 || result >= 16); + + return result; +} + +int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig) +{ + if (!apic_test_and_set_irr(vec, apic)) { + /* a new pending irq is set in IRR */ + if (trig) + apic_set_vector(vec, apic->regs + APIC_TMR); + else + apic_clear_vector(vec, apic->regs + APIC_TMR); + kvm_vcpu_kick(apic->vcpu); + return 1; + } + return 0; +} + +static inline int apic_find_highest_isr(struct kvm_lapic *apic) +{ + int result; + + result = find_highest_vector(apic->regs + APIC_ISR); + ASSERT(result == -1 || result >= 16); + + return result; +} + +static void apic_update_ppr(struct kvm_lapic *apic) +{ + u32 tpr, isrv, ppr; + int isr; + + tpr = apic_get_reg(apic, APIC_TASKPRI); + isr = apic_find_highest_isr(apic); + isrv = (isr != -1) ? isr : 0; + + if ((tpr & 0xf0) >= (isrv & 0xf0)) + ppr = tpr & 0xff; + else + ppr = isrv & 0xf0; + + apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", + apic, ppr, isr, isrv); + + apic_set_reg(apic, APIC_PROCPRI, ppr); +} + +static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) +{ + apic_set_reg(apic, APIC_TASKPRI, tpr); + apic_update_ppr(apic); +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +{ + return kvm_apic_id(apic) == dest; +} + +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +{ + int result = 0; + u8 logical_id; + + logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR)); + + switch (apic_get_reg(apic, APIC_DFR)) { + case APIC_DFR_FLAT: + if (logical_id & mda) + result = 1; + break; + case APIC_DFR_CLUSTER: + if (((logical_id >> 4) == (mda >> 0x4)) + && (logical_id & mda & 0xf)) + result = 1; + break; + default: + printk(KERN_WARNING "Bad DFR vcpu %d: %08x\n", + apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); + break; + } + + return result; +} + +static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode) +{ + int result = 0; 
+ struct kvm_lapic *target = vcpu->apic; + + apic_debug("target %p, source %p, dest 0x%x, " + "dest_mode 0x%x, short_hand 0x%x", + target, source, dest, dest_mode, short_hand); + + ASSERT(!target); + switch (short_hand) { + case APIC_DEST_NOSHORT: + if (dest_mode == 0) { + /* Physical mode. */ + if ((dest == 0xFF) || (dest == kvm_apic_id(target))) + result = 1; + } else + /* Logical mode. */ + result = kvm_apic_match_logical_addr(target, dest); + break; + case APIC_DEST_SELF: + if (target == source) + result = 1; + break; + case APIC_DEST_ALLINC: + result = 1; + break; + case APIC_DEST_ALLBUT: + if (target != source) + result = 1; + break; + default: + printk(KERN_WARNING "Bad dest shorthand value %x\n", + short_hand); + break; + } + + return result; +} + +/* + * Add a pending IRQ into lapic. + * Return 1 if successfully added and 0 if discarded. + */ +static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, + int vector, int level, int trig_mode) +{ + int result = 0; + + switch (delivery_mode) { + case APIC_DM_FIXED: + case APIC_DM_LOWEST: + /* FIXME add logic for vcpu on reset */ + if (unlikely(!apic_enabled(apic))) + break; + + if (apic_test_and_set_irr(vector, apic) && trig_mode) { + apic_debug("level trig mode repeatedly for vector %d", + vector); + break; + } + + if (trig_mode) { + apic_debug("level trig mode for vector %d", vector); + apic_set_vector(vector, apic->regs + APIC_TMR); + } else + apic_clear_vector(vector, apic->regs + APIC_TMR); + + kvm_vcpu_kick(apic->vcpu); + + result = 1; + break; + + case APIC_DM_REMRD: + printk(KERN_DEBUG "Ignoring delivery mode 3\n"); + break; + + case APIC_DM_SMI: + printk(KERN_DEBUG "Ignoring guest SMI\n"); + break; + case APIC_DM_NMI: + printk(KERN_DEBUG "Ignoring guest NMI\n"); + break; + + case APIC_DM_INIT: + printk(KERN_DEBUG "Ignoring guest INIT\n"); + break; + + case APIC_DM_STARTUP: + printk(KERN_DEBUG "Ignoring guest STARTUP\n"); + break; + + default: + printk(KERN_ERR "TODO: unsupported delivery 
mode %x\n", + delivery_mode); + break; + } + return result; +} + +struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, + unsigned long bitmap) +{ + int vcpu_id; + + /* TODO for real round robin */ + vcpu_id = fls(bitmap) - 1; + if (vcpu_id < 0) + printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n"); + return kvm->vcpus[vcpu_id]->apic; +} + +static void apic_set_eoi(struct kvm_lapic *apic) +{ + int vector = apic_find_highest_isr(apic); + + /* + * Not every write EOI will has corresponding ISR, + * one example is when Kernel check timer on setup_IO_APIC + */ + if (vector == -1) + return; + + apic_clear_vector(vector, apic->regs + APIC_ISR); + apic_update_ppr(apic); + + if (apic_test_and_clear_vector(vector, apic->regs + APIC_TMR)) + kvm_ioapic_update_eoi(apic->vcpu->kvm, vector); +} + +static void apic_send_ipi(struct kvm_lapic *apic) +{ + u32 icr_low = apic_get_reg(apic, APIC_ICR); + u32 icr_high = apic_get_reg(apic, APIC_ICR2); + + unsigned int dest = GET_APIC_DEST_FIELD(icr_high); + unsigned int short_hand = icr_low & APIC_SHORT_MASK; + unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; + unsigned int level = icr_low & APIC_INT_ASSERT; + unsigned int dest_mode = icr_low & APIC_DEST_MASK; + unsigned int delivery_mode = icr_low & APIC_MODE_MASK; + unsigned int vector = icr_low & APIC_VECTOR_MASK; + + struct kvm_lapic *target; + struct kvm_vcpu *vcpu; + unsigned long lpr_map = 0; + int i; + + apic_debug("icr_high 0x%x, icr_low 0x%x, " + "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " + "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", + icr_high, icr_low, short_hand, dest, + trig_mode, level, dest_mode, delivery_mode, vector); + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + vcpu = apic->vcpu->kvm->vcpus[i]; + if (!vcpu) + continue; + + if (vcpu->apic && + apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) { + if (delivery_mode == APIC_DM_LOWEST) + set_bit(vcpu->vcpu_id, &lpr_map); + else + __apic_accept_irq(vcpu->apic, 
delivery_mode, + vector, level, trig_mode); + } + } + + if (delivery_mode == APIC_DM_LOWEST) { + target = kvm_apic_round_robin(vcpu->kvm, vector, lpr_map); + if (target != NULL) + __apic_accept_irq(target, delivery_mode, + vector, level, trig_mode); + } +} + +static u32 apic_get_tmcct(struct kvm_lapic *apic) +{ + u32 counter_passed; + ktime_t passed, now = apic->timer.dev.base->get_time(); + u32 tmcct = apic_get_reg(apic, APIC_TMICT); + + ASSERT(apic != NULL); + + if (unlikely(ktime_to_ns(now) <= + ktime_to_ns(apic->timer.last_update))) { + /* Wrap around */ + passed = ktime_add(( { + (ktime_t) { + .tv64 = KTIME_MAX - + (apic->timer.last_update).tv64}; } + ), now); + apic_debug("time elapsed\n"); + } else + passed = ktime_sub(now, apic->timer.last_update); + + counter_passed = div64_64(ktime_to_ns(passed), + (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); + tmcct -= counter_passed; + + if (tmcct <= 0) { + if (unlikely(!apic_lvtt_period(apic))) + tmcct = 0; + else + do { + tmcct += apic_get_reg(apic, APIC_TMICT); + } while (tmcct <= 0); + } + + return tmcct; +} + +static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) +{ + u32 val = 0; + + if (offset >= LAPIC_MMIO_LENGTH) + return 0; + + switch (offset) { + case APIC_ARBPRI: + printk(KERN_WARNING "Access APIC ARBPRI register " + "which is for P6\n"); + break; + + case APIC_TMCCT: /* Timer CCR */ + val = apic_get_tmcct(apic); + break; + + default: + val = apic_get_reg(apic, offset); + break; + } + + return val; +} + +static void apic_mmio_read(struct kvm_io_device *this, + gpa_t address, int len, void *data) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)this->private; + unsigned int offset = address - apic->base_address; + unsigned char alignment = offset & 0xf; + u32 result; + + if ((alignment + len) > 4) { + printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d", + (unsigned long)address, len); + return; + } + result = __apic_read(apic, offset & ~0xf); + + switch (len) { + case 1: + case 2: + 
case 4: + memcpy(data, (char *)&result + alignment, len); + break; + default: + printk(KERN_ERR "Local APIC read with len = %x, " + "should be 1,2, or 4 instead\n", len); + break; + } +} + +static void update_divide_count(struct kvm_lapic *apic) +{ + u32 tmp1, tmp2, tdcr; + + tdcr = apic_get_reg(apic, APIC_TDCR); + tmp1 = tdcr & 0xf; + tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; + apic->timer.divide_count = 0x1 << (tmp2 & 0x7); + + apic_debug("timer divide count is 0x%x\n", + apic->timer.divide_count); +} + +static void start_apic_timer(struct kvm_lapic *apic) +{ + ktime_t now = apic->timer.dev.base->get_time(); + + apic->timer.last_update = now; + + apic->timer.period = apic_get_reg(apic, APIC_TMICT) * + APIC_BUS_CYCLE_NS * apic->timer.divide_count; + atomic_set(&apic->timer.pending, 0); + hrtimer_start(&apic->timer.dev, + ktime_add_ns(now, apic->timer.period), + HRTIMER_MODE_ABS); + + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" + PRIx64 ", " + "timer initial count 0x%x, period %lldns, " + "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__, + APIC_BUS_CYCLE_NS, ktime_to_ns(now), + apic_get_reg(apic, APIC_TMICT), + apic->timer.period, + ktime_to_ns(ktime_add_ns(now, + apic->timer.period))); +} + +static void apic_mmio_write(struct kvm_io_device *this, + gpa_t address, int len, const void *data) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)this->private; + unsigned int offset = address - apic->base_address; + unsigned char alignment = offset & 0xf; + u32 val; + + /* + * APIC register must be aligned on 128-bits boundary. + * 32/64/128 bits registers must be accessed thru 32 bits. 
+ * Refer SDM 8.4.1 + */ + if (len != 4 || alignment) { + if (printk_ratelimit()) + printk(KERN_ERR "apic write: bad size=%d %lx\n", + len, (long)address); + return; + } + + val = *(u32 *) data; + + /* too common printing */ + if (offset != APIC_EOI) + apic_debug("%s: offset 0x%x with length 0x%x, and value is " + "0x%x\n", __FUNCTION__, offset, len, val); + + offset &= 0xff0; + + switch (offset) { + case APIC_ID: /* Local APIC ID */ + apic_set_reg(apic, APIC_ID, val); + break; + + case APIC_TASKPRI: + apic_set_tpr(apic, val & 0xff); + break; + + case APIC_EOI: + apic_set_eoi(apic); + break; + + case APIC_LDR: + apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK); + break; + + case APIC_DFR: + apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); + break; + + case APIC_SPIV: + apic_set_reg(apic, APIC_SPIV, val & 0x3ff); + if (!(val & APIC_SPIV_APIC_ENABLED)) { + int i; + u32 lvt_val; + + for (i = 0; i < APIC_LVT_NUM; i++) { + lvt_val = apic_get_reg(apic, + APIC_LVTT + 0x10 * i); + apic_set_reg(apic, APIC_LVTT + 0x10 * i, + lvt_val | APIC_LVT_MASKED); + } + atomic_set(&apic->timer.pending, 0); + + } + break; + + case APIC_ICR: + /* No delay here, so we always clear the pending bit */ + apic_set_reg(apic, APIC_ICR, val & ~(1 << 12)); + apic_send_ipi(apic); + break; + + case APIC_ICR2: + apic_set_reg(apic, APIC_ICR2, val & 0xff000000); + break; + + case APIC_LVTT: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTERR: + /* TODO: Check vector */ + if (!apic_sw_enabled(apic)) + val |= APIC_LVT_MASKED; + + val &= apic_lvt_mask[(offset - APIC_LVTT) >> 4]; + apic_set_reg(apic, offset, val); + + break; + + case APIC_TMICT: + hrtimer_cancel(&apic->timer.dev); + apic_set_reg(apic, APIC_TMICT, val); + start_apic_timer(apic); + return; + + case APIC_TDCR: + if (val & 4) + printk(KERN_ERR "KVM_WRITE:TDCR %x\n", val); + apic_set_reg(apic, APIC_TDCR, val); + update_divide_count(apic); + break; + + default: + apic_debug("Local APIC Write to 
read-only register %x\n", + offset); + break; + } + +} + +static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)this->private; + int ret = 0; + + + if (apic_hw_enabled(apic) && + (addr >= apic->base_address) && + (addr < (apic->base_address + LAPIC_MMIO_LENGTH))) + ret = 1; + + return ret; +} + +void kvm_free_apic(struct kvm_lapic *apic) +{ + if (!apic) + return; + + hrtimer_cancel(&apic->timer.dev); + + if (apic->regs_page) { + __free_page(apic->regs_page); + apic->regs_page = 0; + } + + kfree(apic); +} + +/* + *---------------------------------------------------------------------- + * LAPIC interface + *---------------------------------------------------------------------- + */ + +void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; + + if (!apic) + return; + apic_set_tpr(apic, ((cr8 & 0x0f) << 4)); +} + +u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; + u64 tpr; + + if (!apic) + return 0; + tpr = (u64) apic_get_reg(apic, APIC_TASKPRI); + + return (tpr & 0xf0) >> 4; +} + +void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; + + if (!apic) { + value |= MSR_IA32_APICBASE_BSP; + vcpu->apic_base = value; + return; + } + if (apic->vcpu->vcpu_id) + value &= ~MSR_IA32_APICBASE_BSP; + + vcpu->apic_base = value; + apic->base_address = apic->vcpu->apic_base & + MSR_IA32_APICBASE_BASE; + + /* with FSB delivery interrupt, we can restart APIC functionality */ + apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is " + "0x%lx.\n", apic->apic_base, apic->base_address); + +} + +u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu) +{ + return vcpu->apic_base; +} +EXPORT_SYMBOL_GPL(kvm_lapic_get_base); + +static void lapic_reset(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic; + int i; + + apic_debug("%s\n", 
__FUNCTION__); + + ASSERT(vcpu); + apic = vcpu->apic; + ASSERT(apic != NULL); + + /* Stop the timer in case it's a reset to an active apic */ + hrtimer_cancel(&apic->timer.dev); + + apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); + apic_set_reg(apic, APIC_LVR, APIC_VERSION); + + for (i = 0; i < APIC_LVT_NUM; i++) + apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); + + apic_set_reg(apic, APIC_DFR, 0xffffffffU); + apic_set_reg(apic, APIC_SPIV, 0xff); + apic_set_reg(apic, APIC_TASKPRI, 0); + apic_set_reg(apic, APIC_LDR, 0); + apic_set_reg(apic, APIC_ESR, 0); + apic_set_reg(apic, APIC_ICR, 0); + apic_set_reg(apic, APIC_ICR2, 0); + apic_set_reg(apic, APIC_TDCR, 0); + apic_set_reg(apic, APIC_TMICT, 0); + for (i = 0; i < 8; i++) { + apic_set_reg(apic, APIC_IRR + 0x10 * i, 0); + apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); + apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); + } + apic->timer.divide_count = 0; + atomic_set(&apic->timer.pending, 0); + if (vcpu->vcpu_id == 0) + vcpu->apic_base |= MSR_IA32_APICBASE_BSP; + apic_update_ppr(apic); + + apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" + "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__, + vcpu, kvm_apic_id(apic), + vcpu->apic_base, apic->base_address); +} + +int kvm_lapic_enabled(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; + int ret = 0; + + if (!apic) + return 0; + ret = apic_enabled(apic); + + return ret; +} + +/* + *---------------------------------------------------------------------- + * timer interface + *---------------------------------------------------------------------- + */ +static int __apic_timer_fn(struct kvm_lapic *apic) +{ + u32 vector; + int result = 0; + + if (unlikely(!apic_enabled(apic) || + !apic_lvt_enabled(apic, APIC_LVTT))) { + apic_debug("%s: time interrupt although apic is down\n", + __FUNCTION__); + return 0; + } + + vector = apic_lvt_vector(apic, APIC_LVTT); + apic->timer.last_update = apic->timer.dev.expires; + 
atomic_inc(&apic->timer.pending); + __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); + + if (apic_lvtt_period(apic)) { + u32 offset; + u32 tmict = apic_get_reg(apic, APIC_TMICT); + + offset = APIC_BUS_CYCLE_NS * apic->timer.divide_count * tmict; + + result = 1; + apic->timer.dev.expires = ktime_add_ns( + apic->timer.dev.expires, + apic->timer.period); + } + + return result; +} + +static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) +{ + struct kvm_lapic *apic; + int restart_timer = 0; + + apic = container_of(data, struct kvm_lapic, timer.dev); + + restart_timer = __apic_timer_fn(apic); + + if (restart_timer) + return HRTIMER_RESTART; + else + return HRTIMER_NORESTART; +} + +int kvm_create_lapic(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic; + + ASSERT(vcpu != NULL); + apic_debug("apic_init %d\n", vcpu->vcpu_id); + + apic = kzalloc(sizeof(*apic), GFP_KERNEL); + if (!apic) + goto nomem; + + vcpu->apic = apic; + + apic->regs_page = alloc_page(GFP_KERNEL); + if (apic->regs_page == NULL) { + printk(KERN_ERR "malloc apic regs error for vcpu %x\n", + vcpu->vcpu_id); + goto nomem; + } + apic->regs = page_address(apic->regs_page); + memset(apic->regs, 0, PAGE_SIZE); + apic->vcpu = vcpu; + + hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + apic->timer.dev.function = apic_timer_fn; + apic->base_address = APIC_DEFAULT_PHYS_BASE; + vcpu->apic_base = APIC_DEFAULT_PHYS_BASE; + + lapic_reset(vcpu); + apic->dev.read = apic_mmio_read; + apic->dev.write = apic_mmio_write; + apic->dev.in_range = apic_mmio_range; + apic->dev.private = apic; + + return 0; +nomem: + kvm_free_apic(apic); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(kvm_create_lapic); + +int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->apic; + int highest_irr; + + if (!apic || !apic_enabled(apic)) + return -1; + + highest_irr = apic_find_highest_irr(apic); + if ((highest_irr == -1) || + ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) + 
return -1; + return highest_irr; +} + +int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) +{ + int vector = kvm_apic_has_interrupt(vcpu); + struct kvm_lapic *apic = vcpu->apic; + + if (vector == -1) + return -1; + + apic_set_vector(vector, apic->regs + APIC_ISR); + apic_update_ppr(apic); + apic_clear_irr(vector, apic); + return vector; +} diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 57525e7ed28..d576451827e 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -573,6 +573,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_svm; + if (irqchip_in_kernel(kvm)) { + err = kvm_create_lapic(&svm->vcpu); + if (err < 0) + goto free_svm; + } + page = alloc_page(GFP_KERNEL); if (!page) { err = -ENOMEM; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 19676b5a671..c4cc17cc00f 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -2390,6 +2390,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; + if (irqchip_in_kernel(kvm)) { + err = kvm_create_lapic(&vmx->vcpu); + if (err < 0) + goto free_vcpu; + } + vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!vmx->guest_msrs) { err = -ENOMEM; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index bfe742b771f..997bb3e46f1 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -34,7 +34,7 @@ struct kvm_memory_alias { __u64 target_phys_addr; }; -/* for KVM_SET_IRQ_LEVEL */ +/* for KVM_IRQ_LINE */ struct kvm_irq_level { /* * ACPI gsi notion of irq. 
@@ -297,7 +297,7 @@ struct kvm_signal_mask { #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) -#define KVM_IRQ_LINE _IO(KVMIO, 0x61) +#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) /* * ioctls for vcpu fds -- cgit v1.2.3 From 1fd4f2a5ed8f80cf6e23d2bdf78554f6a1ac7997 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Wed, 18 Jul 2007 12:03:39 +0300 Subject: KVM: In-kernel I/O APIC model This allows in-kernel host-side device drivers to raise guest interrupts without going to userspace. [avi: fix level-triggered interrupt redelivery on eoi] [avi: add missing #include] [avi: avoid redelivery of edge-triggered interrupt] [avi: implement polarity] [avi: don't deliver edge-triggered interrupts when unmasking] [avi: fix host oops on invalid guest access] Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/Makefile | 2 +- drivers/kvm/ioapic.c | 388 +++++++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/irq.c | 4 - drivers/kvm/irq.h | 51 +++++++ drivers/kvm/kvm.h | 6 + drivers/kvm/kvm_main.c | 15 +- 6 files changed, 458 insertions(+), 8 deletions(-) create mode 100644 drivers/kvm/ioapic.c diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile index 3bf7276b032..e5a8f4d3e97 100644 --- a/drivers/kvm/Makefile +++ b/drivers/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o +kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c new file mode 100644 index 00000000000..c7992e667fd --- /dev/null +++ b/drivers/kvm/ioapic.c @@ -0,0 +1,388 @@ +/* + * Copyright (C) 2001 MandrakeSoft S.A. + * + * MandrakeSoft S.A. 
+ * 43, rue d'Aboukir + * 75002 Paris - France + * http://www.linux-mandrake.com/ + * http://www.mandrakesoft.com/ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Yunhong Jiang + * Yaozu (Eddie) Dong + * Based on Xen 3.1 code. + */ + +#include "kvm.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "irq.h" +/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ +#define ioapic_debug(fmt, arg...) +static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq); + +static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, + unsigned long addr, + unsigned long length) +{ + unsigned long result = 0; + + switch (ioapic->ioregsel) { + case IOAPIC_REG_VERSION: + result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) + | (IOAPIC_VERSION_ID & 0xff)); + break; + + case IOAPIC_REG_APIC_ID: + case IOAPIC_REG_ARB_ID: + result = ((ioapic->id & 0xf) << 24); + break; + + default: + { + u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; + u64 redir_content; + + ASSERT(redir_index < IOAPIC_NUM_PINS); + + redir_content = ioapic->redirtbl[redir_index].bits; + result = (ioapic->ioregsel & 0x1) ? 
+ (redir_content >> 32) & 0xffffffff : + redir_content & 0xffffffff; + break; + } + } + + return result; +} + +static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) +{ + union ioapic_redir_entry *pent; + + pent = &ioapic->redirtbl[idx]; + + if (!pent->fields.mask) { + ioapic_deliver(ioapic, idx); + if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) + pent->fields.remote_irr = 1; + } + if (!pent->fields.trig_mode) + ioapic->irr &= ~(1 << idx); +} + +static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) +{ + unsigned index; + + switch (ioapic->ioregsel) { + case IOAPIC_REG_VERSION: + /* Writes are ignored. */ + break; + + case IOAPIC_REG_APIC_ID: + ioapic->id = (val >> 24) & 0xf; + break; + + case IOAPIC_REG_ARB_ID: + break; + + default: + index = (ioapic->ioregsel - 0x10) >> 1; + + ioapic_debug("change redir index %x val %x", index, val); + if (index >= IOAPIC_NUM_PINS) + return; + if (ioapic->ioregsel & 1) { + ioapic->redirtbl[index].bits &= 0xffffffff; + ioapic->redirtbl[index].bits |= (u64) val << 32; + } else { + ioapic->redirtbl[index].bits &= ~0xffffffffULL; + ioapic->redirtbl[index].bits |= (u32) val; + ioapic->redirtbl[index].fields.remote_irr = 0; + } + if (ioapic->irr & (1 << index)) + ioapic_service(ioapic, index); + break; + } +} + +static void ioapic_inj_irq(struct kvm_ioapic *ioapic, + struct kvm_lapic *target, + u8 vector, u8 trig_mode, u8 delivery_mode) +{ + ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode, + delivery_mode); + + ASSERT((delivery_mode == dest_Fixed) || + (delivery_mode == dest_LowestPrio)); + + kvm_apic_set_irq(target, vector, trig_mode); +} + +static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, + u8 dest_mode) +{ + u32 mask = 0; + int i; + struct kvm *kvm = ioapic->kvm; + struct kvm_vcpu *vcpu; + + ioapic_debug("dest %d dest_mode %d", dest, dest_mode); + + if (dest_mode == 0) { /* Physical mode. */ + if (dest == 0xFF) { /* Broadcast. 
*/ + for (i = 0; i < KVM_MAX_VCPUS; ++i) + if (kvm->vcpus[i] && kvm->vcpus[i]->apic) + mask |= 1 << i; + return mask; + } + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (kvm_apic_match_physical_addr(vcpu->apic, dest)) { + if (vcpu->apic) + mask = 1 << i; + break; + } + } + } else if (dest != 0) /* Logical mode, MDA non-zero. */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + vcpu = kvm->vcpus[i]; + if (!vcpu) + continue; + if (vcpu->apic && + kvm_apic_match_logical_addr(vcpu->apic, dest)) + mask |= 1 << vcpu->vcpu_id; + } + ioapic_debug("mask %x", mask); + return mask; +} + +static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq) +{ + u8 dest = ioapic->redirtbl[irq].fields.dest_id; + u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode; + u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode; + u8 vector = ioapic->redirtbl[irq].fields.vector; + u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; + u32 deliver_bitmask; + struct kvm_lapic *target; + struct kvm_vcpu *vcpu; + int vcpu_id; + + ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " + "vector=%x trig_mode=%x", + dest, dest_mode, delivery_mode, vector, trig_mode); + + deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode); + if (!deliver_bitmask) { + ioapic_debug("no target on destination"); + return; + } + + switch (delivery_mode) { + case dest_LowestPrio: + target = + kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask); + if (target != NULL) + ioapic_inj_irq(ioapic, target, vector, + trig_mode, delivery_mode); + else + ioapic_debug("null round robin: " + "mask=%x vector=%x delivery_mode=%x", + deliver_bitmask, vector, dest_LowestPrio); + break; + case dest_Fixed: + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { + if (!(deliver_bitmask & (1 << vcpu_id))) + continue; + deliver_bitmask &= ~(1 << vcpu_id); + vcpu = ioapic->kvm->vcpus[vcpu_id]; + if (vcpu) { + target = vcpu->apic; + ioapic_inj_irq(ioapic, target, vector, + 
trig_mode, delivery_mode); + } + } + break; + + /* TODO: NMI */ + default: + printk(KERN_WARNING "Unsupported delivery mode %d\n", + delivery_mode); + break; + } +} + +void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) +{ + u32 old_irr = ioapic->irr; + u32 mask = 1 << irq; + union ioapic_redir_entry entry; + + if (irq >= 0 && irq < IOAPIC_NUM_PINS) { + entry = ioapic->redirtbl[irq]; + level ^= entry.fields.polarity; + if (!level) + ioapic->irr &= ~mask; + else { + ioapic->irr |= mask; + if ((!entry.fields.trig_mode && old_irr != ioapic->irr) + || !entry.fields.remote_irr) + ioapic_service(ioapic, irq); + } + } +} + +static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector) +{ + int i; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) + if (ioapic->redirtbl[i].fields.vector == vector) + return i; + return -1; +} + +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->vioapic; + union ioapic_redir_entry *ent; + int gsi; + + gsi = get_eoi_gsi(ioapic, vector); + if (gsi == -1) { + printk(KERN_WARNING "Can't find redir item for %d EOI\n", + vector); + return; + } + + ent = &ioapic->redirtbl[gsi]; + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); + + ent->fields.remote_irr = 0; + if (!ent->fields.mask && (ioapic->irr & (1 << gsi))) + ioapic_deliver(ioapic, gsi); +} + +static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) +{ + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + + return ((addr >= ioapic->base_address && + (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); +} + +static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, + void *val) +{ + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + u32 result; + + ioapic_debug("addr %lx", (unsigned long)addr); + ASSERT(!(addr & 0xf)); /* check alignment */ + + addr &= 0xff; + switch (addr) { + case IOAPIC_REG_SELECT: + result = ioapic->ioregsel; + break; + + case IOAPIC_REG_WINDOW: + result = 
ioapic_read_indirect(ioapic, addr, len); + break; + + default: + result = 0; + break; + } + switch (len) { + case 8: + *(u64 *) val = result; + break; + case 1: + case 2: + case 4: + memcpy(val, (char *)&result, len); + break; + default: + printk(KERN_WARNING "ioapic: wrong length %d\n", len); + } +} + +static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, + const void *val) +{ + struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; + u32 data; + + ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n", + addr, len, val); + ASSERT(!(addr & 0xf)); /* check alignment */ + if (len == 4 || len == 8) + data = *(u32 *) val; + else { + printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); + return; + } + + addr &= 0xff; + switch (addr) { + case IOAPIC_REG_SELECT: + ioapic->ioregsel = data; + break; + + case IOAPIC_REG_WINDOW: + ioapic_write_indirect(ioapic, data); + break; + + default: + break; + } +} + +int kvm_ioapic_init(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic; + int i; + + ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); + if (!ioapic) + return -ENOMEM; + kvm->vioapic = ioapic; + for (i = 0; i < IOAPIC_NUM_PINS; i++) + ioapic->redirtbl[i].fields.mask = 1; + ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; + ioapic->dev.read = ioapic_mmio_read; + ioapic->dev.write = ioapic_mmio_write; + ioapic->dev.in_range = ioapic_in_range; + ioapic->dev.private = ioapic; + ioapic->kvm = kvm; + kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); + return 0; +} diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c index 0b4430a0cae..5265f8267b3 100644 --- a/drivers/kvm/irq.c +++ b/drivers/kvm/irq.c @@ -74,7 +74,3 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); } -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) -{ - /* TODO: for kernel IOAPIC */ -} diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index 57e23bdac53..6ed856a41e2 100644 --- 
a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -60,6 +60,50 @@ int kvm_pic_read_irq(struct kvm_pic *s); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); +#define IOAPIC_NUM_PINS 24 +#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ +#define IOAPIC_EDGE_TRIG 0 +#define IOAPIC_LEVEL_TRIG 1 + +#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000 +#define IOAPIC_MEM_LENGTH 0x100 + +/* Direct registers. */ +#define IOAPIC_REG_SELECT 0x00 +#define IOAPIC_REG_WINDOW 0x10 +#define IOAPIC_REG_EOI 0x40 /* IA64 IOSAPIC only */ + +/* Indirect registers. */ +#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ +#define IOAPIC_REG_VERSION 0x01 +#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ + +struct kvm_ioapic { + u64 base_address; + u32 ioregsel; + u32 id; + u32 irr; + u32 pad; + union ioapic_redir_entry { + u64 bits; + struct { + u8 vector; + u8 delivery_mode:3; + u8 dest_mode:1; + u8 delivery_status:1; + u8 polarity:1; + u8 remote_irr:1; + u8 trig_mode:1; + u8 mask:1; + u8 reserve:7; + u8 reserved[4]; + u8 dest_id; + } fields; + } redirtbl[IOAPIC_NUM_PINS]; + struct kvm_io_device dev; + struct kvm *kvm; +}; + struct kvm_lapic { unsigned long base_address; struct kvm_io_device dev; @@ -96,8 +140,15 @@ void kvm_free_apic(struct kvm_lapic *apic); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); +struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, + unsigned long bitmap); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector); +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig); +int kvm_ioapic_init(struct kvm *kvm); +void kvm_ioapic_set_irq(struct 
kvm_ioapic *ioapic, int irq, int level); #endif diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index a5790cb21ff..8d07a993af9 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -410,6 +410,7 @@ struct kvm { struct kvm_io_bus mmio_bus; struct kvm_io_bus pio_bus; struct kvm_pic *vpic; + struct kvm_ioapic *vioapic; }; static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) @@ -417,6 +418,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) return kvm->vpic; } +static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) +{ + return kvm->vioapic; +} + static inline int irqchip_in_kernel(struct kvm *kvm) { return pic_irqchip(kvm) != 0; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 401e3cdc460..ffbdadd8797 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -381,6 +381,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); kfree(kvm->vpic); + kfree(kvm->vioapic); kvm_free_vcpus(kvm); kvm_free_physmem(kvm); kfree(kvm); @@ -2771,8 +2772,14 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_CREATE_IRQCHIP: r = -ENOMEM; kvm->vpic = kvm_create_pic(kvm); - if (kvm->vpic) - r = 0; + if (kvm->vpic) { + r = kvm_ioapic_init(kvm); + if (r) { + kfree(kvm->vpic); + kvm->vpic = NULL; + goto out; + } + } else goto out; break; @@ -2787,7 +2794,9 @@ static long kvm_vm_ioctl(struct file *filp, kvm_pic_set_irq(pic_irqchip(kvm), irq_event.irq, irq_event.level); - /* TODO: IOAPIC */ + kvm_ioapic_set_irq(kvm->vioapic, + irq_event.irq, + irq_event.level); r = 0; } break; -- cgit v1.2.3 From b6958ce44a11a9e9425d2b67a653b1ca2a27796f Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Wed, 18 Jul 2007 12:15:21 +0300 Subject: KVM: Emulate hlt in the kernel By sleeping in the kernel when hlt is executed, we simplify the in-kernel guest interrupt path considerably. 
Signed-off-by: Gregory Haskins Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/i8259.c | 3 +++ drivers/kvm/irq.c | 4 ++++ drivers/kvm/kvm.h | 2 ++ drivers/kvm/kvm_main.c | 41 +++++++++++++++++++++++++++++++++++------ drivers/kvm/svm.c | 9 ++++++--- drivers/kvm/vmx.c | 8 ++++++-- include/linux/kvm.h | 1 + 7 files changed, 57 insertions(+), 11 deletions(-) diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c index 40ad1046223..ee6030dc5c0 100644 --- a/drivers/kvm/i8259.c +++ b/drivers/kvm/i8259.c @@ -413,8 +413,11 @@ static void picdev_read(struct kvm_io_device *this, static void pic_irq_request(void *opaque, int level) { struct kvm *kvm = opaque; + struct kvm_vcpu *vcpu = kvm->vcpus[0]; pic_irqchip(kvm)->output = level; + if (vcpu) + kvm_vcpu_kick(vcpu); } struct kvm_pic *kvm_create_pic(struct kvm *kvm) diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c index 5265f8267b3..e09cd65925d 100644 --- a/drivers/kvm/irq.c +++ b/drivers/kvm/irq.c @@ -70,6 +70,10 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { int ipi_pcpu = vcpu->cpu; + if (waitqueue_active(&vcpu->wq)) { + wake_up_interruptible(&vcpu->wq); + ++vcpu->stat.halt_wakeup; + } if (vcpu->guest_mode) smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); } diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 8d07a993af9..bb506b71797 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -231,6 +231,7 @@ struct kvm_stat { u32 signal_exits; u32 irq_window_exits; u32 halt_exits; + u32 halt_wakeup; u32 request_irq_exits; u32 irq_exits; u32 light_exits; @@ -353,6 +354,7 @@ struct kvm_vcpu { gva_t mmio_fault_cr2; struct kvm_pio_request pio; void *pio_data; + wait_queue_head_t wq; int sigset_active; sigset_t sigset; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index ffbdadd8797..4384364fc0c 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -76,6 +76,7 @@ static struct kvm_stats_debugfs_item { { "signal_exits", STAT_OFFSET(signal_exits) }, { 
"irq_window", STAT_OFFSET(irq_window_exits) }, { "halt_exits", STAT_OFFSET(halt_exits) }, + { "halt_wakeup", STAT_OFFSET(halt_wakeup) }, { "request_irq", STAT_OFFSET(request_irq_exits) }, { "irq_exits", STAT_OFFSET(irq_exits) }, { "light_exits", STAT_OFFSET(light_exits) }, @@ -248,6 +249,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->mmu.root_hpa = INVALID_PAGE; vcpu->kvm = kvm; vcpu->vcpu_id = id; + init_waitqueue_head(&vcpu->wq); page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { @@ -1307,15 +1309,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(emulate_instruction); -int kvm_emulate_halt(struct kvm_vcpu *vcpu) +/* + * The vCPU has executed a HLT instruction with in-kernel mode enabled. + */ +static void kvm_vcpu_kernel_halt(struct kvm_vcpu *vcpu) { - if (vcpu->irq_summary || - (irqchip_in_kernel(vcpu->kvm) && kvm_cpu_has_interrupt(vcpu))) - return 1; + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&vcpu->wq, &wait); + + /* + * We will block until either an interrupt or a signal wakes us up + */ + while(!(irqchip_in_kernel(vcpu->kvm) && kvm_cpu_has_interrupt(vcpu)) + && !vcpu->irq_summary + && !signal_pending(current)) { + set_current_state(TASK_INTERRUPTIBLE); + vcpu_put(vcpu); + schedule(); + vcpu_load(vcpu); + } - vcpu->run->exit_reason = KVM_EXIT_HLT; + remove_wait_queue(&vcpu->wq, &wait); + set_current_state(TASK_RUNNING); +} + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ ++vcpu->stat.halt_exits; - return 0; + if (irqchip_in_kernel(vcpu->kvm)) { + kvm_vcpu_kernel_halt(vcpu); + return 1; + } else { + vcpu->run->exit_reason = KVM_EXIT_HLT; + return 0; + } } EXPORT_SYMBOL_GPL(kvm_emulate_halt); @@ -2916,6 +2944,7 @@ static long kvm_dev_ioctl(struct file *filp, switch (ext) { case KVM_CAP_IRQCHIP: + case KVM_CAP_HLT: r = 1; break; default: diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index d576451827e..a347b61644c 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1398,9 
+1398,12 @@ static void do_interrupt_requests(struct vcpu_svm *svm, static void post_kvm_run_save(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - kvm_run->ready_for_interrupt_injection - = (svm->vcpu.interrupt_window_open && - svm->vcpu.irq_summary == 0); + if (irqchip_in_kernel(svm->vcpu.kvm)) + kvm_run->ready_for_interrupt_injection = 1; + else + kvm_run->ready_for_interrupt_injection = + (svm->vcpu.interrupt_window_open && + svm->vcpu.irq_summary == 0); kvm_run->if_flag = (svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; kvm_run->cr8 = get_cr8(&svm->vcpu); kvm_run->apic_base = kvm_get_apic_base(&svm->vcpu); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c4cc17cc00f..7ec8cf84e6e 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1961,8 +1961,12 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0; kvm_run->cr8 = get_cr8(vcpu); kvm_run->apic_base = kvm_get_apic_base(vcpu); - kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open && - vcpu->irq_summary == 0); + if (irqchip_in_kernel(vcpu->kvm)) + kvm_run->ready_for_interrupt_injection = 1; + else + kvm_run->ready_for_interrupt_injection = + (vcpu->interrupt_window_open && + vcpu->irq_summary == 0); } static int handle_interrupt_window(struct kvm_vcpu *vcpu, diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 997bb3e46f1..b0a13d1b34c 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -283,6 +283,7 @@ struct kvm_signal_mask { * Extension capability list. */ #define KVM_CAP_IRQCHIP 0 +#define KVM_CAP_HLT 1 /* * ioctls for VM fds -- cgit v1.2.3 From 9cf98828d12285d1fb43e774c8c100a55f8f34e1 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Sun, 22 Jul 2007 10:36:31 +0300 Subject: KVM: Protect in-kernel pio using kvm->lock PIO operations and the IRQ_LINE case of kvm_vm_ioctl are not protected by kvm->lock. Take the lock around them, the same as for IOAPIC MMIO operations.
Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 4384364fc0c..5063b3addbb 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1790,6 +1790,7 @@ static void kernel_pio(struct kvm_io_device *pio_dev, { /* TODO: String I/O for in kernel device */ + mutex_lock(&vcpu->kvm->lock); if (vcpu->pio.in) kvm_iodevice_read(pio_dev, vcpu->pio.port, vcpu->pio.size, @@ -1798,6 +1799,7 @@ static void kernel_pio(struct kvm_io_device *pio_dev, kvm_iodevice_write(pio_dev, vcpu->pio.port, vcpu->pio.size, pd); + mutex_unlock(&vcpu->kvm->lock); } static void pio_string_write(struct kvm_io_device *pio_dev, @@ -1807,12 +1809,14 @@ static void pio_string_write(struct kvm_io_device *pio_dev, void *pd = vcpu->pio_data; int i; + mutex_lock(&vcpu->kvm->lock); for (i = 0; i < io->cur_count; i++) { kvm_iodevice_write(pio_dev, io->port, io->size, pd); pd += io->size; } + mutex_unlock(&vcpu->kvm->lock); } int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, @@ -2818,6 +2822,7 @@ static long kvm_vm_ioctl(struct file *filp, if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; if (irqchip_in_kernel(kvm)) { + mutex_lock(&kvm->lock); if (irq_event.irq < 16) kvm_pic_set_irq(pic_irqchip(kvm), irq_event.irq, @@ -2825,6 +2830,7 @@ static long kvm_vm_ioctl(struct file *filp, kvm_ioapic_set_irq(kvm->vioapic, irq_event.irq, irq_event.level); + mutex_unlock(&kvm->lock); r = 0; } break; -- cgit v1.2.3 From 6ceb9d791eeeb0a5493958f5d6d4dc7d91e59cf7 Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Thu, 26 Jul 2007 11:05:18 +0300 Subject: KVM: Add get/set irqchip ioctls for in-kernel PIC live migration support This patch adds two new ioctls to dump and write kernel irqchips for save/restore and live migration. PIC s/r and l/m is implemented in this patch. 
Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/i8259.c | 5 +++ drivers/kvm/irq.h | 1 + drivers/kvm/kvm_main.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kvm.h | 36 ++++++++++++++++++++++ 4 files changed, 124 insertions(+) diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c index ee6030dc5c0..a679157bc59 100644 --- a/drivers/kvm/i8259.c +++ b/drivers/kvm/i8259.c @@ -119,6 +119,11 @@ static void pic_update_irq(struct kvm_pic *s) s->irq_request(s->irq_request_opaque, 0); } +void kvm_pic_update_irq(struct kvm_pic *s) +{ + pic_update_irq(s); +} + void kvm_pic_set_irq(void *opaque, int irq, int level) { struct kvm_pic *s = opaque; diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index 6ed856a41e2..4034f6576cd 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -59,6 +59,7 @@ void kvm_pic_set_irq(void *opaque, int irq, int level); int kvm_pic_read_irq(struct kvm_pic *s); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); +void kvm_pic_update_irq(struct kvm_pic *s); #define IOAPIC_NUM_PINS 24 #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 5063b3addbb..6e2c5f3f33f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -897,6 +897,53 @@ out: return r; } +static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_PIC_MASTER: + memcpy (&chip->chip.pic, + &pic_irqchip(kvm)->pics[0], + sizeof(struct kvm_pic_state)); + break; + case KVM_IRQCHIP_PIC_SLAVE: + memcpy (&chip->chip.pic, + &pic_irqchip(kvm)->pics[1], + sizeof(struct kvm_pic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_PIC_MASTER: + memcpy 
(&pic_irqchip(kvm)->pics[0], + &chip->chip.pic, + sizeof(struct kvm_pic_state)); + break; + case KVM_IRQCHIP_PIC_SLAVE: + memcpy (&pic_irqchip(kvm)->pics[1], + &chip->chip.pic, + sizeof(struct kvm_pic_state)); + break; + default: + r = -EINVAL; + break; + } + kvm_pic_update_irq(pic_irqchip(kvm)); + return r; +} + static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) { int i; @@ -2835,6 +2882,41 @@ static long kvm_vm_ioctl(struct file *filp, } break; } + case KVM_GET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_get_irqchip(kvm, &chip); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &chip, sizeof chip)) + goto out; + r = 0; + break; + } + case KVM_SET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_set_irqchip(kvm, &chip); + if (r) + goto out; + r = 0; + break; + } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index b0a13d1b34c..6560f11870f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -45,6 +45,40 @@ struct kvm_irq_level { __u32 level; }; +/* for KVM_GET_IRQCHIP / KVM_SET_IRQCHIP */ +struct kvm_pic_state { + __u8 last_irr; /* edge detection */ + __u8 irr; /* interrupt request register */ + __u8 imr; /* interrupt mask register */ + __u8 isr; /* interrupt service register */ + __u8 priority_add; /* highest irq priority */ + __u8 irq_base; + __u8 read_reg_select; + __u8 poll; + __u8 special_mask; + __u8 init_state; + __u8 auto_eoi; + __u8 rotate_on_auto_eoi; + __u8 special_fully_nested_mode; + __u8 init4; /* true if 4 byte init */ + __u8 elcr; /* PIIX edge/trigger selection */ + __u8 elcr_mask; +}; + +enum kvm_irqchip_id { + 
KVM_IRQCHIP_PIC_MASTER = 0, + KVM_IRQCHIP_PIC_SLAVE = 1, +}; + +struct kvm_irqchip { + __u32 chip_id; + __u32 pad; + union { + char dummy[512]; /* reserving space */ + struct kvm_pic_state pic; + } chip; +}; + enum kvm_exit_reason { KVM_EXIT_UNKNOWN = 0, KVM_EXIT_EXCEPTION = 1, @@ -299,6 +333,8 @@ struct kvm_signal_mask { /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) +#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) +#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) /* * ioctls for vcpu fds -- cgit v1.2.3 From c52fb35a8b5dada749d35fbe15ac1f9857b22896 Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Thu, 2 Aug 2007 14:03:07 +0300 Subject: KVM: Bypass irq_pending get/set when using in kernel irqchip vcpu->irq_pending is saved in get/set_sreg IOCTL, but when in-kernel local APIC is used, doing this may occasionally overwrite vcpu->apic to an invalid value, as in the vm restore path. Signed-off-by: Qing He --- drivers/kvm/kvm_main.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 6e2c5f3f33f..c270e4afd3f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2145,8 +2145,12 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->efer = vcpu->shadow_efer; sregs->apic_base = kvm_get_apic_base(vcpu); - memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, - sizeof sregs->interrupt_bitmap); + if (irqchip_in_kernel(vcpu->kvm)) + memset(sregs->interrupt_bitmap, 0, + sizeof sregs->interrupt_bitmap); + else + memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, + sizeof sregs->interrupt_bitmap); vcpu_put(vcpu); @@ -2200,12 +2204,14 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, - sizeof vcpu->irq_pending); - vcpu->irq_summary = 0; - for (i 
= 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i) - if (vcpu->irq_pending[i]) - __set_bit(i, &vcpu->irq_summary); + if (!irqchip_in_kernel(vcpu->kvm)) { + memcpy(vcpu->irq_pending, sregs->interrupt_bitmap, + sizeof vcpu->irq_pending); + vcpu->irq_summary = 0; + for (i = 0; i < ARRAY_SIZE(vcpu->irq_pending); ++i) + if (vcpu->irq_pending[i]) + __set_bit(i, &vcpu->irq_summary); + } set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); -- cgit v1.2.3 From 6bf9e962d14deb9e460afbbfd83ea2f450325c2d Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Sun, 5 Aug 2007 10:49:16 +0300 Subject: KVM: in-kernel IOAPIC save and restore support This patch adds support for in-kernel ioapic save and restore (to and from userspace). It uses the same get/set_irqchip ioctl as in-kernel PIC. Signed-off-by: Qing He Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Avi Kivity --- drivers/kvm/irq.h | 2 +- drivers/kvm/kvm_main.c | 10 ++++++++++ include/linux/kvm.h | 29 ++++++++++++++++++++++++++++- 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index 4034f6576cd..30adddcb182 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -61,7 +61,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *v); void kvm_pic_update_irq(struct kvm_pic *s); -#define IOAPIC_NUM_PINS 24 +#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1 diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index c270e4afd3f..61dff55f137 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -913,6 +913,11 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) &pic_irqchip(kvm)->pics[1], sizeof(struct kvm_pic_state)); break; + case KVM_IRQCHIP_IOAPIC: + memcpy (&chip->chip.ioapic, + ioapic_irqchip(kvm), + sizeof(struct kvm_ioapic_state)); + break; default: r = 
-EINVAL; break; @@ -936,6 +941,11 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) &chip->chip.pic, sizeof(struct kvm_pic_state)); break; + case KVM_IRQCHIP_IOAPIC: + memcpy (ioapic_irqchip(kvm), + &chip->chip.ioapic, + sizeof(struct kvm_ioapic_state)); + break; default: r = -EINVAL; break; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 6560f11870f..42d15150d7a 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -45,7 +45,7 @@ struct kvm_irq_level { __u32 level; }; -/* for KVM_GET_IRQCHIP / KVM_SET_IRQCHIP */ +/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ struct kvm_pic_state { __u8 last_irr; /* edge detection */ __u8 irr; /* interrupt request register */ @@ -65,9 +65,35 @@ struct kvm_pic_state { __u8 elcr_mask; }; +#define KVM_IOAPIC_NUM_PINS 24 +struct kvm_ioapic_state { + __u64 base_address; + __u32 ioregsel; + __u32 id; + __u32 irr; + __u32 pad; + union { + __u64 bits; + struct { + __u8 vector; + __u8 delivery_mode:3; + __u8 dest_mode:1; + __u8 delivery_status:1; + __u8 polarity:1; + __u8 remote_irr:1; + __u8 trig_mode:1; + __u8 mask:1; + __u8 reserve:7; + __u8 reserved[4]; + __u8 dest_id; + } fields; + } redirtbl[KVM_IOAPIC_NUM_PINS]; +}; + enum kvm_irqchip_id { KVM_IRQCHIP_PIC_MASTER = 0, KVM_IRQCHIP_PIC_SLAVE = 1, + KVM_IRQCHIP_IOAPIC = 2, }; struct kvm_irqchip { @@ -76,6 +102,7 @@ struct kvm_irqchip { union { char dummy[512]; /* reserving space */ struct kvm_pic_state pic; + struct kvm_ioapic_state ioapic; } chip; }; -- cgit v1.2.3 From 96ad2cc6132479aa0aea485d0838a13fda765bd5 Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Thu, 6 Sep 2007 12:22:56 +0300 Subject: KVM: in-kernel LAPIC save and restore support This patch adds a new vcpu-based IOCTL to save and restore the local apic registers for a single vcpu. The kernel only copies the apic page as a whole, extraction of registers is left to userspace side. 
On restore, the APIC timer is restarted from the initial count, this introduces a little delay, but works fine. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/irq.h | 1 + drivers/kvm/kvm_main.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/kvm/lapic.c | 13 +++++++++++++ include/linux/kvm.h | 8 ++++++++ 4 files changed, 68 insertions(+) diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index 30adddcb182..24b871f9b5f 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -149,6 +149,7 @@ int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig); +void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 61dff55f137..a012d70d9ef 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2642,6 +2642,27 @@ static int kvm_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } +static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) +{ + vcpu_load(vcpu); + memcpy(s->regs, vcpu->apic->regs, sizeof *s); + vcpu_put(vcpu); + + return 0; +} + +static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) +{ + vcpu_load(vcpu); + memcpy(vcpu->apic->regs, s->regs, sizeof *s); + kvm_apic_post_state_restore(vcpu); + vcpu_put(vcpu); + + return 0; +} + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2811,6 +2832,31 @@ static long kvm_vcpu_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_LAPIC: { + struct kvm_lapic_state lapic; + + memset(&lapic, 0, sizeof lapic); + r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic); + if 
(r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &lapic, sizeof lapic)) + goto out; + r = 0; + break; + } + case KVM_SET_LAPIC: { + struct kvm_lapic_state lapic; + + r = -EFAULT; + if (copy_from_user(&lapic, argp, sizeof lapic)) + goto out; + r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);; + if (r) + goto out; + r = 0; + break; + } default: ; } diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c index 4b5c77d8900..df636bf1979 100644 --- a/drivers/kvm/lapic.c +++ b/drivers/kvm/lapic.c @@ -931,3 +931,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) apic_clear_irr(vector, apic); return vector; } + +void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->apic; + + apic->base_address = vcpu->apic_base & + MSR_IA32_APICBASE_BASE; + apic_set_reg(apic, APIC_LVR, APIC_VERSION); + apic_update_ppr(apic); + hrtimer_cancel(&apic->timer.dev); + update_divide_count(apic); + start_apic_timer(apic); +} diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 42d15150d7a..30a83696906 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -208,6 +208,12 @@ struct kvm_fpu { __u32 pad2; }; +/* for KVM_GET_LAPIC and KVM_SET_LAPIC */ +#define KVM_APIC_REG_SIZE 0x400 +struct kvm_lapic_state { + char regs[KVM_APIC_REG_SIZE]; +}; + struct kvm_segment { __u64 base; __u32 limit; @@ -380,5 +386,7 @@ struct kvm_signal_mask { #define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) #define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu) #define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) +#define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state) +#define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state) #endif -- cgit v1.2.3 From 2a8067f17b8442ecce0b14e134823020ff33b4fa Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Mon, 6 Aug 2007 16:29:07 +0300 Subject: KVM: pending irq save/restore Add in kernel irqchip save/restore support for pending vectors. 
[avi: fix compile warning on i386] [avi: remove printk] Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 ++ drivers/kvm/kvm_main.c | 20 +++++++++++++++++--- drivers/kvm/svm.c | 19 +++++++++++++++++++ drivers/kvm/vmx.c | 16 ++++++++++++++++ 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index bb506b71797..f8fe87d3ddb 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -490,6 +490,8 @@ struct kvm_arch_ops { void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); + int (*get_irq)(struct kvm_vcpu *vcpu); + void (*set_irq)(struct kvm_vcpu *vcpu, int vec); }; extern struct kvm_arch_ops *kvm_arch_ops; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index a012d70d9ef..d56964a6eb8 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2126,6 +2126,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct descriptor_table dt; + int pending_vec; vcpu_load(vcpu); @@ -2155,10 +2156,13 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->efer = vcpu->shadow_efer; sregs->apic_base = kvm_get_apic_base(vcpu); - if (irqchip_in_kernel(vcpu->kvm)) + if (irqchip_in_kernel(vcpu->kvm)) { memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); - else + pending_vec = kvm_arch_ops->get_irq(vcpu); + if (pending_vec >= 0) + set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap); + } else memcpy(sregs->interrupt_bitmap, vcpu->irq_pending, sizeof sregs->interrupt_bitmap); @@ -2177,7 +2181,7 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { int mmu_reset_needed = 0; - int i; + int i, pending_vec, max_bits; struct descriptor_table dt; vcpu_load(vcpu); @@ -2221,6 +2225,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, for (i = 0; i < 
ARRAY_SIZE(vcpu->irq_pending); ++i) if (vcpu->irq_pending[i]) __set_bit(i, &vcpu->irq_summary); + } else { + max_bits = (sizeof sregs->interrupt_bitmap) << 3; + pending_vec = find_first_bit( + (const unsigned long *)sregs->interrupt_bitmap, + max_bits); + /* Only pending external irq is handled here */ + if (pending_vec < max_bits) { + kvm_arch_ops->set_irq(vcpu, pending_vec); + printk("Set back pending irq %d\n", pending_vec); + } } set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index a347b61644c..c8cd242f36f 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -843,6 +843,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) return -EOPNOTSUPP; } +static int svm_get_irq(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u32 exit_int_info = svm->vmcb->control.exit_int_info; + + if (is_external_interrupt(exit_int_info)) + return exit_int_info & SVM_EVTINJ_VEC_MASK; + return -1; +} + static void load_host_msrs(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 @@ -1310,6 +1320,13 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); } +static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm_inject_irq(svm, irq); +} + static void svm_intr_assist(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; @@ -1783,6 +1800,8 @@ static struct kvm_arch_ops svm_arch_ops = { .run = svm_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, .patch_hypercall = svm_patch_hypercall, + .get_irq = svm_get_irq, + .set_irq = svm_set_irq, }; static int __init svm_init(void) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 7ec8cf84e6e..6c371ea2104 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -790,6 +790,20 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg) return 0; } +static int vmx_get_irq(struct kvm_vcpu 
*vcpu) +{ + u32 idtv_info_field; + + idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD); + if (idtv_info_field & INTR_INFO_VALID_MASK) { + if (is_external_interrupt(idtv_info_field)) + return idtv_info_field & VECTORING_INFO_VECTOR_MASK; + else + printk("pending exception: not handled yet\n"); + } + return -1; +} + static __init int cpu_has_kvm_support(void) { unsigned long ecx = cpuid_ecx(1); @@ -2500,6 +2514,8 @@ static struct kvm_arch_ops vmx_arch_ops = { .run = vmx_vcpu_run, .skip_emulated_instruction = skip_emulated_instruction, .patch_hypercall = vmx_patch_hypercall, + .get_irq = vmx_get_irq, + .set_irq = vmx_inject_irq, }; static int __init vmx_init(void) -- cgit v1.2.3 From 6e5d865c0b9679b00b5e5f0754c9fc2b6b9894d6 Mon Sep 17 00:00:00 2001 From: "Yang, Sheng" Date: Wed, 12 Sep 2007 18:03:11 +0800 Subject: KVM: VMX: Use shadow TPR/cr8 for 64-bits guests This patch enables the VMX TPR shadow for CR8 accesses. 64-bit Windows frequently accesses the TPR through CR8. The TPR shadow improves the performance of TPR accesses by not causing a vmexit.
Signed-off-by: Sheng Yang Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/irq.h | 2 ++ drivers/kvm/lapic.c | 17 ++++++++++++++ drivers/kvm/vmx.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++---- drivers/kvm/vmx.h | 1 + 4 files changed, 81 insertions(+), 4 deletions(-) diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index 24b871f9b5f..07035e8279d 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -152,5 +152,7 @@ int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); +int kvm_lapic_enabled(struct kvm_vcpu *vcpu); +int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); #endif diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c index df636bf1979..68bbbb38eda 100644 --- a/drivers/kvm/lapic.c +++ b/drivers/kvm/lapic.c @@ -170,6 +170,19 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) return result; } +int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic; + int highest_irr; + + if (!apic) + return 0; + highest_irr = apic_find_highest_irr(apic); + + return highest_irr; +} +EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); + int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig) { if (!apic_test_and_set_irr(vec, apic)) { @@ -483,6 +496,7 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) break; default: + apic_update_ppr(apic); val = apic_get_reg(apic, offset); break; } @@ -723,6 +737,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) return (tpr & 0xf0) >> 4; } +EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8); void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) { @@ -809,6 +824,7 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu) return ret; } +EXPORT_SYMBOL_GPL(kvm_lapic_enabled); /* 
*---------------------------------------------------------------------- @@ -911,6 +927,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) if (!apic || !apic_enabled(apic)) return -1; + apic_update_ppr(apic); highest_irr = apic_find_highest_irr(apic); if ((highest_irr == -1) || ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 6c371ea2104..5c2c6e71abf 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -170,6 +170,16 @@ static inline int is_external_interrupt(u32 intr_info) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } +static inline int cpu_has_vmx_tpr_shadow(void) +{ + return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); +} + +static inline int vm_need_tpr_shadow(struct kvm *kvm) +{ + return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm))); +} + static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -888,10 +898,19 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING; +#ifdef CONFIG_X86_64 + opt = CPU_BASED_TPR_SHADOW; +#else opt = 0; +#endif if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS, &_cpu_based_exec_control) < 0) return -EIO; +#ifdef CONFIG_X86_64 + if ((_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) + _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING & + ~CPU_BASED_CR8_STORE_EXITING; +#endif min = 0; #ifdef CONFIG_X86_64 @@ -1384,6 +1403,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) int ret = 0; unsigned long kvm_vmx_return; u64 msr; + u32 exec_control; if (!init_rmode_tss(vmx->vcpu.kvm)) { ret = -ENOMEM; @@ -1459,8 +1479,16 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) /* Control */ vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmcs_config.pin_based_exec_ctrl); - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_config.cpu_based_exec_ctrl); + + exec_control = vmcs_config.cpu_based_exec_ctrl; + if 
(!vm_need_tpr_shadow(vmx->vcpu.kvm)) { + exec_control &= ~CPU_BASED_TPR_SHADOW; +#ifdef CONFIG_X86_64 + exec_control |= CPU_BASED_CR8_STORE_EXITING | + CPU_BASED_CR8_LOAD_EXITING; +#endif + } + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0); vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0); @@ -1532,8 +1560,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */ #ifdef CONFIG_X86_64 - vmcs_writel(VIRTUAL_APIC_PAGE_ADDR, 0); - vmcs_writel(TPR_THRESHOLD, 0); + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); + if (vm_need_tpr_shadow(vmx->vcpu.kvm)) + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, + page_to_phys(vmx->vcpu.apic->regs_page)); + vmcs_write32(TPR_THRESHOLD, 0); #endif vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); @@ -1969,6 +2000,12 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return 1; +} + static void post_kvm_run_save(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { @@ -2036,6 +2073,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window, [EXIT_REASON_HLT] = handle_halt, [EXIT_REASON_VMCALL] = handle_vmcall, + [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold }; static const int kvm_vmx_max_exit_handlers = @@ -2083,6 +2121,23 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) { } +static void update_tpr_threshold(struct kvm_vcpu *vcpu) +{ + int max_irr, tpr; + + if (!vm_need_tpr_shadow(vcpu->kvm)) + return; + + if (!kvm_lapic_enabled(vcpu) || + ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) { + vmcs_write32(TPR_THRESHOLD, 0); + return; + } + + tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4; + vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? 
tpr >> 4 : max_irr >> 4); +} + static void enable_irq_window(struct kvm_vcpu *vcpu) { u32 cpu_based_vm_exec_control; @@ -2097,6 +2152,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) u32 idtv_info_field, intr_info_field; int has_ext_irq, interrupt_window_open; + update_tpr_threshold(vcpu); + has_ext_irq = kvm_cpu_has_interrupt(vcpu); intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD); diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h index 35d0b58c0a0..fd4e1466608 100644 --- a/drivers/kvm/vmx.h +++ b/drivers/kvm/vmx.h @@ -213,6 +213,7 @@ enum vmcs_field { #define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 /* * Interruption-information format -- cgit v1.2.3 From 1b9778dae71dc64d3678d766c0f1fbed79c80f9f Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Mon, 3 Sep 2007 16:56:58 +0300 Subject: KVM: Keep track of missed timer irq injections APIC timer IRQ is set every time when a certain period expires at host time, but the guest may be descheduled at that time and thus the irq may be overwritten by a later firing. This patch keeps track of the number of fired irqs and decrements it only when the IRQ is injected to guest or buffered in APIC.
Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/irq.c | 13 +++++++++++ drivers/kvm/irq.h | 4 ++++ drivers/kvm/kvm_main.c | 2 ++ drivers/kvm/lapic.c | 58 +++++++++++++++++++++++++++++++++----------------- drivers/kvm/svm.c | 7 ++++-- drivers/kvm/vmx.c | 10 ++++++--- 6 files changed, 69 insertions(+), 25 deletions(-) diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c index e09cd65925d..b88e5011558 100644 --- a/drivers/kvm/irq.c +++ b/drivers/kvm/irq.c @@ -78,3 +78,16 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); } +void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) +{ + kvm_inject_apic_timer_irqs(vcpu); + /* TODO: PIT, RTC etc. */ +} +EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); + +void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec) +{ + kvm_apic_timer_intr_post(vcpu, vec); + /* TODO: PIT, RTC etc. */ +} +EXPORT_SYMBOL_GPL(kvm_timer_intr_post); diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index 07035e8279d..87baf7e69ea 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -154,5 +154,9 @@ int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); +void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); +void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); +void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); +void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); #endif diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index d56964a6eb8..8f8bfc9160e 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -283,6 +283,8 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) { kvm_mmu_destroy(vcpu); + if (vcpu->apic) + hrtimer_cancel(&vcpu->apic->timer.dev); kvm_free_apic(vcpu->apic); free_page((unsigned 
long)vcpu->pio_data); free_page((unsigned long)vcpu->run); diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c index 68bbbb38eda..490d4939dba 100644 --- a/drivers/kvm/lapic.c +++ b/drivers/kvm/lapic.c @@ -313,6 +313,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode) { int result = 0; + int orig_irr; switch (delivery_mode) { case APIC_DM_FIXED: @@ -321,7 +322,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; - if (apic_test_and_set_irr(vector, apic) && trig_mode) { + orig_irr = apic_test_and_set_irr(vector, apic); + if (orig_irr && trig_mode) { apic_debug("level trig mode repeatedly for vector %d", vector); break; @@ -335,7 +337,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, kvm_vcpu_kick(apic->vcpu); - result = 1; + result = (orig_irr == 0); break; case APIC_DM_REMRD: @@ -831,38 +833,33 @@ EXPORT_SYMBOL_GPL(kvm_lapic_enabled); * timer interface *---------------------------------------------------------------------- */ + +/* TODO: make sure __apic_timer_fn runs in current pCPU */ static int __apic_timer_fn(struct kvm_lapic *apic) { - u32 vector; int result = 0; + wait_queue_head_t *q = &apic->vcpu->wq; - if (unlikely(!apic_enabled(apic) || - !apic_lvt_enabled(apic, APIC_LVTT))) { - apic_debug("%s: time interrupt although apic is down\n", - __FUNCTION__); - return 0; - } - - vector = apic_lvt_vector(apic, APIC_LVTT); - apic->timer.last_update = apic->timer.dev.expires; atomic_inc(&apic->timer.pending); - __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); - + if (waitqueue_active(q)) + wake_up_interruptible(q); if (apic_lvtt_period(apic)) { - u32 offset; - u32 tmict = apic_get_reg(apic, APIC_TMICT); - - offset = APIC_BUS_CYCLE_NS * apic->timer.divide_count * tmict; - result = 1; apic->timer.dev.expires = ktime_add_ns( apic->timer.dev.expires, apic->timer.period); } - return result; } +static int 
__inject_apic_timer_irq(struct kvm_lapic *apic) +{ + int vector; + + vector = apic_lvt_vector(apic, APIC_LVTT); + return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); +} + static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) { struct kvm_lapic *apic; @@ -935,6 +932,27 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) return highest_irr; } +void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->apic; + + if (apic && apic_lvt_enabled(apic, APIC_LVTT) && + atomic_read(&apic->timer.pending) > 0) { + if (__inject_apic_timer_irq(apic)) + atomic_dec(&apic->timer.pending); + } +} + +void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec) +{ + struct kvm_lapic *apic = vcpu->apic; + + if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec) + apic->timer.last_update = ktime_add_ns( + apic->timer.last_update, + apic->timer.period); +} + int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { int vector = kvm_apic_has_interrupt(vcpu); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index c8cd242f36f..00119ec4166 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1331,7 +1331,9 @@ static void svm_intr_assist(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; int intr_vector = -1; + struct kvm_vcpu *vcpu = &svm->vcpu; + kvm_inject_pending_timer_irqs(vcpu); if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) { intr_vector = vmcb->control.exit_int_info & @@ -1344,7 +1346,7 @@ static void svm_intr_assist(struct vcpu_svm *svm) if (vmcb->control.int_ctl & V_IRQ_MASK) return; - if (!kvm_cpu_has_interrupt(&svm->vcpu)) + if (!kvm_cpu_has_interrupt(vcpu)) return; if (!(vmcb->save.rflags & X86_EFLAGS_IF) || @@ -1356,8 +1358,9 @@ static void svm_intr_assist(struct vcpu_svm *svm) return; } /* Okay, we can deliver the interrupt: grab it and update PIC state. 
*/ - intr_vector = kvm_cpu_get_interrupt(&svm->vcpu); + intr_vector = kvm_cpu_get_interrupt(vcpu); svm_inject_irq(svm, intr_vector); + kvm_timer_intr_post(vcpu, intr_vector); } static void kvm_reput_irq(struct vcpu_svm *svm) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 5c2c6e71abf..eeecadf5da4 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -2151,7 +2151,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) { u32 idtv_info_field, intr_info_field; int has_ext_irq, interrupt_window_open; + int vector; + kvm_inject_pending_timer_irqs(vcpu); update_tpr_threshold(vcpu); has_ext_irq = kvm_cpu_has_interrupt(vcpu); @@ -2183,9 +2185,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) interrupt_window_open = ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); - if (interrupt_window_open) - vmx_inject_irq(vcpu, kvm_cpu_get_interrupt(vcpu)); - else + if (interrupt_window_open) { + vector = kvm_cpu_get_interrupt(vcpu); + vmx_inject_irq(vcpu, vector); + kvm_timer_intr_post(vcpu, vector); + } else enable_irq_window(vcpu); } -- cgit v1.2.3 From a3d7f85f471f889e4477863a7ca42828ae74e77d Mon Sep 17 00:00:00 2001 From: Eddie Dong Date: Mon, 3 Sep 2007 16:15:12 +0300 Subject: KVM: Migrate lapic hrtimer when vcpu moves to another cpu This reduces overhead by accessing cachelines from the wrong node, as well as simplifying locking. 
[Qing: fix for inactive or expired one-shot timer] Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/irq.h | 1 + drivers/kvm/lapic.c | 14 ++++++++++++++ drivers/kvm/svm.c | 1 + drivers/kvm/vmx.c | 4 +++- 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index 87baf7e69ea..f324cfb4008 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -158,5 +158,6 @@ void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); +void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); #endif diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c index 490d4939dba..2706ec36c25 100644 --- a/drivers/kvm/lapic.c +++ b/drivers/kvm/lapic.c @@ -979,3 +979,17 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) update_divide_count(apic); start_apic_timer(apic); } + +void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->apic; + struct hrtimer *timer; + + if (!apic) + return; + + timer = &apic->timer.dev; + if (hrtimer_cancel(timer)) + hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); +} +EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 00119ec4166..3de9ec35ebf 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -633,6 +633,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) delta = vcpu->host_tsc - tsc_this; svm->vmcb->control.tsc_offset += delta; vcpu->cpu = cpu; + kvm_migrate_apic_timer(vcpu); } for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index eeecadf5da4..f4618b9edf9 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -441,8 +441,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) u64 phys_addr = __pa(vmx->vmcs); u64 tsc_this, delta; - 
if (vcpu->cpu != cpu) + if (vcpu->cpu != cpu) { vcpu_clear(vmx); + kvm_migrate_apic_timer(vcpu); + } if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { u8 error; -- cgit v1.2.3 From 5cd4f6fd8506f59cb8a232f364c54f6bd8e5150a Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Thu, 30 Aug 2007 17:04:26 +0800 Subject: KVM: disable tpr/cr8 sync when in-kernel APIC is used Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 8f8bfc9160e..02af24e8350 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2005,7 +2005,8 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); /* re-sync apic's tpr */ - set_cr8(vcpu, kvm_run->cr8); + if (!irqchip_in_kernel(vcpu->kvm)) + set_cr8(vcpu, kvm_run->cr8); if (vcpu->pio.cur_count) { r = complete_pio(vcpu); -- cgit v1.2.3 From 40487c680d5855459dfdce340df13d40071bb774 Mon Sep 17 00:00:00 2001 From: Qing He Date: Mon, 17 Sep 2007 14:47:13 +0800 Subject: KVM: deliver PIC interrupt only to vcpu0 This patch changes the PIC interrupts delivery. Now it is only delivered to vcpu0 when either condition is met (on vcpu0): 1. local APIC is hardware disabled 2. 
LVT0 is unmasked and configured to delivery mode ExtInt It fixes the 2x faster wall clock on x86_64 and SMP i386 Linux guests Signed-off-by: Eddie (Yaozu) Dong Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/irq.c | 15 ++++++++++----- drivers/kvm/irq.h | 1 + drivers/kvm/lapic.c | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c index b88e5011558..7628c7ff628 100644 --- a/drivers/kvm/irq.c +++ b/drivers/kvm/irq.c @@ -33,8 +33,11 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) struct kvm_pic *s; if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ - s = pic_irqchip(v->kvm); /* PIC */ - return s->output; + if (kvm_apic_accept_pic_intr(v)) { + s = pic_irqchip(v->kvm); /* PIC */ + return s->output; + } else + return 0; } return 1; } @@ -50,9 +53,11 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) vector = kvm_get_apic_interrupt(v); /* APIC */ if (vector == -1) { - s = pic_irqchip(v->kvm); - s->output = 0; /* PIC */ - vector = kvm_pic_read_irq(s); + if (kvm_apic_accept_pic_intr(v)) { + s = pic_irqchip(v->kvm); + s->output = 0; /* PIC */ + vector = kvm_pic_read_irq(s); + } } return vector; } diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index f324cfb4008..ec46a09e213 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -135,6 +135,7 @@ do { \ void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); int kvm_create_lapic(struct kvm_vcpu *vcpu); void kvm_free_apic(struct kvm_lapic *apic); diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c index 2706ec36c25..01e769672dc 100644 --- a/drivers/kvm/lapic.c +++ b/drivers/kvm/lapic.c @@ -788,6 +788,8 @@ static void lapic_reset(struct kvm_vcpu *vcpu) for (i = 0; i < APIC_LVT_NUM; i++) apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED); + apic_set_reg(apic, APIC_LVT0, + 
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_set_reg(apic, APIC_DFR, 0xffffffffU); apic_set_reg(apic, APIC_SPIV, 0xff); @@ -932,6 +934,21 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) return highest_irr; } +int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) +{ + u32 lvt0 = apic_get_reg(vcpu->apic, APIC_LVT0); + int r = 0; + + if (vcpu->vcpu_id == 0) { + if (!apic_hw_enabled(vcpu->apic)) + r = 1; + if ((lvt0 & APIC_LVT_MASKED) == 0 && + GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) + r = 1; + } + return r; +} + void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->apic; -- cgit v1.2.3 From 932f72adbe76f098922c746737cb0bd75fc21e27 Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Mon, 3 Sep 2007 17:01:36 +0300 Subject: KVM: round robin for APIC lowest priority delivery mode Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/lapic.c | 33 ++++++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index f8fe87d3ddb..dbb929d8a31 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -413,6 +413,7 @@ struct kvm { struct kvm_io_bus pio_bus; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; + int round_robin_prev_vcpu; }; static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c index 01e769672dc..ca1db3852ac 100644 --- a/drivers/kvm/lapic.c +++ b/drivers/kvm/lapic.c @@ -371,12 +371,35 @@ struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, unsigned long bitmap) { int vcpu_id; + int last; + int next; + struct kvm_lapic *apic; + + last = kvm->round_robin_prev_vcpu; + next = last; + + do { + if (++next == KVM_MAX_VCPUS) + next = 0; + if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap)) + continue; + apic = kvm->vcpus[next]->apic; + if (apic && apic_enabled(apic)) + break; + apic = NULL; + } while (next != last); + 
kvm->round_robin_prev_vcpu = next; + + if (!apic) { + vcpu_id = ffs(bitmap) - 1; + if (vcpu_id < 0) { + vcpu_id = 0; + printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n"); + } + apic = kvm->vcpus[vcpu_id]->apic; + } - /* TODO for real round robin */ - vcpu_id = fls(bitmap) - 1; - if (vcpu_id < 0) - printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n"); - return kvm->vcpus[vcpu_id]->apic; + return apic; } static void apic_set_eoi(struct kvm_lapic *apic) -- cgit v1.2.3 From c5ec153402b6d276fe20029da1059ba42a4b55e5 Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Mon, 3 Sep 2007 17:07:41 +0300 Subject: KVM: enable in-kernel APIC INIT/SIPI handling This patch enables INIT/SIPI handling using in-kernel APIC by introducing a ->mp_state field to emulate the SMP state transition. [avi: remove smp_processor_id() warning] Signed-off-by: Qing He Signed-off-by: Xin Li Signed-off-by: Avi Kivity --- drivers/kvm/irq.h | 1 + drivers/kvm/kvm.h | 7 +++++++ drivers/kvm/kvm_main.c | 26 ++++++++++++++++++++------ drivers/kvm/lapic.c | 43 ++++++++++++++++++++++++++++++++++++------- drivers/kvm/vmx.c | 24 +++++++++++++++++++++--- 5 files changed, 85 insertions(+), 16 deletions(-) diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h index ec46a09e213..11fc014e2b3 100644 --- a/drivers/kvm/irq.h +++ b/drivers/kvm/irq.h @@ -138,6 +138,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu); int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu); int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu); int kvm_create_lapic(struct kvm_vcpu *vcpu); +void kvm_lapic_reset(struct kvm_vcpu *vcpu); void kvm_free_apic(struct kvm_lapic *apic); u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index dbb929d8a31..5e318b6e215 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -326,6 +326,13 @@ struct kvm_vcpu { u64 shadow_efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip 
context */ +#define VCPU_MP_STATE_RUNNABLE 0 +#define VCPU_MP_STATE_UNINITIALIZED 1 +#define VCPU_MP_STATE_INIT_RECEIVED 2 +#define VCPU_MP_STATE_SIPI_RECEIVED 3 +#define VCPU_MP_STATE_HALTED 4 + int mp_state; + int sipi_vector; u64 ia32_misc_enable_msr; struct kvm_mmu mmu; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 02af24e8350..d0a5a2b3d59 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -249,6 +249,10 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->mmu.root_hpa = INVALID_PAGE; vcpu->kvm = kvm; vcpu->vcpu_id = id; + if (!irqchip_in_kernel(kvm) || id == 0) + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; + else + vcpu->mp_state = VCPU_MP_STATE_UNINITIALIZED; init_waitqueue_head(&vcpu->wq); page = alloc_page(GFP_KERNEL | __GFP_ZERO); @@ -1371,7 +1375,7 @@ EXPORT_SYMBOL_GPL(emulate_instruction); /* * The vCPU has executed a HLT instruction with in-kernel mode enabled. */ -static void kvm_vcpu_kernel_halt(struct kvm_vcpu *vcpu) +static void kvm_vcpu_block(struct kvm_vcpu *vcpu) { DECLARE_WAITQUEUE(wait, current); @@ -1380,24 +1384,28 @@ static void kvm_vcpu_kernel_halt(struct kvm_vcpu *vcpu) /* * We will block until either an interrupt or a signal wakes us up */ - while(!(irqchip_in_kernel(vcpu->kvm) && kvm_cpu_has_interrupt(vcpu)) - && !vcpu->irq_summary - && !signal_pending(current)) { + while (!kvm_cpu_has_interrupt(vcpu) + && !signal_pending(current) + && vcpu->mp_state != VCPU_MP_STATE_RUNNABLE + && vcpu->mp_state != VCPU_MP_STATE_SIPI_RECEIVED) { set_current_state(TASK_INTERRUPTIBLE); vcpu_put(vcpu); schedule(); vcpu_load(vcpu); } + __set_current_state(TASK_RUNNING); remove_wait_queue(&vcpu->wq, &wait); - set_current_state(TASK_RUNNING); } int kvm_emulate_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; if (irqchip_in_kernel(vcpu->kvm)) { - kvm_vcpu_kernel_halt(vcpu); + vcpu->mp_state = VCPU_MP_STATE_HALTED; + kvm_vcpu_block(vcpu); + if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE) + return 
-EINTR; return 1; } else { vcpu->run->exit_reason = KVM_EXIT_HLT; @@ -2001,6 +2009,12 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); + if (unlikely(vcpu->mp_state == VCPU_MP_STATE_UNINITIALIZED)) { + kvm_vcpu_block(vcpu); + vcpu_put(vcpu); + return -EAGAIN; + } + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c index ca1db3852ac..a190587cf6a 100644 --- a/drivers/kvm/lapic.c +++ b/drivers/kvm/lapic.c @@ -312,8 +312,8 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode) { - int result = 0; - int orig_irr; + int orig_irr, result = 0; + struct kvm_vcpu *vcpu = apic->vcpu; switch (delivery_mode) { case APIC_DM_FIXED: @@ -335,7 +335,13 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, } else apic_clear_vector(vector, apic->regs + APIC_TMR); - kvm_vcpu_kick(apic->vcpu); + if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE) + kvm_vcpu_kick(vcpu); + else if (vcpu->mp_state == VCPU_MP_STATE_HALTED) { + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + } result = (orig_irr == 0); break; @@ -352,11 +358,30 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; case APIC_DM_INIT: - printk(KERN_DEBUG "Ignoring guest INIT\n"); + if (level) { + if (vcpu->mp_state == VCPU_MP_STATE_RUNNABLE) + printk(KERN_DEBUG + "INIT on a runnable vcpu %d\n", + vcpu->vcpu_id); + vcpu->mp_state = VCPU_MP_STATE_INIT_RECEIVED; + kvm_vcpu_kick(vcpu); + } else { + printk(KERN_DEBUG + "Ignoring de-assert INIT to vcpu %d\n", + vcpu->vcpu_id); + } + break; case APIC_DM_STARTUP: - printk(KERN_DEBUG "Ignoring guest STARTUP\n"); + printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n", + vcpu->vcpu_id, vector); + if (vcpu->mp_state == 
VCPU_MP_STATE_INIT_RECEIVED) { + vcpu->sipi_vector = vector; + vcpu->mp_state = VCPU_MP_STATE_SIPI_RECEIVED; + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + } break; default: @@ -792,7 +817,7 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_lapic_get_base); -static void lapic_reset(struct kvm_vcpu *vcpu) +void kvm_lapic_reset(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic; int i; @@ -839,6 +864,7 @@ static void lapic_reset(struct kvm_vcpu *vcpu) vcpu, kvm_apic_id(apic), vcpu->apic_base, apic->base_address); } +EXPORT_SYMBOL_GPL(kvm_lapic_reset); int kvm_lapic_enabled(struct kvm_vcpu *vcpu) { @@ -867,7 +893,10 @@ static int __apic_timer_fn(struct kvm_lapic *apic) atomic_inc(&apic->timer.pending); if (waitqueue_active(q)) + { + apic->vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; wake_up_interruptible(q); + } if (apic_lvtt_period(apic)) { result = 1; apic->timer.dev.expires = ktime_add_ns( @@ -928,7 +957,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) apic->base_address = APIC_DEFAULT_PHYS_BASE; vcpu->apic_base = APIC_DEFAULT_PHYS_BASE; - lapic_reset(vcpu); + kvm_lapic_reset(vcpu); apic->dev.read = apic_mmio_read; apic->dev.write = apic_mmio_write; apic->dev.in_range = apic_mmio_range; diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index f4618b9edf9..440cacfda89 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1412,6 +1412,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) goto out; } + vmx->vcpu.rmode.active = 0; + vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val(); set_cr8(&vmx->vcpu, 0); msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; @@ -1425,8 +1427,13 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. 
*/ - vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_writel(GUEST_CS_BASE, 0x000f0000); + if (vmx->vcpu.vcpu_id == 0) { + vmcs_write16(GUEST_CS_SELECTOR, 0xf000); + vmcs_writel(GUEST_CS_BASE, 0x000f0000); + } else { + vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8); + vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12); + } vmcs_write32(GUEST_CS_LIMIT, 0xffff); vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); @@ -1451,7 +1458,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(GUEST_SYSENTER_EIP, 0); vmcs_writel(GUEST_RFLAGS, 0x02); - vmcs_writel(GUEST_RIP, 0xfff0); + if (vmx->vcpu.vcpu_id == 0) + vmcs_writel(GUEST_RIP, 0xfff0); + else + vmcs_writel(GUEST_RIP, 0); vmcs_writel(GUEST_RSP, 0); //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 @@ -2201,6 +2211,14 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) u8 fail; int r; + if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { + printk("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, vcpu->sipi_vector); + kvm_lapic_reset(vcpu); + vmx_vcpu_setup(vmx); + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; + } + preempted: if (vcpu->guest_debug.enabled) kvm_guest_debug_pre(vcpu); -- cgit v1.2.3 From 380102c8e431ba8b25a5b3a29e4529ca02ede4c8 Mon Sep 17 00:00:00 2001 From: Amit Shah Date: Sat, 25 Aug 2007 11:35:52 +0300 Subject: KVM: Set the ET flag in CR0 after initializing FX This was missed when moving stuff around in fbc4f2e Fixes Solaris guests and bug #1773613 Signed-off-by: Amit Shah Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index d0a5a2b3d59..d3e534dcf58 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -655,6 +655,7 @@ void fx_init(struct kvm_vcpu *vcpu) fx_restore(&vcpu->host_fx_image); preempt_enable(); + vcpu->cr0 |= X86_CR0_ET; after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space); vcpu->guest_fx_image.mxcsr = 
0x1f80; memset((void *)&vcpu->guest_fx_image + after_mxcsr_mask, -- cgit v1.2.3 From c9a1185c945c8db3185ad40092963cbb39192e31 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 9 Sep 2007 14:10:57 +0300 Subject: KVM: Remove the unused invlpg member of struct kvm_arch_ops. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 - drivers/kvm/svm.c | 6 ------ 2 files changed, 7 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 5e318b6e215..7c353524af2 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -487,7 +487,6 @@ struct kvm_arch_ops { unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*invlpg)(struct kvm_vcpu *vcpu, gva_t addr); void (*tlb_flush)(struct kvm_vcpu *vcpu); void (*inject_page_fault)(struct kvm_vcpu *vcpu, unsigned long addr, u32 err_code); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 3de9ec35ebf..dbd4e813cbe 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -881,11 +881,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) svm->vmcb->control.asid = svm_data->next_asid++; } -static void svm_invlpg(struct kvm_vcpu *vcpu, gva_t address) -{ - invlpga(address, to_svm(vcpu)->vmcb->control.asid); // is needed? 
-} - static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) { return to_svm(vcpu)->db_regs[dr]; @@ -1795,7 +1790,6 @@ static struct kvm_arch_ops svm_arch_ops = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .invlpg = svm_invlpg, .tlb_flush = svm_flush_tlb, .inject_page_fault = svm_inject_page_fault, -- cgit v1.2.3 From b85b9ee9259917f248ee1507d7d1f575f4fc27dd Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sun, 9 Sep 2007 14:12:54 +0300 Subject: KVM: Clean up unloved invlpg emulation invlpg shouldn't fetch the "src" address, since it may not be valid; however, SVM's "solution", which neuters emulation of all group 7 instructions, is horrible and breaks kvm-lite. The simplest fix is to put a special check in for invlpg. Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 -- drivers/kvm/svm.c | 2 -- drivers/kvm/x86_emulate.c | 16 +++------------- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 7c353524af2..9bf9ac6389b 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -539,8 +539,6 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; } hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva); struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva); -void kvm_emulator_want_group7_invlpg(void); - extern hpa_t bad_page_address; struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index dbd4e813cbe..e51f6b7f8ff 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -376,8 +376,6 @@ static __init int svm_hardware_setup(void) void *iopm_va, *msrpm_va; int r; - kvm_emulator_want_group7_invlpg(); - iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); if (!iopm_pages) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 7439b3422ec..342594d78d8 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -213,19 +213,6 @@ static u16 twobyte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; -/* - * Tell the emulator that of the Group 7 instructions (sgdt, lidt, etc.) we - * are interested only in invlpg and not in any of the rest. - * - * invlpg is a special instruction in that the data it references may not - * be mapped. - */ -void kvm_emulator_want_group7_invlpg(void) -{ - twobyte_table[1] &= ~SrcMem; -} -EXPORT_SYMBOL_GPL(kvm_emulator_want_group7_invlpg); - /* Type, address-of, and value of an instruction's operand. */ struct operand { enum { OP_REG, OP_MEM, OP_IMM } type; @@ -791,6 +778,9 @@ done_prefixes: goto srcmem_common; case SrcMem: src.bytes = (d & ByteOp) ? 1 : op_bytes; + /* Don't fetch the address for invlpg: it could be unmapped. */ + if (twobyte && b == 0x01 && modrm_reg == 7) + break; srcmem_common: src.type = OP_MEM; src.ptr = (unsigned long *)cr2; -- cgit v1.2.3 From 81f50e3bfdf864103ef890ca156e7a9c922c7089 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 6 Sep 2007 01:20:38 +1000 Subject: KVM: Keep control regs in sync We don't update the vcpu control registers in various places. We should do so. 
Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index d3e534dcf58..7341c094934 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -543,6 +543,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return; } kvm_arch_ops->set_cr4(vcpu, cr4); + vcpu->cr4 = cr4; mutex_lock(&vcpu->kvm->lock); kvm_mmu_reset_context(vcpu); mutex_unlock(&vcpu->kvm->lock); @@ -1238,10 +1239,8 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) int emulate_clts(struct kvm_vcpu *vcpu) { - unsigned long cr0; - - cr0 = vcpu->cr0 & ~X86_CR0_TS; - kvm_arch_ops->set_cr0(vcpu, cr0); + vcpu->cr0 &= ~X86_CR0_TS; + kvm_arch_ops->set_cr0(vcpu, vcpu->cr0); return X86EMUL_CONTINUE; } @@ -2226,6 +2225,7 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_arch_ops->decache_cr4_guest_bits(vcpu); mmu_reset_needed |= vcpu->cr0 != sregs->cr0; + vcpu->cr0 = sregs->cr0; kvm_arch_ops->set_cr0(vcpu, sregs->cr0); mmu_reset_needed |= vcpu->cr4 != sregs->cr4; -- cgit v1.2.3 From 1747fb71fd7c9389696e91f354d2f841b5c85790 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 6 Sep 2007 01:21:32 +1000 Subject: KVM: Hoist SVM's get_cs_db_l_bits into core code. SVM gets the DB and L bits for the cs by decoding the segment. This is in fact the completely generic code, so hoist it for kvm-lite to use. 
Signed-off-by: Rusty Russell Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 10 ++++++++++ drivers/kvm/svm.c | 10 +--------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 9bf9ac6389b..ee9f8bdee75 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -586,6 +586,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0); void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0); unsigned long get_cr8(struct kvm_vcpu *vcpu); void lmsw(struct kvm_vcpu *vcpu, unsigned long msw); +void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 7341c094934..9dffbbea46a 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2270,6 +2270,16 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } +void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) +{ + struct kvm_segment cs; + + get_segment(vcpu, &cs, VCPU_SREG_CS); + *db = cs.db; + *l = cs.l; +} +EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); + /* * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. 
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index e51f6b7f8ff..35f3f83b5c6 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -724,14 +724,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->unusable = !var->present; } -static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -{ - struct vmcb_seg *s = svm_seg(vcpu, VCPU_SREG_CS); - - *db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; - *l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; -} - static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1771,7 +1763,7 @@ static struct kvm_arch_ops svm_arch_ops = { .get_segment_base = svm_get_segment_base, .get_segment = svm_get_segment, .set_segment = svm_set_segment, - .get_cs_db_l_bits = svm_get_cs_db_l_bits, + .get_cs_db_l_bits = kvm_get_cs_db_l_bits, .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr3 = svm_set_cr3, -- cgit v1.2.3 From 0d8d2bd4f20c8a2a254b4fe3bc114f12214a6d73 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 30 Aug 2007 14:56:21 +0200 Subject: KVM: Simplify memory allocation The mutex->spinlock conversion allows us to make some code simplifications. As we can keep the lock longer, we don't have to release it and then have to check if the environment has not been modified before re-taking it. We can remove kvm->busy and kvm->memory_config_version. 
Signed-off-by: Laurent Vivier Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 2 -- drivers/kvm/kvm_main.c | 38 +++----------------------------------- 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index ee9f8bdee75..351da40807c 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -411,8 +411,6 @@ struct kvm { int n_free_mmu_pages; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; - int memory_config_version; - int busy; unsigned long rmap_overflow; struct list_head vm_list; struct file *filp; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 9dffbbea46a..8da13a462e3 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -679,7 +679,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, unsigned long i; struct kvm_memory_slot *memslot; struct kvm_memory_slot old, new; - int memory_config_version; r = -EINVAL; /* General sanity checks */ @@ -699,10 +698,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, if (!npages) mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES; -raced: mutex_lock(&kvm->lock); - memory_config_version = kvm->memory_config_version; new = old = *memslot; new.base_gfn = base_gfn; @@ -725,11 +722,6 @@ raced: (base_gfn >= s->base_gfn + s->npages))) goto out_unlock; } - /* - * Do memory allocations outside lock. memory_config_version will - * detect any races. 
- */ - mutex_unlock(&kvm->lock); /* Deallocate if slot is being removed */ if (!npages) @@ -746,14 +738,14 @@ raced: new.phys_mem = vmalloc(npages * sizeof(struct page *)); if (!new.phys_mem) - goto out_free; + goto out_unlock; memset(new.phys_mem, 0, npages * sizeof(struct page *)); for (i = 0; i < npages; ++i) { new.phys_mem[i] = alloc_page(GFP_HIGHUSER | __GFP_ZERO); if (!new.phys_mem[i]) - goto out_free; + goto out_unlock; set_page_private(new.phys_mem[i],0); } } @@ -764,27 +756,14 @@ raced: new.dirty_bitmap = vmalloc(dirty_bytes); if (!new.dirty_bitmap) - goto out_free; + goto out_unlock; memset(new.dirty_bitmap, 0, dirty_bytes); } - mutex_lock(&kvm->lock); - - if (memory_config_version != kvm->memory_config_version) { - mutex_unlock(&kvm->lock); - kvm_free_physmem_slot(&new, &old); - goto raced; - } - - r = -EAGAIN; - if (kvm->busy) - goto out_unlock; - if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; *memslot = new; - ++kvm->memory_config_version; kvm_mmu_slot_remove_write_access(kvm, mem->slot); kvm_flush_remote_tlbs(kvm); @@ -796,7 +775,6 @@ raced: out_unlock: mutex_unlock(&kvm->lock); -out_free: kvm_free_physmem_slot(&new, &old); out: return r; @@ -815,12 +793,6 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, mutex_lock(&kvm->lock); - /* - * Prevent changes to guest memory configuration even while the lock - * is not taken. - */ - ++kvm->busy; - mutex_unlock(&kvm->lock); r = -EINVAL; if (log->slot >= KVM_MEMORY_SLOTS) goto out; @@ -841,18 +813,14 @@ static int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, /* If nothing is dirty, don't bother messing with page tables. 
*/ if (any) { - mutex_lock(&kvm->lock); kvm_mmu_slot_remove_write_access(kvm, log->slot); kvm_flush_remote_tlbs(kvm); memset(memslot->dirty_bitmap, 0, n); - mutex_unlock(&kvm->lock); } r = 0; out: - mutex_lock(&kvm->lock); - --kvm->busy; mutex_unlock(&kvm->lock); return r; } -- cgit v1.2.3 From cbdd1bea2a2dce4c0b45c5f0122c150d9f07f0bc Mon Sep 17 00:00:00 2001 From: Christian Ehrhardt Date: Sun, 9 Sep 2007 15:41:59 +0300 Subject: KVM: Rename kvm_arch_ops to kvm_x86_ops This patch just renames the current (misnamed) _arch namings to _x86 to ensure better readability when a real arch layer takes place. Signed-off-by: Christian Ehrhardt Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 8 +-- drivers/kvm/kvm_main.c | 160 +++++++++++++++++++++++----------------------- drivers/kvm/mmu.c | 6 +- drivers/kvm/paging_tmpl.h | 2 +- drivers/kvm/svm.c | 6 +- drivers/kvm/vmx.c | 6 +- drivers/kvm/x86_emulate.c | 4 +- 7 files changed, 96 insertions(+), 96 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 351da40807c..42bb225ad6c 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -441,7 +441,7 @@ struct descriptor_table { unsigned long base; } __attribute__((packed)); -struct kvm_arch_ops { +struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ void (*hardware_enable)(void *dummy); /* __init */ @@ -499,7 +499,7 @@ struct kvm_arch_ops { void (*set_irq)(struct kvm_vcpu *vcpu, int vec); }; -extern struct kvm_arch_ops *kvm_arch_ops; +extern struct kvm_x86_ops *kvm_x86_ops; /* The guest did something we don't support. */ #define pr_unimpl(vcpu, fmt, ...) 
\ @@ -515,9 +515,9 @@ extern struct kvm_arch_ops *kvm_arch_ops; int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); -int kvm_init_arch(struct kvm_arch_ops *ops, unsigned int vcpu_size, +int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size, struct module *module); -void kvm_exit_arch(void); +void kvm_exit_x86(void); int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 8da13a462e3..9bfa1bcd26e 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -53,7 +53,7 @@ static LIST_HEAD(vm_list); static cpumask_t cpus_hardware_enabled; -struct kvm_arch_ops *kvm_arch_ops; +struct kvm_x86_ops *kvm_x86_ops; struct kmem_cache *kvm_vcpu_cache; EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -182,14 +182,14 @@ static void vcpu_load(struct kvm_vcpu *vcpu) mutex_lock(&vcpu->mutex); cpu = get_cpu(); preempt_notifier_register(&vcpu->preempt_notifier); - kvm_arch_ops->vcpu_load(vcpu, cpu); + kvm_x86_ops->vcpu_load(vcpu, cpu); put_cpu(); } static void vcpu_put(struct kvm_vcpu *vcpu) { preempt_disable(); - kvm_arch_ops->vcpu_put(vcpu); + kvm_x86_ops->vcpu_put(vcpu); preempt_notifier_unregister(&vcpu->preempt_notifier); preempt_enable(); mutex_unlock(&vcpu->mutex); @@ -374,7 +374,7 @@ static void kvm_free_vcpus(struct kvm *kvm) kvm_unload_vcpu_mmu(kvm->vcpus[i]); for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { - kvm_arch_ops->vcpu_free(kvm->vcpus[i]); + kvm_x86_ops->vcpu_free(kvm->vcpus[i]); kvm->vcpus[i] = NULL; } } @@ -405,7 +405,7 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) static void inject_gp(struct kvm_vcpu *vcpu) { - kvm_arch_ops->inject_gp(vcpu, 0); + kvm_x86_ops->inject_gp(vcpu, 0); } /* @@ -480,7 +480,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) inject_gp(vcpu); return; } - kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, 
&cs_l); if (cs_l) { printk(KERN_DEBUG "set_cr0: #GP, start paging " "in long mode while CS.L == 1\n"); @@ -499,7 +499,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } - kvm_arch_ops->set_cr0(vcpu, cr0); + kvm_x86_ops->set_cr0(vcpu, cr0); vcpu->cr0 = cr0; mutex_lock(&vcpu->kvm->lock); @@ -542,7 +542,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) inject_gp(vcpu); return; } - kvm_arch_ops->set_cr4(vcpu, cr4); + kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->cr4 = cr4; mutex_lock(&vcpu->kvm->lock); kvm_mmu_reset_context(vcpu); @@ -1134,7 +1134,7 @@ static int emulator_write_emulated_onepage(unsigned long addr, gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, addr); if (gpa == UNMAPPED_GVA) { - kvm_arch_ops->inject_page_fault(vcpu, addr, 2); + kvm_x86_ops->inject_page_fault(vcpu, addr, 2); return X86EMUL_PROPAGATE_FAULT; } @@ -1197,7 +1197,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) { - return kvm_arch_ops->get_segment_base(vcpu, seg); + return kvm_x86_ops->get_segment_base(vcpu, seg); } int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) @@ -1208,7 +1208,7 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) int emulate_clts(struct kvm_vcpu *vcpu) { vcpu->cr0 &= ~X86_CR0_TS; - kvm_arch_ops->set_cr0(vcpu, vcpu->cr0); + kvm_x86_ops->set_cr0(vcpu, vcpu->cr0); return X86EMUL_CONTINUE; } @@ -1218,7 +1218,7 @@ int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, unsigned long *dest) switch (dr) { case 0 ... 3: - *dest = kvm_arch_ops->get_dr(vcpu, dr); + *dest = kvm_x86_ops->get_dr(vcpu, dr); return X86EMUL_CONTINUE; default: pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr); @@ -1231,7 +1231,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? 
~0ULL : ~0U; int exception; - kvm_arch_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); + kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception); if (exception) { /* FIXME: better handling */ return X86EMUL_UNHANDLEABLE; @@ -1277,12 +1277,12 @@ int emulate_instruction(struct kvm_vcpu *vcpu, int cs_db, cs_l; vcpu->mmio_fault_cr2 = cr2; - kvm_arch_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu); - kvm_arch_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); emulate_ctxt.vcpu = vcpu; - emulate_ctxt.eflags = kvm_arch_ops->get_rflags(vcpu); + emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); emulate_ctxt.cr2 = cr2; emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_REAL : cs_l @@ -1328,8 +1328,8 @@ int emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DO_MMIO; } - kvm_arch_ops->decache_regs(vcpu); - kvm_arch_ops->set_rflags(vcpu, emulate_ctxt.eflags); + kvm_x86_ops->decache_regs(vcpu); + kvm_x86_ops->set_rflags(vcpu, emulate_ctxt.eflags); if (vcpu->mmio_is_write) { vcpu->mmio_needed = 0; @@ -1386,7 +1386,7 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) { unsigned long nr, a0, a1, a2, a3, a4, a5, ret; - kvm_arch_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu); ret = -KVM_EINVAL; #ifdef CONFIG_X86_64 if (is_long_mode(vcpu)) { @@ -1419,11 +1419,11 @@ int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run) run->hypercall.args[5] = a5; run->hypercall.ret = ret; run->hypercall.longmode = is_long_mode(vcpu); - kvm_arch_ops->decache_regs(vcpu); + kvm_x86_ops->decache_regs(vcpu); return 0; } vcpu->regs[VCPU_REGS_RAX] = ret; - kvm_arch_ops->decache_regs(vcpu); + kvm_x86_ops->decache_regs(vcpu); return 1; } EXPORT_SYMBOL_GPL(kvm_hypercall); @@ -1437,26 +1437,26 @@ void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) { struct descriptor_table dt = { limit, base }; - kvm_arch_ops->set_gdt(vcpu, &dt); + kvm_x86_ops->set_gdt(vcpu, &dt); 
} void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) { struct descriptor_table dt = { limit, base }; - kvm_arch_ops->set_idt(vcpu, &dt); + kvm_x86_ops->set_idt(vcpu, &dt); } void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, unsigned long *rflags) { lmsw(vcpu, msw); - *rflags = kvm_arch_ops->get_rflags(vcpu); + *rflags = kvm_x86_ops->get_rflags(vcpu); } unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) { - kvm_arch_ops->decache_cr4_guest_bits(vcpu); + kvm_x86_ops->decache_cr4_guest_bits(vcpu); switch (cr) { case 0: return vcpu->cr0; @@ -1478,7 +1478,7 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, switch (cr) { case 0: set_cr0(vcpu, mk_cr_64(vcpu->cr0, val)); - *rflags = kvm_arch_ops->get_rflags(vcpu); + *rflags = kvm_x86_ops->get_rflags(vcpu); break; case 2: vcpu->cr2 = val; @@ -1552,7 +1552,7 @@ static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa) mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT); hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT), KM_USER1) + (hypercall_hpa & ~PAGE_MASK); - kvm_arch_ops->patch_hypercall(vcpu, hypercall); + kvm_x86_ops->patch_hypercall(vcpu, hypercall); kunmap_atomic(hypercall, KM_USER1); para_state->ret = 0; @@ -1619,7 +1619,7 @@ EXPORT_SYMBOL_GPL(kvm_get_msr_common); */ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { - return kvm_arch_ops->get_msr(vcpu, msr_index, pdata); + return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); } #ifdef CONFIG_X86_64 @@ -1640,7 +1640,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) return; } - kvm_arch_ops->set_efer(vcpu, efer); + kvm_x86_ops->set_efer(vcpu, efer); efer &= ~EFER_LMA; efer |= vcpu->shadow_efer & EFER_LMA; @@ -1697,7 +1697,7 @@ EXPORT_SYMBOL_GPL(kvm_set_msr_common); */ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) { - return kvm_arch_ops->set_msr(vcpu, msr_index, data); + return kvm_x86_ops->set_msr(vcpu, msr_index, data); } 
void kvm_resched(struct kvm_vcpu *vcpu) @@ -1714,7 +1714,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) u32 function; struct kvm_cpuid_entry *e, *best; - kvm_arch_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu); function = vcpu->regs[VCPU_REGS_RAX]; vcpu->regs[VCPU_REGS_RAX] = 0; vcpu->regs[VCPU_REGS_RBX] = 0; @@ -1740,8 +1740,8 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) vcpu->regs[VCPU_REGS_RCX] = best->ecx; vcpu->regs[VCPU_REGS_RDX] = best->edx; } - kvm_arch_ops->decache_regs(vcpu); - kvm_arch_ops->skip_emulated_instruction(vcpu); + kvm_x86_ops->decache_regs(vcpu); + kvm_x86_ops->skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); @@ -1776,7 +1776,7 @@ static int complete_pio(struct kvm_vcpu *vcpu) long delta; int r; - kvm_arch_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu); if (!io->string) { if (io->in) @@ -1786,7 +1786,7 @@ static int complete_pio(struct kvm_vcpu *vcpu) if (io->in) { r = pio_copy_data(vcpu); if (r) { - kvm_arch_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu); return r; } } @@ -1809,13 +1809,13 @@ static int complete_pio(struct kvm_vcpu *vcpu) vcpu->regs[VCPU_REGS_RSI] += delta; } - kvm_arch_ops->decache_regs(vcpu); + kvm_x86_ops->decache_regs(vcpu); io->count -= io->cur_count; io->cur_count = 0; if (!io->count) - kvm_arch_ops->skip_emulated_instruction(vcpu); + kvm_x86_ops->skip_emulated_instruction(vcpu); return 0; } @@ -1871,9 +1871,9 @@ int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->pio.guest_page_offset = 0; vcpu->pio.rep = 0; - kvm_arch_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu); memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); - kvm_arch_ops->decache_regs(vcpu); + kvm_x86_ops->decache_regs(vcpu); pio_dev = vcpu_find_pio_dev(vcpu, port); if (pio_dev) { @@ -1908,7 +1908,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->pio.rep = rep; if (!count) { - kvm_arch_ops->skip_emulated_instruction(vcpu); + 
kvm_x86_ops->skip_emulated_instruction(vcpu); return 1; } @@ -2012,12 +2012,12 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL) { - kvm_arch_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu); vcpu->regs[VCPU_REGS_RAX] = kvm_run->hypercall.ret; - kvm_arch_ops->decache_regs(vcpu); + kvm_x86_ops->decache_regs(vcpu); } - r = kvm_arch_ops->run(vcpu, kvm_run); + r = kvm_x86_ops->run(vcpu, kvm_run); out: if (vcpu->sigset_active) @@ -2032,7 +2032,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, { vcpu_load(vcpu); - kvm_arch_ops->cache_regs(vcpu); + kvm_x86_ops->cache_regs(vcpu); regs->rax = vcpu->regs[VCPU_REGS_RAX]; regs->rbx = vcpu->regs[VCPU_REGS_RBX]; @@ -2054,7 +2054,7 @@ static int kvm_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, #endif regs->rip = vcpu->rip; - regs->rflags = kvm_arch_ops->get_rflags(vcpu); + regs->rflags = kvm_x86_ops->get_rflags(vcpu); /* * Don't leak debug flags in case they were set for guest debugging @@ -2092,9 +2092,9 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, #endif vcpu->rip = regs->rip; - kvm_arch_ops->set_rflags(vcpu, regs->rflags); + kvm_x86_ops->set_rflags(vcpu, regs->rflags); - kvm_arch_ops->decache_regs(vcpu); + kvm_x86_ops->decache_regs(vcpu); vcpu_put(vcpu); @@ -2104,7 +2104,7 @@ static int kvm_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, static void get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { - return kvm_arch_ops->get_segment(vcpu, var, seg); + return kvm_x86_ops->get_segment(vcpu, var, seg); } static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, @@ -2125,14 +2125,14 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); - kvm_arch_ops->get_idt(vcpu, &dt); + kvm_x86_ops->get_idt(vcpu, &dt); sregs->idt.limit = dt.limit; sregs->idt.base = dt.base; - kvm_arch_ops->get_gdt(vcpu, &dt); + 
kvm_x86_ops->get_gdt(vcpu, &dt); sregs->gdt.limit = dt.limit; sregs->gdt.base = dt.base; - kvm_arch_ops->decache_cr4_guest_bits(vcpu); + kvm_x86_ops->decache_cr4_guest_bits(vcpu); sregs->cr0 = vcpu->cr0; sregs->cr2 = vcpu->cr2; sregs->cr3 = vcpu->cr3; @@ -2144,7 +2144,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, if (irqchip_in_kernel(vcpu->kvm)) { memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); - pending_vec = kvm_arch_ops->get_irq(vcpu); + pending_vec = kvm_x86_ops->get_irq(vcpu); if (pending_vec >= 0) set_bit(pending_vec, (unsigned long *)sregs->interrupt_bitmap); } else @@ -2159,7 +2159,7 @@ static int kvm_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, static void set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { - return kvm_arch_ops->set_segment(vcpu, var, seg); + return kvm_x86_ops->set_segment(vcpu, var, seg); } static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, @@ -2173,10 +2173,10 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, dt.limit = sregs->idt.limit; dt.base = sregs->idt.base; - kvm_arch_ops->set_idt(vcpu, &dt); + kvm_x86_ops->set_idt(vcpu, &dt); dt.limit = sregs->gdt.limit; dt.base = sregs->gdt.base; - kvm_arch_ops->set_gdt(vcpu, &dt); + kvm_x86_ops->set_gdt(vcpu, &dt); vcpu->cr2 = sregs->cr2; mmu_reset_needed |= vcpu->cr3 != sregs->cr3; @@ -2186,18 +2186,18 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, mmu_reset_needed |= vcpu->shadow_efer != sregs->efer; #ifdef CONFIG_X86_64 - kvm_arch_ops->set_efer(vcpu, sregs->efer); + kvm_x86_ops->set_efer(vcpu, sregs->efer); #endif kvm_set_apic_base(vcpu, sregs->apic_base); - kvm_arch_ops->decache_cr4_guest_bits(vcpu); + kvm_x86_ops->decache_cr4_guest_bits(vcpu); mmu_reset_needed |= vcpu->cr0 != sregs->cr0; vcpu->cr0 = sregs->cr0; - kvm_arch_ops->set_cr0(vcpu, sregs->cr0); + kvm_x86_ops->set_cr0(vcpu, sregs->cr0); mmu_reset_needed |= vcpu->cr4 != sregs->cr4; - kvm_arch_ops->set_cr4(vcpu, sregs->cr4); + 
kvm_x86_ops->set_cr4(vcpu, sregs->cr4); if (!is_long_mode(vcpu) && is_pae(vcpu)) load_pdptrs(vcpu, vcpu->cr3); @@ -2218,7 +2218,7 @@ static int kvm_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, max_bits); /* Only pending external irq is handled here */ if (pending_vec < max_bits) { - kvm_arch_ops->set_irq(vcpu, pending_vec); + kvm_x86_ops->set_irq(vcpu, pending_vec); printk("Set back pending irq %d\n", pending_vec); } } @@ -2411,7 +2411,7 @@ static int kvm_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, vcpu_load(vcpu); - r = kvm_arch_ops->set_guest_debug(vcpu, dbg); + r = kvm_x86_ops->set_guest_debug(vcpu, dbg); vcpu_put(vcpu); @@ -2493,7 +2493,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) if (!valid_vcpu(n)) return -EINVAL; - vcpu = kvm_arch_ops->vcpu_create(kvm, n); + vcpu = kvm_x86_ops->vcpu_create(kvm, n); if (IS_ERR(vcpu)) return PTR_ERR(vcpu); @@ -2534,7 +2534,7 @@ mmu_unload: vcpu_put(vcpu); free_vcpu: - kvm_arch_ops->vcpu_free(vcpu); + kvm_x86_ops->vcpu_free(vcpu); return r; } @@ -3163,7 +3163,7 @@ static void decache_vcpus_on_cpu(int cpu) */ if (mutex_trylock(&vcpu->mutex)) { if (vcpu->cpu == cpu) { - kvm_arch_ops->vcpu_decache(vcpu); + kvm_x86_ops->vcpu_decache(vcpu); vcpu->cpu = -1; } mutex_unlock(&vcpu->mutex); @@ -3179,7 +3179,7 @@ static void hardware_enable(void *junk) if (cpu_isset(cpu, cpus_hardware_enabled)) return; cpu_set(cpu, cpus_hardware_enabled); - kvm_arch_ops->hardware_enable(NULL); + kvm_x86_ops->hardware_enable(NULL); } static void hardware_disable(void *junk) @@ -3190,7 +3190,7 @@ static void hardware_disable(void *junk) return; cpu_clear(cpu, cpus_hardware_enabled); decache_vcpus_on_cpu(cpu); - kvm_arch_ops->hardware_disable(NULL); + kvm_x86_ops->hardware_disable(NULL); } static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, @@ -3358,7 +3358,7 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); - kvm_arch_ops->vcpu_load(vcpu, 
cpu); + kvm_x86_ops->vcpu_load(vcpu, cpu); } static void kvm_sched_out(struct preempt_notifier *pn, @@ -3366,16 +3366,16 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); - kvm_arch_ops->vcpu_put(vcpu); + kvm_x86_ops->vcpu_put(vcpu); } -int kvm_init_arch(struct kvm_arch_ops *ops, unsigned int vcpu_size, +int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size, struct module *module) { int r; int cpu; - if (kvm_arch_ops) { + if (kvm_x86_ops) { printk(KERN_ERR "kvm: already loaded the other module\n"); return -EEXIST; } @@ -3389,15 +3389,15 @@ int kvm_init_arch(struct kvm_arch_ops *ops, unsigned int vcpu_size, return -EOPNOTSUPP; } - kvm_arch_ops = ops; + kvm_x86_ops = ops; - r = kvm_arch_ops->hardware_setup(); + r = kvm_x86_ops->hardware_setup(); if (r < 0) goto out; for_each_online_cpu(cpu) { smp_call_function_single(cpu, - kvm_arch_ops->check_processor_compatibility, + kvm_x86_ops->check_processor_compatibility, &r, 0, 1); if (r < 0) goto out_free_0; @@ -3450,13 +3450,13 @@ out_free_2: out_free_1: on_each_cpu(hardware_disable, NULL, 0, 1); out_free_0: - kvm_arch_ops->hardware_unsetup(); + kvm_x86_ops->hardware_unsetup(); out: - kvm_arch_ops = NULL; + kvm_x86_ops = NULL; return r; } -void kvm_exit_arch(void) +void kvm_exit_x86(void) { misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); @@ -3465,8 +3465,8 @@ void kvm_exit_arch(void) unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); on_each_cpu(hardware_disable, NULL, 0, 1); - kvm_arch_ops->hardware_unsetup(); - kvm_arch_ops = NULL; + kvm_x86_ops->hardware_unsetup(); + kvm_x86_ops = NULL; } static __init int kvm_init(void) @@ -3509,5 +3509,5 @@ static __exit void kvm_exit(void) module_init(kvm_init) module_exit(kvm_exit) -EXPORT_SYMBOL_GPL(kvm_init_arch); -EXPORT_SYMBOL_GPL(kvm_exit_arch); +EXPORT_SYMBOL_GPL(kvm_init_x86); +EXPORT_SYMBOL_GPL(kvm_exit_x86); diff --git a/drivers/kvm/mmu.c 
b/drivers/kvm/mmu.c index e303b4137bf..7b42c88b0b5 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -966,7 +966,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu) static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; - kvm_arch_ops->tlb_flush(vcpu); + kvm_x86_ops->tlb_flush(vcpu); } static void paging_new_cr3(struct kvm_vcpu *vcpu) @@ -979,7 +979,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, u64 addr, u32 err_code) { - kvm_arch_ops->inject_page_fault(vcpu, addr, err_code); + kvm_x86_ops->inject_page_fault(vcpu, addr, err_code); } static void paging_free(struct kvm_vcpu *vcpu) @@ -1073,7 +1073,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) if (r) goto out; mmu_alloc_roots(vcpu); - kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); + kvm_x86_ops->set_cr3(vcpu, vcpu->mmu.root_hpa); kvm_mmu_flush_tlb(vcpu); out: mutex_unlock(&vcpu->kvm->lock); diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h index 660243b39d8..6b094b44f8f 100644 --- a/drivers/kvm/paging_tmpl.h +++ b/drivers/kvm/paging_tmpl.h @@ -274,7 +274,7 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu, access_bits &= ~PT_WRITABLE_MASK; if (is_writeble_pte(spte)) { spte &= ~PT_WRITABLE_MASK; - kvm_arch_ops->tlb_flush(vcpu); + kvm_x86_ops->tlb_flush(vcpu); } if (write_fault) *ptwrite = 1; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 35f3f83b5c6..7b22d396c14 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -1741,7 +1741,7 @@ static void svm_check_processor_compat(void *rtn) *(int *)rtn = 0; } -static struct kvm_arch_ops svm_arch_ops = { +static struct kvm_x86_ops svm_x86_ops = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, .hardware_setup = svm_hardware_setup, @@ -1794,13 +1794,13 @@ static struct kvm_arch_ops svm_arch_ops = { static int __init svm_init(void) { - return kvm_init_arch(&svm_arch_ops, sizeof(struct vcpu_svm), + return kvm_init_x86(&svm_x86_ops, sizeof(struct vcpu_svm), THIS_MODULE); 
} static void __exit svm_exit(void) { - kvm_exit_arch(); + kvm_exit_x86(); } module_init(svm_init) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 440cacfda89..57a6055ffb0 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -2548,7 +2548,7 @@ static void __init vmx_check_processor_compat(void *rtn) } } -static struct kvm_arch_ops vmx_arch_ops = { +static struct kvm_x86_ops vmx_x86_ops = { .cpu_has_kvm_support = cpu_has_kvm_support, .disabled_by_bios = vmx_disabled_by_bios, .hardware_setup = hardware_setup, @@ -2627,7 +2627,7 @@ static int __init vmx_init(void) memset(iova, 0xff, PAGE_SIZE); kunmap(vmx_io_bitmap_b); - r = kvm_init_arch(&vmx_arch_ops, sizeof(struct vcpu_vmx), THIS_MODULE); + r = kvm_init_x86(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); if (r) goto out1; @@ -2645,7 +2645,7 @@ static void __exit vmx_exit(void) __free_page(vmx_io_bitmap_b); __free_page(vmx_io_bitmap_a); - kvm_exit_arch(); + kvm_exit_x86(); } module_init(vmx_init) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 342594d78d8..86171ca794d 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -1432,7 +1432,7 @@ twobyte_special_insn: | ((u64)_regs[VCPU_REGS_RDX] << 32); rc = kvm_set_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], msr_data); if (rc) { - kvm_arch_ops->inject_gp(ctxt->vcpu, 0); + kvm_x86_ops->inject_gp(ctxt->vcpu, 0); _eip = ctxt->vcpu->rip; } rc = X86EMUL_CONTINUE; @@ -1441,7 +1441,7 @@ twobyte_special_insn: /* rdmsr */ rc = kvm_get_msr(ctxt->vcpu, _regs[VCPU_REGS_RCX], &msr_data); if (rc) { - kvm_arch_ops->inject_gp(ctxt->vcpu, 0); + kvm_x86_ops->inject_gp(ctxt->vcpu, 0); _eip = ctxt->vcpu->rip; } else { _regs[VCPU_REGS_RAX] = (u32)msr_data; -- cgit v1.2.3 From 2e3e5882dca3ab409aa8c9c96f47610b576719f8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 10 Sep 2007 11:28:17 +0300 Subject: KVM: MMU: Don't do GFP_NOWAIT allocations Before preempt notifiers, kvm needed to allocate memory with GFP_NOWAIT so as not to have 
to enable preemption and take a heavyweight exit. On oom, we'd fall back to a GFP_KERNEL allocation. With preemption notifiers, we can do a GFP_KERNEL allocation, and perform the heavyweight exit only if the kernel decides to put us to sleep. Signed-off-by: Avi Kivity --- drivers/kvm/mmu.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 7b42c88b0b5..6d84d30f5ed 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -202,15 +202,14 @@ static void set_shadow_pte(u64 *sptep, u64 spte) } static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, - struct kmem_cache *base_cache, int min, - gfp_t gfp_flags) + struct kmem_cache *base_cache, int min) { void *obj; if (cache->nobjs >= min) return 0; while (cache->nobjs < ARRAY_SIZE(cache->objects)) { - obj = kmem_cache_zalloc(base_cache, gfp_flags); + obj = kmem_cache_zalloc(base_cache, GFP_KERNEL); if (!obj) return -ENOMEM; cache->objects[cache->nobjs++] = obj; @@ -225,14 +224,14 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) } static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, - int min, gfp_t gfp_flags) + int min) { struct page *page; if (cache->nobjs >= min) return 0; while (cache->nobjs < ARRAY_SIZE(cache->objects)) { - page = alloc_page(gfp_flags); + page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; set_page_private(page, 0); @@ -247,41 +246,28 @@ static void mmu_free_memory_cache_page(struct kvm_mmu_memory_cache *mc) free_page((unsigned long)mc->objects[--mc->nobjs]); } -static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) +static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) { int r; + kvm_mmu_free_some_pages(vcpu); r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, - pte_chain_cache, 4, gfp_flags); + pte_chain_cache, 4); if (r) goto out; r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, - rmap_desc_cache, 1, 
gfp_flags); + rmap_desc_cache, 1); if (r) goto out; - r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4, gfp_flags); + r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4); if (r) goto out; r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, - mmu_page_header_cache, 4, gfp_flags); + mmu_page_header_cache, 4); out: return r; } -static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) -{ - int r; - - r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT); - kvm_mmu_free_some_pages(vcpu); - if (r < 0) { - mutex_unlock(&vcpu->kvm->lock); - r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL); - mutex_lock(&vcpu->kvm->lock); - } - return r; -} - static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) { mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); -- cgit v1.2.3 From 29bd8a78082f2d7e2165a735f50b5c716ef3213b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 10 Sep 2007 17:27:03 +0300 Subject: KVM: VMX: Move vm entry failure handling to the exit handler This will help moving the main loop to subarch independent code. 
Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 57a6055ffb0..713f78a8959 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -43,6 +43,7 @@ struct vmcs { struct vcpu_vmx { struct kvm_vcpu vcpu; int launched; + u8 fail; struct kvm_msr_entry *guest_msrs; struct kvm_msr_entry *host_msrs; int nmsrs; @@ -2099,6 +2100,14 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); u32 exit_reason = vmcs_read32(VM_EXIT_REASON); + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (unlikely(vmx->fail)) { + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason + = vmcs_read32(VM_INSTRUCTION_ERROR); + return 0; + } if ( (vectoring_info & VECTORING_INFO_VALID_MASK) && exit_reason != EXIT_REASON_EXCEPTION_NMI ) @@ -2208,7 +2217,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u8 fail; int r; if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { @@ -2352,7 +2360,7 @@ again: "pop %%ecx; popa \n\t" #endif "setbe %0 \n\t" - : "=q" (fail) + : "=q" (vmx->fail) : "r"(vmx->launched), "d"((unsigned long)HOST_RSP), "c"(vcpu), [rax]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_RAX])), @@ -2387,13 +2395,6 @@ again: preempt_enable(); - if (unlikely(fail)) { - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - kvm_run->fail_entry.hardware_entry_failure_reason - = vmcs_read32(VM_INSTRUCTION_ERROR); - r = 0; - goto out; - } /* * Profile KVM exit RIPs: */ -- cgit v1.2.3 From 04d2cc7780d48a212843e38d46402d97fa1f4774 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 10 Sep 2007 18:10:54 +0300 Subject: KVM: Move main vcpu loop into subarch independent code This simplifies adding new code as well as reducing overall code size. 
Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 9 +++- drivers/kvm/kvm_main.c | 124 +++++++++++++++++++++++++++++++++++++++++- drivers/kvm/svm.c | 142 ++++++++++++++----------------------------------- drivers/kvm/vmx.c | 129 ++++++-------------------------------------- 4 files changed, 187 insertions(+), 217 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 42bb225ad6c..d93ab48424c 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -453,13 +453,16 @@ struct kvm_x86_ops { /* Create, but do not attach this VCPU */ struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id); void (*vcpu_free)(struct kvm_vcpu *vcpu); + void (*vcpu_reset)(struct kvm_vcpu *vcpu); + void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); void (*vcpu_decache)(struct kvm_vcpu *vcpu); int (*set_guest_debug)(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg); + void (*guest_debug_pre)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); @@ -491,12 +494,16 @@ struct kvm_x86_ops { void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code); - int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); + void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); + int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); int (*get_irq)(struct kvm_vcpu *vcpu); void (*set_irq)(struct kvm_vcpu *vcpu, int vec); + void (*inject_pending_irq)(struct kvm_vcpu *vcpu); + void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, + struct kvm_run *run); }; extern struct kvm_x86_ops *kvm_x86_ops; diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 9bfa1bcd26e..e17b433152c 100644 --- 
a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -1970,6 +1971,127 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, } EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); +/* + * Check if userspace requested an interrupt window, and that the + * interrupt window is open. + * + * No need to exit to userspace if we already have an interrupt queued. + */ +static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return (!vcpu->irq_summary && + kvm_run->request_interrupt_window && + vcpu->interrupt_window_open && + (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF)); +} + +static void post_kvm_run_save(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0; + kvm_run->cr8 = get_cr8(vcpu); + kvm_run->apic_base = kvm_get_apic_base(vcpu); + if (irqchip_in_kernel(vcpu->kvm)) + kvm_run->ready_for_interrupt_injection = 1; + else + kvm_run->ready_for_interrupt_injection = + (vcpu->interrupt_window_open && + vcpu->irq_summary == 0); +} + +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + + if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { + printk("vcpu %d received sipi with vector # %x\n", + vcpu->vcpu_id, vcpu->sipi_vector); + kvm_lapic_reset(vcpu); + kvm_x86_ops->vcpu_reset(vcpu); + vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; + } + +preempted: + if (vcpu->guest_debug.enabled) + kvm_x86_ops->guest_debug_pre(vcpu); + +again: + r = kvm_mmu_reload(vcpu); + if (unlikely(r)) + goto out; + + preempt_disable(); + + kvm_x86_ops->prepare_guest_switch(vcpu); + kvm_load_guest_fpu(vcpu); + + local_irq_disable(); + + if (signal_pending(current)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; + goto out; + } + + if (irqchip_in_kernel(vcpu->kvm)) + 
kvm_x86_ops->inject_pending_irq(vcpu); + else if (!vcpu->mmio_read_completed) + kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); + + vcpu->guest_mode = 1; + + if (vcpu->requests) + if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) + kvm_x86_ops->tlb_flush(vcpu); + + kvm_x86_ops->run(vcpu, kvm_run); + + vcpu->guest_mode = 0; + local_irq_enable(); + + ++vcpu->stat.exits; + + preempt_enable(); + + /* + * Profile KVM exit RIPs: + */ + if (unlikely(prof_on == KVM_PROFILING)) { + kvm_x86_ops->cache_regs(vcpu); + profile_hit(KVM_PROFILING, (void *)vcpu->rip); + } + + r = kvm_x86_ops->handle_exit(kvm_run, vcpu); + + if (r > 0) { + if (dm_request_for_irq_injection(vcpu, kvm_run)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.request_irq_exits; + goto out; + } + if (!need_resched()) { + ++vcpu->stat.light_exits; + goto again; + } + } + +out: + if (r > 0) { + kvm_resched(vcpu); + goto preempted; + } + + post_kvm_run_save(vcpu, kvm_run); + + return r; +} + + static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -2017,7 +2139,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_x86_ops->decache_regs(vcpu); } - r = kvm_x86_ops->run(vcpu, kvm_run); + r = __vcpu_run(vcpu, kvm_run); out: if (vcpu->sigset_active) diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 7b22d396c14..95681ea1638 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -50,6 +49,8 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_LBRV (1 << 1) #define SVM_DEATURE_SVML (1 << 2) +static void kvm_reput_irq(struct vcpu_svm *svm); + static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_svm, vcpu); @@ -555,6 +556,13 @@ static void init_vmcb(struct vmcb *vmcb) /* rdx = ?? 
*/ } +static void svm_vcpu_reset(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + init_vmcb(svm->vmcb); +} + static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) { struct vcpu_svm *svm; @@ -1252,10 +1260,20 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, }; -static int handle_exit(struct vcpu_svm *svm, struct kvm_run *kvm_run) +static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); u32 exit_code = svm->vmcb->control.exit_code; + kvm_reput_irq(svm); + + if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + kvm_run->fail_entry.hardware_entry_failure_reason + = svm->vmcb->control.exit_code; + return 0; + } + if (is_external_interrupt(svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " @@ -1313,11 +1331,11 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) svm_inject_irq(svm, irq); } -static void svm_intr_assist(struct vcpu_svm *svm) +static void svm_intr_assist(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; int intr_vector = -1; - struct kvm_vcpu *vcpu = &svm->vcpu; kvm_inject_pending_timer_irqs(vcpu); if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && @@ -1376,9 +1394,10 @@ static void svm_do_inject_vector(struct vcpu_svm *svm) svm_inject_irq(svm, irq); } -static void do_interrupt_requests(struct vcpu_svm *svm, +static void do_interrupt_requests(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; svm->vcpu.interrupt_window_open = @@ -1401,35 +1420,6 @@ static void do_interrupt_requests(struct vcpu_svm *svm, control->intercept &= ~(1ULL << INTERCEPT_VINTR); } -static void post_kvm_run_save(struct vcpu_svm *svm, - struct kvm_run *kvm_run) -{ - if (irqchip_in_kernel(svm->vcpu.kvm)) - 
kvm_run->ready_for_interrupt_injection = 1; - else - kvm_run->ready_for_interrupt_injection = - (svm->vcpu.interrupt_window_open && - svm->vcpu.irq_summary == 0); - kvm_run->if_flag = (svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0; - kvm_run->cr8 = get_cr8(&svm->vcpu); - kvm_run->apic_base = kvm_get_apic_base(&svm->vcpu); -} - -/* - * Check if userspace requested an interrupt window, and that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */ -static int dm_request_for_irq_injection(struct vcpu_svm *svm, - struct kvm_run *kvm_run) -{ - return (!svm->vcpu.irq_summary && - kvm_run->request_interrupt_window && - svm->vcpu.interrupt_window_open && - (svm->vmcb->save.rflags & X86_EFLAGS_IF)); -} - static void save_db_regs(unsigned long *db_regs) { asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0])); @@ -1451,38 +1441,16 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu) force_new_asid(vcpu); } -static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) +{ +} + +static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_svm *svm = to_svm(vcpu); u16 fs_selector; u16 gs_selector; u16 ldt_selector; - int r; - -again: - r = kvm_mmu_reload(vcpu); - if (unlikely(r)) - return r; - - clgi(); - - if (signal_pending(current)) { - stgi(); - ++vcpu->stat.signal_exits; - post_kvm_run_save(svm, kvm_run); - kvm_run->exit_reason = KVM_EXIT_INTR; - return -EINTR; - } - - if (irqchip_in_kernel(vcpu->kvm)) - svm_intr_assist(svm); - else if (!vcpu->mmio_read_completed) - do_interrupt_requests(svm, kvm_run); - - vcpu->guest_mode = 1; - if (vcpu->requests) - if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) - svm_flush_tlb(vcpu); pre_svm_run(svm); @@ -1501,10 +1469,9 @@ again: load_db_regs(svm->db_regs); } - if (vcpu->fpu_active) { - fx_save(&vcpu->host_fx_image); - fx_restore(&vcpu->guest_fx_image); - } + clgi(); + + 
local_irq_enable(); asm volatile ( #ifdef CONFIG_X86_64 @@ -1612,12 +1579,9 @@ again: #endif : "cc", "memory" ); - vcpu->guest_mode = 0; + local_irq_disable(); - if (vcpu->fpu_active) { - fx_save(&vcpu->guest_fx_image); - fx_restore(&vcpu->host_fx_image); - } + stgi(); if ((svm->vmcb->save.dr7 & 0xff)) load_db_regs(svm->host_db_regs); @@ -1635,40 +1599,7 @@ again: reload_tss(vcpu); - /* - * Profile KVM exit RIPs: - */ - if (unlikely(prof_on == KVM_PROFILING)) - profile_hit(KVM_PROFILING, - (void *)(unsigned long)svm->vmcb->save.rip); - - stgi(); - - kvm_reput_irq(svm); - svm->next_rip = 0; - - if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - kvm_run->fail_entry.hardware_entry_failure_reason - = svm->vmcb->control.exit_code; - post_kvm_run_save(svm, kvm_run); - return 0; - } - - r = handle_exit(svm, kvm_run); - if (r > 0) { - if (dm_request_for_irq_injection(svm, kvm_run)) { - ++vcpu->stat.request_irq_exits; - post_kvm_run_save(svm, kvm_run); - kvm_run->exit_reason = KVM_EXIT_INTR; - return -EINTR; - } - kvm_resched(vcpu); - goto again; - } - post_kvm_run_save(svm, kvm_run); - return r; } static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) @@ -1752,7 +1683,9 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, + .vcpu_reset = svm_vcpu_reset, + .prepare_guest_switch = svm_prepare_guest_switch, .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, .vcpu_decache = svm_vcpu_decache, @@ -1786,10 +1719,13 @@ static struct kvm_x86_ops svm_x86_ops = { .inject_gp = svm_inject_gp, .run = svm_vcpu_run, + .handle_exit = handle_exit, .skip_emulated_instruction = skip_emulated_instruction, .patch_hypercall = svm_patch_hypercall, .get_irq = svm_get_irq, .set_irq = svm_set_irq, + .inject_pending_irq = svm_intr_assist, + .inject_pending_vectors = do_interrupt_requests, }; static int __init svm_init(void) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 
713f78a8959..fa4277d520c 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include @@ -355,8 +354,10 @@ static void load_transition_efer(struct vcpu_vmx *vmx) vmx->vcpu.stat.efer_reload++; } -static void vmx_save_host_state(struct vcpu_vmx *vmx) +static void vmx_save_host_state(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vmx->host_state.loaded) return; @@ -1598,6 +1599,13 @@ out: return ret; } +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmx_vcpu_setup(vmx); +} + static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq) { u16 ent[2]; @@ -2019,20 +2027,6 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu, return 1; } -static void post_kvm_run_save(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0; - kvm_run->cr8 = get_cr8(vcpu); - kvm_run->apic_base = kvm_get_apic_base(vcpu); - if (irqchip_in_kernel(vcpu->kvm)) - kvm_run->ready_for_interrupt_injection = 1; - else - kvm_run->ready_for_interrupt_injection = - (vcpu->interrupt_window_open && - vcpu->irq_summary == 0); -} - static int handle_interrupt_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { @@ -2123,21 +2117,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return 0; } -/* - * Check if userspace requested an interrupt window, and that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. 
- */ -static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) -{ - return (!vcpu->irq_summary && - kvm_run->request_interrupt_window && - vcpu->interrupt_window_open && - (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); -} - static void vmx_flush_tlb(struct kvm_vcpu *vcpu) { } @@ -2214,59 +2193,15 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) enable_irq_window(vcpu); } -static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int r; - - if (unlikely(vcpu->mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) { - printk("vcpu %d received sipi with vector # %x\n", - vcpu->vcpu_id, vcpu->sipi_vector); - kvm_lapic_reset(vcpu); - vmx_vcpu_setup(vmx); - vcpu->mp_state = VCPU_MP_STATE_RUNNABLE; - } - -preempted: - if (vcpu->guest_debug.enabled) - kvm_guest_debug_pre(vcpu); - -again: - r = kvm_mmu_reload(vcpu); - if (unlikely(r)) - goto out; - - preempt_disable(); - - vmx_save_host_state(vmx); - kvm_load_guest_fpu(vcpu); /* * Loading guest fpu may have cleared host cr0.ts */ vmcs_writel(HOST_CR0, read_cr0()); - local_irq_disable(); - - if (signal_pending(current)) { - local_irq_enable(); - preempt_enable(); - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - goto out; - } - - if (irqchip_in_kernel(vcpu->kvm)) - vmx_intr_assist(vcpu); - else if (!vcpu->mmio_read_completed) - do_interrupt_requests(vcpu, kvm_run); - - vcpu->guest_mode = 1; - if (vcpu->requests) - if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests)) - vmx_flush_tlb(vcpu); - asm ( /* Store host registers */ #ifdef CONFIG_X86_64 @@ -2383,46 +2318,10 @@ again: [cr2]"i"(offsetof(struct kvm_vcpu, cr2)) : "cc", "memory" ); - vcpu->guest_mode = 0; - local_irq_enable(); - - ++vcpu->stat.exits; - vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; asm ("mov %0, %%ds; mov %0, %%es" : : 
"r"(__USER_DS)); vmx->launched = 1; - - preempt_enable(); - - /* - * Profile KVM exit RIPs: - */ - if (unlikely(prof_on == KVM_PROFILING)) - profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); - - r = kvm_handle_exit(kvm_run, vcpu); - if (r > 0) { - if (dm_request_for_irq_injection(vcpu, kvm_run)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.request_irq_exits; - goto out; - } - if (!need_resched()) { - ++vcpu->stat.light_exits; - goto again; - } - } - -out: - if (r > 0) { - kvm_resched(vcpu); - goto preempted; - } - - post_kvm_run_save(vcpu, kvm_run); - return r; } static void vmx_inject_page_fault(struct kvm_vcpu *vcpu, @@ -2560,12 +2459,15 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, + .vcpu_reset = vmx_vcpu_reset, + .prepare_guest_switch = vmx_save_host_state, .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, .vcpu_decache = vmx_vcpu_decache, .set_guest_debug = set_guest_debug, + .guest_debug_pre = kvm_guest_debug_pre, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, @@ -2594,10 +2496,13 @@ static struct kvm_x86_ops vmx_x86_ops = { .inject_gp = vmx_inject_gp, .run = vmx_vcpu_run, + .handle_exit = kvm_handle_exit, .skip_emulated_instruction = skip_emulated_instruction, .patch_hypercall = vmx_patch_hypercall, .get_irq = vmx_get_irq, .set_irq = vmx_inject_irq, + .inject_pending_irq = vmx_intr_assist, + .inject_pending_vectors = do_interrupt_requests, }; static int __init vmx_init(void) -- cgit v1.2.3 From bfdaab090386e7dda8c442721eeb91179258dad4 Mon Sep 17 00:00:00 2001 From: "He, Qing" Date: Wed, 12 Sep 2007 14:18:28 +0800 Subject: KVM: VMX: Fix exit qualification width on i386 According to Intel Software Developer's Manual, Vol. 3B, Appendix H.4.2, exit qualification should be of natural width. 
However, the current code uses u64 as the data type for this register, which occasionally introduces invalid values into the VM exit handling logic. This patch fixes this bug. I have tested Windows and Linux guests on an i386 host, and they can boot successfully with this patch. Signed-off-by: Qing He Signed-off-by: Avi Kivity --- drivers/kvm/vmx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index fa4277d520c..c44c9ac79ca 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1840,12 +1840,12 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u64 exit_qualification; + unsigned long exit_qualification; int size, down, in, string, rep; unsigned port; ++vcpu->stat.io_exits; - exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); string = (exit_qualification & 16) != 0; if (string) { @@ -1877,11 +1877,11 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u64 exit_qualification; + unsigned long exit_qualification; int cr; int reg; - exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); cr = exit_qualification & 15; reg = (exit_qualification >> 8) & 15; switch ((exit_qualification >> 4) & 3) { @@ -1950,7 +1950,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u64 exit_qualification; + unsigned long exit_qualification; unsigned long val; int dr, reg; @@ -1958,7 +1958,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) * FIXME: this code assumes the host is debugging the guest. * need to deal with guest debugging itself too.
*/ - exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); dr = exit_qualification & 7; reg = (exit_qualification >> 8) & 15; vcpu_load_rsp_rip(vcpu); -- cgit v1.2.3 From 7d316911632acb8ba8cb7c1dd318ba723c9f1d50 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Tue, 28 Aug 2007 17:58:52 -0700 Subject: KVM: x86 emulator: push imm8 Implement the instruction push imm8 opcode: 0x6a Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 86171ca794d..4fc2da6aae7 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -104,10 +104,11 @@ static u8 opcode_table[256] = { /* 0x58 - 0x5F */ ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - /* 0x60 - 0x6B */ + /* 0x60 - 0x67 */ 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , - 0, 0, 0, 0, 0, 0, 0, 0, - /* 0x6C - 0x6F */ + 0, 0, 0, 0, + /* 0x68 - 0x6F */ + 0, 0, ImplicitOps|Mov, 0, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ @@ -919,6 +920,16 @@ done_prefixes: goto cannot_emulate; dst.val = (s32) src.val; break; + case 0x6a: /* push imm8 */ + src.val = 0L; + src.val = insn_fetch(s8, 1, _eip); +push: + dst.type = OP_MEM; + dst.bytes = op_bytes; + dst.val = src.val; + register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes); + dst.ptr = register_address(ctxt->ss_base, _regs[VCPU_REGS_RSP]); + break; case 0x80 ... 
0x83: /* Grp1 */ switch (modrm_reg) { case 0: -- cgit v1.2.3 From f6eed39135c03d39ff4095b1ddd947672469dfee Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Tue, 28 Aug 2007 18:08:37 -0700 Subject: KVM: x86 emulator: call near Implement emulation of instruction opcode: 0xe8 call (near) Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 4fc2da6aae7..9f1772fd748 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -150,7 +150,7 @@ static u8 opcode_table[256] = { /* 0xE0 - 0xE7 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE8 - 0xEF */ - 0, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0, + ImplicitOps, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, ImplicitOps, 0, @@ -1033,6 +1033,26 @@ push: case 0xd2 ... 0xd3: /* Grp2 */ src.val = _regs[VCPU_REGS_RCX]; goto grp2; + case 0xe8: /* call (near) */ { + long int rel; + switch (op_bytes) { + case 2: + rel = insn_fetch(s16, 2, _eip); + break; + case 4: + rel = insn_fetch(s32, 4, _eip); + break; + case 8: + rel = insn_fetch(s64, 8, _eip); + break; + default: + DPRINTF("Call: Invalid op_bytes\n"); + goto cannot_emulate; + } + src.val = (unsigned long) _eip; + JMP_REL(rel); + goto push; + } case 0xe9: /* jmp rel */ case 0xeb: /* jmp rel short */ JMP_REL(src.val); -- cgit v1.2.3 From fd2a76086527cbe074b08a8820253228cd919ece Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Tue, 28 Aug 2007 18:22:47 -0700 Subject: KVM: x86 emulator: pushf Implement emulation of instruction pushf opcode: 0x9c Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 9f1772fd748..18c2b2cea60 100644 --- a/drivers/kvm/x86_emulate.c +++ 
b/drivers/kvm/x86_emulate.c @@ -123,7 +123,7 @@ static u8 opcode_table[256] = { ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 0, 0, 0, DstMem | SrcNone | ModRM | Mov, /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, 0, 0, 0, /* 0xA0 - 0xA7 */ ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov, ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov, @@ -928,7 +928,8 @@ push: dst.bytes = op_bytes; dst.val = src.val; register_address_increment(_regs[VCPU_REGS_RSP], -op_bytes); - dst.ptr = register_address(ctxt->ss_base, _regs[VCPU_REGS_RSP]); + dst.ptr = (void *) register_address(ctxt->ss_base, + _regs[VCPU_REGS_RSP]); break; case 0x80 ... 0x83: /* Grp1 */ switch (modrm_reg) { @@ -1216,6 +1217,12 @@ special_insn: ) == 0) return -1; return 0; + + case 0x9c: /* pushf */ + src.val = (unsigned long) _eflags; + goto push; + break; + } if (rep_prefix) { if (_regs[VCPU_REGS_RCX] == 0) { -- cgit v1.2.3 From 054b1369679fb97582fc77f25a700d4290ff3e89 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 12 Sep 2007 13:21:09 +0300 Subject: KVM: Improve emulation failure reporting Report failed opcodes from all locations. 
Signed-off-by: Avi Kivity --- drivers/kvm/kvm.h | 1 + drivers/kvm/kvm_main.c | 16 ++++++++-------- drivers/kvm/svm.c | 2 +- drivers/kvm/vmx.c | 2 +- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index d93ab48424c..ad0813843ad 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -558,6 +558,7 @@ enum emulation_result { int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long cr2, u16 error_code); +void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index e17b433152c..3b046507ebc 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1240,25 +1240,25 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) return X86EMUL_CONTINUE; } -static void report_emulation_failure(struct x86_emulate_ctxt *ctxt) +void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) { static int reported; u8 opcodes[4]; - unsigned long rip = ctxt->vcpu->rip; + unsigned long rip = vcpu->rip; unsigned long rip_linear; - rip_linear = rip + get_segment_base(ctxt->vcpu, VCPU_SREG_CS); + rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); if (reported) return; - emulator_read_std(rip_linear, (void *)opcodes, 4, ctxt->vcpu); + emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); - printk(KERN_ERR "emulation failed but !mmio_needed?" 
- " rip %lx %02x %02x %02x %02x\n", - rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); + printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", + context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); reported = 1; } +EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); struct x86_emulate_ops emulate_ops = { .read_std = emulator_read_std, @@ -1323,7 +1323,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) return EMULATE_DONE; if (!vcpu->mmio_needed) { - report_emulation_failure(&emulate_ctxt); + kvm_report_emulation_failure(vcpu, "mmio"); return EMULATE_FAIL; } return EMULATE_DO_MMIO; diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c index 95681ea1638..729f1cd9360 100644 --- a/drivers/kvm/svm.c +++ b/drivers/kvm/svm.c @@ -960,7 +960,7 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) ++svm->vcpu.stat.mmio_exits; return 0; case EMULATE_FAIL: - vcpu_printf(&svm->vcpu, "%s: emulate fail\n", __FUNCTION__); + kvm_report_emulation_failure(&svm->vcpu, "pagetable"); break; default: BUG(); diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index c44c9ac79ca..4f115a8e45e 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c @@ -1798,7 +1798,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ++vcpu->stat.mmio_exits; return 0; case EMULATE_FAIL: - vcpu_printf(vcpu, "%s: emulate fail\n", __FUNCTION__); + kvm_report_emulation_failure(vcpu, "pagetable"); break; default: BUG(); -- cgit v1.2.3 From 7de752482c71e1ef72ac9650deaeb6d293b8416d Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sat, 15 Sep 2007 10:13:07 +0300 Subject: KVM: x86 emulator: sort opcodes into ascending order Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 99 +++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 50 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 
18c2b2cea60..e4ce34c52ba 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -978,19 +978,8 @@ push: dst.val = src.val; lock_prefix = 1; break; - case 0xa0 ... 0xa1: /* mov */ - dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; - dst.val = src.val; - _eip += ad_bytes; /* skip src displacement */ - break; - case 0xa2 ... 0xa3: /* mov */ - dst.val = (unsigned long)_regs[VCPU_REGS_RAX]; - _eip += ad_bytes; /* skip dst displacement */ - break; case 0x88 ... 0x8b: /* mov */ - case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ - dst.val = src.val; - break; + goto mov; case 0x8f: /* pop (sole member of Grp1a) */ /* 64-bit mode: POP always pops a 64-bit operand. */ if (mode == X86EMUL_MODE_PROT64) @@ -1001,6 +990,15 @@ push: goto done; register_address_increment(_regs[VCPU_REGS_RSP], dst.bytes); break; + case 0xa0 ... 0xa1: /* mov */ + dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; + dst.val = src.val; + _eip += ad_bytes; /* skip src displacement */ + break; + case 0xa2 ... 0xa3: /* mov */ + dst.val = (unsigned long)_regs[VCPU_REGS_RAX]; + _eip += ad_bytes; /* skip dst displacement */ + break; case 0xc0 ... 0xc1: grp2: /* Grp2 */ switch (modrm_reg) { @@ -1028,6 +1026,10 @@ push: break; } break; + case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ + mov: + dst.val = src.val; + break; case 0xd0 ... 0xd1: /* Grp2 */ src.val = 1; goto grp2; @@ -1186,6 +1188,17 @@ special_insn: dst.ptr = (void *) register_address( ctxt->ss_base, _regs[VCPU_REGS_RSP]); break; + case 0x58 ... 0x5f: /* pop reg */ + dst.ptr = (unsigned long *)&_regs[b & 0x7]; + pop_instruction: + if ((rc = ops->read_std(register_address(ctxt->ss_base, + _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt->vcpu)) + != 0) + goto done; + + register_address_increment(_regs[VCPU_REGS_RSP], op_bytes); + no_wb = 1; /* Disable writeback. 
*/ + break; case 0x6c: /* insb */ case 0x6d: /* insw/insd */ if (kvm_emulate_pio_string(ctxt->vcpu, NULL, @@ -1217,12 +1230,15 @@ special_insn: ) == 0) return -1; return 0; - case 0x9c: /* pushf */ src.val = (unsigned long) _eflags; goto push; - break; - + case 0xc3: /* ret */ + dst.ptr = &_eip; + goto pop_instruction; + case 0xf4: /* hlt */ + ctxt->vcpu->halt_request = 1; + goto done; } if (rep_prefix) { if (_regs[VCPU_REGS_RCX] == 0) { @@ -1271,24 +1287,7 @@ special_insn: case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! I don't handle SCAS.\n"); goto cannot_emulate; - case 0xf4: /* hlt */ - ctxt->vcpu->halt_request = 1; - goto done; - case 0xc3: /* ret */ - dst.ptr = &_eip; - goto pop_instruction; - case 0x58 ... 0x5f: /* pop reg */ - dst.ptr = (unsigned long *)&_regs[b & 0x7]; - -pop_instruction: - if ((rc = ops->read_std(register_address(ctxt->ss_base, - _regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt->vcpu)) - != 0) - goto done; - register_address_increment(_regs[VCPU_REGS_RSP], op_bytes); - no_wb = 1; /* Disable writeback. */ - break; } goto writeback; @@ -1382,6 +1381,16 @@ twobyte_insn: /* Odd cmov opcodes (lsb == 1) have inverted sense. */ no_wb ^= b & 1; break; + case 0xa3: + bt: /* bt */ + src.val &= (dst.bytes << 3) - 1; /* only subword offset */ + emulate_2op_SrcV_nobyte("bt", src, dst, _eflags); + break; + case 0xab: + bts: /* bts */ + src.val &= (dst.bytes << 3) - 1; /* only subword offset */ + emulate_2op_SrcV_nobyte("bts", src, dst, _eflags); + break; case 0xb0 ... 
0xb1: /* cmpxchg */ /* * Save real source value, then compare EAX against @@ -1399,30 +1408,15 @@ twobyte_insn: dst.ptr = (unsigned long *)&_regs[VCPU_REGS_RAX]; } break; - case 0xa3: - bt: /* bt */ - src.val &= (dst.bytes << 3) - 1; /* only subword offset */ - emulate_2op_SrcV_nobyte("bt", src, dst, _eflags); - break; case 0xb3: btr: /* btr */ src.val &= (dst.bytes << 3) - 1; /* only subword offset */ emulate_2op_SrcV_nobyte("btr", src, dst, _eflags); break; - case 0xab: - bts: /* bts */ - src.val &= (dst.bytes << 3) - 1; /* only subword offset */ - emulate_2op_SrcV_nobyte("bts", src, dst, _eflags); - break; case 0xb6 ... 0xb7: /* movzx */ dst.bytes = op_bytes; dst.val = (d & ByteOp) ? (u8) src.val : (u16) src.val; break; - case 0xbb: - btc: /* btc */ - src.val &= (dst.bytes << 3) - 1; /* only subword offset */ - emulate_2op_SrcV_nobyte("btc", src, dst, _eflags); - break; case 0xba: /* Grp8 */ switch (modrm_reg & 3) { case 0: @@ -1435,6 +1429,11 @@ twobyte_insn: goto btc; } break; + case 0xbb: + btc: /* btc */ + src.val &= (dst.bytes << 3) - 1; /* only subword offset */ + emulate_2op_SrcV_nobyte("btc", src, dst, _eflags); + break; case 0xbe ... 0xbf: /* movsx */ dst.bytes = op_bytes; dst.val = (d & ByteOp) ? (s8) src.val : (s16) src.val; @@ -1446,14 +1445,14 @@ twobyte_special_insn: /* Disable writeback. 
*/ no_wb = 1; switch (b) { + case 0x06: + emulate_clts(ctxt->vcpu); + break; case 0x09: /* wbinvd */ break; case 0x0d: /* GrpP (prefetch) */ case 0x18: /* Grp16 (prefetch/nop) */ break; - case 0x06: - emulate_clts(ctxt->vcpu); - break; case 0x20: /* mov cr, reg */ if (modrm_mod != 3) goto cannot_emulate; -- cgit v1.2.3 From bbe9abbdaca366510db1f2df25f4c7b48cba38eb Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sat, 15 Sep 2007 10:23:07 +0300 Subject: KVM: x86 emulator: implement jump conditional relative Implement emulation of instruction: jump conditional rel opcodes: 0x0f 0x80 - 0x0f 0x8f Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index e4ce34c52ba..ba53e59f558 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -188,7 +188,10 @@ static u16 twobyte_table[256] = { /* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x8F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ @@ -479,6 +482,41 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, return rc; } +static int test_cc(unsigned int condition, unsigned int flags) +{ + int rc = 0; + + switch ((condition & 15) >> 1) { + case 0: /* o */ + rc |= (flags & EFLG_OF); + break; + case 1: /* b/c/nae */ + rc |= (flags & EFLG_CF); + break; + case 2: /* z/e */ + rc |= (flags & EFLG_ZF); + break; + case 3: /* be/na */ + rc |= (flags & (EFLG_CF|EFLG_ZF)); + break; + case 4: /* s */ + rc |= (flags & EFLG_SF); + break; + case 5: /* p/pe */ + rc |= (flags & EFLG_PF); + 
break; + case 7: /* le/ng */ + rc |= (flags & EFLG_ZF); + /* fall through */ + case 6: /* l/nge */ + rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); + break; + } + + /* Odd condition identifiers (lsb == 1) have inverted sense. */ + return (!!rc ^ (condition & 1)); +} + int x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -1486,6 +1524,27 @@ twobyte_special_insn: } rc = X86EMUL_CONTINUE; break; + case 0x80 ... 0x8f: /* jnz rel, etc*/ { + long int rel; + + switch (op_bytes) { + case 2: + rel = insn_fetch(s16, 2, _eip); + break; + case 4: + rel = insn_fetch(s32, 4, _eip); + break; + case 8: + rel = insn_fetch(s64, 8, _eip); + break; + default: + DPRINTF("jnz: Invalid op_bytes\n"); + goto cannot_emulate; + } + if (test_cc(b, _eflags)) + JMP_REL(rel); + break; + } case 0xc7: /* Grp9 (cmpxchg8b) */ { u64 old, new; -- cgit v1.2.3 From 55bebde45ec252295b08cb3990f15df2228dbf0e Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sat, 15 Sep 2007 10:25:41 +0300 Subject: KVM: X86 emulator: jump conditional short Implement emulation of more jump conditional instructions jcc shortrel opcodes: 0x70 - 0x7f Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index ba53e59f558..57f1a5ad011 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -111,8 +111,12 @@ static u8 opcode_table[256] = { 0, 0, ImplicitOps|Mov, 0, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ - /* 0x70 - 0x7F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 - 0x77 */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + /* 0x78 - 0x7F */ + ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + ImplicitOps, 
ImplicitOps, ImplicitOps, ImplicitOps, /* 0x80 - 0x87 */ ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, @@ -1268,6 +1272,13 @@ special_insn: ) == 0) return -1; return 0; + case 0x70 ... 0x7f: /* jcc (short) */ { + int rel = insn_fetch(s8, 1, _eip); + + if (test_cc(b, _eflags)) + JMP_REL(rel); + break; + } case 0x9c: /* pushf */ src.val = (unsigned long) _eflags; goto push; -- cgit v1.2.3 From 7e0b54b149315743f5743dbc0cf758012682024e Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sat, 15 Sep 2007 10:35:36 +0300 Subject: KVM: x86 emulator: lea Implement emulation of instruction lea r16/r32, m opcode: 0x8d: Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 57f1a5ad011..4c78a4ff868 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -125,7 +125,7 @@ static u8 opcode_table[256] = { /* 0x88 - 0x8F */ ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, - 0, 0, 0, DstMem | SrcNone | ModRM | Mov, + 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, 0, 0, 0, /* 0xA0 - 0xA7 */ @@ -1022,6 +1022,9 @@ push: break; case 0x88 ... 0x8b: /* mov */ goto mov; + case 0x8d: /* lea r16/r32, m */ + dst.val = modrm_val; + break; case 0x8f: /* pop (sole member of Grp1a) */ /* 64-bit mode: POP always pops a 64-bit operand. 
*/ if (mode == X86EMUL_MODE_PROT64) -- cgit v1.2.3 From 26a3e983d154beca544afd36f293cfef10657f24 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sat, 15 Sep 2007 10:41:26 +0300 Subject: KVM: x86 emulator: jmp abs Implement emulation of instruction: jump absolute r/m opcode: 0xff /4 Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 4c78a4ff868..cf8db670df7 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -1148,6 +1148,12 @@ push: case 1: /* dec */ emulate_1op("dec", dst, _eflags); break; + case 4: /* jmp abs */ + if (b == 0xff) + _eip = dst.val; + else + goto cannot_emulate; + break; case 6: /* push */ /* 64-bit mode: PUSH always pushes a 64-bit operand. */ if (mode == X86EMUL_MODE_PROT64) { -- cgit v1.2.3 From 12fa272e315af43d549ce22f9988392d0121350e Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sat, 15 Sep 2007 10:43:33 +0300 Subject: KVM: x86 emulator: fix src, dst value initialization Some operand fetches are less than the machine word size and can result in stale bits if used together with operands of different sizes. Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index cf8db670df7..7360a71094f 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -827,6 +827,7 @@ done_prefixes: srcmem_common: src.type = OP_MEM; src.ptr = (unsigned long *)cr2; + src.val = 0; if ((rc = ops->read_emulated((unsigned long)src.ptr, &src.val, src.bytes, ctxt->vcpu)) != 0) goto done; @@ -891,6 +892,7 @@ done_prefixes: dst.type = OP_MEM; dst.ptr = (unsigned long *)cr2; dst.bytes = (d & ByteOp) ? 
1 : op_bytes; + dst.val = 0; if (d & BitOp) { unsigned long mask = ~(dst.bytes * 8 - 1); -- cgit v1.2.3 From 535eabcf0e55804b53d27fe45217d874b19bcfa9 Mon Sep 17 00:00:00 2001 From: Nitin A Kamble Date: Sat, 15 Sep 2007 10:45:05 +0300 Subject: KVM: x86 emulator: popf Implement emulation of instruction: popf opcode: 0x9d Signed-off-by: Nitin A Kamble Signed-off-by: Avi Kivity --- drivers/kvm/x86_emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c index 7360a71094f..9737c3b2f48 100644 --- a/drivers/kvm/x86_emulate.c +++ b/drivers/kvm/x86_emulate.c @@ -127,7 +127,7 @@ static u8 opcode_table[256] = { ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov, /* 0x90 - 0x9F */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 0, /* 0xA0 - 0xA7 */ ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov, ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov, @@ -1293,6 +1293,9 @@ special_insn: case 0x9c: /* pushf */ src.val = (unsigned long) _eflags; goto push; + case 0x9d: /* popf */ + dst.ptr = (unsigned long *) &_eflags; + goto pop_instruction; case 0xc3: /* ret */ dst.ptr = &_eip; goto pop_instruction; -- cgit v1.2.3 From 0967b7bf1c22b55777aba46ff616547feed0b141 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 15 Sep 2007 17:34:36 +0300 Subject: KVM: Skip pio instruction when it is emulated, not executed If we defer updating rip until pio instructions are executed, we have a problem with reset: a pio reset updates rip, and when the instruction completes we skip the emulated instruction, pointing rip somewhere completely unrelated. Fix by updating rip when we decode the instruction, not after emulation. 
Signed-off-by: Avi Kivity --- drivers/kvm/kvm_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index 3b046507ebc..353e58527d1 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -1815,8 +1815,6 @@ static int complete_pio(struct kvm_vcpu *vcpu) io->count -= io->cur_count; io->cur_count = 0; - if (!io->count) - kvm_x86_ops->skip_emulated_instruction(vcpu); return 0; } @@ -1876,6 +1874,8 @@ int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in, memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4); kvm_x86_ops->decache_regs(vcpu); + kvm_x86_ops->skip_emulated_instruction(vcpu); + pio_dev = vcpu_find_pio_dev(vcpu, port); if (pio_dev) { kernel_pio(pio_dev, vcpu, vcpu->pio_data); @@ -1938,6 +1938,9 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, vcpu->run->io.count = now; vcpu->pio.cur_count = now; + if (vcpu->pio.cur_count == vcpu->pio.count) + kvm_x86_ops->skip_emulated_instruction(vcpu); + for (i = 0; i < nr_pages; ++i) { mutex_lock(&vcpu->kvm->lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); -- cgit v1.2.3 From 8a45450d0a559912873428077908f9bc1411042c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 10 Oct 2007 14:03:16 +0200 Subject: KVM: Replace enum by #define Easier for existence test (#ifdef) in userspace. 
Signed-off-by: Avi Kivity --- include/linux/kvm.h | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 30a83696906..057a7f34ee3 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -90,11 +90,9 @@ struct kvm_ioapic_state { } redirtbl[KVM_IOAPIC_NUM_PINS]; }; -enum kvm_irqchip_id { - KVM_IRQCHIP_PIC_MASTER = 0, - KVM_IRQCHIP_PIC_SLAVE = 1, - KVM_IRQCHIP_IOAPIC = 2, -}; +#define KVM_IRQCHIP_PIC_MASTER 0 +#define KVM_IRQCHIP_PIC_SLAVE 1 +#define KVM_IRQCHIP_IOAPIC 2 struct kvm_irqchip { __u32 chip_id; @@ -106,20 +104,18 @@ struct kvm_irqchip { } chip; }; -enum kvm_exit_reason { - KVM_EXIT_UNKNOWN = 0, - KVM_EXIT_EXCEPTION = 1, - KVM_EXIT_IO = 2, - KVM_EXIT_HYPERCALL = 3, - KVM_EXIT_DEBUG = 4, - KVM_EXIT_HLT = 5, - KVM_EXIT_MMIO = 6, - KVM_EXIT_IRQ_WINDOW_OPEN = 7, - KVM_EXIT_SHUTDOWN = 8, - KVM_EXIT_FAIL_ENTRY = 9, - KVM_EXIT_INTR = 10, - KVM_EXIT_SET_TPR = 11 -}; +#define KVM_EXIT_UNKNOWN 0 +#define KVM_EXIT_EXCEPTION 1 +#define KVM_EXIT_IO 2 +#define KVM_EXIT_HYPERCALL 3 +#define KVM_EXIT_DEBUG 4 +#define KVM_EXIT_HLT 5 +#define KVM_EXIT_MMIO 6 +#define KVM_EXIT_IRQ_WINDOW_OPEN 7 +#define KVM_EXIT_SHUTDOWN 8 +#define KVM_EXIT_FAIL_ENTRY 9 +#define KVM_EXIT_INTR 10 +#define KVM_EXIT_SET_TPR 11 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { -- cgit v1.2.3