From 7c68af6e32c73992bad24107311f3433c89016e2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Sep 2009 09:40:22 +0300 Subject: core, x86: Add user return notifiers Add a general per-cpu notifier that is called whenever the kernel is about to return to userspace. The notifier uses a thread_info flag and existing checks, so there is no impact on user return or context switch fast paths. This will be used initially to speed up KVM task switching by lazily updating MSRs. Signed-off-by: Avi Kivity LKML-Reference: <1253342422-13811-1-git-send-email-avi@redhat.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/thread_info.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index d27d0a2fec4..375c917c37d 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,6 +83,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,6 +108,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -142,13 +144,14 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME | \ + _TIF_USER_RETURN_NOTIFY) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) -#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW +#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) #define PREEMPT_ACTIVE 0x10000000 -- cgit v1.2.3 From 851ba6922ac575b749f63dee0ae072808163ba6a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 24 Aug 2009 11:10:17 +0300 Subject: KVM: Don't pass kvm_run arguments They're just copies of vcpu->run, which is readily accessible. 
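For illustration, the change to an exit handler looks roughly like this (a sketch; the handler body is invented for the example, only the signature change mirrors the patch):

        /* Before: the kvm_run pointer was threaded through every handler. */
        static int handle_io(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        {
                kvm_run->exit_reason = KVM_EXIT_IO;     /* illustrative body */
                return 0;
        }

        /* After: handlers recover the run structure from the vcpu itself. */
        static int handle_io(struct kvm_vcpu *vcpu)
        {
                struct kvm_run *kvm_run = vcpu->run;

                kvm_run->exit_reason = KVM_EXIT_IO;     /* illustrative body */
                return 0;
        }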
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d83892226f7..0b113f2b58c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -506,8 +506,8 @@ struct kvm_x86_ops { void (*tlb_flush)(struct kvm_vcpu *vcpu); - void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); - int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); + void (*run)(struct kvm_vcpu *vcpu); + int (*handle_exit)(struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); @@ -568,7 +568,7 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, +int emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, u16 error_code, int emulation_type); void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); @@ -585,9 +585,9 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; -int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port); -int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, int size, unsigned long count, int down, gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 1a6e4a8c276e122dbeb6f9c610f29735e4236bfd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:19 +0300 Subject: KVM: Move irq sharing information to irqchip level This removes the assumption that the maximum number of GSIs is smaller than the number of pins. Sharing is tracked at the pin level, not the GSI level. [avi: no PIC on ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0b113f2b58c..35d3236c9de 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,7 +410,6 @@ struct kvm_arch{ gpa_t ept_identity_map_addr; unsigned long irq_sources_bitmap; - unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; u64 vm_init_tsc; }; -- cgit v1.2.3 From 3e71f88bc90792a187703860cf22fbed7c12cbd9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:21 +0300 Subject: KVM: Maintain back mapping from irqchip/pin to gsi Maintain a back mapping from irqchip/pin to gsi to speed up interrupt acknowledgment notifications.
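The shape of the back mapping, as a sketch (the routing-table layout here is an assumption for illustration; the real structure lives in the generic irq routing code, not in this header):

        /* Assumed layout: for each irqchip, a pin -> gsi back map. */
        struct kvm_irq_routing_table {
                /* ... forward gsi -> routing-entry map elsewhere ... */
                int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS];
        };

        /* An ack notification can then resolve its gsi in O(1). */
        static int pin_to_gsi(struct kvm_irq_routing_table *rt,
                              int irqchip, int pin)
        {
                return rt->chip[irqchip][pin];  /* assumed -1 when unmapped */
        }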
[avi: build fix on non-x86/ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 4a5fe914dc5..f02e87a5206 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -79,6 +79,7 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_PIC_MASTER 0 #define KVM_IRQCHIP_PIC_SLAVE 1 #define KVM_IRQCHIP_IOAPIC 2 +#define KVM_NR_IRQCHIPS 3 /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { -- cgit v1.2.3 From 136bdfeee7b5bc986fc94af3a40d7d13ea37bb95 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:23 +0300 Subject: KVM: Move irq ack notifier list to arch independent code The mask irq notifier list is already there. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 35d3236c9de..a46e2dd9aca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -397,7 +397,6 @@ struct kvm_arch{ struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; - struct hlist_head irq_ack_notifier_list; int vapics_in_nmi_mode; unsigned int tss_addr; -- cgit v1.2.3 From 10474ae8945ce08622fd1f3464e55bd817bf2376 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 15 Sep 2009 11:37:46 +0200 Subject: KVM: Activate Virtualization On Demand X86 CPUs need some magic to enable the virtualization extensions on them. This magic can have unpleasant consequences for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. To mitigate this problem, this patch introduces on-demand activation of virtualization: virtualization is enabled on creation of the first virtual machine and disabled on destruction of the last one. Using this, KVM can be easily autoloaded while keeping other hypervisors usable. Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a46e2dd9aca..295c7c4d9c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -459,7 +459,7 @@ struct descriptor_table { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - void (*hardware_enable)(void *dummy); /* __init */ + int (*hardware_enable)(void *dummy); void (*hardware_disable)(void *dummy); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ -- cgit v1.2.3 From 355be0b9300579e02275d7d19374806a974ce622 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 3 Oct 2009 00:31:21 +0200 Subject: KVM: x86: Refactor guest debug IOCTL handling Much of the so-far vendor-specific code for setting up guest debug can actually be handled by the generic code. This also fixes a minor deficit in the SVM part with respect to processing KVM_GUESTDBG_ENABLE.
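Roughly, the split looks like this (a much-simplified sketch, not the literal patch; the generic function exists in x86.c, its body here is condensed):

        int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                                struct kvm_guest_debug *dbg)
        {
                /* Generic bookkeeping: only honor the other control bits
                 * when KVM_GUESTDBG_ENABLE is set (the SVM deficit). */
                if (dbg->control & KVM_GUESTDBG_ENABLE)
                        vcpu->guest_debug = dbg->control;
                else
                        vcpu->guest_debug = 0;

                /* Vendor hook now only programs the hardware, hence void. */
                kvm_x86_ops->set_guest_debug(vcpu, dbg);
                return 0;
        }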
Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 295c7c4d9c9..e7f87083260 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -475,8 +475,8 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - int (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg); + void (*set_guest_debug)(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *dbg); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); -- cgit v1.2.3 From 91586a3b7d79432772a3cdcb81473cd08a237c79 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 5 Oct 2009 13:07:21 +0200 Subject: KVM: x86: Rework guest single-step flag injection and filtering Push TF and RF injection and filtering on guest single-stepping into the vendor get/set_rflags callbacks. This makes the whole mechanism more robust with respect to user-space IOCTL order and instruction emulation. Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e7f87083260..179a919f53a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -614,6 +614,9 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); +unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); +void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); + void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, -- cgit v1.2.3 From 4b8d54f9726f1159330201c5ed2ea30bce7e63ea Mon Sep 17 00:00:00 2001 From: "Zhai, Edwin" Date: Fri, 9 Oct 2009 18:03:20 +0800 Subject: KVM: VMX: Add support for Pause-Loop Exiting New NHM processors will support Pause-Loop Exiting by adding two VM-execution control fields: PLE_Gap - upper bound on the amount of time between two successive executions of PAUSE in a loop. PLE_Window - upper bound on the amount of time a guest is allowed to execute in a PAUSE loop. If the time between this execution of PAUSE and the previous one exceeds PLE_Gap, the processor considers this PAUSE to belong to a new loop. Otherwise, the processor determines the total execution time of this loop (since the first PAUSE in the loop) and triggers a VM exit if the total time exceeds PLE_Window. Refer to SDM volume 3b, sections 21.6.13 & 22.1.3. Pause-Loop Exiting can be used to detect Lock-Holder Preemption, where one VP is scheduled out after holding a spinlock and other VPs contending for the same lock are scheduled in, wasting CPU time. Our tests indicate that most spinlocks are held for less than 212 cycles. Performance tests show that with 2x LP over-commitment we can get a +2% performance improvement for kernel build (with even more gain with more LPs).
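In VMX terms, enabling the feature reduces to programming the two new VMCS fields and one secondary control, roughly as below (a sketch using the constants added by this patch; the gap/window defaults are illustrative):

        static int ple_gap = 128;       /* illustrative defaults */
        static int ple_window = 4096;

        static void vmx_enable_ple(void)
        {
                u32 exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);

                exec_control |= SECONDARY_EXEC_PAUSE_LOOP_EXITING;
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
                vmcs_write32(PLE_GAP, ple_gap);
                vmcs_write32(PLE_WINDOW, ple_window);
        }

A PAUSE loop that overstays ple_window then exits with EXIT_REASON_PAUSE_INSTRUCTION, giving the host a chance to reschedule the spinning VCPU.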
Signed-off-by: Zhai Edwin Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 272514c2d45..2b4945419a8 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -56,6 +56,7 @@ #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define PIN_BASED_EXT_INTR_MASK 0x00000001 @@ -144,6 +145,8 @@ enum vmcs_field { VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, TPR_THRESHOLD = 0x0000401c, SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, VM_INSTRUCTION_ERROR = 0x00004400, VM_EXIT_REASON = 0x00004402, VM_EXIT_INTR_INFO = 0x00004404, @@ -248,6 +251,7 @@ enum vmcs_field { #define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 #define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 -- cgit v1.2.3 From 565d0998ecac8373b9a9ecd5991abe74318cd235 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 6 Oct 2009 14:25:02 -0500 Subject: KVM: SVM: Support Pause Filter in AMD processors New AMD processors (Family 0x10 models 8+) support the Pause Filter feature. This feature creates a new field in the VMCB called Pause Filter Count. If Pause Filter Count is greater than 0 and intercepting PAUSEs is enabled, the processor will increment an internal counter when a PAUSE instruction occurs instead of intercepting. When the internal counter reaches the Pause Filter Count value, a PAUSE intercept will occur. This feature can be used to detect contended spinlocks, especially when the lock-holding VCPU is not scheduled. Rescheduling another VCPU prevents the VCPU seeking the lock from wasting its quantum by spinning idly. Experimental results show that most spinlocks are held for less than 1000 PAUSE cycles or more than a few thousand. Default the Pause Filter Count to 3000 to detect the contended spinlocks. Processor support for this feature is indicated by a CPUID bit. On a 24-core system running 4 guests each with 16 VCPUs, this patch improved overall performance of each guest's 32-job kernbench by approximately 3-5% when combined with a scheduler algorithm that caused the VCPU to sleep for a brief period. Further performance improvement may be possible with a more sophisticated yield algorithm.
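On the VMCB side the setup is just two fields, roughly (a sketch; the exact feature check used by the driver may differ):

        /* Arm the pause filter when the CPU advertises it via CPUID. */
        if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
                svm->vmcb->control.pause_filter_count = 3000;
                svm->vmcb->control.intercept |= (1ULL << INTERCEPT_PAUSE);
        }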
Signed-off-by: Mark Langsdorf Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/svm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 85574b7c1bc..1fecb7e6113 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -57,7 +57,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u16 intercept_dr_write; u32 intercept_exceptions; u64 intercept; - u8 reserved_1[44]; + u8 reserved_1[42]; + u16 pause_filter_count; u64 iopm_base_pa; u64 msrpm_base_pa; u64 tsc_offset; -- cgit v1.2.3 From ffde22ac53b6d6b1d7206f1172176a667eead778 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Thu, 15 Oct 2009 15:21:43 -0700 Subject: KVM: Xen PV-on-HVM guest support Support for Xen PV-on-HVM guests can be implemented almost entirely in userspace, except for handling one annoying MSR that maps a Xen hypercall blob into guest address space. A generic mechanism to delegate MSR writes to userspace seems overkill and risks encouraging similar MSR abuse in the future. Thus this patch adds special support for the Xen HVM MSR. I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell KVM which MSR the guest will write to, as well as the starting address and size of the hypercall blobs (one each for 32-bit and 64-bit) that userspace has loaded from files. When the guest writes to the MSR, KVM copies one page of the blob from userspace to the guest. I've tested this patch with a hacked-up version of Gerd's userspace code, booting a number of guests (CentOS 5.3 i386 and x86_64, and FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices. [jan: fix i386 build warning] [avi: future proof abi with a flags field] Signed-off-by: Ed Swierk Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 1 + arch/x86/include/asm/kvm_host.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f02e87a5206..ef9b4b73cce 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -19,6 +19,7 @@ #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 +#define __KVM_HAVE_XEN_HVM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 179a919f53a..36f3b53f5c2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,6 +410,8 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + + struct kvm_xen_hvm_config xen_hvm_config; }; struct kvm_vm_stat { -- cgit v1.2.3 From 94fe45da48f921d01d8ff02a0ad54ee9c326d7f0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 18 Oct 2009 13:24:44 +0200 Subject: KVM: x86: Fix guest single-stepping while interruptible Commit 705c5323 opened the doors of hell by unconditionally injecting single-step flags as long as guest_debug signaled this. This doesn't work when the guest branches into some interrupt or exception handler and triggers a vmexit with flag reloading. Fix it by saving cs:rip when user space requests single-stepping and restricting the trace flag injection to this guest code position. 
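In sketch form (using the fields added below; kvm_rip_read() is an existing helper, while get_segment_selector() stands in for whatever CS-selector accessor the code uses):

        /* When userspace requests single-stepping, remember where: */
        vcpu->arch.singlestep_cs = get_segment_selector(vcpu, VCPU_SREG_CS);
        vcpu->arch.singlestep_rip = kvm_rip_read(vcpu);

        /* ...and only inject TF while the guest is still at that spot: */
        if (vcpu->arch.singlestep_rip == kvm_rip_read(vcpu) &&
            vcpu->arch.singlestep_cs ==
                        get_segment_selector(vcpu, VCPU_SREG_CS))
                rflags |= X86_EFLAGS_TF;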
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 36f3b53f5c2..2536fbd85b3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -371,6 +371,10 @@ struct kvm_vcpu_arch { u64 mcg_status; u64 mcg_ctl; u64 *mce_banks; + + /* used for guest single stepping over the given code position */ + u16 singlestep_cs; + unsigned long singlestep_rip; }; struct kvm_mem_alias { -- cgit v1.2.3 From 6be7d3062b59af891be7e40c6802350de5f78cef Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 18 Oct 2009 13:24:54 +0200 Subject: KVM: SVM: Cleanup NMI singlestep Push the NMI-related singlestep variable into vcpu_svm. It's dealing with an AMD-specific deficit, nothing generic for x86. Acked-by: Gleb Natapov Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2536fbd85b3..4d994ad5051 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -354,7 +354,6 @@ struct kvm_vcpu_arch { unsigned int time_offset; struct page *time_page; - bool singlestep; /* guest is single stepped by KVM */ bool nmi_pending; bool nmi_injected; -- cgit v1.2.3 From afbcf7ab8d1bc8c2d04792f6d9e786e0adeb328d Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 16 Oct 2009 15:28:36 -0400 Subject: KVM: allow userspace to adjust kvmclock offset When we migrate a kvm guest that uses pvclock between two hosts, we may suffer a large skew. This is because there can be significant differences between the monotonic clocks of the hosts involved. When a new host with a much larger monotonic time starts running the guest, the view of time will be significantly impacted. The situation is much worse when we do the opposite, and migrate to a host with a smaller monotonic clock. This proposed ioctl allows userspace to inform us of the monotonic clock value in the source host, so we can keep the time skew short and, more importantly, ensure that the guest clock never goes backwards. Userspace may also need to query the current clock value, since from the first migration onwards it won't be reflected by a simple call to clock_gettime() anymore. [marcelo: future-proof abi with a flags field] [jan: fix KVM_GET_CLOCK by clearing flags field instead of checking it] Signed-off-by: Glauber Costa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4d994ad5051..0558ff8c32a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -413,6 +413,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + s64 kvmclock_offset; struct kvm_xen_hvm_config xen_hvm_config; }; -- cgit v1.2.3 From 18863bdd60f895f3b3ba16b15e8331aee781e8ec Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 7 Sep 2009 11:12:18 +0300 Subject: KVM: x86 shared msr infrastructure The various syscall-related MSRs are fairly expensive to switch.
Currently we switch them on every vcpu preemption, which is far too often: - if we're switching to a kernel thread (idle task, threaded interrupt, kernel-mode virtio server (vhost-net), for example) and back, then there's no need to switch those MSRs since kernel threads won't be exiting to userspace. - if we're switching to another guest running an identical OS, most likely those MSRs will have the same value, so there's little point in reloading them. - if we're running the same OS on the guest and host, the MSRs will have identical values and reloading is unnecessary. This patch uses the new user return notifiers to implement last-minute switching, and checks the msr values to avoid unnecessary reloading. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0558ff8c32a..26a74b7bb6b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -809,4 +809,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); +void kvm_define_shared_msr(unsigned index, u32 msr); +void kvm_set_shared_msr(unsigned index, u64 val); + #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.2.3 From 3cfc3092f40bc37c57ba556cfd8de4218f2135ab Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Nov 2009 01:04:25 +0100 Subject: KVM: x86: Add KVM_GET/SET_VCPU_EVENTS This new IOCTL exports all previously user-invisible state related to exceptions, interrupts, and NMIs. Together with appropriate user space changes, this fixes sporadic problems with vmsave/restore, live migration and system reset. [avi: future-proof abi by adding a flags field] Signed-off-by: Jan Kiszka Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm.h | 28 ++++++++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ 2 files changed, 30 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ef9b4b73cce..950df434763 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -20,6 +20,7 @@ #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM +#define __KVM_HAVE_VCPU_EVENTS /* Architectural interrupt line count.
*/ #define KVM_NR_INTERRUPTS 256 @@ -252,4 +253,31 @@ struct kvm_reinject_control { __u8 pit_reinject; __u8 reserved[31]; }; + +/* for KVM_GET/SET_VCPU_EVENTS */ +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 pad; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; + __u32 reserved[10]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26a74b7bb6b..06e085614da 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -523,6 +523,8 @@ struct kvm_x86_ops { bool has_error_code, u32 error_code); int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*nmi_allowed)(struct kvm_vcpu *vcpu); + bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); + void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); -- cgit v1.2.3 From eb3c79e64a70fb8f7473e30fa07e89c1ecc2c9bb Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 24 Nov 2009 15:20:15 +0200 Subject: KVM: x86 emulator: limit instructions to 15 bytes While we are never normally passed an instruction that exceeds 15 bytes, smp games can cause us to attempt to interpret one, which will cause large latencies in non-preempt hosts. Cc: stable@kernel.org Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_emulate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b7ed2c42311..7c18e1230f5 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -129,7 +129,7 @@ struct decode_cache { u8 seg_override; unsigned int d; unsigned long regs[NR_VCPU_REGS]; - unsigned long eip; + unsigned long eip, eip_orig; /* modrm */ u8 modrm; u8 modrm_mod; -- cgit v1.2.3 From d5696725b2a4c59503f5e0bc33adeee7f30cd45b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 2 Dec 2009 12:28:47 +0200 Subject: KVM: VMX: Fix comparison of guest efer with stale host value update_transition_efer() masks out some efer bits when deciding whether to switch the msr during guest entry; for example, NX is emulated using the mmu so we don't need to disable it, and LMA/LME are handled by the hardware. However, with shared msrs, the comparison is made against a stale value; at the time of the guest switch we may be running with another guest's efer. Fix by deferring the mask/compare to the actual point of guest entry. Noted by Marcelo. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06e085614da..4f865e8b854 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -812,6 +812,6 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_define_shared_msr(unsigned index, u32 msr); -void kvm_set_shared_msr(unsigned index, u64 val); +void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.2.3
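A sketch of how a caller of the reworked API passes the mask (the function and field names other than kvm_set_shared_msr() are illustrative, not taken from these diffs):

        /* Defer the efer compare to the actual guest entry: only bits
         * outside 'ignore_bits' take part in the lazy compare/update.
         * NX is emulated by the mmu and LMA/LME are handled by the
         * hardware, so those bits may differ harmlessly. */
        static void update_efer_shared_msr(struct kvm_vcpu *vcpu,
                                           unsigned efer_slot)
        {
                u64 guest_efer = vcpu->arch.shadow_efer;
                u64 ignore_bits = EFER_NX | EFER_SCE;

                kvm_set_shared_msr(efer_slot, guest_efer & ~ignore_bits,
                                   ~ignore_bits);
        }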