aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/Kconfig.debug10
-rw-r--r--arch/i386/kernel/alternative.c63
-rw-r--r--arch/i386/kernel/entry.S39
-rw-r--r--arch/i386/kernel/module.c11
-rw-r--r--arch/i386/kernel/paravirt.c44
-rw-r--r--arch/i386/kernel/vmlinux.lds.S6
6 files changed, 148 insertions, 25 deletions
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index b31c0802e1c..f68cc6f215f 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -85,4 +85,14 @@ config DOUBLEFAULT
option saves about 4k and might cause you much additional grey
hair.
+config DEBUG_PARAVIRT
+ bool "Enable some paravirtualization debugging"
+ default y
+ depends on PARAVIRT && DEBUG_KERNEL
+ help
+ Currently deliberately clobbers regs which are allowed to be
+ clobbered in inlined paravirt hooks, even in native mode.
+ If turning this off solves a problem, then DISABLE_INTERRUPTS() or
+ ENABLE_INTERRUPTS() is lying about what registers can be clobbered.
+
endmenu
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 535f9794fba..9eca21b49f6 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -124,6 +124,20 @@ static unsigned char** find_nop_table(void)
#endif /* CONFIG_X86_64 */
+static void nop_out(void *insns, unsigned int len)
+{
+ unsigned char **noptable = find_nop_table();
+
+ while (len > 0) {
+ unsigned int noplen = len;
+ if (noplen > ASM_NOP_MAX)
+ noplen = ASM_NOP_MAX;
+ memcpy(insns, noptable[noplen], noplen);
+ insns += noplen;
+ len -= noplen;
+ }
+}
+
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -138,10 +152,9 @@ extern u8 __smp_alt_begin[], __smp_alt_end[];
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
- unsigned char **noptable = find_nop_table();
struct alt_instr *a;
u8 *instr;
- int diff, i, k;
+ int diff;
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
for (a = start; a < end; a++) {
@@ -159,13 +172,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
#endif
memcpy(instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
- /* Pad the rest with nops */
- for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
- k = diff;
- if (k > ASM_NOP_MAX)
- k = ASM_NOP_MAX;
- memcpy(a->instr + i, noptable[k], k);
- }
+ nop_out(instr + a->replacementlen, diff);
}
}
@@ -209,7 +216,6 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
- unsigned char **noptable = find_nop_table();
u8 **ptr;
for (ptr = start; ptr < end; ptr++) {
@@ -217,7 +223,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
continue;
if (*ptr > text_end)
continue;
- **ptr = noptable[1][0];
+ nop_out(*ptr, 1);
};
}
@@ -343,6 +349,40 @@ void alternatives_smp_switch(int smp)
#endif
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{
+ struct paravirt_patch *p;
+
+ for (p = start; p < end; p++) {
+ unsigned int used;
+
+ used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
+ p->len);
+#ifdef CONFIG_DEBUG_PARAVIRT
+ {
+ int i;
+ /* Deliberately clobber regs using "not %reg" to find bugs. */
+ for (i = 0; i < 3; i++) {
+ if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
+ memcpy(p->instr + used, "\xf7\xd0", 2);
+ p->instr[used+1] |= i;
+ used += 2;
+ }
+ }
+ }
+#endif
+ /* Pad the rest with nops */
+ nop_out(p->instr + used, p->len - used);
+ }
+
+ /* Sync to be conservative, in case we patched following instructions */
+ sync_core();
+}
+extern struct paravirt_patch __start_parainstructions[],
+ __stop_parainstructions[];
+#endif /* CONFIG_PARAVIRT */
+
void __init alternative_instructions(void)
{
unsigned long flags;
@@ -390,5 +430,6 @@ void __init alternative_instructions(void)
alternatives_smp_switch(0);
}
#endif
+ apply_paravirt(__start_parainstructions, __stop_parainstructions);
local_irq_restore(flags);
}
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index d274612e05c..de34b7fed3c 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -53,6 +53,19 @@
#include <asm/dwarf2.h>
#include "irq_vectors.h"
+/*
+ * We use macros for low-level operations which need to be overridden
+ * for paravirtualization. The following will never clobber any registers:
+ * INTERRUPT_RETURN (aka. "iret")
+ * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
+ * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
+ *
+ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
+ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
+ * Allowing a register to be clobbered can shrink the paravirt replacement
+ * enough to patch inline, increasing performance.
+ */
+
#define nr_syscalls ((syscall_table_size)/4)
CF_MASK = 0x00000001
@@ -63,9 +76,9 @@ NT_MASK = 0x00004000
VM_MASK = 0x00020000
#ifdef CONFIG_PREEMPT
-#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
+#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
-#define preempt_stop
+#define preempt_stop(clobbers)
#define resume_kernel restore_nocheck
#endif
@@ -226,7 +239,7 @@ ENTRY(ret_from_fork)
ALIGN
RING0_PTREGS_FRAME
ret_from_exception:
- preempt_stop
+ preempt_stop(CLBR_ANY)
ret_from_intr:
GET_THREAD_INFO(%ebp)
check_userspace:
@@ -237,7 +250,7 @@ check_userspace:
jb resume_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
movl TI_flags(%ebp), %ecx
@@ -248,7 +261,7 @@ ENTRY(resume_userspace)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
- DISABLE_INTERRUPTS
+ DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_nocheck
need_resched:
@@ -277,7 +290,7 @@ sysenter_past_esp:
* No need to follow this irqs on/off section: the syscall
* disabled irqs and here we enable it straight after entry:
*/
- ENABLE_INTERRUPTS
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushl $(__USER_DS)
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ss, 0*/
@@ -322,7 +335,7 @@ sysenter_past_esp:
jae syscall_badsys
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp)
- DISABLE_INTERRUPTS
+ DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testw $_TIF_ALLWORK_MASK, %cx
@@ -364,7 +377,7 @@ syscall_call:
call *sys_call_table(,%eax,4)
movl %eax,PT_EAX(%esp) # store the return value
syscall_exit:
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
@@ -393,7 +406,7 @@ restore_nocheck_notrace:
.section .fixup,"ax"
iret_exc:
TRACE_IRQS_ON
- ENABLE_INTERRUPTS
+ ENABLE_INTERRUPTS(CLBR_NONE)
pushl $0 # no error code
pushl $do_iret_error
jmp error_code
@@ -436,7 +449,7 @@ ldt_ss:
CFI_ADJUST_CFA_OFFSET 4
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
- DISABLE_INTERRUPTS
+ DISABLE_INTERRUPTS(CLBR_EAX)
TRACE_IRQS_OFF
lss (%esp), %esp
CFI_ADJUST_CFA_OFFSET -8
@@ -451,7 +464,7 @@ work_pending:
jz work_notifysig
work_resched:
call schedule
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
@@ -509,7 +522,7 @@ syscall_exit_work:
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
jz work_pending
TRACE_IRQS_ON
- ENABLE_INTERRUPTS # could let do_syscall_trace() call
+ ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
# schedule() instead
movl %esp, %eax
movl $1, %edx
@@ -693,7 +706,7 @@ ENTRY(device_not_available)
GET_CR0_INTO_EAX
testl $0x4, %eax # EM (math emulation bit)
jne device_not_available_emulate
- preempt_stop
+ preempt_stop(CLBR_ANY)
call math_state_restore
jmp ret_from_exception
device_not_available_emulate:
diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c
index 470cf97e7cd..d7d9c8b23f7 100644
--- a/arch/i386/kernel/module.c
+++ b/arch/i386/kernel/module.c
@@ -108,7 +108,8 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
- const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
+ const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
+ *para = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -118,6 +119,8 @@ int module_finalize(const Elf_Ehdr *hdr,
alt = s;
if (!strcmp(".smp_locks", secstrings + s->sh_name))
locks= s;
+ if (!strcmp(".parainstructions", secstrings + s->sh_name))
+ para = s;
}
if (alt) {
@@ -132,6 +135,12 @@ int module_finalize(const Elf_Ehdr *hdr,
lseg, lseg + locks->sh_size,
tseg, tseg + text->sh_size);
}
+
+ if (para) {
+ void *pseg = (void *)para->sh_addr;
+ apply_paravirt(pseg, pseg + para->sh_size);
+ }
+
return 0;
}
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 478192cd4b9..d4646042644 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -45,6 +45,49 @@ char *memory_setup(void)
return paravirt_ops.memory_setup();
}
+/* Simple instruction patching code. */
+#define DEF_NATIVE(name, code) \
+ extern const char start_##name[], end_##name[]; \
+ asm("start_" #name ": " code "; end_" #name ":")
+DEF_NATIVE(cli, "cli");
+DEF_NATIVE(sti, "sti");
+DEF_NATIVE(popf, "push %eax; popf");
+DEF_NATIVE(pushf, "pushf; pop %eax");
+DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
+DEF_NATIVE(iret, "iret");
+DEF_NATIVE(sti_sysexit, "sti; sysexit");
+
+static const struct native_insns
+{
+ const char *start, *end;
+} native_insns[] = {
+ [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
+ [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
+ [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
+ [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
+ [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
+ [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
+ [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
+};
+
+static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
+{
+ unsigned int insn_len;
+
+ /* Don't touch it if we don't have a replacement */
+ if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start)
+ return len;
+
+ insn_len = native_insns[type].end - native_insns[type].start;
+
+ /* Similarly if we can't fit replacement. */
+ if (len < insn_len)
+ return len;
+
+ memcpy(insns, native_insns[type].start, insn_len);
+ return insn_len;
+}
+
static fastcall unsigned long native_get_debugreg(int regno)
{
unsigned long val = 0; /* Damn you, gcc! */
@@ -349,6 +392,7 @@ struct paravirt_ops paravirt_ops = {
.paravirt_enabled = 0,
.kernel_rpl = 0,
+ .patch = native_patch,
.banner = default_banner,
.arch_setup = native_nop,
.memory_setup = machine_specific_memory_setup,
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index 6860f20aa57..5c69cf0e594 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -165,6 +165,12 @@ SECTIONS
.altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
*(.altinstr_replacement)
}
+ . = ALIGN(4);
+ __start_parainstructions = .;
+ .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+ *(.parainstructions)
+ }
+ __stop_parainstructions = .;
/* .exit.text is discard at runtime, not link time, to deal with references
from .altinstructions and .eh_frame */
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }