aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorZachary Amsden <zach@vmware.com>2005-09-03 15:56:44 -0700
committerLinus Torvalds <torvalds@evo.osdl.org>2005-09-05 00:06:12 -0700
commita5201129307f414890f9a4410e38da205f5d7359 (patch)
treead70c5f1d3b336ef5665a7fd5ad8707aaec23847 /arch
parent0998e4228aca046fbd747c3fed909791d52e88eb (diff)
[PATCH] x86: make IOPL explicit
The pushf/popf in switch_to are ONLY used to switch IOPL. Making this explicit in C code is more clear. This pushf/popf pair was added as a bugfix for leaking IOPL to unprivileged processes when using sysenter/sysexit based system calls (sysexit does not restore flags). When requesting an IOPL change in sys_iopl(), it is just as easy to change the current flags and the flags in the stack image (in case an IRET is required), but there is no reason to force an IRET if we came in from the SYSENTER path. This change is the minimal solution for supporting a paravirtualized Linux kernel that allows user processes to run with I/O privilege. Other solutions require radical rewrites of part of the low level fault / system call handling code, or do not fully support sysenter based system calls. Unfortunately, this added one field to the thread_struct. But as a bonus, on P4, the fastest time measured for switch_to() went from 312 to 260 cycles, a win of about 17% in the fast case through this performance critical path. Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/i386/kernel/ioport.c7
-rw-r--r--arch/i386/kernel/process.c6
2 files changed, 10 insertions, 3 deletions
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c
index 8b25160393c..f2b37654777 100644
--- a/arch/i386/kernel/ioport.c
+++ b/arch/i386/kernel/ioport.c
@@ -132,6 +132,7 @@ asmlinkage long sys_iopl(unsigned long unused)
volatile struct pt_regs * regs = (struct pt_regs *) &unused;
unsigned int level = regs->ebx;
unsigned int old = (regs->eflags >> 12) & 3;
+ struct thread_struct *t = &current->thread;
if (level > 3)
return -EINVAL;
@@ -140,8 +141,8 @@ asmlinkage long sys_iopl(unsigned long unused)
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
}
- regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12);
- /* Make sure we return the long way (not sysenter) */
- set_thread_flag(TIF_IRET);
+ t->iopl = level << 12;
+ regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl;
+ set_iopl_mask(t->iopl);
return 0;
}
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 66099780039..b45cbf93d43 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -712,6 +712,12 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
loadsegment(gs, next->gs);
/*
+ * Restore IOPL if needed.
+ */
+ if (unlikely(prev->iopl != next->iopl))
+ set_iopl_mask(next->iopl);
+
+ /*
* Now maybe reload the debug registers
*/
if (unlikely(next->debugreg[7])) {