From 4bb0d3ec3e5b1e9e2399cdc641b3b6521ac9cdaa Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 3 Sep 2005 15:56:36 -0700 Subject: [PATCH] i386: inline asm cleanup i386 Inline asm cleanup. Use cr/dr accessor functions. Also, a potential bugfix. Also, some CR accessors really should be volatile. Reads from CR0 (numeric state may change in an exception handler), writes to CR4 (flipping CR4.TSD) and reads from CR2 (page fault) prevent instruction re-ordering. I did not add memory clobber to CR3 / CR4 / CR0 updates, as it was not there to begin with, and in no case should kernel memory be clobbered, except when doing a TLB flush, which already has memory clobber. I noticed that page invalidation does not have a memory clobber. I can't find a bug as a result, but there is definitely a potential for a bug here: #define __flush_tlb_single(addr) \ __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr)) Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/processor.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include/asm-i386/processor.h') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index d0d8b016009..7e17d3b4f65 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -203,9 +203,7 @@ static inline unsigned int cpuid_edx(unsigned int op) return edx; } -#define load_cr3(pgdir) \ - asm volatile("movl %0,%%cr3": :"r" (__pa(pgdir))) - +#define load_cr3(pgdir) write_cr3(__pa(pgdir)) /* * Intel CPU features in CR4 @@ -232,22 +230,20 @@ extern unsigned long mmu_cr4_features; static inline void set_in_cr4 (unsigned long mask) { + unsigned cr4; mmu_cr4_features |= mask; - __asm__("movl %%cr4,%%eax\n\t" - "orl %0,%%eax\n\t" - "movl %%eax,%%cr4\n" - : : "irg" (mask) - :"ax"); + cr4 = read_cr4(); + cr4 |= mask; + write_cr4(cr4); } static inline void clear_in_cr4 (unsigned long mask) { + unsigned cr4; mmu_cr4_features &= ~mask; - __asm__("movl %%cr4,%%eax\n\t" - "andl %0,%%eax\n\t" - "movl %%eax,%%cr4\n" - : : "irg" (~mask) - :"ax"); + cr4 = read_cr4(); + cr4 &= ~mask; + write_cr4(cr4); } /* -- cgit v1.2.3 From 245067d1674d451855692fcd4647daf9fd47f82d Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 3 Sep 2005 15:56:37 -0700 Subject: [PATCH] i386: cleanup serialize msr i386 arch cleanup. Introduce the serialize macro to serialize processor state. Why the microcode update needs it I am not quite sure, since wrmsr() is already a serializing instruction, but it is a microcode update, so I will keep the semantic the same, since this could be a timing workaround. As far as I can tell, this has always been there since the original microcode update source. Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/processor.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/asm-i386/processor.h') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 7e17d3b4f65..a0489b22270 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -277,6 +277,11 @@ static inline void clear_in_cr4 (unsigned long mask) outb((data), 0x23); \ } while (0) +static inline void serialize_cpu(void) +{ + __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); +} + static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { -- cgit v1.2.3 From 4f0cb8d978ab4b6e3b40147f619f48316d9d7f63 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 3 Sep 2005 15:56:41 -0700 Subject: [PATCH] i386: fix incorrect TSS entry for LDT Noticed by Chuck Ebbert: the .ldt entry of the TSS was set up incorrectly. It never mattered since this was a leftover from old times, so remove it. Signed-off-by: Ingo Molnar Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/asm-i386/processor.h') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index a0489b22270..992224bff54 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -475,7 +475,6 @@ struct thread_struct { .esp0 = sizeof(init_stack) + (long)&init_stack, \ .ss0 = __KERNEL_DS, \ .ss1 = __KERNEL_CS, \ - .ldt = GDT_ENTRY_LDT, \ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \ .io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \ } -- cgit v1.2.3 From a5201129307f414890f9a4410e38da205f5d7359 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sat, 3 Sep 2005 15:56:44 -0700 Subject: [PATCH] x86: make IOPL explicit The pushf/popf in switch_to are ONLY used to switch IOPL. Making this explicit in C code is more clear. This pushf/popf pair was added as a bugfix for leaking IOPL to unprivileged processes when using sysenter/sysexit based system calls (sysexit does not restore flags). When requesting an IOPL change in sys_iopl(), it is just as easy to change the current flags and the flags in the stack image (in case an IRET is required), but there is no reason to force an IRET if we came in from the SYSENTER path. This change is the minimal solution for supporting a paravirtualized Linux kernel that allows user processes to run with I/O privilege. Other solutions require radical rewrites of part of the low level fault / system call handling code, or do not fully support sysenter based system calls. Unfortunately, this added one field to the thread_struct. But as a bonus, on P4, the fastest time measured for switch_to() went from 312 to 260 cycles, a win of about 17% in the fast case through this performance critical path. Signed-off-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-i386/processor.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/asm-i386/processor.h') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 992224bff54..37bef8ed7be 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -455,6 +455,7 @@ struct thread_struct { unsigned int saved_fs, saved_gs; /* IO permissions */ unsigned long *io_bitmap_ptr; + unsigned long iopl; /* max allowed port in the bitmap, in bytes: */ unsigned long io_bitmap_max; }; @@ -511,6 +512,21 @@ static inline void load_esp0(struct tss_struct *tss, struct thread_struct *threa : /* no output */ \ :"r" (value)) +/* + * Set IOPL bits in EFLAGS from given mask + */ +static inline void set_iopl_mask(unsigned mask) +{ + unsigned int reg; + __asm__ __volatile__ ("pushfl;" + "popl %0;" + "andl %1, %0;" + "orl %2, %0;" + "pushl %0;" + "popfl" + : "=&r" (reg) + : "i" (~X86_EFLAGS_IOPL), "r" (mask)); +} /* Forward declaration, a strange C thing */ struct task_struct; -- cgit v1.2.3