From 464d1a78fbf8cf6c7fd970e7b3e2db50a320ce28 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 13 Feb 2007 13:26:20 +0100 Subject: [PATCH] i386: Convert i386 PDA code to use %fs Convert the PDA code to use %fs rather than %gs as the segment for per-processor data. This is because some processors show a small but measurable performance gain for reloading a NULL segment selector (as %fs generally is in user-space) versus a non-NULL one (as %gs generally is). On modern processors the difference is very small, perhaps undetectable. Some old AMD "K6 3D+" processors are noticably slower when %fs is used rather than %gs; I have no idea why this might be, but I think they're sufficiently rare that it doesn't matter much. This patch also fixes the math emulator, which had not been adjusted to match the changed struct pt_regs. [frederik.deweerdt@gmail.com: fixit with gdb] [mingo@elte.hu: Fix KVM too] Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Andi Kleen Cc: Ian Campbell Acked-by: Ingo Molnar Acked-by: Zachary Amsden Cc: Eric Dumazet Signed-off-by: Frederik Deweerdt Signed-off-by: Andrew Morton --- arch/i386/kernel/process.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/i386/kernel/process.c') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c641056233a..23ae198dbbc 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -308,8 +308,8 @@ void show_regs(struct pt_regs * regs) regs->eax,regs->ebx,regs->ecx,regs->edx); printk("ESI: %08lx EDI: %08lx EBP: %08lx", regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x GS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); + printk(" DS: %04x ES: %04x FS: %04x\n", + 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); cr0 = read_cr0(); cr2 = read_cr2(); @@ -340,7 +340,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) regs.xds = __USER_DS; regs.xes = __USER_DS; - regs.xgs = __KERNEL_PDA; + regs.xfs = __KERNEL_PDA; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; regs.xcs = __KERNEL_CS | get_kernel_rpl(); @@ -425,7 +425,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, p->thread.eip = (unsigned long) ret_from_fork; - savesegment(fs,p->thread.fs); + savesegment(gs,p->thread.gs); tsk = current; if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { @@ -501,8 +501,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->regs.eax = regs->eax; dump->regs.ds = regs->xds; dump->regs.es = regs->xes; - savesegment(fs,dump->regs.fs); - dump->regs.gs = regs->xgs; + dump->regs.fs = regs->xfs; + savesegment(gs,dump->regs.gs); dump->regs.orig_eax = regs->orig_eax; dump->regs.eip = regs->eip; dump->regs.cs = regs->xcs; @@ -653,7 +653,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas load_esp0(tss, next); /* - * Save away %fs. No need to save %gs, as it was saved on the + * Save away %gs. No need to save %fs, as it was saved on the * stack on entry. No need to save %es and %ds, as those are * always kernel segments while inside the kernel. Doing this * before setting the new TLS descriptors avoids the situation @@ -662,7 +662,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas * used %fs or %gs (it does not today), or if the kernel is * running inside of a hypervisor layer. */ - savesegment(fs, prev->fs); + savesegment(gs, prev->gs); /* * Load the per-thread Thread-Local Storage descriptor. @@ -670,12 +670,10 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas load_TLS(next, cpu); /* - * Restore %fs if needed. - * - * Glibc normally makes %fs be zero. + * Restore %gs if needed (which is common) */ - if (unlikely(prev->fs | next->fs)) - loadsegment(fs, next->fs); + if (prev->gs | next->gs) + loadsegment(gs, next->gs); write_pda(pcurrent, next_p); -- cgit v1.2.3