 arch/x86_64/kernel/process.c |   10 ++++++++++
 arch/x86_64/kernel/traps.c   |    1 +
 include/asm-x86_64/i387.h    |    5 ++++-
 include/linux/sched.h        |    9 +++++++++
 4 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 6fbd19564e4..9e9a70e50c7 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -552,6 +552,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
+	/* we're going to use this soon, after a few expensive things */
+	if (next_p->fpu_counter > 5)
+		prefetch(&next->i387.fxsave);
+
 	/*
 	 * Reload esp0, LDT and the page table pointer:
 	 */
@@ -629,6 +633,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	    || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
 		__switch_to_xtra(prev_p, next_p, tss);
 
+	/* If the task has used the FPU during the last 5 timeslices, just
+	 * do a full restore of the math state immediately to avoid the
+	 * trap; the chances of needing the FPU again soon are high
+	 */
+	if (next_p->fpu_counter > 5)
+		math_state_restore();
 	return prev_p;
 }
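A note for readers outside the kernel tree: in the full __switch_to() (not shown in these hunks), next is a local struct thread_struct pointer set to &next_p->thread, so the prefetch target is the task's fxsave image. Below is a minimal user-space sketch, not kernel code, of the decision these two hunks add: start pulling the 512-byte fxsave image toward the cache early so the fill overlaps the expensive reloads, then restore eagerly once the streak passes 5. All names here (struct task_model, switch_to_model) are hypothetical stand-ins.

#include <stdio.h>

/* 512-byte fxsave image, as saved/restored by fxsave/fxrstor */
struct fxsave_model { unsigned char bytes[512]; };

struct task_model {
	unsigned char fpu_counter;	/* consecutive FPU-using timeslices */
	struct fxsave_model fx;
};

static void switch_to_model(struct task_model *next)
{
	/* start the cache fill early... */
	if (next->fpu_counter > 5)
		__builtin_prefetch(&next->fx);

	/* ...so it overlaps the expensive work done here
	 * (esp0/LDT/page-table reloads in the real __switch_to) */

	if (next->fpu_counter > 5)
		printf("eager restore now, no trap (counter=%d)\n",
		       next->fpu_counter);
	else
		printf("lazy: wait for the #NM trap (counter=%d)\n",
		       next->fpu_counter);
}

int main(void)
{
	struct task_model t = { .fpu_counter = 6 };
	switch_to_model(&t);	/* takes the eager branch */
	return 0;
}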
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 28e53342f29..ffc40cff1e0 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -1136,6 +1136,7 @@ asmlinkage void math_state_restore(void)
 		init_fpu(me);
 	restore_fpu_checking(&me->thread.i387.fxsave);
 	task_thread_info(me)->status |= TS_USEDFPU;
+	me->fpu_counter++;
 }
 
 void __init trap_init(void)
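With this change math_state_restore() is reached two ways: from the device-not-available (#NM) trap, the classic lazy path, and directly from __switch_to() via the eager call above (which is why the next file exports its prototype). Both paths funnel through the same increment, so fpu_counter ticks once per timeslice in which FPU state was actually reloaded. A hypothetical user-space model of that funnel, with invented names:

#include <stdio.h>

#define TS_USEDFPU 0x0001	/* stand-in for the thread_info flag */

struct task_model {
	unsigned char fpu_counter;
	unsigned int status;
};

static void math_state_restore_model(struct task_model *me)
{
	/* the real function runs clts() and restore_fpu_checking() here */
	me->status |= TS_USEDFPU;
	me->fpu_counter++;	/* one tick per actual restore */
}

/* lazy path: the task's first FPU instruction faults (#NM) */
static void device_not_available_model(struct task_model *me)
{
	math_state_restore_model(me);
}

/* eager path: __switch_to() calls it directly past the threshold */
static void switch_in_model(struct task_model *next)
{
	if (next->fpu_counter > 5)
		math_state_restore_model(next);
}

int main(void)
{
	struct task_model t = { 0, 0 };

	device_not_available_model(&t);	/* early slices: trap-driven */
	switch_in_model(&t);		/* counter <= 5: still lazy */
	printf("fpu_counter=%d\n", t.fpu_counter);	/* prints 1 */
	return 0;
}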
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h
index cba8a3b0cde..60c0f4853fd 100644
--- a/include/asm-x86_64/i387.h
+++ b/include/asm-x86_64/i387.h
@@ -24,6 +24,7 @@ extern unsigned int mxcsr_feature_mask;
 extern void mxcsr_feature_mask_init(void);
 extern void init_fpu(struct task_struct *child);
 extern int save_i387(struct _fpstate __user *buf);
+extern asmlinkage void math_state_restore(void);
 
 /*
  * FPU lazy state save handling...
@@ -31,7 +32,9 @@ extern int save_i387(struct _fpstate __user *buf);
 #define unlazy_fpu(tsk) do { \
 	if (task_thread_info(tsk)->status & TS_USEDFPU) \
-		save_init_fpu(tsk); \
+		save_init_fpu(tsk); \
+	else \
+		tsk->fpu_counter = 0; \
 } while (0)
 
 /* Ignore delayed exceptions from user space */
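The new else branch is what makes the counter mean consecutive use: unlazy_fpu() runs when a task is switched out, and a single timeslice without FPU use zeroes the streak. Together with the unsigned char wraparound described in the next file, even a long-running FPU hog periodically drops back to lazy mode. A hedged sketch of the resulting lifecycle, again with hypothetical names:

#include <stdio.h>

struct task_model {
	unsigned char fpu_counter;	/* wraps to 0 after 255 */
};

/* mirror of unlazy_fpu(): break the streak on an FPU-free slice */
static void switch_out_model(struct task_model *tsk, int used_fpu)
{
	if (!used_fpu)
		tsk->fpu_counter = 0;	/* save_init_fpu() runs otherwise */
}

int main(void)
{
	struct task_model t = { 0 };
	int i;

	for (i = 0; i < 10; i++) {	/* ten FPU-heavy timeslices */
		t.fpu_counter++;	/* math_state_restore() side effect */
		switch_out_model(&t, 1);
	}
	printf("streak: %d (eager from the 6th slice on)\n", t.fpu_counter);

	switch_out_model(&t, 0);	/* one slice without FPU use... */
	printf("after an FPU-free slice: %d\n", t.fpu_counter);	/* 0 */

	t.fpu_counter = 255;
	t.fpu_counter++;		/* unsigned char wrap: lazy again */
	printf("after wrap: %d\n", t.fpu_counter);	/* 0 */
	return 0;
}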
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 34ed0d99b1b..807556c5bcd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -865,6 +865,15 @@ struct task_struct {
 	struct key *thread_keyring;	/* keyring private to this thread */
 	unsigned char jit_keyring;	/* default keyring to attach requested keys to */
 #endif
+	/*
+	 * fpu_counter contains the number of consecutive context switches
+	 * in which the FPU was used. If this is over a threshold, the lazy
+	 * FPU saving becomes unlazy to avoid the trap. This is an unsigned
+	 * char so that after 256 increments the counter wraps and the
+	 * behavior turns lazy again; this deals with bursty apps that only
+	 * use the FPU for a short time.
+	 */
+	unsigned char fpu_counter;
 	int oomkilladj;	/* OOM kill score adjustment (bit shift). */
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock