From e00320875d0cc5f8099a7227b2f25fbb3231268d Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Thu, 14 Feb 2008 08:48:23 +0100
Subject: x86: fix stackprotector canary updates during context switches

Fix a bug noticed and fixed by pageexec@freemail.hu.

If built with -fstack-protector-all then we'll have canary checks built
into the __switch_to() function. That does not work well with the
canary-switching code there: while we already use the %rsp of the new
task, we still call __switch_to() with the previous task's canary value
in the PDA, hence the __switch_to() SSP prologue instructions will store
the previous canary. Then we update the PDA and upon return from
__switch_to() the canary check triggers and we panic.

So update the canary after we have called __switch_to(), where we are
at the same stackframe level as the last stackframe of the next (and
now freshly current) task.

Note: this means that we call __switch_to() [and its sub-functions]
still with the old canary, but that is not a problem, since both the
previous and the next task have a high-quality canary. The only (mostly
academic) disadvantage is that the canary of one task may leak onto the
stack of another task, increasing the risk of information leaks, were an
attacker able to read the stack of specific tasks (but not that of
others).

To solve this we'll have to reorganize the way we switch tasks, and move
the PDA setting into the switch_to() assembly code. That will happen in
another patch.

Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 include/linux/sched.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5395a6176f4..d6a51515878 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1096,10 +1096,9 @@ struct task_struct {
 	pid_t pid;
 	pid_t tgid;
 
-#ifdef CONFIG_CC_STACKPROTECTOR
 	/* Canary value for the -fstack-protector gcc feature */
 	unsigned long stack_canary;
-#endif
+
 	/*
	 * pointers to (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively.  (p->father can be replaced with
--
cgit v1.2.3
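To make the failure mode above concrete, here is a rough, conceptual sketch of
what -fstack-protector does to a function. This is not kernel source;
__stack_chk_guard simply stands in for wherever the reference canary lives (the
PDA / per-cpu area on x86-64):

/*
 * Conceptual sketch only -- not the kernel's actual code.  With
 * -fstack-protector the compiler effectively rewrites a function like this:
 */
extern unsigned long __stack_chk_guard;	/* reference canary (PDA on x86-64) */
extern void __stack_chk_fail(void);	/* called on mismatch; never returns */

void some_function(void)
{
	/* prologue: copy the reference canary onto this stack frame */
	unsigned long canary = __stack_chk_guard;

	/* ... function body: local buffers, calls, etc. ... */

	/* epilogue: re-check against the *current* reference value */
	if (canary != __stack_chk_guard)
		__stack_chk_fail();
}

If the reference value is switched between the prologue and the epilogue of
__switch_to() -- the old ordering -- the epilogue check can only fail; hence
the canary is now updated only after __switch_to() returns.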
From 7c9f8861e6c9c839f913e49b98c3854daca18f27 Mon Sep 17 00:00:00 2001
From: Eric Sandeen
Date: Tue, 22 Apr 2008 16:38:23 -0500
Subject: stackprotector: use canary at end of stack to indicate overruns at oops time

(Updated with a common max-stack-used checker that knows about
the canary, as suggested by Joe Perches)

Use a canary at the end of the stack to clearly indicate at oops time
whether the stack has ever overflowed.

This is a very simple implementation with a couple of drawbacks:

1) a thread may legitimately use exactly up to the last word on the
   stack -- but the chances of doing this and then oopsing later seem
   slim

2) it's possible that the stack usage isn't dense enough that the
   canary location could get skipped over -- but the worst that happens
   is that we don't flag the overrun -- though this happens fairly
   often in my testing :(

With the code in place, an intentionally-bloated stack oops might do:

BUG: unable to handle kernel paging request at ffff8103f84cc680
IP: [] update_curr+0x9a/0xa8
PGD 8063 PUD 0
Thread overran stack or stack corrupted
Oops: 0000 [1] SMP
CPU 0
...

... unless the stack overrun is so bad that it corrupts some other
thread.

Signed-off-by: Eric Sandeen
Signed-off-by: Ingo Molnar
Signed-off-by: Thomas Gleixner
---
 include/linux/sched.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d6a51515878..c5181e77f30 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1969,6 +1969,19 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 
 extern void thread_info_cache_init(void);
 
+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long stack_not_used(struct task_struct *p)
+{
+	unsigned long *n = end_of_stack(p);
+
+	do {	/* Skip over canary */
+		n++;
+	} while (!*n);
+
+	return (unsigned long)n - (unsigned long)end_of_stack(p);
+}
+#endif
+
 /* set thread flags in other task's structures
  * - see asm/thread_info.h for TIF_xxxx flags available
  */
--
cgit v1.2.3

From 32bd671d6cbeda60dc73be77fa2b9037d9a9bfa0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 5 Feb 2009 12:24:15 +0100
Subject: signal: re-add dead task accumulation stats.

We're going to split the process wide cpu accounting into two parts:

 - clocks; which can take all the time they want since they run
   from user context.

 - timers; which need constant time tracing but can afford the
   overhead because they're default off -- and rare.

The clock readout will go back to a full sum of the thread group; for
this we need to re-add the exit stats that were removed in the initial
itimer rework (f06febc9: timers: fix itimer/many thread hang).

Furthermore, since that full sum can be rather slow for large thread
groups and we have the complete dead task stats, revert the
do_notify_parent time computation.

Signed-off-by: Peter Zijlstra
Reviewed-by: Ingo Molnar
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2127e959e0f..2e0646a3031 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -559,7 +559,7 @@ struct signal_struct {
	 * Live threads maintain their own counters and add to these
	 * in __exit_signal, except for the group leader.
	 */
-	cputime_t cutime, cstime;
+	cputime_t utime, stime, cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -567,6 +567,14 @@
 	unsigned long inblock, oublock, cinblock, coublock;
 	struct task_io_accounting ioac;
 
+	/*
+	 * Cumulative ns of scheduled CPU time of dead threads in the
+	 * group, not including a zombie group leader.  (This only differs
+	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
+	 * other than jiffies.)
+	 */
+	unsigned long long sum_sched_runtime;
+
 	/*
	 * We don't bother to synchronize most readers of this at all,
	 * because there is no reader checking a limit that actually needs
--
cgit v1.2.3
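The dead-task accumulation the patch above re-adds happens when a thread
exits. A rough sketch of the idea -- not the verbatim kernel code;
__exit_signal() is where this would live, and the helper name below is purely
illustrative:

/*
 * Illustrative sketch: when a thread dies, fold its CPU times into the
 * shared signal_struct so that a later full-sum clock readout over the
 * remaining live threads still sees the work done by exited threads.
 */
static void accumulate_dead_thread_times(struct signal_struct *sig,
					 struct task_struct *tsk)
{
	sig->utime = cputime_add(sig->utime, tsk->utime);
	sig->stime = cputime_add(sig->stime, tsk->stime);
	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
}

The utime/stime/sum_sched_runtime fields added to signal_struct above are the
destinations of exactly this kind of accumulation.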
From 4cd4c1b40d40447fb5e7ba80746c6d7ba91d7a53 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 5 Feb 2009 12:24:16 +0100
Subject: timers: split process wide cpu clocks/timers

Change the process wide cpu timers/clocks so that we:

 1) don't mess up the kernel with too many threads,
 2) don't have a per-cpu allocation for each process,
 3) have no impact when not used.

In order to accomplish this we're going to split it into two parts:

 - clocks; which can take all the time they want since they run from
   user context -- ie. sys_clock_gettime(CLOCK_PROCESS_CPUTIME_ID)

 - timers; which need constant time sampling but since they're
   explicitly used, the user can pay the overhead.

The clock readout will go back to a full sum of the thread group, while
the timers will run off a global 'clock' that only runs when needed, so
only programs that make use of the facility pay the price.

Signed-off-by: Peter Zijlstra
Reviewed-by: Ingo Molnar
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 54 +++++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 23 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2e0646a3031..082d7619b3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -443,7 +443,6 @@ struct pacct_struct {
  * @utime:		time spent in user mode, in &cputime_t units
  * @stime:		time spent in kernel mode, in &cputime_t units
  * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
- * @lock:		lock for fields in this struct
  *
  * This structure groups together three kinds of CPU time that are
  * tracked for threads and thread groups.  Most things considering
@@ -454,23 +453,33 @@ struct task_cputime {
 	cputime_t utime;
 	cputime_t stime;
 	unsigned long long sum_exec_runtime;
-	spinlock_t lock;
 };
 /* Alternate field names when used to cache expirations. */
 #define prof_exp	stime
 #define virt_exp	utime
 #define sched_exp	sum_exec_runtime
 
+#define INIT_CPUTIME	\
+	(struct task_cputime) {				\
+		.utime = cputime_zero,			\
+		.stime = cputime_zero,			\
+		.sum_exec_runtime = 0,			\
+	}
+
 /**
- * struct thread_group_cputime - thread group interval timer counts
- * @totals:		thread group interval timers; substructure for
- *			uniprocessor kernel, per-cpu for SMP kernel.
+ * struct thread_group_cputimer - thread group interval timer counts
+ * @cputime:		thread group interval timers.
+ * @running:		non-zero when there are timers running and
+ *			@cputime receives updates.
+ * @lock:		lock for fields in this struct.
  *
  * This structure contains the version of task_cputime, above, that is
- * used for thread group CPU clock calculations.
+ * used for thread group CPU timer calculations.
  */
-struct thread_group_cputime {
-	struct task_cputime totals;
+struct thread_group_cputimer {
+	struct task_cputime cputime;
+	int running;
+	spinlock_t lock;
 };
 
 /*
@@ -519,10 +528,10 @@ struct signal_struct {
 	cputime_t it_prof_incr, it_virt_incr;
 
 	/*
-	 * Thread group totals for process CPU clocks.
-	 * See thread_group_cputime(), et al, for details.
+	 * Thread group totals for process CPU timers.
+	 * See thread_group_cputimer(), et al, for details.
	 */
-	struct thread_group_cputime cputime;
+	struct thread_group_cputimer cputimer;
 
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
@@ -2191,27 +2200,26 @@ static inline int spin_needbreak(spinlock_t *lock)
 /*
  * Thread group CPU time accounting.
  */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 
 static inline
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 {
-	struct task_cputime *totals = &tsk->signal->cputime.totals;
+	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 	unsigned long flags;
 
-	spin_lock_irqsave(&totals->lock, flags);
-	*times = *totals;
-	spin_unlock_irqrestore(&totals->lock, flags);
+	WARN_ON(!cputimer->running);
+
+	spin_lock_irqsave(&cputimer->lock, flags);
+	*times = cputimer->cputime;
+	spin_unlock_irqrestore(&cputimer->lock, flags);
 }
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
-	sig->cputime.totals = (struct task_cputime){
-		.utime = cputime_zero,
-		.stime = cputime_zero,
-		.sum_exec_runtime = 0,
-	};
-
-	spin_lock_init(&sig->cputime.totals.lock);
+	sig->cputimer.cputime = INIT_CPUTIME;
+	spin_lock_init(&sig->cputimer.lock);
+	sig->cputimer.running = 0;
 }
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
--
cgit v1.2.3
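For orientation, here is a rough sketch of what the "clock" side -- the
out-of-line thread_group_cputime() declared above -- ends up doing: a full sum
over all live threads plus the dead-task totals kept in signal_struct. This is
an illustrative reconstruction, not the verbatim implementation (which lives
outside sched.h), and it omits the locking (RCU / sighand) a real walk of the
thread list needs:

/* Illustrative sketch of the slow-path, full-sum "clock" readout. */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	struct task_struct *t = tsk;

	/* start from what dead threads already accumulated ... */
	times->utime = sig->utime;
	times->stime = sig->stime;
	times->sum_exec_runtime = sig->sum_sched_runtime;

	/* ... and add every live thread's own counters */
	do {
		times->utime = cputime_add(times->utime, t->utime);
		times->stime = cputime_add(times->stime, t->stime);
		times->sum_exec_runtime += t->se.sum_exec_runtime;
	} while_each_thread(tsk, t);
}

That per-thread cost is why the timer side keeps the cheap, spinlock-protected
thread_group_cputimer cache instead of summing on every tick.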
From 7d8e23df69820e6be42bcc41d441f4860e8c76f7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 6 Feb 2009 14:57:51 +0100
Subject: timers: split process wide cpu clocks/timers, remove spurious warning

Mike Galbraith reported that the new warning in thread_group_cputimer()
triggers en masse with Amarok running.

Oleg Nesterov observed:

   Can't fastpath_timer_check()->thread_group_cputimer() have the false
   warning too? Suppose we had the timer, then posix_cpu_timer_del()
   removes this timer, but task_cputime_zero(&sig->cputime_expires) is
   still not true.

Remove the spurious debug warning.

Reported-by: Mike Galbraith
Explained-by: Oleg Nesterov
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 082d7619b3a..79392916d6c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2208,8 +2208,6 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 	unsigned long flags;
 
-	WARN_ON(!cputimer->running);
-
 	spin_lock_irqsave(&cputimer->lock, flags);
 	*times = cputimer->cputime;
 	spin_unlock_irqrestore(&cputimer->lock, flags);
--
cgit v1.2.3

From 3fccfd67df79c6351a156eb25a7a514e5f39c4d9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 10 Feb 2009 16:37:31 +0100
Subject: timers: split process wide cpu clocks/timers, fix

To decrease the chance of a missed enable, always enable the timer when
we sample it; we'll always disable it when we find that there are no
active timers in the jiffy tick.

This fixes a flood of warnings reported by Mike Galbraith.

Reported-by: Mike Galbraith
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 79392916d6c..5d10fa0b600 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2209,6 +2209,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 	unsigned long flags;
 
 	spin_lock_irqsave(&cputimer->lock, flags);
+	cputimer->running = 1;
 	*times = cputimer->cputime;
 	spin_unlock_irqrestore(&cputimer->lock, flags);
 }
--
cgit v1.2.3
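The "disable" half mentioned in the patch above lives outside sched.h. A
hedged sketch of what it plausibly looks like -- the function name
stop_process_timers() and its caller (the per-tick posix CPU timer check) are
assumptions here, not shown by this log:

/*
 * Illustrative sketch: when the jiffy-tick check finds no process-wide
 * timers armed, turn the group-wide accounting cache back off so that
 * processes not using the facility pay nothing.
 */
static void stop_process_timers(struct task_struct *tsk)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	unsigned long flags;

	if (!cputimer->running)
		return;

	spin_lock_irqsave(&cputimer->lock, flags);
	cputimer->running = 0;
	spin_unlock_irqrestore(&cputimer->lock, flags);
}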
From 4da94d49b2ecb0a26e716a8811c3ecc542c2a65d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 11 Feb 2009 11:30:27 +0100
Subject: timers: fix TIMER_ABSTIME for process wide cpu timers

The POSIX timer interface allows for absolute time expiry values through
the TIMER_ABSTIME flag; therefore we have to synchronize the timer to the
clock every time we start it.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'include/linux/sched.h')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5d10fa0b600..8981e52c714 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2201,18 +2201,7 @@ static inline int spin_needbreak(spinlock_t *lock)
  * Thread group CPU time accounting.
  */
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-
-static inline
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
-{
-	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cputimer->lock, flags);
-	cputimer->running = 1;
-	*times = cputimer->cputime;
-	spin_unlock_irqrestore(&cputimer->lock, flags);
-}
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
--
cgit v1.2.3
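The patch only shows thread_group_cputimer() becoming an out-of-line
declaration; its body moves elsewhere (kernel/posix-cpu-timers.c would be the
natural home). A hedged sketch of what the relocated function could look like,
consistent with the commit message but not the verbatim implementation:

/* Illustrative sketch: sample the group timer, (re)starting it if needed. */
void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	struct task_cputime sum;
	unsigned long flags;

	spin_lock_irqsave(&cputimer->lock, flags);
	if (!cputimer->running) {
		cputimer->running = 1;
		/*
		 * TIMER_ABSTIME expiries are absolute, so when the timer is
		 * (re)started the cached totals must first be synchronized
		 * with a full thread_group_cputime() sum.
		 */
		thread_group_cputime(tsk, &sum);
		cputimer->cputime = sum;
	}
	*times = cputimer->cputime;
	spin_unlock_irqrestore(&cputimer->lock, flags);
}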