From bd8fbdee6562ee526f3c2582a3b373ef195015dd Mon Sep 17 00:00:00 2001 From: Rui Sousa Date: Wed, 3 Sep 2008 17:53:07 +0200 Subject: lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set This patch fixes compilation if CONFIG_TRACE_IRQFLAGS_SUPPORT is ever disabled (which is currently not allowed by Kconfig). Alternatively we could just remove the option altogether and the associated code paths. Since the compilation error has been in the tree for at least two years and no one noticed it, I guess we don't really have the need for CONFIG_TRACE_IRQFLAGS_SUPPORT=n. Boot tested on x86 UP. Signed-off-by: Rui Sousa Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 74bde13224c..f2993512b3b 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -52,10 +52,10 @@ # define start_critical_timings() do { } while (0) #endif -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - #include +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ @@ -84,21 +84,20 @@ * The local_irq_*() APIs are equal to the raw_local_irq*() * if !TRACE_IRQFLAGS. */ -# define raw_local_irq_disable() local_irq_disable() -# define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) \ +#define local_irq_disable() raw_local_irq_disable() +#define local_irq_enable() raw_local_irq_enable() +#define local_irq_save(flags) \ do { \ typecheck(unsigned long, flags); \ - local_irq_save(flags); \ + raw_local_irq_save(flags); \ } while (0) -# define raw_local_irq_restore(flags) \ +# define local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ - local_irq_restore(flags); \ + raw_local_irq_restore(flags); \ } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define safe_halt() \ do { \ trace_hardirqs_on(); \ @@ -124,6 +123,5 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) -#endif /* CONFIG_X86 */ #endif -- cgit v1.2.3 From 8c56250f48347750c82ab18d98d647dcf99ca674 Mon Sep 17 00:00:00 2001 From: Rui Sousa Date: Thu, 4 Sep 2008 19:47:53 +0200 Subject: lockdep, UML: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set Hiroshi Shimamoto reported: > > !TRACE_IRQFLAGS_SUPPORT mode of build for future work, it can be > > restored via a simple revert. > > Hi, it seems that this patch breaks uml build. > > kernel/printk.c: In function 'vprintk': > kernel/printk.c:674: error: implicit declaration of function > 'raw_local_irq_save' kernel/printk.c:772: error: implicit declaration of > function 'raw_local_irq_restore' With the patch bellow it compiles (make ARCH=um with a x86 host), but I'm really out of my league on this one... Reported-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- include/asm-um/system-generic.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/asm-um/system-generic.h b/include/asm-um/system-generic.h index 5bcfa35e7a2..f1ea4da34fa 100644 --- a/include/asm-um/system-generic.h +++ b/include/asm-um/system-generic.h @@ -4,15 +4,15 @@ #include "asm/arch/system.h" #undef switch_to -#undef local_irq_save -#undef local_irq_restore -#undef local_irq_disable -#undef local_irq_enable -#undef local_save_flags -#undef local_irq_restore -#undef local_irq_enable -#undef local_irq_disable -#undef local_irq_save +#undef raw_local_irq_save +#undef raw_local_irq_restore +#undef raw_local_irq_disable +#undef raw_local_irq_enable +#undef raw_local_save_flags +#undef raw_local_irq_restore +#undef raw_local_irq_enable +#undef raw_local_irq_disable +#undef raw_local_irq_save #undef irqs_disabled extern void *switch_to(void *prev, void *next, void *last); @@ -23,21 +23,21 @@ extern int get_signals(void); extern void block_signals(void); extern void unblock_signals(void); -#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ +#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \ (flags) = get_signals(); } while(0) -#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ +#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \ set_signals(flags); } while(0) -#define local_irq_save(flags) do { local_save_flags(flags); \ - local_irq_disable(); } while(0) +#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \ + raw_local_irq_disable(); } while(0) -#define local_irq_enable() unblock_signals() -#define local_irq_disable() block_signals() +#define raw_local_irq_enable() unblock_signals() +#define raw_local_irq_disable() block_signals() #define irqs_disabled() \ ({ \ unsigned long flags; \ - local_save_flags(flags); \ + raw_local_save_flags(flags); \ (flags == 0); \ }) -- cgit v1.2.3 From ab7476cf76e560f0efda2a631a70aabe93009025 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 15 Aug 2008 15:29:38 -0700 Subject: debug: add notifier chain debugging, v2 - unbreak ia64 (and powerpc) where function pointers dont point at code but at data (reported by Tony Luck) [ mingo@elte.hu: various cleanups ] Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6..4e1366b552a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -187,6 +187,9 @@ extern unsigned long long memparse(char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); +extern int func_ptr_is_kernel_text(void *ptr); +extern void *dereference_function_descriptor(void *ptr); + struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); -- cgit v1.2.3 From 76b189e91845eab3a9d52bb97f971d312d25652d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Sep 2008 09:57:35 +0200 Subject: lockdep: add might_lock() / might_lock_read() useful to establish a lock dependency in case the actual dependency is rare or hard to trigger. Signed-off-by: Peter Zijlstra Acked-by: Nick Piggin Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 331e5f1c2d8..0aa657aa8a1 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -480,4 +480,22 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define lock_map_release(l) do { } while (0) #endif +#ifdef CONFIG_PROVE_LOCKING +# define might_lock(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +# define might_lock_read(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +#else +# define might_lock(lock) do { } while (0) +# define might_lock_read(lock) do { } while (0) +#endif + #endif /* __LINUX_LOCKDEP_H */ -- cgit v1.2.3 From c10d38dda1774ed4540380333cabd229eff37094 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 10 Sep 2008 13:37:17 +0200 Subject: x86: some lock annotations for user copy paths copy_to/from_user and all its variants (except the atomic ones) can take a page fault and perform non-trivial work like taking mmap_sem and entering the filesyste/pagecache. Unfortunately, this often escapes lockdep because a common pattern is to use it to read in some arguments just set up from userspace, or write data back to a hot buffer. In those cases, it will be unlikely for page reclaim to get a window in to cause copy_*_user to fault. With the new might_lock primitives, add some annotations to x86. I don't know if I caught all possible faulting points (it's a bit of a maze, and I didn't really look at 32-bit). But this is a starting point. Boots and runs OK so far. Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/asm-x86/uaccess.h | 14 ++++++++++++++ include/asm-x86/uaccess_32.h | 10 ++++++++-- include/asm-x86/uaccess_64.h | 12 ++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index 5f702d1d521..ad29752a171 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -157,6 +159,9 @@ extern int __get_user_bad(void); int __ret_gu; \ unsigned long __val_gu; \ __chk_user_ptr(ptr); \ + might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_x(1, __ret_gu, __val_gu, ptr); \ @@ -241,6 +246,9 @@ extern void __put_user_8(void); int __ret_pu; \ __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ + might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __pu_val = x; \ switch (sizeof(*(ptr))) { \ case 1: \ @@ -265,6 +273,9 @@ extern void __put_user_8(void); #define __put_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ + might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ @@ -317,6 +328,9 @@ do { \ #define __get_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ + might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h index 6fdef39a0bc..d725e2d703f 100644 --- a/include/asm-x86/uaccess_32.h +++ b/include/asm-x86/uaccess_32.h @@ -82,8 +82,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - might_sleep(); - return __copy_to_user_inatomic(to, from, n); + might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); + return __copy_to_user_inatomic(to, from, n); } static __always_inline unsigned long @@ -138,6 +140,8 @@ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (__builtin_constant_p(n)) { unsigned long ret; @@ -160,6 +164,8 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index 515d4dce96b..40a7205fe57 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -28,6 +28,10 @@ static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { int ret = 0; + + might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -70,6 +74,10 @@ static __always_inline __must_check int __copy_to_user(void __user *dst, const void *src, unsigned size) { int ret = 0; + + might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -112,6 +120,10 @@ static __always_inline __must_check int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { int ret = 0; + + might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, (__force void *)src, size); -- cgit v1.2.3 From 3ee1afa308f2a38e5d1e2ad3752ad7abcf480da1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 10 Sep 2008 13:37:17 +0200 Subject: x86: some lock annotations for user copy paths, v2 - introduce might_fault() - handle the atomic user copy paths correctly [ mingo@elte.hu: move might_sleep() outside of in_atomic(). ] Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/asm-x86/uaccess.h | 18 ++++-------------- include/asm-x86/uaccess_32.h | 12 +++--------- include/asm-x86/uaccess_64.h | 12 +++--------- include/linux/kernel.h | 9 +++++++++ 4 files changed, 19 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index ad29752a171..39f8420c75d 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -8,8 +8,6 @@ #include #include #include -#include -#include #include #include @@ -159,9 +157,7 @@ extern int __get_user_bad(void); int __ret_gu; \ unsigned long __val_gu; \ __chk_user_ptr(ptr); \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_x(1, __ret_gu, __val_gu, ptr); \ @@ -246,9 +242,7 @@ extern void __put_user_8(void); int __ret_pu; \ __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __pu_val = x; \ switch (sizeof(*(ptr))) { \ case 1: \ @@ -273,9 +267,7 @@ extern void __put_user_8(void); #define __put_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ @@ -328,9 +320,7 @@ do { \ #define __get_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h index d725e2d703f..d10e842ec3e 100644 --- a/include/asm-x86/uaccess_32.h +++ b/include/asm-x86/uaccess_32.h @@ -82,9 +82,7 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); return __copy_to_user_inatomic(to, from, n); } @@ -139,9 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; @@ -163,9 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index 40a7205fe57..13fd56fbc3a 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -29,9 +29,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size) { int ret = 0; - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -75,9 +73,7 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size) { int ret = 0; - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -121,9 +117,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { int ret = 0; - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, (__force void *)src, size); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6..e580ec09576 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -140,6 +140,15 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) +#ifdef CONFIG_PROVE_LOCKING +void might_fault(void); +#else +static inline void might_fault(void) +{ + might_sleep(); +} +#endif + extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) -- cgit v1.2.3 From 1d18ef489509314506328b9e464dd47c24c1d68f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Sep 2008 20:53:21 +0200 Subject: x86: some lock annotations for user copy paths, v3 - add annotation back to clear_user() - change probe_kernel_address() to _inatomic*() method Signed-off-by: Ingo Molnar --- include/asm-x86/uaccess.h | 2 -- include/linux/uaccess.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index 39f8420c75d..dc8edb5c465 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -267,7 +267,6 @@ extern void __put_user_8(void); #define __put_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ - might_fault(); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ @@ -320,7 +319,6 @@ do { \ #define __get_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ - might_fault(); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index fec6decfb98..2062293e57e 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ + ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ -- cgit v1.2.3 From 53b9d87f41a3d8838210ad7cdef02d814817ce85 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 11 Sep 2008 17:02:58 -0700 Subject: lock debug: sit tight when we are already in a panic in: > http://bugzilla.kernel.org/show_bug.cgi?id=11543 The panic code called the kexec code which called mutex_trylock() which called spin_lock_mutex() which then stupidly went and blurted a load of debug stuff because of in_interrupt(). Keep the lock debug code from escallating an already crappy situation. Signed-off-by: Ingo Molnar --- include/linux/debug_locks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 4aaa4afb1cb..096476f1fb3 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -17,7 +17,7 @@ extern int debug_locks_off(void); ({ \ int __ret = 0; \ \ - if (unlikely(c)) { \ + if (!oops_in_progress && unlikely(c)) { \ if (debug_locks_off() && !debug_locks_silent) \ WARN_ON(1); \ __ret = 1; \ -- cgit v1.2.3 From 30742d5c2277c325fb0e9d2d817d55a19995fe8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:43:39 +0200 Subject: Revert "lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set" This reverts commit bd8fbdee6562ee526f3c2582a3b373ef195015dd. This broke the powerpc build - more fixes are needed before we can undo this revert. --- include/linux/irqflags.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index f2993512b3b..74bde13224c 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -52,10 +52,10 @@ # define start_critical_timings() do { } while (0) #endif -#include - #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +#include + #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ @@ -84,20 +84,21 @@ * The local_irq_*() APIs are equal to the raw_local_irq*() * if !TRACE_IRQFLAGS. */ -#define local_irq_disable() raw_local_irq_disable() -#define local_irq_enable() raw_local_irq_enable() -#define local_irq_save(flags) \ +# define raw_local_irq_disable() local_irq_disable() +# define raw_local_irq_enable() local_irq_enable() +# define raw_local_irq_save(flags) \ do { \ typecheck(unsigned long, flags); \ - raw_local_irq_save(flags); \ + local_irq_save(flags); \ } while (0) -# define local_irq_restore(flags) \ +# define raw_local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ - raw_local_irq_restore(flags); \ + local_irq_restore(flags); \ } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define safe_halt() \ do { \ trace_hardirqs_on(); \ @@ -123,5 +124,6 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) +#endif /* CONFIG_X86 */ #endif -- cgit v1.2.3 From fb71e45338453698bd7460f7e8f171ea0304d218 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 15 Sep 2008 18:04:26 -0700 Subject: uaccess: fix parameters inversion for __copy_from_user_inatomic() The following patch changes to use __copy_from_user_inatomic(), but the passing parameters incorrect: x86: some lock annotations for user copy paths, v3 This fixes the netfilter crash reported by Steven Noonan. Reported-by: Steven Noonan Signed-off-by: Hiroshi Shimamoto Tested-by: Steven Noonan Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 2062293e57e..6b58367d145 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval)); \ + ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ -- cgit v1.2.3 From 38d47c1b7075bd7ec3881141bb3629da58f88dab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Sep 2008 19:32:20 +0200 Subject: futex: rely on get_user_pages() for shared futexes On the way of getting rid of the mmap_sem requirement for shared futexes, start by relying on get_user_pages(). Signed-off-by: Peter Zijlstra Acked-by: Nick Piggin Signed-off-by: Ingo Molnar --- include/linux/futex.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 586ab56a3ec..8f627b9ae2b 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -164,6 +164,8 @@ union futex_key { } both; }; +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } + #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); -- cgit v1.2.3 From c7e78cff6b7518212247fb20b1dc6411540dc9af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Oct 2008 23:17:09 +0200 Subject: lockstat: contend with points We currently only provide points that have to wait on contention, also lists the points we have to wait for. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0aa657aa8a1..fc9f8e88123 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -73,6 +73,8 @@ struct lock_class_key { struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; }; +#define LOCKSTAT_POINTS 4 + /* * The lock-class itself: */ @@ -119,7 +121,8 @@ struct lock_class { int name_version; #ifdef CONFIG_LOCK_STAT - unsigned long contention_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; #endif }; @@ -144,6 +147,7 @@ enum bounce_type { struct lock_class_stats { unsigned long contention_point[4]; + unsigned long contending_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; @@ -165,6 +169,7 @@ struct lockdep_map { const char *name; #ifdef CONFIG_LOCK_STAT int cpu; + unsigned long ip; #endif }; @@ -355,7 +360,7 @@ struct lock_class_key { }; #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); -extern void lock_acquired(struct lockdep_map *lock); +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ @@ -363,13 +368,13 @@ do { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ - lock_acquired(&(_lock)->dep_map); \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) -#define lock_acquired(lockdep_map) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) -- cgit v1.2.3 From 505e371da195fad20cb8aaf45407a2849774d6d0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 15 Oct 2008 14:56:42 +0800 Subject: markers: remove exported symbol marker_probe_cb_noarg() marker_probe_cb_noarg() should not be seen by outer code. this patch remove it. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c7fbb..4cf45472d9f 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, void *call_private, ...); -extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...); /* * Connect a probe to a marker. -- cgit v1.2.3 From 944ac4259e39801c843a915c3da8194ac9af0440 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 19:26:08 -0400 Subject: ftrace: ftrace dump on oops control Impact: add (default-off) dump-trace-on-oops flag Currently, ftrace is set up to dump its contents to the console if the kernel panics or oops. This can be annoying if you have trace data in the buffers and you experience an oops, but the trace data is old or static. Usually when you want ftrace to dump its contents is when you are debugging your system and you have set up ftrace to trace the events leading to an oops. This patch adds a control variable called "ftrace_dump_on_oops" that will enable the ftrace dump to console on oops. This variable is default off but a developer can enable it either through the kernel command line by adding "ftrace_dump_on_oops" or at run time by setting (or disabling) /proc/sys/kernel/ftrace_dump_on_oops. v2: Replaced /** with /* as Randy explained that kernel-doc does not yet handle variables. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a3d46151be1..9623b7b9e5a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -165,6 +165,8 @@ static inline void __ftrace_enabled_restore(int enabled) #endif #ifdef CONFIG_TRACING +extern int ftrace_dump_on_oops; + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -- cgit v1.2.3 From d98d38f2014ab79f28c126ff175d034891f7aefc Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 29 Oct 2008 14:24:09 -0700 Subject: mutex: improve header comment to be actually informative about the API Impact: improve documentation It's nice to say that mutex_trylock follows the spin_trylock convention. It's a lot nicer if the comment also says which that is... make it so. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton --- include/linux/mutex.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bc6da10ceee..7a0e5c4f807 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! + * + * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); -- cgit v1.2.3 From 17666f02b118099028522dfc3df00a235700e216 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 30 Oct 2008 16:08:32 -0400 Subject: ftrace: nmi safe code modification Impact: fix crashes that can occur in NMI handlers, if their code is modified Modifying code is something that needs special care. On SMP boxes, if code that is being modified is also being executed on another CPU, that CPU will have undefined results. The dynamic ftrace uses kstop_machine to make the system act like a uniprocessor system. But this does not address NMIs, that can still run on other CPUs. One approach to handle this is to make all code that are used by NMIs not be traced. But NMIs can call notifiers that spread throughout the kernel and this will be very hard to maintain, and the chance of missing a function is very high. The approach that this patch takes is to have the NMIs modify the code if the modification is taking place. The way this works is that just writing to code executing on another CPU is not harmful if what is written is the same as what exists. Two buffers are used: an IP buffer and a "code" buffer. The steps that the patcher takes are: 1) Put in the instruction pointer into the IP buffer and the new code into the "code" buffer. 2) Set a flag that says we are modifying code 3) Wait for any running NMIs to finish. 4) Write the code 5) clear the flag. 6) Wait for any running NMIs to finish. If an NMI is executed, it will also write the pending code. Multiple writes are OK, because what is being written is the same. Then the patcher must wait for all running NMIs to finish before going to the next line that must be patched. This is basically the RCU approach to code modification. Thanks to Ingo Molnar for suggesting the idea, and to Arjan van de Ven for his guidence on what is safe and what is not. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a..0087cb43bec 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /* @@ -161,7 +162,17 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() \ + do { \ + ftrace_nmi_enter(); \ + lockdep_off(); \ + __irq_enter(); \ + } while (0) +#define nmi_exit() \ + do { \ + __irq_exit(); \ + lockdep_on(); \ + ftrace_nmi_exit(); \ + } while (0) #endif /* LINUX_HARDIRQ_H */ -- cgit v1.2.3 From a26a2a27396c0a0877aa701f8f92d08ba550a6c9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 00:03:22 -0400 Subject: ftrace: nmi safe code clean ups Impact: cleanup This patch cleans up the NMI safe code for dynamic ftrace as suggested by Andrew Morton. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2..22240dfe912 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); + /** * ftrace_modify_code - modify code segment * @ip: the address of the code segment -- cgit v1.2.3 From 127cafbb276266b1b8da967bfe25a062ab1d42ab Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 28 Oct 2008 10:51:53 +0800 Subject: tracepoint: introduce *_noupdate APIs. Impact: add new tracepoint APIs to allow the batched registration of probes new APIs separate tracepoint_probe_register(), tracepoint_probe_unregister() into 2 steps. The first step of them is just update tracepoint_entry, not connect or disconnect. this patch introduces tracepoint_probe_update_all() for update all. these APIs are very useful for registering lots of probes but just updating once. Another very important thing is that *_noupdate APIs do not require module_mutex. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c5bb39c7a77..63064e9403f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe); */ extern int tracepoint_probe_unregister(const char *name, void *probe); +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + struct tracepoint_iter { struct module *module; struct tracepoint *tracepoint; -- cgit v1.2.3 From 7e5e26a3d8ac4bcadb380073dc9604c07a9a6198 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 09:36:38 -0400 Subject: ftrace: fix hardirq header for non ftrace archs Impact: build fix for non-ftrace architectures Not all archs implement ftrace, and therefore do not have an asm/ftrace.h. This patch corrects the problem. The ftrace_nmi_enter/exit now must be defined for all archs that implement dynamic ftrace. Currently, only x86 does. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ++++- include/linux/hardirq.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e46a7b34037..0ad1b48aea6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -44,7 +44,6 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE - enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -105,6 +104,8 @@ extern void ftrace_release(void *start, unsigned long size); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); #else # define skip_trace(ip) ({ 0; }) @@ -113,6 +114,8 @@ extern void ftrace_enable_daemon(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 0087cb43bec..ffc16ab5a87 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -4,8 +4,8 @@ #include #include #include +#include #include -#include #include /* -- cgit v1.2.3 From 29cbda77a67cf263d636feea65d3bbc9c7de2e24 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Nov 2008 09:16:39 -0800 Subject: rcu: increase RCU stall-check timeouts Impact: increase timeout of debug check feature Increase RCU stall period timeouts to reduce the likelyhood of false positives. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 5f89b62e698..301dda829e3 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -41,7 +41,7 @@ #include #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */ #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -- cgit v1.2.3 From 71566a0d161edec70361b7f90f6e54af6a6d5d05 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 12:57:20 +0100 Subject: tracing/fastboot: Enable boot tracing only during initcalls Impact: modify boot tracer We used to disable the initcall tracing at a specified time (IE: end of builtin initcalls). But we don't need it anymore. It will be stopped when initcalls are finished. However we want two things: _Start this tracing only after pre-smp initcalls are finished. _Since we are planning to trace sched_switches at the same time, we want to enable them only during the initcall execution. For this purpose, this patch introduce two functions to enable/disable the sched_switch tracing during boot. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e46a7b34037..4642959e5bd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -234,6 +234,11 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +/* + * Structure which defines the trace of an initcall. + * You don't have to fill the func field since it is + * only used internally by the tracer. + */ struct boot_trace { pid_t caller; char func[KSYM_NAME_LEN]; @@ -244,13 +249,28 @@ struct boot_trace { }; #ifdef CONFIG_BOOT_TRACER +/* Append the trace on the ring-buffer */ extern void trace_boot(struct boot_trace *it, initcall_t fn); + +/* Tells the tracer that smp_pre_initcall is finished. + * So we can start the tracing + */ extern void start_boot_trace(void); -extern void stop_boot_trace(void); + +/* Resume the tracing of other necessary events + * such as sched switches + */ +extern void enable_boot_trace(void); + +/* Suspend this tracing. Actually, only sched_switches tracing have + * to be suspended. Initcalls doesn't need it.) + */ +extern void disable_boot_trace(void); #else static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } static inline void start_boot_trace(void) { } -static inline void stop_boot_trace(void) { } +static inline void enable_boot_trace(void) { } +static inline void disable_boot_trace(void) { } #endif -- cgit v1.2.3 From 60a7ecf42661f2b22168751298592da6ee210c9e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: add quick function trace stop Impact: quick start and stop of function tracer This patch adds a way to disable the function tracer quickly without the need to run kstop_machine. It adds a new variable called function_trace_stop which will stop the calls to functions from mcount when set. This is just an on/off switch and does not handle recursion like preempt_disable(). It's main purpose is to help other tracers/debuggers start and stop tracing fuctions without the need to call kstop_machine. The config option HAVE_FUNCTION_TRACE_MCOUNT_TEST is added for archs that implement the testing of the function_trace_stop in the mcount arch dependent code. Otherwise, the test is done in the C code. x86 is the only arch at the moment that supports this. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4642959e5bd..794ab907dbf 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -23,6 +23,34 @@ struct ftrace_ops { struct ftrace_ops *next; }; +extern int function_trace_stop; + +/** + * ftrace_stop - stop function tracer. + * + * A quick way to stop the function tracer. Note this an on off switch, + * it is not something that is recursive like preempt_disable. + * This does not disable the calling of mcount, it only stops the + * calling of functions from mcount. + */ +static inline void ftrace_stop(void) +{ + function_trace_stop = 1; +} + +/** + * ftrace_start - start the function tracer. + * + * This function is the inverse of ftrace_stop. This does not enable + * the function tracing if the function tracer is disabled. This only + * sets the function tracer flag to continue calling the functions + * from mcount. + */ +static inline void ftrace_start(void) +{ + function_trace_stop = 0; +} + /* * The ftrace_ops must be a static and should also * be read_mostly. These functions do modify read_mostly variables @@ -41,6 +69,8 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) static inline void ftrace_kill(void) { } +static inline void ftrace_stop(void) { } +static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3 From 0f04870148ecb825133bc2733f473b1c5773ac0b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: soft tracing stop and start Impact: add way to quickly start stop tracing from the kernel This patch adds a soft stop and start to the trace. This simply disables function tracing via the ftrace_disabled flag, and disables the trace buffers to prevent recording. The tracing code may still be executed, but the trace will not be recorded. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 794ab907dbf..7a75fc6d41f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -216,6 +216,9 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_TRACING extern int ftrace_dump_on_oops; +extern void tracing_start(void); +extern void tracing_stop(void); + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -246,6 +249,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } static inline int ftrace_printk(const char *fmt, ...) { -- cgit v1.2.3 From 6a60dd121c5b6c2d827e99b38c1326f2600c3891 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Nov 2008 15:55:21 -0500 Subject: ftrace: split out hardirq ftrace code into own header Impact: moving of function prototypes into own header file ftrace.h is too big of a file for hardirq.h, and some archs will fail to build because of the include dependencies not being met. This patch pulls out the required prototypes for hardirq.h into a smaller and safer ftrace_irq.h file. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ----- include/linux/ftrace_irq.h | 13 +++++++++++++ include/linux/hardirq.h | 2 +- 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 include/linux/ftrace_irq.h (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0ad1b48aea6..1b340e3fa24 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -104,9 +104,6 @@ extern void ftrace_release(void *start, unsigned long size); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); -extern void ftrace_nmi_enter(void); -extern void ftrace_nmi_exit(void); - #else # define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) @@ -114,8 +111,6 @@ extern void ftrace_nmi_exit(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h new file mode 100644 index 00000000000..b1299d6729f --- /dev/null +++ b/include/linux/ftrace_irq.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_FTRACE_IRQ_H +#define _LINUX_FTRACE_IRQ_H + + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); +#else +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif + +#endif /* _LINUX_FTRACE_IRQ_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index ffc16ab5a87..89a56d79e4c 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From caf4b323b02a16c92fba449952ac6515ddc76d7a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 07:03:45 +0100 Subject: tracing, x86: add low level support for ftrace return tracing Impact: add infrastructure for function-return tracing Add low level support for ftrace return tracing. This plug-in stores return addresses on the thread_info structure of the current task. The index of the current return address is initialized when the task is the first one (init) and when a process forks (the child). It is not needed when a task does a sys_execve because after this syscall, it still needs to return on the kernel functions it called. Note that the code of return_to_handler has been suggested by Steven Rostedt as almost all of the ideas of improvements in this V3. For purpose of security, arch/x86/kernel/process_32.c is not traced because __switch_to() changes the current task during its execution. That could cause inconsistency in the stored return address of this function even if I didn't have any crash after testing with tracing on this function enabled. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 20 ++++++++++++++++++++ include/linux/ftrace_irq.h | 2 +- include/linux/sched.h | 11 +++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f5608c1102..dcbbf72a88b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -267,6 +267,26 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +/* + * Structure that defines a return function trace. + */ +struct ftrace_retfunc { + unsigned long ret; /* Return address */ + unsigned long func; /* Current function */ + unsigned long long calltime; + unsigned long long rettime; +}; + +#ifdef CONFIG_FUNCTION_RET_TRACER +/* Type of a callback handler of tracing return function */ +typedef void (*trace_function_return_t)(struct ftrace_retfunc *); + +extern void register_ftrace_return(trace_function_return_t func); +/* The current handler in use */ +extern trace_function_return_t ftrace_function_return; +extern void unregister_ftrace_return(void); +#endif + /* * Structure which defines the trace of an initcall. * You don't have to fill the func field since it is diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index b1299d6729f..0b4df55d7a7 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#ifdef CONFIG_DYNAMIC_FTRACE +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/sched.h b/include/linux/sched.h index 295b7c756ca..df77abe860c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2005,6 +2005,17 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; + +#ifdef CONFIG_FUNCTION_RET_TRACER + /* + * When fork() creates a child process, this function is called. + * But the child task may not inherit the return adresses traced + * by the return function tracer because it will directly execute + * in userspace and will not return to kernel functions its parent + * used. + */ + task_thread_info(p)->curr_ret_stack = -1; +#endif } static inline unsigned long *end_of_stack(struct task_struct *p) -- cgit v1.2.3 From 3f5ec13696fd4a33bde42f385406cbb1d3cc96fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 23:21:31 +0100 Subject: tracing/fastboot: move boot tracer structs and funcs into their own header. Impact: Cleanups on the boot tracer and ftrace This patch bring some cleanups about the boot tracer headers. The functions and structures of this tracer have nothing related to ftrace and should have so their own header file. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 41 ----------------------------------------- include/trace/boot.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 41 deletions(-) create mode 100644 include/trace/boot.h (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index dcbbf72a88b..4fbc4a8b86a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -287,45 +287,4 @@ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); #endif -/* - * Structure which defines the trace of an initcall. - * You don't have to fill the func field since it is - * only used internally by the tracer. - */ -struct boot_trace { - pid_t caller; - char func[KSYM_NAME_LEN]; - int result; - unsigned long long duration; /* usecs */ - ktime_t calltime; - ktime_t rettime; -}; - -#ifdef CONFIG_BOOT_TRACER -/* Append the trace on the ring-buffer */ -extern void trace_boot(struct boot_trace *it, initcall_t fn); - -/* Tells the tracer that smp_pre_initcall is finished. - * So we can start the tracing - */ -extern void start_boot_trace(void); - -/* Resume the tracing of other necessary events - * such as sched switches - */ -extern void enable_boot_trace(void); - -/* Suspend this tracing. Actually, only sched_switches tracing have - * to be suspended. Initcalls doesn't need it.) - */ -extern void disable_boot_trace(void); -#else -static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } -static inline void start_boot_trace(void) { } -static inline void enable_boot_trace(void) { } -static inline void disable_boot_trace(void) { } -#endif - - - #endif /* _LINUX_FTRACE_H */ diff --git a/include/trace/boot.h b/include/trace/boot.h new file mode 100644 index 00000000000..4cbe64e46cd --- /dev/null +++ b/include/trace/boot.h @@ -0,0 +1,43 @@ +#ifndef _LINUX_TRACE_BOOT_H +#define _LINUX_TRACE_BOOT_H + +/* + * Structure which defines the trace of an initcall. + * You don't have to fill the func field since it is + * only used internally by the tracer. + */ +struct boot_trace { + pid_t caller; + char func[KSYM_NAME_LEN]; + int result; + unsigned long long duration; /* usecs */ + ktime_t calltime; + ktime_t rettime; +}; + +#ifdef CONFIG_BOOT_TRACER +/* Append the trace on the ring-buffer */ +extern void trace_boot(struct boot_trace *it, initcall_t fn); + +/* Tells the tracer that smp_pre_initcall is finished. + * So we can start the tracing + */ +extern void start_boot_trace(void); + +/* Resume the tracing of other necessary events + * such as sched switches + */ +extern void enable_boot_trace(void); + +/* Suspend this tracing. Actually, only sched_switches tracing have + * to be suspended. Initcalls doesn't need it.) + */ +extern void disable_boot_trace(void); +#else +static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } +static inline void start_boot_trace(void) { } +static inline void enable_boot_trace(void) { } +static inline void disable_boot_trace(void) { } +#endif /* CONFIG_BOOT_TRACER */ + +#endif /* __LINUX_TRACE_BOOT_H */ -- cgit v1.2.3 From 74239072830ef3f1398edeb1bc1076fc330fd4a2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 23:24:42 +0100 Subject: tracing/fastboot: Use the ring-buffer timestamp for initcall entries Impact: Split the boot tracer entries in two parts: call and return Now that we are using the sched tracer from the boot tracer, we want to use the same timestamp than the ring-buffer to have consistent time captures between sched events and initcall events. So we get rid of the old time capture by the boot tracer and split the initcall events in two parts: call and return. This way we have the ring buffer timestamp of both. An example trace: [ 27.904149584] calling net_ns_init+0x0/0x1c0 @ 1 [ 27.904429624] initcall net_ns_init+0x0/0x1c0 returned 0 after 0 msecs [ 27.904575926] calling reboot_init+0x0/0x20 @ 1 [ 27.904655399] initcall reboot_init+0x0/0x20 returned 0 after 0 msecs [ 27.904800228] calling sysctl_init+0x0/0x30 @ 1 [ 27.905142914] initcall sysctl_init+0x0/0x30 returned 0 after 0 msecs [ 27.905287211] calling ksysfs_init+0x0/0xb0 @ 1 ##### CPU 0 buffer started #### init-1 [000] 27.905395: 1:120:R + [001] 11:115:S ##### CPU 1 buffer started #### -0 [001] 27.905425: 0:140:R ==> [001] 11:115:R init-1 [000] 27.905426: 1:120:D ==> [000] 0:140:R -0 [000] 27.905431: 0:140:R + [000] 4:115:S -0 [000] 27.905451: 0:140:R ==> [000] 4:115:R ksoftirqd/0-4 [000] 27.905456: 4:115:S ==> [000] 0:140:R udevd-11 [001] 27.905458: 11:115:R + [001] 14:115:R -0 [000] 27.905459: 0:140:R + [000] 4:115:S -0 [000] 27.905462: 0:140:R ==> [000] 4:115:R udevd-11 [001] 27.905462: 11:115:R ==> [001] 14:115:R ksoftirqd/0-4 [000] 27.905467: 4:115:S ==> [000] 0:140:R -0 [000] 27.905470: 0:140:R + [000] 4:115:S -0 [000] 27.905473: 0:140:R ==> [000] 4:115:R ksoftirqd/0-4 [000] 27.905476: 4:115:S ==> [000] 0:140:R -0 [000] 27.905479: 0:140:R + [000] 4:115:S -0 [000] 27.905482: 0:140:R ==> [000] 4:115:R ksoftirqd/0-4 [000] 27.905486: 4:115:S ==> [000] 0:140:R udevd-14 [001] 27.905499: 14:120:X ==> [001] 11:115:R udevd-11 [001] 27.905506: 11:115:R + [000] 1:120:D -0 [000] 27.905515: 0:140:R ==> [000] 1:120:R udevd-11 [001] 27.905517: 11:115:S ==> [001] 0:140:R [ 27.905557107] initcall ksysfs_init+0x0/0xb0 returned 0 after 3906 msecs [ 27.905705736] calling init_jiffies_clocksource+0x0/0x10 @ 1 [ 27.905779239] initcall init_jiffies_clocksource+0x0/0x10 returned 0 after 0 msecs [ 27.906769814] calling pm_init+0x0/0x30 @ 1 [ 27.906853627] initcall pm_init+0x0/0x30 returned 0 after 0 msecs [ 27.906997803] calling pm_disk_init+0x0/0x20 @ 1 [ 27.907076946] initcall pm_disk_init+0x0/0x20 returned 0 after 0 msecs [ 27.907222556] calling swsusp_header_init+0x0/0x30 @ 1 [ 27.907294325] initcall swsusp_header_init+0x0/0x30 returned 0 after 0 msecs [ 27.907439620] calling stop_machine_init+0x0/0x50 @ 1 init-1 [000] 27.907485: 1:120:R + [000] 2:115:S init-1 [000] 27.907490: 1:120:D ==> [000] 2:115:R kthreadd-2 [000] 27.907507: 2:115:R + [001] 15:115:R -0 [001] 27.907517: 0:140:R ==> [001] 15:115:R kthreadd-2 [000] 27.907517: 2:115:D ==> [000] 0:140:R -0 [000] 27.907521: 0:140:R + [000] 4:115:S -0 [000] 27.907524: 0:140:R ==> [000] 4:115:R udevd-15 [001] 27.907527: 15:115:D + [000] 2:115:D ksoftirqd/0-4 [000] 27.907537: 4:115:S ==> [000] 2:115:R udevd-15 [001] 27.907537: 15:115:D ==> [001] 0:140:R kthreadd-2 [000] 27.907546: 2:115:R + [000] 1:120:D kthreadd-2 [000] 27.907550: 2:115:S ==> [000] 1:120:R init-1 [000] 27.907584: 1:120:R + [000] 15: 0:D init-1 [000] 27.907589: 1:120:R + [000] 2:115:S init-1 [000] 27.907593: 1:120:D ==> [000] 15: 0:R udevd-15 [000] 27.907601: 15: 0:S ==> [000] 2:115:R ##### CPU 0 buffer started #### kthreadd-2 [000] 27.907616: 2:115:R + [001] 16:115:R ##### CPU 1 buffer started #### -0 [001] 27.907620: 0:140:R ==> [001] 16:115:R kthreadd-2 [000] 27.907621: 2:115:D ==> [000] 0:140:R udevd-16 [001] 27.907625: 16:115:D + [000] 2:115:D -0 [000] 27.907628: 0:140:R + [000] 4:115:S udevd-16 [001] 27.907629: 16:115:D ==> [001] 0:140:R -0 [000] 27.907631: 0:140:R ==> [000] 4:115:R ksoftirqd/0-4 [000] 27.907636: 4:115:S ==> [000] 2:115:R kthreadd-2 [000] 27.907644: 2:115:R + [000] 1:120:D kthreadd-2 [000] 27.907647: 2:115:S ==> [000] 1:120:R init-1 [000] 27.907657: 1:120:R + [001] 16: 0:D -0 [001] 27.907666: 0:140:R ==> [001] 16: 0:R [ 27.907703862] initcall stop_machine_init+0x0/0x50 returned 0 after 0 msecs [ 27.907850704] calling filelock_init+0x0/0x30 @ 1 [ 27.907926573] initcall filelock_init+0x0/0x30 returned 0 after 0 msecs [ 27.908071327] calling init_script_binfmt+0x0/0x10 @ 1 [ 27.908165195] initcall init_script_binfmt+0x0/0x10 returned 0 after 0 msecs [ 27.908309461] calling init_elf_binfmt+0x0/0x10 @ 1 Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/trace/boot.h | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/boot.h b/include/trace/boot.h index 4cbe64e46cd..6b54537eab0 100644 --- a/include/trace/boot.h +++ b/include/trace/boot.h @@ -2,22 +2,30 @@ #define _LINUX_TRACE_BOOT_H /* - * Structure which defines the trace of an initcall. + * Structure which defines the trace of an initcall + * while it is called. * You don't have to fill the func field since it is * only used internally by the tracer. */ -struct boot_trace { +struct boot_trace_call { pid_t caller; char func[KSYM_NAME_LEN]; - int result; - unsigned long long duration; /* usecs */ - ktime_t calltime; - ktime_t rettime; +}; + +/* + * Structure which defines the trace of an initcall + * while it returns. + */ +struct boot_trace_ret { + char func[KSYM_NAME_LEN]; + int result; + unsigned long long duration; /* nsecs */ }; #ifdef CONFIG_BOOT_TRACER -/* Append the trace on the ring-buffer */ -extern void trace_boot(struct boot_trace *it, initcall_t fn); +/* Append the traces on the ring-buffer */ +extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn); +extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn); /* Tells the tracer that smp_pre_initcall is finished. * So we can start the tracing @@ -34,7 +42,12 @@ extern void enable_boot_trace(void); */ extern void disable_boot_trace(void); #else -static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } +static inline +void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { } + +static inline +void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { } + static inline void start_boot_trace(void) { } static inline void enable_boot_trace(void) { } static inline void disable_boot_trace(void) { } -- cgit v1.2.3 From 1f0d69a9fc815db82f15722bf05227190b1d714d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 00:14:39 -0500 Subject: tracing: profile likely and unlikely annotations Impact: new unlikely/likely profiler Andrew Morton recently suggested having an in-kernel way to profile likely and unlikely macros. This patch achieves that goal. When configured, every(*) likely and unlikely macro gets a counter attached to it. When the condition is hit, the hit and misses of that condition are recorded. These numbers can later be retrieved by: /debugfs/tracing/profile_likely - All likely markers /debugfs/tracing/profile_unlikely - All unlikely markers. # cat /debug/tracing/profile_unlikely | head correct incorrect % Function File Line ------- --------- - -------- ---- ---- 2167 0 0 do_arch_prctl process_64.c 832 0 0 0 do_arch_prctl process_64.c 804 2670 0 0 IS_ERR err.h 34 71230 5693 7 __switch_to process_64.c 673 76919 0 0 __switch_to process_64.c 639 43184 33743 43 __switch_to process_64.c 624 12740 64181 83 __switch_to process_64.c 594 12740 64174 83 __switch_to process_64.c 590 # cat /debug/tracing/profile_unlikely | \ awk '{ if ($3 > 25) print $0; }' |head -20 44963 35259 43 __switch_to process_64.c 624 12762 67454 84 __switch_to process_64.c 594 12762 67447 84 __switch_to process_64.c 590 1478 595 28 syscall_get_error syscall.h 51 0 2821 100 syscall_trace_leave ptrace.c 1567 0 1 100 native_smp_prepare_cpus smpboot.c 1237 86338 265881 75 calc_delta_fair sched_fair.c 408 210410 108540 34 calc_delta_mine sched.c 1267 0 54550 100 sched_info_queued sched_stats.h 222 51899 66435 56 pick_next_task_fair sched_fair.c 1422 6 10 62 yield_task_fair sched_fair.c 982 7325 2692 26 rt_policy sched.c 144 0 1270 100 pre_schedule_rt sched_rt.c 1261 1268 48073 97 pick_next_task_rt sched_rt.c 884 0 45181 100 sched_info_dequeued sched_stats.h 177 0 15 100 sched_move_task sched.c 8700 0 15 100 sched_move_task sched.c 8690 53167 33217 38 schedule sched.c 4457 0 80208 100 sched_info_switch sched_stats.h 270 30585 49631 61 context_switch sched.c 2619 # cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }' 39900 36577 47 pick_next_task sched.c 4397 20824 15233 42 switch_mm mmu_context_64.h 18 0 7 100 __cancel_work_timer workqueue.c 560 617 66484 99 clocksource_adjust timekeeping.c 456 0 346340 100 audit_syscall_exit auditsc.c 1570 38 347350 99 audit_get_context auditsc.c 732 0 345244 100 audit_syscall_entry auditsc.c 1541 38 1017 96 audit_free auditsc.c 1446 0 1090 100 audit_alloc auditsc.c 862 2618 1090 29 audit_alloc auditsc.c 858 0 6 100 move_masked_irq migration.c 9 1 198 99 probe_sched_wakeup trace_sched_switch.c 58 2 2 50 probe_wakeup trace_sched_wakeup.c 227 0 2 100 probe_wakeup_sched_switch trace_sched_wakeup.c 144 4514 2090 31 __grab_cache_page filemap.c 2149 12882 228786 94 mapping_unevictable pagemap.h 50 4 11 73 __flush_cpu_slab slub.c 1466 627757 330451 34 slab_free slub.c 1731 2959 61245 95 dentry_lru_del_init dcache.c 153 946 1217 56 load_elf_binary binfmt_elf.c 904 102 82 44 disk_put_part genhd.h 206 1 1 50 dst_gc_task dst.c 82 0 19 100 tcp_mss_split_point tcp_output.c 1126 As you can see by the above, there's a bit of work to do in rethinking the use of some unlikelys and likelys. Note: the unlikely case had 71 hits that were more than 25%. Note: After submitting my first version of this patch, Andrew Morton showed me a version written by Daniel Walker, where I picked up the following ideas from: 1) Using __builtin_constant_p to avoid profiling fixed values. 2) Using __FILE__ instead of instruction pointers. 3) Using the preprocessor to stop all profiling of likely annotations from vsyscall_64.c. Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar for their feed back on this patch. (*) Not ever unlikely is recorded, those that are used by vsyscalls (a few of them) had to have profiling disabled. Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Theodore Tso Cc: Arjan van de Ven Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 14 ++++++++- include/linux/compiler.h | 61 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 80744606bad..e10beb5335c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -45,6 +45,17 @@ #define MCOUNT_REC() #endif +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ + *(_ftrace_likely) \ + VMLINUX_SYMBOL(__stop_likely_profile) = .; \ + VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ + *(_ftrace_unlikely) \ + VMLINUX_SYMBOL(__stop_unlikely_profile) = .; +#else +#define LIKELY_PROFILE() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -62,7 +73,8 @@ VMLINUX_SYMBOL(__stop___markers) = .; \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ - VMLINUX_SYMBOL(__stop___tracepoints) = .; + VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + LIKELY_PROFILE() #define RO_DATA(align) \ . = ALIGN((align)); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 98115d9d04d..935e30cfaf3 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +struct ftrace_likely_data { + const char *func; + const char *file; + unsigned line; + unsigned long correct; + unsigned long incorrect; +}; +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +#define likely_notrace(x) __builtin_expect(!!(x), 1) +#define unlikely_notrace(x) __builtin_expect(!!(x), 0) + +#define likely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_likely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = likely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 1); \ + ______r; \ + }) +#define unlikely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_unlikely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = unlikely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 0); \ + ______r; \ + }) + +/* + * Using __builtin_constant_p(x) to ignore cases where the return + * value is always the same. This idea is taken from a similar patch + * written by Daniel Walker. + */ +# ifndef likely +# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) +# endif +#else +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#endif /* Optimization barrier */ #ifndef barrier -- cgit v1.2.3 From e25cf3db560e803292946ef23a30c69e341ce56f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Oct 2008 15:55:07 +0200 Subject: lockdep: include/linux/lockdep.h - fix warning in net/bluetooth/af_bluetooth.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix this warning: net/bluetooth/af_bluetooth.c:60: warning: ‘bt_key_strings’ defined but not used net/bluetooth/af_bluetooth.c:71: warning: ‘bt_slock_key_strings’ defined but not used this is a lockdep macro problem in the !LOCKDEP case. We cannot convert it to an inline because the macro works on multiple types, but we can mark the parameter used. [ also clean up a misaligned tab in sock_lock_init_class_and_name() ] [ also remove #ifdefs from around af_family_clock_key strings - which were certainly added to get rid of the ugly build warnings. ] Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 5 +++-- include/net/sock.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index fc9f8e88123..8956daf64ab 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -336,10 +336,11 @@ static inline void lockdep_on(void) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) -# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) +# define lockdep_init_map(lock, name, key, sub) \ + do { (void)(name); (void)(key); } while (0) # define lockdep_set_class(lock, key) do { (void)(key); } while (0) # define lockdep_set_class_and_name(lock, key, name) \ - do { (void)(key); } while (0) + do { (void)(key); (void)(name); } while (0) #define lockdep_set_class_and_subclass(lock, key, sub) \ do { (void)(key); } while (0) #define lockdep_set_subclass(lock, sub) do { } while (0) diff --git a/include/net/sock.h b/include/net/sock.h index c04f9e18ea2..2f47107f6d0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -815,7 +815,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ - sk->sk_lock.owned = 0; \ + sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ -- cgit v1.2.3 From 2b7d0390a6d6d595f43ea3806639664afe5b9ebe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 13:17:38 +0100 Subject: tracing: branch tracer, fix vdso crash Impact: fix bootup crash the branch tracer missed arch/x86/vdso/vclock_gettime.c from disabling tracing, which caused such bootup crashes: [ 201.840097] init[1]: segfault at 7fffed3fe7c0 ip 00007fffed3fea2e sp 000077 also clean up the ugly ifdefs in arch/x86/kernel/vsyscall_64.c by creating DISABLE_UNLIKELY_PROFILE facility for code to turn off instrumentation on a per file basis. Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 935e30cfaf3..63b7d9089d6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,7 +59,11 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +/* + * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code + * to disable branch tracing on a per file basis. + */ +#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE) struct ftrace_likely_data { const char *func; const char *file; -- cgit v1.2.3 From 2ed84eeb8808cf3c9f039213ca137ffd7d753f0e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 15:24:24 -0500 Subject: trace: rename unlikely profiler to branch profiler Impact: name change of unlikely tracer and profiler Ingo Molnar suggested changing the config from UNLIKELY_PROFILE to BRANCH_PROFILING. I never did like the "unlikely" name so I went one step farther, and renamed all the unlikely configurations to a "BRANCH" variant. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/compiler.h | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e10beb5335c..a5e4ed9baec 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -45,7 +45,7 @@ #define MCOUNT_REC() #endif -#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +#ifdef CONFIG_TRACE_BRANCH_PROFILING #define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ *(_ftrace_likely) \ VMLINUX_SYMBOL(__stop_likely_profile) = .; \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 63b7d9089d6..c7d804a7a4d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,26 +59,27 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -/* - * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code - * to disable branch tracing on a per file basis. - */ -#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE) -struct ftrace_likely_data { +struct ftrace_branch_data { const char *func; const char *file; unsigned line; unsigned long correct; unsigned long incorrect; }; -void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +/* + * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code + * to disable branch tracing on a per file basis. + */ +#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING) +void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) #define likely_check(x) ({ \ int ______r; \ - static struct ftrace_likely_data \ + static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_likely"))) \ ______f = { \ @@ -93,7 +94,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); }) #define unlikely_check(x) ({ \ int ______r; \ - static struct ftrace_likely_data \ + static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_unlikely"))) \ ______f = { \ -- cgit v1.2.3 From 31e889098a80ceb3e9e3c555d522b2686a6663c6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: ftrace: pass module struct to arch dynamic ftrace functions Impact: allow archs more flexibility on dynamic ftrace implementations Dynamic ftrace has largly been developed on x86. Since x86 does not have the same limitations as other architectures, the ftrace interaction between the generic code and the architecture specific code was not flexible enough to handle some of the issues that other architectures have. Most notably, module trampolines. Due to the limited branch distance that archs make in calling kernel core code from modules, the module load code must create a trampoline to jump to what will make the larger jump into core kernel code. The problem arises when this happens to a call to mcount. Ftrace checks all code before modifying it and makes sure the current code is what it expects. Right now, there is not enough information to handle modifying module trampolines. This patch changes the API between generic dynamic ftrace code and the arch dependent code. There is now two functions for modifying code: ftrace_make_nop(mod, rec, addr) - convert the code at rec->ip into a nop, where the original text is calling addr. (mod is the module struct if called by module init) ftrace_make_caller(rec, addr) - convert the code rec->ip that should be a nop into a caller to addr. The record "rec" now has a new field called "arch" where the architecture can add any special attributes to each call site record. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 53 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4fbc4a8b86a..166a2070ef6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE +/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ +#include + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -88,6 +91,7 @@ struct dyn_ftrace { struct list_head list; unsigned long ip; /* address of mcount call-site */ unsigned long flags; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); @@ -95,22 +99,40 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); -extern unsigned char *ftrace_nop_replace(void); -extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -/* May be defined in arch */ -extern int ftrace_arch_read_dyn_info(char *buf, int size); +/** + * ftrace_make_nop - convert code into top + * @mod: module structure if called by module load initialization + * @rec: the mcount call site record + * @addr: the address that the call site should be calling + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should be a caller to @addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr); /** - * ftrace_modify_code - modify code segment - * @ip: the address of the code segment - * @old_code: the contents of what is expected to be there - * @new_code: the code to patch in + * ftrace_make_call - convert a nop call site into a call to addr + * @rec: the mcount call site record + * @addr: the address that the call site should call * * This is a very sensitive operation and great care needs * to be taken by the arch. The operation should carefully @@ -118,6 +140,8 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * what we expect it to be, and then on success of the compare, * it should write to the location. * + * The code segment at @rec->ip should be a nop + * * Return must be: * 0 on success * -EFAULT on error reading the location @@ -125,8 +149,11 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * -EPERM on error writing to the location * Any other value will be considered a failure. */ -extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code); +extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); + + +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); @@ -259,11 +286,13 @@ static inline void ftrace_dump(void) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(unsigned long *start, unsigned long *end); +extern void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } static inline void -ftrace_init_module(unsigned long *start, unsigned long *end) { } +ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { } #endif -- cgit v1.2.3 From e7d3737ea1b102030f44e96c97754101e41515f0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 06:02:06 +0100 Subject: tracing/function-return-tracer: support for dynamic ftrace on function return tracer This patch adds the support for dynamic tracing on the function return tracer. The whole difference with normal dynamic function tracing is that we don't need to hook on a particular callback. The only pro that we want is to nop or set dynamically the calls to ftrace_caller (which is ftrace_return_caller here). Some security checks ensure that we are not trying to launch dynamic tracing for return tracing while normal function tracing is already running. An example of trace with getnstimeofday set as a filter: ktime_get_ts+0x22/0x50 -> getnstimeofday (2283 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1396 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1825 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1426 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1524 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1434 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1502 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1404 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1397 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1051 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1314 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1344 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1163 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1390 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1374 ns) Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 166a2070ef6..f1af1aab00e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -25,6 +25,17 @@ struct ftrace_ops { extern int function_trace_stop; +/* + * Type of the current tracing. + */ +enum ftrace_tracing_type_t { + FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */ + FTRACE_TYPE_RETURN, /* Hook the return of the function */ +}; + +/* Current tracing type, default is FTRACE_TYPE_ENTER */ +extern enum ftrace_tracing_type_t ftrace_tracing_type; + /** * ftrace_stop - stop function tracer. * @@ -104,6 +115,9 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#ifdef CONFIG_FUNCTION_RET_TRACER +extern void ftrace_return_caller(void); +#endif /** * ftrace_make_nop - convert code into top @@ -310,7 +324,7 @@ struct ftrace_retfunc { /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); -extern void register_ftrace_return(trace_function_return_t func); +extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); -- cgit v1.2.3 From 954e100d2275cb2f150f2b18d5cddcdf67b956ac Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:34 -0500 Subject: rcu: add rcu_read_*_sched_notrace() Impact: new API, useful for tracepoints and markers. Add _notrace version to rcu_read_*_sched(). Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E McKenney Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e43e3..895dc9c1088 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -142,6 +142,7 @@ struct rcu_head { * on the write-side to insure proper synchronization. */ #define rcu_read_lock_sched() preempt_disable() +#define rcu_read_lock_sched_notrace() preempt_disable_notrace() /* * rcu_read_unlock_sched - marks the end of a RCU-classic critical section @@ -149,6 +150,7 @@ struct rcu_head { * See rcu_read_lock_sched for more information. */ #define rcu_read_unlock_sched() preempt_enable() +#define rcu_read_unlock_sched_notrace() preempt_enable_notrace() -- cgit v1.2.3 From e3f8c4b9117d70127a8cab480af83bbfd048a28b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Nov 2008 17:47:36 -0500 Subject: markers: add missing stdargs.h include, needed due to va_list usage Impact: build fix (for future changes) That seemed to cause built issue when marker.h is included early, even though stdargs.h is included in kernel.h. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 4cf45472d9f..05ec0df3708 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -12,6 +12,7 @@ * See the file COPYING for more details. */ +#include #include struct module; -- cgit v1.2.3 From c1df1bd2c4d4b20c83755a0f41956b57aec4842a Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:39 -0500 Subject: markers: auto enable tracepoints (new API : trace_mark_tp()) Impact: new API Add a new API trace_mark_tp(), which declares a marker within a tracepoint probe. When the marker is activated, the tracepoint is automatically enabled. No branch test is used at the marker site, because it would be a duplicate of the branch already present in the tracepoint. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 05ec0df3708..57a307018ce 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -49,6 +49,8 @@ struct marker { void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; + const char *tp_name; /* Optional tracepoint name */ + void *tp_cb; /* Optional tracepoint callback */ } __attribute__((aligned(8))); #ifdef CONFIG_MARKERS @@ -73,7 +75,7 @@ struct marker { __attribute__((section("__markers"), aligned(8))) = \ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL }; \ + { __mark_empty_function, NULL}, NULL, NULL, NULL }; \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -81,11 +83,38 @@ struct marker { } \ } while (0) +#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ + do { \ + void __check_tp_type(void) \ + { \ + register_trace_##tp_name(tp_cb); \ + } \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, \ + { __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\ + __mark_check_format(format, ## args); \ + (*__mark_##name.call)(&__mark_##name, call_private, \ + ## args); \ + } while (0) + extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ #define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) +#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ + do { \ + void __check_tp_type(void) \ + { \ + register_trace_##tp_name(tp_cb); \ + } \ + __mark_check_format(format, ## args); \ + } while (0) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } @@ -117,6 +146,20 @@ static inline void marker_update_probe_range(struct marker *begin, #define _trace_mark(name, format, args...) \ __trace_mark(1, name, NULL, format, ## args) +/** + * trace_mark_tp - Marker in a tracepoint callback + * @name: marker name, not quoted. + * @tp_name: tracepoint name, not quoted. + * @tp_cb: tracepoint callback. Should have an associated global symbol so it + * is not optimized away by the compiler (should not be static). + * @format: format string + * @args...: variable argument list + * + * Places a marker in a tracepoint callback. + */ +#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ + __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) + /** * MARK_NOARGS - Format string for a marker with no argument. */ -- cgit v1.2.3 From a0bca6a59ebc052751eed6e3b182c153495672d8 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:40 -0500 Subject: markers: create DEFINE_MARKER and GET_MARKER (new API) Impact: new API. Allow markers to be used only for declaration, without function call associated. Useful to create specialized probes. The problem we had is that two function calls were required when one wanted to put a marker in a tracepoint probe. Now the marker can be used simply for trace data type declaration, leaving the trace write work within the tracepoint probe without any additional function call. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 57a307018ce..34c14bc957f 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -55,6 +55,22 @@ struct marker { #ifdef CONFIG_MARKERS +#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ + NULL, tp_name_str, tp_cb } + +#define DEFINE_MARKER(name, format) \ + _DEFINE_MARKER(name, NULL, NULL, format) + +#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ + _DEFINE_MARKER(name, #tp_name, tp_cb, format) + /* * Note : the empty asm volatile with read constraint is used here instead of a * "used" attribute to fix a gcc 4.1.x bug. @@ -68,14 +84,7 @@ struct marker { */ #define __trace_mark(generic, name, call_private, format, args...) \ do { \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL, NULL, NULL }; \ + DEFINE_MARKER(name, format); \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -89,14 +98,7 @@ struct marker { { \ register_trace_##tp_name(tp_cb); \ } \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\ + DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ __mark_check_format(format, ## args); \ (*__mark_##name.call)(&__mark_##name, call_private, \ ## args); \ @@ -104,7 +106,11 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); + +#define GET_MARKER(name) (__mark_##name) + #else /* !CONFIG_MARKERS */ +#define DEFINE_MARKER(name, tp_name, tp_cb, format) #define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) #define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ @@ -118,6 +124,7 @@ extern void marker_update_probe_range(struct marker *begin, static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } +#define GET_MARKER(name) #endif /* CONFIG_MARKERS */ /** -- cgit v1.2.3 From da7b3eab167091693ad215ad7692f7d0d24d1356 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:43 -0500 Subject: tracepoints: use rcu_*_sched_notrace Make sure tracepoints can be called within ftrace callbacks. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63064e9403f..69648c54a32 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -40,14 +40,14 @@ struct tracepoint { do { \ void **it_func; \ \ - rcu_read_lock_sched(); \ + rcu_read_lock_sched_notrace(); \ it_func = rcu_dereference((tp)->funcs); \ if (it_func) { \ do { \ ((void(*)(proto))(*it_func))(args); \ } while (*(++it_func)); \ } \ - rcu_read_unlock_sched(); \ + rcu_read_unlock_sched_notrace(); \ } while (0) /* -- cgit v1.2.3 From c420970ef476d7d68df119711700666224001f43 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:44 -0500 Subject: tracepoints: use unregister return value Impact: bugfix. Unregistering a tracepoint can fail. Return the error value. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 69648c54a32..c60a791f887 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -73,9 +73,9 @@ struct tracepoint { return tracepoint_probe_register(#name ":" #proto, \ (void *)probe); \ } \ - static inline void unregister_trace_##name(void (*probe)(proto))\ + static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - tracepoint_probe_unregister(#name ":" #proto, \ + return tracepoint_probe_unregister(#name ":" #proto, \ (void *)probe); \ } @@ -92,8 +92,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, { \ return -ENOSYS; \ } \ - static inline void unregister_trace_##name(void (*probe)(proto))\ - { } + static inline int unregister_trace_##name(void (*probe)(proto)) \ + { \ + return -ENOSYS; \ + } static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) -- cgit v1.2.3 From 5f382671def7cb9c0f4b75d586dc5f60dca5e1c3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:45 -0500 Subject: tracepoints: do not put arguments in name Impact: cleanup That's overkill, takes space. We have a global tracepoint registery in header files anyway. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c60a791f887..7e9b42aeae0 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,7 +60,7 @@ struct tracepoint { { \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) \ - = #name ":" #proto; \ + = #name; \ static struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"), aligned(8))) = \ { __tpstrtab_##name, 0, NULL }; \ @@ -70,13 +70,11 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_register(#name ":" #proto, \ - (void *)probe); \ + return tracepoint_probe_register(#name, (void *)probe); \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_unregister(#name ":" #proto, \ - (void *)probe); \ + return tracepoint_probe_unregister(#name, (void *)probe);\ } extern void tracepoint_update_probe_range(struct tracepoint *begin, -- cgit v1.2.3 From 7e066fb870fcd1025ec3ba7bbde5d541094f4ce1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:47 -0500 Subject: tracepoints: add DECLARE_TRACE() and DEFINE_TRACE() Impact: API *CHANGE*. Must update all tracepoint users. Add DEFINE_TRACE() to tracepoints to let them declare the tracepoint structure in a single spot for all the kernel. It helps reducing memory consumption, especially when declaring a lot of tracepoints, e.g. for kmalloc tracing. *API CHANGE WARNING*: now, DECLARE_TRACE() must be used in headers for tracepoint declarations rather than DEFINE_TRACE(). This is the sane way to do it. The name previously used was misleading. Updates scheduler instrumentation to follow this API change. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 1 + include/linux/tracepoint.h | 35 +++++++++++++++++++++++++---------- include/trace/sched.h | 24 ++++++++++++------------ 3 files changed, 38 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a5e4ed9baec..3b46ae46493 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -71,6 +71,7 @@ VMLINUX_SYMBOL(__start___markers) = .; \ *(__markers) \ VMLINUX_SYMBOL(__stop___markers) = .; \ + . = ALIGN(32); \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7e9b42aeae0..75700545836 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -24,8 +24,12 @@ struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ void **funcs; -} __attribute__((aligned(8))); - +} __attribute__((aligned(32))); /* + * Aligned on 32 bytes because it is + * globally visible and gcc happily + * align these on the structure size. + * Keep in sync with vmlinux.lds.h. + */ #define TPPROTO(args...) args #define TPARGS(args...) args @@ -55,15 +59,10 @@ struct tracepoint { * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ + extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) \ - = #name; \ - static struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"), aligned(8))) = \ - { __tpstrtab_##name, 0, NULL }; \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ TPPROTO(proto), TPARGS(args)); \ @@ -77,11 +76,23 @@ struct tracepoint { return tracepoint_probe_unregister(#name, (void *)probe);\ } +#define DEFINE_TRACE(name) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"), aligned(32))) = \ + { __tpstrtab_##name, 0, NULL } + +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ + EXPORT_SYMBOL_GPL(__tracepoint_##name) +#define EXPORT_TRACEPOINT_SYMBOL(name) \ + EXPORT_SYMBOL(__tracepoint_##name) + extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -95,6 +106,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) + static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } diff --git a/include/trace/sched.h b/include/trace/sched.h index ad47369d01b..9b2854abf7e 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -4,52 +4,52 @@ #include #include -DEFINE_TRACE(sched_kthread_stop, +DECLARE_TRACE(sched_kthread_stop, TPPROTO(struct task_struct *t), TPARGS(t)); -DEFINE_TRACE(sched_kthread_stop_ret, +DECLARE_TRACE(sched_kthread_stop_ret, TPPROTO(int ret), TPARGS(ret)); -DEFINE_TRACE(sched_wait_task, +DECLARE_TRACE(sched_wait_task, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup, +DECLARE_TRACE(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup_new, +DECLARE_TRACE(sched_wakeup_new, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_switch, +DECLARE_TRACE(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next)); -DEFINE_TRACE(sched_migrate_task, +DECLARE_TRACE(sched_migrate_task, TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), TPARGS(rq, p, dest_cpu)); -DEFINE_TRACE(sched_process_free, +DECLARE_TRACE(sched_process_free, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_exit, +DECLARE_TRACE(sched_process_exit, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_wait, +DECLARE_TRACE(sched_process_wait, TPPROTO(struct pid *pid), TPARGS(pid)); -DEFINE_TRACE(sched_process_fork, +DECLARE_TRACE(sched_process_fork, TPPROTO(struct task_struct *parent, struct task_struct *child), TPARGS(parent, child)); -DEFINE_TRACE(sched_signal_send, +DECLARE_TRACE(sched_signal_send, TPPROTO(int sig, struct task_struct *p), TPARGS(sig, p)); -- cgit v1.2.3 From 0231022cc32d5f2e7f3c06b75691dda0ad6aec33 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Nov 2008 03:22:41 +0100 Subject: tracing/function-return-tracer: add the overrun field Impact: help to find the better depth of trace We decided to arbitrary define the depth of function return trace as "20". Perhaps this is not enough. To help finding an optimal depth, we measure now the overrun: the number of functions that have been missed for the current thread. By default this is not displayed, we have to do set a particular flag on the return tracer: echo overrun > /debug/tracing/trace_options And the overrun will be printed on the right. As the trace shows below, the current 20 depth is not enough. update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838) update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838) do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838) tick_do_update_jiffies64+0xbf/0x160 -> do_timer (5339 ns) (Overruns: 2838) tick_sched_timer+0x6a/0xf0 -> tick_do_update_jiffies64 (7209 ns) (Overruns: 2838) vgacon_set_cursor_size+0x98/0x120 -> native_io_delay (2613 ns) (Overruns: 274) vgacon_cursor+0x16e/0x1d0 -> vgacon_set_cursor_size (33151 ns) (Overruns: 274) set_cursor+0x5f/0x80 -> vgacon_cursor (36432 ns) (Overruns: 274) con_flush_chars+0x34/0x40 -> set_cursor (38790 ns) (Overruns: 274) release_console_sem+0x1ec/0x230 -> up (721 ns) (Overruns: 274) release_console_sem+0x225/0x230 -> wake_up_klogd (316 ns) (Overruns: 274) con_flush_chars+0x39/0x40 -> release_console_sem (2996 ns) (Overruns: 274) con_write+0x22/0x30 -> con_flush_chars (46067 ns) (Overruns: 274) n_tty_write+0x1cc/0x360 -> con_write (292670 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2a/0x90 -> native_apic_mem_write (330 ns) (Overruns: 274) irq_enter+0x17/0x70 -> idle_cpu (413 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2f/0x90 -> irq_enter (1525 ns) (Overruns: 274) ktime_get_ts+0x40/0x70 -> getnstimeofday (465 ns) (Overruns: 274) ktime_get_ts+0x60/0x70 -> set_normalized_timespec (436 ns) (Overruns: 274) ktime_get+0x16/0x30 -> ktime_get_ts (2501 ns) (Overruns: 274) hrtimer_interrupt+0x77/0x1a0 -> ktime_get (3439 ns) (Overruns: 274) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ include/linux/sched.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f1af1aab00e..f7ba4ea5e12 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -318,6 +318,8 @@ struct ftrace_retfunc { unsigned long func; /* Current function */ unsigned long long calltime; unsigned long long rettime; + /* Number of functions that overran the depth limit for current task */ + unsigned long overrun; }; #ifdef CONFIG_FUNCTION_RET_TRACER diff --git a/include/linux/sched.h b/include/linux/sched.h index 61c8cc36028..c8e0db46420 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2016,6 +2016,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct * used. */ task_thread_info(p)->curr_ret_stack = -1; + atomic_set(&task_thread_info(p)->trace_overrun, 0); #endif } -- cgit v1.2.3 From f201ae2356c74bcae130b2177b3dca903ea98071 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Nov 2008 06:22:56 +0100 Subject: tracing/function-return-tracer: store return stack into task_struct and allocate it dynamically Impact: use deeper function tracing depth safely Some tests showed that function return tracing needed a more deeper depth of function calls. But it could be unsafe to store these return addresses to the stack. So these arrays will now be allocated dynamically into task_struct of current only when the tracer is activated. Typical scheme when tracer is activated: - allocate a return stack for each task in global list. - fork: allocate the return stack for the newly created task - exit: free return stack of current - idle init: same as fork I chose a default depth of 50. I don't have overruns anymore. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 +++++ include/linux/sched.h | 23 +++++++++++------------ 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e12..2ba259b2def 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -323,6 +323,8 @@ struct ftrace_retfunc { }; #ifdef CONFIG_FUNCTION_RET_TRACER +#define FTRACE_RETFUNC_DEPTH 50 +#define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); @@ -330,6 +332,9 @@ extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); + +extern void ftrace_retfunc_init_task(struct task_struct *t); +extern void ftrace_retfunc_exit_task(struct task_struct *t); #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8e0db46420..bee1e93c95a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1352,6 +1352,17 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; +#ifdef CONFIG_FUNCTION_RET_TRACER + /* Index of current stored adress in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack *ret_stack; + /* + * Number of functions that haven't been traced + * because of depth overrun. + */ + atomic_t trace_overrun; +#endif }; /* @@ -2006,18 +2017,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* - * When fork() creates a child process, this function is called. - * But the child task may not inherit the return adresses traced - * by the return function tracer because it will directly execute - * in userspace and will not return to kernel functions its parent - * used. - */ - task_thread_info(p)->curr_ret_stack = -1; - atomic_set(&task_thread_info(p)->trace_overrun, 0); -#endif } static inline unsigned long *end_of_stack(struct task_struct *p) -- cgit v1.2.3 From 82f60f0bc854aada696f27d863c03bef91f1509d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 23 Nov 2008 09:18:56 +0100 Subject: tracing/function-return-tracer: clean up task start/exit callbacks Impact: cleanup Eliminate #ifdefs in core code by using empty inline functions. Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2ba259b2def..938ca194264 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -335,6 +335,9 @@ extern void unregister_ftrace_return(void); extern void ftrace_retfunc_init_task(struct task_struct *t); extern void ftrace_retfunc_exit_task(struct task_struct *t); +#else +static inline void ftrace_retfunc_init_task(struct task_struct *t) { } +static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } #endif #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 02b67518e2b1c490787dac7f35e1204e74fe21ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sat, 22 Nov 2008 13:28:47 +0200 Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: add new (default-off) tracing visualization feature Usage example: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled .... run application ... echo 0 >tracing_enabled Then read one of 'trace','latency_trace','trace_pipe'. To get the best output you can compile your userspace programs with frame pointers (at least glibc + the app you are tracing). Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- include/linux/stacktrace.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b106fd8e0d5..68de51468f5 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); + +#ifdef CONFIG_X86 +extern void save_stack_trace_user(struct stack_trace *trace); +#else +# define save_stack_trace_user(trace) do { } while (0) +#endif + #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif -- cgit v1.2.3 From 74e2f334f4440cbcb63e9ebbcdcea430d41bdfa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sat, 22 Nov 2008 13:28:48 +0200 Subject: vfs, seqfile: make mangle_path() global MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: expose new VFS API make mangle_path() available, as per the suggestions of Christoph Hellwig and Al Viro: http://lkml.org/lkml/2008/11/4/338 Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- include/linux/seq_file.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dc50bcc282a..b3dfa72f13b 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -34,6 +34,7 @@ struct seq_operations { #define SEQ_SKIP 1 +char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); -- cgit v1.2.3 From 42f565e116e0408b5ddc21a33c4a4d41fd572420 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Nov 2008 23:57:47 -0500 Subject: trace: remove extra assign in branch check Impact: clean up of branch check The unlikely/likely profiler does an extra assign of the f.line. This is not needed since it is already calculated at compile time. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c7d804a7a4d..c25e525121f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -87,7 +87,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .file = __FILE__, \ .line = __LINE__, \ }; \ - ______f.line = __LINE__; \ ______r = likely_notrace(x); \ ftrace_likely_update(&______f, ______r, 1); \ ______r; \ @@ -102,7 +101,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .file = __FILE__, \ .line = __LINE__, \ }; \ - ______f.line = __LINE__; \ ______r = unlikely_notrace(x); \ ftrace_likely_update(&______f, ______r, 0); \ ______r; \ -- cgit v1.2.3 From 45b797492a0758e64dff74e9db70e1f65e0603a5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 00:40:40 -0500 Subject: trace: consolidate unlikely and likely profiler Impact: clean up to make one profiler of like and unlikely tracer The likely and unlikely profiler prints out the file and line numbers of the annotated branches that it is profiling. It shows the number of times it was correct or incorrect in its guess. Having two different files or sections for that matter to tell us if it was a likely or unlikely is pretty pointless. We really only care if it was correct or not. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 9 +++------ include/linux/compiler.h | 24 +++++------------------- 2 files changed, 8 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 3b46ae46493..8bccb49981e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -46,12 +46,9 @@ #endif #ifdef CONFIG_TRACE_BRANCH_PROFILING -#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ - *(_ftrace_likely) \ - VMLINUX_SYMBOL(__stop_likely_profile) = .; \ - VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ - *(_ftrace_unlikely) \ - VMLINUX_SYMBOL(__stop_unlikely_profile) = .; +#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \ + *(_ftrace_annotated_branch) \ + VMLINUX_SYMBOL(__stop_annotated_branch_profile) = .; #else #define LIKELY_PROFILE() #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c25e525121f..0628a2013fa 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -77,32 +77,18 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) -#define likely_check(x) ({ \ +#define __branch_check__(x, expect) ({ \ int ______r; \ static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_likely"))) \ + __attribute__((section("_ftrace_annotated_branch"))) \ ______f = { \ .func = __func__, \ .file = __FILE__, \ .line = __LINE__, \ }; \ ______r = likely_notrace(x); \ - ftrace_likely_update(&______f, ______r, 1); \ - ______r; \ - }) -#define unlikely_check(x) ({ \ - int ______r; \ - static struct ftrace_branch_data \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_unlikely"))) \ - ______f = { \ - .func = __func__, \ - .file = __FILE__, \ - .line = __LINE__, \ - }; \ - ______r = unlikely_notrace(x); \ - ftrace_likely_update(&______f, ______r, 0); \ + ftrace_likely_update(&______f, ______r, expect); \ ______r; \ }) @@ -112,10 +98,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); * written by Daniel Walker. */ # ifndef likely -# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1)) # endif # ifndef unlikely -# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) +# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) # endif #else # define likely(x) __builtin_expect(!!(x), 1) -- cgit v1.2.3 From 2bcd521a684cc94befbe2ce7d5b613c841b0d304 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 01:30:54 -0500 Subject: trace: profile all if conditionals Impact: feature to profile if statements This patch adds a branch profiler for all if () statements. The results will be found in: /debugfs/tracing/profile_branch For example: miss hit % Function File Line ------- --------- - -------- ---- ---- 0 1 100 x86_64_start_reservations head64.c 127 0 1 100 copy_bootdata head64.c 69 1 0 0 x86_64_start_kernel head64.c 111 32 0 0 set_intr_gate desc.h 319 1 0 0 reserve_ebda_region head.c 51 1 0 0 reserve_ebda_region head.c 47 0 1 100 reserve_ebda_region head.c 42 0 0 X maxcpus main.c 165 Miss means the branch was not taken. Hit means the branch was taken. The percent is the percentage the branch was taken. This adds a significant amount of overhead and should only be used by those analyzing their system. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 11 ++++++++++- include/linux/compiler.h | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8bccb49981e..eba835a2c2c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -53,6 +53,14 @@ #define LIKELY_PROFILE() #endif +#ifdef CONFIG_PROFILE_ALL_BRANCHES +#define BRANCH_PROFILE() VMLINUX_SYMBOL(__start_branch_profile) = .; \ + *(_ftrace_branch) \ + VMLINUX_SYMBOL(__stop_branch_profile) = .; +#else +#define BRANCH_PROFILE() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -72,7 +80,8 @@ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ - LIKELY_PROFILE() + LIKELY_PROFILE() \ + BRANCH_PROFILE() #define RO_DATA(align) \ . = ALIGN((align)); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0628a2013fa..ea7c6be354b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -63,8 +63,16 @@ struct ftrace_branch_data { const char *func; const char *file; unsigned line; - unsigned long correct; - unsigned long incorrect; + union { + struct { + unsigned long correct; + unsigned long incorrect; + }; + struct { + unsigned long miss; + unsigned long hit; + }; + }; }; /* @@ -103,6 +111,32 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # ifndef unlikely # define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) # endif + +#ifdef CONFIG_PROFILE_ALL_BRANCHES +/* + * "Define 'is'", Bill Clinton + * "Define 'if'", Steven Rostedt + */ +#define if(cond) if (__builtin_constant_p((cond)) ? !!(cond) : \ + ({ \ + int ______r; \ + static struct ftrace_branch_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_branch"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______r = !!(cond); \ + if (______r) \ + ______f.hit++; \ + else \ + ______f.miss++; \ + ______r; \ + })) +#endif /* CONFIG_PROFILE_ALL_BRANCHES */ + #else # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) -- cgit v1.2.3 From 033601a32b2012b6948e80e739cca40bff4de4a0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 12:41:55 -0500 Subject: ring-buffer: add tracing_off_permanent Impact: feature to permanently disable ring buffer This patch adds a API to the ring buffer code that will permanently disable the ring buffer from ever recording. This should only be called when some serious anomaly is detected, and the system may be in an unstable state. When that happens, shutting down the recording to the ring buffers may be appropriate. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e097c2e6b6d..3bb87a753fa 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -122,6 +122,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); void tracing_on(void); void tracing_off(void); +void tracing_off_permanent(void); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, -- cgit v1.2.3 From 69bb54ec05f57da7f6fac2cec0820cbc970df20f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 12:59:38 -0500 Subject: ftrace: add ftrace_off_permanent Impact: add new API to disable all of ftrace on anomalies It case of a serious anomaly being detected (like something caught by lockdep) it is a good idea to disable all tracing immediately, without grabing any locks. This patch adds ftrace_off_permanent that disables the tracers, function tracing and ring buffers without a way to enable them again. This should only be used when something serious has been detected. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e12..13e9cfc0992 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -257,6 +257,7 @@ extern int ftrace_dump_on_oops; extern void tracing_start(void); extern void tracing_stop(void); +extern void ftrace_off_permanent(void); extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -290,6 +291,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } +static inline void ftrace_off_permanent(void) { } static inline int ftrace_printk(const char *fmt, ...) { -- cgit v1.2.3 From 8d7c6a96164651dbbab449ef0b5c20ae1f76a3a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sun, 23 Nov 2008 12:39:06 +0200 Subject: tracing/stack-tracer: fix style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- include/linux/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 68de51468f5..fd42d685110 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -25,7 +25,7 @@ extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) -# define save_stack_trace_user(trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif -- cgit v1.2.3 From 8d26487fd4ddda7a0237da418fb8669fb06ae557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=B6r=C3=B6k=20Edwin?= Date: Sun, 23 Nov 2008 12:39:08 +0200 Subject: tracing/stack-tracer: introduce CONFIG_USER_STACKTRACE_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup User stack tracing is just implemented for x86, but it is not x86 specific. Introduce a generic config flag, that is currently enabled only for x86. When other arches implement it, they will have to SELECT USER_STACKTRACE_SUPPORT. Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- include/linux/stacktrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index fd42d685110..1a8cecc4f38 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -16,7 +16,7 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, extern void print_stack_trace(struct stack_trace *trace, int spaces); -#ifdef CONFIG_X86 +#ifdef CONFIG_USER_STACKTRACE_SUPPORT extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace_user(trace) do { } while (0) -- cgit v1.2.3