-rw-r--r--  Documentation/markers.txt                      |  14
-rw-r--r--  Documentation/tracepoints.txt                  |  92
-rw-r--r--  arch/x86/include/asm/ftrace.h                  |   8
-rw-r--r--  arch/x86/kernel/entry_32.S                     |  18
-rw-r--r--  arch/x86/kernel/ftrace.c                       | 286
-rw-r--r--  include/asm-generic/vmlinux.lds.h              |   1
-rw-r--r--  include/linux/ftrace.h                         |  69
-rw-r--r--  include/linux/marker.h                         |  67
-rw-r--r--  include/linux/rcupdate.h                       |   2
-rw-r--r--  include/linux/tracepoint.h                     |  53
-rw-r--r--  include/trace/sched.h                          |  24
-rw-r--r--  init/Kconfig                                   |   1
-rw-r--r--  kernel/exit.c                                  |   4
-rw-r--r--  kernel/fork.c                                  |   2
-rw-r--r--  kernel/kthread.c                               |   3
-rw-r--r--  kernel/marker.c                                | 114
-rw-r--r--  kernel/module.c                                |  11
-rw-r--r--  kernel/sched.c                                 |   6
-rw-r--r--  kernel/signal.c                                |   2
-rw-r--r--  kernel/trace/Kconfig                           |   1
-rw-r--r--  kernel/trace/ftrace.c                          | 304
-rw-r--r--  kernel/trace/trace.c                           |  23
-rw-r--r--  kernel/trace/trace.h                           |   3
-rw-r--r--  kernel/trace/trace_boot.c                      |   3
-rw-r--r--  kernel/trace/trace_branch.c                    |   3
-rw-r--r--  kernel/trace/trace_functions.c                 |   3
-rw-r--r--  kernel/trace/trace_functions_return.c          |  16
-rw-r--r--  kernel/trace/trace_irqsoff.c                   |   9
-rw-r--r--  kernel/trace/trace_mmiotrace.c                 |   3
-rw-r--r--  kernel/trace/trace_nop.c                       |   3
-rw-r--r--  kernel/trace/trace_sched_switch.c              |   3
-rw-r--r--  kernel/trace/trace_sched_wakeup.c              |   3
-rw-r--r--  kernel/trace/trace_selftest.c                  |  70
-rw-r--r--  kernel/trace/trace_sysprof.c                   |   3
-rw-r--r--  kernel/tracepoint.c                            |  34
-rw-r--r--  samples/tracepoints/tp-samples-trace.h         |   4
-rw-r--r--  samples/tracepoints/tracepoint-probe-sample.c  |   1
-rw-r--r--  samples/tracepoints/tracepoint-probe-sample2.c |   1
-rw-r--r--  samples/tracepoints/tracepoint-sample.c        |   3
39 files changed, 841 insertions(+), 429 deletions(-)
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
index 089f6138fcd..6d275e4ef38 100644
--- a/Documentation/markers.txt
+++ b/Documentation/markers.txt
@@ -70,6 +70,20 @@ a printk warning which identifies the inconsistency:
"Format mismatch for probe probe_name (format), marker (format)"
+Another way to use markers is to define the marker without generating
+any function call to actually invoke it. This is useful in combination
+with tracepoint probes in a scheme like this:
+
+void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk);
+
+DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name,
+ "arg1 %u pid %d");
+
+notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk)
+{
+	struct marker *marker = &GET_MARKER(marker_eventname);
+ /* write data to trace buffers ... */
+}
* Probe / marker example
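For reference, a sketch of the consumer side that would activate such a marker;
the names and format string are the placeholders used above, and this sketch is
illustrative rather than part of the patch:

#include <linux/marker.h>
#include <linux/module.h>

static void my_marker_probe(void *probe_private, void *call_private,
			    const char *fmt, va_list *args)
{
	/* decode the va_list according to fmt, here "arg1 %u pid %d" */
}

static int __init my_probe_init(void)
{
	/*
	 * Activating the marker also registers tp_cb on the named
	 * tracepoint when the marker was created with DEFINE_MARKER_TP()
	 * (see the set_marker() change in kernel/marker.c below).
	 */
	return marker_probe_register("marker_eventname", "arg1 %u pid %d",
				     my_marker_probe, NULL);
}

static void __exit my_probe_exit(void)
{
	marker_probe_unregister("marker_eventname", my_marker_probe, NULL);
}
module_init(my_probe_init);
module_exit(my_probe_exit);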
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
index 5d354e16749..2d42241a25c 100644
--- a/Documentation/tracepoints.txt
+++ b/Documentation/tracepoints.txt
@@ -3,28 +3,30 @@
Mathieu Desnoyers
-This document introduces Linux Kernel Tracepoints and their use. It provides
-examples of how to insert tracepoints in the kernel and connect probe functions
-to them and provides some examples of probe functions.
+This document introduces Linux Kernel Tracepoints and their use. It
+provides examples of how to insert tracepoints in the kernel and
+connect probe functions to them, and gives some examples of probe
+functions.
* Purpose of tracepoints
-A tracepoint placed in code provides a hook to call a function (probe) that you
-can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
-"off" (no probe is attached). When a tracepoint is "off" it has no effect,
-except for adding a tiny time penalty (checking a condition for a branch) and
-space penalty (adding a few bytes for the function call at the end of the
-instrumented function and adds a data structure in a separate section). When a
-tracepoint is "on", the function you provide is called each time the tracepoint
-is executed, in the execution context of the caller. When the function provided
-ends its execution, it returns to the caller (continuing from the tracepoint
-site).
+A tracepoint placed in code provides a hook to call a function (probe)
+that you can provide at runtime. A tracepoint can be "on" (a probe is
+connected to it) or "off" (no probe is attached). When a tracepoint is
+"off" it has no effect, except for adding a tiny time penalty
+(checking a condition for a branch) and space penalty (adding a few
+bytes for the function call at the end of the instrumented function
+and adding a data structure in a separate section). When a tracepoint
+is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function
+provided ends its execution, it returns to the caller (continuing from
+the tracepoint site).
You can put tracepoints at important locations in the code. They are
lightweight hooks that can pass an arbitrary number of parameters,
-which prototypes are described in a tracepoint declaration placed in a header
-file.
+whose prototypes are described in a tracepoint declaration placed in a
+header file.
They can be used for tracing and performance accounting.
@@ -42,7 +44,7 @@ In include/trace/subsys.h :
#include <linux/tracepoint.h>
-DEFINE_TRACE(subsys_eventname,
+DECLARE_TRACE(subsys_eventname,
	TPPROTO(int firstarg, struct task_struct *p),
TPARGS(firstarg, p));
@@ -50,6 +52,8 @@ In subsys/file.c (where the tracing statement must be added) :
#include <trace/subsys.h>
+DEFINE_TRACE(subsys_eventname);
+
void somefct(void)
{
...
@@ -61,31 +65,41 @@ Where :
- subsys_eventname is an identifier unique to your event
- subsys is the name of your subsystem.
- eventname is the name of the event to trace.
-- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
- called by this tracepoint.
-- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
-Connecting a function (probe) to a tracepoint is done by providing a probe
-(function to call) for the specific tracepoint through
-register_trace_subsys_eventname(). Removing a probe is done through
-unregister_trace_subsys_eventname(); it will remove the probe sure there is no
-caller left using the probe when it returns. Probe removal is preempt-safe
-because preemption is disabled around the probe call. See the "Probe example"
-section below for a sample probe module.
-
-The tracepoint mechanism supports inserting multiple instances of the same
-tracepoint, but a single definition must be made of a given tracepoint name over
-all the kernel to make sure no type conflict will occur. Name mangling of the
-tracepoints is done using the prototypes to make sure typing is correct.
-Verification of probe type correctness is done at the registration site by the
-compiler. Tracepoints can be put in inline functions, inlined static functions,
-and unrolled loops as well as regular functions.
-
-The naming scheme "subsys_event" is suggested here as a convention intended
-to limit collisions. Tracepoint names are global to the kernel: they are
-considered as being the same whether they are in the core kernel image or in
-modules.
+- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the
+ function called by this tracepoint.
+- TPARGS(firstarg, p) are the parameter names, the same as found in the
+ prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a
+probe (function to call) for the specific tracepoint through
+register_trace_subsys_eventname(). Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe.
+
+tracepoint_synchronize_unregister() must be called before the end of
+the module exit function to make sure there is no caller left using
+the probe. This, and the fact that preemption is disabled around the
+probe call, make sure that probe removal and module unload are safe.
+See the "Probe example" section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the
+same tracepoint, but a single definition must be made of a given
+tracepoint name across the whole kernel to make sure no type conflict will
+occur. Name mangling of the tracepoints is done using the prototypes
+to make sure typing is correct. Verification of probe type correctness
+is done at the registration site by the compiler. Tracepoints can be
+put in inline functions, inlined static functions, and unrolled loops
+as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention
+intended to limit collisions. Tracepoint names are global to the
+kernel: they are considered as being the same whether they are in the
+core kernel image or in modules.
+
+If the tracepoint has to be used in kernel modules, an
+EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
+used to export the defined tracepoints.
* Probe / tracepoint example
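To make this concrete, a minimal probe module for the subsys_eventname
tracepoint above might look as follows (a sketch built from the
documentation's placeholder names, not part of the patch):

#include <linux/module.h>
#include <linux/sched.h>
#include <trace/subsys.h>

static void probe_subsys_eventname(int firstarg, struct task_struct *p)
{
	/* called in the caller's context each time the tracepoint fires */
}

static int __init tp_probe_init(void)
{
	return register_trace_subsys_eventname(probe_subsys_eventname);
}

static void __exit tp_probe_exit(void)
{
	unregister_trace_subsys_eventname(probe_subsys_eventname);
	/* wait for in-flight probe callers before the module text vanishes */
	tracepoint_synchronize_unregister();
}
module_init(tp_probe_init);
module_exit(tp_probe_exit);
MODULE_LICENSE("GPL");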
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9b6a1fa19e7..2bb43b433e0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,6 +17,14 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
*/
return addr - 1;
}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for x86 */
+};
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f9762114983..74defe21ba4 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1190,7 +1190,7 @@ ENTRY(mcount)
jnz trace
#ifdef CONFIG_FUNCTION_RET_TRACER
cmpl $ftrace_stub, ftrace_function_return
- jnz trace_return
+ jnz ftrace_return_caller
#endif
.globl ftrace_stub
ftrace_stub:
@@ -1211,9 +1211,15 @@ trace:
popl %ecx
popl %eax
jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_RET_TRACER
-trace_return:
+ENTRY(ftrace_return_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
pushl %eax
pushl %ecx
pushl %edx
@@ -1223,7 +1229,8 @@ trace_return:
popl %edx
popl %ecx
popl %eax
- jmp ftrace_stub
+ ret
+END(ftrace_return_caller)
.globl return_to_handler
return_to_handler:
@@ -1237,10 +1244,7 @@ return_to_handler:
popl %ecx
popl %eax
ret
-#endif /* CONFIG_FUNCTION_RET_TRACER */
-END(mcount)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FUNCTION_TRACER */
+#endif
.section .rodata,"a"
#include "syscall_table_32.S"
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index fe832738e1e..924153edd97 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,133 +24,6 @@
#include <asm/nmi.h>
-
-#ifdef CONFIG_FUNCTION_RET_TRACER
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
- atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
- atomic_dec(&in_nmi);
-}
-
-/* Add a function return address to the trace stack on thread info.*/
-static int push_return_trace(unsigned long ret, unsigned long long time,
- unsigned long func)
-{
- int index;
- struct thread_info *ti = current_thread_info();
-
- /* The return trace stack is full */
- if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
- return -EBUSY;
-
- index = ++ti->curr_ret_stack;
- ti->ret_stack[index].ret = ret;
- ti->ret_stack[index].func = func;
- ti->ret_stack[index].calltime = time;
-
- return 0;
-}
-
-/* Retrieve a function return address to the trace stack on thread info.*/
-static void pop_return_trace(unsigned long *ret, unsigned long long *time,
- unsigned long *func)
-{
- int index;
-
- struct thread_info *ti = current_thread_info();
- index = ti->curr_ret_stack;
- *ret = ti->ret_stack[index].ret;
- *func = ti->ret_stack[index].func;
- *time = ti->ret_stack[index].calltime;
- ti->curr_ret_stack--;
-}
-
-/*
- * Send the trace to the ring-buffer.
- * @return the original return address.
- */
-unsigned long ftrace_return_to_handler(void)
-{
- struct ftrace_retfunc trace;
- pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
- trace.rettime = cpu_clock(raw_smp_processor_id());
- ftrace_function_return(&trace);
-
- return trace.ret;
-}
-
-/*
- * Hook the return address and push it in the stack of return addrs
- * in current thread info.
- */
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
-{
- unsigned long old;
- unsigned long long calltime;
- int faulted;
- unsigned long return_hooker = (unsigned long)
- &return_to_handler;
-
- /* Nmi's are currently unsupported */
- if (atomic_read(&in_nmi))
- return;
-
- /*
- * Protect against fault, even if it shouldn't
- * happen. This tool is too much intrusive to
- * ignore such a protection.
- */
- asm volatile(
- "1: movl (%[parent_old]), %[old]\n"
- "2: movl %[return_hooker], (%[parent_replaced])\n"
- " movl $0, %[faulted]\n"
-
- ".section .fixup, \"ax\"\n"
- "3: movl $1, %[faulted]\n"
- ".previous\n"
-
- ".section __ex_table, \"a\"\n"
- " .long 1b, 3b\n"
- " .long 2b, 3b\n"
- ".previous\n"
-
- : [parent_replaced] "=r" (parent), [old] "=r" (old),
- [faulted] "=r" (faulted)
- : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
- : "memory"
- );
-
- if (WARN_ON(faulted)) {
- unregister_ftrace_return();
- return;
- }
-
- if (WARN_ON(!__kernel_text_address(old))) {
- unregister_ftrace_return();
- *parent = old;
- return;
- }
-
- calltime = cpu_clock(raw_smp_processor_id());
-
- if (push_return_trace(old, calltime, self_addr) == -EBUSY)
- *parent = old;
-}
-
-#endif
-
#ifdef CONFIG_DYNAMIC_FTRACE
union ftrace_code_union {
@@ -166,7 +39,7 @@ static int ftrace_calc_offset(long ip, long addr)
return (int)(addr - ip);
}
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
@@ -311,12 +184,12 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
-unsigned char *ftrace_nop_replace(void)
+static unsigned char *ftrace_nop_replace(void)
{
return ftrace_nop;
}
-int
+static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
@@ -349,6 +222,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return 0;
}
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_call_replace(ip, addr);
+ new = ftrace_nop_replace();
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_nop_replace();
+ new = ftrace_call_replace(ip, addr);
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -426,3 +322,133 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
#endif
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * These functions are picked from those used on
+ * this page for dynamic ftrace. They have been
+ * simplified to ignore all traces in NMI context.
+ */
+static atomic_t in_nmi;
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+}
+
+void ftrace_nmi_exit(void)
+{
+ atomic_dec(&in_nmi);
+}
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
+/* Add a function return address to the trace stack on thread info. */
+static int push_return_trace(unsigned long ret, unsigned long long time,
+ unsigned long func)
+{
+ int index;
+ struct thread_info *ti = current_thread_info();
+
+ /* The return trace stack is full */
+ if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
+ return -EBUSY;
+
+ index = ++ti->curr_ret_stack;
+ barrier();
+ ti->ret_stack[index].ret = ret;
+ ti->ret_stack[index].func = func;
+ ti->ret_stack[index].calltime = time;
+
+ return 0;
+}
+
+/* Retrieve a function return address from the trace stack on thread info. */
+static void pop_return_trace(unsigned long *ret, unsigned long long *time,
+ unsigned long *func)
+{
+ int index;
+
+ struct thread_info *ti = current_thread_info();
+ index = ti->curr_ret_stack;
+ *ret = ti->ret_stack[index].ret;
+ *func = ti->ret_stack[index].func;
+ *time = ti->ret_stack[index].calltime;
+ ti->curr_ret_stack--;
+}
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+ struct ftrace_retfunc trace;
+ pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
+ trace.rettime = cpu_clock(raw_smp_processor_id());
+ ftrace_function_return(&trace);
+
+ return trace.ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ unsigned long long calltime;
+ int faulted;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+	/* NMIs are currently unsupported */
+ if (atomic_read(&in_nmi))
+ return;
+
+ /*
+	 * Protect against a fault, even though it shouldn't
+	 * happen. This tool is too intrusive to
+	 * omit such a protection.
+ */
+ asm volatile(
+ "1: movl (%[parent_old]), %[old]\n"
+ "2: movl %[return_hooker], (%[parent_replaced])\n"
+ " movl $0, %[faulted]\n"
+
+ ".section .fixup, \"ax\"\n"
+ "3: movl $1, %[faulted]\n"
+ ".previous\n"
+
+ ".section __ex_table, \"a\"\n"
+ " .long 1b, 3b\n"
+ " .long 2b, 3b\n"
+ ".previous\n"
+
+ : [parent_replaced] "=r" (parent), [old] "=r" (old),
+ [faulted] "=r" (faulted)
+ : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (WARN_ON(faulted)) {
+ unregister_ftrace_return();
+ return;
+ }
+
+ if (WARN_ON(!__kernel_text_address(old))) {
+ unregister_ftrace_return();
+ *parent = old;
+ return;
+ }
+
+ calltime = cpu_clock(raw_smp_processor_id());
+
+ if (push_return_trace(old, calltime, self_addr) == -EBUSY)
+ *parent = old;
+}
+
+#endif /* CONFIG_FUNCTION_RET_TRACER */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index a5e4ed9baec..3b46ae46493 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -71,6 +71,7 @@
VMLINUX_SYMBOL(__start___markers) = .; \
*(__markers) \
VMLINUX_SYMBOL(__stop___markers) = .; \
+ . = ALIGN(32); \
VMLINUX_SYMBOL(__start___tracepoints) = .; \
*(__tracepoints) \
VMLINUX_SYMBOL(__stop___tracepoints) = .; \
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 4fbc4a8b86a..f1af1aab00e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -25,6 +25,17 @@ struct ftrace_ops {
extern int function_trace_stop;
+/*
+ * Type of the current tracing.
+ */
+enum ftrace_tracing_type_t {
+ FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */
+ FTRACE_TYPE_RETURN, /* Hook the return of the function */
+};
+
+/* Current tracing type, default is FTRACE_TYPE_ENTER */
+extern enum ftrace_tracing_type_t ftrace_tracing_type;
+
/**
* ftrace_stop - stop function tracer.
*
@@ -74,6 +85,9 @@ static inline void ftrace_start(void) { }
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_DYNAMIC_FTRACE
+/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */
+#include <asm/ftrace.h>
+
enum {
FTRACE_FL_FREE = (1 << 0),
FTRACE_FL_FAILED = (1 << 1),
@@ -88,6 +102,7 @@ struct dyn_ftrace {
struct list_head list;
unsigned long ip; /* address of mcount call-site */
unsigned long flags;
+ struct dyn_arch_ftrace arch;
};
int ftrace_force_update(void);
@@ -95,22 +110,43 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset);
/* defined in arch */
extern int ftrace_ip_converted(unsigned long ip);
-extern unsigned char *ftrace_nop_replace(void);
-extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr);
extern int ftrace_dyn_arch_init(void *data);
extern int ftrace_update_ftrace_func(ftrace_func_t func);
extern void ftrace_caller(void);
extern void ftrace_call(void);
extern void mcount_call(void);
+#ifdef CONFIG_FUNCTION_RET_TRACER
+extern void ftrace_return_caller(void);
+#endif
-/* May be defined in arch */
-extern int ftrace_arch_read_dyn_info(char *buf, int size);
+/**
+ * ftrace_make_nop - convert code into nop
+ * @mod: module structure if called by module load initialization
+ * @rec: the mcount call site record
+ * @addr: the address that the call site should be calling
+ *
+ * This is a very sensitive operation and great care needs
+ * to be taken by the arch. The operation should carefully
+ * read the location, check to see if what is read is indeed
+ * what we expect it to be, and then on success of the compare,
+ * it should write to the location.
+ *
+ * The code segment at @rec->ip should be a caller to @addr
+ *
+ * Return must be:
+ * 0 on success
+ * -EFAULT on error reading the location
+ * -EINVAL on a failed compare of the contents
+ * -EPERM on error writing to the location
+ * Any other value will be considered a failure.
+ */
+extern int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr);
/**
- * ftrace_modify_code - modify code segment
- * @ip: the address of the code segment
- * @old_code: the contents of what is expected to be there
- * @new_code: the code to patch in
+ * ftrace_make_call - convert a nop call site into a call to addr
+ * @rec: the mcount call site record
+ * @addr: the address that the call site should call
*
* This is a very sensitive operation and great care needs
* to be taken by the arch. The operation should carefully
@@ -118,6 +154,8 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
* what we expect it to be, and then on success of the compare,
* it should write to the location.
*
+ * The code segment at @rec->ip should be a nop
+ *
* Return must be:
* 0 on success
* -EFAULT on error reading the location
@@ -125,8 +163,11 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
* -EPERM on error writing to the location
* Any other value will be considered a failure.
*/
-extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code,
- unsigned char *new_code);
+extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
+
+
+/* May be defined in arch */
+extern int ftrace_arch_read_dyn_info(char *buf, int size);
extern int skip_trace(unsigned long ip);
@@ -259,11 +300,13 @@ static inline void ftrace_dump(void) { }
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
extern void ftrace_init(void);
-extern void ftrace_init_module(unsigned long *start, unsigned long *end);
+extern void ftrace_init_module(struct module *mod,
+ unsigned long *start, unsigned long *end);
#else
static inline void ftrace_init(void) { }
static inline void
-ftrace_init_module(unsigned long *start, unsigned long *end) { }
+ftrace_init_module(struct module *mod,
+ unsigned long *start, unsigned long *end) { }
#endif
@@ -281,7 +324,7 @@ struct ftrace_retfunc {
/* Type of a callback handler of tracing return function */
typedef void (*trace_function_return_t)(struct ftrace_retfunc *);
-extern void register_ftrace_return(trace_function_return_t func);
+extern int register_ftrace_return(trace_function_return_t func);
/* The current handler in use */
extern trace_function_return_t ftrace_function_return;
extern void unregister_ftrace_return(void);
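As a usage sketch of this return-tracing API, a hypothetical handler could
compute per-function durations from the timestamps recorded by the arch code;
the field names follow struct ftrace_retfunc as used in this patch, and
account_duration() is a made-up helper:

static void my_return_handler(struct ftrace_retfunc *trace)
{
	/* calltime and rettime both come from cpu_clock() */
	unsigned long long delta = trace->rettime - trace->calltime;

	/* trace->func is the traced function, trace->ret its return site */
	account_duration(trace->func, delta); /* hypothetical helper */
}

static int my_start(void)
{
	/* returns -EBUSY when the function entry tracer is already active */
	return register_ftrace_return(my_return_handler);
}

static void my_stop(void)
{
	unregister_ftrace_return();
}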
diff --git a/include/linux/marker.h b/include/linux/marker.h
index 4cf45472d9f..34c14bc957f 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -12,6 +12,7 @@
* See the file COPYING for more details.
*/
+#include <stdarg.h>
#include <linux/types.h>
struct module;
@@ -48,10 +49,28 @@ struct marker {
void (*call)(const struct marker *mdata, void *call_private, ...);
struct marker_probe_closure single;
struct marker_probe_closure *multi;
+ const char *tp_name; /* Optional tracepoint name */
+ void *tp_cb; /* Optional tracepoint callback */
} __attribute__((aligned(8)));
#ifdef CONFIG_MARKERS
+#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \
+ static const char __mstrtab_##name[] \
+ __attribute__((section("__markers_strings"))) \
+ = #name "\0" format; \
+ static struct marker __mark_##name \
+ __attribute__((section("__markers"), aligned(8))) = \
+ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
+ 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\
+ NULL, tp_name_str, tp_cb }
+
+#define DEFINE_MARKER(name, format) \
+ _DEFINE_MARKER(name, NULL, NULL, format)
+
+#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \
+ _DEFINE_MARKER(name, #tp_name, tp_cb, format)
+
/*
* Note : the empty asm volatile with read constraint is used here instead of a
* "used" attribute to fix a gcc 4.1.x bug.
@@ -65,14 +84,7 @@ struct marker {
*/
#define __trace_mark(generic, name, call_private, format, args...) \
do { \
- static const char __mstrtab_##name[] \
- __attribute__((section("__markers_strings"))) \
- = #name "\0" format; \
- static struct marker __mark_##name \
- __attribute__((section("__markers"), aligned(8))) = \
- { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \
- 0, 0, marker_probe_cb, \
- { __mark_empty_function, NULL}, NULL }; \
+ DEFINE_MARKER(name, format); \
__mark_check_format(format, ## args); \
if (unlikely(__mark_##name.state)) { \
(*__mark_##name.call) \
@@ -80,14 +92,39 @@ struct marker {
} \
} while (0)
+#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
+ do { \
+ void __check_tp_type(void) \
+ { \
+ register_trace_##tp_name(tp_cb); \
+ } \
+ DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \
+ __mark_check_format(format, ## args); \
+ (*__mark_##name.call)(&__mark_##name, call_private, \
+ ## args); \
+ } while (0)
+
extern void marker_update_probe_range(struct marker *begin,
struct marker *end);
+
+#define GET_MARKER(name) (__mark_##name)
+
#else /* !CONFIG_MARKERS */
+#define DEFINE_MARKER(name, tp_name, tp_cb, format)
#define __trace_mark(generic, name, call_private, format, args...) \
__mark_check_format(format, ## args)
+#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \
+ do { \
+ void __check_tp_type(void) \
+ { \
+ register_trace_##tp_name(tp_cb); \
+ } \
+ __mark_check_format(format, ## args); \
+ } while (0)
static inline void marker_update_probe_range(struct marker *begin,
struct marker *end)
{ }
+#define GET_MARKER(name)
#endif /* CONFIG_MARKERS */
/**
@@ -117,6 +154,20 @@ static inline void marker_update_probe_range(struct marker *begin,
__trace_mark(1, name, NULL, format, ## args)
/**
+ * trace_mark_tp - Marker in a tracepoint callback
+ * @name: marker name, not quoted.
+ * @tp_name: tracepoint name, not quoted.
+ * @tp_cb: tracepoint callback. Should have an associated global symbol so it
+ * is not optimized away by the compiler (should not be static).
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker in a tracepoint callback.
+ */
+#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \
+ __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args)
+
+/**
* MARK_NOARGS - Format string for a marker with no argument.
*/
#define MARK_NOARGS " "
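A sketch of trace_mark_tp() at an instrumentation site; it assumes a
tracepoint irq_entry declared elsewhere with a matching prototype, and all
names here are hypothetical:

/* not static, so the callback keeps a global symbol for the marker */
void my_irq_cb(unsigned int irq, struct task_struct *tsk);

static void instrumented_path(unsigned int irq)
{
	/*
	 * The nested __check_tp_type() inside __trace_mark_tp() makes the
	 * compiler verify that my_irq_cb matches the signature expected
	 * by register_trace_irq_entry().
	 */
	trace_mark_tp(kernel_irq_entry, irq_entry, my_irq_cb,
		      "irq %u pid %d", irq, current->pid);
}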
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 86f1f5e43e3..895dc9c1088 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -142,6 +142,7 @@ struct rcu_head {
* on the write-side to insure proper synchronization.
*/
#define rcu_read_lock_sched() preempt_disable()
+#define rcu_read_lock_sched_notrace() preempt_disable_notrace()
/*
* rcu_read_unlock_sched - marks the end of a RCU-classic critical section
@@ -149,6 +150,7 @@ struct rcu_head {
* See rcu_read_lock_sched for more information.
*/
#define rcu_read_unlock_sched() preempt_enable()
+#define rcu_read_unlock_sched_notrace() preempt_enable_notrace()
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 63064e9403f..75700545836 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -24,8 +24,12 @@ struct tracepoint {
const char *name; /* Tracepoint name */
int state; /* State. */
void **funcs;
-} __attribute__((aligned(8)));
-
+} __attribute__((aligned(32))); /*
+ * Aligned on 32 bytes because it is
+ * globally visible and gcc happily
+				 * aligns these on the structure size.
+ * Keep in sync with vmlinux.lds.h.
+ */
#define TPPROTO(args...) args
#define TPARGS(args...) args
@@ -40,14 +44,14 @@ struct tracepoint {
do { \
void **it_func; \
\
- rcu_read_lock_sched(); \
+ rcu_read_lock_sched_notrace(); \
it_func = rcu_dereference((tp)->funcs); \
if (it_func) { \
do { \
((void(*)(proto))(*it_func))(args); \
} while (*(++it_func)); \
} \
- rcu_read_unlock_sched(); \
+ rcu_read_unlock_sched_notrace(); \
} while (0)
/*
@@ -55,35 +59,40 @@ struct tracepoint {
* not add unwanted padding between the beginning of the section and the
* structure. Force alignment to the same alignment as the section start.
*/
-#define DEFINE_TRACE(name, proto, args) \
+#define DECLARE_TRACE(name, proto, args) \
+ extern struct tracepoint __tracepoint_##name; \
static inline void trace_##name(proto) \
{ \
- static const char __tpstrtab_##name[] \
- __attribute__((section("__tracepoints_strings"))) \
- = #name ":" #proto; \
- static struct tracepoint __tracepoint_##name \
- __attribute__((section("__tracepoints"), aligned(8))) = \
- { __tpstrtab_##name, 0, NULL }; \
if (unlikely(__tracepoint_##name.state)) \
__DO_TRACE(&__tracepoint_##name, \
TPPROTO(proto), TPARGS(args)); \
} \
static inline int register_trace_##name(void (*probe)(proto)) \
{ \
- return tracepoint_probe_register(#name ":" #proto, \
- (void *)probe); \
+ return tracepoint_probe_register(#name, (void *)probe); \
} \
- static inline void unregister_trace_##name(void (*probe)(proto))\
+ static inline int unregister_trace_##name(void (*probe)(proto)) \
{ \
- tracepoint_probe_unregister(#name ":" #proto, \
- (void *)probe); \
+ return tracepoint_probe_unregister(#name, (void *)probe);\
}
+#define DEFINE_TRACE(name) \
+ static const char __tpstrtab_##name[] \
+ __attribute__((section("__tracepoints_strings"))) = #name; \
+ struct tracepoint __tracepoint_##name \
+ __attribute__((section("__tracepoints"), aligned(32))) = \
+ { __tpstrtab_##name, 0, NULL }
+
+#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \
+ EXPORT_SYMBOL_GPL(__tracepoint_##name)
+#define EXPORT_TRACEPOINT_SYMBOL(name) \
+ EXPORT_SYMBOL(__tracepoint_##name)
+
extern void tracepoint_update_probe_range(struct tracepoint *begin,
struct tracepoint *end);
#else /* !CONFIG_TRACEPOINTS */
-#define DEFINE_TRACE(name, proto, args) \
+#define DECLARE_TRACE(name, proto, args) \
static inline void _do_trace_##name(struct tracepoint *tp, proto) \
{ } \
static inline void trace_##name(proto) \
@@ -92,8 +101,14 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
{ \
return -ENOSYS; \
} \
- static inline void unregister_trace_##name(void (*probe)(proto))\
- { }
+ static inline int unregister_trace_##name(void (*probe)(proto)) \
+ { \
+ return -ENOSYS; \
+ }
+
+#define DEFINE_TRACE(name)
+#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
+#define EXPORT_TRACEPOINT_SYMBOL(name)
static inline void tracepoint_update_probe_range(struct tracepoint *begin,
struct tracepoint *end)
diff --git a/include/trace/sched.h b/include/trace/sched.h
index ad47369d01b..9b2854abf7e 100644
--- a/include/trace/sched.h
+++ b/include/trace/sched.h
@@ -4,52 +4,52 @@
#include <linux/sched.h>
#include <linux/tracepoint.h>
-DEFINE_TRACE(sched_kthread_stop,
+DECLARE_TRACE(sched_kthread_stop,
TPPROTO(struct task_struct *t),
TPARGS(t));
-DEFINE_TRACE(sched_kthread_stop_ret,
+DECLARE_TRACE(sched_kthread_stop_ret,
TPPROTO(int ret),
TPARGS(ret));
-DEFINE_TRACE(sched_wait_task,
+DECLARE_TRACE(sched_wait_task,
TPPROTO(struct rq *rq, struct task_struct *p),
TPARGS(rq, p));
-DEFINE_TRACE(sched_wakeup,
+DECLARE_TRACE(sched_wakeup,
TPPROTO(struct rq *rq, struct task_struct *p),
TPARGS(rq, p));
-DEFINE_TRACE(sched_wakeup_new,
+DECLARE_TRACE(sched_wakeup_new,
TPPROTO(struct rq *rq, struct task_struct *p),
TPARGS(rq, p));
-DEFINE_TRACE(sched_switch,
+DECLARE_TRACE(sched_switch,
TPPROTO(struct rq *rq, struct task_struct *prev,
struct task_struct *next),
TPARGS(rq, prev, next));
-DEFINE_TRACE(sched_migrate_task,
+DECLARE_TRACE(sched_migrate_task,
TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu),
TPARGS(rq, p, dest_cpu));
-DEFINE_TRACE(sched_process_free,
+DECLARE_TRACE(sched_process_free,
TPPROTO(struct task_struct *p),
TPARGS(p));
-DEFINE_TRACE(sched_process_exit,
+DECLARE_TRACE(sched_process_exit,
TPPROTO(struct task_struct *p),
TPARGS(p));
-DEFINE_TRACE(sched_process_wait,
+DECLARE_TRACE(sched_process_wait,
TPPROTO(struct pid *pid),
TPARGS(pid));
-DEFINE_TRACE(sched_process_fork,
+DECLARE_TRACE(sched_process_fork,
TPPROTO(struct task_struct *parent, struct task_struct *child),
TPARGS(parent, child));
-DEFINE_TRACE(sched_signal_send,
+DECLARE_TRACE(sched_signal_send,
TPPROTO(int sig, struct task_struct *p),
TPARGS(sig, p));
diff --git a/init/Kconfig b/init/Kconfig
index 86b00c53fad..f5bacb43871 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -808,6 +808,7 @@ config TRACEPOINTS
config MARKERS
bool "Activate markers"
+ depends on TRACEPOINTS
help
Place an empty function call at each marker site. Can be
dynamically changed for a probe function.
diff --git a/kernel/exit.c b/kernel/exit.c
index ae2b92be5fa..f995d241866 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -54,6 +54,10 @@
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+DEFINE_TRACE(sched_process_free);
+DEFINE_TRACE(sched_process_exit);
+DEFINE_TRACE(sched_process_wait);
+
static void exit_mm(struct task_struct * tsk);
static inline int task_detached(struct task_struct *p)
diff --git a/kernel/fork.c b/kernel/fork.c
index f6083561dfe..0837d0deee5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -79,6 +79,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
+DEFINE_TRACE(sched_process_fork);
+
int nr_processes(void)
{
int cpu;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8e7a7ce3ed0..4fbc456f393 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock);
static LIST_HEAD(kthread_create_list);
struct task_struct *kthreadd_task;
+DEFINE_TRACE(sched_kthread_stop);
+DEFINE_TRACE(sched_kthread_stop_ret);
+
struct kthread_create_info
{
/* Information passed to kthread() from kthreadd. */
diff --git a/kernel/marker.c b/kernel/marker.c
index 2898b647d41..ea54f264786 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -81,7 +81,7 @@ struct marker_entry {
* though the function pointer change and the marker enabling are two distinct
* operations that modifies the execution flow of preemptible code.
*/
-void __mark_empty_function(void *probe_private, void *call_private,
+notrace void __mark_empty_function(void *probe_private, void *call_private,
const char *fmt, va_list *args)
{
}
@@ -97,7 +97,8 @@ EXPORT_SYMBOL_GPL(__mark_empty_function);
* need to put a full smp_rmb() in this branch. This is why we do not use
* rcu_dereference() for the pointer read.
*/
-void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
+notrace void marker_probe_cb(const struct marker *mdata,
+ void *call_private, ...)
{
va_list args;
char ptype;
@@ -107,7 +108,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
* sure the teardown of the callbacks can be done correctly when they
* are in modules and they insure RCU read coherency.
*/
- rcu_read_lock_sched();
+ rcu_read_lock_sched_notrace();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
@@ -145,7 +146,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
va_end(args);
}
}
- rcu_read_unlock_sched();
+ rcu_read_unlock_sched_notrace();
}
EXPORT_SYMBOL_GPL(marker_probe_cb);
@@ -157,12 +158,13 @@ EXPORT_SYMBOL_GPL(marker_probe_cb);
*
* Should be connected to markers "MARK_NOARGS".
*/
-static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
+static notrace void marker_probe_cb_noarg(const struct marker *mdata,
+ void *call_private, ...)
{
va_list args; /* not initialized */
char ptype;
- rcu_read_lock_sched();
+ rcu_read_lock_sched_notrace();
ptype = mdata->ptype;
if (likely(!ptype)) {
marker_probe_func *func;
@@ -195,7 +197,7 @@ static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
}
- rcu_read_unlock_sched();
+ rcu_read_unlock_sched_notrace();
}
static void free_old_closure(struct rcu_head *head)
@@ -477,7 +479,7 @@ static int marker_set_format(struct marker_entry *entry, const char *format)
static int set_marker(struct marker_entry *entry, struct marker *elem,
int active)
{
- int ret;
+ int ret = 0;
WARN_ON(strcmp(entry->name, elem->name) != 0);
if (entry->format) {
@@ -529,9 +531,40 @@ static int set_marker(struct marker_entry *entry, struct marker *elem,
*/
smp_wmb();
elem->ptype = entry->ptype;
+
+ if (elem->tp_name && (active ^ elem->state)) {
+ WARN_ON(!elem->tp_cb);
+ /*
+ * It is ok to directly call the probe registration because type
+ * checking has been done in the __trace_mark_tp() macro.
+ */
+
+ if (active) {
+ /*
+ * try_module_get should always succeed because we hold
+ * lock_module() to get the tp_cb address.
+ */
+ ret = try_module_get(__module_text_address(
+ (unsigned long)elem->tp_cb));
+ BUG_ON(!ret);
+ ret = tracepoint_probe_register_noupdate(
+ elem->tp_name,
+ elem->tp_cb);
+ } else {
+ ret = tracepoint_probe_unregister_noupdate(
+ elem->tp_name,
+ elem->tp_cb);
+ /*
+ * tracepoint_probe_update_all() must be called
+ * before the module containing tp_cb is unloaded.
+ */
+ module_put(__module_text_address(
+ (unsigned long)elem->tp_cb));
+ }
+ }
elem->state = active;
- return 0;
+ return ret;
}
/*
@@ -542,7 +575,24 @@ static int set_marker(struct marker_entry *entry, struct marker *elem,
*/
static void disable_marker(struct marker *elem)
{
+ int ret;
+
/* leave "call" as is. It is known statically. */
+ if (elem->tp_name && elem->state) {
+ WARN_ON(!elem->tp_cb);
+ /*
+ * It is ok to directly call the probe registration because type
+ * checking has been done in the __trace_mark_tp() macro.
+ */
+ ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
+ elem->tp_cb);
+ WARN_ON(ret);
+ /*
+ * tracepoint_probe_update_all() must be called
+ * before the module containing tp_cb is unloaded.
+ */
+ module_put(__module_text_address((unsigned long)elem->tp_cb));
+ }
elem->state = 0;
elem->single.func = __mark_empty_function;
/* Update the function before setting the ptype */
@@ -606,6 +656,7 @@ static void marker_update_probes(void)
marker_update_probe_range(__start___markers, __stop___markers);
/* Markers in modules. */
module_update_markers();
+ tracepoint_probe_update_all();
}
/**
@@ -653,10 +704,11 @@ int marker_probe_register(const char *name, const char *format,
goto end;
}
mutex_unlock(&markers_mutex);
- marker_update_probes(); /* may update entry */
+ marker_update_probes();
mutex_lock(&markers_mutex);
entry = get_marker(name);
- WARN_ON(!entry);
+ if (!entry)
+ goto end;
if (entry->rcu_pending)
rcu_barrier_sched();
entry->oldptr = old;
@@ -697,7 +749,7 @@ int marker_probe_unregister(const char *name,
rcu_barrier_sched();
old = marker_entry_remove_probe(entry, probe, probe_private);
mutex_unlock(&markers_mutex);
- marker_update_probes(); /* may update entry */
+ marker_update_probes();
mutex_lock(&markers_mutex);
entry = get_marker(name);
if (!entry)
@@ -778,10 +830,11 @@ int marker_probe_unregister_private_data(marker_probe_func *probe,
rcu_barrier_sched();
old = marker_entry_remove_probe(entry, NULL, probe_private);
mutex_unlock(&markers_mutex);
- marker_update_probes(); /* may update entry */
+ marker_update_probes();
mutex_lock(&markers_mutex);
entry = get_marker_from_private_data(probe, probe_private);
- WARN_ON(!entry);
+ if (!entry)
+ goto end;
if (entry->rcu_pending)
rcu_barrier_sched();
entry->oldptr = old;
@@ -842,3 +895,36 @@ void *marker_get_private_data(const char *name, marker_probe_func *probe,
return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL_GPL(marker_get_private_data);
+
+#ifdef CONFIG_MODULES
+
+int marker_module_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+
+ switch (val) {
+ case MODULE_STATE_COMING:
+ marker_update_probe_range(mod->markers,
+ mod->markers + mod->num_markers);
+ break;
+ case MODULE_STATE_GOING:
+ marker_update_probe_range(mod->markers,
+ mod->markers + mod->num_markers);
+ break;
+ }
+ return 0;
+}
+
+struct notifier_block marker_module_nb = {
+ .notifier_call = marker_module_notify,
+ .priority = 0,
+};
+
+static int init_markers(void)
+{
+ return register_module_notifier(&marker_module_nb);
+}
+__initcall(init_markers);
+
+#endif /* CONFIG_MODULES */
diff --git a/kernel/module.c b/kernel/module.c
index 1f4cc00e0c2..89bcf7c1327 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2184,24 +2184,15 @@ static noinline struct module *load_module(void __user *umod,
struct mod_debug *debug;
unsigned int num_debug;
-#ifdef CONFIG_MARKERS
- marker_update_probe_range(mod->markers,
- mod->markers + mod->num_markers);
-#endif
debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
sizeof(*debug), &num_debug);
dynamic_printk_setup(debug, num_debug);
-
-#ifdef CONFIG_TRACEPOINTS
- tracepoint_update_probe_range(mod->tracepoints,
- mod->tracepoints + mod->num_tracepoints);
-#endif
}
/* sechdrs[0].sh_size is always zero */
mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
sizeof(*mseg), &num_mcount);
- ftrace_init_module(mseg, mseg + num_mcount);
+ ftrace_init_module(mod, mseg, mseg + num_mcount);
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
diff --git a/kernel/sched.c b/kernel/sched.c
index 50a21f96467..327f91c63c9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -118,6 +118,12 @@
*/
#define RUNTIME_INF ((u64)~0ULL)
+DEFINE_TRACE(sched_wait_task);
+DEFINE_TRACE(sched_wakeup);
+DEFINE_TRACE(sched_wakeup_new);
+DEFINE_TRACE(sched_switch);
+DEFINE_TRACE(sched_migrate_task);
+
#ifdef CONFIG_SMP
/*
* Divide a load by a sched group cpu_power : (load / sg->__cpu_power)
diff --git a/kernel/signal.c b/kernel/signal.c
index 4530fc65445..e9afe63da24 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -41,6 +41,8 @@
static struct kmem_cache *sigqueue_cachep;
+DEFINE_TRACE(sched_signal_send);
+
static void __user *sig_handler(struct task_struct *t, int sig)
{
return t->sighand->action[sig - 1].sa.sa_handler;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 9c89526b6b7..b8378fad29a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -59,7 +59,6 @@ config FUNCTION_TRACER
config FUNCTION_RET_TRACER
bool "Kernel Function return Tracer"
- depends on !DYNAMIC_FTRACE
depends on HAVE_FUNCTION_RET_TRACER
depends on FUNCTION_TRACER
help
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 54cb9a7d15e..f212da48668 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -50,6 +50,9 @@ static int last_ftrace_enabled;
/* Quick disabling of function tracer. */
int function_trace_stop;
+/* By default, current tracing type is normal tracing. */
+enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER;
+
/*
* ftrace_disabled is set when an anomaly is discovered.
* ftrace_disabled is much stronger than ftrace_enabled.
@@ -334,7 +337,7 @@ ftrace_record_ip(unsigned long ip)
{
struct dyn_ftrace *rec;
- if (!ftrace_enabled || ftrace_disabled)
+ if (ftrace_disabled)
return NULL;
rec = ftrace_alloc_dyn_node(ip);
@@ -348,107 +351,138 @@ ftrace_record_ip(unsigned long ip)
return rec;
}
-#define FTRACE_ADDR ((long)(ftrace_caller))
+static void print_ip_ins(const char *fmt, unsigned char *p)
+{
+ int i;
+
+ printk(KERN_CONT "%s", fmt);
+
+ for (i = 0; i < MCOUNT_INSN_SIZE; i++)
+ printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+}
+
+static void ftrace_bug(int failed, unsigned long ip)
+{
+ switch (failed) {
+ case -EFAULT:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on modifying ");
+ print_ip_sym(ip);
+ break;
+ case -EINVAL:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace failed to modify ");
+ print_ip_sym(ip);
+ print_ip_ins(" actual: ", (unsigned char *)ip);
+ printk(KERN_CONT "\n");
+ break;
+ case -EPERM:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on writing ");
+ print_ip_sym(ip);
+ break;
+ default:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on unknown error ");
+ print_ip_sym(ip);
+ }
+}
+
static int
-__ftrace_replace_code(struct dyn_ftrace *rec,
- unsigned char *old, unsigned char *new, int enable)
+__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
{
unsigned long ip, fl;
+ unsigned long ftrace_addr;
+
+#ifdef CONFIG_FUNCTION_RET_TRACER
+ if (ftrace_tracing_type == FTRACE_TYPE_ENTER)
+ ftrace_addr = (unsigned long)ftrace_caller;
+ else
+ ftrace_addr = (unsigned long)ftrace_return_caller;
+#else
+ ftrace_addr = (unsigned long)ftrace_caller;
+#endif
ip = rec->ip;
- if (ftrace_filtered && enable) {
+ /*
+ * If this record is not to be traced and
+ * it is not enabled then do nothing.
+ *
+ * If this record is not to be traced and
+	 * it is enabled then disable it.
+ *
+ */
+ if (rec->flags & FTRACE_FL_NOTRACE) {
+ if (rec->flags & FTRACE_FL_ENABLED)
+ rec->flags &= ~FTRACE_FL_ENABLED;
+ else
+ return 0;
+
+ } else if (ftrace_filtered && enable) {
/*
- * If filtering is on:
- *
- * If this record is set to be filtered and
- * is enabled then do nothing.
- *
- * If this record is set to be filtered and
- * it is not enabled, enable it.
- *
- * If this record is not set to be filtered
- * and it is not enabled do nothing.
- *
- * If this record is set not to trace then
- * do nothing.
- *
- * If this record is set not to trace and
- * it is enabled then disable it.
- *
- * If this record is not set to be filtered and
- * it is enabled, disable it.
+ * Filtering is on:
*/
- fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE |
- FTRACE_FL_ENABLED);
+ fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED);
- if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ||
- (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) ||
- !fl || (fl == FTRACE_FL_NOTRACE))
+ /* Record is filtered and enabled, do nothing */
+ if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
return 0;
- /*
- * If it is enabled disable it,
- * otherwise enable it!
- */
- if (fl & FTRACE_FL_ENABLED) {
- /* swap new and old */
- new = old;
- old = ftrace_call_replace(ip, FTRACE_ADDR);
+	/* Record is not filtered and is not enabled, do nothing */
+ if (!fl)
+ return 0;
+
+ /* Record is not filtered but enabled, disable it */
+ if (fl == FTRACE_FL_ENABLED)
rec->flags &= ~FTRACE_FL_ENABLED;
- } else {
- new = ftrace_call_replace(ip, FTRACE_ADDR);
+ else
+ /* Otherwise record is filtered but not enabled, enable it */
rec->flags |= FTRACE_FL_ENABLED;
- }
} else {
+ /* Disable or not filtered */
if (enable) {
- /*
- * If this record is set not to trace and is
- * not enabled, do nothing.
- */
- fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED);
- if (fl == FTRACE_FL_NOTRACE)
- return 0;
-
- new = ftrace_call_replace(ip, FTRACE_ADDR);
- } else
- old = ftrace_call_replace(ip, FTRACE_ADDR);
-
- if (enable) {
+ /* if record is enabled, do nothing */
if (rec->flags & FTRACE_FL_ENABLED)
return 0;
+
rec->flags |= FTRACE_FL_ENABLED;
+
} else {
+
+		/* if record is not enabled, do nothing */
if (!(rec->flags & FTRACE_FL_ENABLED))
return 0;
+
rec->flags &= ~FTRACE_FL_ENABLED;
}
}
- return ftrace_modify_code(ip, old, new);
+ if (rec->flags & FTRACE_FL_ENABLED)
+ return ftrace_make_call(rec, ftrace_addr);
+ else
+ return ftrace_make_nop(NULL, rec, ftrace_addr);
}
static void ftrace_replace_code(int enable)
{
int i, failed;
- unsigned char *new = NULL, *old = NULL;
struct dyn_ftrace *rec;
struct ftrace_page *pg;
- if (enable)
- old = ftrace_nop_replace();
- else
- new = ftrace_nop_replace();
-
for (pg = ftrace_pages_start; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
rec = &pg->records[i];
- /* don't modify code that has already faulted */
- if (rec->flags & FTRACE_FL_FAILED)
+ /*
+ * Skip over free records and records that have
+ * failed.
+ */
+ if (rec->flags & FTRACE_FL_FREE ||
+ rec->flags & FTRACE_FL_FAILED)
continue;
/* ignore updates to this record's mcount site */
@@ -459,68 +493,30 @@ static void ftrace_replace_code(int enable)
unfreeze_record(rec);
}
- failed = __ftrace_replace_code(rec, old, new, enable);
+ failed = __ftrace_replace_code(rec, enable);
if (failed && (rec->flags & FTRACE_FL_CONVERTED)) {
rec->flags |= FTRACE_FL_FAILED;
if ((system_state == SYSTEM_BOOTING) ||
!core_kernel_text(rec->ip)) {
ftrace_free_rec(rec);
- }
+ } else
+ ftrace_bug(failed, rec->ip);
}
}
}
}
-static void print_ip_ins(const char *fmt, unsigned char *p)
-{
- int i;
-
- printk(KERN_CONT "%s", fmt);
-
- for (i = 0; i < MCOUNT_INSN_SIZE; i++)
- printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
-}
-
static int
-ftrace_code_disable(struct dyn_ftrace *rec)
+ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
{
unsigned long ip;
- unsigned char *nop, *call;
int ret;
ip = rec->ip;
- nop = ftrace_nop_replace();
- call = ftrace_call_replace(ip, mcount_addr);
-
- ret = ftrace_modify_code(ip, call, nop);
+ ret = ftrace_make_nop(mod, rec, mcount_addr);
if (ret) {
- switch (ret) {
- case -EFAULT:
- FTRACE_WARN_ON_ONCE(1);
- pr_info("ftrace faulted on modifying ");
- print_ip_sym(ip);
- break;
- case -EINVAL:
- FTRACE_WARN_ON_ONCE(1);
- pr_info("ftrace failed to modify ");
- print_ip_sym(ip);
- print_ip_ins(" expected: ", call);
- print_ip_ins(" actual: ", (unsigned char *)ip);
- print_ip_ins(" replace: ", nop);
- printk(KERN_CONT "\n");
- break;
- case -EPERM:
- FTRACE_WARN_ON_ONCE(1);
- pr_info("ftrace faulted on writing ");
- print_ip_sym(ip);
- break;
- default:
- FTRACE_WARN_ON_ONCE(1);
- pr_info("ftrace faulted on unknown error ");
- print_ip_sym(ip);
- }
-
+ ftrace_bug(ret, ip);
rec->flags |= FTRACE_FL_FAILED;
return 0;
}
@@ -560,8 +556,7 @@ static void ftrace_startup(void)
mutex_lock(&ftrace_start_lock);
ftrace_start_up++;
- if (ftrace_start_up == 1)
- command |= FTRACE_ENABLE_CALLS;
+ command |= FTRACE_ENABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
saved_ftrace_func = ftrace_trace_function;
@@ -639,7 +634,7 @@ static cycle_t ftrace_update_time;
static unsigned long ftrace_update_cnt;
unsigned long ftrace_update_tot_cnt;
-static int ftrace_update_code(void)
+static int ftrace_update_code(struct module *mod)
{
struct dyn_ftrace *p, *t;
cycle_t start, stop;
@@ -656,7 +651,7 @@ static int ftrace_update_code(void)
list_del_init(&p->list);
/* convert record (i.e, patch mcount-call with NOP) */
- if (ftrace_code_disable(p)) {
+ if (ftrace_code_disable(mod, p)) {
p->flags |= FTRACE_FL_CONVERTED;
ftrace_update_cnt++;
} else
@@ -699,7 +694,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
cnt = num_to_init / ENTRIES_PER_PAGE;
pr_info("ftrace: allocating %ld entries in %d pages\n",
- num_to_init, cnt);
+ num_to_init, cnt + 1);
for (i = 0; i < cnt; i++) {
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
@@ -782,13 +777,11 @@ static void *t_start(struct seq_file *m, loff_t *pos)
void *p = NULL;
loff_t l = -1;
- if (*pos != iter->pos) {
- for (p = t_next(m, p, &l); p && l < *pos; p = t_next(m, p, &l))
- ;
- } else {
- l = *pos;
- p = t_next(m, p, &l);
- }
+ if (*pos > iter->pos)
+ *pos = iter->pos;
+
+ l = *pos;
+ p = t_next(m, p, &l);
return p;
}
@@ -799,15 +792,21 @@ static void t_stop(struct seq_file *m, void *p)
static int t_show(struct seq_file *m, void *v)
{
+ struct ftrace_iterator *iter = m->private;
struct dyn_ftrace *rec = v;
char str[KSYM_SYMBOL_LEN];
+ int ret = 0;
if (!rec)
return 0;
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
- seq_printf(m, "%s\n", str);
+ ret = seq_printf(m, "%s\n", str);
+ if (ret < 0) {
+ iter->pos--;
+ iter->idx--;
+ }
return 0;
}
@@ -833,7 +832,7 @@ ftrace_avail_open(struct inode *inode, struct file *file)
return -ENOMEM;
iter->pg = ftrace_pages_start;
- iter->pos = -1;
+ iter->pos = 0;
ret = seq_open(file, &show_ftrace_seq_ops);
if (!ret) {
@@ -920,7 +919,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
if (file->f_mode & FMODE_READ) {
iter->pg = ftrace_pages_start;
- iter->pos = -1;
+ iter->pos = 0;
iter->flags = enable ? FTRACE_ITER_FILTER :
FTRACE_ITER_NOTRACE;
@@ -1211,7 +1210,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
mutex_lock(&ftrace_sysctl_lock);
mutex_lock(&ftrace_start_lock);
- if (iter->filtered && ftrace_start_up && ftrace_enabled)
+ if (ftrace_start_up && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
mutex_unlock(&ftrace_start_lock);
mutex_unlock(&ftrace_sysctl_lock);
@@ -1298,7 +1297,8 @@ static __init int ftrace_init_debugfs(void)
fs_initcall(ftrace_init_debugfs);
-static int ftrace_convert_nops(unsigned long *start,
+static int ftrace_convert_nops(struct module *mod,
+ unsigned long *start,
unsigned long *end)
{
unsigned long *p;
@@ -1309,23 +1309,32 @@ static int ftrace_convert_nops(unsigned long *start,
p = start;
while (p < end) {
addr = ftrace_call_adjust(*p++);
+ /*
+ * Some architecture linkers will pad between
+ * the different mcount_loc sections of different
+ * object files to satisfy alignments.
+ * Skip any NULL pointers.
+ */
+ if (!addr)
+ continue;
ftrace_record_ip(addr);
}
/* disable interrupts to prevent kstop machine */
local_irq_save(flags);
- ftrace_update_code();
+ ftrace_update_code(mod);
local_irq_restore(flags);
mutex_unlock(&ftrace_start_lock);
return 0;
}
-void ftrace_init_module(unsigned long *start, unsigned long *end)
+void ftrace_init_module(struct module *mod,
+ unsigned long *start, unsigned long *end)
{
if (ftrace_disabled || start == end)
return;
- ftrace_convert_nops(start, end);
+ ftrace_convert_nops(mod, start, end);
}
extern unsigned long __start_mcount_loc[];
@@ -1355,7 +1364,8 @@ void __init ftrace_init(void)
last_ftrace_enabled = ftrace_enabled = 1;
- ret = ftrace_convert_nops(__start_mcount_loc,
+ ret = ftrace_convert_nops(NULL,
+ __start_mcount_loc,
__stop_mcount_loc);
return;
@@ -1411,10 +1421,17 @@ int register_ftrace_function(struct ftrace_ops *ops)
return -1;
mutex_lock(&ftrace_sysctl_lock);
+
+ if (ftrace_tracing_type == FTRACE_TYPE_RETURN) {
+ ret = -EBUSY;
+ goto out;
+ }
+
ret = __register_ftrace_function(ops);
ftrace_startup();
- mutex_unlock(&ftrace_sysctl_lock);
+out:
+ mutex_unlock(&ftrace_sysctl_lock);
return ret;
}
@@ -1480,16 +1497,45 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
}
#ifdef CONFIG_FUNCTION_RET_TRACER
+
+/* The callback that hooks the return of a function */
trace_function_return_t ftrace_function_return =
(trace_function_return_t)ftrace_stub;
-void register_ftrace_return(trace_function_return_t func)
+
+int register_ftrace_return(trace_function_return_t func)
{
+ int ret = 0;
+
+ mutex_lock(&ftrace_sysctl_lock);
+
+ /*
+ * Don't launch return tracing if normal function
+ * tracing is already running.
+ */
+ if (ftrace_trace_function != ftrace_stub) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ftrace_tracing_type = FTRACE_TYPE_RETURN;
ftrace_function_return = func;
+ ftrace_startup();
+
+out:
+ mutex_unlock(&ftrace_sysctl_lock);
+ return ret;
}
void unregister_ftrace_return(void)
{
+ mutex_lock(&ftrace_sysctl_lock);
+
ftrace_function_return = (trace_function_return_t)ftrace_stub;
+ ftrace_shutdown();
+ /* Restore normal tracing type */
+ ftrace_tracing_type = FTRACE_TYPE_ENTER;
+
+ mutex_unlock(&ftrace_sysctl_lock);
}
#endif
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4a904623e05..396fda034e3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1051,7 +1051,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
* Need to use raw, since this must be called before the
* recursive protection is performed.
*/
- raw_local_irq_save(flags);
+ local_irq_save(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
@@ -1062,7 +1062,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
}
atomic_dec(&data->disabled);
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
}
#ifdef CONFIG_FUNCTION_RET_TRACER
@@ -2638,8 +2638,11 @@ static int tracing_set_tracer(char *buf)
current_trace->reset(tr);
current_trace = t;
- if (t->init)
- t->init(tr);
+ if (t->init) {
+ ret = t->init(tr);
+ if (ret)
+ goto out;
+ }
trace_branch_enable(tr);
out:
@@ -2655,6 +2658,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
char buf[max_tracer_type_len+1];
int i;
size_t ret;
+ int err;
+
+ ret = cnt;
if (cnt > max_tracer_type_len)
cnt = max_tracer_type_len;
@@ -2668,12 +2674,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
buf[i] = 0;
- ret = tracing_set_tracer(buf);
- if (!ret)
- ret = cnt;
+ err = tracing_set_tracer(buf);
+ if (err)
+ return err;
- if (ret > 0)
- filp->f_pos += ret;
+ filp->f_pos += ret;
return ret;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 790ea8c0e1f..cdbd5cc22be 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -264,7 +264,8 @@ enum print_line_t {
*/
struct tracer {
const char *name;
- void (*init)(struct trace_array *tr);
+	/* A tracer should emit a warning if its init() fails */
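+	/*
+	 * A minimal init() under the new contract (hypothetical sketch;
+	 * return 0 on success or a negative errno on failure):
+	 *
+	 *	static int my_tracer_init(struct trace_array *tr)
+	 *	{
+	 *		int cpu;
+	 *
+	 *		for_each_online_cpu(cpu)
+	 *			tracing_reset(tr, cpu);
+	 *		return 0;
+	 *	}
+	 */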
+ int (*init)(struct trace_array *tr);
void (*reset)(struct trace_array *tr);
void (*start)(struct trace_array *tr);
void (*stop)(struct trace_array *tr);
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index cb333b7fd11..a4fa2c57e34 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -47,7 +47,7 @@ static void reset_boot_trace(struct trace_array *tr)
tracing_reset(tr, cpu);
}
-static void boot_trace_init(struct trace_array *tr)
+static int boot_trace_init(struct trace_array *tr)
{
int cpu;
boot_trace = tr;
@@ -56,6 +56,7 @@ static void boot_trace_init(struct trace_array *tr)
tracing_reset(tr, cpu);
tracing_sched_switch_assign_trace(tr);
+ return 0;
}
static enum print_line_t
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 2511e32572c..23f9b02ce96 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -125,7 +125,7 @@ static void stop_branch_trace(struct trace_array *tr)
disable_branch_tracing();
}
-static void branch_trace_init(struct trace_array *tr)
+static int branch_trace_init(struct trace_array *tr)
{
int cpu;
@@ -133,6 +133,7 @@ static void branch_trace_init(struct trace_array *tr)
tracing_reset(tr, cpu);
start_branch_trace(tr);
+ return 0;
}
static void branch_trace_reset(struct trace_array *tr)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8693b7a0a5b..e74f6d0a321 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -42,9 +42,10 @@ static void stop_function_trace(struct trace_array *tr)
tracing_stop_cmdline_record();
}
-static void function_trace_init(struct trace_array *tr)
+static int function_trace_init(struct trace_array *tr)
{
start_function_trace(tr);
+ return 0;
}
static void function_trace_reset(struct trace_array *tr)
diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
index 7680b21537d..a68564af022 100644
--- a/kernel/trace/trace_functions_return.c
+++ b/kernel/trace/trace_functions_return.c
@@ -14,28 +14,18 @@
#include "trace.h"
-static void start_return_trace(struct trace_array *tr)
-{
- register_ftrace_return(&trace_function_return);
-}
-
-static void stop_return_trace(struct trace_array *tr)
-{
- unregister_ftrace_return();
-}
-
-static void return_trace_init(struct trace_array *tr)
+static int return_trace_init(struct trace_array *tr)
{
int cpu;
for_each_online_cpu(cpu)
tracing_reset(tr, cpu);
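+	/* Fails with -EBUSY if an entry tracer already owns the hook. */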
- start_return_trace(tr);
+ return register_ftrace_return(&trace_function_return);
}
static void return_trace_reset(struct trace_array *tr)
{
- stop_return_trace(tr);
+ unregister_ftrace_return();
}
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index d919d4eaa7c..7c2e326bbc8 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -416,11 +416,12 @@ static void irqsoff_tracer_close(struct trace_iterator *iter)
}
#ifdef CONFIG_IRQSOFF_TRACER
-static void irqsoff_tracer_init(struct trace_array *tr)
+static int irqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF;
__irqsoff_tracer_init(tr);
+ return 0;
}
static struct tracer irqsoff_tracer __read_mostly =
{
@@ -442,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly =
#endif
#ifdef CONFIG_PREEMPT_TRACER
-static void preemptoff_tracer_init(struct trace_array *tr)
+static int preemptoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_PREEMPT_OFF;
__irqsoff_tracer_init(tr);
+ return 0;
}
static struct tracer preemptoff_tracer __read_mostly =
@@ -471,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly =
#if defined(CONFIG_IRQSOFF_TRACER) && \
defined(CONFIG_PREEMPT_TRACER)
-static void preemptirqsoff_tracer_init(struct trace_array *tr)
+static int preemptirqsoff_tracer_init(struct trace_array *tr)
{
trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
__irqsoff_tracer_init(tr);
+ return 0;
}
static struct tracer preemptirqsoff_tracer __read_mostly =
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 51bcf370215..433d650eda9 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -30,13 +30,14 @@ static void mmio_reset_data(struct trace_array *tr)
tracing_reset(tr, cpu);
}
-static void mmio_trace_init(struct trace_array *tr)
+static int mmio_trace_init(struct trace_array *tr)
{
pr_debug("in %s\n", __func__);
mmio_trace_array = tr;
mmio_reset_data(tr);
enable_mmiotrace();
+ return 0;
}
static void mmio_trace_reset(struct trace_array *tr)
diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c
index 2ef1d227e7d..0e77415caed 100644
--- a/kernel/trace/trace_nop.c
+++ b/kernel/trace/trace_nop.c
@@ -24,7 +24,7 @@ static void stop_nop_trace(struct trace_array *tr)
/* Nothing to do! */
}
-static void nop_trace_init(struct trace_array *tr)
+static int nop_trace_init(struct trace_array *tr)
{
int cpu;
ctx_trace = tr;
@@ -33,6 +33,7 @@ static void nop_trace_init(struct trace_array *tr)
tracing_reset(tr, cpu);
start_nop_trace(tr);
+ return 0;
}
static void nop_trace_reset(struct trace_array *tr)
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index be35bdfe2e3..863390557b4 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -206,10 +206,11 @@ static void stop_sched_trace(struct trace_array *tr)
tracing_stop_sched_switch_record();
}
-static void sched_switch_trace_init(struct trace_array *tr)
+static int sched_switch_trace_init(struct trace_array *tr)
{
ctx_trace = tr;
start_sched_trace(tr);
+ return 0;
}
static void sched_switch_trace_reset(struct trace_array *tr)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 983f2b1478c..0067b49746c 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -331,10 +331,11 @@ static void stop_wakeup_tracer(struct trace_array *tr)
unregister_trace_sched_wakeup(probe_wakeup);
}
-static void wakeup_tracer_init(struct trace_array *tr)
+static int wakeup_tracer_init(struct trace_array *tr)
{
wakeup_trace = tr;
start_wakeup_tracer(tr);
+ return 0;
}
static void wakeup_tracer_reset(struct trace_array *tr)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 24e6e075e6d..88c8eb70f54 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -52,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
int cpu, ret = 0;
/* Don't allow flipping of max traces now */
- raw_local_irq_save(flags);
+ local_irq_save(flags);
__raw_spin_lock(&ftrace_max_lock);
cnt = ring_buffer_entries(tr->buffer);
@@ -63,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
break;
}
__raw_spin_unlock(&ftrace_max_lock);
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
if (count)
*count = cnt;
@@ -71,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
return ret;
}
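+/*
+ * Every selftest below now checks its tracer's init() return value
+ * and reports failures through this helper.
+ */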
+static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
+{
+ printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n",
+ trace->name, init_ret);
+}
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -111,7 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
ftrace_set_filter(func_name, strlen(func_name), 1);
/* enable tracing */
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ goto out;
+ }
/* Sleep for a 1/10 of a second */
msleep(100);
@@ -181,7 +190,12 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
ftrace_enabled = 1;
tracer_enabled = 1;
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ goto out;
+ }
+
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
@@ -223,7 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* reset the max latency */
tracing_max_latency = 0;
/* disable interrupts for a bit */
@@ -272,7 +291,12 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
}
/* start the tracing */
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* reset the max latency */
tracing_max_latency = 0;
/* disable preemption for a bit */
@@ -321,7 +345,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
}
/* start the tracing */
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ goto out;
+ }
/* reset the max latency */
tracing_max_latency = 0;
@@ -449,7 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
wait_for_completion(&isrt);
/* start the tracing */
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* reset the max latency */
tracing_max_latency = 0;
@@ -505,7 +538,12 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
@@ -532,7 +570,12 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+		return ret;
+ }
+
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
@@ -554,7 +597,12 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
int ret;
/* start the tracing */
- trace->init(tr);
+ ret = trace->init(tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
/* Sleep for a 1/10 of a second */
msleep(100);
/* stop the tracing. */
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 05f753422ae..54960edb96d 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -261,11 +261,12 @@ static void stop_stack_trace(struct trace_array *tr)
mutex_unlock(&sample_timer_lock);
}
-static void stack_trace_init(struct trace_array *tr)
+static int stack_trace_init(struct trace_array *tr)
{
sysprof_trace = tr;
start_stack_trace(tr);
+ return 0;
}
static void stack_trace_reset(struct trace_array *tr)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index e96590f17de..79602740bbb 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -262,6 +262,7 @@ static void set_tracepoint(struct tracepoint_entry **entry,
static void disable_tracepoint(struct tracepoint *elem)
{
elem->state = 0;
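+	/*
+	 * Publish an empty probe list as well, so a racing caller that
+	 * still sees the old state cannot walk stale probes.
+	 */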
+ rcu_assign_pointer(elem->funcs, NULL);
}
/**
@@ -540,3 +541,36 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter)
iter->tracepoint = NULL;
}
EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
+
+#ifdef CONFIG_MODULES
+
+int tracepoint_module_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+
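+	/*
+	 * Both on load and on unload, resynchronize the module's
+	 * tracepoint section with the table of registered probes.
+	 */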
+ switch (val) {
+ case MODULE_STATE_COMING:
+ tracepoint_update_probe_range(mod->tracepoints,
+ mod->tracepoints + mod->num_tracepoints);
+ break;
+ case MODULE_STATE_GOING:
+ tracepoint_update_probe_range(mod->tracepoints,
+ mod->tracepoints + mod->num_tracepoints);
+ break;
+ }
+ return 0;
+}
+
+struct notifier_block tracepoint_module_nb = {
+ .notifier_call = tracepoint_module_notify,
+ .priority = 0,
+};
+
+static int init_tracepoints(void)
+{
+ return register_module_notifier(&tracepoint_module_nb);
+}
+__initcall(init_tracepoints);
+
+#endif /* CONFIG_MODULES */
diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h
index 0216b55bd64..01724e04c55 100644
--- a/samples/tracepoints/tp-samples-trace.h
+++ b/samples/tracepoints/tp-samples-trace.h
@@ -4,10 +4,10 @@
#include <linux/proc_fs.h> /* for struct inode and struct file */
#include <linux/tracepoint.h>
-DEFINE_TRACE(subsys_event,
+DECLARE_TRACE(subsys_event,
TPPROTO(struct inode *inode, struct file *file),
TPARGS(inode, file));
-DEFINE_TRACE(subsys_eventb,
+DECLARE_TRACE(subsys_eventb,
TPPROTO(void),
TPARGS());
#endif
diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c
index 55abfdda4bd..e3a964889dc 100644
--- a/samples/tracepoints/tracepoint-probe-sample.c
+++ b/samples/tracepoints/tracepoint-probe-sample.c
@@ -46,6 +46,7 @@ void __exit tp_sample_trace_exit(void)
{
unregister_trace_subsys_eventb(probe_subsys_eventb);
unregister_trace_subsys_event(probe_subsys_event);
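+	/* Wait for any in-flight probe callers to exit before unload. */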
+ tracepoint_synchronize_unregister();
}
module_exit(tp_sample_trace_exit);
diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c
index 5e9fcf4afff..685a5acb456 100644
--- a/samples/tracepoints/tracepoint-probe-sample2.c
+++ b/samples/tracepoints/tracepoint-probe-sample2.c
@@ -33,6 +33,7 @@ module_init(tp_sample_trace_init);
void __exit tp_sample_trace_exit(void)
{
unregister_trace_subsys_event(probe_subsys_event);
+ tracepoint_synchronize_unregister();
}
module_exit(tp_sample_trace_exit);
diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c
index 4ae4b7fcc04..00d169792a3 100644
--- a/samples/tracepoints/tracepoint-sample.c
+++ b/samples/tracepoints/tracepoint-sample.c
@@ -13,6 +13,9 @@
#include <linux/proc_fs.h>
#include "tp-samples-trace.h"
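+/*
+ * The header now only declares these events; each tracepoint must
+ * be defined in exactly one compilation unit.
+ */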
+DEFINE_TRACE(subsys_event);
+DEFINE_TRACE(subsys_eventb);
+
struct proc_dir_entry *pentry_example;
static int my_open(struct inode *inode, struct file *file)