From fb52607afcd0629776f1dc9e657647ceae81dd50 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 25 Nov 2008 21:07:04 +0100 Subject: tracing/function-return-tracer: change the name into function-graph-tracer Impact: cleanup This patch changes the name of the "return function tracer" into function-graph-tracer which is a more suitable name for a tracing which makes one able to retrieve the ordered call stack during the code flow. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 19 ++++--- kernel/trace/Makefile | 2 +- kernel/trace/ftrace.c | 26 +++++----- kernel/trace/trace.c | 18 +++---- kernel/trace/trace.h | 12 ++--- kernel/trace/trace_functions_graph.c | 98 ++++++++++++++++++++++++++++++++++++ 6 files changed, 138 insertions(+), 37 deletions(-) create mode 100644 kernel/trace/trace_functions_graph.c (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 620feadff67..eb9b901e077 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,7 +12,7 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool -config HAVE_FUNCTION_RET_TRACER +config HAVE_FUNCTION_GRAPH_TRACER bool config HAVE_FUNCTION_TRACE_MCOUNT_TEST @@ -63,15 +63,18 @@ config FUNCTION_TRACER (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. -config FUNCTION_RET_TRACER - bool "Kernel Function return Tracer" - depends on HAVE_FUNCTION_RET_TRACER +config FUNCTION_GRAPH_TRACER + bool "Kernel Function Graph Tracer" + depends on HAVE_FUNCTION_GRAPH_TRACER depends on FUNCTION_TRACER help - Enable the kernel to trace a function at its return. - It's first purpose is to trace the duration of functions. - This is done by setting the current return address on the thread - info structure of the current task. + Enable the kernel to trace a function at both its return + and its entry. + It's first purpose is to trace the duration of functions and + draw a call graph for each thread with some informations like + the return value. + This is done by setting the current return address on the current + task structure into a stack of calls. config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cef4bcb4e82..08c5fe6ddc0 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o -obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_BTS_TRACER) += trace_bts.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 53042f118f2..9e19976af72 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -395,11 +395,11 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ip, fl; unsigned long ftrace_addr; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_tracing_type == FTRACE_TYPE_ENTER) ftrace_addr = (unsigned long)ftrace_caller; else - ftrace_addr = (unsigned long)ftrace_return_caller; + ftrace_addr = (unsigned long)ftrace_graph_caller; #else ftrace_addr = (unsigned long)ftrace_caller; #endif @@ -1496,13 +1496,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, return ret; } -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_retfunc_active; /* The callback that hooks the return of a function */ -trace_function_return_t ftrace_function_return = - (trace_function_return_t)ftrace_stub; +trace_function_graph_t ftrace_graph_function = + (trace_function_graph_t)ftrace_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ @@ -1549,7 +1549,7 @@ free: } /* Allocate a return stack for each task */ -static int start_return_tracing(void) +static int start_graph_tracing(void) { struct ftrace_ret_stack **ret_stack_list; int ret; @@ -1569,7 +1569,7 @@ static int start_return_tracing(void) return ret; } -int register_ftrace_return(trace_function_return_t func) +int register_ftrace_graph(trace_function_graph_t func) { int ret = 0; @@ -1584,13 +1584,13 @@ int register_ftrace_return(trace_function_return_t func) goto out; } atomic_inc(&ftrace_retfunc_active); - ret = start_return_tracing(); + ret = start_graph_tracing(); if (ret) { atomic_dec(&ftrace_retfunc_active); goto out; } ftrace_tracing_type = FTRACE_TYPE_RETURN; - ftrace_function_return = func; + ftrace_graph_function = func; ftrace_startup(); out: @@ -1598,12 +1598,12 @@ out: return ret; } -void unregister_ftrace_return(void) +void unregister_ftrace_graph(void) { mutex_lock(&ftrace_sysctl_lock); atomic_dec(&ftrace_retfunc_active); - ftrace_function_return = (trace_function_return_t)ftrace_stub; + ftrace_graph_function = (trace_function_graph_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; @@ -1612,7 +1612,7 @@ void unregister_ftrace_return(void) } /* Allocate a return stack for newly created task */ -void ftrace_retfunc_init_task(struct task_struct *t) +void ftrace_graph_init_task(struct task_struct *t) { if (atomic_read(&ftrace_retfunc_active)) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH @@ -1626,7 +1626,7 @@ void ftrace_retfunc_init_task(struct task_struct *t) t->ret_stack = NULL; } -void ftrace_retfunc_exit_task(struct task_struct *t) +void ftrace_graph_exit_task(struct task_struct *t) { struct ftrace_ret_stack *ret_stack = t->ret_stack; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8df8fdd69c9..f21ab2c68fd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -878,15 +878,15 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -static void __trace_function_return(struct trace_array *tr, +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void __trace_function_graph(struct trace_array *tr, struct trace_array_cpu *data, - struct ftrace_retfunc *trace, + struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_ret_entry *entry; + struct ftrace_graph_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -1177,8 +1177,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) local_irq_restore(flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -void trace_function_return(struct ftrace_retfunc *trace) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void trace_function_graph(struct ftrace_graph_ret *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,12 +1193,12 @@ void trace_function_return(struct ftrace_retfunc *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_return(tr, data, trace, flags, pc); + __trace_function_graph(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); } -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -2001,7 +2001,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_FN_RET: { - return print_return_function(iter); + return print_graph_function(iter); break; } case TRACE_BRANCH: { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3abd645e8af..72b5ef86876 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -57,7 +57,7 @@ struct ftrace_entry { }; /* Function return entry */ -struct ftrace_ret_entry { +struct ftrace_graph_entry { struct trace_entry ent; unsigned long ip; unsigned long parent_ip; @@ -264,7 +264,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ + IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ __ftrace_bad_type(); \ } while (0) @@ -398,7 +398,7 @@ void trace_function(struct trace_array *tr, unsigned long parent_ip, unsigned long flags, int pc); void -trace_function_return(struct ftrace_retfunc *trace); +trace_function_graph(struct ftrace_graph_ret *trace); void trace_bts(struct trace_array *tr, unsigned long from, @@ -489,11 +489,11 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ -#ifdef CONFIG_FUNCTION_RET_TRACER -extern enum print_line_t print_return_function(struct trace_iterator *iter); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern enum print_line_t print_graph_function(struct trace_iterator *iter); #else static inline enum print_line_t -print_return_function(struct trace_iterator *iter) +print_graph_function(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c new file mode 100644 index 00000000000..f5bad4624d2 --- /dev/null +++ b/kernel/trace/trace_functions_graph.c @@ -0,0 +1,98 @@ +/* + * + * Function graph tracer. + * Copyright (c) 2008 Frederic Weisbecker + * Mostly borrowed from function tracer which + * is Copyright (c) Steven Rostedt + * + */ +#include +#include +#include +#include + +#include "trace.h" + + +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +static struct tracer_opt trace_opts[] = { + /* Display overruns or not */ + { TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) }, + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, /* Don't display overruns by default */ + .opts = trace_opts +}; + + +static int graph_trace_init(struct trace_array *tr) +{ + int cpu; + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + return register_ftrace_graph(&trace_function_graph); +} + +static void graph_trace_reset(struct trace_array *tr) +{ + unregister_ftrace_graph(); +} + + +enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_graph_entry *field; + int ret; + + if (entry->type == TRACE_FN_RET) { + trace_assign_type(field, entry); + ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = seq_print_ip_sym(s, field->ip, + trace_flags & TRACE_ITER_SYM_MASK); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " (%llu ns)", + field->rettime - field->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)", + field->overrun); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer graph_trace __read_mostly = { + .name = "function-graph", + .init = graph_trace_init, + .reset = graph_trace_reset, + .print_line = print_graph_function, + .flags = &tracer_flags, +}; + +static __init int init_graph_trace(void) +{ + return register_tracer(&graph_trace); +} + +device_initcall(init_graph_trace); -- cgit v1.2.3 From 287b6e68ca7209caec40b2f44f837c580a413bae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Nov 2008 00:57:25 +0100 Subject: tracing/function-return-tracer: set a more human readable output Impact: feature This patch sets a C-like output for the function graph tracing. For this aim, we now call two handler for each function: one on the entry and one other on return. This way we can draw a well-ordered call stack. The pid of the previous trace is loosely stored to be compared against the one of the current trace to see if there were a context switch. Without this little feature, the call tree would seem broken at some locations. We could use the sched_tracer to capture these sched_events but this way of processing is much more simpler. 2 spaces have been chosen for indentation to fit the screen while deep calls. The time of execution in nanosecs is printed just after closed braces, it seems more easy this way to find the corresponding function. If the time was printed as a first column, it would be not so easy to find the corresponding function if it is called on a deep depth. I plan to output the return value but on 32 bits CPU, the return value can be 32 or 64, and its difficult to guess on which case we are. I don't know what would be the better solution on X86-32: only print eax (low-part) or even edx (high-part). Actually it's thee same problem when a function return a 8 bits value, the high part of eax could contain junk values... Here is an example of trace: sys_read() { fget_light() { } 526 vfs_read() { rw_verify_area() { security_file_permission() { cap_file_permission() { } 519 } 1564 } 2640 do_sync_read() { pipe_read() { __might_sleep() { } 511 pipe_wait() { prepare_to_wait() { } 760 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { update_min_vruntime() { } 504 } 1587 clear_buddies() { } 512 add_cfs_task_weight() { } 519 update_min_vruntime() { } 511 } 5602 dequeue_entity() { update_curr() { update_min_vruntime() { } 496 } 1631 clear_buddies() { } 496 update_min_vruntime() { } 527 } 4580 hrtick_update() { hrtick_start_fair() { } 488 } 1489 } 13700 } 14949 } 16016 msecs_to_jiffies() { } 496 put_prev_task_fair() { } 504 pick_next_task_fair() { } 489 pick_next_task_rt() { } 496 pick_next_task_fair() { } 489 pick_next_task_idle() { } 489 ------------8<---------- thread 4 ------------8<---------- finish_task_switch() { } 1203 do_softirq() { __do_softirq() { __local_bh_disable() { } 669 rcu_process_callbacks() { __rcu_process_callbacks() { cpu_quiet() { rcu_start_batch() { } 503 } 1647 } 3128 __rcu_process_callbacks() { } 542 } 5362 _local_bh_enable() { } 587 } 8880 } 9986 kthread_should_stop() { } 669 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { calc_delta_mine() { } 511 update_min_vruntime() { } 511 } 2813 Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 30 +++++----- kernel/trace/trace.c | 67 ++++++++++++++++++---- kernel/trace/trace.h | 28 +++++---- kernel/trace/trace_functions_graph.c | 104 ++++++++++++++++++++++++++-------- kernel/trace/trace_functions_return.c | 98 -------------------------------- 5 files changed, 168 insertions(+), 159 deletions(-) delete mode 100644 kernel/trace/trace_functions_return.c (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9e19976af72..7e2d3b91692 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1498,12 +1498,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static atomic_t ftrace_retfunc_active; - -/* The callback that hooks the return of a function */ -trace_function_graph_t ftrace_graph_function = - (trace_function_graph_t)ftrace_stub; +static atomic_t ftrace_graph_active; +/* The callbacks that hook a function */ +trace_func_graph_ret_t ftrace_graph_return = + (trace_func_graph_ret_t)ftrace_stub; +trace_func_graph_ent_t ftrace_graph_entry = + (trace_func_graph_ent_t)ftrace_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -1569,7 +1570,8 @@ static int start_graph_tracing(void) return ret; } -int register_ftrace_graph(trace_function_graph_t func) +int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc) { int ret = 0; @@ -1583,14 +1585,15 @@ int register_ftrace_graph(trace_function_graph_t func) ret = -EBUSY; goto out; } - atomic_inc(&ftrace_retfunc_active); + atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { - atomic_dec(&ftrace_retfunc_active); + atomic_dec(&ftrace_graph_active); goto out; } ftrace_tracing_type = FTRACE_TYPE_RETURN; - ftrace_graph_function = func; + ftrace_graph_return = retfunc; + ftrace_graph_entry = entryfunc; ftrace_startup(); out: @@ -1602,8 +1605,9 @@ void unregister_ftrace_graph(void) { mutex_lock(&ftrace_sysctl_lock); - atomic_dec(&ftrace_retfunc_active); - ftrace_graph_function = (trace_function_graph_t)ftrace_stub; + atomic_dec(&ftrace_graph_active); + ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; + ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; @@ -1614,7 +1618,7 @@ void unregister_ftrace_graph(void) /* Allocate a return stack for newly created task */ void ftrace_graph_init_task(struct task_struct *t) { - if (atomic_read(&ftrace_retfunc_active)) { + if (atomic_read(&ftrace_graph_active)) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); @@ -1638,5 +1642,3 @@ void ftrace_graph_exit_task(struct task_struct *t) } #endif - - diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f21ab2c68fd..9d5f7c94f25 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -879,14 +879,38 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void __trace_function_graph(struct trace_array *tr, +static void __trace_graph_entry(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_graph_ent *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_graph_ent_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_GRAPH_ENT; + entry->graph_ent = *trace; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} + +static void __trace_graph_return(struct trace_array *tr, struct trace_array_cpu *data, struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_graph_entry *entry; + struct ftrace_graph_ret_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -898,12 +922,8 @@ static void __trace_function_graph(struct trace_array *tr, return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_FN_RET; - entry->ip = trace->func; - entry->parent_ip = trace->ret; - entry->rettime = trace->rettime; - entry->calltime = trace->calltime; - entry->overrun = trace->overrun; + entry->ent.type = TRACE_GRAPH_RET; + entry->ret = *trace; ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); } #endif @@ -1178,7 +1198,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -void trace_function_graph(struct ftrace_graph_ret *trace) +void trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,7 +1213,28 @@ void trace_function_graph(struct ftrace_graph_ret *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_graph(tr, data, trace, flags, pc); + __trace_graph_entry(tr, data, trace, flags, pc); + } + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +void trace_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); @@ -2000,9 +2041,11 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } - case TRACE_FN_RET: { + case TRACE_GRAPH_RET: { + return print_graph_function(iter); + } + case TRACE_GRAPH_ENT: { return print_graph_function(iter); - break; } case TRACE_BRANCH: { struct trace_branch *field; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 72b5ef86876..ffe1bb1eb62 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -25,7 +25,8 @@ enum trace_type { TRACE_BRANCH, TRACE_BOOT_CALL, TRACE_BOOT_RET, - TRACE_FN_RET, + TRACE_GRAPH_RET, + TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_BTS, @@ -56,14 +57,16 @@ struct ftrace_entry { unsigned long parent_ip; }; +/* Function call entry */ +struct ftrace_graph_ent_entry { + struct trace_entry ent; + struct ftrace_graph_ent graph_ent; +}; + /* Function return entry */ -struct ftrace_graph_entry { - struct trace_entry ent; - unsigned long ip; - unsigned long parent_ip; - unsigned long long calltime; - unsigned long long rettime; - unsigned long overrun; +struct ftrace_graph_ret_entry { + struct trace_entry ent; + struct ftrace_graph_ret ret; }; extern struct tracer boot_tracer; @@ -264,7 +267,10 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\ + IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ + TRACE_GRAPH_ENT); \ + IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ + TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ __ftrace_bad_type(); \ } while (0) @@ -397,9 +403,9 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); -void -trace_function_graph(struct ftrace_graph_ret *trace); +void trace_graph_return(struct ftrace_graph_ret *trace); +void trace_graph_entry(struct ftrace_graph_ent *trace); void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index f5bad4624d2..b6f0cc2a00c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -13,6 +13,7 @@ #include "trace.h" +#define TRACE_GRAPH_INDENT 2 #define TRACE_GRAPH_PRINT_OVERRUN 0x1 static struct tracer_opt trace_opts[] = { @@ -26,6 +27,8 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; +/* pid on the last trace processed */ +static pid_t last_pid = -1; static int graph_trace_init(struct trace_array *tr) { @@ -33,7 +36,8 @@ static int graph_trace_init(struct trace_array *tr) for_each_online_cpu(cpu) tracing_reset(tr, cpu); - return register_ftrace_graph(&trace_function_graph); + return register_ftrace_graph(&trace_graph_return, + &trace_graph_entry); } static void graph_trace_reset(struct trace_array *tr) @@ -41,45 +45,97 @@ static void graph_trace_reset(struct trace_array *tr) unregister_ftrace_graph(); } +/* If the pid changed since the last trace, output this event */ +static int verif_pid(struct trace_seq *s, pid_t pid) +{ + if (last_pid != -1 && last_pid == pid) + return 1; -enum print_line_t -print_graph_function(struct trace_iterator *iter) + last_pid = pid; + return trace_seq_printf(s, "\n------------8<---------- thread %d" + " ------------8<----------\n\n", + pid); +} + +static enum print_line_t +print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s, + struct trace_entry *ent) { - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct ftrace_graph_entry *field; + int i; int ret; - if (entry->type == TRACE_FN_RET) { - trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (!verif_pid(s, ent->pid)) + return TRACE_TYPE_PARTIAL_LINE; - ret = seq_print_ip_sym(s, field->ip, - trace_flags & TRACE_ITER_SYM_MASK); + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + + ret = seq_print_ip_sym(s, call->func, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, "() {\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, + struct trace_entry *ent) +{ + int i; + int ret; + + if (!verif_pid(s, ent->pid)) + return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " (%llu ns)", - field->rettime - field->calltime); + for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "} "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)", - field->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + ret = trace_seq_printf(s, "%llu\n", trace->rettime - trace->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, "\n"); + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)\n", + trace->overrun); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + return TRACE_TYPE_HANDLED; +} + +enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; - return TRACE_TYPE_HANDLED; + switch (entry->type) { + case TRACE_GRAPH_ENT: { + struct ftrace_graph_ent_entry *field; + trace_assign_type(field, entry); + return print_graph_entry(&field->graph_ent, s, entry); + } + case TRACE_GRAPH_RET: { + struct ftrace_graph_ret_entry *field; + trace_assign_type(field, entry); + return print_graph_return(&field->ret, s, entry); + } + default: + return TRACE_TYPE_UNHANDLED; } - return TRACE_TYPE_UNHANDLED; } static struct tracer graph_trace __read_mostly = { diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c deleted file mode 100644 index e00d64509c9..00000000000 --- a/kernel/trace/trace_functions_return.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * - * Function return tracer. - * Copyright (c) 2008 Frederic Weisbecker - * Mostly borrowed from function tracer which - * is Copyright (c) Steven Rostedt - * - */ -#include -#include -#include -#include - -#include "trace.h" - - -#define TRACE_RETURN_PRINT_OVERRUN 0x1 -static struct tracer_opt trace_opts[] = { - /* Display overruns or not */ - { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) }, - { } /* Empty entry */ -}; - -static struct tracer_flags tracer_flags = { - .val = 0, /* Don't display overruns by default */ - .opts = trace_opts -}; - - -static int return_trace_init(struct trace_array *tr) -{ - int cpu; - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); - - return register_ftrace_return(&trace_function_return); -} - -static void return_trace_reset(struct trace_array *tr) -{ - unregister_ftrace_return(); -} - - -enum print_line_t -print_return_function(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct ftrace_ret_entry *field; - int ret; - - if (entry->type == TRACE_FN_RET) { - trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = seq_print_ip_sym(s, field->ip, - trace_flags & TRACE_ITER_SYM_MASK); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, " (%llu ns)", - field->rettime - field->calltime); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)", - field->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = trace_seq_printf(s, "\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; - } - return TRACE_TYPE_UNHANDLED; -} - -static struct tracer return_trace __read_mostly = { - .name = "return", - .init = return_trace_init, - .reset = return_trace_reset, - .print_line = print_return_function, - .flags = &tracer_flags, -}; - -static __init int init_return_trace(void) -{ - return register_tracer(&return_trace); -} - -device_initcall(init_return_trace); -- cgit v1.2.3 From df4fc31558dd2a3a30292ddb3a64c2a5befcec73 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:23 -0500 Subject: ftrace: add function tracing to single thread Impact: feature to function trace a single thread This patch adds the ability to function trace a single thread. The file: /debugfs/tracing/set_ftrace_pid contains the pid to trace. Valid pids are any positive integer. Writing any negative number to this file will disable the pid tracing and the function tracer will go back to tracing all of threads. This feature works with both static and dynamic function tracing. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 209 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 183 insertions(+), 26 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7e2d3b91692..00d98c65fad 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -47,6 +47,9 @@ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; +/* ftrace_pid_trace >= 0 will only trace threads with this pid */ +static int ftrace_pid_trace = -1; + /* Quick disabling of function tracer. */ int function_trace_stop; @@ -61,6 +64,7 @@ static int ftrace_disabled __read_mostly; static DEFINE_SPINLOCK(ftrace_lock); static DEFINE_MUTEX(ftrace_sysctl_lock); +static DEFINE_MUTEX(ftrace_start_lock); static struct ftrace_ops ftrace_list_end __read_mostly = { @@ -70,6 +74,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly = static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; +ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { @@ -86,6 +91,21 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) }; } +static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) +{ + if (current->pid != ftrace_pid_trace) + return; + + ftrace_pid_function(ip, parent_ip); +} + +static void set_ftrace_pid_function(ftrace_func_t func) +{ + /* do not set ftrace_pid_function to itself! */ + if (func != ftrace_pid_func) + ftrace_pid_function = func; +} + /** * clear_ftrace_function - reset the ftrace function * @@ -96,6 +116,7 @@ void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; __ftrace_trace_function = ftrace_stub; + ftrace_pid_function = ftrace_stub; } #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST @@ -128,20 +149,26 @@ static int __register_ftrace_function(struct ftrace_ops *ops) ftrace_list = ops; if (ftrace_enabled) { + ftrace_func_t func; + + if (ops->next == &ftrace_list_end) + func = ops->func; + else + func = ftrace_list_func; + + if (ftrace_pid_trace >= 0) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } + /* * For one func, simply call it directly. * For more than one func, call the chain. */ #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - if (ops->next == &ftrace_list_end) - ftrace_trace_function = ops->func; - else - ftrace_trace_function = ftrace_list_func; + ftrace_trace_function = func; #else - if (ops->next == &ftrace_list_end) - __ftrace_trace_function = ops->func; - else - __ftrace_trace_function = ftrace_list_func; + __ftrace_trace_function = func; ftrace_trace_function = ftrace_test_stop_func; #endif } @@ -182,8 +209,19 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) { /* If we only have one func left, then call that directly */ - if (ftrace_list->next == &ftrace_list_end) - ftrace_trace_function = ftrace_list->func; + if (ftrace_list->next == &ftrace_list_end) { + ftrace_func_t func = ftrace_list->func; + + if (ftrace_pid_trace >= 0) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + ftrace_trace_function = func; +#else + __ftrace_trace_function = func; +#endif + } } out: @@ -192,6 +230,38 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) return ret; } +static void ftrace_update_pid_func(void) +{ + ftrace_func_t func; + + /* should not be called from interrupt context */ + spin_lock(&ftrace_lock); + + if (ftrace_trace_function == ftrace_stub) + goto out; + + func = ftrace_trace_function; + + if (ftrace_pid_trace >= 0) { + set_ftrace_pid_function(func); + func = ftrace_pid_func; + } else { + if (func != ftrace_pid_func) + goto out; + + set_ftrace_pid_function(func); + } + +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + ftrace_trace_function = func; +#else + __ftrace_trace_function = func; +#endif + + out: + spin_unlock(&ftrace_lock); +} + #ifdef CONFIG_DYNAMIC_FTRACE #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD @@ -545,7 +615,19 @@ static void ftrace_run_update_code(int command) static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; -static DEFINE_MUTEX(ftrace_start_lock); + +static void ftrace_startup_enable(int command) +{ + if (saved_ftrace_func != ftrace_trace_function) { + saved_ftrace_func = ftrace_trace_function; + command |= FTRACE_UPDATE_TRACE_FUNC; + } + + if (!command || !ftrace_enabled) + return; + + ftrace_run_update_code(command); +} static void ftrace_startup(void) { @@ -558,16 +640,8 @@ static void ftrace_startup(void) ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; - if (saved_ftrace_func != ftrace_trace_function) { - saved_ftrace_func = ftrace_trace_function; - command |= FTRACE_UPDATE_TRACE_FUNC; - } - - if (!command || !ftrace_enabled) - goto out; + ftrace_startup_enable(command); - ftrace_run_update_code(command); - out: mutex_unlock(&ftrace_start_lock); } @@ -1262,13 +1336,10 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; -static __init int ftrace_init_debugfs(void) +static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { - struct dentry *d_tracer; struct dentry *entry; - d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("available_filter_functions", 0444, d_tracer, NULL, &ftrace_avail_fops); if (!entry) @@ -1295,8 +1366,6 @@ static __init int ftrace_init_debugfs(void) return 0; } -fs_initcall(ftrace_init_debugfs); - static int ftrace_convert_nops(struct module *mod, unsigned long *start, unsigned long *end) @@ -1382,12 +1451,100 @@ static int __init ftrace_nodyn_init(void) } device_initcall(ftrace_nodyn_init); +static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } +static inline void ftrace_startup_enable(int command) { } # define ftrace_startup() do { } while (0) # define ftrace_shutdown() do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ +static ssize_t +ftrace_pid_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + if (ftrace_pid_trace >= 0) + r = sprintf(buf, "%u\n", ftrace_pid_trace); + else + r = sprintf(buf, "no pid\n"); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +ftrace_pid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtol(buf, 10, &val); + if (ret < 0) + return ret; + + mutex_lock(&ftrace_start_lock); + if (ret < 0) { + /* disable pid tracing */ + if (ftrace_pid_trace < 0) + goto out; + ftrace_pid_trace = -1; + + } else { + + if (ftrace_pid_trace == val) + goto out; + + ftrace_pid_trace = val; + } + + /* update the function call */ + ftrace_update_pid_func(); + ftrace_startup_enable(0); + + out: + mutex_unlock(&ftrace_start_lock); + + return cnt; +} + +static struct file_operations ftrace_pid_fops = { + .read = ftrace_pid_read, + .write = ftrace_pid_write, +}; + +static __init int ftrace_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; + + ftrace_init_dyn_debugfs(d_tracer); + + entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, + NULL, &ftrace_pid_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_ftrace_pid' entry\n"); + return 0; +} + +fs_initcall(ftrace_init_debugfs); + /** * ftrace_kill - kill ftrace * -- cgit v1.2.3 From 5a45cfe1c64862e8cd3b0d79d7c4ba71c3118915 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:24 -0500 Subject: ftrace: use code patching for ftrace graph tracer Impact: more efficient code for ftrace graph tracer This patch uses the dynamic patching, when available, to patch the function graph code into the kernel. This patch will ease the way for letting both function tracing and function graph tracing run together. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 00d98c65fad..5f7c8642d58 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -281,6 +281,8 @@ enum { FTRACE_UPDATE_TRACE_FUNC = (1 << 2), FTRACE_ENABLE_MCOUNT = (1 << 3), FTRACE_DISABLE_MCOUNT = (1 << 4), + FTRACE_START_FUNC_RET = (1 << 5), + FTRACE_STOP_FUNC_RET = (1 << 6), }; static int ftrace_filtered; @@ -465,14 +467,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ip, fl; unsigned long ftrace_addr; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (ftrace_tracing_type == FTRACE_TYPE_ENTER) - ftrace_addr = (unsigned long)ftrace_caller; - else - ftrace_addr = (unsigned long)ftrace_graph_caller; -#else ftrace_addr = (unsigned long)ftrace_caller; -#endif ip = rec->ip; @@ -605,6 +600,11 @@ static int __ftrace_modify_code(void *data) if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); + if (*command & FTRACE_START_FUNC_RET) + ftrace_enable_ftrace_graph_caller(); + else if (*command & FTRACE_STOP_FUNC_RET) + ftrace_disable_ftrace_graph_caller(); + return 0; } @@ -629,10 +629,8 @@ static void ftrace_startup_enable(int command) ftrace_run_update_code(command); } -static void ftrace_startup(void) +static void ftrace_startup(int command) { - int command = 0; - if (unlikely(ftrace_disabled)) return; @@ -645,10 +643,8 @@ static void ftrace_startup(void) mutex_unlock(&ftrace_start_lock); } -static void ftrace_shutdown(void) +static void ftrace_shutdown(int command) { - int command = 0; - if (unlikely(ftrace_disabled)) return; @@ -1453,8 +1449,9 @@ device_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) +/* Keep as macros so we do not need to define the commands */ +# define ftrace_startup(command) do { } while (0) +# define ftrace_shutdown(command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ @@ -1585,7 +1582,7 @@ int register_ftrace_function(struct ftrace_ops *ops) } ret = __register_ftrace_function(ops); - ftrace_startup(); + ftrace_startup(0); out: mutex_unlock(&ftrace_sysctl_lock); @@ -1604,7 +1601,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); - ftrace_shutdown(); + ftrace_shutdown(0); mutex_unlock(&ftrace_sysctl_lock); return ret; @@ -1751,7 +1748,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, ftrace_tracing_type = FTRACE_TYPE_RETURN; ftrace_graph_return = retfunc; ftrace_graph_entry = entryfunc; - ftrace_startup(); + ftrace_startup(FTRACE_START_FUNC_RET); out: mutex_unlock(&ftrace_sysctl_lock); @@ -1765,7 +1762,7 @@ void unregister_ftrace_graph(void) atomic_dec(&ftrace_graph_active); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; - ftrace_shutdown(); + ftrace_shutdown(FTRACE_STOP_FUNC_RET); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; -- cgit v1.2.3 From e53a6319cca69111c1643dc9f18f4465d7f1cbf0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:25 -0500 Subject: ftrace: let function tracing and function return run together Impact: feature This patch enables function tracing and function return to run together. I've tested this by enabling the stack tracer and return tracer, where both the function entry and function return are used together with dynamic ftrace. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5f7c8642d58..cbf8b09f63a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -53,9 +53,6 @@ static int ftrace_pid_trace = -1; /* Quick disabling of function tracer. */ int function_trace_stop; -/* By default, current tracing type is normal tracing. */ -enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER; - /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -1576,15 +1573,9 @@ int register_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_sysctl_lock); - if (ftrace_tracing_type == FTRACE_TYPE_RETURN) { - ret = -EBUSY; - goto out; - } - ret = __register_ftrace_function(ops); ftrace_startup(0); -out: mutex_unlock(&ftrace_sysctl_lock); return ret; } @@ -1731,23 +1722,16 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_sysctl_lock); - /* - * Don't launch return tracing if normal function - * tracing is already running. - */ - if (ftrace_trace_function != ftrace_stub) { - ret = -EBUSY; - goto out; - } atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { atomic_dec(&ftrace_graph_active); goto out; } - ftrace_tracing_type = FTRACE_TYPE_RETURN; + ftrace_graph_return = retfunc; ftrace_graph_entry = entryfunc; + ftrace_startup(FTRACE_START_FUNC_RET); out: @@ -1763,8 +1747,6 @@ void unregister_ftrace_graph(void) ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); - /* Restore normal tracing type */ - ftrace_tracing_type = FTRACE_TYPE_ENTER; mutex_unlock(&ftrace_sysctl_lock); } -- cgit v1.2.3 From 660c7f9be96321fc80026d76411bd15e6f418a72 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:26 -0500 Subject: ftrace: add thread comm to function graph tracer Impact: enhancement to function graph tracer Export the trace_find_cmdline so the function graph tracer can use it to print the comms of the threads. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 1 + kernel/trace/trace_functions_graph.c | 21 ++++++++++++++++----- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9d5f7c94f25..5811e0a5f73 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -804,7 +804,7 @@ static void trace_save_cmdline(struct task_struct *tsk) spin_unlock(&trace_cmdline_lock); } -static char *trace_find_cmdline(int pid) +char *trace_find_cmdline(int pid) { char *cmdline = "<...>"; unsigned map; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ffe1bb1eb62..7adacf349ef 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -450,6 +450,7 @@ struct tracer_switch_ops { struct tracer_switch_ops *next; }; +char *trace_find_cmdline(int pid); #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b6f0cc2a00c..bbb81e7b6c4 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -32,29 +32,40 @@ static pid_t last_pid = -1; static int graph_trace_init(struct trace_array *tr) { - int cpu; + int cpu, ret; + for_each_online_cpu(cpu) tracing_reset(tr, cpu); - return register_ftrace_graph(&trace_graph_return, + ret = register_ftrace_graph(&trace_graph_return, &trace_graph_entry); + if (ret) + return ret; + tracing_start_cmdline_record(); + + return 0; } static void graph_trace_reset(struct trace_array *tr) { - unregister_ftrace_graph(); + tracing_stop_cmdline_record(); + unregister_ftrace_graph(); } /* If the pid changed since the last trace, output this event */ static int verif_pid(struct trace_seq *s, pid_t pid) { + char *comm; + if (last_pid != -1 && last_pid == pid) return 1; last_pid = pid; - return trace_seq_printf(s, "\n------------8<---------- thread %d" + comm = trace_find_cmdline(pid); + + return trace_seq_printf(s, "\n------------8<---------- thread %s-%d" " ------------8<----------\n\n", - pid); + comm, pid); } static enum print_line_t -- cgit v1.2.3 From 437f24fb897d409a9978eb71ecfaf279dcd94acd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:27 -0500 Subject: ftrace: add cpu annotation for function graph tracer Impact: enhancement for function graph tracer When run on a SMP box, the function graph tracer is confusing because it shows the different CPUS as changes in the trace. This patch adds the annotation of 'CPU[###]' where ### is a three digit number. The output will look similar to this: CPU[001] dput() { CPU[000] } 726 CPU[001] } 487 CPU[000] do_softirq() { CPU[001] } 2221 CPU[000] __do_softirq() { CPU[000] __local_bh_disable() { CPU[001] unroll_tree_refs() { CPU[000] } 569 CPU[001] } 501 CPU[000] rcu_process_callbacks() { CPU[001] kfree() { What makes this nice is that now you can grep the file and produce readable format for a particular CPU. # cat /debug/tracing/trace > /tmp/trace # grep '^CPU\[000\]' /tmp/trace > /tmp/trace0 # grep '^CPU\[001\]' /tmp/trace > /tmp/trace1 Will give you: # head /tmp/trace0 CPU[000] ------------8<---------- thread sshd-3899 ------------8<---------- CPU[000] inotify_dentry_parent_queue_event() { CPU[000] } 2531 CPU[000] inotify_inode_queue_event() { CPU[000] } 505 CPU[000] } 69626 CPU[000] } 73089 CPU[000] audit_syscall_exit() { CPU[000] path_put() { CPU[000] dput() { # head /tmp/trace1 CPU[001] ------------8<---------- thread pcscd-3446 ------------8<---------- CPU[001] } 4186 CPU[001] dput() { CPU[001] } 543 CPU[001] vfs_permission() { CPU[001] inode_permission() { CPU[001] shmem_permission() { CPU[001] generic_permission() { CPU[001] } 501 CPU[001] } 2205 Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_functions_graph.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index bbb81e7b6c4..d31d695174a 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -28,7 +28,7 @@ static struct tracer_flags tracer_flags = { }; /* pid on the last trace processed */ -static pid_t last_pid = -1; +static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 }; static int graph_trace_init(struct trace_array *tr) { @@ -53,29 +53,34 @@ static void graph_trace_reset(struct trace_array *tr) } /* If the pid changed since the last trace, output this event */ -static int verif_pid(struct trace_seq *s, pid_t pid) +static int verif_pid(struct trace_seq *s, pid_t pid, int cpu) { char *comm; - if (last_pid != -1 && last_pid == pid) + if (last_pid[cpu] != -1 && last_pid[cpu] == pid) return 1; - last_pid = pid; + last_pid[cpu] = pid; comm = trace_find_cmdline(pid); - return trace_seq_printf(s, "\n------------8<---------- thread %s-%d" + return trace_seq_printf(s, "\nCPU[%03d]" + " ------------8<---------- thread %s-%d" " ------------8<----------\n\n", - comm, pid); + cpu, comm, pid); } static enum print_line_t print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s, - struct trace_entry *ent) + struct trace_entry *ent, int cpu) { int i; int ret; - if (!verif_pid(s, ent->pid)) + if (!verif_pid(s, ent->pid, cpu)) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, "CPU[%03d] ", cpu); + if (!ret) return TRACE_TYPE_PARTIAL_LINE; for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { @@ -96,12 +101,16 @@ print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s, static enum print_line_t print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, - struct trace_entry *ent) + struct trace_entry *ent, int cpu) { int i; int ret; - if (!verif_pid(s, ent->pid)) + if (!verif_pid(s, ent->pid, cpu)) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, "CPU[%03d] ", cpu); + if (!ret) return TRACE_TYPE_PARTIAL_LINE; for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { @@ -137,12 +146,13 @@ print_graph_function(struct trace_iterator *iter) case TRACE_GRAPH_ENT: { struct ftrace_graph_ent_entry *field; trace_assign_type(field, entry); - return print_graph_entry(&field->graph_ent, s, entry); + return print_graph_entry(&field->graph_ent, s, entry, + iter->cpu); } case TRACE_GRAPH_RET: { struct ftrace_graph_ret_entry *field; trace_assign_type(field, entry); - return print_graph_return(&field->ret, s, entry); + return print_graph_return(&field->ret, s, entry, iter->cpu); } default: return TRACE_TYPE_UNHANDLED; -- cgit v1.2.3 From f3f47a6768a29448866da4422b6f6bee485c947f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 23 Nov 2008 16:49:58 -0800 Subject: tracing: add "power-tracer": C/P state tracer to help power optimization Impact: new "power-tracer" ftrace plugin This patch adds a C/P-state ftrace plugin that will generate detailed statistics about the C/P-states that are being used, so that we can look at detailed decisions that the C/P-state code is making, rather than the too high level "average" that we have today. An example way of using this is: mount -t debugfs none /sys/kernel/debug echo cstate > /sys/kernel/debug/tracing/current_tracer echo 1 > /sys/kernel/debug/tracing/tracing_enabled sleep 1 echo 0 > /sys/kernel/debug/tracing/tracing_enabled cat /sys/kernel/debug/tracing/trace | perl scripts/trace/cstate.pl > out.svg Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- kernel/trace/Kconfig | 11 +++ kernel/trace/Makefile | 1 + kernel/trace/trace.h | 7 ++ kernel/trace/trace_power.c | 179 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 198 insertions(+) create mode 100644 kernel/trace/trace_power.c (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 620feadff67..d151aab48ed 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -217,6 +217,17 @@ config BRANCH_TRACER Say N if unsure. +config POWER_TRACER + bool "Trace power consumption behavior" + depends on DEBUG_KERNEL + depends on X86 + select TRACING + help + This tracer helps developers to analyze and optimize the kernels + power management decisions, specifically the C-state and P-state + behavior. + + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cef4bcb4e82..acaa06553ec 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -32,5 +32,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_BTS_TRACER) += trace_bts.o +obj-$(CONFIG_POWER_TRACER) += trace_power.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3abd645e8af..4c453778a6a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -28,6 +28,7 @@ enum trace_type { TRACE_FN_RET, TRACE_USER_STACK, TRACE_BTS, + TRACE_POWER, __TRACE_LAST_TYPE }; @@ -160,6 +161,11 @@ struct bts_entry { unsigned long to; }; +struct trace_power { + struct trace_entry ent; + struct power_trace state_data; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -266,6 +272,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ + IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c new file mode 100644 index 00000000000..a7172a352f6 --- /dev/null +++ b/kernel/trace/trace_power.c @@ -0,0 +1,179 @@ +/* + * ring buffer based C-state tracer + * + * Arjan van de Ven + * Copyright (C) 2008 Intel Corporation + * + * Much is borrowed from trace_boot.c which is + * Copyright (C) 2008 Frederic Weisbecker + * + */ + +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *power_trace; +static int __read_mostly trace_power_enabled; + + +static void start_power_trace(struct trace_array *tr) +{ + trace_power_enabled = 1; +} + +static void stop_power_trace(struct trace_array *tr) +{ + trace_power_enabled = 0; +} + + +static int power_trace_init(struct trace_array *tr) +{ + int cpu; + power_trace = tr; + + trace_power_enabled = 1; + + for_each_cpu_mask(cpu, cpu_possible_map) + tracing_reset(tr, cpu); + return 0; +} + +static enum print_line_t power_print_line(struct trace_iterator *iter) +{ + int ret = 0; + struct trace_entry *entry = iter->ent; + struct trace_power *field ; + struct power_trace *it; + struct trace_seq *s = &iter->seq; + struct timespec stamp; + struct timespec duration; + + trace_assign_type(field, entry); + it = &field->state_data; + stamp = ktime_to_timespec(it->stamp); + duration = ktime_to_timespec(ktime_sub(it->end, it->stamp)); + + if (entry->type == TRACE_POWER) { + if (it->type == POWER_CSTATE) + ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n", + stamp.tv_sec, + stamp.tv_nsec, + it->state, iter->cpu, + duration.tv_sec, + duration.tv_nsec); + if (it->type == POWER_PSTATE) + ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n", + stamp.tv_sec, + stamp.tv_nsec, + it->state, iter->cpu); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer power_tracer __read_mostly = +{ + .name = "power", + .init = power_trace_init, + .start = start_power_trace, + .stop = stop_power_trace, + .reset = stop_power_trace, + .print_line = power_print_line, +}; + +static int init_power_trace(void) +{ + return register_tracer(&power_tracer); +} +device_initcall(init_power_trace); + +void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int level) +{ + if (!trace_power_enabled) + return; + + memset(it, 0, sizeof(struct power_trace)); + it->state = level; + it->type = type; + it->stamp = ktime_get(); +} +EXPORT_SYMBOL_GPL(trace_power_start); + + +void trace_power_end(struct power_trace *it) +{ + struct ring_buffer_event *event; + struct trace_power *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = power_trace; + + if (!trace_power_enabled) + return; + + preempt_disable(); + it->end = ktime_get(); + data = tr->data[smp_processor_id()]; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_POWER; + entry->state_data = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} +EXPORT_SYMBOL_GPL(trace_power_end); + +void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int level) +{ + struct ring_buffer_event *event; + struct trace_power *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = power_trace; + + if (!trace_power_enabled) + return; + + memset(it, 0, sizeof(struct power_trace)); + it->state = level; + it->type = type; + it->stamp = ktime_get(); + preempt_disable(); + it->end = it->stamp; + data = tr->data[smp_processor_id()]; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_POWER; + entry->state_data = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} +EXPORT_SYMBOL_GPL(trace_power_mark); -- cgit v1.2.3