From ee08c6eccb7d1295516f7cf420fddf7b14e9146f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 7 Mar 2009 05:52:59 +0100 Subject: tracing/ftrace: syscall tracing infrastructure, basics Provide basic callbacks to do syscall tracing. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Lai Jiangshan LKML-Reference: <1236401580-5758-2-git-send-email-fweisbec@gmail.com> [ simplified it to a trace_printk() for now. ] Signed-off-by: Ingo Molnar --- kernel/trace/trace_syscalls.c | 113 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 kernel/trace/trace_syscalls.c (limited to 'kernel/trace/trace_syscalls.c') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c new file mode 100644 index 00000000000..66cf97449af --- /dev/null +++ b/kernel/trace/trace_syscalls.c @@ -0,0 +1,113 @@ +#include +#include + +#include + +#include "trace_output.h" +#include "trace.h" + +static atomic_t refcount; + +void start_ftrace_syscalls(void) +{ + unsigned long flags; + struct task_struct *g, *t; + + if (atomic_inc_return(&refcount) != 1) + goto out; + + read_lock_irqsave(&tasklist_lock, flags); + + do_each_thread(g, t) { + set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); + } while_each_thread(g, t); + + read_unlock_irqrestore(&tasklist_lock, flags); +out: + atomic_dec(&refcount); +} + +void stop_ftrace_syscalls(void) +{ + unsigned long flags; + struct task_struct *g, *t; + + if (atomic_dec_return(&refcount)) + goto out; + + read_lock_irqsave(&tasklist_lock, flags); + + do_each_thread(g, t) { + clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); + } while_each_thread(g, t); + + read_unlock_irqrestore(&tasklist_lock, flags); +out: + atomic_inc(&refcount); +} + +void ftrace_syscall_enter(struct pt_regs *regs) +{ + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + + trace_printk("syscall %d enter\n", syscall_nr); +} + +void ftrace_syscall_exit(struct pt_regs *regs) +{ + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + + trace_printk("syscall %d exit\n", syscall_nr); +} + +static int init_syscall_tracer(struct trace_array *tr) +{ + start_ftrace_syscalls(); + + return 0; +} + +static void reset_syscall_tracer(struct trace_array *tr) +{ + stop_ftrace_syscalls(); +} + +static struct trace_event syscall_enter_event = { + .type = TRACE_SYSCALL_ENTER, +}; + +static struct trace_event syscall_exit_event = { + .type = TRACE_SYSCALL_EXIT, +}; + +static struct tracer syscall_tracer __read_mostly = { + .name = "syscall", + .init = init_syscall_tracer, + .reset = reset_syscall_tracer +}; + +__init int register_ftrace_syscalls(void) +{ + int ret; + + ret = register_ftrace_event(&syscall_enter_event); + if (!ret) { + printk(KERN_WARNING "event %d failed to register\n", + syscall_enter_event.type); + WARN_ON_ONCE(1); + } + + ret = register_ftrace_event(&syscall_exit_event); + if (!ret) { + printk(KERN_WARNING "event %d failed to register\n", + syscall_exit_event.type); + WARN_ON_ONCE(1); + } + + return register_tracer(&syscall_tracer); +} +device_initcall(register_ftrace_syscalls); -- cgit v1.2.3 From bed1ffca022cc876fb83161d26670e9b5d3cf36b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Mar 2009 15:42:11 +0100 Subject: tracing/syscalls: core infrastructure for syscalls tracing, enhancements Impact: new feature This adds the generic support for syscalls tracing. This is currently exploited through a devoted tracer but other tracing engines can use it. (They just have to play with {start,stop}_ftrace_syscalls() and use the display callbacks unless they want to override them.) The syscalls prototypes definitions are abused here to steal some metadata informations: - syscall name, param types, param names, number of params The syscall addr is not directly saved during this definition because we don't know if its prototype is available in the namespace. But we don't really need it. The arch has just to build a function able to resolve the syscall number to its metadata struct. The current tracer prints the syscall names, parameters names and values (and their types optionally). Currently the value is a raw hex but higher level values diplaying is on my TODO list. Signed-off-by: Frederic Weisbecker LKML-Reference: <1236955332-10133-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_syscalls.c | 146 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 138 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace_syscalls.c') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 66cf97449af..c72e599230f 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,6 +1,5 @@ -#include #include - +#include #include #include "trace_output.h" @@ -8,6 +7,90 @@ static atomic_t refcount; +/* Our two options */ +enum { + TRACE_SYSCALLS_OPT_TYPES = 0x1, +}; + +static struct tracer_opt syscalls_opts[] = { + { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) }, + { } +}; + +static struct tracer_flags syscalls_flags = { + .val = 0, /* By default: no args types */ + .opts = syscalls_opts +}; + +enum print_line_t +print_syscall_enter(struct trace_iterator *iter, int flags) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *ent = iter->ent; + struct syscall_trace_enter *trace; + struct syscall_metadata *entry; + int i, ret, syscall; + + trace_assign_type(trace, ent); + + syscall = trace->nr; + + entry = syscall_nr_to_meta(syscall); + if (!entry) + goto end; + + ret = trace_seq_printf(s, "%s(", entry->name); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + for (i = 0; i < entry->nb_args; i++) { + /* parameter types */ + if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { + ret = trace_seq_printf(s, "%s ", entry->types[i]); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + /* parameter values */ + ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], + trace->args[i], + i == entry->nb_args - 1 ? ")" : ","); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + +end: + trace_seq_printf(s, "\n"); + return TRACE_TYPE_HANDLED; +} + +enum print_line_t +print_syscall_exit(struct trace_iterator *iter, int flags) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *ent = iter->ent; + struct syscall_trace_exit *trace; + int syscall; + struct syscall_metadata *entry; + int ret; + + trace_assign_type(trace, ent); + + syscall = trace->nr; + + entry = syscall_nr_to_meta(syscall); + if (!entry) { + trace_seq_printf(s, "\n"); + return TRACE_TYPE_HANDLED; + } + + ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, + trace->ret); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + void start_ftrace_syscalls(void) { unsigned long flags; @@ -16,6 +99,7 @@ void start_ftrace_syscalls(void) if (atomic_inc_return(&refcount) != 1) goto out; + arch_init_ftrace_syscalls(); read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, t) { @@ -48,20 +132,63 @@ out: void ftrace_syscall_enter(struct pt_regs *regs) { + struct syscall_trace_enter *entry; + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; + int size; int syscall_nr; + int cpu; syscall_nr = syscall_get_nr(current, regs); - trace_printk("syscall %d enter\n", syscall_nr); + cpu = raw_smp_processor_id(); + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; + + event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, + 0, 0); + if (!event) + return; + + entry = ring_buffer_event_data(event); + entry->nr = syscall_nr; + syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); + + trace_current_buffer_unlock_commit(event, 0, 0); + trace_wake_up(); } void ftrace_syscall_exit(struct pt_regs *regs) { + struct syscall_trace_exit *entry; + struct syscall_metadata *sys_data; + struct ring_buffer_event *event; int syscall_nr; + int cpu; syscall_nr = syscall_get_nr(current, regs); - trace_printk("syscall %d exit\n", syscall_nr); + cpu = raw_smp_processor_id(); + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, + sizeof(*entry), 0, 0); + if (!event) + return; + + entry = ring_buffer_event_data(event); + entry->nr = syscall_nr; + entry->ret = syscall_get_return_value(current, regs); + + trace_current_buffer_unlock_commit(event, 0, 0); + trace_wake_up(); } static int init_syscall_tracer(struct trace_array *tr) @@ -77,17 +204,20 @@ static void reset_syscall_tracer(struct trace_array *tr) } static struct trace_event syscall_enter_event = { - .type = TRACE_SYSCALL_ENTER, + .type = TRACE_SYSCALL_ENTER, + .trace = print_syscall_enter, }; static struct trace_event syscall_exit_event = { - .type = TRACE_SYSCALL_EXIT, + .type = TRACE_SYSCALL_EXIT, + .trace = print_syscall_exit, }; static struct tracer syscall_tracer __read_mostly = { - .name = "syscall", + .name = "syscall", .init = init_syscall_tracer, - .reset = reset_syscall_tracer + .reset = reset_syscall_tracer, + .flags = &syscalls_flags, }; __init int register_ftrace_syscalls(void) -- cgit v1.2.3 From ac99c58c9e56967037382e31f865b72b10127965 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 15 Mar 2009 22:10:35 +0100 Subject: tracing/syscalls: fix missing release of tracing Impact: fix 'stuck' syscall tracer The syscall tracer uses a refcounter to enable several users simultaneously. But the refcounter did not behave correctly and always restored its value to 0 after calling start_syscall_tracing(). Therefore, stop_syscall_tracing() couldn't release correctly the tasks from tracing. Also the tracer forgot to reset the buffer when it is released. Drop the pointless refcount decrement on start_syscall_tracing() and reset the buffer when we release the tracer. This fixes two reported issue: - when we switch from syscall tracer to another tracer, syscall tracing continued. - incorrect use of the refcount. Reported-by: Andrew Morton Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker LKML-Reference: <1237151439-6755-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_syscalls.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel/trace/trace_syscalls.c') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c72e599230f..c5fc1d8880f 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -96,8 +96,9 @@ void start_ftrace_syscalls(void) unsigned long flags; struct task_struct *g, *t; + /* Don't enable the flag on the tasks twice */ if (atomic_inc_return(&refcount) != 1) - goto out; + return; arch_init_ftrace_syscalls(); read_lock_irqsave(&tasklist_lock, flags); @@ -107,8 +108,6 @@ void start_ftrace_syscalls(void) } while_each_thread(g, t); read_unlock_irqrestore(&tasklist_lock, flags); -out: - atomic_dec(&refcount); } void stop_ftrace_syscalls(void) @@ -116,8 +115,9 @@ void stop_ftrace_syscalls(void) unsigned long flags; struct task_struct *g, *t; + /* There are perhaps still some users */ if (atomic_dec_return(&refcount)) - goto out; + return; read_lock_irqsave(&tasklist_lock, flags); @@ -126,8 +126,6 @@ void stop_ftrace_syscalls(void) } while_each_thread(g, t); read_unlock_irqrestore(&tasklist_lock, flags); -out: - atomic_inc(&refcount); } void ftrace_syscall_enter(struct pt_regs *regs) @@ -201,6 +199,7 @@ static int init_syscall_tracer(struct trace_array *tr) static void reset_syscall_tracer(struct trace_array *tr) { stop_ftrace_syscalls(); + tracing_reset_online_cpus(tr); } static struct trace_event syscall_enter_event = { -- cgit v1.2.3 From 6404434525bb9f8f2239998f30fd7c93f2efa5b3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 15 Mar 2009 22:10:36 +0100 Subject: tracing/syscalls: various cleanups Impact: cleanup - Drop unused cpu variable - Fix some errors on comments Signed-off-by: Frederic Weisbecker LKML-Reference: <1237151439-6755-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_syscalls.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'kernel/trace/trace_syscalls.c') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c5fc1d8880f..26f9a8679d3 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -7,7 +7,7 @@ static atomic_t refcount; -/* Our two options */ +/* Option to display the parameters types */ enum { TRACE_SYSCALLS_OPT_TYPES = 0x1, }; @@ -18,7 +18,7 @@ static struct tracer_opt syscalls_opts[] = { }; static struct tracer_flags syscalls_flags = { - .val = 0, /* By default: no args types */ + .val = 0, /* By default: no parameters types */ .opts = syscalls_opts }; @@ -135,12 +135,9 @@ void ftrace_syscall_enter(struct pt_regs *regs) struct ring_buffer_event *event; int size; int syscall_nr; - int cpu; syscall_nr = syscall_get_nr(current, regs); - cpu = raw_smp_processor_id(); - sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) return; @@ -166,12 +163,9 @@ void ftrace_syscall_exit(struct pt_regs *regs) struct syscall_metadata *sys_data; struct ring_buffer_event *event; int syscall_nr; - int cpu; syscall_nr = syscall_get_nr(current, regs); - cpu = raw_smp_processor_id(); - sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) return; -- cgit v1.2.3 From 5be71b61f17b0e3bc8ad0b1a1b7b53ab7d574ebb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 15 Mar 2009 22:10:37 +0100 Subject: tracing/syscalls: protect thread flag toggling from races Impact: fix syscall tracer enable/disable race The current thread flag toggling is racy as shown in the following scenario: - task A is the last user of syscall tracing, it releases the TIF_SYSCALL_FTRACE on each tasks - at the same time task B start syscall tracing. refcount == 0 so it sets up TIF_SYSCALL_FTRACE on each tasks. The effect of the mixup is unpredictable. So this fix adds a mutex on {start,stop}_syscall_tracing(). Reported-by: Andrew Morton Reported-by: Ingo Molnar LKML-Reference: <1237151439-6755-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_syscalls.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'kernel/trace/trace_syscalls.c') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 26f9a8679d3..a2a3af29c94 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -5,7 +5,11 @@ #include "trace_output.h" #include "trace.h" -static atomic_t refcount; +/* Keep a counter of the syscall tracing users */ +static int refcount; + +/* Prevent from races on thread flags toggling */ +static DEFINE_MUTEX(syscall_trace_lock); /* Option to display the parameters types */ enum { @@ -96,9 +100,11 @@ void start_ftrace_syscalls(void) unsigned long flags; struct task_struct *g, *t; + mutex_lock(&syscall_trace_lock); + /* Don't enable the flag on the tasks twice */ - if (atomic_inc_return(&refcount) != 1) - return; + if (++refcount != 1) + goto unlock; arch_init_ftrace_syscalls(); read_lock_irqsave(&tasklist_lock, flags); @@ -108,6 +114,9 @@ void start_ftrace_syscalls(void) } while_each_thread(g, t); read_unlock_irqrestore(&tasklist_lock, flags); + +unlock: + mutex_unlock(&syscall_trace_lock); } void stop_ftrace_syscalls(void) @@ -115,9 +124,11 @@ void stop_ftrace_syscalls(void) unsigned long flags; struct task_struct *g, *t; + mutex_lock(&syscall_trace_lock); + /* There are perhaps still some users */ - if (atomic_dec_return(&refcount)) - return; + if (--refcount) + goto unlock; read_lock_irqsave(&tasklist_lock, flags); @@ -126,6 +137,9 @@ void stop_ftrace_syscalls(void) } while_each_thread(g, t); read_unlock_irqrestore(&tasklist_lock, flags); + +unlock: + mutex_unlock(&syscall_trace_lock); } void ftrace_syscall_enter(struct pt_regs *regs) -- cgit v1.2.3