From 5ded3dc6a3c7549b36a8ac27bbd81b33756a2c29 Mon Sep 17 00:00:00 2001 From: David Sharp Date: Wed, 6 Jan 2010 17:12:07 -0800 Subject: ring-buffer: Wrap a list.next reference with rb_list_head() This reference at the end of rb_get_reader_page() was causing off-by-one writes to the prev pointer of the page after the reader page when that page is the head page, and therefore the reader page has the RB_PAGE_HEAD flag in its list.next pointer. This eventually results in a GPF in a subsequent call to rb_set_head_page() (usually from rb_get_reader_page()) when that prev pointer is dereferenced. The dereferenced register would characteristically have an address that appears shifted left by one byte (eg, ffxxxxxxxxxxxxyy instead of ffffxxxxxxxxxxxx) due to being written at an address one byte too high. Signed-off-by: David Sharp LKML-Reference: <1262826727-9090-1-git-send-email-dhsharp@google.com> Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c..d5b7308b7e1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2906,7 +2906,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * * Now make the new head point back to the reader page. */ - reader->list.next->prev = &cpu_buffer->reader_page->list; + rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; rb_inc_page(cpu_buffer, &cpu_buffer->head_page); /* Finally update the reader page to the new head */ -- cgit v1.2.3 From 0e1ff5d72a6393f2ef5dbf74f58bb55a12d63834 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 Jan 2010 20:40:44 -0500 Subject: ring-buffer: Add rb_list_head() wrapper around new reader page next field If the very unlikely case happens where the writer moves the head by one between where the head page is read and where the new reader page is assigned _and_ the writer then writes and wraps the entire ring buffer so that the head page is back to what was originally read as the head page, the page to be swapped will have a corrupted next pointer. Simple solution is to wrap the assignment of the next pointer with a rb_list_head(). Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d5b7308b7e1..edefe3b2801 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2869,7 +2869,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * Splice the empty reader page into the list around the head. */ reader = rb_set_head_page(cpu_buffer); - cpu_buffer->reader_page->list.next = reader->list.next; + cpu_buffer->reader_page->list.next = rb_list_head(reader->list.next); cpu_buffer->reader_page->list.prev = reader->list.prev; /* -- cgit v1.2.3 From 751e9983ee276cb150e8812b1d995f6035a63878 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:02 +0800 Subject: ftrace: Fix MATCH_END_ONLY function filter For '*foo' pattern, we should allow any string ending with 'foo', but ftrace filter incorrectly disallows strings like bar_foo_foo: # echo '*io' > set_ftrace_filter # cat set_ftrace_filter | grep 'req_bio_endio' # cat available_filter_functions | grep 'req_bio_endio' req_bio_endio Signed-off-by: Li Zefan LKML-Reference: <4B4E870E.6060607@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7968762c816..1e6640f8045 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1690,7 +1690,7 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin) static int ftrace_match(char *str, char *regex, int len, int type) { int matched = 0; - char *ptr; + int slen; switch (type) { case MATCH_FULL: @@ -1706,8 +1706,8 @@ static int ftrace_match(char *str, char *regex, int len, int type) matched = 1; break; case MATCH_END_ONLY: - ptr = strstr(str, regex); - if (ptr && (ptr[len] == 0)) + slen = strlen(str); + if (slen >= len && memcmp(str + slen - len, regex, len) == 0) matched = 1; break; } -- cgit v1.2.3 From 285caad415f459f336247932b4db95a571357a02 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:21 +0800 Subject: tracing/filters: Fix MATCH_FRONT_ONLY filter matching MATCH_FRONT_ONLY actually is a full matching: # ./perf record -R -f -a -e lock:lock_acquire \ --filter 'name ~rcu_*' sleep 1 # ./perf trace (no output) We should pass the length of the pattern string to strncmp(). Signed-off-by: Li Zefan LKML-Reference: <4B4E8721.5090301@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 50504cb228d..11c3973e655 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -261,7 +261,7 @@ static int regex_match_full(char *str, struct regex *r, int len) static int regex_match_front(char *str, struct regex *r, int len) { - if (strncmp(str, r->pattern, len) == 0) + if (strncmp(str, r->pattern, r->len) == 0) return 1; return 0; } -- cgit v1.2.3 From a3291c14ecf0a995e30d993b7f2cae031de98727 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:41 +0800 Subject: tracing/filters: Fix MATCH_END_ONLY filter matching For '*foo' pattern, we should allow any string ending with 'foo', but event filtering incorrectly disallows strings like bar_foo_foo: Signed-off-by: Li Zefan LKML-Reference: <4B4E8735.6070604@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 11c3973e655..49e44dd1785 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -275,9 +275,10 @@ static int regex_match_middle(char *str, struct regex *r, int len) static int regex_match_end(char *str, struct regex *r, int len) { - char *ptr = strstr(str, r->pattern); + int strlen = len - 1; - if (ptr && (ptr[r->len] == 0)) + if (strlen >= r->len && + memcmp(str + strlen - r->len, r->pattern, r->len) == 0) return 1; return 0; } -- cgit v1.2.3 From b2af211f284eb1bef19fbb85fc8ef551bb1e7460 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:54:11 +0800 Subject: tracing/filters: Fix MATCH_MIDDLE_ONLY filter matching The @str might not be NULL-terminated if it's of type DYN_STRING or STATIC_STRING, so we should use strnstr() instead of strstr(). Signed-off-by: Li Zefan LKML-Reference: <4B4E8753.2000102@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 49e44dd1785..f364b085397 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -268,7 +268,7 @@ static int regex_match_front(char *str, struct regex *r, int len) static int regex_match_middle(char *str, struct regex *r, int len) { - if (strstr(str, r->pattern)) + if (strnstr(str, r->pattern, len)) return 1; return 0; } -- cgit v1.2.3 From 16da27a8bc7a0d050686d1b2e9efb53fab9ed226 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:54:27 +0800 Subject: tracing/filters: Fix MATCH_FULL filter matching for PTR_STRING MATCH_FULL matching for PTR_STRING is not working correctly: # echo 'func == vt' > events/bkl/lock_kernel/filter # echo 1 > events/bkl/lock_kernel/enable ... # cat trace Xorg-1484 [000] 1973.392586: lock_kernel: ... func=vt_ioctl() gpm-1402 [001] 1974.027740: lock_kernel: ... func=vt_ioctl() We should pass to regex.match(..., len) the length (including '\0') of the source string instead of the length of the pattern string. Signed-off-by: Li Zefan LKML-Reference: <4B4E8763.5070707@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f364b085397..60c2a4efad4 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -211,8 +211,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event, { char **addr = (char **)(event + pred->offset); int cmp, match; + int len = strlen(*addr) + 1; /* including tailing '\0' */ - cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); + cmp = pred->regex.match(*addr, &pred->regex, len); match = cmp ^ pred->not; @@ -782,10 +783,8 @@ static int filter_add_pred(struct filter_parse_state *ps, pred->regex.field_len = field->size; } else if (field->filter_type == FILTER_DYN_STRING) fn = filter_pred_strloc; - else { + else fn = filter_pred_pchar; - pred->regex.field_len = strlen(pred->regex.pattern); - } } else { if (field->is_signed) ret = strict_strtoll(pred->regex.pattern, 0, &val); -- cgit v1.2.3 From d1303dd1d6b220cab375f24fa91a5640e54e169e Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:54:40 +0800 Subject: tracing/filters: Add comment for match callbacks We should be clear on 2 things: - the length parameter of a match callback includes tailing '\0'. - the string to be searched might not be NULL-terminated. Signed-off-by: Li Zefan LKML-Reference: <4B4E8770.7000608@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_events_filter.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 60c2a4efad4..e42af9aad69 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -252,7 +252,18 @@ static int filter_pred_none(struct filter_pred *pred, void *event, return 0; } -/* Basic regex callbacks */ +/* + * regex_match_foo - Basic regex callbacks + * + * @str: the string to be searched + * @r: the regex structure containing the pattern string + * @len: the length of the string to be searched (including '\0') + * + * Note: + * - @str might not be NULL-terminated if it's of type DYN_STRING + * or STATIC_STRING + */ + static int regex_match_full(char *str, struct regex *r, int len) { if (strncmp(str, r->pattern, len) == 0) -- cgit v1.2.3 From 74bf4076f2ed79b5510440b72a561823a8852ec0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Jan 2010 15:11:53 -0500 Subject: tracing: Prevent kernel oops with corrupted buffer If the contents of the ftrace ring buffer gets corrupted and the trace file is read, it could create a kernel oops (usualy just killing the user task thread). This is caused by the checking of the pid in the buffer. If the pid is negative, it still references the cmdline cache array, which could point to an invalid address. The simple fix is to test for negative PIDs. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0df1b0f2cb9..eac6875cb99 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -951,6 +951,11 @@ void trace_find_cmdline(int pid, char comm[]) return; } + if (WARN_ON_ONCE(pid < 0)) { + strcpy(comm, ""); + return; + } + if (pid > PID_MAX_DEFAULT) { strcpy(comm, "<...>"); return; -- cgit v1.2.3 From 492a74f4210e15f4701422e2e1c4cd3c1e45ddae Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 25 Jan 2010 15:17:47 -0500 Subject: ring-buffer: Check if ring buffer iterator has stale data Usually reads of the ring buffer is performed by a single task. There are two types of reads from the ring buffer. One is a consuming read which will consume the entry that was read and the next read will be the entry that follows. The other is an iterator that will let the user read the contents of the ring buffer without modifying it. When an iterator is allocated, writes to the ring buffer are disabled to protect the iterator. The problem exists when consuming reads happen while an iterator is allocated. Specifically, the kind of read that swaps out an entire page (used by splice) and replaces it with a new read. If the iterator is on the page that is swapped out, then the next read may read from this swapped out page and return garbage. This patch adds a check when reading the iterator to make sure that the iterator contents are still valid. If a consuming read has taken place, the iterator is reset. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index edefe3b2801..503b630e0bd 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -464,6 +464,8 @@ struct ring_buffer_iter { struct ring_buffer_per_cpu *cpu_buffer; unsigned long head; struct buffer_page *head_page; + struct buffer_page *cache_reader_page; + unsigned long cache_read; u64 read_stamp; }; @@ -2716,6 +2718,8 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) iter->read_stamp = cpu_buffer->read_stamp; else iter->read_stamp = iter->head_page->page->time_stamp; + iter->cache_reader_page = cpu_buffer->reader_page; + iter->cache_read = cpu_buffer->read; } /** @@ -3066,6 +3070,15 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) cpu_buffer = iter->cpu_buffer; buffer = cpu_buffer->buffer; + /* + * Check if someone performed a consuming read to + * the buffer. A consuming read invalidates the iterator + * and we need to reset the iterator in this case. + */ + if (unlikely(iter->cache_read != cpu_buffer->read || + iter->cache_reader_page != cpu_buffer->reader_page)) + rb_iter_reset(iter); + again: /* * We repeat when a timestamp is encountered. -- cgit v1.2.3 From 3c05d7482777f15e71bb4cb1ba78dee2800dfec6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 26 Jan 2010 16:14:08 -0500 Subject: ring-buffer: Check for end of page in iterator If the iterator comes to an empty page for some reason, or if the page is emptied by a consuming read. The iterator code currently does not check if the iterator is pass the contents, and may return a false entry. This patch adds a check to the ring buffer iterator to test if the current page has been completely read and sets the iterator to the next page if necessary. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 503b630e0bd..8c1b2d29071 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3064,9 +3064,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) struct ring_buffer_event *event; int nr_loops = 0; - if (ring_buffer_iter_empty(iter)) - return NULL; - cpu_buffer = iter->cpu_buffer; buffer = cpu_buffer->buffer; @@ -3080,6 +3077,9 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) rb_iter_reset(iter); again: + if (ring_buffer_iter_empty(iter)) + return NULL; + /* * We repeat when a timestamp is encountered. * We can get multiple timestamps by nested interrupts or also @@ -3094,6 +3094,11 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) if (rb_per_cpu_empty(cpu_buffer)) return NULL; + if (iter->head >= local_read(&iter->head_page->page->commit)) { + rb_inc_iter(iter); + goto again; + } + event = rb_iter_head_event(iter); switch (event->type_len) { -- cgit v1.2.3 From 03688970347bfea32823953a7ce5886d1713205f Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 22 Jan 2010 08:12:47 -0500 Subject: tracing/documentation: Cover new frame pointer semantics Update the graph tracer examples to cover the new frame pointer semantics (in terms of passing it along). Move the HAVE_FUNCTION_GRAPH_FP_TEST docs out of the Kconfig, into the right place, and expand on the details. Signed-off-by: Mike Frysinger LKML-Reference: <1264165967-18938-1-git-send-email-vapier@gentoo.org> Signed-off-by: Steven Rostedt --- kernel/trace/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6c22d8a2f28..60e2ce0181e 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -27,9 +27,7 @@ config HAVE_FUNCTION_GRAPH_TRACER config HAVE_FUNCTION_GRAPH_FP_TEST bool help - An arch may pass in a unique value (frame pointer) to both the - entering and exiting of a function. On exit, the value is compared - and if it does not match, then it will panic the kernel. + See Documentation/trace/ftrace-design.txt config HAVE_FUNCTION_TRACE_MCOUNT_TEST bool -- cgit v1.2.3 From 4f48f8b7fd18c44f8478174f9925cc3c059c6ce4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 2 Feb 2010 15:32:09 +0800 Subject: tracing: Fix circular dead lock in stack trace When we cat /tracing/stack_trace, we may cause circular lock: sys_read() t_start() arch_spin_lock(&max_stack_lock); t_show() seq_printf(), vsnprintf() .... /* they are all trace-able, when they are traced, max_stack_lock may be required again. */ The following script can trigger this circular dead lock very easy: #!/bin/bash echo 1 > /proc/sys/kernel/stack_tracer_enabled mount -t debugfs xxx /mnt > /dev/null 2>&1 ( # make check_stack() zealous to require max_stack_lock for ((; ;)) { echo 1 > /mnt/tracing/stack_max_size } ) & for ((; ;)) { cat /mnt/tracing/stack_trace > /dev/null } To fix this bug, we increase the percpu trace_active before require the lock. Reported-by: Li Zefan Signed-off-by: Lai Jiangshan LKML-Reference: <4B67D4F9.9080905@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_stack.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 678a5120ee3..f4bc9b27de5 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -157,6 +157,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, unsigned long val, flags; char buf[64]; int ret; + int cpu; if (count >= sizeof(buf)) return -EINVAL; @@ -171,9 +172,20 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, return ret; local_irq_save(flags); + + /* + * In case we trace inside arch_spin_lock() or after (NMI), + * we will cause circular lock, so we also need to increase + * the percpu trace_active here. + */ + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)++; + arch_spin_lock(&max_stack_lock); *ptr = val; arch_spin_unlock(&max_stack_lock); + + per_cpu(trace_active, cpu)--; local_irq_restore(flags); return count; @@ -206,7 +218,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { + int cpu; + local_irq_disable(); + + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)++; + arch_spin_lock(&max_stack_lock); if (*pos == 0) @@ -217,7 +235,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void t_stop(struct seq_file *m, void *p) { + int cpu; + arch_spin_unlock(&max_stack_lock); + + cpu = smp_processor_id(); + per_cpu(trace_active, cpu)--; + local_irq_enable(); } -- cgit v1.2.3 From a9bb18f36c8056f0712fb28c52c0f85d98438dfb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 10 Feb 2010 17:23:47 +0100 Subject: tracing/kprobes: Fix probe parsing Trying to add a probe like: echo p:myprobe 0x10000 > /sys/kernel/debug/tracing/kprobe_events will fail since the wrong pointer is passed to strict_strtoul when trying to convert the address to an unsigned long. Signed-off-by: Heiko Carstens Acked-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <20100210162346.GA6933@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ea90c0e2c9..50b1b823980 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -689,7 +689,7 @@ static int create_trace_probe(int argc, char **argv) return -EINVAL; } /* an address specified */ - ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); + ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr); if (ret) { pr_info("Failed to parse address.\n"); return ret; -- cgit v1.2.3