From 39e6dd73502f64e2ae3236b304e160ae30de9384 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Aug 2009 15:26:32 -0300 Subject: perf record: Fix typo in pid_synthesize_comm_event We were using 'fd' locally, but there was a global 'fd' too, so when converting from open to fopen the test made against fd should be made against 'fp', but since we have that global it didnt get discovered ... Reported-by: Ulrich Drepper Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra LKML-Reference: <20090814182632.GF3490@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3d051b9cf25..89a5ddcd1de 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -219,7 +219,7 @@ static pid_t pid_synthesize_comm_event(pid_t pid, int full) snprintf(filename, sizeof(filename), "/proc/%d/status", pid); fp = fopen(filename, "r"); - if (fd == NULL) { + if (fp == NULL) { /* * We raced with a task exiting - just return: */ -- cgit v1.2.3 From 2932cffc89e9a1476b28a59896fa4f81e0d4f131 Mon Sep 17 00:00:00 2001 From: "Carlos R. Mafra" Date: Mon, 17 Aug 2009 00:36:21 +0200 Subject: perf: Rename perf-examples.txt to examples.txt Rename it to examples.txt to avoid the perf-*.txt pattern in the Makefile, otherwise 'make doc' fails because perf-examples.txt is not formatted to be a man page: ERROR: perf-examples.txt: line 1: manpage document title is mandatory Signed-off-by: Carlos R. Mafra Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Documentation/examples.txt | 225 +++++++++++++++++++++++++++++ tools/perf/Documentation/perf-examples.txt | 225 ----------------------------- 2 files changed, 225 insertions(+), 225 deletions(-) create mode 100644 tools/perf/Documentation/examples.txt delete mode 100644 tools/perf/Documentation/perf-examples.txt (limited to 'tools') diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt new file mode 100644 index 00000000000..8eb6c489fb1 --- /dev/null +++ b/tools/perf/Documentation/examples.txt @@ -0,0 +1,225 @@ + + ------------------------------ + ****** perf by examples ****** + ------------------------------ + +[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] + + +First, discovery/enumeration of available counters can be done via +'perf list': + +titan:~> perf list + [...] + kmem:kmalloc [Tracepoint event] + kmem:kmem_cache_alloc [Tracepoint event] + kmem:kmalloc_node [Tracepoint event] + kmem:kmem_cache_alloc_node [Tracepoint event] + kmem:kfree [Tracepoint event] + kmem:kmem_cache_free [Tracepoint event] + kmem:mm_page_free_direct [Tracepoint event] + kmem:mm_pagevec_free [Tracepoint event] + kmem:mm_page_alloc [Tracepoint event] + kmem:mm_page_alloc_zone_locked [Tracepoint event] + kmem:mm_page_pcpu_drain [Tracepoint event] + kmem:mm_page_alloc_extfrag [Tracepoint event] + +Then any (or all) of the above event sources can be activated and +measured. For example the page alloc/free properties of a 'hackbench +run' are: + + titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc + -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 + Time: 0.575 + + Performance counter stats for './hackbench 10': + + 13857 kmem:mm_page_pcpu_drain + 27576 kmem:mm_page_alloc + 6025 kmem:mm_pagevec_free + 20934 kmem:mm_page_free_direct + + 0.613972165 seconds time elapsed + +You can observe the statistical properties as well, by using the +'repeat the workload N times' feature of perf stat: + + titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct ./hackbench 10 + Time: 0.627 + Time: 0.644 + Time: 0.564 + Time: 0.559 + Time: 0.626 + + Performance counter stats for './hackbench 10' (5 runs): + + 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) + 25035 kmem:mm_page_alloc ( +- 3.783% ) + 6104 kmem:mm_pagevec_free ( +- 0.934% ) + 18376 kmem:mm_page_free_direct ( +- 4.941% ) + + 0.643954516 seconds time elapsed ( +- 2.363% ) + +Furthermore, these tracepoints can be used to sample the workload as +well. For example the page allocations done by a 'git gc' can be +captured the following way: + + titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] + +To check which functions generated page allocations: + + titan:~/git> perf report + # Samples: 10646 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.57% git-repack /lib64/libc-2.5.so + 21.81% git /lib64/libc-2.5.so + 14.59% git ./git + 11.79% git-repack ./git + 7.12% git /lib64/ld-2.5.so + 3.16% git-repack /lib64/libpthread-2.5.so + 2.09% git-repack /bin/bash + 1.97% rm /lib64/libc-2.5.so + 1.39% mv /lib64/ld-2.5.so + 1.37% mv /lib64/libc-2.5.so + 1.12% git-repack /lib64/ld-2.5.so + 0.95% rm /lib64/ld-2.5.so + 0.90% git-update-serv /lib64/libc-2.5.so + 0.73% git-update-serv /lib64/ld-2.5.so + 0.68% perf /lib64/libpthread-2.5.so + 0.64% git-repack /usr/lib64/libz.so.1.2.3 + +Or to see it on a more finegrained level: + +titan:~/git> perf report --sort comm,dso,symbol +# Samples: 10646 +# +# Overhead Command Shared Object Symbol +# ........ ............... .......................... ...... +# + 9.35% git-repack ./git [.] insert_obj_hash + 9.12% git ./git [.] insert_obj_hash + 7.31% git /lib64/libc-2.5.so [.] memcpy + 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc + 6.24% git-repack /lib64/libc-2.5.so [.] memcpy + 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork + 5.47% git /lib64/libc-2.5.so [.] _int_malloc + 2.99% git /lib64/libc-2.5.so [.] memset + +Furthermore, call-graph sampling can be done too, of page +allocations - to see precisely what kind of page allocations there +are: + + titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] + + titan:~/git> perf report -g + # Samples: 10686 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.25% git-repack /lib64/libc-2.5.so + | + |--50.00%-- _int_free + | + |--37.50%-- __GI___fork + | make_child + | + |--12.50%-- ptmalloc_unlock_all2 + | make_child + | + --6.25%-- __GI_strcpy + 21.61% git /lib64/libc-2.5.so + | + |--30.00%-- __GI_read + | | + | --83.33%-- git_config_from_file + | git_config + | | + [...] + +Or you can observe the whole system's page allocations for 10 +seconds: + +titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e +kmem:mm_page_alloc -e kmem:mm_pagevec_free -e +kmem:mm_page_free_direct sleep 10 + + Performance counter stats for 'sleep 10': + + 171585 kmem:mm_page_pcpu_drain + 322114 kmem:mm_page_alloc + 73623 kmem:mm_pagevec_free + 254115 kmem:mm_page_free_direct + + 10.000591410 seconds time elapsed + +Or observe how fluctuating the page allocations are, via statistical +analysis done over ten 1-second intervals: + + titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct sleep 1 + + Performance counter stats for 'sleep 1' (10 runs): + + 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) + 34394 kmem:mm_page_alloc ( +- 4.617% ) + 7509 kmem:mm_pagevec_free ( +- 4.820% ) + 25653 kmem:mm_page_free_direct ( +- 3.672% ) + + 1.058135029 seconds time elapsed ( +- 3.089% ) + +Or you can annotate the recorded 'git gc' run on a per symbol basis +and check which instructions/source-code generated page allocations: + + titan:~/git> perf annotate __GI___fork + ------------------------------------------------ + Percent | Source code & Disassembly of libc-2.5.so + ------------------------------------------------ + : + : + : Disassembly of section .plt: + : Disassembly of section .text: + : + : 00000031a2e95560 <__fork>: + [...] + 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax + 0.00 : 31a2e95607: 0f 05 syscall + 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax + 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> + 0.00 : 31a2e95615: 85 c0 test %eax,%eax + +( this shows that 83.42% of __GI___fork's page allocations come from + the 0x38 system call it performs. ) + +etc. etc. - a lot more is possible. I could list a dozen of +other different usecases straight away - neither of which is +possible via /proc/vmstat. + +/proc/vmstat is not in the same league really, in terms of +expressive power of system analysis and performance +analysis. + +All that the above results needed were those new tracepoints +in include/tracing/events/kmem.h. + + Ingo + + diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/perf-examples.txt deleted file mode 100644 index 8eb6c489fb1..00000000000 --- a/tools/perf/Documentation/perf-examples.txt +++ /dev/null @@ -1,225 +0,0 @@ - - ------------------------------ - ****** perf by examples ****** - ------------------------------ - -[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] - - -First, discovery/enumeration of available counters can be done via -'perf list': - -titan:~> perf list - [...] - kmem:kmalloc [Tracepoint event] - kmem:kmem_cache_alloc [Tracepoint event] - kmem:kmalloc_node [Tracepoint event] - kmem:kmem_cache_alloc_node [Tracepoint event] - kmem:kfree [Tracepoint event] - kmem:kmem_cache_free [Tracepoint event] - kmem:mm_page_free_direct [Tracepoint event] - kmem:mm_pagevec_free [Tracepoint event] - kmem:mm_page_alloc [Tracepoint event] - kmem:mm_page_alloc_zone_locked [Tracepoint event] - kmem:mm_page_pcpu_drain [Tracepoint event] - kmem:mm_page_alloc_extfrag [Tracepoint event] - -Then any (or all) of the above event sources can be activated and -measured. For example the page alloc/free properties of a 'hackbench -run' are: - - titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc - -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 - Time: 0.575 - - Performance counter stats for './hackbench 10': - - 13857 kmem:mm_page_pcpu_drain - 27576 kmem:mm_page_alloc - 6025 kmem:mm_pagevec_free - 20934 kmem:mm_page_free_direct - - 0.613972165 seconds time elapsed - -You can observe the statistical properties as well, by using the -'repeat the workload N times' feature of perf stat: - - titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e - kmem:mm_page_alloc -e kmem:mm_pagevec_free -e - kmem:mm_page_free_direct ./hackbench 10 - Time: 0.627 - Time: 0.644 - Time: 0.564 - Time: 0.559 - Time: 0.626 - - Performance counter stats for './hackbench 10' (5 runs): - - 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) - 25035 kmem:mm_page_alloc ( +- 3.783% ) - 6104 kmem:mm_pagevec_free ( +- 0.934% ) - 18376 kmem:mm_page_free_direct ( +- 4.941% ) - - 0.643954516 seconds time elapsed ( +- 2.363% ) - -Furthermore, these tracepoints can be used to sample the workload as -well. For example the page allocations done by a 'git gc' can be -captured the following way: - - titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc - Counting objects: 1148, done. - Delta compression using up to 2 threads. - Compressing objects: 100% (450/450), done. - Writing objects: 100% (1148/1148), done. - Total 1148 (delta 690), reused 1148 (delta 690) - [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] - -To check which functions generated page allocations: - - titan:~/git> perf report - # Samples: 10646 - # - # Overhead Command Shared Object - # ........ ............... .......................... - # - 23.57% git-repack /lib64/libc-2.5.so - 21.81% git /lib64/libc-2.5.so - 14.59% git ./git - 11.79% git-repack ./git - 7.12% git /lib64/ld-2.5.so - 3.16% git-repack /lib64/libpthread-2.5.so - 2.09% git-repack /bin/bash - 1.97% rm /lib64/libc-2.5.so - 1.39% mv /lib64/ld-2.5.so - 1.37% mv /lib64/libc-2.5.so - 1.12% git-repack /lib64/ld-2.5.so - 0.95% rm /lib64/ld-2.5.so - 0.90% git-update-serv /lib64/libc-2.5.so - 0.73% git-update-serv /lib64/ld-2.5.so - 0.68% perf /lib64/libpthread-2.5.so - 0.64% git-repack /usr/lib64/libz.so.1.2.3 - -Or to see it on a more finegrained level: - -titan:~/git> perf report --sort comm,dso,symbol -# Samples: 10646 -# -# Overhead Command Shared Object Symbol -# ........ ............... .......................... ...... -# - 9.35% git-repack ./git [.] insert_obj_hash - 9.12% git ./git [.] insert_obj_hash - 7.31% git /lib64/libc-2.5.so [.] memcpy - 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc - 6.24% git-repack /lib64/libc-2.5.so [.] memcpy - 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork - 5.47% git /lib64/libc-2.5.so [.] _int_malloc - 2.99% git /lib64/libc-2.5.so [.] memset - -Furthermore, call-graph sampling can be done too, of page -allocations - to see precisely what kind of page allocations there -are: - - titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc - Counting objects: 1148, done. - Delta compression using up to 2 threads. - Compressing objects: 100% (450/450), done. - Writing objects: 100% (1148/1148), done. - Total 1148 (delta 690), reused 1148 (delta 690) - [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] - - titan:~/git> perf report -g - # Samples: 10686 - # - # Overhead Command Shared Object - # ........ ............... .......................... - # - 23.25% git-repack /lib64/libc-2.5.so - | - |--50.00%-- _int_free - | - |--37.50%-- __GI___fork - | make_child - | - |--12.50%-- ptmalloc_unlock_all2 - | make_child - | - --6.25%-- __GI_strcpy - 21.61% git /lib64/libc-2.5.so - | - |--30.00%-- __GI_read - | | - | --83.33%-- git_config_from_file - | git_config - | | - [...] - -Or you can observe the whole system's page allocations for 10 -seconds: - -titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e -kmem:mm_page_alloc -e kmem:mm_pagevec_free -e -kmem:mm_page_free_direct sleep 10 - - Performance counter stats for 'sleep 10': - - 171585 kmem:mm_page_pcpu_drain - 322114 kmem:mm_page_alloc - 73623 kmem:mm_pagevec_free - 254115 kmem:mm_page_free_direct - - 10.000591410 seconds time elapsed - -Or observe how fluctuating the page allocations are, via statistical -analysis done over ten 1-second intervals: - - titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e - kmem:mm_page_alloc -e kmem:mm_pagevec_free -e - kmem:mm_page_free_direct sleep 1 - - Performance counter stats for 'sleep 1' (10 runs): - - 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) - 34394 kmem:mm_page_alloc ( +- 4.617% ) - 7509 kmem:mm_pagevec_free ( +- 4.820% ) - 25653 kmem:mm_page_free_direct ( +- 3.672% ) - - 1.058135029 seconds time elapsed ( +- 3.089% ) - -Or you can annotate the recorded 'git gc' run on a per symbol basis -and check which instructions/source-code generated page allocations: - - titan:~/git> perf annotate __GI___fork - ------------------------------------------------ - Percent | Source code & Disassembly of libc-2.5.so - ------------------------------------------------ - : - : - : Disassembly of section .plt: - : Disassembly of section .text: - : - : 00000031a2e95560 <__fork>: - [...] - 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax - 0.00 : 31a2e95607: 0f 05 syscall - 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax - 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> - 0.00 : 31a2e95615: 85 c0 test %eax,%eax - -( this shows that 83.42% of __GI___fork's page allocations come from - the 0x38 system call it performs. ) - -etc. etc. - a lot more is possible. I could list a dozen of -other different usecases straight away - neither of which is -possible via /proc/vmstat. - -/proc/vmstat is not in the same league really, in terms of -expressive power of system analysis and performance -analysis. - -All that the above results needed were those new tracepoints -in include/tracing/events/kmem.h. - - Ingo - - -- cgit v1.2.3 From 15f3fa4e7f608c5ce19187b3b4a953222fdfa751 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 18 Aug 2009 13:52:28 +0200 Subject: perf annotate: Fix segmentation fault Linus reported this perf annotate segfault: [torvalds@nehalem git]$ perf annotate unmap_vmas Segmentation fault #0 map__clone (self=) at builtin-annotate.c:236 #1 thread__fork (self=) at builtin-annotate.c:372 The bug here was that builtin-annotate.c was a copy of builtin-report.c and a threading related fix to builtin-report.c didnt get propagated to builtin-annotate.c ... Reported-by: Linus Torvalds Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1dba568e194..343e7b14bf0 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -980,6 +980,13 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)(event->header.size), event->fork.pid, event->fork.ppid); + /* + * A thread clone will have the same PID for both + * parent and child. + */ + if (thread == parent) + return 0; + if (!thread || !parent || thread__fork(thread, parent)) { dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); return -1; -- cgit v1.2.3 From b395cd8a74b4a8d943dd4b5585e676f62f7350b3 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 18 Aug 2009 12:41:25 -0400 Subject: perf tools: Make 'make html' work pushd tools/perf/Documentation make html popd is failing for me... ASCIIDOC perf-annotate.html ERROR: unsafe: include file: /etc/asciidoc/./stylesheets/xhtml11.css ERROR: unsafe: include file: /etc/asciidoc/./stylesheets/xhtml11-manpage.css ERROR: unsafe: include file: /etc/asciidoc/./stylesheets/xhtml11-quirks.css make: *** [perf-annotate.html] Error 1 Apparently asciidoc "unsafe" is the default mode of operation in practice. https://bugzilla.redhat.com/show_bug.cgi?id=506953 Works tidily now. Signed-off-by: Kyle McMartin Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20090818164125.GM25206@bombadil.infradead.org> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 5457192e1b4..bdd3b7ecad0 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -35,7 +35,7 @@ man7dir=$(mandir)/man7 # DESTDIR= ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = +ASCIIDOC_EXTRA = --unsafe MANPAGE_XSL = manpage-normal.xsl XMLTO_EXTRA = INSTALL?=install -- cgit v1.2.3