From c1f47b454ce759d7b13604137a233cad4617e1e8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Jun 2009 13:58:51 +0200 Subject: perf_counter tools: Fix vmlinux fallback when running on a different kernel Lucas De Marchi reported that perf report and perf annotate displays mismatching profile if a perf.data is analyzed on an older kernel - even if the correct vmlinux is specified via the -k option. The reason is the fallback path in util/symbol.c:dso__load_kernel(): int dso__load_kernel(struct dso *self, const char *vmlinux, symbol_filter_t filter, int verbose) { int err = -1; if (vmlinux) err = dso__load_vmlinux(self, vmlinux, filter, verbose); if (err) err = dso__load_kallsyms(self, filter, verbose); return err; } dso__load_vmlinux() returns negative on error, but on success it returns the number of symbols loaded - which confuses the function to load the kallsyms. This is normally harmless, as reporting is usually performed on the same kernel that is analyzed - but if there's a mismatch then we load the wrong kallsyms and create a non-sensical symbol tree. The fix is to only fall back to kallsyms on errors. Reported-by: Lucas De Marchi Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 86e14375e74..01b62fa0399 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -629,7 +629,7 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, if (vmlinux) err = dso__load_vmlinux(self, vmlinux, filter, verbose); - if (err) + if (err < 0) err = dso__load_kallsyms(self, filter, verbose); return err; -- cgit v1.2.3 From 51e268423151fc7bb41945bde7843160b6a14c32 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 16:43:14 +0530 Subject: perf_counter tools: Define separate declarations for H/W and S/W events Define separate declarations for H/W and S/W events to: 1. Shorten name to save some space so that we can add more members 2. Fix alignment 3. Avoid declaring HARDWARE/SOFTWARE again and again. Removed unused CR(x, y) Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245669194.17153.6.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 44 +++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 35d04da38d6..12abab3a0d6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -18,30 +18,30 @@ struct event_symbol { char *symbol; }; -#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y -#define CR(x, y) .type = PERF_TYPE_##x, .config = y +#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x +#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, - { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, - { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, - { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, - { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, - { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, - { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, - - { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, - { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, + { CHW(CPU_CYCLES), "cpu-cycles", }, + { CHW(CPU_CYCLES), "cycles", }, + { CHW(INSTRUCTIONS), "instructions", }, + { CHW(CACHE_REFERENCES), "cache-references", }, + { CHW(CACHE_MISSES), "cache-misses", }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, + { CHW(BRANCH_INSTRUCTIONS), "branches", }, + { CHW(BRANCH_MISSES), "branch-misses", }, + { CHW(BUS_CYCLES), "bus-cycles", }, + + { CSW(CPU_CLOCK), "cpu-clock", }, + { CSW(TASK_CLOCK), "task-clock", }, + { CSW(PAGE_FAULTS), "page-faults", }, + { CSW(PAGE_FAULTS), "faults", }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", }, + { CSW(CONTEXT_SWITCHES), "context-switches", }, + { CSW(CONTEXT_SWITCHES), "cs", }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", }, + { CSW(CPU_MIGRATIONS), "migrations", }, }; #define __PERF_COUNTER_FIELD(config, name) \ -- cgit v1.2.3 From 74d5b5889ea71a95d8924c08f8a7c6e2bdcbc0ba Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 16:44:28 +0530 Subject: perf_counter tools: Introduce alias member in event_symbol By introducing alias member in event_symbol : 1. duplicate lines are removed, like: cpu-cycles and cycles branch-instructions and branches context-switches and cs cpu-migrations and migrations 2. We can also add alias for another events. Now ./perf list looks like : List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults [Software event] faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] rNNN [raw hardware event descriptor] Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245669268.17153.8.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 63 +++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 12abab3a0d6..f5695486ad3 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -16,32 +16,29 @@ struct event_symbol { u8 type; u64 config; char *symbol; + char *alias; }; #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x static struct event_symbol event_symbols[] = { - { CHW(CPU_CYCLES), "cpu-cycles", }, - { CHW(CPU_CYCLES), "cycles", }, - { CHW(INSTRUCTIONS), "instructions", }, - { CHW(CACHE_REFERENCES), "cache-references", }, - { CHW(CACHE_MISSES), "cache-misses", }, - { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", }, - { CHW(BRANCH_INSTRUCTIONS), "branches", }, - { CHW(BRANCH_MISSES), "branch-misses", }, - { CHW(BUS_CYCLES), "bus-cycles", }, - - { CSW(CPU_CLOCK), "cpu-clock", }, - { CSW(TASK_CLOCK), "task-clock", }, - { CSW(PAGE_FAULTS), "page-faults", }, - { CSW(PAGE_FAULTS), "faults", }, - { CSW(PAGE_FAULTS_MIN), "minor-faults", }, - { CSW(PAGE_FAULTS_MAJ), "major-faults", }, - { CSW(CONTEXT_SWITCHES), "context-switches", }, - { CSW(CONTEXT_SWITCHES), "cs", }, - { CSW(CPU_MIGRATIONS), "cpu-migrations", }, - { CSW(CPU_MIGRATIONS), "migrations", }, + { CHW(CPU_CYCLES), "cpu-cycles", "cycles" }, + { CHW(INSTRUCTIONS), "instructions", "" }, + { CHW(CACHE_REFERENCES), "cache-references", "" }, + { CHW(CACHE_MISSES), "cache-misses", "" }, + { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" }, + { CHW(BRANCH_MISSES), "branch-misses", "" }, + { CHW(BUS_CYCLES), "bus-cycles", "" }, + + { CSW(CPU_CLOCK), "cpu-clock", "" }, + { CSW(TASK_CLOCK), "task-clock", "" }, + { CSW(PAGE_FAULTS), "page-faults", "" }, + { CSW(PAGE_FAULTS), "faults", "" }, + { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, + { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, + { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, + { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, }; #define __PERF_COUNTER_FIELD(config, name) \ @@ -196,6 +193,19 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a return 0; } +static int check_events(const char *str, unsigned int i) +{ + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return 1; + + if (strlen(event_symbols[i].alias)) + if (!strncmp(str, event_symbols[i].alias, + strlen(event_symbols[i].alias))) + return 1; + return 0; +} + /* * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. @@ -235,9 +245,7 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) } for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) { - + if (check_events(str, i)) { attr->type = event_symbols[i].type; attr->config = event_symbols[i].config; @@ -289,6 +297,7 @@ void print_events(void) { struct event_symbol *syms = event_symbols; unsigned int i, type, prev_type = -1; + char name[40]; fprintf(stderr, "\n"); fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); @@ -301,14 +310,18 @@ void print_events(void) if (type != prev_type) fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [%s]\n", syms->symbol, + if (strlen(syms->alias)) + sprintf(name, "%s OR %s", syms->symbol, syms->alias); + else + strcpy(name, syms->symbol); + fprintf(stderr, " %-40s [%s]\n", name, event_type_descriptors[type]); prev_type = type; } fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [raw hardware event descriptor]\n", + fprintf(stderr, " %-40s [raw hardware event descriptor]\n", "rNNN"); fprintf(stderr, "\n"); -- cgit v1.2.3 From 520f2c346af463fa00924b236e092da482b344cc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 16:52:51 +0200 Subject: perf report: Output more symbol related debug data Print more symbol relocation related info under -vv. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 +++-- tools/perf/util/symbol.c | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5eb5566f0c9..ec230a0146e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -797,7 +797,7 @@ resolve_symbol(struct thread *thread, struct map **mapp, { struct dso *dso = dsop ? *dsop : NULL; struct map *map = mapp ? *mapp : NULL; - uint64_t ip = *ipp; + u64 ip = *ipp; if (!thread) return NULL; @@ -814,7 +814,6 @@ resolve_symbol(struct thread *thread, struct map **mapp, *mapp = map; got_map: ip = map->map_ip(map, ip); - *ipp = ip; dso = map->dso; } else { @@ -828,6 +827,8 @@ got_map: dso = kernel_dso; } dprintf(" ...... dso: %s\n", dso ? dso->name : ""); + dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip); + *ipp = ip; if (dsop) *dsop = dso; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 01b62fa0399..9c659ef6aec 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -535,6 +535,10 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, gelf_getshdr(sec, &shdr); obj_start = sym.st_value; + if (verbose >= 2) + printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", + (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; f = symbol__new(sym.st_value, sym.st_size, -- cgit v1.2.3 From c0c22dbfa8ba3c5045eeb9c76d2822ffc44fefc3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 22 Jun 2009 20:47:26 +0530 Subject: perf_counter tools: Set alias for page-faults "faults" should be alias for "page-faults" Also fixed alignment and 80 characters issue Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245683846.12092.1.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f5695486ad3..06af2fadcd8 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -33,8 +33,7 @@ static struct event_symbol event_symbols[] = { { CSW(CPU_CLOCK), "cpu-clock", "" }, { CSW(TASK_CLOCK), "task-clock", "" }, - { CSW(PAGE_FAULTS), "page-faults", "" }, - { CSW(PAGE_FAULTS), "faults", "" }, + { CSW(PAGE_FAULTS), "page-faults", "faults" }, { CSW(PAGE_FAULTS_MIN), "minor-faults", "" }, { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, @@ -71,24 +70,24 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 -static char *hw_cache [][MAX_ALIASES] = { - { "L1-data" , "l1-d", "l1d" }, - { "L1-instruction" , "l1-i", "l1i" }, - { "L2" , "l2" }, - { "Data-TLB" , "dtlb", "d-tlb" }, - { "Instruction-TLB" , "itlb", "i-tlb" }, - { "Branch" , "bpu" , "btb", "bpc" }, +static char *hw_cache[][MAX_ALIASES] = { + { "L1-data", "l1-d", "l1d" }, + { "L1-instruction", "l1-i", "l1i" }, + { "L2", "l2" }, + { "Data-TLB", "dtlb", "d-tlb" }, + { "Instruction-TLB", "itlb", "i-tlb" }, + { "Branch", "bpu" , "btb", "bpc" }, }; -static char *hw_cache_op [][MAX_ALIASES] = { - { "Load" , "read" }, - { "Store" , "write" }, - { "Prefetch" , "speculative-read", "speculative-load" }, +static char *hw_cache_op[][MAX_ALIASES] = { + { "Load", "read" }, + { "Store", "write" }, + { "Prefetch", "speculative-read", "speculative-load" }, }; -static char *hw_cache_result [][MAX_ALIASES] = { - { "Reference" , "ops", "access" }, - { "Miss" }, +static char *hw_cache_result[][MAX_ALIASES] = { + { "Reference", "ops", "access" }, + { "Miss" }, }; char *event_name(int counter) @@ -160,7 +159,8 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) return -1; } -static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) +static int +parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) { int cache_type = -1, cache_op = 0, cache_result = 0; @@ -201,7 +201,7 @@ static int check_events(const char *str, unsigned int i) if (strlen(event_symbols[i].alias)) if (!strncmp(str, event_symbols[i].alias, - strlen(event_symbols[i].alias))) + strlen(event_symbols[i].alias))) return 1; return 0; } -- cgit v1.2.3 From dee412066aeb16c43cf31599948c1a1de385df56 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 23 Jun 2009 02:22:39 +0530 Subject: perf stat: Fix command option / manpage -l is not supported, it should be -S for scale. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245703959.6167.16.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-stat.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index c368a72721d..0d74346d21a 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics SYNOPSIS -------- [verse] -'perf stat' [-e | --event=EVENT] [-l] [-a] -'perf stat' [-e | --event=EVENT] [-l] [-a] -- [] +'perf stat' [-e | --event=EVENT] [-S] [-a] +'perf stat' [-e | --event=EVENT] [-S] [-a] -- [] DESCRIPTION ----------- @@ -40,7 +40,7 @@ OPTIONS -a:: system-wide collection --l:: +-S:: scale counter values EXAMPLES -- cgit v1.2.3 From 3d906ef10a539ff336010afab8f6f9c4fe379695 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 11:23:07 +0200 Subject: perf_counter tools: Handle overlapping MMAP events Martin Schwidefsky reported "perf report" symbol resolution problems on S390. Since we only report MMAP, not MUNMAP, we have to deal with overlapping maps. We used to simply throw out the old map on the assumption whole maps got unmapped. This obviously doesn't deal with partial unmaps. However it appears some dynamic linkers do fancy partial unmaps (s390), so do something more elaborate and truncate the old maps, only removing them when they've been fully covered. This resolves (part of) the S390 symbol resolution problems. Reported-by: Martin Schwidefsky Tested-by: Martin Schwidefsky Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ec230a0146e..b4e76f75ba8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -400,9 +400,27 @@ static void thread__insert_map(struct thread *self, struct map *map) list_for_each_entry_safe(pos, tmp, &self->maps, node) { if (map__overlap(pos, map)) { - list_del_init(&pos->node); - /* XXX leaks dsos */ - free(pos); + if (verbose >= 2) { + printf("overlapping maps:\n"); + map__fprintf(map, stdout); + map__fprintf(pos, stdout); + } + + if (map->start <= pos->start && map->end > pos->start) + pos->start = map->end; + + if (map->end >= pos->end && map->start < pos->end) + pos->end = map->start; + + if (verbose >= 2) { + printf("after collision:\n"); + map__fprintf(pos, stdout); + } + + if (pos->start >= pos->end) { + list_del_init(&pos->node); + free(pos); + } } } -- cgit v1.2.3 From b0a28589b2fc9bee8ed83dee006a497d1ce93841 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Jun 2009 16:39:53 +0200 Subject: perf report: Fix help text typo Reported-by: Brice Goglin Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 52d3fc6846a..40c1db83a16 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -13,7 +13,7 @@ SYNOPSIS DESCRIPTION ----------- This command displays the performance counter profile information recorded -via perf report. +via perf record. OPTIONS ------- -- cgit v1.2.3 From cca03c0aeb18a975abec28df518a2b64ae3e6964 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 23 Jun 2009 17:12:49 +0530 Subject: perf stat: Fix verbose for perf stat Error message should use stderr for verbose (-v), otherwise message will be lost for: $ ./perf stat -v > /dev/null For example on AMD bus-cycles event is not available so now it looks like: $ ./perf stat -v -e bus-cycles ls > /dev/null Error: counter 0, sys_perf_counter_open() syscall returned with -1 (Invalid argument) Performance counter stats for 'ls': bus-cycles 0.006765877 seconds time elapsed. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245757369.3776.1.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6d3eeac1ea2..5e04fcc8d07 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -109,6 +109,10 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; + +#define ERR_PERF_OPEN \ +"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" + static void create_perf_stat_counter(int counter) { struct perf_counter_attr *attr = attrs + counter; @@ -119,20 +123,20 @@ static void create_perf_stat_counter(int counter) if (system_wide) { int cpu; - for (cpu = 0; cpu < nr_cpus; cpu ++) { + for (cpu = 0; cpu < nr_cpus; cpu++) { fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); - } + if (fd[cpu][counter] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[cpu][counter], strerror(errno)); } } else { attr->inherit = inherit; attr->disabled = 1; fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); - if (fd[0][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); - } + if (fd[0][counter] < 0 && verbose) + fprintf(stderr, ERR_PERF_OPEN, counter, + fd[0][counter], strerror(errno)); } } @@ -168,7 +172,7 @@ static void read_counter(int counter) count[0] = count[1] = count[2] = 0; nv = scale ? 3 : 1; - for (cpu = 0; cpu < nr_cpus; cpu ++) { + for (cpu = 0; cpu < nr_cpus; cpu++) { if (fd[cpu][counter] < 0) continue; -- cgit v1.2.3 From f7679dabfaf69840b000d238a020cee7157aca17 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Mon, 22 Jun 2009 18:42:33 +0200 Subject: perf_counter tools: Fix strbuf_fread() error path handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit size_t res cannot be less than 0 - fread returns 0 on error. [ Updated by: René Scharfe ] Reported-by: Ingo Molnar Signed-off-by: Roel Kluin Cc: Andrew Morton Cc: Junio C Hamano LKML-Reference: <4A3FB479.2090902@lsrfire.ath.cx> Signed-off-by: Ingo Molnar --- tools/perf/util/strbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index eaba0930680..464e7ca898c 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -259,7 +259,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) res = fread(sb->buf + sb->len, 1, size, f); if (res > 0) strbuf_setlen(sb, sb->len + res); - else if (res < 0 && oldalloc == 0) + else if (oldalloc == 0) strbuf_release(sb); return res; } -- cgit v1.2.3 From 3d63259583278262d9153316094e315f73ebfcb5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 24 Jun 2009 18:19:34 +0530 Subject: perf stat: Remove dead code Remove dead code and do some code alignment. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245847774.2681.2.camel@ht.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 44 +++++++++++++------------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5e04fcc8d07..8420ec58950 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,42 +59,27 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { }; +#define MAX_RUN 100 + static int system_wide = 0; -static int inherit = 1; static int verbose = 0; - -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; - -static int target_pid = -1; static int nr_cpus = 0; -static unsigned int page_size; +static int run_idx = 0; +static int run_count = 1; +static int inherit = 1; static int scale = 1; +static int target_pid = -1; -static const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; - -#define MAX_RUN 100 - -static int run_count = 1; -static int run_idx = 0; - -static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; -static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; - -//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; - +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static u64 runtime_nsecs[MAX_RUN]; static u64 walltime_nsecs[MAX_RUN]; static u64 runtime_cycles[MAX_RUN]; +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + static u64 event_res_avg[MAX_COUNTERS][3]; static u64 event_res_noise[MAX_COUNTERS][3]; @@ -109,7 +94,6 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; - #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" @@ -470,9 +454,9 @@ static const struct option options[] = { OPT_INTEGER('p', "pid", &target_pid, "stat events on existing pid"), OPT_BOOLEAN('a', "all-cpus", &system_wide, - "system-wide collection from all CPUs"), + "system-wide collection from all CPUs"), OPT_BOOLEAN('S', "scale", &scale, - "scale/normalize counters"), + "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, @@ -484,8 +468,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix) { int status; - page_size = sysconf(_SC_PAGE_SIZE); - memcpy(attrs, default_attrs, sizeof(attrs)); argc = parse_options(argc, argv, options, stat_usage, 0); @@ -515,7 +497,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) status = 0; for (run_idx = 0; run_idx < run_count; run_idx++) { if (run_count != 1 && verbose) - fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); + fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); status = run_perf_stat(argc, argv); } -- cgit v1.2.3 From 1b173f77dd0d5fd4f0ff18034aaa79e30da068b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 24 Jun 2009 19:54:29 +0200 Subject: perf_counter tools: Add CREDITS file for Git contributors Much of perf's libraries comes from the Git project. I noticed that the files (in tools/perf/util/*.[ch] and elsewhere) are quite spartan wrt. credits, so lets add a CREDITS file that includes an (incomplete!) list of main contributors. Thanks guys, these libraries are really useful. Special thanks go to Johannes Schindelin and Junio C Hamano for coming up with this list. List-Composed-By: Johannes Schindelin Cc: Junio C Hamano Signed-off-by: Ingo Molnar --- tools/perf/CREDITS | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tools/perf/CREDITS (limited to 'tools/perf') diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS new file mode 100644 index 00000000000..c2ddcb3acbd --- /dev/null +++ b/tools/perf/CREDITS @@ -0,0 +1,30 @@ +Most of the infrastructure that 'perf' uses here has been reused +from the Git project, as of version: + + 66996ec: Sync with 1.6.2.4 + +Here is an (incomplete!) list of main contributors to those files +in util/* and elsewhere: + + Alex Riesen + Christian Couder + Dmitry Potapov + Jeff King + Johannes Schindelin + Johannes Sixt + Junio C Hamano + Linus Torvalds + Matthias Kestenholz + Michal Ostrowski + Miklos Vajna + Petr Baudis + Pierre Habouzit + René Scharfe + Samuel Tardieu + Shawn O. Pearce + Steffen Prohaska + Steve Haslam + +Thanks guys! + +The full history of the files can be found in the upstream Git commits. -- cgit v1.2.3 From febe8345353e8873e43f2c2c9792d062c770b22b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 25 Jun 2009 14:41:57 +0900 Subject: perf_counter tools: add cpu_relax()/rmb() definitions for sh. Simple cpu_relax()/rmb() stubs that perf needs, which were inadvertently omitted from the sh HAVE_PERF_COUNTERS patch. Signed-off-by: Paul Mundt --- tools/perf/perf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ceb68aa51f7..f735b6924a2 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -19,6 +19,16 @@ #define cpu_relax() asm volatile("" ::: "memory"); #endif +#ifdef __sh__ +#include "../../arch/sh/include/asm/unistd.h" +#if defined(__SH4A__) || defined(__SH5__) +# define rmb() asm volatile("synco" ::: "memory") +#else +# define rmb() asm volatile("" ::: "memory") +#endif +#define cpu_relax() asm volatile("" ::: "memory") +#endif + #include #include #include -- cgit v1.2.3 From 76c64c5e4c47b6d28deb3cae8dfa07a93c2229dc Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 24 Jun 2009 21:08:36 +0200 Subject: perf record: Fix filemap pathname parsing in /proc/pid/maps Looking backward for the first space from the end of a line in /proc/pid/maps does not find the start of the pathname of the mapped file if it contains a space. Since the only slashes we have in this file occur in the (absolute!) pathname column of file mappings, looking for the first slash in a line is a safe method to find the name. Signed-off-by: Johannes Weiner Cc: Stefani Seibold Cc: Andrew Morton Cc: "Eric W. Biederman" Cc: Alexey Dobriyan Cc: Peter Zijlstra LKML-Reference: <20090624190835.GA25548@cmpxchg.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d7ebbd75754..9b899ba1b41 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -306,12 +306,11 @@ static void pid_synthesize_mmap_samples(pid_t pid) continue; pbf += n + 3; if (*pbf == 'x') { /* vm_exec */ - char *execname = strrchr(bf, ' '); + char *execname = strchr(bf, '/'); - if (execname == NULL || execname[1] != '/') + if (execname == NULL) continue; - execname += 1; size = strlen(execname); execname[size - 1] = '\0'; /* Remove \n */ memcpy(mmap_ev.filename, execname, size); -- cgit v1.2.3 From 06813f6c743420c16f9248ab59bd2e68a2de57ba Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 17:16:07 +0530 Subject: perf_counter tools: Check for valid cache operations Made new table for cache operartion stat 'hw_cache_stat' as: L1I : Read and prefetch only ITLB and BPU : Read-only introduce is_cache_op_valid() for cache operation validity And checks for valid cache operations. Reported-by : Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245930367.5308.33.camel@localhost.localdomain> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 06af2fadcd8..7939a21130d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -90,6 +90,34 @@ static char *hw_cache_result[][MAX_ALIASES] = { { "Miss" }, }; +#define C(x) PERF_COUNT_HW_CACHE_##x +#define CACHE_READ (1 << C(OP_READ)) +#define CACHE_WRITE (1 << C(OP_WRITE)) +#define CACHE_PREFETCH (1 << C(OP_PREFETCH)) +#define COP(x) (1 << x) + +/* + * cache operartion stat + * L1I : Read and prefetch only + * ITLB and BPU : Read-only + */ +static unsigned long hw_cache_stat[C(MAX)] = { + [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(L1I)] = (CACHE_READ | CACHE_PREFETCH), + [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), + [C(ITLB)] = (CACHE_READ), + [C(BPU)] = (CACHE_READ), +}; + +static int is_cache_op_valid(u8 cache_type, u8 cache_op) +{ + if (hw_cache_stat[cache_type] & COP(cache_op)) + return 1; /* valid */ + else + return 0; /* invalid */ +} + char *event_name(int counter) { u64 config = attrs[counter].config; @@ -123,6 +151,8 @@ char *event_name(int counter) if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) return "unknown-ext-hardware-cache-result"; + if (!is_cache_op_valid(cache_type, cache_op)) + return "invalid-cache"; sprintf(name, "%s-Cache-%s-%ses", hw_cache[cache_type][0], hw_cache_op[cache_op][0], @@ -179,6 +209,9 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) if (cache_op == -1) cache_op = PERF_COUNT_HW_CACHE_OP_READ; + if (!is_cache_op_valid(cache_type, cache_op)) + return -EINVAL; + cache_result = parse_aliases(str, hw_cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); /* -- cgit v1.2.3 From e5c59547791f171b280bc4c4b2c3ff171824c1a3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 18:25:22 +0530 Subject: perf_counter tools: Shorten names for events Added new alias for events. On AMD box: $ ./perf stat -e l1d -e l1d-misses -e l1d-write -e l1d-prefetch -e l1d-prefetch-miss -e l1i -e l1i-misses -e l1i-prefetch -e l2 -e l2-misses -e l2-write -e dtlb -e dtlb-misses -e itlb -e itlb-misses -e bpu -e bpu-misses -- ls -lR /usr/include/ > /dev/null Before : Performance counter stats for 'ls -lR /usr/include/': 248064467 L1-data-Cache-Load-Referencees (scaled from 23.27%) 1001433 L1-data-Cache-Load-Misses (scaled from 23.34%) 153691 L1-data-Cache-Store-Referencees (scaled from 23.34%) 423248 L1-data-Cache-Prefetch-Referencees (scaled from 23.33%) 302138 L1-data-Cache-Prefetch-Misses (scaled from 23.25%) 251217546 L1-instruction-Cache-Load-Referencees (scaled from 23.25%) 5757005 L1-instruction-Cache-Load-Misses (scaled from 23.23%) 93435 L1-instruction-Cache-Prefetch-Referencees (scaled from 23.24%) 6496073 L2-Cache-Load-Referencees (scaled from 23.32%) 609485 L2-Cache-Load-Misses (scaled from 23.45%) 6876991 L2-Cache-Store-Referencees (scaled from 23.71%) 248922840 Data-TLB-Cache-Load-Referencees (scaled from 23.94%) 5828386 Data-TLB-Cache-Load-Misses (scaled from 24.17%) 257613506 Instruction-TLB-Cache-Load-Referencees (scaled from 24.20%) 6833 Instruction-TLB-Cache-Load-Misses (scaled from 23.88%) 109043606 Branch-Cache-Load-Referencees (scaled from 23.64%) 5552296 Branch-Cache-Load-Misses (scaled from 23.42%) 0.413702461 seconds time elapsed. After : Peformance counter stats for 'ls -lR /usr/include/': 266590464 L1-d$-loads (scaled from 23.03%) 1222273 L1-d$-load-misses (scaled from 23.58%) 146204 L1-d$-stores (scaled from 23.83%) 406344 L1-d$-prefetches (scaled from 24.09%) 283748 L1-d$-prefetch-misses (scaled from 24.10%) 249650965 L1-i$-loads (scaled from 23.80%) 3353961 L1-i$-load-misses (scaled from 23.82%) 104599 L1-i$-prefetches (scaled from 23.68%) 4836405 LLC-loads (scaled from 23.67%) 498214 LLC-load-misses (scaled from 23.66%) 4953994 LLC-stores (scaled from 23.64%) 243354097 dTLB-loads (scaled from 23.77%) 6468584 dTLB-load-misses (scaled from 23.74%) 249719549 iTLB-loads (scaled from 23.25%) 5060 iTLB-load-misses (scaled from 23.00%) 112343016 branch-loads (scaled from 22.76%) 5528876 branch-load-misses (scaled from 22.54%) 0.427154051 seconds time elapsed. Reported-by : Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1245934522.5308.39.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 45 ++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7939a21130d..430f0608320 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -71,23 +71,23 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 static char *hw_cache[][MAX_ALIASES] = { - { "L1-data", "l1-d", "l1d" }, - { "L1-instruction", "l1-i", "l1i" }, - { "L2", "l2" }, - { "Data-TLB", "dtlb", "d-tlb" }, - { "Instruction-TLB", "itlb", "i-tlb" }, - { "Branch", "bpu" , "btb", "bpc" }, + { "L1-d$", "l1-d", "L1-data", }, + { "L1-i$", "l1-i", "L1-instruction", }, + { "LLC", "L2" }, + { "dTLB", "d-tlb", "Data-TLB", }, + { "iTLB", "i-tlb", "Instruction-TLB", }, + { "branch", "branches", "bpu", "btb", "bpc", }, }; static char *hw_cache_op[][MAX_ALIASES] = { - { "Load", "read" }, - { "Store", "write" }, - { "Prefetch", "speculative-read", "speculative-load" }, + { "load", "loads", "read", }, + { "store", "stores", "write", }, + { "prefetch", "prefetches", "speculative-read", "speculative-load", }, }; static char *hw_cache_result[][MAX_ALIASES] = { - { "Reference", "ops", "access" }, - { "Miss" }, + { "refs", "Reference", "ops", "access", }, + { "misses", "miss", }, }; #define C(x) PERF_COUNT_HW_CACHE_##x @@ -118,6 +118,22 @@ static int is_cache_op_valid(u8 cache_type, u8 cache_op) return 0; /* invalid */ } +static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result) +{ + static char name[50]; + + if (cache_result) { + sprintf(name, "%s-%s-%s", hw_cache[cache_type][0], + hw_cache_op[cache_op][0], + hw_cache_result[cache_result][0]); + } else { + sprintf(name, "%s-%s", hw_cache[cache_type][0], + hw_cache_op[cache_op][1]); + } + + return name; +} + char *event_name(int counter) { u64 config = attrs[counter].config; @@ -137,7 +153,6 @@ char *event_name(int counter) case PERF_TYPE_HW_CACHE: { u8 cache_type, cache_op, cache_result; - static char name[100]; cache_type = (config >> 0) & 0xff; if (cache_type > PERF_COUNT_HW_CACHE_MAX) @@ -153,12 +168,8 @@ char *event_name(int counter) if (!is_cache_op_valid(cache_type, cache_op)) return "invalid-cache"; - sprintf(name, "%s-Cache-%s-%ses", - hw_cache[cache_type][0], - hw_cache_op[cache_op][0], - hw_cache_result[cache_result][0]); - return name; + return event_cache_name(cache_type, cache_op, cache_result); } case PERF_TYPE_SOFTWARE: -- cgit v1.2.3 From 7c6a1c65bbd3be688e581511f45818663efc1877 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 17:05:54 +0200 Subject: perf_counter tools: Rework the file format Create a structured file format that includes the full perf_counter_attr and all its relevant counter IDs so that the reporting program has full information. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 3 +- tools/perf/builtin-record.c | 100 +++++++++++------- tools/perf/builtin-report.c | 37 +++++-- tools/perf/perf.h | 8 +- tools/perf/types.h | 17 ---- tools/perf/util/header.c | 242 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 37 +++++++ tools/perf/util/string.h | 2 +- tools/perf/util/symbol.h | 2 +- tools/perf/util/types.h | 17 ++++ 10 files changed, 394 insertions(+), 71 deletions(-) delete mode 100644 tools/perf/types.h create mode 100644 tools/perf/util/header.c create mode 100644 tools/perf/util/header.h create mode 100644 tools/perf/util/types.h (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 36d7eef4991..d3887ed51a6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -290,7 +290,7 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h LIB_H += perf.h -LIB_H += types.h +LIB_H += util/types.h LIB_H += util/list.h LIB_H += util/rbtree.h LIB_H += util/levenshtein.h @@ -328,6 +328,7 @@ LIB_OBJS += util/sigchain.o LIB_OBJS += util/symbol.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o +LIB_OBJS += util/header.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9b899ba1b41..f4f0240d230 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -14,6 +14,8 @@ #include "util/parse-events.h" #include "util/string.h" +#include "util/header.h" + #include #include @@ -52,7 +54,8 @@ static int nr_poll; static int nr_cpu; static int file_new = 1; -static struct perf_file_header file_header; + +struct perf_header *header; struct mmap_event { struct perf_event_header header; @@ -328,7 +331,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) fclose(fp); } -static void synthesize_samples(void) +static void synthesize_all(void) { DIR *proc; struct dirent dirent, *next; @@ -352,10 +355,35 @@ static void synthesize_samples(void) static int group_fd; +static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr) +{ + struct perf_header_attr *h_attr; + + if (nr < header->attrs) { + h_attr = header->attr[nr]; + } else { + h_attr = perf_header_attr__new(a); + perf_header__add_attr(header, h_attr); + } + + return h_attr; +} + static void create_counter(int counter, int cpu, pid_t pid) { struct perf_counter_attr *attr = attrs + counter; - int track = 1; + struct perf_header_attr *h_attr; + int track = !counter; /* only the first counter needs these */ + struct { + u64 count; + u64 time_enabled; + u64 time_running; + u64 id; + } read_data; + + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | + PERF_FORMAT_ID; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; @@ -368,22 +396,11 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; - if (file_new) { - file_header.sample_type = attr->sample_type; - } else { - if (file_header.sample_type != attr->sample_type) { - fprintf(stderr, "incompatible append\n"); - exit(-1); - } - } - attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; attr->disabled = 1; - track = 0; /* only the first counter needs these */ - try_again: fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); @@ -414,6 +431,19 @@ try_again: exit(-1); } + h_attr = get_header_attr(attr, counter); + + if (!file_new) { + if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { + fprintf(stderr, "incompatible append\n"); + exit(-1); + } + } + + read(fd[nr_cpu][counter], &read_data, sizeof(read_data)); + + perf_header_attr__add_id(h_attr, read_data.id); + assert(fd[nr_cpu][counter] >= 0); fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); @@ -444,11 +474,6 @@ static void open_counters(int cpu, pid_t pid) { int counter; - if (pid > 0) { - pid_synthesize_comm_event(pid, 0); - pid_synthesize_mmap_samples(pid); - } - group_fd = -1; for (counter = 0; counter < nr_counters; counter++) create_counter(counter, cpu, pid); @@ -458,17 +483,16 @@ static void open_counters(int cpu, pid_t pid) static void atexit_header(void) { - file_header.data_size += bytes_written; + header->data_size += bytes_written; - if (pwrite(output, &file_header, sizeof(file_header), 0) == -1) - perror("failed to write on file headers"); + perf_header__write(header, output); } static int __cmd_record(int argc, const char **argv) { int i, counter; struct stat st; - pid_t pid; + pid_t pid = 0; int flags; int ret; @@ -499,22 +523,31 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } - if (!file_new) { - if (read(output, &file_header, sizeof(file_header)) == -1) { - perror("failed to read file headers"); - exit(-1); - } - - lseek(output, file_header.data_size, SEEK_CUR); - } + if (!file_new) + header = perf_header__read(output); + else + header = perf_header__new(); atexit(atexit_header); if (!system_wide) { - open_counters(-1, target_pid != -1 ? target_pid : getpid()); + pid = target_pid; + if (pid == -1) + pid = getpid(); + + open_counters(-1, pid); } else for (i = 0; i < nr_cpus; i++) open_counters(i, target_pid); + if (file_new) + perf_header__write(header, output); + + if (!system_wide) { + pid_synthesize_comm_event(pid, 0); + pid_synthesize_mmap_samples(pid); + } else + synthesize_all(); + if (target_pid == -1 && argc) { pid = fork(); if (pid < 0) @@ -538,9 +571,6 @@ static int __cmd_record(int argc, const char **argv) } } - if (system_wide) - synthesize_samples(); - while (!done) { int hits = samples; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b4e76f75ba8..e575f303976 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -17,6 +17,7 @@ #include "util/string.h" #include "perf.h" +#include "util/header.h" #include "util/parse-options.h" #include "util/parse-events.h" @@ -1385,13 +1386,27 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static struct perf_file_header file_header; +static struct perf_header *header; + +static int perf_header__has_sample(u64 sample_mask) +{ + int i; + + for (i = 0; i < header->attrs; i++) { + struct perf_header_attr *attr = header->attr[i]; + + if (!(attr->attr.sample_type & sample_mask)) + return 0; + } + + return 1; +} static int __cmd_report(void) { int ret, rc = EXIT_FAILURE; unsigned long offset = 0; - unsigned long head = sizeof(file_header); + unsigned long head, shift; struct stat stat; event_t *event; uint32_t size; @@ -1419,13 +1434,11 @@ static int __cmd_report(void) exit(0); } - if (read(input, &file_header, sizeof(file_header)) == -1) { - perror("failed to read file headers"); - exit(-1); - } + header = perf_header__read(input); + head = header->data_offset; if (sort__has_parent && - !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { + !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) { fprintf(stderr, "selected --sort parent, but no callchain data\n"); exit(-1); } @@ -1445,6 +1458,11 @@ static int __cmd_report(void) cwd = NULL; cwdlen = 0; } + + shift = page_size * (head / page_size); + offset += shift; + head -= shift; + remap: buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, MAP_SHARED, input, offset); @@ -1461,9 +1479,10 @@ more: size = 8; if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); int ret; + shift = page_size * (head / page_size); + ret = munmap(buf, page_size * mmap_window); assert(ret == 0); @@ -1501,7 +1520,7 @@ more: head += size; - if (offset + head >= sizeof(file_header) + file_header.data_size) + if (offset + head >= header->data_offset + header->data_size) goto done; if (offset + head < stat.st_size) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index bccb529dac0..16c84fd73c8 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -19,7 +19,7 @@ #include #include "../../include/linux/perf_counter.h" -#include "types.h" +#include "util/types.h" /* * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all @@ -66,10 +66,4 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 -struct perf_file_header { - u64 version; - u64 sample_type; - u64 data_size; -}; - #endif diff --git a/tools/perf/types.h b/tools/perf/types.h deleted file mode 100644 index 5e75f900594..00000000000 --- a/tools/perf/types.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _PERF_TYPES_H -#define _PERF_TYPES_H - -/* - * We define u64 as unsigned long long for every architecture - * so that we can print it with %Lx without getting warnings. - */ -typedef unsigned long long u64; -typedef signed long long s64; -typedef unsigned int u32; -typedef signed int s32; -typedef unsigned short u16; -typedef signed short s16; -typedef unsigned char u8; -typedef signed char s8; - -#endif /* _PERF_TYPES_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c new file mode 100644 index 00000000000..450384b3bbe --- /dev/null +++ b/tools/perf/util/header.c @@ -0,0 +1,242 @@ +#include +#include +#include +#include + +#include "util.h" +#include "header.h" + +/* + * + */ + +struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr) +{ + struct perf_header_attr *self = malloc(sizeof(*self)); + + if (!self) + die("nomem"); + + self->attr = *attr; + self->ids = 0; + self->size = 1; + self->id = malloc(sizeof(u64)); + + if (!self->id) + die("nomem"); + + return self; +} + +void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) +{ + int pos = self->ids; + + self->ids++; + if (self->ids > self->size) { + self->size *= 2; + self->id = realloc(self->id, self->size * sizeof(u64)); + if (!self->id) + die("nomem"); + } + self->id[pos] = id; +} + +/* + * + */ + +struct perf_header *perf_header__new(void) +{ + struct perf_header *self = malloc(sizeof(*self)); + + if (!self) + die("nomem"); + + self->frozen = 0; + + self->attrs = 0; + self->size = 1; + self->attr = malloc(sizeof(void *)); + + if (!self->attr) + die("nomem"); + + self->data_offset = 0; + self->data_size = 0; + + return self; +} + +void perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr) +{ + int pos = self->attrs; + + if (self->frozen) + die("frozen"); + + self->attrs++; + if (self->attrs > self->size) { + self->size *= 2; + self->attr = realloc(self->attr, self->size * sizeof(void *)); + if (!self->attr) + die("nomem"); + } + self->attr[pos] = attr; +} + +static const char *__perf_magic = "PERFFILE"; + +#define PERF_MAGIC (*(u64 *)__perf_magic) + +struct perf_file_section { + u64 offset; + u64 size; +}; + +struct perf_file_attr { + struct perf_counter_attr attr; + struct perf_file_section ids; +}; + +struct perf_file_header { + u64 magic; + u64 size; + u64 attr_size; + struct perf_file_section attrs; + struct perf_file_section data; +}; + +static void do_write(int fd, void *buf, size_t size) +{ + while (size) { + int ret = write(fd, buf, size); + + if (ret < 0) + die("failed to write"); + + size -= ret; + buf += ret; + } +} + +void perf_header__write(struct perf_header *self, int fd) +{ + struct perf_file_header f_header; + struct perf_file_attr f_attr; + struct perf_header_attr *attr; + int i; + + lseek(fd, sizeof(f_header), SEEK_SET); + + + for (i = 0; i < self->attrs; i++) { + attr = self->attr[i]; + + attr->id_offset = lseek(fd, 0, SEEK_CUR); + do_write(fd, attr->id, attr->ids * sizeof(u64)); + } + + + self->attr_offset = lseek(fd, 0, SEEK_CUR); + + for (i = 0; i < self->attrs; i++) { + attr = self->attr[i]; + + f_attr = (struct perf_file_attr){ + .attr = attr->attr, + .ids = { + .offset = attr->id_offset, + .size = attr->ids * sizeof(u64), + } + }; + do_write(fd, &f_attr, sizeof(f_attr)); + } + + + self->data_offset = lseek(fd, 0, SEEK_CUR); + + f_header = (struct perf_file_header){ + .magic = PERF_MAGIC, + .size = sizeof(f_header), + .attr_size = sizeof(f_attr), + .attrs = { + .offset = self->attr_offset, + .size = self->attrs * sizeof(f_attr), + }, + .data = { + .offset = self->data_offset, + .size = self->data_size, + }, + }; + + lseek(fd, 0, SEEK_SET); + do_write(fd, &f_header, sizeof(f_header)); + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + + self->frozen = 1; +} + +static void do_read(int fd, void *buf, size_t size) +{ + while (size) { + int ret = read(fd, buf, size); + + if (ret < 0) + die("failed to read"); + + size -= ret; + buf += ret; + } +} + +struct perf_header *perf_header__read(int fd) +{ + struct perf_header *self = perf_header__new(); + struct perf_file_header f_header; + struct perf_file_attr f_attr; + u64 f_id; + + int nr_attrs, nr_ids, i, j; + + lseek(fd, 0, SEEK_SET); + do_read(fd, &f_header, sizeof(f_header)); + + if (f_header.magic != PERF_MAGIC || + f_header.size != sizeof(f_header) || + f_header.attr_size != sizeof(f_attr)) + die("incompatible file format"); + + nr_attrs = f_header.attrs.size / sizeof(f_attr); + lseek(fd, f_header.attrs.offset, SEEK_SET); + + for (i = 0; i < nr_attrs; i++) { + struct perf_header_attr *attr; + off_t tmp = lseek(fd, 0, SEEK_CUR); + + do_read(fd, &f_attr, sizeof(f_attr)); + + attr = perf_header_attr__new(&f_attr.attr); + + nr_ids = f_attr.ids.size / sizeof(u64); + lseek(fd, f_attr.ids.offset, SEEK_SET); + + for (j = 0; j < nr_ids; j++) { + do_read(fd, &f_id, sizeof(f_id)); + + perf_header_attr__add_id(attr, f_id); + } + perf_header__add_attr(self, attr); + lseek(fd, tmp, SEEK_SET); + } + + self->data_offset = f_header.data.offset; + self->data_size = f_header.data.size; + + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + + self->frozen = 1; + + return self; +} diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h new file mode 100644 index 00000000000..b5ef53ad4c7 --- /dev/null +++ b/tools/perf/util/header.h @@ -0,0 +1,37 @@ +#ifndef _PERF_HEADER_H +#define _PERF_HEADER_H + +#include "../../../include/linux/perf_counter.h" +#include +#include "types.h" + +struct perf_header_attr { + struct perf_counter_attr attr; + int ids, size; + u64 *id; + off_t id_offset; +}; + +struct perf_header { + int frozen; + int attrs, size; + struct perf_header_attr **attr; + off_t attr_offset; + u64 data_offset; + u64 data_size; +}; + +struct perf_header *perf_header__read(int fd); +void perf_header__write(struct perf_header *self, int fd); + +void perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr); + +struct perf_header_attr * +perf_header_attr__new(struct perf_counter_attr *attr); +void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); + + +struct perf_header *perf_header__new(void); + +#endif /* _PERF_HEADER_H */ diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 37b03255b42..3dca2f654cd 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -1,7 +1,7 @@ #ifndef _PERF_STRING_H_ #define _PERF_STRING_H_ -#include "../types.h" +#include "types.h" int hex2u64(const char *ptr, u64 *val); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ea332e56e45..940b432db16 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -2,7 +2,7 @@ #define _PERF_SYMBOL_ 1 #include -#include "../types.h" +#include "types.h" #include "list.h" #include "rbtree.h" diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h new file mode 100644 index 00000000000..5e75f900594 --- /dev/null +++ b/tools/perf/util/types.h @@ -0,0 +1,17 @@ +#ifndef _PERF_TYPES_H +#define _PERF_TYPES_H + +/* + * We define u64 as unsigned long long for every architecture + * so that we can print it with %Lx without getting warnings. + */ +typedef unsigned long long u64; +typedef signed long long s64; +typedef unsigned int u32; +typedef signed int s32; +typedef unsigned short u16; +typedef signed short s16; +typedef unsigned char u8; +typedef signed char s8; + +#endif /* _PERF_TYPES_H */ -- cgit v1.2.3 From e6e18ec79b023d5fe84226cef533cf0e3770ce93 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 11:27:12 +0200 Subject: perf_counter: Rework the sample ABI The PERF_EVENT_READ implementation made me realize we don't actually need the sample_type int the output sample, since we already have that in the perf_counter_attr information. Therefore, remove the PERF_EVENT_MISC_OVERFLOW bit and the event->type overloading, and imply put counter overflow samples in a PERF_EVENT_SAMPLE type. This also fixes the issue that event->type was only 32-bit and sample_type had 64 usable bits. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 8 ++++---- tools/perf/builtin-report.c | 32 +++++++++++++++++++------------- tools/perf/builtin-top.c | 11 ++++++----- 3 files changed, 29 insertions(+), 22 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7e58e3ad150..722c0f54e54 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -855,7 +855,7 @@ static unsigned long total = 0, total_unknown = 0; static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; @@ -1013,10 +1013,10 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) static int process_event(event_t *event, unsigned long offset, unsigned long head) { - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - switch (event->header.type) { + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + case PERF_EVENT_MMAP: return process_mmap_event(event, offset, head); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e575f303976..ec5361c67bf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,8 @@ static regex_t parent_regex; static int exclude_other = 1; +static u64 sample_type; + struct ip_event { struct perf_event_header header; u64 ip; @@ -1135,7 +1137,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) } static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; @@ -1147,12 +1149,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; - if (event->header.type & PERF_SAMPLE_PERIOD) { + if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; more_data += sizeof(u64); } - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1160,7 +1162,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); - if (event->header.type & PERF_SAMPLE_CALLCHAIN) { + if (sample_type & PERF_SAMPLE_CALLCHAIN) { int i; chain = (void *)more_data; @@ -1352,10 +1354,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) { trace_event(event); - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - switch (event->header.type) { + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + case PERF_EVENT_MMAP: return process_mmap_event(event, offset, head); @@ -1388,18 +1390,21 @@ process_event(event_t *event, unsigned long offset, unsigned long head) static struct perf_header *header; -static int perf_header__has_sample(u64 sample_mask) +static u64 perf_header__sample_type(void) { + u64 sample_type = 0; int i; for (i = 0; i < header->attrs; i++) { struct perf_header_attr *attr = header->attr[i]; - if (!(attr->attr.sample_type & sample_mask)) - return 0; + if (!sample_type) + sample_type = attr->attr.sample_type; + else if (sample_type != attr->attr.sample_type) + die("non matching sample_type"); } - return 1; + return sample_type; } static int __cmd_report(void) @@ -1437,8 +1442,9 @@ static int __cmd_report(void) header = perf_header__read(input); head = header->data_offset; - if (sort__has_parent && - !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) { + sample_type = perf_header__sample_type(); + + if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { fprintf(stderr, "selected --sort parent, but no callchain data\n"); exit(-1); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5352b5e352e..cf0d21f1ae1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -392,11 +392,11 @@ static void record_ip(u64 ip, int counter) samples--; } -static void process_event(u64 ip, int counter) +static void process_event(u64 ip, int counter, int user) { samples++; - if (ip < min_ip || ip > max_ip) { + if (user) { userspace_samples++; return; } @@ -509,9 +509,10 @@ static void mmap_read_counter(struct mmap_data *md) old += size; - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { - if (event->header.type & PERF_SAMPLE_IP) - process_event(event->ip.ip, md->counter); + if (event->header.type == PERF_EVENT_SAMPLE) { + int user = + (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; + process_event(event->ip.ip, md->counter, user); } } -- cgit v1.2.3 From 649c48a9e7fafcc72bfcc99471d9dea98d789d59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 21:12:48 +0200 Subject: perf-report: Add modes for inherited stats and no-samples Now that we can collect per task statistics, add modes that make use of that facility. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f4f0240d230..798a56d890e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -41,6 +41,8 @@ static int force = 0; static int append_file = 0; static int call_graph = 0; static int verbose = 0; +static int inherit_stat = 0; +static int no_samples = 0; static long samples; static struct timeval last_read; @@ -393,6 +395,12 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->sample_freq = freq; } + if (no_samples) + attr->sample_freq = 0; + + if (inherit_stat) + attr->inherit_stat = 1; + if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; @@ -571,7 +579,7 @@ static int __cmd_record(int argc, const char **argv) } } - while (!done) { + for (;;) { int hits = samples; for (i = 0; i < nr_cpu; i++) { @@ -579,8 +587,11 @@ static int __cmd_record(int argc, const char **argv) mmap_read(&mmap_array[i][counter]); } - if (hits == samples) + if (hits == samples) { + if (done) + break; ret = poll(event_array, nr_poll, 100); + } } /* @@ -629,6 +640,10 @@ static const struct option options[] = { "do call-graph (stack chain/backtrace) recording"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_BOOLEAN('s', "stat", &inherit_stat, + "per thread counts"), + OPT_BOOLEAN('n', "no-samples", &no_samples, + "don't sample"), OPT_END() }; -- cgit v1.2.3 From e9ea2fde7a07ae60a119171a2946ed2ae778271e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 22:46:04 +0200 Subject: perf-report: Add bare minimum PERF_EVENT_READ parsing Provide the basic infrastructure to provide per task stats. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ec5361c67bf..681c2233f88 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -100,6 +100,13 @@ struct lost_event { u64 lost; }; +struct read_event { + struct perf_event_header header; + u32 pid,tid; + u64 value; + u64 format[3]; +}; + typedef union event_union { struct perf_event_header header; struct ip_event ip; @@ -108,6 +115,7 @@ typedef union event_union { struct fork_event fork; struct period_event period; struct lost_event lost; + struct read_event read; } event_t; static LIST_HEAD(dsos); @@ -1349,6 +1357,19 @@ static void trace_event(event_t *event) dprintf(".\n"); } +static int +process_read_event(event_t *event, unsigned long offset, unsigned long head) +{ + dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->read.pid, + event->read.tid, + event->read.value); + + return 0; +} + static int process_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1373,6 +1394,9 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_LOST: return process_lost_event(event, offset, head); + case PERF_EVENT_READ: + return process_read_event(event, offset, head); + /* * We dont process them right now but they are fine: */ -- cgit v1.2.3 From 4418351f06d9ce73acc846158c20186965f920f3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 25 Jun 2009 21:27:42 +0530 Subject: perf_counter tools: Add alias for 'l1d' and 'l1i' Add 'l1d' and 'l1i' aliases again as shortcuts - just dont make them the primary display alias. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1245945462.9157.11.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 430f0608320..4d042f104cd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -71,8 +71,8 @@ static char *sw_event_names[] = { #define MAX_ALIASES 8 static char *hw_cache[][MAX_ALIASES] = { - { "L1-d$", "l1-d", "L1-data", }, - { "L1-i$", "l1-i", "L1-instruction", }, + { "L1-d$", "l1-d", "l1d", "L1-data", }, + { "L1-i$", "l1-i", "l1i", "L1-instruction", }, { "LLC", "L2" }, { "dTLB", "d-tlb", "Data-TLB", }, { "iTLB", "i-tlb", "Instruction-TLB", }, -- cgit v1.2.3 From 3928ddbe994cce1da1b6365b0db04d5765f254f4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Jun 2009 22:21:27 +0200 Subject: perf record: Fix unhandled io return value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building latest perfcounter fails on the following error: builtin-record.c: In function ‘create_counter’: builtin-record.c:451: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result make: *** [builtin-record.o] Erreur 1 Just check if we successfully read the perf file descriptor. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1245961287-5327-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-record.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 798a56d890e..d18546f37d7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -448,7 +448,10 @@ try_again: } } - read(fd[nr_cpu][counter], &read_data, sizeof(read_data)); + if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) { + perror("Unable to read perf file descriptor\n"); + exit(-1); + } perf_header_attr__add_id(h_attr, read_data.id); -- cgit v1.2.3 From 8cb76d99d715741637b6d0884f389e17e9cb05d2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Jun 2009 16:28:00 +0200 Subject: perf_counter tools: Prepare a small callchain framework We plan to display the callchains depending on some user-configurable parameters. To gather the callchains stats from the recorded stream in a fast way, this patch introduces an ad hoc radix tree adapted for callchains and also a rbtree to sort these callchains once we have gathered every events from the stream. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1246026481-8314-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 1 + tools/perf/builtin-report.c | 5 -- tools/perf/perf.h | 5 ++ tools/perf/util/callchain.c | 174 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/callchain.h | 33 +++++++++ 5 files changed, 213 insertions(+), 5 deletions(-) create mode 100644 tools/perf/util/callchain.c create mode 100644 tools/perf/util/callchain.h (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d3887ed51a6..1c1296d8a64 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -329,6 +329,7 @@ LIB_OBJS += util/symbol.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o LIB_OBJS += util/header.o +LIB_OBJS += util/callchain.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 681c2233f88..28d1cb2127e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -62,11 +62,6 @@ struct ip_event { unsigned char __more_data[]; }; -struct ip_callchain { - u64 nr; - u64 ips[0]; -}; - struct mmap_event { struct perf_event_header header; u32 pid, tid; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 16c84fd73c8..a49842b69a5 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -66,4 +66,9 @@ sys_perf_counter_open(struct perf_counter_attr *attr, #define MAX_COUNTERS 256 #define MAX_NR_CPUS 256 +struct ip_callchain { + u64 nr; + u64 ips[0]; +}; + #endif diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c new file mode 100644 index 00000000000..ad3c2857896 --- /dev/null +++ b/tools/perf/util/callchain.c @@ -0,0 +1,174 @@ +/* + * Copyright (C) 2009, Frederic Weisbecker + * + * Handle the callchains from the stream in an ad-hoc radix tree and then + * sort them in an rbtree. + * + */ + +#include +#include +#include +#include + +#include "callchain.h" + + +static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct callchain_node *rnode; + + while (*p) { + parent = *p; + rnode = rb_entry(parent, struct callchain_node, rb_node); + + if (rnode->hit < chain->hit) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&chain->rb_node, parent, p); + rb_insert_color(&chain->rb_node, root); +} + +/* + * Once we get every callchains from the stream, we can now + * sort them by hit + */ +void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) +{ + struct callchain_node *child; + + list_for_each_entry(child, &node->children, brothers) + sort_chain_to_rbtree(rb_root, child); + + if (node->hit) + rb_insert_callchain(rb_root, node); +} + +static struct callchain_node *create_child(struct callchain_node *parent) +{ + struct callchain_node *new; + + new = malloc(sizeof(*new)); + if (!new) { + perror("not enough memory to create child for code path tree"); + return NULL; + } + new->parent = parent; + INIT_LIST_HEAD(&new->children); + INIT_LIST_HEAD(&new->val); + list_add_tail(&new->brothers, &parent->children); + + return new; +} + +static void +fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) +{ + int i; + + for (i = start; i < chain->nr; i++) { + struct callchain_list *call; + + call = malloc(sizeof(*chain)); + if (!call) { + perror("not enough memory for the code path tree"); + return; + } + call->ip = chain->ips[i]; + list_add_tail(&call->list, &node->val); + } + node->val_nr = i - start; +} + +static void add_child(struct callchain_node *parent, struct ip_callchain *chain) +{ + struct callchain_node *new; + + new = create_child(parent); + fill_node(new, chain, parent->val_nr); + + new->hit = 1; +} + +static void +split_add_child(struct callchain_node *parent, struct ip_callchain *chain, + struct callchain_list *to_split, int idx) +{ + struct callchain_node *new; + + /* split */ + new = create_child(parent); + list_move_tail(&to_split->list, &new->val); + new->hit = parent->hit; + parent->hit = 0; + parent->val_nr = idx; + + /* create the new one */ + add_child(parent, chain); +} + +static int +__append_chain(struct callchain_node *root, struct ip_callchain *chain, + int start); + +static int +__append_chain_children(struct callchain_node *root, struct ip_callchain *chain) +{ + struct callchain_node *rnode; + + /* lookup in childrens */ + list_for_each_entry(rnode, &root->children, brothers) { + int ret = __append_chain(rnode, chain, root->val_nr); + if (!ret) + return 0; + } + return -1; +} + +static int +__append_chain(struct callchain_node *root, struct ip_callchain *chain, + int start) +{ + struct callchain_list *cnode; + int i = start; + bool found = false; + + /* lookup in the current node */ + list_for_each_entry(cnode, &root->val, list) { + if (cnode->ip != chain->ips[i++]) + break; + if (!found) + found = true; + if (i == chain->nr) + break; + } + + /* matches not, relay on the parent */ + if (!found) + return -1; + + /* we match only a part of the node. Split it and add the new chain */ + if (i < root->val_nr) { + split_add_child(root, chain, cnode, i); + return 0; + } + + /* we match 100% of the path, increment the hit */ + if (i == root->val_nr) { + root->hit++; + return 0; + } + + return __append_chain_children(root, chain); +} + +void append_chain(struct callchain_node *root, struct ip_callchain *chain) +{ + if (__append_chain_children(root, chain) == -1) + add_child(root, chain); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h new file mode 100644 index 00000000000..fa1cd2f71fd --- /dev/null +++ b/tools/perf/util/callchain.h @@ -0,0 +1,33 @@ +#ifndef __PERF_CALLCHAIN_H +#define __PERF_CALLCHAIN_H + +#include "../perf.h" +#include "list.h" +#include "rbtree.h" + + +struct callchain_node { + struct callchain_node *parent; + struct list_head brothers; + struct list_head children; + struct list_head val; + struct rb_node rb_node; + int val_nr; + int hit; +}; + +struct callchain_list { + unsigned long ip; + struct list_head list; +}; + +static inline void callchain_init(struct callchain_node *node) +{ + INIT_LIST_HEAD(&node->brothers); + INIT_LIST_HEAD(&node->children); + INIT_LIST_HEAD(&node->val); +} + +void append_chain(struct callchain_node *root, struct ip_callchain *chain); +void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); +#endif -- cgit v1.2.3 From f55c555226b1010b249730ec6b232e5470286950 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Jun 2009 16:28:01 +0200 Subject: perf report: Print sorted callchains per histogram entries Use the newly created callchains radix tree to gather the chains stats from the recorded events and then print the callchains for all of them, sorted by hits, using the "-c" parameter with perf report. Example: 66.15% [k] atm_clip_exit 63.08% 0xffffffffffffff80 0xffffffff810196a8 0xffffffff810c14c8 0xffffffff8101a79c 0xffffffff810194f3 0xffffffff8106ab7f 0xffffffff8106abe5 0xffffffff8106acde 0xffffffff8100d94b 0xffffffff8153e7ea [...] 1.54% 0xffffffffffffff80 0xffffffff810196a8 0xffffffff810c14c8 0xffffffff8101a79c [...] Symbols are not yet resolved. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1246026481-8314-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 82 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 11 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 28d1cb2127e..ed391db9e0f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -15,6 +15,7 @@ #include "util/rbtree.h" #include "util/symbol.h" #include "util/string.h" +#include "util/callchain.h" #include "perf.h" #include "util/header.h" @@ -52,6 +53,7 @@ static char *parent_pattern = default_parent_pattern; static regex_t parent_regex; static int exclude_other = 1; +static int callchain; static u64 sample_type; @@ -488,17 +490,19 @@ static size_t threads__fprintf(FILE *fp) static struct rb_root hist; struct hist_entry { - struct rb_node rb_node; - - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - struct symbol *parent; - u64 ip; - char level; - - u64 count; + struct rb_node rb_node; + + struct thread *thread; + struct map *map; + struct dso *dso; + struct symbol *sym; + struct symbol *parent; + u64 ip; + char level; + struct callchain_node callchain; + struct rb_root sorted_chain; + + u64 count; }; /* @@ -768,6 +772,48 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } +static size_t +callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) +{ + struct callchain_list *chain; + size_t ret = 0; + + if (!self) + return 0; + + ret += callchain__fprintf(fp, self->parent, total_samples); + + + list_for_each_entry(chain, &self->val, list) + ret += fprintf(fp, " %p\n", (void *)chain->ip); + + return ret; +} + +static size_t +hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, + u64 total_samples) +{ + struct rb_node *rb_node; + struct callchain_node *chain; + size_t ret = 0; + + rb_node = rb_first(&self->sorted_chain); + while (rb_node) { + double percent; + + chain = rb_entry(rb_node, struct callchain_node, rb_node); + percent = chain->hit * 100.0 / total_samples; + ret += fprintf(fp, " %6.2f%%\n", percent); + ret += callchain__fprintf(fp, chain, total_samples); + ret += fprintf(fp, "\n"); + rb_node = rb_next(rb_node); + } + + return ret; +} + + static size_t hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) { @@ -808,6 +854,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) ret += fprintf(fp, "\n"); + if (callchain) + hist_entry_callchain__fprintf(fp, self, total_samples); + return ret; } @@ -892,6 +941,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, .level = level, .count = count, .parent = NULL, + .sorted_chain = RB_ROOT }; int cmp; @@ -934,6 +984,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!cmp) { he->count += count; + if (callchain) + append_chain(&he->callchain, chain); return 0; } @@ -947,6 +999,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!he) return -ENOMEM; *he = entry; + if (callchain) { + callchain_init(&he->callchain); + append_chain(&he->callchain, chain); + } rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, &hist); @@ -1023,6 +1079,9 @@ static void output__insert_entry(struct hist_entry *he) struct rb_node *parent = NULL; struct hist_entry *iter; + if (callchain) + sort_chain_to_rbtree(&he->sorted_chain, &he->callchain); + while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node); @@ -1599,6 +1658,7 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), + OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), OPT_END() }; -- cgit v1.2.3 From fde953c1c67986e1c381fa50d8207b1578b5cefa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:06:39 +0200 Subject: perf_counter tools: Remove dead code Vince Weaver reported that there's a handful of #ifdef __MINGW32__ sections in the code. Remove them as they are in essence dead code - as unlike upstream Git, the perf tool is unlikely to be ported to Windows. Reported-by: Vince Weaver Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/help.c | 15 ------- tools/perf/util/pager.c | 5 +-- tools/perf/util/run-command.c | 95 +------------------------------------------ tools/perf/util/run-command.h | 5 --- tools/perf/util/util.h | 15 ------- 5 files changed, 3 insertions(+), 132 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 6653f7dd1d7..17a00e0df2c 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -126,21 +126,6 @@ static int is_executable(const char *name) !S_ISREG(st.st_mode)) return 0; -#ifdef __MINGW32__ - /* cannot trust the executable bit, peek into the file instead */ - char buf[3] = { 0 }; - int n; - int fd = open(name, O_RDONLY); - st.st_mode &= ~S_IXUSR; - if (fd >= 0) { - n = read(fd, buf, 2); - if (n == 2) - /* DOS executables start with "MZ" */ - if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) - st.st_mode |= S_IXUSR; - close(fd); - } -#endif return st.st_mode & S_IXUSR; } diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index a28bccae545..1915de20dca 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -9,7 +9,6 @@ static int spawned_pager; -#ifndef __MINGW32__ static void pager_preexec(void) { /* @@ -24,7 +23,6 @@ static void pager_preexec(void) setenv("LESS", "FRSX", 0); } -#endif static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; static struct child_process pager_process; @@ -70,9 +68,8 @@ void setup_pager(void) pager_argv[2] = pager; pager_process.argv = pager_argv; pager_process.in = -1; -#ifndef __MINGW32__ pager_process.preexec_cb = pager_preexec; -#endif + if (start_command(&pager_process)) return; diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c index b2f5e854f40..a3935343091 100644 --- a/tools/perf/util/run-command.c +++ b/tools/perf/util/run-command.c @@ -65,7 +65,6 @@ int start_command(struct child_process *cmd) cmd->err = fderr[0]; } -#ifndef __MINGW32__ fflush(NULL); cmd->pid = fork(); if (!cmd->pid) { @@ -118,71 +117,6 @@ int start_command(struct child_process *cmd) } exit(127); } -#else - int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ - const char **sargv = cmd->argv; - char **env = environ; - - if (cmd->no_stdin) { - s0 = dup(0); - dup_devnull(0); - } else if (need_in) { - s0 = dup(0); - dup2(fdin[0], 0); - } else if (cmd->in) { - s0 = dup(0); - dup2(cmd->in, 0); - } - - if (cmd->no_stderr) { - s2 = dup(2); - dup_devnull(2); - } else if (need_err) { - s2 = dup(2); - dup2(fderr[1], 2); - } - - if (cmd->no_stdout) { - s1 = dup(1); - dup_devnull(1); - } else if (cmd->stdout_to_stderr) { - s1 = dup(1); - dup2(2, 1); - } else if (need_out) { - s1 = dup(1); - dup2(fdout[1], 1); - } else if (cmd->out > 1) { - s1 = dup(1); - dup2(cmd->out, 1); - } - - if (cmd->dir) - die("chdir in start_command() not implemented"); - if (cmd->env) { - env = copy_environ(); - for (; *cmd->env; cmd->env++) - env = env_setenv(env, *cmd->env); - } - - if (cmd->perf_cmd) { - cmd->argv = prepare_perf_cmd(cmd->argv); - } - - cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); - - if (cmd->env) - free_environ(env); - if (cmd->perf_cmd) - free(cmd->argv); - - cmd->argv = sargv; - if (s0 >= 0) - dup2(s0, 0), close(s0); - if (s1 >= 0) - dup2(s1, 1), close(s1); - if (s2 >= 0) - dup2(s2, 2), close(s2); -#endif if (cmd->pid < 0) { int err = errno; @@ -288,14 +222,6 @@ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const return run_command(&cmd); } -#ifdef __MINGW32__ -static __stdcall unsigned run_thread(void *data) -{ - struct async *async = data; - return async->proc(async->fd_for_proc, async->data); -} -#endif - int start_async(struct async *async) { int pipe_out[2]; @@ -304,7 +230,6 @@ int start_async(struct async *async) return error("cannot create pipe: %s", strerror(errno)); async->out = pipe_out[0]; -#ifndef __MINGW32__ /* Flush stdio before fork() to avoid cloning buffers */ fflush(NULL); @@ -319,33 +244,17 @@ int start_async(struct async *async) exit(!!async->proc(pipe_out[1], async->data)); } close(pipe_out[1]); -#else - async->fd_for_proc = pipe_out[1]; - async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); - if (!async->tid) { - error("cannot create thread: %s", strerror(errno)); - close_pair(pipe_out); - return -1; - } -#endif + return 0; } int finish_async(struct async *async) { -#ifndef __MINGW32__ int ret = 0; if (wait_or_whine(async->pid)) ret = error("waitpid (async) failed"); -#else - DWORD ret = 0; - if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) - ret = error("waiting for thread failed: %lu", GetLastError()); - else if (!GetExitCodeThread(async->tid, &ret)) - ret = error("cannot get thread exit code: %lu", GetLastError()); - CloseHandle(async->tid); -#endif + return ret; } diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h index 328289f2366..cc1837deba8 100644 --- a/tools/perf/util/run-command.h +++ b/tools/perf/util/run-command.h @@ -79,12 +79,7 @@ struct async { int (*proc)(int fd, void *data); void *data; int out; /* caller reads from here and closes it */ -#ifndef __MINGW32__ pid_t pid; -#else - HANDLE tid; - int fd_for_proc; -#endif }; int start_async(struct async *async); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b8cfed776d8..b4be6071c10 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -67,7 +67,6 @@ #include #include #include -#ifndef __MINGW32__ #include #include #include @@ -81,20 +80,6 @@ #include #include #include -#if defined(__CYGWIN__) -#undef _XOPEN_SOURCE -#include -#define _XOPEN_SOURCE 600 -#include "compat/cygwin.h" -#else -#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ -#include -#define _ALL_SOURCE 1 -#endif -#else /* __MINGW32__ */ -/* pull in Windows compatibility stuff */ -#include "compat/mingw.h" -#endif /* __MINGW32__ */ #ifndef NO_ICONV #include -- cgit v1.2.3 From 0cfb7a13b8e4e0afd4b856156ab16a182de7505b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:10:30 +0200 Subject: perf stat: Add -n/--null option to run without counters Allow a no-counters run. This can be useful to measure just elapsed wall-clock time - or to assess the raw overhead of perf stat itself, without running any counters. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8420ec58950..cdcd058fac0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -70,6 +70,7 @@ static int run_count = 1; static int inherit = 1; static int scale = 1; static int target_pid = -1; +static int null_run = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; @@ -461,6 +462,8 @@ static const struct option options[] = { "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &run_count, "repeat command and print average + stddev (max: 100)"), + OPT_BOOLEAN('n', "null", &null_run, + "null run - dont start any counters"), OPT_END() }; @@ -476,7 +479,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) if (run_count <= 0 || run_count > MAX_RUN) usage_with_options(stat_usage, options); - if (!nr_counters) + if (!null_run && !nr_counters) nr_counters = 8; nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); -- cgit v1.2.3 From 566747e6298289c5cb02d4939cb3abf1c4fe7e5a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 27 Jun 2009 06:24:32 +0200 Subject: perf stat: Fix multi-run stats In multi-run (-r/--repeat) printouts, print out the noise of the wall-clock average as well. Also, fix a bug in printing out scaled counters: if it was not scaled then we should not update the average with -1. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index cdcd058fac0..52c176cc683 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -353,8 +353,11 @@ static void calc_avg(void) event_res_avg[j]+1, event_res[i][j]+1); update_avg("counter/2", j, event_res_avg[j]+2, event_res[i][j]+2); - update_avg("scaled", j, - event_scaled_avg + j, event_scaled[i]+j); + if (event_scaled[i][j] != -1) + update_avg("scaled", j, + event_scaled_avg + j, event_scaled[i]+j); + else + event_scaled_avg[j] = -1; } } runtime_nsecs_avg /= run_count; @@ -420,9 +423,13 @@ static void print_stat(int argc, const char **argv) fprintf(stderr, "\n"); - fprintf(stderr, " %14.9f seconds time elapsed.\n", + fprintf(stderr, " %14.9f seconds time elapsed", (double)walltime_nsecs_avg/1e9); - fprintf(stderr, "\n"); + if (run_count > 1) { + fprintf(stderr, " ( +- %7.3f%% )", + 100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg); + } + fprintf(stderr, "\n\n"); } static volatile int signr = -1; -- cgit v1.2.3 From 6e750a8fc009fd0ae98704525d1d8e80d60e8cc9 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Jun 2009 03:02:07 +0530 Subject: perf stat: Improve output Increase size for event name to handle bigger names like 'L1-d$-prefetch-misses' Changed scaled counters from percentage to a multiplicative factor because the latter is more expressive. Also aligned the scaling factor, otherwise sometimes it looks like: 384 iTLB-load-misses (4.74x scaled) 452029 branch-loads (8.00x scaled) 5892 branch-load-misses (20.39x scaled) 972315 iTLB-loads (3.24x scaled) Before: 150708 L1-d$-stores (scaled from 23.57%) 428804 L1-d$-prefetches (scaled from 23.47%) 314446 L1-d$-prefetch-misses (scaled from 23.42%) 252626137 L1-i$-loads (scaled from 23.24%) 5297550 dTLB-load-misses (scaled from 23.96%) 106992392 branch-loads (scaled from 23.67%) 5239561 branch-load-misses (scaled from 23.43%) After: 1731713 L1-d$-loads ( 14.25x scaled) 44241 L1-d$-prefetches ( 3.88x scaled) 21076 L1-d$-prefetch-misses ( 3.40x scaled) 5789421 L1-i$-loads ( 3.78x scaled) 29645 dTLB-load-misses ( 2.95x scaled) 461474 branch-loads ( 6.52x scaled) 7493 branch-load-misses ( 26.57x scaled) Reported-by: Ingo Molnar Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246051927.2988.10.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 52c176cc683..3840a70f05b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -32,6 +32,7 @@ * Wu Fengguang * Mike Galbraith * Paul Mackerras + * Jaswinder Singh Rajput * * Released under the GPL v2. (and only v2, not any later version) */ @@ -251,7 +252,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) { double msecs = (double)count[0] / 1000000; - fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { @@ -265,7 +266,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) static void abs_printout(int counter, u64 *count, u64 *noise) { - fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); if (runtime_cycles_avg && attrs[counter].type == PERF_TYPE_HARDWARE && @@ -295,7 +296,7 @@ static void print_counter(int counter) scaled = event_scaled_avg[counter]; if (scaled == -1) { - fprintf(stderr, " %14s %-20s\n", + fprintf(stderr, " %14s %-24s\n", "", event_name(counter)); return; } @@ -306,8 +307,7 @@ static void print_counter(int counter) abs_printout(counter, count, noise); if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); + fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]); fprintf(stderr, "\n"); } @@ -421,7 +421,6 @@ static void print_stat(int argc, const char **argv) for (counter = 0; counter < nr_counters; counter++) print_counter(counter); - fprintf(stderr, "\n"); fprintf(stderr, " %14.9f seconds time elapsed", (double)walltime_nsecs_avg/1e9); -- cgit v1.2.3 From c3043569dc8fbe9228b76174f15d1a7152c48a20 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Jun 2009 23:49:09 +0530 Subject: perf stat: Micro-optimize the code: memcpy is only required if no event is selected and !null_run Set attrs and nr_counters if no event is selected and !null_run. Setting of attrs should depend on number of counters, so we need to memcpy only for sizeof(default_attrs) Also set nr_counters as ARRAY_SIZE(default_attrs) in place of hardcoded value. Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246126749.32198.16.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3840a70f05b..3e5ea4e2e5f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,7 +46,7 @@ #include #include -static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { +static struct perf_counter_attr default_attrs[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, @@ -477,16 +477,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix) { int status; - memcpy(attrs, default_attrs, sizeof(attrs)); - argc = parse_options(argc, argv, options, stat_usage, 0); if (!argc) usage_with_options(stat_usage, options); if (run_count <= 0 || run_count > MAX_RUN) usage_with_options(stat_usage, options); - if (!null_run && !nr_counters) - nr_counters = 8; + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (!null_run && !nr_counters) { + memcpy(attrs, default_attrs, sizeof(default_attrs)); + nr_counters = ARRAY_SIZE(default_attrs); + } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); -- cgit v1.2.3 From 210ad39fb7ef0bc0494483f517f42524f16bb2a7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 29 Jun 2009 21:50:54 +0200 Subject: perf stat: Use percentages for scaling output Peter expressed a strong preference for percentage based display of scaled values - so revert to that from the recently introduced multiplication-factor unit. Reported-by: Peter Zijlstra Cc: Jaswinder Singh Rajput Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3e5ea4e2e5f..c5a290727a9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -307,7 +307,8 @@ static void print_counter(int counter) abs_printout(counter, count, noise); if (scaled) - fprintf(stderr, " (%7.2fx scaled)", (double)count[1]/count[2]); + fprintf(stderr, " (scaled from %.2f%%)", + (double) count[2] / count[1] * 100); fprintf(stderr, "\n"); } -- cgit v1.2.3 From 051ae7f7344f453616b6b10332d4d8e1d40ed823 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 29 Jun 2009 21:13:21 +1000 Subject: perf_counter tools: Reduce perf stat measurement overhead/skew Vince Weaver reported a 'perf stat' measurement overhead in the count of retired instructions, which can amount to a +6000 instructions inflated count in the reported count. At present, perf stat creates its counters on the perf process. Thus the counters count the fork and various other activity in both the parent and child, such as the resolver overhead for resolving PLT entries for any libc functions that haven't been called before, such as execvp. This reduces the overhead by creating the counters on the child process after the fork, using a couple of pipes to synchronize so that the child process waits until the parent has created the counters before doing the exec. To eliminate the PLT resolution overhead on calling execvp, this does a dummy execvp first which will always fail. With this, the overhead of executing a program goes down from over 4800 instructions to about 90 instructions on powerpc (32-bit). This was measured with a statically-linked program written in assembler which only does the 3 instructions needed to call _exit(0). Before: $ perf stat -e 0:1:u ./three Performance counter stats for './three': 4858 instructions 0.001274523 seconds time elapsed After: $ perf stat -e 0:1:u ./three Performance counter stats for './three': 92 instructions 0.000468153 seconds time elapsed Reported-by: Vince Weaver Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <19016.41425.814043.870352@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 64 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 14 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c5a290727a9..201ef2367dc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -99,7 +99,7 @@ static u64 runtime_cycles_noise; #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" -static void create_perf_stat_counter(int counter) +static void create_perf_stat_counter(int counter, int pid) { struct perf_counter_attr *attr = attrs + counter; @@ -119,7 +119,7 @@ static void create_perf_stat_counter(int counter) attr->inherit = inherit; attr->disabled = 1; - fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); + fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[0][counter], strerror(errno)); @@ -205,12 +205,58 @@ static int run_perf_stat(int argc, const char **argv) int status = 0; int counter; int pid; + int child_ready_pipe[2], go_pipe[2]; + char buf; if (!system_wide) nr_cpus = 1; + if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) { + perror("failed to create pipes"); + exit(1); + } + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + close(child_ready_pipe[0]); + close(go_pipe[1]); + fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + + /* + * Do a dummy execvp to get the PLT entry resolved, + * so we avoid the resolver overhead on the real + * execvp call. + */ + execvp("", (char **)argv); + + /* + * Tell the parent we're ready to go + */ + close(child_ready_pipe[1]); + + /* + * Wait until the parent tells us to go. + */ + read(go_pipe[0], &buf, 1); + + execvp(argv[0], (char **)argv); + + perror(argv[0]); + exit(-1); + } + + /* + * Wait for the child to be ready to exec. + */ + close(child_ready_pipe[1]); + close(go_pipe[0]); + read(child_ready_pipe[0], &buf, 1); + close(child_ready_pipe[0]); + for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); + create_perf_stat_counter(counter, pid); /* * Enable counters and exec the command: @@ -218,19 +264,9 @@ static int run_perf_stat(int argc, const char **argv) t0 = rdclock(); prctl(PR_TASK_PERF_COUNTERS_ENABLE); - if ((pid = fork()) < 0) - perror("failed to fork"); - - if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); - } - } - + close(go_pipe[1]); wait(&status); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); t1 = rdclock(); walltime_nsecs[run_idx] = t1 - t0; -- cgit v1.2.3 From 57e7986ed142417498155ebcd5eaf617ac37136d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 30 Jun 2009 16:07:19 +1000 Subject: perf_counter: Provide a way to enable counters on exec This provides a way to mark a counter to be enabled on the next exec. This is useful for measuring the total activity of a program without including overhead from the process that launches it. This also changes the perf stat command to use this new facility. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <19017.43927.838745.689203@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 201ef2367dc..2e03524a1de 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -116,8 +116,9 @@ static void create_perf_stat_counter(int counter, int pid) fd[cpu][counter], strerror(errno)); } } else { - attr->inherit = inherit; - attr->disabled = 1; + attr->inherit = inherit; + attr->disabled = 1; + attr->enable_on_exec = 1; fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) @@ -262,7 +263,6 @@ static int run_perf_stat(int argc, const char **argv) * Enable counters and exec the command: */ t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); close(go_pipe[1]); wait(&status); -- cgit v1.2.3 From f5812a7a336fb952d819e4427b9a2dce02368e82 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 11:43:17 -0300 Subject: perf_counter tools: Adjust only prelinked symbol's addresses I.e. we can't handle these two kinds of files in the same way: 1) prelinked system library: [acme@doppio pahole]$ readelf -s /usr/lib64/libdw-0.141.so | egrep 'FUNC.+GLOBAL.+dwfl_report_elf' 278: 00000030450105a0 261 FUNC GLOBAL DEFAULT 12 dwfl_report_elf@@ELFUTILS_0.122 2) not prelinked library with debug information from a -debuginfo package: [acme@doppio pahole]$ readelf -s /usr/lib/debug/usr/lib64/libdw-0.141.so.debug | egrep 'FUNC.+GLOBAL.+dwfl_report_elf' 629: 00000000000105a0 261 FUNC GLOBAL DEFAULT 12 dwfl_report_elf [acme@doppio pahole]$ Now the numbers I got for a pahole perf run are in line with the numbers I get from oprofile. Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090630144317.GB12663@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 16 +++++++++++----- tools/perf/util/symbol.h | 3 ++- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9c659ef6aec..78c2efde01b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -520,7 +520,9 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - + self->prelinked = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL; elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; u64 obj_start; @@ -535,11 +537,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, gelf_getshdr(sec, &shdr); obj_start = sym.st_value; - if (verbose >= 2) - printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", - (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); + if (self->prelinked) { + if (verbose >= 2) + printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", + (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + } f = symbol__new(sym.st_value, sym.st_size, elf_sym__name(&sym, symstrs), @@ -573,6 +577,8 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) if (!name) return -1; + self->prelinked = 0; + if (strncmp(self->name, "/tmp/perf-", 10) == 0) return dso__load_perf_map(self, filter, verbose); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 940b432db16..2c48ace8203 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -20,8 +20,9 @@ struct symbol { struct dso { struct list_head node; struct rb_root syms; - unsigned int sym_priv_size; struct symbol *(*find_symbol)(struct dso *, u64 ip); + unsigned int sym_priv_size; + unsigned char prelinked; char name[0]; }; -- cgit v1.2.3 From 25903407da21552419e0955705d6d8c8e601cb2e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:20 -0300 Subject: perf report: Add --dsos parameter So that we can filter by dso. Symbols in other dsos won't be accounted for. Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-2-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 + tools/perf/Makefile | 2 + tools/perf/builtin-report.c | 16 +++ tools/perf/util/strlist.c | 184 +++++++++++++++++++++++++++++++ tools/perf/util/strlist.h | 32 ++++++ 5 files changed, 238 insertions(+) create mode 100644 tools/perf/util/strlist.c create mode 100644 tools/perf/util/strlist.h (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 40c1db83a16..13d85ca8c91 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -20,6 +20,10 @@ OPTIONS -i:: --input=:: Input file name. (default: perf.data) +-d:: +--dsos=:: + Only consider symbols in these dsos. CSV that understands + file://filename entries. SEE ALSO -------- diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1c1296d8a64..9c6d0ae3708 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -301,6 +301,7 @@ LIB_H += util/util.h LIB_H += util/help.h LIB_H += util/strbuf.h LIB_H += util/string.h +LIB_H += util/strlist.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h @@ -322,6 +323,7 @@ LIB_OBJS += util/run-command.o LIB_OBJS += util/quote.o LIB_OBJS += util/strbuf.o LIB_OBJS += util/string.o +LIB_OBJS += util/strlist.o LIB_OBJS += util/usage.o LIB_OBJS += util/wrapper.o LIB_OBJS += util/sigchain.o diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ed391db9e0f..7c6b6e77671 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -16,6 +16,7 @@ #include "util/symbol.h" #include "util/string.h" #include "util/callchain.h" +#include "util/strlist.h" #include "perf.h" #include "util/header.h" @@ -32,6 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; +static char *dso_list_str; +static struct strlist *dso_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -1272,6 +1275,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (show & show_mask) { struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); + if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) + return 0; + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { eprintf("problem incrementing symbol count, skipping event\n"); return -1; @@ -1659,6 +1665,8 @@ static const struct option options[] = { OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), + OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", + "only consider symbols in these dsos"), OPT_END() }; @@ -1698,6 +1706,14 @@ int cmd_report(int argc, const char **argv, const char *prefix) if (argc) usage_with_options(report_usage, options); + if (dso_list_str) { + dso_list = strlist__new(true, dso_list_str); + if (!dso_list) { + fprintf(stderr, "problems parsing dso list\n"); + exit(129); + } + } + setup_pager(); return __cmd_report(); diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c new file mode 100644 index 00000000000..025a78edfff --- /dev/null +++ b/tools/perf/util/strlist.c @@ -0,0 +1,184 @@ +/* + * (c) 2009 Arnaldo Carvalho de Melo + * + * Licensed under the GPLv2. + */ + +#include "strlist.h" +#include +#include +#include +#include + +static struct str_node *str_node__new(const char *s, bool dupstr) +{ + struct str_node *self = malloc(sizeof(*self)); + + if (self != NULL) { + if (dupstr) { + s = strdup(s); + if (s == NULL) + goto out_delete; + } + self->s = s; + } + + return self; + +out_delete: + free(self); + return NULL; +} + +static void str_node__delete(struct str_node *self, bool dupstr) +{ + if (dupstr) + free((void *)self->s); + free(self); +} + +int strlist__add(struct strlist *self, const char *new_entry) +{ + struct rb_node **p = &self->entries.rb_node; + struct rb_node *parent = NULL; + struct str_node *sn; + + while (*p != NULL) { + int rc; + + parent = *p; + sn = rb_entry(parent, struct str_node, rb_node); + rc = strcmp(sn->s, new_entry); + + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else + return -EEXIST; + } + + sn = str_node__new(new_entry, self->dupstr); + if (sn == NULL) + return -ENOMEM; + + rb_link_node(&sn->rb_node, parent, p); + rb_insert_color(&sn->rb_node, &self->entries); + + return 0; +} + +int strlist__load(struct strlist *self, const char *filename) +{ + char entry[1024]; + int err; + FILE *fp = fopen(filename, "r"); + + if (fp == NULL) + return errno; + + while (fgets(entry, sizeof(entry), fp) != NULL) { + const size_t len = strlen(entry); + + if (len == 0) + continue; + entry[len - 1] = '\0'; + + err = strlist__add(self, entry); + if (err != 0) + goto out; + } + + err = 0; +out: + fclose(fp); + return err; +} + +void strlist__remove(struct strlist *self, struct str_node *sn) +{ + rb_erase(&sn->rb_node, &self->entries); + str_node__delete(sn, self->dupstr); +} + +bool strlist__has_entry(struct strlist *self, const char *entry) +{ + struct rb_node **p = &self->entries.rb_node; + struct rb_node *parent = NULL; + + while (*p != NULL) { + struct str_node *sn; + int rc; + + parent = *p; + sn = rb_entry(parent, struct str_node, rb_node); + rc = strcmp(sn->s, entry); + + if (rc > 0) + p = &(*p)->rb_left; + else if (rc < 0) + p = &(*p)->rb_right; + else + return true; + } + + return false; +} + +static int strlist__parse_list_entry(struct strlist *self, const char *s) +{ + if (strncmp(s, "file://", 7) == 0) + return strlist__load(self, s + 7); + + return strlist__add(self, s); +} + +int strlist__parse_list(struct strlist *self, const char *s) +{ + char *sep; + int err; + + while ((sep = strchr(s, ',')) != NULL) { + *sep = '\0'; + err = strlist__parse_list_entry(self, s); + *sep = ','; + if (err != 0) + return err; + s = sep + 1; + } + + return *s ? strlist__parse_list_entry(self, s) : 0; +} + +struct strlist *strlist__new(bool dupstr, const char *slist) +{ + struct strlist *self = malloc(sizeof(*self)); + + if (self != NULL) { + self->entries = RB_ROOT; + self->dupstr = dupstr; + if (slist && strlist__parse_list(self, slist) != 0) + goto out_error; + } + + return self; +out_error: + free(self); + return NULL; +} + +void strlist__delete(struct strlist *self) +{ + if (self != NULL) { + struct str_node *pos; + struct rb_node *next = rb_first(&self->entries); + + while (next) { + pos = rb_entry(next, struct str_node, rb_node); + next = rb_next(&pos->rb_node); + strlist__remove(self, pos); + } + self->entries = RB_ROOT; + free(self); + } +} diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h new file mode 100644 index 00000000000..2fb117fb4b6 --- /dev/null +++ b/tools/perf/util/strlist.h @@ -0,0 +1,32 @@ +#ifndef STRLIST_H_ +#define STRLIST_H_ + +#include "rbtree.h" +#include + +struct str_node { + struct rb_node rb_node; + const char *s; +}; + +struct strlist { + struct rb_root entries; + bool dupstr; +}; + +struct strlist *strlist__new(bool dupstr, const char *slist); +void strlist__delete(struct strlist *self); + +void strlist__remove(struct strlist *self, struct str_node *sn); +int strlist__load(struct strlist *self, const char *filename); +int strlist__add(struct strlist *self, const char *str); + +bool strlist__has_entry(struct strlist *self, const char *entry); + +static inline bool strlist__empty(const struct strlist *self) +{ + return rb_first(&self->entries) == NULL; +} + +int strlist__parse_list(struct strlist *self, const char *s); +#endif /* STRLIST_H_ */ -- cgit v1.2.3 From cc8b88b15ab8e5ae162a46c4b6b286b555190dd1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:21 -0300 Subject: perf report: Add --comms parameter So that we can filter by comm. Symbols in other comms won't be accounted for. Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-3-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 33 ++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 13d85ca8c91..4c44ef1747b 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -24,6 +24,10 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. +-C:: +--comms=:: + Only consider symbols in these comms. CSV that understands + file://filename entries. SEE ALSO -------- diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7c6b6e77671..8143477b7ef 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,8 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; -static char *dso_list_str; -static struct strlist *dso_list; +static char *dso_list_str, *comm_list_str; +static struct strlist *dso_list, *comm_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -240,7 +240,7 @@ static u64 vdso__map_ip(struct map *map, u64 ip) static inline int is_anon_memory(const char *filename) { - return strcmp(filename, "//anon") == 0; + return strcmp(filename, "//anon") == 0; } static struct map *map__new(struct mmap_event *event) @@ -1253,6 +1253,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } + if (comm_list && !strlist__has_entry(comm_list, thread->comm)) + return 0; + if (event->header.misc & PERF_EVENT_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1667,6 +1670,8 @@ static const struct option options[] = { OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), + OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", + "only consider symbols in these comms"), OPT_END() }; @@ -1685,6 +1690,19 @@ static void setup_sorting(void) free(str); } +static void setup_list(struct strlist **list, const char *list_str, + const char *list_name) +{ + if (list_str) { + *list = strlist__new(true, list_str); + if (!*list) { + fprintf(stderr, "problems parsing %s list\n", + list_name); + exit(129); + } + } +} + int cmd_report(int argc, const char **argv, const char *prefix) { symbol__init(); @@ -1706,13 +1724,8 @@ int cmd_report(int argc, const char **argv, const char *prefix) if (argc) usage_with_options(report_usage, options); - if (dso_list_str) { - dso_list = strlist__new(true, dso_list_str); - if (!dso_list) { - fprintf(stderr, "problems parsing dso list\n"); - exit(129); - } - } + setup_list(&dso_list, dso_list_str, "dso"); + setup_list(&comm_list, comm_list_str, "comm"); setup_pager(); -- cgit v1.2.3 From 7bec7a9134c25cecb0d7029199b59f7b1bef35b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 30 Jun 2009 19:01:22 -0300 Subject: perf report: Add --symbols parameter So that we can filter by symbol name. The 'pfunct' utility in the 'dwarves' package can be used to create a file with the functions one wants. Example: [acme@doppio pahole]$ pfunct /usr/lib/debug/usr/lib64/libdw-0.141.so.debug | grep dwarf > /tmp/dwarf.symbols [acme@doppio pahole]$ wc -l /tmp/dwarf.symbols 93 /tmp/dwarf.symbols [acme@doppio pahole]$ head -3 /tmp/dwarf.symbols dwfl_addrdwarf dwfl_module_getdwarf dwfl_getdwarf [acme@doppio pahole]$ perf report --sort comm,dso,symbol --comms pahole --dsos /usr/lib64/libdw-0.141.so --symbols file:///tmp/dwarf.symbols 33.99% pahole /usr/lib64/libdw-0.141.so [.] dwarf_tag 29.07% pahole /usr/lib64/libdw-0.141.so [.] dwarf_decl_file 27.71% pahole /usr/lib64/libdw-0.141.so [.] dwarf_getsrclines 4.54% pahole /usr/lib64/libdw-0.141.so 0x00000000007400 3.93% pahole /usr/lib64/libdw-0.141.so [.] dwarf_decl_line 0.46% pahole /usr/lib64/libdw-0.141.so [.] dwarf_getlocation 0.18% pahole /usr/lib64/libdw-0.141.so [.] __libdwarf_next_prime 0.13% pahole /usr/lib64/libdw-0.141.so [.] dwarf_diecu [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1246399282-20934-4-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 10 ++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4c44ef1747b..8aa3f8c8870 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -28,6 +28,10 @@ OPTIONS --comms=:: Only consider symbols in these comms. CSV that understands file://filename entries. +-S:: +--symbols=:: + Only consider these symbols. CSV that understands + file://filename entries. SEE ALSO -------- diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8143477b7ef..135b7837e6b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -33,8 +33,8 @@ static char *vmlinux = NULL; static char default_sort_order[] = "comm,dso"; static char *sort_order = default_sort_order; -static char *dso_list_str, *comm_list_str; -static struct strlist *dso_list, *comm_list; +static char *dso_list_str, *comm_list_str, *sym_list_str; +static struct strlist *dso_list, *comm_list, *sym_list; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -1281,6 +1281,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) return 0; + if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) + return 0; + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { eprintf("problem incrementing symbol count, skipping event\n"); return -1; @@ -1672,6 +1675,8 @@ static const struct option options[] = { "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", "only consider symbols in these comms"), + OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]", + "only consider these symbols"), OPT_END() }; @@ -1726,6 +1731,7 @@ int cmd_report(int argc, const char **argv, const char *prefix) setup_list(&dso_list, dso_list_str, "dso"); setup_list(&comm_list, comm_list_str, "comm"); + setup_list(&sym_list, sym_list_str, "symbol"); setup_pager(); -- cgit v1.2.3 From 1f208ea67821703fd4de056ea6f0baa81f4ad4a5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:44 +1000 Subject: perf report: Fix -z option Fix a copy and paste error, -z was setting the group option. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.714204656@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index cf0d21f1ae1..5c2965562c5 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -675,7 +675,7 @@ static const struct option options[] = { "put the counters into a counter group"), OPT_STRING('s', "sym-filter", &sym_filter, "pattern", "only display symbols matchig this pattern"), - OPT_BOOLEAN('z', "zero", &group, + OPT_BOOLEAN('z', "zero", &zero, "zero history across updates"), OPT_INTEGER('F', "freq", &freq, "profile at this frequency"), -- cgit v1.2.3 From 6717534ddc328ae5cdf89f1ef802db83fc451f19 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:45 +1000 Subject: perf_counter tools: Remove zlib dependency The zlib devel libraries may not be installed and since we aren't using zlib we may as well remove it. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.802078956@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 6 ------ 1 file changed, 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9c6d0ae3708..f572c90f610 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -381,12 +381,6 @@ ifndef CC_LD_DYNPATH endif endif -ifdef ZLIB_PATH - BASIC_CFLAGS += -I$(ZLIB_PATH)/include - EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) -endif -EXTLIBS += -lz - ifdef NEEDS_SOCKET EXTLIBS += -lsocket endif -- cgit v1.2.3 From 2ab52083ffc057014e502cf3473adc41436922fa Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:46 +1000 Subject: perf top: Move skip symbols to an array Move the list of symbols we skip into an array, making it easier to add new ones. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.904782938@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5c2965562c5..731ec6d79c1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -286,11 +286,22 @@ static void *display_thread(void *arg) return NULL; } +/* Tag samples to be skipped. */ +char *skip_symbols[] = { + "default_idle", + "cpu_idle", + "enter_idle", + "exit_idle", + "mwait_idle", + NULL +}; + static int symbol_filter(struct dso *self, struct symbol *sym) { static int filter_match; struct sym_entry *syme; const char *name = sym->name; + int i; if (!strcmp(name, "_text") || !strcmp(name, "_etext") || @@ -302,13 +313,12 @@ static int symbol_filter(struct dso *self, struct symbol *sym) return 1; syme = dso__sym_priv(self, sym); - /* Tag samples to be skipped. */ - if (!strcmp("default_idle", name) || - !strcmp("cpu_idle", name) || - !strcmp("enter_idle", name) || - !strcmp("exit_idle", name) || - !strcmp("mwait_idle", name)) - syme->skip = 1; + for (i = 0; skip_symbols[i]; i++) { + if (!strcmp(skip_symbols[i], name)) { + syme->skip = 1; + break; + } + } if (filter_match == 1) { filter_end = sym->start; -- cgit v1.2.3 From 3a3393ef75a14ae259a82f3f38624efa17884168 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:47 +1000 Subject: perf top: Add ppc64 specific skip symbols and strip ppc64 . prefix Filter out some ppc64 specific idle loop functions and remove leading '.' on ppc64 text symbols. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230140.995643441@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-top.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 731ec6d79c1..0506cd6e04c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -293,6 +293,8 @@ char *skip_symbols[] = { "enter_idle", "exit_idle", "mwait_idle", + "ppc64_runlatch_off", + "pseries_dedicated_idle_sleep", NULL }; @@ -303,6 +305,13 @@ static int symbol_filter(struct dso *self, struct symbol *sym) const char *name = sym->name; int i; + /* + * ppc64 uses function descriptors and appends a '.' to the + * start of every instruction address. Remove it. + */ + if (name[0] == '.') + name++; + if (!strcmp(name, "_text") || !strcmp(name, "_etext") || !strcmp(name, "_sinittext") || -- cgit v1.2.3 From d8db1b57d31a6b30ea2f0df318eab50fc92b38d6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:48 +1000 Subject: perf report: Fix reporting of hypervisor PERF_EVENT_MISC_* is not a bitmask, so we have to mask and compare. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230141.088394681@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 135b7837e6b..88e88c510ae 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1213,6 +1213,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) struct map *map = NULL; void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; + int cpumode; if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; @@ -1256,7 +1257,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (comm_list && !strlist__has_entry(comm_list, thread->comm)) return 0; - if (event->header.misc & PERF_EVENT_MISC_KERNEL) { + cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + + if (cpumode == PERF_EVENT_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1264,7 +1267,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dprintf(" ...... dso: %s\n", dso->name); - } else if (event->header.misc & PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_EVENT_MISC_USER) { show = SHOW_USER; level = '.'; -- cgit v1.2.3 From fb9c818873a788c5c01c9868cc6050df96e2c7df Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 1 Jul 2009 09:00:49 +1000 Subject: perf report: Add hypervisor dso Add a dso for hypervisor samples. We don't get any symbol information on the ppc64 hypervisor but this at least gives us a high level summary of the time spent in there. Signed-off-by: Anton Blanchard Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org LKML-Reference: <20090630230141.182536873@samba.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 88e88c510ae..3f5d8ea05ff 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -121,6 +121,7 @@ typedef union event_union { static LIST_HEAD(dsos); static struct dso *kernel_dso; static struct dso *vdso; +static struct dso *hypervisor_dso; static void dsos__add(struct dso *dso) { @@ -202,6 +203,11 @@ static int load_kernel(void) dsos__add(vdso); + hypervisor_dso = dso__new("[hypervisor]", 0); + if (!hypervisor_dso) + return -1; + dsos__add(hypervisor_dso); + return err; } @@ -640,7 +646,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self) if (self->sym) { ret += fprintf(fp, "[%c] %s", - self->dso == kernel_dso ? 'k' : '.', self->sym->name); + self->dso == kernel_dso ? 'k' : + self->dso == hypervisor_dso ? 'h' : '.', self->sym->name); } else { ret += fprintf(fp, "%#016llx", (u64)self->ip); } @@ -963,6 +970,9 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, } switch (context) { + case PERF_CONTEXT_HV: + dso = hypervisor_dso; + break; case PERF_CONTEXT_KERNEL: dso = kernel_dso; break; @@ -1275,6 +1285,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } else { show = SHOW_HV; level = 'H'; + + dso = hypervisor_dso; + dprintf(" ...... dso: [hypervisor]\n"); } -- cgit v1.2.3 From 9198aa77b69647d1d91207f8075763abe7dc0bf4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:13 +0200 Subject: perf_counter tools: Fix storage size allocation of callchain list Fix a confusion while giving the size of a callchain list during its allocation. We are using the wrong structure size. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index ad3c2857896..bbf7813fefe 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -74,7 +74,7 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) for (i = start; i < chain->nr; i++) { struct callchain_list *call; - call = malloc(sizeof(*chain)); + call = malloc(sizeof(*call)); if (!call) { perror("not enough memory for the code path tree"); return; -- cgit v1.2.3 From 4424961ad6621a02c6b4c9093e801002c1bb9f65 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:14 +0200 Subject: perf_counter tools: Resolve symbols in callchains This patch resolves the names, when possible, of each ip present in the callchains while using the -c option with perf report. Example: 5.40% [k] __d_lookup 5.37% perf_callchain perf_counter_overflow intel_pmu_handle_irq perf_counter_nmi_handler notifier_call_chain atomic_notifier_call_chain notify_die do_nmi nmi do_lookup __link_path_walk path_walk do_path_lookup user_path_at sys_faccessat sys_access system_call_fastpath 0x7fb609846f77 0.01% perf_callchain perf_counter_overflow intel_pmu_handle_irq perf_counter_nmi_handler notifier_call_chain atomic_notifier_call_chain notify_die do_nmi nmi do_lookup __link_path_walk path_walk do_path_lookup user_path_at sys_faccessat Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 102 +++++++++++++++++++++++++++++--------------- tools/perf/util/callchain.c | 33 ++++++++------ tools/perf/util/callchain.h | 5 ++- 3 files changed, 90 insertions(+), 50 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3f5d8ea05ff..197793051fa 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -794,8 +794,15 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) ret += callchain__fprintf(fp, self->parent, total_samples); - list_for_each_entry(chain, &self->val, list) - ret += fprintf(fp, " %p\n", (void *)chain->ip); + list_for_each_entry(chain, &self->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->sym) + ret += fprintf(fp, " %s\n", chain->sym->name); + else + ret += fprintf(fp, " %p\n", + (void *)chain->ip); + } return ret; } @@ -930,6 +937,55 @@ static int call__match(struct symbol *sym) return 0; } +static struct symbol ** +resolve_callchain(struct thread *thread, struct map *map, + struct ip_callchain *chain, struct hist_entry *entry) +{ + int i; + struct symbol **syms; + u64 context = PERF_CONTEXT_MAX; + + if (callchain) { + syms = calloc(chain->nr, sizeof(*syms)); + if (!syms) { + fprintf(stderr, "Can't allocate memory for symbols\n"); + exit(-1); + } + } + + for (i = 0; i < chain->nr; i++) { + u64 ip = chain->ips[i]; + struct dso *dso = NULL; + struct symbol *sym; + + if (ip >= PERF_CONTEXT_MAX) { + context = ip; + continue; + } + + switch (context) { + case PERF_CONTEXT_KERNEL: + dso = kernel_dso; + break; + default: + break; + } + + sym = resolve_symbol(thread, NULL, &dso, &ip); + + if (sym) { + if (sort__has_parent && call__match(sym) && + !entry->parent) + entry->parent = sym; + if (!callchain) + break; + syms[i] = sym; + } + } + + return syms; +} + /* * collect histogram counts */ @@ -942,6 +998,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; struct hist_entry *he; + struct symbol **syms = NULL; struct hist_entry entry = { .thread = thread, .map = map, @@ -955,39 +1012,11 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; - if (sort__has_parent && chain) { - u64 context = PERF_CONTEXT_MAX; - int i; - - for (i = 0; i < chain->nr; i++) { - u64 ip = chain->ips[i]; - struct dso *dso = NULL; - struct symbol *sym; - - if (ip >= PERF_CONTEXT_MAX) { - context = ip; - continue; - } - - switch (context) { case PERF_CONTEXT_HV: dso = hypervisor_dso; break; - case PERF_CONTEXT_KERNEL: - dso = kernel_dso; - break; - default: - break; - } - - sym = resolve_symbol(thread, NULL, &dso, &ip); - - if (sym && call__match(sym)) { - entry.parent = sym; - break; - } - } - } + if ((sort__has_parent || callchain) && chain) + syms = resolve_callchain(thread, map, chain, &entry); while (*p != NULL) { parent = *p; @@ -997,8 +1026,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, if (!cmp) { he->count += count; - if (callchain) - append_chain(&he->callchain, chain); + if (callchain) { + append_chain(&he->callchain, chain, syms); + free(syms); + } return 0; } @@ -1014,7 +1045,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, *he = entry; if (callchain) { callchain_init(&he->callchain); - append_chain(&he->callchain, chain); + append_chain(&he->callchain, chain, syms); + free(syms); } rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, &hist); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index bbf7813fefe..6568cb198ba 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -67,7 +67,8 @@ static struct callchain_node *create_child(struct callchain_node *parent) } static void -fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) +fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, + struct symbol **syms) { int i; @@ -80,24 +81,26 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start) return; } call->ip = chain->ips[i]; + call->sym = syms[i]; list_add_tail(&call->list, &node->val); } node->val_nr = i - start; } -static void add_child(struct callchain_node *parent, struct ip_callchain *chain) +static void add_child(struct callchain_node *parent, struct ip_callchain *chain, + struct symbol **syms) { struct callchain_node *new; new = create_child(parent); - fill_node(new, chain, parent->val_nr); + fill_node(new, chain, parent->val_nr, syms); new->hit = 1; } static void split_add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct callchain_list *to_split, int idx) + struct callchain_list *to_split, int idx, struct symbol **syms) { struct callchain_node *new; @@ -109,21 +112,22 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, parent->val_nr = idx; /* create the new one */ - add_child(parent, chain); + add_child(parent, chain, syms); } static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start); + int start, struct symbol **syms); static int -__append_chain_children(struct callchain_node *root, struct ip_callchain *chain) +__append_chain_children(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms) { struct callchain_node *rnode; /* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret = __append_chain(rnode, chain, root->val_nr); + int ret = __append_chain(rnode, chain, root->val_nr, syms); if (!ret) return 0; } @@ -132,7 +136,7 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain) static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start) + int start, struct symbol **syms) { struct callchain_list *cnode; int i = start; @@ -154,7 +158,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match only a part of the node. Split it and add the new chain */ if (i < root->val_nr) { - split_add_child(root, chain, cnode, i); + split_add_child(root, chain, cnode, i, syms); return 0; } @@ -164,11 +168,12 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, return 0; } - return __append_chain_children(root, chain); + return __append_chain_children(root, chain, syms); } -void append_chain(struct callchain_node *root, struct ip_callchain *chain) +void append_chain(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms) { - if (__append_chain_children(root, chain) == -1) - add_child(root, chain); + if (__append_chain_children(root, chain, syms) == -1) + add_child(root, chain, syms); } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index fa1cd2f71fd..c942daa712e 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -4,6 +4,7 @@ #include "../perf.h" #include "list.h" #include "rbtree.h" +#include "symbol.h" struct callchain_node { @@ -18,6 +19,7 @@ struct callchain_node { struct callchain_list { unsigned long ip; + struct symbol *sym; struct list_head list; }; @@ -28,6 +30,7 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->val); } -void append_chain(struct callchain_node *root, struct ip_callchain *chain); +void append_chain(struct callchain_node *root, struct ip_callchain *chain, + struct symbol **syms); void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); #endif -- cgit v1.2.3 From deac911cbdcb124fa0cee47c588e0cb0400b23b7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 05:35:15 +0200 Subject: perf_counter tools: Various fixes for callchains The symbol resolving has of course revealed some bugs in the callchain tree handling. This patch fixes some of them, including: - inherit the children from the parents while splitting a node - fix list range moving - fix indexes setting in callchains - create a child on the current node if the path doesn't match in the existent children (was only done on the root) - compare using symbols when possible so that we can match a function using any ip inside by referring to its start address. The practical effects are: - remove double callchains - fix upside down or any random order of callchains - fix wrong paths - fix bad hits and percentage accounts Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246419315-9968-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 122 ++++++++++++++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 32 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 6568cb198ba..440db12c635 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -4,6 +4,9 @@ * Handle the callchains from the stream in an ad-hoc radix tree and then * sort them in an rbtree. * + * Using a radix for code path provides a fast retrieval and factorizes + * memory use. Also that lets us use the paths in a hierarchical graph view. + * */ #include @@ -14,7 +17,8 @@ #include "callchain.h" -static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) +static void +rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -49,7 +53,12 @@ void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) rb_insert_callchain(rb_root, node); } -static struct callchain_node *create_child(struct callchain_node *parent) +/* + * Create a child for a parent. If inherit_children, then the new child + * will become the new parent of it's parent children + */ +static struct callchain_node * +create_child(struct callchain_node *parent, bool inherit_children) { struct callchain_node *new; @@ -61,14 +70,27 @@ static struct callchain_node *create_child(struct callchain_node *parent) new->parent = parent; INIT_LIST_HEAD(&new->children); INIT_LIST_HEAD(&new->val); + + if (inherit_children) { + struct callchain_node *next; + + list_splice(&parent->children, &new->children); + INIT_LIST_HEAD(&parent->children); + + list_for_each_entry(next, &new->children, brothers) + next->parent = new; + } list_add_tail(&new->brothers, &parent->children); return new; } +/* + * Fill the node with callchain values + */ static void -fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, - struct symbol **syms) +fill_node(struct callchain_node *node, struct ip_callchain *chain, + int start, struct symbol **syms) { int i; @@ -84,54 +106,80 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, call->sym = syms[i]; list_add_tail(&call->list, &node->val); } - node->val_nr = i - start; + node->val_nr = chain->nr - start; + if (!node->val_nr) + printf("Warning: empty node in callchain tree\n"); } -static void add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct symbol **syms) +static void +add_child(struct callchain_node *parent, struct ip_callchain *chain, + int start, struct symbol **syms) { struct callchain_node *new; - new = create_child(parent); - fill_node(new, chain, parent->val_nr, syms); + new = create_child(parent, false); + fill_node(new, chain, start, syms); new->hit = 1; } +/* + * Split the parent in two parts (a new child is created) and + * give a part of its callchain to the created child. + * Then create another child to host the given callchain of new branch + */ static void split_add_child(struct callchain_node *parent, struct ip_callchain *chain, - struct callchain_list *to_split, int idx, struct symbol **syms) + struct callchain_list *to_split, int idx_parents, int idx_local, + struct symbol **syms) { struct callchain_node *new; + struct list_head *old_tail; + int idx_total = idx_parents + idx_local; /* split */ - new = create_child(parent); - list_move_tail(&to_split->list, &new->val); - new->hit = parent->hit; - parent->hit = 0; - parent->val_nr = idx; + new = create_child(parent, true); + + /* split the callchain and move a part to the new child */ + old_tail = parent->val.prev; + list_del_range(&to_split->list, old_tail); + new->val.next = &to_split->list; + new->val.prev = old_tail; + to_split->list.prev = &new->val; + old_tail->next = &new->val; - /* create the new one */ - add_child(parent, chain, syms); + /* split the hits */ + new->hit = parent->hit; + new->val_nr = parent->val_nr - idx_local; + parent->val_nr = idx_local; + + /* create a new child for the new branch if any */ + if (idx_total < chain->nr) { + parent->hit = 0; + add_child(parent, chain, idx_total, syms); + } else { + parent->hit = 1; + } } static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, int start, struct symbol **syms); -static int +static void __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, - struct symbol **syms) + struct symbol **syms, int start) { struct callchain_node *rnode; /* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret = __append_chain(rnode, chain, root->val_nr, syms); + int ret = __append_chain(rnode, chain, start, syms); if (!ret) - return 0; + return; } - return -1; + /* nothing in children, add to the current node */ + add_child(root, chain, start, syms); } static int @@ -142,14 +190,22 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, int i = start; bool found = false; - /* lookup in the current node */ + /* + * Lookup in the current node + * If we have a symbol, then compare the start to match + * anywhere inside a function. + */ list_for_each_entry(cnode, &root->val, list) { - if (cnode->ip != chain->ips[i++]) + if (i == chain->nr) + break; + if (cnode->sym && syms[i]) { + if (cnode->sym->start != syms[i]->start) + break; + } else if (cnode->ip != chain->ips[i]) break; if (!found) found = true; - if (i == chain->nr) - break; + i++; } /* matches not, relay on the parent */ @@ -157,23 +213,25 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, return -1; /* we match only a part of the node. Split it and add the new chain */ - if (i < root->val_nr) { - split_add_child(root, chain, cnode, i, syms); + if (i - start < root->val_nr) { + split_add_child(root, chain, cnode, start, i - start, syms); return 0; } /* we match 100% of the path, increment the hit */ - if (i == root->val_nr) { + if (i - start == root->val_nr && i == chain->nr) { root->hit++; return 0; } - return __append_chain_children(root, chain, syms); + /* We match the node and still have a part remaining */ + __append_chain_children(root, chain, syms, i); + + return 0; } void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms) { - if (__append_chain_children(root, chain, syms) == -1) - add_child(root, chain, syms); + __append_chain_children(root, chain, syms, 0); } -- cgit v1.2.3 From 61c45981ddbd718136d49464f00d2f11938aaa6e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 1 Jul 2009 13:04:34 +1000 Subject: perf_counter tools: Rework event string parsing/syntax This reworks the parser for event descriptors to make it more consistent in what it accepts. It is now structured as a recursive descent parser for the following grammar: events ::= event ( ("," | space) space* event )* event ::= ( raw_event | numeric_event | symbolic_event | generic_hw_event ) [ event_modifier ] raw_event ::= "r" hex_number numeric_event ::= number ":" number number ::= decimal_number | "0x" hex_number | "0" octal_number symbolic_event ::= string_from_event_symbols_array generic_hw_event::= cache_type ( "-" ( cache_op | cache_result ) )* event_modifier ::= ":" ( "u" | "k" | "h" )+ with the extra restriction that you can have at most one cache_op and at most one cache_result. We pass the current string pointer by reference (i.e. as a const char **) to the various parsing functions so that they can advance the pointer to indicate how much they consumed. They return 0 if they didn't recognize the thing at the pointer or 1 if they did (and advance the pointer past it). This also fixes parse_aliases to take the longest matching alias from the table, not the first one. Otherwise "l1-data" would match the "l1-d" alias and the "ata" would not be consumed. This allows event modifiers indicating what processor modes to count in to be applied to any event, not just numeric events, and adds a ":h" modifier to indicate counting in hypervisor mode. Specifying ":u" now sets both exclude_kernel and exclude_hv, and so on. Multiple modes can be specified, e.g. ":uk" will count in user or hypervisor mode (i.e. only exclude_kernel will be set). Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <19018.53826.843815.189847@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 232 ++++++++++++++++++++++++++++------------- 1 file changed, 160 insertions(+), 72 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4d042f104cd..e6b83a3311a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -184,16 +184,20 @@ char *event_name(int counter) return "unknown"; } -static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) +static int parse_aliases(const char **str, char *names[][MAX_ALIASES], int size) { int i, j; + int n, longest = -1; for (i = 0; i < size; i++) { - for (j = 0; j < MAX_ALIASES; j++) { - if (!names[i][j]) - break; - if (strcasestr(str, names[i][j])) - return i; + for (j = 0; j < MAX_ALIASES && names[i][j]; j++) { + n = strlen(names[i][j]); + if (n > longest && !strncasecmp(*str, names[i][j], n)) + longest = n; + } + if (longest > 0) { + *str += longest; + return i; } } @@ -201,30 +205,53 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) } static int -parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) +parse_generic_hw_event(const char **str, struct perf_counter_attr *attr) { - int cache_type = -1, cache_op = 0, cache_result = 0; + const char *s = *str; + int cache_type = -1, cache_op = -1, cache_result = -1; - cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX); + cache_type = parse_aliases(&s, hw_cache, PERF_COUNT_HW_CACHE_MAX); /* * No fallback - if we cannot get a clear cache type * then bail out: */ if (cache_type == -1) - return -EINVAL; + return 0; + + while ((cache_op == -1 || cache_result == -1) && *s == '-') { + ++s; + + if (cache_op == -1) { + cache_op = parse_aliases(&s, hw_cache_op, + PERF_COUNT_HW_CACHE_OP_MAX); + if (cache_op >= 0) { + if (!is_cache_op_valid(cache_type, cache_op)) + return 0; + continue; + } + } + + if (cache_result == -1) { + cache_result = parse_aliases(&s, hw_cache_result, + PERF_COUNT_HW_CACHE_RESULT_MAX); + if (cache_result >= 0) + continue; + } + + /* + * Can't parse this as a cache op or result, so back up + * to the '-'. + */ + --s; + break; + } - cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); /* * Fall back to reads: */ if (cache_op == -1) cache_op = PERF_COUNT_HW_CACHE_OP_READ; - if (!is_cache_op_valid(cache_type, cache_op)) - return -EINVAL; - - cache_result = parse_aliases(str, hw_cache_result, - PERF_COUNT_HW_CACHE_RESULT_MAX); /* * Fall back to accesses: */ @@ -234,93 +261,154 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) attr->config = cache_type | (cache_op << 8) | (cache_result << 16); attr->type = PERF_TYPE_HW_CACHE; - return 0; + *str = s; + return 1; } static int check_events(const char *str, unsigned int i) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return 1; + int n; - if (strlen(event_symbols[i].alias)) - if (!strncmp(str, event_symbols[i].alias, - strlen(event_symbols[i].alias))) - return 1; + n = strlen(event_symbols[i].symbol); + if (!strncmp(str, event_symbols[i].symbol, n)) + return n; + + n = strlen(event_symbols[i].alias); + if (n) + if (!strncmp(str, event_symbols[i].alias, n)) + return n; return 0; } -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. - */ -static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) +static int +parse_symbolic_event(const char **strp, struct perf_counter_attr *attr) { - u64 config, id; - int type; + const char *str = *strp; unsigned int i; - const char *sep, *pstr; + int n; - if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) { - attr->type = PERF_TYPE_RAW; - attr->config = config; + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + n = check_events(str, i); + if (n > 0) { + attr->type = event_symbols[i].type; + attr->config = event_symbols[i].config; + *strp = str + n; + return 1; + } + } + return 0; +} + +static int parse_raw_event(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + u64 config; + int n; + if (*str != 'r') return 0; + n = hex2u64(str + 1, &config); + if (n > 0) { + *strp = str + n + 1; + attr->type = PERF_TYPE_RAW; + attr->config = config; + return 1; } + return 0; +} - pstr = str; - sep = strchr(pstr, ':'); - if (sep) { - type = atoi(pstr); - pstr = sep + 1; - id = atoi(pstr); - sep = strchr(pstr, ':'); - if (sep) { - pstr = sep + 1; - if (strchr(pstr, 'k')) - attr->exclude_user = 1; - if (strchr(pstr, 'u')) - attr->exclude_kernel = 1; +static int +parse_numeric_event(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + char *endp; + unsigned long type; + u64 config; + + type = strtoul(str, &endp, 0); + if (endp > str && type < PERF_TYPE_MAX && *endp == ':') { + str = endp + 1; + config = strtoul(str, &endp, 0); + if (endp > str) { + attr->type = type; + attr->config = config; + *strp = endp; + return 1; } - attr->type = type; - attr->config = id; + } + return 0; +} + +static int +parse_event_modifier(const char **strp, struct perf_counter_attr *attr) +{ + const char *str = *strp; + int eu = 1, ek = 1, eh = 1; + if (*str++ != ':') return 0; + while (*str) { + if (*str == 'u') + eu = 0; + else if (*str == 'k') + ek = 0; + else if (*str == 'h') + eh = 0; + else + break; + ++str; } + if (str >= *strp + 2) { + *strp = str; + attr->exclude_user = eu; + attr->exclude_kernel = ek; + attr->exclude_hv = eh; + return 1; + } + return 0; +} - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (check_events(str, i)) { - attr->type = event_symbols[i].type; - attr->config = event_symbols[i].config; +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) +{ + if (!(parse_raw_event(str, attr) || + parse_numeric_event(str, attr) || + parse_symbolic_event(str, attr) || + parse_generic_hw_event(str, attr))) + return 0; - return 0; - } - } + parse_event_modifier(str, attr); - return parse_generic_hw_symbols(str, attr); + return 1; } int parse_events(const struct option *opt, const char *str, int unset) { struct perf_counter_attr attr; - int ret; - memset(&attr, 0, sizeof(attr)); -again: - if (nr_counters == MAX_COUNTERS) - return -1; + for (;;) { + if (nr_counters == MAX_COUNTERS) + return -1; + + memset(&attr, 0, sizeof(attr)); + if (!parse_event_symbols(&str, &attr)) + return -1; - ret = parse_event_symbols(str, &attr); - if (ret < 0) - return ret; + if (!(*str == 0 || *str == ',' || isspace(*str))) + return -1; - attrs[nr_counters] = attr; - nr_counters++; + attrs[nr_counters] = attr; + nr_counters++; - str = strstr(str, ","); - if (str) { - str++; - goto again; + if (*str == 0) + break; + if (*str == ',') + ++str; + while (isspace(*str)) + ++str; } return 0; -- cgit v1.2.3 From 88a69dfbc6ab1e3b51bba8c9103055e21089ebb9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2009 11:17:20 +0200 Subject: perf report: Fix HV bit mismerge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: builtin-report.c: In function ‘hist_entry__add’: builtin-report.c:1015: error: case label not within a switch statement builtin-report.c:1017: error: break statement not within loop or switch Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 197793051fa..7d2b49adcdc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -964,6 +964,9 @@ resolve_callchain(struct thread *thread, struct map *map, } switch (context) { + case PERF_CONTEXT_HV: + dso = hypervisor_dso; + break; case PERF_CONTEXT_KERNEL: dso = kernel_dso; break; @@ -1012,9 +1015,6 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; - case PERF_CONTEXT_HV: - dso = hypervisor_dso; - break; if ((sort__has_parent || callchain) && chain) syms = resolve_callchain(thread, map, chain, &entry); -- cgit v1.2.3 From f37a291c527c954df4da568de718ebb36b8261c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Jul 2009 12:37:06 +0200 Subject: perf_counter tools: Add more warnings and fix/annotate them Enable -Wextra. This found a few real bugs plus a number of signed/unsigned type mismatches/uncleanlinesses. It also required a few annotations All things considered it was still worth it so lets try with this enabled for now. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 +- tools/perf/builtin-annotate.c | 12 +++++------ tools/perf/builtin-help.c | 6 ++++-- tools/perf/builtin-list.c | 2 +- tools/perf/builtin-record.c | 4 ++-- tools/perf/builtin-report.c | 22 ++++++++++---------- tools/perf/builtin-stat.c | 18 +++++++++------- tools/perf/builtin-top.c | 8 +++---- tools/perf/perf.c | 5 +---- tools/perf/perf.h | 2 ++ tools/perf/util/alias.c | 2 +- tools/perf/util/cache.h | 1 + tools/perf/util/callchain.c | 15 +++++++------- tools/perf/util/callchain.h | 10 ++++----- tools/perf/util/color.c | 10 ++++++--- tools/perf/util/config.c | 18 +++++++++------- tools/perf/util/exec_cmd.c | 5 ++++- tools/perf/util/help.c | 26 +++++++++++++---------- tools/perf/util/help.h | 6 +++--- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-options.c | 2 +- tools/perf/util/parse-options.h | 25 +++++++++++----------- tools/perf/util/quote.c | 46 ++++++++++++++++++++++------------------- tools/perf/util/quote.h | 2 +- tools/perf/util/strbuf.c | 13 ++++++------ tools/perf/util/strbuf.h | 10 ++++----- tools/perf/util/wrapper.c | 5 +++-- 27 files changed, 151 insertions(+), 128 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f572c90f610..eddf076b19d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -164,7 +164,7 @@ endif # CFLAGS and LDFLAGS are for the users to override from the command line. -CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 +CFLAGS = $(M64) -ggdb3 -Wall -Wextra -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 722c0f54e54..6cba70daf12 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -160,7 +160,7 @@ static void dsos__fprintf(FILE *fp) static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { - return dso__find_symbol(kernel_dso, ip); + return dso__find_symbol(dso, ip); } static int load_kernel(void) @@ -203,7 +203,7 @@ static u64 map__map_ip(struct map *map, u64 ip) return ip - map->start + map->pgoff; } -static u64 vdso__map_ip(struct map *map, u64 ip) +static u64 vdso__map_ip(struct map *map __used, u64 ip) { return ip; } @@ -600,7 +600,7 @@ static LIST_HEAD(hist_entry__sort_list); static int sort_dimension__add(char *tok) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { struct sort_dimension *sd = &sort_dimensions[i]; @@ -1069,7 +1069,7 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) static const char *prev_color; unsigned int offset; size_t line_len; - u64 line_ip; + s64 line_ip; int ret; char *c; @@ -1428,7 +1428,7 @@ more: head += size; - if (offset + head < stat.st_size) + if (offset + head < (unsigned long)stat.st_size) goto more; rc = EXIT_SUCCESS; @@ -1492,7 +1492,7 @@ static void setup_sorting(void) free(str); } -int cmd_annotate(int argc, const char **argv, const char *prefix) +int cmd_annotate(int argc, const char **argv, const char *prefix __used) { symbol__init(); diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 0f32dc3f3c4..2599d86a733 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -3,6 +3,7 @@ * * Builtin help command */ +#include "perf.h" #include "util/cache.h" #include "builtin.h" #include "util/exec_cmd.h" @@ -277,7 +278,7 @@ static struct cmdnames main_cmds, other_cmds; void list_common_cmds_help(void) { - int i, longest = 0; + unsigned int i, longest = 0; for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { if (longest < strlen(common_cmds[i].name)) @@ -415,9 +416,10 @@ static void show_html_page(const char *perf_cmd) open_html(page_path.buf); } -int cmd_help(int argc, const char **argv, const char *prefix) +int cmd_help(int argc, const char **argv, const char *prefix __used) { const char *alias; + load_command_list("perf-", &main_cmds, &other_cmds); perf_config(perf_help_config, NULL); diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index fe60e37c96e..f990fa8a35c 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -13,7 +13,7 @@ #include "util/parse-options.h" #include "util/parse-events.h" -int cmd_list(int argc, const char **argv, const char *prefix) +int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) { print_events(); return 0; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index d18546f37d7..4ef78a5e6f3 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -294,7 +294,7 @@ static void pid_synthesize_mmap_samples(pid_t pid) while (1) { char bf[BUFSIZ], *pbf = bf; struct mmap_event mmap_ev = { - .header.type = PERF_EVENT_MMAP, + .header = { .type = PERF_EVENT_MMAP }, }; int n; size_t size; @@ -650,7 +650,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_record(int argc, const char **argv, const char *prefix) +int cmd_record(int argc, const char **argv, const char *prefix __used) { int counter; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7d2b49adcdc..007363db3b1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -177,7 +177,7 @@ static void dsos__fprintf(FILE *fp) static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { - return dso__find_symbol(kernel_dso, ip); + return dso__find_symbol(dso, ip); } static int load_kernel(void) @@ -239,7 +239,7 @@ static u64 map__map_ip(struct map *map, u64 ip) return ip - map->start + map->pgoff; } -static u64 vdso__map_ip(struct map *map, u64 ip) +static u64 vdso__map_ip(struct map *map __used, u64 ip) { return ip; } @@ -712,7 +712,7 @@ static LIST_HEAD(hist_entry__sort_list); static int sort_dimension__add(char *tok) { - int i; + unsigned int i; for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { struct sort_dimension *sd = &sort_dimensions[i]; @@ -801,7 +801,7 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) ret += fprintf(fp, " %s\n", chain->sym->name); else ret += fprintf(fp, " %p\n", - (void *)chain->ip); + (void *)(long)chain->ip); } return ret; @@ -938,12 +938,12 @@ static int call__match(struct symbol *sym) } static struct symbol ** -resolve_callchain(struct thread *thread, struct map *map, +resolve_callchain(struct thread *thread, struct map *map __used, struct ip_callchain *chain, struct hist_entry *entry) { - int i; - struct symbol **syms; u64 context = PERF_CONTEXT_MAX; + struct symbol **syms; + unsigned int i; if (callchain) { syms = calloc(chain->nr, sizeof(*syms)); @@ -1183,7 +1183,7 @@ static size_t output__fprintf(FILE *fp, u64 total_samples) fprintf(fp, "# ........"); list_for_each_entry(se, &hist_entry__sort_list, list) { - int i; + unsigned int i; if (exclude_other && (se == &sort_parent)) continue; @@ -1271,7 +1271,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) (long long)period); if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int i; + unsigned int i; chain = (void *)more_data; @@ -1667,7 +1667,7 @@ more: if (offset + head >= header->data_offset + header->data_size) goto done; - if (offset + head < stat.st_size) + if (offset + head < (unsigned long)stat.st_size) goto more; done: @@ -1756,7 +1756,7 @@ static void setup_list(struct strlist **list, const char *list_str, } } -int cmd_report(int argc, const char **argv, const char *prefix) +int cmd_report(int argc, const char **argv, const char *prefix __used) { symbol__init(); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2e03524a1de..095a90e012a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -64,7 +64,7 @@ static struct perf_counter_attr default_attrs[] = { static int system_wide = 0; static int verbose = 0; -static int nr_cpus = 0; +static unsigned int nr_cpus = 0; static int run_idx = 0; static int run_count = 1; @@ -108,7 +108,8 @@ static void create_perf_stat_counter(int counter, int pid) PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) { - int cpu; + unsigned int cpu; + for (cpu = 0; cpu < nr_cpus; cpu++) { fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); if (fd[cpu][counter] < 0 && verbose) @@ -150,8 +151,8 @@ static inline int nsec_counter(int counter) static void read_counter(int counter) { u64 *count, single_count[3]; - ssize_t res; - int cpu, nv; + unsigned int cpu; + size_t res, nv; int scaled; count = event_res[run_idx][counter]; @@ -165,6 +166,7 @@ static void read_counter(int counter) res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); assert(res == nv * sizeof(u64)); + close(fd[cpu][counter]); fd[cpu][counter] = -1; @@ -200,7 +202,7 @@ static void read_counter(int counter) runtime_cycles[run_idx] = count[0]; } -static int run_perf_stat(int argc, const char **argv) +static int run_perf_stat(int argc __used, const char **argv) { unsigned long long t0, t1; int status = 0; @@ -390,7 +392,7 @@ static void calc_avg(void) event_res_avg[j]+1, event_res[i][j]+1); update_avg("counter/2", j, event_res_avg[j]+2, event_res[i][j]+2); - if (event_scaled[i][j] != -1) + if (event_scaled[i][j] != (u64)-1) update_avg("scaled", j, event_scaled_avg + j, event_scaled[i]+j); else @@ -510,7 +512,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_stat(int argc, const char **argv, const char *prefix) +int cmd_stat(int argc, const char **argv, const char *prefix __used) { int status; @@ -528,7 +530,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); + assert((int)nr_cpus >= 0); /* * We dont want to block the signals - that would cause diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0506cd6e04c..5f5e7df8302 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -269,7 +269,7 @@ static void print_sym_table(void) } } -static void *display_thread(void *arg) +static void *display_thread(void *arg __used) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; int delay_msecs = delay_secs * 1000; @@ -287,7 +287,7 @@ static void *display_thread(void *arg) } /* Tag samples to be skipped. */ -char *skip_symbols[] = { +static const char *skip_symbols[] = { "default_idle", "cpu_idle", "enter_idle", @@ -426,7 +426,7 @@ static void process_event(u64 ip, int counter, int user) struct mmap_data { int counter; void *base; - unsigned int mask; + int mask; unsigned int prev; }; @@ -705,7 +705,7 @@ static const struct option options[] = { OPT_END() }; -int cmd_top(int argc, const char **argv, const char *prefix) +int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4eb72593370..c5656784c61 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -229,9 +229,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) use_pager = 1; commit_pager_choice(); - if (p->option & NEED_WORK_TREE) - /* setup_work_tree() */; - status = p->fn(argc, argv, prefix); if (status) return status & 0xff; @@ -266,7 +263,7 @@ static void handle_internal_command(int argc, const char **argv) { "annotate", cmd_annotate, 0 }, { "version", cmd_version, 0 }, }; - int i; + unsigned int i; static const char ext[] = STRIP_EXTENSION; if (sizeof(ext) > 1) { diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ce394192c85..27887c91643 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -52,6 +52,8 @@ static inline unsigned long long rdclock(void) #define __user #define asmlinkage +#define __used __attribute__((__unused__)) + #define unlikely(x) __builtin_expect(!!(x), 0) #define min(x, y) ({ \ typeof(x) _min1 = (x); \ diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c index 9b3dd2b428d..b8144e80bb1 100644 --- a/tools/perf/util/alias.c +++ b/tools/perf/util/alias.c @@ -3,7 +3,7 @@ static const char *alias_key; static char *alias_val; -static int alias_lookup_cb(const char *k, const char *v, void *cb) +static int alias_lookup_cb(const char *k, const char *v, void *cb __used) { if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { if (!v) diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 393d6146d13..161d5f413e2 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -3,6 +3,7 @@ #include "util.h" #include "strbuf.h" +#include "../perf.h" #define PERF_DIR_ENVIRONMENT "PERF_DIR" #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 440db12c635..3dceabd9b5e 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -92,7 +92,7 @@ static void fill_node(struct callchain_node *node, struct ip_callchain *chain, int start, struct symbol **syms) { - int i; + unsigned int i; for (i = start; i < chain->nr; i++) { struct callchain_list *call; @@ -135,7 +135,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, { struct callchain_node *new; struct list_head *old_tail; - int idx_total = idx_parents + idx_local; + unsigned int idx_total = idx_parents + idx_local; /* split */ new = create_child(parent, true); @@ -164,17 +164,18 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start, struct symbol **syms); + unsigned int start, struct symbol **syms); static void __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, - struct symbol **syms, int start) + struct symbol **syms, unsigned int start) { struct callchain_node *rnode; /* lookup in childrens */ list_for_each_entry(rnode, &root->children, brothers) { - int ret = __append_chain(rnode, chain, start, syms); + unsigned int ret = __append_chain(rnode, chain, start, syms); + if (!ret) return; } @@ -184,10 +185,10 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, static int __append_chain(struct callchain_node *root, struct ip_callchain *chain, - int start, struct symbol **syms) + unsigned int start, struct symbol **syms) { struct callchain_list *cnode; - int i = start; + unsigned int i = start; bool found = false; /* diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c942daa712e..251d99ecd22 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -10,15 +10,15 @@ struct callchain_node { struct callchain_node *parent; struct list_head brothers; - struct list_head children; - struct list_head val; + struct list_head children; + struct list_head val; struct rb_node rb_node; - int val_nr; - int hit; + unsigned int val_nr; + u64 hit; }; struct callchain_list { - unsigned long ip; + u64 ip; struct symbol *sym; struct list_head list; }; diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 9a8c20ccc53..26f82318b86 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -11,7 +11,8 @@ static int parse_color(const char *name, int len) }; char *end; int i; - for (i = 0; i < ARRAY_SIZE(color_names); i++) { + + for (i = 0; i < (int)ARRAY_SIZE(color_names); i++) { const char *str = color_names[i]; if (!strncasecmp(name, str, len) && !str[len]) return i - 1; @@ -28,7 +29,8 @@ static int parse_attr(const char *name, int len) static const char * const attr_names[] = { "bold", "dim", "ul", "blink", "reverse" }; - int i; + unsigned int i; + for (i = 0; i < ARRAY_SIZE(attr_names); i++) { const char *str = attr_names[i]; if (!strncasecmp(name, str, len) && !str[len]) @@ -222,10 +224,12 @@ int color_fwrite_lines(FILE *fp, const char *color, { if (!*color) return fwrite(buf, count, 1, fp) != 1; + while (count) { char *p = memchr(buf, '\n', count); + if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? p - buf : count, 1, fp) != 1 || + fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 || fputs(PERF_COLOR_RESET, fp) < 0)) return -1; if (!p) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 3dd13faa6a2..780df541006 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -47,10 +47,12 @@ static int get_next_char(void) static char *parse_value(void) { static char value[1024]; - int quote = 0, comment = 0, len = 0, space = 0; + int quote = 0, comment = 0, space = 0; + size_t len = 0; for (;;) { int c = get_next_char(); + if (len >= sizeof(value) - 1) return NULL; if (c == '\n') { @@ -353,13 +355,13 @@ int perf_config_string(const char **dest, const char *var, const char *value) return 0; } -static int perf_default_core_config(const char *var, const char *value) +static int perf_default_core_config(const char *var __used, const char *value __used) { /* Add other config variables here and to Documentation/config.txt. */ return 0; } -int perf_default_config(const char *var, const char *value, void *dummy) +int perf_default_config(const char *var, const char *value, void *dummy __used) { if (!prefixcmp(var, "core.")) return perf_default_core_config(var, value); @@ -471,10 +473,10 @@ static int matches(const char* key, const char* value) !regexec(store.value_regex, value, 0, NULL, 0))); } -static int store_aux(const char* key, const char* value, void *cb) +static int store_aux(const char* key, const char* value, void *cb __used) { + int section_len; const char *ep; - size_t section_len; switch (store.state) { case KEY_SEEN: @@ -551,7 +553,7 @@ static int store_write_section(int fd, const char* key) strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); } - success = write_in_full(fd, sb.buf, sb.len) == sb.len; + success = (write_in_full(fd, sb.buf, sb.len) == (ssize_t)sb.len); strbuf_release(&sb); return success; @@ -599,7 +601,7 @@ static int store_write_pair(int fd, const char* key, const char* value) } strbuf_addf(&sb, "%s\n", quote); - success = write_in_full(fd, sb.buf, sb.len) == sb.len; + success = (write_in_full(fd, sb.buf, sb.len) == (ssize_t)sb.len); strbuf_release(&sb); return success; @@ -741,7 +743,7 @@ int perf_config_set_multivar(const char* key, const char* value, } else { struct stat st; char* contents; - size_t contents_sz, copy_begin, copy_end; + ssize_t contents_sz, copy_begin, copy_end; int i, new_line = 0; if (value_regex == NULL) diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c index d3929226315..34a35286738 100644 --- a/tools/perf/util/exec_cmd.c +++ b/tools/perf/util/exec_cmd.c @@ -1,6 +1,9 @@ #include "cache.h" #include "exec_cmd.h" #include "quote.h" + +#include + #define MAX_ARGS 32 extern char **environ; @@ -51,7 +54,7 @@ const char *perf_extract_argv0_path(const char *argv0) slash--; if (slash >= argv0) { - argv0_path = strndup(argv0, slash - argv0); + argv0_path = xstrndup(argv0, slash - argv0); return slash + 1; } diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c index 17a00e0df2c..fbb00978b2e 100644 --- a/tools/perf/util/help.c +++ b/tools/perf/util/help.c @@ -26,7 +26,7 @@ static int term_columns(void) return 80; } -void add_cmdname(struct cmdnames *cmds, const char *name, int len) +void add_cmdname(struct cmdnames *cmds, const char *name, size_t len) { struct cmdname *ent = malloc(sizeof(*ent) + len + 1); @@ -40,7 +40,8 @@ void add_cmdname(struct cmdnames *cmds, const char *name, int len) static void clean_cmdnames(struct cmdnames *cmds) { - int i; + unsigned int i; + for (i = 0; i < cmds->cnt; ++i) free(cmds->names[i]); free(cmds->names); @@ -57,7 +58,7 @@ static int cmdname_compare(const void *a_, const void *b_) static void uniq(struct cmdnames *cmds) { - int i, j; + unsigned int i, j; if (!cmds->cnt) return; @@ -71,7 +72,7 @@ static void uniq(struct cmdnames *cmds) void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) { - int ci, cj, ei; + size_t ci, cj, ei; int cmp; ci = cj = ei = 0; @@ -106,8 +107,9 @@ static void pretty_print_string_list(struct cmdnames *cmds, int longest) printf(" "); for (j = 0; j < cols; j++) { - int n = j * rows + i; - int size = space; + unsigned int n = j * rows + i; + unsigned int size = space; + if (n >= cmds->cnt) break; if (j == cols-1 || n + rows >= cmds->cnt) @@ -208,7 +210,7 @@ void load_command_list(const char *prefix, void list_commands(const char *title, struct cmdnames *main_cmds, struct cmdnames *other_cmds) { - int i, longest = 0; + unsigned int i, longest = 0; for (i = 0; i < main_cmds->cnt; i++) if (longest < main_cmds->names[i]->len) @@ -239,7 +241,8 @@ void list_commands(const char *title, struct cmdnames *main_cmds, int is_in_cmdlist(struct cmdnames *c, const char *s) { - int i; + unsigned int i; + for (i = 0; i < c->cnt; i++) if (!strcmp(s, c->names[i]->name)) return 1; @@ -271,7 +274,8 @@ static int levenshtein_compare(const void *p1, const void *p2) static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) { - int i; + unsigned int i; + ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); for (i = 0; i < old->cnt; i++) @@ -283,7 +287,7 @@ static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) const char *help_unknown_cmd(const char *cmd) { - int i, n = 0, best_similarity = 0; + unsigned int i, n = 0, best_similarity = 0; struct cmdnames main_cmds, other_cmds; memset(&main_cmds, 0, sizeof(main_cmds)); @@ -345,7 +349,7 @@ const char *help_unknown_cmd(const char *cmd) exit(1); } -int cmd_version(int argc, const char **argv, const char *prefix) +int cmd_version(int argc __used, const char **argv __used, const char *prefix __used) { printf("perf version %s\n", perf_version_string); return 0; diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h index 56bc15406ff..7128783637b 100644 --- a/tools/perf/util/help.h +++ b/tools/perf/util/help.h @@ -2,8 +2,8 @@ #define HELP_H struct cmdnames { - int alloc; - int cnt; + size_t alloc; + size_t cnt; struct cmdname { size_t len; /* also used for similarity index in help.c */ char name[FLEX_ARRAY]; @@ -19,7 +19,7 @@ static inline void mput_char(char c, unsigned int num) void load_command_list(const char *prefix, struct cmdnames *main_cmds, struct cmdnames *other_cmds); -void add_cmdname(struct cmdnames *cmds, const char *name, int len); +void add_cmdname(struct cmdnames *cmds, const char *name, size_t len); /* Here we require that excludes is a sorted list. */ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); int is_in_cmdlist(struct cmdnames *c, const char *s); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e6b83a3311a..aed70901df2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -385,7 +385,7 @@ static int parse_event_symbols(const char **str, struct perf_counter_attr *attr) return 1; } -int parse_events(const struct option *opt, const char *str, int unset) +int parse_events(const struct option *opt __used, const char *str, int unset __used) { struct perf_counter_attr attr; diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index b3affb1658d..9a897b7cce7 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -485,7 +485,7 @@ int parse_options_usage(const char * const *usagestr, } -int parse_opt_verbosity_cb(const struct option *opt, const char *arg, +int parse_opt_verbosity_cb(const struct option *opt, const char *arg __used, int unset) { int *target = opt->value; diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index a1039a6ce0e..15c8aba9c62 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -90,21 +90,20 @@ struct option { intptr_t defval; }; -#define OPT_END() { OPTION_END } -#define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } -#define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } -#define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } -#define OPT_BOOLEAN(s, l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } -#define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } -#define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } -#define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } -#define OPT_LONG(s, l, v, h) { OPTION_LONG, (s), (l), (v), NULL, (h) } -#define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } +#define OPT_END() { .type = OPTION_END } +#define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) } +#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } +#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (b) } +#define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_SET_INT(s, l, v, h, i) { .type = OPTION_SET_INT, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (i) } +#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) } +#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = (v), .help = (h) } +#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h) } #define OPT_DATE(s, l, v, h) \ - { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ - parse_opt_approxidate_cb } + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ - { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) } /* parse_options() will filter out the processed options and leave the * non-option argments in argv[]. diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c index f18c5212bc9..c6e5dc0dc82 100644 --- a/tools/perf/util/quote.c +++ b/tools/perf/util/quote.c @@ -162,12 +162,16 @@ static inline int sq_must_quote(char c) return sq_lookup[(unsigned char)c] + quote_path_fully > 0; } -/* returns the longest prefix not needing a quote up to maxlen if positive. - This stops at the first \0 because it's marked as a character needing an - escape */ -static size_t next_quote_pos(const char *s, ssize_t maxlen) +/* + * Returns the longest prefix not needing a quote up to maxlen if + * positive. + * This stops at the first \0 because it's marked as a character + * needing an escape. + */ +static ssize_t next_quote_pos(const char *s, ssize_t maxlen) { - size_t len; + ssize_t len; + if (maxlen < 0) { for (len = 0; !sq_must_quote(s[len]); len++); } else { @@ -192,22 +196,22 @@ static size_t next_quote_pos(const char *s, ssize_t maxlen) static size_t quote_c_style_counted(const char *name, ssize_t maxlen, struct strbuf *sb, FILE *fp, int no_dq) { -#undef EMIT -#define EMIT(c) \ - do { \ - if (sb) strbuf_addch(sb, (c)); \ - if (fp) fputc((c), fp); \ - count++; \ +#define EMIT(c) \ + do { \ + if (sb) strbuf_addch(sb, (c)); \ + if (fp) fputc((c), fp); \ + count++; \ } while (0) -#define EMITBUF(s, l) \ - do { \ - int __ret; \ - if (sb) strbuf_add(sb, (s), (l)); \ - if (fp) __ret = fwrite((s), (l), 1, fp); \ - count += (l); \ + +#define EMITBUF(s, l) \ + do { \ + int __ret; \ + if (sb) strbuf_add(sb, (s), (l)); \ + if (fp) __ret = fwrite((s), (l), 1, fp); \ + count += (l); \ } while (0) - size_t len, count = 0; + ssize_t len, count = 0; const char *p = name; for (;;) { @@ -273,8 +277,8 @@ void write_name_quoted(const char *name, FILE *fp, int terminator) fputc(terminator, fp); } -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, - const char *name, FILE *fp, int terminator) +void write_name_quotedpfx(const char *pfx, ssize_t pfxlen, + const char *name, FILE *fp, int terminator) { int needquote = 0; @@ -306,7 +310,7 @@ char *quote_path_relative(const char *in, int len, len = strlen(in); /* "../" prefix itself does not need quoting, but "in" might. */ - needquote = next_quote_pos(in, len) < len; + needquote = (next_quote_pos(in, len) < len); strbuf_setlen(out, 0); strbuf_grow(out, len); diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index 5dfad89816d..a5454a1d1c1 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -53,7 +53,7 @@ extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); extern void write_name_quoted(const char *name, FILE *, int terminator); -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, +extern void write_name_quotedpfx(const char *pfx, ssize_t pfxlen, const char *name, FILE *, int terminator); /* quote path as relative to the given prefix */ diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 464e7ca898c..5249d5a1b0c 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -16,7 +16,7 @@ int prefixcmp(const char *str, const char *prefix) */ char strbuf_slopbuf[1]; -void strbuf_init(struct strbuf *sb, size_t hint) +void strbuf_init(struct strbuf *sb, ssize_t hint) { sb->alloc = sb->len = 0; sb->buf = strbuf_slopbuf; @@ -92,7 +92,8 @@ void strbuf_ltrim(struct strbuf *sb) void strbuf_tolower(struct strbuf *sb) { - int i; + unsigned int i; + for (i = 0; i < sb->len; i++) sb->buf[i] = tolower(sb->buf[i]); } @@ -264,7 +265,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) return res; } -ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) +ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint) { size_t oldlen = sb->len; size_t oldalloc = sb->alloc; @@ -293,7 +294,7 @@ ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) #define STRBUF_MAXLINK (2*PATH_MAX) -int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) +int strbuf_readlink(struct strbuf *sb, const char *path, ssize_t hint) { size_t oldalloc = sb->alloc; @@ -301,7 +302,7 @@ int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) hint = 32; while (hint < STRBUF_MAXLINK) { - int len; + ssize_t len; strbuf_grow(sb, hint); len = readlink(path, sb->buf, hint); @@ -343,7 +344,7 @@ int strbuf_getline(struct strbuf *sb, FILE *fp, int term) return 0; } -int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint) +int strbuf_read_file(struct strbuf *sb, const char *path, ssize_t hint) { int fd, len; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 9ee908a3ec5..d2aa86c014c 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -50,7 +50,7 @@ struct strbuf { #define STRBUF_INIT { 0, 0, strbuf_slopbuf } /*----- strbuf life cycle -----*/ -extern void strbuf_init(struct strbuf *, size_t); +extern void strbuf_init(struct strbuf *buf, ssize_t hint); extern void strbuf_release(struct strbuf *); extern char *strbuf_detach(struct strbuf *, size_t *); extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); @@ -61,7 +61,7 @@ static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { } /*----- strbuf size related -----*/ -static inline size_t strbuf_avail(const struct strbuf *sb) { +static inline ssize_t strbuf_avail(const struct strbuf *sb) { return sb->alloc ? sb->alloc - sb->len - 1 : 0; } @@ -122,9 +122,9 @@ extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); /* XXX: if read fails, any partial read is undone */ -extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); -extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); -extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); +extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); +extern int strbuf_read_file(struct strbuf *sb, const char *path, ssize_t hint); +extern int strbuf_readlink(struct strbuf *sb, const char *path, ssize_t hint); extern int strbuf_getline(struct strbuf *, FILE *, int); diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 6350d65f6d9..4574ac28396 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -7,7 +7,7 @@ * There's no pack memory to release - but stay close to the Git * version so wrap this away: */ -static inline void release_pack_memory(size_t size, int flag) +static inline void release_pack_memory(size_t size __used, int flag __used) { } @@ -59,7 +59,8 @@ void *xmemdupz(const void *data, size_t len) char *xstrndup(const char *str, size_t len) { char *p = memchr(str, '\0', len); - return xmemdupz(str, p ? p - str : len); + + return xmemdupz(str, p ? (size_t)(p - str) : len); } void *xrealloc(void *ptr, size_t size) -- cgit v1.2.3 From b9ebdcc0ce1c676ebf5dc4f6df6b440d8fcf88ab Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 15:05:09 +0530 Subject: perf stat: Define MATCH_EVENT for easy attr checking MATCH_EVENT is useful: 1. for multiple attrs checking 2. avoid repetition of PERF_TYPE_ and PERF_COUNT_ and save space 3. avoids line breakage Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra LKML-Reference: <1246440909.3403.5.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 095a90e012a..01cc07efb72 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -96,6 +96,10 @@ static u64 walltime_nsecs_noise; static u64 runtime_cycles_avg; static u64 runtime_cycles_noise; +#define MATCH_EVENT(t, c, counter) \ + (attrs[counter].type == PERF_TYPE_##t && \ + attrs[counter].config == PERF_COUNT_##c) + #define ERR_PERF_OPEN \ "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n" @@ -133,13 +137,8 @@ static void create_perf_stat_counter(int counter, int pid) */ static inline int nsec_counter(int counter) { - if (attrs[counter].type != PERF_TYPE_SOFTWARE) - return 0; - - if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) - return 1; - - if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) || + MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) return 1; return 0; @@ -194,11 +193,9 @@ static void read_counter(int counter) /* * Save the full runtime - to allow normalization during printout: */ - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) runtime_nsecs[run_idx] = count[0]; - if (attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) + if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) runtime_cycles[run_idx] = count[0]; } @@ -292,9 +289,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise) fprintf(stderr, " %14.6f %-24s", msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { - + if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) { if (walltime_nsecs_avg) fprintf(stderr, " # %10.3f CPUs ", (double)count[0] / (double)walltime_nsecs_avg); @@ -307,9 +302,7 @@ static void abs_printout(int counter, u64 *count, u64 *noise) fprintf(stderr, " %14Ld %-24s", count[0], event_name(counter)); if (runtime_cycles_avg && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - + MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) { fprintf(stderr, " # %10.3f IPC ", (double)count[0] / (double)runtime_cycles_avg); } else { -- cgit v1.2.3 From 73c24cb86c51ff6445b292d9914d31236204393b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 18:36:18 +0530 Subject: perf list: Add cache events After: $ ./perf list List of pre-defined events (to be used in -e): cpu-cycles OR cycles [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults OR faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] L1-d$-loads [Hardware cache event] L1-d$-load-misses [Hardware cache event] L1-d$-stores [Hardware cache event] L1-d$-store-misses [Hardware cache event] L1-d$-prefetches [Hardware cache event] L1-d$-prefetch-misses [Hardware cache event] L1-i$-loads [Hardware cache event] L1-i$-load-misses [Hardware cache event] L1-i$-prefetches [Hardware cache event] L1-i$-prefetch-misses [Hardware cache event] LLC-loads [Hardware cache event] LLC-load-misses [Hardware cache event] LLC-stores [Hardware cache event] LLC-store-misses [Hardware cache event] LLC-prefetches [Hardware cache event] LLC-prefetch-misses [Hardware cache event] dTLB-loads [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-stores [Hardware cache event] dTLB-store-misses [Hardware cache event] dTLB-prefetches [Hardware cache event] dTLB-prefetch-misses [Hardware cache event] iTLB-loads [Hardware cache event] iTLB-load-misses [Hardware cache event] branch-loads [Hardware cache event] branch-load-misses [Hardware cache event] rNNN [raw hardware event descriptor] Signed-off-by: Jaswinder Singh Rajput Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1246453578.3072.1.camel@ht.satnam> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-events.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index aed70901df2..5184959e061 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -428,7 +428,7 @@ static const char * const event_type_descriptors[] = { void print_events(void) { struct event_symbol *syms = event_symbols; - unsigned int i, type, prev_type = -1; + unsigned int i, type, op, prev_type = -1; char name[40]; fprintf(stderr, "\n"); @@ -452,6 +452,21 @@ void print_events(void) prev_type = type; } + fprintf(stderr, "\n"); + for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + /* skip invalid cache type */ + if (!is_cache_op_valid(type, op)) + continue; + + for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + fprintf(stderr, " %-40s [%s]\n", + event_cache_name(type, op, i), + event_type_descriptors[4]); + } + } + } + fprintf(stderr, "\n"); fprintf(stderr, " %-40s [raw hardware event descriptor]\n", "rNNN"); -- cgit v1.2.3 From 43cbcd8acb4c992cbd22d1ec8a08c0591be5d719 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Jul 2009 12:28:37 -0300 Subject: perf_counter tools: Share rbtree.with the kernel The tools/perf/util/rbtree.c copy already drifted by three csets: 4b324126e0c6c3a5080ca3ec0981e8766ed6f1ee 4c60117811171d867d4f27f17ea07d7419d45dae 16c047add3ceaf0ab882e3e094d1ec904d02312d So remove the copy and use the lib/rbtree.c directly, sharing the source code while still generating a separate object file, since tools/perf uses a far more agressive -O6 switch. Signed-off-by: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090701152837.GG15682@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 7 +- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/callchain.h | 2 +- tools/perf/util/include/linux/kernel.h | 21 ++ tools/perf/util/include/linux/module.h | 6 + tools/perf/util/include/linux/rbtree.h | 1 + tools/perf/util/rbtree.c | 383 --------------------------------- tools/perf/util/rbtree.h | 171 --------------- tools/perf/util/strlist.h | 2 +- tools/perf/util/symbol.h | 2 +- 12 files changed, 39 insertions(+), 562 deletions(-) create mode 100644 tools/perf/util/include/linux/kernel.h create mode 100644 tools/perf/util/include/linux/module.h create mode 100644 tools/perf/util/include/linux/rbtree.h delete mode 100644 tools/perf/util/rbtree.c delete mode 100644 tools/perf/util/rbtree.h (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index eddf076b19d..c4f48825a6e 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -223,7 +223,7 @@ SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ # Those must not be GNU-specific; they are shared with perl/ which may # be built by a different compiler. (Note that this is an artifact now # but it still might be nice to keep that distinction.) -BASIC_CFLAGS = +BASIC_CFLAGS = -Iutil/include BASIC_LDFLAGS = # Guard against environment variables @@ -289,10 +289,10 @@ export PERL_PATH LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h +LIB_H += ../../include/linux/rbtree.h LIB_H += perf.h LIB_H += util/types.h LIB_H += util/list.h -LIB_H += util/rbtree.h LIB_H += util/levenshtein.h LIB_H += util/parse-options.h LIB_H += util/parse-events.h @@ -691,6 +691,9 @@ builtin-init-db.o: builtin-init-db.c PERF-CFLAGS util/config.o: util/config.c PERF-CFLAGS $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +util/rbtree.o: ../../lib/rbtree.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/rbtree.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + perf-%$X: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6cba70daf12..1960c22b816 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -12,7 +12,7 @@ #include "util/color.h" #include "util/list.h" #include "util/cache.h" -#include "util/rbtree.h" +#include #include "util/symbol.h" #include "util/string.h" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 007363db3b1..0d35e2b8881 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -12,7 +12,7 @@ #include "util/color.h" #include "util/list.h" #include "util/cache.h" -#include "util/rbtree.h" +#include #include "util/symbol.h" #include "util/string.h" #include "util/callchain.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5f5e7df8302..cdc74cfb151 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -23,7 +23,7 @@ #include "util/symbol.h" #include "util/color.h" #include "util/util.h" -#include "util/rbtree.h" +#include #include "util/parse-options.h" #include "util/parse-events.h" diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 251d99ecd22..0606b8fd05a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -3,7 +3,7 @@ #include "../perf.h" #include "list.h" -#include "rbtree.h" +#include #include "symbol.h" diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h new file mode 100644 index 00000000000..99c1b3d1edd --- /dev/null +++ b/tools/perf/util/include/linux/kernel.h @@ -0,0 +1,21 @@ +#ifndef PERF_LINUX_KERNEL_H_ +#define PERF_LINUX_KERNEL_H_ + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#endif diff --git a/tools/perf/util/include/linux/module.h b/tools/perf/util/include/linux/module.h new file mode 100644 index 00000000000..b43e2dc21e0 --- /dev/null +++ b/tools/perf/util/include/linux/module.h @@ -0,0 +1,6 @@ +#ifndef PERF_LINUX_MODULE_H +#define PERF_LINUX_MODULE_H + +#define EXPORT_SYMBOL(name) + +#endif diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h new file mode 100644 index 00000000000..7a243a14303 --- /dev/null +++ b/tools/perf/util/include/linux/rbtree.h @@ -0,0 +1 @@ +#include "../../../../include/linux/rbtree.h" diff --git a/tools/perf/util/rbtree.c b/tools/perf/util/rbtree.c deleted file mode 100644 index b15ba9c7cb3..00000000000 --- a/tools/perf/util/rbtree.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - Red Black Trees - (C) 1999 Andrea Arcangeli - (C) 2002 David Woodhouse - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/lib/rbtree.c -*/ - -#include "rbtree.h" - -static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *right = node->rb_right; - struct rb_node *parent = rb_parent(node); - - if ((node->rb_right = right->rb_left)) - rb_set_parent(right->rb_left, node); - right->rb_left = node; - - rb_set_parent(right, parent); - - if (parent) - { - if (node == parent->rb_left) - parent->rb_left = right; - else - parent->rb_right = right; - } - else - root->rb_node = right; - rb_set_parent(node, right); -} - -static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *left = node->rb_left; - struct rb_node *parent = rb_parent(node); - - if ((node->rb_left = left->rb_right)) - rb_set_parent(left->rb_right, node); - left->rb_right = node; - - rb_set_parent(left, parent); - - if (parent) - { - if (node == parent->rb_right) - parent->rb_right = left; - else - parent->rb_left = left; - } - else - root->rb_node = left; - rb_set_parent(node, left); -} - -void rb_insert_color(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *parent, *gparent; - - while ((parent = rb_parent(node)) && rb_is_red(parent)) - { - gparent = rb_parent(parent); - - if (parent == gparent->rb_left) - { - { - register struct rb_node *uncle = gparent->rb_right; - if (uncle && rb_is_red(uncle)) - { - rb_set_black(uncle); - rb_set_black(parent); - rb_set_red(gparent); - node = gparent; - continue; - } - } - - if (parent->rb_right == node) - { - register struct rb_node *tmp; - __rb_rotate_left(parent, root); - tmp = parent; - parent = node; - node = tmp; - } - - rb_set_black(parent); - rb_set_red(gparent); - __rb_rotate_right(gparent, root); - } else { - { - register struct rb_node *uncle = gparent->rb_left; - if (uncle && rb_is_red(uncle)) - { - rb_set_black(uncle); - rb_set_black(parent); - rb_set_red(gparent); - node = gparent; - continue; - } - } - - if (parent->rb_left == node) - { - register struct rb_node *tmp; - __rb_rotate_right(parent, root); - tmp = parent; - parent = node; - node = tmp; - } - - rb_set_black(parent); - rb_set_red(gparent); - __rb_rotate_left(gparent, root); - } - } - - rb_set_black(root->rb_node); -} - -static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, - struct rb_root *root) -{ - struct rb_node *other; - - while ((!node || rb_is_black(node)) && node != root->rb_node) - { - if (parent->rb_left == node) - { - other = parent->rb_right; - if (rb_is_red(other)) - { - rb_set_black(other); - rb_set_red(parent); - __rb_rotate_left(parent, root); - other = parent->rb_right; - } - if ((!other->rb_left || rb_is_black(other->rb_left)) && - (!other->rb_right || rb_is_black(other->rb_right))) - { - rb_set_red(other); - node = parent; - parent = rb_parent(node); - } - else - { - if (!other->rb_right || rb_is_black(other->rb_right)) - { - rb_set_black(other->rb_left); - rb_set_red(other); - __rb_rotate_right(other, root); - other = parent->rb_right; - } - rb_set_color(other, rb_color(parent)); - rb_set_black(parent); - rb_set_black(other->rb_right); - __rb_rotate_left(parent, root); - node = root->rb_node; - break; - } - } - else - { - other = parent->rb_left; - if (rb_is_red(other)) - { - rb_set_black(other); - rb_set_red(parent); - __rb_rotate_right(parent, root); - other = parent->rb_left; - } - if ((!other->rb_left || rb_is_black(other->rb_left)) && - (!other->rb_right || rb_is_black(other->rb_right))) - { - rb_set_red(other); - node = parent; - parent = rb_parent(node); - } - else - { - if (!other->rb_left || rb_is_black(other->rb_left)) - { - rb_set_black(other->rb_right); - rb_set_red(other); - __rb_rotate_left(other, root); - other = parent->rb_left; - } - rb_set_color(other, rb_color(parent)); - rb_set_black(parent); - rb_set_black(other->rb_left); - __rb_rotate_right(parent, root); - node = root->rb_node; - break; - } - } - } - if (node) - rb_set_black(node); -} - -void rb_erase(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *child, *parent; - int color; - - if (!node->rb_left) - child = node->rb_right; - else if (!node->rb_right) - child = node->rb_left; - else - { - struct rb_node *old = node, *left; - - node = node->rb_right; - while ((left = node->rb_left) != NULL) - node = left; - child = node->rb_right; - parent = rb_parent(node); - color = rb_color(node); - - if (child) - rb_set_parent(child, parent); - if (parent == old) { - parent->rb_right = child; - parent = node; - } else - parent->rb_left = child; - - node->rb_parent_color = old->rb_parent_color; - node->rb_right = old->rb_right; - node->rb_left = old->rb_left; - - if (rb_parent(old)) - { - if (rb_parent(old)->rb_left == old) - rb_parent(old)->rb_left = node; - else - rb_parent(old)->rb_right = node; - } else - root->rb_node = node; - - rb_set_parent(old->rb_left, node); - if (old->rb_right) - rb_set_parent(old->rb_right, node); - goto color; - } - - parent = rb_parent(node); - color = rb_color(node); - - if (child) - rb_set_parent(child, parent); - if (parent) - { - if (parent->rb_left == node) - parent->rb_left = child; - else - parent->rb_right = child; - } - else - root->rb_node = child; - - color: - if (color == RB_BLACK) - __rb_erase_color(child, parent, root); -} - -/* - * This function returns the first node (in sort order) of the tree. - */ -struct rb_node *rb_first(const struct rb_root *root) -{ - struct rb_node *n; - - n = root->rb_node; - if (!n) - return NULL; - while (n->rb_left) - n = n->rb_left; - return n; -} - -struct rb_node *rb_last(const struct rb_root *root) -{ - struct rb_node *n; - - n = root->rb_node; - if (!n) - return NULL; - while (n->rb_right) - n = n->rb_right; - return n; -} - -struct rb_node *rb_next(const struct rb_node *node) -{ - struct rb_node *parent; - - if (rb_parent(node) == node) - return NULL; - - /* If we have a right-hand child, go down and then left as far - as we can. */ - if (node->rb_right) { - node = node->rb_right; - while (node->rb_left) - node=node->rb_left; - return (struct rb_node *)node; - } - - /* No right-hand children. Everything down and left is - smaller than us, so any 'next' node must be in the general - direction of our parent. Go up the tree; any time the - ancestor is a right-hand child of its parent, keep going - up. First time it's a left-hand child of its parent, said - parent is our 'next' node. */ - while ((parent = rb_parent(node)) && node == parent->rb_right) - node = parent; - - return parent; -} - -struct rb_node *rb_prev(const struct rb_node *node) -{ - struct rb_node *parent; - - if (rb_parent(node) == node) - return NULL; - - /* If we have a left-hand child, go down and then right as far - as we can. */ - if (node->rb_left) { - node = node->rb_left; - while (node->rb_right) - node=node->rb_right; - return (struct rb_node *)node; - } - - /* No left-hand children. Go up till we find an ancestor which - is a right-hand child of its parent */ - while ((parent = rb_parent(node)) && node == parent->rb_left) - node = parent; - - return parent; -} - -void rb_replace_node(struct rb_node *victim, struct rb_node *new, - struct rb_root *root) -{ - struct rb_node *parent = rb_parent(victim); - - /* Set the surrounding nodes to point to the replacement */ - if (parent) { - if (victim == parent->rb_left) - parent->rb_left = new; - else - parent->rb_right = new; - } else { - root->rb_node = new; - } - if (victim->rb_left) - rb_set_parent(victim->rb_left, new); - if (victim->rb_right) - rb_set_parent(victim->rb_right, new); - - /* Copy the pointers/colour from the victim to the replacement */ - *new = *victim; -} diff --git a/tools/perf/util/rbtree.h b/tools/perf/util/rbtree.h deleted file mode 100644 index 6bdc488a47f..00000000000 --- a/tools/perf/util/rbtree.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - Red Black Trees - (C) 1999 Andrea Arcangeli - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/include/linux/rbtree.h - - To use rbtrees you'll have to implement your own insert and search cores. - This will avoid us to use callbacks and to drop drammatically performances. - I know it's not the cleaner way, but in C (not in C++) to get - performances and genericity... - - Some example of insert and search follows here. The search is a plain - normal search over an ordered tree. The insert instead must be implemented - int two steps: as first thing the code must insert the element in - order as a red leaf in the tree, then the support library function - rb_insert_color() must be called. Such function will do the - not trivial work to rebalance the rbtree if necessary. - ------------------------------------------------------------------------ -static inline struct page * rb_search_page_cache(struct inode * inode, - unsigned long offset) -{ - struct rb_node * n = inode->i_rb_page_cache.rb_node; - struct page * page; - - while (n) - { - page = rb_entry(n, struct page, rb_page_cache); - - if (offset < page->offset) - n = n->rb_left; - else if (offset > page->offset) - n = n->rb_right; - else - return page; - } - return NULL; -} - -static inline struct page * __rb_insert_page_cache(struct inode * inode, - unsigned long offset, - struct rb_node * node) -{ - struct rb_node ** p = &inode->i_rb_page_cache.rb_node; - struct rb_node * parent = NULL; - struct page * page; - - while (*p) - { - parent = *p; - page = rb_entry(parent, struct page, rb_page_cache); - - if (offset < page->offset) - p = &(*p)->rb_left; - else if (offset > page->offset) - p = &(*p)->rb_right; - else - return page; - } - - rb_link_node(node, parent, p); - - return NULL; -} - -static inline struct page * rb_insert_page_cache(struct inode * inode, - unsigned long offset, - struct rb_node * node) -{ - struct page * ret; - if ((ret = __rb_insert_page_cache(inode, offset, node))) - goto out; - rb_insert_color(node, &inode->i_rb_page_cache); - out: - return ret; -} ------------------------------------------------------------------------ -*/ - -#ifndef _LINUX_RBTREE_H -#define _LINUX_RBTREE_H - -#include - -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -struct rb_node -{ - unsigned long rb_parent_color; -#define RB_RED 0 -#define RB_BLACK 1 - struct rb_node *rb_right; - struct rb_node *rb_left; -} __attribute__((aligned(sizeof(long)))); - /* The alignment might seem pointless, but allegedly CRIS needs it */ - -struct rb_root -{ - struct rb_node *rb_node; -}; - - -#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) -#define rb_color(r) ((r)->rb_parent_color & 1) -#define rb_is_red(r) (!rb_color(r)) -#define rb_is_black(r) rb_color(r) -#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) -#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) - -static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) -{ - rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; -} -static inline void rb_set_color(struct rb_node *rb, int color) -{ - rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; -} - -#define RB_ROOT (struct rb_root) { NULL, } -#define rb_entry(ptr, type, member) container_of(ptr, type, member) - -#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -#define RB_EMPTY_NODE(node) (rb_parent(node) == node) -#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) - -extern void rb_insert_color(struct rb_node *, struct rb_root *); -extern void rb_erase(struct rb_node *, struct rb_root *); - -/* Find logical next and previous nodes in a tree */ -extern struct rb_node *rb_next(const struct rb_node *); -extern struct rb_node *rb_prev(const struct rb_node *); -extern struct rb_node *rb_first(const struct rb_root *); -extern struct rb_node *rb_last(const struct rb_root *); - -/* Fast replacement of a single node without remove/rebalance/add/rebalance */ -extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, - struct rb_root *root); - -static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, - struct rb_node ** rb_link) -{ - node->rb_parent_color = (unsigned long )parent; - node->rb_left = node->rb_right = NULL; - - *rb_link = node; -} - -#endif /* _LINUX_RBTREE_H */ diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 2fb117fb4b6..2fdcfee8758 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -1,7 +1,7 @@ #ifndef STRLIST_H_ #define STRLIST_H_ -#include "rbtree.h" +#include #include struct str_node { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2c48ace8203..3a275db1fa6 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -4,7 +4,7 @@ #include #include "types.h" #include "list.h" -#include "rbtree.h" +#include struct symbol { struct rb_node rb_node; -- cgit v1.2.3 From 5da50258584469ddfee6545feb4eb2252a8d7e7b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 1 Jul 2009 14:46:08 -0300 Subject: perf_counter tools: Share list.h with the kernel The copy we were using came from another copy I did for the dwarves (pahole) package, that came from the kernel years ago. The only function that is used by the perf tools and that isn't in the kernel is list_del_range, that I'm leaving in the perf tools only for now. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20090701174608.GA5823@ghostprotocols.net> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 3 +- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/util/callchain.h | 2 +- tools/perf/util/include/asm/system.h | 1 + tools/perf/util/include/linux/list.h | 18 + tools/perf/util/include/linux/poison.h | 1 + tools/perf/util/include/linux/prefetch.h | 6 + tools/perf/util/list.h | 603 ------------------------------- tools/perf/util/symbol.h | 2 +- 10 files changed, 32 insertions(+), 608 deletions(-) create mode 100644 tools/perf/util/include/asm/system.h create mode 100644 tools/perf/util/include/linux/list.h create mode 100644 tools/perf/util/include/linux/poison.h create mode 100644 tools/perf/util/include/linux/prefetch.h delete mode 100644 tools/perf/util/list.h (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index c4f48825a6e..8f7fd1b050a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -290,9 +290,10 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h LIB_H += ../../include/linux/rbtree.h +LIB_H += ../../include/linux/list.h +LIB_H += util/include/linux/list.h LIB_H += perf.h LIB_H += util/types.h -LIB_H += util/list.h LIB_H += util/levenshtein.h LIB_H += util/parse-options.h LIB_H += util/parse-events.h diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1960c22b816..132de8b28db 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -10,7 +10,7 @@ #include "util/util.h" #include "util/color.h" -#include "util/list.h" +#include #include "util/cache.h" #include #include "util/symbol.h" diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0d35e2b8881..be1b7584796 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -10,7 +10,7 @@ #include "util/util.h" #include "util/color.h" -#include "util/list.h" +#include #include "util/cache.h" #include #include "util/symbol.h" diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 0606b8fd05a..e9bd5e882f3 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -2,7 +2,7 @@ #define __PERF_CALLCHAIN_H #include "../perf.h" -#include "list.h" +#include #include #include "symbol.h" diff --git a/tools/perf/util/include/asm/system.h b/tools/perf/util/include/asm/system.h new file mode 100644 index 00000000000..710cecca972 --- /dev/null +++ b/tools/perf/util/include/asm/system.h @@ -0,0 +1 @@ +/* Empty */ diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h new file mode 100644 index 00000000000..dbe4b814382 --- /dev/null +++ b/tools/perf/util/include/linux/list.h @@ -0,0 +1,18 @@ +#include "../../../../include/linux/list.h" + +#ifndef PERF_LIST_H +#define PERF_LIST_H +/** + * list_del_range - deletes range of entries from list. + * @begin: first element in the range to delete from the list. + * @end: last element in the range to delete from the list. + * Note: list_empty on the range of entries does not return true after this, + * the entries is in an undefined state. + */ +static inline void list_del_range(struct list_head *begin, + struct list_head *end) +{ + begin->prev->next = end->next; + end->next->prev = begin->prev; +} +#endif diff --git a/tools/perf/util/include/linux/poison.h b/tools/perf/util/include/linux/poison.h new file mode 100644 index 00000000000..fef6dbc9ce1 --- /dev/null +++ b/tools/perf/util/include/linux/poison.h @@ -0,0 +1 @@ +#include "../../../../include/linux/poison.h" diff --git a/tools/perf/util/include/linux/prefetch.h b/tools/perf/util/include/linux/prefetch.h new file mode 100644 index 00000000000..7841e485d8c --- /dev/null +++ b/tools/perf/util/include/linux/prefetch.h @@ -0,0 +1,6 @@ +#ifndef PERF_LINUX_PREFETCH_H +#define PERF_LINUX_PREFETCH_H + +static inline void prefetch(void *a __attribute__((unused))) { } + +#endif diff --git a/tools/perf/util/list.h b/tools/perf/util/list.h deleted file mode 100644 index e2548e8072c..00000000000 --- a/tools/perf/util/list.h +++ /dev/null @@ -1,603 +0,0 @@ -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H -/* - Copyright (C) Cast of dozens, comes from the Linux kernel - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. -*/ - -#include - -/* - * These are non-NULL pointers that will result in page faults - * under normal circumstances, used to verify that nobody uses - * non-initialized list entries. - */ -#define LIST_POISON1 ((void *)0x00100100) -#define LIST_POISON2 ((void *)0x00200200) - -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -static inline void INIT_LIST_HEAD(struct list_head *list) -{ - list->next = list; - list->prev = list; -} - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is - * in an undefined state. - */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; -} - -/** - * list_del_range - deletes range of entries from list. - * @beging: first element in the range to delete from the list. - * @beging: first element in the range to delete from the list. - * Note: list_empty on the range of entries does not return true after this, - * the entries is in an undefined state. - */ -static inline void list_del_range(struct list_head *begin, - struct list_head *end) -{ - begin->prev->next = end->next; - end->next->prev = begin->prev; -} - -/** - * list_replace - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * Note: if 'old' was empty, it will be overwritten. - */ -static inline void list_replace(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->next->prev = new; - new->prev = old->prev; - new->prev->next = new; -} - -static inline void list_replace_init(struct list_head *old, - struct list_head *new) -{ - list_replace(old, new); - INIT_LIST_HEAD(old); -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_is_last - tests whether @list is the last entry in list @head - * @list: the entry to test - * @head: the head of the list - */ -static inline int list_is_last(const struct list_head *list, - const struct list_head *head) -{ - return list->next == head; -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. - */ -static inline int list_empty(const struct list_head *head) -{ - return head->next == head; -} - -/** - * list_empty_careful - tests whether a list is empty and not being modified - * @head: the list to test - * - * Description: - * tests whether a list is empty _and_ checks that no other CPU might be - * in the process of modifying either member (next or prev) - * - * NOTE: using list_empty_careful() without synchronization - * can only be safe if the only activity that can happen - * to the list entry is list_del_init(). Eg. it cannot be used - * if another CPU could re-list_add() it. - */ -static inline int list_empty_careful(const struct list_head *head) -{ - struct list_head *next = head->next; - return (next == head) && (next == head->prev); -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - container_of(ptr, type, member) - -/** - * list_first_entry - get the first element from a list - * @ptr: the list head to take the element from. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - * - * Note, that list is expected to be not empty. - */ -#define list_first_entry(ptr, type, member) \ - list_entry((ptr)->next, type, member) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); \ - pos = pos->next) - -/** - * __list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. - */ -#define __list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - -/** - * list_for_each_prev - iterate over a list backwards - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; pos != (head); \ - pos = pos->prev) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_reverse - iterate backwards over list of given type. - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.prev, typeof(*pos), member)) - -/** - * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue - * @pos: the type * to use as a start point - * @head: the head of the list - * @member: the name of the list_struct within the struct. - * - * Prepares a pos entry for use as a start point in list_for_each_entry_continue. - */ -#define list_prepare_entry(pos, head, member) \ - ((pos) ? : list_entry(head, typeof(*pos), member)) - -/** - * list_for_each_entry_continue - continue iteration over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Continue to iterate over list of given type, continuing after - * the current position. - */ -#define list_for_each_entry_continue(pos, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_from - iterate over list of given type from the current point - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing from current position. - */ -#define list_for_each_entry_from(pos, head, member) \ - for (; &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_continue - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing after current point, - * safe against removal of list entry. - */ -#define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_from - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type from current point, safe against - * removal of list entry. - */ -#define list_for_each_entry_safe_from(pos, n, head, member) \ - for (n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_reverse - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate backwards over list of given type, safe against removal - * of list entry. - */ -#define list_for_each_entry_safe_reverse(pos, n, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member), \ - n = list_entry(pos->member.prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.prev, typeof(*n), member)) - -/* - * Double linked lists with a single pointer list head. - * Mostly useful for hash tables where the two pointer list head is - * too wasteful. - * You lose the ability to access the tail in O(1). - */ - -struct hlist_head { - struct hlist_node *first; -}; - -struct hlist_node { - struct hlist_node *next, **pprev; -}; - -#define HLIST_HEAD_INIT { .first = NULL } -#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } -#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) -static inline void INIT_HLIST_NODE(struct hlist_node *h) -{ - h->next = NULL; - h->pprev = NULL; -} - -static inline int hlist_unhashed(const struct hlist_node *h) -{ - return !h->pprev; -} - -static inline int hlist_empty(const struct hlist_head *h) -{ - return !h->first; -} - -static inline void __hlist_del(struct hlist_node *n) -{ - struct hlist_node *next = n->next; - struct hlist_node **pprev = n->pprev; - *pprev = next; - if (next) - next->pprev = pprev; -} - -static inline void hlist_del(struct hlist_node *n) -{ - __hlist_del(n); - n->next = LIST_POISON1; - n->pprev = LIST_POISON2; -} - -static inline void hlist_del_init(struct hlist_node *n) -{ - if (!hlist_unhashed(n)) { - __hlist_del(n); - INIT_HLIST_NODE(n); - } -} - -static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - if (first) - first->pprev = &n->next; - h->first = n; - n->pprev = &h->first; -} - -/* next must be != NULL */ -static inline void hlist_add_before(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - next->pprev = &n->next; - *(n->pprev) = n; -} - -static inline void hlist_add_after(struct hlist_node *n, - struct hlist_node *next) -{ - next->next = n->next; - n->next = next; - next->pprev = &n->next; - - if(next->next) - next->next->pprev = &next->next; -} - -#define hlist_entry(ptr, type, member) container_of(ptr,type,member) - -#define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos; \ - pos = pos->next) - -#define hlist_for_each_safe(pos, n, head) \ - for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ - pos = n) - -/** - * hlist_for_each_entry - iterate over list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_continue - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_from - iterate over a hlist continuing from current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @n: another &struct hlist_node to use as temporary storage - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) - -#endif diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 3a275db1fa6..65a8449b91f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -3,7 +3,7 @@ #include #include "types.h" -#include "list.h" +#include #include struct symbol { -- cgit v1.2.3 From a92bef0f216bbf3d05c0c0709ea02e267f2b920e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Jul 2009 21:02:10 +0200 Subject: perf stat: Handle pipe read failures in perf stat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building builtin-stat.c reports the following errors: cc1: warnings being treated as errors builtin-stat.c: In function ‘run_perf_stat’: builtin-stat.c:242: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result builtin-stat.c:255: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result make: *** [builtin-stat.o] Erreur 1 This patch handles the possible pipe read failures. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246474930-6088-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 01cc07efb72..27921a8ce1a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -239,7 +239,8 @@ static int run_perf_stat(int argc __used, const char **argv) /* * Wait until the parent tells us to go. */ - read(go_pipe[0], &buf, 1); + if (read(go_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); execvp(argv[0], (char **)argv); @@ -252,7 +253,8 @@ static int run_perf_stat(int argc __used, const char **argv) */ close(child_ready_pipe[1]); close(go_pipe[0]); - read(child_ready_pipe[0], &buf, 1); + if (read(child_ready_pipe[0], &buf, 1) == -1) + perror("unable to read pipe"); close(child_ready_pipe[0]); for (counter = 0; counter < nr_counters; counter++) -- cgit v1.2.3 From 9974f496782b7612e36a143bedda858f1cb953d4 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Jul 2009 08:05:58 +0200 Subject: perf_counter tools: Make symbol loading consistently return number of loaded symbols perf_counter tools: Make symbol loading consistently return number of loaded symbols. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246514758.13293.42.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/symbol.c | 9 ++++++--- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 132de8b28db..3becc8a35be 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -172,7 +172,7 @@ static int load_kernel(void) return -1; err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); - if (err) { + if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; } else diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index be1b7584796..58d1612894f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -189,7 +189,7 @@ static int load_kernel(void) return -1; err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); - if (err) { + if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; } else diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index cdc74cfb151..97fde1d8436 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -364,7 +364,7 @@ static int parse_symbols(void) if (kernel_dso == NULL) return -1; - if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0) + if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) <= 0) goto out_delete_dso; node = rb_first(&kernel_dso->syms); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 78c2efde01b..c077b6a1469 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -146,6 +146,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb char *line = NULL; size_t n; FILE *file = fopen("/proc/kallsyms", "r"); + int count = 0; if (file == NULL) goto out_failure; @@ -188,8 +189,10 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb if (filter && filter(self, sym)) symbol__delete(sym, self->sym_priv_size); - else + else { dso__insert_symbol(self, sym); + count++; + } } /* @@ -212,7 +215,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb free(line); fclose(file); - return 0; + return count; out_delete_line: free(line); @@ -639,7 +642,7 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, if (vmlinux) err = dso__load_vmlinux(self, vmlinux, filter, verbose); - if (err < 0) + if (err <= 0) err = dso__load_kallsyms(self, filter, verbose); return err; -- cgit v1.2.3 From 208b4b4a59351011b7f212e273f2b7bc47a9c482 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Jul 2009 08:07:10 +0200 Subject: perf_counter tools: Add infrastructure to support loading of kernel module symbols Add infrastructure for module path discovery and section load addresses. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246514830.13293.44.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 + tools/perf/util/module.c | 509 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/module.h | 53 +++++ 3 files changed, 564 insertions(+) create mode 100644 tools/perf/util/module.c create mode 100644 tools/perf/util/module.h (limited to 'tools/perf') diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8f7fd1b050a..7822b3d6bac 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -306,6 +306,7 @@ LIB_H += util/strlist.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h +LIB_H += util/module.h LIB_H += util/color.h LIB_OBJS += util/abspath.o @@ -329,6 +330,7 @@ LIB_OBJS += util/usage.o LIB_OBJS += util/wrapper.o LIB_OBJS += util/sigchain.o LIB_OBJS += util/symbol.o +LIB_OBJS += util/module.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o LIB_OBJS += util/header.o diff --git a/tools/perf/util/module.c b/tools/perf/util/module.c new file mode 100644 index 00000000000..ddabe925d65 --- /dev/null +++ b/tools/perf/util/module.c @@ -0,0 +1,509 @@ +#include "util.h" +#include "../perf.h" +#include "string.h" +#include "module.h" + +#include +#include +#include +#include +#include + +static unsigned int crc32(const char *p, unsigned int len) +{ + int i; + unsigned int crc = 0; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); + } + return crc; +} + +/* module section methods */ + +struct sec_dso *sec_dso__new_dso(const char *name) +{ + struct sec_dso *self = malloc(sizeof(*self) + strlen(name) + 1); + + if (self != NULL) { + strcpy(self->name, name); + self->secs = RB_ROOT; + self->find_section = sec_dso__find_section; + } + + return self; +} + +static void sec_dso__delete_section(struct section *self) +{ + free(((void *)self)); +} + +void sec_dso__delete_sections(struct sec_dso *self) +{ + struct section *pos; + struct rb_node *next = rb_first(&self->secs); + + while (next) { + pos = rb_entry(next, struct section, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, &self->secs); + sec_dso__delete_section(pos); + } +} + +void sec_dso__delete_self(struct sec_dso *self) +{ + sec_dso__delete_sections(self); + free(self); +} + +static void sec_dso__insert_section(struct sec_dso *self, struct section *sec) +{ + struct rb_node **p = &self->secs.rb_node; + struct rb_node *parent = NULL; + const u64 hash = sec->hash; + struct section *s; + + while (*p != NULL) { + parent = *p; + s = rb_entry(parent, struct section, rb_node); + if (hash < s->hash) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&sec->rb_node, parent, p); + rb_insert_color(&sec->rb_node, &self->secs); +} + +struct section *sec_dso__find_section(struct sec_dso *self, const char *name) +{ + struct rb_node *n; + u64 hash; + int len; + + if (self == NULL) + return NULL; + + len = strlen(name); + hash = crc32(name, len); + + n = self->secs.rb_node; + + while (n) { + struct section *s = rb_entry(n, struct section, rb_node); + + if (hash < s->hash) + n = n->rb_left; + else if (hash > s->hash) + n = n->rb_right; + else { + if (!strcmp(name, s->name)) + return s; + else + n = rb_next(&s->rb_node); + } + } + + return NULL; +} + +static size_t sec_dso__fprintf_section(struct section *self, FILE *fp) +{ + return fprintf(fp, "name:%s vma:%llx path:%s\n", + self->name, self->vma, self->path); +} + +size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp) +{ + size_t ret = fprintf(fp, "dso: %s\n", self->name); + + struct rb_node *nd; + for (nd = rb_first(&self->secs); nd; nd = rb_next(nd)) { + struct section *pos = rb_entry(nd, struct section, rb_node); + ret += sec_dso__fprintf_section(pos, fp); + } + + return ret; +} + +static struct section *section__new(const char *name, const char *path) +{ + struct section *self = calloc(1, sizeof(*self)); + + if (!self) + goto out_failure; + + self->name = calloc(1, strlen(name) + 1); + if (!self->name) + goto out_failure; + + self->path = calloc(1, strlen(path) + 1); + if (!self->path) + goto out_failure; + + strcpy(self->name, name); + strcpy(self->path, path); + self->hash = crc32(self->name, strlen(name)); + + return self; + +out_failure: + if (self) { + if (self->name) + free(self->name); + if (self->path) + free(self->path); + free(self); + } + + return NULL; +} + +/* module methods */ + +struct mod_dso *mod_dso__new_dso(const char *name) +{ + struct mod_dso *self = malloc(sizeof(*self) + strlen(name) + 1); + + if (self != NULL) { + strcpy(self->name, name); + self->mods = RB_ROOT; + self->find_module = mod_dso__find_module; + } + + return self; +} + +static void mod_dso__delete_module(struct module *self) +{ + free(((void *)self)); +} + +void mod_dso__delete_modules(struct mod_dso *self) +{ + struct module *pos; + struct rb_node *next = rb_first(&self->mods); + + while (next) { + pos = rb_entry(next, struct module, rb_node); + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, &self->mods); + mod_dso__delete_module(pos); + } +} + +void mod_dso__delete_self(struct mod_dso *self) +{ + mod_dso__delete_modules(self); + free(self); +} + +static void mod_dso__insert_module(struct mod_dso *self, struct module *mod) +{ + struct rb_node **p = &self->mods.rb_node; + struct rb_node *parent = NULL; + const u64 hash = mod->hash; + struct module *m; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct module, rb_node); + if (hash < m->hash) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&mod->rb_node, parent, p); + rb_insert_color(&mod->rb_node, &self->mods); +} + +struct module *mod_dso__find_module(struct mod_dso *self, const char *name) +{ + struct rb_node *n; + u64 hash; + int len; + + if (self == NULL) + return NULL; + + len = strlen(name); + hash = crc32(name, len); + + n = self->mods.rb_node; + + while (n) { + struct module *m = rb_entry(n, struct module, rb_node); + + if (hash < m->hash) + n = n->rb_left; + else if (hash > m->hash) + n = n->rb_right; + else { + if (!strcmp(name, m->name)) + return m; + else + n = rb_next(&m->rb_node); + } + } + + return NULL; +} + +static size_t mod_dso__fprintf_module(struct module *self, FILE *fp) +{ + return fprintf(fp, "name:%s path:%s\n", self->name, self->path); +} + +size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp) +{ + struct rb_node *nd; + size_t ret; + + ret = fprintf(fp, "dso: %s\n", self->name); + + for (nd = rb_first(&self->mods); nd; nd = rb_next(nd)) { + struct module *pos = rb_entry(nd, struct module, rb_node); + + ret += mod_dso__fprintf_module(pos, fp); + } + + return ret; +} + +static struct module *module__new(const char *name, const char *path) +{ + struct module *self = calloc(1, sizeof(*self)); + + if (!self) + goto out_failure; + + self->name = calloc(1, strlen(name) + 1); + if (!self->name) + goto out_failure; + + self->path = calloc(1, strlen(path) + 1); + if (!self->path) + goto out_failure; + + strcpy(self->name, name); + strcpy(self->path, path); + self->hash = crc32(self->name, strlen(name)); + + return self; + +out_failure: + if (self) { + if (self->name) + free(self->name); + if (self->path) + free(self->path); + free(self); + } + + return NULL; +} + +static int mod_dso__load_sections(struct module *mod) +{ + int count = 0, path_len; + struct dirent *entry; + char *line = NULL; + char *dir_path; + DIR *dir; + size_t n; + + path_len = strlen("/sys/module/"); + path_len += strlen(mod->name); + path_len += strlen("/sections/"); + + dir_path = calloc(1, path_len + 1); + if (dir_path == NULL) + goto out_failure; + + strcat(dir_path, "/sys/module/"); + strcat(dir_path, mod->name); + strcat(dir_path, "/sections/"); + + dir = opendir(dir_path); + if (dir == NULL) + goto out_free; + + while ((entry = readdir(dir))) { + struct section *section; + char *path, *vma; + int line_len; + FILE *file; + + if (!strcmp(".", entry->d_name) || !strcmp("..", entry->d_name)) + continue; + + path = calloc(1, path_len + strlen(entry->d_name) + 1); + if (path == NULL) + break; + strcat(path, dir_path); + strcat(path, entry->d_name); + + file = fopen(path, "r"); + if (file == NULL) { + free(path); + break; + } + + line_len = getline(&line, &n, file); + if (line_len < 0) { + free(path); + fclose(file); + break; + } + + if (!line) { + free(path); + fclose(file); + break; + } + + line[--line_len] = '\0'; /* \n */ + + vma = strstr(line, "0x"); + if (!vma) { + free(path); + fclose(file); + break; + } + vma += 2; + + section = section__new(entry->d_name, path); + if (!section) { + fprintf(stderr, "load_sections: allocation error\n"); + free(path); + fclose(file); + break; + } + + hex2u64(vma, §ion->vma); + sec_dso__insert_section(mod->sections, section); + + free(path); + fclose(file); + count++; + } + + closedir(dir); + free(line); + free(dir_path); + + return count; + +out_free: + free(dir_path); + +out_failure: + return count; +} + +static int mod_dso__load_module_paths(struct mod_dso *self) +{ + struct utsname uts; + int count = 0, len; + char *line = NULL; + FILE *file; + char *path; + size_t n; + + if (uname(&uts) < 0) + goto out_failure; + + len = strlen("/lib/modules/"); + len += strlen(uts.release); + len += strlen("/modules.dep"); + + path = calloc(1, len); + if (path == NULL) + goto out_failure; + + strcat(path, "/lib/modules/"); + strcat(path, uts.release); + strcat(path, "/modules.dep"); + + file = fopen(path, "r"); + free(path); + if (file == NULL) + goto out_failure; + + while (!feof(file)) { + char *path, *name, *tmp; + struct module *module; + int line_len, len; + + line_len = getline(&line, &n, file); + if (line_len < 0) + break; + + if (!line) + goto out_failure; + + line[--line_len] = '\0'; /* \n */ + + path = strtok(line, ":"); + if (!path) + goto out_failure; + + name = strdup(path); + name = strtok(name, "/"); + + tmp = name; + + while (tmp) { + tmp = strtok(NULL, "/"); + if (tmp) + name = tmp; + } + name = strsep(&name, "."); + + /* Quirk: replace '-' with '_' in sound modules */ + for (len = strlen(name); len; len--) { + if (*(name+len) == '-') + *(name+len) = '_'; + } + + module = module__new(name, path); + if (!module) { + fprintf(stderr, "load_module_paths: allocation error\n"); + goto out_failure; + } + mod_dso__insert_module(self, module); + + module->sections = sec_dso__new_dso("sections"); + if (!module->sections) { + fprintf(stderr, "load_module_paths: allocation error\n"); + goto out_failure; + } + + module->active = mod_dso__load_sections(module); + + if (module->active > 0) + count++; + } + + free(line); + fclose(file); + + return count; + +out_failure: + return -1; +} + +int mod_dso__load_modules(struct mod_dso *dso) +{ + int err; + + err = mod_dso__load_module_paths(dso); + + return err; +} diff --git a/tools/perf/util/module.h b/tools/perf/util/module.h new file mode 100644 index 00000000000..8a592ef641c --- /dev/null +++ b/tools/perf/util/module.h @@ -0,0 +1,53 @@ +#ifndef _PERF_MODULE_ +#define _PERF_MODULE_ 1 + +#include +#include "../types.h" +#include +#include + +struct section { + struct rb_node rb_node; + u64 hash; + u64 vma; + char *name; + char *path; +}; + +struct sec_dso { + struct list_head node; + struct rb_root secs; + struct section *(*find_section)(struct sec_dso *, const char *name); + char name[0]; +}; + +struct module { + struct rb_node rb_node; + u64 hash; + char *name; + char *path; + struct sec_dso *sections; + int active; +}; + +struct mod_dso { + struct list_head node; + struct rb_root mods; + struct module *(*find_module)(struct mod_dso *, const char *name); + char name[0]; +}; + +struct sec_dso *sec_dso__new_dso(const char *name); +void sec_dso__delete_sections(struct sec_dso *self); +void sec_dso__delete_self(struct sec_dso *self); +size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp); +struct section *sec_dso__find_section(struct sec_dso *self, const char *name); + +struct mod_dso *mod_dso__new_dso(const char *name); +void mod_dso__delete_modules(struct mod_dso *self); +void mod_dso__delete_self(struct mod_dso *self); +size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp); +struct module *mod_dso__find_module(struct mod_dso *self, const char *name); +int mod_dso__load_modules(struct mod_dso *dso); + +#endif /* _PERF_MODULE_ */ -- cgit v1.2.3 From 6cfcc53ed4f3ecb9319e73a03f34f1eddcb644dd Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Jul 2009 08:08:36 +0200 Subject: perf_counter tools: Connect module support infrastructure to symbol loading infrastructure Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246514916.13293.46.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/symbol.c | 159 +++++++++++++++++++++++++++++++++++++++--- tools/perf/util/symbol.h | 5 +- 5 files changed, 156 insertions(+), 14 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 3becc8a35be..88205686eb6 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -171,7 +171,7 @@ static int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, 0); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 58d1612894f..38d136fedfb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -188,7 +188,7 @@ static int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, 0); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 97fde1d8436..9bb25fc3d4c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -364,7 +364,7 @@ static int parse_symbols(void) if (kernel_dso == NULL) return -1; - if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) <= 0) + if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1, 0) <= 0) goto out_delete_dso; node = rb_first(&kernel_dso->syms); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c077b6a1469..98a13114de7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -35,7 +35,7 @@ static struct symbol *symbol__new(u64 start, u64 len, self = ((void *)self) + priv_size; } self->start = start; - self->end = start + len - 1; + self->end = len ? start + len - 1 : start; memcpy(self->name, name, namelen); return self; @@ -48,8 +48,12 @@ static void symbol__delete(struct symbol *self, unsigned int priv_size) static size_t symbol__fprintf(struct symbol *self, FILE *fp) { - return fprintf(fp, " %llx-%llx %s\n", + if (!self->module) + return fprintf(fp, " %llx-%llx %s\n", self->start, self->end, self->name); + else + return fprintf(fp, " %llx-%llx %s \t[%s]\n", + self->start, self->end, self->name, self->module->name); } struct dso *dso__new(const char *name, unsigned int sym_priv_size) @@ -310,6 +314,26 @@ static inline int elf_sym__is_function(const GElf_Sym *sym) sym->st_size != 0; } +static inline int elf_sym__is_label(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_NOTYPE && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF && + sym->st_shndx != SHN_ABS; +} + +static inline const char *elf_sec__name(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return secstrs->d_buf + shdr->sh_name; +} + +static inline int elf_sec__is_text(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return strstr(elf_sec__name(shdr, secstrs), "text") != NULL; +} + static inline const char *elf_sym__name(const GElf_Sym *sym, const Elf_Data *symstrs) { @@ -451,9 +475,9 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, } static int dso__load_sym(struct dso *self, int fd, const char *name, - symbol_filter_t filter, int verbose) + symbol_filter_t filter, int verbose, struct module *mod) { - Elf_Data *symstrs; + Elf_Data *symstrs, *secstrs; uint32_t nr_syms; int err = -1; uint32_t index; @@ -461,7 +485,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, GElf_Shdr shdr; Elf_Data *syms; GElf_Sym sym; - Elf_Scn *sec, *sec_dynsym; + Elf_Scn *sec, *sec_dynsym, *sec_strndx; Elf *elf; size_t dynsym_idx; int nr = 0; @@ -520,6 +544,14 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, if (symstrs == NULL) goto out_elf_end; + sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); + if (sec_strndx == NULL) + goto out_elf_end; + + secstrs = elf_getdata(sec_strndx, NULL); + if (symstrs == NULL) + goto out_elf_end; + nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); @@ -529,8 +561,11 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; u64 obj_start; + struct section *section = NULL; + int is_label = elf_sym__is_label(&sym); + const char *section_name; - if (!elf_sym__is_function(&sym)) + if (!is_label && !elf_sym__is_function(&sym)) continue; sec = elf_getscn(elf, sym.st_shndx); @@ -538,6 +573,11 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, goto out_elf_end; gelf_getshdr(sec, &shdr); + + if (is_label && !elf_sec__is_text(&shdr, secstrs)) + continue; + + section_name = elf_sec__name(&shdr, secstrs); obj_start = sym.st_value; if (self->prelinked) { @@ -548,6 +588,17 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, sym.st_value -= shdr.sh_addr - shdr.sh_offset; } + if (mod) { + section = mod->sections->find_section(mod->sections, section_name); + if (section) + sym.st_value += section->vma; + else { + fprintf(stderr, "dso__load_sym() module %s lookup of %s failed\n", + mod->name, section_name); + goto out_elf_end; + } + } + f = symbol__new(sym.st_value, sym.st_size, elf_sym__name(&sym, symstrs), self->sym_priv_size, obj_start, verbose); @@ -557,6 +608,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, if (filter && filter(self, f)) symbol__delete(f, self->sym_priv_size); else { + f->module = mod; dso__insert_symbol(self, f); nr++; } @@ -606,7 +658,7 @@ more: fd = open(name, O_RDONLY); } while (fd < 0); - ret = dso__load_sym(self, fd, name, filter, verbose); + ret = dso__load_sym(self, fd, name, filter, verbose, NULL); close(fd); /* @@ -620,6 +672,86 @@ out: return ret; } +static int dso__load_module(struct dso *self, struct mod_dso *mods, const char *name, + symbol_filter_t filter, int verbose) +{ + struct module *mod = mod_dso__find_module(mods, name); + int err = 0, fd; + + if (mod == NULL || !mod->active) + return err; + + fd = open(mod->path, O_RDONLY); + + if (fd < 0) + return err; + + err = dso__load_sym(self, fd, name, filter, verbose, mod); + close(fd); + + return err; +} + +int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose) +{ + struct mod_dso *mods = mod_dso__new_dso("modules"); + struct module *pos; + struct rb_node *next; + int err; + + err = mod_dso__load_modules(mods); + + if (err <= 0) + return err; + + /* + * Iterate over modules, and load active symbols. + */ + next = rb_first(&mods->mods); + while (next) { + pos = rb_entry(next, struct module, rb_node); + err = dso__load_module(self, mods, pos->name, filter, verbose); + + if (err < 0) + break; + + next = rb_next(&pos->rb_node); + } + + if (err < 0) { + mod_dso__delete_modules(mods); + mod_dso__delete_self(mods); + } + + return err; +} + +static inline void dso__fill_symbol_holes(struct dso *self) +{ + struct symbol *prev = NULL; + struct rb_node *nd; + + for (nd = rb_last(&self->syms); nd; nd = rb_prev(nd)) { + struct symbol *pos = rb_entry(nd, struct symbol, rb_node); + + if (prev) { + u64 hole = 0; + int alias = pos->start == prev->start; + + if (!alias) + hole = prev->start - pos->end - 1; + + if (hole || alias) { + if (alias) + pos->end = prev->end; + else if (hole) + pos->end = prev->start - 1; + } + } + prev = pos; + } +} + static int dso__load_vmlinux(struct dso *self, const char *vmlinux, symbol_filter_t filter, int verbose) { @@ -628,19 +760,26 @@ static int dso__load_vmlinux(struct dso *self, const char *vmlinux, if (fd < 0) return -1; - err = dso__load_sym(self, fd, vmlinux, filter, verbose); + err = dso__load_sym(self, fd, vmlinux, filter, verbose, NULL); + + if (err > 0) + dso__fill_symbol_holes(self); + close(fd); return err; } int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose) + symbol_filter_t filter, int verbose, int modules) { int err = -1; - if (vmlinux) + if (vmlinux) { err = dso__load_vmlinux(self, vmlinux, filter, verbose); + if (err > 0 && modules) + err = dso__load_modules(self, filter, verbose); + } if (err <= 0) err = dso__load_kallsyms(self, filter, verbose); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 65a8449b91f..4e141a30911 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,6 +5,7 @@ #include "types.h" #include #include +#include "module.h" struct symbol { struct rb_node rb_node; @@ -13,6 +14,7 @@ struct symbol { u64 obj_start; u64 hist_sum; u64 *hist; + struct module *module; void *priv; char name[0]; }; @@ -41,7 +43,8 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose); + symbol_filter_t filter, int verbose, int modules); +int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); int dso__load(struct dso *self, symbol_filter_t filter, int verbose); size_t dso__fprintf(struct dso *self, FILE *fp); -- cgit v1.2.3 From 429764873cf3fc3e73142872a674bb27cda589c1 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Jul 2009 08:09:46 +0200 Subject: perf_counter tools: Enable kernel module symbol loading in tools Add the -m/--modules option to perf report and perf annotate, which enables live module symbol/image loading. To be used with -k/--vmlinux. (Also give perf annotate a -P/--full-paths option.) Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246514986.13293.48.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 25 ++++++++++++++++++++----- tools/perf/builtin-report.c | 9 ++++++++- tools/perf/builtin-top.c | 12 ++++++++++-- 3 files changed, 38 insertions(+), 8 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 88205686eb6..08ea6c5329d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -43,6 +43,10 @@ static int dump_trace = 0; static int verbose; +static int modules; + +static int full_paths; + static int print_line; static unsigned long page_size; @@ -171,7 +175,7 @@ static int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, 0); + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, modules); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; @@ -1268,19 +1272,25 @@ static void print_summary(char *filename) static void annotate_sym(struct dso *dso, struct symbol *sym) { - char *filename = dso->name; + char *filename = dso->name, *d_filename; u64 start, end, len; char command[PATH_MAX*2]; FILE *file; if (!filename) return; - if (dso == kernel_dso) + if (sym->module) + filename = sym->module->path; + else if (dso == kernel_dso) filename = vmlinux; start = sym->obj_start; if (!start) start = sym->start; + if (full_paths) + d_filename = filename; + else + d_filename = basename(filename); end = start + sym->end - sym->start + 1; len = sym->end - sym->start; @@ -1291,13 +1301,14 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) } printf("\n\n------------------------------------------------\n"); - printf(" Percent | Source code & Disassembly of %s\n", filename); + printf(" Percent | Source code & Disassembly of %s\n", d_filename); printf("------------------------------------------------\n"); if (verbose >= 2) printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); - sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (u64)start, (u64)end, filename); + sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s", + (u64)start, (u64)end, filename, filename); if (verbose >= 3) printf("doing: %s\n", command); @@ -1472,8 +1483,12 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_BOOLEAN('m', "modules", &modules, + "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('l', "print-line", &print_line, "print matching source lines (may be slow)"), + OPT_BOOLEAN('P', "full-paths", &full_paths, + "Don't shorten the displayed pathnames"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 38d136fedfb..b44476ca239 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -46,6 +46,8 @@ static int dump_trace = 0; static int verbose; #define eprintf(x...) do { if (verbose) fprintf(stderr, x); } while (0) +static int modules; + static int full_paths; static unsigned long page_size; @@ -188,7 +190,7 @@ static int load_kernel(void) if (!kernel_dso) return -1; - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, 0); + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose, modules); if (err <= 0) { dso__delete(kernel_dso); kernel_dso = NULL; @@ -648,6 +650,9 @@ sort__sym_print(FILE *fp, struct hist_entry *self) ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 'k' : self->dso == hypervisor_dso ? 'h' : '.', self->sym->name); + + if (self->sym->module) + ret += fprintf(fp, "\t[%s]", self->sym->module->name); } else { ret += fprintf(fp, "%#016llx", (u64)self->ip); } @@ -1710,6 +1715,8 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_BOOLEAN('m', "modules", &modules, + "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 9bb25fc3d4c..aa044ea1482 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -66,6 +66,7 @@ static unsigned int page_size; static unsigned int mmap_pages = 16; static int freq = 0; static int verbose = 0; +static char *vmlinux = NULL; static char *sym_filter; static unsigned long filter_start; @@ -265,7 +266,10 @@ static void print_sym_table(void) printf("%9.1f %10ld - ", syme->weight, syme->snap_count); color_fprintf(stdout, color, "%4.1f%%", pcnt); - printf(" - %016llx : %s\n", sym->start, sym->name); + printf(" - %016llx : %s", sym->start, sym->name); + if (sym->module) + printf("\t[%s]", sym->module->name); + printf("\n"); } } @@ -359,12 +363,13 @@ static int parse_symbols(void) { struct rb_node *node; struct symbol *sym; + int modules = vmlinux ? 1 : 0; kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); if (kernel_dso == NULL) return -1; - if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1, 0) <= 0) + if (dso__load_kernel(kernel_dso, vmlinux, symbol_filter, verbose, modules) <= 0) goto out_delete_dso; node = rb_first(&kernel_dso->syms); @@ -680,6 +685,7 @@ static const struct option options[] = { "system-wide collection from all CPUs"), OPT_INTEGER('C', "CPU", &profile_cpu, "CPU to profile on"), + OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), OPT_INTEGER('r', "realtime", &realtime_prio, @@ -709,6 +715,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; + symbol__init(); + page_size = sysconf(_SC_PAGE_SIZE); argc = parse_options(argc, argv, options, top_usage, 0); -- cgit v1.2.3 From 14f4654cbd531d48651e005cf05907c14bddb193 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 17:58:19 +0200 Subject: perf_counter tools: Create new chain_for_each_child() iterator Iterating through children of a node in the callchain tree shows something that may be quite confusing at a first glance. The head is the children field of the parent and the list nodes are in the brothers field of the children. This is because the childs are linked to the parent as a list of "brothers" using the "children" list of the parent as a head: --------------- | Parent (head) |------------------------------------- --------------- | | | children | | | ----------- ----------- | | 1st child |---brother---| 2nd child |---brother----- ----------- ----------- This makes the following strange pattern often occuring: list_for_each_entry(child, &parent->children, brothers) { // do something with children } Abstract it to chain_for_each_child() to factorize and simplify this pattern. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246550301-8954-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3dceabd9b5e..3c4a91fea62 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -16,6 +16,9 @@ #include "callchain.h" +#define chain_for_each_child(child, parent) \ + list_for_each_entry(child, &parent->children, brothers) + static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) @@ -46,7 +49,7 @@ void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) { struct callchain_node *child; - list_for_each_entry(child, &node->children, brothers) + chain_for_each_child(child, node) sort_chain_to_rbtree(rb_root, child); if (node->hit) @@ -77,7 +80,7 @@ create_child(struct callchain_node *parent, bool inherit_children) list_splice(&parent->children, &new->children); INIT_LIST_HEAD(&parent->children); - list_for_each_entry(next, &new->children, brothers) + chain_for_each_child(next, new) next->parent = new; } list_add_tail(&new->brothers, &parent->children); @@ -173,7 +176,7 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, struct callchain_node *rnode; /* lookup in childrens */ - list_for_each_entry(rnode, &root->children, brothers) { + chain_for_each_child(rnode, root) { unsigned int ret = __append_chain(rnode, chain, start, syms); if (!ret) -- cgit v1.2.3 From 5a4b181721375700124513cdd9f97056e1c66675 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 17:58:20 +0200 Subject: perf_counter tools: Add new OPT_CALLBACK_DEFAULT option There is no predefined macro to create an option that can have a custom value or a default one if none is given. This patch provides a new helper OPT_CALLBACK_DEFAULT() which defines such kind of option. For example, considering an option -c, we want to get the default value in the following cases: perf command -c -d perf command -d -c And the foo value when it's given: perf command -c foo -d perf command -d -c foo That's also why PARSE_OPT_LASTARG_DEFAULT is extended here to support default values whatever the position of the option, not only in the end. Should it now be renamed to PARSE_OPT_ARG_DEFAULT ? Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo Cc: git@vger.kernel.org LKML-Reference: <1246550301-8954-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/parse-options.c | 3 ++- tools/perf/util/parse-options.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 9a897b7cce7..1bf67190c82 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -20,7 +20,8 @@ static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, if (p->opt) { *arg = p->opt; p->opt = NULL; - } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { + } else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 || + **(p->argv + 1) == '-')) { *arg = (const char *)opt->defval; } else if (p->argc > 1) { p->argc--; diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 15c8aba9c62..8aa3464c709 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -104,6 +104,8 @@ struct option { { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) } +#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } /* parse_options() will filter out the processed options and leave the * non-option argments in argv[]. -- cgit v1.2.3 From 4eb3e4788b8a5e220a0aeb590f88c28850726ebe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 17:58:21 +0200 Subject: perf report: Add support for callchain graph output Currently, the printing of callchains is done in a single vertical level, this is the "flat" mode: 8.25% [k] copy_user_generic_string 4.19% copy_user_generic_string generic_file_aio_read do_sync_read vfs_read sys_pread64 system_call_fastpath pread64 This patch introduces a new "graph" mode which provides a hierarchical output of factorized paths recursively sorted: 8.25% [k] copy_user_generic_string | |--4.31%-- generic_file_aio_read | do_sync_read | vfs_read | | | |--4.19%-- sys_pread64 | | system_call_fastpath | | pread64 | | | --0.12%-- sys_read | system_call_fastpath | __read | |--3.24%-- generic_file_buffered_write | __generic_file_aio_write_nolock | generic_file_aio_write | do_sync_write | reiserfs_file_write | vfs_write | | | |--3.14%-- sys_pwrite64 | | system_call_fastpath | | __pwrite64 | | | --0.10%-- sys_write [...] The command line has then changed. By providing the -c option, the callchain will output in the flat mode by default. But you can override it: perf report -c graph or perf report -c flat You can also pass the abreviated mode: perf report -c g or perf report -c gra will both make use of the graph mode. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246550301-8954-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 141 +++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/callchain.c | 51 +++++++++++++--- tools/perf/util/callchain.h | 11 +++- 3 files changed, 185 insertions(+), 18 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b44476ca239..0ca46386d93 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,6 +59,7 @@ static regex_t parent_regex; static int exclude_other = 1; static int callchain; +static enum chain_mode callchain_mode; static u64 sample_type; @@ -787,8 +788,103 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } +static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask) +{ + int i; + size_t ret = 0; + + ret += fprintf(fp, "%s", " "); + + for (i = 0; i < depth; i++) + if (depth_mask & (1 << i)) + ret += fprintf(fp, "| "); + else + ret += fprintf(fp, " "); + + ret += fprintf(fp, "\n"); + + return ret; +} static size_t -callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) +ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, + int depth_mask, int count, u64 total_samples, + int hits) +{ + int i; + size_t ret = 0; + + ret += fprintf(fp, "%s", " "); + for (i = 0; i < depth; i++) { + if (depth_mask & (1 << i)) + ret += fprintf(fp, "|"); + else + ret += fprintf(fp, " "); + if (!count && i == depth - 1) { + double percent; + + percent = hits * 100.0 / total_samples; + ret += fprintf(fp, "--%2.2f%%-- ", percent); + } else + ret += fprintf(fp, "%s", " "); + } + if (chain->sym) + ret += fprintf(fp, "%s\n", chain->sym->name); + else + ret += fprintf(fp, "%p\n", (void *)(long)chain->ip); + + return ret; +} + +static size_t +callchain__fprintf_graph(FILE *fp, struct callchain_node *self, + u64 total_samples, int depth, int depth_mask) +{ + struct rb_node *node, *next; + struct callchain_node *child; + struct callchain_list *chain; + int new_depth_mask = depth_mask; + size_t ret = 0; + int i; + + node = rb_first(&self->rb_root); + while (node) { + child = rb_entry(node, struct callchain_node, rb_node); + + /* + * The depth mask manages the output of pipes that show + * the depth. We don't want to keep the pipes of the current + * level for the last child of this depth + */ + next = rb_next(node); + if (!next) + new_depth_mask &= ~(1 << (depth - 1)); + + /* + * But we keep the older depth mask for the line seperator + * to keep the level link until we reach the last child + */ + ret += ipchain__fprintf_graph_line(fp, depth, depth_mask); + i = 0; + list_for_each_entry(chain, &child->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + ret += ipchain__fprintf_graph(fp, chain, depth, + new_depth_mask, i++, + total_samples, + child->cumul_hit); + } + ret += callchain__fprintf_graph(fp, child, total_samples, + depth + 1, + new_depth_mask | (1 << depth)); + node = next; + } + + return ret; +} + +static size_t +callchain__fprintf_flat(FILE *fp, struct callchain_node *self, + u64 total_samples) { struct callchain_list *chain; size_t ret = 0; @@ -796,7 +892,7 @@ callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples) if (!self) return 0; - ret += callchain__fprintf(fp, self->parent, total_samples); + ret += callchain__fprintf_flat(fp, self->parent, total_samples); list_for_each_entry(chain, &self->val, list) { @@ -826,8 +922,13 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, chain = rb_entry(rb_node, struct callchain_node, rb_node); percent = chain->hit * 100.0 / total_samples; - ret += fprintf(fp, " %6.2f%%\n", percent); - ret += callchain__fprintf(fp, chain, total_samples); + if (callchain_mode == FLAT) { + ret += fprintf(fp, " %6.2f%%\n", percent); + ret += callchain__fprintf_flat(fp, chain, total_samples); + } else if (callchain_mode == GRAPH) { + ret += callchain__fprintf_graph(fp, chain, + total_samples, 1, 1); + } ret += fprintf(fp, "\n"); rb_node = rb_next(rb_node); } @@ -1129,8 +1230,12 @@ static void output__insert_entry(struct hist_entry *he) struct rb_node *parent = NULL; struct hist_entry *iter; - if (callchain) - sort_chain_to_rbtree(&he->sorted_chain, &he->callchain); + if (callchain) { + if (callchain_mode == FLAT) + sort_chain_flat(&he->sorted_chain, &he->callchain); + else if (callchain_mode == GRAPH) + sort_chain_graph(&he->sorted_chain, &he->callchain); + } while (*p != NULL) { parent = *p; @@ -1702,6 +1807,26 @@ done: return rc; } +static int +parse_callchain_opt(const struct option *opt __used, const char *arg, + int unset __used) +{ + callchain = 1; + + if (!arg) + return 0; + + if (!strncmp(arg, "graph", strlen(arg))) + callchain_mode = GRAPH; + + else if (!strncmp(arg, "flat", strlen(arg))) + callchain_mode = FLAT; + else + return -1; + + return 0; +} + static const char * const report_usage[] = { "perf report [] ", NULL @@ -1725,7 +1850,9 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), - OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"), + OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type", + "Display callchains with output_type: flat, graph. " + "Default to flat", &parse_callchain_opt, "flat"), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3c4a91fea62..a9873aafcd9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -19,9 +19,9 @@ #define chain_for_each_child(child, parent) \ list_for_each_entry(child, &parent->children, brothers) - static void -rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) +rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, + enum chain_mode mode) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -31,10 +31,22 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node); - if (rnode->hit < chain->hit) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; + switch (mode) { + case FLAT: + if (rnode->hit < chain->hit) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + break; + case GRAPH: + if (rnode->cumul_hit < chain->cumul_hit) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + break; + default: + break; + } } rb_link_node(&chain->rb_node, parent, p); @@ -45,15 +57,36 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain) * Once we get every callchains from the stream, we can now * sort them by hit */ -void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node) +void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node) { struct callchain_node *child; chain_for_each_child(child, node) - sort_chain_to_rbtree(rb_root, child); + sort_chain_flat(rb_root, child); if (node->hit) - rb_insert_callchain(rb_root, node); + rb_insert_callchain(rb_root, node, FLAT); +} + +static void __sort_chain_graph(struct callchain_node *node) +{ + struct callchain_node *child; + + node->rb_root = RB_ROOT; + node->cumul_hit = node->hit; + + chain_for_each_child(child, node) { + __sort_chain_graph(child); + rb_insert_callchain(&node->rb_root, child, GRAPH); + node->cumul_hit += child->cumul_hit; + } +} + +void +sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root) +{ + __sort_chain_graph(chain_root); + rb_root->rb_node = chain_root->rb_root.rb_node; } /* diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index e9bd5e882f3..dfa56008d9a 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -6,15 +6,21 @@ #include #include "symbol.h" +enum chain_mode { + FLAT, + GRAPH +}; struct callchain_node { struct callchain_node *parent; struct list_head brothers; struct list_head children; struct list_head val; - struct rb_node rb_node; + struct rb_node rb_node; /* to sort nodes in an rbtree */ + struct rb_root rb_root; /* sorted tree of children */ unsigned int val_nr; u64 hit; + u64 cumul_hit; /* hit + hits of children */ }; struct callchain_list { @@ -32,5 +38,6 @@ static inline void callchain_init(struct callchain_node *node) void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node); +void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node); +void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node); #endif -- cgit v1.2.3 From c20ab37ef30f4a874cf27e84c12c73e584e6f5cc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 20:14:33 +0200 Subject: perf_counter tools: Set the minimum percent for callchains to be displayed Callchains output may become a burden on a trace because even rarely hit site are exposed. This can be too much information. Let the user set a threshold as a minimum percent of hits using the new pattern for the -c option: -c mode,min_percent Example: $ perf report -s sym -c flat,4 8.25% [k] copy_user_generic_string 4.19% copy_user_generic_string generic_file_aio_read do_sync_read vfs_read sys_pread64 system_call_fastpath pread64 5.39% [k] search_by_key 4.63% 0x00000000009e0a 2.36% [k] memcpy_c [...] $ perf report -s sym -c graph,2 8.25% [k] copy_user_generic_string | |--4.31%-- generic_file_aio_read | do_sync_read | vfs_read | | | --4.19%-- sys_pread64 | system_call_fastpath | pread64 | --3.24%-- generic_file_buffered_write __generic_file_aio_write_nolock generic_file_aio_write do_sync_write reiserfs_file_write vfs_write | --3.14%-- sys_pwrite64 system_call_fastpath __pwrite64 5.39% [k] search_by_key | --2.23%-- reiserfs_update_sd_size 4.63% 0x00000000009e0a 2.36% [k] memcpy_c [...] You can also omit it and it will default to 0. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246558475-10624-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 45 ++++++++++++++++++++++++++++++++++----------- tools/perf/util/callchain.c | 19 +++++++++++-------- tools/perf/util/callchain.h | 6 ++++-- 3 files changed, 49 insertions(+), 21 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0ca46386d93..e8c98179fe4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -60,6 +60,7 @@ static regex_t parent_regex; static int exclude_other = 1; static int callchain; static enum chain_mode callchain_mode; +static double callchain_min_percent = 0.0; static u64 sample_type; @@ -1224,7 +1225,7 @@ static void collapse__resort(void) static struct rb_root output_hists; -static void output__insert_entry(struct hist_entry *he) +static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits) { struct rb_node **p = &output_hists.rb_node; struct rb_node *parent = NULL; @@ -1232,9 +1233,11 @@ static void output__insert_entry(struct hist_entry *he) if (callchain) { if (callchain_mode == FLAT) - sort_chain_flat(&he->sorted_chain, &he->callchain); + sort_chain_flat(&he->sorted_chain, &he->callchain, + min_callchain_hits); else if (callchain_mode == GRAPH) - sort_chain_graph(&he->sorted_chain, &he->callchain); + sort_chain_graph(&he->sorted_chain, &he->callchain, + min_callchain_hits); } while (*p != NULL) { @@ -1251,11 +1254,14 @@ static void output__insert_entry(struct hist_entry *he) rb_insert_color(&he->rb_node, &output_hists); } -static void output__resort(void) +static void output__resort(u64 total_samples) { struct rb_node *next; struct hist_entry *n; struct rb_root *tree = &hist; + u64 min_callchain_hits; + + min_callchain_hits = total_samples * (callchain_min_percent / 100); if (sort__need_collapse) tree = &collapse_hists; @@ -1267,7 +1273,7 @@ static void output__resort(void) next = rb_next(&n->rb_node); rb_erase(&n->rb_node, tree); - output__insert_entry(n); + output__insert_entry(n, min_callchain_hits); } } @@ -1801,7 +1807,7 @@ done: dsos__fprintf(stdout); collapse__resort(); - output__resort(); + output__resort(total); output__fprintf(stdout, total); return rc; @@ -1811,19 +1817,36 @@ static int parse_callchain_opt(const struct option *opt __used, const char *arg, int unset __used) { + char *tok; + char *endptr; + callchain = 1; if (!arg) return 0; - if (!strncmp(arg, "graph", strlen(arg))) + tok = strtok((char *)arg, ","); + if (!tok) + return -1; + + /* get the output mode */ + if (!strncmp(tok, "graph", strlen(arg))) callchain_mode = GRAPH; - else if (!strncmp(arg, "flat", strlen(arg))) + else if (!strncmp(tok, "flat", strlen(arg))) callchain_mode = FLAT; else return -1; + /* get the min percentage */ + tok = strtok(NULL, ","); + if (!tok) + return 0; + + callchain_min_percent = strtod(tok, &endptr); + if (tok == endptr) + return -1; + return 0; } @@ -1850,9 +1873,9 @@ static const struct option options[] = { "regex filter to identify parent, see: '--sort parent'"), OPT_BOOLEAN('x', "exclude-other", &exclude_other, "Only display entries with parent-match"), - OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type", - "Display callchains with output_type: flat, graph. " - "Default to flat", &parse_callchain_opt, "flat"), + OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type,min_percent", + "Display callchains using output_type and min percent threshold. " + "Default: flat,0", &parse_callchain_opt, "flat,100"), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index a9873aafcd9..c9900fe6b8b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -57,18 +57,19 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, * Once we get every callchains from the stream, we can now * sort them by hit */ -void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node) +void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit) { struct callchain_node *child; chain_for_each_child(child, node) - sort_chain_flat(rb_root, child); + sort_chain_flat(rb_root, child, min_hit); - if (node->hit) + if (node->hit && node->hit >= min_hit) rb_insert_callchain(rb_root, node, FLAT); } -static void __sort_chain_graph(struct callchain_node *node) +static void __sort_chain_graph(struct callchain_node *node, u64 min_hit) { struct callchain_node *child; @@ -76,16 +77,18 @@ static void __sort_chain_graph(struct callchain_node *node) node->cumul_hit = node->hit; chain_for_each_child(child, node) { - __sort_chain_graph(child); - rb_insert_callchain(&node->rb_root, child, GRAPH); + __sort_chain_graph(child, min_hit); + if (child->cumul_hit >= min_hit) + rb_insert_callchain(&node->rb_root, child, GRAPH); node->cumul_hit += child->cumul_hit; } } void -sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root) +sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root, + u64 min_hit) { - __sort_chain_graph(chain_root); + __sort_chain_graph(chain_root, min_hit); rb_root->rb_node = chain_root->rb_root.rb_node; } diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index dfa56008d9a..f3e4776e743 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -38,6 +38,8 @@ static inline void callchain_init(struct callchain_node *node) void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node); -void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node); +void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit); +void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit); #endif -- cgit v1.2.3 From 1e11fd82d247e4e48a1d6c49402214434538d3fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 20:14:34 +0200 Subject: perf_counter tools: Provide helper to print percents color Among perf annotate, perf report and perf top, we can find the common colored printing of percents according to the following rules: High overhead = > 5%, colored in red Mid overhead = > 0.5%, colored in green Low overhead = < 0.5%, default color Factorize these multiple checks in a single function named percent_color_fprintf() and also provide a get_percent_color() for sites which print percentages and other things at the same time. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246558475-10624-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-annotate.c | 26 ++------------------------ tools/perf/builtin-report.c | 21 +++------------------ tools/perf/builtin-top.c | 15 +-------------- tools/perf/util/color.c | 27 +++++++++++++++++++++++++++ tools/perf/util/color.h | 5 +++++ 5 files changed, 38 insertions(+), 56 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 08ea6c5329d..5f9eefecc57 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -25,10 +25,6 @@ #define SHOW_USER 2 #define SHOW_HV 4 -#define MIN_GREEN 0.5 -#define MIN_RED 5.0 - - static char const *input_name = "perf.data"; static char *vmlinux = "vmlinux"; @@ -1047,24 +1043,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static char *get_color(double percent) -{ - char *color = PERF_COLOR_NORMAL; - - /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (percent >= MIN_RED) - color = PERF_COLOR_RED; - else { - if (percent > MIN_GREEN) - color = PERF_COLOR_GREEN; - } - return color; -} - static int parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) { @@ -1126,7 +1104,7 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) } else if (sym->hist_sum) percent = 100.0 * hits / sym->hist_sum; - color = get_color(percent); + color = get_percent_color(percent); /* * Also color the filename and line if needed, with @@ -1262,7 +1240,7 @@ static void print_summary(char *filename) sym_ext = rb_entry(node, struct sym_ext, node); percent = sym_ext->percent; - color = get_color(percent); + color = get_percent_color(percent); path = sym_ext->path; color_fprintf(stdout, color, " %7.2f %s", percent, path); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e8c98179fe4..c9dbe331549 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -947,25 +947,10 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) if (exclude_other && !self->parent) return 0; - if (total_samples) { - double percent = self->count * 100.0 / total_samples; - char *color = PERF_COLOR_NORMAL; - - /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (percent >= 5.0) { - color = PERF_COLOR_RED; - } else { - if (percent >= 0.5) - color = PERF_COLOR_GREEN; - } - - ret = color_fprintf(fp, color, " %6.2f%%", + if (total_samples) + ret = percent_color_fprintf(fp, " %6.2f%%", (self->count * 100.0) / total_samples); - } else + else ret = fprintf(fp, "%12Ld ", self->count); list_for_each_entry(se, &hist_entry__sort_list, list) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index aa044ea1482..95d5c0ae375 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -239,7 +239,6 @@ static void print_sym_table(void) for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); struct symbol *sym = (struct symbol *)(syme + 1); - char *color = PERF_COLOR_NORMAL; double pcnt; if (++printed > print_entries || syme->snap_count < count_filter) @@ -248,24 +247,12 @@ static void print_sym_table(void) pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / sum_ksamples)); - /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (pcnt >= 5.0) { - color = PERF_COLOR_RED; - } else { - if (pcnt >= 0.5) - color = PERF_COLOR_GREEN; - } - if (nr_counters == 1) printf("%20.2f - ", syme->weight); else printf("%9.1f %10ld - ", syme->weight, syme->snap_count); - color_fprintf(stdout, color, "%4.1f%%", pcnt); + percent_color_fprintf(stdout, "%4.1f%%", pcnt); printf(" - %016llx : %s", sym->start, sym->name); if (sym->module) printf("\t[%s]", sym->module->name); diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 26f82318b86..90a044d1fe7 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -242,4 +242,31 @@ int color_fwrite_lines(FILE *fp, const char *color, return 0; } +char *get_percent_color(double percent) +{ + char *color = PERF_COLOR_NORMAL; + + /* + * We color high-overhead entries in red, mid-overhead + * entries in green - and keep the low overhead places + * normal: + */ + if (percent >= MIN_RED) + color = PERF_COLOR_RED; + else { + if (percent > MIN_GREEN) + color = PERF_COLOR_GREEN; + } + return color; +} +int percent_color_fprintf(FILE *fp, const char *fmt, double percent) +{ + int r; + char *color; + + color = get_percent_color(percent); + r = color_fprintf(fp, color, fmt, percent); + + return r; +} diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 5abfd379582..706cec50bd2 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -15,6 +15,9 @@ #define PERF_COLOR_CYAN "\033[36m" #define PERF_COLOR_BG_RED "\033[41m" +#define MIN_GREEN 0.5 +#define MIN_RED 5.0 + /* * This variable stores the value of color.ui */ @@ -32,5 +35,7 @@ void color_parse_mem(const char *value, int len, const char *var, char *dst); int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); +int percent_color_fprintf(FILE *fp, const char *fmt, double percent); +char *get_percent_color(double percent); #endif /* COLOR_H */ -- cgit v1.2.3 From 24b57c6988c5791628c89a8838910991abc9cc1e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 2 Jul 2009 20:14:35 +0200 Subject: perf_counter tools: Display percents of hits in callchain with overhead colors This adds the use of colors to signal at a glance the important overhead thresholds in callchains hit rates. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246558475-10624-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c9dbe331549..283773d91aa 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -824,7 +824,7 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, double percent; percent = hits * 100.0 / total_samples; - ret += fprintf(fp, "--%2.2f%%-- ", percent); + ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); } else ret += fprintf(fp, "%s", " "); } @@ -924,7 +924,8 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, chain = rb_entry(rb_node, struct callchain_node, rb_node); percent = chain->hit * 100.0 / total_samples; if (callchain_mode == FLAT) { - ret += fprintf(fp, " %6.2f%%\n", percent); + ret += percent_color_fprintf(fp, " %6.2f%%\n", + percent); ret += callchain__fprintf_flat(fp, chain, total_samples); } else if (callchain_mode == GRAPH) { ret += callchain__fprintf_graph(fp, chain, -- cgit v1.2.3 From 2d4618dce6a318ff4ec78dfe492cc3793015d540 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 23 Jun 2009 21:38:49 -0400 Subject: parisc: perf: wire up sys_perf_counter_open Signed-off-by: Kyle McMartin --- tools/perf/perf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/perf.h b/tools/perf/perf.h index ceb68aa51f7..188801b4ebc 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -19,6 +19,12 @@ #define cpu_relax() asm volatile("" ::: "memory"); #endif +#ifdef __hppa__ +#include "../../arch/parisc/include/asm/unistd.h" +#define rmb() asm volatile("" ::: "memory") +#define cpu_relax() asm volatile("" ::: "memory"); +#endif + #include #include #include -- cgit v1.2.3 From 30d7a77dd5a9720430af72f6f62f5156fe073e55 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Jul 2009 21:24:14 -0300 Subject: perf_counter tools: Adjust symbols in ET_EXEC files too Ingo Molnar wrote: > i just bisected a 'perf report' bug that would cause us to not > resolve all user-space symbols in a 'git gc' run to: > > f5812a7a336fb952d819e4427b9a2dce02368e82 is first bad commit > commit f5812a7a336fb952d819e4427b9a2dce02368e82 > Author: Arnaldo Carvalho de Melo > Date: Tue Jun 30 11:43:17 2009 -0300 > > perf_counter tools: Adjust only prelinked symbol's addresses Rename ->prelinked to ->adjust_symbols and making what was done only for prelinked libraries also to ET_EXEC binaries, such as /usr/bin/git: [acme@doppio pahole]$ readelf -h /usr/bin/git | grep Type Type: EXEC (Executable file) [acme@doppio pahole]$ And after installing the 'git-debuginfo' package, I get correct results: [acme@doppio linux-2.6-tip]$ perf report --sort comm,dso,symbol -d /usr/bin/git | head -20 # # (1139614 samples) # # Overhead Command Shared Object Symbol # ........ ................ ......................... ...... # 34.98% git /usr/bin/git [.] send_sideband 33.39% git /usr/bin/git [.] enter_repo 6.81% git /usr/bin/git [.] diff_opt_parse 4.95% git /usr/bin/git [.] is_repository_shallow 3.24% git /usr/bin/git [.] odb_mkstemp 1.39% git /usr/bin/git [.] output 1.34% git /usr/bin/git [.] xmmap 1.25% git /usr/bin/git [.] receive_pack_config 1.16% git /usr/bin/git [.] git_pathdup 0.90% git /usr/bin/git [.] read_object_with_reference 0.86% git /usr/bin/git [.] show_patch_diff 0.85% git /usr/bin/git 0x00000000095e2e 0.69% git /usr/bin/git [.] display [acme@doppio linux-2.6-tip]$ I'll check what are the last cases where we can't resolve symbols, like this 0x00000000095e2e later. And I guess this will fix the problems Mike were seeing too: [acme@doppio linux-2.6-tip]$ readelf -h ../build/perf/vmlinux | grep Type Type: EXEC (Executable file) [acme@doppio linux-2.6-tip]$ Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/util/symbol.c | 11 ++++++----- tools/perf/util/symbol.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 98a13114de7..4683b67b5ee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -555,9 +555,10 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, nr_syms = shdr.sh_size / shdr.sh_entsize; memset(&sym, 0, sizeof(sym)); - self->prelinked = elf_section_by_name(elf, &ehdr, &shdr, - ".gnu.prelink_undo", - NULL) != NULL; + self->adjust_symbols = (ehdr.e_type == ET_EXEC || + elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL); elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { struct symbol *f; u64 obj_start; @@ -580,7 +581,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, section_name = elf_sec__name(&shdr, secstrs); obj_start = sym.st_value; - if (self->prelinked) { + if (self->adjust_symbols) { if (verbose >= 2) printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); @@ -632,7 +633,7 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose) if (!name) return -1; - self->prelinked = 0; + self->adjust_symbols = 0; if (strncmp(self->name, "/tmp/perf-", 10) == 0) return dso__load_perf_map(self, filter, verbose); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 4e141a30911..7918cffb23c 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -24,7 +24,7 @@ struct dso { struct rb_root syms; struct symbol *(*find_symbol)(struct dso *, u64 ip); unsigned int sym_priv_size; - unsigned char prelinked; + unsigned char adjust_symbols; char name[0]; }; -- cgit v1.2.3 From 029e5b1636d0511ef143af3a20c83c48e44c03f3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Jul 2009 13:17:28 +0200 Subject: perf report: Annotate variable initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain versions of GCC dont see the initialization that is done here: builtin-report.c: In function ‘__cmd_report’: builtin-report.c:1038: warning: ‘syms’ may be used uninitialized in this function So annotate it with a NULL initialization. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 283773d91aa..fa937f5c3c3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1035,7 +1035,7 @@ resolve_callchain(struct thread *thread, struct map *map __used, struct ip_callchain *chain, struct hist_entry *entry) { u64 context = PERF_CONTEXT_MAX; - struct symbol **syms; + struct symbol **syms = NULL; unsigned int i; if (callchain) { -- cgit v1.2.3 From 91b4eaea93f5be95f4477554399680a53aff2343 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:17 +0200 Subject: perf report: Warn on callchain output request from non-callchain file perf report segfaults while trying to handle callchains from a non callchain data file. Instead of a segfault, print a useful message to the user. Reported-by: Jens Axboe Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fa937f5c3c3..9f9575afab0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1684,9 +1684,19 @@ static int __cmd_report(void) sample_type = perf_header__sample_type(); - if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { - fprintf(stderr, "selected --sort parent, but no callchain data\n"); - exit(-1); + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + if (sort__has_parent) { + fprintf(stderr, "selected --sort parent, but no" + " callchain data. Did you call" + " perf record without -g?\n"); + exit(-1); + } + if (callchain) { + fprintf(stderr, "selected -c but no callchain data." + " Did you call perf record without" + " -g?\n"); + exit(-1); + } } if (load_kernel() < 0) { -- cgit v1.2.3 From be9038859e56f729cc9d3b070a35fb8829a73696 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:18 +0200 Subject: perf report: Use a modifiable string for default callchain options If the user doesn't provide options to tune his callchain output (ie: if he uses -c without arguments) then the default value passed in the OPT_CALLBACK_DEFAULT() macro is used. But it's parsed later by strtok() which will replace comma separators to a zero. This may segfault as we are using a read-only string. Use a modifiable one instead, and also fix the "100%" default minimum threshold value by turning it into a 0 (output every callchains) as it was intended in the origin. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9f9575afab0..3db99fd9986 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -58,6 +58,8 @@ static char *parent_pattern = default_parent_pattern; static regex_t parent_regex; static int exclude_other = 1; + +static char callchain_default_opt[] = "flat,0"; static int callchain; static enum chain_mode callchain_mode; static double callchain_min_percent = 0.0; @@ -1871,7 +1873,7 @@ static const struct option options[] = { "Only display entries with parent-match"), OPT_CALLBACK_DEFAULT('c', "callchain", NULL, "output_type,min_percent", "Display callchains using output_type and min percent threshold. " - "Default: flat,0", &parse_callchain_opt, "flat,100"), + "Default: flat,0", &parse_callchain_opt, callchain_default_opt), OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]", "only consider symbols in these dsos"), OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]", -- cgit v1.2.3 From 94a8eb028a57854157a936c7e66b09e2559f115a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:19 +0200 Subject: perf report: Change default callchain parameters The default callchain parameters are set to use the flat mode and never filter any overhead threshold of backtrace. But flat mode is boring compared to graph mode. Also the number of callchains may be very high if none is filtered. Let's change this to set the graph view and a minimum overhead of 0.5% as default parameters. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3db99fd9986..8bd58651128 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,10 +59,10 @@ static regex_t parent_regex; static int exclude_other = 1; -static char callchain_default_opt[] = "flat,0"; +static char callchain_default_opt[] = "graph,0.5"; static int callchain; static enum chain_mode callchain_mode; -static double callchain_min_percent = 0.0; +static double callchain_min_percent = 0.5; static u64 sample_type; -- cgit v1.2.3 From e05b876c222178bc6abcfa9f23d8311731691046 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:20 +0200 Subject: perf_counter tools: callchains: Manage the cumul hits on the fly The cumul hits are the number of hits of every childs of a node plus the hits of the current nodes, required for percentage computing of a branch. Theses numbers are calculated during the sorting of the branches of the callchain tree using a depth first postfix traversal, so that cumulative hits are propagated in the right order. But if we plan to implement percentages relative to the parent and not absolute percentages (relative to the whole overhead), we need to know the cumulative hits of the parent before computing the children because the relative minimum acceptable number of entries (ie: minimum rate against the cumulative hits from the parent) is the basis to filter the children against a given rate. Then we need to handle the cumul hits on the fly to prepare the implementation of relative overhead rates. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/util/callchain.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index c9900fe6b8b..5d244afb7cd 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -74,13 +74,11 @@ static void __sort_chain_graph(struct callchain_node *node, u64 min_hit) struct callchain_node *child; node->rb_root = RB_ROOT; - node->cumul_hit = node->hit; chain_for_each_child(child, node) { __sort_chain_graph(child, min_hit); if (child->cumul_hit >= min_hit) rb_insert_callchain(&node->rb_root, child, GRAPH); - node->cumul_hit += child->cumul_hit; } } @@ -159,7 +157,7 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain, new = create_child(parent, false); fill_node(new, chain, start, syms); - new->hit = 1; + new->cumul_hit = new->hit = 1; } /* @@ -189,6 +187,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain, /* split the hits */ new->hit = parent->hit; + new->cumul_hit = parent->cumul_hit; new->val_nr = parent->val_nr - idx_local; parent->val_nr = idx_local; @@ -216,10 +215,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain, unsigned int ret = __append_chain(rnode, chain, start, syms); if (!ret) - return; + goto cumul; } /* nothing in children, add to the current node */ add_child(root, chain, start, syms); + +cumul: + root->cumul_hit++; } static int @@ -261,6 +263,8 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain, /* we match 100% of the path, increment the hit */ if (i - start == root->val_nr && i == chain->nr) { root->hit++; + root->cumul_hit++; + return 0; } -- cgit v1.2.3 From 805d127d62472f17c7d79baa001a7651afe2fa47 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 5 Jul 2009 07:39:21 +0200 Subject: perf report: Add "Fractal" mode output - support callchains with relative overhead rate The current callchain displays the overhead rates as absolute: relative to the total overhead. This patch provides relative overhead percentage, in which each branch of the callchain tree is a independant instrumentated object. This provides a 'fractal' view of the call-chain profile: each sub-graph looks like a profile in itself - relative to its parent. You can produce such output by using the "fractal" mode that you can abbreviate via f, fr, fra, frac, etc... ./perf report -s sym -c fractal Example: 8.46% [k] copy_user_generic_string | |--52.01%-- generic_file_aio_read | do_sync_read | vfs_read | | | |--97.20%-- sys_pread64 | | system_call_fastpath | | pread64 | | | --2.81%-- sys_read | system_call_fastpath | __read | |--39.85%-- generic_file_buffered_write | __generic_file_aio_write_nolock | generic_file_aio_write | do_sync_write | reiserfs_file_write | vfs_write | | | |--97.05%-- sys_pwrite64 | | system_call_fastpath | | __pwrite64 | | | --2.95%-- sys_write | system_call_fastpath | __write_nocancel [...] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <1246772361-9960-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 60 +++++++++++++++++++++----------- tools/perf/util/callchain.c | 84 +++++++++++++++++++++++++++++++++++++-------- tools/perf/util/callchain.h | 21 ++++++++---- 3 files changed, 124 insertions(+), 41 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8bd58651128..4e5cc266311 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,10 +59,15 @@ static regex_t parent_regex; static int exclude_other = 1; -static char callchain_default_opt[] = "graph,0.5"; +static char callchain_default_opt[] = "fractal,0.5"; + static int callchain; -static enum chain_mode callchain_mode; -static double callchain_min_percent = 0.5; + +static +struct callchain_param callchain_param = { + .mode = CHAIN_GRAPH_ABS, + .min_percent = 0.5 +}; static u64 sample_type; @@ -846,9 +851,15 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, struct callchain_node *child; struct callchain_list *chain; int new_depth_mask = depth_mask; + u64 new_total; size_t ret = 0; int i; + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = self->cumul_hit; + else + new_total = total_samples; + node = rb_first(&self->rb_root); while (node) { child = rb_entry(node, struct callchain_node, rb_node); @@ -873,10 +884,10 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, continue; ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, - total_samples, + new_total, child->cumul_hit); } - ret += callchain__fprintf_graph(fp, child, total_samples, + ret += callchain__fprintf_graph(fp, child, new_total, depth + 1, new_depth_mask | (1 << depth)); node = next; @@ -925,13 +936,18 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, chain = rb_entry(rb_node, struct callchain_node, rb_node); percent = chain->hit * 100.0 / total_samples; - if (callchain_mode == FLAT) { + switch (callchain_param.mode) { + case CHAIN_FLAT: ret += percent_color_fprintf(fp, " %6.2f%%\n", percent); ret += callchain__fprintf_flat(fp, chain, total_samples); - } else if (callchain_mode == GRAPH) { + break; + case CHAIN_GRAPH_ABS: /* Falldown */ + case CHAIN_GRAPH_REL: ret += callchain__fprintf_graph(fp, chain, total_samples, 1, 1); + default: + break; } ret += fprintf(fp, "\n"); rb_node = rb_next(rb_node); @@ -1219,14 +1235,9 @@ static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits) struct rb_node *parent = NULL; struct hist_entry *iter; - if (callchain) { - if (callchain_mode == FLAT) - sort_chain_flat(&he->sorted_chain, &he->callchain, - min_callchain_hits); - else if (callchain_mode == GRAPH) - sort_chain_graph(&he->sorted_chain, &he->callchain, - min_callchain_hits); - } + if (callchain) + callchain_param.sort(&he->sorted_chain, &he->callchain, + min_callchain_hits, &callchain_param); while (*p != NULL) { parent = *p; @@ -1249,7 +1260,7 @@ static void output__resort(u64 total_samples) struct rb_root *tree = &hist; u64 min_callchain_hits; - min_callchain_hits = total_samples * (callchain_min_percent / 100); + min_callchain_hits = total_samples * (callchain_param.min_percent / 100); if (sort__need_collapse) tree = &collapse_hists; @@ -1829,22 +1840,31 @@ parse_callchain_opt(const struct option *opt __used, const char *arg, /* get the output mode */ if (!strncmp(tok, "graph", strlen(arg))) - callchain_mode = GRAPH; + callchain_param.mode = CHAIN_GRAPH_ABS; else if (!strncmp(tok, "flat", strlen(arg))) - callchain_mode = FLAT; + callchain_param.mode = CHAIN_FLAT; + + else if (!strncmp(tok, "fractal", strlen(arg))) + callchain_param.mode = CHAIN_GRAPH_REL; + else return -1; /* get the min percentage */ tok = strtok(NULL, ","); if (!tok) - return 0; + goto setup; - callchain_min_percent = strtod(tok, &endptr); + callchain_param.min_percent = strtod(tok, &endptr); if (tok == endptr) return -1; +setup: + if (register_callchain_param(&callchain_param) < 0) { + fprintf(stderr, "Can't register callchain params\n"); + return -1; + } return 0; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 5d244afb7cd..9d3c8141b8c 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -32,13 +32,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, rnode = rb_entry(parent, struct callchain_node, rb_node); switch (mode) { - case FLAT: + case CHAIN_FLAT: if (rnode->hit < chain->hit) p = &(*p)->rb_left; else p = &(*p)->rb_right; break; - case GRAPH: + case CHAIN_GRAPH_ABS: /* Falldown */ + case CHAIN_GRAPH_REL: if (rnode->cumul_hit < chain->cumul_hit) p = &(*p)->rb_left; else @@ -53,43 +54,96 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, rb_insert_color(&chain->rb_node, root); } +static void +__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit) +{ + struct callchain_node *child; + + chain_for_each_child(child, node) + __sort_chain_flat(rb_root, child, min_hit); + + if (node->hit && node->hit >= min_hit) + rb_insert_callchain(rb_root, node, CHAIN_FLAT); +} + /* * Once we get every callchains from the stream, we can now * sort them by hit */ -void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, - u64 min_hit) +static void +sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, + u64 min_hit, struct callchain_param *param __used) +{ + __sort_chain_flat(rb_root, node, min_hit); +} + +static void __sort_chain_graph_abs(struct callchain_node *node, + u64 min_hit) { struct callchain_node *child; - chain_for_each_child(child, node) - sort_chain_flat(rb_root, child, min_hit); + node->rb_root = RB_ROOT; - if (node->hit && node->hit >= min_hit) - rb_insert_callchain(rb_root, node, FLAT); + chain_for_each_child(child, node) { + __sort_chain_graph_abs(child, min_hit); + if (child->cumul_hit >= min_hit) + rb_insert_callchain(&node->rb_root, child, + CHAIN_GRAPH_ABS); + } +} + +static void +sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root, + u64 min_hit, struct callchain_param *param __used) +{ + __sort_chain_graph_abs(chain_root, min_hit); + rb_root->rb_node = chain_root->rb_root.rb_node; } -static void __sort_chain_graph(struct callchain_node *node, u64 min_hit) +static void __sort_chain_graph_rel(struct callchain_node *node, + double min_percent) { struct callchain_node *child; + u64 min_hit; node->rb_root = RB_ROOT; + min_hit = node->cumul_hit * min_percent / 100.0; chain_for_each_child(child, node) { - __sort_chain_graph(child, min_hit); + __sort_chain_graph_rel(child, min_percent); if (child->cumul_hit >= min_hit) - rb_insert_callchain(&node->rb_root, child, GRAPH); + rb_insert_callchain(&node->rb_root, child, + CHAIN_GRAPH_REL); } } -void -sort_chain_graph(struct rb_root *rb_root, struct callchain_node *chain_root, - u64 min_hit) +static void +sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, + u64 min_hit __used, struct callchain_param *param) { - __sort_chain_graph(chain_root, min_hit); + __sort_chain_graph_rel(chain_root, param->min_percent); rb_root->rb_node = chain_root->rb_root.rb_node; } +int register_callchain_param(struct callchain_param *param) +{ + switch (param->mode) { + case CHAIN_GRAPH_ABS: + param->sort = sort_chain_graph_abs; + break; + case CHAIN_GRAPH_REL: + param->sort = sort_chain_graph_rel; + break; + case CHAIN_FLAT: + param->sort = sort_chain_flat; + break; + default: + return -1; + } + return 0; +} + /* * Create a child for a parent. If inherit_children, then the new child * will become the new parent of it's parent children diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index f3e4776e743..7812122bea1 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -7,8 +7,9 @@ #include "symbol.h" enum chain_mode { - FLAT, - GRAPH + CHAIN_FLAT, + CHAIN_GRAPH_ABS, + CHAIN_GRAPH_REL }; struct callchain_node { @@ -23,6 +24,17 @@ struct callchain_node { u64 cumul_hit; /* hit + hits of children */ }; +struct callchain_param; + +typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *, + u64, struct callchain_param *); + +struct callchain_param { + enum chain_mode mode; + double min_percent; + sort_chain_func_t sort; +}; + struct callchain_list { u64 ip; struct symbol *sym; @@ -36,10 +48,7 @@ static inline void callchain_init(struct callchain_node *node) INIT_LIST_HEAD(&node->val); } +int register_callchain_param(struct callchain_param *param); void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -void sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, - u64 min_hit); -void sort_chain_graph(struct rb_root *rb_root, struct callchain_node *node, - u64 min_hit); #endif -- cgit v1.2.3