From a12b51c478899fe0b7e874a559b05ba35f1128ee Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 10 Mar 2010 20:36:09 +1100 Subject: perf tools: Fix sparse CPU numbering related bugs At present, the perf subcommands that do system-wide monitoring (perf stat, perf record and perf top) don't work properly unless the online cpus are numbered 0, 1, ..., N-1. These tools ask for the number of online cpus with sysconf(_SC_NPROCESSORS_ONLN) and then try to create events for cpus 0, 1, ..., N-1. This creates problems for systems where the online cpus are numbered sparsely. For example, a POWER6 system in single-threaded mode (i.e. only running 1 hardware thread per core) will have only even-numbered cpus online. This fixes the problem by reading the /sys/devices/system/cpu/online file to find out which cpus are online. The code that does that is in tools/perf/util/cpumap.[ch], and consists of a read_cpu_map() function that sets up a cpumap[] array and returns the number of online cpus. If /sys/devices/system/cpu/online can't be read or can't be parsed successfully, it falls back to using sysconf to ask how many cpus are online and sets up an identity map in cpumap[]. The perf record, perf stat and perf top code then calls read_cpu_map() in the system-wide monitoring case (instead of sysconf) and uses cpumap[] to get the cpu numbers to pass to perf_event_open. Signed-off-by: Paul Mackerras Cc: Anton Blanchard Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <20100310093609.GA3959@brick.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/Makefile | 2 ++ tools/perf/builtin-record.c | 7 +++--- tools/perf/builtin-stat.c | 10 +++++--- tools/perf/builtin-top.c | 9 +++---- tools/perf/util/cpumap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cpumap.h | 7 ++++++ 6 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 tools/perf/util/cpumap.c create mode 100644 tools/perf/util/cpumap.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 2d537382c68..5840499e2d2 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -387,6 +387,7 @@ LIB_H += util/thread.h LIB_H += util/trace-event.h LIB_H += util/probe-finder.h LIB_H += util/probe-event.h +LIB_H += util/cpumap.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -433,6 +434,7 @@ LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o LIB_OBJS += util/probe-event.o LIB_OBJS += util/util.o +LIB_OBJS += util/cpumap.o BUILTIN_OBJS += builtin-annotate.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f573bbb8357..b09d3b27ca1 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -22,6 +22,7 @@ #include "util/debug.h" #include "util/session.h" #include "util/symbol.h" +#include "util/cpumap.h" #include #include @@ -421,9 +422,6 @@ static int __cmd_record(int argc, const char **argv) char buf; page_size = sysconf(_SC_PAGE_SIZE); - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); atexit(sig_atexit); signal(SIGCHLD, sig_handler); @@ -547,8 +545,9 @@ static int __cmd_record(int argc, const char **argv) if ((!system_wide && !inherit) || profile_cpu != -1) { open_counters(profile_cpu, target_pid); } else { + nr_cpus = read_cpu_map(); for (i = 0; i < nr_cpus; i++) - open_counters(i, target_pid); + open_counters(cpumap[i], target_pid); } if (file_new) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e8c85d5aec4..95db31cff6f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -45,6 +45,7 @@ #include "util/event.h" #include "util/debug.h" #include "util/header.h" +#include "util/cpumap.h" #include #include @@ -151,7 +152,7 @@ static void create_perf_stat_counter(int counter, int pid) unsigned int cpu; for (cpu = 0; cpu < nr_cpus; cpu++) { - fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0); + fd[cpu][counter] = sys_perf_event_open(attr, -1, cpumap[cpu], -1, 0); if (fd[cpu][counter] < 0 && verbose) fprintf(stderr, ERR_PERF_OPEN, counter, fd[cpu][counter], strerror(errno)); @@ -519,9 +520,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) nr_counters = ARRAY_SIZE(default_attrs); } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert((int)nr_cpus >= 0); + if (system_wide) + nr_cpus = read_cpu_map(); + else + nr_cpus = 1; /* * We dont want to block the signals - that would cause diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 31f2e597800..0b719e3dde0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -28,6 +28,7 @@ #include #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/cpumap.h" #include "util/debug.h" @@ -1123,7 +1124,7 @@ static void start_counter(int i, int counter) cpu = profile_cpu; if (target_pid == -1 && profile_cpu == -1) - cpu = i; + cpu = cpumap[i]; attr = attrs + counter; @@ -1347,12 +1348,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) attrs[counter].sample_period = default_interval; } - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - if (target_pid != -1 || profile_cpu != -1) nr_cpus = 1; + else + nr_cpus = read_cpu_map(); get_term_dimensions(&winsize); if (print_entries == 0) { diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c new file mode 100644 index 00000000000..4e01490e51e --- /dev/null +++ b/tools/perf/util/cpumap.c @@ -0,0 +1,59 @@ +#include "util.h" +#include "../perf.h" +#include "cpumap.h" +#include +#include + +int cpumap[MAX_NR_CPUS]; + +static int default_cpu_map(void) +{ + int nr_cpus, i; + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert((int)nr_cpus >= 0); + + for (i = 0; i < nr_cpus; ++i) + cpumap[i] = i; + + return nr_cpus; +} + +int read_cpu_map(void) +{ + FILE *onlnf; + int nr_cpus = 0; + int n, cpu, prev; + char sep; + + onlnf = fopen("/sys/devices/system/cpu/online", "r"); + if (!onlnf) + return default_cpu_map(); + + sep = 0; + prev = -1; + for (;;) { + n = fscanf(onlnf, "%u%c", &cpu, &sep); + if (n <= 0) + break; + if (prev >= 0) { + assert(nr_cpus + cpu - prev - 1 < MAX_NR_CPUS); + while (++prev < cpu) + cpumap[nr_cpus++] = prev; + } + assert (nr_cpus < MAX_NR_CPUS); + cpumap[nr_cpus++] = cpu; + if (n == 2 && sep == '-') + prev = cpu; + else + prev = -1; + if (n == 1 || sep == '\n') + break; + } + fclose(onlnf); + if (nr_cpus > 0) + return nr_cpus; + + return default_cpu_map(); +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h new file mode 100644 index 00000000000..86c78bb3309 --- /dev/null +++ b/tools/perf/util/cpumap.h @@ -0,0 +1,7 @@ +#ifndef __PERF_CPUMAP_H +#define __PERF_CPUMAP_H + +extern int read_cpu_map(void); +extern int cpumap[]; + +#endif /* __PERF_CPUMAP_H */ -- cgit v1.2.3