From f29ac756a40d0f1bb07d682ea521e7b666ff06d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Jun 2009 18:27:26 +0200 Subject: perf_counter: Optimize perf_swcounter_event() Similar to tracepoints, use an enable variable to reduce overhead when unused. Only look for a counter of a particular event type when we know there is at least one in the system. Signed-off-by: Peter Zijlstra LKML-Reference: Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 89698d8aba5..e7213e46cf9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -669,7 +669,16 @@ static inline int is_software_counter(struct perf_counter *counter) (counter->attr.type != PERF_TYPE_HW_CACHE); } -extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); +extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; + +extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); + +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +{ + if (atomic_read(&perf_swcounter_enabled[event])) + __perf_swcounter_event(event, nr, nmi, regs, addr); +} extern void __perf_counter_mmap(struct vm_area_struct *vma); -- cgit v1.2.3 From 41f95331b972a039f519ae0c70f051b7121f7346 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 17:55:18 +0200 Subject: perf_counter: Split the mmap control page in two parts Since there are two distinct sections to the control page, move them apart so that possible extentions don't overlap. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e7213e46cf9..489d5cbfbcc 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -233,6 +233,12 @@ struct perf_counter_mmap_page { __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + /* + * Hole for extension of the self monitor capabilities + */ + + __u64 __reserved[125]; /* align to 1k */ + /* * Control data for the mmap() data buffer. * -- cgit v1.2.3 From 7f8b4e4e0988dadfd22330fd147ad2453e19f510 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 14:34:35 +0200 Subject: perf_counter: Add scale information to the mmap control page Add the needed time scale to the self-profile mmap information. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 489d5cbfbcc..bcbf1c43ed4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -232,12 +232,14 @@ struct perf_counter_mmap_page { __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + __u64 time_enabled; /* time counter active */ + __u64 time_running; /* time counter on cpu */ /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[125]; /* align to 1k */ + __u64 __reserved[123]; /* align to 1k */ /* * Control data for the mmap() data buffer. -- cgit v1.2.3 From 38b200d67636a30cb8dc1508137908e7a649b5c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 20:13:11 +0200 Subject: perf_counter: Add PERF_EVENT_READ Provide a read() like event which can be used to log the counter value at specific sites such as child->parent folding on exit. In order to be useful, we log the counter parent ID, not the actual counter ID, since userspace can only relate parent IDs to perf_counter_attr constructs. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index bcbf1c43ed4..6a384f04755 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -334,6 +334,18 @@ enum perf_event_type { */ PERF_EVENT_FORK = 7, + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 parent_id; } && PERF_FORMAT_ID + * }; + */ + PERF_EVENT_READ = 8, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_SAMPLE_* -- cgit v1.2.3 From bfbd3381e63aa2a14c6706afb50ce4630aa0d9a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 21:11:59 +0200 Subject: perf_counter: Implement more accurate per task statistics With the introduction of PERF_EVENT_READ we have the possibility to provide accurate counter values for individual tasks in a task hierarchy. However, due to the lazy context switching used for similar counter contexts our current per task counts are way off. In order to maintain some of the lazy switch benefits we don't disable it out-right, but simply iterate the active counters and flip the values between the contexts. This only reads the counters but does not need to reprogram the full PMU. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6a384f04755..de70a10b5ec 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -178,8 +178,9 @@ struct perf_counter_attr { mmap : 1, /* include mmap data */ comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ - __reserved_1 : 53; + __reserved_1 : 52; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -602,6 +603,7 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; + int nr_stat; atomic_t refcount; struct task_struct *task; -- cgit v1.2.3 From e6e18ec79b023d5fe84226cef533cf0e3770ce93 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 11:27:12 +0200 Subject: perf_counter: Rework the sample ABI The PERF_EVENT_READ implementation made me realize we don't actually need the sample_type int the output sample, since we already have that in the perf_counter_attr information. Therefore, remove the PERF_EVENT_MISC_OVERFLOW bit and the event->type overloading, and imply put counter overflow samples in a PERF_EVENT_SAMPLE type. This also fixes the issue that event->type was only 32-bit and sample_type had 64 usable bits. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index de70a10b5ec..3078e23c91e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -262,7 +262,6 @@ struct perf_counter_mmap_page { #define PERF_EVENT_MISC_KERNEL (1 << 0) #define PERF_EVENT_MISC_USER (2 << 0) #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -348,9 +347,6 @@ enum perf_event_type { PERF_EVENT_READ = 8, /* - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field - * will be PERF_SAMPLE_* - * * struct { * struct perf_event_header header; * @@ -358,8 +354,9 @@ enum perf_event_type { * { u32 pid, tid; } && PERF_SAMPLE_TID * { u64 time; } && PERF_SAMPLE_TIME * { u64 addr; } && PERF_SAMPLE_ADDR - * { u64 config; } && PERF_SAMPLE_CONFIG + * { u64 id; } && PERF_SAMPLE_ID * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD * * { u64 nr; * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP @@ -368,6 +365,9 @@ enum perf_event_type { * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * }; */ + PERF_EVENT_SAMPLE = 9, + + PERF_EVENT_MAX, /* non-ABI */ }; enum perf_callchain_context { -- cgit v1.2.3 From 57e7986ed142417498155ebcd5eaf617ac37136d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 30 Jun 2009 16:07:19 +1000 Subject: perf_counter: Provide a way to enable counters on exec This provides a way to mark a counter to be enabled on the next exec. This is useful for measuring the total activity of a program without including overhead from the process that launches it. This also changes the perf stat command to use this new facility. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <19017.43927.838745.689203@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3078e23c91e..5e970c7d3fd 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -179,8 +179,9 @@ struct perf_counter_attr { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; -- cgit v1.2.3