From 49f474331e563a6ecf3b1e87ec27ec5482b3e4f1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 27 Dec 2009 11:51:52 +0100 Subject: perf events: Remove arg from perf sched hooks Since we only ever schedule the local cpu, there is no need to pass the cpu number to the perf sched hooks. This micro-optimizes things a bit. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c66b34f75ee..a494e750129 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -746,10 +746,10 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); -extern void perf_event_task_sched_in(struct task_struct *task, int cpu); +extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu); -extern void perf_event_task_tick(struct task_struct *task, int cpu); + struct task_struct *next); +extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); @@ -870,12 +870,12 @@ extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); #else static inline void -perf_event_task_sched_in(struct task_struct *task, int cpu) { } +perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) { } + struct task_struct *next) { } static inline void -perf_event_task_tick(struct task_struct *task, int cpu) { } +perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } -- cgit v1.2.3 From 07b139c8c81b97bbe55c68daf0cbeca8b1c609ca Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 21 Dec 2009 14:27:35 +0800 Subject: perf events: Remove CONFIG_EVENT_PROFILE Quoted from Ingo: | This reminds me - i think we should eliminate CONFIG_EVENT_PROFILE - | it's an unnecessary Kconfig complication. If both PERF_EVENTS and | EVENT_TRACING is enabled we should expose generic tracepoints. | | Nor is it limited to event 'profiling', so it has become a misnomer as | well. Signed-off-by: Li Zefan Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4B2F1557.2050705@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a494e750129..9a1d276db75 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -658,7 +658,7 @@ struct perf_event { perf_overflow_handler_t overflow_handler; -#ifdef CONFIG_EVENT_PROFILE +#ifdef CONFIG_EVENT_TRACING struct event_filter *filter; #endif -- cgit v1.2.3 From 889ff0150661512d79484219612b7e2e024b6c07 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 9 Jan 2010 20:04:47 +0100 Subject: perf/core: Split context's event group list into pinned and non-pinned lists Split-up struct perf_event_context::group_list into pinned_groups and flexible_groups (non-pinned). This first appears to be useless as it duplicates various loops around the group list handlings. But it scales better in the fast-path in perf_sched_in(). We don't anymore iterate twice through the entire list to separate pinned and non-pinned scheduling. Instead we interate through two distinct lists. The another desired effect is that it makes easier to define distinct scheduling rules on both. Changes in v2: - Respectively rename pinned_grp_list and volatile_grp_list into pinned_groups and flexible_groups as per Ingo suggestion. - Various cleanups Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9a1d276db75..cdbc2aa64a0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -683,7 +683,8 @@ struct perf_event_context { */ struct mutex mutex; - struct list_head group_list; + struct list_head pinned_groups; + struct list_head flexible_groups; struct list_head event_list; int nr_events; int nr_active; -- cgit v1.2.3 From d6f962b57bfaab62891c7abbf1469212a56d6103 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 10 Jan 2010 01:25:51 +0100 Subject: perf: Export software-only event group characteristic as a flag Before scheduling an event group, we first check if a group can go on. We first check if the group is made of software only events first, in which case it is enough to know if the group can be scheduled in. For that purpose, we iterate through the whole group, which is wasteful as we could do this check when we add/delete an event to a group. So we create a group_flags field in perf event that can host characteristics from a group of events, starting with a first PERF_GROUP_SOFTWARE flag that reduces the check on the fast path. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cdbc2aa64a0..c6f812e4d05 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -565,6 +565,10 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int, struct perf_sample_data *, struct pt_regs *regs); +enum perf_group_flag { + PERF_GROUP_SOFTWARE = 0x1, +}; + /** * struct perf_event - performance event kernel representation: */ @@ -574,6 +578,7 @@ struct perf_event { struct list_head event_entry; struct list_head sibling_list; int nr_siblings; + int group_flags; struct perf_event *group_leader; struct perf_event *output; const struct pmu *pmu; -- cgit v1.2.3 From abd50713944c8ea9e0af5b7bffa0aacae21cc91a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Jan 2010 18:50:16 +0100 Subject: perf: Reimplement frequency driven sampling There was a bug in the old period code that caused intel_pmu_enable_all() or native_write_msr_safe() to show up quite high in the profiles. In staring at that code it made my head hurt, so I rewrote it in a hopefully simpler fashion. Its now fully symetric between tick and overflow driven adjustments and uses less data to boot. The only complication is that it basically wants to do a u128 division. The code approximates that in a rather simple truncate until it fits fashion, taking care to balance the terms while truncating. This version does not generate that sampling artefact. Signed-off-by: Peter Zijlstra LKML-Reference: Cc: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c6f812e4d05..72b2615600d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -498,9 +498,8 @@ struct hw_perf_event { atomic64_t period_left; u64 interrupts; - u64 freq_count; - u64 freq_interrupts; - u64 freq_stamp; + u64 freq_time_stamp; + u64 freq_count_stamp; #endif }; -- cgit v1.2.3 From 184f412c3341cd24fbd26604634a5800b83dbdc3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Jan 2010 08:39:39 +0100 Subject: perf, x86: Clean up event constraints code a bit - Remove stray debug code - Improve ugly macros a bit - Remove some whitespace damage - (Also fix up some accumulated damage in perf_event.h) Signed-off-by: Ingo Molnar Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: --- include/linux/perf_event.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 72b2615600d..953c17731e0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -290,7 +290,7 @@ struct perf_event_mmap_page { }; #define PERF_RECORD_MISC_CPUMODE_MASK (3 << 0) -#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) #define PERF_RECORD_MISC_USER (2 << 0) #define PERF_RECORD_MISC_HYPERVISOR (3 << 0) @@ -356,8 +356,8 @@ enum perf_event_type { * u64 stream_id; * }; */ - PERF_RECORD_THROTTLE = 5, - PERF_RECORD_UNTHROTTLE = 6, + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, /* * struct { @@ -371,10 +371,10 @@ enum perf_event_type { /* * struct { - * struct perf_event_header header; - * u32 pid, tid; + * struct perf_event_header header; + * u32 pid, tid; * - * struct read_format values; + * struct read_format values; * }; */ PERF_RECORD_READ = 8, @@ -412,7 +412,7 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * }; */ - PERF_RECORD_SAMPLE = 9, + PERF_RECORD_SAMPLE = 9, PERF_RECORD_MAX, /* non-ABI */ }; @@ -752,8 +752,7 @@ extern int perf_max_events; extern const struct pmu *hw_perf_event_init(struct perf_event *event); extern void perf_event_task_sched_in(struct task_struct *task); -extern void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next); +extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); @@ -853,8 +852,7 @@ extern int sysctl_perf_event_mlock; extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); -extern void perf_tp_event(int event_id, u64 addr, u64 count, - void *record, int entry_size); +extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags @@ -895,13 +893,13 @@ static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } static inline void -perf_bp_event(struct perf_event *event, void *data) { } +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } -- cgit v1.2.3 From 447a194b393f32699607fd99617a40abd6a95114 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 1 Feb 2010 14:50:01 +0200 Subject: perf_events, x86: Fix bug in hw_perf_enable() We cannot assume that because hwc->idx == assign[i], we can avoid reprogramming the counter in hw_perf_enable(). The event may have been scheduled out and another event may have been programmed into this counter. Thus, we need a more robust way of verifying if the counter still contains config/data related to an event. This patch adds a generation number to each counter on each cpu. Using this mechanism we can verify reliabilty whether the content of a counter corresponds to an event. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4b66dc67.0b38560a.1635.ffffae18@mx.google.com> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 556b0f4a668..071a7db5254 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -478,9 +478,11 @@ struct hw_perf_event { union { struct { /* hardware */ u64 config; + u64 last_tag; unsigned long config_base; unsigned long event_base; int idx; + int last_cpu; }; struct { /* software */ s64 remaining; -- cgit v1.2.3 From d76a0812ac4139ceb54daab3cc70e1bd8bd9d43a Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 8 Feb 2010 17:06:01 +0200 Subject: perf_events: Add new start/stop PMU callbacks In certain situations, the kernel may need to stop and start the same event rapidly. The current PMU callbacks do not distinguish between stop and release (i.e., stop + free the resource). Thus, a counter may be released, then it will be immediately re-acquired. Event scheduling will again take place with no guarantee to assign the same counter. On some processors, this may event yield to failure to assign the event back due to competion between cores. This patch is adding a new pair of callback to stop and restart a counter without actually release the underlying counter resource. On stop, the counter is stopped, its values saved and that's it. On start, the value is reloaded and counter is restarted (on x86, actual restart is delayed until perf_enable()). Signed-off-by: Stephane Eranian [ added fallback to ->enable/->disable for all other PMUs fixed x86_pmu_start() to call x86_pmu.enable() merged __x86_pmu_disable into x86_pmu_stop() ] Signed-off-by: Peter Zijlstra LKML-Reference: <4b703875.0a04d00a.7896.ffffb824@mx.google.com> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 071a7db5254..b08dfdad08c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -513,6 +513,8 @@ struct perf_event; struct pmu { int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); + int (*start) (struct perf_event *event); + void (*stop) (struct perf_event *event); void (*read) (struct perf_event *event); void (*unthrottle) (struct perf_event *event); }; -- cgit v1.2.3 From 6e37738a2fac964583debe91099bc3248554f6e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Feb 2010 13:21:58 +0100 Subject: perf_events: Simplify code by removing cpu argument to hw_perf_group_sched_in() Since the cpu argument to hw_perf_group_sched_in() is always smp_processor_id(), simplify the code a little by removing this argument and using the current cpu where needed. Signed-off-by: Peter Zijlstra Cc: David Miller Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker LKML-Reference: <1265890918.5396.3.camel@laptop> Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b08dfdad08c..d0e072c5b58 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -772,7 +772,7 @@ extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx, int cpu); + struct perf_event_context *ctx); extern void perf_event_update_userpage(struct perf_event *event); extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * -- cgit v1.2.3