From e49a5bd38159dfb1928fd25b173bc9de4bbadb21 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 22 Mar 2010 19:40:03 +0100 Subject: perf: Use hot regs with software sched switch/migrate events Scheduler's task migration events don't work because they always pass NULL regs perf_sw_event(). The event hence gets filtered in perf_swevent_add(). Scheduler's context switches events use task_pt_regs() to get the context when the event occured which is a wrong thing to do as this won't give us the place in the kernel where we went to sleep but the place where we left userspace. The result is even more wrong if we switch from a kernel thread. Use the hot regs snapshot for both events as they belong to the non-interrupt/exception based events family. Unlike page faults or so that provide the regs matching the exact origin of the event, we need to save the current context. This makes the task migration event working and fix the context switch callchains and origin ip. Example: perf record -a -e cs Before: 10.91% ksoftirqd/0 0 [k] 0000000000000000 | --- (nil) perf_callchain perf_prepare_sample __perf_event_overflow perf_swevent_overflow perf_swevent_add perf_swevent_ctx_event do_perf_sw_event __perf_sw_event perf_event_task_sched_out schedule run_ksoftirqd kthread kernel_thread_helper After: 23.77% hald-addon-stor [kernel.kallsyms] [k] schedule | --- schedule | |--60.00%-- schedule_timeout | wait_for_common | wait_for_completion | blk_execute_rq | scsi_execute | scsi_execute_req | sr_test_unit_ready | | | |--66.67%-- sr_media_change | | media_changed | | cdrom_media_changed | | sr_block_media_changed | | check_disk_change | | cdrom_open v2: Always build perf_arch_fetch_caller_regs() now that software events need that too. They don't need it from modules, unlike trace events, so we keep the EXPORT_SYMBOL in trace_event_perf.c Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Ingo Molnar Cc: David Miller --- arch/x86/kernel/cpu/perf_event.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 60398a0d947..5fb490c6ee5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1702,7 +1702,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } -#ifdef CONFIG_EVENT_TRACING void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) { regs->ip = ip; @@ -1714,4 +1713,3 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski regs->cs = __KERNEL_CS; local_save_flags(regs->flags); } -#endif -- cgit v1.2.3 From b38b24ead33417146e051453d04bf60b8d2d7e25 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Mar 2010 19:31:15 +0100 Subject: perf, x86: Fix AMD hotplug & constraint initialization Commit 3f6da39 ("perf: Rework and fix the arch CPU-hotplug hooks") moved the amd northbridge allocation from CPUS_ONLINE to CPUS_PREPARE_UP however amd_nb_id() doesn't work yet on prepare so it would simply bail basically reverting to a state where we do not properly track node wide constraints - causing weird perf results. Fix up the AMD NorthBridge initialization code by allocating from CPU_UP_PREPARE and installing it from CPU_STARTING once we have the proper nb_id. It also properly deals with the allocation failing. Signed-off-by: Peter Zijlstra [ robustify using amd_has_nb() ] Signed-off-by: Stephane Eranian LKML-Reference: <1269353485.5109.48.camel@twins> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 8 ++-- arch/x86/kernel/cpu/perf_event_amd.c | 80 +++++++++++++++++++++--------------- 2 files changed, 52 insertions(+), 36 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5fb490c6ee5..bd28cf9d8a8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -158,7 +158,7 @@ struct x86_pmu { struct perf_event *event); struct event_constraint *event_constraints; - void (*cpu_prepare)(int cpu); + int (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); @@ -1333,11 +1333,12 @@ static int __cpuinit x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; + int ret = NOTIFY_OK; switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: if (x86_pmu.cpu_prepare) - x86_pmu.cpu_prepare(cpu); + ret = x86_pmu.cpu_prepare(cpu); break; case CPU_STARTING: @@ -1350,6 +1351,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) x86_pmu.cpu_dying(cpu); break; + case CPU_UP_CANCELED: case CPU_DEAD: if (x86_pmu.cpu_dead) x86_pmu.cpu_dead(cpu); @@ -1359,7 +1361,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) break; } - return NOTIFY_OK; + return ret; } static void __init pmu_check_apic(void) diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index b87e0b6970c..db6f7d4056e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -137,6 +137,13 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) return (hwc->config & 0xe0) == 0xe0; } +static inline int amd_has_nb(struct cpu_hw_events *cpuc) +{ + struct amd_nb *nb = cpuc->amd_nb; + + return nb && nb->nb_id != -1; +} + static void amd_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -147,7 +154,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, /* * only care about NB events */ - if (!(nb && amd_is_nb_event(hwc))) + if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) return; /* @@ -214,7 +221,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) /* * if not NB event or no NB, then no constraints */ - if (!(nb && amd_is_nb_event(hwc))) + if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) return &unconstrained; /* @@ -293,51 +300,55 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) return nb; } -static void amd_pmu_cpu_online(int cpu) +static int amd_pmu_cpu_prepare(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + WARN_ON_ONCE(cpuc->amd_nb); + + if (boot_cpu_data.x86_max_cores < 2) + return NOTIFY_OK; + + cpuc->amd_nb = amd_alloc_nb(cpu, -1); + if (!cpuc->amd_nb) + return NOTIFY_BAD; + + return NOTIFY_OK; +} + +static void amd_pmu_cpu_starting(int cpu) { - struct cpu_hw_events *cpu1, *cpu2; - struct amd_nb *nb = NULL; + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct amd_nb *nb; int i, nb_id; if (boot_cpu_data.x86_max_cores < 2) return; - /* - * function may be called too early in the - * boot process, in which case nb_id is bogus - */ nb_id = amd_get_nb_id(cpu); - if (nb_id == BAD_APICID) - return; - - cpu1 = &per_cpu(cpu_hw_events, cpu); - cpu1->amd_nb = NULL; + WARN_ON_ONCE(nb_id == BAD_APICID); raw_spin_lock(&amd_nb_lock); for_each_online_cpu(i) { - cpu2 = &per_cpu(cpu_hw_events, i); - nb = cpu2->amd_nb; - if (!nb) + nb = per_cpu(cpu_hw_events, i).amd_nb; + if (WARN_ON_ONCE(!nb)) continue; - if (nb->nb_id == nb_id) - goto found; - } - nb = amd_alloc_nb(cpu, nb_id); - if (!nb) { - pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); - raw_spin_unlock(&amd_nb_lock); - return; + if (nb->nb_id == nb_id) { + kfree(cpuc->amd_nb); + cpuc->amd_nb = nb; + break; + } } -found: - nb->refcnt++; - cpu1->amd_nb = nb; + + cpuc->amd_nb->nb_id = nb_id; + cpuc->amd_nb->refcnt++; raw_spin_unlock(&amd_nb_lock); } -static void amd_pmu_cpu_offline(int cpu) +static void amd_pmu_cpu_dead(int cpu) { struct cpu_hw_events *cpuhw; @@ -349,8 +360,10 @@ static void amd_pmu_cpu_offline(int cpu) raw_spin_lock(&amd_nb_lock); if (cpuhw->amd_nb) { - if (--cpuhw->amd_nb->refcnt == 0) - kfree(cpuhw->amd_nb); + struct amd_nb *nb = cpuhw->amd_nb; + + if (nb->nb_id == -1 || --nb->refcnt == 0) + kfree(nb); cpuhw->amd_nb = NULL; } @@ -379,8 +392,9 @@ static __initconst struct x86_pmu amd_pmu = { .get_event_constraints = amd_get_event_constraints, .put_event_constraints = amd_put_event_constraints, - .cpu_prepare = amd_pmu_cpu_online, - .cpu_dead = amd_pmu_cpu_offline, + .cpu_prepare = amd_pmu_cpu_prepare, + .cpu_starting = amd_pmu_cpu_starting, + .cpu_dead = amd_pmu_cpu_dead, }; static __init int amd_pmu_init(void) -- cgit v1.2.3 From 257ef9d21f1b008a6c7425544b36641c4325a922 Mon Sep 17 00:00:00 2001 From: Torok Edwin Date: Wed, 17 Mar 2010 12:07:16 +0200 Subject: perf, x86: Fix callgraphs of 32-bit processes on 64-bit kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When profiling a 32-bit process on a 64-bit kernel, callgraph tracing stopped after the first function, because it has seen a garbage memory address (tried to interpret the frame pointer, and return address as a 64-bit pointer). Fix this by using a struct stack_frame with 32-bit pointers when the TIF_IA32 flag is set. Note that TIF_IA32 flag must be used, and not is_compat_task(), because the latter is only set when the 32-bit process is executing a syscall, which may not always be the case (when tracing page fault events for example). Signed-off-by: Török Edwin Signed-off-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: "H. Peter Anvin" Cc: Paul Mackerras Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org LKML-Reference: <1268820436-13145-1-git-send-email-edwintorok@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 44 +++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bd28cf9d8a8..53ea4cf1a87 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -28,6 +28,7 @@ #include #include #include +#include static u64 perf_event_mask __read_mostly; @@ -1630,14 +1631,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +#ifdef CONFIG_COMPAT +static inline int +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) { - unsigned long bytes; + /* 32-bit process in 64-bit kernel. */ + struct stack_frame_ia32 frame; + const void __user *fp; + + if (!test_thread_flag(TIF_IA32)) + return 0; + + fp = compat_ptr(regs->bp); + while (entry->nr < PERF_MAX_STACK_DEPTH) { + unsigned long bytes; + frame.next_frame = 0; + frame.return_address = 0; - bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); + bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); + if (bytes != sizeof(frame)) + break; + + if (fp < compat_ptr(regs->sp)) + break; - return bytes == sizeof(*frame); + callchain_store(entry, frame.return_address); + fp = compat_ptr(frame.next_frame); + } + return 1; } +#else +static inline int +perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + return 0; +} +#endif static void perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) @@ -1653,11 +1682,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, PERF_CONTEXT_USER); callchain_store(entry, regs->ip); + if (perf_callchain_user32(regs, entry)) + return; + while (entry->nr < PERF_MAX_STACK_DEPTH) { + unsigned long bytes; frame.next_frame = NULL; frame.return_address = 0; - if (!copy_stack_frame(fp, &frame)) + bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); + if (bytes != sizeof(frame)) break; if ((unsigned long)fp < regs->sp) -- cgit v1.2.3