From cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Sep 2009 12:02:48 +0200 Subject: perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian Acked-by: Peter Zijlstra Acked-by: Paul Mackerras Reviewed-by: Arjan van de Ven Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Benjamin Herrenschmidt Cc: David Howells Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- arch/arm/include/asm/unistd.h | 2 +- arch/arm/kernel/calls.S | 2 +- arch/blackfin/include/asm/unistd.h | 2 +- arch/blackfin/mach-common/entry.S | 2 +- arch/frv/Kconfig | 2 +- arch/frv/include/asm/perf_counter.h | 17 - arch/frv/include/asm/perf_event.h | 17 + arch/frv/include/asm/unistd.h | 2 +- arch/frv/kernel/entry.S | 2 +- arch/frv/lib/Makefile | 2 +- arch/frv/lib/perf_counter.c | 19 - arch/frv/lib/perf_event.c | 19 + arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/kernel/entry.S | 2 +- arch/m68knommu/kernel/syscalltable.S | 2 +- arch/microblaze/include/asm/unistd.h | 2 +- arch/microblaze/kernel/syscall_table.S | 2 +- arch/mips/include/asm/unistd.h | 6 +- arch/mips/kernel/scall32-o32.S | 2 +- arch/mips/kernel/scall64-64.S | 2 +- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- arch/mn10300/include/asm/unistd.h | 2 +- arch/mn10300/kernel/entry.S | 2 +- arch/parisc/Kconfig | 2 +- arch/parisc/include/asm/perf_counter.h | 7 - arch/parisc/include/asm/perf_event.h | 7 + arch/parisc/include/asm/unistd.h | 4 +- arch/parisc/kernel/syscall_table.S | 2 +- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/hw_irq.h | 22 +- arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/include/asm/perf_counter.h | 110 -- arch/powerpc/include/asm/perf_event.h | 110 ++ arch/powerpc/include/asm/systbl.h | 2 +- arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/entry_64.S | 8 +- arch/powerpc/kernel/irq.c | 8 +- arch/powerpc/kernel/mpc7450-pmu.c | 2 +- arch/powerpc/kernel/perf_callchain.c | 2 +- arch/powerpc/kernel/perf_counter.c | 1315 ------------------ arch/powerpc/kernel/perf_event.c | 1315 ++++++++++++++++++ arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- arch/powerpc/kernel/time.c | 30 +- arch/powerpc/mm/fault.c | 8 +- arch/powerpc/platforms/Kconfig.cputype | 4 +- arch/s390/Kconfig | 2 +- arch/s390/include/asm/perf_counter.h | 10 - arch/s390/include/asm/perf_event.h | 10 + arch/s390/include/asm/unistd.h | 2 +- arch/s390/kernel/compat_wrapper.S | 8 +- arch/s390/kernel/syscalls.S | 2 +- arch/s390/mm/fault.c | 8 +- arch/sh/Kconfig | 2 +- arch/sh/include/asm/perf_counter.h | 9 - arch/sh/include/asm/perf_event.h | 9 + arch/sh/include/asm/unistd_32.h | 2 +- arch/sh/include/asm/unistd_64.h | 2 +- arch/sh/kernel/syscalls_32.S | 2 +- arch/sh/kernel/syscalls_64.S | 2 +- arch/sh/mm/fault_32.c | 8 +- arch/sh/mm/tlbflush_64.c | 8 +- arch/sparc/Kconfig | 4 +- arch/sparc/include/asm/perf_counter.h | 14 - arch/sparc/include/asm/perf_event.h | 14 + arch/sparc/include/asm/unistd.h | 2 +- arch/sparc/kernel/Makefile | 2 +- arch/sparc/kernel/nmi.c | 4 +- arch/sparc/kernel/pcr.c | 10 +- arch/sparc/kernel/perf_counter.c | 556 -------- arch/sparc/kernel/perf_event.c | 556 ++++++++ arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 +- arch/x86/Kconfig | 2 +- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/include/asm/entry_arch.h | 2 +- arch/x86/include/asm/perf_counter.h | 108 -- arch/x86/include/asm/perf_event.h | 108 ++ arch/x86/include/asm/unistd_32.h | 2 +- arch/x86/include/asm/unistd_64.h | 4 +- arch/x86/kernel/apic/apic.c | 6 +- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/common.c | 4 +- arch/x86/kernel/cpu/perf_counter.c | 2298 ------------------------------- arch/x86/kernel/cpu/perf_event.c | 2298 +++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +- arch/x86/kernel/entry_64.S | 2 +- arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/syscall_table_32.S | 2 +- arch/x86/mm/fault.c | 8 +- arch/x86/oprofile/op_model_ppro.c | 4 +- arch/x86/oprofile/op_x86_model.h | 2 +- 99 files changed, 4604 insertions(+), 4604 deletions(-) delete mode 100644 arch/frv/include/asm/perf_counter.h create mode 100644 arch/frv/include/asm/perf_event.h delete mode 100644 arch/frv/lib/perf_counter.c create mode 100644 arch/frv/lib/perf_event.c delete mode 100644 arch/parisc/include/asm/perf_counter.h create mode 100644 arch/parisc/include/asm/perf_event.h delete mode 100644 arch/powerpc/include/asm/perf_counter.h create mode 100644 arch/powerpc/include/asm/perf_event.h delete mode 100644 arch/powerpc/kernel/perf_counter.c create mode 100644 arch/powerpc/kernel/perf_event.c delete mode 100644 arch/s390/include/asm/perf_counter.h create mode 100644 arch/s390/include/asm/perf_event.h delete mode 100644 arch/sh/include/asm/perf_counter.h create mode 100644 arch/sh/include/asm/perf_event.h delete mode 100644 arch/sparc/include/asm/perf_counter.h create mode 100644 arch/sparc/include/asm/perf_event.h delete mode 100644 arch/sparc/kernel/perf_counter.c create mode 100644 arch/sparc/kernel/perf_event.c delete mode 100644 arch/x86/include/asm/perf_counter.h create mode 100644 arch/x86/include/asm/perf_event.h delete mode 100644 arch/x86/kernel/cpu/perf_counter.c create mode 100644 arch/x86/kernel/cpu/perf_event.c (limited to 'arch') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 9122c9ee18f..89f7eade20a 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -390,7 +390,7 @@ #define __NR_preadv (__NR_SYSCALL_BASE+361) #define __NR_pwritev (__NR_SYSCALL_BASE+362) #define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) -#define __NR_perf_counter_open (__NR_SYSCALL_BASE+364) +#define __NR_perf_event_open (__NR_SYSCALL_BASE+364) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index ecfa98954d1..fafce1b5c69 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -373,7 +373,7 @@ CALL(sys_preadv) CALL(sys_pwritev) CALL(sys_rt_tgsigqueueinfo) - CALL(sys_perf_counter_open) + CALL(sys_perf_event_open) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index c8e7ee4768c..02b1529dad5 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -381,7 +381,7 @@ #define __NR_preadv 366 #define __NR_pwritev 367 #define __NR_rt_tgsigqueueinfo 368 -#define __NR_perf_counter_open 369 +#define __NR_perf_event_open 369 #define __NR_syscall 370 #define NR_syscalls __NR_syscall diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 01af24cde36..1e7cac23e25 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1620,7 +1620,7 @@ ENTRY(_sys_call_table) .long _sys_preadv .long _sys_pwritev .long _sys_rt_tgsigqueueinfo - .long _sys_perf_counter_open + .long _sys_perf_event_open .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index b86e19c9b5b..4b5830bcbe2 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -7,7 +7,7 @@ config FRV default y select HAVE_IDE select HAVE_ARCH_TRACEHOOK - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config ZONE_DMA bool diff --git a/arch/frv/include/asm/perf_counter.h b/arch/frv/include/asm/perf_counter.h deleted file mode 100644 index ccf726e61b2..00000000000 --- a/arch/frv/include/asm/perf_counter.h +++ /dev/null @@ -1,17 +0,0 @@ -/* FRV performance counter support - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#ifndef _ASM_PERF_COUNTER_H -#define _ASM_PERF_COUNTER_H - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#endif /* _ASM_PERF_COUNTER_H */ diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h new file mode 100644 index 00000000000..a69e0155d14 --- /dev/null +++ b/arch/frv/include/asm/perf_event.h @@ -0,0 +1,17 @@ +/* FRV performance event support + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_PERF_EVENT_H +#define _ASM_PERF_EVENT_H + +#define PERF_EVENT_INDEX_OFFSET 0 + +#endif /* _ASM_PERF_EVENT_H */ diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index 4a8fb427ce0..be6ef0f5cd4 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -342,7 +342,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #ifdef __KERNEL__ diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index fde1e446b44..189397ec012 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1525,6 +1525,6 @@ sys_call_table: .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open syscall_table_size = (. - sys_call_table) diff --git a/arch/frv/lib/Makefile b/arch/frv/lib/Makefile index 0a377210c89..f4709756d0d 100644 --- a/arch/frv/lib/Makefile +++ b/arch/frv/lib/Makefile @@ -5,4 +5,4 @@ lib-y := \ __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \ checksum.o memcpy.o memset.o atomic-ops.o atomic64-ops.o \ - outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_counter.o + outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o perf_event.o diff --git a/arch/frv/lib/perf_counter.c b/arch/frv/lib/perf_counter.c deleted file mode 100644 index 2000feecd57..00000000000 --- a/arch/frv/lib/perf_counter.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Performance counter handling - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include - -/* - * mark the performance counter as pending - */ -void set_perf_counter_pending(void) -{ -} diff --git a/arch/frv/lib/perf_event.c b/arch/frv/lib/perf_event.c new file mode 100644 index 00000000000..9ac5acfd2e9 --- /dev/null +++ b/arch/frv/lib/perf_event.c @@ -0,0 +1,19 @@ +/* Performance event handling + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include + +/* + * mark the performance event as pending + */ +void set_perf_event_pending(void) +{ +} diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 946d8691f2b..48b87f5ced5 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -335,7 +335,7 @@ #define __NR_preadv 329 #define __NR_pwritev 330 #define __NR_rt_tgsigqueueinfo 331 -#define __NR_perf_counter_open 332 +#define __NR_perf_event_open 332 #ifdef __KERNEL__ diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 922f52e7ed1..c5b33634c98 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -756,5 +756,5 @@ sys_call_table: .long sys_preadv .long sys_pwritev /* 330 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 0ae123e0898..23535cc415a 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -350,7 +350,7 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev /* 330 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 0b852327c0e..cb05a07e55e 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -381,7 +381,7 @@ #define __NR_preadv 363 /* new */ #define __NR_pwritev 364 /* new */ #define __NR_rt_tgsigqueueinfo 365 /* new */ -#define __NR_perf_counter_open 366 /* new */ +#define __NR_perf_event_open 366 /* new */ #define __NR_syscalls 367 diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index 457216097df..ecec1915513 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -370,4 +370,4 @@ ENTRY(sys_call_table) .long sys_ni_syscall .long sys_ni_syscall .long sys_rt_tgsigqueueinfo /* 365 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index e753a777949..8c9dfa9e901 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -353,7 +353,7 @@ #define __NR_preadv (__NR_Linux + 330) #define __NR_pwritev (__NR_Linux + 331) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 332) -#define __NR_perf_counter_open (__NR_Linux + 333) +#define __NR_perf_event_open (__NR_Linux + 333) #define __NR_accept4 (__NR_Linux + 334) /* @@ -664,7 +664,7 @@ #define __NR_preadv (__NR_Linux + 289) #define __NR_pwritev (__NR_Linux + 290) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 291) -#define __NR_perf_counter_open (__NR_Linux + 292) +#define __NR_perf_event_open (__NR_Linux + 292) #define __NR_accept4 (__NR_Linux + 293) /* @@ -979,7 +979,7 @@ #define __NR_preadv (__NR_Linux + 293) #define __NR_pwritev (__NR_Linux + 294) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 295) -#define __NR_perf_counter_open (__NR_Linux + 296) +#define __NR_perf_event_open (__NR_Linux + 296) #define __NR_accept4 (__NR_Linux + 297) /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 7c2de4f091c..fd2a9bb620d 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -581,7 +581,7 @@ einval: li v0, -ENOSYS sys sys_preadv 6 /* 4330 */ sys sys_pwritev 6 sys sys_rt_tgsigqueueinfo 4 - sys sys_perf_counter_open 5 + sys sys_perf_event_open 5 sys sys_accept4 4 .endm diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index b97b993846d..18bf7f32c5e 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -418,6 +418,6 @@ sys_call_table: PTR sys_preadv PTR sys_pwritev /* 5390 */ PTR sys_rt_tgsigqueueinfo - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 1a6ae124635..6ebc0797669 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -416,6 +416,6 @@ EXPORT(sysn32_call_table) PTR sys_preadv PTR sys_pwritev PTR compat_sys_rt_tgsigqueueinfo /* 5295 */ - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index cd31087a651..9bbf9775e0b 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -536,6 +536,6 @@ sys_call_table: PTR compat_sys_preadv /* 4330 */ PTR compat_sys_pwritev PTR compat_sys_rt_tgsigqueueinfo - PTR sys_perf_counter_open + PTR sys_perf_event_open PTR sys_accept4 .size sys_call_table,.-sys_call_table diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index fad68616af3..2a983931c11 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -347,7 +347,7 @@ #define __NR_preadv 334 #define __NR_pwritev 335 #define __NR_rt_tgsigqueueinfo 336 -#define __NR_perf_counter_open 337 +#define __NR_perf_event_open 337 #ifdef __KERNEL__ diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index e0d2563af4f..a94e7ea3faa 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -723,7 +723,7 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev /* 335 */ .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open nr_syscalls=(.-sys_call_table)/4 diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 06f8d5b5b0f..f388dc68f60 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -16,7 +16,7 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/include/asm/perf_counter.h b/arch/parisc/include/asm/perf_counter.h deleted file mode 100644 index dc9e829f701..00000000000 --- a/arch/parisc/include/asm/perf_counter.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_PARISC_PERF_COUNTER_H -#define __ASM_PARISC_PERF_COUNTER_H - -/* parisc only supports software counters through this interface. */ -static inline void set_perf_counter_pending(void) { } - -#endif /* __ASM_PARISC_PERF_COUNTER_H */ diff --git a/arch/parisc/include/asm/perf_event.h b/arch/parisc/include/asm/perf_event.h new file mode 100644 index 00000000000..cc146427d8f --- /dev/null +++ b/arch/parisc/include/asm/perf_event.h @@ -0,0 +1,7 @@ +#ifndef __ASM_PARISC_PERF_EVENT_H +#define __ASM_PARISC_PERF_EVENT_H + +/* parisc only supports software events through this interface. */ +static inline void set_perf_event_pending(void) { } + +#endif /* __ASM_PARISC_PERF_EVENT_H */ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index f3d3b8b012c..cda158318c6 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -810,9 +810,9 @@ #define __NR_preadv (__NR_Linux + 315) #define __NR_pwritev (__NR_Linux + 316) #define __NR_rt_tgsigqueueinfo (__NR_Linux + 317) -#define __NR_perf_counter_open (__NR_Linux + 318) +#define __NR_perf_event_open (__NR_Linux + 318) -#define __NR_Linux_syscalls (__NR_perf_counter_open + 1) +#define __NR_Linux_syscalls (__NR_perf_event_open + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index cf145eb026b..843f423dec6 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -416,7 +416,7 @@ ENTRY_COMP(preadv) /* 315 */ ENTRY_COMP(pwritev) ENTRY_COMP(rt_tgsigqueueinfo) - ENTRY_SAME(perf_counter_open) + ENTRY_SAME(perf_event_open) /* Nothing yet */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8250902265c..4fd479059d6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -129,7 +129,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e73d554538d..abbc2aaaced 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -135,43 +135,43 @@ static inline int irqs_disabled_flags(unsigned long flags) */ struct irq_chip; -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_PPC64 -static inline unsigned long test_perf_counter_pending(void) +static inline unsigned long test_perf_event_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_counter_pending))); + : "i" (offsetof(struct paca_struct, perf_event_pending))); return x; } -static inline void set_perf_counter_pending(void) +static inline void set_perf_event_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, perf_counter_pending))); + "i" (offsetof(struct paca_struct, perf_event_pending))); } -static inline void clear_perf_counter_pending(void) +static inline void clear_perf_event_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, perf_counter_pending))); + "i" (offsetof(struct paca_struct, perf_event_pending))); } #endif /* CONFIG_PPC64 */ -#else /* CONFIG_PERF_COUNTERS */ +#else /* CONFIG_PERF_EVENTS */ -static inline unsigned long test_perf_counter_pending(void) +static inline unsigned long test_perf_event_pending(void) { return 0; } -static inline void clear_perf_counter_pending(void) {} -#endif /* CONFIG_PERF_COUNTERS */ +static inline void clear_perf_event_pending(void) {} +#endif /* CONFIG_PERF_EVENTS */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index b634456ea89..154f405b642 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -122,7 +122,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_counter_pending; /* PM interrupt while soft-disabled */ + u8 perf_event_pending; /* PM interrupt while soft-disabled */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h deleted file mode 100644 index 0ea0639fcf7..00000000000 --- a/arch/powerpc/include/asm/perf_counter.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Performance counter support - PowerPC-specific definitions. - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include - -#include - -#define MAX_HWCOUNTERS 8 -#define MAX_EVENT_ALTERNATIVES 8 -#define MAX_LIMITED_HWCOUNTERS 2 - -/* - * This struct provides the constants and functions needed to - * describe the PMU on a particular POWER-family CPU. - */ -struct power_pmu { - const char *name; - int n_counter; - int max_alternatives; - unsigned long add_fields; - unsigned long test_adder; - int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], unsigned long mmcr[]); - int (*get_constraint)(u64 event, unsigned long *mskp, - unsigned long *valp); - int (*get_alternatives)(u64 event, unsigned int flags, - u64 alt[]); - void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); - int (*limited_pmc_event)(u64 event); - u32 flags; - int n_generic; - int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; -}; - -/* - * Values for power_pmu.flags - */ -#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ -#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ - -/* - * Values for flags to get_alternatives() - */ -#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ -#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ -#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ - -extern int register_power_pmu(struct power_pmu *); - -struct pt_regs; -extern unsigned long perf_misc_flags(struct pt_regs *regs); -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); - -#define PERF_COUNTER_INDEX_OFFSET 1 - -/* - * Only override the default definitions in include/linux/perf_counter.h - * if we have hardware PMU support. - */ -#ifdef CONFIG_PPC_PERF_CTRS -#define perf_misc_flags(regs) perf_misc_flags(regs) -#endif - -/* - * The power_pmu.get_constraint function returns a 32/64-bit value and - * a 32/64-bit mask that express the constraints between this event and - * other events. - * - * The value and mask are divided up into (non-overlapping) bitfields - * of three different types: - * - * Select field: this expresses the constraint that some set of bits - * in MMCR* needs to be set to a specific value for this event. For a - * select field, the mask contains 1s in every bit of the field, and - * the value contains a unique value for each possible setting of the - * MMCR* bits. The constraint checking code will ensure that two events - * that set the same field in their masks have the same value in their - * value dwords. - * - * Add field: this expresses the constraint that there can be at most - * N events in a particular class. A field of k bits can be used for - * N <= 2^(k-1) - 1. The mask has the most significant bit of the field - * set (and the other bits 0), and the value has only the least significant - * bit of the field set. In addition, the 'add_fields' and 'test_adder' - * in the struct power_pmu for this processor come into play. The - * add_fields value contains 1 in the LSB of the field, and the - * test_adder contains 2^(k-1) - 1 - N in the field. - * - * NAND field: this expresses the constraint that you may not have events - * in all of a set of classes. (For example, on PPC970, you can't select - * events from the FPU, ISU and IDU simultaneously, although any two are - * possible.) For N classes, the field is N+1 bits wide, and each class - * is assigned one bit from the least-significant N bits. The mask has - * only the most-significant bit set, and the value has only the bit - * for the event's class set. The test_adder has the least significant - * bit set in the field. - * - * If an event is not subject to the constraint expressed by a particular - * field, then it will have 0 in both the mask and value for that field. - */ diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h new file mode 100644 index 00000000000..2499aaadaeb --- /dev/null +++ b/arch/powerpc/include/asm/perf_event.h @@ -0,0 +1,110 @@ +/* + * Performance event support - PowerPC-specific definitions. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +#include + +#define MAX_HWEVENTS 8 +#define MAX_EVENT_ALTERNATIVES 8 +#define MAX_LIMITED_HWEVENTS 2 + +/* + * This struct provides the constants and functions needed to + * describe the PMU on a particular POWER-family CPU. + */ +struct power_pmu { + const char *name; + int n_event; + int max_alternatives; + unsigned long add_fields; + unsigned long test_adder; + int (*compute_mmcr)(u64 events[], int n_ev, + unsigned int hwc[], unsigned long mmcr[]); + int (*get_constraint)(u64 event_id, unsigned long *mskp, + unsigned long *valp); + int (*get_alternatives)(u64 event_id, unsigned int flags, + u64 alt[]); + void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); + int (*limited_pmc_event)(u64 event_id); + u32 flags; + int n_generic; + int *generic_events; + int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; +}; + +/* + * Values for power_pmu.flags + */ +#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ +#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ + +/* + * Values for flags to get_alternatives() + */ +#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ +#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ +#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ + +extern int register_power_pmu(struct power_pmu *); + +struct pt_regs; +extern unsigned long perf_misc_flags(struct pt_regs *regs); +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); + +#define PERF_EVENT_INDEX_OFFSET 1 + +/* + * Only override the default definitions in include/linux/perf_event.h + * if we have hardware PMU support. + */ +#ifdef CONFIG_PPC_PERF_CTRS +#define perf_misc_flags(regs) perf_misc_flags(regs) +#endif + +/* + * The power_pmu.get_constraint function returns a 32/64-bit value and + * a 32/64-bit mask that express the constraints between this event_id and + * other events. + * + * The value and mask are divided up into (non-overlapping) bitfields + * of three different types: + * + * Select field: this expresses the constraint that some set of bits + * in MMCR* needs to be set to a specific value for this event_id. For a + * select field, the mask contains 1s in every bit of the field, and + * the value contains a unique value for each possible setting of the + * MMCR* bits. The constraint checking code will ensure that two events + * that set the same field in their masks have the same value in their + * value dwords. + * + * Add field: this expresses the constraint that there can be at most + * N events in a particular class. A field of k bits can be used for + * N <= 2^(k-1) - 1. The mask has the most significant bit of the field + * set (and the other bits 0), and the value has only the least significant + * bit of the field set. In addition, the 'add_fields' and 'test_adder' + * in the struct power_pmu for this processor come into play. The + * add_fields value contains 1 in the LSB of the field, and the + * test_adder contains 2^(k-1) - 1 - N in the field. + * + * NAND field: this expresses the constraint that you may not have events + * in all of a set of classes. (For example, on PPC970, you can't select + * events from the FPU, ISU and IDU simultaneously, although any two are + * possible.) For N classes, the field is N+1 bits wide, and each class + * is assigned one bit from the least-significant N bits. The mask has + * only the most-significant bit set, and the value has only the bit + * for the event_id's class set. The test_adder has the least significant + * bit set in the field. + * + * If an event_id is not subject to the constraint expressed by a particular + * field, then it will have 0 in both the mask and value for that field. + */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index ed24bd92fe4..c7d671a7d9a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,7 +322,7 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) -SYSCALL_SPU(perf_counter_open) +SYSCALL_SPU(perf_event_open) COMPAT_SYS_SPU(preadv) COMPAT_SYS_SPU(pwritev) COMPAT_SYS(rt_tgsigqueueinfo) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index cef080bfc60..f6ca7617676 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -341,7 +341,7 @@ #define __NR_dup3 316 #define __NR_pipe2 317 #define __NR_inotify_init1 318 -#define __NR_perf_counter_open 319 +#define __NR_perf_event_open 319 #define __NR_preadv 320 #define __NR_pwritev 321 #define __NR_rt_tgsigqueueinfo 322 diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 569f79ccd31..b23664a0b86 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f0df285f0f8..0812b0f414b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -133,7 +133,7 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); + DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 66bcda34a6b..900e0eea009 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,14 +556,14 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_COUNTERS - /* check paca->perf_counter_pending if we're enabling ints */ +#ifdef CONFIG_PERF_EVENTS + /* check paca->perf_event_pending if we're enabling ints */ lbz r3,PACAPERFPEND(r13) and. r3,r3,r5 beq 27f - bl .perf_counter_do_pending + bl .perf_event_do_pending 27: -#endif /* CONFIG_PERF_COUNTERS */ +#endif /* CONFIG_PERF_EVENTS */ /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index f7f376ea7b1..e5d12117798 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -138,9 +138,9 @@ notrace void raw_local_irq_restore(unsigned long en) } #endif /* CONFIG_PPC_STD_MMU_64 */ - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } /* diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c index cc466d039af..09d72028f31 100644 --- a/arch/powerpc/kernel/mpc7450-pmu.c +++ b/arch/powerpc/kernel/mpc7450-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index f74b62c6751..0a03cf70d24 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -10,7 +10,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c deleted file mode 100644 index 5ccf9bca96c..00000000000 --- a/arch/powerpc/kernel/perf_counter.c +++ /dev/null @@ -1,1315 +0,0 @@ -/* - * Performance counter support - powerpc architecture code - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct cpu_hw_counters { - int n_counters; - int n_percpu; - int disabled; - int n_added; - int n_limited; - u8 pmcs_enabled; - struct perf_counter *counter[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int flags[MAX_HWCOUNTERS]; - unsigned long mmcr[3]; - struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; - u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; - u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; -}; -DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); - -struct power_pmu *ppmu; - -/* - * Normally, to ignore kernel events we set the FCS (freeze counters - * in supervisor mode) bit in MMCR0, but if the kernel runs with the - * hypervisor bit set in the MSR, or if we are running on a processor - * where the hypervisor bit is forced to 1 (as on Apple G5 processors), - * then we need to use the FCHV bit to ignore kernel events. - */ -static unsigned int freeze_counters_kernel = MMCR0_FCS; - -/* - * 32-bit doesn't have MMCRA but does have an MMCR2, - * and a few other names are different. - */ -#ifdef CONFIG_PPC32 - -#define MMCR0_FCHV 0 -#define MMCR0_PMCjCE MMCR0_PMCnCE - -#define SPRN_MMCRA SPRN_MMCR2 -#define MMCRA_SAMPLE_ENABLE 0 - -static inline unsigned long perf_ip_adjust(struct pt_regs *regs) -{ - return 0; -} -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } -static inline u32 perf_get_misc_flags(struct pt_regs *regs) -{ - return 0; -} -static inline void perf_read_regs(struct pt_regs *regs) { } -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return 0; -} - -#endif /* CONFIG_PPC32 */ - -/* - * Things that are specific to 64-bit implementations. - */ -#ifdef CONFIG_PPC64 - -static inline unsigned long perf_ip_adjust(struct pt_regs *regs) -{ - unsigned long mmcra = regs->dsisr; - - if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { - unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; - if (slot > 1) - return 4 * (slot - 1); - } - return 0; -} - -/* - * The user wants a data address recorded. - * If we're not doing instruction sampling, give them the SDAR - * (sampled data address). If we are doing instruction sampling, then - * only give them the SDAR if it corresponds to the instruction - * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC - * bit in MMCRA. - */ -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) -{ - unsigned long mmcra = regs->dsisr; - unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? - POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; - - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - *addrp = mfspr(SPRN_SDAR); -} - -static inline u32 perf_get_misc_flags(struct pt_regs *regs) -{ - unsigned long mmcra = regs->dsisr; - - if (TRAP(regs) != 0xf00) - return 0; /* not a PMU interrupt */ - - if (ppmu->flags & PPMU_ALT_SIPR) { - if (mmcra & POWER6_MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & POWER6_MMCRA_SIPR) ? - PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; - } - if (mmcra & MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Overload regs->dsisr to store MMCRA so we only need to read it once - * on each interrupt. - */ -static inline void perf_read_regs(struct pt_regs *regs) -{ - regs->dsisr = mfspr(SPRN_MMCRA); -} - -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return !regs->softe; -} - -#endif /* CONFIG_PPC64 */ - -static void perf_counter_interrupt(struct pt_regs *regs); - -void perf_counter_print_debug(void) -{ -} - -/* - * Read one performance monitor counter (PMC). - */ -static unsigned long read_pmc(int idx) -{ - unsigned long val; - - switch (idx) { - case 1: - val = mfspr(SPRN_PMC1); - break; - case 2: - val = mfspr(SPRN_PMC2); - break; - case 3: - val = mfspr(SPRN_PMC3); - break; - case 4: - val = mfspr(SPRN_PMC4); - break; - case 5: - val = mfspr(SPRN_PMC5); - break; - case 6: - val = mfspr(SPRN_PMC6); - break; -#ifdef CONFIG_PPC64 - case 7: - val = mfspr(SPRN_PMC7); - break; - case 8: - val = mfspr(SPRN_PMC8); - break; -#endif /* CONFIG_PPC64 */ - default: - printk(KERN_ERR "oops trying to read PMC%d\n", idx); - val = 0; - } - return val; -} - -/* - * Write one PMC. - */ -static void write_pmc(int idx, unsigned long val) -{ - switch (idx) { - case 1: - mtspr(SPRN_PMC1, val); - break; - case 2: - mtspr(SPRN_PMC2, val); - break; - case 3: - mtspr(SPRN_PMC3, val); - break; - case 4: - mtspr(SPRN_PMC4, val); - break; - case 5: - mtspr(SPRN_PMC5, val); - break; - case 6: - mtspr(SPRN_PMC6, val); - break; -#ifdef CONFIG_PPC64 - case 7: - mtspr(SPRN_PMC7, val); - break; - case 8: - mtspr(SPRN_PMC8, val); - break; -#endif /* CONFIG_PPC64 */ - default: - printk(KERN_ERR "oops trying to write PMC%d\n", idx); - } -} - -/* - * Check if a set of events can all go on the PMU at once. - * If they can't, this will look at alternative codes for the events - * and see if any combination of alternative codes is feasible. - * The feasible set is returned in event[]. - */ -static int power_check_constraints(struct cpu_hw_counters *cpuhw, - u64 event[], unsigned int cflags[], - int n_ev) -{ - unsigned long mask, value, nv; - unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; - int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; - int i, j; - unsigned long addf = ppmu->add_fields; - unsigned long tadd = ppmu->test_adder; - - if (n_ev > ppmu->n_counter) - return -1; - - /* First see if the events will go on as-is */ - for (i = 0; i < n_ev; ++i) { - if ((cflags[i] & PPMU_LIMITED_PMC_REQD) - && !ppmu->limited_pmc_event(event[i])) { - ppmu->get_alternatives(event[i], cflags[i], - cpuhw->alternatives[i]); - event[i] = cpuhw->alternatives[i][0]; - } - if (ppmu->get_constraint(event[i], &cpuhw->amasks[i][0], - &cpuhw->avalues[i][0])) - return -1; - } - value = mask = 0; - for (i = 0; i < n_ev; ++i) { - nv = (value | cpuhw->avalues[i][0]) + - (value & cpuhw->avalues[i][0] & addf); - if ((((nv + tadd) ^ value) & mask) != 0 || - (((nv + tadd) ^ cpuhw->avalues[i][0]) & - cpuhw->amasks[i][0]) != 0) - break; - value = nv; - mask |= cpuhw->amasks[i][0]; - } - if (i == n_ev) - return 0; /* all OK */ - - /* doesn't work, gather alternatives... */ - if (!ppmu->get_alternatives) - return -1; - for (i = 0; i < n_ev; ++i) { - choice[i] = 0; - n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], - cpuhw->alternatives[i]); - for (j = 1; j < n_alt[i]; ++j) - ppmu->get_constraint(cpuhw->alternatives[i][j], - &cpuhw->amasks[i][j], - &cpuhw->avalues[i][j]); - } - - /* enumerate all possibilities and see if any will work */ - i = 0; - j = -1; - value = mask = nv = 0; - while (i < n_ev) { - if (j >= 0) { - /* we're backtracking, restore context */ - value = svalues[i]; - mask = smasks[i]; - j = choice[i]; - } - /* - * See if any alternative k for event i, - * where k > j, will satisfy the constraints. - */ - while (++j < n_alt[i]) { - nv = (value | cpuhw->avalues[i][j]) + - (value & cpuhw->avalues[i][j] & addf); - if ((((nv + tadd) ^ value) & mask) == 0 && - (((nv + tadd) ^ cpuhw->avalues[i][j]) - & cpuhw->amasks[i][j]) == 0) - break; - } - if (j >= n_alt[i]) { - /* - * No feasible alternative, backtrack - * to event i-1 and continue enumerating its - * alternatives from where we got up to. - */ - if (--i < 0) - return -1; - } else { - /* - * Found a feasible alternative for event i, - * remember where we got up to with this event, - * go on to the next event, and start with - * the first alternative for it. - */ - choice[i] = j; - svalues[i] = value; - smasks[i] = mask; - value = nv; - mask |= cpuhw->amasks[i][j]; - ++i; - j = -1; - } - } - - /* OK, we have a feasible combination, tell the caller the solution */ - for (i = 0; i < n_ev; ++i) - event[i] = cpuhw->alternatives[i][choice[i]]; - return 0; -} - -/* - * Check if newly-added counters have consistent settings for - * exclude_{user,kernel,hv} with each other and any previously - * added counters. - */ -static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], - int n_prev, int n_new) -{ - int eu = 0, ek = 0, eh = 0; - int i, n, first; - struct perf_counter *counter; - - n = n_prev + n_new; - if (n <= 1) - return 0; - - first = 1; - for (i = 0; i < n; ++i) { - if (cflags[i] & PPMU_LIMITED_PMC_OK) { - cflags[i] &= ~PPMU_LIMITED_PMC_REQD; - continue; - } - counter = ctrs[i]; - if (first) { - eu = counter->attr.exclude_user; - ek = counter->attr.exclude_kernel; - eh = counter->attr.exclude_hv; - first = 0; - } else if (counter->attr.exclude_user != eu || - counter->attr.exclude_kernel != ek || - counter->attr.exclude_hv != eh) { - return -EAGAIN; - } - } - - if (eu || ek || eh) - for (i = 0; i < n; ++i) - if (cflags[i] & PPMU_LIMITED_PMC_OK) - cflags[i] |= PPMU_LIMITED_PMC_REQD; - - return 0; -} - -static void power_pmu_read(struct perf_counter *counter) -{ - s64 val, delta, prev; - - if (!counter->hw.idx) - return; - /* - * Performance monitor interrupts come even when interrupts - * are soft-disabled, as long as interrupts are hard-enabled. - * Therefore we treat them like NMIs. - */ - do { - prev = atomic64_read(&counter->hw.prev_count); - barrier(); - val = read_pmc(counter->hw.idx); - } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); - - /* The counters are only 32 bits wide */ - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &counter->hw.period_left); -} - -/* - * On some machines, PMC5 and PMC6 can't be written, don't respect - * the freeze conditions, and don't generate interrupts. This tells - * us if `counter' is using such a PMC. - */ -static int is_limited_pmc(int pmcnum) -{ - return (ppmu->flags & PPMU_LIMITED_PMC5_6) - && (pmcnum == 5 || pmcnum == 6); -} - -static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val, prev, delta; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - if (!counter->hw.idx) - continue; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - prev = atomic64_read(&counter->hw.prev_count); - counter->hw.idx = 0; - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - } -} - -static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - counter->hw.idx = cpuhw->limited_hwidx[i]; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - atomic64_set(&counter->hw.prev_count, val); - perf_counter_update_userpage(counter); - } -} - -/* - * Since limited counters don't respect the freeze conditions, we - * have to read them immediately after freezing or unfreezing the - * other counters. We try to keep the values from the limited - * counters as consistent as possible by keeping the delay (in - * cycles and instructions) between freezing/unfreezing and reading - * the limited counters as small and consistent as possible. - * Therefore, if any limited counters are in use, we read them - * both, and always in the same order, to minimize variability, - * and do it inside the same asm that writes MMCR0. - */ -static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) -{ - unsigned long pmc5, pmc6; - - if (!cpuhw->n_limited) { - mtspr(SPRN_MMCR0, mmcr0); - return; - } - - /* - * Write MMCR0, then read PMC5 and PMC6 immediately. - * To ensure we don't get a performance monitor interrupt - * between writing MMCR0 and freezing/thawing the limited - * counters, we first write MMCR0 with the counter overflow - * interrupt enable bits turned off. - */ - asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" - : "=&r" (pmc5), "=&r" (pmc6) - : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), - "i" (SPRN_MMCR0), - "i" (SPRN_PMC5), "i" (SPRN_PMC6)); - - if (mmcr0 & MMCR0_FC) - freeze_limited_counters(cpuhw, pmc5, pmc6); - else - thaw_limited_counters(cpuhw, pmc5, pmc6); - - /* - * Write the full MMCR0 including the counter overflow interrupt - * enable bits, if necessary. - */ - if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) - mtspr(SPRN_MMCR0, mmcr0); -} - -/* - * Disable all counters to prevent PMU interrupts and to allow - * counters to be added or removed. - */ -void hw_perf_disable(void) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - - if (!ppmu) - return; - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - - if (!cpuhw->disabled) { - cpuhw->disabled = 1; - cpuhw->n_added = 0; - - /* - * Check if we ever enabled the PMU on this cpu. - */ - if (!cpuhw->pmcs_enabled) { - ppc_enable_pmcs(); - cpuhw->pmcs_enabled = 1; - } - - /* - * Disable instruction sampling if it was enabled - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mb(); - } - - /* - * Set the 'freeze counters' bit. - * The barrier is to make sure the mtspr has been - * executed and the PMU has frozen the counters - * before we return. - */ - write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); - mb(); - } - local_irq_restore(flags); -} - -/* - * Re-enable all counters if disable == 0. - * If we were previously disabled and counters were added, then - * put the new config on the PMU. - */ -void hw_perf_enable(void) -{ - struct perf_counter *counter; - struct cpu_hw_counters *cpuhw; - unsigned long flags; - long i; - unsigned long val; - s64 left; - unsigned int hwc_index[MAX_HWCOUNTERS]; - int n_lim; - int idx; - - if (!ppmu) - return; - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - if (!cpuhw->disabled) { - local_irq_restore(flags); - return; - } - cpuhw->disabled = 0; - - /* - * If we didn't change anything, or only removed counters, - * no need to recalculate MMCR* settings and reset the PMCs. - * Just reenable the PMU with the current MMCR* settings - * (possibly updated for removal of counters). - */ - if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - if (cpuhw->n_counters == 0) - ppc_set_pmu_inuse(0); - goto out_enable; - } - - /* - * Compute MMCR* values for the new set of counters - */ - if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, - cpuhw->mmcr)) { - /* shouldn't ever get here */ - printk(KERN_ERR "oops compute_mmcr failed\n"); - goto out; - } - - /* - * Add in MMCR0 freeze bits corresponding to the - * attr.exclude_* bits for the first counter. - * We have already checked that all counters have the - * same values for these bits as the first counter. - */ - counter = cpuhw->counter[0]; - if (counter->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; - if (counter->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_counters_kernel; - if (counter->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; - - /* - * Write the new configuration to MMCR* with the freeze - * bit set and set the hardware counters to their initial values. - * Then unfreeze the counters. - */ - ppc_set_pmu_inuse(1); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) - | MMCR0_FC); - - /* - * Read off any pre-existing counters that need to move - * to another PMC. - */ - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { - power_pmu_read(counter); - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - } - - /* - * Initialize the PMCs for all the new and moved counters. - */ - cpuhw->n_limited = n_lim = 0; - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx) - continue; - idx = hwc_index[i] + 1; - if (is_limited_pmc(idx)) { - cpuhw->limited_counter[n_lim] = counter; - cpuhw->limited_hwidx[n_lim] = idx; - ++n_lim; - continue; - } - val = 0; - if (counter->hw.sample_period) { - left = atomic64_read(&counter->hw.period_left); - if (left < 0x80000000L) - val = 0x80000000L - left; - } - atomic64_set(&counter->hw.prev_count, val); - counter->hw.idx = idx; - write_pmc(idx, val); - perf_counter_update_userpage(counter); - } - cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; - - out_enable: - mb(); - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - /* - * Enable instruction sampling if necessary - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); - } - - out: - local_irq_restore(flags); -} - -static int collect_events(struct perf_counter *group, int max_count, - struct perf_counter *ctrs[], u64 *events, - unsigned int *flags) -{ - int n = 0; - struct perf_counter *counter; - - if (!is_software_counter(group)) { - if (n >= max_count) - return -1; - ctrs[n] = group; - flags[n] = group->hw.counter_base; - events[n++] = group->hw.config; - } - list_for_each_entry(counter, &group->sibling_list, list_entry) { - if (!is_software_counter(counter) && - counter->state != PERF_COUNTER_STATE_OFF) { - if (n >= max_count) - return -1; - ctrs[n] = counter; - flags[n] = counter->hw.counter_base; - events[n++] = counter->hw.config; - } - } - return n; -} - -static void counter_sched_in(struct perf_counter *counter, int cpu) -{ - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; - counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; - if (is_software_counter(counter)) - counter->pmu->enable(counter); -} - -/* - * Called to enable a whole group of counters. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. - */ -int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) -{ - struct cpu_hw_counters *cpuhw; - long i, n, n0; - struct perf_counter *sub; - - if (!ppmu) - return 0; - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->counter[n0], &cpuhw->events[n0], - &cpuhw->flags[n0]); - if (n < 0) - return -EAGAIN; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) - return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); - if (i < 0) - return -EAGAIN; - cpuhw->n_counters = n0 + n; - cpuhw->n_added += n; - - /* - * OK, this group can go on; update counter states etc., - * and enable any software counters - */ - for (i = n0; i < n0 + n; ++i) - cpuhw->counter[i]->hw.config = cpuhw->events[i]; - cpuctx->active_oncpu += n; - n = 1; - counter_sched_in(group_leader, cpu); - list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { - if (sub->state != PERF_COUNTER_STATE_OFF) { - counter_sched_in(sub, cpu); - ++n; - } - } - ctx->nr_active += n; - - return 1; -} - -/* - * Add a counter to the PMU. - * If all counters are not already frozen, then we disable and - * re-enable the PMU in order to get hw_perf_enable to do the - * actual work of reconfiguring the PMU. - */ -static int power_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - int n0; - int ret = -EAGAIN; - - local_irq_save(flags); - perf_disable(); - - /* - * Add the counter to the list (if there is room) - * and check whether the total set is still feasible. - */ - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - if (n0 >= ppmu->n_counter) - goto out; - cpuhw->counter[n0] = counter; - cpuhw->events[n0] = counter->hw.config; - cpuhw->flags[n0] = counter->hw.counter_base; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) - goto out; - if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) - goto out; - - counter->hw.config = cpuhw->events[n0]; - ++cpuhw->n_counters; - ++cpuhw->n_added; - - ret = 0; - out: - perf_enable(); - local_irq_restore(flags); - return ret; -} - -/* - * Remove a counter from the PMU. - */ -static void power_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - long i; - unsigned long flags; - - local_irq_save(flags); - perf_disable(); - - power_pmu_read(counter); - - cpuhw = &__get_cpu_var(cpu_hw_counters); - for (i = 0; i < cpuhw->n_counters; ++i) { - if (counter == cpuhw->counter[i]) { - while (++i < cpuhw->n_counters) - cpuhw->counter[i-1] = cpuhw->counter[i]; - --cpuhw->n_counters; - ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); - if (counter->hw.idx) { - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - perf_counter_update_userpage(counter); - break; - } - } - for (i = 0; i < cpuhw->n_limited; ++i) - if (counter == cpuhw->limited_counter[i]) - break; - if (i < cpuhw->n_limited) { - while (++i < cpuhw->n_limited) { - cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; - cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; - } - --cpuhw->n_limited; - } - if (cpuhw->n_counters == 0) { - /* disable exceptions if no counters are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); - } - - perf_enable(); - local_irq_restore(flags); -} - -/* - * Re-enable interrupts on a counter after they were throttled - * because they were coming too fast. - */ -static void power_pmu_unthrottle(struct perf_counter *counter) -{ - s64 val, left; - unsigned long flags; - - if (!counter->hw.idx || !counter->hw.sample_period) - return; - local_irq_save(flags); - perf_disable(); - power_pmu_read(counter); - left = counter->hw.sample_period; - counter->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); - perf_enable(); - local_irq_restore(flags); -} - -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, -}; - -/* - * Return 1 if we might be able to put counter on a limited PMC, - * or 0 if not. - * A counter can only go on a limited PMC if it counts something - * that a limited PMC can count, doesn't require interrupts, and - * doesn't exclude any processor mode. - */ -static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, - unsigned int flags) -{ - int n; - u64 alt[MAX_EVENT_ALTERNATIVES]; - - if (counter->attr.exclude_user - || counter->attr.exclude_kernel - || counter->attr.exclude_hv - || counter->attr.sample_period) - return 0; - - if (ppmu->limited_pmc_event(ev)) - return 1; - - /* - * The requested event isn't on a limited PMC already; - * see if any alternative code goes on a limited PMC. - */ - if (!ppmu->get_alternatives) - return 0; - - flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; - n = ppmu->get_alternatives(ev, flags, alt); - - return n > 0; -} - -/* - * Find an alternative event that goes on a normal PMC, if possible, - * and return the event code, or 0 if there is no such alternative. - * (Note: event code 0 is "don't count" on all machines.) - */ -static u64 normal_pmc_alternative(u64 ev, unsigned long flags) -{ - u64 alt[MAX_EVENT_ALTERNATIVES]; - int n; - - flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); - n = ppmu->get_alternatives(ev, flags, alt); - if (!n) - return 0; - return alt[0]; -} - -/* Number of perf_counters counting hardware events */ -static atomic_t num_counters; -/* Used to avoid races in calling reserve/release_pmc_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); - -/* - * Release the PMU if this is the last perf_counter. - */ -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if (!atomic_add_unless(&num_counters, -1, 1)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_counters) == 0) - release_pmc_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -/* - * Translate a generic cache event config to a raw event code. - */ -static int hw_perf_cache_event(u64 config, u64 *eventp) -{ - unsigned long type, op, result; - int ev; - - if (!ppmu->cache_events) - return -EINVAL; - - /* unpack config */ - type = config & 0xff; - op = (config >> 8) & 0xff; - result = (config >> 16) & 0xff; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ev = (*ppmu->cache_events)[type][op][result]; - if (ev == 0) - return -EOPNOTSUPP; - if (ev == -1) - return -EINVAL; - *eventp = ev; - return 0; -} - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - u64 ev; - unsigned long flags; - struct perf_counter *ctrs[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int cflags[MAX_HWCOUNTERS]; - int n; - int err; - struct cpu_hw_counters *cpuhw; - - if (!ppmu) - return ERR_PTR(-ENXIO); - switch (counter->attr.type) { - case PERF_TYPE_HARDWARE: - ev = counter->attr.config; - if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); - ev = ppmu->generic_events[ev]; - break; - case PERF_TYPE_HW_CACHE: - err = hw_perf_cache_event(counter->attr.config, &ev); - if (err) - return ERR_PTR(err); - break; - case PERF_TYPE_RAW: - ev = counter->attr.config; - break; - default: - return ERR_PTR(-EINVAL); - } - counter->hw.config_base = ev; - counter->hw.idx = 0; - - /* - * If we are not running on a hypervisor, force the - * exclude_hv bit to 0 so that we don't care what - * the user set it to. - */ - if (!firmware_has_feature(FW_FEATURE_LPAR)) - counter->attr.exclude_hv = 0; - - /* - * If this is a per-task counter, then we can use - * PM_RUN_* events interchangeably with their non RUN_* - * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. - * XXX we should check if the task is an idle task. - */ - flags = 0; - if (counter->ctx->task) - flags |= PPMU_ONLY_COUNT_RUN; - - /* - * If this machine has limited counters, check whether this - * event could go on a limited counter. - */ - if (ppmu->flags & PPMU_LIMITED_PMC5_6) { - if (can_go_on_limited_pmc(counter, ev, flags)) { - flags |= PPMU_LIMITED_PMC_OK; - } else if (ppmu->limited_pmc_event(ev)) { - /* - * The requested event is on a limited PMC, - * but we can't use a limited PMC; see if any - * alternative goes on a normal PMC. - */ - ev = normal_pmc_alternative(ev, flags); - if (!ev) - return ERR_PTR(-EINVAL); - } - } - - /* - * If this is in a group, check if it can go on with all the - * other hardware counters in the group. We assume the counter - * hasn't been linked into its leader's sibling list at this point. - */ - n = 0; - if (counter->group_leader != counter) { - n = collect_events(counter->group_leader, ppmu->n_counter - 1, - ctrs, events, cflags); - if (n < 0) - return ERR_PTR(-EINVAL); - } - events[n] = ev; - ctrs[n] = counter; - cflags[n] = flags; - if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); - - cpuhw = &get_cpu_var(cpu_hw_counters); - err = power_check_constraints(cpuhw, events, cflags, n + 1); - put_cpu_var(cpu_hw_counters); - if (err) - return ERR_PTR(-EINVAL); - - counter->hw.config = events[n]; - counter->hw.counter_base = cflags[n]; - counter->hw.last_period = counter->hw.sample_period; - atomic64_set(&counter->hw.period_left, counter->hw.last_period); - - /* - * See if we need to reserve the PMU. - * If no counters are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing - * reserve_pmc_hardware or release_pmc_hardware. - */ - err = 0; - if (!atomic_inc_not_zero(&num_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&num_counters) == 0 && - reserve_pmc_hardware(perf_counter_interrupt)) - err = -EBUSY; - else - atomic_inc(&num_counters); - mutex_unlock(&pmc_reserve_mutex); - } - counter->destroy = hw_perf_counter_destroy; - - if (err) - return ERR_PTR(err); - return &power_pmu; -} - -/* - * A counter has overflowed; update its count and record - * things if requested. Note that interrupts are hard-disabled - * here so there is no possibility of being interrupted. - */ -static void record_and_restart(struct perf_counter *counter, unsigned long val, - struct pt_regs *regs, int nmi) -{ - u64 period = counter->hw.sample_period; - s64 prev, delta, left; - int record = 0; - - /* we don't have to worry about interrupts here */ - prev = atomic64_read(&counter->hw.prev_count); - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - - /* - * See if the total period for this counter has expired, - * and update for the next period. - */ - val = 0; - left = atomic64_read(&counter->hw.period_left) - delta; - if (period) { - if (left <= 0) { - left += period; - if (left <= 0) - left = period; - record = 1; - } - if (left < 0x80000000LL) - val = 0x80000000LL - left; - } - - /* - * Finally record data if requested. - */ - if (record) { - struct perf_sample_data data = { - .addr = 0, - .period = counter->hw.last_period, - }; - - if (counter->attr.sample_type & PERF_SAMPLE_ADDR) - perf_get_data_addr(regs, &data.addr); - - if (perf_counter_overflow(counter, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the counter to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each counter counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } - } - - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); -} - -/* - * Called from generic code to get the misc flags (i.e. processor mode) - * for an event. - */ -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - u32 flags = perf_get_misc_flags(regs); - - if (flags) - return flags; - return user_mode(regs) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Called from generic code to get the instruction pointer - * for an event. - */ -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - unsigned long ip; - - if (TRAP(regs) != 0xf00) - return regs->nip; /* not a PMU interrupt */ - - ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); - return ip; -} - -/* - * Performance monitor interrupt stuff - */ -static void perf_counter_interrupt(struct pt_regs *regs) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - unsigned long val; - int found = 0; - int nmi; - - if (cpuhw->n_limited) - freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), - mfspr(SPRN_PMC6)); - - perf_read_regs(regs); - - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (!counter->hw.idx || is_limited_pmc(counter->hw.idx)) - continue; - val = read_pmc(counter->hw.idx); - if ((int)val < 0) { - /* counter has overflowed */ - found = 1; - record_and_restart(counter, val, regs, nmi); - } - } - - /* - * In case we didn't find and reset the counter that caused - * the interrupt, scan all counters and reset any that are - * negative, to avoid getting continual interrupts. - * Any that we processed in the previous loop will not be negative. - */ - if (!found) { - for (i = 0; i < ppmu->n_counter; ++i) { - if (is_limited_pmc(i + 1)) - continue; - val = read_pmc(i + 1); - if ((int)val < 0) - write_pmc(i + 1, 0); - } - } - - /* - * Reset MMCR0 to its normal value. This will set PMXE and - * clear FC (freeze counters) and PMAO (perf mon alert occurred) - * and thus allow interrupts to occur again. - * XXX might want to use MSR.PM to keep the counters frozen until - * we get back out of this interrupt. - */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - if (nmi) - nmi_exit(); - else - irq_exit(); -} - -void hw_perf_counter_setup(int cpu) -{ - struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); - - if (!ppmu) - return; - memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; -} - -int register_power_pmu(struct power_pmu *pmu) -{ - if (ppmu) - return -EBUSY; /* something's already registered */ - - ppmu = pmu; - pr_info("%s performance monitor hardware support registered\n", - pmu->name); - -#ifdef MSR_HV - /* - * Use FCHV to ignore kernel events if MSR.HV is set. - */ - if (mfmsr() & MSR_HV) - freeze_counters_kernel = MMCR0_FCHV; -#endif /* CONFIG_PPC64 */ - - return 0; -} diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c new file mode 100644 index 00000000000..c98321fcb45 --- /dev/null +++ b/arch/powerpc/kernel/perf_event.c @@ -0,0 +1,1315 @@ +/* + * Performance event support - powerpc architecture code + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct cpu_hw_events { + int n_events; + int n_percpu; + int disabled; + int n_added; + int n_limited; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; + u64 events[MAX_HWEVENTS]; + unsigned int flags[MAX_HWEVENTS]; + unsigned long mmcr[3]; + struct perf_event *limited_event[MAX_LIMITED_HWEVENTS]; + u8 limited_hwidx[MAX_LIMITED_HWEVENTS]; + u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +struct power_pmu *ppmu; + +/* + * Normally, to ignore kernel events we set the FCS (freeze events + * in supervisor mode) bit in MMCR0, but if the kernel runs with the + * hypervisor bit set in the MSR, or if we are running on a processor + * where the hypervisor bit is forced to 1 (as on Apple G5 processors), + * then we need to use the FCHV bit to ignore kernel events. + */ +static unsigned int freeze_events_kernel = MMCR0_FCS; + +/* + * 32-bit doesn't have MMCRA but does have an MMCR2, + * and a few other names are different. + */ +#ifdef CONFIG_PPC32 + +#define MMCR0_FCHV 0 +#define MMCR0_PMCjCE MMCR0_PMCnCE + +#define SPRN_MMCRA SPRN_MMCR2 +#define MMCRA_SAMPLE_ENABLE 0 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + return 0; +} +static inline void perf_read_regs(struct pt_regs *regs) { } +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return 0; +} + +#endif /* CONFIG_PPC32 */ + +/* + * Things that are specific to 64-bit implementations. + */ +#ifdef CONFIG_PPC64 + +static inline unsigned long perf_ip_adjust(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { + unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; + if (slot > 1) + return 4 * (slot - 1); + } + return 0; +} + +/* + * The user wants a data address recorded. + * If we're not doing instruction sampling, give them the SDAR + * (sampled data address). If we are doing instruction sampling, then + * only give them the SDAR if it corresponds to the instruction + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC + * bit in MMCRA. + */ +static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +{ + unsigned long mmcra = regs->dsisr; + unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? + POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; + + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + *addrp = mfspr(SPRN_SDAR); +} + +static inline u32 perf_get_misc_flags(struct pt_regs *regs) +{ + unsigned long mmcra = regs->dsisr; + + if (TRAP(regs) != 0xf00) + return 0; /* not a PMU interrupt */ + + if (ppmu->flags & PPMU_ALT_SIPR) { + if (mmcra & POWER6_MMCRA_SIHV) + return PERF_RECORD_MISC_HYPERVISOR; + return (mmcra & POWER6_MMCRA_SIPR) ? + PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL; + } + if (mmcra & MMCRA_SIHV) + return PERF_RECORD_MISC_HYPERVISOR; + return (mmcra & MMCRA_SIPR) ? PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} + +/* + * Overload regs->dsisr to store MMCRA so we only need to read it once + * on each interrupt. + */ +static inline void perf_read_regs(struct pt_regs *regs) +{ + regs->dsisr = mfspr(SPRN_MMCRA); +} + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ + return !regs->softe; +} + +#endif /* CONFIG_PPC64 */ + +static void perf_event_interrupt(struct pt_regs *regs); + +void perf_event_print_debug(void) +{ +} + +/* + * Read one performance monitor event (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 1: + val = mfspr(SPRN_PMC1); + break; + case 2: + val = mfspr(SPRN_PMC2); + break; + case 3: + val = mfspr(SPRN_PMC3); + break; + case 4: + val = mfspr(SPRN_PMC4); + break; + case 5: + val = mfspr(SPRN_PMC5); + break; + case 6: + val = mfspr(SPRN_PMC6); + break; +#ifdef CONFIG_PPC64 + case 7: + val = mfspr(SPRN_PMC7); + break; + case 8: + val = mfspr(SPRN_PMC8); + break; +#endif /* CONFIG_PPC64 */ + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 1: + mtspr(SPRN_PMC1, val); + break; + case 2: + mtspr(SPRN_PMC2, val); + break; + case 3: + mtspr(SPRN_PMC3, val); + break; + case 4: + mtspr(SPRN_PMC4, val); + break; + case 5: + mtspr(SPRN_PMC5, val); + break; + case 6: + mtspr(SPRN_PMC6, val); + break; +#ifdef CONFIG_PPC64 + case 7: + mtspr(SPRN_PMC7, val); + break; + case 8: + mtspr(SPRN_PMC8, val); + break; +#endif /* CONFIG_PPC64 */ + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } +} + +/* + * Check if a set of events can all go on the PMU at once. + * If they can't, this will look at alternative codes for the events + * and see if any combination of alternative codes is feasible. + * The feasible set is returned in event_id[]. + */ +static int power_check_constraints(struct cpu_hw_events *cpuhw, + u64 event_id[], unsigned int cflags[], + int n_ev) +{ + unsigned long mask, value, nv; + unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS]; + int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS]; + int i, j; + unsigned long addf = ppmu->add_fields; + unsigned long tadd = ppmu->test_adder; + + if (n_ev > ppmu->n_event) + return -1; + + /* First see if the events will go on as-is */ + for (i = 0; i < n_ev; ++i) { + if ((cflags[i] & PPMU_LIMITED_PMC_REQD) + && !ppmu->limited_pmc_event(event_id[i])) { + ppmu->get_alternatives(event_id[i], cflags[i], + cpuhw->alternatives[i]); + event_id[i] = cpuhw->alternatives[i][0]; + } + if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0], + &cpuhw->avalues[i][0])) + return -1; + } + value = mask = 0; + for (i = 0; i < n_ev; ++i) { + nv = (value | cpuhw->avalues[i][0]) + + (value & cpuhw->avalues[i][0] & addf); + if ((((nv + tadd) ^ value) & mask) != 0 || + (((nv + tadd) ^ cpuhw->avalues[i][0]) & + cpuhw->amasks[i][0]) != 0) + break; + value = nv; + mask |= cpuhw->amasks[i][0]; + } + if (i == n_ev) + return 0; /* all OK */ + + /* doesn't work, gather alternatives... */ + if (!ppmu->get_alternatives) + return -1; + for (i = 0; i < n_ev; ++i) { + choice[i] = 0; + n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i], + cpuhw->alternatives[i]); + for (j = 1; j < n_alt[i]; ++j) + ppmu->get_constraint(cpuhw->alternatives[i][j], + &cpuhw->amasks[i][j], + &cpuhw->avalues[i][j]); + } + + /* enumerate all possibilities and see if any will work */ + i = 0; + j = -1; + value = mask = nv = 0; + while (i < n_ev) { + if (j >= 0) { + /* we're backtracking, restore context */ + value = svalues[i]; + mask = smasks[i]; + j = choice[i]; + } + /* + * See if any alternative k for event_id i, + * where k > j, will satisfy the constraints. + */ + while (++j < n_alt[i]) { + nv = (value | cpuhw->avalues[i][j]) + + (value & cpuhw->avalues[i][j] & addf); + if ((((nv + tadd) ^ value) & mask) == 0 && + (((nv + tadd) ^ cpuhw->avalues[i][j]) + & cpuhw->amasks[i][j]) == 0) + break; + } + if (j >= n_alt[i]) { + /* + * No feasible alternative, backtrack + * to event_id i-1 and continue enumerating its + * alternatives from where we got up to. + */ + if (--i < 0) + return -1; + } else { + /* + * Found a feasible alternative for event_id i, + * remember where we got up to with this event_id, + * go on to the next event_id, and start with + * the first alternative for it. + */ + choice[i] = j; + svalues[i] = value; + smasks[i] = mask; + value = nv; + mask |= cpuhw->amasks[i][j]; + ++i; + j = -1; + } + } + + /* OK, we have a feasible combination, tell the caller the solution */ + for (i = 0; i < n_ev; ++i) + event_id[i] = cpuhw->alternatives[i][choice[i]]; + return 0; +} + +/* + * Check if newly-added events have consistent settings for + * exclude_{user,kernel,hv} with each other and any previously + * added events. + */ +static int check_excludes(struct perf_event **ctrs, unsigned int cflags[], + int n_prev, int n_new) +{ + int eu = 0, ek = 0, eh = 0; + int i, n, first; + struct perf_event *event; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + first = 1; + for (i = 0; i < n; ++i) { + if (cflags[i] & PPMU_LIMITED_PMC_OK) { + cflags[i] &= ~PPMU_LIMITED_PMC_REQD; + continue; + } + event = ctrs[i]; + if (first) { + eu = event->attr.exclude_user; + ek = event->attr.exclude_kernel; + eh = event->attr.exclude_hv; + first = 0; + } else if (event->attr.exclude_user != eu || + event->attr.exclude_kernel != ek || + event->attr.exclude_hv != eh) { + return -EAGAIN; + } + } + + if (eu || ek || eh) + for (i = 0; i < n; ++i) + if (cflags[i] & PPMU_LIMITED_PMC_OK) + cflags[i] |= PPMU_LIMITED_PMC_REQD; + + return 0; +} + +static void power_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + if (!event->hw.idx) + return; + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The events are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * On some machines, PMC5 and PMC6 can't be written, don't respect + * the freeze conditions, and don't generate interrupts. This tells + * us if `event' is using such a PMC. + */ +static int is_limited_pmc(int pmcnum) +{ + return (ppmu->flags & PPMU_LIMITED_PMC5_6) + && (pmcnum == 5 || pmcnum == 6); +} + +static void freeze_limited_events(struct cpu_hw_events *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_event *event; + u64 val, prev, delta; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + event = cpuhw->limited_event[i]; + if (!event->hw.idx) + continue; + val = (event->hw.idx == 5) ? pmc5 : pmc6; + prev = atomic64_read(&event->hw.prev_count); + event->hw.idx = 0; + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + } +} + +static void thaw_limited_events(struct cpu_hw_events *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_event *event; + u64 val; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + event = cpuhw->limited_event[i]; + event->hw.idx = cpuhw->limited_hwidx[i]; + val = (event->hw.idx == 5) ? pmc5 : pmc6; + atomic64_set(&event->hw.prev_count, val); + perf_event_update_userpage(event); + } +} + +/* + * Since limited events don't respect the freeze conditions, we + * have to read them immediately after freezing or unfreezing the + * other events. We try to keep the values from the limited + * events as consistent as possible by keeping the delay (in + * cycles and instructions) between freezing/unfreezing and reading + * the limited events as small and consistent as possible. + * Therefore, if any limited events are in use, we read them + * both, and always in the same order, to minimize variability, + * and do it inside the same asm that writes MMCR0. + */ +static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) +{ + unsigned long pmc5, pmc6; + + if (!cpuhw->n_limited) { + mtspr(SPRN_MMCR0, mmcr0); + return; + } + + /* + * Write MMCR0, then read PMC5 and PMC6 immediately. + * To ensure we don't get a performance monitor interrupt + * between writing MMCR0 and freezing/thawing the limited + * events, we first write MMCR0 with the event overflow + * interrupt enable bits turned off. + */ + asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" + : "=&r" (pmc5), "=&r" (pmc6) + : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), + "i" (SPRN_MMCR0), + "i" (SPRN_PMC5), "i" (SPRN_PMC6)); + + if (mmcr0 & MMCR0_FC) + freeze_limited_events(cpuhw, pmc5, pmc6); + else + thaw_limited_events(cpuhw, pmc5, pmc6); + + /* + * Write the full MMCR0 including the event overflow interrupt + * enable bits, if necessary. + */ + if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCR0, mmcr0); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. + */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + if (!ppmu) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + cpuhw->n_added = 0; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + /* + * Disable instruction sampling if it was enabled + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mtspr(SPRN_MMCRA, + cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mb(); + } + + /* + * Set the 'freeze events' bit. + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the events + * before we return. + */ + write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); + mb(); + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. + * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct perf_event *event; + struct cpu_hw_events *cpuhw; + unsigned long flags; + long i; + unsigned long val; + s64 left; + unsigned int hwc_index[MAX_HWEVENTS]; + int n_lim; + int idx; + + if (!ppmu) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) { + local_irq_restore(flags); + return; + } + cpuhw->disabled = 0; + + /* + * If we didn't change anything, or only removed events, + * no need to recalculate MMCR* settings and reset the PMCs. + * Just reenable the PMU with the current MMCR* settings + * (possibly updated for removal of events). + */ + if (!cpuhw->n_added) { + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + if (cpuhw->n_events == 0) + ppc_set_pmu_inuse(0); + goto out_enable; + } + + /* + * Compute MMCR* values for the new set of events + */ + if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index, + cpuhw->mmcr)) { + /* shouldn't ever get here */ + printk(KERN_ERR "oops compute_mmcr failed\n"); + goto out; + } + + /* + * Add in MMCR0 freeze bits corresponding to the + * attr.exclude_* bits for the first event. + * We have already checked that all events have the + * same values for these bits as the first event. + */ + event = cpuhw->event[0]; + if (event->attr.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (event->attr.exclude_kernel) + cpuhw->mmcr[0] |= freeze_events_kernel; + if (event->attr.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + + /* + * Write the new configuration to MMCR* with the freeze + * bit set and set the hardware events to their initial values. + * Then unfreeze the events. + */ + ppc_set_pmu_inuse(1); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + | MMCR0_FC); + + /* + * Read off any pre-existing events that need to move + * to another PMC. + */ + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) { + power_pmu_read(event); + write_pmc(event->hw.idx, 0); + event->hw.idx = 0; + } + } + + /* + * Initialize the PMCs for all the new and moved events. + */ + cpuhw->n_limited = n_lim = 0; + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (event->hw.idx) + continue; + idx = hwc_index[i] + 1; + if (is_limited_pmc(idx)) { + cpuhw->limited_event[n_lim] = event; + cpuhw->limited_hwidx[n_lim] = idx; + ++n_lim; + continue; + } + val = 0; + if (event->hw.sample_period) { + left = atomic64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&event->hw.prev_count, val); + event->hw.idx = idx; + write_pmc(idx, val); + perf_event_update_userpage(event); + } + cpuhw->n_limited = n_lim; + cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + + out_enable: + mb(); + write_mmcr0(cpuhw, cpuhw->mmcr[0]); + + /* + * Enable instruction sampling if necessary + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mb(); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + } + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *ctrs[], u64 *events, + unsigned int *flags) +{ + int n = 0; + struct perf_event *event; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + flags[n] = group->hw.event_base; + events[n++] = group->hw.config; + } + list_for_each_entry(event, &group->sibling_list, list_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = event; + flags[n] = event->hw.event_base; + events[n++] = event->hw.config; + } + } + return n; +} + +static void event_sched_in(struct perf_event *event, int cpu) +{ + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; + event->tstamp_running += event->ctx->time - event->tstamp_stopped; + if (is_software_event(event)) + event->pmu->enable(event); +} + +/* + * Called to enable a whole group of events. + * Returns 1 if the group was enabled, or -EAGAIN if it could not be. + * Assumes the caller has disabled interrupts and has + * frozen the PMU with hw_perf_save_disable. + */ +int hw_perf_group_sched_in(struct perf_event *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu) +{ + struct cpu_hw_events *cpuhw; + long i, n, n0; + struct perf_event *sub; + + if (!ppmu) + return 0; + cpuhw = &__get_cpu_var(cpu_hw_events); + n0 = cpuhw->n_events; + n = collect_events(group_leader, ppmu->n_event - n0, + &cpuhw->event[n0], &cpuhw->events[n0], + &cpuhw->flags[n0]); + if (n < 0) + return -EAGAIN; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, n)) + return -EAGAIN; + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); + if (i < 0) + return -EAGAIN; + cpuhw->n_events = n0 + n; + cpuhw->n_added += n; + + /* + * OK, this group can go on; update event states etc., + * and enable any software events + */ + for (i = n0; i < n0 + n; ++i) + cpuhw->event[i]->hw.config = cpuhw->events[i]; + cpuctx->active_oncpu += n; + n = 1; + event_sched_in(group_leader, cpu); + list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { + if (sub->state != PERF_EVENT_STATE_OFF) { + event_sched_in(sub, cpu); + ++n; + } + } + ctx->nr_active += n; + + return 1; +} + +/* + * Add a event to the PMU. + * If all events are not already frozen, then we disable and + * re-enable the PMU in order to get hw_perf_enable to do the + * actual work of reconfiguring the PMU. + */ +static int power_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + int n0; + int ret = -EAGAIN; + + local_irq_save(flags); + perf_disable(); + + /* + * Add the event to the list (if there is room) + * and check whether the total set is still feasible. + */ + cpuhw = &__get_cpu_var(cpu_hw_events); + n0 = cpuhw->n_events; + if (n0 >= ppmu->n_event) + goto out; + cpuhw->event[n0] = event; + cpuhw->events[n0] = event->hw.config; + cpuhw->flags[n0] = event->hw.event_base; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) + goto out; + if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) + goto out; + + event->hw.config = cpuhw->events[n0]; + ++cpuhw->n_events; + ++cpuhw->n_added; + + ret = 0; + out: + perf_enable(); + local_irq_restore(flags); + return ret; +} + +/* + * Remove a event from the PMU. + */ +static void power_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + long i; + unsigned long flags; + + local_irq_save(flags); + perf_disable(); + + power_pmu_read(event); + + cpuhw = &__get_cpu_var(cpu_hw_events); + for (i = 0; i < cpuhw->n_events; ++i) { + if (event == cpuhw->event[i]) { + while (++i < cpuhw->n_events) + cpuhw->event[i-1] = cpuhw->event[i]; + --cpuhw->n_events; + ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr); + if (event->hw.idx) { + write_pmc(event->hw.idx, 0); + event->hw.idx = 0; + } + perf_event_update_userpage(event); + break; + } + } + for (i = 0; i < cpuhw->n_limited; ++i) + if (event == cpuhw->limited_event[i]) + break; + if (i < cpuhw->n_limited) { + while (++i < cpuhw->n_limited) { + cpuhw->limited_event[i-1] = cpuhw->limited_event[i]; + cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; + } + --cpuhw->n_limited; + } + if (cpuhw->n_events == 0) { + /* disable exceptions if no events are running */ + cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + } + + perf_enable(); + local_irq_restore(flags); +} + +/* + * Re-enable interrupts on a event after they were throttled + * because they were coming too fast. + */ +static void power_pmu_unthrottle(struct perf_event *event) +{ + s64 val, left; + unsigned long flags; + + if (!event->hw.idx || !event->hw.sample_period) + return; + local_irq_save(flags); + perf_disable(); + power_pmu_read(event); + left = event->hw.sample_period; + event->hw.last_period = left; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + perf_enable(); + local_irq_restore(flags); +} + +struct pmu power_pmu = { + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, +}; + +/* + * Return 1 if we might be able to put event on a limited PMC, + * or 0 if not. + * A event can only go on a limited PMC if it counts something + * that a limited PMC can count, doesn't require interrupts, and + * doesn't exclude any processor mode. + */ +static int can_go_on_limited_pmc(struct perf_event *event, u64 ev, + unsigned int flags) +{ + int n; + u64 alt[MAX_EVENT_ALTERNATIVES]; + + if (event->attr.exclude_user + || event->attr.exclude_kernel + || event->attr.exclude_hv + || event->attr.sample_period) + return 0; + + if (ppmu->limited_pmc_event(ev)) + return 1; + + /* + * The requested event_id isn't on a limited PMC already; + * see if any alternative code goes on a limited PMC. + */ + if (!ppmu->get_alternatives) + return 0; + + flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; + n = ppmu->get_alternatives(ev, flags, alt); + + return n > 0; +} + +/* + * Find an alternative event_id that goes on a normal PMC, if possible, + * and return the event_id code, or 0 if there is no such alternative. + * (Note: event_id code 0 is "don't count" on all machines.) + */ +static u64 normal_pmc_alternative(u64 ev, unsigned long flags) +{ + u64 alt[MAX_EVENT_ALTERNATIVES]; + int n; + + flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); + n = ppmu->get_alternatives(ev, flags, alt); + if (!n) + return 0; + return alt[0]; +} + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Translate a generic cache event_id config to a raw event_id code. + */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + u64 ev; + unsigned long flags; + struct perf_event *ctrs[MAX_HWEVENTS]; + u64 events[MAX_HWEVENTS]; + unsigned int cflags[MAX_HWEVENTS]; + int n; + int err; + struct cpu_hw_events *cpuhw; + + if (!ppmu) + return ERR_PTR(-ENXIO); + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + ev = event->attr.config; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) + return ERR_PTR(-EOPNOTSUPP); + ev = ppmu->generic_events[ev]; + break; + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(event->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + case PERF_TYPE_RAW: + ev = event->attr.config; + break; + default: + return ERR_PTR(-EINVAL); + } + event->hw.config_base = ev; + event->hw.idx = 0; + + /* + * If we are not running on a hypervisor, force the + * exclude_hv bit to 0 so that we don't care what + * the user set it to. + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + event->attr.exclude_hv = 0; + + /* + * If this is a per-task event, then we can use + * PM_RUN_* events interchangeably with their non RUN_* + * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. + * XXX we should check if the task is an idle task. + */ + flags = 0; + if (event->ctx->task) + flags |= PPMU_ONLY_COUNT_RUN; + + /* + * If this machine has limited events, check whether this + * event_id could go on a limited event. + */ + if (ppmu->flags & PPMU_LIMITED_PMC5_6) { + if (can_go_on_limited_pmc(event, ev, flags)) { + flags |= PPMU_LIMITED_PMC_OK; + } else if (ppmu->limited_pmc_event(ev)) { + /* + * The requested event_id is on a limited PMC, + * but we can't use a limited PMC; see if any + * alternative goes on a normal PMC. + */ + ev = normal_pmc_alternative(ev, flags); + if (!ev) + return ERR_PTR(-EINVAL); + } + } + + /* + * If this is in a group, check if it can go on with all the + * other hardware events in the group. We assume the event + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, ppmu->n_event - 1, + ctrs, events, cflags); + if (n < 0) + return ERR_PTR(-EINVAL); + } + events[n] = ev; + ctrs[n] = event; + cflags[n] = flags; + if (check_excludes(ctrs, cflags, n, 1)) + return ERR_PTR(-EINVAL); + + cpuhw = &get_cpu_var(cpu_hw_events); + err = power_check_constraints(cpuhw, events, cflags, n + 1); + put_cpu_var(cpu_hw_events); + if (err) + return ERR_PTR(-EINVAL); + + event->hw.config = events[n]; + event->hw.event_base = cflags[n]; + event->hw.last_period = event->hw.sample_period; + atomic64_set(&event->hw.period_left, event->hw.last_period); + + /* + * See if we need to reserve the PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware(perf_event_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + return ERR_PTR(err); + return &power_pmu; +} + +/* + * A event has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_event *event, unsigned long val, + struct pt_regs *regs, int nmi) +{ + u64 period = event->hw.sample_period; + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&event->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + + /* + * See if the total period for this event has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&event->hw.period_left) - delta; + if (period) { + if (left <= 0) { + left += period; + if (left <= 0) + left = period; + record = 1; + } + if (left < 0x80000000LL) + val = 0x80000000LL - left; + } + + /* + * Finally record data if requested. + */ + if (record) { + struct perf_sample_data data = { + .addr = 0, + .period = event->hw.last_period, + }; + + if (event->attr.sample_type & PERF_SAMPLE_ADDR) + perf_get_data_addr(regs, &data.addr); + + if (perf_event_overflow(event, nmi, &data, regs)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the event to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each event counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } + } + + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); +} + +/* + * Called from generic code to get the misc flags (i.e. processor mode) + * for an event_id. + */ +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + u32 flags = perf_get_misc_flags(regs); + + if (flags) + return flags; + return user_mode(regs) ? PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; +} + +/* + * Called from generic code to get the instruction pointer + * for an event_id. + */ +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + unsigned long ip; + + if (TRAP(regs) != 0xf00) + return regs->nip; /* not a PMU interrupt */ + + ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs); + return ip; +} + +/* + * Performance monitor interrupt stuff + */ +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + if (cpuhw->n_limited) + freeze_limited_events(cpuhw, mfspr(SPRN_PMC5), + mfspr(SPRN_PMC6)); + + perf_read_regs(regs); + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < cpuhw->n_events; ++i) { + event = cpuhw->event[i]; + if (!event->hw.idx || is_limited_pmc(event->hw.idx)) + continue; + val = read_pmc(event->hw.idx); + if ((int)val < 0) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } + } + + /* + * In case we didn't find and reset the event that caused + * the interrupt, scan all events and reset any that are + * negative, to avoid getting continual interrupts. + * Any that we processed in the previous loop will not be negative. + */ + if (!found) { + for (i = 0; i < ppmu->n_event; ++i) { + if (is_limited_pmc(i + 1)) + continue; + val = read_pmc(i + 1); + if ((int)val < 0) + write_pmc(i + 1, 0); + } + } + + /* + * Reset MMCR0 to its normal value. This will set PMXE and + * clear FC (freeze events) and PMAO (perf mon alert occurred) + * and thus allow interrupts to occur again. + * XXX might want to use MSR.PM to keep the events frozen until + * we get back out of this interrupt. + */ + write_mmcr0(cpuhw, cpuhw->mmcr[0]); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + if (!ppmu) + return; + memset(cpuhw, 0, sizeof(*cpuhw)); + cpuhw->mmcr[0] = MMCR0_FC; +} + +int register_power_pmu(struct power_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + +#ifdef MSR_HV + /* + * Use FCHV to ignore kernel events if MSR.HV is set. + */ + if (mfmsr() & MSR_HV) + freeze_events_kernel = MMCR0_FCHV; +#endif /* CONFIG_PPC64 */ + + return 0; +} diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 3c90a3d9173..2a361cdda63 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 31918af3e35..0f4c1c73a6a 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 867f6f66396..c351b3a57fb 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index fa21890531d..ca399ba5034 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 018d094d92f..28a4daacdc0 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 75dccb71a04..479574413a9 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 465e498bcb3..df45a7449a6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -527,25 +527,25 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32) -DEFINE_PER_CPU(u8, perf_counter_pending); +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) +DEFINE_PER_CPU(u8, perf_event_pending); -void set_perf_counter_pending(void) +void set_perf_event_pending(void) { - get_cpu_var(perf_counter_pending) = 1; + get_cpu_var(perf_event_pending) = 1; set_dec(1); - put_cpu_var(perf_counter_pending); + put_cpu_var(perf_event_pending); } -#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending) -#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0 +#define test_perf_event_pending() __get_cpu_var(perf_event_pending) +#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ +#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ -#define test_perf_counter_pending() 0 -#define clear_perf_counter_pending() +#define test_perf_event_pending() 0 +#define clear_perf_event_pending() -#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ /* * For iSeries shared processors, we have to let the hypervisor @@ -573,9 +573,9 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 830bef0a113..e7dae82c128 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -312,7 +312,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { @@ -323,7 +323,7 @@ good_area: #endif } else { current->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9efc8bda01b..e382cae678b 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -280,9 +280,9 @@ config PPC_HAVE_PMU_SUPPORT config PPC_PERF_CTRS def_bool y - depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT + depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT help - This enables the powerpc-specific perf_counter back-end. + This enables the powerpc-specific perf_event back-end. config SMP depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 1c866efd217..43c0acad716 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -94,7 +94,7 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config SCHED_OMIT_FRAME_POINTER bool diff --git a/arch/s390/include/asm/perf_counter.h b/arch/s390/include/asm/perf_counter.h deleted file mode 100644 index 7015188c2cc..00000000000 --- a/arch/s390/include/asm/perf_counter.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * Performance counter support - s390 specific definitions. - * - * Copyright 2009 Martin Schwidefsky, IBM Corporation. - */ - -static inline void set_perf_counter_pending(void) {} -static inline void clear_perf_counter_pending(void) {} - -#define PERF_COUNTER_INDEX_OFFSET 0 diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h new file mode 100644 index 00000000000..3840cbe7763 --- /dev/null +++ b/arch/s390/include/asm/perf_event.h @@ -0,0 +1,10 @@ +/* + * Performance event support - s390 specific definitions. + * + * Copyright 2009 Martin Schwidefsky, IBM Corporation. + */ + +static inline void set_perf_event_pending(void) {} +static inline void clear_perf_event_pending(void) {} + +#define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index c80602d7c88..cb5232df151 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -268,7 +268,7 @@ #define __NR_preadv 328 #define __NR_pwritev 329 #define __NR_rt_tgsigqueueinfo 330 -#define __NR_perf_counter_open 331 +#define __NR_perf_event_open 331 #define NR_syscalls 332 /* diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 88a83366819..624790042d4 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1832,11 +1832,11 @@ compat_sys_rt_tgsigqueueinfo_wrapper: llgtr %r5,%r5 # struct compat_siginfo * jg compat_sys_rt_tgsigqueueinfo_wrapper # branch to system call - .globl sys_perf_counter_open_wrapper -sys_perf_counter_open_wrapper: - llgtr %r2,%r2 # const struct perf_counter_attr * + .globl sys_perf_event_open_wrapper +sys_perf_event_open_wrapper: + llgtr %r2,%r2 # const struct perf_event_attr * lgfr %r3,%r3 # pid_t lgfr %r4,%r4 # int lgfr %r5,%r5 # int llgfr %r6,%r6 # unsigned long - jg sys_perf_counter_open # branch to system call + jg sys_perf_event_open # branch to system call diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index ad1acd20038..0b5083681e7 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -339,4 +339,4 @@ SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */ -SYSCALL(sys_perf_counter_open,sys_perf_counter_open,sys_perf_counter_open_wrapper) +SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 1abbadd497e..6d507462967 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -10,7 +10,7 @@ * Copyright (C) 1995 Linus Torvalds */ -#include +#include #include #include #include @@ -306,7 +306,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) * interrupts again and then search the VMAs */ local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); si_code = SEGV_MAPERR; @@ -366,11 +366,11 @@ good_area: } if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 4df3570fe51..b940424f8cc 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -16,7 +16,7 @@ config SUPERH select HAVE_IOREMAP_PROT if MMU select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA diff --git a/arch/sh/include/asm/perf_counter.h b/arch/sh/include/asm/perf_counter.h deleted file mode 100644 index d8e6bb9c0cc..00000000000 --- a/arch/sh/include/asm/perf_counter.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_SH_PERF_COUNTER_H -#define __ASM_SH_PERF_COUNTER_H - -/* SH only supports software counters through this interface. */ -static inline void set_perf_counter_pending(void) {} - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#endif /* __ASM_SH_PERF_COUNTER_H */ diff --git a/arch/sh/include/asm/perf_event.h b/arch/sh/include/asm/perf_event.h new file mode 100644 index 00000000000..11a302297ab --- /dev/null +++ b/arch/sh/include/asm/perf_event.h @@ -0,0 +1,9 @@ +#ifndef __ASM_SH_PERF_EVENT_H +#define __ASM_SH_PERF_EVENT_H + +/* SH only supports software events through this interface. */ +static inline void set_perf_event_pending(void) {} + +#define PERF_EVENT_INDEX_OFFSET 0 + +#endif /* __ASM_SH_PERF_EVENT_H */ diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h index 925dd40d9d5..f3fd1b9eb6b 100644 --- a/arch/sh/include/asm/unistd_32.h +++ b/arch/sh/include/asm/unistd_32.h @@ -344,7 +344,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #define NR_syscalls 337 diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h index 2b84bc916bc..343ce8f073e 100644 --- a/arch/sh/include/asm/unistd_64.h +++ b/arch/sh/include/asm/unistd_64.h @@ -384,7 +384,7 @@ #define __NR_preadv 361 #define __NR_pwritev 362 #define __NR_rt_tgsigqueueinfo 363 -#define __NR_perf_counter_open 364 +#define __NR_perf_event_open 364 #ifdef __KERNEL__ diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index 16ba225ede8..19fd11dd987 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -352,4 +352,4 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index af6fb7410c2..5bfde6c7749 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -390,4 +390,4 @@ sys_call_table: .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c index 781b413ff82..47530104e0a 100644 --- a/arch/sh/mm/fault_32.c +++ b/arch/sh/mm/fault_32.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -157,7 +157,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, if ((regs->sr & SR_IMASK) != SR_IMASK) local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -208,11 +208,11 @@ survive: } if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/sh/mm/tlbflush_64.c b/arch/sh/mm/tlbflush_64.c index 2dcc48528f7..de0b0e88182 100644 --- a/arch/sh/mm/tlbflush_64.c +++ b/arch/sh/mm/tlbflush_64.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -116,7 +116,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, /* Not an IO address, so reenable interrupts */ local_irq_enable(); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt or have no user @@ -201,11 +201,11 @@ survive: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 86b82348b97..97fca4695e0 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,7 +25,7 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG @@ -47,7 +47,7 @@ config SPARC64 select RTC_DRV_BQ4802 select RTC_DRV_SUN4V select RTC_DRV_STARFIRE - select HAVE_PERF_COUNTERS + select HAVE_PERF_EVENTS config ARCH_DEFCONFIG string diff --git a/arch/sparc/include/asm/perf_counter.h b/arch/sparc/include/asm/perf_counter.h deleted file mode 100644 index 5d7a8ca0e49..00000000000 --- a/arch/sparc/include/asm/perf_counter.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef __ASM_SPARC_PERF_COUNTER_H -#define __ASM_SPARC_PERF_COUNTER_H - -extern void set_perf_counter_pending(void); - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -#else -static inline void init_hw_perf_counters(void) { } -#endif - -#endif diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h new file mode 100644 index 00000000000..7e2669894ce --- /dev/null +++ b/arch/sparc/include/asm/perf_event.h @@ -0,0 +1,14 @@ +#ifndef __ASM_SPARC_PERF_EVENT_H +#define __ASM_SPARC_PERF_EVENT_H + +extern void set_perf_event_pending(void); + +#define PERF_EVENT_INDEX_OFFSET 0 + +#ifdef CONFIG_PERF_EVENTS +extern void init_hw_perf_events(void); +#else +static inline void init_hw_perf_events(void) { } +#endif + +#endif diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index 706df669f3b..42f2316c3ea 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -395,7 +395,7 @@ #define __NR_preadv 324 #define __NR_pwritev 325 #define __NR_rt_tgsigqueueinfo 326 -#define __NR_perf_counter_open 327 +#define __NR_perf_event_open 327 #define NR_SYSCALLS 328 diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 247cc620cee..3a048fad7ee 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -104,5 +104,5 @@ obj-$(CONFIG_AUDIT) += audit.o audit--$(CONFIG_AUDIT) := compat_audit.o obj-$(CONFIG_COMPAT) += $(audit--y) -pc--$(CONFIG_PERF_COUNTERS) := perf_counter.o +pc--$(CONFIG_PERF_EVENTS) := perf_event.o obj-$(CONFIG_SPARC64) += $(pc--y) diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 378eb53e077..b129611590a 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include @@ -265,7 +265,7 @@ int __init nmi_init(void) } } if (!err) - init_hw_perf_counters(); + init_hw_perf_events(); return err; } diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 68ff0010707..2d94e7a03af 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include @@ -15,7 +15,7 @@ /* This code is shared between various users of the performance * counters. Users will be oprofile, pseudo-NMI watchdog, and the - * perf_counter support layer. + * perf_event support layer. */ #define PCR_SUN4U_ENABLE (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) @@ -42,14 +42,14 @@ void deferred_pcr_work_irq(int irq, struct pt_regs *regs) old_regs = set_irq_regs(regs); irq_enter(); -#ifdef CONFIG_PERF_COUNTERS - perf_counter_do_pending(); +#ifdef CONFIG_PERF_EVENTS + perf_event_do_pending(); #endif irq_exit(); set_irq_regs(old_regs); } -void set_perf_counter_pending(void) +void set_perf_event_pending(void) { set_softint(1 << PIL_DEFERRED_PCR_WORK); } diff --git a/arch/sparc/kernel/perf_counter.c b/arch/sparc/kernel/perf_counter.c deleted file mode 100644 index b1265ce8a05..00000000000 --- a/arch/sparc/kernel/perf_counter.c +++ /dev/null @@ -1,556 +0,0 @@ -/* Performance counter support for sparc64. - * - * Copyright (C) 2009 David S. Miller - * - * This code is based almost entirely upon the x86 perf counter - * code, which is: - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* Sparc64 chips have two performance counters, 32-bits each, with - * overflow interrupts generated on transition from 0xffffffff to 0. - * The counters are accessed in one go using a 64-bit register. - * - * Both counters are controlled using a single control register. The - * only way to stop all sampling is to clear all of the context (user, - * supervisor, hypervisor) sampling enable bits. But these bits apply - * to both counters, thus the two counters can't be enabled/disabled - * individually. - * - * The control register has two event fields, one for each of the two - * counters. It's thus nearly impossible to have one counter going - * while keeping the other one stopped. Therefore it is possible to - * get overflow interrupts for counters not currently "in use" and - * that condition must be checked in the overflow interrupt handler. - * - * So we use a hack, in that we program inactive counters with the - * "sw_count0" and "sw_count1" events. These count how many times - * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an - * unusual way to encode a NOP and therefore will not trigger in - * normal code. - */ - -#define MAX_HWCOUNTERS 2 -#define MAX_PERIOD ((1UL << 32) - 1) - -#define PIC_UPPER_INDEX 0 -#define PIC_LOWER_INDEX 1 - -struct cpu_hw_counters { - struct perf_counter *counters[MAX_HWCOUNTERS]; - unsigned long used_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; - unsigned long active_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)]; - int enabled; -}; -DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; - -struct perf_event_map { - u16 encoding; - u8 pic_mask; -#define PIC_NONE 0x00 -#define PIC_UPPER 0x01 -#define PIC_LOWER 0x02 -}; - -struct sparc_pmu { - const struct perf_event_map *(*event_map)(int); - int max_events; - int upper_shift; - int lower_shift; - int event_mask; - int hv_bit; - int irq_bit; - int upper_nop; - int lower_nop; -}; - -static const struct perf_event_map ultra3i_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, - [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, -}; - -static const struct perf_event_map *ultra3i_event_map(int event) -{ - return &ultra3i_perfmon_event_map[event]; -} - -static const struct sparc_pmu ultra3i_pmu = { - .event_map = ultra3i_event_map, - .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), - .upper_shift = 11, - .lower_shift = 4, - .event_mask = 0x3f, - .upper_nop = 0x1c, - .lower_nop = 0x14, -}; - -static const struct perf_event_map niagara2_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER }, - [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER }, -}; - -static const struct perf_event_map *niagara2_event_map(int event) -{ - return &niagara2_perfmon_event_map[event]; -} - -static const struct sparc_pmu niagara2_pmu = { - .event_map = niagara2_event_map, - .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), - .upper_shift = 19, - .lower_shift = 6, - .event_mask = 0xfff, - .hv_bit = 0x8, - .irq_bit = 0x03, - .upper_nop = 0x220, - .lower_nop = 0x220, -}; - -static const struct sparc_pmu *sparc_pmu __read_mostly; - -static u64 event_encoding(u64 event, int idx) -{ - if (idx == PIC_UPPER_INDEX) - event <<= sparc_pmu->upper_shift; - else - event <<= sparc_pmu->lower_shift; - return event; -} - -static u64 mask_for_index(int idx) -{ - return event_encoding(sparc_pmu->event_mask, idx); -} - -static u64 nop_for_index(int idx) -{ - return event_encoding(idx == PIC_UPPER_INDEX ? - sparc_pmu->upper_nop : - sparc_pmu->lower_nop, idx); -} - -static inline void sparc_pmu_enable_counter(struct hw_perf_counter *hwc, - int idx) -{ - u64 val, mask = mask_for_index(idx); - - val = pcr_ops->read(); - pcr_ops->write((val & ~mask) | hwc->config); -} - -static inline void sparc_pmu_disable_counter(struct hw_perf_counter *hwc, - int idx) -{ - u64 mask = mask_for_index(idx); - u64 nop = nop_for_index(idx); - u64 val = pcr_ops->read(); - - pcr_ops->write((val & ~mask) | nop); -} - -void hw_perf_enable(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - int i; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - val = pcr_ops->read(); - - for (i = 0; i < MAX_HWCOUNTERS; i++) { - struct perf_counter *cp = cpuc->counters[i]; - struct hw_perf_counter *hwc; - - if (!cp) - continue; - hwc = &cp->hw; - val |= hwc->config_base; - } - - pcr_ops->write(val); -} - -void hw_perf_disable(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - - val = pcr_ops->read(); - val &= ~(PCR_UTRACE | PCR_STRACE | - sparc_pmu->hv_bit | sparc_pmu->irq_bit); - pcr_ops->write(val); -} - -static u32 read_pmc(int idx) -{ - u64 val; - - read_pic(val); - if (idx == PIC_UPPER_INDEX) - val >>= 32; - - return val & 0xffffffff; -} - -static void write_pmc(int idx, u64 val) -{ - u64 shift, mask, pic; - - shift = 0; - if (idx == PIC_UPPER_INDEX) - shift = 32; - - mask = ((u64) 0xffffffff) << shift; - val <<= shift; - - read_pic(pic); - pic &= ~mask; - pic |= val; - write_pic(pic); -} - -static int sparc_perf_counter_set_period(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0; - - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - if (left > MAX_PERIOD) - left = MAX_PERIOD; - - atomic64_set(&hwc->prev_count, (u64)-left); - - write_pmc(idx, (u64)(-left) & 0xffffffff); - - perf_counter_update_userpage(counter); - - return ret; -} - -static int sparc_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - sparc_pmu_disable_counter(hwc, idx); - - cpuc->counters[idx] = counter; - set_bit(idx, cpuc->active_mask); - - sparc_perf_counter_set_period(counter, hwc, idx); - sparc_pmu_enable_counter(hwc, idx); - perf_counter_update_userpage(counter); - return 0; -} - -static u64 sparc_perf_counter_update(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - int shift = 64 - 32; - u64 prev_raw_count, new_raw_count; - s64 delta; - -again: - prev_raw_count = atomic64_read(&hwc->prev_count); - new_raw_count = read_pmc(idx); - - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static void sparc_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - clear_bit(idx, cpuc->active_mask); - sparc_pmu_disable_counter(hwc, idx); - - barrier(); - - sparc_perf_counter_update(counter, hwc, idx); - cpuc->counters[idx] = NULL; - clear_bit(idx, cpuc->used_mask); - - perf_counter_update_userpage(counter); -} - -static void sparc_pmu_read(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - sparc_perf_counter_update(counter, hwc, hwc->idx); -} - -static void sparc_pmu_unthrottle(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - sparc_pmu_enable_counter(hwc, hwc->idx); -} - -static atomic_t active_counters = ATOMIC_INIT(0); -static DEFINE_MUTEX(pmc_grab_mutex); - -void perf_counter_grab_pmc(void) -{ - if (atomic_inc_not_zero(&active_counters)) - return; - - mutex_lock(&pmc_grab_mutex); - if (atomic_read(&active_counters) == 0) { - if (atomic_read(&nmi_active) > 0) { - on_each_cpu(stop_nmi_watchdog, NULL, 1); - BUG_ON(atomic_read(&nmi_active) != 0); - } - atomic_inc(&active_counters); - } - mutex_unlock(&pmc_grab_mutex); -} - -void perf_counter_release_pmc(void) -{ - if (atomic_dec_and_mutex_lock(&active_counters, &pmc_grab_mutex)) { - if (atomic_read(&nmi_active) == 0) - on_each_cpu(start_nmi_watchdog, NULL, 1); - mutex_unlock(&pmc_grab_mutex); - } -} - -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - perf_counter_release_pmc(); -} - -static int __hw_perf_counter_init(struct perf_counter *counter) -{ - struct perf_counter_attr *attr = &counter->attr; - struct hw_perf_counter *hwc = &counter->hw; - const struct perf_event_map *pmap; - u64 enc; - - if (atomic_read(&nmi_active) < 0) - return -ENODEV; - - if (attr->type != PERF_TYPE_HARDWARE) - return -EOPNOTSUPP; - - if (attr->config >= sparc_pmu->max_events) - return -EINVAL; - - perf_counter_grab_pmc(); - counter->destroy = hw_perf_counter_destroy; - - /* We save the enable bits in the config_base. So to - * turn off sampling just write 'config', and to enable - * things write 'config | config_base'. - */ - hwc->config_base = sparc_pmu->irq_bit; - if (!attr->exclude_user) - hwc->config_base |= PCR_UTRACE; - if (!attr->exclude_kernel) - hwc->config_base |= PCR_STRACE; - if (!attr->exclude_hv) - hwc->config_base |= sparc_pmu->hv_bit; - - if (!hwc->sample_period) { - hwc->sample_period = MAX_PERIOD; - hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); - } - - pmap = sparc_pmu->event_map(attr->config); - - enc = pmap->encoding; - if (pmap->pic_mask & PIC_UPPER) { - hwc->idx = PIC_UPPER_INDEX; - enc <<= sparc_pmu->upper_shift; - } else { - hwc->idx = PIC_LOWER_INDEX; - enc <<= sparc_pmu->lower_shift; - } - - hwc->config |= enc; - return 0; -} - -static const struct pmu pmu = { - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, - .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, -}; - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - int err = __hw_perf_counter_init(counter); - - if (err) - return ERR_PTR(err); - return &pmu; -} - -void perf_counter_print_debug(void) -{ - unsigned long flags; - u64 pcr, pic; - int cpu; - - if (!sparc_pmu) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - - pcr = pcr_ops->read(); - read_pic(pic); - - pr_info("\n"); - pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", - cpu, pcr, pic); - - local_irq_restore(flags); -} - -static int __kprobes perf_counter_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) -{ - struct die_args *args = __args; - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct pt_regs *regs; - int idx; - - if (!atomic_read(&active_counters)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - for (idx = 0; idx < MAX_HWCOUNTERS; idx++) { - struct perf_counter *counter = cpuc->counters[idx]; - struct hw_perf_counter *hwc; - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - hwc = &counter->hw; - val = sparc_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << 31)) - continue; - - data.period = counter->hw.last_period; - if (!sparc_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - sparc_pmu_disable_counter(hwc, idx); - } - - return NOTIFY_STOP; -} - -static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler, -}; - -static bool __init supported_pmu(void) -{ - if (!strcmp(sparc_pmu_type, "ultra3i")) { - sparc_pmu = &ultra3i_pmu; - return true; - } - if (!strcmp(sparc_pmu_type, "niagara2")) { - sparc_pmu = &niagara2_pmu; - return true; - } - return false; -} - -void __init init_hw_perf_counters(void) -{ - pr_info("Performance counters: "); - - if (!supported_pmu()) { - pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); - return; - } - - pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - - /* All sparc64 PMUs currently have 2 counters. But this simple - * driver only supports one active counter at a time. - */ - perf_max_counters = 1; - - register_die_notifier(&perf_counter_nmi_notifier); -} diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c new file mode 100644 index 00000000000..2d6a1b10c81 --- /dev/null +++ b/arch/sparc/kernel/perf_event.c @@ -0,0 +1,556 @@ +/* Performance event support for sparc64. + * + * Copyright (C) 2009 David S. Miller + * + * This code is based almost entirely upon the x86 perf event + * code, which is: + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* Sparc64 chips have two performance counters, 32-bits each, with + * overflow interrupts generated on transition from 0xffffffff to 0. + * The counters are accessed in one go using a 64-bit register. + * + * Both counters are controlled using a single control register. The + * only way to stop all sampling is to clear all of the context (user, + * supervisor, hypervisor) sampling enable bits. But these bits apply + * to both counters, thus the two counters can't be enabled/disabled + * individually. + * + * The control register has two event fields, one for each of the two + * counters. It's thus nearly impossible to have one counter going + * while keeping the other one stopped. Therefore it is possible to + * get overflow interrupts for counters not currently "in use" and + * that condition must be checked in the overflow interrupt handler. + * + * So we use a hack, in that we program inactive counters with the + * "sw_count0" and "sw_count1" events. These count how many times + * the instruction "sethi %hi(0xfc000), %g0" is executed. It's an + * unusual way to encode a NOP and therefore will not trigger in + * normal code. + */ + +#define MAX_HWEVENTS 2 +#define MAX_PERIOD ((1UL << 32) - 1) + +#define PIC_UPPER_INDEX 0 +#define PIC_LOWER_INDEX 1 + +struct cpu_hw_events { + struct perf_event *events[MAX_HWEVENTS]; + unsigned long used_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + unsigned long active_mask[BITS_TO_LONGS(MAX_HWEVENTS)]; + int enabled; +}; +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; + +struct perf_event_map { + u16 encoding; + u8 pic_mask; +#define PIC_NONE 0x00 +#define PIC_UPPER 0x01 +#define PIC_LOWER 0x02 +}; + +struct sparc_pmu { + const struct perf_event_map *(*event_map)(int); + int max_events; + int upper_shift; + int lower_shift; + int event_mask; + int hv_bit; + int irq_bit; + int upper_nop; + int lower_nop; +}; + +static const struct perf_event_map ultra3i_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER }, +}; + +static const struct perf_event_map *ultra3i_event_map(int event_id) +{ + return &ultra3i_perfmon_event_map[event_id]; +} + +static const struct sparc_pmu ultra3i_pmu = { + .event_map = ultra3i_event_map, + .max_events = ARRAY_SIZE(ultra3i_perfmon_event_map), + .upper_shift = 11, + .lower_shift = 4, + .event_mask = 0x3f, + .upper_nop = 0x1c, + .lower_nop = 0x14, +}; + +static const struct perf_event_map niagara2_perfmon_event_map[] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER }, +}; + +static const struct perf_event_map *niagara2_event_map(int event_id) +{ + return &niagara2_perfmon_event_map[event_id]; +} + +static const struct sparc_pmu niagara2_pmu = { + .event_map = niagara2_event_map, + .max_events = ARRAY_SIZE(niagara2_perfmon_event_map), + .upper_shift = 19, + .lower_shift = 6, + .event_mask = 0xfff, + .hv_bit = 0x8, + .irq_bit = 0x03, + .upper_nop = 0x220, + .lower_nop = 0x220, +}; + +static const struct sparc_pmu *sparc_pmu __read_mostly; + +static u64 event_encoding(u64 event_id, int idx) +{ + if (idx == PIC_UPPER_INDEX) + event_id <<= sparc_pmu->upper_shift; + else + event_id <<= sparc_pmu->lower_shift; + return event_id; +} + +static u64 mask_for_index(int idx) +{ + return event_encoding(sparc_pmu->event_mask, idx); +} + +static u64 nop_for_index(int idx) +{ + return event_encoding(idx == PIC_UPPER_INDEX ? + sparc_pmu->upper_nop : + sparc_pmu->lower_nop, idx); +} + +static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + u64 val, mask = mask_for_index(idx); + + val = pcr_ops->read(); + pcr_ops->write((val & ~mask) | hwc->config); +} + +static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + u64 mask = mask_for_index(idx); + u64 nop = nop_for_index(idx); + u64 val = pcr_ops->read(); + + pcr_ops->write((val & ~mask) | nop); +} + +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + int i; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + val = pcr_ops->read(); + + for (i = 0; i < MAX_HWEVENTS; i++) { + struct perf_event *cp = cpuc->events[i]; + struct hw_perf_event *hwc; + + if (!cp) + continue; + hwc = &cp->hw; + val |= hwc->config_base; + } + + pcr_ops->write(val); +} + +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + + val = pcr_ops->read(); + val &= ~(PCR_UTRACE | PCR_STRACE | + sparc_pmu->hv_bit | sparc_pmu->irq_bit); + pcr_ops->write(val); +} + +static u32 read_pmc(int idx) +{ + u64 val; + + read_pic(val); + if (idx == PIC_UPPER_INDEX) + val >>= 32; + + return val & 0xffffffff; +} + +static void write_pmc(int idx, u64 val) +{ + u64 shift, mask, pic; + + shift = 0; + if (idx == PIC_UPPER_INDEX) + shift = 32; + + mask = ((u64) 0xffffffff) << shift; + val <<= shift; + + read_pic(pic); + pic &= ~mask; + pic |= val; + write_pic(pic); +} + +static int sparc_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + if (left > MAX_PERIOD) + left = MAX_PERIOD; + + atomic64_set(&hwc->prev_count, (u64)-left); + + write_pmc(idx, (u64)(-left) & 0xffffffff); + + perf_event_update_userpage(event); + + return ret; +} + +static int sparc_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + sparc_pmu_disable_event(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + sparc_perf_event_set_period(event, hwc, idx); + sparc_pmu_enable_event(hwc, idx); + perf_event_update_userpage(event); + return 0; +} + +static u64 sparc_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + int shift = 64 - 32; + u64 prev_raw_count, new_raw_count; + s64 delta; + +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + new_raw_count = read_pmc(idx); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); + atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void sparc_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + clear_bit(idx, cpuc->active_mask); + sparc_pmu_disable_event(hwc, idx); + + barrier(); + + sparc_perf_event_update(event, hwc, idx); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +static void sparc_pmu_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + sparc_perf_event_update(event, hwc, hwc->idx); +} + +static void sparc_pmu_unthrottle(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + sparc_pmu_enable_event(hwc, hwc->idx); +} + +static atomic_t active_events = ATOMIC_INIT(0); +static DEFINE_MUTEX(pmc_grab_mutex); + +void perf_event_grab_pmc(void) +{ + if (atomic_inc_not_zero(&active_events)) + return; + + mutex_lock(&pmc_grab_mutex); + if (atomic_read(&active_events) == 0) { + if (atomic_read(&nmi_active) > 0) { + on_each_cpu(stop_nmi_watchdog, NULL, 1); + BUG_ON(atomic_read(&nmi_active) != 0); + } + atomic_inc(&active_events); + } + mutex_unlock(&pmc_grab_mutex); +} + +void perf_event_release_pmc(void) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) { + if (atomic_read(&nmi_active) == 0) + on_each_cpu(start_nmi_watchdog, NULL, 1); + mutex_unlock(&pmc_grab_mutex); + } +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + perf_event_release_pmc(); +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + const struct perf_event_map *pmap; + u64 enc; + + if (atomic_read(&nmi_active) < 0) + return -ENODEV; + + if (attr->type != PERF_TYPE_HARDWARE) + return -EOPNOTSUPP; + + if (attr->config >= sparc_pmu->max_events) + return -EINVAL; + + perf_event_grab_pmc(); + event->destroy = hw_perf_event_destroy; + + /* We save the enable bits in the config_base. So to + * turn off sampling just write 'config', and to enable + * things write 'config | config_base'. + */ + hwc->config_base = sparc_pmu->irq_bit; + if (!attr->exclude_user) + hwc->config_base |= PCR_UTRACE; + if (!attr->exclude_kernel) + hwc->config_base |= PCR_STRACE; + if (!attr->exclude_hv) + hwc->config_base |= sparc_pmu->hv_bit; + + if (!hwc->sample_period) { + hwc->sample_period = MAX_PERIOD; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } + + pmap = sparc_pmu->event_map(attr->config); + + enc = pmap->encoding; + if (pmap->pic_mask & PIC_UPPER) { + hwc->idx = PIC_UPPER_INDEX; + enc <<= sparc_pmu->upper_shift; + } else { + hwc->idx = PIC_LOWER_INDEX; + enc <<= sparc_pmu->lower_shift; + } + + hwc->config |= enc; + return 0; +} + +static const struct pmu pmu = { + .enable = sparc_pmu_enable, + .disable = sparc_pmu_disable, + .read = sparc_pmu_read, + .unthrottle = sparc_pmu_unthrottle, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err = __hw_perf_event_init(event); + + if (err) + return ERR_PTR(err); + return &pmu; +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + u64 pcr, pic; + int cpu; + + if (!sparc_pmu) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + pcr = pcr_ops->read(); + read_pic(pic); + + pr_info("\n"); + pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", + cpu, pcr, pic); + + local_irq_restore(flags); +} + +static int __kprobes perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!atomic_read(&active_events)) + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx < MAX_HWEVENTS; idx++) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + hwc = &event->hw; + val = sparc_perf_event_update(event, hwc, idx); + if (val & (1ULL << 31)) + continue; + + data.period = event->hw.last_period; + if (!sparc_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + sparc_pmu_disable_event(hwc, idx); + } + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, +}; + +static bool __init supported_pmu(void) +{ + if (!strcmp(sparc_pmu_type, "ultra3i")) { + sparc_pmu = &ultra3i_pmu; + return true; + } + if (!strcmp(sparc_pmu_type, "niagara2")) { + sparc_pmu = &niagara2_pmu; + return true; + } + return false; +} + +void __init init_hw_perf_events(void) +{ + pr_info("Performance events: "); + + if (!supported_pmu()) { + pr_cont("No support for PMU type '%s'\n", sparc_pmu_type); + return; + } + + pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); + + /* All sparc64 PMUs currently have 2 events. But this simple + * driver only supports one active event at a time. + */ + perf_max_events = 1; + + register_die_notifier(&perf_event_nmi_notifier); +} diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 04181577cb6..0f1658d3749 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -82,5 +82,5 @@ sys_call_table: /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 91b06b7f7ac..009825f6e73 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -83,7 +83,7 @@ sys_call_table32: /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_counter_open + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open #endif /* CONFIG_COMPAT */ @@ -158,4 +158,4 @@ sys_call_table: /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv - .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_counter_open + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 51c59015b28..e4ff5d1280c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,7 +24,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE - select HAVE_PERF_COUNTERS if (!M386 && !M486) + select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index ba331bfd111..74619c4f9fd 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -831,5 +831,5 @@ ia32_sys_call_table: .quad compat_sys_preadv .quad compat_sys_pwritev .quad compat_sys_rt_tgsigqueueinfo /* 335 */ - .quad sys_perf_counter_open + .quad sys_perf_event_open ia32_syscall_end: diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 5e3f2044f0d..f5693c81a1d 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -49,7 +49,7 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h deleted file mode 100644 index e7b7c938ae2..00000000000 --- a/arch/x86/include/asm/perf_counter.h +++ /dev/null @@ -1,108 +0,0 @@ -#ifndef _ASM_X86_PERF_COUNTER_H -#define _ASM_X86_PERF_COUNTER_H - -/* - * Performance counter hw details: - */ - -#define X86_PMC_MAX_GENERIC 8 -#define X86_PMC_MAX_FIXED 3 - -#define X86_PMC_IDX_GENERIC 0 -#define X86_PMC_IDX_FIXED 32 -#define X86_PMC_IDX_MAX 64 - -#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 -#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 - -#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 -#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 - -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) - -/* - * Includes eventsel and unit mask as well: - */ -#define ARCH_PERFMON_EVENT_MASK 0xffff - -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ - (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) - -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 - -/* - * Intel "Architectural Performance Monitoring" CPUID - * detection/enumeration details: - */ -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -union cpuid10_edx { - struct { - unsigned int num_counters_fixed:4; - unsigned int reserved:28; - } split; - unsigned int full; -}; - - -/* - * Fixed-purpose performance counters: - */ - -/* - * All 3 fixed-mode PMCs are configured via this single MSR: - */ -#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d - -/* - * The counts are available in three separate MSRs: - */ - -/* Instr_Retired.Any: */ -#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 -#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) - -/* CPU_CLK_Unhalted.Core: */ -#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a -#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) - -/* CPU_CLK_Unhalted.Ref: */ -#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b -#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) - -/* - * We model BTS tracing as another fixed-mode PMC. - * - * We choose a value in the middle of the fixed counter range, since lower - * values are used by actual fixed counters and higher values are used - * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. - */ -#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) - - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -extern void perf_counters_lapic_init(void); - -#define PERF_COUNTER_INDEX_OFFSET 0 - -#else -static inline void init_hw_perf_counters(void) { } -static inline void perf_counters_lapic_init(void) { } -#endif - -#endif /* _ASM_X86_PERF_COUNTER_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h new file mode 100644 index 00000000000..ad7ce3fd506 --- /dev/null +++ b/arch/x86/include/asm/perf_event.h @@ -0,0 +1,108 @@ +#ifndef _ASM_X86_PERF_EVENT_H +#define _ASM_X86_PERF_EVENT_H + +/* + * Performance event hw details: + */ + +#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_FIXED 3 + +#define X86_PMC_IDX_GENERIC 0 +#define X86_PMC_IDX_FIXED 32 +#define X86_PMC_IDX_MAX 64 + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +/* + * Includes eventsel and unit mask as well: + */ +#define ARCH_PERFMON_EVENT_MASK 0xffff + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 + +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_events:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +union cpuid10_edx { + struct { + unsigned int num_events_fixed:4; + unsigned int reserved:28; + } split; + unsigned int full; +}; + + +/* + * Fixed-purpose performance events: + */ + +/* + * All 3 fixed-mode PMCs are configured via this single MSR: + */ +#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d + +/* + * The counts are available in three separate MSRs: + */ + +/* Instr_Retired.Any: */ +#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 +#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) + +/* CPU_CLK_Unhalted.Core: */ +#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a +#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) + +/* CPU_CLK_Unhalted.Ref: */ +#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b +#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) + +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose a value in the middle of the fixed event range, since lower + * values are used by actual fixed events and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. + */ +#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) + + +#ifdef CONFIG_PERF_EVENTS +extern void init_hw_perf_events(void); +extern void perf_events_lapic_init(void); + +#define PERF_EVENT_INDEX_OFFSET 0 + +#else +static inline void init_hw_perf_events(void) { } +static inline void perf_events_lapic_init(void) { } +#endif + +#endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 8deaada61bc..6fb3c209a7e 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -341,7 +341,7 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 +#define __NR_perf_event_open 336 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index b9f3c60de5f..8d3ad0adbc6 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -659,8 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv) __SYSCALL(__NR_pwritev, sys_pwritev) #define __NR_rt_tgsigqueueinfo 297 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) -#define __NR_perf_counter_open 298 -__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) +#define __NR_perf_event_open 298 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a34601f5298..754174d09de 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -14,7 +14,7 @@ * Mikael Pettersson : PM converted to driver model. */ -#include +#include #include #include #include @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include @@ -1189,7 +1189,7 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - perf_counters_lapic_init(); + perf_events_lapic_init(); preempt_disable(); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 8dd30638fe4..68537e957a9 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2fea97eccf7..cc25c2b4a56 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include @@ -869,7 +869,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_counters(); + init_hw_perf_events(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c deleted file mode 100644 index b1f115696c8..00000000000 --- a/arch/x86/kernel/cpu/perf_counter.c +++ /dev/null @@ -1,2298 +0,0 @@ -/* - * Performance counter x86 architecture code - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright (C) 2009 Intel Corporation, - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static u64 perf_counter_mask __read_mostly; - -/* The maximal number of PEBS counters: */ -#define MAX_PEBS_COUNTERS 4 - -/* The size of a BTS record in bytes: */ -#define BTS_RECORD_SIZE 24 - -/* The size of a per-cpu BTS buffer in bytes: */ -#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) - -/* The BTS overflow threshold in bytes from the end of the buffer: */ -#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) - - -/* - * Bits in the debugctlmsr controlling branch tracing. - */ -#define X86_DEBUGCTL_TR (1 << 6) -#define X86_DEBUGCTL_BTS (1 << 7) -#define X86_DEBUGCTL_BTINT (1 << 8) -#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) -#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) - -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. - */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; -}; - -struct cpu_hw_counters { - struct perf_counter *counters[X86_PMC_IDX_MAX]; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long interrupts; - int enabled; - struct debug_store *ds; -}; - -/* - * struct x86_pmu - generic x86 pmu - */ -struct x86_pmu { - const char *name; - int version; - int (*handle_irq)(struct pt_regs *); - void (*disable_all)(void); - void (*enable_all)(void); - void (*enable)(struct hw_perf_counter *, int); - void (*disable)(struct hw_perf_counter *, int); - unsigned eventsel; - unsigned perfctr; - u64 (*event_map)(int); - u64 (*raw_event)(u64); - int max_events; - int num_counters; - int num_counters_fixed; - int counter_bits; - u64 counter_mask; - int apic; - u64 max_period; - u64 intel_ctrl; - void (*enable_bts)(u64 config); - void (*disable_bts)(void); -}; - -static struct x86_pmu x86_pmu __read_mostly; - -static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { - .enabled = 1, -}; - -/* - * Not sure about some of these - */ -static const u64 p6_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, -}; - -static u64 p6_pmu_event_map(int hw_event) -{ - return p6_perfmon_event_map[hw_event]; -} - -/* - * Counter setting that is specified not to count anything. - * We use this to effectively disable a counter. - * - * L2_RQSTS with 0 MESI unit mask. - */ -#define P6_NOP_COUNTER 0x0000002EULL - -static u64 p6_pmu_raw_event(u64 hw_event) -{ -#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL -#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL -#define P6_EVNTSEL_INV_MASK 0x00800000ULL -#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL - -#define P6_EVNTSEL_MASK \ - (P6_EVNTSEL_EVENT_MASK | \ - P6_EVNTSEL_UNIT_MASK | \ - P6_EVNTSEL_EDGE_MASK | \ - P6_EVNTSEL_INV_MASK | \ - P6_EVNTSEL_COUNTER_MASK) - - return hw_event & P6_EVNTSEL_MASK; -} - - -/* - * Intel PerfMon v3. Used on Core2 and later. - */ -static const u64 intel_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, -}; - -static u64 intel_pmu_event_map(int hw_event) -{ - return intel_perfmon_event_map[hw_event]; -} - -/* - * Generalized hw caching related hw_event table, filled - * in on a per model basis. A value of 0 means - * 'not supported', -1 means 'hw_event makes no sense on - * this CPU', any other value means the raw hw_event - * ID. - */ - -#define C(x) PERF_COUNT_HW_CACHE_##x - -static u64 __read_mostly hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - -static const u64 nehalem_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static const u64 core2_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static const u64 atom_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static u64 intel_pmu_raw_event(u64 hw_event) -{ -#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL -#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL -#define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL - -#define CORE_EVNTSEL_MASK \ - (CORE_EVNTSEL_EVENT_MASK | \ - CORE_EVNTSEL_UNIT_MASK | \ - CORE_EVNTSEL_EDGE_MASK | \ - CORE_EVNTSEL_INV_MASK | \ - CORE_EVNTSEL_COUNTER_MASK) - - return hw_event & CORE_EVNTSEL_MASK; -} - -static const u64 amd_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ - [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ - [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ - [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ - [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -/* - * AMD Performance Monitor K7 and later. - */ -static const u64 amd_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, -}; - -static u64 amd_pmu_event_map(int hw_event) -{ - return amd_perfmon_event_map[hw_event]; -} - -static u64 amd_pmu_raw_event(u64 hw_event) -{ -#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL -#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL -#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL -#define K7_EVNTSEL_INV_MASK 0x000800000ULL -#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL - -#define K7_EVNTSEL_MASK \ - (K7_EVNTSEL_EVENT_MASK | \ - K7_EVNTSEL_UNIT_MASK | \ - K7_EVNTSEL_EDGE_MASK | \ - K7_EVNTSEL_INV_MASK | \ - K7_EVNTSEL_COUNTER_MASK) - - return hw_event & K7_EVNTSEL_MASK; -} - -/* - * Propagate counter elapsed time into the generic counter. - * Can only be executed on the CPU where the counter is active. - * Returns the delta events processed. - */ -static u64 -x86_perf_counter_update(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - int shift = 64 - x86_pmu.counter_bits; - u64 prev_raw_count, new_raw_count; - s64 delta; - - if (idx == X86_PMC_IDX_FIXED_BTS) - return 0; - - /* - * Careful: an NMI might modify the previous counter value. - * - * Our tactic to handle this is to first atomically read and - * exchange a new raw count - then add that new-prev delta - * count to the generic counter atomically: - */ -again: - prev_raw_count = atomic64_read(&hwc->prev_count); - rdmsrl(hwc->counter_base + idx, new_raw_count); - - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - /* - * Now we have the new raw value and have updated the prev - * timestamp already. We can now calculate the elapsed delta - * (counter-)time and add that to the generic counter. - * - * Careful, not all hw sign-extends above the physical width - * of the count. - */ - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static atomic_t active_counters; -static DEFINE_MUTEX(pmc_reserve_mutex); - -static bool reserve_pmc_hardware(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int i; - - if (nmi_watchdog == NMI_LOCAL_APIC) - disable_lapic_nmi_watchdog(); - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) - goto perfctr_fail; - } - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) - goto eventsel_fail; - } -#endif - - return true; - -#ifdef CONFIG_X86_LOCAL_APIC -eventsel_fail: - for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu.eventsel + i); - - i = x86_pmu.num_counters; - -perfctr_fail: - for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu.perfctr + i); - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); - - return false; -#endif -} - -static void release_pmc_hardware(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - int i; - - for (i = 0; i < x86_pmu.num_counters; i++) { - release_perfctr_nmi(x86_pmu.perfctr + i); - release_evntsel_nmi(x86_pmu.eventsel + i); - } - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); -#endif -} - -static inline bool bts_available(void) -{ - return x86_pmu.enable_bts != NULL; -} - -static inline void init_debug_store_on_cpu(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; - - if (!ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(unsigned long)ds), - (u32)((u64)(unsigned long)ds >> 32)); -} - -static inline void fini_debug_store_on_cpu(int cpu) -{ - if (!per_cpu(cpu_hw_counters, cpu).ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); -} - -static void release_bts_hardware(void) -{ - int cpu; - - if (!bts_available()) - return; - - get_online_cpus(); - - for_each_online_cpu(cpu) - fini_debug_store_on_cpu(cpu); - - for_each_possible_cpu(cpu) { - struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; - - if (!ds) - continue; - - per_cpu(cpu_hw_counters, cpu).ds = NULL; - - kfree((void *)(unsigned long)ds->bts_buffer_base); - kfree(ds); - } - - put_online_cpus(); -} - -static int reserve_bts_hardware(void) -{ - int cpu, err = 0; - - if (!bts_available()) - return 0; - - get_online_cpus(); - - for_each_possible_cpu(cpu) { - struct debug_store *ds; - void *buffer; - - err = -ENOMEM; - buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); - if (unlikely(!buffer)) - break; - - ds = kzalloc(sizeof(*ds), GFP_KERNEL); - if (unlikely(!ds)) { - kfree(buffer); - break; - } - - ds->bts_buffer_base = (u64)(unsigned long)buffer; - ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = - ds->bts_buffer_base + BTS_BUFFER_SIZE; - ds->bts_interrupt_threshold = - ds->bts_absolute_maximum - BTS_OVFL_TH; - - per_cpu(cpu_hw_counters, cpu).ds = ds; - err = 0; - } - - if (err) - release_bts_hardware(); - else { - for_each_online_cpu(cpu) - init_debug_store_on_cpu(cpu); - } - - put_online_cpus(); - - return err; -} - -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { - release_pmc_hardware(); - release_bts_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -static inline int x86_pmu_initialized(void) -{ - return x86_pmu.handle_irq != NULL; -} - -static inline int -set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) -{ - unsigned int cache_type, cache_op, cache_result; - u64 config, val; - - config = attr->config; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - val = hw_cache_event_ids[cache_type][cache_op][cache_result]; - - if (val == 0) - return -ENOENT; - - if (val == -1) - return -EINVAL; - - hwc->config |= val; - - return 0; -} - -static void intel_pmu_enable_bts(u64 config) -{ - unsigned long debugctlmsr; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr |= X86_DEBUGCTL_TR; - debugctlmsr |= X86_DEBUGCTL_BTS; - debugctlmsr |= X86_DEBUGCTL_BTINT; - - if (!(config & ARCH_PERFMON_EVENTSEL_OS)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; - - if (!(config & ARCH_PERFMON_EVENTSEL_USR)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; - - update_debugctlmsr(debugctlmsr); -} - -static void intel_pmu_disable_bts(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - unsigned long debugctlmsr; - - if (!cpuc->ds) - return; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr &= - ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | - X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); - - update_debugctlmsr(debugctlmsr); -} - -/* - * Setup the hardware configuration for a given attr_type - */ -static int __hw_perf_counter_init(struct perf_counter *counter) -{ - struct perf_counter_attr *attr = &counter->attr; - struct hw_perf_counter *hwc = &counter->hw; - u64 config; - int err; - - if (!x86_pmu_initialized()) - return -ENODEV; - - err = 0; - if (!atomic_inc_not_zero(&active_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0) { - if (!reserve_pmc_hardware()) - err = -EBUSY; - else - err = reserve_bts_hardware(); - } - if (!err) - atomic_inc(&active_counters); - mutex_unlock(&pmc_reserve_mutex); - } - if (err) - return err; - - counter->destroy = hw_perf_counter_destroy; - - /* - * Generate PMC IRQs: - * (keep 'enabled' bit clear for now) - */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; - - /* - * Count user and OS events unless requested not to. - */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - - if (!hwc->sample_period) { - hwc->sample_period = x86_pmu.max_period; - hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); - } else { - /* - * If we have a PMU initialized but no APIC - * interrupts, we cannot sample hardware - * counters (user-space has to fall back and - * sample via a hrtimer based software counter): - */ - if (!x86_pmu.apic) - return -EOPNOTSUPP; - } - - /* - * Raw hw_event type provide the config in the hw_event structure - */ - if (attr->type == PERF_TYPE_RAW) { - hwc->config |= x86_pmu.raw_event(attr->config); - return 0; - } - - if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, attr); - - if (attr->config >= x86_pmu.max_events) - return -EINVAL; - - /* - * The generic map: - */ - config = x86_pmu.event_map(attr->config); - - if (config == 0) - return -ENOENT; - - if (config == -1LL) - return -EINVAL; - - /* - * Branch tracing: - */ - if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && - (hwc->sample_period == 1)) { - /* BTS is not supported by this architecture. */ - if (!bts_available()) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - return -EOPNOTSUPP; - } - - hwc->config |= config; - - return 0; -} - -static void p6_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void intel_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) - intel_pmu_disable_bts(); -} - -static void amd_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int idx; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - /* - * ensure we write the disable before we start disabling the - * counters proper, so that amd_pmu_enable_counter() does the - * right thing. - */ - barrier(); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) - continue; - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } -} - -void hw_perf_disable(void) -{ - if (!x86_pmu_initialized()) - return; - return x86_pmu.disable_all(); -} - -static void p6_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - unsigned long val; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void intel_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { - struct perf_counter *counter = - cpuc->counters[X86_PMC_IDX_FIXED_BTS]; - - if (WARN_ON_ONCE(!counter)) - return; - - intel_pmu_enable_bts(counter->hw.config); - } -} - -static void amd_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int idx; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_counter *counter = cpuc->counters[idx]; - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - val = counter->hw.config; - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } -} - -void hw_perf_enable(void) -{ - if (!x86_pmu_initialized()) - return; - x86_pmu.enable_all(); -} - -static inline u64 intel_pmu_get_status(void) -{ - u64 status; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - - return status; -} - -static inline void intel_pmu_ack_status(u64 ack) -{ - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); -} - -static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - (void)checking_wrmsrl(hwc->config_base + idx, - hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); -} - -static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); -} - -static inline void -intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, mask; - - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - (void)checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static inline void -p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val = P6_NOP_COUNTER; - - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - (void)checking_wrmsrl(hwc->config_base + idx, val); -} - -static inline void -intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { - intel_pmu_disable_bts(); - return; - } - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_disable_fixed(hwc, idx); - return; - } - - x86_pmu_disable_counter(hwc, idx); -} - -static inline void -amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - x86_pmu_disable_counter(hwc, idx); -} - -static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); - -/* - * Set the next IRQ period, based on the hwc->period_left value. - * To be called with the counter disabled in hw: - */ -static int -x86_perf_counter_set_period(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int err, ret = 0; - - if (idx == X86_PMC_IDX_FIXED_BTS) - return 0; - - /* - * If we are way outside a reasoable range then just skip forward: - */ - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - /* - * Quirk: certain CPUs dont like it if just 1 hw_event is left: - */ - if (unlikely(left < 2)) - left = 2; - - if (left > x86_pmu.max_period) - left = x86_pmu.max_period; - - per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; - - /* - * The hw counter starts counting from this counter offset, - * mark it to be able to extra future deltas: - */ - atomic64_set(&hwc->prev_count, (u64)-left); - - err = checking_wrmsrl(hwc->counter_base + idx, - (u64)(-left) & x86_pmu.counter_mask); - - perf_counter_update_userpage(counter); - - return ret; -} - -static inline void -intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, bits, mask; - int err; - - /* - * Enable IRQ generation (0x8), - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) - * if requested: - */ - bits = 0x8ULL; - if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) - bits |= 0x2; - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - bits |= 0x1; - bits <<= (idx * 4); - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - ctrl_val |= bits; - err = checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - u64 val; - - val = hwc->config; - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - (void)checking_wrmsrl(hwc->config_base + idx, val); -} - - -static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { - if (!__get_cpu_var(cpu_hw_counters).enabled) - return; - - intel_pmu_enable_bts(hwc->config); - return; - } - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_enable_fixed(hwc, idx); - return; - } - - x86_pmu_enable_counter(hwc, idx); -} - -static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (cpuc->enabled) - x86_pmu_enable_counter(hwc, idx); -} - -static int -fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) -{ - unsigned int hw_event; - - hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; - - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (hwc->sample_period == 1))) - return X86_PMC_IDX_FIXED_BTS; - - if (!x86_pmu.num_counters_fixed) - return -1; - - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) - return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) - return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) - return X86_PMC_IDX_FIXED_BUS_CYCLES; - - return -1; -} - -/* - * Find a PMC slot for the freshly enabled / scheduled in counter: - */ -static int x86_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx; - - idx = fixed_mode_idx(counter, hwc); - if (idx == X86_PMC_IDX_FIXED_BTS) { - /* BTS is already occupied. */ - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - hwc->config_base = 0; - hwc->counter_base = 0; - hwc->idx = idx; - } else if (idx >= 0) { - /* - * Try to get the fixed counter, if that is already taken - * then try to get a generic counter: - */ - if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; - - hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that counter_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: - */ - hwc->counter_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; - hwc->idx = idx; - } else { - idx = hwc->idx; - /* Try to get the previous generic counter again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { -try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_counters); - if (idx == x86_pmu.num_counters) - return -EAGAIN; - - set_bit(idx, cpuc->used_mask); - hwc->idx = idx; - } - hwc->config_base = x86_pmu.eventsel; - hwc->counter_base = x86_pmu.perfctr; - } - - perf_counters_lapic_init(); - - x86_pmu.disable(hwc, idx); - - cpuc->counters[idx] = counter; - set_bit(idx, cpuc->active_mask); - - x86_perf_counter_set_period(counter, hwc, idx); - x86_pmu.enable(hwc, idx); - - perf_counter_update_userpage(counter); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - - if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || - cpuc->counters[hwc->idx] != counter)) - return; - - x86_pmu.enable(hwc, hwc->idx); -} - -void perf_counter_print_debug(void) -{ - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; - struct cpu_hw_counters *cpuc; - unsigned long flags; - int cpu, idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - if (x86_pmu.version >= 2) { - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); - - pr_info("\n"); - pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); - pr_info("CPU#%d: status: %016llx\n", cpu, status); - pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); - pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); - } - pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); - rdmsrl(x86_pmu.perfctr + idx, pmc_count); - - prev_left = per_cpu(pmc_prev_left[idx], cpu); - - pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", - cpu, idx, pmc_ctrl); - pr_info("CPU#%d: gen-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - pr_info("CPU#%d: gen-PMC%d left: %016llx\n", - cpu, idx, prev_left); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); - - pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - } - local_irq_restore(flags); -} - -static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc) -{ - struct debug_store *ds = cpuc->ds; - struct bts_record { - u64 from; - u64 to; - u64 flags; - }; - struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_sample_data data; - struct pt_regs regs; - - if (!counter) - return; - - if (!ds) - return; - - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; - - if (top <= at) - return; - - ds->bts_index = ds->bts_buffer_base; - - - data.period = counter->hw.last_period; - data.addr = 0; - regs.ip = 0; - - /* - * Prepare a generic sample, i.e. fill in the invariant fields. - * We will overwrite the from and to address before we output - * the sample. - */ - perf_prepare_sample(&header, &data, counter, ®s); - - if (perf_output_begin(&handle, counter, - header.size * (top - at), 1, 1)) - return; - - for (; at < top; at++) { - data.ip = at->from; - data.addr = at->to; - - perf_output_sample(&handle, &header, &data, counter); - } - - perf_output_end(&handle); - - /* There's new data available. */ - counter->hw.interrupts++; - counter->pending_kill = POLL_IN; -} - -static void x86_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - /* - * Must be done before we disable, otherwise the nmi handler - * could reenable again: - */ - clear_bit(idx, cpuc->active_mask); - x86_pmu.disable(hwc, idx); - - /* - * Make sure the cleared pointer becomes visible before we - * (potentially) free the counter: - */ - barrier(); - - /* - * Drain the remaining delta count out of a counter - * that we are disabling: - */ - x86_perf_counter_update(counter, hwc, idx); - - /* Drain the remaining BTS records. */ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) - intel_pmu_drain_bts_buffer(cpuc); - - cpuc->counters[idx] = NULL; - clear_bit(idx, cpuc->used_mask); - - perf_counter_update_userpage(counter); -} - -/* - * Save and restart an expired counter. Called by NMI contexts, - * so it has to be careful about preempting normal counter ops: - */ -static int intel_pmu_save_and_restart(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - int ret; - - x86_perf_counter_update(counter, hwc, idx); - ret = x86_perf_counter_set_period(counter, hwc, idx); - - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - intel_pmu_enable_counter(hwc, idx); - - return ret; -} - -static void intel_pmu_reset(void) -{ - struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; - unsigned long flags; - int idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - printk("clearing PMU state on CPU#%d\n", smp_processor_id()); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); - checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); - } - if (ds) - ds->bts_index = ds->bts_buffer_base; - - local_irq_restore(flags); -} - -static int p6_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - int idx, handled = 0; - u64 val; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - counter = cpuc->counters[idx]; - hwc = &counter->hw; - - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) - continue; - - /* - * counter overflow - */ - handled = 1; - data.period = counter->hw.last_period; - - if (!x86_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - p6_pmu_disable_counter(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - -/* - * This handler is triggered by the local APIC, so the APIC IRQ handling - * rules apply: - */ -static int intel_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - int bit, loops; - u64 ack, status; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - perf_disable(); - intel_pmu_drain_bts_buffer(cpuc); - status = intel_pmu_get_status(); - if (!status) { - perf_enable(); - return 0; - } - - loops = 0; -again: - if (++loops > 100) { - WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); - perf_counter_print_debug(); - intel_pmu_reset(); - perf_enable(); - return 1; - } - - inc_irq_stat(apic_perf_irqs); - ack = status; - for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; - - clear_bit(bit, (unsigned long *) &status); - if (!test_bit(bit, cpuc->active_mask)) - continue; - - if (!intel_pmu_save_and_restart(counter)) - continue; - - data.period = counter->hw.last_period; - - if (perf_counter_overflow(counter, 1, &data, regs)) - intel_pmu_disable_counter(&counter->hw, bit); - } - - intel_pmu_ack_status(ack); - - /* - * Repeat if there is more work to be done: - */ - status = intel_pmu_get_status(); - if (status) - goto again; - - perf_enable(); - - return 1; -} - -static int amd_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - int idx, handled = 0; - u64 val; - - data.addr = 0; - - cpuc = &__get_cpu_var(cpu_hw_counters); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - counter = cpuc->counters[idx]; - hwc = &counter->hw; - - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) - continue; - - /* - * counter overflow - */ - handled = 1; - data.period = counter->hw.last_period; - - if (!x86_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data, regs)) - amd_pmu_disable_counter(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - -void smp_perf_pending_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_pending_irqs); - perf_counter_do_pending(); - irq_exit(); -} - -void set_perf_counter_pending(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - apic->send_IPI_self(LOCAL_PENDING_VECTOR); -#endif -} - -void perf_counters_lapic_init(void) -{ -#ifdef CONFIG_X86_LOCAL_APIC - if (!x86_pmu.apic || !x86_pmu_initialized()) - return; - - /* - * Always use NMI for PMU - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif -} - -static int __kprobes -perf_counter_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) -{ - struct die_args *args = __args; - struct pt_regs *regs; - - if (!atomic_read(&active_counters)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - case DIE_NMI_IPI: - break; - - default: - return NOTIFY_DONE; - } - - regs = args->regs; - -#ifdef CONFIG_X86_LOCAL_APIC - apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif - /* - * Can't rely on the handled return value to say it was our NMI, two - * counters could trigger 'simultaneously' raising two back-to-back NMIs. - * - * If the first NMI handles both, the latter will be empty and daze - * the CPU. - */ - x86_pmu.handle_irq(regs); - - return NOTIFY_STOP; -} - -static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler, - .next = NULL, - .priority = 1 -}; - -static struct x86_pmu p6_pmu = { - .name = "p6", - .handle_irq = p6_pmu_handle_irq, - .disable_all = p6_pmu_disable_all, - .enable_all = p6_pmu_enable_all, - .enable = p6_pmu_enable_counter, - .disable = p6_pmu_disable_counter, - .eventsel = MSR_P6_EVNTSEL0, - .perfctr = MSR_P6_PERFCTR0, - .event_map = p6_pmu_event_map, - .raw_event = p6_pmu_raw_event, - .max_events = ARRAY_SIZE(p6_perfmon_event_map), - .apic = 1, - .max_period = (1ULL << 31) - 1, - .version = 0, - .num_counters = 2, - /* - * Counters have 40 bits implemented. However they are designed such - * that bits [32-39] are sign extensions of bit 31. As such the - * effective width of a counter for P6-like PMU is 32 bits only. - * - * See IA-32 Intel Architecture Software developer manual Vol 3B - */ - .counter_bits = 32, - .counter_mask = (1ULL << 32) - 1, -}; - -static struct x86_pmu intel_pmu = { - .name = "Intel", - .handle_irq = intel_pmu_handle_irq, - .disable_all = intel_pmu_disable_all, - .enable_all = intel_pmu_enable_all, - .enable = intel_pmu_enable_counter, - .disable = intel_pmu_disable_counter, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .raw_event = intel_pmu_raw_event, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic counter period: - */ - .max_period = (1ULL << 31) - 1, - .enable_bts = intel_pmu_enable_bts, - .disable_bts = intel_pmu_disable_bts, -}; - -static struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = amd_pmu_handle_irq, - .disable_all = amd_pmu_disable_all, - .enable_all = amd_pmu_enable_all, - .enable = amd_pmu_enable_counter, - .disable = amd_pmu_disable_counter, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .raw_event = amd_pmu_raw_event, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 4, - .counter_bits = 48, - .counter_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, -}; - -static int p6_pmu_init(void) -{ - switch (boot_cpu_data.x86_model) { - case 1: - case 3: /* Pentium Pro */ - case 5: - case 6: /* Pentium II */ - case 7: - case 8: - case 11: /* Pentium III */ - break; - case 9: - case 13: - /* Pentium M */ - break; - default: - pr_cont("unsupported p6 CPU model %d ", - boot_cpu_data.x86_model); - return -ENODEV; - } - - x86_pmu = p6_pmu; - - if (!cpu_has_apic) { - pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - pr_info("no hardware sampling interrupt available.\n"); - x86_pmu.apic = 0; - } - - return 0; -} - -static int intel_pmu_init(void) -{ - union cpuid10_edx edx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int ebx; - int version; - - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - /* check for P6 processor family */ - if (boot_cpu_data.x86 == 6) { - return p6_pmu_init(); - } else { - return -ENODEV; - } - } - - /* - * Check whether the Architectural PerfMon supports - * Branch Misses Retired hw_event or not. - */ - cpuid(10, &eax.full, &ebx, &unused, &edx.full); - if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) - return -ENODEV; - - version = eax.split.version_id; - if (version < 2) - return -ENODEV; - - x86_pmu = intel_pmu; - x86_pmu.version = version; - x86_pmu.num_counters = eax.split.num_counters; - x86_pmu.counter_bits = eax.split.bit_width; - x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; - - /* - * Quirk: v2 perfmon does not report fixed-purpose counters, so - * assume at least 3 counters: - */ - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); - - /* - * Install the hw-cache-events table: - */ - switch (boot_cpu_data.x86_model) { - case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 29: /* six-core 45 nm xeon "Dunnington" */ - memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Core2 events, "); - break; - default: - case 26: - memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Nehalem/Corei7 events, "); - break; - case 28: - memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Atom events, "); - break; - } - return 0; -} - -static int amd_pmu_init(void) -{ - /* Performance-monitoring supported from K7 and later: */ - if (boot_cpu_data.x86 < 6) - return -ENODEV; - - x86_pmu = amd_pmu; - - /* Events are common for all AMDs */ - memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - return 0; -} - -void __init init_hw_perf_counters(void) -{ - int err; - - pr_info("Performance Counters: "); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - err = intel_pmu_init(); - break; - case X86_VENDOR_AMD: - err = amd_pmu_init(); - break; - default: - return; - } - if (err != 0) { - pr_cont("no PMU driver, software counters only.\n"); - return; - } - - pr_cont("%s PMU driver.\n", x86_pmu.name); - - if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { - WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - x86_pmu.num_counters, X86_PMC_MAX_GENERIC); - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; - } - perf_counter_mask = (1 << x86_pmu.num_counters) - 1; - perf_max_counters = x86_pmu.num_counters; - - if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { - WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; - } - - perf_counter_mask |= - ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; - x86_pmu.intel_ctrl = perf_counter_mask; - - perf_counters_lapic_init(); - register_die_notifier(&perf_counter_nmi_notifier); - - pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.counter_bits); - pr_info("... generic counters: %d\n", x86_pmu.num_counters); - pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); - pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); - pr_info("... counter mask: %016Lx\n", perf_counter_mask); -} - -static inline void x86_pmu_read(struct perf_counter *counter) -{ - x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); -} - -static const struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, -}; - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - int err; - - err = __hw_perf_counter_init(counter); - if (err) { - if (counter->destroy) - counter->destroy(counter); - return ERR_PTR(err); - } - - return &pmu; -} - -/* - * callchain support - */ - -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_nmi_frame); - - -static void -backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - -static int backtrace_stack(void *data, char *name) -{ - per_cpu(in_nmi_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name); - - return 0; -} - -static void backtrace_address(void *data, unsigned long addr, int reliable) -{ - struct perf_callchain_entry *entry = data; - - if (per_cpu(in_nmi_frame, smp_processor_id())) - return; - - if (reliable) - callchain_store(entry, addr); -} - -static const struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, -}; - -#include "../dumpstack.h" - -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); - - dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); -} - -/* - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context - */ -static unsigned long -copy_from_user_nmi(void *to, const void __user *from, unsigned long n) -{ - unsigned long offset, addr = (unsigned long)from; - int type = in_nmi() ? KM_NMI : KM_IRQ0; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page, type); - memcpy(to, map+offset, size); - kunmap_atomic(map, type); - put_page(page); - - len += size; - to += size; - addr += size; - - } while (len < n); - - return len; -} - -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) -{ - unsigned long bytes; - - bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); - - return bytes == sizeof(*frame); -} - -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - struct stack_frame frame; - const void __user *fp; - - if (!user_mode(regs)) - regs = task_pt_regs(current); - - fp = (void __user *)regs->bp; - - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); - - while (entry->nr < PERF_MAX_STACK_DEPTH) { - frame.next_frame = NULL; - frame.return_address = 0; - - if (!copy_stack_frame(fp, &frame)) - break; - - if ((unsigned long)fp < regs->sp) - break; - - callchain_store(entry, frame.return_address); - fp = frame.next_frame; - } -} - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || current->pid == 0) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} - -void hw_perf_counter_setup_online(int cpu) -{ - init_debug_store_on_cpu(cpu); -} diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c new file mode 100644 index 00000000000..0d03629fb1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event.c @@ -0,0 +1,2298 @@ +/* + * Performance events x86 architecture code + * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright (C) 2009 Intel Corporation, + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static u64 perf_event_mask __read_mostly; + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +/* The size of a per-cpu BTS buffer in bytes: */ +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) + +/* The BTS overflow threshold in bytes from the end of the buffer: */ +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) + + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +}; + +struct cpu_hw_events { + struct perf_event *events[X86_PMC_IDX_MAX]; + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long interrupts; + int enabled; + struct debug_store *ds; +}; + +/* + * struct x86_pmu - generic x86 pmu + */ +struct x86_pmu { + const char *name; + int version; + int (*handle_irq)(struct pt_regs *); + void (*disable_all)(void); + void (*enable_all)(void); + void (*enable)(struct hw_perf_event *, int); + void (*disable)(struct hw_perf_event *, int); + unsigned eventsel; + unsigned perfctr; + u64 (*event_map)(int); + u64 (*raw_event)(u64); + int max_events; + int num_events; + int num_events_fixed; + int event_bits; + u64 event_mask; + int apic; + u64 max_period; + u64 intel_ctrl; + void (*enable_bts)(u64 config); + void (*disable_bts)(void); +}; + +static struct x86_pmu x86_pmu __read_mostly; + +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { + .enabled = 1, +}; + +/* + * Not sure about some of these + */ +static const u64 p6_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, +}; + +static u64 p6_pmu_event_map(int hw_event) +{ + return p6_perfmon_event_map[hw_event]; +} + +/* + * Event setting that is specified not to count anything. + * We use this to effectively disable a counter. + * + * L2_RQSTS with 0 MESI unit mask. + */ +#define P6_NOP_EVENT 0x0000002EULL + +static u64 p6_pmu_raw_event(u64 hw_event) +{ +#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL +#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL +#define P6_EVNTSEL_INV_MASK 0x00800000ULL +#define P6_EVNTSEL_REG_MASK 0xFF000000ULL + +#define P6_EVNTSEL_MASK \ + (P6_EVNTSEL_EVENT_MASK | \ + P6_EVNTSEL_UNIT_MASK | \ + P6_EVNTSEL_EDGE_MASK | \ + P6_EVNTSEL_INV_MASK | \ + P6_EVNTSEL_REG_MASK) + + return hw_event & P6_EVNTSEL_MASK; +} + + +/* + * Intel PerfMon v3. Used on Core2 and later. + */ +static const u64 intel_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, +}; + +static u64 intel_pmu_event_map(int hw_event) +{ + return intel_perfmon_event_map[hw_event]; +} + +/* + * Generalized hw caching related hw_event table, filled + * in on a per model basis. A value of 0 means + * 'not supported', -1 means 'hw_event makes no sense on + * this CPU', any other value means the raw hw_event + * ID. + */ + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static u64 __read_mostly hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + +static const u64 nehalem_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static const u64 core2_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static const u64 atom_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static u64 intel_pmu_raw_event(u64 hw_event) +{ +#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL +#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL +#define CORE_EVNTSEL_INV_MASK 0x00800000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL + +#define CORE_EVNTSEL_MASK \ + (CORE_EVNTSEL_EVENT_MASK | \ + CORE_EVNTSEL_UNIT_MASK | \ + CORE_EVNTSEL_EDGE_MASK | \ + CORE_EVNTSEL_INV_MASK | \ + CORE_EVNTSEL_REG_MASK) + + return hw_event & CORE_EVNTSEL_MASK; +} + +static const u64 amd_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ + [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ + [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ + [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ + [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ + [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +/* + * AMD Performance Monitor K7 and later. + */ +static const u64 amd_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, +}; + +static u64 amd_pmu_event_map(int hw_event) +{ + return amd_perfmon_event_map[hw_event]; +} + +static u64 amd_pmu_raw_event(u64 hw_event) +{ +#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL +#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL +#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL +#define K7_EVNTSEL_INV_MASK 0x000800000ULL +#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL + +#define K7_EVNTSEL_MASK \ + (K7_EVNTSEL_EVENT_MASK | \ + K7_EVNTSEL_UNIT_MASK | \ + K7_EVNTSEL_EDGE_MASK | \ + K7_EVNTSEL_INV_MASK | \ + K7_EVNTSEL_REG_MASK) + + return hw_event & K7_EVNTSEL_MASK; +} + +/* + * Propagate event elapsed time into the generic event. + * Can only be executed on the CPU where the event is active. + * Returns the delta events processed. + */ +static u64 +x86_perf_event_update(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + int shift = 64 - x86_pmu.event_bits; + u64 prev_raw_count, new_raw_count; + s64 delta; + + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + + /* + * Careful: an NMI might modify the previous event value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic event atomically: + */ +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + rdmsrl(hwc->event_base + idx, new_raw_count); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + atomic64_add(delta, &event->count); + atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static atomic_t active_events; +static DEFINE_MUTEX(pmc_reserve_mutex); + +static bool reserve_pmc_hardware(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int i; + + if (nmi_watchdog == NMI_LOCAL_APIC) + disable_lapic_nmi_watchdog(); + + for (i = 0; i < x86_pmu.num_events; i++) { + if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) + goto perfctr_fail; + } + + for (i = 0; i < x86_pmu.num_events; i++) { + if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) + goto eventsel_fail; + } +#endif + + return true; + +#ifdef CONFIG_X86_LOCAL_APIC +eventsel_fail: + for (i--; i >= 0; i--) + release_evntsel_nmi(x86_pmu.eventsel + i); + + i = x86_pmu.num_events; + +perfctr_fail: + for (i--; i >= 0; i--) + release_perfctr_nmi(x86_pmu.perfctr + i); + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); + + return false; +#endif +} + +static void release_pmc_hardware(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + int i; + + for (i = 0; i < x86_pmu.num_events; i++) { + release_perfctr_nmi(x86_pmu.perfctr + i); + release_evntsel_nmi(x86_pmu.eventsel + i); + } + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); +#endif +} + +static inline bool bts_available(void) +{ + return x86_pmu.enable_bts != NULL; +} + +static inline void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); +} + +static inline void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_events, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_bts_hardware(void) +{ + int cpu; + + if (!bts_available()) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_events, cpu).ds = NULL; + + kfree((void *)(unsigned long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_bts_hardware(void) +{ + int cpu, err = 0; + + if (!bts_available()) + return 0; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + + err = -ENOMEM; + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + + ds->bts_buffer_base = (u64)(unsigned long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = + ds->bts_buffer_base + BTS_BUFFER_SIZE; + ds->bts_interrupt_threshold = + ds->bts_absolute_maximum - BTS_OVFL_TH; + + per_cpu(cpu_hw_events, cpu).ds = ds; + err = 0; + } + + if (err) + release_bts_hardware(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { + release_pmc_hardware(); + release_bts_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static inline int x86_pmu_initialized(void) +{ + return x86_pmu.handle_irq != NULL; +} + +static inline int +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) +{ + unsigned int cache_type, cache_op, cache_result; + u64 config, val; + + config = attr->config; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + val = hw_cache_event_ids[cache_type][cache_op][cache_result]; + + if (val == 0) + return -ENOENT; + + if (val == -1) + return -EINVAL; + + hwc->config |= val; + + return 0; +} + +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + +/* + * Setup the hardware configuration for a given attr_type + */ +static int __hw_perf_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + u64 config; + int err; + + if (!x86_pmu_initialized()) + return -ENODEV; + + err = 0; + if (!atomic_inc_not_zero(&active_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&active_events) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + err = reserve_bts_hardware(); + } + if (!err) + atomic_inc(&active_events); + mutex_unlock(&pmc_reserve_mutex); + } + if (err) + return err; + + event->destroy = hw_perf_event_destroy; + + /* + * Generate PMC IRQs: + * (keep 'enabled' bit clear for now) + */ + hwc->config = ARCH_PERFMON_EVENTSEL_INT; + + /* + * Count user and OS events unless requested not to. + */ + if (!attr->exclude_user) + hwc->config |= ARCH_PERFMON_EVENTSEL_USR; + if (!attr->exclude_kernel) + hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + + if (!hwc->sample_period) { + hwc->sample_period = x86_pmu.max_period; + hwc->last_period = hwc->sample_period; + atomic64_set(&hwc->period_left, hwc->sample_period); + } else { + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * events (user-space has to fall back and + * sample via a hrtimer based software event): + */ + if (!x86_pmu.apic) + return -EOPNOTSUPP; + } + + /* + * Raw hw_event type provide the config in the hw_event structure + */ + if (attr->type == PERF_TYPE_RAW) { + hwc->config |= x86_pmu.raw_event(attr->config); + return 0; + } + + if (attr->type == PERF_TYPE_HW_CACHE) + return set_ext_hw_attr(hwc, attr); + + if (attr->config >= x86_pmu.max_events) + return -EINVAL; + + /* + * The generic map: + */ + config = x86_pmu.event_map(attr->config); + + if (config == 0) + return -ENOENT; + + if (config == -1LL) + return -EINVAL; + + /* + * Branch tracing: + */ + if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && + (hwc->sample_period == 1)) { + /* BTS is not supported by this architecture. */ + if (!bts_available()) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + return -EOPNOTSUPP; + } + + hwc->config |= config; + + return 0; +} + +static void p6_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static void intel_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); +} + +static void amd_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + /* + * ensure we write the disable before we start disabling the + * events proper, so that amd_pmu_enable_event() does the + * right thing. + */ + barrier(); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + rdmsrl(MSR_K7_EVNTSEL0 + idx, val); + if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) + continue; + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } +} + +void hw_perf_disable(void) +{ + if (!x86_pmu_initialized()) + return; + return x86_pmu.disable_all(); +} + +static void p6_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long val; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static void intel_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_event *event = + cpuc->events[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!event)) + return; + + intel_pmu_enable_bts(event->hw.config); + } +} + +static void amd_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + u64 val; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + val = event->hw.config; + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } +} + +void hw_perf_enable(void) +{ + if (!x86_pmu_initialized()) + return; + x86_pmu.enable_all(); +} + +static inline u64 intel_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void intel_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); +} + +static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + (void)checking_wrmsrl(hwc->config_base + idx, + hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); +} + +static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); +} + +static inline void +intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, mask; + + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + (void)checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static inline void +p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val = P6_NOP_EVENT; + + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); +} + +static inline void +intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + intel_pmu_disable_bts(); + return; + } + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_disable_fixed(hwc, idx); + return; + } + + x86_pmu_disable_event(hwc, idx); +} + +static inline void +amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + x86_pmu_disable_event(hwc, idx); +} + +static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); + +/* + * Set the next IRQ period, based on the hwc->period_left value. + * To be called with the event disabled in hw: + */ +static int +x86_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int err, ret = 0; + + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + + /* + * If we are way outside a reasoable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + /* + * Quirk: certain CPUs dont like it if just 1 hw_event is left: + */ + if (unlikely(left < 2)) + left = 2; + + if (left > x86_pmu.max_period) + left = x86_pmu.max_period; + + per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; + + /* + * The hw event starts counting from this event offset, + * mark it to be able to extra future deltas: + */ + atomic64_set(&hwc->prev_count, (u64)-left); + + err = checking_wrmsrl(hwc->event_base + idx, + (u64)(-left) & x86_pmu.event_mask); + + perf_event_update_userpage(event); + + return ret; +} + +static inline void +intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, bits, mask; + int err; + + /* + * Enable IRQ generation (0x8), + * and enable ring-3 counting (0x2) and ring-0 counting (0x1) + * if requested: + */ + bits = 0x8ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= 0x2; + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + bits |= 0x1; + bits <<= (idx * 4); + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + ctrl_val |= bits; + err = checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + val = hwc->config; + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); +} + + +static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + if (!__get_cpu_var(cpu_hw_events).enabled) + return; + + intel_pmu_enable_bts(hwc->config); + return; + } + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_enable_fixed(hwc, idx); + return; + } + + x86_pmu_enable_event(hwc, idx); +} + +static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->enabled) + x86_pmu_enable_event(hwc, idx); +} + +static int +fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +{ + unsigned int hw_event; + + hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely((hw_event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (hwc->sample_period == 1))) + return X86_PMC_IDX_FIXED_BTS; + + if (!x86_pmu.num_events_fixed) + return -1; + + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) + return X86_PMC_IDX_FIXED_INSTRUCTIONS; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) + return X86_PMC_IDX_FIXED_CPU_CYCLES; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) + return X86_PMC_IDX_FIXED_BUS_CYCLES; + + return -1; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = fixed_mode_idx(event, hwc); + if (idx == X86_PMC_IDX_FIXED_BTS) { + /* BTS is already occupied. */ + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + hwc->config_base = 0; + hwc->event_base = 0; + hwc->idx = idx; + } else if (idx >= 0) { + /* + * Try to get the fixed event, if that is already taken + * then try to get a generic event: + */ + if (test_and_set_bit(idx, cpuc->used_mask)) + goto try_generic; + + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + /* + * We set it so that event_base + idx in wrmsr/rdmsr maps to + * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: + */ + hwc->event_base = + MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + hwc->idx = idx; + } else { + idx = hwc->idx; + /* Try to get the previous generic event again */ + if (test_and_set_bit(idx, cpuc->used_mask)) { +try_generic: + idx = find_first_zero_bit(cpuc->used_mask, + x86_pmu.num_events); + if (idx == x86_pmu.num_events) + return -EAGAIN; + + set_bit(idx, cpuc->used_mask); + hwc->idx = idx; + } + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; + } + + perf_events_lapic_init(); + + x86_pmu.disable(hwc, idx); + + cpuc->events[idx] = event; + set_bit(idx, cpuc->active_mask); + + x86_perf_event_set_period(event, hwc, idx); + x86_pmu.enable(hwc, idx); + + perf_event_update_userpage(event); + + return 0; +} + +static void x86_pmu_unthrottle(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + + if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || + cpuc->events[hwc->idx] != event)) + return; + + x86_pmu.enable(hwc, hwc->idx); +} + +void perf_event_print_debug(void) +{ + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + struct cpu_hw_events *cpuc; + unsigned long flags; + int cpu, idx; + + if (!x86_pmu.num_events) + return; + + local_irq_save(flags); + + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_events, cpu); + + if (x86_pmu.version >= 2) { + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + + pr_info("\n"); + pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); + pr_info("CPU#%d: status: %016llx\n", cpu, status); + pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); + pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); + } + pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); + rdmsrl(x86_pmu.perfctr + idx, pmc_count); + + prev_left = per_cpu(pmc_prev_left[idx], cpu); + + pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", + cpu, idx, pmc_ctrl); + pr_info("CPU#%d: gen-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + pr_info("CPU#%d: gen-PMC%d left: %016llx\n", + cpu, idx, prev_left); + } + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); + + pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + } + local_irq_restore(flags); +} + +static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) +{ + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; + struct bts_record *at, *top; + struct perf_output_handle handle; + struct perf_event_header header; + struct perf_sample_data data; + struct pt_regs regs; + + if (!event) + return; + + if (!ds) + return; + + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; + + if (top <= at) + return; + + ds->bts_index = ds->bts_buffer_base; + + + data.period = event->hw.last_period; + data.addr = 0; + regs.ip = 0; + + /* + * Prepare a generic sample, i.e. fill in the invariant fields. + * We will overwrite the from and to address before we output + * the sample. + */ + perf_prepare_sample(&header, &data, event, ®s); + + if (perf_output_begin(&handle, event, + header.size * (top - at), 1, 1)) + return; + + for (; at < top; at++) { + data.ip = at->from; + data.addr = at->to; + + perf_output_sample(&handle, &header, &data, event); + } + + perf_output_end(&handle); + + /* There's new data available. */ + event->hw.interrupts++; + event->pending_kill = POLL_IN; +} + +static void x86_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + /* + * Must be done before we disable, otherwise the nmi handler + * could reenable again: + */ + clear_bit(idx, cpuc->active_mask); + x86_pmu.disable(hwc, idx); + + /* + * Make sure the cleared pointer becomes visible before we + * (potentially) free the event: + */ + barrier(); + + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + x86_perf_event_update(event, hwc, idx); + + /* Drain the remaining BTS records. */ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) + intel_pmu_drain_bts_buffer(cpuc); + + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + +/* + * Save and restart an expired event. Called by NMI contexts, + * so it has to be careful about preempting normal event ops: + */ +static int intel_pmu_save_and_restart(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + int ret; + + x86_perf_event_update(event, hwc, idx); + ret = x86_perf_event_set_period(event, hwc, idx); + + if (event->state == PERF_EVENT_STATE_ACTIVE) + intel_pmu_enable_event(hwc, idx); + + return ret; +} + +static void intel_pmu_reset(void) +{ + struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; + unsigned long flags; + int idx; + + if (!x86_pmu.num_events) + return; + + local_irq_save(flags); + + printk("clearing PMU state on CPU#%d\n", smp_processor_id()); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); + checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); + } + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + } + if (ds) + ds->bts_index = ds->bts_buffer_base; + + local_irq_restore(flags); +} + +static int p6_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + + val = x86_perf_event_update(event, hwc, idx); + if (val & (1ULL << (x86_pmu.event_bits - 1))) + continue; + + /* + * event overflow + */ + handled = 1; + data.period = event->hw.last_period; + + if (!x86_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + p6_pmu_disable_event(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} + +/* + * This handler is triggered by the local APIC, so the APIC IRQ handling + * rules apply: + */ +static int intel_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int bit, loops; + u64 ack, status; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + perf_disable(); + intel_pmu_drain_bts_buffer(cpuc); + status = intel_pmu_get_status(); + if (!status) { + perf_enable(); + return 0; + } + + loops = 0; +again: + if (++loops > 100) { + WARN_ONCE(1, "perfevents: irq loop stuck!\n"); + perf_event_print_debug(); + intel_pmu_reset(); + perf_enable(); + return 1; + } + + inc_irq_stat(apic_perf_irqs); + ack = status; + for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + clear_bit(bit, (unsigned long *) &status); + if (!test_bit(bit, cpuc->active_mask)) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + data.period = event->hw.last_period; + + if (perf_event_overflow(event, 1, &data, regs)) + intel_pmu_disable_event(&event->hw, bit); + } + + intel_pmu_ack_status(ack); + + /* + * Repeat if there is more work to be done: + */ + status = intel_pmu_get_status(); + if (status) + goto again; + + perf_enable(); + + return 1; +} + +static int amd_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + data.addr = 0; + + cpuc = &__get_cpu_var(cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + + val = x86_perf_event_update(event, hwc, idx); + if (val & (1ULL << (x86_pmu.event_bits - 1))) + continue; + + /* + * event overflow + */ + handled = 1; + data.period = event->hw.last_period; + + if (!x86_perf_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 1, &data, regs)) + amd_pmu_disable_event(hwc, idx); + } + + if (handled) + inc_irq_stat(apic_perf_irqs); + + return handled; +} + +void smp_perf_pending_interrupt(struct pt_regs *regs) +{ + irq_enter(); + ack_APIC_irq(); + inc_irq_stat(apic_pending_irqs); + perf_event_do_pending(); + irq_exit(); +} + +void set_perf_event_pending(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + apic->send_IPI_self(LOCAL_PENDING_VECTOR); +#endif +} + +void perf_events_lapic_init(void) +{ +#ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) + return; + + /* + * Always use NMI for PMU + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif +} + +static int __kprobes +perf_event_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct pt_regs *regs; + + if (!atomic_read(&active_events)) + return NOTIFY_DONE; + + switch (cmd) { + case DIE_NMI: + case DIE_NMI_IPI: + break; + + default: + return NOTIFY_DONE; + } + + regs = args->regs; + +#ifdef CONFIG_X86_LOCAL_APIC + apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif + /* + * Can't rely on the handled return value to say it was our NMI, two + * events could trigger 'simultaneously' raising two back-to-back NMIs. + * + * If the first NMI handles both, the latter will be empty and daze + * the CPU. + */ + x86_pmu.handle_irq(regs); + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, + .next = NULL, + .priority = 1 +}; + +static struct x86_pmu p6_pmu = { + .name = "p6", + .handle_irq = p6_pmu_handle_irq, + .disable_all = p6_pmu_disable_all, + .enable_all = p6_pmu_enable_all, + .enable = p6_pmu_enable_event, + .disable = p6_pmu_disable_event, + .eventsel = MSR_P6_EVNTSEL0, + .perfctr = MSR_P6_PERFCTR0, + .event_map = p6_pmu_event_map, + .raw_event = p6_pmu_raw_event, + .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_events = 2, + /* + * Events have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of a event for P6-like PMU is 32 bits only. + * + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ + .event_bits = 32, + .event_mask = (1ULL << 32) - 1, +}; + +static struct x86_pmu intel_pmu = { + .name = "Intel", + .handle_irq = intel_pmu_handle_irq, + .disable_all = intel_pmu_disable_all, + .enable_all = intel_pmu_enable_all, + .enable = intel_pmu_enable_event, + .disable = intel_pmu_disable_event, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .raw_event = intel_pmu_raw_event, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, +}; + +static struct x86_pmu amd_pmu = { + .name = "AMD", + .handle_irq = amd_pmu_handle_irq, + .disable_all = amd_pmu_disable_all, + .enable_all = amd_pmu_enable_all, + .enable = amd_pmu_enable_event, + .disable = amd_pmu_disable_event, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, + .event_map = amd_pmu_event_map, + .raw_event = amd_pmu_raw_event, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_events = 4, + .event_bits = 48, + .event_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, +}; + +static int p6_pmu_init(void) +{ + switch (boot_cpu_data.x86_model) { + case 1: + case 3: /* Pentium Pro */ + case 5: + case 6: /* Pentium II */ + case 7: + case 8: + case 11: /* Pentium III */ + break; + case 9: + case 13: + /* Pentium M */ + break; + default: + pr_cont("unsupported p6 CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + x86_pmu = p6_pmu; + + if (!cpu_has_apic) { + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); + x86_pmu.apic = 0; + } + + return 0; +} + +static int intel_pmu_init(void) +{ + union cpuid10_edx edx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int ebx; + int version; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + /* check for P6 processor family */ + if (boot_cpu_data.x86 == 6) { + return p6_pmu_init(); + } else { + return -ENODEV; + } + } + + /* + * Check whether the Architectural PerfMon supports + * Branch Misses Retired hw_event or not. + */ + cpuid(10, &eax.full, &ebx, &unused, &edx.full); + if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) + return -ENODEV; + + version = eax.split.version_id; + if (version < 2) + return -ENODEV; + + x86_pmu = intel_pmu; + x86_pmu.version = version; + x86_pmu.num_events = eax.split.num_events; + x86_pmu.event_bits = eax.split.bit_width; + x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; + + /* + * Quirk: v2 perfmon does not report fixed-purpose events, so + * assume at least 3 events: + */ + x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + + /* + * Install the hw-cache-events table: + */ + switch (boot_cpu_data.x86_model) { + case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 29: /* six-core 45 nm xeon "Dunnington" */ + memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Core2 events, "); + break; + default: + case 26: + memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Nehalem/Corei7 events, "); + break; + case 28: + memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + pr_cont("Atom events, "); + break; + } + return 0; +} + +static int amd_pmu_init(void) +{ + /* Performance-monitoring supported from K7 and later: */ + if (boot_cpu_data.x86 < 6) + return -ENODEV; + + x86_pmu = amd_pmu; + + /* Events are common for all AMDs */ + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; +} + +void __init init_hw_perf_events(void) +{ + int err; + + pr_info("Performance Events: "); + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + err = intel_pmu_init(); + break; + case X86_VENDOR_AMD: + err = amd_pmu_init(); + break; + default: + return; + } + if (err != 0) { + pr_cont("no PMU driver, software events only.\n"); + return; + } + + pr_cont("%s PMU driver.\n", x86_pmu.name); + + if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { + WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", + x86_pmu.num_events, X86_PMC_MAX_GENERIC); + x86_pmu.num_events = X86_PMC_MAX_GENERIC; + } + perf_event_mask = (1 << x86_pmu.num_events) - 1; + perf_max_events = x86_pmu.num_events; + + if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { + WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", + x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); + x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; + } + + perf_event_mask |= + ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; + x86_pmu.intel_ctrl = perf_event_mask; + + perf_events_lapic_init(); + register_die_notifier(&perf_event_nmi_notifier); + + pr_info("... version: %d\n", x86_pmu.version); + pr_info("... bit width: %d\n", x86_pmu.event_bits); + pr_info("... generic events: %d\n", x86_pmu.num_events); + pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); + pr_info("... max period: %016Lx\n", x86_pmu.max_period); + pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); + pr_info("... event mask: %016Lx\n", perf_event_mask); +} + +static inline void x86_pmu_read(struct perf_event *event) +{ + x86_perf_event_update(event, &event->hw, event->hw.idx); +} + +static const struct pmu pmu = { + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .read = x86_pmu_read, + .unthrottle = x86_pmu_unthrottle, +}; + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + int err; + + err = __hw_perf_event_init(event); + if (err) { + if (event->destroy) + event->destroy(event); + return ERR_PTR(err); + } + + return &pmu; +} + +/* + * callchain support + */ + +static inline +void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); +static DEFINE_PER_CPU(int, in_nmi_frame); + + +static void +backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + /* Ignore warnings */ +} + +static void backtrace_warning(void *data, char *msg) +{ + /* Ignore warnings */ +} + +static int backtrace_stack(void *data, char *name) +{ + per_cpu(in_nmi_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name); + + return 0; +} + +static void backtrace_address(void *data, unsigned long addr, int reliable) +{ + struct perf_callchain_entry *entry = data; + + if (per_cpu(in_nmi_frame, smp_processor_id())) + return; + + if (reliable) + callchain_store(entry, addr); +} + +static const struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; + +#include "../dumpstack.h" + +static void +perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->ip); + + dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); +} + +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; + int ret; + + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; + + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); + + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); + + len += size; + to += size; + addr += size; + + } while (len < n); + + return len; +} + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + unsigned long bytes; + + bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); + + return bytes == sizeof(*frame); +} + +static void +perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + struct stack_frame frame; + const void __user *fp; + + if (!user_mode(regs)) + regs = task_pt_regs(current); + + fp = (void __user *)regs->bp; + + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, regs->ip); + + while (entry->nr < PERF_MAX_STACK_DEPTH) { + frame.next_frame = NULL; + frame.return_address = 0; + + if (!copy_stack_frame(fp, &frame)) + break; + + if ((unsigned long)fp < regs->sp) + break; + + callchain_store(entry, frame.return_address); + fp = frame.next_frame; + } +} + +static void +perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + int is_user; + + if (!regs) + return; + + is_user = user_mode(regs); + + if (!current || current->pid == 0) + return; + + if (is_user && current->state != TASK_RUNNING) + return; + + if (!is_user) + perf_callchain_kernel(regs, entry); + + if (current->mm) + perf_callchain_user(regs, entry); +} + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry; + + if (in_nmi()) + entry = &__get_cpu_var(pmc_nmi_entry); + else + entry = &__get_cpu_var(pmc_irq_entry); + + entry->nr = 0; + + perf_do_callchain(regs, entry); + + return entry; +} + +void hw_perf_event_setup_online(int cpu) +{ + init_debug_store_on_cpu(cpu); +} diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 392bea43b89..fab786f60ed 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -20,7 +20,7 @@ #include #include -#include +#include struct nmi_watchdog_ctlblk { unsigned int cccr_msr; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d59fe323807..681c3fda739 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1021,7 +1021,7 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt -#ifdef CONFIG_PERF_COUNTERS +#ifdef CONFIG_PERF_EVENTS apicinterrupt LOCAL_PENDING_VECTOR \ perf_pending_interrupt smp_perf_pending_interrupt #endif diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 300883112e3..40f30773fb2 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -208,7 +208,7 @@ static void __init apic_intr_init(void) alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); /* Performance monitoring interrupts: */ -# ifdef CONFIG_PERF_COUNTERS +# ifdef CONFIG_PERF_EVENTS alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); # endif diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d51321ddafd..0157cd26d7c 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -335,4 +335,4 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open + .long sys_perf_event_open diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 775a020990a..82728f2c6d5 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -10,7 +10,7 @@ #include /* max_low_pfn */ #include /* __kprobes, ... */ #include /* kmmio_handler, ... */ -#include /* perf_swcounter_event */ +#include /* perf_sw_event */ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ @@ -1017,7 +1017,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1114,11 +1114,11 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 4899215999d..8eb05878554 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void) if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) { eax.split.version_id = 2; - eax.split.num_counters = 2; + eax.split.num_events = 2; eax.split.bit_width = 40; } - num_counters = eax.split.num_counters; + num_counters = eax.split.num_events; op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index b83776180c7..7b8e75d1608 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -13,7 +13,7 @@ #define OP_X86_MODEL_H #include -#include +#include struct op_msr { unsigned long addr; -- cgit v1.2.3