From d662ed26734473d4cb5f3d78cebfec8f9126e97c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 17:01:53 +1100 Subject: powerpc/perf_counter: Add perf_counter system call on powerpc ... with an empty/dummy asm/perf_counter.h so it builds. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/perf_counter.h | 10 ++++++++++ arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 3 ++- arch/powerpc/platforms/Kconfig.cputype | 1 + 4 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/include/asm/perf_counter.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h new file mode 100644 index 00000000000..59530ae1d53 --- /dev/null +++ b/arch/powerpc/include/asm/perf_counter.h @@ -0,0 +1,10 @@ +/* + * Performance counter support - PowerPC-specific definitions. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 803def23665..da300c4d288 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,3 +322,4 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) +SYSCALL(perf_counter_open) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e07d0c76ed7..7cef5afe89d 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -341,10 +341,11 @@ #define __NR_dup3 316 #define __NR_pipe2 317 #define __NR_inotify_init1 318 +#define __NR_perf_counter_open 319 #ifdef __KERNEL__ -#define __NR_syscalls 319 +#define __NR_syscalls 320 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3d0c776f888..94dd1fb9a00 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,6 +1,7 @@ config PPC64 bool "64-bit kernel" default n + select HAVE_PERF_COUNTERS help This option selects whether a 32-bit or a 64-bit kernel will be built. -- cgit v1.2.3 From 93a6d3ce6962044fe9badf528fed46b455d58292 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 16:52:19 +1100 Subject: powerpc: Provide a way to defer perf counter work until interrupts are enabled Because 64-bit powerpc uses lazy (soft) interrupt disabling, it is possible for a performance monitor exception to come in when the kernel thinks interrupts are disabled (i.e. when they are soft-disabled but hard-enabled). In such a situation the performance monitor exception handler might have some processing to do (such as process wakeups) which can't be done in what is effectively an NMI handler. This provides a way to defer that work until interrupts get enabled, either in raw_local_irq_restore() or by returning from an interrupt handler to code that had interrupts enabled. We have a per-processor flag that indicates that there is work pending to do when interrupts subsequently get re-enabled. This flag is checked in the interrupt return path and in raw_local_irq_restore(), and if it is set, perf_counter_do_pending() is called to do the pending work. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/hw_irq.h | 31 +++++++++++++++++++++++++++++++ arch/powerpc/include/asm/paca.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/entry_64.S | 9 +++++++++ arch/powerpc/kernel/irq.c | 10 ++++++++++ 5 files changed, 52 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index f75a5fc64d2..e10f151c3db 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -131,5 +131,36 @@ static inline int irqs_disabled_flags(unsigned long flags) */ struct hw_interrupt_type; +#ifdef CONFIG_PERF_COUNTERS +static inline unsigned long get_perf_counter_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, perf_counter_pending))); + return x; +} + +static inline void set_perf_counter_pending(int x) +{ + asm volatile("stb %0,%1(13)" : : + "r" (x), + "i" (offsetof(struct paca_struct, perf_counter_pending))); +} + +extern void perf_counter_do_pending(void); + +#else + +static inline unsigned long get_perf_counter_pending(void) +{ + return 0; +} + +static inline void set_perf_counter_pending(int x) {} +static inline void perf_counter_do_pending(void) {} +#endif /* CONFIG_PERF_COUNTERS */ + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 082b3aedf14..6ef05572301 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -99,6 +99,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ + u8 perf_counter_pending; /* PM interrupt while soft-disabled */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 661d07d2146..cea46290011 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -127,6 +127,7 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); + DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 383ed6eb008..f30b4e553c5 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); +#ifdef CONFIG_PERF_COUNTERS + /* check paca->perf_counter_pending if we're enabling ints */ + lbz r3,PACAPERFPEND(r13) + and. r3,r3,r5 + beq 27f + bl .perf_counter_do_pending +27: +#endif /* CONFIG_PERF_COUNTERS */ + /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ac222d0ab12..4efb886ea43 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,6 +104,13 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +#ifdef CONFIG_PERF_COUNTERS +notrace void __weak perf_counter_do_pending(void) +{ + set_perf_counter_pending(0); +} +#endif + notrace void raw_local_irq_restore(unsigned long en) { /* @@ -135,6 +142,9 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } + if (get_perf_counter_pending()) + perf_counter_do_pending(); + /* * if (get_paca()->hard_enabled) return; * But again we need to take care that gcc gets hard_enabled directly -- cgit v1.2.3 From 4574910e5087085a1f330ff8373cee4503f5c77c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 20:21:55 +1100 Subject: powerpc/perf_counter: Add generic support for POWER-family PMU hardware This provides the architecture-specific functions needed to access PMU hardware on the 64-bit PowerPC processors. It has been designed for the IBM POWER family (POWER 4/4+/5/5+/6 and PPC970) but will hopefully also suit other 64-bit PowerPC machines (although probably not Cell given how different it is in this area). This doesn't include back-ends for any specific processors. This implements a system which allows back-ends to express the constraints that their hardware has on what events can be counted simultaneously. The constraints are expressed as a 64-bit mask + 64-bit value for each event, and the encoding is capable of expressing the constraints arising from having a set of multiplexers feeding an event bus, with some events being available through multiple multiplexer settings, such as we get on POWER4 and PPC970. Furthermore, the back-end can supply alternative event codes for each event, and the constraint checking code will try all possible combinations of alternative event codes to try to find a combination that will fit. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/perf_counter.h | 62 +++ arch/powerpc/kernel/Makefile | 1 + arch/powerpc/kernel/perf_counter.c | 754 ++++++++++++++++++++++++++++++++ 3 files changed, 817 insertions(+) create mode 100644 arch/powerpc/kernel/perf_counter.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 59530ae1d53..9d7ff6d7fb5 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -8,3 +8,65 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include + +#define MAX_HWCOUNTERS 8 +#define MAX_EVENT_ALTERNATIVES 8 + +/* + * This struct provides the constants and functions needed to + * describe the PMU on a particular POWER-family CPU. + */ +struct power_pmu { + int n_counter; + int max_alternatives; + u64 add_fields; + u64 test_adder; + int (*compute_mmcr)(unsigned int events[], int n_ev, + unsigned int hwc[], u64 mmcr[]); + int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); + int (*get_alternatives)(unsigned int event, unsigned int alt[]); + void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); + int n_generic; + int *generic_events; +}; + +extern struct power_pmu *ppmu; + +/* + * The power_pmu.get_constraint function returns a 64-bit value and + * a 64-bit mask that express the constraints between this event and + * other events. + * + * The value and mask are divided up into (non-overlapping) bitfields + * of three different types: + * + * Select field: this expresses the constraint that some set of bits + * in MMCR* needs to be set to a specific value for this event. For a + * select field, the mask contains 1s in every bit of the field, and + * the value contains a unique value for each possible setting of the + * MMCR* bits. The constraint checking code will ensure that two events + * that set the same field in their masks have the same value in their + * value dwords. + * + * Add field: this expresses the constraint that there can be at most + * N events in a particular class. A field of k bits can be used for + * N <= 2^(k-1) - 1. The mask has the most significant bit of the field + * set (and the other bits 0), and the value has only the least significant + * bit of the field set. In addition, the 'add_fields' and 'test_adder' + * in the struct power_pmu for this processor come into play. The + * add_fields value contains 1 in the LSB of the field, and the + * test_adder contains 2^(k-1) - 1 - N in the field. + * + * NAND field: this expresses the constraint that you may not have events + * in all of a set of classes. (For example, on PPC970, you can't select + * events from the FPU, ISU and IDU simultaneously, although any two are + * possible.) For N classes, the field is N+1 bits wide, and each class + * is assigned one bit from the least-significant N bits. The mask has + * only the most-significant bit set, and the value has only the bit + * for the event's class set. The test_adder has the least significant + * bit set in the field. + * + * If an event is not subject to the constraint expressed by a particular + * field, then it will have 0 in both the mask and value for that field. + */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1308a86e907..fde190bbb2b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c new file mode 100644 index 00000000000..c7d4c2966a5 --- /dev/null +++ b/arch/powerpc/kernel/perf_counter.c @@ -0,0 +1,754 @@ +/* + * Performance counter support - powerpc architecture code + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +struct cpu_hw_counters { + int n_counters; + int n_percpu; + int disabled; + int n_added; + struct perf_counter *counter[MAX_HWCOUNTERS]; + unsigned int events[MAX_HWCOUNTERS]; + u64 mmcr[3]; +}; +DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); + +struct power_pmu *ppmu; + +void perf_counter_print_debug(void) +{ +} + +/* + * Return 1 for a software counter, 0 for a hardware counter + */ +static inline int is_software_counter(struct perf_counter *counter) +{ + return !counter->hw_event.raw && counter->hw_event.type < 0; +} + +/* + * Read one performance monitor counter (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 1: + val = mfspr(SPRN_PMC1); + break; + case 2: + val = mfspr(SPRN_PMC2); + break; + case 3: + val = mfspr(SPRN_PMC3); + break; + case 4: + val = mfspr(SPRN_PMC4); + break; + case 5: + val = mfspr(SPRN_PMC5); + break; + case 6: + val = mfspr(SPRN_PMC6); + break; + case 7: + val = mfspr(SPRN_PMC7); + break; + case 8: + val = mfspr(SPRN_PMC8); + break; + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 1: + mtspr(SPRN_PMC1, val); + break; + case 2: + mtspr(SPRN_PMC2, val); + break; + case 3: + mtspr(SPRN_PMC3, val); + break; + case 4: + mtspr(SPRN_PMC4, val); + break; + case 5: + mtspr(SPRN_PMC5, val); + break; + case 6: + mtspr(SPRN_PMC6, val); + break; + case 7: + mtspr(SPRN_PMC7, val); + break; + case 8: + mtspr(SPRN_PMC8, val); + break; + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } +} + +/* + * Check if a set of events can all go on the PMU at once. + * If they can't, this will look at alternative codes for the events + * and see if any combination of alternative codes is feasible. + * The feasible set is returned in event[]. + */ +static int power_check_constraints(unsigned int event[], int n_ev) +{ + u64 mask, value, nv; + unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; + int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; + int i, j; + u64 addf = ppmu->add_fields; + u64 tadd = ppmu->test_adder; + + if (n_ev > ppmu->n_counter) + return -1; + + /* First see if the events will go on as-is */ + for (i = 0; i < n_ev; ++i) { + alternatives[i][0] = event[i]; + if (ppmu->get_constraint(event[i], &amasks[i][0], + &avalues[i][0])) + return -1; + choice[i] = 0; + } + value = mask = 0; + for (i = 0; i < n_ev; ++i) { + nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); + if ((((nv + tadd) ^ value) & mask) != 0 || + (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) + break; + value = nv; + mask |= amasks[i][0]; + } + if (i == n_ev) + return 0; /* all OK */ + + /* doesn't work, gather alternatives... */ + if (!ppmu->get_alternatives) + return -1; + for (i = 0; i < n_ev; ++i) { + n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]); + for (j = 1; j < n_alt[i]; ++j) + ppmu->get_constraint(alternatives[i][j], + &amasks[i][j], &avalues[i][j]); + } + + /* enumerate all possibilities and see if any will work */ + i = 0; + j = -1; + value = mask = nv = 0; + while (i < n_ev) { + if (j >= 0) { + /* we're backtracking, restore context */ + value = svalues[i]; + mask = smasks[i]; + j = choice[i]; + } + /* + * See if any alternative k for event i, + * where k > j, will satisfy the constraints. + */ + while (++j < n_alt[i]) { + nv = (value | avalues[i][j]) + + (value & avalues[i][j] & addf); + if ((((nv + tadd) ^ value) & mask) == 0 && + (((nv + tadd) ^ avalues[i][j]) + & amasks[i][j]) == 0) + break; + } + if (j >= n_alt[i]) { + /* + * No feasible alternative, backtrack + * to event i-1 and continue enumerating its + * alternatives from where we got up to. + */ + if (--i < 0) + return -1; + } else { + /* + * Found a feasible alternative for event i, + * remember where we got up to with this event, + * go on to the next event, and start with + * the first alternative for it. + */ + choice[i] = j; + svalues[i] = value; + smasks[i] = mask; + value = nv; + mask |= amasks[i][j]; + ++i; + j = -1; + } + } + + /* OK, we have a feasible combination, tell the caller the solution */ + for (i = 0; i < n_ev; ++i) + event[i] = alternatives[i][choice[i]]; + return 0; +} + +static void power_perf_read(struct perf_counter *counter) +{ + long val, delta, prev; + + if (!counter->hw.idx) + return; + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&counter->hw.prev_count); + barrier(); + val = read_pmc(counter->hw.idx); + } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); + + /* The counters are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &counter->count); + atomic64_sub(delta, &counter->hw.period_left); +} + +/* + * Disable all counters to prevent PMU interrupts and to allow + * counters to be added or removed. + */ +u64 hw_perf_save_disable(void) +{ + struct cpu_hw_counters *cpuhw; + unsigned long ret; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_counters); + + ret = cpuhw->disabled; + if (!ret) { + cpuhw->disabled = 1; + cpuhw->n_added = 0; + + /* + * Set the 'freeze counters' bit. + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the counters + * before we return. + */ + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); + mb(); + } + local_irq_restore(flags); + return ret; +} + +/* + * Re-enable all counters if disable == 0. + * If we were previously disabled and counters were added, then + * put the new config on the PMU. + */ +void hw_perf_restore(u64 disable) +{ + struct perf_counter *counter; + struct cpu_hw_counters *cpuhw; + unsigned long flags; + long i; + unsigned long val; + s64 left; + unsigned int hwc_index[MAX_HWCOUNTERS]; + + if (disable) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_counters); + cpuhw->disabled = 0; + + /* + * If we didn't change anything, or only removed counters, + * no need to recalculate MMCR* settings and reset the PMCs. + * Just reenable the PMU with the current MMCR* settings + * (possibly updated for removal of counters). + */ + if (!cpuhw->n_added) { + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + goto out; + } + + /* + * Compute MMCR* values for the new set of counters + */ + if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, + cpuhw->mmcr)) { + /* shouldn't ever get here */ + printk(KERN_ERR "oops compute_mmcr failed\n"); + goto out; + } + + /* + * Write the new configuration to MMCR* with the freeze + * bit set and set the hardware counters to their initial values. + * Then unfreeze the counters. + */ + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + | MMCR0_FC); + + /* + * Read off any pre-existing counters that need to move + * to another PMC. + */ + for (i = 0; i < cpuhw->n_counters; ++i) { + counter = cpuhw->counter[i]; + if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { + power_perf_read(counter); + write_pmc(counter->hw.idx, 0); + counter->hw.idx = 0; + } + } + + /* + * Initialize the PMCs for all the new and moved counters. + */ + for (i = 0; i < cpuhw->n_counters; ++i) { + counter = cpuhw->counter[i]; + if (counter->hw.idx) + continue; + val = 0; + if (counter->hw_event.irq_period) { + left = atomic64_read(&counter->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&counter->hw.prev_count, val); + counter->hw.idx = hwc_index[i] + 1; + write_pmc(counter->hw.idx, val); + } + mb(); + cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_counter *group, int max_count, + struct perf_counter *ctrs[], unsigned int *events) +{ + int n = 0; + struct perf_counter *counter; + + if (!is_software_counter(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + events[n++] = group->hw.config; + } + list_for_each_entry(counter, &group->sibling_list, list_entry) { + if (!is_software_counter(counter) && + counter->state != PERF_COUNTER_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = counter; + events[n++] = counter->hw.config; + } + } + return n; +} + +static void counter_sched_in(struct perf_counter *counter, int cpu) +{ + counter->state = PERF_COUNTER_STATE_ACTIVE; + counter->oncpu = cpu; + if (is_software_counter(counter)) + counter->hw_ops->enable(counter); +} + +/* + * Called to enable a whole group of counters. + * Returns 1 if the group was enabled, or -EAGAIN if it could not be. + * Assumes the caller has disabled interrupts and has + * frozen the PMU with hw_perf_save_disable. + */ +int hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu) +{ + struct cpu_hw_counters *cpuhw; + long i, n, n0; + struct perf_counter *sub; + + cpuhw = &__get_cpu_var(cpu_hw_counters); + n0 = cpuhw->n_counters; + n = collect_events(group_leader, ppmu->n_counter - n0, + &cpuhw->counter[n0], &cpuhw->events[n0]); + if (n < 0) + return -EAGAIN; + if (power_check_constraints(cpuhw->events, n + n0)) + return -EAGAIN; + cpuhw->n_counters = n0 + n; + cpuhw->n_added += n; + + /* + * OK, this group can go on; update counter states etc., + * and enable any software counters + */ + for (i = n0; i < n0 + n; ++i) + cpuhw->counter[i]->hw.config = cpuhw->events[i]; + n = 1; + counter_sched_in(group_leader, cpu); + list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { + if (sub->state != PERF_COUNTER_STATE_OFF) { + counter_sched_in(sub, cpu); + ++n; + } + } + cpuctx->active_oncpu += n; + ctx->nr_active += n; + + return 1; +} + +/* + * Add a counter to the PMU. + * If all counters are not already frozen, then we disable and + * re-enable the PMU in order to get hw_perf_restore to do the + * actual work of reconfiguring the PMU. + */ +static int power_perf_enable(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuhw; + unsigned long flags; + u64 pmudis; + int n0; + int ret = -EAGAIN; + + local_irq_save(flags); + pmudis = hw_perf_save_disable(); + + /* + * Add the counter to the list (if there is room) + * and check whether the total set is still feasible. + */ + cpuhw = &__get_cpu_var(cpu_hw_counters); + n0 = cpuhw->n_counters; + if (n0 >= ppmu->n_counter) + goto out; + cpuhw->counter[n0] = counter; + cpuhw->events[n0] = counter->hw.config; + if (power_check_constraints(cpuhw->events, n0 + 1)) + goto out; + + counter->hw.config = cpuhw->events[n0]; + ++cpuhw->n_counters; + ++cpuhw->n_added; + + ret = 0; + out: + hw_perf_restore(pmudis); + local_irq_restore(flags); + return ret; +} + +/* + * Remove a counter from the PMU. + */ +static void power_perf_disable(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuhw; + long i; + u64 pmudis; + unsigned long flags; + + local_irq_save(flags); + pmudis = hw_perf_save_disable(); + + power_perf_read(counter); + + cpuhw = &__get_cpu_var(cpu_hw_counters); + for (i = 0; i < cpuhw->n_counters; ++i) { + if (counter == cpuhw->counter[i]) { + while (++i < cpuhw->n_counters) + cpuhw->counter[i-1] = cpuhw->counter[i]; + --cpuhw->n_counters; + ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); + write_pmc(counter->hw.idx, 0); + counter->hw.idx = 0; + break; + } + } + if (cpuhw->n_counters == 0) { + /* disable exceptions if no counters are running */ + cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + } + + hw_perf_restore(pmudis); + local_irq_restore(flags); +} + +struct hw_perf_counter_ops power_perf_ops = { + .enable = power_perf_enable, + .disable = power_perf_disable, + .read = power_perf_read +}; + +const struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter) +{ + unsigned long ev; + struct perf_counter *ctrs[MAX_HWCOUNTERS]; + unsigned int events[MAX_HWCOUNTERS]; + int n; + + if (!ppmu) + return NULL; + if ((s64)counter->hw_event.irq_period < 0) + return NULL; + ev = counter->hw_event.type; + if (!counter->hw_event.raw) { + if (ev >= ppmu->n_generic || + ppmu->generic_events[ev] == 0) + return NULL; + ev = ppmu->generic_events[ev]; + } + counter->hw.config_base = ev; + counter->hw.idx = 0; + + /* + * If this is in a group, check if it can go on with all the + * other hardware counters in the group. We assume the counter + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (counter->group_leader != counter) { + n = collect_events(counter->group_leader, ppmu->n_counter - 1, + ctrs, events); + if (n < 0) + return NULL; + } + events[n++] = ev; + if (power_check_constraints(events, n)) + return NULL; + + counter->hw.config = events[n - 1]; + atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); + return &power_perf_ops; +} + +/* + * Handle wakeups. + */ +void perf_counter_do_pending(void) +{ + int i; + struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); + struct perf_counter *counter; + + set_perf_counter_pending(0); + for (i = 0; i < cpuhw->n_counters; ++i) { + counter = cpuhw->counter[i]; + if (counter && counter->wakeup_pending) { + counter->wakeup_pending = 0; + wake_up(&counter->waitq); + } + } +} + +/* + * Record data for an irq counter. + * This function was lifted from the x86 code; maybe it should + * go in the core? + */ +static void perf_store_irq_data(struct perf_counter *counter, u64 data) +{ + struct perf_data *irqdata = counter->irqdata; + + if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { + irqdata->overrun++; + } else { + u64 *p = (u64 *) &irqdata->data[irqdata->len]; + + *p = data; + irqdata->len += sizeof(u64); + } +} + +/* + * Record all the values of the counters in a group + */ +static void perf_handle_group(struct perf_counter *counter) +{ + struct perf_counter *leader, *sub; + + leader = counter->group_leader; + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + if (sub != counter) + sub->hw_ops->read(sub); + perf_store_irq_data(counter, sub->hw_event.type); + perf_store_irq_data(counter, atomic64_read(&sub->count)); + } +} + +/* + * A counter has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_counter *counter, long val, + struct pt_regs *regs) +{ + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&counter->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &counter->count); + + /* + * See if the total period for this counter has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&counter->hw.period_left) - delta; + if (counter->hw_event.irq_period) { + if (left <= 0) { + left += counter->hw_event.irq_period; + if (left <= 0) + left = counter->hw_event.irq_period; + record = 1; + } + if (left < 0x80000000L) + val = 0x80000000L - left; + } + write_pmc(counter->hw.idx, val); + atomic64_set(&counter->hw.prev_count, val); + atomic64_set(&counter->hw.period_left, left); + + /* + * Finally record data if requested. + */ + if (record) { + switch (counter->hw_event.record_type) { + case PERF_RECORD_SIMPLE: + break; + case PERF_RECORD_IRQ: + perf_store_irq_data(counter, instruction_pointer(regs)); + counter->wakeup_pending = 1; + break; + case PERF_RECORD_GROUP: + perf_handle_group(counter); + counter->wakeup_pending = 1; + break; + } + } +} + +/* + * Performance monitor interrupt stuff + */ +static void perf_counter_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); + struct perf_counter *counter; + long val; + int need_wakeup = 0, found = 0; + + for (i = 0; i < cpuhw->n_counters; ++i) { + counter = cpuhw->counter[i]; + val = read_pmc(counter->hw.idx); + if ((int)val < 0) { + /* counter has overflowed */ + found = 1; + record_and_restart(counter, val, regs); + if (counter->wakeup_pending) + need_wakeup = 1; + } + } + + /* + * In case we didn't find and reset the counter that caused + * the interrupt, scan all counters and reset any that are + * negative, to avoid getting continual interrupts. + * Any that we processed in the previous loop will not be negative. + */ + if (!found) { + for (i = 0; i < ppmu->n_counter; ++i) { + val = read_pmc(i + 1); + if ((int)val < 0) + write_pmc(i + 1, 0); + } + } + + /* + * Reset MMCR0 to its normal value. This will set PMXE and + * clear FC (freeze counters) and PMAO (perf mon alert occurred) + * and thus allow interrupts to occur again. + * XXX might want to use MSR.PM to keep the counters frozen until + * we get back out of this interrupt. + */ + mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + + /* + * If we need a wakeup, check whether interrupts were soft-enabled + * when we took the interrupt. If they were, we can wake stuff up + * immediately; otherwise we'll have to set a flag and do the + * wakeup when interrupts get soft-enabled. + */ + if (need_wakeup) { + if (regs->softe) { + irq_enter(); + perf_counter_do_pending(); + irq_exit(); + } else { + set_perf_counter_pending(1); + } + } +} + +static int init_perf_counters(void) +{ + if (reserve_pmc_hardware(perf_counter_interrupt)) { + printk(KERN_ERR "Couldn't init performance monitor subsystem\n"); + return -EBUSY; + } + + return 0; +} + +arch_initcall(init_perf_counters); -- cgit v1.2.3 From 16b067993dee3dfde61b20027e0b168dc06201ee Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 10 Jan 2009 16:34:07 +1100 Subject: powerpc/perf_counter: Add support for PPC970 family This adds the back-end for the PMU on the PPC970 family. The PPC970 allows events from the ISU to be selected in two different ways. Rather than use alternative event codes to express this, we instead use a single encoding for ISU events and express the resulting constraint (that you can't select events from all three of FPU/IFU/VPU, ISU and IDU/STS at the same time, since they all come in through only 2 multiplexers) using a NAND constraint field, and work out which multiplexer is used for ISU events at compute_mmcr time. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/perf_counter.c | 13 ++ arch/powerpc/kernel/ppc970-pmu.c | 375 +++++++++++++++++++++++++++++++++++++ 3 files changed, 389 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/ppc970-pmu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fde190bbb2b..45798f6fb13 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index c7d4c2966a5..5561ecb02a4 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -741,13 +741,26 @@ static void perf_counter_interrupt(struct pt_regs *regs) } } +extern struct power_pmu ppc970_pmu; + static int init_perf_counters(void) { + unsigned long pvr; + if (reserve_pmc_hardware(perf_counter_interrupt)) { printk(KERN_ERR "Couldn't init performance monitor subsystem\n"); return -EBUSY; } + /* XXX should get this from cputable */ + pvr = mfspr(SPRN_PVR); + switch (PVR_VER(pvr)) { + case PV_970: + case PV_970FX: + case PV_970MP: + ppmu = &ppc970_pmu; + break; + } return 0; } diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c new file mode 100644 index 00000000000..c3256580be1 --- /dev/null +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -0,0 +1,375 @@ +/* + * Performance counter support for PPC970-family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for PPC970 + */ +#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_BYTE_SH 4 /* Byte number of event bus to use */ +#define PM_BYTE_MSK 3 +#define PM_PMCSEL_MSK 0xf + +/* Values in PM_UNIT field */ +#define PM_NONE 0 +#define PM_FPU 1 +#define PM_VPU 2 +#define PM_ISU 3 +#define PM_IFU 4 +#define PM_IDU 5 +#define PM_STS 6 +#define PM_LSU0 7 +#define PM_LSU1U 8 +#define PM_LSU1L 9 +#define PM_LASTUNIT 9 + +/* + * Bits in MMCR0 for PPC970 + */ +#define MMCR0_PMC1SEL_SH 8 +#define MMCR0_PMC2SEL_SH 1 +#define MMCR_PMCSEL_MSK 0x1f + +/* + * Bits in MMCR1 for PPC970 + */ +#define MMCR1_TTM0SEL_SH 62 +#define MMCR1_TTM1SEL_SH 59 +#define MMCR1_TTM3SEL_SH 53 +#define MMCR1_TTMSEL_MSK 3 +#define MMCR1_TD_CP_DBG0SEL_SH 50 +#define MMCR1_TD_CP_DBG1SEL_SH 48 +#define MMCR1_TD_CP_DBG2SEL_SH 46 +#define MMCR1_TD_CP_DBG3SEL_SH 44 +#define MMCR1_PMC1_ADDER_SEL_SH 39 +#define MMCR1_PMC2_ADDER_SEL_SH 38 +#define MMCR1_PMC6_ADDER_SEL_SH 37 +#define MMCR1_PMC5_ADDER_SEL_SH 36 +#define MMCR1_PMC8_ADDER_SEL_SH 35 +#define MMCR1_PMC7_ADDER_SEL_SH 34 +#define MMCR1_PMC3_ADDER_SEL_SH 33 +#define MMCR1_PMC4_ADDER_SEL_SH 32 +#define MMCR1_PMC3SEL_SH 27 +#define MMCR1_PMC4SEL_SH 22 +#define MMCR1_PMC5SEL_SH 17 +#define MMCR1_PMC6SEL_SH 12 +#define MMCR1_PMC7SEL_SH 7 +#define MMCR1_PMC8SEL_SH 2 + +static short mmcr1_adder_bits[8] = { + MMCR1_PMC1_ADDER_SEL_SH, + MMCR1_PMC2_ADDER_SEL_SH, + MMCR1_PMC3_ADDER_SEL_SH, + MMCR1_PMC4_ADDER_SEL_SH, + MMCR1_PMC5_ADDER_SEL_SH, + MMCR1_PMC6_ADDER_SEL_SH, + MMCR1_PMC7_ADDER_SEL_SH, + MMCR1_PMC8_ADDER_SEL_SH +}; + +/* + * Bits in MMCRA + */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * <><>[ >[ >[ >< >< >< >< ><><><><><><><><> + * T0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 + * + * T0 - TTM0 constraint + * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 + * + * T1 - TTM1 constraint + * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 + * + * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS + * 43: UC3 error 0x0800_0000_0000 + * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 + * 41: ISU events needed 0x0200_0000_0000 + * 40: IDU|STS events needed 0x0100_0000_0000 + * + * PS1 + * 39: PS1 error 0x0080_0000_0000 + * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 + * + * PS2 + * 35: PS2 error 0x0008_0000_0000 + * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 + * + * B0 + * 28-31: Byte 0 event source 0xf000_0000 + * Encoding as for the event code + * + * B1, B2, B3 + * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources + * + * P1 + * 15: P1 error 0x8000 + * 14-15: Count of events needing PMC1 + * + * P2..P8 + * 0-13: Count of events needing PMC2..PMC8 + */ + +/* Masks and values for using events from the various units */ +static u64 unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, + [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, + [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, + [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull }, + [PM_IDU] = { 0x380000000000ull, 0x010000000000ull }, + [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, +}; + +static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, unit, sh; + u64 mask = 0, value = 0; + int grp = -1; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 8) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + grp = ((pmc - 1) >> 1) & 1; + } + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit) { + if (unit > PM_LASTUNIT) + return -1; + mask |= unit_cons[unit][0]; + value |= unit_cons[unit][1]; + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + /* + * Bus events on bytes 0 and 2 can be counted + * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. + */ + if (!pmc) + grp = byte & 1; + /* Set byte lane select field */ + mask |= 0xfULL << (28 - 4 * byte); + value |= (u64)unit << (28 - 4 * byte); + } + if (grp == 0) { + /* increment PMC1/2/5/6 field */ + mask |= 0x8000000000ull; + value |= 0x1000000000ull; + } else if (grp == 1) { + /* increment PMC3/4/7/8 field */ + mask |= 0x800000000ull; + value |= 0x100000000ull; + } + *maskp = mask; + *valp = value; + return 0; +} + +static int p970_get_alternatives(unsigned int event, unsigned int alt[]) +{ + alt[0] = event; + + /* 2 alternatives for LSU empty */ + if (event == 0x2002 || event == 0x3002) { + alt[1] = event ^ 0x1000; + return 2; + } + + return 1; +} + +static int p970_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; + unsigned int pmc, unit, byte, psel; + unsigned int ttm, grp; + unsigned int pmc_inuse = 0; + unsigned int pmc_grp_use[2]; + unsigned char busbyte[4]; + unsigned char unituse[16]; + unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 }; + unsigned char ttmuse[2]; + unsigned char pmcsel[8]; + int i; + + if (n_ev > 8) + return -1; + + /* First pass to count resource use */ + pmc_grp_use[0] = pmc_grp_use[1] = 0; + memset(busbyte, 0, sizeof(busbyte)); + memset(unituse, 0, sizeof(unituse)); + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + /* count 1/2/5/6 vs 3/4/7/8 use */ + ++pmc_grp_use[((pmc - 1) >> 1) & 1]; + } + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + if (unit) { + if (unit > PM_LASTUNIT) + return -1; + if (!pmc) + ++pmc_grp_use[byte & 1]; + if (busbyte[byte] && busbyte[byte] != unit) + return -1; + busbyte[byte] = unit; + unituse[unit] = 1; + } + } + if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) + return -1; + + /* + * Assign resources and set multiplexer selects. + * + * PM_ISU can go either on TTM0 or TTM1, but that's the only + * choice we have to deal with. + */ + if (unituse[PM_ISU] & + (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU])) + unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */ + /* Set TTM[01]SEL fields. */ + ttmuse[0] = ttmuse[1] = 0; + for (i = PM_FPU; i <= PM_STS; ++i) { + if (!unituse[i]) + continue; + ttm = unitmap[i]; + ++ttmuse[(ttm >> 2) & 1]; + mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; + } + /* Check only one unit per TTMx */ + if (ttmuse[0] > 1 || ttmuse[1] > 1) + return -1; + + /* Set byte lane select fields and TTM3SEL. */ + for (byte = 0; byte < 4; ++byte) { + unit = busbyte[byte]; + if (!unit) + continue; + if (unit <= PM_STS) + ttm = (unitmap[unit] >> 2) & 1; + else if (unit == PM_LSU0) + ttm = 2; + else { + ttm = 3; + if (unit == PM_LSU1L && byte >= 2) + mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + } + mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + } + + /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ + memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + psel = event[i] & PM_PMCSEL_MSK; + if (!pmc) { + /* Bus event or any-PMC direct event */ + if (unit) + psel |= 0x10 | ((byte & 2) << 2); + else + psel |= 8; + for (pmc = 0; pmc < 8; ++pmc) { + if (pmc_inuse & (1 << pmc)) + continue; + grp = (pmc >> 1) & 1; + if (unit) { + if (grp == (byte & 1)) + break; + } else if (pmc_grp_use[grp] < 4) { + ++pmc_grp_use[grp]; + break; + } + } + pmc_inuse |= 1 << pmc; + } else { + /* Direct event */ + --pmc; + if (psel == 0 && (byte & 2)) + /* add events on higher-numbered bus */ + mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; + } + pmcsel[pmc] = psel; + hwc[i] = pmc; + } + for (pmc = 0; pmc < 2; ++pmc) + mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); + for (; pmc < 8; ++pmc) + mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); + if (pmc_inuse & 1) + mmcr0 |= MMCR0_PMC1CE; + if (pmc_inuse & 0xfe) + mmcr0 |= MMCR0_PMCjCE; + + mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ + + /* Return MMCRx values */ + mmcr[0] = mmcr0; + mmcr[1] = mmcr1; + mmcr[2] = mmcra; + return 0; +} + +static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + int shift, i; + + if (pmc <= 1) { + shift = MMCR0_PMC1SEL_SH - 7 * pmc; + i = 0; + } else { + shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); + i = 1; + } + /* + * Setting the PMCxSEL field to 0x08 disables PMC x. + */ + mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); +} + +static int ppc970_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 7, + [PERF_COUNT_INSTRUCTIONS] = 1, + [PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ + [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ +}; + +struct power_pmu ppc970_pmu = { + .n_counter = 8, + .max_alternatives = 2, + .add_fields = 0x001100005555ull, + .test_adder = 0x013300000000ull, + .compute_mmcr = p970_compute_mmcr, + .get_constraint = p970_get_constraint, + .get_alternatives = p970_get_alternatives, + .disable_pmc = p970_disable_pmc, + .n_generic = ARRAY_SIZE(ppc970_generic_events), + .generic_events = ppc970_generic_events, +}; -- cgit v1.2.3 From f78628374a13bc150db77c6e02d4f2c0a7f932ef Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 21:05:35 +1100 Subject: powerpc/perf_counter: Add support for POWER6 This adds the back-end for the PMU on the POWER6 processor. Fortunately, the event selection hardware is somewhat simpler on POWER6 than on other POWER family processors, so the constraints fit into only 32 bits. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/perf_counter.c | 4 + arch/powerpc/kernel/power6-pmu.c | 283 +++++++++++++++++++++++++++++++++++++ 3 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/power6-pmu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 45798f6fb13..0ebf4d04d4b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power6-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5561ecb02a4..df3fe057dee 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -742,6 +742,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) } extern struct power_pmu ppc970_pmu; +extern struct power_pmu power6_pmu; static int init_perf_counters(void) { @@ -760,6 +761,9 @@ static int init_perf_counters(void) case PV_970MP: ppmu = &ppc970_pmu; break; + case 0x3e: + ppmu = &power6_pmu; + break; } return 0; } diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c new file mode 100644 index 00000000000..b1f61f3c97b --- /dev/null +++ b/arch/powerpc/kernel/power6-pmu.c @@ -0,0 +1,283 @@ +/* + * Performance counter support for POWER6 processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER6 + */ +#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0x7 +#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) +#define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */ +#define PM_UNIT_MSK 0xf +#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH) +#define PM_LLAV 0x8000 /* Load lookahead match value */ +#define PM_LLA 0x4000 /* Load lookahead match enable */ +#define PM_BYTE_SH 12 /* Byte of event bus to use */ +#define PM_BYTE_MSK 3 +#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */ +#define PM_SUBUNIT_MSK 7 +#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH) +#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */ +#define PM_BUSEVENT_MSK 0xf3700 + +/* + * Bits in MMCR1 for POWER6 + */ +#define MMCR1_TTM0SEL_SH 60 +#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4) +#define MMCR1_TTMSEL_MSK 0xf +#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK) +#define MMCR1_NESTSEL_SH 45 +#define MMCR1_NESTSEL_MSK 0x7 +#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) +#define MMCR1_PMC1_LLA ((u64)1 << 44) +#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) +#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) +#define MMCR1_PMC1SEL_SH 24 +#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) +#define MMCR1_PMCSEL_MSK 0xff + +/* + * Assign PMC numbers and compute MMCR1 value for a set of events + */ +static int p6_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr1 = 0; + int i; + unsigned int pmc, ev, b, u, s, psel; + unsigned int ttmset = 0; + unsigned int pmc_inuse = 0; + + if (n_ev > 4) + return -1; + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc_inuse & (1 << (pmc - 1))) + return -1; /* collision! */ + pmc_inuse |= 1 << (pmc - 1); + } + } + for (i = 0; i < n_ev; ++i) { + ev = event[i]; + pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + --pmc; + } else { + /* can go on any PMC; find a free one */ + for (pmc = 0; pmc < 4; ++pmc) + if (!(pmc_inuse & (1 << pmc))) + break; + pmc_inuse |= 1 << pmc; + } + hwc[i] = pmc; + psel = ev & PM_PMCSEL_MSK; + if (ev & PM_BUSEVENT_MSK) { + /* this event uses the event bus */ + b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK; + u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK; + /* check for conflict on this byte of event bus */ + if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) + return -1; + mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); + ttmset |= 1 << b; + if (u == 5) { + /* Nest events have a further mux */ + s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; + if ((ttmset & 0x10) && + MMCR1_NESTSEL(mmcr1) != s) + return -1; + ttmset |= 0x10; + mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; + } + if (0x30 <= psel && psel <= 0x3d) { + /* these need the PMCx_ADDR_SEL bits */ + if (b >= 2) + mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc; + } + /* bus select values are different for PMC3/4 */ + if (pmc >= 2 && (psel & 0x90) == 0x80) + psel ^= 0x20; + } + if (ev & PM_LLA) { + mmcr1 |= MMCR1_PMC1_LLA >> pmc; + if (ev & PM_LLAV) + mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; + } + mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); + } + mmcr[0] = 0; + if (pmc_inuse & 1) + mmcr[0] = MMCR0_PMC1CE; + if (pmc_inuse & 0xe) + mmcr[0] |= MMCR0_PMCjCE; + mmcr[1] = mmcr1; + mmcr[2] = 0; + return 0; +} + +/* + * Layout of constraint bits: + * + * 0-1 add field: number of uses of PMC1 (max 1) + * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4 + * 8-10 select field: nest (subunit) event selector + * 16-19 select field: unit on byte 0 of event bus + * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 + */ +static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, sh; + unsigned int mask = 0, value = 0; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 4) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + } + if (event & PM_BUSEVENT_MSK) { + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + sh = byte * 4; + mask |= PM_UNIT_MSKS << sh; + value |= (event & PM_UNIT_MSKS) << sh; + if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { + mask |= PM_SUBUNIT_MSKS; + value |= event & PM_SUBUNIT_MSKS; + } + } + *maskp = mask; + *valp = value; + return 0; +} + +#define MAX_ALT 4 /* at most 4 alternatives for any event */ + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ + { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ + { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ + { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */ + { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ + { 0x10000e, 0x400010 }, /* PM_PURR */ + { 0x100010, 0x4000f8 }, /* PM_FLUSH */ + { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */ + { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */ + { 0x100054, 0x2000f0 }, /* PM_ST_FIN */ + { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */ + { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */ + { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */ + { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */ + { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */ + { 0x200012, 0x300012 }, /* PM_INST_DISP */ + { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */ + { 0x2000f8, 0x300010 }, /* PM_EXT_INT */ + { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */ + { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */ + { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */ + { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */ + { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ +}; + +/* + * This could be made more efficient with a binary search on + * a presorted list, if necessary + */ +static int find_alternatives_list(unsigned int event) +{ + int i, j; + unsigned int alt; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + return -1; + for (j = 0; j < MAX_ALT; ++j) { + alt = event_alternatives[i][j]; + if (!alt || event < alt) + break; + if (event == alt) + return i; + } + } + return -1; +} + +static int p6_get_alternatives(unsigned int event, unsigned int alt[]) +{ + int i, j; + unsigned int aevent, psel, pmc; + unsigned int nalt = 1; + + alt[0] = event; + + /* check the alternatives table */ + i = find_alternatives_list(event); + if (i >= 0) { + /* copy out alternatives from list */ + for (j = 0; j < MAX_ALT; ++j) { + aevent = event_alternatives[i][j]; + if (!aevent) + break; + if (aevent != event) + alt[nalt++] = aevent; + } + + } else { + /* Check for alternative ways of computing sum events */ + /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ + psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc && (psel == 0x32 || psel == 0x34)) + alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | + ((5 - pmc) << PM_PMC_SH); + + /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ + if (pmc && (psel == 0x38 || psel == 0x3a)) + alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | + ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); + } + + return nalt; +} + +static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + /* Set PMCxSEL to 0 to disable PMCx */ + mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); +} + +static int power6_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 0x1e, + [PERF_COUNT_INSTRUCTIONS] = 2, + [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ + [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ +}; + +struct power_pmu power6_pmu = { + .n_counter = 4, + .max_alternatives = MAX_ALT, + .add_fields = 0x55, + .test_adder = 0, + .compute_mmcr = p6_compute_mmcr, + .get_constraint = p6_get_constraint, + .get_alternatives = p6_get_alternatives, + .disable_pmc = p6_disable_pmc, + .n_generic = ARRAY_SIZE(power6_generic_events), + .generic_events = power6_generic_events, +}; -- cgit v1.2.3 From 01d0287f068de2934109ba9b989d8807526cccc2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 14 Jan 2009 13:44:19 +1100 Subject: powerpc/perf_counter: Make sure PMU gets enabled properly This makes sure that we call the platform-specific ppc_md.enable_pmcs function on each CPU before we try to use the PMU on that CPU. If the CPU goes off-line and then on-line, we need to do the enable_pmcs call again, so we use the hw_perf_counter_setup hook to ensure that. It gets called as each CPU comes online, but it isn't called on the CPU that is coming up, so this adds the CPU number as an argument to it (there were no non-empty instances of hw_perf_counter_setup before). This also arranges to set the pmcregs_in_use field of the lppaca (data structure shared with the hypervisor) on each CPU when we are using the PMU and clear it when we are not. This allows the hypervisor to optimize partition switches by not saving/restoring the PMU registers when we aren't using the PMU. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index df3fe057dee..85ad25923c2 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -15,6 +15,7 @@ #include #include #include +#include struct cpu_hw_counters { int n_counters; @@ -24,6 +25,7 @@ struct cpu_hw_counters { struct perf_counter *counter[MAX_HWCOUNTERS]; unsigned int events[MAX_HWCOUNTERS]; u64 mmcr[3]; + u8 pmcs_enabled; }; DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); @@ -261,6 +263,15 @@ u64 hw_perf_save_disable(void) cpuhw->disabled = 1; cpuhw->n_added = 0; + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + if (ppc_md.enable_pmcs) + ppc_md.enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + /* * Set the 'freeze counters' bit. * The barrier is to make sure the mtspr has been @@ -305,6 +316,8 @@ void hw_perf_restore(u64 disable) mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + if (cpuhw->n_counters == 0) + get_lppaca()->pmcregs_in_use = 0; goto out; } @@ -323,6 +336,7 @@ void hw_perf_restore(u64 disable) * bit set and set the hardware counters to their initial values. * Then unfreeze the counters. */ + get_lppaca()->pmcregs_in_use = 1; mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) @@ -741,6 +755,14 @@ static void perf_counter_interrupt(struct pt_regs *regs) } } +void hw_perf_counter_setup(int cpu) +{ + struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); + + memset(cpuhw, 0, sizeof(*cpuhw)); + cpuhw->mmcr[0] = MMCR0_FC; +} + extern struct power_pmu ppc970_pmu; extern struct power_pmu power6_pmu; -- cgit v1.2.3 From 3b6f9e5cb21964b7ce12bf81076f830885563ec8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 14 Jan 2009 21:00:30 +1100 Subject: perf_counter: Add support for pinned and exclusive counter groups Impact: New perf_counter features A pinned counter group is one that the user wants to have on the CPU whenever possible, i.e. whenever the associated task is running, for a per-task group, or always for a per-cpu group. If the system cannot satisfy that, it puts the group into an error state where it is not scheduled any more and reads from it return EOF (i.e. 0 bytes read). The group can be released from error state and made readable again using prctl(PR_TASK_PERF_COUNTERS_ENABLE). When we have finer-grained enable/disable controls on counters we'll be able to reset the error state on individual groups. An exclusive group is one that the user wants to be the only group using the CPU performance monitor hardware whenever it is on. The counter group scheduler will not schedule an exclusive group if there are already other groups on the CPU and will not schedule other groups onto the CPU if there is an exclusive group scheduled (that statement does not apply to groups containing only software counters, which can always go on and which do not prevent an exclusive group from going on). With an exclusive group, we will be able to let users program PMU registers at a low level without the concern that those settings will perturb other measurements. Along the way this reorganizes things a little: - is_software_counter() is moved to perf_counter.h. - cpuctx->active_oncpu now records the number of hardware counters on the CPU, i.e. it now excludes software counters. Nothing was reading cpuctx->active_oncpu before, so this change is harmless. - A new cpuctx->exclusive field records whether we currently have an exclusive group on the CPU. - counter_sched_out moves higher up in perf_counter.c and gets called from __perf_counter_remove_from_context and __perf_counter_exit_task, where we used to have essentially the same code. - __perf_counter_sched_in now goes through the counter list twice, doing the pinned counters in the first loop and the non-pinned counters in the second loop, in order to give the pinned counters the best chance to be scheduled in. Note that only a group leader can be exclusive or pinned, and that attribute applies to the whole group. This avoids some awkwardness in some corner cases (e.g. where a group leader is closed and the other group members get added to the context list). If we want to relax that restriction later, we can, and it is easier to relax a restriction than to apply a new one. This doesn't yet handle the case where a pinned counter is inherited and goes into error state in the child - the error state is not propagated up to the parent when the child exits, and arguably it should. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 85ad25923c2..5b0211348c7 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -35,14 +35,6 @@ void perf_counter_print_debug(void) { } -/* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return !counter->hw_event.raw && counter->hw_event.type < 0; -} - /* * Read one performance monitor counter (PMC). */ @@ -443,6 +435,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, */ for (i = n0; i < n0 + n; ++i) cpuhw->counter[i]->hw.config = cpuhw->events[i]; + cpuctx->active_oncpu += n; n = 1; counter_sched_in(group_leader, cpu); list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { @@ -451,7 +444,6 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, ++n; } } - cpuctx->active_oncpu += n; ctx->nr_active += n; return 1; -- cgit v1.2.3 From 0475f9ea8e2cc030298908949e0d5da9f2fc2cfe Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 11 Feb 2009 14:35:35 +1100 Subject: perf_counters: allow users to count user, kernel and/or hypervisor events Impact: new perf_counter feature This extends the perf_counter_hw_event struct with bits that specify that events in user, kernel and/or hypervisor mode should not be counted (i.e. should be excluded), and adds code to program the PMU mode selection bits accordingly on x86 and powerpc. For software counters, we don't currently have the infrastructure to distinguish which mode an event occurs in, so we currently fail the counter initialization if the setting of the hw_event.exclude_* bits would require us to distinguish. Context switches and CPU migrations are currently considered to occur in kernel mode. On x86, this changes the previous policy that only root can count kernel events. Now non-root users can count kernel events or exclude them. Non-root users still can't use NMI events, though. On x86 we don't appear to have any way to control whether hypervisor events are counted or not, so hw_event.exclude_hv is ignored. On powerpc, the selection of whether to count events in user, kernel and/or hypervisor mode is PMU-wide, not per-counter, so this adds a check that the hw_event.exclude_* settings are the same as other events on the PMU. Counters being added to a group have to have the same settings as the other hardware counters in the group. Counters and groups can only be enabled in hw_perf_group_sched_in or power_perf_enable if they have the same settings as any other counters already on the PMU. If we are not running on a hypervisor, the exclude_hv setting is ignored (by forcing it to 0) since we can't ever get any hypervisor events. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 68 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5b0211348c7..bd6ba85beb5 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -16,6 +16,7 @@ #include #include #include +#include struct cpu_hw_counters { int n_counters; @@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev) return 0; } +/* + * Check if newly-added counters have consistent settings for + * exclude_{user,kernel,hv} with each other and any previously + * added counters. + */ +static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) +{ + int eu, ek, eh; + int i, n; + struct perf_counter *counter; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + eu = ctrs[0]->hw_event.exclude_user; + ek = ctrs[0]->hw_event.exclude_kernel; + eh = ctrs[0]->hw_event.exclude_hv; + if (n_prev == 0) + n_prev = 1; + for (i = n_prev; i < n; ++i) { + counter = ctrs[i]; + if (counter->hw_event.exclude_user != eu || + counter->hw_event.exclude_kernel != ek || + counter->hw_event.exclude_hv != eh) + return -EAGAIN; + } + return 0; +} + static void power_perf_read(struct perf_counter *counter) { long val, delta, prev; @@ -323,6 +354,20 @@ void hw_perf_restore(u64 disable) goto out; } + /* + * Add in MMCR0 freeze bits corresponding to the + * hw_event.exclude_* bits for the first counter. + * We have already checked that all counters have the + * same values for these bits as the first counter. + */ + counter = cpuhw->counter[0]; + if (counter->hw_event.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (counter->hw_event.exclude_kernel) + cpuhw->mmcr[0] |= MMCR0_FCS; + if (counter->hw_event.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + /* * Write the new configuration to MMCR* with the freeze * bit set and set the hardware counters to their initial values. @@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, &cpuhw->counter[n0], &cpuhw->events[n0]); if (n < 0) return -EAGAIN; + if (check_excludes(cpuhw->counter, n0, n)) + return -EAGAIN; if (power_check_constraints(cpuhw->events, n + n0)) return -EAGAIN; cpuhw->n_counters = n0 + n; @@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter) goto out; cpuhw->counter[n0] = counter; cpuhw->events[n0] = counter->hw.config; + if (check_excludes(cpuhw->counter, n0, 1)) + goto out; if (power_check_constraints(cpuhw->events, n0 + 1)) goto out; @@ -554,6 +603,17 @@ hw_perf_counter_init(struct perf_counter *counter) counter->hw.config_base = ev; counter->hw.idx = 0; + /* + * If we are not running on a hypervisor, force the + * exclude_hv bit to 0 so that we don't care what + * the user set it to. This also means that we don't + * set the MMCR0_FCHV bit, which unconditionally freezes + * the counters on the PPC970 variants used in Apple G5 + * machines (since MSR.HV is always 1 on those machines). + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + counter->hw_event.exclude_hv = 0; + /* * If this is in a group, check if it can go on with all the * other hardware counters in the group. We assume the counter @@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter) if (n < 0) return NULL; } - events[n++] = ev; - if (power_check_constraints(events, n)) + events[n] = ev; + if (check_excludes(ctrs, n, 1)) + return NULL; + if (power_check_constraints(events, n + 1)) return NULL; - counter->hw.config = events[n - 1]; + counter->hw.config = events[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); return &power_perf_ops; } -- cgit v1.2.3 From d095cd46dac104e4d2a4967c7c19b55a12f78240 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 23 Feb 2009 23:01:28 +1100 Subject: perfcounters/powerpc: Make exclude_kernel bit work on Apple G5 processors Currently, setting hw_event.exclude_kernel does nothing on the PPC970 variants used in Apple G5 machines, because they have the HV (hypervisor) bit in the MSR forced to 1, so as far as the PMU is concerned, the kernel runs in hypervisor mode. Thus we have to use the MMCR0_FCHV (freeze counters in hypervisor mode) bit rather than the MMCR0_FCS (freeze counters in supervisor mode) bit. This checks the MSR.HV bit at startup, and if it is set, we set the freeze_counters_kernel variable to MMCR0_FCHV (it was initialized to MMCR0_FCS). We then use that whenever we need to exclude kernel events. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bd6ba85beb5..6e27913ec0d 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -32,6 +32,15 @@ DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); struct power_pmu *ppmu; +/* + * Normally, to ignore kernel events we set the FCS (freeze counters + * in supervisor mode) bit in MMCR0, but if the kernel runs with the + * hypervisor bit set in the MSR, or if we are running on a processor + * where the hypervisor bit is forced to 1 (as on Apple G5 processors), + * then we need to use the FCHV bit to ignore kernel events. + */ +static unsigned int freeze_counters_kernel = MMCR0_FCS; + void perf_counter_print_debug(void) { } @@ -364,7 +373,7 @@ void hw_perf_restore(u64 disable) if (counter->hw_event.exclude_user) cpuhw->mmcr[0] |= MMCR0_FCP; if (counter->hw_event.exclude_kernel) - cpuhw->mmcr[0] |= MMCR0_FCS; + cpuhw->mmcr[0] |= freeze_counters_kernel; if (counter->hw_event.exclude_hv) cpuhw->mmcr[0] |= MMCR0_FCHV; @@ -606,10 +615,7 @@ hw_perf_counter_init(struct perf_counter *counter) /* * If we are not running on a hypervisor, force the * exclude_hv bit to 0 so that we don't care what - * the user set it to. This also means that we don't - * set the MMCR0_FCHV bit, which unconditionally freezes - * the counters on the PPC970 variants used in Apple G5 - * machines (since MSR.HV is always 1 on those machines). + * the user set it to. */ if (!firmware_has_feature(FW_FEATURE_LPAR)) counter->hw_event.exclude_hv = 0; @@ -841,6 +847,13 @@ static int init_perf_counters(void) ppmu = &power6_pmu; break; } + + /* + * Use FCHV to ignore kernel events if MSR.HV is set. + */ + if (mfmsr() & MSR_HV) + freeze_counters_kernel = MMCR0_FCHV; + return 0; } -- cgit v1.2.3 From 742bd95ba96e19b3f7196c3a0834ebc17c8ba006 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 24 Feb 2009 11:33:56 +1100 Subject: perfcounters/powerpc: Add support for POWER5 processors This adds the back-end for the PMU on the POWER5 processor. This knows how to use the fixed-function PMC5 and PMC6 (instructions completed and run cycles). Unlike POWER6, PMC5/6 obey the freeze conditions and can generate interrupts, so their use doesn't impose any extra restrictions. POWER5+ is different and is not supported by this patch. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 3 +- arch/powerpc/kernel/perf_counter.c | 4 + arch/powerpc/kernel/power5-pmu.c | 475 +++++++++++++++++++++++++++++++++++++ 3 files changed, 481 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/power5-pmu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 7c941ec3b23..b4c6f466164 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,7 +94,8 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power6-pmu.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power5-pmu.o \ + power6-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 6e27913ec0d..112332d07fc 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -824,6 +824,7 @@ void hw_perf_counter_setup(int cpu) } extern struct power_pmu ppc970_pmu; +extern struct power_pmu power5_pmu; extern struct power_pmu power6_pmu; static int init_perf_counters(void) @@ -843,6 +844,9 @@ static int init_perf_counters(void) case PV_970MP: ppmu = &ppc970_pmu; break; + case PV_POWER5: + ppmu = &power5_pmu; + break; case 0x3e: ppmu = &power6_pmu; break; diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c new file mode 100644 index 00000000000..379ed1087cc --- /dev/null +++ b/arch/powerpc/kernel/power5-pmu.c @@ -0,0 +1,475 @@ +/* + * Performance counter support for POWER5 (not POWER5++) processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER5 (not POWER5++) + */ +#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) +#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_BYTE_SH 12 /* Byte number of event bus to use */ +#define PM_BYTE_MSK 7 +#define PM_GRS_SH 8 /* Storage subsystem mux select */ +#define PM_GRS_MSK 7 +#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ +#define PM_PMCSEL_MSK 0x7f + +/* Values in PM_UNIT field */ +#define PM_FPU 0 +#define PM_ISU0 1 +#define PM_IFU 2 +#define PM_ISU1 3 +#define PM_IDU 4 +#define PM_ISU0_ALT 6 +#define PM_GRS 7 +#define PM_LSU0 8 +#define PM_LSU1 0xc +#define PM_LASTUNIT 0xc + +/* + * Bits in MMCR1 for POWER5 + */ +#define MMCR1_TTM0SEL_SH 62 +#define MMCR1_TTM1SEL_SH 60 +#define MMCR1_TTM2SEL_SH 58 +#define MMCR1_TTM3SEL_SH 56 +#define MMCR1_TTMSEL_MSK 3 +#define MMCR1_TD_CP_DBG0SEL_SH 54 +#define MMCR1_TD_CP_DBG1SEL_SH 52 +#define MMCR1_TD_CP_DBG2SEL_SH 50 +#define MMCR1_TD_CP_DBG3SEL_SH 48 +#define MMCR1_GRS_L2SEL_SH 46 +#define MMCR1_GRS_L2SEL_MSK 3 +#define MMCR1_GRS_L3SEL_SH 44 +#define MMCR1_GRS_L3SEL_MSK 3 +#define MMCR1_GRS_MCSEL_SH 41 +#define MMCR1_GRS_MCSEL_MSK 7 +#define MMCR1_GRS_FABSEL_SH 39 +#define MMCR1_GRS_FABSEL_MSK 3 +#define MMCR1_PMC1_ADDER_SEL_SH 35 +#define MMCR1_PMC2_ADDER_SEL_SH 34 +#define MMCR1_PMC3_ADDER_SEL_SH 33 +#define MMCR1_PMC4_ADDER_SEL_SH 32 +#define MMCR1_PMC1SEL_SH 25 +#define MMCR1_PMC2SEL_SH 17 +#define MMCR1_PMC3SEL_SH 9 +#define MMCR1_PMC4SEL_SH 1 +#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) +#define MMCR1_PMCSEL_MSK 0x7f + +/* + * Bits in MMCRA + */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> + * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 + * + * T0 - TTM0 constraint + * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 + * + * T1 - TTM1 constraint + * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 + * + * NC - number of counters + * 51: NC error 0x0008_0000_0000_0000 + * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 + * + * G0..G3 - GRS mux constraints + * 46-47: GRS_L2SEL value + * 44-45: GRS_L3SEL value + * 41-44: GRS_MCSEL value + * 39-40: GRS_FABSEL value + * Note that these match up with their bit positions in MMCR1 + * + * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS + * 37: UC3 error 0x20_0000_0000 + * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 + * 35: ISU0 events needed 0x08_0000_0000 + * 34: IDU|GRS events needed 0x04_0000_0000 + * + * PS1 + * 33: PS1 error 0x2_0000_0000 + * 31-32: count of events needing PMC1/2 0x1_8000_0000 + * + * PS2 + * 30: PS2 error 0x4000_0000 + * 28-29: count of events needing PMC3/4 0x3000_0000 + * + * B0 + * 24-27: Byte 0 event source 0x0f00_0000 + * Encoding as for the event code + * + * B1, B2, B3 + * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources + * + * P1..P6 + * 0-11: Count of events needing PMC1..PMC6 + */ + +static const int grsel_shift[8] = { + MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, + MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, + MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH +}; + +/* Masks and values for using events from the various units */ +static u64 unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, + [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, + [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, + [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, + [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, + [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, +}; + +static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, unit, sh; + int bit, fmask; + u64 mask = 0, value = 0; + int grp = -1; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 6) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + if (pmc <= 4) + grp = (pmc - 1) >> 1; + else if (event != 0x500009 && event != 0x600005) + return -1; + } + if (event & PM_BUSEVENT_MSK) { + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit > PM_LASTUNIT) + return -1; + if (unit == PM_ISU0_ALT) + unit = PM_ISU0; + mask |= unit_cons[unit][0]; + value |= unit_cons[unit][1]; + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + if (byte >= 4) { + if (unit != PM_LSU1) + return -1; + /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ + ++unit; + byte &= 3; + } + if (unit == PM_GRS) { + bit = event & 7; + fmask = (bit == 6)? 7: 3; + sh = grsel_shift[bit]; + mask |= (u64)fmask << sh; + value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; + } + /* + * Bus events on bytes 0 and 2 can be counted + * on PMC1/2; bytes 1 and 3 on PMC3/4. + */ + if (!pmc) + grp = byte & 1; + /* Set byte lane select field */ + mask |= 0xfULL << (24 - 4 * byte); + value |= (u64)unit << (24 - 4 * byte); + } + if (grp == 0) { + /* increment PMC1/2 field */ + mask |= 0x200000000ull; + value |= 0x080000000ull; + } else if (grp == 1) { + /* increment PMC3/4 field */ + mask |= 0x40000000ull; + value |= 0x10000000ull; + } + if (pmc < 5) { + /* need a counter from PMC1-4 set */ + mask |= 0x8000000000000ull; + value |= 0x1000000000000ull; + } + *maskp = mask; + *valp = value; + return 0; +} + +#define MAX_ALT 3 /* at most 3 alternatives for any event */ + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ + { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ + { 0x100005, 0x600005 }, /* PM_RUN_CYC */ + { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */ + { 0x300009, 0x400009 }, /* PM_INST_DISP */ +}; + +/* + * Scan the alternatives table for a match and return the + * index into the alternatives table if found, else -1. + */ +static int find_alternative(unsigned int event) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + break; + for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) + if (event == event_alternatives[i][j]) + return i; + } + return -1; +} + +static const unsigned char bytedecode_alternatives[4][4] = { + /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, + /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, + /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, + /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } +}; + +/* + * Some direct events for decodes of event bus byte 3 have alternative + * PMCSEL values on other counters. This returns the alternative + * event code for those that do, or -1 otherwise. + */ +static int find_alternative_bdecode(unsigned int event) +{ + int pmc, altpmc, pp, j; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc == 0 || pmc > 4) + return -1; + altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ + pp = event & PM_PMCSEL_MSK; + for (j = 0; j < 4; ++j) { + if (bytedecode_alternatives[pmc - 1][j] == pp) { + return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | + (altpmc << PM_PMC_SH) | + bytedecode_alternatives[altpmc - 1][j]; + } + } + return -1; +} + +static int power5_get_alternatives(unsigned int event, unsigned int alt[]) +{ + int i, j, ae, nalt = 1; + + alt[0] = event; + nalt = 1; + i = find_alternative(event); + if (i >= 0) { + for (j = 0; j < MAX_ALT; ++j) { + ae = event_alternatives[i][j]; + if (ae && ae != event) + alt[nalt++] = ae; + } + } else { + ae = find_alternative_bdecode(event); + if (ae > 0) + alt[nalt++] = ae; + } + return nalt; +} + +static int power5_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr1 = 0; + unsigned int pmc, unit, byte, psel; + unsigned int ttm, grp; + int i, isbus, bit, grsel; + unsigned int pmc_inuse = 0; + unsigned int pmc_grp_use[2]; + unsigned char busbyte[4]; + unsigned char unituse[16]; + int ttmuse; + + if (n_ev > 6) + return -1; + + /* First pass to count resource use */ + pmc_grp_use[0] = pmc_grp_use[1] = 0; + memset(busbyte, 0, sizeof(busbyte)); + memset(unituse, 0, sizeof(unituse)); + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 6) + return -1; + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + /* count 1/2 vs 3/4 use */ + if (pmc <= 4) + ++pmc_grp_use[(pmc - 1) >> 1]; + } + if (event[i] & PM_BUSEVENT_MSK) { + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + if (unit > PM_LASTUNIT) + return -1; + if (unit == PM_ISU0_ALT) + unit = PM_ISU0; + if (byte >= 4) { + if (unit != PM_LSU1) + return -1; + ++unit; + byte &= 3; + } + if (!pmc) + ++pmc_grp_use[byte & 1]; + if (busbyte[byte] && busbyte[byte] != unit) + return -1; + busbyte[byte] = unit; + unituse[unit] = 1; + } + } + if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) + return -1; + + /* + * Assign resources and set multiplexer selects. + * + * PM_ISU0 can go either on TTM0 or TTM1, but that's the only + * choice we have to deal with. + */ + if (unituse[PM_ISU0] & + (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { + unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ + unituse[PM_ISU0] = 0; + } + /* Set TTM[01]SEL fields. */ + ttmuse = 0; + for (i = PM_FPU; i <= PM_ISU1; ++i) { + if (!unituse[i]) + continue; + if (ttmuse++) + return -1; + mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; + } + ttmuse = 0; + for (; i <= PM_GRS; ++i) { + if (!unituse[i]) + continue; + if (ttmuse++) + return -1; + mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; + } + if (ttmuse > 1) + return -1; + + /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ + for (byte = 0; byte < 4; ++byte) { + unit = busbyte[byte]; + if (!unit) + continue; + if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { + /* get ISU0 through TTM1 rather than TTM0 */ + unit = PM_ISU0_ALT; + } else if (unit == PM_LSU1 + 1) { + /* select lower word of LSU1 for this byte */ + mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + } + ttm = unit >> 2; + mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + } + + /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + psel = event[i] & PM_PMCSEL_MSK; + isbus = event[i] & PM_BUSEVENT_MSK; + if (!pmc) { + /* Bus event or any-PMC direct event */ + for (pmc = 0; pmc < 4; ++pmc) { + if (pmc_inuse & (1 << pmc)) + continue; + grp = (pmc >> 1) & 1; + if (isbus) { + if (grp == (byte & 1)) + break; + } else if (pmc_grp_use[grp] < 2) { + ++pmc_grp_use[grp]; + break; + } + } + pmc_inuse |= 1 << pmc; + } else if (pmc <= 4) { + /* Direct event */ + --pmc; + if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) + /* add events on higher-numbered bus */ + mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + } else { + /* Instructions or run cycles on PMC5/6 */ + --pmc; + } + if (isbus && unit == PM_GRS) { + bit = psel & 7; + grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; + mmcr1 |= (u64)grsel << grsel_shift[bit]; + } + if (pmc <= 3) + mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); + hwc[i] = pmc; + } + + /* Return MMCRx values */ + mmcr[0] = 0; + if (pmc_inuse & 1) + mmcr[0] = MMCR0_PMC1CE; + if (pmc_inuse & 0x3e) + mmcr[0] |= MMCR0_PMCjCE; + mmcr[1] = mmcr1; + mmcr[2] = 0; + return 0; +} + +static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + if (pmc <= 3) + mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); +} + +static int power5_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 0xf, + [PERF_COUNT_INSTRUCTIONS] = 0x100009, + [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ + [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ +}; + +struct power_pmu power5_pmu = { + .n_counter = 6, + .max_alternatives = MAX_ALT, + .add_fields = 0x7000090000555ull, + .test_adder = 0x3000490000000ull, + .compute_mmcr = power5_compute_mmcr, + .get_constraint = power5_get_constraint, + .get_alternatives = power5_get_alternatives, + .disable_pmc = power5_disable_pmc, + .n_generic = ARRAY_SIZE(power5_generic_events), + .generic_events = power5_generic_events, +}; -- cgit v1.2.3 From f3dfd2656deb81a0addee4f4ceff66b50a387388 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Feb 2009 22:43:46 +1100 Subject: perfcounters: fix a few minor cleanliness issues This fixes three issues noticed by Arnd Bergmann: - Add #ifdef __KERNEL__ and move some things around in perf_counter.h to make sure only the bits that userspace needs are exported to userspace. - Use __u64, __s64, __u32 types in the structs exported to userspace rather than u64, s64, u32. - Make the sys_perf_counter_open syscall available to the SPUs on Cell platforms. And one issue that I noticed in looking at the code again: - Wrap the perf_counter_open syscall with SYSCALL_DEFINE4 so we get the proper handling of int arguments on ppc64 (and some other 64-bit architectures). Reported-by: Arnd Bergmann Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/systbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4c8095f6bec..d312eec8abb 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,4 +322,4 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) -SYSCALL(perf_counter_open) +SYSCALL_SPU(perf_counter_open) -- cgit v1.2.3 From 86028598de16538f02519141756ccf4accfc29a6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 5 Mar 2009 14:05:57 +1100 Subject: perfcounters/powerpc: fix oops with multiple counters in a group Impact: fix oops-causing bug This fixes a bug in the powerpc hw_perf_counter_init where the code didn't initialize ctrs[n] before passing the ctrs array to check_excludes, leading to possible oopses and other incorrect behaviour. This fixes it by initializing ctrs[n] correctly. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 112332d07fc..4fec112386f 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -633,6 +633,7 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; } events[n] = ev; + ctrs[n] = counter; if (check_excludes(ctrs, n, 1)) return NULL; if (power_check_constraints(events, n + 1)) -- cgit v1.2.3 From aabbaa6036fd847c583f585c6bae82b5a033e6c7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 6 Mar 2009 16:27:10 +1100 Subject: perfcounters/powerpc: add support for POWER5+ processors Impact: more hardware support This adds the back-end for the PMU on the POWER5+ processors (i.e. GS, including GS DD3 aka POWER5++). This doesn't use the fixed-function PMC5 and PMC6 since they don't respect the freeze conditions and don't generate interrupts, as on POWER6. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 4 +- arch/powerpc/kernel/perf_counter.c | 4 + arch/powerpc/kernel/power5+-pmu.c | 452 +++++++++++++++++++++++++++++++++++++ 3 files changed, 458 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/kernel/power5+-pmu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b4c6f466164..49851e0d8fd 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,8 +94,8 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power5-pmu.o \ - power6-pmu.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o \ + power5-pmu.o power5+-pmu.o power6-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 4fec112386f..162f3981fa2 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -826,6 +826,7 @@ void hw_perf_counter_setup(int cpu) extern struct power_pmu ppc970_pmu; extern struct power_pmu power5_pmu; +extern struct power_pmu power5p_pmu; extern struct power_pmu power6_pmu; static int init_perf_counters(void) @@ -848,6 +849,9 @@ static int init_perf_counters(void) case PV_POWER5: ppmu = &power5_pmu; break; + case PV_POWER5p: + ppmu = &power5p_pmu; + break; case 0x3e: ppmu = &power6_pmu; break; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c new file mode 100644 index 00000000000..cec21ea65b0 --- /dev/null +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -0,0 +1,452 @@ +/* + * Performance counter support for POWER5 (not POWER5++) processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) + */ +#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) +#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_BYTE_SH 12 /* Byte number of event bus to use */ +#define PM_BYTE_MSK 7 +#define PM_GRS_SH 8 /* Storage subsystem mux select */ +#define PM_GRS_MSK 7 +#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ +#define PM_PMCSEL_MSK 0x7f + +/* Values in PM_UNIT field */ +#define PM_FPU 0 +#define PM_ISU0 1 +#define PM_IFU 2 +#define PM_ISU1 3 +#define PM_IDU 4 +#define PM_ISU0_ALT 6 +#define PM_GRS 7 +#define PM_LSU0 8 +#define PM_LSU1 0xc +#define PM_LASTUNIT 0xc + +/* + * Bits in MMCR1 for POWER5+ + */ +#define MMCR1_TTM0SEL_SH 62 +#define MMCR1_TTM1SEL_SH 60 +#define MMCR1_TTM2SEL_SH 58 +#define MMCR1_TTM3SEL_SH 56 +#define MMCR1_TTMSEL_MSK 3 +#define MMCR1_TD_CP_DBG0SEL_SH 54 +#define MMCR1_TD_CP_DBG1SEL_SH 52 +#define MMCR1_TD_CP_DBG2SEL_SH 50 +#define MMCR1_TD_CP_DBG3SEL_SH 48 +#define MMCR1_GRS_L2SEL_SH 46 +#define MMCR1_GRS_L2SEL_MSK 3 +#define MMCR1_GRS_L3SEL_SH 44 +#define MMCR1_GRS_L3SEL_MSK 3 +#define MMCR1_GRS_MCSEL_SH 41 +#define MMCR1_GRS_MCSEL_MSK 7 +#define MMCR1_GRS_FABSEL_SH 39 +#define MMCR1_GRS_FABSEL_MSK 3 +#define MMCR1_PMC1_ADDER_SEL_SH 35 +#define MMCR1_PMC2_ADDER_SEL_SH 34 +#define MMCR1_PMC3_ADDER_SEL_SH 33 +#define MMCR1_PMC4_ADDER_SEL_SH 32 +#define MMCR1_PMC1SEL_SH 25 +#define MMCR1_PMC2SEL_SH 17 +#define MMCR1_PMC3SEL_SH 9 +#define MMCR1_PMC4SEL_SH 1 +#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) +#define MMCR1_PMCSEL_MSK 0x7f + +/* + * Bits in MMCRA + */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * [ ><><>< ><> <><>[ > < >< >< >< ><><><><> + * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1 + * + * NC - number of counters + * 51: NC error 0x0008_0000_0000_0000 + * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 + * + * G0..G3 - GRS mux constraints + * 46-47: GRS_L2SEL value + * 44-45: GRS_L3SEL value + * 41-44: GRS_MCSEL value + * 39-40: GRS_FABSEL value + * Note that these match up with their bit positions in MMCR1 + * + * T0 - TTM0 constraint + * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 + * + * T1 - TTM1 constraint + * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 + * + * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS + * 33: UC3 error 0x02_0000_0000 + * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 + * 31: ISU0 events needed 0x01_8000_0000 + * 30: IDU|GRS events needed 0x00_4000_0000 + * + * B0 + * 20-23: Byte 0 event source 0x00f0_0000 + * Encoding as for the event code + * + * B1, B2, B3 + * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources + * + * P4 + * 7: P1 error 0x80 + * 6-7: Count of events needing PMC4 + * + * P1..P3 + * 0-6: Count of events needing PMC1..PMC3 + */ + +static const int grsel_shift[8] = { + MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, + MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, + MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH +}; + +/* Masks and values for using events from the various units */ +static u64 unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, + [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, + [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, + [PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, + [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, + [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, +}; + +static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, unit, sh; + int bit, fmask; + u64 mask = 0, value = 0; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 4) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + } + if (event & PM_BUSEVENT_MSK) { + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit > PM_LASTUNIT) + return -1; + if (unit == PM_ISU0_ALT) + unit = PM_ISU0; + mask |= unit_cons[unit][0]; + value |= unit_cons[unit][1]; + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + if (byte >= 4) { + if (unit != PM_LSU1) + return -1; + /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ + ++unit; + byte &= 3; + } + if (unit == PM_GRS) { + bit = event & 7; + fmask = (bit == 6)? 7: 3; + sh = grsel_shift[bit]; + mask |= (u64)fmask << sh; + value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; + } + /* Set byte lane select field */ + mask |= 0xfULL << (20 - 4 * byte); + value |= (u64)unit << (20 - 4 * byte); + } + mask |= 0x8000000000000ull; + value |= 0x1000000000000ull; + *maskp = mask; + *valp = value; + return 0; +} + +#define MAX_ALT 3 /* at most 3 alternatives for any event */ + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */ + { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ + { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */ + { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */ + { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ + { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ + { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ + { 0x100009, 0x200009 }, /* PM_INST_CMPL */ + { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ + { 0x300009, 0x400009 }, /* PM_INST_DISP */ +}; + +/* + * Scan the alternatives table for a match and return the + * index into the alternatives table if found, else -1. + */ +static int find_alternative(unsigned int event) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + break; + for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) + if (event == event_alternatives[i][j]) + return i; + } + return -1; +} + +static const unsigned char bytedecode_alternatives[4][4] = { + /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, + /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, + /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, + /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } +}; + +/* + * Some direct events for decodes of event bus byte 3 have alternative + * PMCSEL values on other counters. This returns the alternative + * event code for those that do, or -1 otherwise. This also handles + * alternative PCMSEL values for add events. + */ +static int find_alternative_bdecode(unsigned int event) +{ + int pmc, altpmc, pp, j; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc == 0 || pmc > 4) + return -1; + altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ + pp = event & PM_PMCSEL_MSK; + for (j = 0; j < 4; ++j) { + if (bytedecode_alternatives[pmc - 1][j] == pp) { + return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | + (altpmc << PM_PMC_SH) | + bytedecode_alternatives[altpmc - 1][j]; + } + } + + /* new decode alternatives for power5+ */ + if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) + return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); + if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) + return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); + + /* alternative add event encodings */ + if (pp == 0x10 || pp == 0x28) + return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | + (altpmc << PM_PMC_SH); + + return -1; +} + +static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) +{ + int i, j, ae, nalt = 1; + + alt[0] = event; + nalt = 1; + i = find_alternative(event); + if (i >= 0) { + for (j = 0; j < MAX_ALT; ++j) { + ae = event_alternatives[i][j]; + if (ae && ae != event) + alt[nalt++] = ae; + } + } else { + ae = find_alternative_bdecode(event); + if (ae > 0) + alt[nalt++] = ae; + } + return nalt; +} + +static int power5p_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr1 = 0; + unsigned int pmc, unit, byte, psel; + unsigned int ttm; + int i, isbus, bit, grsel; + unsigned int pmc_inuse = 0; + unsigned char busbyte[4]; + unsigned char unituse[16]; + int ttmuse; + + if (n_ev > 4) + return -1; + + /* First pass to count resource use */ + memset(busbyte, 0, sizeof(busbyte)); + memset(unituse, 0, sizeof(unituse)); + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 4) + return -1; + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + } + if (event[i] & PM_BUSEVENT_MSK) { + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + if (unit > PM_LASTUNIT) + return -1; + if (unit == PM_ISU0_ALT) + unit = PM_ISU0; + if (byte >= 4) { + if (unit != PM_LSU1) + return -1; + ++unit; + byte &= 3; + } + if (busbyte[byte] && busbyte[byte] != unit) + return -1; + busbyte[byte] = unit; + unituse[unit] = 1; + } + } + + /* + * Assign resources and set multiplexer selects. + * + * PM_ISU0 can go either on TTM0 or TTM1, but that's the only + * choice we have to deal with. + */ + if (unituse[PM_ISU0] & + (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { + unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ + unituse[PM_ISU0] = 0; + } + /* Set TTM[01]SEL fields. */ + ttmuse = 0; + for (i = PM_FPU; i <= PM_ISU1; ++i) { + if (!unituse[i]) + continue; + if (ttmuse++) + return -1; + mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; + } + ttmuse = 0; + for (; i <= PM_GRS; ++i) { + if (!unituse[i]) + continue; + if (ttmuse++) + return -1; + mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; + } + if (ttmuse > 1) + return -1; + + /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ + for (byte = 0; byte < 4; ++byte) { + unit = busbyte[byte]; + if (!unit) + continue; + if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { + /* get ISU0 through TTM1 rather than TTM0 */ + unit = PM_ISU0_ALT; + } else if (unit == PM_LSU1 + 1) { + /* select lower word of LSU1 for this byte */ + mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + } + ttm = unit >> 2; + mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + } + + /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + psel = event[i] & PM_PMCSEL_MSK; + isbus = event[i] & PM_BUSEVENT_MSK; + if (!pmc) { + /* Bus event or any-PMC direct event */ + for (pmc = 0; pmc < 4; ++pmc) { + if (!(pmc_inuse & (1 << pmc))) + break; + } + if (pmc >= 4) + return -1; + pmc_inuse |= 1 << pmc; + } else { + /* Direct event */ + --pmc; + if (isbus && (byte & 2) && + (psel == 8 || psel == 0x10 || psel == 0x28)) + /* add events on higher-numbered bus */ + mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + } + if (isbus && unit == PM_GRS) { + bit = psel & 7; + grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; + mmcr1 |= (u64)grsel << grsel_shift[bit]; + } + if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) + /* select alternate byte lane */ + psel |= 0x10; + if (pmc <= 3) + mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); + hwc[i] = pmc; + } + + /* Return MMCRx values */ + mmcr[0] = 0; + if (pmc_inuse & 1) + mmcr[0] = MMCR0_PMC1CE; + if (pmc_inuse & 0x3e) + mmcr[0] |= MMCR0_PMCjCE; + mmcr[1] = mmcr1; + mmcr[2] = 0; + return 0; +} + +static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + if (pmc <= 3) + mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); +} + +static int power5p_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 0xf, + [PERF_COUNT_INSTRUCTIONS] = 0x100009, + [PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ + [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ +}; + +struct power_pmu power5p_pmu = { + .n_counter = 4, + .max_alternatives = MAX_ALT, + .add_fields = 0x7000000000055ull, + .test_adder = 0x3000040000000ull, + .compute_mmcr = power5p_compute_mmcr, + .get_constraint = power5p_get_constraint, + .get_alternatives = power5p_get_alternatives, + .disable_pmc = power5p_disable_pmc, + .n_generic = ARRAY_SIZE(power5p_generic_events), + .generic_events = power5p_generic_events, +}; -- cgit v1.2.3 From 880860e392d92c457e8116cdee39ec4d109174ee Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 6 Mar 2009 16:30:52 +1100 Subject: perfcounters/powerpc: add support for POWER4 processors Impact: more hardware support This adds the back-end for the PMU on the POWER4 and POWER4+ processors (GP and GQ). This is quite similar to the PPC970, with 8 PMCs, but has fewer events than the PPC970. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/perf_counter.c | 5 + arch/powerpc/kernel/power4-pmu.c | 557 +++++++++++++++++++++++++++++++++++++ 3 files changed, 563 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/power4-pmu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 49851e0d8fd..8e5e2c74971 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o \ +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ power5-pmu.o power5+-pmu.o power6-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 162f3981fa2..0e33d27cd46 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -824,6 +824,7 @@ void hw_perf_counter_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } +extern struct power_pmu power4_pmu; extern struct power_pmu ppc970_pmu; extern struct power_pmu power5_pmu; extern struct power_pmu power5p_pmu; @@ -841,6 +842,10 @@ static int init_perf_counters(void) /* XXX should get this from cputable */ pvr = mfspr(SPRN_PVR); switch (PVR_VER(pvr)) { + case PV_POWER4: + case PV_POWER4p: + ppmu = &power4_pmu; + break; case PV_970: case PV_970FX: case PV_970MP: diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c new file mode 100644 index 00000000000..1407b19ab61 --- /dev/null +++ b/arch/powerpc/kernel/power4-pmu.c @@ -0,0 +1,557 @@ +/* + * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER4 + */ +#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_LOWER_SH 6 +#define PM_LOWER_MSK 1 +#define PM_LOWER_MSKS 0x40 +#define PM_BYTE_SH 4 /* Byte number of event bus to use */ +#define PM_BYTE_MSK 3 +#define PM_PMCSEL_MSK 7 + +/* + * Unit code values + */ +#define PM_FPU 1 +#define PM_ISU1 2 +#define PM_IFU 3 +#define PM_IDU0 4 +#define PM_ISU1_ALT 6 +#define PM_ISU2 7 +#define PM_IFU_ALT 8 +#define PM_LSU0 9 +#define PM_LSU1 0xc +#define PM_GPS 0xf + +/* + * Bits in MMCR0 for POWER4 + */ +#define MMCR0_PMC1SEL_SH 8 +#define MMCR0_PMC2SEL_SH 1 +#define MMCR_PMCSEL_MSK 0x1f + +/* + * Bits in MMCR1 for POWER4 + */ +#define MMCR1_TTM0SEL_SH 62 +#define MMCR1_TTC0SEL_SH 61 +#define MMCR1_TTM1SEL_SH 59 +#define MMCR1_TTC1SEL_SH 58 +#define MMCR1_TTM2SEL_SH 56 +#define MMCR1_TTC2SEL_SH 55 +#define MMCR1_TTM3SEL_SH 53 +#define MMCR1_TTC3SEL_SH 52 +#define MMCR1_TTMSEL_MSK 3 +#define MMCR1_TD_CP_DBG0SEL_SH 50 +#define MMCR1_TD_CP_DBG1SEL_SH 48 +#define MMCR1_TD_CP_DBG2SEL_SH 46 +#define MMCR1_TD_CP_DBG3SEL_SH 44 +#define MMCR1_DEBUG0SEL_SH 43 +#define MMCR1_DEBUG1SEL_SH 42 +#define MMCR1_DEBUG2SEL_SH 41 +#define MMCR1_DEBUG3SEL_SH 40 +#define MMCR1_PMC1_ADDER_SEL_SH 39 +#define MMCR1_PMC2_ADDER_SEL_SH 38 +#define MMCR1_PMC6_ADDER_SEL_SH 37 +#define MMCR1_PMC5_ADDER_SEL_SH 36 +#define MMCR1_PMC8_ADDER_SEL_SH 35 +#define MMCR1_PMC7_ADDER_SEL_SH 34 +#define MMCR1_PMC3_ADDER_SEL_SH 33 +#define MMCR1_PMC4_ADDER_SEL_SH 32 +#define MMCR1_PMC3SEL_SH 27 +#define MMCR1_PMC4SEL_SH 22 +#define MMCR1_PMC5SEL_SH 17 +#define MMCR1_PMC6SEL_SH 12 +#define MMCR1_PMC7SEL_SH 7 +#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ + +static short mmcr1_adder_bits[8] = { + MMCR1_PMC1_ADDER_SEL_SH, + MMCR1_PMC2_ADDER_SEL_SH, + MMCR1_PMC3_ADDER_SEL_SH, + MMCR1_PMC4_ADDER_SEL_SH, + MMCR1_PMC5_ADDER_SEL_SH, + MMCR1_PMC6_ADDER_SEL_SH, + MMCR1_PMC7_ADDER_SEL_SH, + MMCR1_PMC8_ADDER_SEL_SH +}; + +/* + * Bits in MMCRA + */ +#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> + * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 + * \SMPL ||\TTC3SEL + * |\TTC_IFU_SEL + * \TTM2SEL0 + * + * SMPL - SAMPLE_ENABLE constraint + * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 + * + * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 + * 55: UC1 error 0x0080_0000_0000_0000 + * 54: FPU events needed 0x0040_0000_0000_0000 + * 53: ISU1 events needed 0x0020_0000_0000_0000 + * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 + * + * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 + * 51: UC2 error 0x0008_0000_0000_0000 + * 50: FPU events needed 0x0004_0000_0000_0000 + * 49: IFU events needed 0x0002_0000_0000_0000 + * 48: LSU0 events needed 0x0001_0000_0000_0000 + * + * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 + * 47: UC3 error 0x8000_0000_0000 + * 46: LSU0 events needed 0x4000_0000_0000 + * 45: IFU events needed 0x2000_0000_0000 + * 44: IDU0|ISU2 events needed 0x1000_0000_0000 + * 43: ISU1 events needed 0x0800_0000_0000 + * + * TTM2SEL0 + * 42: 0 = IDU0 events needed + * 1 = ISU2 events needed 0x0400_0000_0000 + * + * TTC_IFU_SEL + * 41: 0 = IFU.U events needed + * 1 = IFU.L events needed 0x0200_0000_0000 + * + * TTC3SEL + * 40: 0 = LSU1.U events needed + * 1 = LSU1.L events needed 0x0100_0000_0000 + * + * PS1 + * 39: PS1 error 0x0080_0000_0000 + * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 + * + * PS2 + * 35: PS2 error 0x0008_0000_0000 + * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 + * + * B0 + * 28-31: Byte 0 event source 0xf000_0000 + * 1 = FPU + * 2 = ISU1 + * 3 = IFU + * 4 = IDU0 + * 7 = ISU2 + * 9 = LSU0 + * c = LSU1 + * f = GPS + * + * B1, B2, B3 + * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources + * + * P8 + * 15: P8 error 0x8000 + * 14-15: Count of events needing PMC8 + * + * P1..P7 + * 0-13: Count of events needing PMC1..PMC7 + * + * Note: this doesn't allow events using IFU.U to be combined with events + * using IFU.L, though that is feasible (using TTM0 and TTM2). However + * there are no listed events for IFU.L (they are debug events not + * verified for performance monitoring) so this shouldn't cause a + * problem. + */ + +static struct unitinfo { + u64 value, mask; + int unit; + int lowerbit; +} p4_unitinfo[16] = { + [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, + [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, + [PM_ISU1_ALT] = + { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, + [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, + [PM_IFU_ALT] = + { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, + [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, + [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, + [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, + [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, + [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 } +}; + +static unsigned char direct_marked_event[8] = { + (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ + (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ + (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */ + (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ + (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */ + (1<<3) | (1<<4) | (1<<5), + /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ + (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ + (1<<4), /* PMC8: PM_MRK_LSU_FIN */ +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int p4_marked_instr_event(unsigned int event) +{ + int pmc, psel, unit, byte, bit; + unsigned int mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if (pmc) { + if (direct_marked_event[pmc - 1] & (1 << psel)) + return 1; + if (psel == 0) /* add events */ + bit = (pmc <= 4)? pmc - 1: 8 - pmc; + else if (psel == 6) /* decode events */ + bit = 4; + else + return 0; + } else + bit = psel; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + mask = 0; + switch (unit) { + case PM_LSU1: + if (event & PM_LOWER_MSKS) + mask = 1 << 28; /* byte 7 bit 4 */ + else + mask = 6 << 24; /* byte 3 bits 1 and 2 */ + break; + case PM_LSU0: + /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ + mask = 0x083dff00; + } + return (mask >> (byte * 8 + bit)) & 1; +} + +static int p4_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, unit, lower, sh; + u64 mask = 0, value = 0; + int grp = -1; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 8) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + grp = ((pmc - 1) >> 1) & 1; + } + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + if (unit) { + lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; + + /* + * Bus events on bytes 0 and 2 can be counted + * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. + */ + if (!pmc) + grp = byte & 1; + + if (!p4_unitinfo[unit].unit) + return -1; + mask |= p4_unitinfo[unit].mask; + value |= p4_unitinfo[unit].value; + sh = p4_unitinfo[unit].lowerbit; + if (sh > 1) + value |= (u64)lower << sh; + else if (lower != sh) + return -1; + unit = p4_unitinfo[unit].unit; + + /* Set byte lane select field */ + mask |= 0xfULL << (28 - 4 * byte); + value |= (u64)unit << (28 - 4 * byte); + } + if (grp == 0) { + /* increment PMC1/2/5/6 field */ + mask |= 0x8000000000ull; + value |= 0x1000000000ull; + } else { + /* increment PMC3/4/7/8 field */ + mask |= 0x800000000ull; + value |= 0x100000000ull; + } + + /* Marked instruction events need sample_enable set */ + if (p4_marked_instr_event(event)) { + mask |= 1ull << 56; + value |= 1ull << 56; + } + + /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ + if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) + mask |= 1ull << 56; + + *maskp = mask; + *valp = value; + return 0; +} + +static unsigned int ppc_inst_cmpl[] = { + 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 +}; + +static int p4_get_alternatives(unsigned int event, unsigned int alt[]) +{ + int i, j, na; + + alt[0] = event; + na = 1; + + /* 2 possibilities for PM_GRP_DISP_REJECT */ + if (event == 0x8003 || event == 0x0224) { + alt[1] = event ^ (0x8003 ^ 0x0224); + return 2; + } + + /* 2 possibilities for PM_ST_MISS_L1 */ + if (event == 0x0c13 || event == 0x0c23) { + alt[1] = event ^ (0x0c13 ^ 0x0c23); + return 2; + } + + /* several possibilities for PM_INST_CMPL */ + for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { + if (event == ppc_inst_cmpl[i]) { + for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) + if (j != i) + alt[na++] = ppc_inst_cmpl[j]; + break; + } + } + + return na; +} + +static int p4_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; + unsigned int pmc, unit, byte, psel, lower; + unsigned int ttm, grp; + unsigned int pmc_inuse = 0; + unsigned int pmc_grp_use[2]; + unsigned char busbyte[4]; + unsigned char unituse[16]; + unsigned int unitlower = 0; + int i; + + if (n_ev > 8) + return -1; + + /* First pass to count resource use */ + pmc_grp_use[0] = pmc_grp_use[1] = 0; + memset(busbyte, 0, sizeof(busbyte)); + memset(unituse, 0, sizeof(unituse)); + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + /* count 1/2/5/6 vs 3/4/7/8 use */ + ++pmc_grp_use[((pmc - 1) >> 1) & 1]; + } + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; + if (unit) { + if (!pmc) + ++pmc_grp_use[byte & 1]; + if (unit == 6 || unit == 8) + /* map alt ISU1/IFU codes: 6->2, 8->3 */ + unit = (unit >> 1) - 1; + if (busbyte[byte] && busbyte[byte] != unit) + return -1; + busbyte[byte] = unit; + lower <<= unit; + if (unituse[unit] && lower != (unitlower & lower)) + return -1; + unituse[unit] = 1; + unitlower |= lower; + } + } + if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) + return -1; + + /* + * Assign resources and set multiplexer selects. + * + * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. + * Each TTMx can only select one unit, but since + * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, + * we have some choices. + */ + if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { + unituse[6] = 1; /* Move 2 to 6 */ + unituse[2] = 0; + } + if (unituse[3] & (unituse[1] | unituse[2])) { + unituse[8] = 1; /* Move 3 to 8 */ + unituse[3] = 0; + unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); + } + /* Check only one unit per TTMx */ + if (unituse[1] + unituse[2] + unituse[3] > 1 || + unituse[4] + unituse[6] + unituse[7] > 1 || + unituse[8] + unituse[9] > 1 || + (unituse[5] | unituse[10] | unituse[11] | + unituse[13] | unituse[14])) + return -1; + + /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ + mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; + mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; + mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; + + /* Set TTCxSEL fields. */ + if (unitlower & 0xe) + mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; + if (unitlower & 0xf0) + mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; + if (unitlower & 0xf00) + mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; + if (unitlower & 0x7000) + mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; + + /* Set byte lane select fields. */ + for (byte = 0; byte < 4; ++byte) { + unit = busbyte[byte]; + if (!unit) + continue; + if (unit == 0xf) { + /* special case for GPS */ + mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); + } else { + if (!unituse[unit]) + ttm = unit - 1; /* 2->1, 3->2 */ + else + ttm = unit >> 2; + mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); + } + } + + /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + psel = event[i] & PM_PMCSEL_MSK; + if (!pmc) { + /* Bus event or 00xxx direct event (off or cycles) */ + if (unit) + psel |= 0x10 | ((byte & 2) << 2); + for (pmc = 0; pmc < 8; ++pmc) { + if (pmc_inuse & (1 << pmc)) + continue; + grp = (pmc >> 1) & 1; + if (unit) { + if (grp == (byte & 1)) + break; + } else if (pmc_grp_use[grp] < 4) { + ++pmc_grp_use[grp]; + break; + } + } + pmc_inuse |= 1 << pmc; + } else { + /* Direct event */ + --pmc; + if (psel == 0 && (byte & 2)) + /* add events on higher-numbered bus */ + mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; + else if (psel == 6 && byte == 3) + /* seem to need to set sample_enable here */ + mmcra |= MMCRA_SAMPLE_ENABLE; + psel |= 8; + } + if (pmc <= 1) + mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); + else + mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); + if (pmc == 7) /* PMC8 */ + mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; + hwc[i] = pmc; + if (p4_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; + } + + if (pmc_inuse & 1) + mmcr0 |= MMCR0_PMC1CE; + if (pmc_inuse & 0xfe) + mmcr0 |= MMCR0_PMCjCE; + + mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ + + /* Return MMCRx values */ + mmcr[0] = mmcr0; + mmcr[1] = mmcr1; + mmcr[2] = mmcra; + return 0; +} + +static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + /* + * Setting the PMCxSEL field to 0 disables PMC x. + * (Note that pmc is 0-based here, not 1-based.) + */ + if (pmc <= 1) { + mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); + } else { + mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); + if (pmc == 7) + mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); + } +} + +static int p4_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 7, + [PERF_COUNT_INSTRUCTIONS] = 0x1001, + [PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ + [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ +}; + +struct power_pmu power4_pmu = { + .n_counter = 8, + .max_alternatives = 5, + .add_fields = 0x0000001100005555ull, + .test_adder = 0x0011083300000000ull, + .compute_mmcr = p4_compute_mmcr, + .get_constraint = p4_get_constraint, + .get_alternatives = p4_get_alternatives, + .disable_pmc = p4_disable_pmc, + .n_generic = ARRAY_SIZE(p4_generic_events), + .generic_events = p4_generic_events, +}; -- cgit v1.2.3 From 7dd1fcc258b65da718f01e4684a7b9244501a9fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:33 +0100 Subject: perf_counter: provide pagefault software events We use the generic software counter infrastructure to provide page fault events. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/powerpc/mm/fault.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 76993941cac..eda5b0ca4af 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an -- cgit v1.2.3 From ac17dc8e58f3069ea895cfff963adf98ff3cf6b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:34 +0100 Subject: perf_counter: provide major/minor page fault software events Provide separate sw counters for major and minor page faults. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/powerpc/mm/fault.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eda5b0ca4af..17bbf6f91fb 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -312,6 +312,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { preempt_disable(); @@ -319,8 +320,10 @@ good_area: preempt_enable(); } #endif - } else + } else { current->min_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + } up_read(&mm->mmap_sem); return 0; -- cgit v1.2.3 From b6c5a71da1477d261bc36254fe1f20d32b57598d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 16 Mar 2009 21:00:00 +1100 Subject: perf_counter: abstract wakeup flag setting in core to fix powerpc build Impact: build fix for powerpc Commit bd753921015e7905 ("perf_counter: software counter event infrastructure") introduced a use of TIF_PERF_COUNTERS into the core perfcounter code. This breaks the build on powerpc because we use a flag in a per-cpu area to signal wakeups on powerpc rather than a thread_info flag, because the thread_info flags have to be manipulated with atomic operations and are thus slower than per-cpu flags. This fixes the by changing the core to use an abstracted set_perf_counter_pending() function, which is defined on x86 to set the TIF_PERF_COUNTERS flag and on powerpc to set the per-cpu flag (paca->perf_counter_pending). It changes the previous powerpc definition of set_perf_counter_pending to not take an argument and adds a clear_perf_counter_pending, so as to simplify the definition on x86. On x86, set_perf_counter_pending() is defined as a macro. Defining it as a static inline in arch/x86/include/asm/perf_counters.h causes compile failures because gets included early in , and the definitions of set_tsk_thread_flag etc. are therefore not available in . (On powerpc this problem is avoided by defining set_perf_counter_pending etc. in .) Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/hw_irq.h | 14 +++++++++++--- arch/powerpc/kernel/irq.c | 11 +++-------- arch/powerpc/kernel/perf_counter.c | 3 +-- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b43076ff92c..cb32d571c9c 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -142,10 +142,17 @@ static inline unsigned long get_perf_counter_pending(void) return x; } -static inline void set_perf_counter_pending(int x) +static inline void set_perf_counter_pending(void) { asm volatile("stb %0,%1(13)" : : - "r" (x), + "r" (1), + "i" (offsetof(struct paca_struct, perf_counter_pending))); +} + +static inline void clear_perf_counter_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), "i" (offsetof(struct paca_struct, perf_counter_pending))); } @@ -158,7 +165,8 @@ static inline unsigned long get_perf_counter_pending(void) return 0; } -static inline void set_perf_counter_pending(int x) {} +static inline void set_perf_counter_pending(void) {} +static inline void clear_perf_counter_pending(void) {} static inline void perf_counter_do_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0d2e37c5773..469e9635ff0 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,13 +104,6 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -#ifdef CONFIG_PERF_COUNTERS -notrace void __weak perf_counter_do_pending(void) -{ - set_perf_counter_pending(0); -} -#endif - notrace void raw_local_irq_restore(unsigned long en) { /* @@ -142,8 +135,10 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) + if (get_perf_counter_pending()) { + clear_perf_counter_pending(); perf_counter_do_pending(); + } /* * if (get_paca()->hard_enabled) return; diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0e33d27cd46..5008762e8bf 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -653,7 +653,6 @@ void perf_counter_do_pending(void) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; - set_perf_counter_pending(0); for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter && counter->wakeup_pending) { @@ -811,7 +810,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) perf_counter_do_pending(); irq_exit(); } else { - set_perf_counter_pending(1); + set_perf_counter_pending(); } } } -- cgit v1.2.3 From b8e83514b64577b48bfb794fe85fcde40a9343ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:18 +0100 Subject: perf_counter: revamp syscall input ABI Impact: modify ABI The hardware/software classification in hw_event->type became a little strained due to the addition of tracepoint tracing. Instead split up the field and provide a type field to explicitly specify the counter type, while using the event_id field to specify which event to use. Raw counters still work as before, only the raw config now goes into raw_event. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.836807573@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5008762e8bf..26f69dc7130 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,7 +602,7 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - ev = counter->hw_event.type; + ev = counter->hw_event.event_id; if (!counter->hw_event.raw) { if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) @@ -692,7 +692,7 @@ static void perf_handle_group(struct perf_counter *counter) list_for_each_entry(sub, &leader->sibling_list, list_entry) { if (sub != counter) sub->hw_ops->read(sub); - perf_store_irq_data(counter, sub->hw_event.type); + perf_store_irq_data(counter, sub->hw_event.event_config); perf_store_irq_data(counter, atomic64_read(&sub->count)); } } -- cgit v1.2.3 From 0322cd6ec504b0bf08ca7b2c3d7f43bda37d79c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:19 +0100 Subject: perf_counter: unify irq output code Impact: cleanup Having 3 slightly different copies of the same code around does nobody any good. First step in revamping the output format. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.929962222@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 51 ++------------------------------------ 1 file changed, 2 insertions(+), 49 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 26f69dc7130..88b72eb4af1 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -662,41 +662,6 @@ void perf_counter_do_pending(void) } } -/* - * Record data for an irq counter. - * This function was lifted from the x86 code; maybe it should - * go in the core? - */ -static void perf_store_irq_data(struct perf_counter *counter, u64 data) -{ - struct perf_data *irqdata = counter->irqdata; - - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; - - *p = data; - irqdata->len += sizeof(u64); - } -} - -/* - * Record all the values of the counters in a group - */ -static void perf_handle_group(struct perf_counter *counter) -{ - struct perf_counter *leader, *sub; - - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->hw_ops->read(sub); - perf_store_irq_data(counter, sub->hw_event.event_config); - perf_store_irq_data(counter, atomic64_read(&sub->count)); - } -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -736,20 +701,8 @@ static void record_and_restart(struct perf_counter *counter, long val, /* * Finally record data if requested. */ - if (record) { - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - break; - case PERF_RECORD_IRQ: - perf_store_irq_data(counter, instruction_pointer(regs)); - counter->wakeup_pending = 1; - break; - case PERF_RECORD_GROUP: - perf_handle_group(counter); - counter->wakeup_pending = 1; - break; - } - } + if (record) + perf_counter_output(counter, 1, regs); } /* -- cgit v1.2.3 From db4fb5acf20295063d1d5105e67724eb51440207 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 19 Mar 2009 20:26:20 +0100 Subject: perf_counter: powerpc: clean up perc_counter_interrupt Impact: cleanup This updates the powerpc perf_counter_interrupt following on from the "perf_counter: unify irq output code" patch. Since we now use the generic perf_counter_output code, which sets the perf_counter_pending flag directly, we no longer need the need_wakeup variable. This removes need_wakeup and makes perf_counter_interrupt use get_perf_counter_pending() instead. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Orig-LKML-Reference: <20090319194234.024464535@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 88b72eb4af1..830ca9c4494 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -723,8 +723,6 @@ static void perf_counter_interrupt(struct pt_regs *regs) /* counter has overflowed */ found = 1; record_and_restart(counter, val, regs); - if (counter->wakeup_pending) - need_wakeup = 1; } } @@ -754,17 +752,14 @@ static void perf_counter_interrupt(struct pt_regs *regs) /* * If we need a wakeup, check whether interrupts were soft-enabled * when we took the interrupt. If they were, we can wake stuff up - * immediately; otherwise we'll have to set a flag and do the - * wakeup when interrupts get soft-enabled. + * immediately; otherwise we'll have do the wakeup when interrupts + * get soft-enabled. */ - if (need_wakeup) { - if (regs->softe) { - irq_enter(); - perf_counter_do_pending(); - irq_exit(); - } else { - set_perf_counter_pending(); - } + if (get_perf_counter_pending() && regs->softe) { + irq_enter(); + clear_perf_counter_pending(); + perf_counter_do_pending(); + irq_exit(); } } -- cgit v1.2.3 From 9aaa131a279834dff75c290c91f0058f62d72d46 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 21 Mar 2009 15:31:47 +1100 Subject: perf_counter: fix type/event_id layout on big-endian systems Impact: build fix for powerpc Commit db3a944aca35ae61 ("perf_counter: revamp syscall input ABI") expanded the hw_event.type field into a union of structs containing bitfields. In particular it introduced a type field and a raw_type field, with the intention that the 1-bit raw_type field should overlay the most-significant bit of the 8-bit type field, and in fact perf_counter_alloc() now assumes that (or at least, assumes that raw_type doesn't overlay any of the bits that are 1 in the values of PERF_TYPE_{HARDWARE,SOFTWARE,TRACEPOINT}). Unfortunately this is not true on big-endian systems such as PowerPC, where bitfields are laid out from left to right, i.e. from most significant bit to least significant. This means that setting hw_event.type = PERF_TYPE_SOFTWARE will set hw_event.raw_type to 1. This fixes it by making the layout depend on whether or not __BIG_ENDIAN_BITFIELD is defined. It's a bit ugly, but that's what we get for using bitfields in a user/kernel ABI. Also, that commit didn't fix up some places in arch/powerpc/kernel/ perf_counter.c where hw_event.raw and hw_event.event_id were used. This fixes them too. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/perf_counter.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 830ca9c4494..6413d9c0313 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,12 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - ev = counter->hw_event.event_id; - if (!counter->hw_event.raw) { - if (ev >= ppmu->n_generic || - ppmu->generic_events[ev] == 0) + if (!counter->hw_event.raw_type) { + ev = counter->hw_event.event_id; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return NULL; ev = ppmu->generic_events[ev]; + } else { + ev = counter->hw_event.raw_event_id; } counter->hw.config_base = ev; counter->hw.idx = 0; -- cgit v1.2.3 From f4a2deb4860497f4332cf6a1acddab3dd628ddf0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:06 +0100 Subject: perf_counter: remove the event config bitfields Since the bitfields turned into a bit of a mess, remove them and rely on good old masks. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.059499915@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 6413d9c0313..d05651584d4 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,13 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - if (!counter->hw_event.raw_type) { - ev = counter->hw_event.event_id; + if (!perf_event_raw(&counter->hw_event)) { + ev = perf_event_id(&counter->hw_event); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return NULL; ev = ppmu->generic_events[ev]; } else { - ev = counter->hw_event.raw_event_id; + ev = perf_event_config(&counter->hw_event); } counter->hw.config_base = ev; counter->hw.idx = 0; -- cgit v1.2.3 From 37d81828385f8ff823caaaf1a83e72d065b6cfa1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 23 Mar 2009 18:22:08 +0100 Subject: perf_counter: add an mmap method to allow userspace to read hardware counters Impact: new feature giving performance improvement This adds the ability for userspace to do an mmap on a hardware counter fd and get access to a read-only page that contains the information needed to translate a hardware counter value to the full 64-bit counter value that would be returned by a read on the fd. This is useful on architectures that allow user programs to read the hardware counters, such as PowerPC. The mmap will only succeed if the counter is a hardware counter monitoring the current process. On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter and translate it to the full 64-bit value in about 30ns using the mmapped page, compared to about 830ns for the read syscall on the counter, so this does give a significant performance improvement. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090323172417.297057964@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index d05651584d4..e4349281b07 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -417,6 +417,8 @@ void hw_perf_restore(u64 disable) atomic64_set(&counter->hw.prev_count, val); counter->hw.idx = hwc_index[i] + 1; write_pmc(counter->hw.idx, val); + if (counter->user_page) + perf_counter_update_userpage(counter); } mb(); cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; @@ -572,6 +574,8 @@ static void power_perf_disable(struct perf_counter *counter) ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; + if (counter->user_page) + perf_counter_update_userpage(counter); break; } } @@ -698,6 +702,8 @@ static void record_and_restart(struct perf_counter *counter, long val, write_pmc(counter->hw.idx, val); atomic64_set(&counter->hw.prev_count, val); atomic64_set(&counter->hw.period_left, left); + if (counter->user_page) + perf_counter_update_userpage(counter); /* * Finally record data if requested. -- cgit v1.2.3 From 7b732a75047738e4f85438ed2f9cd34bf5f2a19a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:10 +0100 Subject: perf_counter: new output ABI - part 1 Impact: Rework the perfcounter output ABI use sys_read() only for instant data and provide mmap() output for all async overflow data. The first mmap() determines the size of the output buffer. The mmap() size must be a PAGE_SIZE multiple of 1+pages, where pages must be a power of 2 or 0. Further mmap()s of the same fd must have the same size. Once all maps are gone, you can again mmap() with a new size. In case of 0 extra pages there is no data output and the first page only contains meta data. When there are data pages, a poll() event will be generated for each full page of data. Furthermore, the output is circular. This means that although 1 page is a valid configuration, its useless, since we'll start overwriting it the instant we report a full page. Future work will focus on the output format (currently maintained) where we'll likey want each entry denoted by a header which includes a type and length. Further future work will allow to splice() the fd, also containing the async overflow data -- splice() would be mutually exclusive with mmap() of the data. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.470536358@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index e4349281b07..d48596ab655 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -417,8 +417,7 @@ void hw_perf_restore(u64 disable) atomic64_set(&counter->hw.prev_count, val); counter->hw.idx = hwc_index[i] + 1; write_pmc(counter->hw.idx, val); - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); } mb(); cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; @@ -574,8 +573,7 @@ static void power_perf_disable(struct perf_counter *counter) ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); break; } } @@ -702,8 +700,7 @@ static void record_and_restart(struct perf_counter *counter, long val, write_pmc(counter->hw.idx, val); atomic64_set(&counter->hw.prev_count, val); atomic64_set(&counter->hw.period_left, left); - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); /* * Finally record data if requested. -- cgit v1.2.3 From 53cfbf593758916aac41db728f029986a62f1254 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 25 Mar 2009 22:46:58 +1100 Subject: perf_counter: record time running and time enabled for each counter Impact: new functionality Currently, if there are more counters enabled than can fit on the CPU, the kernel will multiplex the counters on to the hardware using round-robin scheduling. That isn't too bad for sampling counters, but for counting counters it means that the value read from a counter represents some unknown fraction of the true count of events that occurred while the counter was enabled. This remedies the situation by keeping track of how long each counter is enabled for, and how long it is actually on the cpu and counting events. These times are recorded in nanoseconds using the task clock for per-task counters and the cpu clock for per-cpu counters. These values can be supplied to userspace on a read from the counter. Userspace requests that they be supplied after the counter value by setting the PERF_FORMAT_TOTAL_TIME_ENABLED and/or PERF_FORMAT_TOTAL_TIME_RUNNING bits in the hw_event.read_format field when creating the counter. (There is no way to change the read format after the counter is created, though it would be possible to add some way to do that.) Using this information it is possible for userspace to scale the count it reads from the counter to get an estimate of the true count: true_count_estimate = count * total_time_enabled / total_time_running This also lets userspace detect the situation where the counter never got to go on the cpu: total_time_running == 0. This functionality has been requested by the PAPI developers, and will be generally needed for interpreting the count values from counting counters correctly. In the implementation, this keeps 5 time values (in nanoseconds) for each counter: total_time_enabled and total_time_running are used when the counter is in state OFF or ERROR and for reporting back to userspace. When the counter is in state INACTIVE or ACTIVE, it is the tstamp_enabled, tstamp_running and tstamp_stopped values that are relevant, and total_time_enabled and total_time_running are determined from them. (tstamp_stopped is only used in INACTIVE state.) The reason for doing it like this is that it means that only counters being enabled or disabled at sched-in and sched-out time need to be updated. There are no new loops that iterate over all counters to update total_time_enabled or total_time_running. This also keeps separate child_total_time_running and child_total_time_enabled fields that get added in when reporting the totals to userspace. They are separate fields so that they can be atomic. We don't want to use atomics for total_time_running, total_time_enabled etc., because then we would have to use atomic sequences to update them, which are slower than regular arithmetic and memory accesses. It is possible to measure total_time_running by adding a task_clock counter to each group of counters, and total_time_enabled can be measured approximately with a top-level task_clock counter (though inaccuracies will creep in if you need to disable and enable groups since it is not possible in general to disable/enable the top-level task_clock counter simultaneously with another group). However, that adds extra overhead - I measured around 15% increase in the context switch latency reported by lat_ctx (from lmbench) when a task_clock counter was added to each of 2 groups, and around 25% increase when a task_clock counter was added to each of 4 groups. (In both cases a top-level task-clock counter was also added.) In contrast, the code added in this commit gives better information with no overhead that I could measure (in fact in some cases I measured lower times with this code, but the differences were all less than one standard deviation). [ v2: address review comments by Andrew Morton. ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Andrew Morton Orig-LKML-Reference: <18890.6578.728637.139402@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index d48596ab655..df007fe0cc0 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -455,6 +455,8 @@ static void counter_sched_in(struct perf_counter *counter, int cpu) { counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; + counter->tstamp_running += counter->ctx->time_now - + counter->tstamp_stopped; if (is_software_counter(counter)) counter->hw_ops->enable(counter); } -- cgit v1.2.3 From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:02 +0200 Subject: perf_counter: unify and fix delayed counter wakeup While going over the wakeup code I noticed delayed wakeups only work for hardware counters but basically all software counters rely on them. This patch unifies and generalizes the delayed wakeup to fix this issue. Since we're dealing with NMI context bits here, use a cmpxchg() based single link list implementation to track counters that have pending wakeups. [ This should really be generic code for delayed wakeups, but since we cannot use cmpxchg()/xchg() in generic code, I've let it live in the perf_counter code. -- Eric Dumazet could use it to aggregate the network wakeups. ] Furthermore, the x86 method of using TIF flags was flawed in that its quite possible to end up setting the bit on the idle task, loosing the wakeup. The powerpc method uses per-cpu storage and does appear to be sufficient. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.153932974@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/hw_irq.h | 4 ++-- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/perf_counter.c | 22 ++-------------------- 3 files changed, 5 insertions(+), 23 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index cb32d571c9c..20a44d0c9fd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -132,7 +132,7 @@ static inline int irqs_disabled_flags(unsigned long flags) struct irq_chip; #ifdef CONFIG_PERF_COUNTERS -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { unsigned long x; @@ -160,7 +160,7 @@ extern void perf_counter_do_pending(void); #else -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { return 0; } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 469e9635ff0..2cd471f92fe 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -135,7 +135,7 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) { + if (test_perf_counter_pending()) { clear_perf_counter_pending(); perf_counter_do_pending(); } diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index df007fe0cc0..cde720fc495 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -649,24 +649,6 @@ hw_perf_counter_init(struct perf_counter *counter) return &power_perf_ops; } -/* - * Handle wakeups. - */ -void perf_counter_do_pending(void) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter && counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -720,7 +702,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; long val; - int need_wakeup = 0, found = 0; + int found = 0; for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; @@ -761,7 +743,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) * immediately; otherwise we'll have do the wakeup when interrupts * get soft-enabled. */ - if (get_perf_counter_pending() && regs->softe) { + if (test_perf_counter_pending() && regs->softe) { irq_enter(); clear_perf_counter_pending(); perf_counter_do_pending(); -- cgit v1.2.3 From 7595d63b3a9ce65d14c4fbd0e7de448a343d7215 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Mar 2009 19:07:07 +0200 Subject: perf_counter: powerpc: only reserve PMU hardware when we need it Impact: cooperate with oprofile At present, on PowerPC, if you have perf_counters compiled in, oprofile doesn't work. There is code to allow the PMU to be shared between competing subsystems, such as perf_counters and oprofile, but currently the perf_counter subsystem reserves the PMU for itself at boot time, and never releases it. This makes perf_counter play nicely with oprofile. Now we keep a count of how many perf_counter instances are counting hardware events, and reserve the PMU when that count becomes non-zero, and release the PMU when that count becomes zero. This means that it is possible to have perf_counters compiled in and still use oprofile, as long as there are no hardware perf_counters active. This also means that if oprofile is active, sys_perf_counter_open will fail if the hw_event specifies a hardware event. To avoid races with other tasks creating and destroying perf_counters, we use a mutex. We use atomic_inc_not_zero and atomic_add_unless to avoid having to take the mutex unless there is a possibility of the count going between 0 and 1. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090330171023.627912475@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 47 ++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index cde720fc495..560dd1e7b52 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -41,6 +41,8 @@ struct power_pmu *ppmu; */ static unsigned int freeze_counters_kernel = MMCR0_FCS; +static void perf_counter_interrupt(struct pt_regs *regs); + void perf_counter_print_debug(void) { } @@ -594,6 +596,24 @@ struct hw_perf_counter_ops power_perf_ops = { .read = power_perf_read }; +/* Number of perf_counters counting hardware events */ +static atomic_t num_counters; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Release the PMU if this is the last perf_counter. + */ +static void hw_perf_counter_destroy(struct perf_counter *counter) +{ + if (!atomic_add_unless(&num_counters, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_counters) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter) { @@ -601,6 +621,7 @@ hw_perf_counter_init(struct perf_counter *counter) struct perf_counter *ctrs[MAX_HWCOUNTERS]; unsigned int events[MAX_HWCOUNTERS]; int n; + int err; if (!ppmu) return NULL; @@ -646,6 +667,27 @@ hw_perf_counter_init(struct perf_counter *counter) counter->hw.config = events[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); + + /* + * See if we need to reserve the PMU. + * If no counters are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_counters)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_counters) == 0 && + reserve_pmc_hardware(perf_counter_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_counters); + mutex_unlock(&pmc_reserve_mutex); + } + counter->destroy = hw_perf_counter_destroy; + + if (err) + return NULL; return &power_perf_ops; } @@ -769,11 +811,6 @@ static int init_perf_counters(void) { unsigned long pvr; - if (reserve_pmc_hardware(perf_counter_interrupt)) { - printk(KERN_ERR "Couldn't init performance monitor subsystem\n"); - return -EBUSY; - } - /* XXX should get this from cputable */ pvr = mfspr(SPRN_PVR); switch (PVR_VER(pvr)) { -- cgit v1.2.3 From d5d2bc0dd0379deddb9ede66fec90a3083eaec57 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Mar 2009 19:07:08 +0200 Subject: perf_counter: make it possible for hw_perf_counter_init to return error codes Impact: better error reporting At present, if hw_perf_counter_init encounters an error, all it can do is return NULL, which causes sys_perf_counter_open to return an EINVAL error to userspace. This isn't very informative for userspace; it means that userspace can't tell the difference between "sorry, oprofile is already using the PMU" and "we don't support this CPU" and "this CPU doesn't support the requested generic hardware event". This commit uses the PTR_ERR/ERR_PTR/IS_ERR set of macros to let hw_perf_counter_init return an error code on error rather than just NULL if it wishes. If it does so, that error code will be returned from sys_perf_counter_open to userspace. If it returns NULL, an EINVAL error will be returned to userspace, as before. This also adapts the powerpc hw_perf_counter_init to make use of this to return ENXIO, EINVAL, EBUSY, or EOPNOTSUPP as appropriate. It would be good to add extra error numbers in future to allow userspace to distinguish the various errors that are currently reported as EINVAL, i.e. irq_period < 0, too many events in a group, conflict between exclude_* settings in a group, and PMU resource conflict in a group. [ v2: fix a bug pointed out by Corey Ashford where error returns from hw_perf_counter_init were not handled correctly in the case of raw hardware events.] Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090330171023.682428180@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 560dd1e7b52..0a4d14f279a 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -624,13 +624,13 @@ hw_perf_counter_init(struct perf_counter *counter) int err; if (!ppmu) - return NULL; + return ERR_PTR(-ENXIO); if ((s64)counter->hw_event.irq_period < 0) - return NULL; + return ERR_PTR(-EINVAL); if (!perf_event_raw(&counter->hw_event)) { ev = perf_event_id(&counter->hw_event); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return NULL; + return ERR_PTR(-EOPNOTSUPP); ev = ppmu->generic_events[ev]; } else { ev = perf_event_config(&counter->hw_event); @@ -656,14 +656,14 @@ hw_perf_counter_init(struct perf_counter *counter) n = collect_events(counter->group_leader, ppmu->n_counter - 1, ctrs, events); if (n < 0) - return NULL; + return ERR_PTR(-EINVAL); } events[n] = ev; ctrs[n] = counter; if (check_excludes(ctrs, n, 1)) - return NULL; + return ERR_PTR(-EINVAL); if (power_check_constraints(events, n + 1)) - return NULL; + return ERR_PTR(-EINVAL); counter->hw.config = events[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); @@ -687,7 +687,7 @@ hw_perf_counter_init(struct perf_counter *counter) counter->destroy = hw_perf_counter_destroy; if (err) - return NULL; + return ERR_PTR(err); return &power_perf_ops; } -- cgit v1.2.3 From f6c7d5fe58b4846ee0cb4b98b6042489705eced4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:04 +0200 Subject: perf_counter: theres more to overflow than writing events Prepare for more generic overflow handling. The new perf_counter_overflow() method will handle the generic bits of the counter overflow, and can return a !0 return value, in which case the counter should be (soft) disabled, so that it won't count until it's properly disabled. XXX: do powerpc and swcounter Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.812109629@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0a4d14f279a..f88c35d0710 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -732,7 +732,7 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) - perf_counter_output(counter, 1, regs); + perf_counter_overflow(counter, 1, regs); } /* -- cgit v1.2.3 From dc66270b51a62b1a6888d5309229e638a305c47b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Apr 2009 20:30:10 +1000 Subject: perf_counter: fix powerpc build Commit 4af4998b ("perf_counter: rework context time") changed struct perf_counter_context to have a 'time' field instead of a 'time_now' field, but neglected to fix the place in the powerpc perf_counter.c where the time_now field was accessed. This fixes it. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18908.31922.411398.147810@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index f88c35d0710..0e5651385dd 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -457,8 +457,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu) { counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; - counter->tstamp_running += counter->ctx->time_now - - counter->tstamp_stopped; + counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; if (is_software_counter(counter)) counter->hw_ops->enable(counter); } -- cgit v1.2.3 From f708223d49ac39f5af1643985056206c98033f5b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Apr 2009 20:30:18 +1000 Subject: perf_counter: powerpc: set sample enable bit for marked instruction events Impact: enable access to hardware feature POWER processors have the ability to "mark" a subset of the instructions and provide more detailed information on what happens to the marked instructions as they flow through the pipeline. This marking is enabled by the "sample enable" bit in MMCRA, and there are synchronization requirements around setting and clearing the bit. This adds logic to the processor-specific back-ends so that they know which events relate to marked instructions and set the sampling enable bit if any event that we want to put on the PMU is a marked instruction event. It also adds logic to the generic powerpc code to do the necessary synchronization if that bit is set. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18908.31930.1024.228867@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 28 +++++++-- arch/powerpc/kernel/power5+-pmu.c | 103 +++++++++++++++++++++++++++++- arch/powerpc/kernel/power5-pmu.c | 96 +++++++++++++++++++++++++++- arch/powerpc/kernel/power6-pmu.c | 126 ++++++++++++++++++++++++++++++++++++- arch/powerpc/kernel/ppc970-pmu.c | 72 ++++++++++++++++++++- 5 files changed, 413 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0e5651385dd..0697ade84dd 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -306,6 +306,15 @@ u64 hw_perf_save_disable(void) cpuhw->pmcs_enabled = 1; } + /* + * Disable instruction sampling if it was enabled + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mtspr(SPRN_MMCRA, + cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mb(); + } + /* * Set the 'freeze counters' bit. * The barrier is to make sure the mtspr has been @@ -347,12 +356,11 @@ void hw_perf_restore(u64 disable) * (possibly updated for removal of counters). */ if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); if (cpuhw->n_counters == 0) get_lppaca()->pmcregs_in_use = 0; - goto out; + goto out_enable; } /* @@ -385,7 +393,7 @@ void hw_perf_restore(u64 disable) * Then unfreeze the counters. */ get_lppaca()->pmcregs_in_use = 1; - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | MMCR0_FC); @@ -421,10 +429,20 @@ void hw_perf_restore(u64 disable) write_pmc(counter->hw.idx, val); perf_counter_update_userpage(counter); } - mb(); cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + + out_enable: + mb(); mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + /* + * Enable instruction sampling if necessary + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mb(); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + } + out: local_irq_restore(flags); } diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index cec21ea65b0..1222c8ea3c2 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -1,5 +1,5 @@ /* - * Performance counter support for POWER5 (not POWER5++) processors. + * Performance counter support for POWER5+/++ (not POWER5) processors. * * Copyright 2009 Paul Mackerras, IBM Corporation. * @@ -281,10 +281,107 @@ static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) return nalt; } +/* + * Map of which direct events on which PMCs are marked instruction events. + * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. + * Bit 0 is set if it is marked for all PMCs. + * The 0x80 bit indicates a byte decode PMCSEL value. + */ +static unsigned char direct_event_is_marked[0x28] = { + 0, /* 00 */ + 0x1f, /* 01 PM_IOPS_CMPL */ + 0x2, /* 02 PM_MRK_GRP_DISP */ + 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ + 0, /* 04 */ + 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ + 0x80, /* 06 */ + 0x80, /* 07 */ + 0, 0, 0,/* 08 - 0a */ + 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ + 0, /* 0c */ + 0x80, /* 0d */ + 0x80, /* 0e */ + 0, /* 0f */ + 0, /* 10 */ + 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ + 0, /* 12 */ + 0x10, /* 13 PM_MRK_GRP_CMPL */ + 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ + 0x2, /* 15 PM_MRK_GRP_ISSUED */ + 0x80, /* 16 */ + 0x80, /* 17 */ + 0, 0, 0, 0, 0, + 0x80, /* 1d */ + 0x80, /* 1e */ + 0, /* 1f */ + 0x80, /* 20 */ + 0x80, /* 21 */ + 0x80, /* 22 */ + 0x80, /* 23 */ + 0x80, /* 24 */ + 0x80, /* 25 */ + 0x80, /* 26 */ + 0x80, /* 27 */ +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int power5p_marked_instr_event(unsigned int event) +{ + int pmc, psel; + int bit, byte, unit; + u32 mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if (pmc >= 5) + return 0; + + bit = -1; + if (psel < sizeof(direct_event_is_marked)) { + if (direct_event_is_marked[psel] & (1 << pmc)) + return 1; + if (direct_event_is_marked[psel] & 0x80) + bit = 4; + else if (psel == 0x08) + bit = pmc - 1; + else if (psel == 0x10) + bit = 4 - pmc; + else if (psel == 0x1b && (pmc == 1 || pmc == 3)) + bit = 4; + } else if ((psel & 0x48) == 0x40) { + bit = psel & 7; + } else if (psel == 0x28) { + bit = pmc - 1; + } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) { + bit = 4; + } + + if (!(event & PM_BUSEVENT_MSK) || bit == -1) + return 0; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit == PM_LSU0) { + /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ + mask = 0x5dff00; + } else if (unit == PM_LSU1 && byte >= 4) { + byte -= 4; + /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */ + mask = 0x5f11c000; + } else + return 0; + + return (mask >> (byte * 8 + bit)) & 1; +} + static int power5p_compute_mmcr(unsigned int event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; + u64 mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm; int i, isbus, bit, grsel; @@ -404,6 +501,8 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; mmcr1 |= (u64)grsel << grsel_shift[bit]; } + if (power5p_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) /* select alternate byte lane */ psel |= 0x10; @@ -419,7 +518,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, if (pmc_inuse & 0x3e) mmcr[0] |= MMCR0_PMCjCE; mmcr[1] = mmcr1; - mmcr[2] = 0; + mmcr[2] = mmcra; return 0; } diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 379ed1087cc..116c4bb1809 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -290,10 +290,102 @@ static int power5_get_alternatives(unsigned int event, unsigned int alt[]) return nalt; } +/* + * Map of which direct events on which PMCs are marked instruction events. + * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. + * Bit 0 is set if it is marked for all PMCs. + * The 0x80 bit indicates a byte decode PMCSEL value. + */ +static unsigned char direct_event_is_marked[0x28] = { + 0, /* 00 */ + 0x1f, /* 01 PM_IOPS_CMPL */ + 0x2, /* 02 PM_MRK_GRP_DISP */ + 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ + 0, /* 04 */ + 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ + 0x80, /* 06 */ + 0x80, /* 07 */ + 0, 0, 0,/* 08 - 0a */ + 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ + 0, /* 0c */ + 0x80, /* 0d */ + 0x80, /* 0e */ + 0, /* 0f */ + 0, /* 10 */ + 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ + 0, /* 12 */ + 0x10, /* 13 PM_MRK_GRP_CMPL */ + 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ + 0x2, /* 15 PM_MRK_GRP_ISSUED */ + 0x80, /* 16 */ + 0x80, /* 17 */ + 0, 0, 0, 0, 0, + 0x80, /* 1d */ + 0x80, /* 1e */ + 0, /* 1f */ + 0x80, /* 20 */ + 0x80, /* 21 */ + 0x80, /* 22 */ + 0x80, /* 23 */ + 0x80, /* 24 */ + 0x80, /* 25 */ + 0x80, /* 26 */ + 0x80, /* 27 */ +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int power5_marked_instr_event(unsigned int event) +{ + int pmc, psel; + int bit, byte, unit; + u32 mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if (pmc >= 5) + return 0; + + bit = -1; + if (psel < sizeof(direct_event_is_marked)) { + if (direct_event_is_marked[psel] & (1 << pmc)) + return 1; + if (direct_event_is_marked[psel] & 0x80) + bit = 4; + else if (psel == 0x08) + bit = pmc - 1; + else if (psel == 0x10) + bit = 4 - pmc; + else if (psel == 0x1b && (pmc == 1 || pmc == 3)) + bit = 4; + } else if ((psel & 0x58) == 0x40) + bit = psel & 7; + + if (!(event & PM_BUSEVENT_MSK)) + return 0; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit == PM_LSU0) { + /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ + mask = 0x5dff00; + } else if (unit == PM_LSU1 && byte >= 4) { + byte -= 4; + /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ + mask = 0x5f00c0aa; + } else + return 0; + + return (mask >> (byte * 8 + bit)) & 1; +} + static int power5_compute_mmcr(unsigned int event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; + u64 mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; @@ -430,6 +522,8 @@ static int power5_compute_mmcr(unsigned int event[], int n_ev, grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; mmcr1 |= (u64)grsel << grsel_shift[bit]; } + if (power5_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; if (pmc <= 3) mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); hwc[i] = pmc; @@ -442,7 +536,7 @@ static int power5_compute_mmcr(unsigned int event[], int n_ev, if (pmc_inuse & 0x3e) mmcr[0] |= MMCR0_PMCjCE; mmcr[1] = mmcr1; - mmcr[2] = 0; + mmcr[2] = mmcra; return 0; } diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index b1f61f3c97b..fce1fc290a1 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -48,6 +48,127 @@ #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) #define MMCR1_PMCSEL_MSK 0xff +/* + * Map of which direct events on which PMCs are marked instruction events. + * Indexed by PMCSEL value >> 1. + * Bottom 4 bits are a map of which PMCs are interesting, + * top 4 bits say what sort of event: + * 0 = direct marked event, + * 1 = byte decode event, + * 4 = add/and event (PMC1 -> bits 0 & 4), + * 5 = add/and event (PMC1 -> bits 1 & 5), + * 6 = add/and event (PMC1 -> bits 2 & 6), + * 7 = add/and event (PMC1 -> bits 3 & 7). + */ +static unsigned char direct_event_is_marked[0x60 >> 1] = { + 0, /* 00 */ + 0, /* 02 */ + 0, /* 04 */ + 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ + 0x04, /* 08 PM_MRK_DFU_FIN */ + 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */ + 0, /* 0c */ + 0, /* 0e */ + 0x02, /* 10 PM_MRK_INST_DISP */ + 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */ + 0, /* 14 */ + 0, /* 16 */ + 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */ + 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */ + 0x01, /* 1c PM_MRK_INST_ISSUED */ + 0, /* 1e */ + 0, /* 20 */ + 0, /* 22 */ + 0, /* 24 */ + 0, /* 26 */ + 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */ + 0, /* 2a */ + 0, /* 2c */ + 0, /* 2e */ + 0x4f, /* 30 */ + 0x7f, /* 32 */ + 0x4f, /* 34 */ + 0x5f, /* 36 */ + 0x6f, /* 38 */ + 0x4f, /* 3a */ + 0, /* 3c */ + 0x08, /* 3e PM_MRK_INST_TIMEO */ + 0x1f, /* 40 */ + 0x1f, /* 42 */ + 0x1f, /* 44 */ + 0x1f, /* 46 */ + 0x1f, /* 48 */ + 0x1f, /* 4a */ + 0x1f, /* 4c */ + 0x1f, /* 4e */ + 0, /* 50 */ + 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */ + 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */ + 0x02, /* 56 PM_MRK_LD_MISS_L1 */ + 0, /* 58 */ + 0, /* 5a */ + 0, /* 5c */ + 0, /* 5e */ +}; + +/* + * Masks showing for each unit which bits are marked events. + * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0. + */ +static u32 marked_bus_events[16] = { + 0x01000000, /* direct events set 1: byte 3 bit 0 */ + 0x00010000, /* direct events set 2: byte 2 bit 0 */ + 0, 0, 0, 0, /* IDU, IFU, nest: nothing */ + 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */ + 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */ + 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */ + 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */ + 0, /* LSU set 3 */ + 0x00000010, /* VMX set 3: byte 0 bit 4 */ + 0, /* BFP set 1 */ + 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */ + 0, 0 +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int power6_marked_instr_event(unsigned int event) +{ + int pmc, psel, ptype; + int bit, byte, unit; + u32 mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */ + if (pmc >= 5) + return 0; + + bit = -1; + if (psel < sizeof(direct_event_is_marked)) { + ptype = direct_event_is_marked[psel]; + if (pmc == 0 || !(ptype & (1 << (pmc - 1)))) + return 0; + ptype >>= 4; + if (ptype == 0) + return 1; + if (ptype == 1) + bit = 0; + else + bit = ptype ^ (pmc - 1); + } else if ((psel & 0x48) == 0x40) + bit = psel & 7; + + if (!(event & PM_BUSEVENT_MSK) || bit == -1) + return 0; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + mask = marked_bus_events[unit]; + return (mask >> (byte * 8 + bit)) & 1; +} + /* * Assign PMC numbers and compute MMCR1 value for a set of events */ @@ -55,6 +176,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; + u64 mmcra = 0; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; @@ -116,6 +238,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, if (ev & PM_LLAV) mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; } + if (power6_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); } mmcr[0] = 0; @@ -124,7 +248,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, if (pmc_inuse & 0xe) mmcr[0] |= MMCR0_PMCjCE; mmcr[1] = mmcr1; - mmcr[2] = 0; + mmcr[2] = mmcra; return 0; } diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index c3256580be1..aed8ccd7c07 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -19,6 +19,8 @@ #define PM_PMC_MSK 0xf #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ #define PM_UNIT_MSK 0xf +#define PM_SPCSEL_SH 6 +#define PM_SPCSEL_MSK 3 #define PM_BYTE_SH 4 /* Byte number of event bus to use */ #define PM_BYTE_MSK 3 #define PM_PMCSEL_MSK 0xf @@ -88,8 +90,11 @@ static short mmcr1_adder_bits[8] = { * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 - * <><>[ >[ >[ >< >< >< >< ><><><><><><><><> - * T0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 + * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><> + * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 + * + * SP - SPCSEL constraint + * 48-49: SPCSEL value 0x3_0000_0000_0000 * * T0 - TTM0 constraint * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 @@ -126,6 +131,57 @@ static short mmcr1_adder_bits[8] = { * 0-13: Count of events needing PMC2..PMC8 */ +static unsigned char direct_marked_event[8] = { + (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ + (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ + (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */ + (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ + (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */ + (1<<3) | (1<<4) | (1<<5), + /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ + (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ + (1<<4) /* PMC8: PM_MRK_LSU_FIN */ +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int p970_marked_instr_event(unsigned int event) +{ + int pmc, psel, unit, byte, bit; + unsigned int mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if (pmc) { + if (direct_marked_event[pmc - 1] & (1 << psel)) + return 1; + if (psel == 0) /* add events */ + bit = (pmc <= 4)? pmc - 1: 8 - pmc; + else if (psel == 7 || psel == 13) /* decode events */ + bit = 4; + else + return 0; + } else + bit = psel; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + mask = 0; + switch (unit) { + case PM_VPU: + mask = 0x4c; /* byte 0 bits 2,3,6 */ + case PM_LSU0: + /* byte 2 bits 0,2,3,4,6; all of byte 1 */ + mask = 0x085dff00; + case PM_LSU1L: + mask = 0x50 << 24; /* byte 3 bits 4,6 */ + break; + } + return (mask >> (byte * 8 + bit)) & 1; +} + /* Masks and values for using events from the various units */ static u64 unit_cons[PM_LASTUNIT+1][2] = { [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, @@ -138,7 +194,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) { - int pmc, byte, unit, sh; + int pmc, byte, unit, sh, spcsel; u64 mask = 0, value = 0; int grp = -1; @@ -177,6 +233,11 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) mask |= 0x800000000ull; value |= 0x100000000ull; } + spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; + if (spcsel) { + mask |= 3ull << 48; + value |= (u64)spcsel << 48; + } *maskp = mask; *valp = value; return 0; @@ -209,6 +270,7 @@ static int p970_compute_mmcr(unsigned int event[], int n_ev, unsigned char ttmuse[2]; unsigned char pmcsel[8]; int i; + int spcsel; if (n_ev > 8) return -1; @@ -316,6 +378,10 @@ static int p970_compute_mmcr(unsigned int event[], int n_ev, } pmcsel[pmc] = psel; hwc[i] = pmc; + spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; + mmcr1 |= spcsel; + if (p970_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; } for (pmc = 0; pmc < 2; ++pmc) mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); -- cgit v1.2.3 From 78f13e9525ba777da25c4ddab89f28e9366a8b7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:33 +0200 Subject: perf_counter: allow for data addresses to be recorded Paul suggested we allow for data addresses to be recorded along with the traditional IPs as power can provide these. For now, only the software pagefault events provide data addresses, but in the future power might as well for some events. x86 doesn't seem capable of providing this atm. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.394816925@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 2 +- arch/powerpc/mm/fault.c | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0697ade84dd..c9d019f1907 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) - perf_counter_overflow(counter, 1, regs); + perf_counter_overflow(counter, 1, regs, 0); } /* diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 17bbf6f91fb..ac0e112031b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -312,7 +312,8 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { preempt_disable(); @@ -322,7 +323,8 @@ good_area: #endif } else { current->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + regs, address); } up_read(&mm->mmap_sem); return 0; -- cgit v1.2.3 From ca8f2d7f019a8547f39ddb9ed0144932f12807f2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 9 Apr 2009 14:42:56 +1000 Subject: perf_counter: powerpc: add nmi_enter/nmi_exit calls Impact: fix potential deadlocks on powerpc Now that the core is using in_nmi() (added in e30e08f6, "perf_counter: fix NMI race in task clock"), we need the powerpc perf_counter_interrupt to call nmi_enter() and nmi_exit() in those cases where the interrupt happens when interrupts are soft-disabled. If interrupts were soft-enabled, we can treat it as a regular interrupt and do irq_enter/irq_exit around the whole routine. This lets us get rid of the test_perf_counter_pending() call at the end of perf_counter_interrupt, thus simplifying things a little. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18909.31952.873098.336615@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index c9d019f1907..bd76d0fa2c3 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -714,7 +714,7 @@ hw_perf_counter_init(struct perf_counter *counter) * here so there is no possibility of being interrupted. */ static void record_and_restart(struct perf_counter *counter, long val, - struct pt_regs *regs) + struct pt_regs *regs, int nmi) { s64 prev, delta, left; int record = 0; @@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) - perf_counter_overflow(counter, 1, regs, 0); + perf_counter_overflow(counter, nmi, regs, 0); } /* @@ -762,6 +762,17 @@ static void perf_counter_interrupt(struct pt_regs *regs) struct perf_counter *counter; long val; int found = 0; + int nmi; + + /* + * If interrupts were soft-disabled when this PMU interrupt + * occurred, treat it as an NMI. + */ + nmi = !regs->softe; + if (nmi) + nmi_enter(); + else + irq_enter(); for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; @@ -769,7 +780,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) if ((int)val < 0) { /* counter has overflowed */ found = 1; - record_and_restart(counter, val, regs); + record_and_restart(counter, val, regs, nmi); } } @@ -796,18 +807,10 @@ static void perf_counter_interrupt(struct pt_regs *regs) */ mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); - /* - * If we need a wakeup, check whether interrupts were soft-enabled - * when we took the interrupt. If they were, we can wake stuff up - * immediately; otherwise we'll have do the wakeup when interrupts - * get soft-enabled. - */ - if (test_perf_counter_pending() && regs->softe) { - irq_enter(); - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (nmi) + nmi_exit(); + else irq_exit(); - } } void hw_perf_counter_setup(int cpu) -- cgit v1.2.3 From d5dedd4507d307eb3f35f21b6e16f336fdc0d82a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:59:21 -0700 Subject: irq: change ->set_affinity() to return status according to Ingo, change set_affinity() in irq_chip should return int, because that way we can handle failure cases in a much cleaner way, in the genirq layer. v2: fix two typos [ Impact: extend API ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: linux-arch@vger.kernel.org LKML-Reference: <49F654E9.4070809@kernel.org> Signed-off-by: Ingo Molnar --- arch/powerpc/platforms/pseries/xics.c | 12 +++++++----- arch/powerpc/sysdev/mpic.c | 4 +++- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 80b513449f4..be3581a8c29 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) +static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) { unsigned int irq; int status; @@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) irq = (unsigned int)irq_map[virq].hwirq; if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) - return; + return -1; status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", __func__, irq, status); - return; + return -1; } /* @@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) printk(KERN_WARNING "%s: No online cpus in the mask %s for irq %d\n", __func__, cpulist, virq); - return; + return -1; } status = rtas_call(ibm_set_xive, 3, 1, NULL, @@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) if (status) { printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", __func__, irq, status); - return; + return -1; } + + return 0; } static struct irq_chip xics_pic_direct = { diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 21b95670159..f4cbd15cf22 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); } + + return 0; } static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) -- cgit v1.2.3 From 4aeb0b4239bb3b67ed402cb9cef3e000c892cadf Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:03 +0200 Subject: perfcounters: rename struct hw_perf_counter_ops into struct pmu This patch renames struct hw_perf_counter_ops into struct pmu. It introduces a structure to describe a cpu specific pmu (performance monitoring unit). It may contain ops and data. The new name of the structure fits better, is shorter, and thus better to handle. Where it was appropriate, names of function and variable have been changed too. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-7-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bd76d0fa2c3..d9bbe5efc64 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -256,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) return 0; } -static void power_perf_read(struct perf_counter *counter) +static void power_pmu_read(struct perf_counter *counter) { long val, delta, prev; @@ -405,7 +405,7 @@ void hw_perf_restore(u64 disable) for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { - power_perf_read(counter); + power_pmu_read(counter); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; } @@ -477,7 +477,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu) counter->oncpu = cpu; counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; if (is_software_counter(counter)) - counter->hw_ops->enable(counter); + counter->pmu->enable(counter); } /* @@ -533,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, * re-enable the PMU in order to get hw_perf_restore to do the * actual work of reconfiguring the PMU. */ -static int power_perf_enable(struct perf_counter *counter) +static int power_pmu_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; unsigned long flags; @@ -573,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter) /* * Remove a counter from the PMU. */ -static void power_perf_disable(struct perf_counter *counter) +static void power_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; long i; @@ -583,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter) local_irq_save(flags); pmudis = hw_perf_save_disable(); - power_perf_read(counter); + power_pmu_read(counter); cpuhw = &__get_cpu_var(cpu_hw_counters); for (i = 0; i < cpuhw->n_counters; ++i) { @@ -607,10 +607,10 @@ static void power_perf_disable(struct perf_counter *counter) local_irq_restore(flags); } -struct hw_perf_counter_ops power_perf_ops = { - .enable = power_perf_enable, - .disable = power_perf_disable, - .read = power_perf_read +struct pmu power_pmu = { + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, }; /* Number of perf_counters counting hardware events */ @@ -631,8 +631,7 @@ static void hw_perf_counter_destroy(struct perf_counter *counter) } } -const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter) +const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { unsigned long ev; struct perf_counter *ctrs[MAX_HWCOUNTERS]; @@ -705,7 +704,7 @@ hw_perf_counter_init(struct perf_counter *counter) if (err) return ERR_PTR(err); - return &power_perf_ops; + return &power_pmu; } /* -- cgit v1.2.3 From ab7ef2e50a557af92f4f90689f51fadadafc16b2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Apr 2009 22:38:51 +1000 Subject: perf_counter: powerpc: allow use of limited-function counters POWER5+ and POWER6 have two hardware counters with limited functionality: PMC5 counts instructions completed in run state and PMC6 counts cycles in run state. (Run state is the state when a hardware RUN bit is 1; the idle task clears RUN while waiting for work to do and sets it when there is work to do.) These counters can't be written to by the kernel, can't generate interrupts, and don't obey the freeze conditions. That means we can only use them for per-task counters (where we know we'll always be in run state; we can't put a per-task counter on an idle task), and only if we don't want interrupts and we do want to count in all processor modes. Obviously some counters can't go on a limited hardware counter, but there are also situations where we can only put a counter on a limited hardware counter - if there are already counters on that exclude some processor modes and we want to put on a per-task cycle or instruction counter that doesn't exclude any processor mode, it could go on if it can use a limited hardware counter. To keep track of these constraints, this adds a flags argument to the processor-specific get_alternatives() functions, with three bits defined: one to say that we can accept alternative event codes that go on limited counters, one to say we only want alternatives on limited counters, and one to say that this is a per-task counter and therefore events that are gated by run state are equivalent to those that aren't (e.g. a "cycles" event is equivalent to a "cycles in run state" event). These flags are computed for each counter and stored in the counter->hw.counter_base field (slightly wonky name for what it does, but it was an existing unused field). Since the limited counters don't freeze when we freeze the other counters, we need some special handling to avoid getting skew between things counted on the limited counters and those counted on normal counters. To minimize this skew, if we are using any limited counters, we read PMC5 and PMC6 immediately after setting and clearing the freeze bit. This is done in a single asm in the new write_mmcr0() function. The code here is specific to PMC5 and PMC6 being the limited hardware counters. Being more general (e.g. having a bitmap of limited hardware counter numbers) would have meant more complex code to read the limited counters when freezing and unfreezing the normal counters, with conditional branches, which would have increased the skew. Since it isn't necessary for the code to be more general at this stage, it isn't. This also extends the back-ends for POWER5+ and POWER6 to be able to handle up to 6 counters rather than the 4 they previously handled. Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Robert Richter LKML-Reference: <18936.19035.163066.892208@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 13 +- arch/powerpc/kernel/perf_counter.c | 297 ++++++++++++++++++++++++++++---- arch/powerpc/kernel/power4-pmu.c | 3 +- arch/powerpc/kernel/power5+-pmu.c | 117 +++++++++++-- arch/powerpc/kernel/power5-pmu.c | 3 +- arch/powerpc/kernel/power6-pmu.c | 119 +++++++++++-- arch/powerpc/kernel/ppc970-pmu.c | 3 +- 7 files changed, 479 insertions(+), 76 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 9d7ff6d7fb5..56d66c38143 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -12,6 +12,7 @@ #define MAX_HWCOUNTERS 8 #define MAX_EVENT_ALTERNATIVES 8 +#define MAX_LIMITED_HWCOUNTERS 2 /* * This struct provides the constants and functions needed to @@ -25,14 +26,24 @@ struct power_pmu { int (*compute_mmcr)(unsigned int events[], int n_ev, unsigned int hwc[], u64 mmcr[]); int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); - int (*get_alternatives)(unsigned int event, unsigned int alt[]); + int (*get_alternatives)(unsigned int event, unsigned int flags, + unsigned int alt[]); void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); + int (*limited_pmc_event)(unsigned int event); + int limited_pmc5_6; /* PMC5 and PMC6 have limited function */ int n_generic; int *generic_events; }; extern struct power_pmu *ppmu; +/* + * Values for flags to get_alternatives() + */ +#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ +#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ +#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ + /* * The power_pmu.get_constraint function returns a 64-bit value and * a 64-bit mask that express the constraints between this event and diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index d9bbe5efc64..15cdc8e6722 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -23,10 +23,14 @@ struct cpu_hw_counters { int n_percpu; int disabled; int n_added; + int n_limited; + u8 pmcs_enabled; struct perf_counter *counter[MAX_HWCOUNTERS]; unsigned int events[MAX_HWCOUNTERS]; + unsigned int flags[MAX_HWCOUNTERS]; u64 mmcr[3]; - u8 pmcs_enabled; + struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; + u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; }; DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); @@ -127,7 +131,8 @@ static void write_pmc(int idx, unsigned long val) * and see if any combination of alternative codes is feasible. * The feasible set is returned in event[]. */ -static int power_check_constraints(unsigned int event[], int n_ev) +static int power_check_constraints(unsigned int event[], unsigned int cflags[], + int n_ev) { u64 mask, value, nv; unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; @@ -144,11 +149,15 @@ static int power_check_constraints(unsigned int event[], int n_ev) /* First see if the events will go on as-is */ for (i = 0; i < n_ev; ++i) { - alternatives[i][0] = event[i]; + if ((cflags[i] & PPMU_LIMITED_PMC_REQD) + && !ppmu->limited_pmc_event(event[i])) { + ppmu->get_alternatives(event[i], cflags[i], + alternatives[i]); + event[i] = alternatives[i][0]; + } if (ppmu->get_constraint(event[i], &amasks[i][0], &avalues[i][0])) return -1; - choice[i] = 0; } value = mask = 0; for (i = 0; i < n_ev; ++i) { @@ -166,7 +175,9 @@ static int power_check_constraints(unsigned int event[], int n_ev) if (!ppmu->get_alternatives) return -1; for (i = 0; i < n_ev; ++i) { - n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]); + choice[i] = 0; + n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], + alternatives[i]); for (j = 1; j < n_alt[i]; ++j) ppmu->get_constraint(alternatives[i][j], &amasks[i][j], &avalues[i][j]); @@ -231,28 +242,41 @@ static int power_check_constraints(unsigned int event[], int n_ev) * exclude_{user,kernel,hv} with each other and any previously * added counters. */ -static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) +static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], + int n_prev, int n_new) { - int eu, ek, eh; - int i, n; + int eu = 0, ek = 0, eh = 0; + int i, n, first; struct perf_counter *counter; n = n_prev + n_new; if (n <= 1) return 0; - eu = ctrs[0]->hw_event.exclude_user; - ek = ctrs[0]->hw_event.exclude_kernel; - eh = ctrs[0]->hw_event.exclude_hv; - if (n_prev == 0) - n_prev = 1; - for (i = n_prev; i < n; ++i) { + first = 1; + for (i = 0; i < n; ++i) { + if (cflags[i] & PPMU_LIMITED_PMC_OK) { + cflags[i] &= ~PPMU_LIMITED_PMC_REQD; + continue; + } counter = ctrs[i]; - if (counter->hw_event.exclude_user != eu || - counter->hw_event.exclude_kernel != ek || - counter->hw_event.exclude_hv != eh) + if (first) { + eu = counter->hw_event.exclude_user; + ek = counter->hw_event.exclude_kernel; + eh = counter->hw_event.exclude_hv; + first = 0; + } else if (counter->hw_event.exclude_user != eu || + counter->hw_event.exclude_kernel != ek || + counter->hw_event.exclude_hv != eh) { return -EAGAIN; + } } + + if (eu || ek || eh) + for (i = 0; i < n; ++i) + if (cflags[i] & PPMU_LIMITED_PMC_OK) + cflags[i] |= PPMU_LIMITED_PMC_REQD; + return 0; } @@ -279,6 +303,85 @@ static void power_pmu_read(struct perf_counter *counter) atomic64_sub(delta, &counter->hw.period_left); } +/* + * On some machines, PMC5 and PMC6 can't be written, don't respect + * the freeze conditions, and don't generate interrupts. This tells + * us if `counter' is using such a PMC. + */ +static int is_limited_pmc(int pmcnum) +{ + return ppmu->limited_pmc5_6 && (pmcnum == 5 || pmcnum == 6); +} + +static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_counter *counter; + u64 val, prev, delta; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + counter = cpuhw->limited_counter[i]; + if (!counter->hw.idx) + continue; + val = (counter->hw.idx == 5) ? pmc5 : pmc6; + prev = atomic64_read(&counter->hw.prev_count); + counter->hw.idx = 0; + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &counter->count); + } +} + +static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_counter *counter; + u64 val; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + counter = cpuhw->limited_counter[i]; + counter->hw.idx = cpuhw->limited_hwidx[i]; + val = (counter->hw.idx == 5) ? pmc5 : pmc6; + atomic64_set(&counter->hw.prev_count, val); + perf_counter_update_userpage(counter); + } +} + +/* + * Since limited counters don't respect the freeze conditions, we + * have to read them immediately after freezing or unfreezing the + * other counters. We try to keep the values from the limited + * counters as consistent as possible by keeping the delay (in + * cycles and instructions) between freezing/unfreezing and reading + * the limited counters as small and consistent as possible. + * Therefore, if any limited counters are in use, we read them + * both, and always in the same order, to minimize variability, + * and do it inside the same asm that writes MMCR0. + */ +static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) +{ + unsigned long pmc5, pmc6; + + if (!cpuhw->n_limited) { + mtspr(SPRN_MMCR0, mmcr0); + return; + } + + /* + * Write MMCR0, then read PMC5 and PMC6 immediately. + */ + asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" + : "=&r" (pmc5), "=&r" (pmc6) + : "r" (mmcr0), "i" (SPRN_MMCR0), + "i" (SPRN_PMC5), "i" (SPRN_PMC6)); + + if (mmcr0 & MMCR0_FC) + freeze_limited_counters(cpuhw, pmc5, pmc6); + else + thaw_limited_counters(cpuhw, pmc5, pmc6); +} + /* * Disable all counters to prevent PMU interrupts and to allow * counters to be added or removed. @@ -321,7 +424,7 @@ u64 hw_perf_save_disable(void) * executed and the PMU has frozen the counters * before we return. */ - mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); + write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); mb(); } local_irq_restore(flags); @@ -342,6 +445,8 @@ void hw_perf_restore(u64 disable) unsigned long val; s64 left; unsigned int hwc_index[MAX_HWCOUNTERS]; + int n_lim; + int idx; if (disable) return; @@ -414,10 +519,18 @@ void hw_perf_restore(u64 disable) /* * Initialize the PMCs for all the new and moved counters. */ + cpuhw->n_limited = n_lim = 0; for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter->hw.idx) continue; + idx = hwc_index[i] + 1; + if (is_limited_pmc(idx)) { + cpuhw->limited_counter[n_lim] = counter; + cpuhw->limited_hwidx[n_lim] = idx; + ++n_lim; + continue; + } val = 0; if (counter->hw_event.irq_period) { left = atomic64_read(&counter->hw.period_left); @@ -425,15 +538,16 @@ void hw_perf_restore(u64 disable) val = 0x80000000L - left; } atomic64_set(&counter->hw.prev_count, val); - counter->hw.idx = hwc_index[i] + 1; - write_pmc(counter->hw.idx, val); + counter->hw.idx = idx; + write_pmc(idx, val); perf_counter_update_userpage(counter); } + cpuhw->n_limited = n_lim; cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; out_enable: mb(); - mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, cpuhw->mmcr[0]); /* * Enable instruction sampling if necessary @@ -448,7 +562,8 @@ void hw_perf_restore(u64 disable) } static int collect_events(struct perf_counter *group, int max_count, - struct perf_counter *ctrs[], unsigned int *events) + struct perf_counter *ctrs[], unsigned int *events, + unsigned int *flags) { int n = 0; struct perf_counter *counter; @@ -457,6 +572,7 @@ static int collect_events(struct perf_counter *group, int max_count, if (n >= max_count) return -1; ctrs[n] = group; + flags[n] = group->hw.counter_base; events[n++] = group->hw.config; } list_for_each_entry(counter, &group->sibling_list, list_entry) { @@ -465,6 +581,7 @@ static int collect_events(struct perf_counter *group, int max_count, if (n >= max_count) return -1; ctrs[n] = counter; + flags[n] = counter->hw.counter_base; events[n++] = counter->hw.config; } } @@ -497,12 +614,14 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, cpuhw = &__get_cpu_var(cpu_hw_counters); n0 = cpuhw->n_counters; n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->counter[n0], &cpuhw->events[n0]); + &cpuhw->counter[n0], &cpuhw->events[n0], + &cpuhw->flags[n0]); if (n < 0) return -EAGAIN; - if (check_excludes(cpuhw->counter, n0, n)) + if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) return -EAGAIN; - if (power_check_constraints(cpuhw->events, n + n0)) + i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0); + if (i < 0) return -EAGAIN; cpuhw->n_counters = n0 + n; cpuhw->n_added += n; @@ -554,9 +673,10 @@ static int power_pmu_enable(struct perf_counter *counter) goto out; cpuhw->counter[n0] = counter; cpuhw->events[n0] = counter->hw.config; - if (check_excludes(cpuhw->counter, n0, 1)) + cpuhw->flags[n0] = counter->hw.counter_base; + if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) goto out; - if (power_check_constraints(cpuhw->events, n0 + 1)) + if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1)) goto out; counter->hw.config = cpuhw->events[n0]; @@ -592,12 +712,24 @@ static void power_pmu_disable(struct perf_counter *counter) cpuhw->counter[i-1] = cpuhw->counter[i]; --cpuhw->n_counters; ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; + if (counter->hw.idx) { + write_pmc(counter->hw.idx, 0); + counter->hw.idx = 0; + } perf_counter_update_userpage(counter); break; } } + for (i = 0; i < cpuhw->n_limited; ++i) + if (counter == cpuhw->limited_counter[i]) + break; + if (i < cpuhw->n_limited) { + while (++i < cpuhw->n_limited) { + cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; + cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; + } + --cpuhw->n_limited; + } if (cpuhw->n_counters == 0) { /* disable exceptions if no counters are running */ cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); @@ -613,6 +745,61 @@ struct pmu power_pmu = { .read = power_pmu_read, }; +/* + * Return 1 if we might be able to put counter on a limited PMC, + * or 0 if not. + * A counter can only go on a limited PMC if it counts something + * that a limited PMC can count, doesn't require interrupts, and + * doesn't exclude any processor mode. + */ +static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev, + unsigned int flags) +{ + int n; + unsigned int alt[MAX_EVENT_ALTERNATIVES]; + + if (counter->hw_event.exclude_user + || counter->hw_event.exclude_kernel + || counter->hw_event.exclude_hv + || counter->hw_event.irq_period) + return 0; + + if (ppmu->limited_pmc_event(ev)) + return 1; + + /* + * The requested event isn't on a limited PMC already; + * see if any alternative code goes on a limited PMC. + */ + if (!ppmu->get_alternatives) + return 0; + + flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; + n = ppmu->get_alternatives(ev, flags, alt); + if (n) + return alt[0]; + + return 0; +} + +/* + * Find an alternative event that goes on a normal PMC, if possible, + * and return the event code, or 0 if there is no such alternative. + * (Note: event code 0 is "don't count" on all machines.) + */ +static unsigned long normal_pmc_alternative(unsigned long ev, + unsigned long flags) +{ + unsigned int alt[MAX_EVENT_ALTERNATIVES]; + int n; + + flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); + n = ppmu->get_alternatives(ev, flags, alt); + if (!n) + return 0; + return alt[0]; +} + /* Number of perf_counters counting hardware events */ static atomic_t num_counters; /* Used to avoid races in calling reserve/release_pmc_hardware */ @@ -633,9 +820,10 @@ static void hw_perf_counter_destroy(struct perf_counter *counter) const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { - unsigned long ev; + unsigned long ev, flags; struct perf_counter *ctrs[MAX_HWCOUNTERS]; unsigned int events[MAX_HWCOUNTERS]; + unsigned int cflags[MAX_HWCOUNTERS]; int n; int err; @@ -661,7 +849,36 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) */ if (!firmware_has_feature(FW_FEATURE_LPAR)) counter->hw_event.exclude_hv = 0; - + + /* + * If this is a per-task counter, then we can use + * PM_RUN_* events interchangeably with their non RUN_* + * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. + * XXX we should check if the task is an idle task. + */ + flags = 0; + if (counter->ctx->task) + flags |= PPMU_ONLY_COUNT_RUN; + + /* + * If this machine has limited counters, check whether this + * event could go on a limited counter. + */ + if (ppmu->limited_pmc5_6) { + if (can_go_on_limited_pmc(counter, ev, flags)) { + flags |= PPMU_LIMITED_PMC_OK; + } else if (ppmu->limited_pmc_event(ev)) { + /* + * The requested event is on a limited PMC, + * but we can't use a limited PMC; see if any + * alternative goes on a normal PMC. + */ + ev = normal_pmc_alternative(ev, flags); + if (!ev) + return ERR_PTR(-EINVAL); + } + } + /* * If this is in a group, check if it can go on with all the * other hardware counters in the group. We assume the counter @@ -670,18 +887,20 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) n = 0; if (counter->group_leader != counter) { n = collect_events(counter->group_leader, ppmu->n_counter - 1, - ctrs, events); + ctrs, events, cflags); if (n < 0) return ERR_PTR(-EINVAL); } events[n] = ev; ctrs[n] = counter; - if (check_excludes(ctrs, n, 1)) + cflags[n] = flags; + if (check_excludes(ctrs, cflags, n, 1)) return ERR_PTR(-EINVAL); - if (power_check_constraints(events, n + 1)) + if (power_check_constraints(events, cflags, n + 1)) return ERR_PTR(-EINVAL); counter->hw.config = events[n]; + counter->hw.counter_base = cflags[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); /* @@ -763,6 +982,10 @@ static void perf_counter_interrupt(struct pt_regs *regs) int found = 0; int nmi; + if (cpuhw->n_limited) + freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), + mfspr(SPRN_PMC6)); + /* * If interrupts were soft-disabled when this PMU interrupt * occurred, treat it as an NMI. @@ -775,6 +998,8 @@ static void perf_counter_interrupt(struct pt_regs *regs) for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; + if (is_limited_pmc(counter->hw.idx)) + continue; val = read_pmc(counter->hw.idx); if ((int)val < 0) { /* counter has overflowed */ @@ -791,6 +1016,8 @@ static void perf_counter_interrupt(struct pt_regs *regs) */ if (!found) { for (i = 0; i < ppmu->n_counter; ++i) { + if (is_limited_pmc(i + 1)) + continue; val = read_pmc(i + 1); if ((int)val < 0) write_pmc(i + 1, 0); @@ -804,7 +1031,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) * XXX might want to use MSR.PM to keep the counters frozen until * we get back out of this interrupt. */ - mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, cpuhw->mmcr[0]); if (nmi) nmi_exit(); diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 1407b19ab61..744a2756958 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -320,7 +320,8 @@ static unsigned int ppc_inst_cmpl[] = { 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 }; -static int p4_get_alternatives(unsigned int event, unsigned int alt[]) +static int p4_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { int i, j, na; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 1222c8ea3c2..8154eaa2404 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -78,8 +78,8 @@ * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 - * [ ><><>< ><> <><>[ > < >< >< >< ><><><><> - * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1 + * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> + * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 * * NC - number of counters * 51: NC error 0x0008_0000_0000_0000 @@ -105,18 +105,18 @@ * 30: IDU|GRS events needed 0x00_4000_0000 * * B0 - * 20-23: Byte 0 event source 0x00f0_0000 + * 24-27: Byte 0 event source 0x0f00_0000 * Encoding as for the event code * * B1, B2, B3 - * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources + * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources * - * P4 - * 7: P1 error 0x80 - * 6-7: Count of events needing PMC4 + * P6 + * 11: P6 error 0x800 + * 10-11: Count of events needing PMC6 * - * P1..P3 - * 0-6: Count of events needing PMC1..PMC3 + * P1..P5 + * 0-9: Count of events needing PMC1..PMC5 */ static const int grsel_shift[8] = { @@ -143,11 +143,13 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { - if (pmc > 4) + if (pmc > 6) return -1; sh = (pmc - 1) * 2; mask |= 2 << sh; value |= 1 << sh; + if (pmc >= 5 && !(event == 0x500009 || event == 0x600005)) + return -1; } if (event & PM_BUSEVENT_MSK) { unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; @@ -173,16 +175,26 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; } /* Set byte lane select field */ - mask |= 0xfULL << (20 - 4 * byte); - value |= (u64)unit << (20 - 4 * byte); + mask |= 0xfULL << (24 - 4 * byte); + value |= (u64)unit << (24 - 4 * byte); + } + if (pmc < 5) { + /* need a counter from PMC1-4 set */ + mask |= 0x8000000000000ull; + value |= 0x1000000000000ull; } - mask |= 0x8000000000000ull; - value |= 0x1000000000000ull; *maskp = mask; *valp = value; return 0; } +static int power5p_limited_pmc_event(unsigned int event) +{ + int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + + return pmc == 5 || pmc == 6; +} + #define MAX_ALT 3 /* at most 3 alternatives for any event */ static const unsigned int event_alternatives[][MAX_ALT] = { @@ -193,6 +205,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = { { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ + { 0x100005, 0x600005 }, /* PM_RUN_CYC */ { 0x100009, 0x200009 }, /* PM_INST_CMPL */ { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ { 0x300009, 0x400009 }, /* PM_INST_DISP */ @@ -260,24 +273,85 @@ static int find_alternative_bdecode(unsigned int event) return -1; } -static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) +static int power5p_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { int i, j, ae, nalt = 1; + int nlim; alt[0] = event; nalt = 1; + nlim = power5p_limited_pmc_event(event); i = find_alternative(event); if (i >= 0) { for (j = 0; j < MAX_ALT; ++j) { ae = event_alternatives[i][j]; if (ae && ae != event) alt[nalt++] = ae; + nlim += power5p_limited_pmc_event(ae); } } else { ae = find_alternative_bdecode(event); if (ae > 0) alt[nalt++] = ae; } + + if (flags & PPMU_ONLY_COUNT_RUN) { + /* + * We're only counting in RUN state, + * so PM_CYC is equivalent to PM_RUN_CYC + * and PM_INST_CMPL === PM_RUN_INST_CMPL. + * This doesn't include alternatives that don't provide + * any extra flexibility in assigning PMCs (e.g. + * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC). + * Note that even with these additional alternatives + * we never end up with more than 3 alternatives for any event. + */ + j = nalt; + for (i = 0; i < nalt; ++i) { + switch (alt[i]) { + case 0xf: /* PM_CYC */ + alt[j++] = 0x600005; /* PM_RUN_CYC */ + ++nlim; + break; + case 0x600005: /* PM_RUN_CYC */ + alt[j++] = 0xf; + break; + case 0x100009: /* PM_INST_CMPL */ + alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ + ++nlim; + break; + case 0x500009: /* PM_RUN_INST_CMPL */ + alt[j++] = 0x100009; /* PM_INST_CMPL */ + alt[j++] = 0x200009; + break; + } + } + nalt = j; + } + + if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { + /* remove the limited PMC events */ + j = 0; + for (i = 0; i < nalt; ++i) { + if (!power5p_limited_pmc_event(alt[i])) { + alt[j] = alt[i]; + ++j; + } + } + nalt = j; + } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { + /* remove all but the limited PMC events */ + j = 0; + for (i = 0; i < nalt; ++i) { + if (power5p_limited_pmc_event(alt[i])) { + alt[j] = alt[i]; + ++j; + } + } + nalt = j; + } + return nalt; } @@ -390,7 +464,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, unsigned char unituse[16]; int ttmuse; - if (n_ev > 4) + if (n_ev > 6) return -1; /* First pass to count resource use */ @@ -399,7 +473,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, for (i = 0; i < n_ev; ++i) { pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { - if (pmc > 4) + if (pmc > 6) return -1; if (pmc_inuse & (1 << (pmc - 1))) return -1; @@ -488,13 +562,16 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, if (pmc >= 4) return -1; pmc_inuse |= 1 << pmc; - } else { + } else if (pmc <= 4) { /* Direct event */ --pmc; if (isbus && (byte & 2) && (psel == 8 || psel == 0x10 || psel == 0x28)) /* add events on higher-numbered bus */ mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + } else { + /* Instructions or run cycles on PMC5/6 */ + --pmc; } if (isbus && unit == PM_GRS) { bit = psel & 7; @@ -538,7 +615,7 @@ static int power5p_generic_events[] = { }; struct power_pmu power5p_pmu = { - .n_counter = 4, + .n_counter = 6, .max_alternatives = MAX_ALT, .add_fields = 0x7000000000055ull, .test_adder = 0x3000040000000ull, @@ -548,4 +625,6 @@ struct power_pmu power5p_pmu = { .disable_pmc = power5p_disable_pmc, .n_generic = ARRAY_SIZE(power5p_generic_events), .generic_events = power5p_generic_events, + .limited_pmc5_6 = 1, + .limited_pmc_event = power5p_limited_pmc_event, }; diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 116c4bb1809..6e667dc8647 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -269,7 +269,8 @@ static int find_alternative_bdecode(unsigned int event) return -1; } -static int power5_get_alternatives(unsigned int event, unsigned int alt[]) +static int power5_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { int i, j, ae, nalt = 1; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index fce1fc290a1..d44049f0ae2 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -182,7 +182,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, unsigned int ttmset = 0; unsigned int pmc_inuse = 0; - if (n_ev > 4) + if (n_ev > 6) return -1; for (i = 0; i < n_ev; ++i) { pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; @@ -202,6 +202,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, for (pmc = 0; pmc < 4; ++pmc) if (!(pmc_inuse & (1 << pmc))) break; + if (pmc >= 4) + return -1; pmc_inuse |= 1 << pmc; } hwc[i] = pmc; @@ -240,7 +242,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, } if (power6_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; - mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); + if (pmc < 4) + mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); } mmcr[0] = 0; if (pmc_inuse & 1) @@ -256,19 +259,20 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, * Layout of constraint bits: * * 0-1 add field: number of uses of PMC1 (max 1) - * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4 - * 8-10 select field: nest (subunit) event selector + * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6 + * 12-15 add field: number of uses of PMC1-4 (max 4) * 16-19 select field: unit on byte 0 of event bus * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 + * 32-34 select field: nest (subunit) event selector */ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) { - int pmc, byte, sh; - unsigned int mask = 0, value = 0; + int pmc, byte, sh, subunit; + u64 mask = 0, value = 0; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { - if (pmc > 4) + if (pmc > 4 && !(event == 0x500009 || event == 0x600005)) return -1; sh = (pmc - 1) * 2; mask |= 2 << sh; @@ -276,26 +280,38 @@ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) } if (event & PM_BUSEVENT_MSK) { byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - sh = byte * 4; + sh = byte * 4 + (16 - PM_UNIT_SH); mask |= PM_UNIT_MSKS << sh; - value |= (event & PM_UNIT_MSKS) << sh; + value |= (u64)(event & PM_UNIT_MSKS) << sh; if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { - mask |= PM_SUBUNIT_MSKS; - value |= event & PM_SUBUNIT_MSKS; + subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; + mask |= (u64)PM_SUBUNIT_MSK << 32; + value |= (u64)subunit << 32; } } + if (pmc <= 4) { + mask |= 0x8000; /* add field for count of PMC1-4 uses */ + value |= 0x1000; + } *maskp = mask; *valp = value; return 0; } +static int p6_limited_pmc_event(unsigned int event) +{ + int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + + return pmc == 5 || pmc == 6; +} + #define MAX_ALT 4 /* at most 4 alternatives for any event */ static const unsigned int event_alternatives[][MAX_ALT] = { { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ - { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */ + { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */ { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ { 0x10000e, 0x400010 }, /* PM_PURR */ { 0x100010, 0x4000f8 }, /* PM_FLUSH */ @@ -340,13 +356,15 @@ static int find_alternatives_list(unsigned int event) return -1; } -static int p6_get_alternatives(unsigned int event, unsigned int alt[]) +static int p6_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { - int i, j; + int i, j, nlim; unsigned int aevent, psel, pmc; unsigned int nalt = 1; alt[0] = event; + nlim = p6_limited_pmc_event(event); /* check the alternatives table */ i = find_alternatives_list(event); @@ -358,6 +376,7 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[]) break; if (aevent != event) alt[nalt++] = aevent; + nlim += p6_limited_pmc_event(aevent); } } else { @@ -375,13 +394,75 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[]) ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); } + if (flags & PPMU_ONLY_COUNT_RUN) { + /* + * We're only counting in RUN state, + * so PM_CYC is equivalent to PM_RUN_CYC, + * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR. + * This doesn't include alternatives that don't provide + * any extra flexibility in assigning PMCs (e.g. + * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC). + * Note that even with these additional alternatives + * we never end up with more than 4 alternatives for any event. + */ + j = nalt; + for (i = 0; i < nalt; ++i) { + switch (alt[i]) { + case 0x1e: /* PM_CYC */ + alt[j++] = 0x600005; /* PM_RUN_CYC */ + ++nlim; + break; + case 0x10000a: /* PM_RUN_CYC */ + alt[j++] = 0x1e; /* PM_CYC */ + break; + case 2: /* PM_INST_CMPL */ + alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ + ++nlim; + break; + case 0x500009: /* PM_RUN_INST_CMPL */ + alt[j++] = 2; /* PM_INST_CMPL */ + break; + case 0x10000e: /* PM_PURR */ + alt[j++] = 0x4000f4; /* PM_RUN_PURR */ + break; + case 0x4000f4: /* PM_RUN_PURR */ + alt[j++] = 0x10000e; /* PM_PURR */ + break; + } + } + nalt = j; + } + + if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { + /* remove the limited PMC events */ + j = 0; + for (i = 0; i < nalt; ++i) { + if (!p6_limited_pmc_event(alt[i])) { + alt[j] = alt[i]; + ++j; + } + } + nalt = j; + } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { + /* remove all but the limited PMC events */ + j = 0; + for (i = 0; i < nalt; ++i) { + if (p6_limited_pmc_event(alt[i])) { + alt[j] = alt[i]; + ++j; + } + } + nalt = j; + } + return nalt; } static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) { /* Set PMCxSEL to 0 to disable PMCx */ - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + if (pmc <= 3) + mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power6_generic_events[] = { @@ -394,14 +475,16 @@ static int power6_generic_events[] = { }; struct power_pmu power6_pmu = { - .n_counter = 4, + .n_counter = 6, .max_alternatives = MAX_ALT, - .add_fields = 0x55, - .test_adder = 0, + .add_fields = 0x1555, + .test_adder = 0x3000, .compute_mmcr = p6_compute_mmcr, .get_constraint = p6_get_constraint, .get_alternatives = p6_get_alternatives, .disable_pmc = p6_disable_pmc, .n_generic = ARRAY_SIZE(power6_generic_events), .generic_events = power6_generic_events, + .limited_pmc5_6 = 1, + .limited_pmc_event = p6_limited_pmc_event, }; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index aed8ccd7c07..af2d1884058 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -243,7 +243,8 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) return 0; } -static int p970_get_alternatives(unsigned int event, unsigned int alt[]) +static int p970_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { alt[0] = event; -- cgit v1.2.3 From b2e5d8588de0b5341eddad87dbe48d2185eaa3dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 6 May 2009 07:55:33 +0200 Subject: irq: change ->set_affinity() to return status, fix This build failure: arch/powerpc/sysdev/mpic.c:810: error: conflicting types for 'mpic_set_affinity' arch/powerpc/sysdev/mpic.h:39: error: previous declaration of 'mpic_set_affinity' was here make[2]: *** [arch/powerpc/sysdev/mpic.o] Error 1 make[2]: *** Waiting for unfinished jobs.... Triggers because the function prototype was not updated when the function call signature got changed by: d5dedd4: irq: change ->set_affinity() to return status [ Impact: build fix on powerpc ] Cc: Benjamin Herrenschmidt Cc: Yinghai Lu Cc: Andrew Morton Cc: Rusty Russell Cc: linux-arch@vger.kernel.org LKML-Reference: <49F654E9.4070809@kernel.org> Signed-off-by: Ingo Molnar --- arch/powerpc/sysdev/mpic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/mpic.h b/arch/powerpc/sysdev/mpic.h index 3cef2af10f4..eff433c322a 100644 --- a/arch/powerpc/sysdev/mpic.h +++ b/arch/powerpc/sysdev/mpic.h @@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic) extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type); extern void mpic_set_vector(unsigned int virq, unsigned int vector); -extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); +extern int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask); #endif /* _POWERPC_SYSDEV_MPIC_H */ -- cgit v1.2.3 From 9e35ad388bea89f7d6f375af4c0ae98803688666 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 13 May 2009 16:21:38 +0200 Subject: perf_counter: Rework the perf counter disable/enable The current disable/enable mechanism is: token = hw_perf_save_disable(); ... /* do bits */ ... hw_perf_restore(token); This works well, provided that the use nests properly. Except we don't. x86 NMI/INT throttling has non-nested use of this, breaking things. Therefore provide a reference counter disable/enable interface, where the first disable disables the hardware, and the last enable enables the hardware again. [ Impact: refactor, simplify the PMU disable/enable logic ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 15cdc8e6722..bb1b463c136 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -386,7 +386,7 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) * Disable all counters to prevent PMU interrupts and to allow * counters to be added or removed. */ -u64 hw_perf_save_disable(void) +void hw_perf_disable(void) { struct cpu_hw_counters *cpuhw; unsigned long ret; @@ -428,7 +428,6 @@ u64 hw_perf_save_disable(void) mb(); } local_irq_restore(flags); - return ret; } /* @@ -436,7 +435,7 @@ u64 hw_perf_save_disable(void) * If we were previously disabled and counters were added, then * put the new config on the PMU. */ -void hw_perf_restore(u64 disable) +void hw_perf_enable(void) { struct perf_counter *counter; struct cpu_hw_counters *cpuhw; @@ -448,9 +447,12 @@ void hw_perf_restore(u64 disable) int n_lim; int idx; - if (disable) - return; local_irq_save(flags); + if (!cpuhw->disabled) { + local_irq_restore(flags); + return; + } + cpuhw = &__get_cpu_var(cpu_hw_counters); cpuhw->disabled = 0; @@ -649,19 +651,18 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, /* * Add a counter to the PMU. * If all counters are not already frozen, then we disable and - * re-enable the PMU in order to get hw_perf_restore to do the + * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ static int power_pmu_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; unsigned long flags; - u64 pmudis; int n0; int ret = -EAGAIN; local_irq_save(flags); - pmudis = hw_perf_save_disable(); + perf_disable(); /* * Add the counter to the list (if there is room) @@ -685,7 +686,7 @@ static int power_pmu_enable(struct perf_counter *counter) ret = 0; out: - hw_perf_restore(pmudis); + perf_enable(); local_irq_restore(flags); return ret; } @@ -697,11 +698,10 @@ static void power_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; long i; - u64 pmudis; unsigned long flags; local_irq_save(flags); - pmudis = hw_perf_save_disable(); + perf_disable(); power_pmu_read(counter); @@ -735,7 +735,7 @@ static void power_pmu_disable(struct perf_counter *counter) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - hw_perf_restore(pmudis); + perf_enable(); local_irq_restore(flags); } -- cgit v1.2.3 From 60db5e09c13109b13830cc9dcae688003fd39e79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 15 May 2009 15:19:28 +0200 Subject: perf_counter: frequency based adaptive irq_period Instead of specifying the irq_period for a counter, provide a target interrupt frequency and dynamically adapt the irq_period to match this frequency. [ Impact: new perf-counter attribute/feature ] Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <20090515132018.646195868@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bb1b463c136..db8d5cafc15 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -534,7 +534,7 @@ void hw_perf_enable(void) continue; } val = 0; - if (counter->hw_event.irq_period) { + if (counter->hw.irq_period) { left = atomic64_read(&counter->hw.period_left); if (left < 0x80000000L) val = 0x80000000L - left; @@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) if (!ppmu) return ERR_PTR(-ENXIO); - if ((s64)counter->hw_event.irq_period < 0) - return ERR_PTR(-EINVAL); if (!perf_event_raw(&counter->hw_event)) { ev = perf_event_id(&counter->hw_event); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) @@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) counter->hw.config = events[n]; counter->hw.counter_base = cflags[n]; - atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); + atomic64_set(&counter->hw.period_left, counter->hw.irq_period); /* * See if we need to reserve the PMU. @@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) static void record_and_restart(struct perf_counter *counter, long val, struct pt_regs *regs, int nmi) { + u64 period = counter->hw.irq_period; s64 prev, delta, left; int record = 0; @@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val, */ val = 0; left = atomic64_read(&counter->hw.period_left) - delta; - if (counter->hw_event.irq_period) { + if (period) { if (left <= 0) { - left += counter->hw_event.irq_period; + left += period; if (left <= 0) - left = counter->hw_event.irq_period; + left = period; record = 1; } if (left < 0x80000000L) -- cgit v1.2.3 From ef923214a4816c289e4af2d67a9ebb1a31e4ac61 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 14 May 2009 13:29:14 +1000 Subject: perf_counter: powerpc: use u64 for event codes internally Although the perf_counter API allows 63-bit raw event codes, internally in the powerpc back-end we had been using 32-bit event codes. This expands them to 64 bits so that we can add bits for specifying threshold start/stop events and instruction sampling modes later. This also corrects the return value of can_go_on_limited_pmc; we were returning an event code rather than just a 0/1 value in some circumstances. That didn't particularly matter while event codes were 32-bit, but now that event codes are 64-bit it might, so this fixes it. [ Impact: extend PowerPC perfcounter interfaces from u32 to u64 ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <18955.36874.472452.353104@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 10 +++++----- arch/powerpc/kernel/perf_counter.c | 26 ++++++++++++-------------- arch/powerpc/kernel/power4-pmu.c | 9 ++++----- arch/powerpc/kernel/power5+-pmu.c | 14 +++++++------- arch/powerpc/kernel/power5-pmu.c | 16 ++++++++-------- arch/powerpc/kernel/power6-pmu.c | 16 ++++++++-------- arch/powerpc/kernel/ppc970-pmu.c | 9 ++++----- 7 files changed, 48 insertions(+), 52 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 56d66c38143..ceea76a48e3 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -23,13 +23,13 @@ struct power_pmu { int max_alternatives; u64 add_fields; u64 test_adder; - int (*compute_mmcr)(unsigned int events[], int n_ev, + int (*compute_mmcr)(u64 events[], int n_ev, unsigned int hwc[], u64 mmcr[]); - int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); - int (*get_alternatives)(unsigned int event, unsigned int flags, - unsigned int alt[]); + int (*get_constraint)(u64 event, u64 *mskp, u64 *valp); + int (*get_alternatives)(u64 event, unsigned int flags, + u64 alt[]); void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); - int (*limited_pmc_event)(unsigned int event); + int (*limited_pmc_event)(u64 event); int limited_pmc5_6; /* PMC5 and PMC6 have limited function */ int n_generic; int *generic_events; diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index db8d5cafc15..8d4cafc84b8 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -26,7 +26,7 @@ struct cpu_hw_counters { int n_limited; u8 pmcs_enabled; struct perf_counter *counter[MAX_HWCOUNTERS]; - unsigned int events[MAX_HWCOUNTERS]; + u64 events[MAX_HWCOUNTERS]; unsigned int flags[MAX_HWCOUNTERS]; u64 mmcr[3]; struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; @@ -131,11 +131,11 @@ static void write_pmc(int idx, unsigned long val) * and see if any combination of alternative codes is feasible. * The feasible set is returned in event[]. */ -static int power_check_constraints(unsigned int event[], unsigned int cflags[], +static int power_check_constraints(u64 event[], unsigned int cflags[], int n_ev) { u64 mask, value, nv; - unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; @@ -564,7 +564,7 @@ void hw_perf_enable(void) } static int collect_events(struct perf_counter *group, int max_count, - struct perf_counter *ctrs[], unsigned int *events, + struct perf_counter *ctrs[], u64 *events, unsigned int *flags) { int n = 0; @@ -752,11 +752,11 @@ struct pmu power_pmu = { * that a limited PMC can count, doesn't require interrupts, and * doesn't exclude any processor mode. */ -static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev, +static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, unsigned int flags) { int n; - unsigned int alt[MAX_EVENT_ALTERNATIVES]; + u64 alt[MAX_EVENT_ALTERNATIVES]; if (counter->hw_event.exclude_user || counter->hw_event.exclude_kernel @@ -776,10 +776,8 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev, flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; n = ppmu->get_alternatives(ev, flags, alt); - if (n) - return alt[0]; - return 0; + return n > 0; } /* @@ -787,10 +785,9 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev, * and return the event code, or 0 if there is no such alternative. * (Note: event code 0 is "don't count" on all machines.) */ -static unsigned long normal_pmc_alternative(unsigned long ev, - unsigned long flags) +static u64 normal_pmc_alternative(u64 ev, unsigned long flags) { - unsigned int alt[MAX_EVENT_ALTERNATIVES]; + u64 alt[MAX_EVENT_ALTERNATIVES]; int n; flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); @@ -820,9 +817,10 @@ static void hw_perf_counter_destroy(struct perf_counter *counter) const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { - unsigned long ev, flags; + u64 ev; + unsigned long flags; struct perf_counter *ctrs[MAX_HWCOUNTERS]; - unsigned int events[MAX_HWCOUNTERS]; + u64 events[MAX_HWCOUNTERS]; unsigned int cflags[MAX_HWCOUNTERS]; int n; int err; diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 744a2756958..836fa118eb1 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -213,7 +213,7 @@ static unsigned char direct_marked_event[8] = { * Returns 1 if event counts things relating to marked instructions * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. */ -static int p4_marked_instr_event(unsigned int event) +static int p4_marked_instr_event(u64 event) { int pmc, psel, unit, byte, bit; unsigned int mask; @@ -249,7 +249,7 @@ static int p4_marked_instr_event(unsigned int event) return (mask >> (byte * 8 + bit)) & 1; } -static int p4_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) { int pmc, byte, unit, lower, sh; u64 mask = 0, value = 0; @@ -320,8 +320,7 @@ static unsigned int ppc_inst_cmpl[] = { 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 }; -static int p4_get_alternatives(unsigned int event, unsigned int flags, - unsigned int alt[]) +static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { int i, j, na; @@ -353,7 +352,7 @@ static int p4_get_alternatives(unsigned int event, unsigned int flags, return na; } -static int p4_compute_mmcr(unsigned int event[], int n_ev, +static int p4_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 8154eaa2404..3ac0654372a 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -135,7 +135,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, }; -static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) { int pmc, byte, unit, sh; int bit, fmask; @@ -188,7 +188,7 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) return 0; } -static int power5p_limited_pmc_event(unsigned int event) +static int power5p_limited_pmc_event(u64 event) { int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; @@ -273,11 +273,11 @@ static int find_alternative_bdecode(unsigned int event) return -1; } -static int power5p_get_alternatives(unsigned int event, unsigned int flags, - unsigned int alt[]) +static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { - int i, j, ae, nalt = 1; + int i, j, nalt = 1; int nlim; + u64 ae; alt[0] = event; nalt = 1; @@ -402,7 +402,7 @@ static unsigned char direct_event_is_marked[0x28] = { * Returns 1 if event counts things relating to marked instructions * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. */ -static int power5p_marked_instr_event(unsigned int event) +static int power5p_marked_instr_event(u64 event) { int pmc, psel; int bit, byte, unit; @@ -451,7 +451,7 @@ static int power5p_marked_instr_event(unsigned int event) return (mask >> (byte * 8 + bit)) & 1; } -static int power5p_compute_mmcr(unsigned int event[], int n_ev, +static int power5p_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 6e667dc8647..d5344968ee9 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -139,7 +139,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, }; -static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) { int pmc, byte, unit, sh; int bit, fmask; @@ -224,7 +224,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = { * Scan the alternatives table for a match and return the * index into the alternatives table if found, else -1. */ -static int find_alternative(unsigned int event) +static int find_alternative(u64 event) { int i, j; @@ -250,7 +250,7 @@ static const unsigned char bytedecode_alternatives[4][4] = { * PMCSEL values on other counters. This returns the alternative * event code for those that do, or -1 otherwise. */ -static int find_alternative_bdecode(unsigned int event) +static u64 find_alternative_bdecode(u64 event) { int pmc, altpmc, pp, j; @@ -269,10 +269,10 @@ static int find_alternative_bdecode(unsigned int event) return -1; } -static int power5_get_alternatives(unsigned int event, unsigned int flags, - unsigned int alt[]) +static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { - int i, j, ae, nalt = 1; + int i, j, nalt = 1; + u64 ae; alt[0] = event; nalt = 1; @@ -338,7 +338,7 @@ static unsigned char direct_event_is_marked[0x28] = { * Returns 1 if event counts things relating to marked instructions * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. */ -static int power5_marked_instr_event(unsigned int event) +static int power5_marked_instr_event(u64 event) { int pmc, psel; int bit, byte, unit; @@ -382,7 +382,7 @@ static int power5_marked_instr_event(unsigned int event) return (mask >> (byte * 8 + bit)) & 1; } -static int power5_compute_mmcr(unsigned int event[], int n_ev, +static int power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index d44049f0ae2..ab7c615c458 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -134,7 +134,7 @@ static u32 marked_bus_events[16] = { * Returns 1 if event counts things relating to marked instructions * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. */ -static int power6_marked_instr_event(unsigned int event) +static int power6_marked_instr_event(u64 event) { int pmc, psel, ptype; int bit, byte, unit; @@ -172,7 +172,7 @@ static int power6_marked_instr_event(unsigned int event) /* * Assign PMC numbers and compute MMCR1 value for a set of events */ -static int p6_compute_mmcr(unsigned int event[], int n_ev, +static int p6_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; @@ -265,7 +265,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 * 32-34 select field: nest (subunit) event selector */ -static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) { int pmc, byte, sh, subunit; u64 mask = 0, value = 0; @@ -298,7 +298,7 @@ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) return 0; } -static int p6_limited_pmc_event(unsigned int event) +static int p6_limited_pmc_event(u64 event) { int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; @@ -337,7 +337,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = { * This could be made more efficient with a binary search on * a presorted list, if necessary */ -static int find_alternatives_list(unsigned int event) +static int find_alternatives_list(u64 event) { int i, j; unsigned int alt; @@ -356,12 +356,12 @@ static int find_alternatives_list(unsigned int event) return -1; } -static int p6_get_alternatives(unsigned int event, unsigned int flags, - unsigned int alt[]) +static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { int i, j, nlim; - unsigned int aevent, psel, pmc; + unsigned int psel, pmc; unsigned int nalt = 1; + u64 aevent; alt[0] = event; nlim = p6_limited_pmc_event(event); diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index af2d1884058..eed47c4523f 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -147,7 +147,7 @@ static unsigned char direct_marked_event[8] = { * Returns 1 if event counts things relating to marked instructions * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. */ -static int p970_marked_instr_event(unsigned int event) +static int p970_marked_instr_event(u64 event) { int pmc, psel, unit, byte, bit; unsigned int mask; @@ -192,7 +192,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, }; -static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) { int pmc, byte, unit, sh, spcsel; u64 mask = 0, value = 0; @@ -243,8 +243,7 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) return 0; } -static int p970_get_alternatives(unsigned int event, unsigned int flags, - unsigned int alt[]) +static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { alt[0] = event; @@ -257,7 +256,7 @@ static int p970_get_alternatives(unsigned int event, unsigned int flags, return 1; } -static int p970_compute_mmcr(unsigned int event[], int n_ev, +static int p970_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; -- cgit v1.2.3 From 0bbd0d4be8d5d3676c126e06e3c75c16def00441 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 14 May 2009 13:31:48 +1000 Subject: perf_counter: powerpc: supply more precise information on counter overflow events This uses values from the MMCRA, SIAR and SDAR registers on powerpc to supply more precise information for overflow events, including a data address when PERF_RECORD_ADDR is specified. Since POWER6 uses different bit positions in MMCRA from earlier processors, this converts the struct power_pmu limited_pmc5_6 field, which only had 0/1 values, into a flags field and defines bit values for its previous use (PPMU_LIMITED_PMC5_6) and a new flag (PPMU_ALT_SIPR) to indicate that the processor uses the POWER6 bit positions rather than the earlier positions. It also adds definitions in reg.h for the new and old positions of the bit that indicates that the SIAR and SDAR values come from the same instruction. For the data address, the SDAR value is supplied if we are not doing instruction sampling. In that case there is no guarantee that the address given in the PERF_RECORD_ADDR subrecord will correspond to the instruction whose address is given in the PERF_RECORD_IP subrecord. If instruction sampling is enabled (e.g. because this counter is counting a marked instruction event), then we only supply the SDAR value for the PERF_RECORD_ADDR subrecord if it corresponds to the instruction whose address is in the PERF_RECORD_IP subrecord. Otherwise we supply 0. [ Impact: support more PMU hardware features on PowerPC ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <18955.37028.48861.555309@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 14 +++++- arch/powerpc/include/asm/reg.h | 2 + arch/powerpc/kernel/perf_counter.c | 84 +++++++++++++++++++++++++++++++-- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- 5 files changed, 97 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index ceea76a48e3..1c60f0ca792 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -30,13 +30,19 @@ struct power_pmu { u64 alt[]); void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); int (*limited_pmc_event)(u64 event); - int limited_pmc5_6; /* PMC5 and PMC6 have limited function */ + u32 flags; int n_generic; int *generic_events; }; extern struct power_pmu *ppmu; +/* + * Values for power_pmu.flags + */ +#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ +#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ + /* * Values for flags to get_alternatives() */ @@ -44,6 +50,12 @@ extern struct power_pmu *ppmu; #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ +struct pt_regs; +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) + +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); + /* * The power_pmu.get_constraint function returns a 64-bit value and * a 64-bit mask that express the constraints between this event and diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e8018d540e8..fb359b0a693 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -492,11 +492,13 @@ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 #define SPRN_MMCRA 0x312 +#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ #define MMCRA_SLOT_SHIFT 24 #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ +#define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */ #define POWER6_MMCRA_SIHV 0x0000040000000000ULL #define POWER6_MMCRA_SIPR 0x0000020000000000ULL #define POWER6_MMCRA_THRM 0x00000020UL diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 8d4cafc84b8..6baae5a5c33 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -17,6 +17,7 @@ #include #include #include +#include struct cpu_hw_counters { int n_counters; @@ -310,7 +311,8 @@ static void power_pmu_read(struct perf_counter *counter) */ static int is_limited_pmc(int pmcnum) { - return ppmu->limited_pmc5_6 && (pmcnum == 5 || pmcnum == 6); + return (ppmu->flags & PPMU_LIMITED_PMC5_6) + && (pmcnum == 5 || pmcnum == 6); } static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, @@ -860,7 +862,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) * If this machine has limited counters, check whether this * event could go on a limited counter. */ - if (ppmu->limited_pmc5_6) { + if (ppmu->flags & PPMU_LIMITED_PMC5_6) { if (can_go_on_limited_pmc(counter, ev, flags)) { flags |= PPMU_LIMITED_PMC_OK; } else if (ppmu->limited_pmc_event(ev)) { @@ -933,6 +935,7 @@ static void record_and_restart(struct perf_counter *counter, long val, u64 period = counter->hw.irq_period; s64 prev, delta, left; int record = 0; + u64 addr, mmcra, sdsync; /* we don't have to worry about interrupts here */ prev = atomic64_read(&counter->hw.prev_count); @@ -963,8 +966,76 @@ static void record_and_restart(struct perf_counter *counter, long val, /* * Finally record data if requested. */ - if (record) - perf_counter_overflow(counter, nmi, regs, 0); + if (record) { + addr = 0; + if (counter->hw_event.record_type & PERF_RECORD_ADDR) { + /* + * The user wants a data address recorded. + * If we're not doing instruction sampling, + * give them the SDAR (sampled data address). + * If we are doing instruction sampling, then only + * give them the SDAR if it corresponds to the + * instruction pointed to by SIAR; this is indicated + * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA. + */ + mmcra = regs->dsisr; + sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? + POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + addr = mfspr(SPRN_SDAR); + } + perf_counter_overflow(counter, nmi, regs, addr); + } +} + +/* + * Called from generic code to get the misc flags (i.e. processor mode) + * for an event. + */ +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + unsigned long mmcra; + + if (TRAP(regs) != 0xf00) { + /* not a PMU interrupt */ + return user_mode(regs) ? PERF_EVENT_MISC_USER : + PERF_EVENT_MISC_KERNEL; + } + + mmcra = regs->dsisr; + if (ppmu->flags & PPMU_ALT_SIPR) { + if (mmcra & POWER6_MMCRA_SIHV) + return PERF_EVENT_MISC_HYPERVISOR; + return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER : + PERF_EVENT_MISC_KERNEL; + } + if (mmcra & MMCRA_SIHV) + return PERF_EVENT_MISC_HYPERVISOR; + return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : + PERF_EVENT_MISC_KERNEL; +} + +/* + * Called from generic code to get the instruction pointer + * for an event. + */ +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + unsigned long mmcra; + unsigned long ip; + unsigned long slot; + + if (TRAP(regs) != 0xf00) + return regs->nip; /* not a PMU interrupt */ + + ip = mfspr(SPRN_SIAR); + mmcra = regs->dsisr; + if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { + slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; + if (slot > 1) + ip += 4 * (slot - 1); + } + return ip; } /* @@ -983,6 +1054,11 @@ static void perf_counter_interrupt(struct pt_regs *regs) freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), mfspr(SPRN_PMC6)); + /* + * Overload regs->dsisr to store MMCRA so we only need to read it once. + */ + regs->dsisr = mfspr(SPRN_MMCRA); + /* * If interrupts were soft-disabled when this PMU interrupt * occurred, treat it as an NMI. diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 3ac0654372a..c6cdfc165d6 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -625,6 +625,6 @@ struct power_pmu power5p_pmu = { .disable_pmc = power5p_disable_pmc, .n_generic = ARRAY_SIZE(power5p_generic_events), .generic_events = power5p_generic_events, - .limited_pmc5_6 = 1, + .flags = PPMU_LIMITED_PMC5_6, .limited_pmc_event = power5p_limited_pmc_event, }; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index ab7c615c458..cd4fbe06c35 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -485,6 +485,6 @@ struct power_pmu power6_pmu = { .disable_pmc = p6_disable_pmc, .n_generic = ARRAY_SIZE(power6_generic_events), .generic_events = power6_generic_events, - .limited_pmc5_6 = 1, + .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, .limited_pmc_event = p6_limited_pmc_event, }; -- cgit v1.2.3 From af3e4aca47d2e05a545a5e10ba5c7193e0b665e0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 30 Apr 2009 10:59:19 +0000 Subject: powerpc: Do not assert pte_locked for hugepage PTE entries With CONFIG_DEBUG_VM, an assertion is made when changing the protection flags of a PTE that the PTE is locked. Huge pages use a different pagetable format and the assertion is bogus and will always trigger with a bug looking something like Unable to handle kernel paging request for data at address 0xf1a00235800006f8 Faulting instruction address: 0xc000000000034a80 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=32 NUMA Maple Modules linked in: dm_snapshot dm_mirror dm_region_hash dm_log dm_mod loop evdev ext3 jbd mbcache sg sd_mod ide_pci_generic pata_amd ata_generic ipr libata tg3 libphy scsi_mod windfarm_pid windfarm_smu_sat windfarm_max6690_sensor windfarm_lm75_sensor windfarm_cpufreq_clamp windfarm_core i2c_powermac NIP: c000000000034a80 LR: c000000000034b18 CTR: 0000000000000003 REGS: c000000003037600 TRAP: 0300 Not tainted (2.6.30-rc3-autokern1) MSR: 9000000000009032 CR: 28002484 XER: 200fffff DAR: f1a00235800006f8, DSISR: 0000000040010000 TASK = c0000002e54cc740[2960] 'map_high_trunca' THREAD: c000000003034000 CPU: 2 GPR00: 4000000000000000 c000000003037880 c000000000895d30 c0000002e5a2e500 GPR04: 00000000a0000000 c0000002edc40880 0000005700000393 0000000000000001 GPR08: f000000011ac0000 01a00235800006e8 00000000000000f5 f1a00235800006e8 GPR12: 0000000028000484 c0000000008dd780 0000000000001000 0000000000000000 GPR16: fffffffffffff000 0000000000000000 00000000a0000000 c000000003037a20 GPR20: c0000002e5f4ece8 0000000000001000 c0000002edc40880 0000000000000000 GPR24: c0000002e5f4ece8 0000000000000000 00000000a0000000 c0000002e5f4ece8 GPR28: 0000005700000393 c0000002e5a2e500 00000000a0000000 c000000003037880 NIP [c000000000034a80] .assert_pte_locked+0xa4/0xd0 LR [c000000000034b18] .ptep_set_access_flags+0x6c/0xb4 Call Trace: [c000000003037880] [c000000003037990] 0xc000000003037990 (unreliable) [c000000003037910] [c000000000034b18] .ptep_set_access_flags+0x6c/0xb4 [c0000000030379b0] [c00000000014bef8] .hugetlb_cow+0x124/0x674 [c000000003037b00] [c00000000014c930] .hugetlb_fault+0x4e8/0x6f8 [c000000003037c00] [c00000000013443c] .handle_mm_fault+0xac/0x828 [c000000003037cf0] [c0000000000340a8] .do_page_fault+0x39c/0x584 [c000000003037e30] [c0000000000057b0] handle_page_fault+0x20/0x5c Instruction dump: 7d29582a 7d200074 7800d182 0b000000 3c004000 3960ffff 780007c6 796b00c4 7d290214 7929a302 1d290068 7d6b4a14 <800b0010> 7c000074 7800d182 0b000000 This patch fixes the problem by not asseting the PTE is locked for VMAs backed by huge pages. Signed-off-by: Mel Gorman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/pgtable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index f5c6fd42265..ae1d67cc090 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -219,7 +219,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, entry = do_dcache_icache_coherency(entry); changed = !pte_same(*(ptep), entry); if (changed) { - assert_pte_locked(vma->vm_mm, address); + if (!(vma->vm_flags & VM_HUGETLB)) + assert_pte_locked(vma->vm_mm, address); __ptep_set_access_flags(ptep, entry); flush_tlb_page_nohash(vma, address); } -- cgit v1.2.3 From 021376a3b655364c92c10be544a3319946a792e8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 13 May 2009 20:30:24 +0000 Subject: powerpc/ftrace: Use pr_devel() in ftrace.c pr_debug() can now result in code being generated even when #DEBUG is not defined. That's not really desirable in the ftrace code which we want to be snappy. With CONFIG_DYNAMIC_DEBUG=y: size before: text data bss dec hex filename 3334 672 4 4010 faa arch/powerpc/kernel/ftrace.o size after: text data bss dec hex filename 2616 360 4 2980 ba4 arch/powerpc/kernel/ftrace.o Signed-off-by: Michael Ellerman Acked-by: Steven Rostedt Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ftrace.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 70e2a736be1..5b078ee391f 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -157,7 +157,7 @@ __ftrace_make_nop(struct module *mod, * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12) */ - pr_debug("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); + pr_devel("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); /* Find where the trampoline jumps to */ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { @@ -165,7 +165,7 @@ __ftrace_make_nop(struct module *mod, return -EFAULT; } - pr_debug(" %08x %08x", jmp[0], jmp[1]); + pr_devel(" %08x %08x", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ if (((jmp[0] & 0xffff0000) != 0x3d820000) || @@ -181,18 +181,18 @@ __ftrace_make_nop(struct module *mod, offset = ((unsigned)((unsigned short)jmp[0]) << 16) + (int)((short)jmp[1]); - pr_debug(" %x ", offset); + pr_devel(" %x ", offset); /* get the address this jumps too */ tramp = mod->arch.toc + offset + 32; - pr_debug("toc: %lx", tramp); + pr_devel("toc: %lx", tramp); if (probe_kernel_read(jmp, (void *)tramp, 8)) { printk(KERN_ERR "Failed to read %lx\n", tramp); return -EFAULT; } - pr_debug(" %08x %08x\n", jmp[0], jmp[1]); + pr_devel(" %08x %08x\n", jmp[0], jmp[1]); ptr = ((unsigned long)jmp[0] << 32) + jmp[1]; @@ -269,7 +269,7 @@ __ftrace_make_nop(struct module *mod, * 0x4e, 0x80, 0x04, 0x20 bctr */ - pr_debug("ip:%lx jumps to %lx", ip, tramp); + pr_devel("ip:%lx jumps to %lx", ip, tramp); /* Find where the trampoline jumps to */ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { @@ -277,7 +277,7 @@ __ftrace_make_nop(struct module *mod, return -EFAULT; } - pr_debug(" %08x %08x ", jmp[0], jmp[1]); + pr_devel(" %08x %08x ", jmp[0], jmp[1]); /* verify that this is what we expect it to be */ if (((jmp[0] & 0xffff0000) != 0x3d600000) || @@ -293,7 +293,7 @@ __ftrace_make_nop(struct module *mod, if (tramp & 0x8000) tramp -= 0x10000; - pr_debug(" %lx ", tramp); + pr_devel(" %lx ", tramp); if (tramp != addr) { printk(KERN_ERR @@ -402,7 +402,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* ld r2,40(r1) */ op[1] = 0xe8410028; - pr_debug("write to %lx\n", rec->ip); + pr_devel("write to %lx\n", rec->ip); if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2)) return -EPERM; @@ -442,7 +442,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - pr_debug("write to %lx\n", rec->ip); + pr_devel("write to %lx\n", rec->ip); if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) return -EPERM; -- cgit v1.2.3 From c3cf8667ed7db58c1960958cbb0a9098d513cc60 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 15 May 2009 04:33:54 +0000 Subject: powerpc/ftrace: Fix constraint to be early clobber After upgrading my distcc boxes from gcc 4.2.2 to 4.4.0, the function graph tracer broke. This was discovered on my x86 boxes. The issue is that gcc used the same register for an output as it did for an input in an asm statement. I first thought this was a bug in gcc and reported it. I was notified that gcc was correct and that the output had to be flagged as an "early clobber". I noticed that powerpc had the same issue and this patch fixes it. Signed-off-by: Steven Rostedt Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 5b078ee391f..2d182f119d1 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -594,7 +594,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) PPC_LONG "2b,4b\n" ".previous" - : [old] "=r" (old), [faulted] "=r" (faulted) + : [old] "=&r" (old), [faulted] "=r" (faulted) : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); -- cgit v1.2.3 From dc892288f42661a140124ecbf9d44850a95de222 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 15 May 2009 08:01:59 +0000 Subject: powerpc/ps3: Update ps3_defconfig Refresh and set these options: CONFIG_SYSFS_DEPRECATED_V2: y -> n CONFIG_INPUT_JOYSTICK: y -> n CONFIG_HID_SONY: n -> m CONFIG_RTC_DRV_PS3: - -> m Signed-off-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/configs/ps3_defconfig | 105 ++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 43 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index ac14f5245d2..e28e65e7a0e 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -1,13 +1,14 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29-rc8 -# Fri Mar 13 09:28:45 2009 +# Linux kernel version: 2.6.30-rc5 +# Fri May 15 10:37:00 2009 # CONFIG_PPC64=y # # Processor support # +CONFIG_PPC_BOOK3S=y # CONFIG_POWER4_ONLY is not set CONFIG_POWER3=y CONFIG_POWER4=y @@ -55,9 +56,11 @@ CONFIG_OF=y # CONFIG_GENERIC_TBSYNC is not set CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y +CONFIG_DTC=y # CONFIG_DEFAULT_UIMAGE is not set # CONFIG_PPC_DCR_NATIVE is not set # CONFIG_PPC_DCR_MMIO is not set +CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -72,6 +75,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set @@ -88,8 +92,7 @@ CONFIG_CLASSIC_RCU=y CONFIG_LOG_BUF_SHIFT=17 # CONFIG_GROUP_SCHED is not set # CONFIG_CGROUPS is not set -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_SYSFS_DEPRECATED_V2 is not set # CONFIG_RELAY is not set CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set @@ -99,6 +102,9 @@ CONFIG_NAMESPACES=y # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -107,6 +113,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_EXTRA_PASS=y +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -138,6 +145,7 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_ATTRS=y CONFIG_USE_GENERIC_SMP_HELPERS=y +# CONFIG_SLOW_WORK is not set # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -150,7 +158,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -# CONFIG_BLK_DEV_IO_TRACE is not set CONFIG_BLK_DEV_BSG=y # CONFIG_BLK_DEV_INTEGRITY is not set CONFIG_BLOCK_COMPAT=y @@ -172,7 +179,6 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" # # Platform support # -CONFIG_PPC_MULTIPLATFORM=y # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_ISERIES is not set # CONFIG_PPC_PMAC is not set @@ -209,6 +215,7 @@ CONFIG_SPU_FS_64K_LS=y # CONFIG_SPU_TRACE is not set CONFIG_SPU_BASE=y # CONFIG_PQ2ADS is not set +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set # CONFIG_IPIC is not set # CONFIG_MPIC is not set # CONFIG_MPIC_WEIRD is not set @@ -279,11 +286,14 @@ CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y CONFIG_ARCH_MEMORY_PROBE=y CONFIG_PPC_HAS_HASH_64K=y CONFIG_PPC_4K_PAGES=y # CONFIG_PPC_16K_PAGES is not set # CONFIG_PPC_64K_PAGES is not set +# CONFIG_PPC_256K_PAGES is not set CONFIG_FORCE_MAX_ZONEORDER=13 CONFIG_SCHED_SMT=y CONFIG_PROC_DEVICETREE=y @@ -316,7 +326,6 @@ CONFIG_NET=y # # Networking options # -CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_PACKET=y CONFIG_PACKET_MMAP=y CONFIG_UNIX=y @@ -389,6 +398,7 @@ CONFIG_IPV6_NDISC_NODETYPE=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set # CONFIG_NET_SCHED is not set # CONFIG_DCB is not set @@ -396,6 +406,7 @@ CONFIG_IPV6_NDISC_NODETYPE=y # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set # CONFIG_IRDA is not set @@ -419,11 +430,9 @@ CONFIG_BT_HCIBTUSB=m # CONFIG_BT_HCIBFUSB is not set # CONFIG_BT_HCIVHCI is not set # CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set CONFIG_WIRELESS=y CONFIG_CFG80211=m # CONFIG_CFG80211_REG_DEBUG is not set -CONFIG_NL80211=y # CONFIG_WIRELESS_OLD_REGULATORY is not set CONFIG_WIRELESS_EXT=y # CONFIG_WIRELESS_EXT_SYSFS is not set @@ -602,6 +611,7 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SRP_ATTRS is not set # CONFIG_SCSI_LOWLEVEL is not set # CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set # CONFIG_ATA is not set CONFIG_MD=y # CONFIG_BLK_DEV_MD is not set @@ -616,6 +626,7 @@ CONFIG_BLK_DEV_DM=m # CONFIG_DM_UEVENT is not set # CONFIG_MACINTOSH_DRIVERS is not set CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set @@ -625,6 +636,8 @@ CONFIG_NETDEVICES=y # CONFIG_PHYLIB is not set CONFIG_NET_ETHERNET=y CONFIG_MII=m +# CONFIG_ETHOC is not set +# CONFIG_DNET is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set @@ -646,12 +659,13 @@ CONFIG_GELIC_WIRELESS_OLD_PSK_INTERFACE=y CONFIG_WLAN_80211=y # CONFIG_LIBERTAS is not set # CONFIG_LIBERTAS_THINFIRM is not set +# CONFIG_AT76C50X_USB is not set # CONFIG_USB_ZD1201 is not set # CONFIG_USB_NET_RNDIS_WLAN is not set # CONFIG_RTL8187 is not set # CONFIG_MAC80211_HWSIM is not set # CONFIG_P54_COMMON is not set -# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_AR9170_USB is not set # CONFIG_HOSTAP is not set # CONFIG_B43 is not set # CONFIG_B43LEGACY is not set @@ -673,6 +687,7 @@ CONFIG_USB_PEGASUS=m CONFIG_USB_USBNET=m CONFIG_USB_NET_AX8817X=m # CONFIG_USB_NET_CDCETHER is not set +# CONFIG_USB_NET_CDC_EEM is not set # CONFIG_USB_NET_DM9601 is not set # CONFIG_USB_NET_SMSC95XX is not set # CONFIG_USB_NET_GL620A is not set @@ -724,28 +739,7 @@ CONFIG_INPUT_EVDEV=m # # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set -CONFIG_INPUT_JOYSTICK=y -# CONFIG_JOYSTICK_ANALOG is not set -# CONFIG_JOYSTICK_A3D is not set -# CONFIG_JOYSTICK_ADI is not set -# CONFIG_JOYSTICK_COBRA is not set -# CONFIG_JOYSTICK_GF2K is not set -# CONFIG_JOYSTICK_GRIP is not set -# CONFIG_JOYSTICK_GRIP_MP is not set -# CONFIG_JOYSTICK_GUILLEMOT is not set -# CONFIG_JOYSTICK_INTERACT is not set -# CONFIG_JOYSTICK_SIDEWINDER is not set -# CONFIG_JOYSTICK_TMDC is not set -# CONFIG_JOYSTICK_IFORCE is not set -# CONFIG_JOYSTICK_WARRIOR is not set -# CONFIG_JOYSTICK_MAGELLAN is not set -# CONFIG_JOYSTICK_SPACEORB is not set -# CONFIG_JOYSTICK_SPACEBALL is not set -# CONFIG_JOYSTICK_STINGER is not set -# CONFIG_JOYSTICK_TWIDJOY is not set -# CONFIG_JOYSTICK_ZHENHUA is not set -# CONFIG_JOYSTICK_JOYDUMP is not set -# CONFIG_JOYSTICK_XPAD is not set +# CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set @@ -864,6 +858,7 @@ CONFIG_FB_PS3_DEFAULT_SIZE_M=9 # CONFIG_FB_VIRTUAL is not set # CONFIG_FB_METRONOME is not set # CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set # CONFIG_BACKLIGHT_LCD_SUPPORT is not set # @@ -934,15 +929,17 @@ CONFIG_USB_HIDDEV=y # # Special HID drivers # -# CONFIG_HID_COMPAT is not set # CONFIG_HID_A4TECH is not set # CONFIG_HID_APPLE is not set # CONFIG_HID_BELKIN is not set # CONFIG_HID_CHERRY is not set # CONFIG_HID_CHICONY is not set # CONFIG_HID_CYPRESS is not set +# CONFIG_DRAGONRISE_FF is not set # CONFIG_HID_EZKEY is not set +# CONFIG_HID_KYE is not set # CONFIG_HID_GYRATION is not set +# CONFIG_HID_KENSINGTON is not set # CONFIG_HID_LOGITECH is not set # CONFIG_HID_MICROSOFT is not set # CONFIG_HID_MONTEREY is not set @@ -950,7 +947,7 @@ CONFIG_USB_HIDDEV=y # CONFIG_HID_PANTHERLORD is not set # CONFIG_HID_PETALYNX is not set # CONFIG_HID_SAMSUNG is not set -# CONFIG_HID_SONY is not set +CONFIG_HID_SONY=m # CONFIG_HID_SUNPLUS is not set # CONFIG_GREENASIA_FF is not set # CONFIG_HID_TOPSEED is not set @@ -1012,11 +1009,11 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y # CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # -# see USB_STORAGE Help for more information +# also be needed; see USB_STORAGE Help for more info # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set @@ -1058,7 +1055,6 @@ CONFIG_USB_STORAGE=m # CONFIG_USB_LED is not set # CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_FTDI_ELAN is not set # CONFIG_USB_APPLEDISPLAY is not set @@ -1074,6 +1070,7 @@ CONFIG_USB_STORAGE=m # # OTG and related infrastructure # +# CONFIG_NOP_USB_XCEIV is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -1113,8 +1110,10 @@ CONFIG_RTC_INTF_DEV=y # # on-CPU RTC drivers # -CONFIG_RTC_DRV_PPC=m +# CONFIG_RTC_DRV_GENERIC is not set +CONFIG_RTC_DRV_PS3=m # CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # CONFIG_STAGING is not set @@ -1125,6 +1124,7 @@ CONFIG_EXT2_FS=m # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=m +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1160,6 +1160,11 @@ CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m # CONFIG_FUSE_FS is not set +# +# Caches +# +# CONFIG_FSCACHE is not set + # # CD-ROM/DVD Filesystems # @@ -1211,6 +1216,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1223,7 +1229,6 @@ CONFIG_LOCKD_V4=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set @@ -1283,6 +1288,7 @@ CONFIG_NLS_ISO8859_1=y # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_UTF8 is not set # CONFIG_DLM is not set +CONFIG_BINARY_PRINTF=y # # Library routines @@ -1296,15 +1302,16 @@ CONFIG_CRC_ITU_T=m CONFIG_CRC32=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set -CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m CONFIG_LZO_COMPRESS=m CONFIG_LZO_DECOMPRESS=m -CONFIG_PLIST=y +CONFIG_DECOMPRESS_GZIP=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y CONFIG_HAVE_LMB=y +CONFIG_NLATTR=y # # Kernel hacking @@ -1322,6 +1329,9 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1357,12 +1367,15 @@ CONFIG_DEBUG_LIST=y # CONFIG_FAULT_INJECTION is not set # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y +# CONFIG_DEBUG_PAGEALLOC is not set CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1371,18 +1384,21 @@ CONFIG_TRACING=y # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_FTRACE_STARTUP_TEST is not set -# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_DYNAMIC_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set CONFIG_PRINT_STACK_DEPTH=64 CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_STACK_USAGE is not set -# CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_CODE_PATCHING_SELFTEST is not set # CONFIG_FTR_FIXUP_SELFTEST is not set # CONFIG_MSI_BITMAP_SELFTEST is not set @@ -1415,10 +1431,12 @@ CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_GF128MUL=m # CONFIG_CRYPTO_NULL is not set +CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set # CONFIG_CRYPTO_AUTHENC is not set # CONFIG_CRYPTO_TEST is not set @@ -1487,6 +1505,7 @@ CONFIG_CRYPTO_SALSA20=m # Compression # # CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set CONFIG_CRYPTO_LZO=m # -- cgit v1.2.3 From 0e337b42d620ca7c45fe64e64dd71957c56216c9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 17 May 2009 18:29:03 +0000 Subject: powerpc: Explicit alignment for .data.cacheline_aligned I don't think anything guarantees that the objects in data.page_aligned are a multiple of PAGE_SIZE, thus the section may end on any boundary. So the following section, .data.cacheline_aligned needs an explicit alignment. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index a047a6cfca4..8ef8a14abc9 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -264,6 +264,7 @@ SECTIONS *(.data.page_aligned) } + . = ALIGN(L1_CACHE_BYTES); .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { *(.data.cacheline_aligned) } -- cgit v1.2.3 From c0daaf3f1f672defa3a45ca449b76d0e86c55892 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 18 May 2009 14:02:12 +1000 Subject: perf_counter: powerpc: initialize cpuhw pointer before use Commit 9e35ad38 ("perf_counter: Rework the perf counter disable/enable") added code to the powerpc hw_perf_enable (renamed from hw_perf_restore) to test cpuhw->disabled and return immediately if it is not set (i.e. if the PMU is already enabled). Unfortunately the test got added before cpuhw was initialized, resulting in an oops the first time hw_perf_enable got called. This fixes it by moving the initialization of cpuhw to before cpuhw->disabled is tested. [ Impact: fix oops-causing bug on powerpc ] Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Corey Ashford LKML-Reference: <18960.56772.869734.304631@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 6baae5a5c33..fe21b2440f2 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -450,12 +450,11 @@ void hw_perf_enable(void) int idx; local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_counters); if (!cpuhw->disabled) { local_irq_restore(flags); return; } - - cpuhw = &__get_cpu_var(cpu_hw_counters); cpuhw->disabled = 0; /* -- cgit v1.2.3 From 6eb0ac03899a1363ba176abe0830a9e6698c0503 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 21 May 2009 19:10:23 +0000 Subject: powerpc/maple: Add a quirk to disable MSI for IPR on Bimini Something in the HW or FW setup is busted and MSIs aren't working with IPR on Bimini, so until we figure out exaxtly what's up, we quirk them out Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/maple/pci.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c index 301855263b8..04296ffff8b 100644 --- a/arch/powerpc/platforms/maple/pci.c +++ b/arch/powerpc/platforms/maple/pci.c @@ -592,3 +592,17 @@ int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) } return irq; } + +static void __devinit quirk_ipr_msi(struct pci_dev *dev) +{ + /* Something prevents MSIs from the IPR from working on Bimini, + * and the driver has no smarts to recover. So disable MSI + * on it for now. */ + + if (machine_is(maple)) { + dev->no_msi = 1; + dev_info(&dev->dev, "Quirk disabled MSI\n"); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN, + quirk_ipr_msi); -- cgit v1.2.3 From e1defc4ff0cf57aca6c5e3ff99fa503f5943c1f1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 22 May 2009 17:17:49 -0400 Subject: block: Do away with the notion of hardsect_size Until now we have had a 1:1 mapping between storage device physical block size and the logical block sized used when addressing the device. With SATA 4KB drives coming out that will no longer be the case. The sector size will be 4KB but the logical block size will remain 512-bytes. Hence we need to distinguish between the physical block size and the logical ditto. This patch renames hardsect_size to logical_block_size. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- arch/powerpc/sysdev/axonram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 9e105cbc5e5..a4779912a5c 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -250,7 +250,7 @@ axon_ram_probe(struct of_device *device, const struct of_device_id *device_id) set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT); blk_queue_make_request(bank->disk->queue, axon_ram_make_request); - blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); + blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE); add_disk(bank->disk); bank->irq_id = irq_of_parse_and_map(device->node, 0); -- cgit v1.2.3 From 0bc53a67ac831ec84f730a657dbcadd80a589ef5 Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Sat, 23 May 2009 19:13:03 -0400 Subject: ASoC: Add a few more mpc5200 PSC defines Add a few more mpc5200 PSC defines. More bit fields defines for mpc5200 PSC registers. Signed-off-by: Jon Smirl Acked-by: Grant Likely Signed-off-by: Mark Brown --- arch/powerpc/include/asm/mpc52xx_psc.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h index a218da6bec7..fb841205745 100644 --- a/arch/powerpc/include/asm/mpc52xx_psc.h +++ b/arch/powerpc/include/asm/mpc52xx_psc.h @@ -28,6 +28,10 @@ #define MPC52xx_PSC_MAXNUM 6 /* Programmable Serial Controller (PSC) status register bits */ +#define MPC52xx_PSC_SR_UNEX_RX 0x0001 +#define MPC52xx_PSC_SR_DATA_VAL 0x0002 +#define MPC52xx_PSC_SR_DATA_OVR 0x0004 +#define MPC52xx_PSC_SR_CMDSEND 0x0008 #define MPC52xx_PSC_SR_CDE 0x0080 #define MPC52xx_PSC_SR_RXRDY 0x0100 #define MPC52xx_PSC_SR_RXFULL 0x0200 @@ -61,6 +65,12 @@ #define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001 /* PSC interrupt status/mask bits */ +#define MPC52xx_PSC_IMR_UNEX_RX_SLOT 0x0001 +#define MPC52xx_PSC_IMR_DATA_VALID 0x0002 +#define MPC52xx_PSC_IMR_DATA_OVR 0x0004 +#define MPC52xx_PSC_IMR_CMD_SEND 0x0008 +#define MPC52xx_PSC_IMR_ERROR 0x0040 +#define MPC52xx_PSC_IMR_DEOF 0x0080 #define MPC52xx_PSC_IMR_TXRDY 0x0100 #define MPC52xx_PSC_IMR_RXRDY 0x0200 #define MPC52xx_PSC_IMR_DB 0x0400 @@ -117,6 +127,7 @@ #define MPC52xx_PSC_SICR_SIM_FIR (0x6 << 24) #define MPC52xx_PSC_SICR_SIM_CODEC_24 (0x7 << 24) #define MPC52xx_PSC_SICR_SIM_CODEC_32 (0xf << 24) +#define MPC52xx_PSC_SICR_AWR (1 << 30) #define MPC52xx_PSC_SICR_GENCLK (1 << 23) #define MPC52xx_PSC_SICR_I2S (1 << 22) #define MPC52xx_PSC_SICR_CLKPOL (1 << 21) -- cgit v1.2.3 From 8e35961b57da14cb64cb0e4e1b7e3aabda6396fe Mon Sep 17 00:00:00 2001 From: Hideo Saito Date: Sun, 24 May 2009 15:33:34 +0000 Subject: powerpc/mm: Fix broken MMU PID stealing on !SMP The recent rework of the MMU PID handling for non-hash CPUs has a subtle bug in the !SMP "optimized" variant of the PID stealing function. It clears the PID in the mm context before it calls local_flush_tlb_mm(). However, the later will not flush anything if the PID in the context is clear... Signed-off-by: Hideo Saito Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mmu_context_nohash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index a70e311bd45..030d0005b4d 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -127,12 +127,12 @@ static unsigned int steal_context_up(unsigned int id) pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm); - /* Mark this mm has having no context anymore */ - mm->context.id = MMU_NO_CONTEXT; - /* Flush the TLB for that context */ local_flush_tlb_mm(mm); + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ __clear_bit(id, stale_map[cpu]); -- cgit v1.2.3 From 8a7b8cb91f26a671f22cedc7fd54508667f2d9b9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 26 May 2009 16:27:59 +1000 Subject: perf_counter: powerpc: Implement interrupt throttling This implements interrupt throttling on powerpc. Since we don't have individual count enable/disable or interrupt enable/disable controls per counter, this simply sets the hardware counter to 0, meaning that it will not interrupt again until it has counted 2^31 counts, which will take at least 2^30 cycles assuming a maximum of 2 counts per cycle. Also, we set counter->hw.period_left to the maximum possible value (2^63 - 1), so we won't report overflows for this counter for the forseeable future. The unthrottle operation restores counter->hw.period_left and the hardware counter so that we will once again report a counter overflow after counter->hw.irq_period counts. [ Impact: new perfcounters robustness feature on PowerPC ] Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Corey Ashford LKML-Reference: <18971.35823.643362.446774@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 48 ++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index fe21b2440f2..f96d55f55bd 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -740,10 +740,37 @@ static void power_pmu_disable(struct perf_counter *counter) local_irq_restore(flags); } +/* + * Re-enable interrupts on a counter after they were throttled + * because they were coming too fast. + */ +static void power_pmu_unthrottle(struct perf_counter *counter) +{ + s64 val, left; + unsigned long flags; + + if (!counter->hw.idx || !counter->hw.irq_period) + return; + local_irq_save(flags); + perf_disable(); + power_pmu_read(counter); + left = counter->hw.irq_period; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(counter->hw.idx, val); + atomic64_set(&counter->hw.prev_count, val); + atomic64_set(&counter->hw.period_left, left); + perf_counter_update_userpage(counter); + perf_enable(); + local_irq_restore(flags); +} + struct pmu power_pmu = { .enable = power_pmu_enable, .disable = power_pmu_disable, .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, }; /* @@ -957,10 +984,6 @@ static void record_and_restart(struct perf_counter *counter, long val, if (left < 0x80000000L) val = 0x80000000L - left; } - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); /* * Finally record data if requested. @@ -983,8 +1006,23 @@ static void record_and_restart(struct perf_counter *counter, long val, if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) addr = mfspr(SPRN_SDAR); } - perf_counter_overflow(counter, nmi, regs, addr); + if (perf_counter_overflow(counter, nmi, regs, addr)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the counter to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each counter counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } } + + write_pmc(counter->hw.idx, val); + atomic64_set(&counter->hw.prev_count, val); + atomic64_set(&counter->hw.period_left, left); + perf_counter_update_userpage(counter); } /* -- cgit v1.2.3 From 84532a0fc3d5811dca8e3726fe4d372ea87bd7c6 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:33:14 +1000 Subject: Revert "powerpc: Rework dma-noncoherent to use generic vmalloc layer" This reverts commit 33f00dcedb0e22cdb156a23632814fc580fcfcf8. While it was a good idea to try to use the mm/vmalloc.c allocator instead of our own (in fact, ours is itself a dup on an old variant of the vmalloc one), unfortunately, the approach is terminally busted since dma_alloc_coherent() can be called at interrupt time or in atomic contexts and there's little chances we'll make the code in mm/vmalloc.c cope with\ that :-( Until we can get the generic code to forbid that idiocy and fix all drivers abusing it, we pretty much have no choice but revert to our custom virtual space allocator. There's also a problem with SMP safety since freeing such mapping would require an IPI which cannot be done at interrupt time. However, right now, I don't think we support any platform that is both SMP and has non-coherent DMA (don't laugh, I know such things do exist !) so we can sort that out later. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 25 +++ arch/powerpc/lib/dma-noncoherent.c | 303 ++++++++++++++++++++++++++++++------- 2 files changed, 271 insertions(+), 57 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a0d1146a057..3bb43adce44 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -868,6 +868,31 @@ config TASK_SIZE default "0x80000000" if PPC_PREP || PPC_8xx default "0xc0000000" +config CONSISTENT_START_BOOL + bool "Set custom consistent memory pool address" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the base virtual address + of the consistent memory pool. This pool of virtual + memory is used to make consistent memory allocations. + +config CONSISTENT_START + hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL + default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx) + default "0xff100000" if NOT_COHERENT_CACHE + +config CONSISTENT_SIZE_BOOL + bool "Set custom consistent memory pool size" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the size of the + consistent memory pool. This pool of virtual memory + is used to make consistent memory allocations. + +config CONSISTENT_SIZE + hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL + default "0x00200000" if NOT_COHERENT_CACHE + config PIN_TLB bool "Pinned Kernel TLBs (860 ONLY)" depends on ADVANCED_OPTIONS && 8xx diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c index 005a28d380a..b7dc4c19f58 100644 --- a/arch/powerpc/lib/dma-noncoherent.c +++ b/arch/powerpc/lib/dma-noncoherent.c @@ -29,10 +29,120 @@ #include #include #include -#include #include +/* + * This address range defaults to a value that is safe for all + * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It + * can be further configured for specific applications under + * the "Advanced Setup" menu. -Matt + */ +#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) +#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) + +/* + * This is the page table (2MB) covering uncached, DMA consistent allocations + */ +static pte_t *consistent_pte; +static DEFINE_SPINLOCK(consistent_lock); + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vm_region region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vm_region_alloc with an appropriate + * struct vm_region head (eg): + * + * struct vm_region vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vm_region_alloc(). + */ +struct ppc_vm_region { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +static struct ppc_vm_region consistent_head = { + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_BASE, + .vm_end = CONSISTENT_END, +}; + +static struct ppc_vm_region * +ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct ppc_vm_region *c, *new; + + new = kmalloc(sizeof(struct ppc_vm_region), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&consistent_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found. + */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + + spin_unlock_irqrestore(&consistent_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&consistent_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) +{ + struct ppc_vm_region *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + /* * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. @@ -41,21 +151,21 @@ void * __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) { struct page *page; + struct ppc_vm_region *c; unsigned long order; - int i; - unsigned int nr_pages = PAGE_ALIGN(size)>>PAGE_SHIFT; - unsigned int array_size = nr_pages * sizeof(struct page *); - struct page **pages; - struct page *end; u64 mask = 0x00ffffff, limit; /* ISA default */ - struct vm_struct *area; - BUG_ON(!mem_init_done); + if (!consistent_pte) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + size = PAGE_ALIGN(size); limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - printk(KERN_WARNING "coherent allocation too big (requested " - "%#x mask %#Lx)\n", size, mask); + if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { + printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", + size, mask); return NULL; } @@ -68,8 +178,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) if (!page) goto no_page; - end = page + (1 << order); - /* * Invalidate any data that might be lurking in the * kernel direct-mapped region for device DMA. @@ -80,59 +188,48 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) flush_dcache_range(kaddr, kaddr + size); } - split_page(page, order); - /* - * Set the "dma handle" + * Allocate a virtual address in the consistent mapping region. */ - *handle = page_to_phys(page); - - area = get_vm_area_caller(size, VM_IOREMAP, - __builtin_return_address(1)); - if (!area) - goto out_free_pages; - - if (array_size > PAGE_SIZE) { - pages = vmalloc(array_size); - area->flags |= VM_VPAGES; - } else { - pages = kmalloc(array_size, GFP_KERNEL); - } - if (!pages) - goto out_free_area; + c = ppc_vm_region_alloc(&consistent_head, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); + if (c) { + unsigned long vaddr = c->vm_start; + pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); + struct page *end = page + (1 << order); - area->pages = pages; - area->nr_pages = nr_pages; + split_page(page, order); - for (i = 0; i < nr_pages; i++) - pages[i] = page + i; + /* + * Set the "dma handle" + */ + *handle = page_to_phys(page); - if (map_vm_area(area, pgprot_noncached(PAGE_KERNEL), &pages)) - goto out_unmap; + do { + BUG_ON(!pte_none(*pte)); - /* - * Free the otherwise unused pages. - */ - page += nr_pages; - while (page < end) { - __free_page(page); - page++; + SetPageReserved(page); + set_pte_at(&init_mm, vaddr, + pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); + page++; + pte++; + vaddr += PAGE_SIZE; + } while (size -= PAGE_SIZE); + + /* + * Free the otherwise unused pages. + */ + while (page < end) { + __free_page(page); + page++; + } + + return (void *)c->vm_start; } - return area->addr; -out_unmap: - vunmap(area->addr); - if (array_size > PAGE_SIZE) - vfree(pages); - else - kfree(pages); - goto out_free_pages; -out_free_area: - free_vm_area(area); -out_free_pages: if (page) __free_pages(page, order); -no_page: + no_page: return NULL; } EXPORT_SYMBOL(__dma_alloc_coherent); @@ -142,11 +239,103 @@ EXPORT_SYMBOL(__dma_alloc_coherent); */ void __dma_free_coherent(size_t size, void *vaddr) { - vfree(vaddr); + struct ppc_vm_region *c; + unsigned long flags, addr; + pte_t *ptep; + + size = PAGE_ALIGN(size); + + spin_lock_irqsave(&consistent_lock, flags); + + c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); + if (!c) + goto no_area; + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + unsigned long pfn; + + ptep++; + addr += PAGE_SIZE; + if (!pte_none(pte) && pte_present(pte)) { + pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + ClearPageReserved(page); + + __free_page(page); + continue; + } + } + + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + list_del(&c->vm_list); + + spin_unlock_irqrestore(&consistent_lock, flags); + + kfree(c); + return; + + no_area: + spin_unlock_irqrestore(&consistent_lock, flags); + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, vaddr); + dump_stack(); } EXPORT_SYMBOL(__dma_free_coherent); +/* + * Initialise the consistent memory allocation. + */ +static int __init dma_alloc_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int ret = 0; + + do { + pgd = pgd_offset(&init_mm, CONSISTENT_BASE); + pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); + pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + + pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + consistent_pte = pte; + } while (0); + + return ret; +} + +core_initcall(dma_alloc_init); + /* * make an area consistent. */ -- cgit v1.2.3 From b16e7766d6436835f473ba823ad04fbdfe5e9cbd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:36:10 +1000 Subject: powerpc: Move dma-noncoherent.c from arch/powerpc/lib to arch/powerpc/mm (pre-requisite to make the next patches more palatable) Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/Makefile | 1 - arch/powerpc/lib/dma-noncoherent.c | 426 ------------------------------------- arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/dma-noncoherent.c | 426 +++++++++++++++++++++++++++++++++++++ 4 files changed, 427 insertions(+), 427 deletions(-) delete mode 100644 arch/powerpc/lib/dma-noncoherent.c create mode 100644 arch/powerpc/mm/dma-noncoherent.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 8db35278a4b..29b742b90f1 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -18,7 +18,6 @@ obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ memcpy_64.o usercopy_64.o mem_64.o string.o obj-$(CONFIG_XMON) += sstep.o obj-$(CONFIG_KPROBES) += sstep.o -obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o ifeq ($(CONFIG_PPC64),y) obj-$(CONFIG_SMP) += locks.o diff --git a/arch/powerpc/lib/dma-noncoherent.c b/arch/powerpc/lib/dma-noncoherent.c deleted file mode 100644 index b7dc4c19f58..00000000000 --- a/arch/powerpc/lib/dma-noncoherent.c +++ /dev/null @@ -1,426 +0,0 @@ -/* - * PowerPC version derived from arch/arm/mm/consistent.c - * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) - * - * Copyright (C) 2000 Russell King - * - * Consistent memory allocators. Used for DMA devices that want to - * share uncached memory with the processor core. The function return - * is the virtual address and 'dma_handle' is the physical address. - * Mostly stolen from the ARM port, with some changes for PowerPC. - * -- Dan - * - * Reorganized to get rid of the arch-specific consistent_* functions - * and provide non-coherent implementations for the DMA API. -Matt - * - * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() - * implementation. This is pulled straight from ARM and barely - * modified. -Matt - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * This address range defaults to a value that is safe for all - * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It - * can be further configured for specific applications under - * the "Advanced Setup" menu. -Matt - */ -#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) -#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) -#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) - -/* - * This is the page table (2MB) covering uncached, DMA consistent allocations - */ -static pte_t *consistent_pte; -static DEFINE_SPINLOCK(consistent_lock); - -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). - * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vm_region region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vm_region_alloc(). - */ -struct ppc_vm_region { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; -}; - -static struct ppc_vm_region consistent_head = { - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_start = CONSISTENT_BASE, - .vm_end = CONSISTENT_END, -}; - -static struct ppc_vm_region * -ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct ppc_vm_region *c, *new; - - new = kmalloc(sizeof(struct ppc_vm_region), gfp); - if (!new) - goto out; - - spin_lock_irqsave(&consistent_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = c->vm_end; - if (addr > end) - goto nospc; - } - - found: - /* - * Insert this entry _before_ the one we found. - */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - - spin_unlock_irqrestore(&consistent_lock, flags); - return new; - - nospc: - spin_unlock_irqrestore(&consistent_lock, flags); - kfree(new); - out: - return NULL; -} - -static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) -{ - struct ppc_vm_region *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_start == addr) - goto out; - } - c = NULL; - out: - return c; -} - -/* - * Allocate DMA-coherent memory space and return both the kernel remapped - * virtual and bus address for that space. - */ -void * -__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) -{ - struct page *page; - struct ppc_vm_region *c; - unsigned long order; - u64 mask = 0x00ffffff, limit; /* ISA default */ - - if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); - return NULL; - } - - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { - printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", - size, mask); - return NULL; - } - - order = get_order(size); - - if (mask != 0xffffffff) - gfp |= GFP_DMA; - - page = alloc_pages(gfp, order); - if (!page) - goto no_page; - - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. - */ - { - unsigned long kaddr = (unsigned long)page_address(page); - memset(page_address(page), 0, size); - flush_dcache_range(kaddr, kaddr + size); - } - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = ppc_vm_region_alloc(&consistent_head, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); - if (c) { - unsigned long vaddr = c->vm_start; - pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); - struct page *end = page + (1 << order); - - split_page(page, order); - - /* - * Set the "dma handle" - */ - *handle = page_to_phys(page); - - do { - BUG_ON(!pte_none(*pte)); - - SetPageReserved(page); - set_pte_at(&init_mm, vaddr, - pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); - page++; - pte++; - vaddr += PAGE_SIZE; - } while (size -= PAGE_SIZE); - - /* - * Free the otherwise unused pages. - */ - while (page < end) { - __free_page(page); - page++; - } - - return (void *)c->vm_start; - } - - if (page) - __free_pages(page, order); - no_page: - return NULL; -} -EXPORT_SYMBOL(__dma_alloc_coherent); - -/* - * free a page as defined by the above mapping. - */ -void __dma_free_coherent(size_t size, void *vaddr) -{ - struct ppc_vm_region *c; - unsigned long flags, addr; - pte_t *ptep; - - size = PAGE_ALIGN(size); - - spin_lock_irqsave(&consistent_lock, flags); - - c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); - if (!c) - goto no_area; - - if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - unsigned long pfn; - - ptep++; - addr += PAGE_SIZE; - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); - - __free_page(page); - continue; - } - } - - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - list_del(&c->vm_list); - - spin_unlock_irqrestore(&consistent_lock, flags); - - kfree(c); - return; - - no_area: - spin_unlock_irqrestore(&consistent_lock, flags); - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", - __func__, vaddr); - dump_stack(); -} -EXPORT_SYMBOL(__dma_free_coherent); - -/* - * Initialise the consistent memory allocation. - */ -static int __init dma_alloc_init(void) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int ret = 0; - - do { - pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - - pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte = pte; - } while (0); - - return ret; -} - -core_initcall(dma_alloc_init); - -/* - * make an area consistent. - */ -void __dma_sync(void *vaddr, size_t size, int direction) -{ - unsigned long start = (unsigned long)vaddr; - unsigned long end = start + size; - - switch (direction) { - case DMA_NONE: - BUG(); - case DMA_FROM_DEVICE: - /* - * invalidate only when cache-line aligned otherwise there is - * the potential for discarding uncommitted data from the cache - */ - if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1))) - flush_dcache_range(start, end); - else - invalidate_dcache_range(start, end); - break; - case DMA_TO_DEVICE: /* writeback only */ - clean_dcache_range(start, end); - break; - case DMA_BIDIRECTIONAL: /* writeback and invalidate */ - flush_dcache_range(start, end); - break; - } -} -EXPORT_SYMBOL(__dma_sync); - -#ifdef CONFIG_HIGHMEM -/* - * __dma_sync_page() implementation for systems using highmem. - * In this case, each page of a buffer must be kmapped/kunmapped - * in order to have a virtual address for __dma_sync(). This must - * not sleep so kmap_atomic()/kunmap_atomic() are used. - * - * Note: yes, it is possible and correct to have a buffer extend - * beyond the first page. - */ -static inline void __dma_sync_page_highmem(struct page *page, - unsigned long offset, size_t size, int direction) -{ - size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); - size_t cur_size = seg_size; - unsigned long flags, start, seg_offset = offset; - int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; - int seg_nr = 0; - - local_irq_save(flags); - - do { - start = (unsigned long)kmap_atomic(page + seg_nr, - KM_PPC_SYNC_PAGE) + seg_offset; - - /* Sync this buffer segment */ - __dma_sync((void *)start, seg_size, direction); - kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE); - seg_nr++; - - /* Calculate next buffer segment size */ - seg_size = min((size_t)PAGE_SIZE, size - cur_size); - - /* Add the segment size to our running total */ - cur_size += seg_size; - seg_offset = 0; - } while (seg_nr < nr_segs); - - local_irq_restore(flags); -} -#endif /* CONFIG_HIGHMEM */ - -/* - * __dma_sync_page makes memory consistent. identical to __dma_sync, but - * takes a struct page instead of a virtual address - */ -void __dma_sync_page(struct page *page, unsigned long offset, - size_t size, int direction) -{ -#ifdef CONFIG_HIGHMEM - __dma_sync_page_highmem(page, offset, size, direction); -#else - unsigned long start = (unsigned long)page_address(page) + offset; - __dma_sync((void *)start, size, direction); -#endif -} -EXPORT_SYMBOL(__dma_sync_page); diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 17290bcedc5..b746f4ca420 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -26,3 +26,4 @@ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o +obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c new file mode 100644 index 00000000000..b7dc4c19f58 --- /dev/null +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -0,0 +1,426 @@ +/* + * PowerPC version derived from arch/arm/mm/consistent.c + * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) + * + * Copyright (C) 2000 Russell King + * + * Consistent memory allocators. Used for DMA devices that want to + * share uncached memory with the processor core. The function return + * is the virtual address and 'dma_handle' is the physical address. + * Mostly stolen from the ARM port, with some changes for PowerPC. + * -- Dan + * + * Reorganized to get rid of the arch-specific consistent_* functions + * and provide non-coherent implementations for the DMA API. -Matt + * + * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() + * implementation. This is pulled straight from ARM and barely + * modified. -Matt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * This address range defaults to a value that is safe for all + * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It + * can be further configured for specific applications under + * the "Advanced Setup" menu. -Matt + */ +#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) +#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) + +/* + * This is the page table (2MB) covering uncached, DMA consistent allocations + */ +static pte_t *consistent_pte; +static DEFINE_SPINLOCK(consistent_lock); + +/* + * VM region handling support. + * + * This should become something generic, handling VM region allocations for + * vmalloc and similar (ioremap, module space, etc). + * + * I envisage vmalloc()'s supporting vm_struct becoming: + * + * struct vm_struct { + * struct vm_region region; + * unsigned long flags; + * struct page **pages; + * unsigned int nr_pages; + * unsigned long phys_addr; + * }; + * + * get_vm_area() would then call vm_region_alloc with an appropriate + * struct vm_region head (eg): + * + * struct vm_region vmalloc_head = { + * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), + * .vm_start = VMALLOC_START, + * .vm_end = VMALLOC_END, + * }; + * + * However, vmalloc_head.vm_start is variable (typically, it is dependent on + * the amount of RAM found at boot time.) I would imagine that get_vm_area() + * would have to initialise this each time prior to calling vm_region_alloc(). + */ +struct ppc_vm_region { + struct list_head vm_list; + unsigned long vm_start; + unsigned long vm_end; +}; + +static struct ppc_vm_region consistent_head = { + .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), + .vm_start = CONSISTENT_BASE, + .vm_end = CONSISTENT_END, +}; + +static struct ppc_vm_region * +ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp) +{ + unsigned long addr = head->vm_start, end = head->vm_end - size; + unsigned long flags; + struct ppc_vm_region *c, *new; + + new = kmalloc(sizeof(struct ppc_vm_region), gfp); + if (!new) + goto out; + + spin_lock_irqsave(&consistent_lock, flags); + + list_for_each_entry(c, &head->vm_list, vm_list) { + if ((addr + size) < addr) + goto nospc; + if ((addr + size) <= c->vm_start) + goto found; + addr = c->vm_end; + if (addr > end) + goto nospc; + } + + found: + /* + * Insert this entry _before_ the one we found. + */ + list_add_tail(&new->vm_list, &c->vm_list); + new->vm_start = addr; + new->vm_end = addr + size; + + spin_unlock_irqrestore(&consistent_lock, flags); + return new; + + nospc: + spin_unlock_irqrestore(&consistent_lock, flags); + kfree(new); + out: + return NULL; +} + +static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr) +{ + struct ppc_vm_region *c; + + list_for_each_entry(c, &head->vm_list, vm_list) { + if (c->vm_start == addr) + goto out; + } + c = NULL; + out: + return c; +} + +/* + * Allocate DMA-coherent memory space and return both the kernel remapped + * virtual and bus address for that space. + */ +void * +__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) +{ + struct page *page; + struct ppc_vm_region *c; + unsigned long order; + u64 mask = 0x00ffffff, limit; /* ISA default */ + + if (!consistent_pte) { + printk(KERN_ERR "%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + + size = PAGE_ALIGN(size); + limit = (mask + 1) & ~mask; + if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { + printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", + size, mask); + return NULL; + } + + order = get_order(size); + + if (mask != 0xffffffff) + gfp |= GFP_DMA; + + page = alloc_pages(gfp, order); + if (!page) + goto no_page; + + /* + * Invalidate any data that might be lurking in the + * kernel direct-mapped region for device DMA. + */ + { + unsigned long kaddr = (unsigned long)page_address(page); + memset(page_address(page), 0, size); + flush_dcache_range(kaddr, kaddr + size); + } + + /* + * Allocate a virtual address in the consistent mapping region. + */ + c = ppc_vm_region_alloc(&consistent_head, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); + if (c) { + unsigned long vaddr = c->vm_start; + pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); + struct page *end = page + (1 << order); + + split_page(page, order); + + /* + * Set the "dma handle" + */ + *handle = page_to_phys(page); + + do { + BUG_ON(!pte_none(*pte)); + + SetPageReserved(page); + set_pte_at(&init_mm, vaddr, + pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); + page++; + pte++; + vaddr += PAGE_SIZE; + } while (size -= PAGE_SIZE); + + /* + * Free the otherwise unused pages. + */ + while (page < end) { + __free_page(page); + page++; + } + + return (void *)c->vm_start; + } + + if (page) + __free_pages(page, order); + no_page: + return NULL; +} +EXPORT_SYMBOL(__dma_alloc_coherent); + +/* + * free a page as defined by the above mapping. + */ +void __dma_free_coherent(size_t size, void *vaddr) +{ + struct ppc_vm_region *c; + unsigned long flags, addr; + pte_t *ptep; + + size = PAGE_ALIGN(size); + + spin_lock_irqsave(&consistent_lock, flags); + + c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr); + if (!c) + goto no_area; + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); + addr = c->vm_start; + do { + pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + unsigned long pfn; + + ptep++; + addr += PAGE_SIZE; + + if (!pte_none(pte) && pte_present(pte)) { + pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + ClearPageReserved(page); + + __free_page(page); + continue; + } + } + + printk(KERN_CRIT "%s: bad page in kernel page table\n", + __func__); + } while (size -= PAGE_SIZE); + + flush_tlb_kernel_range(c->vm_start, c->vm_end); + + list_del(&c->vm_list); + + spin_unlock_irqrestore(&consistent_lock, flags); + + kfree(c); + return; + + no_area: + spin_unlock_irqrestore(&consistent_lock, flags); + printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + __func__, vaddr); + dump_stack(); +} +EXPORT_SYMBOL(__dma_free_coherent); + +/* + * Initialise the consistent memory allocation. + */ +static int __init dma_alloc_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int ret = 0; + + do { + pgd = pgd_offset(&init_mm, CONSISTENT_BASE); + pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); + pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); + if (!pmd) { + printk(KERN_ERR "%s: no pmd tables\n", __func__); + ret = -ENOMEM; + break; + } + + pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); + if (!pte) { + printk(KERN_ERR "%s: no pte tables\n", __func__); + ret = -ENOMEM; + break; + } + + consistent_pte = pte; + } while (0); + + return ret; +} + +core_initcall(dma_alloc_init); + +/* + * make an area consistent. + */ +void __dma_sync(void *vaddr, size_t size, int direction) +{ + unsigned long start = (unsigned long)vaddr; + unsigned long end = start + size; + + switch (direction) { + case DMA_NONE: + BUG(); + case DMA_FROM_DEVICE: + /* + * invalidate only when cache-line aligned otherwise there is + * the potential for discarding uncommitted data from the cache + */ + if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1))) + flush_dcache_range(start, end); + else + invalidate_dcache_range(start, end); + break; + case DMA_TO_DEVICE: /* writeback only */ + clean_dcache_range(start, end); + break; + case DMA_BIDIRECTIONAL: /* writeback and invalidate */ + flush_dcache_range(start, end); + break; + } +} +EXPORT_SYMBOL(__dma_sync); + +#ifdef CONFIG_HIGHMEM +/* + * __dma_sync_page() implementation for systems using highmem. + * In this case, each page of a buffer must be kmapped/kunmapped + * in order to have a virtual address for __dma_sync(). This must + * not sleep so kmap_atomic()/kunmap_atomic() are used. + * + * Note: yes, it is possible and correct to have a buffer extend + * beyond the first page. + */ +static inline void __dma_sync_page_highmem(struct page *page, + unsigned long offset, size_t size, int direction) +{ + size_t seg_size = min((size_t)(PAGE_SIZE - offset), size); + size_t cur_size = seg_size; + unsigned long flags, start, seg_offset = offset; + int nr_segs = 1 + ((size - seg_size) + PAGE_SIZE - 1)/PAGE_SIZE; + int seg_nr = 0; + + local_irq_save(flags); + + do { + start = (unsigned long)kmap_atomic(page + seg_nr, + KM_PPC_SYNC_PAGE) + seg_offset; + + /* Sync this buffer segment */ + __dma_sync((void *)start, seg_size, direction); + kunmap_atomic((void *)start, KM_PPC_SYNC_PAGE); + seg_nr++; + + /* Calculate next buffer segment size */ + seg_size = min((size_t)PAGE_SIZE, size - cur_size); + + /* Add the segment size to our running total */ + cur_size += seg_size; + seg_offset = 0; + } while (seg_nr < nr_segs); + + local_irq_restore(flags); +} +#endif /* CONFIG_HIGHMEM */ + +/* + * __dma_sync_page makes memory consistent. identical to __dma_sync, but + * takes a struct page instead of a virtual address + */ +void __dma_sync_page(struct page *page, unsigned long offset, + size_t size, int direction) +{ +#ifdef CONFIG_HIGHMEM + __dma_sync_page_highmem(page, offset, size, direction); +#else + unsigned long start = (unsigned long)page_address(page) + offset; + __dma_sync((void *)start, size, direction); +#endif +} +EXPORT_SYMBOL(__dma_sync_page); -- cgit v1.2.3 From f637a49e507c88354ab32b5d914e06acfb7ee00d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:44:50 +1000 Subject: powerpc: Minor cleanups of kernel virt address space definitions Make FIXADDR_TOP a compile time constant and cleanup a couple of definitions relative to the layout of the kernel address space on ppc32. We also print out that layout at boot time for debugging purposes. This is a pre-requisite for properly fixing non-coherent DMA allocactions. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/fixmap.h | 4 ++-- arch/powerpc/include/asm/pgtable-ppc32.h | 22 ++++++++++++++++++++-- arch/powerpc/mm/init_32.c | 8 ++------ arch/powerpc/mm/mem.c | 13 +++++++++++++ arch/powerpc/mm/pgtable_32.c | 2 -- 5 files changed, 37 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index d60fd18f428..f1f4e23a84e 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -14,8 +14,6 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H -extern unsigned long FIXADDR_TOP; - #ifndef __ASSEMBLY__ #include #include @@ -24,6 +22,8 @@ extern unsigned long FIXADDR_TOP; #include #endif +#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) + /* * Here we define all the compile-time 'special' virtual * addresses. The point is to have a constant address at diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index ba45c997830..28fe9d4bae3 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -10,7 +10,7 @@ extern unsigned long va_to_phys(unsigned long address); extern pte_t *va_to_pte(unsigned long address); -extern unsigned long ioremap_bot, ioremap_base; +extern unsigned long ioremap_bot; #ifdef CONFIG_44x extern int icache_44x_need_flush; @@ -55,9 +55,27 @@ extern int icache_44x_need_flush; #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +/* + * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary + * value (for now) on others, from where we can start layout kernel + * virtual space that goes below PKMAP and FIXMAP + */ +#ifdef CONFIG_HIGHMEM +#define KVIRT_TOP PKMAP_BASE +#else +#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#endif + +/* + * ioremap_bot starts at that address. Early ioremaps move down from there, + * until mem_init() at which point this becomes the top of the vmalloc + * and ioremap space + */ +#define IOREMAP_TOP KVIRT_TOP + /* * Just any arbitrary offset to the start of the vmalloc VM area: the - * current 64MB value just means that there will be a 64MB "hole" after the + * current 16MB value just means that there will be a 64MB "hole" after the * physical memory until the kernel virtual memory starts. That means that * any out-of-bounds memory accesses will hopefully be caught. * The vmalloc() routines leaves a hole of 4kB between each vmalloced diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 666a5e8a5be..3de6a0d9382 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -168,12 +168,8 @@ void __init MMU_init(void) ppc_md.progress("MMU:mapin", 0x301); mapin_ram(); -#ifdef CONFIG_HIGHMEM - ioremap_base = PKMAP_BASE; -#else - ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ -#endif /* CONFIG_HIGHMEM */ - ioremap_bot = ioremap_base; + /* Initialize early top-down ioremap allocator */ + ioremap_bot = IOREMAP_TOP; /* Map in I/O resources */ if (ppc_md.progress) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d0602a76bf7..d3a4e67561f 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -380,6 +380,19 @@ void __init mem_init(void) bsssize >> 10, initsize >> 10); +#ifdef CONFIG_PPC32 + pr_info("Kernel virtual memory layout:\n"); + pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); +#ifdef CONFIG_HIGHMEM + pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", + PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); +#endif /* CONFIG_HIGHMEM */ + pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", + ioremap_bot, IOREMAP_TOP); + pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", + VMALLOC_START, VMALLOC_END); +#endif /* CONFIG_PPC32 */ + mem_init_done = 1; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 430d0908fa5..5422169626b 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -399,8 +399,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable) #endif /* CONFIG_DEBUG_PAGEALLOC */ static int fixmaps; -unsigned long FIXADDR_TOP = (-PAGE_SIZE); -EXPORT_SYMBOL(FIXADDR_TOP); void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { -- cgit v1.2.3 From 8b31e49d1d75729c1da9009664ba52abd1adc628 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 27 May 2009 13:50:33 +1000 Subject: powerpc: Fix up dma_alloc_coherent() on platforms without cache coherency. The implementation we just revived has issues, such as using a Kconfig-defined virtual address area in kernel space that nothing actually carves out (and thus will overlap whatever is there), or having some dependencies on being self contained in a single PTE page which adds unnecessary constraints on the kernel virtual address space. This fixes it by using more classic PTE accessors and automatically locating the area for consistent memory, carving an appropriate hole in the kernel virtual address space, leaving only the size of that area as a Kconfig option. It also brings some dma-mask related fixes from the ARM implementation which was almost identical initially but grew its own fixes. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Kconfig | 13 ---- arch/powerpc/include/asm/dma-mapping.h | 6 +- arch/powerpc/include/asm/pgtable-ppc32.h | 4 ++ arch/powerpc/kernel/dma.c | 2 +- arch/powerpc/mm/dma-noncoherent.c | 108 ++++++++++++------------------- arch/powerpc/mm/mem.c | 4 ++ 6 files changed, 54 insertions(+), 83 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3bb43adce44..cdc9a6ff4be 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -868,19 +868,6 @@ config TASK_SIZE default "0x80000000" if PPC_PREP || PPC_8xx default "0xc0000000" -config CONSISTENT_START_BOOL - bool "Set custom consistent memory pool address" - depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE - help - This option allows you to set the base virtual address - of the consistent memory pool. This pool of virtual - memory is used to make consistent memory allocations. - -config CONSISTENT_START - hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL - default "0xfd000000" if (NOT_COHERENT_CACHE && 8xx) - default "0xff100000" if NOT_COHERENT_CACHE - config CONSISTENT_SIZE_BOOL bool "Set custom consistent memory pool size" depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index c69f2b5f0cc..cb448d68452 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -26,7 +26,9 @@ * allocate the space "normally" and use the cache management functions * to ensure it is consistent. */ -extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp); +struct device; +extern void *__dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp); extern void __dma_free_coherent(size_t size, void *vaddr); extern void __dma_sync(void *vaddr, size_t size, int direction); extern void __dma_sync_page(struct page *page, unsigned long offset, @@ -37,7 +39,7 @@ extern void __dma_sync_page(struct page *page, unsigned long offset, * Cache coherent cores. */ -#define __dma_alloc_coherent(gfp, size, handle) NULL +#define __dma_alloc_coherent(dev, gfp, size, handle) NULL #define __dma_free_coherent(size, addr) ((void)0) #define __dma_sync(addr, size, rw) ((void)0) #define __dma_sync_page(pg, off, sz, rw) ((void)0) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 28fe9d4bae3..c9ff9d75990 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -71,7 +71,11 @@ extern int icache_44x_need_flush; * until mem_init() at which point this becomes the top of the vmalloc * and ioremap space */ +#ifdef CONFIG_NOT_COHERENT_CACHE +#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK) +#else #define IOREMAP_TOP KVIRT_TOP +#endif /* * Just any arbitrary offset to the start of the vmalloc VM area: the diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 53c7788cba7..6b02793dc75 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -32,7 +32,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size, { void *ret; #ifdef CONFIG_NOT_COHERENT_CACHE - ret = __dma_alloc_coherent(size, dma_handle, flag); + ret = __dma_alloc_coherent(dev, size, dma_handle, flag); if (ret == NULL) return NULL; *dma_handle += get_dma_direct_offset(dev); diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index b7dc4c19f58..36692f5c9a7 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -32,20 +32,21 @@ #include +#include "mmu_decl.h" + /* * This address range defaults to a value that is safe for all * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It * can be further configured for specific applications under * the "Advanced Setup" menu. -Matt */ -#define CONSISTENT_BASE (CONFIG_CONSISTENT_START) -#define CONSISTENT_END (CONFIG_CONSISTENT_START + CONFIG_CONSISTENT_SIZE) +#define CONSISTENT_BASE (IOREMAP_TOP) +#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE) #define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT) /* * This is the page table (2MB) covering uncached, DMA consistent allocations */ -static pte_t *consistent_pte; static DEFINE_SPINLOCK(consistent_lock); /* @@ -148,22 +149,38 @@ static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsi * virtual and bus address for that space. */ void * -__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) +__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) { struct page *page; struct ppc_vm_region *c; unsigned long order; - u64 mask = 0x00ffffff, limit; /* ISA default */ + u64 mask = ISA_DMA_THRESHOLD, limit; - if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); - dump_stack(); - return NULL; + if (dev) { + mask = dev->coherent_dma_mask; + + /* + * Sanity check the DMA mask - it must be non-zero, and + * must be able to be satisfied by a DMA allocation. + */ + if (mask == 0) { + dev_warn(dev, "coherent DMA mask is unset\n"); + goto no_page; + } + + if ((~mask) & ISA_DMA_THRESHOLD) { + dev_warn(dev, "coherent DMA mask %#llx is smaller " + "than system GFP_DMA mask %#llx\n", + mask, (unsigned long long)ISA_DMA_THRESHOLD); + goto no_page; + } } + size = PAGE_ALIGN(size); limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || size >= (CONSISTENT_END - CONSISTENT_BASE)) { + if ((limit && size >= limit) || + size >= (CONSISTENT_END - CONSISTENT_BASE)) { printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n", size, mask); return NULL; @@ -171,6 +188,7 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) order = get_order(size); + /* Might be useful if we ever have a real legacy DMA zone... */ if (mask != 0xffffffff) gfp |= GFP_DMA; @@ -195,7 +213,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); if (c) { unsigned long vaddr = c->vm_start; - pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); struct page *end = page + (1 << order); split_page(page, order); @@ -206,13 +223,10 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp) *handle = page_to_phys(page); do { - BUG_ON(!pte_none(*pte)); - SetPageReserved(page); - set_pte_at(&init_mm, vaddr, - pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL))); + map_page(vaddr, page_to_phys(page), + pgprot_noncached(PAGE_KERNEL)); page++; - pte++; vaddr += PAGE_SIZE; } while (size -= PAGE_SIZE); @@ -241,8 +255,7 @@ void __dma_free_coherent(size_t size, void *vaddr) { struct ppc_vm_region *c; unsigned long flags, addr; - pte_t *ptep; - + size = PAGE_ALIGN(size); spin_lock_irqsave(&consistent_lock, flags); @@ -258,29 +271,26 @@ void __dma_free_coherent(size_t size, void *vaddr) size = c->vm_end - c->vm_start; } - ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); addr = c->vm_start; do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); + pte_t *ptep; unsigned long pfn; - ptep++; - addr += PAGE_SIZE; - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - + ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr), + addr), + addr), + addr); + if (!pte_none(*ptep) && pte_present(*ptep)) { + pfn = pte_pfn(*ptep); + pte_clear(&init_mm, addr, ptep); if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - ClearPageReserved(page); + ClearPageReserved(page); __free_page(page); - continue; } } - - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); + addr += PAGE_SIZE; } while (size -= PAGE_SIZE); flush_tlb_kernel_range(c->vm_start, c->vm_end); @@ -300,42 +310,6 @@ void __dma_free_coherent(size_t size, void *vaddr) } EXPORT_SYMBOL(__dma_free_coherent); -/* - * Initialise the consistent memory allocation. - */ -static int __init dma_alloc_init(void) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - int ret = 0; - - do { - pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pud = pud_alloc(&init_mm, pgd, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pud, CONSISTENT_BASE); - if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - - pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); - if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); - ret = -ENOMEM; - break; - } - - consistent_pte = pte; - } while (0); - - return ret; -} - -core_initcall(dma_alloc_init); - /* * make an area consistent. */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d3a4e67561f..579382c163a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -387,6 +387,10 @@ void __init mem_init(void) pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP)); #endif /* CONFIG_HIGHMEM */ +#ifdef CONFIG_NOT_COHERENT_CACHE + pr_info(" * 0x%08lx..0x%08lx : consistent mem\n", + IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE); +#endif /* CONFIG_NOT_COHERENT_CACHE */ pr_info(" * 0x%08lx..0x%08lx : early ioremap\n", ioremap_bot, IOREMAP_TOP); pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n", -- cgit v1.2.3 From 60e59f68824102c87e64c5f51c4e57c0b8a61e46 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 24 May 2009 20:34:10 +0000 Subject: powerpc/pmac: Update PowerMac 32-bit defconfig This mostly adds back AppleTouch support and adds CONFIG_HIGHMEM by default. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/configs/pmac32_defconfig | 278 ++++++++++++++++++++++++---------- 1 file changed, 195 insertions(+), 83 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 5339bb44cce..ea8870a3448 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28-rc3 -# Tue Nov 11 19:36:51 2008 +# Linux kernel version: 2.6.30-rc7 +# Mon May 25 14:53:25 2009 # # CONFIG_PPC64 is not set @@ -14,6 +14,7 @@ CONFIG_6xx=y # CONFIG_40x is not set # CONFIG_44x is not set # CONFIG_E200 is not set +CONFIG_PPC_BOOK3S=y CONFIG_PPC_FPU=y CONFIG_ALTIVEC=y CONFIG_PPC_STD_MMU=y @@ -43,7 +44,7 @@ CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_PPC=y CONFIG_EARLY_PRINTK=y CONFIG_GENERIC_NVRAM=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_PPC_OF=y CONFIG_OF=y @@ -52,12 +53,14 @@ CONFIG_OF=y CONFIG_AUDIT_ARCH=y CONFIG_GENERIC_BUG=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_DTC=y # CONFIG_DEFAULT_UIMAGE is not set CONFIG_HIBERNATE_32=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y # CONFIG_PPC_DCR_NATIVE is not set # CONFIG_PPC_DCR_MMIO is not set +CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" # @@ -72,14 +75,24 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set # CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set @@ -88,23 +101,27 @@ CONFIG_NAMESPACES=y # CONFIG_IPC_NS is not set # CONFIG_USER_NS is not set # CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y # CONFIG_EMBEDDED is not set CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y -# CONFIG_COMPAT_BRK is not set CONFIG_BASE_FULL=y CONFIG_FUTEX=y -CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y @@ -114,10 +131,12 @@ CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y +# CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y # CONFIG_MARKERS is not set CONFIG_OPROFILE=y CONFIG_HAVE_OPROFILE=y @@ -127,10 +146,10 @@ CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y +# CONFIG_SLOW_WORK is not set # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set @@ -138,11 +157,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -CONFIG_KMOD=y CONFIG_BLOCK=y CONFIG_LBD=y -# CONFIG_BLK_DEV_IO_TRACE is not set -CONFIG_LSF=y CONFIG_BLK_DEV_BSG=y # CONFIG_BLK_DEV_INTEGRITY is not set @@ -158,14 +174,11 @@ CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="anticipatory" -CONFIG_CLASSIC_RCU=y CONFIG_FREEZER=y # # Platform support # -CONFIG_PPC_MULTIPLATFORM=y -CONFIG_CLASSIC32=y # CONFIG_PPC_CHRP is not set # CONFIG_MPC5121_ADS is not set # CONFIG_MPC5121_GENERIC is not set @@ -178,7 +191,9 @@ CONFIG_PPC_PMAC=y # CONFIG_PPC_83xx is not set # CONFIG_PPC_86xx is not set # CONFIG_EMBEDDED6xx is not set +# CONFIG_AMIGAONE is not set CONFIG_PPC_NATIVE=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y # CONFIG_IPIC is not set CONFIG_MPIC=y # CONFIG_MPIC_WEIRD is not set @@ -212,11 +227,12 @@ CONFIG_CPU_FREQ_PMAC=y CONFIG_PPC601_SYNC_FIX=y # CONFIG_TAU is not set # CONFIG_FSL_ULI1575 is not set +# CONFIG_SIMPLE_GPIO is not set # # Kernel options # -# CONFIG_HIGHMEM is not set +CONFIG_HIGHMEM=y CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -239,6 +255,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_HAS_WALK_MEMORY=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y # CONFIG_KEXEC is not set +# CONFIG_CRASH_DUMP is not set CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_POPULATES_NODE_MAP=y CONFIG_SELECT_MEMORY_MODEL=y @@ -250,12 +267,17 @@ CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 # CONFIG_MIGRATION is not set -# CONFIG_RESOURCES_64BIT is not set # CONFIG_PHYS_ADDR_T_64BIT is not set CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +CONFIG_PPC_4K_PAGES=y +# CONFIG_PPC_16K_PAGES is not set +# CONFIG_PPC_64K_PAGES is not set +# CONFIG_PPC_256K_PAGES is not set CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set @@ -288,6 +310,8 @@ CONFIG_ARCH_SUPPORTS_MSI=y # CONFIG_PCI_MSI is not set # CONFIG_PCI_LEGACY is not set # CONFIG_PCI_DEBUG is not set +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set CONFIG_PCCARD=m # CONFIG_PCMCIA_DEBUG is not set CONFIG_PCMCIA=m @@ -397,6 +421,8 @@ CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m # CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set +CONFIG_NETFILTER_XT_TARGET_HL=m +# CONFIG_NETFILTER_XT_TARGET_LED is not set CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m @@ -405,6 +431,7 @@ CONFIG_NETFILTER_XT_TARGET_RATEEST=m CONFIG_NETFILTER_XT_TARGET_TRACE=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +# CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set CONFIG_NETFILTER_XT_MATCH_COMMENT=m # CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m @@ -415,6 +442,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m # CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_HL=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -478,17 +506,15 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_IP_DCCP=m CONFIG_INET_DCCP_DIAG=m -CONFIG_IP_DCCP_ACKVEC=y # # DCCP CCIDs Configuration (EXPERIMENTAL) # -CONFIG_IP_DCCP_CCID2=m # CONFIG_IP_DCCP_CCID2_DEBUG is not set -CONFIG_IP_DCCP_CCID3=m +CONFIG_IP_DCCP_CCID3=y # CONFIG_IP_DCCP_CCID3_DEBUG is not set CONFIG_IP_DCCP_CCID3_RTO=100 -CONFIG_IP_DCCP_TFRC_LIB=m +CONFIG_IP_DCCP_TFRC_LIB=y # # DCCP Kernel Hacking @@ -508,13 +534,16 @@ CONFIG_IP_DCCP_TFRC_LIB=m # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set # CONFIG_NET_SCHED is not set CONFIG_NET_CLS_ROUTE=y +# CONFIG_DCB is not set # # Network testing # # CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set CONFIG_IRDA=m @@ -577,8 +606,6 @@ CONFIG_BT_HIDP=m # # Bluetooth device drivers # -CONFIG_BT_HCIUSB=m -# CONFIG_BT_HCIUSB_SCO is not set # CONFIG_BT_HCIBTUSB is not set # CONFIG_BT_HCIUART is not set CONFIG_BT_HCIBCM203X=m @@ -590,31 +617,27 @@ CONFIG_BT_HCIBFUSB=m # CONFIG_BT_HCIBTUART is not set # CONFIG_BT_HCIVHCI is not set # CONFIG_AF_RXRPC is not set -# CONFIG_PHONET is not set CONFIG_WIRELESS=y CONFIG_CFG80211=m -CONFIG_NL80211=y +# CONFIG_CFG80211_REG_DEBUG is not set CONFIG_WIRELESS_OLD_REGULATORY=y CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y +# CONFIG_LIB80211 is not set CONFIG_MAC80211=m # # Rate control algorithm selection # -CONFIG_MAC80211_RC_PID=y -# CONFIG_MAC80211_RC_MINSTREL is not set -CONFIG_MAC80211_RC_DEFAULT_PID=y -# CONFIG_MAC80211_RC_DEFAULT_MINSTREL is not set -CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_MINSTREL=y +# CONFIG_MAC80211_RC_DEFAULT_PID is not set +CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y +CONFIG_MAC80211_RC_DEFAULT="minstrel" # CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y +# CONFIG_MAC80211_DEBUGFS is not set # CONFIG_MAC80211_DEBUG_MENU is not set -CONFIG_IEEE80211=m -# CONFIG_IEEE80211_DEBUG is not set -CONFIG_IEEE80211_CRYPT_WEP=m -CONFIG_IEEE80211_CRYPT_CCMP=m -CONFIG_IEEE80211_CRYPT_TKIP=m +# CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -662,17 +685,27 @@ CONFIG_BLK_DEV_RAM_SIZE=4096 # CONFIG_BLK_DEV_HD is not set CONFIG_MISC_DEVICES=y # CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_HP_ILO is not set +# CONFIG_ISL29003 is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_93CX6 is not set CONFIG_HAVE_IDE=y CONFIG_IDE=y # # Please see Documentation/ide/ide.txt for help/info on IDE drives # +CONFIG_IDE_XFER_MODE=y CONFIG_IDE_TIMINGS=y CONFIG_IDE_ATAPI=y # CONFIG_BLK_DEV_IDE_SATA is not set @@ -684,7 +717,6 @@ CONFIG_BLK_DEV_IDECS=m CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y # CONFIG_BLK_DEV_IDETAPE is not set -CONFIG_BLK_DEV_IDESCSI=y # CONFIG_IDE_TASK_IOCTL is not set CONFIG_IDE_PROC_FS=y @@ -714,6 +746,7 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y # CONFIG_BLK_DEV_JMICRON is not set # CONFIG_BLK_DEV_SC1200 is not set # CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_IT8172 is not set # CONFIG_BLK_DEV_IT8213 is not set # CONFIG_BLK_DEV_IT821X is not set # CONFIG_BLK_DEV_NS87415 is not set @@ -728,7 +761,6 @@ CONFIG_BLK_DEV_SL82C105=y # CONFIG_BLK_DEV_TC86C001 is not set CONFIG_BLK_DEV_IDE_PMAC=y CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y -CONFIG_BLK_DEV_IDEDMA_PMAC=y CONFIG_BLK_DEV_IDEDMA=y # @@ -772,6 +804,7 @@ CONFIG_SCSI_FC_ATTRS=y # CONFIG_SCSI_SRP_ATTRS is not set CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set # CONFIG_BLK_DEV_3W_XXXX_RAID is not set # CONFIG_SCSI_3W_9XXX is not set # CONFIG_SCSI_ACARD is not set @@ -791,8 +824,12 @@ CONFIG_SCSI_AIC7XXX_OLD=m # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set +# CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -822,6 +859,7 @@ CONFIG_SCSI_MAC53C94=y # CONFIG_SCSI_SRP is not set # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set # CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set # CONFIG_ATA is not set CONFIG_MD=y CONFIG_BLK_DEV_MD=m @@ -881,6 +919,7 @@ CONFIG_THERM_ADT746X=m # CONFIG_ANSLCD is not set CONFIG_PMAC_RACKMETER=m CONFIG_NETDEVICES=y +CONFIG_COMPAT_NET_DEV_OPS=y CONFIG_DUMMY=m # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set @@ -898,6 +937,8 @@ CONFIG_BMAC=y CONFIG_SUNGEM=y # CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set +# CONFIG_ETHOC is not set +# CONFIG_DNET is not set # CONFIG_NET_TULIP is not set # CONFIG_HP100 is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set @@ -913,7 +954,6 @@ CONFIG_PCNET32=y # CONFIG_ADAPTEC_STARFIRE is not set # CONFIG_B44 is not set # CONFIG_FORCEDETH is not set -# CONFIG_EEPRO100 is not set # CONFIG_E100 is not set # CONFIG_FEALNX is not set # CONFIG_NATSEMI is not set @@ -923,6 +963,7 @@ CONFIG_PCNET32=y # CONFIG_R6040 is not set # CONFIG_SIS900 is not set # CONFIG_EPIC100 is not set +# CONFIG_SMSC9420 is not set # CONFIG_SUNDANCE is not set # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set @@ -935,6 +976,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -945,18 +987,20 @@ CONFIG_NETDEV_1000=y # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set -# CONFIG_MV643XX_ETH is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set # CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_CHELSIO_T3 is not set # CONFIG_ENIC is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -966,6 +1010,7 @@ CONFIG_NETDEV_10000=y # CONFIG_BNX2X is not set # CONFIG_QLGE is not set # CONFIG_SFC is not set +# CONFIG_BE2NET is not set # CONFIG_TR is not set # @@ -974,20 +1019,11 @@ CONFIG_NETDEV_10000=y # CONFIG_WLAN_PRE80211 is not set CONFIG_WLAN_80211=y # CONFIG_PCMCIA_RAYCS is not set -# CONFIG_IPW2100 is not set -# CONFIG_IPW2200 is not set # CONFIG_LIBERTAS is not set # CONFIG_LIBERTAS_THINFIRM is not set # CONFIG_AIRO is not set -CONFIG_HERMES=m -CONFIG_APPLE_AIRPORT=m -# CONFIG_PLX_HERMES is not set -# CONFIG_TMD_HERMES is not set -# CONFIG_NORTEL_HERMES is not set -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -# CONFIG_PCMCIA_SPECTRUM is not set # CONFIG_ATMEL is not set +# CONFIG_AT76C50X_USB is not set # CONFIG_AIRO_CS is not set # CONFIG_PCMCIA_WL3501 is not set CONFIG_PRISM54=m @@ -997,15 +1033,17 @@ CONFIG_PRISM54=m # CONFIG_RTL8187 is not set # CONFIG_ADM8211 is not set # CONFIG_MAC80211_HWSIM is not set +# CONFIG_MWL8K is not set CONFIG_P54_COMMON=m # CONFIG_P54_USB is not set # CONFIG_P54_PCI is not set +CONFIG_P54_LEDS=y # CONFIG_ATH5K is not set # CONFIG_ATH9K is not set -# CONFIG_IWLCORE is not set -# CONFIG_IWLWIFI_LEDS is not set -# CONFIG_IWLAGN is not set -# CONFIG_IWL3945 is not set +# CONFIG_AR9170_USB is not set +# CONFIG_IPW2100 is not set +# CONFIG_IPW2200 is not set +# CONFIG_IWLWIFI is not set # CONFIG_HOSTAP is not set CONFIG_B43=m CONFIG_B43_PCI_AUTOSELECT=y @@ -1025,6 +1063,19 @@ CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y # CONFIG_B43LEGACY_PIO_MODE is not set # CONFIG_ZD1211RW is not set # CONFIG_RT2X00 is not set +CONFIG_HERMES=m +CONFIG_HERMES_CACHE_FW_ON_INIT=y +CONFIG_APPLE_AIRPORT=m +# CONFIG_PLX_HERMES is not set +# CONFIG_TMD_HERMES is not set +# CONFIG_NORTEL_HERMES is not set +CONFIG_PCI_HERMES=m +CONFIG_PCMCIA_HERMES=m +# CONFIG_PCMCIA_SPECTRUM is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # # USB Network Adapters @@ -1036,6 +1087,7 @@ CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y CONFIG_USB_USBNET=m CONFIG_USB_NET_AX8817X=m CONFIG_USB_NET_CDCETHER=m +# CONFIG_USB_NET_CDC_EEM is not set # CONFIG_USB_NET_DM9601 is not set # CONFIG_USB_NET_SMSC95XX is not set # CONFIG_USB_NET_GL620A is not set @@ -1099,7 +1151,7 @@ CONFIG_INPUT_KEYBOARD=y CONFIG_INPUT_MOUSE=y # CONFIG_MOUSE_PS2 is not set # CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_APPLETOUCH is not set +CONFIG_MOUSE_APPLETOUCH=y # CONFIG_MOUSE_BCM5974 is not set # CONFIG_MOUSE_VSXXXAA is not set # CONFIG_INPUT_JOYSTICK is not set @@ -1150,10 +1202,13 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y # CONFIG_SERIAL_JSM is not set # CONFIG_SERIAL_OF_PLATFORM is not set CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_HVC_UDBG is not set # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=m +# CONFIG_HW_RANDOM_TIMERIOMEM is not set CONFIG_NVRAM=y CONFIG_GEN_RTC=y # CONFIG_GEN_RTC_X is not set @@ -1232,12 +1287,9 @@ CONFIG_I2C_POWERMAC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set -# CONFIG_SENSORS_PCF8591 is not set # CONFIG_SENSORS_MAX6875 is not set # CONFIG_SENSORS_TSL2550 is not set # CONFIG_I2C_DEBUG_CORE is not set @@ -1259,11 +1311,11 @@ CONFIG_BATTERY_PMU=y # CONFIG_THERMAL is not set # CONFIG_THERMAL_HWMON is not set # CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y CONFIG_SSB=m CONFIG_SSB_SPROM=y CONFIG_SSB_PCIHOST_POSSIBLE=y @@ -1281,18 +1333,13 @@ CONFIG_SSB_DRIVER_PCICORE=y # CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_TWL4030_CORE is not set # CONFIG_MFD_TMIO is not set # CONFIG_PMIC_DA903X is not set # CONFIG_MFD_WM8400 is not set # CONFIG_MFD_WM8350_I2C is not set - -# -# Voltage and Current regulators -# +# CONFIG_MFD_PCF50633 is not set # CONFIG_REGULATOR is not set -# CONFIG_REGULATOR_FIXED_VOLTAGE is not set -# CONFIG_REGULATOR_VIRTUAL_CONSUMER is not set -# CONFIG_REGULATOR_BQ24022 is not set # # Multimedia devices @@ -1390,6 +1437,7 @@ CONFIG_FB_ATY_BACKLIGHT=y # CONFIG_FB_KYRO is not set CONFIG_FB_3DFX=y # CONFIG_FB_3DFX_ACCEL is not set +CONFIG_FB_3DFX_I2C=y # CONFIG_FB_VOODOO1 is not set # CONFIG_FB_VT8623 is not set # CONFIG_FB_TRIDENT is not set @@ -1399,12 +1447,14 @@ CONFIG_FB_3DFX=y # CONFIG_FB_IBM_GXT4500 is not set # CONFIG_FB_VIRTUAL is not set # CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=m # CONFIG_LCD_ILI9320 is not set # CONFIG_LCD_PLATFORM is not set CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_BACKLIGHT_CORGI is not set +CONFIG_BACKLIGHT_GENERIC=y # # Display device support @@ -1444,11 +1494,13 @@ CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m CONFIG_SND_PCM_OSS_PLUGINS=y CONFIG_SND_SEQUENCER_OSS=y +# CONFIG_SND_HRTIMER is not set # CONFIG_SND_DYNAMIC_MINORS is not set CONFIG_SND_SUPPORT_OLD_API=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y CONFIG_SND_DRIVERS=y CONFIG_SND_DUMMY=m # CONFIG_SND_VIRMIDI is not set @@ -1486,6 +1538,8 @@ CONFIG_SND_PCI=y # CONFIG_SND_INDIGO is not set # CONFIG_SND_INDIGOIO is not set # CONFIG_SND_INDIGODJ is not set +# CONFIG_SND_INDIGOIOX is not set +# CONFIG_SND_INDIGODJX is not set # CONFIG_SND_EMU10K1 is not set # CONFIG_SND_EMU10K1X is not set # CONFIG_SND_ENS1370 is not set @@ -1551,28 +1605,31 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y -CONFIG_HID_BRIGHT=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y -CONFIG_HID_DELL=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +CONFIG_HID_KYE=y CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y +CONFIG_HID_NTRIG=y CONFIG_HID_PANTHERLORD=y # CONFIG_PANTHERLORD_FF is not set CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y +# CONFIG_GREENASIA_FF is not set +CONFIG_HID_TOPSEED=y # CONFIG_THRUSTMASTER_FF is not set # CONFIG_ZEROPLUS_FF is not set CONFIG_USB_SUPPORT=y @@ -1603,6 +1660,7 @@ CONFIG_USB_EHCI_HCD=m CONFIG_USB_EHCI_ROOT_HUB_TT=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set # CONFIG_USB_EHCI_HCD_PPC_OF is not set +# CONFIG_USB_OXU210HP_HCD is not set # CONFIG_USB_ISP116X_HCD is not set # CONFIG_USB_ISP1760_HCD is not set CONFIG_USB_OHCI_HCD=y @@ -1625,24 +1683,23 @@ CONFIG_USB_PRINTER=m # CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # -# may also be needed; see USB_STORAGE Help for more information +# also be needed; see USB_STORAGE Help for more info # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set # CONFIG_USB_STORAGE_DATAFAB is not set # CONFIG_USB_STORAGE_FREECOM is not set # CONFIG_USB_STORAGE_ISD200 is not set -# CONFIG_USB_STORAGE_DPCM is not set # CONFIG_USB_STORAGE_USBAT is not set # CONFIG_USB_STORAGE_SDDR09 is not set # CONFIG_USB_STORAGE_SDDR55 is not set # CONFIG_USB_STORAGE_JUMPSHOT is not set # CONFIG_USB_STORAGE_ALAUDA is not set -CONFIG_USB_STORAGE_ONETOUCH=y +CONFIG_USB_STORAGE_ONETOUCH=m # CONFIG_USB_STORAGE_KARMA is not set # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set # CONFIG_USB_LIBUSUAL is not set @@ -1665,7 +1722,7 @@ CONFIG_USB_EZUSB=y # CONFIG_USB_SERIAL_CH341 is not set # CONFIG_USB_SERIAL_WHITEHEAT is not set # CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set -# CONFIG_USB_SERIAL_CP2101 is not set +# CONFIG_USB_SERIAL_CP210X is not set # CONFIG_USB_SERIAL_CYPRESS_M8 is not set # CONFIG_USB_SERIAL_EMPEG is not set # CONFIG_USB_SERIAL_FTDI_SIO is not set @@ -1701,15 +1758,19 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y # CONFIG_USB_SERIAL_NAVMAN is not set # CONFIG_USB_SERIAL_PL2303 is not set # CONFIG_USB_SERIAL_OTI6858 is not set +# CONFIG_USB_SERIAL_QUALCOMM is not set # CONFIG_USB_SERIAL_SPCP8X5 is not set # CONFIG_USB_SERIAL_HP4X is not set # CONFIG_USB_SERIAL_SAFE is not set +# CONFIG_USB_SERIAL_SIEMENS_MPI is not set # CONFIG_USB_SERIAL_SIERRAWIRELESS is not set +# CONFIG_USB_SERIAL_SYMBOL is not set # CONFIG_USB_SERIAL_TI is not set # CONFIG_USB_SERIAL_CYBERJACK is not set # CONFIG_USB_SERIAL_XIRCOM is not set # CONFIG_USB_SERIAL_OPTION is not set # CONFIG_USB_SERIAL_OMNINET is not set +# CONFIG_USB_SERIAL_OPTICON is not set # CONFIG_USB_SERIAL_DEBUG is not set # @@ -1726,7 +1787,6 @@ CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y # CONFIG_USB_LED is not set # CONFIG_USB_CYPRESS_CY7C63 is not set # CONFIG_USB_CYTHERM is not set -# CONFIG_USB_PHIDGET is not set # CONFIG_USB_IDMOUSE is not set # CONFIG_USB_FTDI_ELAN is not set CONFIG_USB_APPLEDISPLAY=m @@ -1738,6 +1798,11 @@ CONFIG_USB_APPLEDISPLAY=m # CONFIG_USB_ISIGHTFW is not set # CONFIG_USB_VST is not set # CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_NOP_USB_XCEIV is not set # CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -1748,7 +1813,9 @@ CONFIG_LEDS_CLASS=y # LED drivers # # CONFIG_LEDS_PCA9532 is not set +# CONFIG_LEDS_LP5521 is not set # CONFIG_LEDS_PCA955X is not set +# CONFIG_LEDS_BD2802 is not set # # LED Triggers @@ -1759,11 +1826,16 @@ CONFIG_LEDS_TRIGGER_IDE_DISK=y # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set CONFIG_LEDS_TRIGGER_DEFAULT_ON=y + +# +# iptables trigger is under Netfilter config (LED target) +# # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set # CONFIG_EDAC is not set # CONFIG_RTC_CLASS is not set # CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # CONFIG_STAGING is not set @@ -1774,6 +1846,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y # CONFIG_EXT3_FS_SECURITY is not set @@ -1783,7 +1856,9 @@ CONFIG_EXT4_FS_XATTR=y # CONFIG_EXT4_FS_POSIX_ACL is not set # CONFIG_EXT4_FS_SECURITY is not set CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set CONFIG_JBD2=y +# CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set @@ -1792,6 +1867,7 @@ CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y @@ -1800,6 +1876,11 @@ CONFIG_INOTIFY_USER=y CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m +# +# Caches +# +# CONFIG_FSCACHE is not set + # # CD-ROM/DVD Filesystems # @@ -1831,10 +1912,7 @@ CONFIG_TMPFS=y # CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_HUGETLB_PAGE is not set # CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set CONFIG_HFS_FS=m @@ -1843,6 +1921,7 @@ CONFIG_HFSPLUS_FS=m # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set @@ -1851,6 +1930,7 @@ CONFIG_HFSPLUS_FS=m # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1868,7 +1948,6 @@ CONFIG_NFS_ACL_SUPPORT=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y -# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=y # CONFIG_RPCSEC_GSS_SPKM3 is not set CONFIG_SMB_FS=m @@ -1940,11 +2019,13 @@ CONFIG_NLS_ISO8859_1=m # CONFIG_NLS_KOI8_U is not set CONFIG_NLS_UTF8=m # CONFIG_DLM is not set +CONFIG_BINARY_PRINTF=y # # Library routines # CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y @@ -1954,15 +2035,18 @@ CONFIG_CRC32=y CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y CONFIG_TEXTSEARCH=y CONFIG_TEXTSEARCH_KMP=m CONFIG_TEXTSEARCH_BM=m CONFIG_TEXTSEARCH_FSM=m -CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y CONFIG_HAVE_LMB=y +CONFIG_NLATTR=y # # Kernel hacking @@ -1973,13 +2057,16 @@ CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y # CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set +CONFIG_DEBUG_FS=y # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y CONFIG_SCHEDSTATS=y # CONFIG_TIMER_STATS is not set @@ -1994,6 +2081,7 @@ CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set CONFIG_STACKTRACE=y # CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set @@ -2001,6 +2089,7 @@ CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_LIST is not set # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_CPU_STALL_DETECTOR is not set @@ -2009,7 +2098,14 @@ CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_FAULT_INJECTION is not set CONFIG_LATENCYTOP=y CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_RING_BUFFER=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -2017,12 +2113,19 @@ CONFIG_HAVE_FUNCTION_TRACER=y # CONFIG_FUNCTION_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set -# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_DYNAMIC_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set +CONFIG_PRINT_STACK_DEPTH=64 # CONFIG_DEBUG_STACKOVERFLOW is not set # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_CODE_PATCHING_SELFTEST is not set @@ -2033,6 +2136,7 @@ CONFIG_XMON_DEFAULT=y CONFIG_XMON_DISASSEMBLY=y CONFIG_DEBUGGER=y CONFIG_IRQSTACKS=y +# CONFIG_VIRQ_DEBUG is not set # CONFIG_BDI_SWITCH is not set CONFIG_BOOTX_TEXT=y # CONFIG_PPC_EARLY_DEBUG is not set @@ -2051,13 +2155,20 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y -CONFIG_CRYPTO_RNG=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_GF128MUL is not set CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_AUTHENC=y # CONFIG_CRYPTO_TEST is not set @@ -2127,6 +2238,7 @@ CONFIG_CRYPTO_TWOFISH_COMMON=m # Compression # CONFIG_CRYPTO_DEFLATE=m +# CONFIG_CRYPTO_ZLIB is not set # CONFIG_CRYPTO_LZO is not set # -- cgit v1.2.3 From b23f3325ed465f1bd914384884269af0d106778c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 15:13:03 +0200 Subject: perf_counter: Rename various fields A few renames: s/irq_period/sample_period/ s/irq_freq/sample_freq/ s/PERF_RECORD_/PERF_SAMPLE_/ s/record_type/sample_type/ And change both the new sample_type and read_format to u64. Reported-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index f96d55f55bd..c9633321e7a 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -535,7 +535,7 @@ void hw_perf_enable(void) continue; } val = 0; - if (counter->hw.irq_period) { + if (counter->hw.sample_period) { left = atomic64_read(&counter->hw.period_left); if (left < 0x80000000L) val = 0x80000000L - left; @@ -749,12 +749,12 @@ static void power_pmu_unthrottle(struct perf_counter *counter) s64 val, left; unsigned long flags; - if (!counter->hw.idx || !counter->hw.irq_period) + if (!counter->hw.idx || !counter->hw.sample_period) return; local_irq_save(flags); perf_disable(); power_pmu_read(counter); - left = counter->hw.irq_period; + left = counter->hw.sample_period; val = 0; if (left < 0x80000000L) val = 0x80000000L - left; @@ -789,7 +789,7 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, if (counter->hw_event.exclude_user || counter->hw_event.exclude_kernel || counter->hw_event.exclude_hv - || counter->hw_event.irq_period) + || counter->hw_event.sample_period) return 0; if (ppmu->limited_pmc_event(ev)) @@ -925,7 +925,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) counter->hw.config = events[n]; counter->hw.counter_base = cflags[n]; - atomic64_set(&counter->hw.period_left, counter->hw.irq_period); + atomic64_set(&counter->hw.period_left, counter->hw.sample_period); /* * See if we need to reserve the PMU. @@ -958,7 +958,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) static void record_and_restart(struct perf_counter *counter, long val, struct pt_regs *regs, int nmi) { - u64 period = counter->hw.irq_period; + u64 period = counter->hw.sample_period; s64 prev, delta, left; int record = 0; u64 addr, mmcra, sdsync; -- cgit v1.2.3 From 0d48696f87e3618b0d35bd3e4e9d7c188d51e7de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2009 19:22:16 +0200 Subject: perf_counter: Rename perf_counter_hw_event => perf_counter_attr The structure isn't hw only and when I read event, I think about those things that fall out the other end. Rename the thing. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index c9633321e7a..ea54686cb78 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -262,13 +262,13 @@ static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], } counter = ctrs[i]; if (first) { - eu = counter->hw_event.exclude_user; - ek = counter->hw_event.exclude_kernel; - eh = counter->hw_event.exclude_hv; + eu = counter->attr.exclude_user; + ek = counter->attr.exclude_kernel; + eh = counter->attr.exclude_hv; first = 0; - } else if (counter->hw_event.exclude_user != eu || - counter->hw_event.exclude_kernel != ek || - counter->hw_event.exclude_hv != eh) { + } else if (counter->attr.exclude_user != eu || + counter->attr.exclude_kernel != ek || + counter->attr.exclude_hv != eh) { return -EAGAIN; } } @@ -483,16 +483,16 @@ void hw_perf_enable(void) /* * Add in MMCR0 freeze bits corresponding to the - * hw_event.exclude_* bits for the first counter. + * attr.exclude_* bits for the first counter. * We have already checked that all counters have the * same values for these bits as the first counter. */ counter = cpuhw->counter[0]; - if (counter->hw_event.exclude_user) + if (counter->attr.exclude_user) cpuhw->mmcr[0] |= MMCR0_FCP; - if (counter->hw_event.exclude_kernel) + if (counter->attr.exclude_kernel) cpuhw->mmcr[0] |= freeze_counters_kernel; - if (counter->hw_event.exclude_hv) + if (counter->attr.exclude_hv) cpuhw->mmcr[0] |= MMCR0_FCHV; /* @@ -786,10 +786,10 @@ static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, int n; u64 alt[MAX_EVENT_ALTERNATIVES]; - if (counter->hw_event.exclude_user - || counter->hw_event.exclude_kernel - || counter->hw_event.exclude_hv - || counter->hw_event.sample_period) + if (counter->attr.exclude_user + || counter->attr.exclude_kernel + || counter->attr.exclude_hv + || counter->attr.sample_period) return 0; if (ppmu->limited_pmc_event(ev)) @@ -855,13 +855,13 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) if (!ppmu) return ERR_PTR(-ENXIO); - if (!perf_event_raw(&counter->hw_event)) { - ev = perf_event_id(&counter->hw_event); + if (!perf_event_raw(&counter->attr)) { + ev = perf_event_id(&counter->attr); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return ERR_PTR(-EOPNOTSUPP); ev = ppmu->generic_events[ev]; } else { - ev = perf_event_config(&counter->hw_event); + ev = perf_event_config(&counter->attr); } counter->hw.config_base = ev; counter->hw.idx = 0; @@ -872,7 +872,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) * the user set it to. */ if (!firmware_has_feature(FW_FEATURE_LPAR)) - counter->hw_event.exclude_hv = 0; + counter->attr.exclude_hv = 0; /* * If this is a per-task counter, then we can use @@ -990,7 +990,7 @@ static void record_and_restart(struct perf_counter *counter, long val, */ if (record) { addr = 0; - if (counter->hw_event.record_type & PERF_RECORD_ADDR) { + if (counter->attr.record_type & PERF_RECORD_ADDR) { /* * The user wants a data address recorded. * If we're not doing instruction sampling, -- cgit v1.2.3 From 6984efb692e97ce5f75f26e595685c04c2061bac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 3 Jun 2009 19:38:58 +1000 Subject: perf_counter: powerpc: Fix event alternative code generation on POWER5/5+ Commit ef923214 ("perf_counter: powerpc: use u64 for event codes internally") introduced a bug where the return value from function find_alternative_bdecode gets put into a u64 variable and later tested to see if it is < 0. The effect is that we get extra, bogus event code alternatives on POWER5 and POWER5+, leading to error messages such as "oops compute_mmcr failed" being printed and counters not counting properly. This fixes it by using s64 for the return type of find_alternative_bdecode and for the local variable that the caller puts the value in. It also makes the event argument a u64 on POWER5+ for consistency. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur Cc: Stephane Eranian LKML-Reference: <18982.17586.666132.90983@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/power5+-pmu.c | 4 ++-- arch/powerpc/kernel/power5-pmu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index c6cdfc165d6..8471e3c2e46 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -242,7 +242,7 @@ static const unsigned char bytedecode_alternatives[4][4] = { * event code for those that do, or -1 otherwise. This also handles * alternative PCMSEL values for add events. */ -static int find_alternative_bdecode(unsigned int event) +static s64 find_alternative_bdecode(u64 event) { int pmc, altpmc, pp, j; @@ -277,7 +277,7 @@ static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { int i, j, nalt = 1; int nlim; - u64 ae; + s64 ae; alt[0] = event; nalt = 1; diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index d5344968ee9..1b44c5fca18 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -250,7 +250,7 @@ static const unsigned char bytedecode_alternatives[4][4] = { * PMCSEL values on other counters. This returns the alternative * event code for those that do, or -1 otherwise. */ -static u64 find_alternative_bdecode(u64 event) +static s64 find_alternative_bdecode(u64 event) { int pmc, altpmc, pp, j; @@ -272,7 +272,7 @@ static u64 find_alternative_bdecode(u64 event) static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { int i, j, nalt = 1; - u64 ae; + s64 ae; alt[0] = event; nalt = 1; -- cgit v1.2.3 From dcd945e0d8a6d654e3e1de51faea9f98f1504aa5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 3 Jun 2009 19:40:36 +1000 Subject: perf_counter: powerpc: Fix race causing "oops trying to read PMC0" errors When using interrupting counters and limited (non-interrupting) counters at the same time, it's possible that we get an interrupt in write_mmcr0() after writing MMCR0 but before we have set up the counters using limited PMCs. What happens then is that we get into perf_counter_interrupt() with counter->hw.idx = 0 for the limited counters, leading to the "oops trying to read PMC0" error message being printed. This fixes the problem by making perf_counter_interrupt() robust against counter->hw.idx being zero (the counter is just ignored in that case) and also by changing write_mmcr0() to write MMCR0 initially with the counter overflow interrupt enable bits masked (set to 0). If the MMCR0 value requested by the caller has either of those bits set, we write MMCR0 again with the requested value of those bits after setting up the limited counters properly. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: John Kacur Cc: Stephane Eranian LKML-Reference: <18982.17684.138182.954599@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index ea54686cb78..4cc4ac5c791 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -372,16 +372,28 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) /* * Write MMCR0, then read PMC5 and PMC6 immediately. + * To ensure we don't get a performance monitor interrupt + * between writing MMCR0 and freezing/thawing the limited + * counters, we first write MMCR0 with the counter overflow + * interrupt enable bits turned off. */ asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" : "=&r" (pmc5), "=&r" (pmc6) - : "r" (mmcr0), "i" (SPRN_MMCR0), + : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), + "i" (SPRN_MMCR0), "i" (SPRN_PMC5), "i" (SPRN_PMC6)); if (mmcr0 & MMCR0_FC) freeze_limited_counters(cpuhw, pmc5, pmc6); else thaw_limited_counters(cpuhw, pmc5, pmc6); + + /* + * Write the full MMCR0 including the counter overflow interrupt + * enable bits, if necessary. + */ + if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) + mtspr(SPRN_MMCR0, mmcr0); } /* @@ -1108,7 +1120,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; - if (is_limited_pmc(counter->hw.idx)) + if (!counter->hw.idx || is_limited_pmc(counter->hw.idx)) continue; val = read_pmc(counter->hw.idx); if ((int)val < 0) { -- cgit v1.2.3 From 1b58c2515be48d5df79d20210ac5a86e30094de2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 4 Jun 2009 09:49:59 +1000 Subject: perf_counter: powerpc: Use new identifier names in powerpc-specific code Commit b23f3325 ("perf_counter: Rename various fields") fixed up most of the uses of the renamed fields, but missed one instance of "record_type" in powerpc-specific code which needs to be changed to "sample_type", and a "PERF_RECORD_ADDR" in the same statement that needs to be changed to "PERF_SAMPLE_ADDR", causing compilation errors on powerpc. This fixes it. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18983.3111.770392.800486@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 4cc4ac5c791..232b00a36f7 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -1002,7 +1002,7 @@ static void record_and_restart(struct perf_counter *counter, long val, */ if (record) { addr = 0; - if (counter->attr.record_type & PERF_RECORD_ADDR) { + if (counter->attr.sample_type & PERF_SAMPLE_ADDR) { /* * The user wants a data address recorded. * If we're not doing instruction sampling, -- cgit v1.2.3 From a21ca2cac582886a3e95c8bb84ff7c52d4d15e54 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Jun 2009 09:58:57 +0200 Subject: perf_counter: Separate out attr->type from attr->config Counter type is a frequently used value and we do a lot of bit juggling by encoding and decoding it from attr->config. Clean this up by creating a separate attr->type field. Also clean up the various similarly complex user-space bits all around counter attribute management. The net improvement is significant, and it will be easier to add a new major type (which is what triggered this cleanup). (This changes the ABI, all tools are adapted.) (PowerPC build-tested.) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 232b00a36f7..4786ad9a288 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -867,13 +867,13 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) if (!ppmu) return ERR_PTR(-ENXIO); - if (!perf_event_raw(&counter->attr)) { - ev = perf_event_id(&counter->attr); + if (counter->attr.type != PERF_TYPE_RAW) { + ev = counter->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return ERR_PTR(-EOPNOTSUPP); ev = ppmu->generic_events[ev]; } else { - ev = perf_event_config(&counter->attr); + ev = counter->attr.config; } counter->hw.config_base = ev; counter->hw.idx = 0; -- cgit v1.2.3 From 78646121e9a2fcf7977cc15966420e572a450bc3 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 23 Mar 2009 12:12:11 +0200 Subject: KVM: Fix interrupt unhalting a vcpu when it shouldn't kvm_vcpu_block() unhalts vpu on an interrupt/timer without checking if interrupt window is actually opened. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/powerpc/kvm/powerpc.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9057335fdc6..2cf915e51e7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions); } +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + /* do real check here */ + return 1; +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { return !(v->arch.msr & MSR_WE); -- cgit v1.2.3 From df1a132bf3d3508f863336c80a27806a2ac947e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 21:02:22 +0200 Subject: perf_counter: Introduce struct for sample data For easy extension of the sample data, put it in a structure. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 4786ad9a288..5e0bf399c43 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -1001,7 +1001,11 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) { - addr = 0; + struct perf_sample_data data = { + .regs = regs, + .addr = 0, + }; + if (counter->attr.sample_type & PERF_SAMPLE_ADDR) { /* * The user wants a data address recorded. @@ -1016,9 +1020,9 @@ static void record_and_restart(struct perf_counter *counter, long val, sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - addr = mfspr(SPRN_SDAR); + data.addr = mfspr(SPRN_SDAR); } - if (perf_counter_overflow(counter, nmi, regs, addr)) { + if (perf_counter_overflow(counter, nmi, &data)) { /* * Interrupts are coming too fast - throttle them * by setting the counter to 0, so it will be -- cgit v1.2.3 From 9e350de37ac9607012fcf9c5314a28fbddf8f43c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 21:34:59 +0200 Subject: perf_counter: Accurate period data We currently log hw.sample_period for PERF_SAMPLE_PERIOD, however this is incorrect. When we adjust the period, it will only take effect the next cycle but report it for the current cycle. So when we adjust the period for every cycle, we're always wrong. Solve this by keeping track of the last_period. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_counter.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5e0bf399c43..4990ce2e5f0 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -767,6 +767,7 @@ static void power_pmu_unthrottle(struct perf_counter *counter) perf_disable(); power_pmu_read(counter); left = counter->hw.sample_period; + counter->hw.last_period = left; val = 0; if (left < 0x80000000L) val = 0x80000000L - left; @@ -937,7 +938,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) counter->hw.config = events[n]; counter->hw.counter_base = cflags[n]; - atomic64_set(&counter->hw.period_left, counter->hw.sample_period); + counter->hw.last_period = counter->hw.sample_period; + atomic64_set(&counter->hw.period_left, counter->hw.last_period); /* * See if we need to reserve the PMU. @@ -1002,8 +1004,9 @@ static void record_and_restart(struct perf_counter *counter, long val, */ if (record) { struct perf_sample_data data = { - .regs = regs, - .addr = 0, + .regs = regs, + .addr = 0, + .period = counter->hw.last_period, }; if (counter->attr.sample_type & PERF_SAMPLE_ADDR) { -- cgit v1.2.3 From 4da52960fd1ae3ddd14901bc88b608cbeaa4b9a6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 Jun 2009 14:54:01 +1000 Subject: perf_counters: powerpc: Add support for POWER7 processors This adds the back-end for the PMU on POWER7 processors. POWER7 has 4 fully-programmable counters and two fixed-function counters (which do respect the freeze conditions, can generate interrupts, and are writable, unlike PMC5/6 on POWER5+/6). Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18992.36329.189378.17992@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/Makefile | 3 +- arch/powerpc/kernel/perf_counter.c | 4 + arch/powerpc/kernel/power7-pmu.c | 316 +++++++++++++++++++++++++++++++++++++ 3 files changed, 322 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/power7-pmu.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 9ba1bb731fc..a2c683403c2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -95,7 +95,8 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ - power5-pmu.o power5+-pmu.o power6-pmu.o + power5-pmu.o power5+-pmu.o power6-pmu.o \ + power7-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 4990ce2e5f0..5d12e68aac1 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -1181,6 +1181,7 @@ extern struct power_pmu ppc970_pmu; extern struct power_pmu power5_pmu; extern struct power_pmu power5p_pmu; extern struct power_pmu power6_pmu; +extern struct power_pmu power7_pmu; static int init_perf_counters(void) { @@ -1207,6 +1208,9 @@ static int init_perf_counters(void) case 0x3e: ppmu = &power6_pmu; break; + case 0x3f: + ppmu = &power7_pmu; + break; } /* diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c new file mode 100644 index 00000000000..dfac48d8ff4 --- /dev/null +++ b/arch/powerpc/kernel/power7-pmu.c @@ -0,0 +1,316 @@ +/* + * Performance counter support for POWER7 processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER7 + */ +#define PM_PMC_SH 16 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) +#define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_COMBINE_SH 11 /* Combined event bit */ +#define PM_COMBINE_MSK 1 +#define PM_COMBINE_MSKS 0x800 +#define PM_L2SEL_SH 8 /* L2 event select */ +#define PM_L2SEL_MSK 7 +#define PM_PMCSEL_MSK 0xff + +/* + * Bits in MMCR1 for POWER7 + */ +#define MMCR1_TTM0SEL_SH 60 +#define MMCR1_TTM1SEL_SH 56 +#define MMCR1_TTM2SEL_SH 52 +#define MMCR1_TTM3SEL_SH 48 +#define MMCR1_TTMSEL_MSK 0xf +#define MMCR1_L2SEL_SH 45 +#define MMCR1_L2SEL_MSK 7 +#define MMCR1_PMC1_COMBINE_SH 35 +#define MMCR1_PMC2_COMBINE_SH 34 +#define MMCR1_PMC3_COMBINE_SH 33 +#define MMCR1_PMC4_COMBINE_SH 32 +#define MMCR1_PMC1SEL_SH 24 +#define MMCR1_PMC2SEL_SH 16 +#define MMCR1_PMC3SEL_SH 8 +#define MMCR1_PMC4SEL_SH 0 +#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) +#define MMCR1_PMCSEL_MSK 0xff + +/* + * Bits in MMCRA + */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * [ ><><><><><><> + * NC P6P5P4P3P2P1 + * + * NC - number of counters + * 15: NC error 0x8000 + * 12-14: number of events needing PMC1-4 0x7000 + * + * P6 + * 11: P6 error 0x800 + * 10-11: Count of events needing PMC6 + * + * P1..P5 + * 0-9: Count of events needing PMC1..PMC5 + */ + +static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp) +{ + int pmc, sh; + u64 mask = 0, value = 0; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 6) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4)) + return -1; + } + if (pmc < 5) { + /* need a counter from PMC1-4 set */ + mask |= 0x8000; + value |= 0x1000; + } + *maskp = mask; + *valp = value; + return 0; +} + +#define MAX_ALT 2 /* at most 2 alternatives for any event */ + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x200f2, 0x300f2 }, /* PM_INST_DISP */ + { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */ + { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */ +}; + +/* + * Scan the alternatives table for a match and return the + * index into the alternatives table if found, else -1. + */ +static int find_alternative(u64 event) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + break; + for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) + if (event == event_alternatives[i][j]) + return i; + } + return -1; +} + +static s64 find_alternative_decode(u64 event) +{ + int pmc, psel; + + /* this only handles the 4x decode events */ + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40) + return event - (1 << PM_PMC_SH) + 8; + if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48) + return event + (1 << PM_PMC_SH) - 8; + return -1; +} + +static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[]) +{ + int i, j, nalt = 1; + s64 ae; + + alt[0] = event; + nalt = 1; + i = find_alternative(event); + if (i >= 0) { + for (j = 0; j < MAX_ALT; ++j) { + ae = event_alternatives[i][j]; + if (ae && ae != event) + alt[nalt++] = ae; + } + } else { + ae = find_alternative_decode(event); + if (ae > 0) + alt[nalt++] = ae; + } + + if (flags & PPMU_ONLY_COUNT_RUN) { + /* + * We're only counting in RUN state, + * so PM_CYC is equivalent to PM_RUN_CYC + * and PM_INST_CMPL === PM_RUN_INST_CMPL. + * This doesn't include alternatives that don't provide + * any extra flexibility in assigning PMCs. + */ + j = nalt; + for (i = 0; i < nalt; ++i) { + switch (alt[i]) { + case 0x1e: /* PM_CYC */ + alt[j++] = 0x600f4; /* PM_RUN_CYC */ + break; + case 0x600f4: /* PM_RUN_CYC */ + alt[j++] = 0x1e; + break; + case 0x2: /* PM_PPC_CMPL */ + alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */ + break; + case 0x500fa: /* PM_RUN_INST_CMPL */ + alt[j++] = 0x2; /* PM_PPC_CMPL */ + break; + } + } + nalt = j; + } + + return nalt; +} + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int power7_marked_instr_event(u64 event) +{ + int pmc, psel; + int unit; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */ + if (pmc >= 5) + return 0; + + switch (psel >> 4) { + case 2: + return pmc == 2 || pmc == 4; + case 3: + if (psel == 0x3c) + return pmc == 1; + if (psel == 0x3e) + return pmc != 2; + return 1; + case 4: + case 5: + return unit == 0xd; + case 6: + if (psel == 0x64) + return pmc >= 3; + case 8: + return unit == 0xd; + } + return 0; +} + +static int power7_compute_mmcr(u64 event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr1 = 0; + u64 mmcra = 0; + unsigned int pmc, unit, combine, l2sel, psel; + unsigned int pmc_inuse = 0; + int i; + + /* First pass to count resource use */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 6) + return -1; + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + } + } + + /* Second pass: assign PMCs, set all MMCR1 fields */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK; + l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK; + psel = event[i] & PM_PMCSEL_MSK; + if (!pmc) { + /* Bus event or any-PMC direct event */ + for (pmc = 0; pmc < 4; ++pmc) { + if (!(pmc_inuse & (1 << pmc))) + break; + } + if (pmc >= 4) + return -1; + pmc_inuse |= 1 << pmc; + } else { + /* Direct or decoded event */ + --pmc; + } + if (pmc <= 3) { + mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc); + mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc); + mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); + if (unit == 6) /* L2 events */ + mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH; + } + if (power7_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; + hwc[i] = pmc; + } + + /* Return MMCRx values */ + mmcr[0] = 0; + if (pmc_inuse & 1) + mmcr[0] = MMCR0_PMC1CE; + if (pmc_inuse & 0x3e) + mmcr[0] |= MMCR0_PMCjCE; + mmcr[1] = mmcr1; + mmcr[2] = mmcra; + return 0; +} + +static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + if (pmc <= 3) + mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc)); +} + +static int power7_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 0x1e, + [PERF_COUNT_INSTRUCTIONS] = 2, + [PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */ + [PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */ + [PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ +}; + +struct power_pmu power7_pmu = { + .n_counter = 6, + .max_alternatives = MAX_ALT + 1, + .add_fields = 0x1555ull, + .test_adder = 0x3000ull, + .compute_mmcr = power7_compute_mmcr, + .get_constraint = power7_get_constraint, + .get_alternatives = power7_get_alternatives, + .disable_pmc = power7_disable_pmc, + .n_generic = ARRAY_SIZE(power7_generic_events), + .generic_events = power7_generic_events, +}; -- cgit v1.2.3 From 106b506c3a8b74daa5751e83ed3e46438fcf9a52 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 11 Jun 2009 14:55:42 +1000 Subject: perf_counter: powerpc: Implement generalized cache events for POWER processors This adds tables of event codes for the generalized cache events for all the currently supported powerpc processors: POWER{4,5,5+,6,7} and PPC970*, plus powerpc-specific code to use these tables when a generalized cache event is requested. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18992.36430.933526.742969@drongo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/perf_counter.h | 3 +++ arch/powerpc/kernel/perf_counter.c | 42 ++++++++++++++++++++++++++++-- arch/powerpc/kernel/power4-pmu.c | 41 +++++++++++++++++++++++++++++ arch/powerpc/kernel/power5+-pmu.c | 45 ++++++++++++++++++++++++++++++-- arch/powerpc/kernel/power5-pmu.c | 41 +++++++++++++++++++++++++++++ arch/powerpc/kernel/power6-pmu.c | 46 +++++++++++++++++++++++++++++++-- arch/powerpc/kernel/power7-pmu.c | 41 +++++++++++++++++++++++++++++ arch/powerpc/kernel/ppc970-pmu.c | 41 +++++++++++++++++++++++++++++ 8 files changed, 294 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 1c60f0ca792..cc7c887705b 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -33,6 +33,9 @@ struct power_pmu { u32 flags; int n_generic; int *generic_events; + int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; }; extern struct power_pmu *ppmu; diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5d12e68aac1..bb202388170 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -856,6 +856,36 @@ static void hw_perf_counter_destroy(struct perf_counter *counter) } } +/* + * Translate a generic cache event config to a raw event code. + */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { u64 ev; @@ -868,13 +898,21 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) if (!ppmu) return ERR_PTR(-ENXIO); - if (counter->attr.type != PERF_TYPE_RAW) { + switch (counter->attr.type) { + case PERF_TYPE_HARDWARE: ev = counter->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return ERR_PTR(-EOPNOTSUPP); ev = ppmu->generic_events[ev]; - } else { + break; + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(counter->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + case PERF_TYPE_RAW: ev = counter->attr.config; + break; } counter->hw.config_base = ev; counter->hw.idx = 0; diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 836fa118eb1..0e94b685722 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -543,6 +543,46 @@ static int p4_generic_events[] = { [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ }; +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x8c10, 0x3c10 }, + [C(OP_WRITE)] = { 0x7c10, 0xc13 }, + [C(OP_PREFETCH)] = { 0xc35, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0xc34, 0 }, + }, + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x904 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x900 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x330, 0x331 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + struct power_pmu power4_pmu = { .n_counter = 8, .max_alternatives = 5, @@ -554,4 +594,5 @@ struct power_pmu power4_pmu = { .disable_pmc = p4_disable_pmc, .n_generic = ARRAY_SIZE(p4_generic_events), .generic_events = p4_generic_events, + .cache_events = &power4_cache_events, }; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 8471e3c2e46..bbf2cbb0738 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -614,6 +614,46 @@ static int power5p_generic_events[] = { [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ }; +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, + [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, + [C(OP_PREFETCH)] = { 0xc70e7, -1 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0xc50c3, 0 }, + }, + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0xc20e4, 0x800c4 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x800c0 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x230e4, 0x230e5 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + struct power_pmu power5p_pmu = { .n_counter = 6, .max_alternatives = MAX_ALT, @@ -623,8 +663,9 @@ struct power_pmu power5p_pmu = { .get_constraint = power5p_get_constraint, .get_alternatives = power5p_get_alternatives, .disable_pmc = power5p_disable_pmc, + .limited_pmc_event = power5p_limited_pmc_event, + .flags = PPMU_LIMITED_PMC5_6, .n_generic = ARRAY_SIZE(power5p_generic_events), .generic_events = power5p_generic_events, - .flags = PPMU_LIMITED_PMC5_6, - .limited_pmc_event = power5p_limited_pmc_event, + .cache_events = &power5p_cache_events, }; diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 1b44c5fca18..670cf10b91e 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -556,6 +556,46 @@ static int power5_generic_events[] = { [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ }; +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, + [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, + [C(OP_PREFETCH)] = { 0xc70e7, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x3c309b }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0xc50c3, 0 }, + }, + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x2c4090, 0x800c4 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x800c0 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x230e4, 0x230e5 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + struct power_pmu power5_pmu = { .n_counter = 6, .max_alternatives = MAX_ALT, @@ -567,4 +607,5 @@ struct power_pmu power5_pmu = { .disable_pmc = power5_disable_pmc, .n_generic = ARRAY_SIZE(power5_generic_events), .generic_events = power5_generic_events, + .cache_events = &power5_cache_events, }; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index cd4fbe06c35..4da70786609 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -474,6 +474,47 @@ static int power6_generic_events[] = { [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ }; +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + * The "DTLB" and "ITLB" events relate to the DERAT and IERAT. + */ +static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x80082, 0x80080 }, + [C(OP_WRITE)] = { 0x80086, 0x80088 }, + [C(OP_PREFETCH)] = { 0x810a4, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x100056 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0x4008c, 0 }, + }, + [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x150730, 0x250532 }, + [C(OP_WRITE)] = { 0x250432, 0x150432 }, + [C(OP_PREFETCH)] = { 0x810a6, 0 }, + }, + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x20000e }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x420ce }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x430e6, 0x400052 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + struct power_pmu power6_pmu = { .n_counter = 6, .max_alternatives = MAX_ALT, @@ -483,8 +524,9 @@ struct power_pmu power6_pmu = { .get_constraint = p6_get_constraint, .get_alternatives = p6_get_alternatives, .disable_pmc = p6_disable_pmc, + .limited_pmc_event = p6_limited_pmc_event, + .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, .n_generic = ARRAY_SIZE(power6_generic_events), .generic_events = power6_generic_events, - .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, - .limited_pmc_event = p6_limited_pmc_event, + .cache_events = &power6_cache_events, }; diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index dfac48d8ff4..060e0deb399 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -302,6 +302,46 @@ static int power7_generic_events[] = { [PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ }; +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x400f0, 0xc880 }, + [C(OP_WRITE)] = { 0, 0x300f0 }, + [C(OP_PREFETCH)] = { 0xd8b8, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x200fc }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0x408a, 0 }, + }, + [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x6080, 0x6084 }, + [C(OP_WRITE)] = { 0x6082, 0x6086 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x300fc }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x400fc }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x10068, 0x400f6 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + struct power_pmu power7_pmu = { .n_counter = 6, .max_alternatives = MAX_ALT + 1, @@ -313,4 +353,5 @@ struct power_pmu power7_pmu = { .disable_pmc = power7_disable_pmc, .n_generic = ARRAY_SIZE(power7_generic_events), .generic_events = power7_generic_events, + .cache_events = &power7_cache_events, }; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index eed47c4523f..336adf1736a 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -427,6 +427,46 @@ static int ppc970_generic_events[] = { [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ }; +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x8810, 0x3810 }, + [C(OP_WRITE)] = { 0x7810, 0x813 }, + [C(OP_PREFETCH)] = { 0x731, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0x733, 0 }, + }, + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x704 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0x700 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0x431, 0x327 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + struct power_pmu ppc970_pmu = { .n_counter = 8, .max_alternatives = 2, @@ -438,4 +478,5 @@ struct power_pmu ppc970_pmu = { .disable_pmc = p970_disable_pmc, .n_generic = ARRAY_SIZE(ppc970_generic_events), .generic_events = ppc970_generic_events, + .cache_events = &ppc970_cache_events, }; -- cgit v1.2.3 From f4dbfa8f3131a84257223393905f7efad0ca5996 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:06:28 +0200 Subject: perf_counter: Standardize event names Pure renames only, to PERF_COUNT_HW_* and PERF_COUNT_SW_*. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/power4-pmu.c | 12 ++++++------ arch/powerpc/kernel/power5+-pmu.c | 12 ++++++------ arch/powerpc/kernel/power5-pmu.c | 12 ++++++------ arch/powerpc/kernel/power6-pmu.c | 12 ++++++------ arch/powerpc/kernel/ppc970-pmu.c | 12 ++++++------ arch/powerpc/mm/fault.c | 6 +++--- 6 files changed, 33 insertions(+), 33 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 0e94b685722..73956f084b2 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -535,12 +535,12 @@ static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int p4_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 7, - [PERF_COUNT_INSTRUCTIONS] = 0x1001, - [PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ - [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ + [PERF_COUNT_HW_CPU_CYCLES] = 7, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index bbf2cbb0738..5f8b7741e97 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -606,12 +606,12 @@ static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int power5p_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 0xf, - [PERF_COUNT_INSTRUCTIONS] = 0x100009, - [PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ - [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ + [PERF_COUNT_HW_CPU_CYCLES] = 0xf, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 670cf10b91e..d54723ab627 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -548,12 +548,12 @@ static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int power5_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 0xf, - [PERF_COUNT_INSTRUCTIONS] = 0x100009, - [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ - [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ + [PERF_COUNT_HW_CPU_CYCLES] = 0xf, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 4da70786609..0cd406ee765 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -466,12 +466,12 @@ static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int power6_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 0x1e, - [PERF_COUNT_INSTRUCTIONS] = 2, - [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ - [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ + [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 336adf1736a..46a20640942 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -419,12 +419,12 @@ static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int ppc970_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 7, - [PERF_COUNT_INSTRUCTIONS] = 1, - [PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ - [PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ - [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ + [PERF_COUNT_HW_CPU_CYCLES] = 7, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ }; #define C(x) PERF_COUNT_HW_CACHE_##x diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index ac0e112031b..5beffc8f481 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -312,7 +312,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { @@ -323,7 +323,7 @@ good_area: #endif } else { current->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, regs, address); } up_read(&mm->mmap_sem); -- cgit v1.2.3 From 8be6e8f3c3a13900169f1141870562d0c723b010 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:19:11 +0200 Subject: perf_counter: Rename L2 to LL cache The top (fastest) and last level (biggest) caches are the most interesting ones, performance wise. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: [ Fixed the Nehalem LL table to LLC Reference/Miss events ] Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/power4-pmu.c | 2 +- arch/powerpc/kernel/power5+-pmu.c | 2 +- arch/powerpc/kernel/power5-pmu.c | 2 +- arch/powerpc/kernel/power6-pmu.c | 2 +- arch/powerpc/kernel/power7-pmu.c | 2 +- arch/powerpc/kernel/ppc970-pmu.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 73956f084b2..07bd308a5fa 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -561,7 +561,7 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0 }, [C(OP_WRITE)] = { 0, 0 }, [C(OP_PREFETCH)] = { 0xc34, 0 }, diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 5f8b7741e97..41e5d2d958d 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -632,7 +632,7 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0 }, [C(OP_WRITE)] = { 0, 0 }, [C(OP_PREFETCH)] = { 0xc50c3, 0 }, diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index d54723ab627..05600b66221 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -574,7 +574,7 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0x3c309b }, [C(OP_WRITE)] = { 0, 0 }, [C(OP_PREFETCH)] = { 0xc50c3, 0 }, diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 0cd406ee765..46f74bebcfd 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -493,7 +493,7 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0x4008c, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x150730, 0x250532 }, [C(OP_WRITE)] = { 0x250432, 0x150432 }, [C(OP_PREFETCH)] = { 0x810a6, 0 }, diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 060e0deb399..b3f7d1216ba 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -320,7 +320,7 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0x408a, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0x6080, 0x6084 }, [C(OP_WRITE)] = { 0x6082, 0x6086 }, [C(OP_PREFETCH)] = { 0, 0 }, diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 46a20640942..ba0a357a89f 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -445,7 +445,7 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_WRITE)] = { -1, -1 }, [C(OP_PREFETCH)] = { 0, 0 }, }, - [C(L2)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ [C(OP_READ)] = { 0, 0 }, [C(OP_WRITE)] = { 0, 0 }, [C(OP_PREFETCH)] = { 0x733, 0 }, -- cgit v1.2.3 From 63b852a6b67d0820d388b0ecd0da83ccb4048b8d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:24 +0000 Subject: asm-generic: rename termios.h, signal.h and mman.h The existing asm-generic versions are incomplete and included by some architectures. New architectures should be able to use a generic version, so rename the existing files and change all users, which lets us add the new files. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- arch/powerpc/include/asm/mman.h | 2 +- arch/powerpc/include/asm/signal.h | 2 +- arch/powerpc/include/asm/termios.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index e7b99bac9f4..7b1c49811a2 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -1,7 +1,7 @@ #ifndef _ASM_POWERPC_MMAN_H #define _ASM_POWERPC_MMAN_H -#include +#include /* * This program is free software; you can redistribute it and/or diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index 69f709d8e8e..3eb13be11d8 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -94,7 +94,7 @@ typedef struct { #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -#include +#include struct old_sigaction { __sighandler_t sa_handler; diff --git a/arch/powerpc/include/asm/termios.h b/arch/powerpc/include/asm/termios.h index 2c14fea07c8..a24f48704a3 100644 --- a/arch/powerpc/include/asm/termios.h +++ b/arch/powerpc/include/asm/termios.h @@ -78,7 +78,7 @@ struct termio { #ifdef __KERNEL__ -#include +#include #endif /* __KERNEL__ */ -- cgit v1.2.3 From c31ae4bb4a9fa4606a74c0a4fb61b74f804e861e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:25 +0000 Subject: asm-generic: introduce asm/bitsperlong.h This provides a reliable way for asm-generic/types.h and other files to find out if it is running on a 32 or 64 bit platform. We cannot use CONFIG_64BIT for this in headers that are included from user space because CONFIG symbols are not available there. We also cannot do it inside of asm/types.h because some headers need the word size but cannot include types.h. The solution is to introduce a new header that defines both __BITS_PER_LONG for user space and BITS_PER_LONG for usage in the kernel. The asm-generic version falls back to 32 bit unless the architecture overrides it, which I did for all 64 bit platforms. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- arch/powerpc/include/asm/bitsperlong.h | 12 ++++++++++++ arch/powerpc/include/asm/types.h | 9 --------- 2 files changed, 12 insertions(+), 9 deletions(-) create mode 100644 arch/powerpc/include/asm/bitsperlong.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/bitsperlong.h b/arch/powerpc/include/asm/bitsperlong.h new file mode 100644 index 00000000000..5f1659032c4 --- /dev/null +++ b/arch/powerpc/include/asm/bitsperlong.h @@ -0,0 +1,12 @@ +#ifndef __ASM_POWERPC_BITSPERLONG_H +#define __ASM_POWERPC_BITSPERLONG_H + +#if defined(__powerpc64__) +# define __BITS_PER_LONG 64 +#else +# define __BITS_PER_LONG 32 +#endif + +#include + +#endif /* __ASM_POWERPC_BITSPERLONG_H */ diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index 7ce27a52bb3..a5aea0ca34e 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -40,15 +40,6 @@ typedef struct { #endif /* __ASSEMBLY__ */ #ifdef __KERNEL__ -/* - * These aren't exported outside the kernel to avoid name space clashes - */ -#ifdef __powerpc64__ -#define BITS_PER_LONG 64 -#else -#define BITS_PER_LONG 32 -#endif - #ifndef __ASSEMBLY__ typedef __vector128 vector128; -- cgit v1.2.3 From 72099ed2719fc5829bd79c6ca9d1783ed026eb37 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:29 +0000 Subject: asm-generic: rename atomic.h to atomic-long.h The existing asm-generic/atomic.h only defines the atomic_long type. This renames it to atomic-long.h so we have a place to add a truly generic atomic.h that can be used on all non-SMP systems. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann Acked-by: Ingo Molnar --- arch/powerpc/include/asm/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index b401950f525..b7d2d07b6f9 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -472,6 +472,6 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) #endif /* __powerpc64__ */ -#include +#include #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_ATOMIC_H_ */ -- cgit v1.2.3 From 5b17e1cd8928ae65932758ce6478ac6d3e9a86b2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 May 2009 22:56:30 +0000 Subject: asm-generic: rename page.h and uaccess.h The current asm-generic/page.h only contains the get_order function, and asm-generic/uaccess.h only implements unaligned accesses. This renames the file to getorder.h and uaccess-unaligned.h to make room for new page.h and uaccess.h file that will be usable by all simple (e.g. nommu) architectures. Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann --- arch/powerpc/include/asm/page_32.h | 2 +- arch/powerpc/include/asm/page_64.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index a0e3f6e6b4e..bd0849dbcaa 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -41,7 +41,7 @@ extern void clear_pages(void *page, int order); static inline void clear_page(void *page) { clear_pages(page, 0); } extern void copy_page(void *to, void *from); -#include +#include #define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1) #define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1) diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 043bfdfe4f7..5817a3b747e 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -180,6 +180,6 @@ do { \ (test_thread_flag(TIF_32BIT) ? \ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) -#include +#include #endif /* _ASM_POWERPC_PAGE_64_H */ -- cgit v1.2.3 From e14112d1bd5e193166b54be19119cf6440470560 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 12 Jun 2009 10:14:22 +1000 Subject: perfcounters: remove powerpc definitions of perf_counter_do_pending Commit 925d519ab82b6dd7aca9420d809ee83819c08db2 ("perf_counter: unify and fix delayed counter wakeup") added global definitions. Signed-off-by: Stephen Rothwell Acked-by: Paul Mackerras Acked-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/hw_irq.h | 3 --- arch/powerpc/kernel/irq.c | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 20a44d0c9fd..53512374e1c 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -156,8 +156,6 @@ static inline void clear_perf_counter_pending(void) "i" (offsetof(struct paca_struct, perf_counter_pending))); } -extern void perf_counter_do_pending(void); - #else static inline unsigned long test_perf_counter_pending(void) @@ -167,7 +165,6 @@ static inline unsigned long test_perf_counter_pending(void) static inline void set_perf_counter_pending(void) {} static inline void clear_perf_counter_pending(void) {} -static inline void perf_counter_do_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index feff792ed0f..844d3f882a1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 5933048c69edb546f1e93c26dc93816f0be9f754 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 12 Jun 2009 21:47:04 -0600 Subject: module: cleanup FIXME comments about trimming exception table entries. Everyone cut and paste this comment from my original one. We now do it generically, so cut the comments. Signed-off-by: Rusty Russell Cc: Amerigo Wang --- arch/powerpc/kernel/module.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 43e7e3a7f13..477c663e014 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -43,8 +43,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, -- cgit v1.2.3 From 4c921126fe553440261f56691c5f60fbaaa486d6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 12 Jun 2009 12:04:54 +0530 Subject: powerpc, perf_counter: Fix performance counter event types Sachin Sant reported these compiler errors: CC arch/powerpc/kernel/power7-pmu.o arch/powerpc/kernel/power7-pmu.c:297: error: PERF_COUNT_CPU_CYCLES undeclared here (not in a function) Which happened because a last-minute rename of symbols crossed with the Power7 support patch. Fix this by using the new symbol names. Reported-by: Sachin Sant Signed-off-by: Jaswinder Singh Rajput Cc: Paul Mackerras Cc: linuxppc-dev@ozlabs.org LKML-Reference: <1244788494.5554.1.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/power7-pmu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index b3f7d1216ba..b72e7a19d05 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -294,12 +294,12 @@ static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) } static int power7_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 0x1e, - [PERF_COUNT_INSTRUCTIONS] = 2, - [PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */ - [PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */ - [PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ + [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/ + [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ }; #define C(x) PERF_COUNT_HW_CACHE_##x -- cgit v1.2.3 From 5cdcd9d691a4810ec3f5ed6b49e2bb24871c6907 Mon Sep 17 00:00:00 2001 From: Sankar P Date: Tue, 12 May 2009 12:41:13 +0530 Subject: trivial: spelling fix in ppc code comments Fixes a trivial spelling error in powerpc code comments. Signed-off-by: Sankar P Signed-off-by: Jiri Kosina --- arch/powerpc/mm/slb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 89497fb0428..3b52c80e5e3 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -2,7 +2,7 @@ * PowerPC64 SLB support. * * Copyright (C) 2004 David Gibson , IBM - * Based on earlier code writteh by: + * Based on earlier code written by: * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com * Copyright (c) 2001 Dave Engebretsen * Copyright (C) 2002 Anton Blanchard , IBM -- cgit v1.2.3