aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/oprofile/op_model_amd.c
diff options
context:
space:
mode:
authorJason Yeh <jason.yeh@amd.com>2009-07-08 13:49:38 +0200
committerRobert Richter <robert.richter@amd.com>2009-07-20 16:33:53 +0200
commit4d4036e0e7299c6cbb2d2421b4b30b7a409ce61a (patch)
treec9003cd927ed878412e89a59db0138b6b701b629 /arch/x86/oprofile/op_model_amd.c
parent6e63ea4b0b14ff5fb8a3ca704fcda7d28b95f079 (diff)
oprofile: Implement performance counter multiplexing
The number of hardware counters is limited. The multiplexing feature enables OProfile to gather more events than counters are provided by the hardware. This is realized by switching between events at an user specified time interval. A new file (/dev/oprofile/time_slice) is added for the user to specify the timer interval in ms. If the number of events to profile is higher than the number of hardware counters available, the patch will schedule a work queue that switches the event counter and re-writes the different sets of values into it. The switching mechanism needs to be implemented for each architecture to support multiplexing. This patch only implements AMD CPU support, but multiplexing can be easily extended for other models and architectures. There are follow-on patches that rework parts of this patch. Signed-off-by: Jason Yeh <jason.yeh@amd.com> Signed-off-by: Robert Richter <robert.richter@amd.com>
Diffstat (limited to 'arch/x86/oprofile/op_model_amd.c')
-rw-r--r--arch/x86/oprofile/op_model_amd.c110
1 files changed, 96 insertions, 14 deletions
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index f676f8825a3..fdbed3a0c87 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -9,12 +9,15 @@
* @author Philippe Elie
* @author Graydon Hoare
* @author Robert Richter <robert.richter@amd.com>
- * @author Barry Kasindorf
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
*/
#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
+#include <linux/percpu.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
@@ -25,12 +28,23 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+#define NUM_VIRT_COUNTERS 32
+#define NUM_VIRT_CONTROLS 32
+#else
+#define NUM_VIRT_COUNTERS NUM_COUNTERS
+#define NUM_VIRT_CONTROLS NUM_CONTROLS
+#endif
+
#define OP_EVENT_MASK 0x0FFF
#define OP_CTR_OVERFLOW (1ULL<<31)
#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
-static unsigned long reset_value[NUM_COUNTERS];
+static unsigned long reset_value[NUM_VIRT_COUNTERS];
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+DECLARE_PER_CPU(int, switch_index);
+#endif
#ifdef CONFIG_OPROFILE_IBS
@@ -82,6 +96,16 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
else
msrs->controls[i].addr = 0;
}
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ for (i = 0; i < NUM_VIRT_COUNTERS; i++) {
+ int hw_counter = i % NUM_CONTROLS;
+ if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+ msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter;
+ else
+ msrs->multiplex[i].addr = 0;
+ }
+#endif
}
static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
@@ -90,6 +114,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
u64 val;
int i;
+ /* setup reset_value */
+ for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
+ if (counter_config[i].enabled) {
+ reset_value[i] = counter_config[i].count;
+ } else {
+ reset_value[i] = 0;
+ }
+ }
+
/* clear all counters */
for (i = 0; i < NUM_CONTROLS; ++i) {
if (unlikely(!msrs->controls[i].addr))
@@ -108,20 +141,49 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (counter_config[i].enabled && msrs->counters[i].addr) {
- reset_value[i] = counter_config[i].count;
- wrmsrl(msrs->counters[i].addr,
- -(u64)counter_config[i].count);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ int offset = i + __get_cpu_var(switch_index);
+#else
+ int offset = i;
+#endif
+ if (counter_config[offset].enabled && msrs->counters[i].addr) {
+ /* setup counter registers */
+ wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]);
+
+ /* setup control registers */
rdmsrl(msrs->controls[i].addr, val);
val &= model->reserved;
- val |= op_x86_get_ctrl(model, &counter_config[i]);
+ val |= op_x86_get_ctrl(model, &counter_config[offset]);
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+ }
+}
+
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void op_amd_switch_ctrl(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ /* enable active counters */
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ int offset = i + __get_cpu_var(switch_index);
+ if (counter_config[offset].enabled) {
+ /* setup control registers */
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[offset]);
wrmsrl(msrs->controls[i].addr, val);
- } else {
- reset_value[i] = 0;
}
}
}
+#endif
+
+
#ifdef CONFIG_OPROFILE_IBS
static inline int
@@ -230,14 +292,19 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
int i;
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (!reset_value[i])
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ int offset = i + __get_cpu_var(switch_index);
+#else
+ int offset = i;
+#endif
+ if (!reset_value[offset])
continue;
rdmsrl(msrs->counters[i].addr, val);
/* bit is clear if overflowed: */
if (val & OP_CTR_OVERFLOW)
continue;
- oprofile_add_sample(regs, i);
- wrmsrl(msrs->counters[i].addr, -(u64)reset_value[i]);
+ oprofile_add_sample(regs, offset);
+ wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]);
}
op_amd_handle_ibs(regs, msrs);
@@ -250,8 +317,14 @@ static void op_amd_start(struct op_msrs const * const msrs)
{
u64 val;
int i;
+
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (reset_value[i]) {
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ int offset = i + __get_cpu_var(switch_index);
+#else
+ int offset = i;
+#endif
+ if (reset_value[offset]) {
rdmsrl(msrs->controls[i].addr, val);
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
@@ -271,7 +344,11 @@ static void op_amd_stop(struct op_msrs const * const msrs)
* pm callback
*/
for (i = 0; i < NUM_COUNTERS; ++i) {
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ if (!reset_value[i + per_cpu(switch_index, smp_processor_id())])
+#else
if (!reset_value[i])
+#endif
continue;
rdmsrl(msrs->controls[i].addr, val);
val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -289,7 +366,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
if (msrs->counters[i].addr)
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
}
- for (i = 0; i < NUM_CONTROLS; ++i) {
+ for (i = 0; i < NUM_COUNTERS; ++i) {
if (msrs->controls[i].addr)
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
@@ -463,6 +540,8 @@ static void op_amd_exit(void) {}
struct op_x86_model_spec const op_amd_spec = {
.num_counters = NUM_COUNTERS,
.num_controls = NUM_CONTROLS,
+ .num_virt_counters = NUM_VIRT_COUNTERS,
+ .num_virt_controls = NUM_VIRT_CONTROLS,
.reserved = MSR_AMD_EVENTSEL_RESERVED,
.event_mask = OP_EVENT_MASK,
.init = op_amd_init,
@@ -473,4 +552,7 @@ struct op_x86_model_spec const op_amd_spec = {
.start = &op_amd_start,
.stop = &op_amd_stop,
.shutdown = &op_amd_shutdown,
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ .switch_ctrl = &op_amd_switch_ctrl,
+#endif
};