Diffstat (limited to 'arch')
-rw-r--r--  arch/Kconfig                       |  12
-rw-r--r--  arch/x86/oprofile/nmi_int.c        | 162
-rw-r--r--  arch/x86/oprofile/op_counter.h     |   2
-rw-r--r--  arch/x86/oprofile/op_model_amd.c   | 110
-rw-r--r--  arch/x86/oprofile/op_model_p4.c    |   4
-rw-r--r--  arch/x86/oprofile/op_model_ppro.c  |   2
-rw-r--r--  arch/x86/oprofile/op_x86_model.h   |   7
7 files changed, 279 insertions(+), 20 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 99193b16023..beea3ccebb5 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -30,6 +30,18 @@ config OPROFILE_IBS
If unsure, say N.
+config OPROFILE_EVENT_MULTIPLEX
+ bool "OProfile multiplexing support (EXPERIMENTAL)"
+ default n
+ depends on OPROFILE && X86
+ help
+ The number of hardware counters is limited. The multiplexing
+ feature enables OProfile to gather more events than the
+ hardware provides counters for. This is realized by switching
+ between events at a user-specified time interval.
+
+ If unsure, say N.
+
config HAVE_OPROFILE
bool
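
The help text above sums up the scheme: a fixed pool of hardware counters services a larger set of configured events by rotating through them at each timer tick. A stand-alone sketch of that round-robin rotation follows; all names and constants are illustrative, and only the wrap-around logic mirrors what the patch does in nmi_cpu_switch() below.

	/*
	 * Stand-alone sketch of round-robin counter multiplexing.
	 * NUM_HW_COUNTERS and NUM_VIRT_COUNTERS are hypothetical
	 * stand-ins for NUM_COUNTERS and num_virt_counters.
	 */
	#include <stdio.h>

	#define NUM_HW_COUNTERS   4	/* physical counters */
	#define NUM_VIRT_COUNTERS 8	/* configured events */

	int main(void)
	{
		int switch_index = 0;	/* a per-CPU variable in the real code */
		int tick;

		for (tick = 0; tick < 4; ++tick) {
			int i;

			printf("interval %d: events", tick);
			for (i = 0; i < NUM_HW_COUNTERS; ++i)
				printf(" %d", switch_index + i);
			printf(" occupy hw counters 0-%d\n", NUM_HW_COUNTERS - 1);

			/* advance to the next event set, wrapping around */
			switch_index += NUM_HW_COUNTERS;
			if (switch_index >= NUM_VIRT_COUNTERS)
				switch_index = 0;
		}
		return 0;
	}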
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index fca8dc94531..e54f6a0b35a 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -1,11 +1,14 @@
/**
* @file nmi_int.c
*
- * @remark Copyright 2002-2008 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
* @author Robert Richter <robert.richter@amd.com>
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
*/
#include <linux/init.h>
@@ -24,6 +27,12 @@
#include "op_counter.h"
#include "op_x86_model.h"
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+DEFINE_PER_CPU(int, switch_index);
+#endif
+
+
static struct op_x86_model_spec const *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
@@ -31,6 +40,13 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+extern atomic_t multiplex_counter;
+#endif
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
/* common functions */
u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
@@ -95,6 +111,11 @@ static void free_msrs(void)
per_cpu(cpu_msrs, i).counters = NULL;
kfree(per_cpu(cpu_msrs, i).controls);
per_cpu(cpu_msrs, i).controls = NULL;
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ kfree(per_cpu(cpu_msrs, i).multiplex);
+ per_cpu(cpu_msrs, i).multiplex = NULL;
+#endif
}
}
@@ -103,6 +124,9 @@ static int allocate_msrs(void)
int success = 1;
size_t controls_size = sizeof(struct op_msr) * model->num_controls;
size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters;
+#endif
int i;
for_each_possible_cpu(i) {
@@ -118,6 +142,14 @@ static int allocate_msrs(void)
success = 0;
break;
}
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ per_cpu(cpu_msrs, i).multiplex =
+ kmalloc(multiplex_size, GFP_KERNEL);
+ if (!per_cpu(cpu_msrs, i).multiplex) {
+ success = 0;
+ break;
+ }
+#endif
}
if (!success)
@@ -126,6 +158,25 @@ static int allocate_msrs(void)
return success;
}
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void nmi_setup_cpu_mux(struct op_msrs const * const msrs)
+{
+ int i;
+ struct op_msr *multiplex = msrs->multiplex;
+
+ for (i = 0; i < model->num_virt_counters; ++i) {
+ if (counter_config[i].enabled) {
+ multiplex[i].saved = -(u64)counter_config[i].count;
+ } else {
+ multiplex[i].addr = 0;
+ multiplex[i].saved = 0;
+ }
+ }
+}
+
+#endif
+
static void nmi_cpu_setup(void *dummy)
{
int cpu = smp_processor_id();
@@ -133,6 +184,9 @@ static void nmi_cpu_setup(void *dummy)
nmi_cpu_save_registers(msrs);
spin_lock(&oprofilefs_lock);
model->setup_ctrs(model, msrs);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ nmi_setup_cpu_mux(msrs);
+#endif
spin_unlock(&oprofilefs_lock);
per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
apic_write(APIC_LVTPC, APIC_DM_NMI);
@@ -173,14 +227,52 @@ static int nmi_setup(void)
memcpy(per_cpu(cpu_msrs, cpu).controls,
per_cpu(cpu_msrs, 0).controls,
sizeof(struct op_msr) * model->num_controls);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ memcpy(per_cpu(cpu_msrs, cpu).multiplex,
+ per_cpu(cpu_msrs, 0).multiplex,
+ sizeof(struct op_msr) * model->num_virt_counters);
+#endif
}
-
}
on_each_cpu(nmi_cpu_setup, NULL, 1);
nmi_enabled = 1;
return 0;
}
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
+{
+ unsigned int si = __get_cpu_var(switch_index);
+ struct op_msr *multiplex = msrs->multiplex;
+ unsigned int i;
+
+ for (i = 0; i < model->num_counters; ++i) {
+ int offset = i + si;
+ if (multiplex[offset].addr) {
+ rdmsrl(multiplex[offset].addr,
+ multiplex[offset].saved);
+ }
+ }
+}
+
+static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
+{
+ unsigned int si = __get_cpu_var(switch_index);
+ struct op_msr *multiplex = msrs->multiplex;
+ unsigned int i;
+
+ for (i = 0; i < model->num_counters; ++i) {
+ int offset = i + si;
+ if (multiplex[offset].addr) {
+ wrmsrl(multiplex[offset].addr,
+ multiplex[offset].saved);
+ }
+ }
+}
+
+#endif
+
static void nmi_cpu_restore_registers(struct op_msrs *msrs)
{
struct op_msr *counters = msrs->counters;
@@ -214,6 +306,9 @@ static void nmi_cpu_shutdown(void *dummy)
apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
apic_write(APIC_LVTERR, v);
nmi_cpu_restore_registers(msrs);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ __get_cpu_var(switch_index) = 0;
+#endif
}
static void nmi_shutdown(void)
@@ -252,16 +347,15 @@ static void nmi_stop(void)
on_each_cpu(nmi_cpu_stop, NULL, 1);
}
-struct op_counter_config counter_config[OP_MAX_COUNTER];
-
static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
unsigned int i;
- for (i = 0; i < model->num_counters; ++i) {
+ for (i = 0; i < model->num_virt_counters; ++i) {
struct dentry *dir;
char buf[4];
+#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX
/* quick little hack to _not_ expose a counter if it is not
* available for use. This should protect userspace app.
* NOTE: assumes 1:1 mapping here (that counters are organized
@@ -269,6 +363,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
*/
if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
continue;
+#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */
snprintf(buf, sizeof(buf), "%d", i);
dir = oprofilefs_mkdir(sb, root, buf);
@@ -283,6 +378,57 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root)
return 0;
}
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void nmi_cpu_switch(void *dummy)
+{
+ int cpu = smp_processor_id();
+ int si = per_cpu(switch_index, cpu);
+ struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
+
+ nmi_cpu_stop(NULL);
+ nmi_cpu_save_mpx_registers(msrs);
+
+ /* move to next set */
+ si += model->num_counters;
+ if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
+ per_cpu(switch_index, cpu) = 0;
+ else
+ per_cpu(switch_index, cpu) = si;
+
+ model->switch_ctrl(model, msrs);
+ nmi_cpu_restore_mpx_registers(msrs);
+
+ nmi_cpu_start(NULL);
+}
+
+
+/*
+ * Quick check to see if multiplexing is necessary.
+ * The check should be sufficient since counters are used
+ * in order.
+ */
+static int nmi_multiplex_on(void)
+{
+ return counter_config[model->num_counters].count ? 0 : -EINVAL;
+}
+
+static int nmi_switch_event(void)
+{
+ if (!model->switch_ctrl)
+ return -ENOSYS; /* not implemented */
+ if (nmi_multiplex_on() < 0)
+ return -EINVAL; /* not necessary */
+
+ on_each_cpu(nmi_cpu_switch, NULL, 1);
+
+ atomic_inc(&multiplex_counter);
+
+ return 0;
+}
+
+#endif
+
#ifdef CONFIG_SMP
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
void *data)
@@ -516,12 +662,18 @@ int __init op_nmi_init(struct oprofile_operations *ops)
register_cpu_notifier(&oprofile_cpu_nb);
#endif
/* default values, can be overwritten by model */
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ __raw_get_cpu_var(switch_index) = 0;
+#endif
ops->create_files = nmi_create_files;
ops->setup = nmi_setup;
ops->shutdown = nmi_shutdown;
ops->start = nmi_start;
ops->stop = nmi_stop;
ops->cpu_type = cpu_type;
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ ops->switch_events = nmi_switch_event;
+#endif
if (model->init)
ret = model->init(ops);
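
The save/restore helpers and nmi_cpu_switch() above rely on the usual perfctr convention: a counter is armed with the negative of the requested count, so it overflows (and raises the NMI) after exactly that many events, and the multiplex[] array keeps those negative values alive while an event set is parked. A minimal stand-alone illustration of that convention, with a plain 64-bit wrap standing in for the hardware counter (rdmsrl()/wrmsrl() and the NMI itself are elided):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t count = 100000;		/* requested sample interval */
		uint64_t ctr = -(uint64_t)count;	/* the value wrmsrl() programs */
		uint64_t events = 0;

		while (ctr != 0) {	/* hardware increments once per event */
			ctr++;
			events++;
		}
		/* the wrap to zero is where real hardware raises the overflow NMI */
		printf("overflow after %llu events\n", (unsigned long long)events);
		return 0;
	}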
diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h
index 91b6a116165..e28398df0df 100644
--- a/arch/x86/oprofile/op_counter.h
+++ b/arch/x86/oprofile/op_counter.h
@@ -10,7 +10,7 @@
#ifndef OP_COUNTER_H
#define OP_COUNTER_H
-#define OP_MAX_COUNTER 8
+#define OP_MAX_COUNTER 32
/* Per-perfctr configuration as set via
* oprofilefs.
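
The bump to 32 matches NUM_VIRT_COUNTERS in op_model_amd.c below: counter_config[] is indexed by virtual counter number, so the two limits must stay in step. The patch relies on convention for this; a hypothetical compile-time guard (not part of the patch) that would catch a mismatch could look like:

	/* build-time guard, C89-style; both constants copied for the sketch */
	#define OP_MAX_COUNTER    32
	#define NUM_VIRT_COUNTERS 32

	typedef char op_max_counter_too_small
		[(OP_MAX_COUNTER >= NUM_VIRT_COUNTERS) ? 1 : -1];

	int main(void) { return 0; }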
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index f676f8825a3..fdbed3a0c87 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -9,12 +9,15 @@
* @author Philippe Elie
* @author Graydon Hoare
* @author Robert Richter <robert.richter@amd.com>
- * @author Barry Kasindorf
+ * @author Barry Kasindorf <barry.kasindorf@amd.com>
+ * @author Jason Yeh <jason.yeh@amd.com>
+ * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
*/
#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
+#include <linux/percpu.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
@@ -25,12 +28,23 @@
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+#define NUM_VIRT_COUNTERS 32
+#define NUM_VIRT_CONTROLS 32
+#else
+#define NUM_VIRT_COUNTERS NUM_COUNTERS
+#define NUM_VIRT_CONTROLS NUM_CONTROLS
+#endif
+
#define OP_EVENT_MASK 0x0FFF
#define OP_CTR_OVERFLOW (1ULL<<31)
#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21))
-static unsigned long reset_value[NUM_COUNTERS];
+static unsigned long reset_value[NUM_VIRT_COUNTERS];
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+DECLARE_PER_CPU(int, switch_index);
+#endif
#ifdef CONFIG_OPROFILE_IBS
@@ -82,6 +96,16 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs)
else
msrs->controls[i].addr = 0;
}
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ for (i = 0; i < NUM_VIRT_COUNTERS; i++) {
+ int hw_counter = i % NUM_CONTROLS;
+ if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+ msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter;
+ else
+ msrs->multiplex[i].addr = 0;
+ }
+#endif
}
static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
@@ -90,6 +114,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
u64 val;
int i;
+ /* setup reset_value */
+ for (i = 0; i < NUM_VIRT_COUNTERS; ++i) {
+ if (counter_config[i].enabled) {
+ reset_value[i] = counter_config[i].count;
+ } else {
+ reset_value[i] = 0;
+ }
+ }
+
/* clear all counters */
for (i = 0; i < NUM_CONTROLS; ++i) {
if (unlikely(!msrs->controls[i].addr))
@@ -108,20 +141,49 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (counter_config[i].enabled && msrs->counters[i].addr) {
- reset_value[i] = counter_config[i].count;
- wrmsrl(msrs->counters[i].addr,
- -(u64)counter_config[i].count);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ int offset = i + __get_cpu_var(switch_index);
+#else
+ int offset = i;
+#endif
+ if (counter_config[offset].enabled && msrs->counters[i].addr) {
+ /* setup counter registers */
+ wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]);
+
+ /* setup control registers */
rdmsrl(msrs->controls[i].addr, val);
val &= model->reserved;
- val |= op_x86_get_ctrl(model, &counter_config[i]);
+ val |= op_x86_get_ctrl(model, &counter_config[offset]);
+ wrmsrl(msrs->controls[i].addr, val);
+ }
+ }
+}
+
+
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+
+static void op_amd_switch_ctrl(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs)
+{
+ u64 val;
+ int i;
+
+ /* enable active counters */
+ for (i = 0; i < NUM_COUNTERS; ++i) {
+ int offset = i + __get_cpu_var(switch_index);
+ if (counter_config[offset].enabled) {
+ /* setup control registers */
+ rdmsrl(msrs->controls[i].addr, val);
+ val &= model->reserved;
+ val |= op_x86_get_ctrl(model, &counter_config[offset]);
wrmsrl(msrs->controls[i].addr, val);
- } else {
- reset_value[i] = 0;
}
}
}
+#endif
+
+
#ifdef CONFIG_OPROFILE_IBS
static inline int
@@ -230,14 +292,19 @@ static int op_amd_check_ctrs(struct pt_regs * const regs,
int i;
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (!reset_value[i])
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ int offset = i + __get_cpu_var(switch_index);
+#else
+ int offset = i;
+#endif
+ if (!reset_value[offset])
continue;
rdmsrl(msrs->counters[i].addr, val);
/* bit is clear if overflowed: */
if (val & OP_CTR_OVERFLOW)
continue;
- oprofile_add_sample(regs, i);
- wrmsrl(msrs->counters[i].addr, -(u64)reset_value[i]);
+ oprofile_add_sample(regs, offset);
+ wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]);
}
op_amd_handle_ibs(regs, msrs);
@@ -250,8 +317,14 @@ static void op_amd_start(struct op_msrs const * const msrs)
{
u64 val;
int i;
+
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (reset_value[i]) {
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ int offset = i + __get_cpu_var(switch_index);
+#else
+ int offset = i;
+#endif
+ if (reset_value[offset]) {
rdmsrl(msrs->controls[i].addr, val);
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
wrmsrl(msrs->controls[i].addr, val);
@@ -271,7 +344,11 @@ static void op_amd_stop(struct op_msrs const * const msrs)
* pm callback
*/
for (i = 0; i < NUM_COUNTERS; ++i) {
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ if (!reset_value[i + per_cpu(switch_index, smp_processor_id())])
+#else
if (!reset_value[i])
+#endif
continue;
rdmsrl(msrs->controls[i].addr, val);
val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -289,7 +366,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs)
if (msrs->counters[i].addr)
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
}
- for (i = 0; i < NUM_CONTROLS; ++i) {
+ for (i = 0; i < NUM_COUNTERS; ++i) {
if (msrs->controls[i].addr)
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
@@ -463,6 +540,8 @@ static void op_amd_exit(void) {}
struct op_x86_model_spec const op_amd_spec = {
.num_counters = NUM_COUNTERS,
.num_controls = NUM_CONTROLS,
+ .num_virt_counters = NUM_VIRT_COUNTERS,
+ .num_virt_controls = NUM_VIRT_CONTROLS,
.reserved = MSR_AMD_EVENTSEL_RESERVED,
.event_mask = OP_EVENT_MASK,
.init = op_amd_init,
@@ -473,4 +552,7 @@ struct op_x86_model_spec const op_amd_spec = {
.start = &op_amd_start,
.stop = &op_amd_stop,
.shutdown = &op_amd_shutdown,
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ .switch_ctrl = &op_amd_switch_ctrl,
+#endif
};
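
The AMD model changes above map each virtual counter onto a physical one with a simple modulo: virtual counter switch_index + i is serviced by hardware counter i, whose MSR address is MSR_K7_PERFCTR0 plus the physical index. A stand-alone sketch of that mapping; the MSR base is the architectural AMD value, everything else is illustrative:

	#include <stdio.h>

	#define NUM_COUNTERS      4
	#define NUM_VIRT_COUNTERS 32
	#define MSR_K7_PERFCTR0   0xc0010004

	int main(void)
	{
		int virt;

		for (virt = 0; virt < NUM_VIRT_COUNTERS; ++virt) {
			int hw = virt % NUM_COUNTERS;	/* same modulo as the patch */

			printf("virtual counter %2d -> hw counter %d (MSR 0x%x)\n",
			       virt, hw, MSR_K7_PERFCTR0 + hw);
		}
		return 0;
	}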
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 5921b7fc724..65b9237cde8 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -698,6 +698,8 @@ static void p4_shutdown(struct op_msrs const * const msrs)
struct op_x86_model_spec const op_p4_ht2_spec = {
.num_counters = NUM_COUNTERS_HT2,
.num_controls = NUM_CONTROLS_HT2,
+ .num_virt_counters = NUM_COUNTERS_HT2,
+ .num_virt_controls = NUM_CONTROLS_HT2,
.fill_in_addresses = &p4_fill_in_addresses,
.setup_ctrs = &p4_setup_ctrs,
.check_ctrs = &p4_check_ctrs,
@@ -710,6 +712,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = {
struct op_x86_model_spec const op_p4_spec = {
.num_counters = NUM_COUNTERS_NON_HT,
.num_controls = NUM_CONTROLS_NON_HT,
+ .num_virt_counters = NUM_COUNTERS_NON_HT,
+ .num_virt_controls = NUM_CONTROLS_NON_HT,
.fill_in_addresses = &p4_fill_in_addresses,
.setup_ctrs = &p4_setup_ctrs,
.check_ctrs = &p4_check_ctrs,
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 570d717c330..098cbca5c0b 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -206,6 +206,8 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
struct op_x86_model_spec const op_ppro_spec = {
.num_counters = 2,
.num_controls = 2,
+ .num_virt_counters = 2,
+ .num_virt_controls = 2,
.reserved = MSR_PPRO_EVENTSEL_RESERVED,
.fill_in_addresses = &ppro_fill_in_addresses,
.setup_ctrs = &ppro_setup_ctrs,
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 505489873b9..0d07d23cb06 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -23,6 +23,7 @@ struct op_msr {
struct op_msrs {
struct op_msr *counters;
struct op_msr *controls;
+ struct op_msr *multiplex;
};
struct pt_regs;
@@ -35,6 +36,8 @@ struct oprofile_operations;
struct op_x86_model_spec {
unsigned int num_counters;
unsigned int num_controls;
+ unsigned int num_virt_counters;
+ unsigned int num_virt_controls;
u64 reserved;
u16 event_mask;
int (*init)(struct oprofile_operations *ops);
@@ -47,6 +50,10 @@ struct op_x86_model_spec {
void (*start)(struct op_msrs const * const msrs);
void (*stop)(struct op_msrs const * const msrs);
void (*shutdown)(struct op_msrs const * const msrs);
+#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
+ void (*switch_ctrl)(struct op_x86_model_spec const *model,
+ struct op_msrs const * const msrs);
+#endif
};
struct op_counter_config;
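
A model opts into multiplexing by advertising more virtual than physical counters and supplying switch_ctrl; models that leave switch_ctrl NULL (as the P4 and PPro specs above effectively do) make nmi_switch_event() fail with -ENOSYS. A compilable sketch of both variants, with the spec struct trimmed to the fields this patch touches and hypothetical demo_* names:

	#include <stddef.h>

	struct op_msrs;		/* opaque for this sketch */

	struct op_x86_model_spec {
		unsigned int num_counters;
		unsigned int num_virt_counters;
		void (*switch_ctrl)(struct op_x86_model_spec const *model,
				    struct op_msrs const * const msrs);
	};

	static void demo_switch_ctrl(struct op_x86_model_spec const *model,
				     struct op_msrs const * const msrs)
	{
		/* would reprogram the event selects for the current switch_index */
		(void)model;
		(void)msrs;
	}

	/* multiplexing-capable: 4 hardware counters back 32 virtual ones */
	static const struct op_x86_model_spec demo_mux_spec = {
		.num_counters      = 4,
		.num_virt_counters = 32,
		.switch_ctrl       = demo_switch_ctrl,
	};

	/* no multiplexing: virtual == physical, nmi_switch_event() -> -ENOSYS */
	static const struct op_x86_model_spec demo_plain_spec = {
		.num_counters      = 2,
		.num_virt_counters = 2,
		.switch_ctrl       = NULL,
	};

	int main(void)
	{
		return (demo_mux_spec.switch_ctrl && !demo_plain_spec.switch_ctrl)
			? 0 : 1;
	}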