-rw-r--r--  arch/x86/kernel/cpu/perf_counter.c  29
-rw-r--r--  include/linux/perf_counter.h        98
-rw-r--r--  include/linux/syscalls.h            12
-rw-r--r--  kernel/perf_counter.c               38
4 files changed, 106 insertions, 71 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 30e7ebf7827..ef1936a871a 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map);
*/
int hw_perf_counter_init(struct perf_counter *counter)
{
+ struct perf_counter_hw_event *hw_event = &counter->hw_event;
struct hw_perf_counter *hwc = &counter->hw;
- u32 hw_event_type = counter->event.hw_event_type;
if (unlikely(!perf_counters_initialized))
return -EINVAL;
@@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter)
hwc->nmi = 0;
if (capable(CAP_SYS_ADMIN)) {
hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
- if (hw_event_type & PERF_COUNT_NMI)
+ if (hw_event->nmi)
hwc->nmi = 1;
}
- hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
- hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
+ hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0;
+ hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0;
- hwc->irq_period = counter->event.hw_event_period;
+ hwc->irq_period = hw_event->irq_period;
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
@@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter)
if (!hwc->irq_period)
hwc->irq_period = 0x7FFFFFFF;
- hwc->next_count = -((s32) hwc->irq_period);
+ hwc->next_count = -(s32)hwc->irq_period;
/*
* Raw event type provides the config in the event structure
*/
- hw_event_type &= ~PERF_COUNT_NMI;
- if (hw_event_type == PERF_COUNT_RAW) {
- hwc->config |= counter->event.hw_raw_ctrl;
+ if (hw_event->raw) {
+ hwc->config |= hw_event->type;
} else {
- if (hw_event_type >= max_intel_perfmon_events)
+ if (hw_event->type >= max_intel_perfmon_events)
return -EINVAL;
/*
* The generic map:
*/
- hwc->config |= intel_perfmon_event_map[hw_event_type];
+ hwc->config |= intel_perfmon_event_map[hw_event->type];
}
counter->wakeup_pending = 0;
@@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
int bit;
list_for_each_entry(counter, &ctx->counters, list) {
- if (counter->record_type != PERF_RECORD_SIMPLE ||
+ if (counter->hw_event.record_type != PERF_RECORD_SIMPLE ||
counter == leader)
continue;
@@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown)
perf_save_and_restart(counter);
}
}
- perf_store_irq_data(leader, counter->event.hw_event_type);
+ perf_store_irq_data(leader, counter->hw_event.type);
perf_store_irq_data(leader, atomic64_counter_read(counter));
}
}
@@ -410,7 +409,7 @@ again:
perf_save_and_restart(counter);
- switch (counter->record_type) {
+ switch (counter->hw_event.record_type) {
case PERF_RECORD_SIMPLE:
continue;
case PERF_RECORD_IRQ:
@@ -418,7 +417,7 @@ again:
break;
case PERF_RECORD_GROUP:
perf_store_irq_data(counter,
- counter->event.hw_event_type);
+ counter->hw_event.type);
perf_store_irq_data(counter,
atomic64_counter_read(counter));
perf_handle_group(counter, &status, &ack);
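The restructured hw_perf_counter_init() above reduces event setup to a simple split: raw events carry their config bits verbatim in hw_event->type, while generalized events index into intel_perfmon_event_map[]. A minimal stand-alone sketch of that selection logic, with illustrative map values rather than the kernel's exact MSR encodings:

	#include <stdint.h>

	#define MAX_GENERIC_EVENTS 6	/* PERF_COUNT_CYCLES .. PERF_COUNT_BRANCH_MISSES */

	/* illustrative stand-in for intel_perfmon_event_map[] */
	static const uint64_t event_map[MAX_GENERIC_EVENTS] = {
		0x003c, 0x00c0, 0x4f2e, 0x412e, 0x00c4, 0x00c5,
	};

	static int event_config(uint64_t type, int raw, uint64_t *config)
	{
		if (raw) {
			*config |= type;	/* raw: 'type' is the config itself */
			return 0;
		}
		if (type >= MAX_GENERIC_EVENTS)
			return -1;		/* no generalized mapping */
		*config |= event_map[type];
		return 0;
	}
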
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 1f0017673e7..a2b4852e2d7 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -24,65 +24,93 @@
struct task_struct;
/*
- * Generalized hardware event types, used by the hw_event_type parameter
- * of the sys_perf_counter_open() syscall:
+ * User-space ABI bits:
+ */
+
+/*
+ * Generalized performance counter event types, used by the hw_event.type
+ * parameter of the sys_perf_counter_open() syscall:
*/
enum hw_event_types {
- PERF_COUNT_CYCLES,
- PERF_COUNT_INSTRUCTIONS,
- PERF_COUNT_CACHE_REFERENCES,
- PERF_COUNT_CACHE_MISSES,
- PERF_COUNT_BRANCH_INSTRUCTIONS,
- PERF_COUNT_BRANCH_MISSES,
/*
- * If this bit is set in the type, then trigger NMI sampling:
+ * Common hardware events, generalized by the kernel:
*/
- PERF_COUNT_NMI = (1 << 30),
- PERF_COUNT_RAW = (1 << 31),
+ PERF_COUNT_CYCLES = 0,
+ PERF_COUNT_INSTRUCTIONS = 1,
+ PERF_COUNT_CACHE_REFERENCES = 2,
+ PERF_COUNT_CACHE_MISSES = 3,
+ PERF_COUNT_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_BRANCH_MISSES = 5,
+
+ /*
+ * Special "software" counters provided by the kernel, even if
+ * the hardware does not support performance counters. These
+ * counters measure various physical and sw events of the
+ * kernel (and allow the profiling of them as well):
+ */
+ PERF_COUNT_CPU_CLOCK = -1,
+ PERF_COUNT_TASK_CLOCK = -2,
+ PERF_COUNT_PAGE_FAULTS = -3,
+ PERF_COUNT_CONTEXT_SWITCHES = -4,
};
/*
* IRQ-notification data record type:
*/
-enum perf_record_type {
- PERF_RECORD_SIMPLE,
- PERF_RECORD_IRQ,
- PERF_RECORD_GROUP,
+enum perf_counter_record_type {
+ PERF_RECORD_SIMPLE = 0,
+ PERF_RECORD_IRQ = 1,
+ PERF_RECORD_GROUP = 2,
};
-struct perf_counter_event {
- u32 hw_event_type;
- u32 hw_event_period;
- u64 hw_raw_ctrl;
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_hw_event {
+ u64 type;
+
+ u64 irq_period;
+ u32 record_type;
+
+ u32 disabled : 1, /* off by default */
+ nmi : 1, /* NMI sampling */
+ raw : 1, /* raw event type */
+ __reserved_1 : 29;
+
+ u64 __reserved_2;
};
+/*
+ * Kernel-internal data types:
+ */
+
/**
- * struct hw_perf_counter - performance counter hardware details
+ * struct hw_perf_counter - performance counter hardware details:
*/
struct hw_perf_counter {
- u64 config;
- unsigned long config_base;
- unsigned long counter_base;
- int nmi;
- unsigned int idx;
- u64 prev_count;
- s32 next_count;
- u64 irq_period;
+ u64 config;
+ unsigned long config_base;
+ unsigned long counter_base;
+ int nmi;
+ unsigned int idx;
+ u64 prev_count;
+ u64 irq_period;
+ s32 next_count;
};
/*
* Hardcoded buffer length limit for now, for IRQ-fed events:
*/
-#define PERF_DATA_BUFLEN 2048
+#define PERF_DATA_BUFLEN 2048
/**
* struct perf_data - performance counter IRQ data sampling ...
*/
struct perf_data {
- int len;
- int rd_idx;
- int overrun;
- u8 data[PERF_DATA_BUFLEN];
+ int len;
+ int rd_idx;
+ int overrun;
+ u8 data[PERF_DATA_BUFLEN];
};
/**
@@ -96,7 +124,7 @@ struct perf_counter {
#else
atomic_t count32[2];
#endif
- struct perf_counter_event event;
+ struct perf_counter_hw_event hw_event;
struct hw_perf_counter hw;
struct perf_counter_context *ctx;
@@ -110,8 +138,6 @@ struct perf_counter {
int oncpu;
int cpu;
- enum perf_record_type record_type;
-
/* read() / irq related data */
wait_queue_head_t waitq;
/* optional: for NMIs */
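With the header now split into user-space ABI bits and kernel-internal types, a caller describes the whole counter through struct perf_counter_hw_event. A hypothetical initializer, assuming the u64/u32 kernel types are mapped to user-space equivalents, requesting IRQ-sampled cycle counting every 100,000 events:

	struct perf_counter_hw_event hw_event = {
		.type		= PERF_COUNT_CYCLES,	/* generalized hw event */
		.irq_period	= 100000,		/* overflow IRQ every 100k events */
		.record_type	= PERF_RECORD_IRQ,	/* record data on each IRQ */
		.nmi		= 1,			/* request NMI sampling */
	};

Per the x86 code above, the nmi bit only takes effect for CAP_SYS_ADMIN callers; others get plain interrupt-driven sampling.
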
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3ecd73d03da..a549678b7c3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,7 +54,7 @@ struct compat_stat;
struct compat_timeval;
struct robust_list_head;
struct getcpu_cache;
-struct perf_counter_event;
+struct perf_counter_hw_event;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
- pid_t pid, int cpu, int masterfd);
+
+asmlinkage int sys_perf_counter_open(
+
+ struct perf_counter_hw_event __user *hw_event_uptr,
+ pid_t pid,
+ int cpu,
+ int group_fd);
#endif
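Since record_type now travels inside the struct and masterfd becomes group_fd, the syscall shrinks to four arguments. A hypothetical invocation through syscall(2); __NR_perf_counter_open is a placeholder here, since the syscall number assignment is not part of this patch, and the pid/cpu conventions are assumed from find_get_context():

	#include <unistd.h>
	#include <sys/syscall.h>

	int fd = syscall(__NR_perf_counter_open, &hw_event,
			 0,	/* pid 0: attach to the current task */
			 -1,	/* cpu -1: follow the task, not a CPU */
			 -1);	/* group_fd -1: no counter group */
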
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 2557c670a3b..0d323ceda3a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
struct perf_counter *counter = file->private_data;
- switch (counter->record_type) {
+ switch (counter->hw_event.record_type) {
case PERF_RECORD_SIMPLE:
return perf_read_hw(counter, buf, count);
@@ -707,7 +707,7 @@ static const struct file_operations perf_fops = {
* Allocate and initialize a counter structure
*/
static struct perf_counter *
-perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
+perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu)
{
struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL);
@@ -718,31 +718,37 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type)
INIT_LIST_HEAD(&counter->list);
init_waitqueue_head(&counter->waitq);
- counter->irqdata = &counter->data[0];
- counter->usrdata = &counter->data[1];
- counter->cpu = cpu;
- counter->record_type = record_type;
- counter->event = *event;
- counter->wakeup_pending = 0;
+ counter->irqdata = &counter->data[0];
+ counter->usrdata = &counter->data[1];
+ counter->cpu = cpu;
+ counter->hw_event = *hw_event;
+ counter->wakeup_pending = 0;
return counter;
}
/**
- * sys_perf_task_open - open a performance counter associate it to a task
- * @hw_event_type: event type for monitoring/sampling...
+ * sys_perf_counter_open - open a performance counter, associate it to a task/cpu
+ *
+ * @hw_event_uptr: event type attributes for monitoring/sampling
* @pid: target pid
+ * @cpu: target cpu
+ * @group_fd: group leader counter fd
*/
-asmlinkage int
-sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
- pid_t pid, int cpu, int masterfd)
+asmlinkage int sys_perf_counter_open(
+
+ struct perf_counter_hw_event __user *hw_event_uptr,
+ pid_t pid,
+ int cpu,
+ int group_fd)
+
{
struct perf_counter_context *ctx;
- struct perf_counter_event event;
+ struct perf_counter_hw_event hw_event;
struct perf_counter *counter;
int ret;
- if (copy_from_user(&event, uevent, sizeof(event)) != 0)
+ if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
return -EFAULT;
ctx = find_get_context(pid, cpu);
@@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type,
return PTR_ERR(ctx);
ret = -ENOMEM;
- counter = perf_counter_alloc(&event, cpu, record_type);
+ counter = perf_counter_alloc(&hw_event, cpu);
if (!counter)
goto err_put_context;
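Once perf_counter_alloc() succeeds, the counter is installed in the target context and handed back as a file descriptor, so a PERF_RECORD_SIMPLE counter reads out through the perf_fops read() path shown earlier. A hypothetical read-out, assuming perf_read_hw() copies the raw 64-bit count as the PERF_RECORD_SIMPLE case above suggests:

	#include <stdio.h>
	#include <stdint.h>
	#include <unistd.h>

	static void print_count(int fd)
	{
		uint64_t count;

		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("event count: %llu\n", (unsigned long long)count);
	}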