From 2743a5b0fa6f309da904f2190a9cc25deee34dbd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 4 Mar 2009 20:36:51 +1100 Subject: perfcounters: provide expansion room in the ABI Impact: ABI change This expands several fields in the perf_counter_hw_event struct and adds a "flags" argument to the perf_counter_open system call, in order that features can be added in future without ABI changes. In particular the record_type field is expanded to 64 bits, and the space for flag bits has been expanded from 32 to 64 bits. This also adds some new fields: * read_format (64 bits) is intended to provide a way to specify what userspace wants to get back when it does a read() on a simple (non-interrupting) counter; * exclude_idle (1 bit) provides a way for userspace to ask that events that occur when the cpu is idle be excluded; * extra_config_len will provide a way for userspace to supply an arbitrary amount of extra machine-specific PMU configuration data immediately following the perf_counter_hw_event struct, to allow sophisticated users to program things such as instruction matching CAMs and address range registers; * __reserved_3 and __reserved_4 provide space for future expansion. Signed-off-by: Paul Mackerras --- include/linux/perf_counter.h | 12 +++++++++--- include/linux/syscalls.h | 2 +- kernel/perf_counter.c | 10 +++++++--- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 186efaf4966..c42455ab155 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -69,9 +69,10 @@ struct perf_counter_hw_event { __s64 type; __u64 irq_period; - __u32 record_type; + __u64 record_type; + __u64 read_format; - __u32 disabled : 1, /* off by default */ + __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ @@ -80,10 +81,15 @@ struct perf_counter_hw_event { exclude_user : 1, /* don't count user */ exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 23; + __reserved_1 : 55; + + __u32 extra_config_len; + __u32 __reserved_4; __u64 __reserved_2; + __u64 __reserved_3; }; /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 28ef2be839c..ab1d7724739 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -698,5 +698,5 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( const struct perf_counter_hw_event __user *hw_event_uptr, - pid_t pid, int cpu, int group_fd); + pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 16b14ba99d3..b2e838959f3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1683,16 +1683,16 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, } /** - * sys_perf_task_open - open a performance counter, associate it to a task/cpu + * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * * @hw_event_uptr: event type attributes for monitoring/sampling * @pid: target pid * @cpu: target cpu * @group_fd: group leader counter fd */ -SYSCALL_DEFINE4(perf_counter_open, +SYSCALL_DEFINE5(perf_counter_open, const struct perf_counter_hw_event __user *, hw_event_uptr, - pid_t, pid, int, cpu, int, group_fd) + pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; @@ -1703,6 +1703,10 @@ SYSCALL_DEFINE4(perf_counter_open, int fput_needed2 = 0; int ret; + /* for future expandability... */ + if (flags) + return -EINVAL; + if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) return -EFAULT; -- cgit v1.2.3