From 465a454f254ee2ff7acc4aececbe31f8af046bc0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 12:31:37 +0200 Subject: x86, mm: Add __get_user_pages_fast() Introduce a gup_fast() variant which is usable from IRQ/NMI context. Signed-off-by: Peter Zijlstra CC: Nick Piggin Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ad613ed66ab..b457bc047ab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -862,6 +862,12 @@ extern int mprotect_fixup(struct vm_area_struct *vma, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); +/* + * doesn't attempt to fault and will return short. + */ +int __get_user_pages_fast(unsigned long start, int nr_pages, int write, + struct page **pages); + /* * A callback you can register to apply pressure to ageable caches. * -- cgit v1.2.3 From 9974458e2f9a11dbd2f4bd14fab5a79af4907b41 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jun 2009 21:45:16 +1000 Subject: perf_counter: Make set_perf_counter_pending() declaration common At present, every architecture that supports perf_counters has to declare set_perf_counter_pending() in its arch-specific headers. This consolidates the declarations into a single declaration in one common place, include/linux/perf_counter.h. On powerpc, we continue to provide a static inline definition of set_perf_counter_pending() in the powerpc hw_irq.h. Also, this removes from the x86 perf_counter.h the unused null definitions of {test,clear}_perf_counter_pending. Reported-by: Mike Frysinger Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: benh@kernel.crashing.org LKML-Reference: <18998.13388.920691.523227@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1b3118a1023..eccae437fe3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -604,6 +604,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_free_task(struct task_struct *task); +extern void set_perf_counter_pending(void); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void __perf_disable(void); -- cgit v1.2.3 From 7522060c95395f479ee4a6af3bbf9e097e92e48f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Jun 2009 08:00:17 +0200 Subject: perf report: Add validation of call-chain entries Add boundary checks for call-chain events. In case of corrupted entries we could crash otherwise. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index eccae437fe3..a7d3a61a59b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -337,6 +337,16 @@ enum perf_event_type { */ }; +#define MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + __u16 nr; + __u16 hv; + __u16 kernel; + __u16 user; + __u64 ip[MAX_STACK_DEPTH]; +}; + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -652,16 +662,6 @@ extern void perf_counter_fork(struct task_struct *tsk); extern void perf_counter_task_migration(struct task_struct *task, int cpu); -#define MAX_STACK_DEPTH 255 - -struct perf_callchain_entry { - u16 nr; - u16 hv; - u16 kernel; - u16 user; - u64 ip[MAX_STACK_DEPTH]; -}; - extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_paranoid; -- cgit v1.2.3 From 43a21ea81a2400992561146327c4785ce7f7be38 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 19:39:37 +0100 Subject: perf_counter: Add event overlow handling Alternative method of mmap() data output handling that provides better overflow management and a more reliable data stream. Unlike the previous method, that didn't have any user->kernel feedback and relied on userspace keeping up, this method relies on userspace writing its last read position into the control page. It will ensure new output doesn't overwrite not-yet read events, new events for which there is no space left are lost and the overflow counter is incremented, providing exact event loss numbers. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a7d3a61a59b..0765e8e6984 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -236,10 +236,16 @@ struct perf_counter_mmap_page { /* * Control data for the mmap() data buffer. * - * User-space reading this value should issue an rmb(), on SMP capable - * platforms, after reading this value -- see perf_counter_wakeup(). + * User-space reading the @data_head value should issue an rmb(), on + * SMP capable platforms, after reading this value -- see + * perf_counter_wakeup(). + * + * When the mapping is PROT_WRITE the @data_tail value should be + * written by userspace to reflect the last read data. In this case + * the kernel will not over-write unread data. */ __u64 data_head; /* head in the data section */ + __u64 data_tail; /* user-space written tail */ }; #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) @@ -273,6 +279,15 @@ enum perf_event_type { */ PERF_EVENT_MMAP = 1, + /* + * struct { + * struct perf_event_header header; + * u64 id; + * u64 lost; + * }; + */ + PERF_EVENT_LOST = 2, + /* * struct { * struct perf_event_header header; @@ -313,26 +328,26 @@ enum perf_event_type { /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field - * will be PERF_RECORD_* + * will be PERF_SAMPLE_* * * struct { * struct perf_event_header header; * - * { u64 ip; } && PERF_RECORD_IP - * { u32 pid, tid; } && PERF_RECORD_TID - * { u64 time; } && PERF_RECORD_TIME - * { u64 addr; } && PERF_RECORD_ADDR - * { u64 config; } && PERF_RECORD_CONFIG - * { u32 cpu, res; } && PERF_RECORD_CPU + * { u64 ip; } && PERF_SAMPLE_IP + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 addr; } && PERF_SAMPLE_ADDR + * { u64 config; } && PERF_SAMPLE_CONFIG + * { u32 cpu, res; } && PERF_SAMPLE_CPU * * { u64 nr; - * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP + * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP * * { u16 nr, * hv, * kernel, * user; - * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN + * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * }; */ }; @@ -424,6 +439,7 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; /* nr of data pages */ + int writable; /* are we writable */ int nr_locked; /* nr pages mlocked */ atomic_t poll; /* POLL_ for wakeups */ @@ -433,8 +449,8 @@ struct perf_mmap_data { atomic_long_t done_head; /* completed head */ atomic_t lock; /* concurrent writes */ - atomic_t wakeup; /* needs a wakeup */ + atomic_t lost; /* nr records lost */ struct perf_counter_mmap_page *user_page; void *data_pages[0]; -- cgit v1.2.3 From f9188e023c248d73f5b4a589b480e065c1864068 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 22:20:52 +0200 Subject: perf_counter: Make callchain samples extensible Before exposing upstream tools to a callchain-samples ABI, tidy it up to make it more extensible in the future: Use markers in the IP chain to denote context, use (u64)-1..-4095 range for these context markers because we use them for ERR_PTR(), so these addresses are unlikely to be mapped. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0765e8e6984..e7e7e024276 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -343,23 +343,22 @@ enum perf_event_type { * { u64 nr; * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP * - * { u16 nr, - * hv, - * kernel, - * user; + * { u64 nr, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * }; */ }; -#define MAX_STACK_DEPTH 255 +enum perf_callchain_context { + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, -struct perf_callchain_entry { - __u16 nr; - __u16 hv; - __u16 kernel; - __u16 user; - __u64 ip[MAX_STACK_DEPTH]; + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, + + PERF_CONTEXT_MAX = (__u64)-4095, }; #ifdef __KERNEL__ @@ -381,6 +380,13 @@ struct perf_callchain_entry { #include #include +#define PERF_MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + __u64 nr; + __u64 ip[PERF_MAX_STACK_DEPTH]; +}; + struct task_struct; /** -- cgit v1.2.3 From e5289d4a181fb6c0b7a7607649af2ffdc491335c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Jun 2009 13:22:51 +0200 Subject: perf_counter: Simplify and fix task migration counting The task migrations counter was causing rare and hard to decypher memory corruptions under load. After a day of debugging and bisection we found that the problem was introduced with: 3f731ca: perf_counter: Fix cpu migration counter Turning them off fixes the crashes. Incidentally, the whole perf_counter_task_migration() logic can be done simpler as well, by injecting a proper sw-counter event. This cleanup also fixed the crashes. The precise failure mode is not completely clear yet, but we are clearly not unhappy about having a fix ;-) Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Corey Ashford Cc: Marcelo Tosatti Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e7e7e024276..89698d8aba5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -682,8 +682,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) extern void perf_counter_comm(struct task_struct *tsk); extern void perf_counter_fork(struct task_struct *tsk); -extern void perf_counter_task_migration(struct task_struct *task, int cpu); - extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); extern int sysctl_perf_counter_paranoid; @@ -724,8 +722,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { } static inline void perf_counter_comm(struct task_struct *tsk) { } static inline void perf_counter_fork(struct task_struct *tsk) { } static inline void perf_counter_init(void) { } -static inline void perf_counter_task_migration(struct task_struct *task, - int cpu) { } #endif #endif /* __KERNEL__ */ -- cgit v1.2.3