aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/kernel/ftrace.c35
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/ftrace_irq.h2
-rw-r--r--include/linux/hardirq.h15
-rw-r--r--include/linux/ring_buffer.h9
-rw-r--r--kernel/trace/Kconfig8
-rw-r--r--kernel/trace/ftrace.c6
-rw-r--r--kernel/trace/ring_buffer.c31
-rw-r--r--kernel/trace/trace.c2
-rw-r--r--kernel/trace/trace.h7
-rw-r--r--kernel/trace/trace_hw_branches.c5
-rw-r--r--kernel/trace/trace_output.c6
13 files changed, 85 insertions, 44 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 73f7fe8fd4d..2cf7bbcaed4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -34,6 +34,7 @@ config X86
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4d33224c055..d74d75e0952 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -82,7 +82,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
* are the same as what exists.
*/
-static atomic_t in_nmi = ATOMIC_INIT(0);
+static atomic_t nmi_running = ATOMIC_INIT(0);
static int mod_code_status; /* holds return value of text write */
static int mod_code_write; /* set when NMI should do the write */
static void *mod_code_ip; /* holds the IP to write to */
@@ -115,8 +115,8 @@ static void ftrace_mod_code(void)
void ftrace_nmi_enter(void)
{
- atomic_inc(&in_nmi);
- /* Must have in_nmi seen before reading write flag */
+ atomic_inc(&nmi_running);
+ /* Must have nmi_running seen before reading write flag */
smp_mb();
if (mod_code_write) {
ftrace_mod_code();
@@ -126,19 +126,19 @@ void ftrace_nmi_enter(void)
void ftrace_nmi_exit(void)
{
- /* Finish all executions before clearing in_nmi */
+ /* Finish all executions before clearing nmi_running */
smp_wmb();
- atomic_dec(&in_nmi);
+ atomic_dec(&nmi_running);
}
static void wait_for_nmi(void)
{
- if (!atomic_read(&in_nmi))
+ if (!atomic_read(&nmi_running))
return;
do {
cpu_relax();
- } while(atomic_read(&in_nmi));
+ } while (atomic_read(&nmi_running));
nmi_wait_count++;
}
@@ -367,25 +367,6 @@ int ftrace_disable_ftrace_graph_caller(void)
return ftrace_mod_jmp(ip, old_offset, new_offset);
}
-#else /* CONFIG_DYNAMIC_FTRACE */
-
-/*
- * These functions are picked from those used on
- * this page for dynamic ftrace. They have been
- * simplified to ignore all traces in NMI context.
- */
-static atomic_t in_nmi;
-
-void ftrace_nmi_enter(void)
-{
- atomic_inc(&in_nmi);
-}
-
-void ftrace_nmi_exit(void)
-{
- atomic_dec(&in_nmi);
-}
-
#endif /* !CONFIG_DYNAMIC_FTRACE */
/* Add a function return address to the trace stack on thread info.*/
@@ -475,7 +456,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
&return_to_handler;
/* Nmi's are currently unsupported */
- if (unlikely(atomic_read(&in_nmi)))
+ if (unlikely(in_nmi()))
return;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7840e718c6c..5e302d636fc 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -140,7 +140,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
#endif
/**
- * ftrace_make_nop - convert code into top
+ * ftrace_make_nop - convert code into nop
* @mod: module structure if called by module load initialization
* @rec: the mcount call site record
* @addr: the address that the call site should be calling
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
index 366a054d0b0..dca7bf8cffe 100644
--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -2,7 +2,7 @@
#define _LINUX_FTRACE_IRQ_H
-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER)
+#ifdef CONFIG_FTRACE_NMI_ENTER
extern void ftrace_nmi_enter(void);
extern void ftrace_nmi_exit(void);
#else
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index f83288347dd..f3cf86e1465 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -61,6 +61,12 @@
#error PREEMPT_ACTIVE is too low!
#endif
+#define NMI_OFFSET (PREEMPT_ACTIVE << 1)
+
+#if NMI_OFFSET >= 0x80000000
+#error PREEMPT_ACTIVE too high!
+#endif
+
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
@@ -73,6 +79,11 @@
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
+/*
+ * Are we in NMI context?
+ */
+#define in_nmi() (preempt_count() & NMI_OFFSET)
+
#if defined(CONFIG_PREEMPT)
# define PREEMPT_INATOMIC_BASE kernel_locked()
# define PREEMPT_CHECK_OFFSET 1
@@ -167,6 +178,8 @@ extern void irq_exit(void);
#define nmi_enter() \
do { \
ftrace_nmi_enter(); \
+ BUG_ON(in_nmi()); \
+ add_preempt_count(NMI_OFFSET); \
lockdep_off(); \
rcu_nmi_enter(); \
__irq_enter(); \
@@ -177,6 +190,8 @@ extern void irq_exit(void);
__irq_exit(); \
rcu_nmi_exit(); \
lockdep_on(); \
+ BUG_ON(!in_nmi()); \
+ sub_preempt_count(NMI_OFFSET); \
ftrace_nmi_exit(); \
} while (0)
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 3110d92e7d8..3c103d636da 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -121,9 +121,18 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
u64 ring_buffer_time_stamp(int cpu);
void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
+/*
+ * The below functions are fine to use outside the tracing facility.
+ */
+#ifdef CONFIG_RING_BUFFER
void tracing_on(void);
void tracing_off(void);
void tracing_off_permanent(void);
+#else
+static inline void tracing_on(void) { }
+static inline void tracing_off(void) { }
+static inline void tracing_off_permanent(void) { }
+#endif
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer);
void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 28f2644484d..25131a5d5e4 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -9,6 +9,9 @@ config USER_STACKTRACE_SUPPORT
config NOP_TRACER
bool
+config HAVE_FTRACE_NMI_ENTER
+ bool
+
config HAVE_FUNCTION_TRACER
bool
@@ -37,6 +40,11 @@ config TRACER_MAX_TRACE
config RING_BUFFER
bool
+config FTRACE_NMI_ENTER
+ bool
+ depends on HAVE_FTRACE_NMI_ENTER
+ default y
+
config TRACING
bool
select DEBUG_FS
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 68610031780..1796e018fbf 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -465,7 +465,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
* it is not enabled then do nothing.
*
* If this record is not to be traced and
- * it is enabled then disabled it.
+ * it is enabled then disable it.
*
*/
if (rec->flags & FTRACE_FL_NOTRACE) {
@@ -485,7 +485,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED))
return 0;
- /* Record is not filtered and is not enabled do nothing */
+ /* Record is not filtered or enabled, do nothing */
if (!fl)
return 0;
@@ -507,7 +507,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
} else {
- /* if record is not enabled do nothing */
+ /* if record is not enabled, do nothing */
if (!(rec->flags & FTRACE_FL_ENABLED))
return 0;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index aee76b3eeed..53ba3a6d16d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,9 +4,11 @@
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/ring_buffer.h>
+#include <linux/ftrace_irq.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
+#include <linux/hardirq.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
@@ -982,6 +984,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer *buffer = cpu_buffer->buffer;
struct ring_buffer_event *event;
unsigned long flags;
+ bool lock_taken = false;
commit_page = cpu_buffer->commit_page;
/* we just need to protect against interrupts */
@@ -995,7 +998,30 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *next_page = tail_page;
local_irq_save(flags);
- __raw_spin_lock(&cpu_buffer->lock);
+ /*
+ * Since the write to the buffer is still not
+ * fully lockless, we must be careful with NMIs.
+ * The locks in the writers are taken when a write
+ * crosses to a new page. The locks protect against
+ * races with the readers (this will soon be fixed
+ * with a lockless solution).
+ *
+ * Because we can not protect against NMIs, and we
+ * want to keep traces reentrant, we need to manage
+ * what happens when we are in an NMI.
+ *
+ * NMIs can happen after we take the lock.
+ * If we are in an NMI, only take the lock
+ * if it is not already taken. Otherwise
+ * simply fail.
+ */
+ if (unlikely(in_nmi())) {
+ if (!__raw_spin_trylock(&cpu_buffer->lock))
+ goto out_unlock;
+ } else
+ __raw_spin_lock(&cpu_buffer->lock);
+
+ lock_taken = true;
rb_inc_page(cpu_buffer, &next_page);
@@ -1097,7 +1123,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (tail <= BUF_PAGE_SIZE)
local_set(&tail_page->write, tail);
- __raw_spin_unlock(&cpu_buffer->lock);
+ if (likely(lock_taken))
+ __raw_spin_unlock(&cpu_buffer->lock);
local_irq_restore(flags);
return NULL;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ef4dbac9556..03fbd4c20bc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1519,7 +1519,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
SEQ_PUT_FIELD_RET(s, entry->pid);
- SEQ_PUT_FIELD_RET(s, entry->cpu);
+ SEQ_PUT_FIELD_RET(s, iter->cpu);
SEQ_PUT_FIELD_RET(s, iter->ts);
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f2742fb1575..b9838f4a692 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -45,7 +45,6 @@ enum trace_type {
*/
struct trace_entry {
unsigned char type;
- unsigned char cpu;
unsigned char flags;
unsigned char preempt_count;
int pid;
@@ -625,12 +624,12 @@ extern struct tracer nop_trace;
* preempt_enable (after a disable), a schedule might take place
* causing an infinite recursion.
*
- * To prevent this, we read the need_recshed flag before
+ * To prevent this, we read the need_resched flag before
* disabling preemption. When we want to enable preemption we
* check the flag, if it is set, then we call preempt_enable_no_resched.
* Otherwise, we call preempt_enable.
*
- * The rational for doing the above is that if need resched is set
+ * The rational for doing the above is that if need_resched is set
* and we have yet to reschedule, we are either in an atomic location
* (where we do not need to check for scheduling) or we are inside
* the scheduler and do not want to resched.
@@ -651,7 +650,7 @@ static inline int ftrace_preempt_disable(void)
*
* This is a scheduler safe way to enable preemption and not miss
* any preemption checks. The disabled saved the state of preemption.
- * If resched is set, then we were either inside an atomic or
+ * If resched is set, then we are either inside an atomic or
* are inside the scheduler (we would have already scheduled
* otherwise). In this case, we do not want to call normal
* preempt_enable, but preempt_enable_no_resched instead.
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
index ca4bbcfb9e2..e3e7db61c06 100644
--- a/kernel/trace/trace_hw_branches.c
+++ b/kernel/trace/trace_hw_branches.c
@@ -158,7 +158,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
trace_assign_type(it, entry);
if (entry->type == TRACE_HW_BRANCHES) {
- if (trace_seq_printf(seq, "%4d ", entry->cpu) &&
+ if (trace_seq_printf(seq, "%4d ", iter->cpu) &&
seq_print_ip_sym(seq, it->to, symflags) &&
trace_seq_printf(seq, "\t <- ") &&
seq_print_ip_sym(seq, it->from, symflags) &&
@@ -193,7 +193,8 @@ void trace_hw_branch(u64 from, u64 to)
if (!event)
goto out;
entry = ring_buffer_event_data(event);
- entry->ent.cpu = cpu;
+ tracing_generic_entry_update(&entry->ent, 0, from);
+ entry->ent.type = TRACE_HW_BRANCHES;
entry->from = from;
entry->to = to;
trace_buffer_unlock_commit(tr, event, 0, 0);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index b6e99af7921..9fc815031b0 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -333,7 +333,7 @@ int trace_print_context(struct trace_iterator *iter)
unsigned long secs = (unsigned long)t;
return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ",
- comm, entry->pid, entry->cpu, secs, usec_rem);
+ comm, entry->pid, iter->cpu, secs, usec_rem);
}
int trace_print_lat_context(struct trace_iterator *iter)
@@ -356,7 +356,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
char *comm = trace_find_cmdline(entry->pid);
ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
" %ld.%03ldms (+%ld.%03ldms): ", comm,
- entry->pid, entry->cpu, entry->flags,
+ entry->pid, iter->cpu, entry->flags,
entry->preempt_count, iter->idx,
ns2usecs(iter->ts),
abs_usecs / USEC_PER_MSEC,
@@ -364,7 +364,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
rel_usecs / USEC_PER_MSEC,
rel_usecs % USEC_PER_MSEC);
} else {
- ret = lat_print_generic(s, entry, entry->cpu);
+ ret = lat_print_generic(s, entry, iter->cpu);
if (ret)
ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
}