aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/Kconfig.cpu1
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/boot/tty.c2
-rw-r--r--arch/x86/include/asm/ds.h140
-rw-r--r--arch/x86/include/asm/ftrace.h34
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/kernel/Makefile8
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/ds.c752
-rw-r--r--arch/x86/kernel/entry_32.S47
-rw-r--r--arch/x86/kernel/entry_64.S5
-rw-r--r--arch/x86/kernel/ftrace.c366
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/io_apic.c22
-rw-r--r--arch/x86/kernel/pci-calgary_64.c2
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/ptrace.c98
-rw-r--r--arch/x86/kernel/stacktrace.c64
-rw-r--r--arch/x86/kernel/vsyscall_64.c3
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/oprofile/op_model_ppro.c2
-rw-r--r--arch/x86/vdso/vclock_gettime.c3
-rw-r--r--arch/x86/xen/mmu.c21
27 files changed, 1039 insertions, 572 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f..0842b112768 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,11 +29,14 @@ config X86
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER if X86_32
+ select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
select HAVE_ARCH_TRACEHOOK
select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select USER_STACKTRACE_SUPPORT
config ARCH_DEFCONFIG
string
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b815664fe37..85a78575956 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -515,6 +515,7 @@ config CPU_SUP_UMC_32
config X86_DS
def_bool X86_PTRACE_BTS
depends on X86_DEBUGCTLMSR
+ select HAVE_HW_BRANCH_TRACER
config X86_PTRACE_BTS
bool "Branch Trace Store"
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2a3dfbd5e67..fa013f529b7 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -186,14 +186,10 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
-config MMIOTRACE_HOOKS
- bool
-
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on DEBUG_KERNEL && PCI
select TRACING
- select MMIOTRACE_HOOKS
help
Mmiotrace traces Memory Mapped I/O access and is meant for
debugging and reverse engineering. It is called from the ioremap
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b39328..7e8e8b25f5f 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
{
u8 pending;
asm volatile("int $0x16; setnz %0"
- : "=rm" (pending)
+ : "=qm" (pending)
: "a" (0x0100));
return pending;
}
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a190bf4..99b6c39774a 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -7,13 +7,12 @@
*
* It manages:
* - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
* - buffer access
*
* It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
*
*
* Copyright (C) 2007-2008 Intel Corporation.
@@ -23,13 +22,21 @@
#ifndef _ASM_X86_DS_H
#define _ASM_X86_DS_H
-#ifdef CONFIG_X86_DS
#include <linux/types.h>
#include <linux/init.h>
+#include <linux/err.h>
+
+#ifdef CONFIG_X86_DS
struct task_struct;
+struct ds_tracer;
+struct bts_tracer;
+struct pebs_tracer;
+
+typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
+typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
/*
* Request BTS or PEBS
@@ -37,60 +44,62 @@ struct task_struct;
* Due to alignement constraints, the actual buffer may be slightly
* smaller than the requested or provided buffer.
*
- * Returns 0 on success; -Eerrno otherwise
+ * Returns a pointer to a tracer structure on success, or
+ * ERR_PTR(errcode) on failure.
+ *
+ * The interrupt threshold is independent from the overflow callback
+ * to allow users to use their own overflow interrupt handling mechanism.
*
* task: the task to request recording for;
* NULL for per-cpu recording on the current cpu
* base: the base pointer for the (non-pageable) buffer;
- * NULL if buffer allocation requested
- * size: the size of the requested or provided buffer
+ * size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow;
* NULL if cyclic buffer requested
+ * th: the interrupt threshold in records from the end of the buffer;
+ * -1 if no interrupt threshold is requested.
*/
-typedef void (*ds_ovfl_callback_t)(struct task_struct *);
-extern int ds_request_bts(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl);
-extern int ds_request_pebs(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl);
+extern struct bts_tracer *ds_request_bts(struct task_struct *task,
+ void *base, size_t size,
+ bts_ovfl_callback_t ovfl, size_t th);
+extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+ void *base, size_t size,
+ pebs_ovfl_callback_t ovfl,
+ size_t th);
/*
* Release BTS or PEBS resources
*
- * Frees buffers allocated on ds_request.
- *
* Returns 0 on success; -Eerrno otherwise
*
- * task: the task to release resources for;
- * NULL to release resources for the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
*/
-extern int ds_release_bts(struct task_struct *task);
-extern int ds_release_pebs(struct task_struct *task);
+extern int ds_release_bts(struct bts_tracer *tracer);
+extern int ds_release_pebs(struct pebs_tracer *tracer);
/*
- * Return the (array) index of the write pointer.
+ * Get the (array) index of the write pointer.
* (assuming an array of BTS/PEBS records)
*
- * Returns -Eerrno on error
+ * Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
- * pos (out): if not NULL, will hold the result
+ * tracer: the tracer handle returned from ds_request_~()
+ * pos (out): will hold the result
*/
-extern int ds_get_bts_index(struct task_struct *task, size_t *pos);
-extern int ds_get_pebs_index(struct task_struct *task, size_t *pos);
+extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos);
+extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos);
/*
- * Return the (array) index one record beyond the end of the array.
+ * Get the (array) index one record beyond the end of the array.
* (assuming an array of BTS/PEBS records)
*
- * Returns -Eerrno on error
+ * Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
- * pos (out): if not NULL, will hold the result
+ * tracer: the tracer handle returned from ds_request_~()
+ * pos (out): will hold the result
*/
-extern int ds_get_bts_end(struct task_struct *task, size_t *pos);
-extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
+extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos);
+extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos);
/*
* Provide a pointer to the BTS/PEBS record at parameter index.
@@ -101,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos);
*
* Returns the size of a single record on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
* index: the index of the requested record
* record (out): pointer to the requested record
*/
-extern int ds_access_bts(struct task_struct *task,
+extern int ds_access_bts(struct bts_tracer *tracer,
size_t index, const void **record);
-extern int ds_access_pebs(struct task_struct *task,
+extern int ds_access_pebs(struct pebs_tracer *tracer,
size_t index, const void **record);
/*
@@ -128,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task,
*
* Returns the number of bytes written or -Eerrno.
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
* buffer: the buffer to write
* size: the size of the buffer
*/
-extern int ds_write_bts(struct task_struct *task,
+extern int ds_write_bts(struct bts_tracer *tracer,
const void *buffer, size_t size);
-extern int ds_write_pebs(struct task_struct *task,
+extern int ds_write_pebs(struct pebs_tracer *tracer,
const void *buffer, size_t size);
/*
- * Same as ds_write_bts/pebs, but omit ownership checks.
- *
- * This is needed to have some other task than the owner of the
- * BTS/PEBS buffer or the parameter task itself write into the
- * respective buffer.
- */
-extern int ds_unchecked_write_bts(struct task_struct *task,
- const void *buffer, size_t size);
-extern int ds_unchecked_write_pebs(struct task_struct *task,
- const void *buffer, size_t size);
-
-/*
* Reset the write pointer of the BTS/PEBS buffer.
*
* Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
*/
-extern int ds_reset_bts(struct task_struct *task);
-extern int ds_reset_pebs(struct task_struct *task);
+extern int ds_reset_bts(struct bts_tracer *tracer);
+extern int ds_reset_pebs(struct pebs_tracer *tracer);
/*
* Clear the BTS/PEBS buffer and reset the write pointer.
@@ -167,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task);
*
* Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_~()
*/
-extern int ds_clear_bts(struct task_struct *task);
-extern int ds_clear_pebs(struct task_struct *task);
+extern int ds_clear_bts(struct bts_tracer *tracer);
+extern int ds_clear_pebs(struct pebs_tracer *tracer);
/*
* Provide the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_pebs()
* value (out): the counter reset value
*/
-extern int ds_get_pebs_reset(struct task_struct *task, u64 *value);
+extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
/*
* Set the PEBS counter reset value.
*
* Returns 0 on success; -Eerrno on error
*
- * task: the task to access;
- * NULL to access the current cpu
+ * tracer: the tracer handle returned from ds_request_pebs()
* value: the new counter reset value
*/
-extern int ds_set_pebs_reset(struct task_struct *task, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
/*
* Initialization
@@ -206,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
/*
* The DS context - part of struct thread_struct.
*/
+#define MAX_SIZEOF_DS (12 * 8)
+
struct ds_context {
/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
- unsigned char *ds;
+ unsigned char ds[MAX_SIZEOF_DS];
/* the owner of the BTS and PEBS configuration, respectively */
- struct task_struct *owner[2];
- /* buffer overflow notification function for BTS and PEBS */
- ds_ovfl_callback_t callback[2];
- /* the original buffer address */
- void *buffer[2];
- /* the number of allocated pages for on-request allocated buffers */
- unsigned int pages[2];
+ struct ds_tracer *owner[2];
/* use count */
unsigned long count;
/* a pointer to the context location inside the thread_struct
@@ -232,7 +219,8 @@ extern void ds_free(struct ds_context *context);
#else /* CONFIG_X86_DS */
-#define ds_init_intel(config) do {} while (0)
+struct cpuinfo_x86;
+static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
#endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9e8bc29b8b1..7e61b4ceb9a 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -17,8 +17,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
*/
return addr - 1;
}
-#endif
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+struct dyn_arch_ftrace {
+ /* No extra data needed for x86 */
+};
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* __ASSEMBLY__ */
#endif /* CONFIG_FUNCTION_TRACER */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Stack of return addresses for functions
+ * of a thread.
+ * Used in struct thread_info
+ */
+struct ftrace_ret_stack {
+ unsigned long ret;
+ unsigned long func;
+ unsigned long long calltime;
+};
+
+/*
+ * Primary handler of a function return.
+ * It relays on ftrace_return_to_handler.
+ * Defined in entry32.S
+ */
+extern void return_to_handler(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e44d379faad..0921b4018c1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -20,6 +20,8 @@
struct task_struct;
struct exec_domain;
#include <asm/processor.h>
+#include <asm/ftrace.h>
+#include <asm/atomic.h>
struct thread_info {
struct task_struct *task; /* main task structure */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e..64939a0c398 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_ftrace.o = -pg
endif
+ifdef CONFIG_FUNCTION_GRAPH_TRACER
+# Don't trace __switch_to() but let it for function tracer
+CFLAGS_REMOVE_process_32.o = -pg
+endif
+
#
# vsyscalls (which work on the user stack) should have
# no stack-protector checks:
@@ -41,7 +46,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
obj-y += i387.o xsave.o
obj-y += ptrace.o
-obj-y += ds.o
+obj-$(CONFIG_X86_DS) += ds.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
@@ -65,6 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8e48c5d4467..88ea02dcb62 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
+#include <linux/ftrace.h>
#include <linux/acpi.h>
#include <acpi/processor.h>
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int next_perf_state = 0; /* Index into perf table */
unsigned int i;
int result = 0;
+ struct power_trace it;
dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
+ trace_power_mark(&it, POWER_PSTATE, next_perf_state);
+
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d5..816f27f289b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_P4);
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_P3);
+#endif
if (cpu_has_bts)
ptrace_bts_init_intel(c);
-#endif
-
detect_extended_topology(c);
if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
/*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d1a121443bd..19a8c2c0389 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -7,13 +7,12 @@
*
* It manages:
* - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - buffer overflow handling (to be done)
* - buffer access
*
* It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * - get_task_struct on all traced tasks
+ * - current is allowed to trace tasks
*
*
* Copyright (C) 2007-2008 Intel Corporation.
@@ -21,8 +20,6 @@
*/
-#ifdef CONFIG_X86_DS
-
#include <asm/ds.h>
#include <linux/errno.h>
@@ -30,6 +27,7 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/kernel.h>
/*
@@ -46,6 +44,33 @@ struct ds_configuration {
};
static struct ds_configuration ds_cfg;
+/*
+ * A BTS or PEBS tracer.
+ *
+ * This holds the configuration of the tracer and serves as a handle
+ * to identify tracers.
+ */
+struct ds_tracer {
+ /* the DS context (partially) owned by this tracer */
+ struct ds_context *context;
+ /* the buffer provided on ds_request() and its size in bytes */
+ void *buffer;
+ size_t size;
+};
+
+struct bts_tracer {
+ /* the common DS part */
+ struct ds_tracer ds;
+ /* buffer overflow notification function */
+ bts_ovfl_callback_t ovfl;
+};
+
+struct pebs_tracer {
+ /* the common DS part */
+ struct ds_tracer ds;
+ /* buffer overflow notification function */
+ pebs_ovfl_callback_t ovfl;
+};
/*
* Debug Store (DS) save area configuration (see Intel64 and IA32
@@ -109,34 +134,13 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
(*(unsigned long *)base) = value;
}
+#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
-/*
- * Locking is done only for allocating BTS or PEBS resources and for
- * guarding context and buffer memory allocation.
- *
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
- */
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
/*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
+ * Locking is done only for allocating BTS or PEBS resources.
*/
-static inline int ds_validate_access(struct ds_context *context,
- enum ds_qualifier qual)
-{
- if (!context)
- return -EPERM;
-
- if (context->owner[qual] == current)
- return 0;
-
- return -EPERM;
-}
+static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
/*
@@ -185,96 +189,43 @@ static inline int check_tracer(struct task_struct *task)
*
* Contexts are use-counted. They are allocated on first access and
* deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- * requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- * non-existing context indicates an error. It acquires and releases
- * the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
*/
static DEFINE_PER_CPU(struct ds_context *, system_context);
#define this_system_context per_cpu(system_context, smp_processor_id())
-/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
- */
static inline struct ds_context *ds_get_context(struct task_struct *task)
{
- struct ds_context *context;
-
- spin_lock(&ds_lock);
-
- context = (task ? task->thread.ds_ctx : this_system_context);
- if (context)
- context->count++;
-
- spin_unlock(&ds_lock);
-
- return context;
-}
-
-/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
- */
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
-{
struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &this_system_context);
struct ds_context *context = *p_context;
+ unsigned long irq;
if (!context) {
- spin_unlock(&ds_lock);
-
context = kzalloc(sizeof(*context), GFP_KERNEL);
-
- if (!context) {
- spin_lock(&ds_lock);
+ if (!context)
return NULL;
- }
- context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
- if (!context->ds) {
- kfree(context);
- spin_lock(&ds_lock);
- return NULL;
- }
+ spin_lock_irqsave(&ds_lock, irq);
- spin_lock(&ds_lock);
- /*
- * Check for race - another CPU could have allocated
- * it meanwhile:
- */
if (*p_context) {
- kfree(context->ds);
kfree(context);
- return *p_context;
- }
- *p_context = context;
+ context = *p_context;
+ } else {
+ *p_context = context;
- context->this = p_context;
- context->task = task;
+ context->this = p_context;
+ context->task = task;
- if (task)
- set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+ if (task)
+ set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
- if (!task || (task == current))
- wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
-
- get_tracer(task);
+ if (!task || (task == current))
+ wrmsrl(MSR_IA32_DS_AREA,
+ (unsigned long)context->ds);
+ }
+ spin_unlock_irqrestore(&ds_lock, irq);
}
context->count++;
@@ -282,16 +233,14 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
return context;
}
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
static inline void ds_put_context(struct ds_context *context)
{
+ unsigned long irq;
+
if (!context)
return;
- spin_lock(&ds_lock);
+ spin_lock_irqsave(&ds_lock, irq);
if (--context->count)
goto out;
@@ -304,352 +253,351 @@ static inline void ds_put_context(struct ds_context *context)
if (!context->task || (context->task == current))
wrmsrl(MSR_IA32_DS_AREA, 0);
- put_tracer(context->task);
-
- /* free any leftover buffers from tracers that did not
- * deallocate them properly. */
- kfree(context->buffer[ds_bts]);
- kfree(context->buffer[ds_pebs]);
- kfree(context->ds);
kfree(context);
out:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
}
/*
* Handle a buffer overflow
*
- * task: the task whose buffers are overflowing;
- * NULL for a buffer overflow on the current cpu
* context: the ds context
* qual: the buffer type
*/
-static void ds_overflow(struct task_struct *task, struct ds_context *context,
- enum ds_qualifier qual)
-{
- if (!context)
- return;
-
- if (context->callback[qual])
- (*context->callback[qual])(task);
-
- /* todo: do some more overflow handling */
+static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
+{
+ switch (qual) {
+ case ds_bts: {
+ struct bts_tracer *tracer =
+ container_of(context->owner[qual],
+ struct bts_tracer, ds);
+ if (tracer->ovfl)
+ tracer->ovfl(tracer);
+ }
+ break;
+ case ds_pebs: {
+ struct pebs_tracer *tracer =
+ container_of(context->owner[qual],
+ struct pebs_tracer, ds);
+ if (tracer->ovfl)
+ tracer->ovfl(tracer);
+ }
+ break;
+ }
}
-/*
- * Allocate a non-pageable buffer of the parameter size.
- * Checks the memory and the locked memory rlimit.
- *
- * Returns the buffer, if successful;
- * NULL, if out of memory or rlimit exceeded.
- *
- * size: the requested buffer size in bytes
- * pages (out): if not NULL, contains the number of pages reserved
- */
-static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
+static void ds_install_ds_config(struct ds_context *context,
+ enum ds_qualifier qual,
+ void *base, size_t size, size_t ith)
{
- unsigned long rlim, vm, pgsz;
- void *buffer;
-
- pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->total_vm + pgsz;
- if (rlim < vm)
- return NULL;
+ unsigned long buffer, adj;
- rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
- vm = current->mm->locked_vm + pgsz;
- if (rlim < vm)
- return NULL;
+ /* adjust the buffer address and size to meet alignment
+ * constraints:
+ * - buffer is double-word aligned
+ * - size is multiple of record size
+ *
+ * We checked the size at the very beginning; we have enough
+ * space to do the adjustment.
+ */
+ buffer = (unsigned long)base;
- buffer = kzalloc(size, GFP_KERNEL);
- if (!buffer)
- return NULL;
+ adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
+ buffer += adj;
+ size -= adj;
- current->mm->total_vm += pgsz;
- current->mm->locked_vm += pgsz;
+ size /= ds_cfg.sizeof_rec[qual];
+ size *= ds_cfg.sizeof_rec[qual];
- if (pages)
- *pages = pgsz;
+ ds_set(context->ds, qual, ds_buffer_base, buffer);
+ ds_set(context->ds, qual, ds_index, buffer);
+ ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
- return buffer;
+ /* The value for 'no threshold' is -1, which will set the
+ * threshold outside of the buffer, just like we want it.
+ */
+ ds_set(context->ds, qual,
+ ds_interrupt_threshold, buffer + size - ith);
}
-static int ds_request(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
+ struct task_struct *task,
+ void *base, size_t size, size_t th)
{
struct ds_context *context;
- unsigned long buffer, adj;
- const unsigned long alignment = (1 << 3);
- int error = 0;
+ unsigned long irq;
+ int error;
+ error = -EOPNOTSUPP;
if (!ds_cfg.sizeof_ds)
- return -EOPNOTSUPP;
+ goto out;
+
+ error = -EINVAL;
+ if (!base)
+ goto out;
/* we require some space to do alignment adjustments below */
- if (size < (alignment + ds_cfg.sizeof_rec[qual]))
- return -EINVAL;
+ error = -EINVAL;
+ if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+ goto out;
- /* buffer overflow notification is not yet implemented */
- if (ovfl)
- return -EOPNOTSUPP;
+ if (th != (size_t)-1) {
+ th *= ds_cfg.sizeof_rec[qual];
+ error = -EINVAL;
+ if (size <= th)
+ goto out;
+ }
- spin_lock(&ds_lock);
+ tracer->buffer = base;
+ tracer->size = size;
error = -ENOMEM;
- context = ds_alloc_context(task);
+ context = ds_get_context(task);
if (!context)
- goto out_unlock;
+ goto out;
+ tracer->context = context;
+
+
+ spin_lock_irqsave(&ds_lock, irq);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
+ get_tracer(task);
- error = -EALREADY;
- if (context->owner[qual] == current)
- goto out_unlock;
error = -EPERM;
- if (context->owner[qual] != NULL)
- goto out_unlock;
- context->owner[qual] = current;
-
- spin_unlock(&ds_lock);
-
-
- error = -ENOMEM;
- if (!base) {
- base = ds_allocate_buffer(size, &context->pages[qual]);
- if (!base)
- goto out_release;
-
- context->buffer[qual] = base;
- }
- error = 0;
-
- context->callback[qual] = ovfl;
+ if (context->owner[qual])
+ goto out_put_tracer;
+ context->owner[qual] = tracer;
- /* adjust the buffer address and size to meet alignment
- * constraints:
- * - buffer is double-word aligned
- * - size is multiple of record size
- *
- * We checked the size at the very beginning; we have enough
- * space to do the adjustment.
- */
- buffer = (unsigned long)base;
-
- adj = ALIGN(buffer, alignment) - buffer;
- buffer += adj;
- size -= adj;
-
- size /= ds_cfg.sizeof_rec[qual];
- size *= ds_cfg.sizeof_rec[qual];
-
- ds_set(context->ds, qual, ds_buffer_base, buffer);
- ds_set(context->ds, qual, ds_index, buffer);
- ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
+ spin_unlock_irqrestore(&ds_lock, irq);
- if (ovfl) {
- /* todo: select a suitable interrupt threshold */
- } else
- ds_set(context->ds, qual,
- ds_interrupt_threshold, buffer + size + 1);
- /* we keep the context until ds_release */
- return error;
+ ds_install_ds_config(context, qual, base, size, th);
- out_release:
- context->owner[qual] = NULL;
- ds_put_context(context);
- return error;
+ return 0;
+ out_put_tracer:
+ put_tracer(task);
out_unlock:
- spin_unlock(&ds_lock);
+ spin_unlock_irqrestore(&ds_lock, irq);
ds_put_context(context);
+ tracer->context = NULL;
+ out:
return error;
}
-int ds_request_bts(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl)
+struct bts_tracer *ds_request_bts(struct task_struct *task,
+ void *base, size_t size,
+ bts_ovfl_callback_t ovfl, size_t th)
{
- return ds_request(task, base, size, ovfl, ds_bts);
-}
+ struct bts_tracer *tracer;
+ int error;
-int ds_request_pebs(struct task_struct *task, void *base, size_t size,
- ds_ovfl_callback_t ovfl)
-{
- return ds_request(task, base, size, ovfl, ds_pebs);
+ /* buffer overflow notification is not yet implemented */
+ error = -EOPNOTSUPP;
+ if (ovfl)
+ goto out;
+
+ error = -ENOMEM;
+ tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+ if (!tracer)
+ goto out;
+ tracer->ovfl = ovfl;
+
+ error = ds_request(&tracer->ds, ds_bts, task, base, size, th);
+ if (error < 0)
+ goto out_tracer;
+
+ return tracer;
+
+ out_tracer:
+ kfree(tracer);
+ out:
+ return ERR_PTR(error);
}
-static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+ void *base, size_t size,
+ pebs_ovfl_callback_t ovfl, size_t th)
{
- struct ds_context *context;
+ struct pebs_tracer *tracer;
int error;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
+ /* buffer overflow notification is not yet implemented */
+ error = -EOPNOTSUPP;
+ if (ovfl)
goto out;
- kfree(context->buffer[qual]);
- context->buffer[qual] = NULL;
+ error = -ENOMEM;
+ tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+ if (!tracer)
+ goto out;
+ tracer->ovfl = ovfl;
- current->mm->total_vm -= context->pages[qual];
- current->mm->locked_vm -= context->pages[qual];
- context->pages[qual] = 0;
- context->owner[qual] = NULL;
+ error = ds_request(&tracer->ds, ds_pebs, task, base, size, th);
+ if (error < 0)
+ goto out_tracer;
- /*
- * we put the context twice:
- * once for the ds_get_context
- * once for the corresponding ds_request
- */
- ds_put_context(context);
+ return tracer;
+
+ out_tracer:
+ kfree(tracer);
out:
- ds_put_context(context);
- return error;
+ return ERR_PTR(error);
}
-int ds_release_bts(struct task_struct *task)
+static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual)
{
- return ds_release(task, ds_bts);
+ BUG_ON(tracer->context->owner[qual] != tracer);
+ tracer->context->owner[qual] = NULL;
+
+ put_tracer(tracer->context->task);
+ ds_put_context(tracer->context);
}
-int ds_release_pebs(struct task_struct *task)
+int ds_release_bts(struct bts_tracer *tracer)
{
- return ds_release(task, ds_pebs);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_release(&tracer->ds, ds_bts);
+ kfree(tracer);
+
+ return 0;
}
-static int ds_get_index(struct task_struct *task, size_t *pos,
- enum ds_qualifier qual)
+int ds_release_pebs(struct pebs_tracer *tracer)
{
- struct ds_context *context;
- unsigned long base, index;
- int error;
+ if (!tracer)
+ return -EINVAL;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
+ ds_release(&tracer->ds, ds_pebs);
+ kfree(tracer);
+
+ return 0;
+}
+
+static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
+{
+ unsigned long base, index;
base = ds_get(context->ds, qual, ds_buffer_base);
index = ds_get(context->ds, qual, ds_index);
- error = ((index - base) / ds_cfg.sizeof_rec[qual]);
- if (pos)
- *pos = error;
- out:
- ds_put_context(context);
- return error;
+ return (index - base) / ds_cfg.sizeof_rec[qual];
}
-int ds_get_bts_index(struct task_struct *task, size_t *pos)
+int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos)
{
- return ds_get_index(task, pos, ds_bts);
+ if (!tracer)
+ return -EINVAL;
+
+ if (!pos)
+ return -EINVAL;
+
+ *pos = ds_get_index(tracer->ds.context, ds_bts);
+
+ return 0;
}
-int ds_get_pebs_index(struct task_struct *task, size_t *pos)
+int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
{
- return ds_get_index(task, pos, ds_pebs);
+ if (!tracer)
+ return -EINVAL;
+
+ if (!pos)
+ return -EINVAL;
+
+ *pos = ds_get_index(tracer->ds.context, ds_pebs);
+
+ return 0;
}
-static int ds_get_end(struct task_struct *task, size_t *pos,
- enum ds_qualifier qual)
+static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual)
{
- struct ds_context *context;
- unsigned long base, end;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
+ unsigned long base, max;
base = ds_get(context->ds, qual, ds_buffer_base);
- end = ds_get(context->ds, qual, ds_absolute_maximum);
+ max = ds_get(context->ds, qual, ds_absolute_maximum);
- error = ((end - base) / ds_cfg.sizeof_rec[qual]);
- if (pos)
- *pos = error;
- out:
- ds_put_context(context);
- return error;
+ return (max - base) / ds_cfg.sizeof_rec[qual];
}
-int ds_get_bts_end(struct task_struct *task, size_t *pos)
+int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos)
{
- return ds_get_end(task, pos, ds_bts);
+ if (!tracer)
+ return -EINVAL;
+
+ if (!pos)
+ return -EINVAL;
+
+ *pos = ds_get_end(tracer->ds.context, ds_bts);
+
+ return 0;
}
-int ds_get_pebs_end(struct task_struct *task, size_t *pos)
+int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
{
- return ds_get_end(task, pos, ds_pebs);
+ if (!tracer)
+ return -EINVAL;
+
+ if (!pos)
+ return -EINVAL;
+
+ *pos = ds_get_end(tracer->ds.context, ds_pebs);
+
+ return 0;
}
-static int ds_access(struct task_struct *task, size_t index,
- const void **record, enum ds_qualifier qual)
+static int ds_access(struct ds_context *context, enum ds_qualifier qual,
+ size_t index, const void **record)
{
- struct ds_context *context;
unsigned long base, idx;
- int error;
if (!record)
return -EINVAL;
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
-
base = ds_get(context->ds, qual, ds_buffer_base);
idx = base + (index * ds_cfg.sizeof_rec[qual]);
- error = -EINVAL;
if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
- goto out;
+ return -EINVAL;
*record = (const void *)idx;
- error = ds_cfg.sizeof_rec[qual];
- out:
- ds_put_context(context);
- return error;
+
+ return ds_cfg.sizeof_rec[qual];
}
-int ds_access_bts(struct task_struct *task, size_t index, const void **record)
+int ds_access_bts(struct bts_tracer *tracer, size_t index,
+ const void **record)
{
- return ds_access(task, index, record, ds_bts);
+ if (!tracer)
+ return -EINVAL;
+
+ return ds_access(tracer->ds.context, ds_bts, index, record);
}
-int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
+int ds_access_pebs(struct pebs_tracer *tracer, size_t index,
+ const void **record)
{
- return ds_access(task, index, record, ds_pebs);
+ if (!tracer)
+ return -EINVAL;
+
+ return ds_access(tracer->ds.context, ds_pebs, index, record);
}
-static int ds_write(struct task_struct *task, const void *record, size_t size,
- enum ds_qualifier qual, int force)
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+ const void *record, size_t size)
{
- struct ds_context *context;
- int error;
+ int bytes_written = 0;
if (!record)
return -EINVAL;
- error = -EPERM;
- context = ds_get_context(task);
- if (!context)
- goto out;
-
- if (!force) {
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
- }
-
- error = 0;
while (size) {
unsigned long base, index, end, write_end, int_th;
unsigned long write_size, adj_write_size;
@@ -677,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
write_end = end;
if (write_end <= index)
- goto out;
+ break;
write_size = min((unsigned long) size, write_end - index);
memcpy((void *)index, record, write_size);
record = (const char *)record + write_size;
- size -= write_size;
- error += write_size;
+ size -= write_size;
+ bytes_written += write_size;
adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
adj_write_size *= ds_cfg.sizeof_rec[qual];
@@ -699,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size,
ds_set(context->ds, qual, ds_index, index);
if (index >= int_th)
- ds_overflow(task, context, qual);
+ ds_overflow(context, qual);
}
- out:
- ds_put_context(context);
- return error;
+ return bytes_written;
}
-int ds_write_bts(struct task_struct *task, const void *record, size_t size)
+int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size)
{
- return ds_write(task, record, size, ds_bts, /* force = */ 0);
-}
+ if (!tracer)
+ return -EINVAL;
-int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
-{
- return ds_write(task, record, size, ds_pebs, /* force = */ 0);
+ return ds_write(tracer->ds.context, ds_bts, record, size);
}
-int ds_unchecked_write_bts(struct task_struct *task,
- const void *record, size_t size)
+int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size)
{
- return ds_write(task, record, size, ds_bts, /* force = */ 1);
-}
+ if (!tracer)
+ return -EINVAL;
-int ds_unchecked_write_pebs(struct task_struct *task,
- const void *record, size_t size)
-{
- return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+ return ds_write(tracer->ds.context, ds_pebs, record, size);
}
-static int ds_reset_or_clear(struct task_struct *task,
- enum ds_qualifier qual, int clear)
+static void ds_reset_or_clear(struct ds_context *context,
+ enum ds_qualifier qual, int clear)
{
- struct ds_context *context;
unsigned long base, end;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, qual);
- if (error < 0)
- goto out;
base = ds_get(context->ds, qual, ds_buffer_base);
end = ds_get(context->ds, qual, ds_absolute_maximum);
@@ -748,89 +681,100 @@ static int ds_reset_or_clear(struct task_struct *task,
memset((void *)base, 0, end - base);
ds_set(context->ds, qual, ds_index, base);
-
- error = 0;
- out:
- ds_put_context(context);
- return error;
}
-int ds_reset_bts(struct task_struct *task)
+int ds_reset_bts(struct bts_tracer *tracer)
{
- return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
+
+ return 0;
}
-int ds_reset_pebs(struct task_struct *task)
+int ds_reset_pebs(struct pebs_tracer *tracer)
{
- return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0);
+
+ return 0;
}
-int ds_clear_bts(struct task_struct *task)
+int ds_clear_bts(struct bts_tracer *tracer)
{
- return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1);
+
+ return 0;
}
-int ds_clear_pebs(struct task_struct *task)
+int ds_clear_pebs(struct pebs_tracer *tracer)
{
- return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+ if (!tracer)
+ return -EINVAL;
+
+ ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1);
+
+ return 0;
}
-int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value)
{
- struct ds_context *context;
- int error;
+ if (!tracer)
+ return -EINVAL;
if (!value)
return -EINVAL;
- context = ds_get_context(task);
- error = ds_validate_access(context, ds_pebs);
- if (error < 0)
- goto out;
-
- *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+ *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
- error = 0;
- out:
- ds_put_context(context);
- return error;
+ return 0;
}
-int ds_set_pebs_reset(struct task_struct *task, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
{
- struct ds_context *context;
- int error;
-
- context = ds_get_context(task);
- error = ds_validate_access(context, ds_pebs);
- if (error < 0)
- goto out;
+ if (!tracer)
+ return -EINVAL;
- *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+ *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
- error = 0;
- out:
- ds_put_context(context);
- return error;
+ return 0;
}
static const struct ds_configuration ds_cfg_var = {
.sizeof_ds = sizeof(long) * 12,
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10
+#else
+ .sizeof_rec[ds_pebs] = sizeof(long) * 18
+#endif
};
static const struct ds_configuration ds_cfg_64 = {
.sizeof_ds = 8 * 12,
.sizeof_field = 8,
.sizeof_rec[ds_bts] = 8 * 3,
+#ifdef __i386__
.sizeof_rec[ds_pebs] = 8 * 10
+#else
+ .sizeof_rec[ds_pebs] = 8 * 18
+#endif
};
static inline void
ds_configure(const struct ds_configuration *cfg)
{
ds_cfg = *cfg;
+
+ printk(KERN_INFO "DS available\n");
+
+ BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds);
}
void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -838,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+ case 0 ... 0xC:
+ /* sorry, don't know about them */
+ break;
case 0xD:
case 0xE: /* Pentium M */
ds_configure(&ds_cfg_var);
break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
+ default: /* Core2, Atom, ... */
ds_configure(&ds_cfg_64);
break;
- default:
- /* sorry, don't know about them */
- break;
}
break;
case 0xF:
@@ -875,7 +818,8 @@ void ds_free(struct ds_context *context)
* is dying. There should not be any user of that context left
* to disturb us, anymore. */
unsigned long leftovers = context->count;
- while (leftovers--)
+ while (leftovers--) {
+ put_tracer(context->task);
ds_put_context(context);
+ }
}
-#endif /* CONFIG_X86_DS */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 28b597ef9ca..958af86186c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1157,6 +1157,9 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
pushl %eax
pushl %ecx
pushl %edx
@@ -1171,6 +1174,11 @@ ftrace_call:
popl %edx
popl %ecx
popl %eax
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ jmp ftrace_stub
+#endif
.globl ftrace_stub
ftrace_stub:
@@ -1180,8 +1188,15 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
cmpl $ftrace_stub, ftrace_trace_function
jnz trace
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ cmpl $ftrace_stub, ftrace_graph_return
+ jnz ftrace_graph_caller
+#endif
.globl ftrace_stub
ftrace_stub:
ret
@@ -1200,12 +1215,42 @@ trace:
popl %edx
popl %ecx
popl %eax
-
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %edx
+ lea 0x4(%ebp), %eax
+ call prepare_ftrace_return
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+END(ftrace_graph_caller)
+
+.globl return_to_handler
+return_to_handler:
+ pushl $0
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ call ftrace_return_to_handler
+ movl %eax, 0xc(%esp)
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+#endif
+
.section .rodata,"a"
#include "syscall_table_32.S"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b86f332c96a..08aa6b10933 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,6 +68,8 @@ ENTRY(mcount)
END(mcount)
ENTRY(ftrace_caller)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
/* taken from glibc */
subq $0x38, %rsp
@@ -103,6 +105,9 @@ END(ftrace_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $0, function_trace_stop
+ jne ftrace_stub
+
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
.globl ftrace_stub
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 50ea0ac8c9b..7ef914e6a2f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -14,14 +14,17 @@
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
+#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <asm/ftrace.h>
+#include <linux/ftrace.h>
#include <asm/nops.h>
+#include <asm/nmi.h>
-static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+#ifdef CONFIG_DYNAMIC_FTRACE
union ftrace_code_union {
char code[MCOUNT_INSN_SIZE];
@@ -31,18 +34,12 @@ union ftrace_code_union {
} __attribute__((packed));
};
-
static int ftrace_calc_offset(long ip, long addr)
{
return (int)(addr - ip);
}
-unsigned char *ftrace_nop_replace(void)
-{
- return ftrace_nop;
-}
-
-unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
{
static union ftrace_code_union calc;
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
return calc.code;
}
-int
+/*
+ * Modifying code must take extra care. On an SMP machine, if
+ * the code being modified is also being executed on another CPU
+ * that CPU will have undefined results and possibly take a GPF.
+ * We use kstop_machine to stop other CPUS from exectuing code.
+ * But this does not stop NMIs from happening. We still need
+ * to protect against that. We separate out the modification of
+ * the code to take care of this.
+ *
+ * Two buffers are added: An IP buffer and a "code" buffer.
+ *
+ * 1) Put the instruction pointer into the IP buffer
+ * and the new code into the "code" buffer.
+ * 2) Set a flag that says we are modifying code
+ * 3) Wait for any running NMIs to finish.
+ * 4) Write the code
+ * 5) clear the flag.
+ * 6) Wait for any running NMIs to finish.
+ *
+ * If an NMI is executed, the first thing it does is to call
+ * "ftrace_nmi_enter". This will check if the flag is set to write
+ * and if it is, it will write what is in the IP and "code" buffers.
+ *
+ * The trick is, it does not matter if everyone is writing the same
+ * content to the code location. Also, if a CPU is executing code
+ * it is OK to write to that code location if the contents being written
+ * are the same as what exists.
+ */
+
+static atomic_t in_nmi = ATOMIC_INIT(0);
+static int mod_code_status; /* holds return value of text write */
+static int mod_code_write; /* set when NMI should do the write */
+static void *mod_code_ip; /* holds the IP to write to */
+static void *mod_code_newcode; /* holds the text to write to the IP */
+
+static unsigned nmi_wait_count;
+static atomic_t nmi_update_count = ATOMIC_INIT(0);
+
+int ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ int r;
+
+ r = snprintf(buf, size, "%u %u",
+ nmi_wait_count,
+ atomic_read(&nmi_update_count));
+ return r;
+}
+
+static void ftrace_mod_code(void)
+{
+ /*
+ * Yes, more than one CPU process can be writing to mod_code_status.
+ * (and the code itself)
+ * But if one were to fail, then they all should, and if one were
+ * to succeed, then they all should.
+ */
+ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
+ MCOUNT_INSN_SIZE);
+}
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+ /* Must have in_nmi seen before reading write flag */
+ smp_mb();
+ if (mod_code_write) {
+ ftrace_mod_code();
+ atomic_inc(&nmi_update_count);
+ }
+}
+
+void ftrace_nmi_exit(void)
+{
+ /* Finish all executions before clearing in_nmi */
+ smp_wmb();
+ atomic_dec(&in_nmi);
+}
+
+static void wait_for_nmi(void)
+{
+ int waited = 0;
+
+ while (atomic_read(&in_nmi)) {
+ waited = 1;
+ cpu_relax();
+ }
+
+ if (waited)
+ nmi_wait_count++;
+}
+
+static int
+do_ftrace_mod_code(unsigned long ip, void *new_code)
+{
+ mod_code_ip = (void *)ip;
+ mod_code_newcode = new_code;
+
+ /* The buffers need to be visible before we let NMIs write them */
+ smp_wmb();
+
+ mod_code_write = 1;
+
+ /* Make sure write bit is visible before we wait on NMIs */
+ smp_mb();
+
+ wait_for_nmi();
+
+ /* Make sure all running NMIs have finished before we write the code */
+ smp_mb();
+
+ ftrace_mod_code();
+
+ /* Make sure the write happens before clearing the bit */
+ smp_wmb();
+
+ mod_code_write = 0;
+
+ /* make sure NMIs see the cleared bit */
+ smp_mb();
+
+ wait_for_nmi();
+
+ return mod_code_status;
+}
+
+
+
+
+static unsigned char ftrace_nop[MCOUNT_INSN_SIZE];
+
+static unsigned char *ftrace_nop_replace(void)
+{
+ return ftrace_nop;
+}
+
+static int
ftrace_modify_code(unsigned long ip, unsigned char *old_code,
unsigned char *new_code)
{
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return -EINVAL;
/* replace the text with the new text */
- if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
+ if (do_ftrace_mod_code(ip, new_code))
return -EPERM;
sync_core();
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
return 0;
}
+int ftrace_make_nop(struct module *mod,
+ struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_call_replace(ip, addr);
+ new = ftrace_nop_replace();
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned char *new, *old;
+ unsigned long ip = rec->ip;
+
+ old = ftrace_nop_replace();
+ new = ftrace_call_replace(ip, addr);
+
+ return ftrace_modify_code(rec->ip, old, new);
+}
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -165,3 +320,194 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+
+static int ftrace_mod_jmp(unsigned long ip,
+ int old_offset, int new_offset)
+{
+ unsigned char code[MCOUNT_INSN_SIZE];
+
+ if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE))
+ return -EFAULT;
+
+ if (code[0] != 0xe9 || old_offset != *(int *)(&code[1]))
+ return -EINVAL;
+
+ *(int *)(&code[1]) = new_offset;
+
+ if (do_ftrace_mod_code(ip, &code))
+ return -EPERM;
+
+ return 0;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ int old_offset, new_offset;
+
+ old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+ new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+
+ return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ unsigned long ip = (unsigned long)(&ftrace_graph_call);
+ int old_offset, new_offset;
+
+ old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE);
+ new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE);
+
+ return ftrace_mod_jmp(ip, old_offset, new_offset);
+}
+
+#else /* CONFIG_DYNAMIC_FTRACE */
+
+/*
+ * These functions are picked from those used on
+ * this page for dynamic ftrace. They have been
+ * simplified to ignore all traces in NMI context.
+ */
+static atomic_t in_nmi;
+
+void ftrace_nmi_enter(void)
+{
+ atomic_inc(&in_nmi);
+}
+
+void ftrace_nmi_exit(void)
+{
+ atomic_dec(&in_nmi);
+}
+
+#endif /* !CONFIG_DYNAMIC_FTRACE */
+
+/* Add a function return address to the trace stack on thread info.*/
+static int push_return_trace(unsigned long ret, unsigned long long time,
+ unsigned long func, int *depth)
+{
+ int index;
+
+ if (!current->ret_stack)
+ return -EBUSY;
+
+ /* The return trace stack is full */
+ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+ atomic_inc(&current->trace_overrun);
+ return -EBUSY;
+ }
+
+ index = ++current->curr_ret_stack;
+ barrier();
+ current->ret_stack[index].ret = ret;
+ current->ret_stack[index].func = func;
+ current->ret_stack[index].calltime = time;
+ *depth = index;
+
+ return 0;
+}
+
+/* Retrieve a function return address to the trace stack on thread info.*/
+static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
+{
+ int index;
+
+ index = current->curr_ret_stack;
+ *ret = current->ret_stack[index].ret;
+ trace->func = current->ret_stack[index].func;
+ trace->calltime = current->ret_stack[index].calltime;
+ trace->overrun = atomic_read(&current->trace_overrun);
+ trace->depth = index;
+ current->curr_ret_stack--;
+}
+
+/*
+ * Send the trace to the ring-buffer.
+ * @return the original return address.
+ */
+unsigned long ftrace_return_to_handler(void)
+{
+ struct ftrace_graph_ret trace;
+ unsigned long ret;
+
+ pop_return_trace(&trace, &ret);
+ trace.rettime = cpu_clock(raw_smp_processor_id());
+ ftrace_graph_return(&trace);
+
+ return ret;
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
+{
+ unsigned long old;
+ unsigned long long calltime;
+ int faulted;
+ struct ftrace_graph_ent trace;
+ unsigned long return_hooker = (unsigned long)
+ &return_to_handler;
+
+ /* Nmi's are currently unsupported */
+ if (atomic_read(&in_nmi))
+ return;
+
+ /*
+ * Protect against fault, even if it shouldn't
+ * happen. This tool is too much intrusive to
+ * ignore such a protection.
+ */
+ asm volatile(
+ "1: movl (%[parent_old]), %[old]\n"
+ "2: movl %[return_hooker], (%[parent_replaced])\n"
+ " movl $0, %[faulted]\n"
+
+ ".section .fixup, \"ax\"\n"
+ "3: movl $1, %[faulted]\n"
+ ".previous\n"
+
+ ".section __ex_table, \"a\"\n"
+ " .long 1b, 3b\n"
+ " .long 2b, 3b\n"
+ ".previous\n"
+
+ : [parent_replaced] "=r" (parent), [old] "=r" (old),
+ [faulted] "=r" (faulted)
+ : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
+ : "memory"
+ );
+
+ if (WARN_ON(faulted)) {
+ unregister_ftrace_graph();
+ return;
+ }
+
+ if (WARN_ON(!__kernel_text_address(old))) {
+ unregister_ftrace_graph();
+ *parent = old;
+ return;
+ }
+
+ calltime = cpu_clock(raw_smp_processor_id());
+
+ if (push_return_trace(old, calltime,
+ self_addr, &trace.depth) == -EBUSY) {
+ *parent = old;
+ return;
+ }
+
+ trace.func = self_addr;
+ ftrace_graph_entry(&trace);
+
+}
+
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608d4ca..b0f61f0dcd0 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
stts();
}
-void __init init_thread_xstate(void)
+void __cpuinit init_thread_xstate(void)
{
if (!HAVE_HWFP) {
xstate_size = sizeof(struct i387_soft_struct);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index c9513e1ff28..1fec0f9b150 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3608,27 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic)
int __init probe_nr_irqs(void)
{
- int idx;
- int nr = 0;
-#ifndef CONFIG_XEN
- int nr_min = 32;
-#else
- int nr_min = NR_IRQS;
-#endif
-
- for (idx = 0; idx < nr_ioapics; idx++)
- nr += io_apic_get_redir_entries(idx) + 1;
-
- /* double it for hotplug and msi and nmi */
- nr <<= 1;
-
- /* something wrong ? */
- if (nr < nr_min)
- nr = nr_min;
- if (WARN_ON(nr > NR_IRQS))
- nr = NR_IRQS;
-
- return nr;
+ return NR_IRQS;
}
/* --------------------------------------------------------------------------
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d78f3..d28bbdc35e4 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
++p;
if (*p == '\0')
break;
- bridge = simple_strtol(p, &endp, 0);
+ bridge = simple_strtoul(p, &endp, 0);
if (p == endp)
break;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c622772744d..c27af49a4ed 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
+#include <linux/ftrace.h>
#include <asm/system.h>
unsigned long idle_halt;
@@ -100,6 +101,9 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
if (hlt_use_halt()) {
+ struct power_trace it;
+
+ trace_power_start(&it, POWER_CSTATE, 1);
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -112,6 +116,7 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
+ trace_power_end(&it);
} else {
local_irq_enable();
/* loop is done by the caller */
@@ -154,24 +159,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
+ struct power_trace it;
+
+ trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
if (!need_resched()) {
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
__mwait(ax, cx);
}
+ trace_power_end(&it);
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
+ struct power_trace it;
if (!need_resched()) {
+ trace_power_start(&it, POWER_CSTATE, 1);
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
__sti_mwait(0, 0);
else
local_irq_enable();
+ trace_power_end(&it);
} else
local_irq_enable();
}
@@ -183,9 +195,13 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
+ struct power_trace it;
+
+ trace_power_start(&it, POWER_CSTATE, 0);
local_irq_enable();
while (!need_resched())
cpu_relax();
+ trace_power_end(&it);
}
/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a6d8c12e10..2c8ec1ba75e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
size_t bts_index, bts_end;
int error;
- error = ds_get_bts_end(child, &bts_end);
+ error = ds_get_bts_end(child->bts, &bts_end);
if (error < 0)
return error;
if (bts_end <= index)
return -EINVAL;
- error = ds_get_bts_index(child, &bts_index);
+ error = ds_get_bts_index(child->bts, &bts_index);
if (error < 0)
return error;
@@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
if (bts_end <= bts_index)
bts_index -= bts_end;
- error = ds_access_bts(child, bts_index, &bts_record);
+ error = ds_access_bts(child->bts, bts_index, &bts_record);
if (error < 0)
return error;
@@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child,
size_t end, i;
int error;
- error = ds_get_bts_index(child, &end);
+ error = ds_get_bts_index(child->bts, &end);
if (error < 0)
return error;
if (size < (end * sizeof(struct bts_struct)))
return -EIO;
- error = ds_access_bts(child, 0, (const void **)&raw);
+ error = ds_access_bts(child->bts, 0, (const void **)&raw);
if (error < 0)
return error;
@@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child,
return -EFAULT;
}
- error = ds_clear_bts(child);
+ error = ds_clear_bts(child->bts);
if (error < 0)
return error;
return end;
}
-static void ptrace_bts_ovfl(struct task_struct *child)
-{
- send_sig(child->thread.bts_ovfl_signal, child, 0);
-}
-
static int ptrace_bts_config(struct task_struct *child,
long cfg_size,
const struct ptrace_bts_config __user *ucfg)
@@ -760,23 +755,45 @@ static int ptrace_bts_config(struct task_struct *child,
goto errout;
if (cfg.flags & PTRACE_BTS_O_ALLOC) {
- ds_ovfl_callback_t ovfl = NULL;
+ bts_ovfl_callback_t ovfl = NULL;
unsigned int sig = 0;
- /* we ignore the error in case we were not tracing child */
- (void)ds_release_bts(child);
+ error = -EINVAL;
+ if (cfg.size < (10 * bts_cfg.sizeof_bts))
+ goto errout;
if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
if (!cfg.signal)
goto errout;
+ error = -EOPNOTSUPP;
+ goto errout;
+
sig = cfg.signal;
- ovfl = ptrace_bts_ovfl;
}
- error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl);
- if (error < 0)
+ if (child->bts) {
+ (void)ds_release_bts(child->bts);
+ kfree(child->bts_buffer);
+
+ child->bts = NULL;
+ child->bts_buffer = NULL;
+ }
+
+ error = -ENOMEM;
+ child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
+ if (!child->bts_buffer)
+ goto errout;
+
+ child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
+ ovfl, /* th = */ (size_t)-1);
+ if (IS_ERR(child->bts)) {
+ error = PTR_ERR(child->bts);
+ kfree(child->bts_buffer);
+ child->bts = NULL;
+ child->bts_buffer = NULL;
goto errout;
+ }
child->thread.bts_ovfl_signal = sig;
}
@@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child,
if (cfg_size < sizeof(cfg))
return -EIO;
- error = ds_get_bts_end(child, &end);
+ error = ds_get_bts_end(child->bts, &end);
if (error < 0)
return error;
- error = ds_access_bts(child, /* index = */ 0, &base);
+ error = ds_access_bts(child->bts, /* index = */ 0, &base);
if (error < 0)
return error;
- error = ds_access_bts(child, /* index = */ end, &max);
+ error = ds_access_bts(child->bts, /* index = */ end, &max);
if (error < 0)
return error;
@@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child,
return -EINVAL;
}
- /* The writing task will be the switched-to task on a context
- * switch. It needs to write into the switched-from task's BTS
- * buffer. */
- return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts);
+ return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
}
void ptrace_bts_take_timestamp(struct task_struct *tsk,
@@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
switch (c->x86) {
case 0x6:
switch (c->x86_model) {
+ case 0 ... 0xC:
+ /* sorry, don't know about them */
+ break;
case 0xD:
case 0xE: /* Pentium M */
bts_configure(&bts_cfg_pentium_m);
break;
- case 0xF: /* Core2 */
- case 0x1C: /* Atom */
+ default: /* Core2, Atom, ... */
bts_configure(&bts_cfg_core2);
break;
- default:
- /* sorry, don't know about them */
- break;
}
break;
case 0xF:
@@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child)
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
#ifdef CONFIG_X86_PTRACE_BTS
- (void)ds_release_bts(child);
+ if (child->bts) {
+ (void)ds_release_bts(child->bts);
+ kfree(child->bts_buffer);
+ child->bts_buffer = NULL;
- child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
- if (!child->thread.debugctlmsr)
- clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+ child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
+ if (!child->thread.debugctlmsr)
+ clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
- clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+ clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
+ }
#endif /* CONFIG_X86_PTRACE_BTS */
}
@@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
(child, data, (struct ptrace_bts_config __user *)addr);
break;
- case PTRACE_BTS_SIZE:
- ret = ds_get_bts_index(child, /* pos = */ NULL);
+ case PTRACE_BTS_SIZE: {
+ size_t size;
+
+ ret = ds_get_bts_index(child->bts, &size);
+ if (ret == 0) {
+ BUG_ON(size != (int) size);
+ ret = (int) size;
+ }
break;
+ }
case PTRACE_BTS_GET:
ret = ptrace_bts_read_record
@@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
case PTRACE_BTS_CLEAR:
- ret = ds_clear_bts(child);
+ ret = ds_clear_bts(child->bts);
break;
case PTRACE_BTS_DRAIN:
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index a03e7f6d90c..10786af9554 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/stacktrace.h>
#include <linux/module.h>
+#include <linux/uaccess.h>
#include <asm/stacktrace.h>
static void save_stack_warning(void *data, char *msg)
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
+
+struct stack_frame {
+ const void __user *next_fp;
+ unsigned long ret_addr;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+ int ret;
+
+ if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+ return 0;
+
+ ret = 1;
+ pagefault_disable();
+ if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+ ret = 0;
+ pagefault_enable();
+
+ return ret;
+}
+
+static inline void __save_stack_trace_user(struct stack_trace *trace)
+{
+ const struct pt_regs *regs = task_pt_regs(current);
+ const void __user *fp = (const void __user *)regs->bp;
+
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = regs->ip;
+
+ while (trace->nr_entries < trace->max_entries) {
+ struct stack_frame frame;
+
+ frame.next_fp = NULL;
+ frame.ret_addr = 0;
+ if (!copy_stack_frame(fp, &frame))
+ break;
+ if ((unsigned long)fp < regs->sp)
+ break;
+ if (frame.ret_addr) {
+ trace->entries[trace->nr_entries++] =
+ frame.ret_addr;
+ }
+ if (fp == frame.next_fp)
+ break;
+ fp = frame.next_fp;
+ }
+}
+
+void save_stack_trace_user(struct stack_trace *trace)
+{
+ /*
+ * Trace user stack if we are not a kernel thread
+ */
+ if (current->mm) {
+ __save_stack_trace_user(trace);
+ }
+ if (trace->nr_entries < trace->max_entries)
+ trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b8b6690a86..6f3d3d4cd97 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -17,6 +17,9 @@
* want per guest time just set the kernel.vsyscall64 sysctl to 0.
*/
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb75e82..15c3e699918 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
/*
* Enable and initialize the xsave feature.
*/
-void __init xsave_cntxt_init(void)
+void __ref xsave_cntxt_init(void)
{
unsigned int eax, ebx, ecx, edx;
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index fea4565ff57..d8cc96a2738 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
-obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
-mmiotrace-y := pf_in.o mmio-mod.o
+mmiotrace-y := kmmio.o pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
obj-$(CONFIG_NUMA) += numa_$(BITS).o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 31e8730fa24..4152d3c3b13 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -53,7 +53,7 @@
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE_HOOKS
+#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3f1b81a83e2..716d26f0e5d 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
int i;
if (!reset_value) {
- reset_value = kmalloc(sizeof(unsigned) * num_counters,
+ reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
GFP_ATOMIC);
if (!reset_value)
return;
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 1ef0f90813d..d9d35824c56 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -9,6 +9,9 @@
* Also alternative() doesn't work.
*/
+/* Disable profiling for userspace code: */
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 688936044dc..636ef4caa52 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
* For 64-bit, we must skip the Xen hole in the middle of the address
* space, just after the big x86-64 virtual hole.
*/
-static int xen_pgd_walk(struct mm_struct *mm,
- int (*func)(struct mm_struct *mm, struct page *,
- enum pt_level),
- unsigned long limit)
+static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
{
- pgd_t *pgd = mm->pgd;
int flush = 0;
unsigned hole_low, hole_high;
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -753,6 +752,14 @@ out:
return flush;
}
+static int xen_pgd_walk(struct mm_struct *mm,
+ int (*func)(struct mm_struct *mm, struct page *,
+ enum pt_level),
+ unsigned long limit)
+{
+ return __xen_pgd_walk(mm, mm->pgd, func, limit);
+}
+
/* If we're using split pte locks, then take the page's lock and
return a pointer to it. Otherwise return NULL. */
static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
xen_mc_batch();
- if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
+ if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
/* re-enable interrupts for flushing */
xen_mc_issue(0);
@@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
PT_PMD);
#endif
- xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
+ __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
xen_mc_issue(0);
}