aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/cgroup_freezer.c51
-rw-r--r--kernel/freezer.c20
-rw-r--r--kernel/irq/proc.c2
-rw-r--r--kernel/lockdep.c17
-rw-r--r--kernel/power/Kconfig2
-rw-r--r--kernel/printk.c39
-rw-r--r--kernel/profile.c2
-rw-r--r--kernel/resource.c8
-rw-r--r--kernel/sched.c3
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/sched_fair.c169
-rw-r--r--kernel/sched_idletask.c5
-rw-r--r--kernel/sched_rt.c5
-rw-r--r--kernel/signal.c3
-rw-r--r--kernel/sysctl.c12
-rw-r--r--kernel/trace/Kconfig31
-rw-r--r--kernel/trace/Makefile6
-rw-r--r--kernel/trace/ftrace.c608
-rw-r--r--kernel/trace/ring_buffer.c6
-rw-r--r--kernel/trace/trace.c137
-rw-r--r--kernel/trace/trace.h22
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_irqsoff.c4
-rw-r--r--kernel/trace/trace_sched_wakeup.c4
-rw-r--r--kernel/trace/trace_selftest.c18
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/tracepoint.c257
28 files changed, 595 insertions, 848 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 305f11dbef2..9a3ec66a9d8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
CFLAGS_REMOVE_sched.o = -mno-spe
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
CFLAGS_REMOVE_lockdep_proc.o = -pg
@@ -88,7 +88,7 @@ obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
-obj-$(CONFIG_FTRACE) += trace/
+obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e9505695449..7fa476f01d0 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -162,9 +162,13 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
struct task_struct *task)
{
struct freezer *freezer;
- int retval;
- /* Anything frozen can't move or be moved to/from */
+ /*
+ * Anything frozen can't move or be moved to/from.
+ *
+ * Since orig_freezer->state == FROZEN means that @task has been
+ * frozen, so it's sufficient to check the latter condition.
+ */
if (is_task_frozen_enough(task))
return -EBUSY;
@@ -173,13 +177,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
if (freezer->state == CGROUP_FROZEN)
return -EBUSY;
- retval = 0;
- task_lock(task);
- freezer = task_freezer(task);
- if (freezer->state == CGROUP_FROZEN)
- retval = -EBUSY;
- task_unlock(task);
- return retval;
+ return 0;
}
static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -190,8 +188,9 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
freezer = task_freezer(task);
task_unlock(task);
- BUG_ON(freezer->state == CGROUP_FROZEN);
spin_lock_irq(&freezer->lock);
+ BUG_ON(freezer->state == CGROUP_FROZEN);
+
/* Locking avoids race with FREEZING -> THAWED transitions. */
if (freezer->state == CGROUP_FREEZING)
freeze_task(task, true);
@@ -276,25 +275,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
return num_cant_freeze_now ? -EBUSY : 0;
}
-static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
+static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
{
struct cgroup_iter it;
struct task_struct *task;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
- int do_wake;
-
- task_lock(task);
- do_wake = __thaw_process(task);
- task_unlock(task);
- if (do_wake)
- wake_up_process(task);
+ thaw_process(task);
}
cgroup_iter_end(cgroup, &it);
- freezer->state = CGROUP_THAWED;
- return 0;
+ freezer->state = CGROUP_THAWED;
}
static int freezer_change_state(struct cgroup *cgroup,
@@ -304,27 +296,22 @@ static int freezer_change_state(struct cgroup *cgroup,
int retval = 0;
freezer = cgroup_freezer(cgroup);
+
spin_lock_irq(&freezer->lock);
+
update_freezer_state(cgroup, freezer);
if (goal_state == freezer->state)
goto out;
- switch (freezer->state) {
+
+ switch (goal_state) {
case CGROUP_THAWED:
- retval = try_to_freeze_cgroup(cgroup, freezer);
+ unfreeze_cgroup(cgroup, freezer);
break;
- case CGROUP_FREEZING:
- if (goal_state == CGROUP_FROZEN) {
- /* Userspace is retrying after
- * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
- retval = try_to_freeze_cgroup(cgroup, freezer);
- break;
- }
- /* state == FREEZING and goal_state == THAWED, so unfreeze */
case CGROUP_FROZEN:
- retval = unfreeze_cgroup(cgroup, freezer);
+ retval = try_to_freeze_cgroup(cgroup, freezer);
break;
default:
- break;
+ BUG();
}
out:
spin_unlock_irq(&freezer->lock);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index ba6248b323e..2f4936cf708 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p)
}
}
-/*
- * Wake up a frozen process
- *
- * task_lock() is needed to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails. Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
- */
-int __thaw_process(struct task_struct *p)
+static int __thaw_process(struct task_struct *p)
{
if (frozen(p)) {
p->flags &= ~PF_FROZEN;
@@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p)
return 0;
}
+/*
+ * Wake up a frozen process
+ *
+ * task_lock() is needed to prevent the race with refrigerator() which may
+ * occur if the freezing of tasks fails. Namely, without the lock, if the
+ * freezing of tasks failed, thaw_tasks() might have run before a task in
+ * refrigerator() could call frozen_process(), in which case the task would be
+ * frozen and no one would thaw it.
+ */
int thaw_process(struct task_struct *p)
{
task_lock(p);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index fac014a81b2..4d161c70ba5 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -220,7 +220,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
}
}
-void register_default_affinity_proc(void)
+static void register_default_affinity_proc(void)
{
#ifdef CONFIG_SMP
proc_create("irq/default_smp_affinity", 0600, NULL,
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index dbda475b13b..06e157119d2 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2169,12 +2169,11 @@ void early_boot_irqs_on(void)
/*
* Hardirqs will be enabled:
*/
-void trace_hardirqs_on_caller(unsigned long a0)
+void trace_hardirqs_on_caller(unsigned long ip)
{
struct task_struct *curr = current;
- unsigned long ip;
- time_hardirqs_on(CALLER_ADDR0, a0);
+ time_hardirqs_on(CALLER_ADDR0, ip);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2188,7 +2187,6 @@ void trace_hardirqs_on_caller(unsigned long a0)
}
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
- ip = (unsigned long) __builtin_return_address(0);
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
@@ -2224,11 +2222,11 @@ EXPORT_SYMBOL(trace_hardirqs_on);
/*
* Hardirqs were disabled:
*/
-void trace_hardirqs_off_caller(unsigned long a0)
+void trace_hardirqs_off_caller(unsigned long ip)
{
struct task_struct *curr = current;
- time_hardirqs_off(CALLER_ADDR0, a0);
+ time_hardirqs_off(CALLER_ADDR0, ip);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2241,7 +2239,7 @@ void trace_hardirqs_off_caller(unsigned long a0)
* We have done an ON -> OFF transition:
*/
curr->hardirqs_enabled = 0;
- curr->hardirq_disable_ip = _RET_IP_;
+ curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
debug_atomic_inc(&hardirqs_off_events);
} else
@@ -3417,9 +3415,10 @@ retry:
}
printk(" ignoring it.\n");
unlock = 0;
+ } else {
+ if (count != 10)
+ printk(KERN_CONT " locked it.\n");
}
- if (count != 10)
- printk(" locked it.\n");
do_each_thread(g, p) {
/*
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index dcd165f92a8..23bd4daeb96 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -96,7 +96,7 @@ config SUSPEND
config PM_TEST_SUSPEND
bool "Test suspend/resume and wakealarm during bootup"
- depends on SUSPEND && PM_DEBUG && RTC_LIB=y
+ depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
---help---
This option will let you suspend your machine during bootup, and
make it wake up a few seconds later using an RTC wakeup alarm.
diff --git a/kernel/printk.c b/kernel/printk.c
index 6341af77eb6..f492f1583d7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -233,45 +233,6 @@ static inline void boot_delay_msec(void)
#endif
/*
- * Return the number of unread characters in the log buffer.
- */
-static int log_buf_get_len(void)
-{
- return logged_chars;
-}
-
-/*
- * Copy a range of characters from the log buffer.
- */
-int log_buf_copy(char *dest, int idx, int len)
-{
- int ret, max;
- bool took_lock = false;
-
- if (!oops_in_progress) {
- spin_lock_irq(&logbuf_lock);
- took_lock = true;
- }
-
- max = log_buf_get_len();
- if (idx < 0 || idx >= max) {
- ret = -1;
- } else {
- if (len > max)
- len = max;
- ret = len;
- idx += (log_end - max);
- while (len-- > 0)
- dest[len] = LOG_BUF(idx + len);
- }
-
- if (took_lock)
- spin_unlock_irq(&logbuf_lock);
-
- return ret;
-}
-
-/*
* Commands to do_syslog:
*
* 0 -- Close the log. Currently a NOP.
diff --git a/kernel/profile.c b/kernel/profile.c
index a9e422df6bf..9830a037d8d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -102,7 +102,7 @@ int profile_setup(char *str)
__setup("profile=", profile_setup);
-int profile_init(void)
+int __ref profile_init(void)
{
int buffer_bytes;
if (!prof_on)
diff --git a/kernel/resource.c b/kernel/resource.c
index 4089d12af6e..4337063663e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -17,6 +17,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
+#include <linux/pfn.h>
#include <asm/io.h>
@@ -522,7 +523,7 @@ static void __init __reserve_region_with_split(struct resource *root,
{
struct resource *parent = root;
struct resource *conflict;
- struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
+ struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC);
if (!res)
return;
@@ -571,7 +572,7 @@ static void __init __reserve_region_with_split(struct resource *root,
}
-void reserve_region_with_split(struct resource *root,
+void __init reserve_region_with_split(struct resource *root,
resource_size_t start, resource_size_t end,
const char *name)
{
@@ -849,7 +850,8 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
continue;
if (p->end < addr)
continue;
- if (p->start <= addr && (p->end >= addr + size - 1))
+ if (PFN_DOWN(p->start) <= PFN_DOWN(addr) &&
+ PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1))
continue;
printk(KERN_WARNING "resource map sanity check conflict: "
"0x%llx 0x%llx 0x%llx 0x%llx %s\n",
diff --git a/kernel/sched.c b/kernel/sched.c
index 6625c3c4b10..e8819bc6f46 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -386,7 +386,6 @@ struct cfs_rq {
u64 exec_clock;
u64 min_vruntime;
- u64 pair_start;
struct rb_root tasks_timeline;
struct rb_node *rb_leftmost;
@@ -3344,7 +3343,7 @@ small_imbalance:
} else
this_load_per_task = cpu_avg_load_per_task(this_cpu);
- if (max_load - this_load + 2*busiest_load_per_task >=
+ if (max_load - this_load + busiest_load_per_task >=
busiest_load_per_task * imbn) {
*imbalance = busiest_load_per_task;
return busiest;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ad958c1ec70..5ae17762ec3 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -319,7 +319,7 @@ static int __init init_sched_debug_procfs(void)
{
struct proc_dir_entry *pe;
- pe = proc_create("sched_debug", 0644, NULL, &sched_debug_fops);
+ pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
if (!pe)
return -ENOMEM;
return 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b..ce514afd78f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
return se->parent;
}
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+ int depth = 0;
+
+ for_each_sched_entity(se)
+ depth++;
+
+ return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+ int se_depth, pse_depth;
+
+ /*
+ * preemption test can be made between sibling entities who are in the
+ * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+ * both tasks until we find their ancestors who are siblings of common
+ * parent.
+ */
+
+ /* First walk up until both entities are at same depth */
+ se_depth = depth_se(*se);
+ pse_depth = depth_se(*pse);
+
+ while (se_depth > pse_depth) {
+ se_depth--;
+ *se = parent_entity(*se);
+ }
+
+ while (pse_depth > se_depth) {
+ pse_depth--;
+ *pse = parent_entity(*pse);
+ }
+
+ while (!is_same_group(*se, *pse)) {
+ *se = parent_entity(*se);
+ *pse = parent_entity(*pse);
+ }
+}
+
#else /* CONFIG_FAIR_GROUP_SCHED */
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
return NULL;
}
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
return se->vruntime - cfs_rq->min_vruntime;
}
+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+ u64 vruntime = cfs_rq->min_vruntime;
+
+ if (cfs_rq->curr)
+ vruntime = cfs_rq->curr->vruntime;
+
+ if (cfs_rq->rb_leftmost) {
+ struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
+ struct sched_entity,
+ run_node);
+
+ if (vruntime == cfs_rq->min_vruntime)
+ vruntime = se->vruntime;
+ else
+ vruntime = min_vruntime(vruntime, se->vruntime);
+ }
+
+ cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+
/*
* Enqueue an entity into the rb-tree:
*/
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Maintain a cache of leftmost tree entries (it is frequently
* used):
*/
- if (leftmost) {
+ if (leftmost)
cfs_rq->rb_leftmost = &se->run_node;
- /*
- * maintain cfs_rq->min_vruntime to be a monotonic increasing
- * value tracking the leftmost vruntime in the tree.
- */
- cfs_rq->min_vruntime =
- max_vruntime(cfs_rq->min_vruntime, se->vruntime);
- }
rb_link_node(&se->run_node, parent, link);
rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,18 +336,9 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (cfs_rq->rb_leftmost == &se->run_node) {
struct rb_node *next_node;
- struct sched_entity *next;
next_node = rb_next(&se->run_node);
cfs_rq->rb_leftmost = next_node;
-
- if (next_node) {
- next = rb_entry(next_node,
- struct sched_entity, run_node);
- cfs_rq->min_vruntime =
- max_vruntime(cfs_rq->min_vruntime,
- next->vruntime);
- }
}
if (cfs_rq->next == se)
@@ -424,6 +477,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
schedstat_add(cfs_rq, exec_clock, delta_exec);
delta_exec_weighted = calc_delta_fair(delta_exec, curr);
curr->vruntime += delta_exec_weighted;
+ update_min_vruntime(cfs_rq);
}
static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +667,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
- u64 vruntime;
-
- if (first_fair(cfs_rq)) {
- vruntime = min_vruntime(cfs_rq->min_vruntime,
- __pick_next_entity(cfs_rq)->vruntime);
- } else
- vruntime = cfs_rq->min_vruntime;
+ u64 vruntime = cfs_rq->min_vruntime;
/*
* The 'current' period is already promised to the current tasks,
@@ -696,6 +744,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
account_entity_dequeue(cfs_rq, se);
+ update_min_vruntime(cfs_rq);
}
/*
@@ -742,16 +791,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->prev_sum_exec_runtime = se->sum_exec_runtime;
}
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
+
static struct sched_entity *
pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- struct rq *rq = rq_of(cfs_rq);
- u64 pair_slice = rq->clock - cfs_rq->pair_start;
-
- if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
- cfs_rq->pair_start = rq->clock;
+ if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1)
return se;
- }
return cfs_rq->next;
}
@@ -1122,10 +1169,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
return 0;
- if (!sync && sched_feat(SYNC_WAKEUPS) &&
- curr->se.avg_overlap < sysctl_sched_migration_cost &&
- p->se.avg_overlap < sysctl_sched_migration_cost)
- sync = 1;
+ if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+ p->se.avg_overlap > sysctl_sched_migration_cost))
+ sync = 0;
/*
* If sync wakeup then subtract the (maximum possible)
@@ -1244,13 +1290,42 @@ static unsigned long wakeup_gran(struct sched_entity *se)
* More easily preempt - nice tasks, while not making it harder for
* + nice tasks.
*/
- if (sched_feat(ASYM_GRAN))
- gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
+ if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+ gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
return gran;
}
/*
+ * Should 'se' preempt 'curr'.
+ *
+ * |s1
+ * |s2
+ * |s3
+ * g
+ * |<--->|c
+ *
+ * w(c, s1) = -1
+ * w(c, s2) = 0
+ * w(c, s3) = 1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+ s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+ if (vdiff <= 0)
+ return -1;
+
+ gran = wakeup_gran(curr);
+ if (vdiff > gran)
+ return 1;
+
+ return 0;
+}
+
+/*
* Preempt the current task with a newly woken task if needed:
*/
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
@@ -1258,7 +1333,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
struct sched_entity *se = &curr->se, *pse = &p->se;
- s64 delta_exec;
if (unlikely(rt_prio(p->prio))) {
update_rq_clock(rq);
@@ -1296,9 +1370,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
return;
}
- delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
- if (delta_exec > wakeup_gran(pse))
- resched_task(curr);
+ find_matching_se(&se, &pse);
+
+ while (se) {
+ BUG_ON(!pse);
+
+ if (wakeup_preempt_entity(se, pse) == 1) {
+ resched_task(curr);
+ break;
+ }
+
+ se = parent_entity(se);
+ pse = parent_entity(pse);
+ }
}
static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1594,9 +1678,6 @@ static const struct sched_class fair_sched_class = {
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
-#ifdef CONFIG_SMP
- .select_task_rq = select_task_rq_fair,
-#endif /* CONFIG_SMP */
.check_preempt_curr = check_preempt_wakeup,
@@ -1604,6 +1685,8 @@ static const struct sched_class fair_sched_class = {
.put_prev_task = put_prev_task_fair,
#ifdef CONFIG_SMP
+ .select_task_rq = select_task_rq_fair,
+
.load_balance = load_balance_fair,
.move_one_task = move_one_task_fair,
#endif
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index dec4ccabe2f..8a21a2e28c1 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = {
/* dequeue is not valid, we print a debug message there: */
.dequeue_task = dequeue_task_idle,
-#ifdef CONFIG_SMP
- .select_task_rq = select_task_rq_idle,
-#endif /* CONFIG_SMP */
.check_preempt_curr = check_preempt_curr_idle,
@@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = {
.put_prev_task = put_prev_task_idle,
#ifdef CONFIG_SMP
+ .select_task_rq = select_task_rq_idle,
+
.load_balance = load_balance_idle,
.move_one_task = move_one_task_idle,
#endif
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b446dc87494..d9ba9d5f99d 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1504,9 +1504,6 @@ static const struct sched_class rt_sched_class = {
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
-#ifdef CONFIG_SMP
- .select_task_rq = select_task_rq_rt,
-#endif /* CONFIG_SMP */
.check_preempt_curr = check_preempt_curr_rt,
@@ -1514,6 +1511,8 @@ static const struct sched_class rt_sched_class = {
.put_prev_task = put_prev_task_rt,
#ifdef CONFIG_SMP
+ .select_task_rq = select_task_rq_rt,
+
.load_balance = load_balance_rt,
.move_one_task = move_one_task_rt,
.set_cpus_allowed = set_cpus_allowed_rt,
diff --git a/kernel/signal.c b/kernel/signal.c
index 105217da5c8..4530fc65445 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1144,7 +1144,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
struct task_struct * p;
for_each_process(p) {
- if (p->pid > 1 && !same_thread_group(p, current)) {
+ if (task_pid_vnr(p) > 1 &&
+ !same_thread_group(p, current)) {
int err = group_send_sig_info(sig, info, p);
++count;
if (err != -EPERM)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a13bd4dfaeb..6b6b727258b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -474,7 +474,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
{
.ctl_name = CTL_UNNUMBERED,
.procname = "ftrace_enabled",
@@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = {
.proc_handler = &ftrace_enable_sysctl,
},
#endif
+#ifdef CONFIG_TRACING
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "ftrace_dump_on_opps",
+ .data = &ftrace_dump_on_oops,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+#endif
#ifdef CONFIG_MODULES
{
.ctl_name = KERN_MODPROBE,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1cb3e1f616a..33dbefd471e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,13 +1,13 @@
#
-# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
+# Architectures that offer an FUNCTION_TRACER implementation should
+# select HAVE_FUNCTION_TRACER:
#
config NOP_TRACER
bool
-config HAVE_FTRACE
+config HAVE_FUNCTION_TRACER
bool
- select NOP_TRACER
config HAVE_DYNAMIC_FTRACE
bool
@@ -25,12 +25,15 @@ config TRACING
bool
select DEBUG_FS
select RING_BUFFER
- select STACKTRACE
+ select STACKTRACE if STACKTRACE_SUPPORT
select TRACEPOINTS
+ select NOP_TRACER
-config FTRACE
+menu "Tracers"
+
+config FUNCTION_TRACER
bool "Kernel Function Tracer"
- depends on HAVE_FTRACE
+ depends on HAVE_FUNCTION_TRACER
depends on DEBUG_KERNEL
select FRAME_POINTER
select TRACING
@@ -49,7 +52,6 @@ config IRQSOFF_TRACER
default n
depends on TRACE_IRQFLAGS_SUPPORT
depends on GENERIC_TIME
- depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACE_IRQFLAGS
select TRACING
@@ -73,7 +75,6 @@ config PREEMPT_TRACER
default n
depends on GENERIC_TIME
depends on PREEMPT
- depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACING
select TRACER_MAX_TRACE
@@ -101,7 +102,6 @@ config SYSPROF_TRACER
config SCHED_TRACER
bool "Scheduling Latency Tracer"
- depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACING
select CONTEXT_SWITCH_TRACER
@@ -112,7 +112,6 @@ config SCHED_TRACER
config CONTEXT_SWITCH_TRACER
bool "Trace process context switches"
- depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACING
select MARKERS
@@ -122,9 +121,9 @@ config CONTEXT_SWITCH_TRACER
config BOOT_TRACER
bool "Trace boot initcalls"
- depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACING
+ select CONTEXT_SWITCH_TRACER
help
This tracer helps developers to optimize boot times: it records
the timings of the initcalls and traces key events and the identity
@@ -141,9 +140,9 @@ config BOOT_TRACER
config STACK_TRACER
bool "Trace max stack"
- depends on HAVE_FTRACE
+ depends on HAVE_FUNCTION_TRACER
depends on DEBUG_KERNEL
- select FTRACE
+ select FUNCTION_TRACER
select STACKTRACE
help
This special tracer records the maximum stack footprint of the
@@ -160,7 +159,7 @@ config STACK_TRACER
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
- depends on FTRACE
+ depends on FUNCTION_TRACER
depends on HAVE_DYNAMIC_FTRACE
depends on DEBUG_KERNEL
default y
@@ -170,7 +169,7 @@ config DYNAMIC_FTRACE
with a No-Op instruction) as they are called. A table is
created to dynamically enable them again.
- This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
+ This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
has native performance as long as no tracing is active.
The changes to the code are done by a kernel thread that
@@ -195,3 +194,5 @@ config FTRACE_STARTUP_TEST
a series of tests are made to verify that the tracer is
functioning properly. It will do tests on all the configured
tracers of ftrace.
+
+endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index a85dfba88ba..c8228b1a49e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
# Do not instrument the tracer itself:
-ifdef CONFIG_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
@@ -10,13 +10,13 @@ CFLAGS_trace_selftest_dynamic.o = -pg
obj-y += trace_selftest_dynamic.o
endif
-obj-$(CONFIG_FTRACE) += libftrace.o
+obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
-obj-$(CONFIG_FTRACE) += trace_functions.o
+obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4dda4f60a2a..4a39d24568c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -25,13 +25,24 @@
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
-#include <linux/hash.h>
#include <linux/list.h>
#include <asm/ftrace.h>
#include "trace.h"
+#define FTRACE_WARN_ON(cond) \
+ do { \
+ if (WARN_ON(cond)) \
+ ftrace_kill(); \
+ } while (0)
+
+#define FTRACE_WARN_ON_ONCE(cond) \
+ do { \
+ if (WARN_ON_ONCE(cond)) \
+ ftrace_kill(); \
+ } while (0)
+
/* ftrace_enabled is a method to turn ftrace on or off */
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;
@@ -153,21 +164,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
}
#ifdef CONFIG_DYNAMIC_FTRACE
-
#ifndef CONFIG_FTRACE_MCOUNT_RECORD
-/*
- * The hash lock is only needed when the recording of the mcount
- * callers are dynamic. That is, by the caller themselves and
- * not recorded via the compilation.
- */
-static DEFINE_SPINLOCK(ftrace_hash_lock);
-#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
-#define ftrace_hash_unlock(flags) \
- spin_unlock_irqrestore(&ftrace_hash_lock, flags)
-#else
-/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
-#define ftrace_hash_lock(flags) do { (void)(flags); } while (0)
-#define ftrace_hash_unlock(flags) do { } while(0)
+# error Dynamic ftrace depends on MCOUNT_RECORD
#endif
/*
@@ -178,8 +176,6 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
*/
static unsigned long mcount_addr = MCOUNT_ADDR;
-static struct task_struct *ftraced_task;
-
enum {
FTRACE_ENABLE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
@@ -190,13 +186,9 @@ enum {
static int ftrace_filtered;
static int tracing_on;
-static int frozen_record_count;
-
-static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
-static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
+static LIST_HEAD(ftrace_new_addrs);
-static DEFINE_MUTEX(ftraced_lock);
static DEFINE_MUTEX(ftrace_regex_lock);
struct ftrace_page {
@@ -214,16 +206,13 @@ struct ftrace_page {
static struct ftrace_page *ftrace_pages_start;
static struct ftrace_page *ftrace_pages;
-static int ftraced_trigger;
-static int ftraced_suspend;
-static int ftraced_stop;
-
-static int ftrace_record_suspend;
-
static struct dyn_ftrace *ftrace_free_records;
#ifdef CONFIG_KPROBES
+
+static int frozen_record_count;
+
static inline void freeze_record(struct dyn_ftrace *rec)
{
if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -250,72 +239,6 @@ static inline int record_frozen(struct dyn_ftrace *rec)
# define record_frozen(rec) ({ 0; })
#endif /* CONFIG_KPROBES */
-int skip_trace(unsigned long ip)
-{
- unsigned long fl;
- struct dyn_ftrace *rec;
- struct hlist_node *t;
- struct hlist_head *head;
-
- if (frozen_record_count == 0)
- return 0;
-
- head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
- hlist_for_each_entry_rcu(rec, t, head, node) {
- if (rec->ip == ip) {
- if (record_frozen(rec)) {
- if (rec->flags & FTRACE_FL_FAILED)
- return 1;
-
- if (!(rec->flags & FTRACE_FL_CONVERTED))
- return 1;
-
- if (!tracing_on || !ftrace_enabled)
- return 1;
-
- if (ftrace_filtered) {
- fl = rec->flags & (FTRACE_FL_FILTER |
- FTRACE_FL_NOTRACE);
- if (!fl || (fl & FTRACE_FL_NOTRACE))
- return 1;
- }
- }
- break;
- }
- }
-
- return 0;
-}
-
-static inline int
-ftrace_ip_in_hash(unsigned long ip, unsigned long key)
-{
- struct dyn_ftrace *p;
- struct hlist_node *t;
- int found = 0;
-
- hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
- if (p->ip == ip) {
- found = 1;
- break;
- }
- }
-
- return found;
-}
-
-static inline void
-ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
-{
- hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
-}
-
-/* called from kstop_machine */
-static inline void ftrace_del_hash(struct dyn_ftrace *node)
-{
- hlist_del(&node->node);
-}
-
static void ftrace_free_rec(struct dyn_ftrace *rec)
{
rec->ip = (unsigned long)ftrace_free_records;
@@ -346,7 +269,6 @@ void ftrace_release(void *start, unsigned long size)
}
}
spin_unlock(&ftrace_lock);
-
}
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -358,10 +280,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
rec = ftrace_free_records;
if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
- WARN_ON_ONCE(1);
+ FTRACE_WARN_ON_ONCE(1);
ftrace_free_records = NULL;
- ftrace_disabled = 1;
- ftrace_enabled = 0;
return NULL;
}
@@ -371,76 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
}
if (ftrace_pages->index == ENTRIES_PER_PAGE) {
- if (!ftrace_pages->next)
- return NULL;
+ if (!ftrace_pages->next) {
+ /* allocate another page */
+ ftrace_pages->next =
+ (void *)get_zeroed_page(GFP_KERNEL);
+ if (!ftrace_pages->next)
+ return NULL;
+ }
ftrace_pages = ftrace_pages->next;
}
return &ftrace_pages->records[ftrace_pages->index++];
}
-static void
+static struct dyn_ftrace *
ftrace_record_ip(unsigned long ip)
{
- struct dyn_ftrace *node;
- unsigned long flags;
- unsigned long key;
- int resched;
- int cpu;
+ struct dyn_ftrace *rec;
if (!ftrace_enabled || ftrace_disabled)
- return;
-
- resched = need_resched();
- preempt_disable_notrace();
+ return NULL;
- /*
- * We simply need to protect against recursion.
- * Use the the raw version of smp_processor_id and not
- * __get_cpu_var which can call debug hooks that can
- * cause a recursive crash here.
- */
- cpu = raw_smp_processor_id();
- per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
- if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
- goto out;
-
- if (unlikely(ftrace_record_suspend))
- goto out;
-
- key = hash_long(ip, FTRACE_HASHBITS);
-
- WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
-
- if (ftrace_ip_in_hash(ip, key))
- goto out;
-
- ftrace_hash_lock(flags);
-
- /* This ip may have hit the hash before the lock */
- if (ftrace_ip_in_hash(ip, key))
- goto out_unlock;
-
- node = ftrace_alloc_dyn_node(ip);
- if (!node)
- goto out_unlock;
-
- node->ip = ip;
-
- ftrace_add_hash(node, key);
+ rec = ftrace_alloc_dyn_node(ip);
+ if (!rec)
+ return NULL;
- ftraced_trigger = 1;
+ rec->ip = ip;
- out_unlock:
- ftrace_hash_unlock(flags);
- out:
- per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
+ list_add(&rec->list, &ftrace_new_addrs);
- /* prevent recursion with scheduler */
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
+ return rec;
}
#define FTRACE_ADDR ((long)(ftrace_caller))
@@ -559,7 +439,6 @@ static void ftrace_replace_code(int enable)
rec->flags |= FTRACE_FL_FAILED;
if ((system_state == SYSTEM_BOOTING) ||
!core_kernel_text(rec->ip)) {
- ftrace_del_hash(rec);
ftrace_free_rec(rec);
}
}
@@ -567,15 +446,6 @@ static void ftrace_replace_code(int enable)
}
}
-static void ftrace_shutdown_replenish(void)
-{
- if (ftrace_pages->next)
- return;
-
- /* allocate another page */
- ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
-}
-
static void print_ip_ins(const char *fmt, unsigned char *p)
{
int i;
@@ -591,23 +461,23 @@ ftrace_code_disable(struct dyn_ftrace *rec)
{
unsigned long ip;
unsigned char *nop, *call;
- int failed;
+ int ret;
ip = rec->ip;
nop = ftrace_nop_replace();
call = ftrace_call_replace(ip, mcount_addr);
- failed = ftrace_modify_code(ip, call, nop);
- if (failed) {
- switch (failed) {
- case 1:
- WARN_ON_ONCE(1);
+ ret = ftrace_modify_code(ip, call, nop);
+ if (ret) {
+ switch (ret) {
+ case -EFAULT:
+ FTRACE_WARN_ON_ONCE(1);
pr_info("ftrace faulted on modifying ");
print_ip_sym(ip);
break;
- case 2:
- WARN_ON_ONCE(1);
+ case -EINVAL:
+ FTRACE_WARN_ON_ONCE(1);
pr_info("ftrace failed to modify ");
print_ip_sym(ip);
print_ip_ins(" expected: ", call);
@@ -615,6 +485,15 @@ ftrace_code_disable(struct dyn_ftrace *rec)
print_ip_ins(" replace: ", nop);
printk(KERN_CONT "\n");
break;
+ case -EPERM:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on writing ");
+ print_ip_sym(ip);
+ break;
+ default:
+ FTRACE_WARN_ON_ONCE(1);
+ pr_info("ftrace faulted on unknown error ");
+ print_ip_sym(ip);
}
rec->flags |= FTRACE_FL_FAILED;
@@ -623,19 +502,11 @@ ftrace_code_disable(struct dyn_ftrace *rec)
return 1;
}
-static int __ftrace_update_code(void *ignore);
-
static int __ftrace_modify_code(void *data)
{
- unsigned long addr;
int *command = data;
if (*command & FTRACE_ENABLE_CALLS) {
- /*
- * Update any recorded ips now that we have the
- * machine stopped
- */
- __ftrace_update_code(NULL);
ftrace_replace_code(1);
tracing_on = 1;
} else if (*command & FTRACE_DISABLE_CALLS) {
@@ -646,14 +517,6 @@ static int __ftrace_modify_code(void *data)
if (*command & FTRACE_UPDATE_TRACE_FUNC)
ftrace_update_ftrace_func(ftrace_trace_function);
- if (*command & FTRACE_ENABLE_MCOUNT) {
- addr = (unsigned long)ftrace_record_ip;
- ftrace_mcount_set(&addr);
- } else if (*command & FTRACE_DISABLE_MCOUNT) {
- addr = (unsigned long)ftrace_stub;
- ftrace_mcount_set(&addr);
- }
-
return 0;
}
@@ -662,26 +525,9 @@ static void ftrace_run_update_code(int command)
stop_machine(__ftrace_modify_code, &command, NULL);
}
-void ftrace_disable_daemon(void)
-{
- /* Stop the daemon from calling kstop_machine */
- mutex_lock(&ftraced_lock);
- ftraced_stop = 1;
- mutex_unlock(&ftraced_lock);
-
- ftrace_force_update();
-}
-
-void ftrace_enable_daemon(void)
-{
- mutex_lock(&ftraced_lock);
- ftraced_stop = 0;
- mutex_unlock(&ftraced_lock);
-
- ftrace_force_update();
-}
-
static ftrace_func_t saved_ftrace_func;
+static int ftrace_start;
+static DEFINE_MUTEX(ftrace_start_lock);
static void ftrace_startup(void)
{
@@ -690,9 +536,9 @@ static void ftrace_startup(void)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftraced_lock);
- ftraced_suspend++;
- if (ftraced_suspend == 1)
+ mutex_lock(&ftrace_start_lock);
+ ftrace_start++;
+ if (ftrace_start == 1)
command |= FTRACE_ENABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -705,7 +551,7 @@ static void ftrace_startup(void)
ftrace_run_update_code(command);
out:
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
}
static void ftrace_shutdown(void)
@@ -715,9 +561,9 @@ static void ftrace_shutdown(void)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftraced_lock);
- ftraced_suspend--;
- if (!ftraced_suspend)
+ mutex_lock(&ftrace_start_lock);
+ ftrace_start--;
+ if (!ftrace_start)
command |= FTRACE_DISABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -730,7 +576,7 @@ static void ftrace_shutdown(void)
ftrace_run_update_code(command);
out:
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
}
static void ftrace_startup_sysctl(void)
@@ -740,15 +586,15 @@ static void ftrace_startup_sysctl(void)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftraced_lock);
+ mutex_lock(&ftrace_start_lock);
/* Force update next time */
saved_ftrace_func = NULL;
- /* ftraced_suspend is true if we want ftrace running */
- if (ftraced_suspend)
+ /* ftrace_start is true if we want ftrace running */
+ if (ftrace_start)
command |= FTRACE_ENABLE_CALLS;
ftrace_run_update_code(command);
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
}
static void ftrace_shutdown_sysctl(void)
@@ -758,112 +604,50 @@ static void ftrace_shutdown_sysctl(void)
if (unlikely(ftrace_disabled))
return;
- mutex_lock(&ftraced_lock);
- /* ftraced_suspend is true if ftrace is running */
- if (ftraced_suspend)
+ mutex_lock(&ftrace_start_lock);
+ /* ftrace_start is true if ftrace is running */
+ if (ftrace_start)
command |= FTRACE_DISABLE_CALLS;
ftrace_run_update_code(command);
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
}
static cycle_t ftrace_update_time;
static unsigned long ftrace_update_cnt;
unsigned long ftrace_update_tot_cnt;
-static int __ftrace_update_code(void *ignore)
+static int ftrace_update_code(void)
{
- int i, save_ftrace_enabled;
+ struct dyn_ftrace *p, *t;
cycle_t start, stop;
- struct dyn_ftrace *p;
- struct hlist_node *t, *n;
- struct hlist_head *head, temp_list;
-
- /* Don't be recording funcs now */
- ftrace_record_suspend++;
- save_ftrace_enabled = ftrace_enabled;
- ftrace_enabled = 0;
start = ftrace_now(raw_smp_processor_id());
ftrace_update_cnt = 0;
- /* No locks needed, the machine is stopped! */
- for (i = 0; i < FTRACE_HASHSIZE; i++) {
- INIT_HLIST_HEAD(&temp_list);
- head = &ftrace_hash[i];
+ list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
- /* all CPUS are stopped, we are safe to modify code */
- hlist_for_each_entry_safe(p, t, n, head, node) {
- /* Skip over failed records which have not been
- * freed. */
- if (p->flags & FTRACE_FL_FAILED)
- continue;
+ /* If something went wrong, bail without enabling anything */
+ if (unlikely(ftrace_disabled))
+ return -1;
- /* Unconverted records are always at the head of the
- * hash bucket. Once we encounter a converted record,
- * simply skip over to the next bucket. Saves ftraced
- * some processor cycles (ftrace does its bid for
- * global warming :-p ). */
- if (p->flags & (FTRACE_FL_CONVERTED))
- break;
+ list_del_init(&p->list);
- /* Ignore updates to this record's mcount site.
- * Reintroduce this record at the head of this
- * bucket to attempt to "convert" it again if
- * the kprobe on it is unregistered before the
- * next run. */
- if (get_kprobe((void *)p->ip)) {
- ftrace_del_hash(p);
- INIT_HLIST_NODE(&p->node);
- hlist_add_head(&p->node, &temp_list);
- freeze_record(p);
- continue;
- } else {
- unfreeze_record(p);
- }
-
- /* convert record (i.e, patch mcount-call with NOP) */
- if (ftrace_code_disable(p)) {
- p->flags |= FTRACE_FL_CONVERTED;
- ftrace_update_cnt++;
- } else {
- if ((system_state == SYSTEM_BOOTING) ||
- !core_kernel_text(p->ip)) {
- ftrace_del_hash(p);
- ftrace_free_rec(p);
- }
- }
- }
-
- hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
- hlist_del(&p->node);
- INIT_HLIST_NODE(&p->node);
- hlist_add_head(&p->node, head);
- }
+ /* convert record (i.e, patch mcount-call with NOP) */
+ if (ftrace_code_disable(p)) {
+ p->flags |= FTRACE_FL_CONVERTED;
+ ftrace_update_cnt++;
+ } else
+ ftrace_free_rec(p);
}
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
ftrace_update_tot_cnt += ftrace_update_cnt;
- ftraced_trigger = 0;
-
- ftrace_enabled = save_ftrace_enabled;
- ftrace_record_suspend--;
return 0;
}
-static int ftrace_update_code(void)
-{
- if (unlikely(ftrace_disabled) ||
- !ftrace_enabled || !ftraced_trigger)
- return 0;
-
- stop_machine(__ftrace_update_code, NULL, NULL);
-
- return 1;
-}
-
static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
{
struct ftrace_page *pg;
@@ -892,7 +676,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
pg = ftrace_pages = ftrace_pages_start;
cnt = num_to_init / ENTRIES_PER_PAGE;
- pr_info("ftrace: allocating %ld hash entries in %d pages\n",
+ pr_info("ftrace: allocating %ld entries in %d pages\n",
num_to_init, cnt);
for (i = 0; i < cnt; i++) {
@@ -1401,10 +1185,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
}
mutex_lock(&ftrace_sysctl_lock);
- mutex_lock(&ftraced_lock);
- if (iter->filtered && ftraced_suspend && ftrace_enabled)
+ mutex_lock(&ftrace_start_lock);
+ if (iter->filtered && ftrace_start && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
- mutex_unlock(&ftraced_lock);
+ mutex_unlock(&ftrace_start_lock);
mutex_unlock(&ftrace_sysctl_lock);
kfree(iter);
@@ -1424,55 +1208,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
return ftrace_regex_release(inode, file, 0);
}
-static ssize_t
-ftraced_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- /* don't worry about races */
- char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
- int r = strlen(buf);
-
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t
-ftraced_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- char buf[64];
- long val;
- int ret;
-
- if (cnt >= sizeof(buf))
- return -EINVAL;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- if (strncmp(buf, "enable", 6) == 0)
- val = 1;
- else if (strncmp(buf, "disable", 7) == 0)
- val = 0;
- else {
- buf[cnt] = 0;
-
- ret = strict_strtoul(buf, 10, &val);
- if (ret < 0)
- return ret;
-
- val = !!val;
- }
-
- if (val)
- ftrace_enable_daemon();
- else
- ftrace_disable_daemon();
-
- filp->f_pos += cnt;
-
- return cnt;
-}
-
static struct file_operations ftrace_avail_fops = {
.open = ftrace_avail_open,
.read = seq_read,
@@ -1503,54 +1238,6 @@ static struct file_operations ftrace_notrace_fops = {
.release = ftrace_notrace_release,
};
-static struct file_operations ftraced_fops = {
- .open = tracing_open_generic,
- .read = ftraced_read,
- .write = ftraced_write,
-};
-
-/**
- * ftrace_force_update - force an update to all recording ftrace functions
- */
-int ftrace_force_update(void)
-{
- int ret = 0;
-
- if (unlikely(ftrace_disabled))
- return -ENODEV;
-
- mutex_lock(&ftrace_sysctl_lock);
- mutex_lock(&ftraced_lock);
-
- /*
- * If ftraced_trigger is not set, then there is nothing
- * to update.
- */
- if (ftraced_trigger && !ftrace_update_code())
- ret = -EBUSY;
-
- mutex_unlock(&ftraced_lock);
- mutex_unlock(&ftrace_sysctl_lock);
-
- return ret;
-}
-
-static void ftrace_force_shutdown(void)
-{
- struct task_struct *task;
- int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
-
- mutex_lock(&ftraced_lock);
- task = ftraced_task;
- ftraced_task = NULL;
- ftraced_suspend = -1;
- ftrace_run_update_code(command);
- mutex_unlock(&ftraced_lock);
-
- if (task)
- kthread_stop(task);
-}
-
static __init int ftrace_init_debugfs(void)
{
struct dentry *d_tracer;
@@ -1581,17 +1268,11 @@ static __init int ftrace_init_debugfs(void)
pr_warning("Could not create debugfs "
"'set_ftrace_notrace' entry\n");
- entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
- NULL, &ftraced_fops);
- if (!entry)
- pr_warning("Could not create debugfs "
- "'ftraced_enabled' entry\n");
return 0;
}
fs_initcall(ftrace_init_debugfs);
-#ifdef CONFIG_FTRACE_MCOUNT_RECORD
static int ftrace_convert_nops(unsigned long *start,
unsigned long *end)
{
@@ -1599,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start,
unsigned long addr;
unsigned long flags;
+ mutex_lock(&ftrace_start_lock);
p = start;
while (p < end) {
addr = ftrace_call_adjust(*p++);
- /* should not be called from interrupt context */
- spin_lock(&ftrace_lock);
ftrace_record_ip(addr);
- spin_unlock(&ftrace_lock);
- ftrace_shutdown_replenish();
}
- /* p is ignored */
+ /* disable interrupts to prevent kstop machine */
local_irq_save(flags);
- __ftrace_update_code(p);
+ ftrace_update_code();
local_irq_restore(flags);
+ mutex_unlock(&ftrace_start_lock);
return 0;
}
@@ -1658,130 +1337,34 @@ void __init ftrace_init(void)
failed:
ftrace_disabled = 1;
}
-#else /* CONFIG_FTRACE_MCOUNT_RECORD */
-static int ftraced(void *ignore)
-{
- unsigned long usecs;
-
- while (!kthread_should_stop()) {
-
- set_current_state(TASK_INTERRUPTIBLE);
-
- /* check once a second */
- schedule_timeout(HZ);
- if (unlikely(ftrace_disabled))
- continue;
-
- mutex_lock(&ftrace_sysctl_lock);
- mutex_lock(&ftraced_lock);
- if (!ftraced_suspend && !ftraced_stop &&
- ftrace_update_code()) {
- usecs = nsecs_to_usecs(ftrace_update_time);
- if (ftrace_update_tot_cnt > 100000) {
- ftrace_update_tot_cnt = 0;
- pr_info("hm, dftrace overflow: %lu change%s"
- " (%lu total) in %lu usec%s\n",
- ftrace_update_cnt,
- ftrace_update_cnt != 1 ? "s" : "",
- ftrace_update_tot_cnt,
- usecs, usecs != 1 ? "s" : "");
- ftrace_disabled = 1;
- WARN_ON_ONCE(1);
- }
- }
- mutex_unlock(&ftraced_lock);
- mutex_unlock(&ftrace_sysctl_lock);
-
- ftrace_shutdown_replenish();
- }
- __set_current_state(TASK_RUNNING);
- return 0;
-}
+#else
-static int __init ftrace_dynamic_init(void)
+static int __init ftrace_nodyn_init(void)
{
- struct task_struct *p;
- unsigned long addr;
- int ret;
-
- addr = (unsigned long)ftrace_record_ip;
-
- stop_machine(ftrace_dyn_arch_init, &addr, NULL);
-
- /* ftrace_dyn_arch_init places the return code in addr */
- if (addr) {
- ret = (int)addr;
- goto failed;
- }
-
- ret = ftrace_dyn_table_alloc(NR_TO_INIT);
- if (ret)
- goto failed;
-
- p = kthread_run(ftraced, NULL, "ftraced");
- if (IS_ERR(p)) {
- ret = -1;
- goto failed;
- }
-
- last_ftrace_enabled = ftrace_enabled = 1;
- ftraced_task = p;
-
+ ftrace_enabled = 1;
return 0;
-
- failed:
- ftrace_disabled = 1;
- return ret;
}
+device_initcall(ftrace_nodyn_init);
-core_initcall(ftrace_dynamic_init);
-#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
-
-#else
# define ftrace_startup() do { } while (0)
# define ftrace_shutdown() do { } while (0)
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
-# define ftrace_force_shutdown() do { } while (0)
#endif /* CONFIG_DYNAMIC_FTRACE */
/**
- * ftrace_kill_atomic - kill ftrace from critical sections
+ * ftrace_kill - kill ftrace
*
* This function should be used by panic code. It stops ftrace
* but in a not so nice way. If you need to simply kill ftrace
* from a non-atomic section, use ftrace_kill.
*/
-void ftrace_kill_atomic(void)
-{
- ftrace_disabled = 1;
- ftrace_enabled = 0;
-#ifdef CONFIG_DYNAMIC_FTRACE
- ftraced_suspend = -1;
-#endif
- clear_ftrace_function();
-}
-
-/**
- * ftrace_kill - totally shutdown ftrace
- *
- * This is a safety measure. If something was detected that seems
- * wrong, calling this function will keep ftrace from doing
- * any more modifications, and updates.
- * used when something went wrong.
- */
void ftrace_kill(void)
{
- mutex_lock(&ftrace_sysctl_lock);
ftrace_disabled = 1;
ftrace_enabled = 0;
-
clear_ftrace_function();
- mutex_unlock(&ftrace_sysctl_lock);
-
- /* Try to totally disable ftrace */
- ftrace_force_shutdown();
}
/**
@@ -1870,3 +1453,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
mutex_unlock(&ftrace_sysctl_lock);
return ret;
}
+
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 94af1fe56bb..cedf4e26828 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -130,7 +130,7 @@ struct buffer_page {
static inline void free_buffer_page(struct buffer_page *bpage)
{
if (bpage->page)
- __free_page(bpage->page);
+ free_page((unsigned long)bpage->page);
kfree(bpage);
}
@@ -966,7 +966,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
if (unlikely(*delta > (1ULL << 59) && !once++)) {
printk(KERN_WARNING "Delta way too big! %llu"
" ts=%llu write stamp = %llu\n",
- *delta, *ts, cpu_buffer->write_stamp);
+ (unsigned long long)*delta,
+ (unsigned long long)*ts,
+ (unsigned long long)cpu_buffer->write_stamp);
WARN_ON(1);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d345d649d07..e4c40c868d6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -34,6 +34,7 @@
#include <linux/stacktrace.h>
#include <linux/ring_buffer.h>
+#include <linux/irqflags.h>
#include "trace.h"
@@ -63,6 +64,37 @@ static cpumask_t __read_mostly tracing_buffer_mask;
static int tracing_disabled = 1;
+/*
+ * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
+ *
+ * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
+ * is set, then ftrace_dump is called. This will output the contents
+ * of the ftrace buffers to the console. This is very useful for
+ * capturing traces that lead to crashes and outputing it to a
+ * serial console.
+ *
+ * It is default off, but you can enable it with either specifying
+ * "ftrace_dump_on_oops" in the kernel command line, or setting
+ * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ */
+int ftrace_dump_on_oops;
+
+static int tracing_set_tracer(char *buf);
+
+static int __init set_ftrace(char *str)
+{
+ tracing_set_tracer(str);
+ return 1;
+}
+__setup("ftrace", set_ftrace);
+
+static int __init set_ftrace_dump_on_oops(char *str)
+{
+ ftrace_dump_on_oops = 1;
+ return 1;
+}
+__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
+
long
ns2usecs(cycle_t nsec)
{
@@ -655,7 +687,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
entry->flags =
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
+#else
+ TRACE_FLAG_IRQS_NOSUPPORT |
+#endif
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -700,6 +736,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
unsigned long flags,
int skip, int pc)
{
+#ifdef CONFIG_STACKTRACE
struct ring_buffer_event *event;
struct stack_entry *entry;
struct stack_trace trace;
@@ -725,6 +762,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
save_stack_trace(&trace);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+#endif
}
void __trace_stack(struct trace_array *tr,
@@ -851,7 +889,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
preempt_enable_notrace();
}
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
static void
function_trace_call(unsigned long ip, unsigned long parent_ip)
{
@@ -865,9 +903,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
if (unlikely(!ftrace_function_enabled))
return;
- if (skip_trace(ip))
- return;
-
pc = preempt_count();
resched = need_resched();
preempt_disable_notrace();
@@ -1246,7 +1281,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
trace_seq_printf(s, "%3d", cpu);
trace_seq_printf(s, "%c%c",
- (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
+ (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -2369,28 +2405,11 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static ssize_t
-tracing_set_trace_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
+static int tracing_set_tracer(char *buf)
{
struct trace_array *tr = &global_trace;
struct tracer *t;
- char buf[max_tracer_type_len+1];
- int i;
- size_t ret;
-
- if (cnt > max_tracer_type_len)
- cnt = max_tracer_type_len;
- ret = cnt;
-
- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
-
- buf[cnt] = 0;
-
- /* strip ending whitespace. */
- for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
- buf[i] = 0;
+ int ret = 0;
mutex_lock(&trace_types_lock);
for (t = trace_types; t; t = t->next) {
@@ -2414,8 +2433,35 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
out:
mutex_unlock(&trace_types_lock);
- if (ret == cnt)
- filp->f_pos += cnt;
+ return ret;
+}
+
+static ssize_t
+tracing_set_trace_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[max_tracer_type_len+1];
+ int i;
+ size_t ret;
+
+ if (cnt > max_tracer_type_len)
+ cnt = max_tracer_type_len;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+
+ /* strip ending whitespace. */
+ for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
+ buf[i] = 0;
+
+ ret = tracing_set_tracer(buf);
+ if (!ret)
+ ret = cnt;
+
+ if (ret > 0)
+ filp->f_pos += ret;
return ret;
}
@@ -2816,22 +2862,38 @@ static struct file_operations tracing_mark_fops = {
#ifdef CONFIG_DYNAMIC_FTRACE
+int __weak ftrace_arch_read_dyn_info(char *buf, int size)
+{
+ return 0;
+}
+
static ssize_t
-tracing_read_long(struct file *filp, char __user *ubuf,
+tracing_read_dyn_info(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
+ static char ftrace_dyn_info_buffer[1024];
+ static DEFINE_MUTEX(dyn_info_mutex);
unsigned long *p = filp->private_data;
- char buf[64];
+ char *buf = ftrace_dyn_info_buffer;
+ int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
int r;
- r = sprintf(buf, "%ld\n", *p);
+ mutex_lock(&dyn_info_mutex);
+ r = sprintf(buf, "%ld ", *p);
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+ r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
+ buf[r++] = '\n';
+
+ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+
+ mutex_unlock(&dyn_info_mutex);
+
+ return r;
}
-static struct file_operations tracing_read_long_fops = {
+static struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
- .read = tracing_read_long,
+ .read = tracing_read_dyn_info,
};
#endif
@@ -2940,7 +3002,7 @@ static __init int tracer_init_debugfs(void)
#ifdef CONFIG_DYNAMIC_FTRACE
entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt,
- &tracing_read_long_fops);
+ &tracing_dyn_info_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'dyn_ftrace_total_info' entry\n");
@@ -3021,7 +3083,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk);
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
return NOTIFY_OK;
}
@@ -3037,7 +3100,8 @@ static int trace_die_handler(struct notifier_block *self,
{
switch (val) {
case DIE_OOPS:
- ftrace_dump();
+ if (ftrace_dump_on_oops)
+ ftrace_dump();
break;
default:
break;
@@ -3078,7 +3142,6 @@ trace_printk_seq(struct trace_seq *s)
trace_seq_reset(s);
}
-
void ftrace_dump(void)
{
static DEFINE_SPINLOCK(ftrace_dump_lock);
@@ -3097,7 +3160,7 @@ void ftrace_dump(void)
dump_ran = 1;
/* No turning back! */
- ftrace_kill_atomic();
+ ftrace_kill();
for_each_tracing_cpu(cpu) {
atomic_inc(&global_trace.data[cpu]->disabled);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f1f99572cde..8465ad05270 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -120,18 +120,20 @@ struct trace_boot {
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
- * IRQS_OFF - interrupts were disabled
- * NEED_RESCED - reschedule is requested
- * HARDIRQ - inside an interrupt handler
- * SOFTIRQ - inside a softirq handler
- * CONT - multiple entries hold the trace item
+ * IRQS_OFF - interrupts were disabled
+ * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
+ * NEED_RESCED - reschedule is requested
+ * HARDIRQ - inside an interrupt handler
+ * SOFTIRQ - inside a softirq handler
+ * CONT - multiple entries hold the trace item
*/
enum trace_flag_type {
TRACE_FLAG_IRQS_OFF = 0x01,
- TRACE_FLAG_NEED_RESCHED = 0x02,
- TRACE_FLAG_HARDIRQ = 0x04,
- TRACE_FLAG_SOFTIRQ = 0x08,
- TRACE_FLAG_CONT = 0x10,
+ TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
+ TRACE_FLAG_NEED_RESCHED = 0x04,
+ TRACE_FLAG_HARDIRQ = 0x08,
+ TRACE_FLAG_SOFTIRQ = 0x10,
+ TRACE_FLAG_CONT = 0x20,
};
#define TRACE_BUF_SIZE 1024
@@ -335,7 +337,7 @@ void update_max_tr_single(struct trace_array *tr,
extern cycle_t ftrace_now(int cpu);
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
void tracing_start_function_trace(void);
void tracing_stop_function_trace(void);
#else
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index e90eb0c2c56..0f85a64003d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr)
static struct tracer function_trace __read_mostly =
{
- .name = "ftrace",
+ .name = "function",
.init = function_trace_init,
.reset = function_trace_reset,
.ctrl_update = function_trace_ctrl_update,
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index a7db7f040ae..9c74071c10e 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
*/
static __cacheline_aligned_in_smp unsigned long max_sequence;
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
{
.func = irqsoff_tracer_call,
};
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
/*
* Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index fe4a252c236..3ae93f16b56 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
static void __wakeup_reset(struct trace_array *tr);
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
@@ -96,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly =
{
.func = wakeup_tracer_call,
};
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
/*
* Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 09cf230d7ec..90bc752a758 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -70,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
return ret;
}
-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -99,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
/* passed in by parameter to fool gcc from optimizing */
func();
- /* update the records */
- ret = ftrace_force_update();
- if (ret) {
- printk(KERN_CONT ".. ftraced failed .. ");
- return ret;
- }
-
/*
* Some archs *cough*PowerPC*cough* add charachters to the
* start of the function names. We simply put a '*' to
@@ -183,13 +176,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
/* make sure msleep has been recorded */
msleep(1);
- /* force the recorded functions to be traced */
- ret = ftrace_force_update();
- if (ret) {
- printk(KERN_CONT ".. ftraced failed .. ");
- return ret;
- }
-
/* start the tracing */
ftrace_enabled = 1;
tracer_enabled = 1;
@@ -226,7 +212,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
return ret;
}
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_IRQSOFF_TRACER
int
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 74c5d9a3afa..be682b62fe5 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -44,6 +44,10 @@ static inline void check_stack(void)
if (this_size <= max_stack_size)
return;
+ /* we do not handle interrupt stacks yet */
+ if (!object_is_on_stack(&this_size))
+ return;
+
raw_local_irq_save(flags);
__raw_spin_lock(&max_stack_lock);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f2b7c28a470..e96590f17de 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -43,6 +43,7 @@ static DEFINE_MUTEX(tracepoints_mutex);
*/
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
/*
* Note about RCU :
@@ -54,40 +55,43 @@ struct tracepoint_entry {
struct hlist_node hlist;
void **funcs;
int refcount; /* Number of times armed. 0 if disarmed. */
- struct rcu_head rcu;
- void *oldptr;
- unsigned char rcu_pending:1;
char name[0];
};
-static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+struct tp_probes {
+ union {
+ struct rcu_head rcu;
+ struct list_head list;
+ } u;
+ void *probes[0];
+};
-static void free_old_closure(struct rcu_head *head)
+static inline void *allocate_probes(int count)
{
- struct tracepoint_entry *entry = container_of(head,
- struct tracepoint_entry, rcu);
- kfree(entry->oldptr);
- /* Make sure we free the data before setting the pending flag to 0 */
- smp_wmb();
- entry->rcu_pending = 0;
+ struct tp_probes *p = kmalloc(count * sizeof(void *)
+ + sizeof(struct tp_probes), GFP_KERNEL);
+ return p == NULL ? NULL : p->probes;
}
-static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+static void rcu_free_old_probes(struct rcu_head *head)
{
- if (!old)
- return;
- entry->oldptr = old;
- entry->rcu_pending = 1;
- /* write rcu_pending before calling the RCU callback */
- smp_wmb();
- call_rcu_sched(&entry->rcu, free_old_closure);
+ kfree(container_of(head, struct tp_probes, u.rcu));
+}
+
+static inline void release_probes(void *old)
+{
+ if (old) {
+ struct tp_probes *tp_probes = container_of(old,
+ struct tp_probes, probes[0]);
+ call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
+ }
}
static void debug_print_probes(struct tracepoint_entry *entry)
{
int i;
- if (!tracepoint_debug)
+ if (!tracepoint_debug || !entry->funcs)
return;
for (i = 0; entry->funcs[i]; i++)
@@ -111,12 +115,13 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
return ERR_PTR(-EEXIST);
}
/* + 2 : one for new probe, one for NULL func */
- new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+ new = allocate_probes(nr_probes + 2);
if (new == NULL)
return ERR_PTR(-ENOMEM);
if (old)
memcpy(new, old, nr_probes * sizeof(void *));
new[nr_probes] = probe;
+ new[nr_probes + 1] = NULL;
entry->refcount = nr_probes + 1;
entry->funcs = new;
debug_print_probes(entry);
@@ -131,6 +136,9 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
old = entry->funcs;
+ if (!old)
+ return ERR_PTR(-ENOENT);
+
debug_print_probes(entry);
/* (N -> M), (N > 1, M >= 0) probes */
for (nr_probes = 0; old[nr_probes]; nr_probes++) {
@@ -148,13 +156,13 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
int j = 0;
/* N -> M, (N > 1, M > 0) */
/* + 1 for NULL */
- new = kzalloc((nr_probes - nr_del + 1)
- * sizeof(void *), GFP_KERNEL);
+ new = allocate_probes(nr_probes - nr_del + 1);
if (new == NULL)
return ERR_PTR(-ENOMEM);
for (i = 0; old[i]; i++)
if ((probe && old[i] != probe))
new[j++] = old[i];
+ new[nr_probes - nr_del] = NULL;
entry->refcount = nr_probes - nr_del;
entry->funcs = new;
}
@@ -212,7 +220,6 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
memcpy(&e->name[0], name, name_len);
e->funcs = NULL;
e->refcount = 0;
- e->rcu_pending = 0;
hlist_add_head(&e->hlist, head);
return e;
}
@@ -221,32 +228,10 @@ static struct tracepoint_entry *add_tracepoint(const char *name)
* Remove the tracepoint from the tracepoint hash table. Must be called with
* mutex_lock held.
*/
-static int remove_tracepoint(const char *name)
+static inline void remove_tracepoint(struct tracepoint_entry *e)
{
- struct hlist_head *head;
- struct hlist_node *node;
- struct tracepoint_entry *e;
- int found = 0;
- size_t len = strlen(name) + 1;
- u32 hash = jhash(name, len-1, 0);
-
- head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
- hlist_for_each_entry(e, node, head, hlist) {
- if (!strcmp(name, e->name)) {
- found = 1;
- break;
- }
- }
- if (!found)
- return -ENOENT;
- if (e->refcount)
- return -EBUSY;
hlist_del(&e->hlist);
- /* Make sure the call_rcu_sched has been executed */
- if (e->rcu_pending)
- rcu_barrier_sched();
kfree(e);
- return 0;
}
/*
@@ -317,6 +302,23 @@ static void tracepoint_update_probes(void)
module_update_tracepoints();
}
+static void *tracepoint_add_probe(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ void *old;
+
+ entry = get_tracepoint(name);
+ if (!entry) {
+ entry = add_tracepoint(name);
+ if (IS_ERR(entry))
+ return entry;
+ }
+ old = tracepoint_entry_add_probe(entry, probe);
+ if (IS_ERR(old) && !entry->refcount)
+ remove_tracepoint(entry);
+ return old;
+}
+
/**
* tracepoint_probe_register - Connect a probe to a tracepoint
* @name: tracepoint name
@@ -327,44 +329,36 @@ static void tracepoint_update_probes(void)
*/
int tracepoint_probe_register(const char *name, void *probe)
{
- struct tracepoint_entry *entry;
- int ret = 0;
void *old;
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry) {
- entry = add_tracepoint(name);
- if (IS_ERR(entry)) {
- ret = PTR_ERR(entry);
- goto end;
- }
- }
- /*
- * If we detect that a call_rcu_sched is pending for this tracepoint,
- * make sure it's executed now.
- */
- if (entry->rcu_pending)
- rcu_barrier_sched();
- old = tracepoint_entry_add_probe(entry, probe);
- if (IS_ERR(old)) {
- ret = PTR_ERR(old);
- goto end;
- }
+ old = tracepoint_add_probe(name, probe);
mutex_unlock(&tracepoints_mutex);
+ if (IS_ERR(old))
+ return PTR_ERR(old);
+
tracepoint_update_probes(); /* may update entry */
- mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- WARN_ON(!entry);
- if (entry->rcu_pending)
- rcu_barrier_sched();
- tracepoint_entry_free_old(entry, old);
-end:
- mutex_unlock(&tracepoints_mutex);
- return ret;
+ release_probes(old);
+ return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+static void *tracepoint_remove_probe(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ void *old;
+
+ entry = get_tracepoint(name);
+ if (!entry)
+ return ERR_PTR(-ENOENT);
+ old = tracepoint_entry_remove_probe(entry, probe);
+ if (IS_ERR(old))
+ return old;
+ if (!entry->refcount)
+ remove_tracepoint(entry);
+ return old;
+}
+
/**
* tracepoint_probe_unregister - Disconnect a probe from a tracepoint
* @name: tracepoint name
@@ -377,33 +371,104 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register);
*/
int tracepoint_probe_unregister(const char *name, void *probe)
{
- struct tracepoint_entry *entry;
void *old;
- int ret = -ENOENT;
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry)
- goto end;
- if (entry->rcu_pending)
- rcu_barrier_sched();
- old = tracepoint_entry_remove_probe(entry, probe);
+ old = tracepoint_remove_probe(name, probe);
mutex_unlock(&tracepoints_mutex);
+ if (IS_ERR(old))
+ return PTR_ERR(old);
+
tracepoint_update_probes(); /* may update entry */
+ release_probes(old);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+static LIST_HEAD(old_probes);
+static int need_update;
+
+static void tracepoint_add_old_probes(void *old)
+{
+ need_update = 1;
+ if (old) {
+ struct tp_probes *tp_probes = container_of(old,
+ struct tp_probes, probes[0]);
+ list_add(&tp_probes->u.list, &old_probes);
+ }
+}
+
+/**
+ * tracepoint_probe_register_noupdate - register a probe but not connect
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * caller must call tracepoint_probe_update_all()
+ */
+int tracepoint_probe_register_noupdate(const char *name, void *probe)
+{
+ void *old;
+
mutex_lock(&tracepoints_mutex);
- entry = get_tracepoint(name);
- if (!entry)
- goto end;
- if (entry->rcu_pending)
- rcu_barrier_sched();
- tracepoint_entry_free_old(entry, old);
- remove_tracepoint(name); /* Ignore busy error message */
- ret = 0;
-end:
+ old = tracepoint_add_probe(name, probe);
+ if (IS_ERR(old)) {
+ mutex_unlock(&tracepoints_mutex);
+ return PTR_ERR(old);
+ }
+ tracepoint_add_old_probes(old);
mutex_unlock(&tracepoints_mutex);
- return ret;
+ return 0;
}
-EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
+
+/**
+ * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * caller must call tracepoint_probe_update_all()
+ */
+int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
+{
+ void *old;
+
+ mutex_lock(&tracepoints_mutex);
+ old = tracepoint_remove_probe(name, probe);
+ if (IS_ERR(old)) {
+ mutex_unlock(&tracepoints_mutex);
+ return PTR_ERR(old);
+ }
+ tracepoint_add_old_probes(old);
+ mutex_unlock(&tracepoints_mutex);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
+
+/**
+ * tracepoint_probe_update_all - update tracepoints
+ */
+void tracepoint_probe_update_all(void)
+{
+ LIST_HEAD(release_probes);
+ struct tp_probes *pos, *next;
+
+ mutex_lock(&tracepoints_mutex);
+ if (!need_update) {
+ mutex_unlock(&tracepoints_mutex);
+ return;
+ }
+ if (!list_empty(&old_probes))
+ list_replace_init(&old_probes, &release_probes);
+ need_update = 0;
+ mutex_unlock(&tracepoints_mutex);
+
+ tracepoint_update_probes();
+ list_for_each_entry_safe(pos, next, &release_probes, u.list) {
+ list_del(&pos->u.list);
+ call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
+ }
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
/**
* tracepoint_get_iter_range - Get a next tracepoint iterator given a range.