aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile4
-rw-r--r--kernel/audit_tree.c91
-rw-r--r--kernel/auditfilter.c14
-rw-r--r--kernel/cgroup_freezer.c19
-rw-r--r--kernel/cpu.c3
-rw-r--r--kernel/exit.c14
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/hrtimer.c26
-rw-r--r--kernel/kprobes.c23
-rw-r--r--kernel/sched.c15
-rw-r--r--kernel/sched_debug.c41
-rw-r--r--kernel/sched_fair.c17
-rw-r--r--kernel/softirq.c7
-rw-r--r--kernel/stop_machine.c5
-rw-r--r--kernel/time/tick-sched.c4
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c19
-rw-r--r--kernel/workqueue.c45
18 files changed, 252 insertions, 108 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a3ec66a9d8..19fad003b19 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -11,8 +11,6 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o
-CFLAGS_REMOVE_sched.o = -mno-spe
-
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
@@ -21,7 +19,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
-CFLAGS_REMOVE_sched.o = -mno-spe -pg
+CFLAGS_REMOVE_sched.o = -pg
endif
obj-$(CONFIG_FREEZER) += freezer.o
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 8ba0e0d934f..8b509441f49 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -24,6 +24,7 @@ struct audit_chunk {
struct list_head trees; /* with root here */
int dead;
int count;
+ atomic_long_t refs;
struct rcu_head head;
struct node {
struct list_head list;
@@ -56,7 +57,8 @@ static LIST_HEAD(prune_list);
* tree is refcounted; one reference for "some rules on rules_list refer to
* it", one for each chunk with pointer to it.
*
- * chunk is refcounted by embedded inotify_watch.
+ * chunk is refcounted by embedded inotify_watch + .refs (non-zero refcount
+ * of watch contributes 1 to .refs).
*
* node.index allows to get from node.list to containing chunk.
* MSB of that sucker is stolen to mark taggings that we might have to
@@ -121,6 +123,7 @@ static struct audit_chunk *alloc_chunk(int count)
INIT_LIST_HEAD(&chunk->hash);
INIT_LIST_HEAD(&chunk->trees);
chunk->count = count;
+ atomic_long_set(&chunk->refs, 1);
for (i = 0; i < count; i++) {
INIT_LIST_HEAD(&chunk->owners[i].list);
chunk->owners[i].index = i;
@@ -129,9 +132,8 @@ static struct audit_chunk *alloc_chunk(int count)
return chunk;
}
-static void __free_chunk(struct rcu_head *rcu)
+static void free_chunk(struct audit_chunk *chunk)
{
- struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
int i;
for (i = 0; i < chunk->count; i++) {
@@ -141,14 +143,16 @@ static void __free_chunk(struct rcu_head *rcu)
kfree(chunk);
}
-static inline void free_chunk(struct audit_chunk *chunk)
+void audit_put_chunk(struct audit_chunk *chunk)
{
- call_rcu(&chunk->head, __free_chunk);
+ if (atomic_long_dec_and_test(&chunk->refs))
+ free_chunk(chunk);
}
-void audit_put_chunk(struct audit_chunk *chunk)
+static void __put_chunk(struct rcu_head *rcu)
{
- put_inotify_watch(&chunk->watch);
+ struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
+ audit_put_chunk(chunk);
}
enum {HASH_SIZE = 128};
@@ -176,7 +180,7 @@ struct audit_chunk *audit_tree_lookup(const struct inode *inode)
list_for_each_entry_rcu(p, list, hash) {
if (p->watch.inode == inode) {
- get_inotify_watch(&p->watch);
+ atomic_long_inc(&p->refs);
return p;
}
}
@@ -194,17 +198,49 @@ int audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
/* tagging and untagging inodes with trees */
-static void untag_chunk(struct audit_chunk *chunk, struct node *p)
+static struct audit_chunk *find_chunk(struct node *p)
+{
+ int index = p->index & ~(1U<<31);
+ p -= index;
+ return container_of(p, struct audit_chunk, owners[0]);
+}
+
+static void untag_chunk(struct node *p)
{
+ struct audit_chunk *chunk = find_chunk(p);
struct audit_chunk *new;
struct audit_tree *owner;
int size = chunk->count - 1;
int i, j;
+ if (!pin_inotify_watch(&chunk->watch)) {
+ /*
+ * Filesystem is shutting down; all watches are getting
+ * evicted, just take it off the node list for this
+ * tree and let the eviction logics take care of the
+ * rest.
+ */
+ owner = p->owner;
+ if (owner->root == chunk) {
+ list_del_init(&owner->same_root);
+ owner->root = NULL;
+ }
+ list_del_init(&p->list);
+ p->owner = NULL;
+ put_tree(owner);
+ return;
+ }
+
+ spin_unlock(&hash_lock);
+
+ /*
+ * pin_inotify_watch() succeeded, so the watch won't go away
+ * from under us.
+ */
mutex_lock(&chunk->watch.inode->inotify_mutex);
if (chunk->dead) {
mutex_unlock(&chunk->watch.inode->inotify_mutex);
- return;
+ goto out;
}
owner = p->owner;
@@ -221,7 +257,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
inotify_evict_watch(&chunk->watch);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
- return;
+ goto out;
}
new = alloc_chunk(size);
@@ -263,7 +299,7 @@ static void untag_chunk(struct audit_chunk *chunk, struct node *p)
inotify_evict_watch(&chunk->watch);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
put_inotify_watch(&chunk->watch);
- return;
+ goto out;
Fallback:
// do the best we can
@@ -277,6 +313,9 @@ Fallback:
put_tree(owner);
spin_unlock(&hash_lock);
mutex_unlock(&chunk->watch.inode->inotify_mutex);
+out:
+ unpin_inotify_watch(&chunk->watch);
+ spin_lock(&hash_lock);
}
static int create_chunk(struct inode *inode, struct audit_tree *tree)
@@ -387,13 +426,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
return 0;
}
-static struct audit_chunk *find_chunk(struct node *p)
-{
- int index = p->index & ~(1U<<31);
- p -= index;
- return container_of(p, struct audit_chunk, owners[0]);
-}
-
static void kill_rules(struct audit_tree *tree)
{
struct audit_krule *rule, *next;
@@ -431,17 +463,10 @@ static void prune_one(struct audit_tree *victim)
spin_lock(&hash_lock);
while (!list_empty(&victim->chunks)) {
struct node *p;
- struct audit_chunk *chunk;
p = list_entry(victim->chunks.next, struct node, list);
- chunk = find_chunk(p);
- get_inotify_watch(&chunk->watch);
- spin_unlock(&hash_lock);
-
- untag_chunk(chunk, p);
- put_inotify_watch(&chunk->watch);
- spin_lock(&hash_lock);
+ untag_chunk(p);
}
spin_unlock(&hash_lock);
put_tree(victim);
@@ -469,7 +494,6 @@ static void trim_marked(struct audit_tree *tree)
while (!list_empty(&tree->chunks)) {
struct node *node;
- struct audit_chunk *chunk;
node = list_entry(tree->chunks.next, struct node, list);
@@ -477,14 +501,7 @@ static void trim_marked(struct audit_tree *tree)
if (!(node->index & (1U<<31)))
break;
- chunk = find_chunk(node);
- get_inotify_watch(&chunk->watch);
- spin_unlock(&hash_lock);
-
- untag_chunk(chunk, node);
-
- put_inotify_watch(&chunk->watch);
- spin_lock(&hash_lock);
+ untag_chunk(node);
}
if (!tree->root && !tree->goner) {
tree->goner = 1;
@@ -878,7 +895,7 @@ static void handle_event(struct inotify_watch *watch, u32 wd, u32 mask,
static void destroy_watch(struct inotify_watch *watch)
{
struct audit_chunk *chunk = container_of(watch, struct audit_chunk, watch);
- free_chunk(chunk);
+ call_rcu(&chunk->head, __put_chunk);
}
static const struct inotify_operations rtree_inotify_ops = {
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index b7d354e2b0e..9fd85a4640a 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1094,8 +1094,8 @@ static void audit_inotify_unregister(struct list_head *in_list)
list_for_each_entry_safe(p, n, in_list, ilist) {
list_del(&p->ilist);
inotify_rm_watch(audit_ih, &p->wdata);
- /* the put matching the get in audit_do_del_rule() */
- put_inotify_watch(&p->wdata);
+ /* the unpin matching the pin in audit_do_del_rule() */
+ unpin_inotify_watch(&p->wdata);
}
}
@@ -1389,9 +1389,13 @@ static inline int audit_del_rule(struct audit_entry *entry,
/* Put parent on the inotify un-registration
* list. Grab a reference before releasing
* audit_filter_mutex, to be released in
- * audit_inotify_unregister(). */
- list_add(&parent->ilist, &inotify_list);
- get_inotify_watch(&parent->wdata);
+ * audit_inotify_unregister().
+ * If filesystem is going away, just leave
+ * the sucker alone, eviction will take
+ * care of it.
+ */
+ if (pin_inotify_watch(&parent->wdata))
+ list_add(&parent->ilist, &inotify_list);
}
}
}
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 7fa476f01d0..fb249e2bcad 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -184,9 +184,20 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
{
struct freezer *freezer;
- task_lock(task);
+ /*
+ * No lock is needed, since the task isn't on tasklist yet,
+ * so it can't be moved to another cgroup, which means the
+ * freezer won't be removed and will be valid during this
+ * function call.
+ */
freezer = task_freezer(task);
- task_unlock(task);
+
+ /*
+ * The root cgroup is non-freezable, so we can skip the
+ * following check.
+ */
+ if (!freezer->css.cgroup->parent)
+ return;
spin_lock_irq(&freezer->lock);
BUG_ON(freezer->state == CGROUP_FROZEN);
@@ -331,7 +342,7 @@ static int freezer_write(struct cgroup *cgroup,
else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
goal_state = CGROUP_FROZEN;
else
- return -EIO;
+ return -EINVAL;
if (!cgroup_lock_live_group(cgroup))
return -ENODEV;
@@ -350,6 +361,8 @@ static struct cftype files[] = {
static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
{
+ if (!cgroup->parent)
+ return 0;
return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 86d49045dae..5a732c5ef08 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
+
+const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
+EXPORT_SYMBOL(cpu_all_bits);
diff --git a/kernel/exit.c b/kernel/exit.c
index 80137a5d946..2d8be7ebb0f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -40,7 +40,6 @@
#include <linux/cn_proc.h>
#include <linux/mutex.h>
#include <linux/futex.h>
-#include <linux/compat.h>
#include <linux/pipe_fs_i.h>
#include <linux/audit.h> /* for audit_free() */
#include <linux/resource.h>
@@ -141,6 +140,11 @@ static void __exit_signal(struct task_struct *tsk)
if (sig) {
flush_sigqueue(&sig->shared_pending);
taskstats_tgid_free(sig);
+ /*
+ * Make sure ->signal can't go away under rq->lock,
+ * see account_group_exec_runtime().
+ */
+ task_rq_unlock_wait(tsk);
__cleanup_signal(sig);
}
}
@@ -1054,14 +1058,6 @@ NORET_TYPE void do_exit(long code)
exit_itimers(tsk->signal);
}
acct_collect(code, group_dead);
-#ifdef CONFIG_FUTEX
- if (unlikely(tsk->robust_list))
- exit_robust_list(tsk);
-#ifdef CONFIG_COMPAT
- if (unlikely(tsk->compat_robust_list))
- compat_exit_robust_list(tsk);
-#endif
-#endif
if (group_dead)
tty_audit_exit();
if (unlikely(tsk->audit_context))
diff --git a/kernel/fork.c b/kernel/fork.c
index f6083561dfe..2a372a0e206 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -40,6 +40,7 @@
#include <linux/jiffies.h>
#include <linux/tracehook.h>
#include <linux/futex.h>
+#include <linux/compat.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
@@ -519,6 +520,16 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
{
struct completion *vfork_done = tsk->vfork_done;
+ /* Get rid of any futexes when releasing the mm */
+#ifdef CONFIG_FUTEX
+ if (unlikely(tsk->robust_list))
+ exit_robust_list(tsk);
+#ifdef CONFIG_COMPAT
+ if (unlikely(tsk->compat_robust_list))
+ compat_exit_robust_list(tsk);
+#endif
+#endif
+
/* Get rid of any cached register state */
deactivate_mm(tsk, mm);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b465dfde42..47e63349d1b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -664,14 +664,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
/* Timer is expired, act upon the callback mode */
switch(timer->cb_mode) {
- case HRTIMER_CB_IRQSAFE_NO_RESTART:
- debug_hrtimer_deactivate(timer);
- /*
- * We can call the callback from here. No restart
- * happens, so no danger of recursion
- */
- BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
- return 1;
case HRTIMER_CB_IRQSAFE_PERCPU:
case HRTIMER_CB_IRQSAFE_UNLOCKED:
/*
@@ -683,7 +675,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
*/
debug_hrtimer_deactivate(timer);
return 1;
- case HRTIMER_CB_IRQSAFE:
case HRTIMER_CB_SOFTIRQ:
/*
* Move everything else into the softirq pending list !
@@ -1209,6 +1200,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
enum hrtimer_restart (*fn)(struct hrtimer *);
struct hrtimer *timer;
int restart;
+ int emulate_hardirq_ctx = 0;
timer = list_entry(cpu_base->cb_pending.next,
struct hrtimer, cb_entry);
@@ -1217,10 +1209,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
timer_stats_account_hrtimer(timer);
fn = timer->function;
+ /*
+ * A timer might have been added to the cb_pending list
+ * when it was migrated during a cpu-offline operation.
+ * Emulate hardirq context for such timers.
+ */
+ if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
+ timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
+ emulate_hardirq_ctx = 1;
+
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
spin_unlock_irq(&cpu_base->lock);
- restart = fn(timer);
+ if (unlikely(emulate_hardirq_ctx)) {
+ local_irq_disable();
+ restart = fn(timer);
+ local_irq_enable();
+ } else
+ restart = fn(timer);
spin_lock_irq(&cpu_base->lock);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8b57a2597f2..9f8a3f25259 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -72,7 +72,7 @@ static bool kprobe_enabled;
DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
- spinlock_t lock ____cacheline_aligned;
+ spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];
static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
@@ -613,30 +613,37 @@ static int __kprobes __register_kprobe(struct kprobe *p,
return -EINVAL;
p->addr = addr;
- if (!kernel_text_address((unsigned long) p->addr) ||
- in_kprobes_functions((unsigned long) p->addr))
+ preempt_disable();
+ if (!__kernel_text_address((unsigned long) p->addr) ||
+ in_kprobes_functions((unsigned long) p->addr)) {
+ preempt_enable();
return -EINVAL;
+ }
p->mod_refcounted = 0;
/*
* Check if are we probing a module.
*/
- probed_mod = module_text_address((unsigned long) p->addr);
+ probed_mod = __module_text_address((unsigned long) p->addr);
if (probed_mod) {
- struct module *calling_mod = module_text_address(called_from);
+ struct module *calling_mod;
+ calling_mod = __module_text_address(called_from);
/*
* We must allow modules to probe themself and in this case
* avoid incrementing the module refcount, so as to allow
* unloading of self probing modules.
*/
if (calling_mod && calling_mod != probed_mod) {
- if (unlikely(!try_module_get(probed_mod)))
+ if (unlikely(!try_module_get(probed_mod))) {
+ preempt_enable();
return -EINVAL;
+ }
p->mod_refcounted = 1;
} else
probed_mod = NULL;
}
+ preempt_enable();
p->nmissed = 0;
INIT_LIST_HEAD(&p->list);
@@ -718,6 +725,10 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
struct kprobe *old_p;
if (p->mod_refcounted) {
+ /*
+ * Since we've already incremented refcount,
+ * we don't need to disable preemption.
+ */
mod = module_text_address((unsigned long)p->addr);
if (mod)
module_put(mod);
diff --git a/kernel/sched.c b/kernel/sched.c
index 57c933ffbee..c94baf2969e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -399,7 +399,7 @@ struct cfs_rq {
*/
struct sched_entity *curr, *next, *last;
- unsigned long nr_spread_over;
+ unsigned int nr_spread_over;
#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
@@ -969,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
}
}
+void task_rq_unlock_wait(struct task_struct *p)
+{
+ struct rq *rq = task_rq(p);
+
+ smp_mb(); /* spin-unlock-wait is not a full memory barrier */
+ spin_unlock_wait(&rq->lock);
+}
+
static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
@@ -1448,6 +1456,8 @@ static unsigned long cpu_avg_load_per_task(int cpu)
if (rq->nr_running)
rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+ else
+ rq->avg_load_per_task = 0;
return rq->avg_load_per_task;
}
@@ -5860,6 +5870,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
+ spin_lock_irqsave(&rq->lock, flags);
+
__sched_fork(idle);
idle->se.exec_start = sched_clock();
@@ -5867,7 +5879,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
idle->cpus_allowed = cpumask_of_cpu(cpu);
__set_task_cpu(idle, cpu);
- spin_lock_irqsave(&rq->lock, flags);
rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
idle->oncpu = 1;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 5ae17762ec3..48ecc51e770 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -144,7 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
last = __pick_last_entity(cfs_rq);
if (last)
max_vruntime = last->vruntime;
- min_vruntime = rq->cfs.min_vruntime;
+ min_vruntime = cfs_rq->min_vruntime;
rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
spin_unlock_irqrestore(&rq->lock, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
@@ -161,26 +161,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
SPLIT_NS(spread0));
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
-
- P(yld_exp_empty);
- P(yld_act_empty);
- P(yld_both_empty);
- P(yld_count);
- P(sched_switch);
- P(sched_count);
- P(sched_goidle);
-
- P(ttwu_count);
- P(ttwu_local);
-
- P(bkl_count);
-
-#undef P
-#endif
- SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
+ SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
@@ -260,6 +242,25 @@ static void print_cpu(struct seq_file *m, int cpu)
#undef P
#undef PN
+#ifdef CONFIG_SCHEDSTATS
+#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
+
+ P(yld_exp_empty);
+ P(yld_act_empty);
+ P(yld_both_empty);
+ P(yld_count);
+
+ P(sched_switch);
+ P(sched_count);
+ P(sched_goidle);
+
+ P(ttwu_count);
+ P(ttwu_local);
+
+ P(bkl_count);
+
+#undef P
+#endif
print_cfs_stats(m, cpu);
print_rt_stats(m, cpu);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 51aa3e102ac..98345e45b05 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -716,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
__enqueue_entity(cfs_rq, se);
}
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+ if (cfs_rq->last == se)
+ cfs_rq->last = NULL;
+
+ if (cfs_rq->next == se)
+ cfs_rq->next = NULL;
+}
+
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
@@ -738,11 +747,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
#endif
}
- if (cfs_rq->last == se)
- cfs_rq->last = NULL;
-
- if (cfs_rq->next == se)
- cfs_rq->next = NULL;
+ clear_buddies(cfs_rq, se);
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
@@ -977,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
if (unlikely(cfs_rq->nr_running == 1))
return;
+ clear_buddies(cfs_rq, se);
+
if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
update_rq_clock(rq);
/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7110daeb9a9..e7c69a720d6 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -269,10 +269,11 @@ void irq_enter(void)
{
int cpu = smp_processor_id();
- if (idle_cpu(cpu) && !in_interrupt())
+ if (idle_cpu(cpu) && !in_interrupt()) {
+ __irq_enter();
tick_check_idle(cpu);
-
- __irq_enter();
+ } else
+ __irq_enter();
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9bc4c00872c..24e8ceacc38 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -112,7 +112,7 @@ static int chill(void *unused)
int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
{
struct work_struct *sm_work;
- int i;
+ int i, ret;
/* Set up initial state. */
mutex_lock(&lock);
@@ -137,8 +137,9 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
/* This will release the thread on our CPU. */
put_cpu();
flush_workqueue(stop_machine_wq);
+ ret = active.fnret;
mutex_unlock(&lock);
- return active.fnret;
+ return ret;
}
int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5bbb1044f84..342fc9ccab4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void)
*/
static void tick_nohz_kick_tick(int cpu)
{
+#if 0
+ /* Switch back to 2.6.27 behaviour */
+
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t delta, now;
@@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu)
return;
tick_nohz_restart(ts, now);
+#endif
}
#else
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3f338063864..2f76193c348 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1060,7 +1060,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
/* Did the write stamp get updated already? */
if (unlikely(ts < cpu_buffer->write_stamp))
- goto again;
+ delta = 0;
if (test_time_stamp(delta)) {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9f3b478f917..697eda36b86 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1755,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
return TRACE_TYPE_HANDLED;
SEQ_PUT_FIELD_RET(s, entry->pid);
- SEQ_PUT_FIELD_RET(s, iter->cpu);
+ SEQ_PUT_FIELD_RET(s, entry->cpu);
SEQ_PUT_FIELD_RET(s, iter->ts);
switch (entry->type) {
@@ -2676,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
{
unsigned long val;
char buf[64];
- int ret;
+ int ret, cpu;
struct trace_array *tr = filp->private_data;
if (cnt >= sizeof(buf))
@@ -2704,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
goto out;
}
+ /* disable all cpu buffers */
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_inc(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_inc(&max_tr.data[cpu]->disabled);
+ }
+
if (val != global_trace.entries) {
ret = ring_buffer_resize(global_trace.buffer, val);
if (ret < 0) {
@@ -2735,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
if (tracing_disabled)
cnt = -ENOMEM;
out:
+ for_each_tracing_cpu(cpu) {
+ if (global_trace.data[cpu])
+ atomic_dec(&global_trace.data[cpu]->disabled);
+ if (max_tr.data[cpu])
+ atomic_dec(&max_tr.data[cpu]->disabled);
+ }
+
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f928f2a87b9..d4dc69ddebd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -970,6 +970,51 @@ undo:
return ret;
}
+#ifdef CONFIG_SMP
+struct work_for_cpu {
+ struct work_struct work;
+ long (*fn)(void *);
+ void *arg;
+ long ret;
+};
+
+static void do_work_for_cpu(struct work_struct *w)
+{
+ struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
+
+ wfc->ret = wfc->fn(wfc->arg);
+}
+
+/**
+ * work_on_cpu - run a function in user context on a particular cpu
+ * @cpu: the cpu to run on
+ * @fn: the function to run
+ * @arg: the function arg
+ *
+ * This will return -EINVAL in the cpu is not online, or the return value
+ * of @fn otherwise.
+ */
+long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
+{
+ struct work_for_cpu wfc;
+
+ INIT_WORK(&wfc.work, do_work_for_cpu);
+ wfc.fn = fn;
+ wfc.arg = arg;
+ get_online_cpus();
+ if (unlikely(!cpu_online(cpu)))
+ wfc.ret = -EINVAL;
+ else {
+ schedule_work_on(cpu, &wfc.work);
+ flush_work(&wfc.work);
+ }
+ put_online_cpus();
+
+ return wfc.ret;
+}
+EXPORT_SYMBOL_GPL(work_on_cpu);
+#endif /* CONFIG_SMP */
+
void __init init_workqueues(void)
{
cpu_populated_map = cpu_online_map;