aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c13
-rw-r--r--kernel/auditfilter.c4
-rw-r--r--kernel/cgroup.c3
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/futex.c24
-rw-r--r--kernel/irq/Makefile2
-rw-r--r--kernel/irq/chip.c12
-rw-r--r--kernel/irq/handle.c60
-rw-r--r--kernel/irq/internals.h5
-rw-r--r--kernel/irq/manage.c17
-rw-r--r--kernel/irq/migration.c14
-rw-r--r--kernel/irq/numa_migrate.c38
-rw-r--r--kernel/kexec.c2
-rw-r--r--kernel/kgdb.c4
-rw-r--r--kernel/kmod.c4
-rw-r--r--kernel/kprobes.c31
-rw-r--r--kernel/lockdep_internals.h4
-rw-r--r--kernel/mutex.c4
-rw-r--r--kernel/panic.c40
-rw-r--r--kernel/posix-cpu-timers.c8
-rw-r--r--kernel/power/disk.c25
-rw-r--r--kernel/power/main.c7
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/sched.c304
-rw-r--r--kernel/sched_clock.c3
-rw-r--r--kernel/sched_cpupri.c2
-rw-r--r--kernel/sched_fair.c13
-rw-r--r--kernel/sched_idletask.c3
-rw-r--r--kernel/sched_rt.c2
-rw-r--r--kernel/smp.c2
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/sysctl.c36
-rw-r--r--kernel/time/tick-common.c12
-rw-r--r--kernel/time/timekeeping.c2
-rw-r--r--kernel/timer.c86
-rw-r--r--kernel/trace/trace.c3
-rw-r--r--kernel/wait.c2
37 files changed, 476 insertions, 321 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 968ef9457d4..27235f5de19 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -92,19 +92,18 @@ extern int initcall_debug;
static async_cookie_t __lowest_in_progress(struct list_head *running)
{
struct async_entry *entry;
+
if (!list_empty(running)) {
entry = list_first_entry(running,
struct async_entry, list);
return entry->cookie;
- } else if (!list_empty(&async_pending)) {
- entry = list_first_entry(&async_pending,
- struct async_entry, list);
- return entry->cookie;
- } else {
- /* nothing in progress... next_cookie is "infinity" */
- return next_cookie;
}
+ list_for_each_entry(entry, &async_pending, list)
+ if (entry->running == running)
+ return entry->cookie;
+
+ return next_cookie; /* "infinity" value */
}
static async_cookie_t lowest_in_progress(struct list_head *running)
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a6fe71fd5d1..713098ee5a0 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1028,7 +1028,7 @@ static void audit_update_watch(struct audit_parent *parent,
if (audit_enabled) {
struct audit_buffer *ab;
- ab = audit_log_start(NULL, GFP_KERNEL,
+ ab = audit_log_start(NULL, GFP_NOFS,
AUDIT_CONFIG_CHANGE);
audit_log_format(ab, "auid=%u ses=%u",
audit_get_loginuid(current),
@@ -1067,7 +1067,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
e = container_of(r, struct audit_entry, rule);
if (audit_enabled) {
struct audit_buffer *ab;
- ab = audit_log_start(NULL, GFP_KERNEL,
+ ab = audit_log_start(NULL, GFP_NOFS,
AUDIT_CONFIG_CHANGE);
audit_log_format(ab, "auid=%u ses=%u",
audit_get_loginuid(current),
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 382109b5bae..a7267bfd376 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1133,8 +1133,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
free_cg_links:
free_cg_links(&tmp_cg_links);
drop_new_super:
- up_write(&sb->s_umount);
- deactivate_super(sb);
+ deactivate_locked_super(sb);
return ret;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd0072..875ffbdd96d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1409,7 +1409,7 @@ long do_fork(unsigned long clone_flags,
}
audit_finish_fork(p);
- tracehook_report_clone(trace, regs, clone_flags, nr, p);
+ tracehook_report_clone(regs, clone_flags, nr, p);
/*
* We set PF_STARTING at creation in case tracing wants to
diff --git a/kernel/futex.c b/kernel/futex.c
index eef8cd26b5e..d546b2d53a6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -193,6 +193,7 @@ static void drop_futex_key_refs(union futex_key *key)
* @uaddr: virtual address of the futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
* @key: address where result is stored.
+ * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE)
*
* Returns a negative error code or 0
* The key words are stored in *key on success.
@@ -203,7 +204,8 @@ static void drop_futex_key_refs(union futex_key *key)
*
* lock_page() might sleep, the caller should not hold a spinlock.
*/
-static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
+static int
+get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
@@ -226,7 +228,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
* but access_ok() should be faster than find_vma()
*/
if (!fshared) {
- if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
+ if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
return -EFAULT;
key->private.mm = mm;
key->private.address = address;
@@ -235,7 +237,7 @@ static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
}
again:
- err = get_user_pages_fast(address, 1, 0, &page);
+ err = get_user_pages_fast(address, 1, rw == VERIFY_WRITE, &page);
if (err < 0)
return err;
@@ -677,7 +679,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
if (!bitset)
return -EINVAL;
- ret = get_futex_key(uaddr, fshared, &key);
+ ret = get_futex_key(uaddr, fshared, &key, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
@@ -723,10 +725,10 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
int ret, op_ret;
retry:
- ret = get_futex_key(uaddr1, fshared, &key1);
+ ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, fshared, &key2);
+ ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -814,10 +816,10 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
int ret, drop_count = 0;
retry:
- ret = get_futex_key(uaddr1, fshared, &key1);
+ ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, fshared, &key2);
+ ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_READ);
if (unlikely(ret != 0))
goto out_put_key1;
@@ -1140,7 +1142,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
q.bitset = bitset;
retry:
q.key = FUTEX_KEY_INIT;
- ret = get_futex_key(uaddr, fshared, &q.key);
+ ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
@@ -1330,7 +1332,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
q.pi_state = NULL;
retry:
q.key = FUTEX_KEY_INIT;
- ret = get_futex_key(uaddr, fshared, &q.key);
+ ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
@@ -1594,7 +1596,7 @@ retry:
if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
return -EPERM;
- ret = get_futex_key(uaddr, fshared, &key);
+ ret = get_futex_key(uaddr, fshared, &key, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 3394f8f5296..7d047808419 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
-obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o
obj-$(CONFIG_PM_SLEEP) += pm.o
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c687ba4363f..13c68e71b72 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,7 +359,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
spin_lock(&desc->lock);
mask_ack_irq(desc, irq);
- desc = irq_remap_to_desc(irq, desc);
if (unlikely(desc->status & IRQ_INPROGRESS))
goto out_unlock;
@@ -438,7 +437,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
desc->status &= ~IRQ_INPROGRESS;
out:
desc->chip->eoi(irq);
- desc = irq_remap_to_desc(irq, desc);
spin_unlock(&desc->lock);
}
@@ -475,7 +473,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
!desc->action)) {
desc->status |= (IRQ_PENDING | IRQ_MASKED);
mask_ack_irq(desc, irq);
- desc = irq_remap_to_desc(irq, desc);
goto out_unlock;
}
kstat_incr_irqs_this_cpu(irq, desc);
@@ -483,7 +480,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
/* Start handling the irq */
if (desc->chip->ack)
desc->chip->ack(irq);
- desc = irq_remap_to_desc(irq, desc);
/* Mark the IRQ currently in progress.*/
desc->status |= IRQ_INPROGRESS;
@@ -544,10 +540,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
- if (desc->chip->eoi) {
+ if (desc->chip->eoi)
desc->chip->eoi(irq);
- desc = irq_remap_to_desc(irq, desc);
- }
}
void
@@ -582,10 +576,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
/* Uninstall? */
if (handle == handle_bad_irq) {
- if (desc->chip != &no_irq_chip) {
+ if (desc->chip != &no_irq_chip)
mask_ack_irq(desc, irq);
- desc = irq_remap_to_desc(irq, desc);
- }
desc->status |= IRQ_DISABLED;
desc->depth = 1;
}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index d82142be8dd..18041a254d3 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -11,6 +11,7 @@
*/
#include <linux/irq.h>
+#include <linux/slab.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
@@ -81,45 +82,48 @@ static struct irq_desc irq_desc_init = {
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
};
-void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
{
- int node;
void *ptr;
- node = cpu_to_node(cpu);
- ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
+ if (slab_is_available())
+ ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
+ GFP_ATOMIC, node);
+ else
+ ptr = alloc_bootmem_node(NODE_DATA(node),
+ nr * sizeof(*desc->kstat_irqs));
/*
* don't overwite if can not get new one
* init_copy_kstat_irqs() could still use old one
*/
if (ptr) {
- printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n",
- cpu, node);
+ printk(KERN_DEBUG " alloc kstat_irqs on node %d\n", node);
desc->kstat_irqs = ptr;
}
}
-static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
{
memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
spin_lock_init(&desc->lock);
desc->irq = irq;
#ifdef CONFIG_SMP
- desc->cpu = cpu;
+ desc->node = node;
#endif
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- init_kstat_irqs(desc, cpu, nr_cpu_ids);
+ init_kstat_irqs(desc, node, nr_cpu_ids);
if (!desc->kstat_irqs) {
printk(KERN_ERR "can not alloc kstat_irqs\n");
BUG_ON(1);
}
- if (!init_alloc_desc_masks(desc, cpu, false)) {
+ if (!alloc_desc_masks(desc, node, false)) {
printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
BUG_ON(1);
}
- arch_init_chip_data(desc, cpu);
+ init_desc_masks(desc);
+ arch_init_chip_data(desc, node);
}
/*
@@ -169,7 +173,8 @@ int __init early_irq_init(void)
desc[i].irq = i;
desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
- init_alloc_desc_masks(&desc[i], 0, true);
+ alloc_desc_masks(&desc[i], 0, true);
+ init_desc_masks(&desc[i]);
irq_desc_ptrs[i] = desc + i;
}
@@ -187,11 +192,10 @@ struct irq_desc *irq_to_desc(unsigned int irq)
return NULL;
}
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
{
struct irq_desc *desc;
unsigned long flags;
- int node;
if (irq >= nr_irqs) {
WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
@@ -210,15 +214,17 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
if (desc)
goto out_unlock;
- node = cpu_to_node(cpu);
- desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
- printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n",
- irq, cpu, node);
+ if (slab_is_available())
+ desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+ else
+ desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
+
+ printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node);
if (!desc) {
printk(KERN_ERR "can not alloc irq_desc\n");
BUG_ON(1);
}
- init_one_irq_desc(irq, desc, cpu);
+ init_one_irq_desc(irq, desc, node);
irq_desc_ptrs[irq] = desc;
@@ -256,7 +262,8 @@ int __init early_irq_init(void)
for (i = 0; i < count; i++) {
desc[i].irq = i;
- init_alloc_desc_masks(&desc[i], 0, true);
+ alloc_desc_masks(&desc[i], 0, true);
+ init_desc_masks(&desc[i]);
desc[i].kstat_irqs = kstat_irqs_all[i];
}
return arch_early_irq_init();
@@ -267,7 +274,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
{
return irq_to_desc(irq);
}
@@ -363,8 +370,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
- WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!");
-
if (!(action->flags & IRQF_DISABLED))
local_irq_enable_in_hardirq();
@@ -455,11 +460,8 @@ unsigned int __do_IRQ(unsigned int irq)
/*
* No locking required for CPU-local interrupts:
*/
- if (desc->chip->ack) {
+ if (desc->chip->ack)
desc->chip->ack(irq);
- /* get new one */
- desc = irq_remap_to_desc(irq, desc);
- }
if (likely(!(desc->status & IRQ_DISABLED))) {
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
@@ -470,10 +472,8 @@ unsigned int __do_IRQ(unsigned int irq)
}
spin_lock(&desc->lock);
- if (desc->chip->ack) {
+ if (desc->chip->ack)
desc->chip->ack(irq);
- desc = irq_remap_to_desc(irq, desc);
- }
/*
* REPLAY is when Linux resends an IRQ that was dropped earlier
* WAITING is used by probe to mark irqs that are being tested
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 01ce20eab38..73468253143 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -16,7 +16,7 @@ extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
extern struct lock_class_key irq_desc_lock_class;
-extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
+extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
extern void clear_kstat_irqs(struct irq_desc *desc);
extern spinlock_t sparse_irq_lock;
@@ -42,6 +42,9 @@ static inline void unregister_handler_proc(unsigned int irq,
extern int irq_select_affinity_usr(unsigned int irq);
+extern void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask);
+
/*
* Debugging printout:
*/
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 2734eca5924..aaf5c9d0577 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -80,7 +80,7 @@ int irq_can_set_affinity(unsigned int irq)
return 1;
}
-static void
+void
irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
{
struct irqaction *action = desc->action;
@@ -109,17 +109,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
spin_lock_irqsave(&desc->lock, flags);
#ifdef CONFIG_GENERIC_PENDING_IRQ
- if (desc->status & IRQ_MOVE_PCNTXT)
- desc->chip->set_affinity(irq, cpumask);
+ if (desc->status & IRQ_MOVE_PCNTXT) {
+ if (!desc->chip->set_affinity(irq, cpumask)) {
+ cpumask_copy(desc->affinity, cpumask);
+ irq_set_thread_affinity(desc, cpumask);
+ }
+ }
else {
desc->status |= IRQ_MOVE_PENDING;
cpumask_copy(desc->pending_mask, cpumask);
}
#else
- cpumask_copy(desc->affinity, cpumask);
- desc->chip->set_affinity(irq, cpumask);
+ if (!desc->chip->set_affinity(irq, cpumask)) {
+ cpumask_copy(desc->affinity, cpumask);
+ irq_set_thread_affinity(desc, cpumask);
+ }
#endif
- irq_set_thread_affinity(desc, cpumask);
desc->status |= IRQ_AFFINITY_SET;
spin_unlock_irqrestore(&desc->lock, flags);
return 0;
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index e05ad9be43b..cfe767ca154 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -1,5 +1,8 @@
#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+#include "internals.h"
void move_masked_irq(int irq)
{
@@ -39,11 +42,12 @@ void move_masked_irq(int irq)
* masking the irqs.
*/
if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
- < nr_cpu_ids)) {
- cpumask_and(desc->affinity,
- desc->pending_mask, cpu_online_mask);
- desc->chip->set_affinity(irq, desc->affinity);
- }
+ < nr_cpu_ids))
+ if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
+ cpumask_copy(desc->affinity, desc->pending_mask);
+ irq_set_thread_affinity(desc, desc->pending_mask);
+ }
+
cpumask_clear(desc->pending_mask);
}
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 44bbdcbaf8d..2f69bee57bf 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -15,9 +15,9 @@
static void init_copy_kstat_irqs(struct irq_desc *old_desc,
struct irq_desc *desc,
- int cpu, int nr)
+ int node, int nr)
{
- init_kstat_irqs(desc, cpu, nr);
+ init_kstat_irqs(desc, node, nr);
if (desc->kstat_irqs != old_desc->kstat_irqs)
memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
@@ -34,20 +34,20 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
}
static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
- struct irq_desc *desc, int cpu)
+ struct irq_desc *desc, int node)
{
memcpy(desc, old_desc, sizeof(struct irq_desc));
- if (!init_alloc_desc_masks(desc, cpu, false)) {
+ if (!alloc_desc_masks(desc, node, false)) {
printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
"for migration.\n", irq);
return false;
}
spin_lock_init(&desc->lock);
- desc->cpu = cpu;
+ desc->node = node;
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+ init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
init_copy_desc_masks(old_desc, desc);
- arch_init_copy_chip_data(old_desc, desc, cpu);
+ arch_init_copy_chip_data(old_desc, desc, node);
return true;
}
@@ -59,12 +59,11 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
}
static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
- int cpu)
+ int node)
{
struct irq_desc *desc;
unsigned int irq;
unsigned long flags;
- int node;
irq = old_desc->irq;
@@ -76,7 +75,6 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
if (desc && old_desc != desc)
goto out_unlock;
- node = cpu_to_node(cpu);
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
if (!desc) {
printk(KERN_ERR "irq %d: can not get new irq_desc "
@@ -85,7 +83,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
desc = old_desc;
goto out_unlock;
}
- if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
+ if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) {
/* still use old one */
kfree(desc);
desc = old_desc;
@@ -97,9 +95,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
/* free the old one */
free_one_irq_desc(old_desc, desc);
- spin_unlock(&old_desc->lock);
kfree(old_desc);
- spin_lock(&desc->lock);
return desc;
@@ -109,24 +105,14 @@ out_unlock:
return desc;
}
-struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
- int old_cpu;
- int node, old_node;
-
/* those all static, do move them */
if (desc->irq < NR_IRQS_LEGACY)
return desc;
- old_cpu = desc->cpu;
- if (old_cpu != cpu) {
- node = cpu_to_node(cpu);
- old_node = cpu_to_node(old_cpu);
- if (old_node != node)
- desc = __real_move_irq_desc(desc, cpu);
- else
- desc->cpu = cpu;
- }
+ if (desc->node != node)
+ desc = __real_move_irq_desc(desc, node);
return desc;
}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 5a758c6e495..e4983770913 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1451,7 +1451,6 @@ int kernel_kexec(void)
error = device_suspend(PMSG_FREEZE);
if (error)
goto Resume_console;
- device_pm_lock();
/* At this point, device_suspend() has been called,
* but *not* device_power_down(). We *must*
* device_power_down() now. Otherwise, drivers for
@@ -1489,7 +1488,6 @@ int kernel_kexec(void)
enable_nonboot_cpus();
device_power_up(PMSG_RESTORE);
Resume_devices:
- device_pm_unlock();
device_resume(PMSG_RESTORE);
Resume_console:
resume_console();
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index e4dcfb2272a..9147a3190c9 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -1583,8 +1583,8 @@ static void sysrq_handle_gdb(int key, struct tty_struct *tty)
static struct sysrq_key_op sysrq_gdb_op = {
.handler = sysrq_handle_gdb,
- .help_msg = "Gdb",
- .action_msg = "GDB",
+ .help_msg = "debug(G)",
+ .action_msg = "DEBUG",
};
#endif
diff --git a/kernel/kmod.c b/kernel/kmod.c
index b750675251e..7e95bedb2bf 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -370,8 +370,10 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
sub_info->argv = argv;
sub_info->envp = envp;
sub_info->cred = prepare_usermodehelper_creds();
- if (!sub_info->cred)
+ if (!sub_info->cred) {
+ kfree(sub_info);
return NULL;
+ }
out:
return sub_info;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index a5e74ddee0e..c0fa54b276d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -319,6 +319,22 @@ struct kprobe __kprobes *get_kprobe(void *addr)
return NULL;
}
+/* Arm a kprobe with text_mutex */
+static void __kprobes arm_kprobe(struct kprobe *kp)
+{
+ mutex_lock(&text_mutex);
+ arch_arm_kprobe(kp);
+ mutex_unlock(&text_mutex);
+}
+
+/* Disarm a kprobe with text_mutex */
+static void __kprobes disarm_kprobe(struct kprobe *kp)
+{
+ mutex_lock(&text_mutex);
+ arch_disarm_kprobe(kp);
+ mutex_unlock(&text_mutex);
+}
+
/*
* Aggregate handlers for multiple kprobes support - these handlers
* take care of invoking the individual kprobe handlers on p->list
@@ -538,7 +554,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
ap->flags &= ~KPROBE_FLAG_DISABLED;
if (!kprobes_all_disarmed)
/* Arm the breakpoint again. */
- arch_arm_kprobe(ap);
+ arm_kprobe(ap);
}
return 0;
}
@@ -789,11 +805,8 @@ static int __kprobes __unregister_kprobe_top(struct kprobe *p)
* enabled and not gone - otherwise, the breakpoint would
* already have been removed. We save on flushing icache.
*/
- if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) {
- mutex_lock(&text_mutex);
- arch_disarm_kprobe(p);
- mutex_unlock(&text_mutex);
- }
+ if (!kprobes_all_disarmed && !kprobe_disabled(old_p))
+ disarm_kprobe(p);
hlist_del_rcu(&old_p->hlist);
} else {
if (p->break_handler && !kprobe_gone(p))
@@ -810,7 +823,7 @@ noclean:
if (!kprobe_disabled(old_p)) {
try_to_disable_aggr_kprobe(old_p);
if (!kprobes_all_disarmed && kprobe_disabled(old_p))
- arch_disarm_kprobe(old_p);
+ disarm_kprobe(old_p);
}
}
return 0;
@@ -1364,7 +1377,7 @@ int __kprobes disable_kprobe(struct kprobe *kp)
try_to_disable_aggr_kprobe(p);
if (!kprobes_all_disarmed && kprobe_disabled(p))
- arch_disarm_kprobe(p);
+ disarm_kprobe(p);
out:
mutex_unlock(&kprobe_mutex);
return ret;
@@ -1393,7 +1406,7 @@ int __kprobes enable_kprobe(struct kprobe *kp)
}
if (!kprobes_all_disarmed && kprobe_disabled(p))
- arch_arm_kprobe(p);
+ arm_kprobe(p);
p->flags &= ~KPROBE_FLAG_DISABLED;
if (p != kp)
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index a2cc7e9a6e8..699a2ac3a0d 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -54,9 +54,9 @@ enum {
* table (if it's not there yet), and we check it for lock order
* conflicts and deadlocks.
*/
-#define MAX_LOCKDEP_ENTRIES 8192UL
+#define MAX_LOCKDEP_ENTRIES 16384UL
-#define MAX_LOCKDEP_CHAINS_BITS 14
+#define MAX_LOCKDEP_CHAINS_BITS 15
#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 507cf2b5e9f..6ca5fe96e39 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -249,7 +249,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
/* didnt get the lock, go to sleep: */
spin_unlock_mutex(&lock->wait_lock, flags);
- __schedule();
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
spin_lock_mutex(&lock->wait_lock, flags);
}
diff --git a/kernel/panic.c b/kernel/panic.c
index 3dcaa166135..984b3ecbd72 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -340,34 +340,46 @@ void oops_exit(void)
}
#ifdef WANT_WARN_ON_SLOWPATH
-void warn_slowpath(const char *file, int line, const char *fmt, ...)
-{
+struct slowpath_args {
+ const char *fmt;
va_list args;
- char function[KSYM_SYMBOL_LEN];
- unsigned long caller = (unsigned long)__builtin_return_address(0);
- const char *board;
+};
- sprint_symbol(function, caller);
+static void warn_slowpath_common(const char *file, int line, void *caller, struct slowpath_args *args)
+{
+ const char *board;
printk(KERN_WARNING "------------[ cut here ]------------\n");
- printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file,
- line, function);
+ printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller);
board = dmi_get_system_info(DMI_PRODUCT_NAME);
if (board)
printk(KERN_WARNING "Hardware name: %s\n", board);
- if (fmt) {
- va_start(args, fmt);
- vprintk(fmt, args);
- va_end(args);
- }
+ if (args)
+ vprintk(args->fmt, args->args);
print_modules();
dump_stack();
print_oops_end_marker();
add_taint(TAINT_WARN);
}
-EXPORT_SYMBOL(warn_slowpath);
+
+void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
+{
+ struct slowpath_args args;
+
+ args.fmt = fmt;
+ va_start(args.args, fmt);
+ warn_slowpath_common(file, line, __builtin_return_address(0), &args);
+ va_end(args.args);
+}
+EXPORT_SYMBOL(warn_slowpath_fmt);
+
+void warn_slowpath_null(const char *file, int line)
+{
+ warn_slowpath_common(file, line, __builtin_return_address(0), NULL);
+}
+EXPORT_SYMBOL(warn_slowpath_null);
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index c9dcf98b446..bece7c0b67b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1420,19 +1420,19 @@ void run_posix_cpu_timers(struct task_struct *tsk)
* timer call will interfere.
*/
list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
- int firing;
+ int cpu_firing;
+
spin_lock(&timer->it_lock);
list_del_init(&timer->it.cpu.entry);
- firing = timer->it.cpu.firing;
+ cpu_firing = timer->it.cpu.firing;
timer->it.cpu.firing = 0;
/*
* The firing flag is -1 if we collided with a reset
* of the timer, which already reported this
* almost-firing as an overrun. So don't generate an event.
*/
- if (likely(firing >= 0)) {
+ if (likely(cpu_firing >= 0))
cpu_timer_fire(timer);
- }
spin_unlock(&timer->it_lock);
}
}
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index e71ca9cd81b..5cb080e7eeb 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -215,8 +215,6 @@ static int create_image(int platform_mode)
if (error)
return error;
- device_pm_lock();
-
/* At this point, device_suspend() has been called, but *not*
* device_power_down(). We *must* call device_power_down() now.
* Otherwise, drivers for some devices (e.g. interrupt controllers)
@@ -227,7 +225,7 @@ static int create_image(int platform_mode)
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting hibernation\n");
- goto Unlock;
+ return error;
}
error = platform_pre_snapshot(platform_mode);
@@ -241,9 +239,9 @@ static int create_image(int platform_mode)
local_irq_disable();
- sysdev_suspend(PMSG_FREEZE);
+ error = sysdev_suspend(PMSG_FREEZE);
if (error) {
- printk(KERN_ERR "PM: Some devices failed to power down, "
+ printk(KERN_ERR "PM: Some system devices failed to power down, "
"aborting hibernation\n");
goto Enable_irqs;
}
@@ -280,9 +278,6 @@ static int create_image(int platform_mode)
device_power_up(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
- Unlock:
- device_pm_unlock();
-
return error;
}
@@ -344,13 +339,11 @@ static int resume_target_kernel(bool platform_mode)
{
int error;
- device_pm_lock();
-
error = device_power_down(PMSG_QUIESCE);
if (error) {
printk(KERN_ERR "PM: Some devices failed to power down, "
"aborting resume\n");
- goto Unlock;
+ return error;
}
error = platform_pre_restore(platform_mode);
@@ -403,9 +396,6 @@ static int resume_target_kernel(bool platform_mode)
device_power_up(PMSG_RECOVER);
- Unlock:
- device_pm_unlock();
-
return error;
}
@@ -464,11 +454,9 @@ int hibernation_platform_enter(void)
goto Resume_devices;
}
- device_pm_lock();
-
error = device_power_down(PMSG_HIBERNATE);
if (error)
- goto Unlock;
+ goto Resume_devices;
error = hibernation_ops->prepare();
if (error)
@@ -493,9 +481,6 @@ int hibernation_platform_enter(void)
device_power_up(PMSG_RESTORE);
- Unlock:
- device_pm_unlock();
-
Resume_devices:
entering_platform_hibernation = false;
device_resume(PMSG_RESTORE);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f99ed6a75ea..868028280d1 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -289,12 +289,10 @@ static int suspend_enter(suspend_state_t state)
{
int error;
- device_pm_lock();
-
if (suspend_ops->prepare) {
error = suspend_ops->prepare();
if (error)
- goto Done;
+ return error;
}
error = device_power_down(PMSG_SUSPEND);
@@ -343,9 +341,6 @@ static int suspend_enter(suspend_state_t state)
if (suspend_ops->finish)
suspend_ops->finish();
- Done:
- device_pm_unlock();
-
return error;
}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0692ab5a0d6..42c317874cf 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -304,6 +304,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
if (child->ptrace) {
child->exit_code = data;
dead = __ptrace_detach(current, child);
+ if (!child->exit_state)
+ wake_up_process(child);
}
write_unlock_irq(&tasklist_lock);
diff --git a/kernel/sched.c b/kernel/sched.c
index b902e587a3a..c3c04e25656 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -630,6 +630,10 @@ struct rq {
struct list_head migration_queue;
#endif
+ /* calc_load related fields */
+ unsigned long calc_load_update;
+ long calc_load_active;
+
#ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP
int hrtick_csd_pending;
@@ -1728,6 +1732,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
}
#endif
+static void calc_load_account_active(struct rq *this_rq);
+
#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
@@ -2458,6 +2464,17 @@ out:
return success;
}
+/**
+ * wake_up_process - Wake up a specific process
+ * @p: The process to be woken up.
+ *
+ * Attempt to wake up the nominated process and move it to the set of runnable
+ * processes. Returns 1 if the process was woken up, 0 if it was already
+ * running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
int wake_up_process(struct task_struct *p)
{
return try_to_wake_up(p, TASK_ALL, 0);
@@ -2856,19 +2873,72 @@ unsigned long nr_iowait(void)
return sum;
}
-unsigned long nr_active(void)
+/* Variables and functions for calc_load */
+static atomic_long_t calc_load_tasks;
+static unsigned long calc_load_update;
+unsigned long avenrun[3];
+EXPORT_SYMBOL(avenrun);
+
+/**
+ * get_avenrun - get the load average array
+ * @loads: pointer to dest load array
+ * @offset: offset to add
+ * @shift: shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
{
- unsigned long i, running = 0, uninterruptible = 0;
+ loads[0] = (avenrun[0] + offset) << shift;
+ loads[1] = (avenrun[1] + offset) << shift;
+ loads[2] = (avenrun[2] + offset) << shift;
+}
- for_each_online_cpu(i) {
- running += cpu_rq(i)->nr_running;
- uninterruptible += cpu_rq(i)->nr_uninterruptible;
- }
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+ load *= exp;
+ load += active * (FIXED_1 - exp);
+ return load >> FSHIFT;
+}
- if (unlikely((long)uninterruptible < 0))
- uninterruptible = 0;
+/*
+ * calc_global_load - update the avenrun load estimates 10 ticks after the
+ * CPUs have updated calc_load_tasks.
+ */
+void calc_global_load(void)
+{
+ unsigned long upd = calc_load_update + 10;
+ long active;
+
+ if (time_before(jiffies, upd))
+ return;
+
+ active = atomic_long_read(&calc_load_tasks);
+ active = active > 0 ? active * FIXED_1 : 0;
- return running + uninterruptible;
+ avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+ avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+ avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+ calc_load_update += LOAD_FREQ;
+}
+
+/*
+ * Either called from update_cpu_load() or from a cpu going idle
+ */
+static void calc_load_account_active(struct rq *this_rq)
+{
+ long nr_active, delta;
+
+ nr_active = this_rq->nr_running;
+ nr_active += (long) this_rq->nr_uninterruptible;
+
+ if (nr_active != this_rq->calc_load_active) {
+ delta = nr_active - this_rq->calc_load_active;
+ this_rq->calc_load_active = nr_active;
+ atomic_long_add(delta, &calc_load_tasks);
+ }
}
/*
@@ -2899,6 +2969,11 @@ static void update_cpu_load(struct rq *this_rq)
new_load += scale-1;
this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
}
+
+ if (time_after_eq(jiffies, this_rq->calc_load_update)) {
+ this_rq->calc_load_update += LOAD_FREQ;
+ calc_load_account_active(this_rq);
+ }
}
#ifdef CONFIG_SMP
@@ -4240,10 +4315,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
static struct {
atomic_t load_balancer;
cpumask_var_t cpu_mask;
+ cpumask_var_t ilb_grp_nohz_mask;
} nohz ____cacheline_aligned = {
.load_balancer = ATOMIC_INIT(-1),
};
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+/**
+ * lowest_flag_domain - Return lowest sched_domain containing flag.
+ * @cpu: The cpu whose lowest level of sched domain is to
+ * be returned.
+ * @flag: The flag to check for the lowest sched_domain
+ * for the given cpu.
+ *
+ * Returns the lowest sched_domain of a cpu which contains the given flag.
+ */
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+ struct sched_domain *sd;
+
+ for_each_domain(cpu, sd)
+ if (sd && (sd->flags & flag))
+ break;
+
+ return sd;
+}
+
+/**
+ * for_each_flag_domain - Iterates over sched_domains containing the flag.
+ * @cpu: The cpu whose domains we're iterating over.
+ * @sd: variable holding the value of the power_savings_sd
+ * for cpu.
+ * @flag: The flag to filter the sched_domains to be iterated.
+ *
+ * Iterates over all the scheduler domains for a given cpu that has the 'flag'
+ * set, starting from the lowest sched_domain to the highest.
+ */
+#define for_each_flag_domain(cpu, sd, flag) \
+ for (sd = lowest_flag_domain(cpu, flag); \
+ (sd && (sd->flags & flag)); sd = sd->parent)
+
+/**
+ * is_semi_idle_group - Checks if the given sched_group is semi-idle.
+ * @ilb_group: group to be checked for semi-idleness
+ *
+ * Returns: 1 if the group is semi-idle. 0 otherwise.
+ *
+ * We define a sched_group to be semi-idle if it has at least one idle-CPU
+ * and at least one non-idle CPU. This helper function checks if the given
+ * sched_group is semi-idle or not.
+ */
+static inline int is_semi_idle_group(struct sched_group *ilb_group)
+{
+ cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+ sched_group_cpus(ilb_group));
+
+ /*
+ * A sched_group is semi-idle when it has at least one busy cpu
+ * and at least one idle cpu.
+ */
+ if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+ return 0;
+
+ if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+ return 0;
+
+ return 1;
+}
+/**
+ * find_new_ilb - Finds the optimum idle load balancer for nomination.
+ * @cpu: The cpu which is nominating a new idle_load_balancer.
+ *
+ * Returns: The id of the idle load balancer if it exists,
+ * else a value >= nr_cpu_ids.
+ *
+ * This algorithm picks the idle load balancer such that it belongs to a
+ * semi-idle powersavings sched_domain. The idea is to try and avoid
+ * completely idle packages/cores just for the purpose of idle load balancing
+ * when there are other idle cpu's which are better suited for that job.
+ */
+static int find_new_ilb(int cpu)
+{
+ struct sched_domain *sd;
+ struct sched_group *ilb_group;
+
+ /*
+ * Have idle load balancer selection from semi-idle packages only
+ * when power-aware load balancing is enabled
+ */
+ if (!(sched_smt_power_savings || sched_mc_power_savings))
+ goto out_done;
+
+ /*
+ * Optimize for the case when we have no idle CPUs or only one
+ * idle CPU. Don't walk the sched_domain hierarchy in such cases
+ */
+ if (cpumask_weight(nohz.cpu_mask) < 2)
+ goto out_done;
+
+ for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
+ ilb_group = sd->groups;
+
+ do {
+ if (is_semi_idle_group(ilb_group))
+ return cpumask_first(nohz.ilb_grp_nohz_mask);
+
+ ilb_group = ilb_group->next;
+
+ } while (ilb_group != sd->groups);
+ }
+
+out_done:
+ return cpumask_first(nohz.cpu_mask);
+}
+#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
+static inline int find_new_ilb(int call_cpu)
+{
+ return cpumask_first(nohz.cpu_mask);
+}
+#endif
+
/*
* This routine will try to nominate the ilb (idle load balancing)
* owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -4298,8 +4489,24 @@ int select_nohz_load_balancer(int stop_tick)
/* make me the ilb owner */
if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
return 1;
- } else if (atomic_read(&nohz.load_balancer) == cpu)
+ } else if (atomic_read(&nohz.load_balancer) == cpu) {
+ int new_ilb;
+
+ if (!(sched_smt_power_savings ||
+ sched_mc_power_savings))
+ return 1;
+ /*
+ * Check to see if there is a more power-efficient
+ * ilb.
+ */
+ new_ilb = find_new_ilb(cpu);
+ if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
+ atomic_set(&nohz.load_balancer, -1);
+ resched_cpu(new_ilb);
+ return 0;
+ }
return 1;
+ }
} else {
if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
return 0;
@@ -4468,15 +4675,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
}
if (atomic_read(&nohz.load_balancer) == -1) {
- /*
- * simple selection for now: Nominate the
- * first cpu in the nohz list to be the next
- * ilb owner.
- *
- * TBD: Traverse the sched domains and nominate
- * the nearest cpu in the nohz.cpu_mask.
- */
- int ilb = cpumask_first(nohz.cpu_mask);
+ int ilb = find_new_ilb(cpu);
if (ilb < nr_cpu_ids)
resched_cpu(ilb);
@@ -4732,7 +4931,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
if (user_tick)
account_user_time(p, one_jiffy, one_jiffy_scaled);
- else if (p != rq->idle)
+ else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
one_jiffy_scaled);
else
@@ -5007,13 +5206,15 @@ pick_next_task(struct rq *rq)
/*
* schedule() is the main scheduler function.
*/
-asmlinkage void __sched __schedule(void)
+asmlinkage void __sched schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;
+need_resched:
+ preempt_disable();
cpu = smp_processor_id();
rq = cpu_rq(cpu);
rcu_qsctr_inc(cpu);
@@ -5070,15 +5271,9 @@ need_resched_nonpreemptible:
if (unlikely(reacquire_kernel_lock(current) < 0))
goto need_resched_nonpreemptible;
-}
-asmlinkage void __sched schedule(void)
-{
-need_resched:
- preempt_disable();
- __schedule();
preempt_enable_no_resched();
- if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
+ if (need_resched())
goto need_resched;
}
EXPORT_SYMBOL(schedule);
@@ -5221,7 +5416,7 @@ EXPORT_SYMBOL(default_wake_function);
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
*/
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int sync, void *key)
{
wait_queue_t *curr, *next;
@@ -5241,6 +5436,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: is directly passed to the wakeup function
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
*/
void __wake_up(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
@@ -5279,6 +5477,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
* with each other. This can prevent needless bouncing between CPUs.
*
* On UP it can prevent extra preemption.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
*/
void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, void *key)
@@ -5315,6 +5516,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
* awakened in the same order in which they were queued.
*
* See also complete_all(), wait_for_completion() and related routines.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
*/
void complete(struct completion *x)
{
@@ -5332,6 +5536,9 @@ EXPORT_SYMBOL(complete);
* @x: holds the state of this particular completion
*
* This will wake up all threads waiting on this particular completion event.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
*/
void complete_all(struct completion *x)
{
@@ -6490,8 +6697,9 @@ void sched_show_task(struct task_struct *p)
#ifdef CONFIG_DEBUG_STACK_USAGE
free = stack_not_used(p);
#endif
- printk(KERN_CONT "%5lu %5d %6d\n", free,
- task_pid_nr(p), task_pid_nr(p->real_parent));
+ printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
+ task_pid_nr(p), task_pid_nr(p->real_parent),
+ (unsigned long)task_thread_info(p)->flags);
show_stack(p, NULL);
}
@@ -6970,6 +7178,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
}
}
+
+/*
+ * remove the tasks which were accounted by rq from calc_load_tasks.
+ */
+static void calc_global_load_remove(struct rq *rq)
+{
+ atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+}
#endif /* CONFIG_HOTPLUG_CPU */
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7204,6 +7420,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
/* Update our root-domain */
rq = cpu_rq(cpu);
spin_lock_irqsave(&rq->lock, flags);
+ rq->calc_load_update = calc_load_update;
+ rq->calc_load_active = 0;
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -7243,7 +7461,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
cpuset_unlock();
migrate_nr_uninterruptible(rq);
BUG_ON(rq->nr_running != 0);
-
+ calc_global_load_remove(rq);
/*
* No need to migrate the tasks: it was best-effort if
* they didn't take sched_hotcpu_mutex. Just wake up
@@ -7753,8 +7971,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
/*
* The cpus mask in sched_group and sched_domain hangs off the end.
- * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
- * for nr_cpu_ids < CONFIG_NR_CPUS.
+ *
+ * ( See the comments in include/linux/sched.h:struct sched_group
+ * and struct sched_domain. )
*/
struct static_sched_group {
struct sched_group sg;
@@ -7875,7 +8094,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
struct sched_domain *sd;
sd = &per_cpu(phys_domains, j).sd;
- if (j != cpumask_first(sched_group_cpus(sd->groups))) {
+ if (j != group_first_cpu(sd->groups)) {
/*
* Only add "power" once for each
* physical package.
@@ -7953,7 +8172,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
WARN_ON(!sd || !sd->groups);
- if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
+ if (cpu != group_first_cpu(sd->groups))
return;
child = sd->child;
@@ -8938,6 +9157,8 @@ void __init sched_init(void)
rq = cpu_rq(i);
spin_lock_init(&rq->lock);
rq->nr_running = 0;
+ rq->calc_load_active = 0;
+ rq->calc_load_update = jiffies + LOAD_FREQ;
init_cfs_rq(&rq->cfs, rq);
init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -9045,6 +9266,9 @@ void __init sched_init(void)
* when this runqueue becomes "idle".
*/
init_idle(current, smp_processor_id());
+
+ calc_load_update = jiffies + LOAD_FREQ;
+
/*
* During early bootup we pretend to be a normal task:
*/
@@ -9055,6 +9279,7 @@ void __init sched_init(void)
#ifdef CONFIG_SMP
#ifdef CONFIG_NO_HZ
alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+ alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask);
#endif
alloc_bootmem_cpumask_var(&cpu_isolated_map);
#endif /* SMP */
@@ -9800,6 +10025,13 @@ static int sched_rt_global_constraints(void)
if (sysctl_sched_rt_period <= 0)
return -EINVAL;
+ /*
+ * There are always some RT tasks in the root group
+ * -- migration, kstopmachine etc..
+ */
+ if (sysctl_sched_rt_runtime == 0)
+ return -EBUSY;
+
spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
for_each_possible_cpu(i) {
struct rt_rq *rt_rq = &cpu_rq(i)->rt;
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 819f17ac796..e1d16c9a768 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -38,7 +38,8 @@
*/
unsigned long long __attribute__((weak)) sched_clock(void)
{
- return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+ return (unsigned long long)(jiffies - INITIAL_JIFFIES)
+ * (NSEC_PER_SEC / HZ);
}
static __read_mostly int sched_clock_running;
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index cdd3c89574c..344712a5e3e 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -165,7 +165,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
vec->count = 0;
if (bootmem)
alloc_bootmem_cpumask_var(&vec->mask);
- else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
+ else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
goto cleanup;
}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3816f217f11..5f9650e8fe7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1487,17 +1487,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
find_matching_se(&se, &pse);
- while (se) {
- BUG_ON(!pse);
+ BUG_ON(!pse);
- if (wakeup_preempt_entity(se, pse) == 1) {
- resched_task(curr);
- break;
- }
-
- se = parent_entity(se);
- pse = parent_entity(pse);
- }
+ if (wakeup_preempt_entity(se, pse) == 1)
+ resched_task(curr);
}
static struct task_struct *pick_next_task_fair(struct rq *rq)
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 8a21a2e28c1..499672c10cb 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy
static struct task_struct *pick_next_task_idle(struct rq *rq)
{
schedstat_inc(rq, sched_goidle);
-
+ /* adjust the active tasks as we might go into a long sleep */
+ calc_load_account_active(rq);
return rq->idle;
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f2c66f8f971..9bf0d2a7304 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1591,7 +1591,7 @@ static inline void init_sched_rt_class(void)
unsigned int i;
for_each_possible_cpu(i)
- alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
+ zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_SMP */
diff --git a/kernel/smp.c b/kernel/smp.c
index 858baac568e..ad63d850120 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+ if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
cpu_to_node(cpu)))
return NOTIFY_BAD;
break;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b525dd34851..f674f332a02 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -828,7 +828,7 @@ int __init __weak arch_early_irq_init(void)
return 0;
}
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+int __weak arch_init_chip_data(struct irq_desc *desc, int node)
{
return 0;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e3d2c7dd59b..6a463716ecb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,7 +101,9 @@ static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static unsigned long one_ul = 1;
static int one_hundred = 100;
-static int one_thousand = 1000;
+
+/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
+static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
static int maxolduid = 65535;
@@ -729,6 +731,14 @@ static struct ctl_table kern_table[] = {
},
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "bootloader_version",
+ .data = &bootloader_version,
+ .maxlen = sizeof (int),
+ .mode = 0444,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "kstack_depth_to_print",
.data = &kstack_depth_to_print,
.maxlen = sizeof(int),
@@ -1006,7 +1016,7 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &dirty_bytes_handler,
.strategy = &sysctl_intvec,
- .extra1 = &one_ul,
+ .extra1 = &dirty_bytes_min,
},
{
.procname = "dirty_writeback_centisecs",
@@ -1031,28 +1041,6 @@ static struct ctl_table vm_table[] = {
.proc_handler = &proc_dointvec,
},
{
- .ctl_name = CTL_UNNUMBERED,
- .procname = "nr_pdflush_threads_min",
- .data = &nr_pdflush_threads_min,
- .maxlen = sizeof nr_pdflush_threads_min,
- .mode = 0644 /* read-write */,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &one,
- .extra2 = &nr_pdflush_threads_max,
- },
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "nr_pdflush_threads_max",
- .data = &nr_pdflush_threads_max,
- .maxlen = sizeof nr_pdflush_threads_max,
- .mode = 0644 /* read-write */,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &nr_pdflush_threads_min,
- .extra2 = &one_thousand,
- },
- {
.ctl_name = VM_SWAPPINESS,
.procname = "swappiness",
.data = &vm_swappiness,
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 21a5ca84951..83c4417b6a3 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -93,7 +93,17 @@ void tick_handle_periodic(struct clock_event_device *dev)
for (;;) {
if (!clockevents_program_event(dev, next, ktime_get()))
return;
- tick_periodic(cpu);
+ /*
+ * Have to be careful here. If we're in oneshot mode,
+ * before we call tick_periodic() in a loop, we need
+ * to be sure we're using a real hardware clocksource.
+ * Otherwise we could get trapped in an infinite
+ * loop, as the tick_periodic() increments jiffies,
+ * which then will increment time, possibly causing
+ * the loop to trigger again and again.
+ */
+ if (timekeeping_valid_for_hres())
+ tick_periodic(cpu);
next = ktime_add(next, tick_period);
}
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 687dff49f6e..52a8bf8931f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -22,7 +22,7 @@
/*
* This read-write spinlock protects us from races in SMP while
- * playing with xtime and avenrun.
+ * playing with xtime.
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
diff --git a/kernel/timer.c b/kernel/timer.c
index cffffad01c3..a26ed294f93 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1123,47 +1123,6 @@ void update_process_times(int user_tick)
}
/*
- * Nr of active tasks - counted in fixed-point numbers
- */
-static unsigned long count_active_tasks(void)
-{
- return nr_active() * FIXED_1;
-}
-
-/*
- * Hmm.. Changed this, as the GNU make sources (load.c) seems to
- * imply that avenrun[] is the standard name for this kind of thing.
- * Nothing else seems to be standardized: the fractional size etc
- * all seem to differ on different machines.
- *
- * Requires xtime_lock to access.
- */
-unsigned long avenrun[3];
-
-EXPORT_SYMBOL(avenrun);
-
-/*
- * calc_load - given tick count, update the avenrun load estimates.
- * This is called while holding a write_lock on xtime_lock.
- */
-static inline void calc_load(unsigned long ticks)
-{
- unsigned long active_tasks; /* fixed-point */
- static int count = LOAD_FREQ;
-
- count -= ticks;
- if (unlikely(count < 0)) {
- active_tasks = count_active_tasks();
- do {
- CALC_LOAD(avenrun[0], EXP_1, active_tasks);
- CALC_LOAD(avenrun[1], EXP_5, active_tasks);
- CALC_LOAD(avenrun[2], EXP_15, active_tasks);
- count += LOAD_FREQ;
- } while (count < 0);
- }
-}
-
-/*
* This function runs timers and the timer-tq in bottom half context.
*/
static void run_timer_softirq(struct softirq_action *h)
@@ -1187,16 +1146,6 @@ void run_local_timers(void)
}
/*
- * Called by the timer interrupt. xtime_lock must already be taken
- * by the timer IRQ!
- */
-static inline void update_times(unsigned long ticks)
-{
- update_wall_time();
- calc_load(ticks);
-}
-
-/*
* The 64-bit jiffies value is not atomic - you MUST NOT read it
* without sampling the sequence number in xtime_lock.
* jiffies is defined in the linker script...
@@ -1205,7 +1154,8 @@ static inline void update_times(unsigned long ticks)
void do_timer(unsigned long ticks)
{
jiffies_64 += ticks;
- update_times(ticks);
+ update_wall_time();
+ calc_global_load();
}
#ifdef __ARCH_WANT_SYS_ALARM
@@ -1406,37 +1356,17 @@ int do_sysinfo(struct sysinfo *info)
{
unsigned long mem_total, sav_total;
unsigned int mem_unit, bitcount;
- unsigned long seq;
+ struct timespec tp;
memset(info, 0, sizeof(struct sysinfo));
- do {
- struct timespec tp;
- seq = read_seqbegin(&xtime_lock);
-
- /*
- * This is annoying. The below is the same thing
- * posix_get_clock_monotonic() does, but it wants to
- * take the lock which we want to cover the loads stuff
- * too.
- */
-
- getnstimeofday(&tp);
- tp.tv_sec += wall_to_monotonic.tv_sec;
- tp.tv_nsec += wall_to_monotonic.tv_nsec;
- monotonic_to_bootbased(&tp);
- if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
- tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
- tp.tv_sec++;
- }
- info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+ ktime_get_ts(&tp);
+ monotonic_to_bootbased(&tp);
+ info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
- info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
- info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
- info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+ get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
- info->procs = nr_threads;
- } while (read_seqretry(&xtime_lock, seq));
+ info->procs = nr_threads;
si_meminfo(info);
si_swapinfo(info);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1ce5dc6372b..cda81ec58d9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2380,7 +2380,7 @@ static const char readme_msg[] =
"# echo print-parent > /debug/tracing/trace_options\n"
"# echo 1 > /debug/tracing/tracing_enabled\n"
"# cat /debug/tracing/trace > /tmp/trace.txt\n"
- "echo 0 > /debug/tracing/tracing_enabled\n"
+ "# echo 0 > /debug/tracing/tracing_enabled\n"
;
static ssize_t
@@ -3448,6 +3448,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
if (!ref)
break;
+ ref->ref = 1;
ref->buffer = info->tr->buffer;
ref->page = ring_buffer_alloc_read_page(ref->buffer);
if (!ref->page) {
diff --git a/kernel/wait.c b/kernel/wait.c
index 42a2dbc181c..ea7c3b4275c 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
if (!list_empty(&wait->task_list))
list_del_init(&wait->task_list);
else if (waitqueue_active(q))
- __wake_up_common(q, mode, 1, 0, key);
+ __wake_up_locked_key(q, mode, key);
spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(abort_exclusive_wait);