aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/auditsc.c3
-rw-r--r--kernel/capability.c338
-rw-r--r--kernel/cpu.c42
-rw-r--r--kernel/cpuset.c6
-rw-r--r--kernel/exec_domain.c2
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/irq/manage.c103
-rw-r--r--kernel/kmod.c6
-rw-r--r--kernel/module.c336
-rw-r--r--kernel/power/Kconfig13
-rw-r--r--kernel/power/main.c194
-rw-r--r--kernel/power/poweroff.c4
-rw-r--r--kernel/power/process.c2
-rw-r--r--kernel/power/snapshot.c88
-rw-r--r--kernel/printk.c2
-rw-r--r--kernel/rcuclassic.c2
-rw-r--r--kernel/rcupreempt.c10
-rw-r--r--kernel/rtmutex-tester.c7
-rw-r--r--kernel/sched.c154
-rw-r--r--kernel/sched_fair.c5
-rw-r--r--kernel/sched_rt.c79
-rw-r--r--kernel/softlockup.c45
-rw-r--r--kernel/stop_machine.c3
-rw-r--r--kernel/sys_ni.c5
-rw-r--r--kernel/sysctl.c34
-rw-r--r--kernel/taskstats.c4
-rw-r--r--kernel/time/clocksource.c12
-rw-r--r--kernel/time/tick-broadcast.c3
-rw-r--r--kernel/time/tick-common.c14
-rw-r--r--kernel/time/tick-sched.c4
-rw-r--r--kernel/trace/trace_sysprof.c4
-rw-r--r--kernel/workqueue.c45
32 files changed, 1044 insertions, 534 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c10e7aae04d..4699950e65b 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1476,7 +1476,8 @@ void audit_syscall_entry(int arch, int major,
struct audit_context *context = tsk->audit_context;
enum audit_state state;
- BUG_ON(!context);
+ if (unlikely(!context))
+ return;
/*
* This happens only on certain architectures that make system
diff --git a/kernel/capability.c b/kernel/capability.c
index 901e0fdc3ff..0101e847603 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -115,11 +115,208 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy)
return 0;
}
+#ifndef CONFIG_SECURITY_FILE_CAPABILITIES
+
+/*
+ * Without filesystem capability support, we nominally support one process
+ * setting the capabilities of another
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+ kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+ struct task_struct *target;
+ int ret;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ if (pid && pid != task_pid_vnr(current)) {
+ target = find_task_by_vpid(pid);
+ if (!target) {
+ ret = -ESRCH;
+ goto out;
+ }
+ } else
+ target = current;
+
+ ret = security_capget(target, pEp, pIp, pPp);
+
+out:
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ return ret;
+}
+
+/*
+ * cap_set_pg - set capabilities for all processes in a given process
+ * group. We call this holding task_capability_lock and tasklist_lock.
+ */
+static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *g, *target;
+ int ret = -EPERM;
+ int found = 0;
+ struct pid *pgrp;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ pgrp = find_vpid(pgrp_nr);
+ do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
+ target = g;
+ while_each_thread(g, target) {
+ if (!security_capset_check(target, effective,
+ inheritable, permitted)) {
+ security_capset_set(target, effective,
+ inheritable, permitted);
+ ret = 0;
+ }
+ found = 1;
+ }
+ } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ if (!found)
+ ret = 0;
+ return ret;
+}
+
/*
- * For sys_getproccap() and sys_setproccap(), any of the three
- * capability set pointers may be NULL -- indicating that that set is
- * uninteresting and/or not to be changed.
+ * cap_set_all - set capabilities for all processes other than init
+ * and self. We call this holding task_capability_lock and tasklist_lock.
*/
+static inline int cap_set_all(kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *g, *target;
+ int ret = -EPERM;
+ int found = 0;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ do_each_thread(g, target) {
+ if (target == current
+ || is_container_init(target->group_leader))
+ continue;
+ found = 1;
+ if (security_capset_check(target, effective, inheritable,
+ permitted))
+ continue;
+ ret = 0;
+ security_capset_set(target, effective, inheritable, permitted);
+ } while_each_thread(g, target);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ if (!found)
+ ret = 0;
+
+ return ret;
+}
+
+/*
+ * Given the target pid does not refer to the current process we
+ * need more elaborate support... (This support is not present when
+ * filesystem capabilities are configured.)
+ */
+static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ struct task_struct *target;
+ int ret;
+
+ if (!capable(CAP_SETPCAP))
+ return -EPERM;
+
+ if (pid == -1) /* all procs other than current and init */
+ return cap_set_all(effective, inheritable, permitted);
+
+ else if (pid < 0) /* all procs in process group */
+ return cap_set_pg(-pid, effective, inheritable, permitted);
+
+ /* target != current */
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ target = find_task_by_vpid(pid);
+ if (!target)
+ ret = -ESRCH;
+ else {
+ ret = security_capset_check(target, effective, inheritable,
+ permitted);
+
+ /* having verified that the proposed changes are legal,
+ we now put them into effect. */
+ if (!ret)
+ security_capset_set(target, effective, inheritable,
+ permitted);
+ }
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+
+ return ret;
+}
+
+#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */
+
+/*
+ * If we have configured with filesystem capability support, then the
+ * only thing that can change the capabilities of the current process
+ * is the current process. As such, we can't be in this code at the
+ * same time as we are in the process of setting capabilities in this
+ * process. The net result is that we can limit our use of locks to
+ * when we are reading the caps of another process.
+ */
+static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
+ kernel_cap_t *pIp, kernel_cap_t *pPp)
+{
+ int ret;
+
+ if (pid && (pid != task_pid_vnr(current))) {
+ struct task_struct *target;
+
+ spin_lock(&task_capability_lock);
+ read_lock(&tasklist_lock);
+
+ target = find_task_by_vpid(pid);
+ if (!target)
+ ret = -ESRCH;
+ else
+ ret = security_capget(target, pEp, pIp, pPp);
+
+ read_unlock(&tasklist_lock);
+ spin_unlock(&task_capability_lock);
+ } else
+ ret = security_capget(current, pEp, pIp, pPp);
+
+ return ret;
+}
+
+/*
+ * With filesystem capability support configured, the kernel does not
+ * permit the changing of capabilities in one process by another
+ * process. (CAP_SETPCAP has much less broad semantics when configured
+ * this way.)
+ */
+static inline int do_sys_capset_other_tasks(pid_t pid,
+ kernel_cap_t *effective,
+ kernel_cap_t *inheritable,
+ kernel_cap_t *permitted)
+{
+ return -EPERM;
+}
+
+#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */
/*
* Atomically modify the effective capabilities returning the original
@@ -155,7 +352,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
{
int ret = 0;
pid_t pid;
- struct task_struct *target;
unsigned tocopy;
kernel_cap_t pE, pI, pP;
@@ -169,23 +365,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
if (pid < 0)
return -EINVAL;
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
- if (pid && pid != task_pid_vnr(current)) {
- target = find_task_by_vpid(pid);
- if (!target) {
- ret = -ESRCH;
- goto out;
- }
- } else
- target = current;
-
- ret = security_capget(target, &pE, &pI, &pP);
-
-out:
- read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
+ ret = cap_get_target_pid(pid, &pE, &pI, &pP);
if (!ret) {
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
@@ -216,7 +396,6 @@ out:
* before modification is attempted and the application
* fails.
*/
-
if (copy_to_user(dataptr, kdata, tocopy
* sizeof(struct __user_cap_data_struct))) {
return -EFAULT;
@@ -226,70 +405,8 @@ out:
return ret;
}
-/*
- * cap_set_pg - set capabilities for all processes in a given process
- * group. We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective,
- kernel_cap_t *inheritable,
- kernel_cap_t *permitted)
-{
- struct task_struct *g, *target;
- int ret = -EPERM;
- int found = 0;
- struct pid *pgrp;
-
- pgrp = find_vpid(pgrp_nr);
- do_each_pid_task(pgrp, PIDTYPE_PGID, g) {
- target = g;
- while_each_thread(g, target) {
- if (!security_capset_check(target, effective,
- inheritable,
- permitted)) {
- security_capset_set(target, effective,
- inheritable,
- permitted);
- ret = 0;
- }
- found = 1;
- }
- } while_each_pid_task(pgrp, PIDTYPE_PGID, g);
-
- if (!found)
- ret = 0;
- return ret;
-}
-
-/*
- * cap_set_all - set capabilities for all processes other than init
- * and self. We call this holding task_capability_lock and tasklist_lock.
- */
-static inline int cap_set_all(kernel_cap_t *effective,
- kernel_cap_t *inheritable,
- kernel_cap_t *permitted)
-{
- struct task_struct *g, *target;
- int ret = -EPERM;
- int found = 0;
-
- do_each_thread(g, target) {
- if (target == current || is_container_init(target->group_leader))
- continue;
- found = 1;
- if (security_capset_check(target, effective, inheritable,
- permitted))
- continue;
- ret = 0;
- security_capset_set(target, effective, inheritable, permitted);
- } while_each_thread(g, target);
-
- if (!found)
- ret = 0;
- return ret;
-}
-
/**
- * sys_capset - set capabilities for a process or a group of processes
+ * sys_capset - set capabilities for a process or (*) a group of processes
* @header: pointer to struct that contains capability version and
* target pid data
* @data: pointer to struct that contains the effective, permitted,
@@ -313,7 +430,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
unsigned i, tocopy;
kernel_cap_t inheritable, permitted, effective;
- struct task_struct *target;
int ret;
pid_t pid;
@@ -324,9 +440,6 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
if (get_user(pid, &header->pid))
return -EFAULT;
- if (pid && pid != task_pid_vnr(current) && !capable(CAP_SETPCAP))
- return -EPERM;
-
if (copy_from_user(&kdata, data, tocopy
* sizeof(struct __user_cap_data_struct))) {
return -EFAULT;
@@ -344,40 +457,31 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
i++;
}
- spin_lock(&task_capability_lock);
- read_lock(&tasklist_lock);
-
- if (pid > 0 && pid != task_pid_vnr(current)) {
- target = find_task_by_vpid(pid);
- if (!target) {
- ret = -ESRCH;
- goto out;
- }
- } else
- target = current;
-
- ret = 0;
-
- /* having verified that the proposed changes are legal,
- we now put them into effect. */
- if (pid < 0) {
- if (pid == -1) /* all procs other than current and init */
- ret = cap_set_all(&effective, &inheritable, &permitted);
+ if (pid && (pid != task_pid_vnr(current)))
+ ret = do_sys_capset_other_tasks(pid, &effective, &inheritable,
+ &permitted);
+ else {
+ /*
+ * This lock is required even when filesystem
+ * capability support is configured - it protects the
+ * sys_capget() call from returning incorrect data in
+ * the case that the targeted process is not the
+ * current one.
+ */
+ spin_lock(&task_capability_lock);
- else /* all procs in process group */
- ret = cap_set_pg(-pid, &effective, &inheritable,
- &permitted);
- } else {
- ret = security_capset_check(target, &effective, &inheritable,
+ ret = security_capset_check(current, &effective, &inheritable,
&permitted);
+ /*
+ * Having verified that the proposed changes are
+ * legal, we now put them into effect.
+ */
if (!ret)
- security_capset_set(target, &effective, &inheritable,
+ security_capset_set(current, &effective, &inheritable,
&permitted);
+ spin_unlock(&task_capability_lock);
}
-out:
- read_unlock(&tasklist_lock);
- spin_unlock(&task_capability_lock);
return ret;
}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index cfb1d43ab80..2cc409ce0a8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ void __init cpu_hotplug_init(void)
cpu_hotplug.refcount = 0;
}
+cpumask_t cpu_active_map;
+
#ifdef CONFIG_HOTPLUG_CPU
void get_online_cpus(void)
@@ -291,11 +293,30 @@ int __ref cpu_down(unsigned int cpu)
int err = 0;
cpu_maps_update_begin();
- if (cpu_hotplug_disabled)
+
+ if (cpu_hotplug_disabled) {
err = -EBUSY;
- else
- err = _cpu_down(cpu, 0);
+ goto out;
+ }
+
+ cpu_clear(cpu, cpu_active_map);
+
+ /*
+ * Make sure the all cpus did the reschedule and are not
+ * using stale version of the cpu_active_map.
+ * This is not strictly necessary becuase stop_machine()
+ * that we run down the line already provides the required
+ * synchronization. But it's really a side effect and we do not
+ * want to depend on the innards of the stop_machine here.
+ */
+ synchronize_sched();
+
+ err = _cpu_down(cpu, 0);
+ if (cpu_online(cpu))
+ cpu_set(cpu, cpu_active_map);
+
+out:
cpu_maps_update_done();
return err;
}
@@ -355,11 +376,18 @@ int __cpuinit cpu_up(unsigned int cpu)
}
cpu_maps_update_begin();
- if (cpu_hotplug_disabled)
+
+ if (cpu_hotplug_disabled) {
err = -EBUSY;
- else
- err = _cpu_up(cpu, 0);
+ goto out;
+ }
+
+ err = _cpu_up(cpu, 0);
+ if (cpu_online(cpu))
+ cpu_set(cpu, cpu_active_map);
+
+out:
cpu_maps_update_done();
return err;
}
@@ -413,7 +441,7 @@ void __ref enable_nonboot_cpus(void)
goto out;
printk("Enabling non-boot CPUs ...\n");
- for_each_cpu_mask(cpu, frozen_cpus) {
+ for_each_cpu_mask_nr(cpu, frozen_cpus) {
error = _cpu_up(cpu, 1);
if (!error) {
printk("CPU%d is up\n", cpu);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a..d5738910c34 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -564,7 +564,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
* partition_sched_domains().
*/
-static void rebuild_sched_domains(void)
+void rebuild_sched_domains(void)
{
struct kfifo *q; /* queue of cpusets to be scanned */
struct cpuset *cp; /* scans q */
@@ -679,7 +679,9 @@ restart:
if (apn == b->pn) {
cpus_or(*dp, *dp, b->cpus_allowed);
b->pn = -1;
- update_domain_attr(dattr, b);
+ if (dattr)
+ update_domain_attr(dattr
+ + nslot, b);
}
}
nslot++;
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index a9e6bad9f70..c1ef192aa65 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -65,7 +65,7 @@ lookup_exec_domain(u_long personality)
goto out;
}
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
read_unlock(&exec_domains_lock);
request_module("personality-%ld", pers);
read_lock(&exec_domains_lock);
diff --git a/kernel/fork.c b/kernel/fork.c
index adefc1131f2..552c8d8e77a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -33,6 +33,7 @@
#include <linux/cpu.h>
#include <linux/cgroup.h>
#include <linux/security.h>
+#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
@@ -307,6 +308,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
}
/*
+ * Clear hugetlb-related page reserves for children. This only
+ * affects MAP_PRIVATE mappings. Faults generated by the child
+ * are not guaranteed to succeed, even if read-only
+ */
+ if (is_vm_hugetlb_page(tmp))
+ reset_vma_resv_huge_pages(tmp);
+
+ /*
* Link in the new vma and copy the page table entries.
*/
*pprev = tmp;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 77a51be3601..5bc6e5ecc49 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -217,6 +217,17 @@ void enable_irq(unsigned int irq)
}
EXPORT_SYMBOL(enable_irq);
+int set_irq_wake_real(unsigned int irq, unsigned int on)
+{
+ struct irq_desc *desc = irq_desc + irq;
+ int ret = -ENXIO;
+
+ if (desc->chip->set_wake)
+ ret = desc->chip->set_wake(irq, on);
+
+ return ret;
+}
+
/**
* set_irq_wake - control irq power management wakeup
* @irq: interrupt to control
@@ -233,30 +244,34 @@ int set_irq_wake(unsigned int irq, unsigned int on)
{
struct irq_desc *desc = irq_desc + irq;
unsigned long flags;
- int ret = -ENXIO;
- int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;
+ int ret = 0;
/* wakeup-capable irqs can be shared between drivers that
* don't need to have the same sleep mode behaviors.
*/
spin_lock_irqsave(&desc->lock, flags);
if (on) {
- if (desc->wake_depth++ == 0)
- desc->status |= IRQ_WAKEUP;
- else
- set_wake = NULL;
+ if (desc->wake_depth++ == 0) {
+ ret = set_irq_wake_real(irq, on);
+ if (ret)
+ desc->wake_depth = 0;
+ else
+ desc->status |= IRQ_WAKEUP;
+ }
} else {
if (desc->wake_depth == 0) {
printk(KERN_WARNING "Unbalanced IRQ %d "
"wake disable\n", irq);
WARN_ON(1);
- } else if (--desc->wake_depth == 0)
- desc->status &= ~IRQ_WAKEUP;
- else
- set_wake = NULL;
+ } else if (--desc->wake_depth == 0) {
+ ret = set_irq_wake_real(irq, on);
+ if (ret)
+ desc->wake_depth = 1;
+ else
+ desc->status &= ~IRQ_WAKEUP;
+ }
}
- if (set_wake)
- ret = desc->chip->set_wake(irq, on);
+
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
@@ -293,6 +308,30 @@ void compat_irq_chip_set_default_handler(struct irq_desc *desc)
desc->handle_irq = NULL;
}
+static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
+ unsigned long flags)
+{
+ int ret;
+
+ if (!chip || !chip->set_type) {
+ /*
+ * IRQF_TRIGGER_* but the PIC does not support multiple
+ * flow-types?
+ */
+ pr_warning("No set_type function for IRQ %d (%s)\n", irq,
+ chip ? (chip->name ? : "unknown") : "unknown");
+ return 0;
+ }
+
+ ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
+
+ if (ret)
+ pr_err("setting flow type for irq %u failed (%pF)\n",
+ irq, chip->set_type);
+
+ return ret;
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
@@ -304,6 +343,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
const char *old_name = NULL;
unsigned long flags;
int shared = 0;
+ int ret;
if (irq >= NR_IRQS)
return -EINVAL;
@@ -361,35 +401,23 @@ int setup_irq(unsigned int irq, struct irqaction *new)
shared = 1;
}
- *p = new;
-
- /* Exclude IRQ from balancing */
- if (new->flags & IRQF_NOBALANCING)
- desc->status |= IRQ_NO_BALANCING;
-
if (!shared) {
irq_chip_set_defaults(desc->chip);
-#if defined(CONFIG_IRQ_PER_CPU)
- if (new->flags & IRQF_PERCPU)
- desc->status |= IRQ_PER_CPU;
-#endif
-
/* Setup the type (level, edge polarity) if configured: */
if (new->flags & IRQF_TRIGGER_MASK) {
- if (desc->chip->set_type)
- desc->chip->set_type(irq,
- new->flags & IRQF_TRIGGER_MASK);
- else
- /*
- * IRQF_TRIGGER_* but the PIC does not support
- * multiple flow-types?
- */
- printk(KERN_WARNING "No IRQF_TRIGGER set_type "
- "function for IRQ %d (%s)\n", irq,
- desc->chip->name);
+ ret = __irq_set_trigger(desc->chip, irq, new->flags);
+
+ if (ret) {
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return ret;
+ }
} else
compat_irq_chip_set_default_handler(desc);
+#if defined(CONFIG_IRQ_PER_CPU)
+ if (new->flags & IRQF_PERCPU)
+ desc->status |= IRQ_PER_CPU;
+#endif
desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING |
IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED);
@@ -408,6 +436,13 @@ int setup_irq(unsigned int irq, struct irqaction *new)
/* Set default affinity mask once everything is setup */
irq_select_affinity(irq);
}
+
+ *p = new;
+
+ /* Exclude IRQ from balancing */
+ if (new->flags & IRQF_NOBALANCING)
+ desc->status |= IRQ_NO_BALANCING;
+
/* Reset broken irq detection when installing new handler */
desc->irq_count = 0;
desc->irqs_unhandled = 0;
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 8df97d3dfda..2989f67c444 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -42,7 +42,7 @@ extern int max_threads;
static struct workqueue_struct *khelper_wq;
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
/*
modprobe_path is set via /proc/sys.
@@ -417,12 +417,12 @@ int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
{
struct file *f;
- f = create_write_pipe();
+ f = create_write_pipe(0);
if (IS_ERR(f))
return PTR_ERR(f);
*filp = f;
- f = create_read_pipe(f);
+ f = create_read_pipe(f, 0);
if (IS_ERR(f)) {
free_write_pipe(*filp);
return PTR_ERR(f);
diff --git a/kernel/module.c b/kernel/module.c
index 5f80478b746..d8b5605132a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -70,6 +70,9 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
+/* Bounds of module allocation, for speeding __module_text_address */
+static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+
int register_module_notifier(struct notifier_block * nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -134,17 +137,19 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
extern const struct kernel_symbol __stop___ksymtab_gpl[];
extern const struct kernel_symbol __start___ksymtab_gpl_future[];
extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
-extern const struct kernel_symbol __start___ksymtab_unused[];
-extern const struct kernel_symbol __stop___ksymtab_unused[];
-extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
-extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
extern const struct kernel_symbol __start___ksymtab_gpl_future[];
extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
extern const unsigned long __start___kcrctab[];
extern const unsigned long __start___kcrctab_gpl[];
extern const unsigned long __start___kcrctab_gpl_future[];
+#ifdef CONFIG_UNUSED_SYMBOLS
+extern const struct kernel_symbol __start___ksymtab_unused[];
+extern const struct kernel_symbol __stop___ksymtab_unused[];
+extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
+extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
extern const unsigned long __start___kcrctab_unused[];
extern const unsigned long __start___kcrctab_unused_gpl[];
+#endif
#ifndef CONFIG_MODVERSIONS
#define symversion(base, idx) NULL
@@ -152,156 +157,186 @@ extern const unsigned long __start___kcrctab_unused_gpl[];
#define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL)
#endif
-/* lookup symbol in given range of kernel_symbols */
-static const struct kernel_symbol *lookup_symbol(const char *name,
- const struct kernel_symbol *start,
- const struct kernel_symbol *stop)
-{
- const struct kernel_symbol *ks = start;
- for (; ks < stop; ks++)
- if (strcmp(ks->name, name) == 0)
- return ks;
- return NULL;
-}
-
-static bool always_ok(bool gplok, bool warn, const char *name)
-{
- return true;
-}
-
-static bool printk_unused_warning(bool gplok, bool warn, const char *name)
-{
- if (warn) {
- printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
- "however this module is using it.\n", name);
- printk(KERN_WARNING
- "This symbol will go away in the future.\n");
- printk(KERN_WARNING
- "Please evalute if this is the right api to use and if "
- "it really is, submit a report the linux kernel "
- "mailinglist together with submitting your code for "
- "inclusion.\n");
- }
- return true;
-}
-
-static bool gpl_only_unused_warning(bool gplok, bool warn, const char *name)
-{
- if (!gplok)
- return false;
- return printk_unused_warning(gplok, warn, name);
-}
-
-static bool gpl_only(bool gplok, bool warn, const char *name)
-{
- return gplok;
-}
-
-static bool warn_if_not_gpl(bool gplok, bool warn, const char *name)
-{
- if (!gplok && warn) {
- printk(KERN_WARNING "Symbol %s is being used "
- "by a non-GPL module, which will not "
- "be allowed in the future\n", name);
- printk(KERN_WARNING "Please see the file "
- "Documentation/feature-removal-schedule.txt "
- "in the kernel source tree for more details.\n");
- }
- return true;
-}
-
struct symsearch {
const struct kernel_symbol *start, *stop;
const unsigned long *crcs;
- bool (*check)(bool gplok, bool warn, const char *name);
+ enum {
+ NOT_GPL_ONLY,
+ GPL_ONLY,
+ WILL_BE_GPL_ONLY,
+ } licence;
+ bool unused;
};
-/* Look through this array of symbol tables for a symbol match which
- * passes the check function. */
-static const struct kernel_symbol *search_symarrays(const struct symsearch *arr,
- unsigned int num,
- const char *name,
- bool gplok,
- bool warn,
- const unsigned long **crc)
+static bool each_symbol_in_section(const struct symsearch *arr,
+ unsigned int arrsize,
+ struct module *owner,
+ bool (*fn)(const struct symsearch *syms,
+ struct module *owner,
+ unsigned int symnum, void *data),
+ void *data)
{
- unsigned int i;
- const struct kernel_symbol *ks;
+ unsigned int i, j;
- for (i = 0; i < num; i++) {
- ks = lookup_symbol(name, arr[i].start, arr[i].stop);
- if (!ks || !arr[i].check(gplok, warn, name))
- continue;
-
- if (crc)
- *crc = symversion(arr[i].crcs, ks - arr[i].start);
- return ks;
+ for (j = 0; j < arrsize; j++) {
+ for (i = 0; i < arr[j].stop - arr[j].start; i++)
+ if (fn(&arr[j], owner, i, data))
+ return true;
}
- return NULL;
+
+ return false;
}
-/* Find a symbol, return value, (optional) crc and (optional) module
- * which owns it */
-static unsigned long find_symbol(const char *name,
- struct module **owner,
- const unsigned long **crc,
- bool gplok,
- bool warn)
+/* Returns true as soon as fn returns true, otherwise false. */
+static bool each_symbol(bool (*fn)(const struct symsearch *arr,
+ struct module *owner,
+ unsigned int symnum, void *data),
+ void *data)
{
struct module *mod;
- const struct kernel_symbol *ks;
const struct symsearch arr[] = {
{ __start___ksymtab, __stop___ksymtab, __start___kcrctab,
- always_ok },
+ NOT_GPL_ONLY, false },
{ __start___ksymtab_gpl, __stop___ksymtab_gpl,
- __start___kcrctab_gpl, gpl_only },
+ __start___kcrctab_gpl,
+ GPL_ONLY, false },
{ __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
- __start___kcrctab_gpl_future, warn_if_not_gpl },
+ __start___kcrctab_gpl_future,
+ WILL_BE_GPL_ONLY, false },
+#ifdef CONFIG_UNUSED_SYMBOLS
{ __start___ksymtab_unused, __stop___ksymtab_unused,
- __start___kcrctab_unused, printk_unused_warning },
+ __start___kcrctab_unused,
+ NOT_GPL_ONLY, true },
{ __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
- __start___kcrctab_unused_gpl, gpl_only_unused_warning },
+ __start___kcrctab_unused_gpl,
+ GPL_ONLY, true },
+#endif
};
- /* Core kernel first. */
- ks = search_symarrays(arr, ARRAY_SIZE(arr), name, gplok, warn, crc);
- if (ks) {
- if (owner)
- *owner = NULL;
- return ks->value;
- }
+ if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
+ return true;
- /* Now try modules. */
list_for_each_entry(mod, &modules, list) {
struct symsearch arr[] = {
{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
- always_ok },
+ NOT_GPL_ONLY, false },
{ mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
- mod->gpl_crcs, gpl_only },
+ mod->gpl_crcs,
+ GPL_ONLY, false },
{ mod->gpl_future_syms,
mod->gpl_future_syms + mod->num_gpl_future_syms,
- mod->gpl_future_crcs, warn_if_not_gpl },
+ mod->gpl_future_crcs,
+ WILL_BE_GPL_ONLY, false },
+#ifdef CONFIG_UNUSED_SYMBOLS
{ mod->unused_syms,
mod->unused_syms + mod->num_unused_syms,
- mod->unused_crcs, printk_unused_warning },
+ mod->unused_crcs,
+ NOT_GPL_ONLY, true },
{ mod->unused_gpl_syms,
mod->unused_gpl_syms + mod->num_unused_gpl_syms,
- mod->unused_gpl_crcs, gpl_only_unused_warning },
+ mod->unused_gpl_crcs,
+ GPL_ONLY, true },
+#endif
};
- ks = search_symarrays(arr, ARRAY_SIZE(arr),
- name, gplok, warn, crc);
- if (ks) {
- if (owner)
- *owner = mod;
- return ks->value;
+ if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
+ return true;
+ }
+ return false;
+}
+
+struct find_symbol_arg {
+ /* Input */
+ const char *name;
+ bool gplok;
+ bool warn;
+
+ /* Output */
+ struct module *owner;
+ const unsigned long *crc;
+ unsigned long value;
+};
+
+static bool find_symbol_in_section(const struct symsearch *syms,
+ struct module *owner,
+ unsigned int symnum, void *data)
+{
+ struct find_symbol_arg *fsa = data;
+
+ if (strcmp(syms->start[symnum].name, fsa->name) != 0)
+ return false;
+
+ if (!fsa->gplok) {
+ if (syms->licence == GPL_ONLY)
+ return false;
+ if (syms->licence == WILL_BE_GPL_ONLY && fsa->warn) {
+ printk(KERN_WARNING "Symbol %s is being used "
+ "by a non-GPL module, which will not "
+ "be allowed in the future\n", fsa->name);
+ printk(KERN_WARNING "Please see the file "
+ "Documentation/feature-removal-schedule.txt "
+ "in the kernel source tree for more details.\n");
}
}
+#ifdef CONFIG_UNUSED_SYMBOLS
+ if (syms->unused && fsa->warn) {
+ printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
+ "however this module is using it.\n", fsa->name);
+ printk(KERN_WARNING
+ "This symbol will go away in the future.\n");
+ printk(KERN_WARNING
+ "Please evalute if this is the right api to use and if "
+ "it really is, submit a report the linux kernel "
+ "mailinglist together with submitting your code for "
+ "inclusion.\n");
+ }
+#endif
+
+ fsa->owner = owner;
+ fsa->crc = symversion(syms->crcs, symnum);
+ fsa->value = syms->start[symnum].value;
+ return true;
+}
+
+/* Find a symbol, return value, (optional) crc and (optional) module
+ * which owns it */
+static unsigned long find_symbol(const char *name,
+ struct module **owner,
+ const unsigned long **crc,
+ bool gplok,
+ bool warn)
+{
+ struct find_symbol_arg fsa;
+
+ fsa.name = name;
+ fsa.gplok = gplok;
+ fsa.warn = warn;
+
+ if (each_symbol(find_symbol_in_section, &fsa)) {
+ if (owner)
+ *owner = fsa.owner;
+ if (crc)
+ *crc = fsa.crc;
+ return fsa.value;
+ }
+
DEBUGP("Failed to find symbol %s\n", name);
return -ENOENT;
}
+/* lookup symbol in given range of kernel_symbols */
+static const struct kernel_symbol *lookup_symbol(const char *name,
+ const struct kernel_symbol *start,
+ const struct kernel_symbol *stop)
+{
+ const struct kernel_symbol *ks = start;
+ for (; ks < stop; ks++)
+ if (strcmp(ks->name, name) == 0)
+ return ks;
+ return NULL;
+}
+
/* Search for module by name: must hold module_mutex. */
static struct module *find_module(const char *name)
{
@@ -639,8 +674,8 @@ static int __try_stop_module(void *_sref)
{
struct stopref *sref = _sref;
- /* If it's not unused, quit unless we are told to block. */
- if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) {
+ /* If it's not unused, quit unless we're forcing. */
+ if (module_refcount(sref->mod) != 0) {
if (!(*sref->forced = try_force_unload(sref->flags)))
return -EWOULDBLOCK;
}
@@ -652,9 +687,16 @@ static int __try_stop_module(void *_sref)
static int try_stop_module(struct module *mod, int flags, int *forced)
{
- struct stopref sref = { mod, flags, forced };
+ if (flags & O_NONBLOCK) {
+ struct stopref sref = { mod, flags, forced };
- return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+ return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+ } else {
+ /* We don't need to stop the machine for this. */
+ mod->state = MODULE_STATE_GOING;
+ synchronize_sched();
+ return 0;
+ }
}
unsigned int module_refcount(struct module *mod)
@@ -1445,8 +1487,10 @@ static int verify_export_symbols(struct module *mod)
{ mod->syms, mod->num_syms },
{ mod->gpl_syms, mod->num_gpl_syms },
{ mod->gpl_future_syms, mod->num_gpl_future_syms },
+#ifdef CONFIG_UNUSED_SYMBOLS
{ mod->unused_syms, mod->num_unused_syms },
{ mod->unused_gpl_syms, mod->num_unused_gpl_syms },
+#endif
};
for (i = 0; i < ARRAY_SIZE(arr); i++) {
@@ -1526,7 +1570,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
}
/* Update size with this section: return offset. */
-static long get_offset(unsigned long *size, Elf_Shdr *sechdr)
+static long get_offset(unsigned int *size, Elf_Shdr *sechdr)
{
long ret;
@@ -1738,6 +1782,20 @@ static inline void add_kallsyms(struct module *mod,
}
#endif /* CONFIG_KALLSYMS */
+static void *module_alloc_update_bounds(unsigned long size)
+{
+ void *ret = module_alloc(size);
+
+ if (ret) {
+ /* Update module bounds. */
+ if ((unsigned long)ret < module_addr_min)
+ module_addr_min = (unsigned long)ret;
+ if ((unsigned long)ret + size > module_addr_max)
+ module_addr_max = (unsigned long)ret + size;
+ }
+ return ret;
+}
+
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static struct module *load_module(void __user *umod,
@@ -1764,10 +1822,12 @@ static struct module *load_module(void __user *umod,
unsigned int gplfutureindex;
unsigned int gplfuturecrcindex;
unsigned int unwindex = 0;
+#ifdef CONFIG_UNUSED_SYMBOLS
unsigned int unusedindex;
unsigned int unusedcrcindex;
unsigned int unusedgplindex;
unsigned int unusedgplcrcindex;
+#endif
unsigned int markersindex;
unsigned int markersstringsindex;
struct module *mod;
@@ -1850,13 +1910,15 @@ static struct module *load_module(void __user *umod,
exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
- unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
- unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
+#ifdef CONFIG_UNUSED_SYMBOLS
+ unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
+ unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
+#endif
setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
@@ -1935,7 +1997,7 @@ static struct module *load_module(void __user *umod,
layout_sections(mod, hdr, sechdrs, secstrings);
/* Do the allocs. */
- ptr = module_alloc(mod->core_size);
+ ptr = module_alloc_update_bounds(mod->core_size);
if (!ptr) {
err = -ENOMEM;
goto free_percpu;
@@ -1943,7 +2005,7 @@ static struct module *load_module(void __user *umod,
memset(ptr, 0, mod->core_size);
mod->module_core = ptr;
- ptr = module_alloc(mod->init_size);
+ ptr = module_alloc_update_bounds(mod->init_size);
if (!ptr && mod->init_size) {
err = -ENOMEM;
goto free_core;
@@ -2018,14 +2080,15 @@ static struct module *load_module(void __user *umod,
mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
sizeof(*mod->gpl_future_syms);
- mod->num_unused_syms = sechdrs[unusedindex].sh_size /
- sizeof(*mod->unused_syms);
- mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
- sizeof(*mod->unused_gpl_syms);
mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
if (gplfuturecrcindex)
mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+#ifdef CONFIG_UNUSED_SYMBOLS
+ mod->num_unused_syms = sechdrs[unusedindex].sh_size /
+ sizeof(*mod->unused_syms);
+ mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
+ sizeof(*mod->unused_gpl_syms);
mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
if (unusedcrcindex)
mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
@@ -2033,13 +2096,17 @@ static struct module *load_module(void __user *umod,
if (unusedgplcrcindex)
mod->unused_gpl_crcs
= (void *)sechdrs[unusedgplcrcindex].sh_addr;
+#endif
#ifdef CONFIG_MODVERSIONS
- if ((mod->num_syms && !crcindex) ||
- (mod->num_gpl_syms && !gplcrcindex) ||
- (mod->num_gpl_future_syms && !gplfuturecrcindex) ||
- (mod->num_unused_syms && !unusedcrcindex) ||
- (mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
+ if ((mod->num_syms && !crcindex)
+ || (mod->num_gpl_syms && !gplcrcindex)
+ || (mod->num_gpl_future_syms && !gplfuturecrcindex)
+#ifdef CONFIG_UNUSED_SYMBOLS
+ || (mod->num_unused_syms && !unusedcrcindex)
+ || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
+#endif
+ ) {
printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
err = try_to_force_load(mod, "nocrc");
if (err)
@@ -2512,7 +2579,7 @@ static int m_show(struct seq_file *m, void *p)
struct module *mod = list_entry(p, struct module, list);
char buf[8];
- seq_printf(m, "%s %lu",
+ seq_printf(m, "%s %u",
mod->name, mod->init_size + mod->core_size);
print_unload_info(m, mod);
@@ -2595,6 +2662,9 @@ struct module *__module_text_address(unsigned long addr)
{
struct module *mod;
+ if (addr < module_addr_min || addr > module_addr_max)
+ return NULL;
+
list_for_each_entry(mod, &modules, list)
if (within(addr, mod->module_init, mod->init_text_size)
|| within(addr, mod->module_core, mod->core_text_size))
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index b45da40e8d2..dcd165f92a8 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -82,7 +82,7 @@ config PM_SLEEP_SMP
config PM_SLEEP
bool
- depends on SUSPEND || HIBERNATION
+ depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE
default y
config SUSPEND
@@ -94,6 +94,17 @@ config SUSPEND
powered and thus its contents are preserved, such as the
suspend-to-RAM state (e.g. the ACPI S3 state).
+config PM_TEST_SUSPEND
+ bool "Test suspend/resume and wakealarm during bootup"
+ depends on SUSPEND && PM_DEBUG && RTC_LIB=y
+ ---help---
+ This option will let you suspend your machine during bootup, and
+ make it wake up a few seconds later using an RTC wakeup alarm.
+ Enable this with a kernel parameter like "test_suspend=mem".
+
+ You probably want to have your system's RTC driver statically
+ linked, ensuring that it's available when this test runs.
+
config SUSPEND_FREEZER
bool "Enable freezer for suspend to RAM/standby" \
if ARCH_WANTS_FREEZER_CONTROL || BROKEN
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 3398f4651aa..95bff23ecda 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -132,6 +132,61 @@ static inline int suspend_test(int level) { return 0; }
#ifdef CONFIG_SUSPEND
+#ifdef CONFIG_PM_TEST_SUSPEND
+
+/*
+ * We test the system suspend code by setting an RTC wakealarm a short
+ * time in the future, then suspending. Suspending the devices won't
+ * normally take long ... some systems only need a few milliseconds.
+ *
+ * The time it takes is system-specific though, so when we test this
+ * during system bootup we allow a LOT of time.
+ */
+#define TEST_SUSPEND_SECONDS 5
+
+static unsigned long suspend_test_start_time;
+
+static void suspend_test_start(void)
+{
+ /* FIXME Use better timebase than "jiffies", ideally a clocksource.
+ * What we want is a hardware counter that will work correctly even
+ * during the irqs-are-off stages of the suspend/resume cycle...
+ */
+ suspend_test_start_time = jiffies;
+}
+
+static void suspend_test_finish(const char *label)
+{
+ long nj = jiffies - suspend_test_start_time;
+ unsigned msec;
+
+ msec = jiffies_to_msecs(abs(nj));
+ pr_info("PM: %s took %d.%03d seconds\n", label,
+ msec / 1000, msec % 1000);
+
+ /* Warning on suspend means the RTC alarm period needs to be
+ * larger -- the system was sooo slooowwww to suspend that the
+ * alarm (should have) fired before the system went to sleep!
+ *
+ * Warning on either suspend or resume also means the system
+ * has some performance issues. The stack dump of a WARN_ON
+ * is more likely to get the right attention than a printk...
+ */
+ WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000));
+}
+
+#else
+
+static void suspend_test_start(void)
+{
+}
+
+static void suspend_test_finish(const char *label)
+{
+}
+
+#endif
+
/* This is just an arbitrary number */
#define FREE_PAGE_NUMBER (100)
@@ -266,12 +321,13 @@ int suspend_devices_and_enter(suspend_state_t state)
goto Close;
}
suspend_console();
+ suspend_test_start();
error = device_suspend(PMSG_SUSPEND);
if (error) {
printk(KERN_ERR "PM: Some devices failed to suspend\n");
goto Recover_platform;
}
-
+ suspend_test_finish("suspend devices");
if (suspend_test(TEST_DEVICES))
goto Recover_platform;
@@ -293,7 +349,9 @@ int suspend_devices_and_enter(suspend_state_t state)
if (suspend_ops->finish)
suspend_ops->finish();
Resume_devices:
+ suspend_test_start();
device_resume(PMSG_RESUME);
+ suspend_test_finish("resume devices");
resume_console();
Close:
if (suspend_ops->end)
@@ -521,3 +579,137 @@ static int __init pm_init(void)
}
core_initcall(pm_init);
+
+
+#ifdef CONFIG_PM_TEST_SUSPEND
+
+#include <linux/rtc.h>
+
+/*
+ * To test system suspend, we need a hands-off mechanism to resume the
+ * system. RTCs wake alarms are a common self-contained mechanism.
+ */
+
+static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
+{
+ static char err_readtime[] __initdata =
+ KERN_ERR "PM: can't read %s time, err %d\n";
+ static char err_wakealarm [] __initdata =
+ KERN_ERR "PM: can't set %s wakealarm, err %d\n";
+ static char err_suspend[] __initdata =
+ KERN_ERR "PM: suspend test failed, error %d\n";
+ static char info_test[] __initdata =
+ KERN_INFO "PM: test RTC wakeup from '%s' suspend\n";
+
+ unsigned long now;
+ struct rtc_wkalrm alm;
+ int status;
+
+ /* this may fail if the RTC hasn't been initialized */
+ status = rtc_read_time(rtc, &alm.time);
+ if (status < 0) {
+ printk(err_readtime, rtc->dev.bus_id, status);
+ return;
+ }
+ rtc_tm_to_time(&alm.time, &now);
+
+ memset(&alm, 0, sizeof alm);
+ rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time);
+ alm.enabled = true;
+
+ status = rtc_set_alarm(rtc, &alm);
+ if (status < 0) {
+ printk(err_wakealarm, rtc->dev.bus_id, status);
+ return;
+ }
+
+ if (state == PM_SUSPEND_MEM) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ if (status == -ENODEV)
+ state = PM_SUSPEND_STANDBY;
+ }
+ if (state == PM_SUSPEND_STANDBY) {
+ printk(info_test, pm_states[state]);
+ status = pm_suspend(state);
+ }
+ if (status < 0)
+ printk(err_suspend, status);
+}
+
+static int __init has_wakealarm(struct device *dev, void *name_ptr)
+{
+ struct rtc_device *candidate = to_rtc_device(dev);
+
+ if (!candidate->ops->set_alarm)
+ return 0;
+ if (!device_may_wakeup(candidate->dev.parent))
+ return 0;
+
+ *(char **)name_ptr = dev->bus_id;
+ return 1;
+}
+
+/*
+ * Kernel options like "test_suspend=mem" force suspend/resume sanity tests
+ * at startup time. They're normally disabled, for faster boot and because
+ * we can't know which states really work on this particular system.
+ */
+static suspend_state_t test_state __initdata = PM_SUSPEND_ON;
+
+static char warn_bad_state[] __initdata =
+ KERN_WARNING "PM: can't test '%s' suspend state\n";
+
+static int __init setup_test_suspend(char *value)
+{
+ unsigned i;
+
+ /* "=mem" ==> "mem" */
+ value++;
+ for (i = 0; i < PM_SUSPEND_MAX; i++) {
+ if (!pm_states[i])
+ continue;
+ if (strcmp(pm_states[i], value) != 0)
+ continue;
+ test_state = (__force suspend_state_t) i;
+ return 0;
+ }
+ printk(warn_bad_state, value);
+ return 0;
+}
+__setup("test_suspend", setup_test_suspend);
+
+static int __init test_suspend(void)
+{
+ static char warn_no_rtc[] __initdata =
+ KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n";
+
+ char *pony = NULL;
+ struct rtc_device *rtc = NULL;
+
+ /* PM is initialized by now; is that state testable? */
+ if (test_state == PM_SUSPEND_ON)
+ goto done;
+ if (!valid_state(test_state)) {
+ printk(warn_bad_state, pm_states[test_state]);
+ goto done;
+ }
+
+ /* RTCs have initialized by now too ... can we use one? */
+ class_find_device(rtc_class, NULL, &pony, has_wakealarm);
+ if (pony)
+ rtc = rtc_class_open(pony);
+ if (!rtc) {
+ printk(warn_no_rtc);
+ goto done;
+ }
+
+ /* go for it */
+ test_wakealarm(rtc, test_state);
+ rtc_class_close(rtc);
+done:
+ return 0;
+}
+late_initcall(test_suspend);
+
+#endif /* CONFIG_PM_TEST_SUSPEND */
diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c
index 678ec736076..72016f05147 100644
--- a/kernel/power/poweroff.c
+++ b/kernel/power/poweroff.c
@@ -10,6 +10,7 @@
#include <linux/pm.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
+#include <linux/cpumask.h>
/*
* When the user hits Sys-Rq o to power down the machine this is the
@@ -25,7 +26,8 @@ static DECLARE_WORK(poweroff_work, do_poweroff);
static void handle_poweroff(int key, struct tty_struct *tty)
{
- schedule_work(&poweroff_work);
+ /* run sysrq poweroff on boot cpu */
+ schedule_work_on(first_cpu(cpu_online_map), &poweroff_work);
}
static struct sysrq_key_op sysrq_poweroff_op = {
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 5fb87652f21..278946aecaf 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -149,7 +149,7 @@ static int try_to_freeze_tasks(bool sig_only)
unsigned long end_time;
unsigned int todo;
struct timeval start, end;
- s64 elapsed_csecs64;
+ u64 elapsed_csecs64;
unsigned int elapsed_csecs;
do_gettimeofday(&start);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 5f91a07c4ea..5d2ab836e99 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -205,8 +205,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
* objects. The main list's elements are of type struct zone_bitmap
* and each of them corresonds to one zone. For each zone bitmap
* object there is a list of objects of type struct bm_block that
- * represent each blocks of bit chunks in which information is
- * stored.
+ * represent each blocks of bitmap in which information is stored.
*
* struct memory_bitmap contains a pointer to the main list of zone
* bitmap objects, a struct bm_position used for browsing the bitmap,
@@ -224,26 +223,27 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
* pfns that correspond to the start and end of the represented zone.
*
* struct bm_block contains a pointer to the memory page in which
- * information is stored (in the form of a block of bit chunks
- * of type unsigned long each). It also contains the pfns that
- * correspond to the start and end of the represented memory area and
- * the number of bit chunks in the block.
+ * information is stored (in the form of a block of bitmap)
+ * It also contains the pfns that correspond to the start and end of
+ * the represented memory area.
*/
#define BM_END_OF_MAP (~0UL)
-#define BM_CHUNKS_PER_BLOCK (PAGE_SIZE / sizeof(long))
-#define BM_BITS_PER_CHUNK (sizeof(long) << 3)
#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
struct bm_block {
struct bm_block *next; /* next element of the list */
unsigned long start_pfn; /* pfn represented by the first bit */
unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
- unsigned int size; /* number of bit chunks */
- unsigned long *data; /* chunks of bits representing pages */
+ unsigned long *data; /* bitmap representing pages */
};
+static inline unsigned long bm_block_bits(struct bm_block *bb)
+{
+ return bb->end_pfn - bb->start_pfn;
+}
+
struct zone_bitmap {
struct zone_bitmap *next; /* next element of the list */
unsigned long start_pfn; /* minimal pfn in this zone */
@@ -257,7 +257,6 @@ struct zone_bitmap {
struct bm_position {
struct zone_bitmap *zone_bm;
struct bm_block *block;
- int chunk;
int bit;
};
@@ -272,12 +271,6 @@ struct memory_bitmap {
/* Functions that operate on memory bitmaps */
-static inline void memory_bm_reset_chunk(struct memory_bitmap *bm)
-{
- bm->cur.chunk = 0;
- bm->cur.bit = -1;
-}
-
static void memory_bm_position_reset(struct memory_bitmap *bm)
{
struct zone_bitmap *zone_bm;
@@ -285,7 +278,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
zone_bm = bm->zone_bm_list;
bm->cur.zone_bm = zone_bm;
bm->cur.block = zone_bm->bm_blocks;
- memory_bm_reset_chunk(bm);
+ bm->cur.bit = 0;
}
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
@@ -394,12 +387,10 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
bb->start_pfn = pfn;
if (nr >= BM_BITS_PER_BLOCK) {
pfn += BM_BITS_PER_BLOCK;
- bb->size = BM_CHUNKS_PER_BLOCK;
nr -= BM_BITS_PER_BLOCK;
} else {
/* This is executed only once in the loop */
pfn += nr;
- bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK);
}
bb->end_pfn = pfn;
bb = bb->next;
@@ -478,8 +469,8 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
}
zone_bm->cur_block = bb;
pfn -= bb->start_pfn;
- *bit_nr = pfn % BM_BITS_PER_CHUNK;
- *addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+ *bit_nr = pfn;
+ *addr = bb->data;
return 0;
}
@@ -528,36 +519,6 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
return test_bit(bit, addr);
}
-/* Two auxiliary functions for memory_bm_next_pfn */
-
-/* Find the first set bit in the given chunk, if there is one */
-
-static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p)
-{
- bit++;
- while (bit < BM_BITS_PER_CHUNK) {
- if (test_bit(bit, chunk_p))
- return bit;
-
- bit++;
- }
- return -1;
-}
-
-/* Find a chunk containing some bits set in given block of bits */
-
-static inline int next_chunk_in_block(int n, struct bm_block *bb)
-{
- n++;
- while (n < bb->size) {
- if (bb->data[n])
- return n;
-
- n++;
- }
- return -1;
-}
-
/**
* memory_bm_next_pfn - find the pfn that corresponds to the next set bit
* in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
@@ -571,40 +532,33 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
{
struct zone_bitmap *zone_bm;
struct bm_block *bb;
- int chunk;
int bit;
do {
bb = bm->cur.block;
do {
- chunk = bm->cur.chunk;
bit = bm->cur.bit;
- do {
- bit = next_bit_in_chunk(bit, bb->data + chunk);
- if (bit >= 0)
- goto Return_pfn;
-
- chunk = next_chunk_in_block(chunk, bb);
- bit = -1;
- } while (chunk >= 0);
+ bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
+ if (bit < bm_block_bits(bb))
+ goto Return_pfn;
+
bb = bb->next;
bm->cur.block = bb;
- memory_bm_reset_chunk(bm);
+ bm->cur.bit = 0;
} while (bb);
zone_bm = bm->cur.zone_bm->next;
if (zone_bm) {
bm->cur.zone_bm = zone_bm;
bm->cur.block = zone_bm->bm_blocks;
- memory_bm_reset_chunk(bm);
+ bm->cur.bit = 0;
}
} while (zone_bm);
memory_bm_position_reset(bm);
return BM_END_OF_MAP;
Return_pfn:
- bm->cur.chunk = chunk;
- bm->cur.bit = bit;
- return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
+ bm->cur.bit = bit + 1;
+ return bb->start_pfn + bit;
}
/**
diff --git a/kernel/printk.c b/kernel/printk.c
index 07ad9e7f7a6..3f7a2a94583 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -933,7 +933,7 @@ void suspend_console(void)
{
if (!console_suspend_enabled)
return;
- printk("Suspending console(s)\n");
+ printk("Suspending console(s) (use no_console_suspend to debug)\n");
acquire_console_sem();
console_suspended = 1;
}
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 16eeeaa9d61..6f8696c502f 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -106,7 +106,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
*/
cpus_and(cpumask, rcp->cpumask, cpu_online_map);
cpu_clear(rdp->cpu, cpumask);
- for_each_cpu_mask(cpu, cpumask)
+ for_each_cpu_mask_nr(cpu, cpumask)
smp_send_reschedule(cpu);
}
}
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 6f62b77d93c..27827931ca0 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -756,7 +756,7 @@ rcu_try_flip_idle(void)
/* Now ask each CPU for acknowledgement of the flip. */
- for_each_cpu_mask(cpu, rcu_cpu_online_map) {
+ for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
dyntick_save_progress_counter(cpu);
}
@@ -774,7 +774,7 @@ rcu_try_flip_waitack(void)
int cpu;
RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
- for_each_cpu_mask(cpu, rcu_cpu_online_map)
+ for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
if (rcu_try_flip_waitack_needed(cpu) &&
per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
@@ -806,7 +806,7 @@ rcu_try_flip_waitzero(void)
/* Check to see if the sum of the "last" counters is zero. */
RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
- for_each_cpu_mask(cpu, rcu_cpu_online_map)
+ for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
if (sum != 0) {
RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
@@ -821,7 +821,7 @@ rcu_try_flip_waitzero(void)
smp_mb(); /* ^^^^^^^^^^^^ */
/* Call for a memory barrier from each CPU. */
- for_each_cpu_mask(cpu, rcu_cpu_online_map) {
+ for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
dyntick_save_progress_counter(cpu);
}
@@ -841,7 +841,7 @@ rcu_try_flip_waitmb(void)
int cpu;
RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
- for_each_cpu_mask(cpu, rcu_cpu_online_map)
+ for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
if (rcu_try_flip_waitmb_needed(cpu) &&
per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 092e4c620af..a56f629b057 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -297,8 +297,8 @@ static int test_func(void *data)
*
* opcode:data
*/
-static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
- size_t count)
+static ssize_t sysfs_test_command(struct sys_device *dev, struct sysdev_attribute *attr,
+ const char *buf, size_t count)
{
struct sched_param schedpar;
struct test_thread_data *td;
@@ -360,7 +360,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
* @dev: thread to query
* @buf: char buffer to be filled with thread status info
*/
-static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)
+static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute *attr,
+ char *buf)
{
struct test_thread_data *td;
struct task_struct *tsk;
diff --git a/kernel/sched.c b/kernel/sched.c
index c13c75e9f9f..6acf749d333 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1006,7 +1006,7 @@ static inline int hrtick_enabled(struct rq *rq)
{
if (!sched_feat(HRTICK))
return 0;
- if (!cpu_online(cpu_of(rq)))
+ if (!cpu_active(cpu_of(rq)))
return 0;
return hrtimer_is_hres_active(&rq->hrtick_timer);
}
@@ -2029,7 +2029,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
/* Tally up the load of all CPUs in the group */
avg_load = 0;
- for_each_cpu_mask(i, group->cpumask) {
+ for_each_cpu_mask_nr(i, group->cpumask) {
/* Bias balancing toward cpus of our domain */
if (local_group)
load = source_load(i, load_idx);
@@ -2071,7 +2071,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
/* Traverse only the allowed CPUs */
cpus_and(*tmp, group->cpumask, p->cpus_allowed);
- for_each_cpu_mask(i, *tmp) {
+ for_each_cpu_mask_nr(i, *tmp) {
load = weighted_cpuload(i);
if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2802,7 +2802,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
rq = task_rq_lock(p, &flags);
if (!cpu_isset(dest_cpu, p->cpus_allowed)
- || unlikely(cpu_is_offline(dest_cpu)))
+ || unlikely(!cpu_active(dest_cpu)))
goto out;
/* force the process onto the specified CPU */
@@ -3089,7 +3089,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
max_cpu_load = 0;
min_cpu_load = ~0UL;
- for_each_cpu_mask(i, group->cpumask) {
+ for_each_cpu_mask_nr(i, group->cpumask) {
struct rq *rq;
if (!cpu_isset(i, *cpus))
@@ -3368,7 +3368,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
unsigned long max_load = 0;
int i;
- for_each_cpu_mask(i, group->cpumask) {
+ for_each_cpu_mask_nr(i, group->cpumask) {
unsigned long wl;
if (!cpu_isset(i, *cpus))
@@ -3770,7 +3770,7 @@ int select_nohz_load_balancer(int stop_tick)
/*
* If we are going offline and still the leader, give up!
*/
- if (cpu_is_offline(cpu) &&
+ if (!cpu_active(cpu) &&
atomic_read(&nohz.load_balancer) == cpu) {
if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
BUG();
@@ -3910,7 +3910,7 @@ static void run_rebalance_domains(struct softirq_action *h)
int balance_cpu;
cpu_clear(this_cpu, cpus);
- for_each_cpu_mask(balance_cpu, cpus) {
+ for_each_cpu_mask_nr(balance_cpu, cpus) {
/*
* If this cpu gets work to do, stop the load balancing
* work being done for other cpus. Next load
@@ -5794,7 +5794,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
struct rq *rq_dest, *rq_src;
int ret = 0, on_rq;
- if (unlikely(cpu_is_offline(dest_cpu)))
+ if (unlikely(!cpu_active(dest_cpu)))
return ret;
rq_src = cpu_rq(src_cpu);
@@ -6721,7 +6721,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
cpus_clear(*covered);
- for_each_cpu_mask(i, *span) {
+ for_each_cpu_mask_nr(i, *span) {
struct sched_group *sg;
int group = group_fn(i, cpu_map, &sg, tmpmask);
int j;
@@ -6732,7 +6732,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
cpus_clear(sg->cpumask);
sg->__cpu_power = 0;
- for_each_cpu_mask(j, *span) {
+ for_each_cpu_mask_nr(j, *span) {
if (group_fn(j, cpu_map, NULL, tmpmask) != group)
continue;
@@ -6932,7 +6932,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
if (!sg)
return;
do {
- for_each_cpu_mask(j, sg->cpumask) {
+ for_each_cpu_mask_nr(j, sg->cpumask) {
struct sched_domain *sd;
sd = &per_cpu(phys_domains, j);
@@ -6957,7 +6957,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
{
int cpu, i;
- for_each_cpu_mask(cpu, *cpu_map) {
+ for_each_cpu_mask_nr(cpu, *cpu_map) {
struct sched_group **sched_group_nodes
= sched_group_nodes_bycpu[cpu];
@@ -7196,7 +7196,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
/*
* Set up domains for cpus specified by the cpu_map.
*/
- for_each_cpu_mask(i, *cpu_map) {
+ for_each_cpu_mask_nr(i, *cpu_map) {
struct sched_domain *sd = NULL, *p;
SCHED_CPUMASK_VAR(nodemask, allmasks);
@@ -7263,7 +7263,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
#ifdef CONFIG_SCHED_SMT
/* Set up CPU (sibling) groups */
- for_each_cpu_mask(i, *cpu_map) {
+ for_each_cpu_mask_nr(i, *cpu_map) {
SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
SCHED_CPUMASK_VAR(send_covered, allmasks);
@@ -7280,7 +7280,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
#ifdef CONFIG_SCHED_MC
/* Set up multi-core groups */
- for_each_cpu_mask(i, *cpu_map) {
+ for_each_cpu_mask_nr(i, *cpu_map) {
SCHED_CPUMASK_VAR(this_core_map, allmasks);
SCHED_CPUMASK_VAR(send_covered, allmasks);
@@ -7347,7 +7347,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
goto error;
}
sched_group_nodes[i] = sg;
- for_each_cpu_mask(j, *nodemask) {
+ for_each_cpu_mask_nr(j, *nodemask) {
struct sched_domain *sd;
sd = &per_cpu(node_domains, j);
@@ -7393,21 +7393,21 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
/* Calculate CPU power for physical packages and nodes */
#ifdef CONFIG_SCHED_SMT
- for_each_cpu_mask(i, *cpu_map) {
+ for_each_cpu_mask_nr(i, *cpu_map) {
struct sched_domain *sd = &per_cpu(cpu_domains, i);
init_sched_groups_power(i, sd);
}
#endif
#ifdef CONFIG_SCHED_MC
- for_each_cpu_mask(i, *cpu_map) {
+ for_each_cpu_mask_nr(i, *cpu_map) {
struct sched_domain *sd = &per_cpu(core_domains, i);
init_sched_groups_power(i, sd);
}
#endif
- for_each_cpu_mask(i, *cpu_map) {
+ for_each_cpu_mask_nr(i, *cpu_map) {
struct sched_domain *sd = &per_cpu(phys_domains, i);
init_sched_groups_power(i, sd);
@@ -7427,7 +7427,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
#endif
/* Attach the domains */
- for_each_cpu_mask(i, *cpu_map) {
+ for_each_cpu_mask_nr(i, *cpu_map) {
struct sched_domain *sd;
#ifdef CONFIG_SCHED_SMT
sd = &per_cpu(cpu_domains, i);
@@ -7472,18 +7472,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
}
/*
- * Free current domain masks.
- * Called after all cpus are attached to NULL domain.
- */
-static void free_sched_domains(void)
-{
- ndoms_cur = 0;
- if (doms_cur != &fallback_doms)
- kfree(doms_cur);
- doms_cur = &fallback_doms;
-}
-
-/*
* Set up scheduler domains and groups. Callers must hold the hotplug lock.
* For now this just excludes isolated cpus, but could be used to
* exclude other special cases in the future.
@@ -7522,7 +7510,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
unregister_sched_domain_sysctl();
- for_each_cpu_mask(i, *cpu_map)
+ for_each_cpu_mask_nr(i, *cpu_map)
cpu_attach_domain(NULL, &def_root_domain, i);
synchronize_sched();
arch_destroy_sched_domains(cpu_map, &tmpmask);
@@ -7561,7 +7549,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
* ownership of it and will kfree it when done with it. If the caller
* failed the kmalloc call, then it can pass in doms_new == NULL,
* and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms'.
+ * 'fallback_doms', it also forces the domains to be rebuilt.
*
* Call with hotplug lock held
*/
@@ -7575,12 +7563,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
/* always unregister in case we don't destroy any domains */
unregister_sched_domain_sysctl();
- if (doms_new == NULL) {
- ndoms_new = 1;
- doms_new = &fallback_doms;
- cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
- dattr_new = NULL;
- }
+ if (doms_new == NULL)
+ ndoms_new = 0;
/* Destroy deleted domains */
for (i = 0; i < ndoms_cur; i++) {
@@ -7595,6 +7579,14 @@ match1:
;
}
+ if (doms_new == NULL) {
+ ndoms_cur = 0;
+ ndoms_new = 1;
+ doms_new = &fallback_doms;
+ cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+ dattr_new = NULL;
+ }
+
/* Build new domains */
for (i = 0; i < ndoms_new; i++) {
for (j = 0; j < ndoms_cur; j++) {
@@ -7625,17 +7617,10 @@ match2:
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
int arch_reinit_sched_domains(void)
{
- int err;
-
get_online_cpus();
- mutex_lock(&sched_domains_mutex);
- detach_destroy_domains(&cpu_online_map);
- free_sched_domains();
- err = arch_init_sched_domains(&cpu_online_map);
- mutex_unlock(&sched_domains_mutex);
+ rebuild_sched_domains();
put_online_cpus();
-
- return err;
+ return 0;
}
static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7656,11 +7641,13 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
}
#ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
+static ssize_t sched_mc_power_savings_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *page)
{
return sprintf(page, "%u\n", sched_mc_power_savings);
}
static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
+ struct sysdev_attribute *attr,
const char *buf, size_t count)
{
return sched_power_savings_store(buf, count, 0);
@@ -7670,11 +7657,13 @@ static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
#endif
#ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
+static ssize_t sched_smt_power_savings_show(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *page)
{
return sprintf(page, "%u\n", sched_smt_power_savings);
}
static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
+ struct sysdev_attribute *attr,
const char *buf, size_t count)
{
return sched_power_savings_store(buf, count, 1);
@@ -7701,59 +7690,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
}
#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
+#ifndef CONFIG_CPUSETS
/*
- * Force a reinitialization of the sched domains hierarchy. The domains
- * and groups cannot be updated in place without racing with the balancing
- * code, so we temporarily attach all running cpus to the NULL domain
- * which will prevent rebalancing while the sched domains are recalculated.
+ * Add online and remove offline CPUs from the scheduler domains.
+ * When cpusets are enabled they take over this function.
*/
static int update_sched_domains(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ partition_sched_domains(0, NULL, NULL);
+ return NOTIFY_OK;
+
+ default:
+ return NOTIFY_DONE;
+ }
+}
+#endif
+
+static int update_runtime(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
int cpu = (int)(long)hcpu;
switch (action) {
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
disable_runtime(cpu_rq(cpu));
- /* fall-through */
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- detach_destroy_domains(&cpu_online_map);
- free_sched_domains();
return NOTIFY_OK;
-
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
enable_runtime(cpu_rq(cpu));
- /* fall-through */
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- /*
- * Fall through and re-initialise the domains.
- */
- break;
+ return NOTIFY_OK;
+
default:
return NOTIFY_DONE;
}
-
-#ifndef CONFIG_CPUSETS
- /*
- * Create default domain partitioning if cpusets are disabled.
- * Otherwise we let cpusets rebuild the domains based on the
- * current setup.
- */
-
- /* The hotplug lock is already held by cpu_up/cpu_down */
- arch_init_sched_domains(&cpu_online_map);
-#endif
-
- return NOTIFY_OK;
}
void __init sched_init_smp(void)
@@ -7773,8 +7752,15 @@ void __init sched_init_smp(void)
cpu_set(smp_processor_id(), non_isolated_cpus);
mutex_unlock(&sched_domains_mutex);
put_online_cpus();
+
+#ifndef CONFIG_CPUSETS
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);
+#endif
+
+ /* RT runtime code needs to handle some hotplug events */
+ hotcpu_notifier(update_runtime, 0);
+
init_hrtick();
/* Move init over to a non-isolated CPU */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6893b3ed65f..cf2cd6ce4cb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1003,6 +1003,8 @@ static void yield_task_fair(struct rq *rq)
* not idle and an idle cpu is available. The span of cpus to
* search starts with cpus closest then further out as needed,
* so we always favor a closer, idle cpu.
+ * Domains may include CPUs that are not usable for migration,
+ * hence we need to mask them out (cpu_active_map)
*
* Returns the CPU we should wake onto.
*/
@@ -1030,7 +1032,8 @@ static int wake_idle(int cpu, struct task_struct *p)
|| ((sd->flags & SD_WAKE_IDLE_FAR)
&& !task_hot(p, task_rq(p)->clock, sd))) {
cpus_and(tmp, sd->span, p->cpus_allowed);
- for_each_cpu_mask(i, tmp) {
+ cpus_and(tmp, tmp, cpu_active_map);
+ for_each_cpu_mask_nr(i, tmp) {
if (idle_cpu(i)) {
if (i != task_cpu(p)) {
schedstat_inc(p,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 93ac8ee0827..908c04f9dad 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -240,7 +240,7 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
spin_lock(&rt_b->rt_runtime_lock);
rt_period = ktime_to_ns(rt_b->rt_period);
- for_each_cpu_mask(i, rd->span) {
+ for_each_cpu_mask_nr(i, rd->span) {
struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
s64 diff;
@@ -601,11 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
return;
- if (rt_se->nr_cpus_allowed == 1)
- list_add(&rt_se->run_list, queue);
- else
- list_add_tail(&rt_se->run_list, queue);
-
+ list_add_tail(&rt_se->run_list, queue);
__set_bit(rt_se_prio(rt_se), array->bitmap);
inc_rt_tasks(rt_se, rt_rq);
@@ -690,32 +686,34 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
* Put task to the end of the run list without the overhead of dequeue
* followed by enqueue.
*/
-static
-void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+static void
+requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
- struct rt_prio_array *array = &rt_rq->active;
-
if (on_rt_rq(rt_se)) {
- list_del_init(&rt_se->run_list);
- list_add_tail(&rt_se->run_list,
- array->queue + rt_se_prio(rt_se));
+ struct rt_prio_array *array = &rt_rq->active;
+ struct list_head *queue = array->queue + rt_se_prio(rt_se);
+
+ if (head)
+ list_move(&rt_se->run_list, queue);
+ else
+ list_move_tail(&rt_se->run_list, queue);
}
}
-static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
struct sched_rt_entity *rt_se = &p->rt;
struct rt_rq *rt_rq;
for_each_sched_rt_entity(rt_se) {
rt_rq = rt_rq_of_se(rt_se);
- requeue_rt_entity(rt_rq, rt_se);
+ requeue_rt_entity(rt_rq, rt_se, head);
}
}
static void yield_task_rt(struct rq *rq)
{
- requeue_task_rt(rq, rq->curr);
+ requeue_task_rt(rq, rq->curr, 0);
}
#ifdef CONFIG_SMP
@@ -755,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
*/
return task_cpu(p);
}
+
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+{
+ cpumask_t mask;
+
+ if (rq->curr->rt.nr_cpus_allowed == 1)
+ return;
+
+ if (p->rt.nr_cpus_allowed != 1
+ && cpupri_find(&rq->rd->cpupri, p, &mask))
+ return;
+
+ if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+ return;
+
+ /*
+ * There appears to be other cpus that can accept
+ * current and none to run 'p', so lets reschedule
+ * to try and push current away:
+ */
+ requeue_task_rt(rq, p, 1);
+ resched_task(rq->curr);
+}
+
#endif /* CONFIG_SMP */
/*
@@ -780,18 +802,8 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
* to move current somewhere else, making room for our non-migratable
* task.
*/
- if((p->prio == rq->curr->prio)
- && p->rt.nr_cpus_allowed == 1
- && rq->curr->rt.nr_cpus_allowed != 1) {
- cpumask_t mask;
-
- if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
- /*
- * There appears to be other cpus that can accept
- * current, so lets reschedule to try and push it away
- */
- resched_task(rq->curr);
- }
+ if (p->prio == rq->curr->prio && !need_resched())
+ check_preempt_equal_prio(rq, p);
#endif
}
@@ -924,6 +936,13 @@ static int find_lowest_rq(struct task_struct *task)
return -1; /* No targets found */
/*
+ * Only consider CPUs that are usable for migration.
+ * I guess we might want to change cpupri_find() to ignore those
+ * in the first place.
+ */
+ cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
+
+ /*
* At this point we have built a mask of cpus representing the
* lowest priority tasks in the system. Now we want to elect
* the best one based on our affinity and topology.
@@ -1109,7 +1128,7 @@ static int pull_rt_task(struct rq *this_rq)
next = pick_next_task_rt(this_rq);
- for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
+ for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
if (this_cpu == cpu)
continue;
@@ -1417,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
* on the queue:
*/
if (p->rt.run_list.prev != p->rt.run_list.next) {
- requeue_task_rt(rq, p);
+ requeue_task_rt(rq, p, 0);
set_tsk_need_resched(p);
}
}
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index a272d78185e..7bd8d1aadd5 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -13,6 +13,7 @@
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
+#include <linux/lockdep.h>
#include <linux/notifier.h>
#include <linux/module.h>
@@ -25,7 +26,22 @@ static DEFINE_PER_CPU(unsigned long, print_timestamp);
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
static int __read_mostly did_panic;
-unsigned long __read_mostly softlockup_thresh = 60;
+int __read_mostly softlockup_thresh = 60;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * soft-lockup occurs:
+ */
+unsigned int __read_mostly softlockup_panic =
+ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static int __init softlockup_panic_setup(char *str)
+{
+ softlockup_panic = simple_strtoul(str, NULL, 0);
+
+ return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
static int
softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -84,6 +100,14 @@ void softlockup_tick(void)
struct pt_regs *regs = get_irq_regs();
unsigned long now;
+ /* Is detection switched off? */
+ if (!per_cpu(watchdog_task, this_cpu) || softlockup_thresh <= 0) {
+ /* Be sure we don't false trigger if switched back on */
+ if (touch_timestamp)
+ per_cpu(touch_timestamp, this_cpu) = 0;
+ return;
+ }
+
if (touch_timestamp == 0) {
__touch_softlockup_watchdog();
return;
@@ -92,11 +116,8 @@ void softlockup_tick(void)
print_timestamp = per_cpu(print_timestamp, this_cpu);
/* report at most once a second */
- if ((print_timestamp >= touch_timestamp &&
- print_timestamp < (touch_timestamp + 1)) ||
- did_panic || !per_cpu(watchdog_task, this_cpu)) {
+ if (print_timestamp == touch_timestamp || did_panic)
return;
- }
/* do not print during early bootup: */
if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -106,8 +127,11 @@ void softlockup_tick(void)
now = get_timestamp(this_cpu);
- /* Wake up the high-prio watchdog task every second: */
- if (now > (touch_timestamp + 1))
+ /*
+ * Wake up the high-prio watchdog task twice per
+ * threshold timespan.
+ */
+ if (now > touch_timestamp + softlockup_thresh/2)
wake_up_process(per_cpu(watchdog_task, this_cpu));
/* Warn about unreasonable delays: */
@@ -121,11 +145,15 @@ void softlockup_tick(void)
this_cpu, now - touch_timestamp,
current->comm, task_pid_nr(current));
print_modules();
+ print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
spin_unlock(&print_lock);
+
+ if (softlockup_panic)
+ panic("softlockup: hung tasks");
}
/*
@@ -178,6 +206,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
t->last_switch_timestamp = now;
touch_nmi_watchdog();
+
+ if (softlockup_panic)
+ panic("softlockup: blocked tasks");
}
/*
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index ba9b2054ecb..738b411ff2d 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -33,8 +33,9 @@ static int stopmachine(void *cpu)
{
int irqs_disabled = 0;
int prepared = 0;
+ cpumask_of_cpu_ptr(cpumask, (int)(long)cpu);
- set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
+ set_cpus_allowed_ptr(current, cpumask);
/* Ack: we are alive */
smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 5b9b467de07..bd66ac5406f 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -31,6 +31,7 @@ cond_syscall(sys_socketpair);
cond_syscall(sys_bind);
cond_syscall(sys_listen);
cond_syscall(sys_accept);
+cond_syscall(sys_paccept);
cond_syscall(sys_connect);
cond_syscall(sys_getsockname);
cond_syscall(sys_getpeername);
@@ -59,6 +60,7 @@ cond_syscall(sys_epoll_create);
cond_syscall(sys_epoll_ctl);
cond_syscall(sys_epoll_wait);
cond_syscall(sys_epoll_pwait);
+cond_syscall(compat_sys_epoll_pwait);
cond_syscall(sys_semget);
cond_syscall(sys_semop);
cond_syscall(sys_semtimedop);
@@ -94,6 +96,7 @@ cond_syscall(sys_keyctl);
cond_syscall(compat_sys_keyctl);
cond_syscall(compat_sys_socketcall);
cond_syscall(sys_inotify_init);
+cond_syscall(sys_inotify_init1);
cond_syscall(sys_inotify_add_watch);
cond_syscall(sys_inotify_rm_watch);
cond_syscall(sys_migrate_pages);
@@ -154,6 +157,7 @@ cond_syscall(sys_ioprio_get);
/* New file descriptors */
cond_syscall(sys_signalfd);
+cond_syscall(sys_signalfd4);
cond_syscall(compat_sys_signalfd);
cond_syscall(sys_timerfd_create);
cond_syscall(sys_timerfd_settime);
@@ -161,3 +165,4 @@ cond_syscall(sys_timerfd_gettime);
cond_syscall(compat_sys_timerfd_settime);
cond_syscall(compat_sys_timerfd_gettime);
cond_syscall(sys_eventfd);
+cond_syscall(sys_eventfd2);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6b16e16428d..1a8299d1fe5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -43,6 +43,7 @@
#include <linux/limits.h>
#include <linux/dcache.h>
#include <linux/syscalls.h>
+#include <linux/vmstat.h>
#include <linux/nfs_fs.h>
#include <linux/acpi.h>
#include <linux/reboot.h>
@@ -80,7 +81,6 @@ extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
extern int compat_log;
extern int maps_protect;
-extern int sysctl_stat_interval;
extern int latencytop_enabled;
extern int sysctl_nr_open_min, sysctl_nr_open_max;
#ifdef CONFIG_RCU_TORTURE_TEST
@@ -88,12 +88,13 @@ extern int rcutorture_runnable;
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
/* Constants used for minimum and maximum */
-#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
static int one = 1;
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
static int sixty = 60;
+static int neg_one = -1;
#endif
#ifdef CONFIG_MMU
@@ -110,7 +111,7 @@ static int min_percpu_pagelist_fract = 8;
static int ngroups_max = NGROUPS_MAX;
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
extern char modprobe_path[];
#endif
#ifdef CONFIG_CHR_DEV_SG
@@ -475,7 +476,7 @@ static struct ctl_table kern_table[] = {
.proc_handler = &ftrace_enable_sysctl,
},
#endif
-#ifdef CONFIG_KMOD
+#ifdef CONFIG_MODULES
{
.ctl_name = KERN_MODPROBE,
.procname = "modprobe",
@@ -739,13 +740,24 @@ static struct ctl_table kern_table[] = {
#ifdef CONFIG_DETECT_SOFTLOCKUP
{
.ctl_name = CTL_UNNUMBERED,
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
.procname = "softlockup_thresh",
.data = &softlockup_thresh,
- .maxlen = sizeof(unsigned long),
+ .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_doulongvec_minmax,
+ .proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
- .extra1 = &one,
+ .extra1 = &neg_one,
.extra2 = &sixty,
},
{
@@ -947,7 +959,7 @@ static struct ctl_table vm_table[] = {
#ifdef CONFIG_HUGETLB_PAGE
{
.procname = "nr_hugepages",
- .data = &max_huge_pages,
+ .data = NULL,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &hugetlb_sysctl_handler,
@@ -973,10 +985,12 @@ static struct ctl_table vm_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nr_overcommit_hugepages",
- .data = &sysctl_overcommit_huge_pages,
- .maxlen = sizeof(sysctl_overcommit_huge_pages),
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = &hugetlb_overcommit_handler,
+ .extra1 = (void *)&hugetlb_zero,
+ .extra2 = (void *)&hugetlb_infinity,
},
#endif
{
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 4a23517169a..06b17547f4e 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -301,7 +301,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
return -EINVAL;
if (isadd == REGISTER) {
- for_each_cpu_mask(cpu, mask) {
+ for_each_cpu_mask_nr(cpu, mask) {
s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
cpu_to_node(cpu));
if (!s)
@@ -320,7 +320,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
/* Deregister or cleanup */
cleanup:
- for_each_cpu_mask(cpu, mask) {
+ for_each_cpu_mask_nr(cpu, mask) {
listeners = &per_cpu(listener_array, cpu);
down_write(&listeners->sem);
list_for_each_entry_safe(s, tmp, &listeners->list, list) {
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index dadde5361f3..093d4acf993 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -145,9 +145,9 @@ static void clocksource_watchdog(unsigned long data)
* Cycle through CPUs to check if the CPUs stay
* synchronized to each other.
*/
- int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map);
+ int next_cpu = next_cpu_nr(raw_smp_processor_id(), cpu_online_map);
- if (next_cpu >= NR_CPUS)
+ if (next_cpu >= nr_cpu_ids)
next_cpu = first_cpu(cpu_online_map);
watchdog_timer.expires += WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer, next_cpu);
@@ -376,7 +376,8 @@ void clocksource_unregister(struct clocksource *cs)
* Provides sysfs interface for listing current clocksource.
*/
static ssize_t
-sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
+sysfs_show_current_clocksources(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
{
ssize_t count = 0;
@@ -397,6 +398,7 @@ sysfs_show_current_clocksources(struct sys_device *dev, char *buf)
* clocksource selction.
*/
static ssize_t sysfs_override_clocksource(struct sys_device *dev,
+ struct sysdev_attribute *attr,
const char *buf, size_t count)
{
struct clocksource *ovr = NULL;
@@ -449,7 +451,9 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
* Provides sysfs interface for listing registered clocksources
*/
static ssize_t
-sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
+sysfs_show_available_clocksources(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ char *buf)
{
struct clocksource *src;
ssize_t count = 0;
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f48d0f09d32..31463d370b9 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -399,8 +399,7 @@ again:
mask = CPU_MASK_NONE;
now = ktime_get();
/* Find all expired events */
- for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
- cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
+ for_each_cpu_mask_nr(cpu, tick_broadcast_oneshot_mask) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event.tv64 <= now.tv64)
cpu_set(cpu, mask);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 4f3886562b8..bf43284d685 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -135,7 +135,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
*/
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
- cpumask_t cpumask)
+ const cpumask_t *cpumask)
{
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL;
@@ -169,8 +169,8 @@ static void tick_setup_device(struct tick_device *td,
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
- if (!cpus_equal(newdev->cpumask, cpumask))
- irq_set_affinity(newdev->irq, cpumask);
+ if (!cpus_equal(newdev->cpumask, *cpumask))
+ irq_set_affinity(newdev->irq, *cpumask);
/*
* When global broadcasting is active, check if the current
@@ -196,20 +196,20 @@ static int tick_check_new_device(struct clock_event_device *newdev)
struct tick_device *td;
int cpu, ret = NOTIFY_OK;
unsigned long flags;
- cpumask_t cpumask;
+ cpumask_of_cpu_ptr_declare(cpumask);
spin_lock_irqsave(&tick_device_lock, flags);
cpu = smp_processor_id();
+ cpumask_of_cpu_ptr_next(cpumask, cpu);
if (!cpu_isset(cpu, newdev->cpumask))
goto out_bc;
td = &per_cpu(tick_cpu_device, cpu);
curdev = td->evtdev;
- cpumask = cpumask_of_cpu(cpu);
/* cpu local device ? */
- if (!cpus_equal(newdev->cpumask, cpumask)) {
+ if (!cpus_equal(newdev->cpumask, *cpumask)) {
/*
* If the cpu affinity of the device interrupt can not
@@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
* If we have a cpu local device already, do not replace it
* by a non cpu local device
*/
- if (curdev && cpus_equal(curdev->cpumask, cpumask))
+ if (curdev && cpus_equal(curdev->cpumask, *cpumask))
goto out_bc;
}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index beef7ccdf84..942fc7c8528 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -140,8 +140,6 @@ void tick_nohz_update_jiffies(void)
if (!ts->tick_stopped)
return;
- touch_softlockup_watchdog();
-
cpu_clear(cpu, nohz_cpu_mask);
now = ktime_get();
ts->idle_waketime = now;
@@ -149,6 +147,8 @@ void tick_nohz_update_jiffies(void)
local_irq_save(flags);
tick_do_update_jiffies64(now);
local_irq_restore(flags);
+
+ touch_softlockup_watchdog();
}
void tick_nohz_stop_idle(int cpu)
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 2301e1e7c60..63528086337 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -213,7 +213,9 @@ static void start_stack_timers(void)
int cpu;
for_each_online_cpu(cpu) {
- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ cpumask_of_cpu_ptr(new_mask, cpu);
+
+ set_cpus_allowed_ptr(current, new_mask);
start_stack_timer(cpu);
}
set_cpus_allowed_ptr(current, &saved_mask);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ce7799540c9..6fd158b2102 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -140,7 +140,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
wake_up(&cwq->more_work);
}
-/* Preempt must be disabled. */
static void __queue_work(struct cpu_workqueue_struct *cwq,
struct work_struct *work)
{
@@ -175,6 +174,31 @@ int queue_work(struct workqueue_struct *wq, struct work_struct *work)
}
EXPORT_SYMBOL_GPL(queue_work);
+/**
+ * queue_work_on - queue work on specific cpu
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * Returns 0 if @work was already on a queue, non-zero otherwise.
+ *
+ * We queue the work to a specific CPU, the caller must ensure it
+ * can't go away.
+ */
+int
+queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
+{
+ int ret = 0;
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
+ BUG_ON(!list_empty(&work->entry));
+ __queue_work(wq_per_cpu(wq, cpu), work);
+ ret = 1;
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_on);
+
static void delayed_work_timer_fn(unsigned long __data)
{
struct delayed_work *dwork = (struct delayed_work *)__data;
@@ -397,7 +421,7 @@ void flush_workqueue(struct workqueue_struct *wq)
might_sleep();
lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
lock_release(&wq->lockdep_map, 1, _THIS_IP_);
- for_each_cpu_mask(cpu, *cpu_map)
+ for_each_cpu_mask_nr(cpu, *cpu_map)
flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
}
EXPORT_SYMBOL_GPL(flush_workqueue);
@@ -477,7 +501,7 @@ static void wait_on_work(struct work_struct *work)
wq = cwq->wq;
cpu_map = wq_cpu_map(wq);
- for_each_cpu_mask(cpu, *cpu_map)
+ for_each_cpu_mask_nr(cpu, *cpu_map)
wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
}
@@ -553,6 +577,19 @@ int schedule_work(struct work_struct *work)
}
EXPORT_SYMBOL(schedule_work);
+/*
+ * schedule_work_on - put work task on a specific cpu
+ * @cpu: cpu to put the work task on
+ * @work: job to be done
+ *
+ * This puts a job on a specific cpu
+ */
+int schedule_work_on(int cpu, struct work_struct *work)
+{
+ return queue_work_on(cpu, keventd_wq, work);
+}
+EXPORT_SYMBOL(schedule_work_on);
+
/**
* schedule_delayed_work - put work task in global workqueue after delay
* @dwork: job to be done
@@ -813,7 +850,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
list_del(&wq->list);
spin_unlock(&workqueue_lock);
- for_each_cpu_mask(cpu, *cpu_map)
+ for_each_cpu_mask_nr(cpu, *cpu_map)
cleanup_workqueue_thread(per_cpu_ptr(wq->cpu_wq, cpu));
put_online_cpus();