diff options
author | Steve French <sfrench@us.ibm.com> | 2005-11-09 14:33:22 -0800 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2005-11-09 14:33:22 -0800 |
commit | e82b3aec8d508d2a925a4c766e97f16b7c4dfb1b (patch) | |
tree | 69d5685ef0c194f651a03e30bff14628b4d45400 /kernel | |
parent | ec58ef03284f0bfa50a04982b74c8c2325a0758e (diff) | |
parent | ad8f76be48d817b48222411ae16a7dfe257bdb24 (diff) |
Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c | 92 | ||||
-rw-r--r-- | kernel/cpu.c | 33 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 9 | ||||
-rw-r--r-- | kernel/futex.c | 5 | ||||
-rw-r--r-- | kernel/irq/manage.c | 1 | ||||
-rw-r--r-- | kernel/kprobes.c | 134 | ||||
-rw-r--r-- | kernel/module.c | 1 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 6 | ||||
-rw-r--r-- | kernel/power/main.c | 2 | ||||
-rw-r--r-- | kernel/power/power.h | 6 | ||||
-rw-r--r-- | kernel/power/snapshot.c | 100 | ||||
-rw-r--r-- | kernel/power/swsusp.c | 251 | ||||
-rw-r--r-- | kernel/printk.c | 1 | ||||
-rw-r--r-- | kernel/ptrace.c | 84 | ||||
-rw-r--r-- | kernel/sched.c | 164 | ||||
-rw-r--r-- | kernel/softirq.c | 3 | ||||
-rw-r--r-- | kernel/softlockup.c | 6 | ||||
-rw-r--r-- | kernel/sys.c | 26 | ||||
-rw-r--r-- | kernel/sysctl.c | 141 | ||||
-rw-r--r-- | kernel/workqueue.c | 2 |
21 files changed, 674 insertions, 395 deletions
diff --git a/kernel/acct.c b/kernel/acct.c index 2e3f4a47e7d..6312d6bd43e 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -54,6 +54,7 @@ #include <linux/jiffies.h> #include <linux/times.h> #include <linux/syscalls.h> +#include <linux/mount.h> #include <asm/uaccess.h> #include <asm/div64.h> #include <linux/blkdev.h> /* sector_div */ @@ -192,6 +193,7 @@ static void acct_file_reopen(struct file *file) add_timer(&acct_globals.timer); } if (old_acct) { + mnt_unpin(old_acct->f_vfsmnt); spin_unlock(&acct_globals.lock); do_acct_process(0, old_acct); filp_close(old_acct, NULL); @@ -199,6 +201,42 @@ static void acct_file_reopen(struct file *file) } } +static int acct_on(char *name) +{ + struct file *file; + int error; + + /* Difference from BSD - they don't do O_APPEND */ + file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + filp_close(file, NULL); + return -EACCES; + } + + if (!file->f_op->write) { + filp_close(file, NULL); + return -EIO; + } + + error = security_acct(file); + if (error) { + filp_close(file, NULL); + return error; + } + + spin_lock(&acct_globals.lock); + mnt_pin(file->f_vfsmnt); + acct_file_reopen(file); + spin_unlock(&acct_globals.lock); + + mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */ + + return 0; +} + /** * sys_acct - enable/disable process accounting * @name: file name for accounting records or NULL to shutdown accounting @@ -212,47 +250,41 @@ static void acct_file_reopen(struct file *file) */ asmlinkage long sys_acct(const char __user *name) { - struct file *file = NULL; - char *tmp; int error; if (!capable(CAP_SYS_PACCT)) return -EPERM; if (name) { - tmp = getname(name); - if (IS_ERR(tmp)) { + char *tmp = getname(name); + if (IS_ERR(tmp)) return (PTR_ERR(tmp)); - } - /* Difference from BSD - they don't do O_APPEND */ - file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0); + error = acct_on(tmp); putname(tmp); - if (IS_ERR(file)) { - return (PTR_ERR(file)); - } - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - filp_close(file, NULL); - return (-EACCES); - } - - if (!file->f_op->write) { - filp_close(file, NULL); - return (-EIO); + } else { + error = security_acct(NULL); + if (!error) { + spin_lock(&acct_globals.lock); + acct_file_reopen(NULL); + spin_unlock(&acct_globals.lock); } } + return error; +} - error = security_acct(file); - if (error) { - if (file) - filp_close(file, NULL); - return error; - } - +/** + * acct_auto_close - turn off a filesystem's accounting if it is on + * @m: vfsmount being shut down + * + * If the accounting is turned on for a file in the subtree pointed to + * to by m, turn accounting off. Done when m is about to die. + */ +void acct_auto_close_mnt(struct vfsmount *m) +{ spin_lock(&acct_globals.lock); - acct_file_reopen(file); + if (acct_globals.file && acct_globals.file->f_vfsmnt == m) + acct_file_reopen(NULL); spin_unlock(&acct_globals.lock); - - return (0); } /** @@ -266,8 +298,8 @@ void acct_auto_close(struct super_block *sb) { spin_lock(&acct_globals.lock); if (acct_globals.file && - acct_globals.file->f_dentry->d_inode->i_sb == sb) { - acct_file_reopen((struct file *)NULL); + acct_globals.file->f_vfsmnt->mnt_sb == sb) { + acct_file_reopen(NULL); } spin_unlock(&acct_globals.lock); } diff --git a/kernel/cpu.c b/kernel/cpu.c index 3619e939182..d61ba88f34e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,24 @@ EXPORT_SYMBOL_GPL(cpucontrol); static struct notifier_block *cpu_chain; +/* + * Used to check by callers if they need to acquire the cpucontrol + * or not to protect a cpu from being removed. Its sometimes required to + * call these functions both for normal operations, and in response to + * a cpu being added/removed. If the context of the call is in the same + * thread context as a CPU hotplug thread, we dont need to take the lock + * since its already protected + * check drivers/cpufreq/cpufreq.c for its usage - Ashok Raj + */ + +int current_in_cpu_hotplug(void) +{ + return (current->flags & PF_HOTPLUG_CPU); +} + +EXPORT_SYMBOL_GPL(current_in_cpu_hotplug); + + /* Need to know about CPUs going up/down? */ int register_cpu_notifier(struct notifier_block *nb) { @@ -94,6 +112,13 @@ int cpu_down(unsigned int cpu) goto out; } + /* + * Leave a trace in current->flags indicating we are already in + * process of performing CPU hotplug. Callers can check if cpucontrol + * is already acquired by current thread, and if so not cause + * a dead lock by not acquiring the lock + */ + current->flags |= PF_HOTPLUG_CPU; err = notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, (void *)(long)cpu); if (err == NOTIFY_BAD) { @@ -146,6 +171,7 @@ out_thread: out_allowed: set_cpus_allowed(current, old_allowed); out: + current->flags &= ~PF_HOTPLUG_CPU; unlock_cpu_hotplug(); return err; } @@ -163,6 +189,12 @@ int __devinit cpu_up(unsigned int cpu) ret = -EINVAL; goto out; } + + /* + * Leave a trace in current->flags indicating we are already in + * process of performing CPU hotplug. + */ + current->flags |= PF_HOTPLUG_CPU; ret = notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu); if (ret == NOTIFY_BAD) { printk("%s: attempt to bring up CPU %u failed\n", @@ -185,6 +217,7 @@ out_notify: if (ret != 0) notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); out: + current->flags &= ~PF_HOTPLUG_CPU; up(&cpucontrol); return ret; } diff --git a/kernel/exit.c b/kernel/exit.c index 537394b25e8..452a1d11617 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -28,6 +28,7 @@ #include <linux/cpuset.h> #include <linux/syscalls.h> #include <linux/signal.h> +#include <linux/cn_proc.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -863,6 +864,7 @@ fastcall NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); tsk->exit_code = code; + proc_exit_connector(tsk); exit_notify(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); diff --git a/kernel/fork.c b/kernel/fork.c index 8a069612eac..158710d2256 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include <linux/profile.h> #include <linux/rmap.h> #include <linux/acct.h> +#include <linux/cn_proc.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -469,13 +470,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); mm = oldmm; - /* - * There are cases where the PTL is held to ensure no - * new threads start up in user mode using an mm, which - * allows optimizing out ipis; the tlb_gather_mmu code - * is an example. - */ - spin_unlock_wait(&oldmm->page_table_lock); goto good_mm; } @@ -1143,6 +1137,7 @@ static task_t *copy_process(unsigned long clone_flags, __get_cpu_var(process_counts)++; } + proc_fork_connector(p); if (!current->signal->tty && p->signal->tty) p->signal->tty = NULL; diff --git a/kernel/futex.c b/kernel/futex.c index 3b4d5ad44cc..aca8d10704f 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -365,6 +365,11 @@ retry: if (bh1 != bh2) spin_unlock(&bh2->lock); + if (unlikely(op_ret != -EFAULT)) { + ret = op_ret; + goto out; + } + /* futex_atomic_op_inuser needs to both read and write * *(int __user *)uaddr2, but we can't modify it * non-atomically. Therefore, if get_user below is not diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1cfdb08ddf2..3bd7226d15f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -24,6 +24,7 @@ cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for * * This function waits for any pending IRQ handlers for this interrupt * to complete before returning. If you use this function while diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ce4915dd683..5beda378cc7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -32,7 +32,6 @@ * <prasanna@in.ibm.com> added function-return probes. */ #include <linux/kprobes.h> -#include <linux/spinlock.h> #include <linux/hash.h> #include <linux/init.h> #include <linux/slab.h> @@ -49,9 +48,9 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; -unsigned int kprobe_cpu = NR_CPUS; -static DEFINE_SPINLOCK(kprobe_lock); -static struct kprobe *curr_kprobe; +static DEFINE_SPINLOCK(kprobe_lock); /* Protects kprobe_table */ +DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ +static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; /* * kprobe->ainsn.insn points to the copy of the instruction to be @@ -153,50 +152,31 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) } } -/* Locks kprobe: irqs must be disabled */ -void __kprobes lock_kprobes(void) +/* We have preemption disabled.. so it is safe to use __ versions */ +static inline void set_kprobe_instance(struct kprobe *kp) { - unsigned long flags = 0; - - /* Avoiding local interrupts to happen right after we take the kprobe_lock - * and before we get a chance to update kprobe_cpu, this to prevent - * deadlock when we have a kprobe on ISR routine and a kprobe on task - * routine - */ - local_irq_save(flags); - - spin_lock(&kprobe_lock); - kprobe_cpu = smp_processor_id(); - - local_irq_restore(flags); + __get_cpu_var(kprobe_instance) = kp; } -void __kprobes unlock_kprobes(void) +static inline void reset_kprobe_instance(void) { - unsigned long flags = 0; - - /* Avoiding local interrupts to happen right after we update - * kprobe_cpu and before we get a a chance to release kprobe_lock, - * this to prevent deadlock when we have a kprobe on ISR routine and - * a kprobe on task routine - */ - local_irq_save(flags); - - kprobe_cpu = NR_CPUS; - spin_unlock(&kprobe_lock); - - local_irq_restore(flags); + __get_cpu_var(kprobe_instance) = NULL; } -/* You have to be holding the kprobe_lock */ +/* + * This routine is called either: + * - under the kprobe_lock spinlock - during kprobe_[un]register() + * OR + * - with preemption disabled - from arch/xxx/kernel/kprobes.c + */ struct kprobe __kprobes *get_kprobe(void *addr) { struct hlist_head *head; struct hlist_node *node; + struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each(node, head) { - struct kprobe *p = hlist_entry(node, struct kprobe, hlist); + hlist_for_each_entry_rcu(p, node, head, hlist) { if (p->addr == addr) return p; } @@ -211,13 +191,13 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp; - list_for_each_entry(kp, &p->list, list) { + list_for_each_entry_rcu(kp, &p->list, list) { if (kp->pre_handler) { - curr_kprobe = kp; + set_kprobe_instance(kp); if (kp->pre_handler(kp, regs)) return 1; } - curr_kprobe = NULL; + reset_kprobe_instance(); } return 0; } @@ -227,11 +207,11 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, { struct kprobe *kp; - list_for_each_entry(kp, &p->list, list) { + list_for_each_entry_rcu(kp, &p->list, list) { if (kp->post_handler) { - curr_kprobe = kp; + set_kprobe_instance(kp); kp->post_handler(kp, regs, flags); - curr_kprobe = NULL; + reset_kprobe_instance(); } } return; @@ -240,12 +220,14 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) { + struct kprobe *cur = __get_cpu_var(kprobe_instance); + /* * if we faulted "during" the execution of a user specified * probe handler, invoke just that probe's fault handler */ - if (curr_kprobe && curr_kprobe->fault_handler) { - if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr)) + if (cur && cur->fault_handler) { + if (cur->fault_handler(cur, regs, trapnr)) return 1; } return 0; @@ -253,17 +235,18 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) { - struct kprobe *kp = curr_kprobe; - if (curr_kprobe && kp->break_handler) { - if (kp->break_handler(kp, regs)) { - curr_kprobe = NULL; - return 1; - } + struct kprobe *cur = __get_cpu_var(kprobe_instance); + int ret = 0; + + if (cur && cur->break_handler) { + if (cur->break_handler(cur, regs)) + ret = 1; } - curr_kprobe = NULL; - return 0; + reset_kprobe_instance(); + return ret; } +/* Called with kretprobe_lock held */ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) { struct hlist_node *node; @@ -273,6 +256,7 @@ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) return NULL; } +/* Called with kretprobe_lock held */ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe *rp) { @@ -283,6 +267,7 @@ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe return NULL; } +/* Called with kretprobe_lock held */ void __kprobes add_rp_inst(struct kretprobe_instance *ri) { /* @@ -301,6 +286,7 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri) hlist_add_head(&ri->uflist, &ri->rp->used_instances); } +/* Called with kretprobe_lock held */ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) { /* remove rp inst off the rprobe_inst_table */ @@ -334,13 +320,13 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) struct hlist_node *node, *tmp; unsigned long flags = 0; - spin_lock_irqsave(&kprobe_lock, flags); + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri); } - spin_unlock_irqrestore(&kprobe_lock, flags); + spin_unlock_irqrestore(&kretprobe_lock, flags); } /* @@ -351,9 +337,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); + unsigned long flags = 0; /*TODO: consider to only swap the RA after the last pre_handler fired */ + spin_lock_irqsave(&kretprobe_lock, flags); arch_prepare_kretprobe(rp, regs); + spin_unlock_irqrestore(&kretprobe_lock, flags); return 0; } @@ -384,13 +373,13 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) struct kprobe *kp; if (p->break_handler) { - list_for_each_entry(kp, &old_p->list, list) { + list_for_each_entry_rcu(kp, &old_p->list, list) { if (kp->break_handler) return -EEXIST; } - list_add_tail(&p->list, &old_p->list); + list_add_tail_rcu(&p->list, &old_p->list); } else - list_add(&p->list, &old_p->list); + list_add_rcu(&p->list, &old_p->list); return 0; } @@ -408,18 +397,18 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) ap->break_handler = aggr_break_handler; INIT_LIST_HEAD(&ap->list); - list_add(&p->list, &ap->list); + list_add_rcu(&p->list, &ap->list); INIT_HLIST_NODE(&ap->hlist); - hlist_del(&p->hlist); - hlist_add_head(&ap->hlist, + hlist_del_rcu(&p->hlist); + hlist_add_head_rcu(&ap->hlist, &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); } /* * This is the second or subsequent kprobe at the address - handle * the intricacies - * TODO: Move kcalloc outside the spinlock + * TODO: Move kcalloc outside the spin_lock */ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) @@ -445,7 +434,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) { arch_disarm_kprobe(p); - hlist_del(&p->hlist); + hlist_del_rcu(&p->hlist); spin_unlock_irqrestore(&kprobe_lock, flags); arch_remove_kprobe(p); } @@ -453,11 +442,10 @@ static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) static inline void cleanup_aggr_kprobe(struct kprobe *old_p, struct kprobe *p, unsigned long flags) { - list_del(&p->list); - if (list_empty(&old_p->list)) { + list_del_rcu(&p->list); + if (list_empty(&old_p->list)) cleanup_kprobe(old_p, flags); - kfree(old_p); - } else + else spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -480,9 +468,9 @@ int __kprobes register_kprobe(struct kprobe *p) if ((ret = arch_prepare_kprobe(p)) != 0) goto rm_kprobe; + p->nmissed = 0; spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); - p->nmissed = 0; if (old_p) { ret = register_aggr_kprobe(old_p, p); goto out; @@ -490,7 +478,7 @@ int __kprobes register_kprobe(struct kprobe *p) arch_copy_kprobe(p); INIT_HLIST_NODE(&p->hlist); - hlist_add_head(&p->hlist, + hlist_add_head_rcu(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); arch_arm_kprobe(p); @@ -511,10 +499,16 @@ void __kprobes unregister_kprobe(struct kprobe *p) spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); if (old_p) { + /* cleanup_*_kprobe() does the spin_unlock_irqrestore */ if (old_p->pre_handler == aggr_pre_handler) cleanup_aggr_kprobe(old_p, p, flags); else cleanup_kprobe(p, flags); + + synchronize_sched(); + if (old_p->pre_handler == aggr_pre_handler && + list_empty(&old_p->list)) + kfree(old_p); } else spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -591,13 +585,13 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp) unregister_kprobe(&rp->kp); /* No race here */ - spin_lock_irqsave(&kprobe_lock, flags); + spin_lock_irqsave(&kretprobe_lock, flags); free_rp_inst(rp); while ((ri = get_used_rp_inst(rp)) != NULL) { ri->rp = NULL; hlist_del(&ri->uflist); } - spin_unlock_irqrestore(&kprobe_lock, flags); + spin_unlock_irqrestore(&kretprobe_lock, flags); } static int __init init_kprobes(void) diff --git a/kernel/module.c b/kernel/module.c index ff5c500ab62..2ea929d51ad 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -37,6 +37,7 @@ #include <linux/stop_machine.h> #include <linux/device.h> #include <linux/string.h> +#include <linux/sched.h> #include <asm/uaccess.h> #include <asm/semaphore.h> #include <asm/cacheflush.h> diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 91a89426494..84af54c39e1 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -497,7 +497,7 @@ static void process_timer_rebalance(struct task_struct *p, left = cputime_div(cputime_sub(expires.cpu, val.cpu), nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(prof_ticks(t), left); if (cputime_eq(t->it_prof_expires, cputime_zero) || @@ -512,7 +512,7 @@ static void process_timer_rebalance(struct task_struct *p, left = cputime_div(cputime_sub(expires.cpu, val.cpu), nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(virt_ticks(t), left); if (cputime_eq(t->it_virt_expires, cputime_zero) || @@ -527,7 +527,7 @@ static void process_timer_rebalance(struct task_struct *p, nsleft = expires.sched - val.sched; do_div(nsleft, nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ns = t->sched_time + nsleft; if (t->it_sched_expires == 0 || t->it_sched_expires > ns) { diff --git a/kernel/power/main.c b/kernel/power/main.c index 18d7d693fbb..6ee2cad530e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -167,7 +167,7 @@ static int enter_state(suspend_state_t state) { int error; - if (pm_ops->valid && !pm_ops->valid(state)) + if (pm_ops && pm_ops->valid && !pm_ops->valid(state)) return -ENODEV; if (down_trylock(&pm_sem)) return -EBUSY; diff --git a/kernel/power/power.h b/kernel/power/power.h index d4fd96a135a..6c042b5ee14 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -65,8 +65,8 @@ extern suspend_pagedir_t *pagedir_save; extern asmlinkage int swsusp_arch_suspend(void); extern asmlinkage int swsusp_arch_resume(void); -extern int restore_highmem(void); -extern struct pbe * alloc_pagedir(unsigned nr_pages); +extern void free_pagedir(struct pbe *pblist); +extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed); extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages); extern void swsusp_free(void); -extern int enough_swap(unsigned nr_pages); +extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 42a62870439..4a6dbcefd37 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -88,8 +88,7 @@ static int save_highmem_zone(struct zone *zone) return 0; } - -static int save_highmem(void) +int save_highmem(void) { struct zone *zone; int res = 0; @@ -120,11 +119,7 @@ int restore_highmem(void) } return 0; } -#else -static int save_highmem(void) { return 0; } -int restore_highmem(void) { return 0; } -#endif /* CONFIG_HIGHMEM */ - +#endif static int pfn_is_nosave(unsigned long pfn) { @@ -168,9 +163,8 @@ static unsigned count_data_pages(void) { struct zone *zone; unsigned long zone_pfn; - unsigned n; + unsigned int n = 0; - n = 0; for_each_zone (zone) { if (is_highmem(zone)) continue; @@ -217,7 +211,7 @@ static void copy_data_pages(struct pbe *pblist) * free_pagedir - free pages allocated with alloc_pagedir() */ -static void free_pagedir(struct pbe *pblist) +void free_pagedir(struct pbe *pblist) { struct pbe *pbe; @@ -250,10 +244,10 @@ static inline void fill_pb_page(struct pbe *pbpage) * of memory pages allocated with alloc_pagedir() */ -void create_pbe_list(struct pbe *pblist, unsigned nr_pages) +void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) { struct pbe *pbpage, *p; - unsigned num = PBES_PER_PAGE; + unsigned int num = PBES_PER_PAGE; for_each_pb_page (pbpage, pblist) { if (num >= nr_pages) @@ -270,9 +264,30 @@ void create_pbe_list(struct pbe *pblist, unsigned nr_pages) pr_debug("create_pbe_list(): initialized %d PBEs\n", num); } -static void *alloc_image_page(void) +/** + * @safe_needed - on resume, for storing the PBE list and the image, + * we can only use memory pages that do not conflict with the pages + * which had been used before suspend. + * + * The unsafe pages are marked with the PG_nosave_free flag + * + * Allocated but unusable (ie eaten) memory pages should be marked + * so that swsusp_free() can release them + */ + +static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed) { - void *res = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD); + void *res; + + if (safe_needed) + do { + res = (void *)get_zeroed_page(gfp_mask); + if (res && PageNosaveFree(virt_to_page(res))) + /* This is for swsusp_free() */ + SetPageNosave(virt_to_page(res)); + } while (res && PageNosaveFree(virt_to_page(res))); + else + res = (void *)get_zeroed_page(gfp_mask); if (res) { SetPageNosave(virt_to_page(res)); SetPageNosaveFree(virt_to_page(res)); @@ -280,6 +295,11 @@ static void *alloc_image_page(void) return res; } +unsigned long get_safe_page(gfp_t gfp_mask) +{ + return (unsigned long)alloc_image_page(gfp_mask, 1); +} + /** * alloc_pagedir - Allocate the page directory. * @@ -293,21 +313,21 @@ static void *alloc_image_page(void) * On each page we set up a list of struct_pbe elements. */ -struct pbe *alloc_pagedir(unsigned nr_pages) +struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed) { - unsigned num; + unsigned int num; struct pbe *pblist, *pbe; if (!nr_pages) return NULL; pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages); - pblist = alloc_image_page(); + pblist = alloc_image_page(gfp_mask, safe_needed); /* FIXME: rewrite this ugly loop */ for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages; pbe = pbe->next, num += PBES_PER_PAGE) { pbe += PB_PAGE_SKIP; - pbe->next = alloc_image_page(); + pbe->next = alloc_image_page(gfp_mask, safe_needed); } if (!pbe) { /* get_zeroed_page() failed */ free_pagedir(pblist); @@ -329,7 +349,7 @@ void swsusp_free(void) for_each_zone(zone) { for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { - struct page * page; + struct page *page; page = pfn_to_page(zone_pfn + zone->zone_start_pfn); if (PageNosave(page) && PageNosaveFree(page)) { ClearPageNosave(page); @@ -348,31 +368,39 @@ void swsusp_free(void) * free pages. */ -static int enough_free_mem(unsigned nr_pages) +static int enough_free_mem(unsigned int nr_pages) { pr_debug("swsusp: available memory: %u pages\n", nr_free_pages()); return nr_free_pages() > (nr_pages + PAGES_FOR_IO + (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); } +int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed) +{ + struct pbe *p; -static struct pbe *swsusp_alloc(unsigned nr_pages) + for_each_pbe (p, pblist) { + p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed); + if (!p->address) + return -ENOMEM; + } + return 0; +} + +static struct pbe *swsusp_alloc(unsigned int nr_pages) { - struct pbe *pblist, *p; + struct pbe *pblist; - if (!(pblist = alloc_pagedir(nr_pages))) { + if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) { printk(KERN_ERR "suspend: Allocating pagedir failed.\n"); return NULL; } create_pbe_list(pblist, nr_pages); - for_each_pbe (p, pblist) { - p->address = (unsigned long)alloc_image_page(); - if (!p->address) { - printk(KERN_ERR "suspend: Allocating image pages failed.\n"); - swsusp_free(); - return NULL; - } + if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) { + printk(KERN_ERR "suspend: Allocating image pages failed.\n"); + swsusp_free(); + return NULL; } return pblist; @@ -380,14 +408,9 @@ static struct pbe *swsusp_alloc(unsigned nr_pages) asmlinkage int swsusp_save(void) { - unsigned nr_pages; + unsigned int nr_pages; pr_debug("swsusp: critical section: \n"); - if (save_highmem()) { - printk(KERN_CRIT "swsusp: Not enough free pages for highmem\n"); - restore_highmem(); - return -ENOMEM; - } drain_local_pages(); nr_pages = count_data_pages(); @@ -407,11 +430,6 @@ asmlinkage int swsusp_save(void) return -ENOMEM; } - if (!enough_swap(nr_pages)) { - printk(KERN_ERR "swsusp: Not enough free swap\n"); - return -ENOSPC; - } - pagedir_nosave = swsusp_alloc(nr_pages); if (!pagedir_nosave) return -ENOMEM; diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 12db1d2ad61..c05f46e7348 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -73,6 +73,14 @@ #include "power.h" +#ifdef CONFIG_HIGHMEM +int save_highmem(void); +int restore_highmem(void); +#else +static int save_highmem(void) { return 0; } +static int restore_highmem(void) { return 0; } +#endif + #define CIPHER "aes" #define MAXKEY 32 #define MAXIV 32 @@ -85,18 +93,11 @@ unsigned int nr_copy_pages __nosavedata = 0; /* Suspend pagedir is allocated before final copy, therefore it must be freed after resume - Warning: this is evil. There are actually two pagedirs at time of - resume. One is "pagedir_save", which is empty frame allocated at - time of suspend, that must be freed. Second is "pagedir_nosave", - allocated at time of resume, that travels through memory not to - collide with anything. - Warning: this is even more evil than it seems. Pagedirs this file talks about are completely different from page directories used by MMU hardware. */ suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; -suspend_pagedir_t *pagedir_save; #define SWSUSP_SIG "S1SUSPEND" @@ -122,8 +123,8 @@ static struct swsusp_info swsusp_info; static unsigned short swapfile_used[MAX_SWAPFILES]; static unsigned short root_swap; -static int write_page(unsigned long addr, swp_entry_t * loc); -static int bio_read_page(pgoff_t page_off, void * page); +static int write_page(unsigned long addr, swp_entry_t *loc); +static int bio_read_page(pgoff_t page_off, void *page); static u8 key_iv[MAXKEY+MAXIV]; @@ -355,7 +356,7 @@ static void lock_swapdevices(void) * This is a partial improvement, since we will at least return other * errors, though we need to eventually fix the damn code. */ -static int write_page(unsigned long addr, swp_entry_t * loc) +static int write_page(unsigned long addr, swp_entry_t *loc) { swp_entry_t entry; int error = 0; @@ -383,9 +384,9 @@ static int write_page(unsigned long addr, swp_entry_t * loc) static void data_free(void) { swp_entry_t entry; - struct pbe * p; + struct pbe *p; - for_each_pbe(p, pagedir_nosave) { + for_each_pbe (p, pagedir_nosave) { entry = p->swap_address; if (entry.val) swap_free(entry); @@ -492,8 +493,8 @@ static void free_pagedir_entries(void) static int write_pagedir(void) { int error = 0; - unsigned n = 0; - struct pbe * pbe; + unsigned int n = 0; + struct pbe *pbe; printk( "Writing pagedir..."); for_each_pb_page (pbe, pagedir_nosave) { @@ -507,6 +508,26 @@ static int write_pagedir(void) } /** + * enough_swap - Make sure we have enough swap to save the image. + * + * Returns TRUE or FALSE after checking the total amount of swap + * space avaiable. + * + * FIXME: si_swapinfo(&i) returns all swap devices information. + * We should only consider resume_device. + */ + +static int enough_swap(unsigned int nr_pages) +{ + struct sysinfo i; + + si_swapinfo(&i); + pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); + return i.freeswap > (nr_pages + PAGES_FOR_IO + + (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); +} + +/** * write_suspend_image - Write entire image and metadata. * */ @@ -514,6 +535,11 @@ static int write_suspend_image(void) { int error; + if (!enough_swap(nr_copy_pages)) { + printk(KERN_ERR "swsusp: Not enough free swap\n"); + return -ENOSPC; + } + init_header(); if ((error = data_write())) goto FreeData; @@ -533,27 +559,6 @@ static int write_suspend_image(void) goto Done; } -/** - * enough_swap - Make sure we have enough swap to save the image. - * - * Returns TRUE or FALSE after checking the total amount of swap - * space avaiable. - * - * FIXME: si_swapinfo(&i) returns all swap devices information. - * We should only consider resume_device. - */ - -int enough_swap(unsigned nr_pages) -{ - struct sysinfo i; - - si_swapinfo(&i); - pr_debug("swsusp: available swap: %lu pages\n", i.freeswap); - return i.freeswap > (nr_pages + PAGES_FOR_IO + - (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE); -} - - /* It is important _NOT_ to umount filesystems at this point. We want * them synced (in case something goes wrong) but we DO not want to mark * filesystem clean: it is not. (And it does not matter, if we resume @@ -563,12 +568,15 @@ int swsusp_write(void) { int error; + if ((error = swsusp_swap_check())) { + printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); + return error; + } lock_swapdevices(); error = write_suspend_image(); /* This will unlock ignored swap devices since writing is finished */ lock_swapdevices(); return error; - } @@ -576,6 +584,7 @@ int swsusp_write(void) int swsusp_suspend(void) { int error; + if ((error = arch_prepare_suspend())) return error; local_irq_disable(); @@ -587,15 +596,12 @@ int swsusp_suspend(void) */ if ((error = device_power_down(PMSG_FREEZE))) { printk(KERN_ERR "Some devices failed to power down, aborting suspend\n"); - local_irq_enable(); - return error; + goto Enable_irqs; } - if ((error = swsusp_swap_check())) { - printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n"); - device_power_up(); - local_irq_enable(); - return error; + if ((error = save_highmem())) { + printk(KERN_ERR "swsusp: Not enough free pages for highmem\n"); + goto Restore_highmem; } save_processor_state(); @@ -603,8 +609,10 @@ int swsusp_suspend(void) printk(KERN_ERR "Error %d suspending\n", error); /* Restore control flow magically appears here */ restore_processor_state(); +Restore_highmem: restore_highmem(); device_power_up(); +Enable_irqs: local_irq_enable(); return error; } @@ -636,127 +644,43 @@ int swsusp_resume(void) } /** - * On resume, for storing the PBE list and the image, - * we can only use memory pages that do not conflict with the pages - * which had been used before suspend. - * - * We don't know which pages are usable until we allocate them. - * - * Allocated but unusable (ie eaten) memory pages are marked so that - * swsusp_free() can release them - */ - -unsigned long get_safe_page(gfp_t gfp_mask) -{ - unsigned long m; - - do { - m = get_zeroed_page(gfp_mask); - if (m && PageNosaveFree(virt_to_page(m))) - /* This is for swsusp_free() */ - SetPageNosave(virt_to_page(m)); - } while (m && PageNosaveFree(virt_to_page(m))); - if (m) { - /* This is for swsusp_free() */ - SetPageNosave(virt_to_page(m)); - SetPageNosaveFree(virt_to_page(m)); - } - return m; -} - -/** - * check_pagedir - We ensure here that pages that the PBEs point to - * won't collide with pages where we're going to restore from the loaded - * pages later - */ - -static int check_pagedir(struct pbe *pblist) -{ - struct pbe *p; - - /* This is necessary, so that we can free allocated pages - * in case of failure - */ - for_each_pbe (p, pblist) - p->address = 0UL; - - for_each_pbe (p, pblist) { - p->address = get_safe_page(GFP_ATOMIC); - if (!p->address) - return -ENOMEM; - } - return 0; -} - -/** - * swsusp_pagedir_relocate - It is possible, that some memory pages - * occupied by the list of PBEs collide with pages where we're going to - * restore from the loaded pages later. We relocate them here. + * mark_unsafe_pages - mark the pages that cannot be used for storing + * the image during resume, because they conflict with the pages that + * had been used before suspend */ -static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) +static void mark_unsafe_pages(struct pbe *pblist) { struct zone *zone; unsigned long zone_pfn; - struct pbe *pbpage, *tail, *p; - void *m; - int rel = 0; + struct pbe *p; if (!pblist) /* a sanity check */ - return NULL; - - pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n", - swsusp_info.pagedir_pages); + return; /* Clear page flags */ - for_each_zone (zone) { - for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) - if (pfn_valid(zone_pfn + zone->zone_start_pfn)) - ClearPageNosaveFree(pfn_to_page(zone_pfn + + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) + if (pfn_valid(zone_pfn + zone->zone_start_pfn)) + ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn)); } /* Mark orig addresses */ - for_each_pbe (p, pblist) SetPageNosaveFree(virt_to_page(p->orig_address)); - tail = pblist + PB_PAGE_SKIP; - - /* Relocate colliding pages */ - - for_each_pb_page (pbpage, pblist) { - if (PageNosaveFree(virt_to_page((unsigned long)pbpage))) { - m = (void *)get_safe_page(GFP_ATOMIC | __GFP_COLD); - if (!m) - return NULL; - memcpy(m, (void *)pbpage, PAGE_SIZE); - if (pbpage == pblist) - pblist = (struct pbe *)m; - else - tail->next = (struct pbe *)m; - pbpage = (struct pbe *)m; - - /* We have to link the PBEs again */ - for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++) - if (p->next) /* needed to save the end */ - p->next = p + 1; - - rel++; - } - tail = pbpage + PB_PAGE_SKIP; - } +} - /* This is for swsusp_free() */ - for_each_pb_page (pbpage, pblist) { - SetPageNosave(virt_to_page(pbpage)); - SetPageNosaveFree(virt_to_page(pbpage)); +static void copy_page_backup_list(struct pbe *dst, struct pbe *src) +{ + /* We assume both lists contain the same number of elements */ + while (src) { + dst->orig_address = src->orig_address; + dst->swap_address = src->swap_address; + dst = dst->next; + src = src->next; } - - printk("swsusp: Relocated %d pages\n", rel); - - return pblist; } /* @@ -770,7 +694,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) static atomic_t io_done = ATOMIC_INIT(0); -static int end_io(struct bio * bio, unsigned int num, int err) +static int end_io(struct bio *bio, unsigned int num, int err) { if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) panic("I/O error reading memory image"); @@ -778,7 +702,7 @@ static int end_io(struct bio * bio, unsigned int num, int err) return 0; } -static struct block_device * resume_bdev; +static struct block_device *resume_bdev; /** * submit - submit BIO request. @@ -791,10 +715,10 @@ static struct block_device * resume_bdev; * Then submit it and wait. */ -static int submit(int rw, pgoff_t page_off, void * page) +static int submit(int rw, pgoff_t page_off, void *page) { int error = 0; - struct bio * bio; + struct bio *bio; bio = bio_alloc(GFP_ATOMIC, 1); if (!bio) @@ -823,12 +747,12 @@ static int submit(int rw, pgoff_t page_off, void * page) return error; } -static int bio_read_page(pgoff_t page_off, void * page) +static int bio_read_page(pgoff_t page_off, void *page) { return submit(READ, page_off, page); } -static int bio_write_page(pgoff_t page_off, void * page) +static int bio_write_page(pgoff_t page_off, void *page) { return submit(WRITE, page_off, page); } @@ -838,7 +762,7 @@ static int bio_write_page(pgoff_t page_off, void * page) * I really don't think that it's foolproof but more than nothing.. */ -static const char * sanity_check(void) +static const char *sanity_check(void) { dump_info(); if (swsusp_info.version_code != LINUX_VERSION_CODE) @@ -864,7 +788,7 @@ static const char * sanity_check(void) static int check_header(void) { - const char * reason = NULL; + const char *reason = NULL; int error; if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) @@ -895,7 +819,7 @@ static int check_sig(void) * Reset swap signature now. */ error = bio_write_page(0, &swsusp_header); - } else { + } else { return -EINVAL; } if (!error) @@ -912,7 +836,7 @@ static int check_sig(void) static int data_read(struct pbe *pblist) { - struct pbe * p; + struct pbe *p; int error = 0; int i = 0; int mod = swsusp_info.image_pages / 100; @@ -950,7 +874,7 @@ static int data_read(struct pbe *pblist) static int read_pagedir(struct pbe *pblist) { struct pbe *pbpage, *p; - unsigned i = 0; + unsigned int i = 0; int error; if (!pblist) @@ -997,20 +921,25 @@ static int read_suspend_image(void) int error = 0; struct pbe *p; - if (!(p = alloc_pagedir(nr_copy_pages))) + if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0))) return -ENOMEM; if ((error = read_pagedir(p))) return error; - create_pbe_list(p, nr_copy_pages); - - if (!(pagedir_nosave = swsusp_pagedir_relocate(p))) + mark_unsafe_pages(p); + pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1); + if (pagedir_nosave) { + create_pbe_list(pagedir_nosave, nr_copy_pages); + copy_page_backup_list(pagedir_nosave, p); + } + free_pagedir(p); + if (!pagedir_nosave) return -ENOMEM; /* Allocate memory for the image and read the data from swap */ - error = check_pagedir(pagedir_nosave); + error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1); if (!error) error = data_read(pagedir_nosave); diff --git a/kernel/printk.c b/kernel/printk.c index 3cb9708209b..e9be027bc93 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -806,7 +806,6 @@ void console_unblank(void) c->unblank(); release_console_sem(); } -EXPORT_SYMBOL(console_unblank); /* * Return the console tty driver structure and its associated index diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 863eee8bff4..b88d4186cd7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -155,7 +155,7 @@ int ptrace_attach(struct task_struct *task) retval = -EPERM; if (task->pid <= 1) goto bad; - if (task == current) + if (task->tgid == current->tgid) goto bad; /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) @@ -406,3 +406,85 @@ int ptrace_request(struct task_struct *child, long request, return ret; } + +#ifndef __ARCH_SYS_PTRACE +static int ptrace_get_task_struct(long request, long pid, + struct task_struct **childp) +{ + struct task_struct *child; + int ret; + + /* + * Callers use child == NULL as an indication to exit early even + * when the return value is 0, so make sure it is non-NULL here. + */ + *childp = NULL; + + if (request == PTRACE_TRACEME) { + /* + * Are we already being traced? + */ + if (current->ptrace & PT_PTRACED) + return -EPERM; + ret = security_ptrace(current->parent, current); + if (ret) + return -EPERM; + /* + * Set the ptrace bit in the process ptrace flags. + */ + current->ptrace |= PT_PTRACED; + return 0; + } + + /* + * You may not mess with init + */ + if (pid == 1) + return -EPERM; + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + return -ESRCH; + + *childp = child; + return 0; +} + +asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + long ret; + + /* + * This lock_kernel fixes a subtle race with suid exec + */ + lock_kernel(); + ret = ptrace_get_task_struct(request, pid, &child); + if (!child) + goto out; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_put_task_struct; + + ret = arch_ptrace(child, request, addr, data); + if (ret < 0) + goto out_put_task_struct; + + out_put_task_struct: + put_task_struct(child); + out: + unlock_kernel(); + return ret; +} +#endif /* __ARCH_SYS_PTRACE */ diff --git a/kernel/sched.c b/kernel/sched.c index 340dd238c16..ac3f5cc3bb5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -206,6 +206,7 @@ struct runqueue { */ unsigned long nr_running; #ifdef CONFIG_SMP + unsigned long prio_bias; unsigned long cpu_load[3]; #endif unsigned long long nr_switches; @@ -659,13 +660,68 @@ static int effective_prio(task_t *p) return prio; } +#ifdef CONFIG_SMP +static inline void inc_prio_bias(runqueue_t *rq, int prio) +{ + rq->prio_bias += MAX_PRIO - prio; +} + +static inline void dec_prio_bias(runqueue_t *rq, int prio) +{ + rq->prio_bias -= MAX_PRIO - prio; +} + +static inline void inc_nr_running(task_t *p, runqueue_t *rq) +{ + rq->nr_running++; + if (rt_task(p)) { + if (p != rq->migration_thread) + /* + * The migration thread does the actual balancing. Do + * not bias by its priority as the ultra high priority + * will skew balancing adversely. + */ + inc_prio_bias(rq, p->prio); + } else + inc_prio_bias(rq, p->static_prio); +} + +static inline void dec_nr_running(task_t *p, runqueue_t *rq) +{ + rq->nr_running--; + if (rt_task(p)) { + if (p != rq->migration_thread) + dec_prio_bias(rq, p->prio); + } else + dec_prio_bias(rq, p->static_prio); +} +#else +static inline void inc_prio_bias(runqueue_t *rq, int prio) +{ +} + +static inline void dec_prio_bias(runqueue_t *rq, int prio) +{ +} + +static inline void inc_nr_running(task_t *p, runqueue_t *rq) +{ + rq->nr_running++; +} + +static inline void dec_nr_running(task_t *p, runqueue_t *rq) +{ + rq->nr_running--; +} +#endif + /* * __activate_task - move a task to the runqueue. */ static inline void __activate_task(task_t *p, runqueue_t *rq) { enqueue_task(p, rq->active); - rq->nr_running++; + inc_nr_running(p, rq); } /* @@ -674,7 +730,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) static inline void __activate_idle_task(task_t *p, runqueue_t *rq) { enqueue_task_head(p, rq->active); - rq->nr_running++; + inc_nr_running(p, rq); } static int recalc_task_prio(task_t *p, unsigned long long now) @@ -793,7 +849,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) */ static void deactivate_task(struct task_struct *p, runqueue_t *rq) { - rq->nr_running--; + dec_nr_running(p, rq); dequeue_task(p, p->array); p->array = NULL; } @@ -808,21 +864,28 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) #ifdef CONFIG_SMP static void resched_task(task_t *p) { - int need_resched, nrpolling; + int cpu; assert_spin_locked(&task_rq(p)->lock); - /* minimise the chance of sending an interrupt to poll_idle() */ - nrpolling = test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); - need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED); - nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG); + if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) + return; + + set_tsk_thread_flag(p, TIF_NEED_RESCHED); + + cpu = task_cpu(p); + if (cpu == smp_processor_id()) + return; - if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) - smp_send_reschedule(task_cpu(p)); + /* NEED_RESCHED must be visible before we test POLLING_NRFLAG */ + smp_mb(); + if (!test_tsk_thread_flag(p, TIF_POLLING_NRFLAG)) + smp_send_reschedule(cpu); } #else static inline void resched_task(task_t *p) { + assert_spin_locked(&task_rq(p)->lock); set_tsk_need_resched(p); } #endif @@ -930,27 +993,61 @@ void kick_process(task_t *p) * We want to under-estimate the load of migration sources, to * balance conservatively. */ -static inline unsigned long source_load(int cpu, int type) +static inline unsigned long __source_load(int cpu, int type, enum idle_type idle) { runqueue_t *rq = cpu_rq(cpu); - unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + unsigned long running = rq->nr_running; + unsigned long source_load, cpu_load = rq->cpu_load[type-1], + load_now = running * SCHED_LOAD_SCALE; + if (type == 0) - return load_now; + source_load = load_now; + else + source_load = min(cpu_load, load_now); + + if (running > 1 || (idle == NOT_IDLE && running)) + /* + * If we are busy rebalancing the load is biased by + * priority to create 'nice' support across cpus. When + * idle rebalancing we should only bias the source_load if + * there is more than one task running on that queue to + * prevent idle rebalance from trying to pull tasks from a + * queue with only one running task. + */ + source_load = source_load * rq->prio_bias / running; + + return source_load; +} - return min(rq->cpu_load[type-1], load_now); +static inline unsigned long source_load(int cpu, int type) +{ + return __source_load(cpu, type, NOT_IDLE); } /* * Return a high guess at the load of a migration-target cpu */ -static inline unsigned long target_load(int cpu, int type) +static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) { runqueue_t *rq = cpu_rq(cpu); - unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + unsigned long running = rq->nr_running; + unsigned long target_load, cpu_load = rq->cpu_load[type-1], + load_now = running * SCHED_LOAD_SCALE; + if (type == 0) - return load_now; + target_load = load_now; + else + target_load = max(cpu_load, load_now); + + if (running > 1 || (idle == NOT_IDLE && running)) + target_load = target_load * rq->prio_bias / running; + + return target_load; +} - return max(rq->cpu_load[type-1], load_now); +static inline unsigned long target_load(int cpu, int type) +{ + return __target_load(cpu, type, NOT_IDLE); } /* @@ -1411,7 +1508,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) list_add_tail(&p->run_list, ¤t->run_list); p->array = current->array; p->array->nr_active++; - rq->nr_running++; + inc_nr_running(p, rq); } set_need_resched(); } else @@ -1468,7 +1565,7 @@ void fastcall sched_exit(task_t *p) * the sleep_avg of the parent as well. */ rq = task_rq_lock(p->parent, &flags); - if (p->first_time_slice) { + if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) { p->parent->time_slice += p->time_slice; if (unlikely(p->parent->time_slice > task_timeslice(p))) p->parent->time_slice = task_timeslice(p); @@ -1756,9 +1853,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) { dequeue_task(p, src_array); - src_rq->nr_running--; + dec_nr_running(p, src_rq); set_task_cpu(p, this_cpu); - this_rq->nr_running++; + inc_nr_running(p, this_rq); enqueue_task(p, this_array); p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) + this_rq->timestamp_last_tick; @@ -1937,9 +2034,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, /* Bias balancing toward cpus of our domain */ if (local_group) - load = target_load(i, load_idx); + load = __target_load(i, load_idx, idle); else - load = source_load(i, load_idx); + load = __source_load(i, load_idx, idle); avg_load += load; } @@ -2044,14 +2141,15 @@ out_balanced: /* * find_busiest_queue - find the busiest runqueue among the cpus in group. */ -static runqueue_t *find_busiest_queue(struct sched_group *group) +static runqueue_t *find_busiest_queue(struct sched_group *group, + enum idle_type idle) { unsigned long load, max_load = 0; runqueue_t *busiest = NULL; int i; for_each_cpu_mask(i, group->cpumask) { - load = source_load(i, 0); + load = __source_load(i, 0, idle); if (load > max_load) { max_load = load; @@ -2095,7 +2193,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, goto out_balanced; } - busiest = find_busiest_queue(group); + busiest = find_busiest_queue(group, idle); if (!busiest) { schedstat_inc(sd, lb_nobusyq[idle]); goto out_balanced; @@ -2218,7 +2316,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, goto out_balanced; } - busiest = find_busiest_queue(group); + busiest = find_busiest_queue(group, NEWLY_IDLE); if (!busiest) { schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); goto out_balanced; @@ -3451,8 +3549,10 @@ void set_user_nice(task_t *p, long nice) goto out_unlock; } array = p->array; - if (array) + if (array) { dequeue_task(p, array); + dec_prio_bias(rq, p->static_prio); + } old_prio = p->prio; new_prio = NICE_TO_PRIO(nice); @@ -3462,6 +3562,7 @@ void set_user_nice(task_t *p, long nice) if (array) { enqueue_task(p, array); + inc_prio_bias(rq, p->static_prio); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: @@ -3563,8 +3664,6 @@ int idle_cpu(int cpu) return cpu_curr(cpu) == cpu_rq(cpu)->idle; } -EXPORT_SYMBOL_GPL(idle_cpu); - /** * idle_task - return the idle task for a given cpu. * @cpu: the processor in question. @@ -4680,7 +4779,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind it from offline cpu so it can run. Fall thru. */ - kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); + kthread_bind(cpu_rq(cpu)->migration_thread, + any_online_cpu(cpu_online_map)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; diff --git a/kernel/softirq.c b/kernel/softirq.c index f766b2fc48b..ad3295cdded 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -470,7 +470,8 @@ static int __devinit cpu_callback(struct notifier_block *nfb, #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id()); + kthread_bind(per_cpu(ksoftirqd, hotcpu), + any_online_cpu(cpu_online_map)); case CPU_DEAD: p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 75976209cea..c67189a25d5 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -73,9 +73,6 @@ void softlockup_tick(struct pt_regs *regs) static int watchdog(void * __bind_cpu) { struct sched_param param = { .sched_priority = 99 }; - int this_cpu = (long) __bind_cpu; - - printk("softlockup thread %d started up.\n", this_cpu); sched_setscheduler(current, SCHED_FIFO, ¶m); current->flags |= PF_NOFREEZE; @@ -123,7 +120,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); + kthread_bind(per_cpu(watchdog_task, hotcpu), + any_online_cpu(cpu_online_map)); case CPU_DEAD: p = per_cpu(watchdog_task, hotcpu); per_cpu(watchdog_task, hotcpu) = NULL; diff --git a/kernel/sys.c b/kernel/sys.c index 2fa1ed18123..c43b3e22bbd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -28,6 +28,7 @@ #include <linux/suspend.h> #include <linux/tty.h> #include <linux/signal.h> +#include <linux/cn_proc.h> #include <linux/compat.h> #include <linux/syscalls.h> @@ -375,18 +376,21 @@ void emergency_restart(void) } EXPORT_SYMBOL_GPL(emergency_restart); -/** - * kernel_restart - reboot the system - * - * Shutdown everything and perform a clean reboot. - * This is not safe to call in interrupt context. - */ void kernel_restart_prepare(char *cmd) { notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); } + +/** + * kernel_restart - reboot the system + * @cmd: pointer to buffer containing command to execute for restart + * or %NULL + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. + */ void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); @@ -623,6 +627,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) current->egid = new_egid; current->gid = new_rgid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -662,6 +667,7 @@ asmlinkage long sys_setgid(gid_t gid) return -EPERM; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -751,6 +757,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) current->fsuid = current->euid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); } @@ -798,6 +805,7 @@ asmlinkage long sys_setuid(uid_t uid) current->suid = new_suid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); } @@ -846,6 +854,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) current->suid = suid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); } @@ -898,6 +907,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) current->sgid = sgid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -940,6 +950,7 @@ asmlinkage long sys_setfsuid(uid_t uid) } key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); @@ -968,6 +979,7 @@ asmlinkage long sys_setfsgid(gid_t gid) } current->fsgid = gid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); } return old_fsgid; } @@ -1485,8 +1497,6 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); -EXPORT_SYMBOL(uts_sem); - asmlinkage long sys_newuname(struct new_utsname __user * name) { int errno = 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e56e249554..9990e10192e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -169,7 +169,7 @@ struct file_operations proc_sys_file_operations = { extern struct proc_dir_entry *proc_sys_root; -static void register_proc_table(ctl_table *, struct proc_dir_entry *); +static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *); static void unregister_proc_table(ctl_table *, struct proc_dir_entry *); #endif @@ -952,7 +952,7 @@ static ctl_table fs_table[] = { .data = &aio_nr, .maxlen = sizeof(aio_nr), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, { .ctl_name = FS_AIO_MAX_NR, @@ -960,7 +960,7 @@ static ctl_table fs_table[] = { .data = &aio_max_nr, .maxlen = sizeof(aio_max_nr), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, #ifdef CONFIG_INOTIFY { @@ -992,10 +992,51 @@ static ctl_table dev_table[] = { extern void init_irq_proc (void); +static DEFINE_SPINLOCK(sysctl_lock); + +/* called under sysctl_lock */ +static int use_table(struct ctl_table_header *p) +{ + if (unlikely(p->unregistering)) + return 0; + p->used++; + return 1; +} + +/* called under sysctl_lock */ +static void unuse_table(struct ctl_table_header *p) +{ + if (!--p->used) + if (unlikely(p->unregistering)) + complete(p->unregistering); +} + +/* called under sysctl_lock, will reacquire if has to wait */ +static void start_unregistering(struct ctl_table_header *p) +{ + /* + * if p->used is 0, nobody will ever touch that entry again; + * we'll eliminate all paths to it before dropping sysctl_lock + */ + if (unlikely(p->used)) { + struct completion wait; + init_completion(&wait); + p->unregistering = &wait; + spin_unlock(&sysctl_lock); + wait_for_completion(&wait); + spin_lock(&sysctl_lock); + } + /* + * do not remove from the list until nobody holds it; walking the + * list in do_sysctl() relies on that. + */ + list_del_init(&p->ctl_entry); +} + void __init sysctl_init(void) { #ifdef CONFIG_PROC_FS - register_proc_table(root_table, proc_sys_root); + register_proc_table(root_table, proc_sys_root, &root_table_header); init_irq_proc(); #endif } @@ -1004,6 +1045,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol void __user *newval, size_t newlen) { struct list_head *tmp; + int error = -ENOTDIR; if (nlen <= 0 || nlen >= CTL_MAXNAME) return -ENOTDIR; @@ -1012,20 +1054,30 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol if (!oldlenp || get_user(old_len, oldlenp)) return -EFAULT; } + spin_lock(&sysctl_lock); tmp = &root_table_header.ctl_entry; do { struct ctl_table_header *head = list_entry(tmp, struct ctl_table_header, ctl_entry); void *context = NULL; - int error = parse_table(name, nlen, oldval, oldlenp, + + if (!use_table(head)) + continue; + + spin_unlock(&sysctl_lock); + + error = parse_table(name, nlen, oldval, oldlenp, newval, newlen, head->ctl_table, &context); kfree(context); + + spin_lock(&sysctl_lock); + unuse_table(head); if (error != -ENOTDIR) - return error; - tmp = tmp->next; - } while (tmp != &root_table_header.ctl_entry); - return -ENOTDIR; + break; + } while ((tmp = tmp->next) != &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); + return error; } asmlinkage long sys_sysctl(struct __sysctl_args __user *args) @@ -1236,12 +1288,16 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, return NULL; tmp->ctl_table = table; INIT_LIST_HEAD(&tmp->ctl_entry); + tmp->used = 0; + tmp->unregistering = NULL; + spin_lock(&sysctl_lock); if (insert_at_head) list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); else list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); + spin_unlock(&sysctl_lock); #ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root); + register_proc_table(table, proc_sys_root, tmp); #endif return tmp; } @@ -1255,10 +1311,13 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, */ void unregister_sysctl_table(struct ctl_table_header * header) { - list_del(&header->ctl_entry); + might_sleep(); + spin_lock(&sysctl_lock); + start_unregistering(header); #ifdef CONFIG_PROC_FS unregister_proc_table(header->ctl_table, proc_sys_root); #endif + spin_unlock(&sysctl_lock); kfree(header); } @@ -1269,7 +1328,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) #ifdef CONFIG_PROC_FS /* Scan the sysctl entries in table and add them all into /proc */ -static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) +static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) { struct proc_dir_entry *de; int len; @@ -1305,13 +1364,14 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root) de = create_proc_entry(table->procname, mode, root); if (!de) continue; + de->set = set; de->data = (void *) table; if (table->proc_handler) de->proc_fops = &proc_sys_file_operations; } table->de = de; if (de->mode & S_IFDIR) - register_proc_table(table->child, de); + register_proc_table(table->child, de, set); } } @@ -1336,6 +1396,13 @@ static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root continue; } + /* + * In any case, mark the entry as goner; we'll keep it + * around if it's busy, but we'll know to do nothing with + * its fields. We are under sysctl_lock here. + */ + de->data = NULL; + /* Don't unregister proc entries that are still being used.. */ if (atomic_read(&de->count)) continue; @@ -1349,27 +1416,38 @@ static ssize_t do_rw_proc(int write, struct file * file, char __user * buf, size_t count, loff_t *ppos) { int op; - struct proc_dir_entry *de; + struct proc_dir_entry *de = PDE(file->f_dentry->d_inode); struct ctl_table *table; size_t res; - ssize_t error; - - de = PDE(file->f_dentry->d_inode); - if (!de || !de->data) - return -ENOTDIR; - table = (struct ctl_table *) de->data; - if (!table || !table->proc_handler) - return -ENOTDIR; - op = (write ? 002 : 004); - if (ctl_perm(table, op)) - return -EPERM; + ssize_t error = -ENOTDIR; - res = count; - - error = (*table->proc_handler) (table, write, file, buf, &res, ppos); - if (error) - return error; - return res; + spin_lock(&sysctl_lock); + if (de && de->data && use_table(de->set)) { + /* + * at that point we know that sysctl was not unregistered + * and won't be until we finish + */ + spin_unlock(&sysctl_lock); + table = (struct ctl_table *) de->data; + if (!table || !table->proc_handler) + goto out; + error = -EPERM; + op = (write ? 002 : 004); + if (ctl_perm(table, op)) + goto out; + + /* careful: calling conventions are nasty here */ + res = count; + error = (*table->proc_handler)(table, write, file, + buf, &res, ppos); + if (!error) + error = res; + out: + spin_lock(&sysctl_lock); + unuse_table(de->set); + } + spin_unlock(&sysctl_lock); + return error; } static int proc_opensys(struct inode *inode, struct file *file) @@ -1997,6 +2075,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: pointer to the file position * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7cee222231b..42df83d7fad 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -524,7 +524,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, list_for_each_entry(wq, &workqueues, list) { /* Unbind so it can run. */ kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, - smp_processor_id()); + any_online_cpu(cpu_online_map)); cleanup_workqueue_thread(wq, hotcpu); } break; |