diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/compat.c | 35 | ||||
-rw-r--r-- | kernel/cpu.c | 14 | ||||
-rw-r--r-- | kernel/delayacct.c | 15 | ||||
-rw-r--r-- | kernel/exit.c | 1 | ||||
-rw-r--r-- | kernel/fork.c | 8 | ||||
-rw-r--r-- | kernel/futex.c | 7 | ||||
-rw-r--r-- | kernel/irq/chip.c | 2 | ||||
-rw-r--r-- | kernel/irq/handle.c | 4 | ||||
-rw-r--r-- | kernel/irq/manage.c | 9 | ||||
-rw-r--r-- | kernel/kmod.c | 8 | ||||
-rw-r--r-- | kernel/lockdep.c | 3 | ||||
-rw-r--r-- | kernel/module.c | 4 | ||||
-rw-r--r-- | kernel/power/disk.c | 37 | ||||
-rw-r--r-- | kernel/printk.c | 21 | ||||
-rw-r--r-- | kernel/signal.c | 15 | ||||
-rw-r--r-- | kernel/spinlock.c | 21 | ||||
-rw-r--r-- | kernel/sys_ni.c | 1 | ||||
-rw-r--r-- | kernel/sysctl.c | 30 | ||||
-rw-r--r-- | kernel/taskstats.c | 87 | ||||
-rw-r--r-- | kernel/time/ntp.c | 2 | ||||
-rw-r--r-- | kernel/tsacct.c | 17 | ||||
-rw-r--r-- | kernel/unwind.c | 327 | ||||
-rw-r--r-- | kernel/user.c | 11 | ||||
-rw-r--r-- | kernel/workqueue.c | 6 |
24 files changed, 538 insertions, 147 deletions
diff --git a/kernel/compat.c b/kernel/compat.c index 75573e5d27b..6952dd05730 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -678,7 +678,7 @@ int get_compat_sigevent(struct sigevent *event, ? -EFAULT : 0; } -long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask, +long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size) { int i, j; @@ -982,4 +982,37 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, } return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); } + +asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, + compat_ulong_t maxnode, + const compat_ulong_t __user *old_nodes, + const compat_ulong_t __user *new_nodes) +{ + unsigned long __user *old = NULL; + unsigned long __user *new = NULL; + nodemask_t tmp_mask; + unsigned long nr_bits; + unsigned long size; + + nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES); + size = ALIGN(nr_bits, BITS_PER_LONG) / 8; + if (old_nodes) { + if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits)) + return -EFAULT; + old = compat_alloc_user_space(new_nodes ? size * 2 : size); + if (new_nodes) + new = old + size / sizeof(unsigned long); + if (copy_to_user(old, nodes_addr(tmp_mask), size)) + return -EFAULT; + } + if (new_nodes) { + if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits)) + return -EFAULT; + if (new == NULL) + new = compat_alloc_user_space(size); + if (copy_to_user(new, nodes_addr(tmp_mask), size)) + return -EFAULT; + } + return sys_migrate_pages(pid, nr_bits + 1, old, new); +} #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 27dd3ee4709..272254f20d9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -58,8 +58,8 @@ void unlock_cpu_hotplug(void) recursive_depth--; return; } - mutex_unlock(&cpu_bitmask_lock); recursive = NULL; + mutex_unlock(&cpu_bitmask_lock); } EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); @@ -150,18 +150,18 @@ static int _cpu_down(unsigned int cpu) p = __stop_machine_run(take_cpu_down, NULL, cpu); mutex_unlock(&cpu_bitmask_lock); - if (IS_ERR(p)) { + if (IS_ERR(p) || cpu_online(cpu)) { /* CPU didn't die: tell everyone. Can't complain. */ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, (void *)(long)cpu) == NOTIFY_BAD) BUG(); - err = PTR_ERR(p); - goto out_allowed; - } - - if (cpu_online(cpu)) + if (IS_ERR(p)) { + err = PTR_ERR(p); + goto out_allowed; + } goto out_thread; + } /* Wait for it to sleep (leaving idle task). */ while (!idle_cpu(cpu)) diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 36752f124c6..66a0ea48751 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -66,6 +66,7 @@ static void delayacct_end(struct timespec *start, struct timespec *end, { struct timespec ts; s64 ns; + unsigned long flags; do_posix_clock_monotonic_gettime(end); ts = timespec_sub(*end, *start); @@ -73,10 +74,10 @@ static void delayacct_end(struct timespec *start, struct timespec *end, if (ns < 0) return; - spin_lock(¤t->delays->lock); + spin_lock_irqsave(¤t->delays->lock, flags); *total += ns; (*count)++; - spin_unlock(¤t->delays->lock); + spin_unlock_irqrestore(¤t->delays->lock, flags); } void __delayacct_blkio_start(void) @@ -104,6 +105,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) s64 tmp; struct timespec ts; unsigned long t1,t2,t3; + unsigned long flags; /* Though tsk->delays accessed later, early exit avoids * unnecessary returning of other data @@ -136,14 +138,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ - spin_lock(&tsk->delays->lock); + spin_lock_irqsave(&tsk->delays->lock, flags); tmp = d->blkio_delay_total + tsk->delays->blkio_delay; d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp; tmp = d->swapin_delay_total + tsk->delays->swapin_delay; d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; - spin_unlock(&tsk->delays->lock); + spin_unlock_irqrestore(&tsk->delays->lock, flags); done: return 0; @@ -152,11 +154,12 @@ done: __u64 __delayacct_blkio_ticks(struct task_struct *tsk) { __u64 ret; + unsigned long flags; - spin_lock(&tsk->delays->lock); + spin_lock_irqsave(&tsk->delays->lock, flags); ret = nsec_to_clock_t(tsk->delays->blkio_delay + tsk->delays->swapin_delay); - spin_unlock(&tsk->delays->lock); + spin_unlock_irqrestore(&tsk->delays->lock, flags); return ret; } diff --git a/kernel/exit.c b/kernel/exit.c index f250a5e3e28..06de6c4e8ca 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -128,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk) flush_sigqueue(&tsk->pending); if (sig) { flush_sigqueue(&sig->shared_pending); + taskstats_tgid_free(sig); __cleanup_signal(sig); } } diff --git a/kernel/fork.c b/kernel/fork.c index 29ebb30850e..8cdd3e72ba5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -830,7 +830,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); - taskstats_tgid_alloc(current->signal); + taskstats_tgid_alloc(current); return 0; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); @@ -897,7 +897,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts void __cleanup_signal(struct signal_struct *sig) { exit_thread_group_keys(sig); - taskstats_tgid_free(sig); kmem_cache_free(signal_cachep, sig); } @@ -1316,9 +1315,8 @@ struct task_struct * __devinit fork_idle(int cpu) struct pt_regs regs; task = copy_process(CLONE_VM, 0, idle_regs(®s), 0, NULL, NULL, 0); - if (!task) - return ERR_PTR(-ENOMEM); - init_idle(task, cpu); + if (!IS_ERR(task)) + init_idle(task, cpu); return task; } diff --git a/kernel/futex.c b/kernel/futex.c index b364e002619..93ef30ba209 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1507,6 +1507,13 @@ static int futex_fd(u32 __user *uaddr, int signal) struct futex_q *q; struct file *filp; int ret, err; + static unsigned long printk_interval; + + if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) { + printk(KERN_WARNING "Process `%s' used FUTEX_FD, which " + "will be removed from the kernel in June 2007\n", + current->comm); + } ret = -EINVAL; if (!valid_signal(signal)) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 2d0dc3efe81..ebfd24a4185 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -233,6 +233,8 @@ void irq_chip_set_defaults(struct irq_chip *chip) chip->shutdown = chip->disable; if (!chip->name) chip->name = chip->typename; + if (!chip->end) + chip->end = dummy_irq_chip.end; } static inline void mask_ack_irq(struct irq_desc *desc, int irq) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 42aa6f1a3f0..a681912bc89 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -231,10 +231,10 @@ fastcall unsigned int __do_IRQ(unsigned int irq) spin_unlock(&desc->lock); action_ret = handle_IRQ_event(irq, action); - - spin_lock(&desc->lock); if (!noirqdebug) note_interrupt(irq, desc, action_ret); + + spin_lock(&desc->lock); if (likely(!(desc->status & IRQ_PENDING))) break; desc->status &= ~IRQ_PENDING; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 6879202afe9..b385878c6e8 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -216,6 +216,7 @@ int setup_irq(unsigned int irq, struct irqaction *new) { struct irq_desc *desc = irq_desc + irq; struct irqaction *old, **p; + const char *old_name = NULL; unsigned long flags; int shared = 0; @@ -255,8 +256,10 @@ int setup_irq(unsigned int irq, struct irqaction *new) * set the trigger type must match. */ if (!((old->flags & new->flags) & IRQF_SHARED) || - ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) + ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK)) { + old_name = old->name; goto mismatch; + } #if defined(CONFIG_IRQ_PER_CPU) /* All handlers must agree on per-cpuness */ @@ -322,11 +325,13 @@ int setup_irq(unsigned int irq, struct irqaction *new) return 0; mismatch: - spin_unlock_irqrestore(&desc->lock, flags); if (!(new->flags & IRQF_PROBE_SHARED)) { printk(KERN_ERR "IRQ handler type mismatch for IRQ %d\n", irq); + if (old_name) + printk(KERN_ERR "current handler: %s\n", old_name); dump_stack(); } + spin_unlock_irqrestore(&desc->lock, flags); return -EBUSY; } diff --git a/kernel/kmod.c b/kernel/kmod.c index bb4e29d924e..2b76dee2849 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -307,14 +307,14 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, return 0; f = create_write_pipe(); - if (!f) - return -ENOMEM; + if (IS_ERR(f)) + return PTR_ERR(f); *filp = f; f = create_read_pipe(f); - if (!f) { + if (IS_ERR(f)) { free_write_pipe(*filp); - return -ENOMEM; + return PTR_ERR(f); } sub_info.stdin = f; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index b739be2a6dc..c9fefdb1a7d 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -1081,7 +1081,8 @@ static int static_obj(void *obj) */ for_each_possible_cpu(i) { start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_end + per_cpu_offset(i); + end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM + + per_cpu_offset(i); if ((addr >= start) && (addr < end)) return 1; diff --git a/kernel/module.c b/kernel/module.c index 67009bd56c5..f0166563c60 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1342,7 +1342,7 @@ static void set_license(struct module *mod, const char *license) if (!license_is_gpl_compatible(license)) { if (!(tainted & TAINT_PROPRIETARY_MODULE)) - printk(KERN_WARNING "%s: module license '%s' taints" + printk(KERN_WARNING "%s: module license '%s' taints " "kernel.\n", mod->name, license); add_taint_module(mod, TAINT_PROPRIETARY_MODULE); } @@ -1718,7 +1718,7 @@ static struct module *load_module(void __user *umod, set_license(mod, get_modinfo(sechdrs, infoindex, "license")); if (strcmp(mod->name, "ndiswrapper") == 0) - add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + add_taint(TAINT_PROPRIETARY_MODULE); if (strcmp(mod->name, "driverloader") == 0) add_taint_module(mod, TAINT_PROPRIETARY_MODULE); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index d3a158a6031..b1fb7866b0b 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -71,7 +71,7 @@ static inline void platform_finish(void) static int prepare_processes(void) { - int error; + int error = 0; pm_prepare_console(); @@ -84,6 +84,12 @@ static int prepare_processes(void) goto thaw; } + if (pm_disk_mode == PM_DISK_TESTPROC) { + printk("swsusp debug: Waiting for 5 seconds.\n"); + mdelay(5000); + goto thaw; + } + /* Free memory before shutting down devices. */ if (!(error = swsusp_shrink_memory())) return 0; @@ -120,13 +126,21 @@ int pm_suspend_disk(void) if (error) return error; + if (pm_disk_mode == PM_DISK_TESTPROC) + goto Thaw; + suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) { resume_console(); printk("Some devices failed to suspend\n"); - unprepare_processes(); - return error; + goto Thaw; + } + + if (pm_disk_mode == PM_DISK_TEST) { + printk("swsusp debug: Waiting for 5 seconds.\n"); + mdelay(5000); + goto Done; } pr_debug("PM: snapshotting memory.\n"); @@ -143,16 +157,17 @@ int pm_suspend_disk(void) power_down(pm_disk_mode); else { swsusp_free(); - unprepare_processes(); - return error; + goto Thaw; } - } else + } else { pr_debug("PM: Image restored successfully.\n"); + } swsusp_free(); Done: device_resume(); resume_console(); + Thaw: unprepare_processes(); return error; } @@ -249,6 +264,8 @@ static const char * const pm_disk_modes[] = { [PM_DISK_PLATFORM] = "platform", [PM_DISK_SHUTDOWN] = "shutdown", [PM_DISK_REBOOT] = "reboot", + [PM_DISK_TEST] = "test", + [PM_DISK_TESTPROC] = "testproc", }; /** @@ -303,17 +320,19 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) } } if (mode) { - if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) + if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT || + mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) { pm_disk_mode = mode; - else { + } else { if (pm_ops && pm_ops->enter && (mode == pm_ops->pm_disk_mode)) pm_disk_mode = mode; else error = -EINVAL; } - } else + } else { error = -EINVAL; + } pr_debug("PM: suspend-to-disk mode set to '%s'\n", pm_disk_modes[mode]); diff --git a/kernel/printk.c b/kernel/printk.c index f7d427ef503..66426552fbf 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/bootmem.h> #include <linux/syscalls.h> +#include <linux/jiffies.h> #include <asm/uaccess.h> @@ -1101,3 +1102,23 @@ int printk_ratelimit(void) printk_ratelimit_burst); } EXPORT_SYMBOL(printk_ratelimit); + +/** + * printk_timed_ratelimit - caller-controlled printk ratelimiting + * @caller_jiffies: pointer to caller's state + * @interval_msecs: minimum interval between prints + * + * printk_timed_ratelimit() returns true if more than @interval_msecs + * milliseconds have elapsed since the last time printk_timed_ratelimit() + * returned true. + */ +bool printk_timed_ratelimit(unsigned long *caller_jiffies, + unsigned int interval_msecs) +{ + if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) { + *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs); + return true; + } + return false; +} +EXPORT_SYMBOL(printk_timed_ratelimit); diff --git a/kernel/signal.c b/kernel/signal.c index 7ed8d5304be..df18c167a2a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -267,18 +267,25 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) { struct sigqueue *q = NULL; + struct user_struct *user; - atomic_inc(&t->user->sigpending); + /* + * In order to avoid problems with "switch_user()", we want to make + * sure that the compiler doesn't re-load "t->user" + */ + user = t->user; + barrier(); + atomic_inc(&user->sigpending); if (override_rlimit || - atomic_read(&t->user->sigpending) <= + atomic_read(&user->sigpending) <= t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) q = kmem_cache_alloc(sigqueue_cachep, flags); if (unlikely(q == NULL)) { - atomic_dec(&t->user->sigpending); + atomic_dec(&user->sigpending); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; - q->user = get_uid(t->user); + q->user = get_uid(user); } return(q); } diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 476c3741511..2c6c2bf8551 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -293,6 +293,27 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) } EXPORT_SYMBOL(_spin_lock_nested); +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + /* + * On lockdep we dont want the hand-coded irq-enable of + * _raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +#ifdef CONFIG_PROVE_SPIN_LOCKING + _raw_spin_lock(lock); +#else + _raw_spin_lock_flags(lock, &flags); +#endif + return flags; +} + +EXPORT_SYMBOL(_spin_lock_irqsave_nested); #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 0e53314b14d..d7306d0f3df 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -135,6 +135,7 @@ cond_syscall(sys_madvise); cond_syscall(sys_mremap); cond_syscall(sys_remap_file_pages); cond_syscall(compat_sys_move_pages); +cond_syscall(compat_sys_migrate_pages); /* block-layer dependent */ cond_syscall(sys_bdflush); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8bff2c18fb5..09e569f4792 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1315,7 +1315,9 @@ repeat: return -ENOTDIR; if (get_user(n, name)) return -EFAULT; - for ( ; table->ctl_name; table++) { + for ( ; table->ctl_name || table->procname; table++) { + if (!table->ctl_name) + continue; if (n == table->ctl_name || table->ctl_name == CTL_ANY) { int error; if (table->child) { @@ -1532,7 +1534,7 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, int len; mode_t mode; - for (; table->ctl_name; table++) { + for (; table->ctl_name || table->procname; table++) { /* Can't do anything without a proc name. */ if (!table->procname) continue; @@ -1579,7 +1581,7 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root) { struct proc_dir_entry *de; - for (; table->ctl_name; table++) { + for (; table->ctl_name || table->procname; table++) { if (!(de = table->de)) continue; if (de->mode & S_IFDIR) { @@ -2680,13 +2682,33 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, asmlinkage long sys_sysctl(struct __sysctl_args __user *args) { static int msg_count; + struct __sysctl_args tmp; + int name[CTL_MAXNAME]; + int i; + + /* Read in the sysctl name for better debug message logging */ + if (copy_from_user(&tmp, args, sizeof(tmp))) + return -EFAULT; + if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME) + return -ENOTDIR; + for (i = 0; i < tmp.nlen; i++) + if (get_user(name[i], tmp.name + i)) + return -EFAULT; + + /* Ignore accesses to kernel.version */ + if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION)) + goto out; if (msg_count < 5) { msg_count++; printk(KERN_INFO "warning: process `%s' used the removed sysctl " - "system call\n", current->comm); + "system call with ", current->comm); + for (i = 0; i < tmp.nlen; i++) + printk("%d.", name[i]); + printk("\n"); } +out: return -ENOSYS; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 5d6a8c54ee8..f45c5e70773 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -77,7 +77,8 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, /* * If new attributes are added, please revisit this allocation */ - skb = nlmsg_new(genlmsg_total_size(size), GFP_KERNEL); + size = nlmsg_total_size(genlmsg_total_size(size)); + skb = nlmsg_new(size, GFP_KERNEL); if (!skb) return -ENOMEM; @@ -174,21 +175,19 @@ static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) up_write(&listeners->sem); } -static int fill_pid(pid_t pid, struct task_struct *pidtsk, +static int fill_pid(pid_t pid, struct task_struct *tsk, struct taskstats *stats) { int rc = 0; - struct task_struct *tsk = pidtsk; - if (!pidtsk) { - read_lock(&tasklist_lock); + if (!tsk) { + rcu_read_lock(); tsk = find_task_by_pid(pid); - if (!tsk) { - read_unlock(&tasklist_lock); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) return -ESRCH; - } - get_task_struct(tsk); - read_unlock(&tasklist_lock); } else get_task_struct(tsk); @@ -214,39 +213,30 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk, } -static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, +static int fill_tgid(pid_t tgid, struct task_struct *first, struct taskstats *stats) { - struct task_struct *tsk, *first; + struct task_struct *tsk; unsigned long flags; + int rc = -ESRCH; /* * Add additional stats from live tasks except zombie thread group * leaders who are already counted with the dead tasks */ - first = tgidtsk; - if (!first) { - read_lock(&tasklist_lock); + rcu_read_lock(); + if (!first) first = find_task_by_pid(tgid); - if (!first) { - read_unlock(&tasklist_lock); - return -ESRCH; - } - get_task_struct(first); - read_unlock(&tasklist_lock); - } else - get_task_struct(first); - /* Start with stats from dead tasks */ - spin_lock_irqsave(&first->signal->stats_lock, flags); + if (!first || !lock_task_sighand(first, &flags)) + goto out; + if (first->signal->stats) memcpy(stats, first->signal->stats, sizeof(*stats)); - spin_unlock_irqrestore(&first->signal->stats_lock, flags); tsk = first; - read_lock(&tasklist_lock); do { - if (tsk->exit_state == EXIT_ZOMBIE && thread_group_leader(tsk)) + if (tsk->exit_state) continue; /* * Accounting subsystem can call its functions here to @@ -257,15 +247,18 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, delayacct_add_tsk(stats, tsk); } while_each_thread(first, tsk); - read_unlock(&tasklist_lock); - stats->version = TASKSTATS_VERSION; + unlock_task_sighand(first, &flags); + rc = 0; +out: + rcu_read_unlock(); + + stats->version = TASKSTATS_VERSION; /* * Accounting subsytems can also add calls here to modify * fields of taskstats. */ - - return 0; + return rc; } @@ -273,7 +266,7 @@ static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; - spin_lock_irqsave(&tsk->signal->stats_lock, flags); + spin_lock_irqsave(&tsk->sighand->siglock, flags); if (!tsk->signal->stats) goto ret; @@ -285,7 +278,7 @@ static void fill_tgid_exit(struct task_struct *tsk) */ delayacct_add_tsk(tsk->signal->stats, tsk); ret: - spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return; } @@ -419,7 +412,7 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) return send_reply(rep_skb, info->snd_pid); nla_put_failure: - return genlmsg_cancel(rep_skb, reply); + rc = genlmsg_cancel(rep_skb, reply); err: nlmsg_free(rep_skb); return rc; @@ -461,24 +454,26 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, size_t size; int is_thread_group; struct nlattr *na; - unsigned long flags; - if (!family_registered || !tidstats) + if (!family_registered) return; - spin_lock_irqsave(&tsk->signal->stats_lock, flags); - is_thread_group = tsk->signal->stats ? 1 : 0; - spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); - - rc = 0; /* * Size includes space for nested attributes */ size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); - if (is_thread_group) - size = 2 * size; /* PID + STATS + TGID + STATS */ + is_thread_group = (tsk->signal->stats != NULL); + if (is_thread_group) { + /* PID + STATS + TGID + STATS */ + size = 2 * size; + /* fill the tsk->signal->stats structure */ + fill_tgid_exit(tsk); + } + + if (!tidstats) + return; rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, &reply, size); if (rc < 0) @@ -498,11 +493,8 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, goto send; /* - * tsk has/had a thread group so fill the tsk->signal->stats structure * Doesn't matter if tsk is the leader or the last group member leaving */ - - fill_tgid_exit(tsk); if (!group_dead) goto send; @@ -519,7 +511,6 @@ send: nla_put_failure: genlmsg_cancel(rep_skb, reply); - goto ret; err_skb: nlmsg_free(rep_skb); ret: diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 47195fa0ec4..3afeaa3a73f 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -161,9 +161,9 @@ void second_overflow(void) time_adjust += MAX_TICKADJ; tick_length -= MAX_TICKADJ_SCALED; } else { - time_adjust = 0; tick_length += (s64)(time_adjust * NSEC_PER_USEC / HZ) << TICK_LENGTH_SHIFT; + time_adjust = 0; } } } diff --git a/kernel/tsacct.c b/kernel/tsacct.c index db443221ba5..96f77013d3f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -36,7 +36,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) /* calculate task elapsed time in timespec */ do_posix_clock_monotonic_gettime(&uptime); - ts = timespec_sub(uptime, current->group_leader->start_time); + ts = timespec_sub(uptime, tsk->start_time); /* rebase elapsed time to usec */ ac_etime = timespec_to_ns(&ts); do_div(ac_etime, NSEC_PER_USEC); @@ -58,7 +58,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_uid = tsk->uid; stats->ac_gid = tsk->gid; stats->ac_pid = tsk->pid; - stats->ac_ppid = (tsk->parent) ? tsk->parent->pid : 0; + rcu_read_lock(); + stats->ac_ppid = pid_alive(tsk) ? + rcu_dereference(tsk->real_parent)->tgid : 0; + rcu_read_unlock(); stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; stats->ac_minflt = tsk->min_flt; @@ -77,13 +80,17 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) */ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) { + struct mm_struct *mm; + /* convert pages-jiffies to Mbyte-usec */ stats->coremem = jiffies_to_usecs(p->acct_rss_mem1) * PAGE_SIZE / MB; stats->virtmem = jiffies_to_usecs(p->acct_vm_mem1) * PAGE_SIZE / MB; - if (p->mm) { + mm = get_task_mm(p); + if (mm) { /* adjust to KB unit */ - stats->hiwater_rss = p->mm->hiwater_rss * PAGE_SIZE / KB; - stats->hiwater_vm = p->mm->hiwater_vm * PAGE_SIZE / KB; + stats->hiwater_rss = mm->hiwater_rss * PAGE_SIZE / KB; + stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB; + mmput(mm); } stats->read_char = p->rchar; stats->write_char = p->wchar; diff --git a/kernel/unwind.c b/kernel/unwind.c index 2e2368607aa..ed0a21d4a90 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -11,13 +11,15 @@ #include <linux/unwind.h> #include <linux/module.h> -#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/sort.h> #include <linux/stop_machine.h> #include <asm/sections.h> #include <asm/uaccess.h> #include <asm/unaligned.h> extern char __start_unwind[], __end_unwind[]; +extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; #define MAX_STACK_DEPTH 8 @@ -100,6 +102,8 @@ static struct unwind_table { } core, init; const void *address; unsigned long size; + const unsigned char *header; + unsigned long hdrsz; struct unwind_table *link; const char *name; } root_table; @@ -145,6 +149,10 @@ static struct unwind_table *find_table(unsigned long pc) return table; } +static unsigned long read_pointer(const u8 **pLoc, + const void *end, + signed ptrType); + static void init_unwind_table(struct unwind_table *table, const char *name, const void *core_start, @@ -152,14 +160,30 @@ static void init_unwind_table(struct unwind_table *table, const void *init_start, unsigned long init_size, const void *table_start, - unsigned long table_size) + unsigned long table_size, + const u8 *header_start, + unsigned long header_size) { + const u8 *ptr = header_start + 4; + const u8 *end = header_start + header_size; + table->core.pc = (unsigned long)core_start; table->core.range = core_size; table->init.pc = (unsigned long)init_start; table->init.range = init_size; table->address = table_start; table->size = table_size; + /* See if the linker provided table looks valid. */ + if (header_size <= 4 + || header_start[0] != 1 + || (void *)read_pointer(&ptr, end, header_start[1]) != table_start + || header_start[2] == DW_EH_PE_omit + || read_pointer(&ptr, end, header_start[2]) <= 0 + || header_start[3] == DW_EH_PE_omit) + header_start = NULL; + table->hdrsz = header_size; + smp_wmb(); + table->header = header_start; table->link = NULL; table->name = name; } @@ -169,7 +193,143 @@ void __init unwind_init(void) init_unwind_table(&root_table, "kernel", _text, _end - _text, NULL, 0, - __start_unwind, __end_unwind - __start_unwind); + __start_unwind, __end_unwind - __start_unwind, + __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr); +} + +static const u32 bad_cie, not_fde; +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); +static signed fde_pointer_type(const u32 *cie); + +struct eh_frame_hdr_table_entry { + unsigned long start, fde; +}; + +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) +{ + const struct eh_frame_hdr_table_entry *e1 = p1; + const struct eh_frame_hdr_table_entry *e2 = p2; + + return (e1->start > e2->start) - (e1->start < e2->start); +} + +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) +{ + struct eh_frame_hdr_table_entry *e1 = p1; + struct eh_frame_hdr_table_entry *e2 = p2; + unsigned long v; + + v = e1->start; + e1->start = e2->start; + e2->start = v; + v = e1->fde; + e1->fde = e2->fde; + e2->fde = v; +} + +static void __init setup_unwind_table(struct unwind_table *table, + void *(*alloc)(unsigned long)) +{ + const u8 *ptr; + unsigned long tableSize = table->size, hdrSize; + unsigned n; + const u32 *fde; + struct { + u8 version; + u8 eh_frame_ptr_enc; + u8 fde_count_enc; + u8 table_enc; + unsigned long eh_frame_ptr; + unsigned int fde_count; + struct eh_frame_hdr_table_entry table[]; + } __attribute__((__packed__)) *header; + + if (table->header) + return; + + if (table->hdrsz) + printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n", + table->name); + + if (tableSize & (sizeof(*fde) - 1)) + return; + + for (fde = table->address, n = 0; + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = cie_for_fde(fde, table); + signed ptrType; + + if (cie == ¬_fde) + continue; + if (cie == NULL + || cie == &bad_cie + || (ptrType = fde_pointer_type(cie)) < 0) + return; + ptr = (const u8 *)(fde + 2); + if (!read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType)) + return; + ++n; + } + + if (tableSize || !n) + return; + + hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + + 2 * n * sizeof(unsigned long); + header = alloc(hdrSize); + if (!header) + return; + header->version = 1; + header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native; + header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4; + header->table_enc = DW_EH_PE_abs|DW_EH_PE_native; + put_unaligned((unsigned long)table->address, &header->eh_frame_ptr); + BUILD_BUG_ON(offsetof(typeof(*header), fde_count) + % __alignof(typeof(header->fde_count))); + header->fde_count = n; + + BUILD_BUG_ON(offsetof(typeof(*header), table) + % __alignof(typeof(*header->table))); + for (fde = table->address, tableSize = table->size, n = 0; + tableSize; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); + + if (!fde[1]) + continue; /* this is a CIE */ + ptr = (const u8 *)(fde + 2); + header->table[n].start = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + fde_pointer_type(cie)); + header->table[n].fde = (unsigned long)fde; + ++n; + } + WARN_ON(n != header->fde_count); + + sort(header->table, + n, + sizeof(*header->table), + cmp_eh_frame_hdr_table_entries, + swap_eh_frame_hdr_table_entries); + + table->hdrsz = hdrSize; + smp_wmb(); + table->header = (const void *)header; +} + +static void *__init balloc(unsigned long sz) +{ + return __alloc_bootmem_nopanic(sz, + sizeof(unsigned int), + __pa(MAX_DMA_ADDRESS)); +} + +void __init unwind_setup(void) +{ + setup_unwind_table(&root_table, balloc); } #ifdef CONFIG_MODULES @@ -193,7 +353,8 @@ void *unwind_add_table(struct module *module, init_unwind_table(table, module->name, module->module_core, module->core_size, module->module_init, module->init_size, - table_start, table_size); + table_start, table_size, + NULL, 0); if (last_table) last_table->link = table; @@ -303,6 +464,26 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) return value; } +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) +{ + const u32 *cie; + + if (!*fde || (*fde & (sizeof(*fde) - 1))) + return &bad_cie; + if (!fde[1]) + return ¬_fde; /* this is a CIE */ + if ((fde[1] & (sizeof(*fde) - 1)) + || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) + return NULL; /* this is not a valid FDE */ + cie = fde + 1 - fde[1] / sizeof(*fde); + if (*cie <= sizeof(*cie) + 4 + || *cie >= fde[1] - sizeof(*fde) + || (*cie & (sizeof(*cie) - 1)) + || cie[1]) + return NULL; /* this is not a (valid) CIE */ + return cie; +} + static unsigned long read_pointer(const u8 **pLoc, const void *end, signed ptrType) @@ -610,49 +791,108 @@ int unwind(struct unwind_frame_info *frame) unsigned i; signed ptrType = -1; uleb128_t retAddrReg = 0; - struct unwind_table *table; + const struct unwind_table *table; struct unwind_state state; if (UNW_PC(frame) == 0) return -EINVAL; if ((table = find_table(pc)) != NULL && !(table->size & (sizeof(*fde) - 1))) { - unsigned long tableSize = table->size; - - for (fde = table->address; - tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; - tableSize -= sizeof(*fde) + *fde, - fde += 1 + *fde / sizeof(*fde)) { - if (!*fde || (*fde & (sizeof(*fde) - 1))) - break; - if (!fde[1]) - continue; /* this is a CIE */ - if ((fde[1] & (sizeof(*fde) - 1)) - || fde[1] > (unsigned long)(fde + 1) - - (unsigned long)table->address) - continue; /* this is not a valid FDE */ - cie = fde + 1 - fde[1] / sizeof(*fde); - if (*cie <= sizeof(*cie) + 4 - || *cie >= fde[1] - sizeof(*fde) - || (*cie & (sizeof(*cie) - 1)) - || cie[1] - || (ptrType = fde_pointer_type(cie)) < 0) { - cie = NULL; /* this is not a (valid) CIE */ - continue; + const u8 *hdr = table->header; + unsigned long tableSize; + + smp_rmb(); + if (hdr && hdr[0] == 1) { + switch(hdr[3] & DW_EH_PE_FORM) { + case DW_EH_PE_native: tableSize = sizeof(unsigned long); break; + case DW_EH_PE_data2: tableSize = 2; break; + case DW_EH_PE_data4: tableSize = 4; break; + case DW_EH_PE_data8: tableSize = 8; break; + default: tableSize = 0; break; } + ptr = hdr + 4; + end = hdr + table->hdrsz; + if (tableSize + && read_pointer(&ptr, end, hdr[1]) + == (unsigned long)table->address + && (i = read_pointer(&ptr, end, hdr[2])) > 0 + && i == (end - ptr) / (2 * tableSize) + && !((end - ptr) % (2 * tableSize))) { + do { + const u8 *cur = ptr + (i / 2) * (2 * tableSize); + + startLoc = read_pointer(&cur, + cur + tableSize, + hdr[3]); + if (pc < startLoc) + i /= 2; + else { + ptr = cur - tableSize; + i = (i + 1) / 2; + } + } while (startLoc && i > 1); + if (i == 1 + && (startLoc = read_pointer(&ptr, + ptr + tableSize, + hdr[3])) != 0 + && pc >= startLoc) + fde = (void *)read_pointer(&ptr, + ptr + tableSize, + hdr[3]); + } + } + + if (fde != NULL) { + cie = cie_for_fde(fde, table); ptr = (const u8 *)(fde + 2); - startLoc = read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType); - endLoc = startLoc - + read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType & DW_EH_PE_indirect - ? ptrType - : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed)); - if (pc >= startLoc && pc < endLoc) - break; - cie = NULL; + if(cie != NULL + && cie != &bad_cie + && cie != ¬_fde + && (ptrType = fde_pointer_type(cie)) >= 0 + && read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType) == startLoc) { + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType); + if(pc >= endLoc) + fde = NULL; + } else + fde = NULL; + } + if (fde == NULL) { + for (fde = table->address, tableSize = table->size; + cie = NULL, tableSize > sizeof(*fde) + && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, + fde += 1 + *fde / sizeof(*fde)) { + cie = cie_for_fde(fde, table); + if (cie == &bad_cie) { + cie = NULL; + break; + } + if (cie == NULL + || cie == ¬_fde + || (ptrType = fde_pointer_type(cie)) < 0) + continue; + ptr = (const u8 *)(fde + 2); + startLoc = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType); + if (!startLoc) + continue; + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType); + if (pc >= startLoc && pc < endLoc) + break; + } } } if (cie != NULL) { @@ -698,8 +938,11 @@ int unwind(struct unwind_frame_info *frame) else { retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); /* skip augmentation */ - if (((const char *)(cie + 2))[1] == 'z') - ptr += get_uleb128(&ptr, end); + if (((const char *)(cie + 2))[1] == 'z') { + uleb128_t augSize = get_uleb128(&ptr, end); + + ptr += augSize; + } if (ptr > end || retAddrReg >= ARRAY_SIZE(reg_info) || REG_INVALID(retAddrReg) @@ -723,9 +966,7 @@ int unwind(struct unwind_frame_info *frame) if (cie == NULL || fde == NULL) { #ifdef CONFIG_FRAME_POINTER unsigned long top, bottom; -#endif -#ifdef CONFIG_FRAME_POINTER top = STACK_TOP(frame->task); bottom = STACK_BOTTOM(frame->task); # if FRAME_RETADDR_OFFSET < 0 diff --git a/kernel/user.c b/kernel/user.c index 6408c042429..220e586127a 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -187,6 +187,17 @@ void switch_uid(struct user_struct *new_user) atomic_dec(&old_user->processes); switch_uid_keyring(new_user); current->user = new_user; + + /* + * We need to synchronize with __sigqueue_alloc() + * doing a get_uid(p->user).. If that saw the old + * user value, we need to wait until it has exited + * its critical region before we can free the old + * structure. + */ + smp_mb(); + spin_unlock_wait(¤t->sighand->siglock); + free_uid(old_user); suid_keys(current); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3df9bfc7ff7..17c2f03d2c2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -99,7 +99,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, * @wq: workqueue to use * @work: work to queue * - * Returns non-zero if it was successfully added. + * Returns 0 if @work was already on a queue, non-zero otherwise. * * We queue the work to the CPU it was submitted, but there is no * guarantee that it will be processed by that CPU. @@ -138,7 +138,7 @@ static void delayed_work_timer_fn(unsigned long __data) * @work: work to queue * @delay: number of jiffies to wait before queueing * - * Returns non-zero if it was successfully added. + * Returns 0 if @work was already on a queue, non-zero otherwise. */ int fastcall queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay) @@ -169,7 +169,7 @@ EXPORT_SYMBOL_GPL(queue_delayed_work); * @work: work to queue * @delay: number of jiffies to wait before queueing * - * Returns non-zero if it was successfully added. + * Returns 0 if @work was already on a queue, non-zero otherwise. */ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work, unsigned long delay) |