diff options
author | David S. Miller <davem@davemloft.net> | 2009-04-16 17:35:26 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-04-16 17:35:26 -0700 |
commit | a54bfa40fd16aeb90bc556189221576f746f8567 (patch) | |
tree | 176bb7a99ffab5f42f0dd4e9671f335be3f3efa0 /kernel | |
parent | fe957c40ec5e2763b9977c565beab3bde3aaf85b (diff) | |
parent | 134ffb4cad92a6aa534e55a9be145bca780a32c1 (diff) |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/fork.c | 13 | ||||
-rw-r--r-- | kernel/futex.c | 7 | ||||
-rw-r--r-- | kernel/irq/numa_migrate.c | 1 | ||||
-rw-r--r-- | kernel/kthread.c | 26 | ||||
-rw-r--r-- | kernel/module.c | 3 | ||||
-rw-r--r-- | kernel/mutex.c | 3 | ||||
-rw-r--r-- | kernel/panic.c | 12 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 9 | ||||
-rw-r--r-- | kernel/power/disk.c | 8 | ||||
-rw-r--r-- | kernel/power/user.c | 9 | ||||
-rw-r--r-- | kernel/ptrace.c | 16 | ||||
-rw-r--r-- | kernel/sched.c | 160 | ||||
-rw-r--r-- | kernel/sched_cpupri.c | 5 | ||||
-rw-r--r-- | kernel/sched_rt.c | 15 | ||||
-rw-r--r-- | kernel/timer.c | 7 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 4 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace.c | 36 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_events_stage_2.h | 4 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 2 | ||||
-rw-r--r-- | kernel/workqueue.c | 36 |
23 files changed, 297 insertions, 115 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 989c7c202b3..b9e2edd0072 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -800,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) sig->cputime_expires.virt_exp = cputime_zero; sig->cputime_expires.sched_exp = 0; + if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { + sig->cputime_expires.prof_exp = + secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); + sig->cputimer.running = 1; + } + /* The timer lists. */ INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); @@ -815,11 +821,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) atomic_inc(¤t->signal->live); return 0; } - sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); - - if (sig) - posix_cpu_timers_init_group(sig); + sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; if (!sig) return -ENOMEM; @@ -859,6 +862,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); + posix_cpu_timers_init_group(sig); + acct_init_pacct(&sig->pacct); tty_audit_fork(sig); diff --git a/kernel/futex.c b/kernel/futex.c index 6b50a024bca..eef8cd26b5e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -883,7 +883,12 @@ retry_private: out_unlock: double_unlock_hb(hb1, hb2); - /* drop_futex_key_refs() must be called outside the spinlocks. */ + /* + * drop_futex_key_refs() must be called outside the spinlocks. During + * the requeue we moved futex_q's from the hash bucket at key1 to the + * one at key2 and updated their key pointer. We no longer need to + * hold the references to key1. + */ while (--drop_count >= 0) drop_futex_key_refs(&key1); diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 243d6121e50..44bbdcbaf8d 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -54,6 +54,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) { free_kstat_irqs(old_desc, desc); + free_desc_masks(old_desc, desc); arch_free_chip_data(old_desc, desc); } diff --git a/kernel/kthread.c b/kernel/kthread.c index 84bbadd4d02..4ebaf8519ab 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -76,6 +76,7 @@ static int kthread(void *_create) /* OK, tell user we're spawned, wait for stop or wakeup */ __set_current_state(TASK_UNINTERRUPTIBLE); + create->result = current; complete(&create->started); schedule(); @@ -96,22 +97,10 @@ static void create_kthread(struct kthread_create_info *create) /* We want our own signal handler (we take no signals by default). */ pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); - if (pid < 0) { + if (pid < 0) create->result = ERR_PTR(pid); - } else { - struct sched_param param = { .sched_priority = 0 }; + else wait_for_completion(&create->started); - read_lock(&tasklist_lock); - create->result = find_task_by_pid_ns(pid, &init_pid_ns); - read_unlock(&tasklist_lock); - /* - * root may have changed our (kthreadd's) priority or CPU mask. - * The kernel thread should not inherit these properties. - */ - sched_setscheduler(create->result, SCHED_NORMAL, ¶m); - set_user_nice(create->result, KTHREAD_NICE_LEVEL); - set_cpus_allowed_ptr(create->result, cpu_all_mask); - } complete(&create->done); } @@ -154,11 +143,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), wait_for_completion(&create.done); if (!IS_ERR(create.result)) { + struct sched_param param = { .sched_priority = 0 }; va_list args; + va_start(args, namefmt); vsnprintf(create.result->comm, sizeof(create.result->comm), namefmt, args); va_end(args); + /* + * root may have changed our (kthreadd's) priority or CPU mask. + * The kernel thread should not inherit these properties. + */ + sched_setscheduler_nocheck(create.result, SCHED_NORMAL, ¶m); + set_user_nice(create.result, KTHREAD_NICE_LEVEL); + set_cpus_allowed_ptr(create.result, cpu_all_mask); } return create.result; } diff --git a/kernel/module.c b/kernel/module.c index 05f014efa32..e797812a4d9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2388,6 +2388,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_LIVE, mod); + /* We need to finish all async code before the module init sequence is done */ + async_synchronize_full(); + mutex_lock(&module_mutex); /* Drop initial reference. */ module_put(mod); diff --git a/kernel/mutex.c b/kernel/mutex.c index 5d79781394a..507cf2b5e9f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -148,7 +148,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire(&lock->dep_map, subclass, 0, ip); -#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) +#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \ + !defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES) /* * Optimistic spinning. * diff --git a/kernel/panic.c b/kernel/panic.c index 3fd8c5bf8b3..934fb377f4b 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -213,8 +213,16 @@ unsigned long get_taint(void) void add_taint(unsigned flag) { - /* can't trust the integrity of the kernel anymore: */ - debug_locks = 0; + /* + * Can't trust the integrity of the kernel anymore. + * We don't call directly debug_locks_off() because the issue + * is not necessarily serious enough to set oops_in_progress to 1 + * Also we want to keep up lockdep for staging development and + * post-warning case. + */ + if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off()) + printk(KERN_WARNING "Disabling lockdep due to kernel taint\n"); + set_bit(flag, &tainted_mask); } EXPORT_SYMBOL(add_taint); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 8e5d9a68b02..c9dcf98b446 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -18,7 +18,7 @@ void update_rlimit_cpu(unsigned long rlim_new) cputime = secs_to_cputime(rlim_new); if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || - cputime_lt(current->signal->it_prof_expires, cputime)) { + cputime_gt(current->signal->it_prof_expires, cputime)) { spin_lock_irq(¤t->sighand->siglock); set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); spin_unlock_irq(¤t->sighand->siglock); @@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, cpu->cpu = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); + cpu->sched = task_sched_runtime(p); break; } return 0; @@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock, { struct task_cputime cputime; - thread_group_cputime(p, &cputime); switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: + thread_group_cputime(p, &cputime); cpu->cpu = cputime_add(cputime.utime, cputime.stime); break; case CPUCLOCK_VIRT: + thread_group_cputime(p, &cputime); cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + cpu->sched = thread_group_sched_runtime(p); break; } return 0; diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 5f21ab2bbcd..0854770b63b 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -22,6 +22,7 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> +#include <scsi/scsi_scan.h> #include <asm/suspend.h> #include "power.h" @@ -645,6 +646,13 @@ static int software_resume(void) return 0; /* + * We can't depend on SCSI devices being available after loading one of + * their modules if scsi_complete_async_scans() is not called and the + * resume device usually is a SCSI one. + */ + scsi_complete_async_scans(); + + /* * name_to_dev_t() below takes a sysfs buffer mutex when sysfs * is configured into the kernel. Since the regular hibernate * trigger path is via sysfs which takes a buffer mutex before diff --git a/kernel/power/user.c b/kernel/power/user.c index 6c85359364f..ed97375daae 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -24,6 +24,7 @@ #include <linux/cpu.h> #include <linux/freezer.h> #include <linux/smp_lock.h> +#include <scsi/scsi_scan.h> #include <asm/uaccess.h> @@ -92,6 +93,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) filp->private_data = data; memset(&data->handle, 0, sizeof(struct snapshot_handle)); if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { + /* Hibernating. The image device should be accessible. */ data->swap = swsusp_resume_device ? swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; @@ -99,6 +101,13 @@ static int snapshot_open(struct inode *inode, struct file *filp) if (error) pm_notifier_call_chain(PM_POST_HIBERNATION); } else { + /* + * Resuming. We may need to wait for the image device to + * appear. + */ + wait_for_device_probe(); + scsi_complete_async_scans(); + data->swap = -1; data->mode = O_WRONLY; error = pm_notifier_call_chain(PM_RESTORE_PREPARE); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index aaad0ec3419..64191fa09b7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -21,9 +21,7 @@ #include <linux/audit.h> #include <linux/pid_namespace.h> #include <linux/syscalls.h> - -#include <asm/pgtable.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> /* @@ -48,7 +46,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) list_add(&child->ptrace_entry, &new_parent->ptraced); child->parent = new_parent; } - + /* * Turn a tracing stop into a normal stop now, since with no tracer there * would be no way to wake it up with SIGCONT or SIGKILL. If there was a @@ -173,7 +171,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) task_lock(task); err = __ptrace_may_access(task, mode); task_unlock(task); - return (!err ? true : false); + return !err; } int ptrace_attach(struct task_struct *task) @@ -358,7 +356,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst copied += retval; src += retval; dst += retval; - len -= retval; + len -= retval; } return copied; } @@ -383,7 +381,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds copied += retval; src += retval; dst += retval; - len -= retval; + len -= retval; } return copied; } @@ -496,9 +494,9 @@ static int ptrace_resume(struct task_struct *child, long request, long data) if (unlikely(!arch_has_single_step())) return -EIO; user_enable_single_step(child); - } - else + } else { user_disable_single_step(child); + } child->exit_code = data; wake_up_process(child); diff --git a/kernel/sched.c b/kernel/sched.c index 6cc1fd5d507..5724508c3b6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, struct rq_iterator *iterator); #endif +/* Time spent by the tasks of the cpu accounting group executing in ... */ +enum cpuacct_stat_index { + CPUACCT_STAT_USER, /* ... user mode */ + CPUACCT_STAT_SYSTEM, /* ... kernel mode */ + + CPUACCT_STAT_NSTATS, +}; + #ifdef CONFIG_CGROUP_CPUACCT static void cpuacct_charge(struct task_struct *tsk, u64 cputime); +static void cpuacct_update_stats(struct task_struct *tsk, + enum cpuacct_stat_index idx, cputime_t val); #else static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} +static inline void cpuacct_update_stats(struct task_struct *tsk, + enum cpuacct_stat_index idx, cputime_t val) {} #endif static inline void inc_cpu_load(struct rq *rq, unsigned long load) @@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); EXPORT_PER_CPU_SYMBOL(kstat); /* - * Return any ns on the sched_clock that have not yet been banked in + * Return any ns on the sched_clock that have not yet been accounted in * @p in case that task is currently running. + * + * Called with task_rq_lock() held on @rq. */ +static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) +{ + u64 ns = 0; + + if (task_current(rq, p)) { + update_rq_clock(rq); + ns = rq->clock - p->se.exec_start; + if ((s64)ns < 0) + ns = 0; + } + + return ns; +} + unsigned long long task_delta_exec(struct task_struct *p) { unsigned long flags; @@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p) u64 ns = 0; rq = task_rq_lock(p, &flags); + ns = do_task_delta_exec(p, rq); + task_rq_unlock(rq, &flags); - if (task_current(rq, p)) { - u64 delta_exec; + return ns; +} - update_rq_clock(rq); - delta_exec = rq->clock - p->se.exec_start; - if ((s64)delta_exec > 0) - ns = delta_exec; - } +/* + * Return accounted runtime for the task. + * In case the task is currently running, return the runtime plus current's + * pending runtime that have not been accounted yet. + */ +unsigned long long task_sched_runtime(struct task_struct *p) +{ + unsigned long flags; + struct rq *rq; + u64 ns = 0; + + rq = task_rq_lock(p, &flags); + ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); + task_rq_unlock(rq, &flags); + + return ns; +} + +/* + * Return sum_exec_runtime for the thread group. + * In case the task is currently running, return the sum plus current's + * pending runtime that have not been accounted yet. + * + * Note that the thread group might have other running tasks as well, + * so the return value not includes other pending runtime that other + * running tasks might have. + */ +unsigned long long thread_group_sched_runtime(struct task_struct *p) +{ + struct task_cputime totals; + unsigned long flags; + struct rq *rq; + u64 ns; + rq = task_rq_lock(p, &flags); + thread_group_cputime(p, &totals); + ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); task_rq_unlock(rq, &flags); return ns; @@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime, cpustat->nice = cputime64_add(cpustat->nice, tmp); else cpustat->user = cputime64_add(cpustat->user, tmp); + + cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime); /* Account for user time used */ acct_update_integrals(p); } @@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset, else cpustat->system = cputime64_add(cpustat->system, tmp); + cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); + /* Account for system time used */ acct_update_integrals(p); } @@ -7302,7 +7367,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpumask_or(groupmask, groupmask, sched_group_cpus(group)); cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); - printk(KERN_CONT " %s", str); + printk(KERN_CONT " %s (__cpu_power = %d)", str, + group->__cpu_power); group = group->next; } while (group != sd->groups); @@ -9925,6 +9991,7 @@ struct cpuacct { struct cgroup_subsys_state css; /* cpuusage holds pointer to a u64-type object on every cpu */ u64 *cpuusage; + struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; struct cpuacct *parent; }; @@ -9949,20 +10016,32 @@ static struct cgroup_subsys_state *cpuacct_create( struct cgroup_subsys *ss, struct cgroup *cgrp) { struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); + int i; if (!ca) - return ERR_PTR(-ENOMEM); + goto out; ca->cpuusage = alloc_percpu(u64); - if (!ca->cpuusage) { - kfree(ca); - return ERR_PTR(-ENOMEM); - } + if (!ca->cpuusage) + goto out_free_ca; + + for (i = 0; i < CPUACCT_STAT_NSTATS; i++) + if (percpu_counter_init(&ca->cpustat[i], 0)) + goto out_free_counters; if (cgrp->parent) ca->parent = cgroup_ca(cgrp->parent); return &ca->css; + +out_free_counters: + while (--i >= 0) + percpu_counter_destroy(&ca->cpustat[i]); + free_percpu(ca->cpuusage); +out_free_ca: + kfree(ca); +out: + return ERR_PTR(-ENOMEM); } /* destroy an existing cpu accounting group */ @@ -9970,7 +10049,10 @@ static void cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) { struct cpuacct *ca = cgroup_ca(cgrp); + int i; + for (i = 0; i < CPUACCT_STAT_NSTATS; i++) + percpu_counter_destroy(&ca->cpustat[i]); free_percpu(ca->cpuusage); kfree(ca); } @@ -10057,6 +10139,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, return 0; } +static const char *cpuacct_stat_desc[] = { + [CPUACCT_STAT_USER] = "user", + [CPUACCT_STAT_SYSTEM] = "system", +}; + +static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, + struct cgroup_map_cb *cb) +{ + struct cpuacct *ca = cgroup_ca(cgrp); + int i; + + for (i = 0; i < CPUACCT_STAT_NSTATS; i++) { + s64 val = percpu_counter_read(&ca->cpustat[i]); + val = cputime64_to_clock_t(val); + cb->fill(cb, cpuacct_stat_desc[i], val); + } + return 0; +} + static struct cftype files[] = { { .name = "usage", @@ -10067,7 +10168,10 @@ static struct cftype files[] = { .name = "usage_percpu", .read_seq_string = cpuacct_percpu_seq_read, }, - + { + .name = "stat", + .read_map = cpuacct_stats_show, + }, }; static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) @@ -10089,12 +10193,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) return; cpu = task_cpu(tsk); + + rcu_read_lock(); + ca = task_ca(tsk); for (; ca; ca = ca->parent) { u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); *cpuusage += cputime; } + + rcu_read_unlock(); +} + +/* + * Charge the system/user time to the task's accounting group. + */ +static void cpuacct_update_stats(struct task_struct *tsk, + enum cpuacct_stat_index idx, cputime_t val) +{ + struct cpuacct *ca; + + if (unlikely(!cpuacct_subsys.active)) + return; + + rcu_read_lock(); + ca = task_ca(tsk); + + do { + percpu_counter_add(&ca->cpustat[idx], val); + ca = ca->parent; + } while (ca); + rcu_read_unlock(); } struct cgroup_subsys cpuacct_subsys = { diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 1e00bfacf9b..cdd3c89574c 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -55,7 +55,7 @@ static int convert_prio(int prio) * cpupri_find - find the best (lowest-pri) CPU in the system * @cp: The cpupri context * @p: The task - * @lowest_mask: A mask to fill in with selected CPUs + * @lowest_mask: A mask to fill in with selected CPUs (or NULL) * * Note: This function returns the recommended CPUs as calculated during the * current invokation. By the time the call returns, the CPUs may have in @@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) continue; - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); + if (lowest_mask) + cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); return 1; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 299d012b439..f2c66f8f971 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync) static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - cpumask_var_t mask; - if (rq->curr->rt.nr_cpus_allowed == 1) return; - if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) - return; - if (p->rt.nr_cpus_allowed != 1 - && cpupri_find(&rq->rd->cpupri, p, mask)) - goto free; + && cpupri_find(&rq->rd->cpupri, p, NULL)) + return; - if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) - goto free; + if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) + return; /* * There appears to be other cpus that can accept @@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) */ requeue_task_rt(rq, p, 1); resched_task(rq->curr); -free: - free_cpumask_var(mask); } #endif /* CONFIG_SMP */ diff --git a/kernel/timer.c b/kernel/timer.c index b4555568b4e..cffffad01c3 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -531,10 +531,13 @@ static void __init_timer(struct timer_list *timer, } /** - * init_timer - initialize a timer. + * init_timer_key - initialize a timer * @timer: the timer to be initialized + * @name: name of the timer + * @key: lockdep class key of the fake lock used for tracking timer + * sync lock dependencies * - * init_timer() must be done to a timer prior calling *any* of the + * init_timer_key() must be done to a timer prior calling *any* of the * other timer functions. */ void init_timer_key(struct timer_list *timer, diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 2246141bda4..417d1985e29 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -312,7 +312,7 @@ config KMEMTRACE and profile kernel code. This requires an userspace application to use. See - Documentation/vm/kmemtrace.txt for more information. + Documentation/trace/kmemtrace.txt for more information. Saying Y will make the kernel somewhat larger and slower. However, if you disable kmemtrace at run-time or boot-time, the performance @@ -403,7 +403,7 @@ config MMIOTRACE implementation and works via page faults. Tracing is disabled by default and can be enabled at run-time. - See Documentation/tracers/mmiotrace.txt. + See Documentation/trace/mmiotrace.txt. If you are not helping to develop drivers, say N. config MMIOTRACE_TEST diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b32ff446c3f..921ef5d1f0b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1377,12 +1377,12 @@ static int blk_trace_str2mask(const char *str) { int i; int mask = 0; - char *s, *token; + char *buf, *s, *token; - s = kstrdup(str, GFP_KERNEL); - if (s == NULL) + buf = kstrdup(str, GFP_KERNEL); + if (buf == NULL) return -ENOMEM; - s = strstrip(s); + s = strstrip(buf); while (1) { token = strsep(&s, ","); @@ -1403,7 +1403,7 @@ static int blk_trace_str2mask(const char *str) break; } } - kfree(s); + kfree(buf); return mask; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9d28476a985..1ce5dc6372b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3277,19 +3277,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) info->tr = &global_trace; info->cpu = cpu; - info->spare = ring_buffer_alloc_read_page(info->tr->buffer); + info->spare = NULL; /* Force reading ring buffer for first read */ info->read = (unsigned int)-1; - if (!info->spare) - goto out; filp->private_data = info; - return 0; - - out: - kfree(info); - return -ENOMEM; + return nonseekable_open(inode, filp); } static ssize_t @@ -3304,6 +3298,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (!count) return 0; + if (!info->spare) + info->spare = ring_buffer_alloc_read_page(info->tr->buffer); + if (!info->spare) + return -ENOMEM; + /* Do we have previous read data to read? */ if (info->read < PAGE_SIZE) goto read; @@ -3342,7 +3341,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) { struct ftrace_buffer_info *info = file->private_data; - ring_buffer_free_read_page(info->tr->buffer, info->spare); + if (info->spare) + ring_buffer_free_read_page(info->tr->buffer, info->spare); kfree(info); return 0; @@ -3428,14 +3428,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, int size, i; size_t ret; - /* - * We can't seek on a buffer input - */ - if (unlikely(*ppos)) - return -ESPIPE; + if (*ppos & (PAGE_SIZE - 1)) { + WARN_ONCE(1, "Ftrace: previous read must page-align\n"); + return -EINVAL; + } + if (len & (PAGE_SIZE - 1)) { + WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); + if (len < PAGE_SIZE) + return -EINVAL; + len &= PAGE_MASK; + } - for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { + for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { struct page *page; int r; @@ -3474,6 +3479,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.partial[i].offset = 0; spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; + *ppos += PAGE_SIZE; } spd.nr_pages = i; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 64ec4d278ff..576f4fa2af0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -503,6 +503,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; + buf[cnt] = '\0'; pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) @@ -520,9 +521,10 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } - if (filter_add_pred(call, pred)) { + err = filter_add_pred(call, pred); + if (err < 0) { filter_free_pred(pred); - return -EINVAL; + return err; } *ppos += cnt; @@ -569,6 +571,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, if (copy_from_user(&buf, ubuf, cnt)) return -EFAULT; + buf[cnt] = '\0'; pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) @@ -586,10 +589,11 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } - if (filter_add_subsystem_pred(system, pred)) { + err = filter_add_subsystem_pred(system, pred); + if (err < 0) { filter_free_subsystem_preds(system); filter_free_pred(pred); - return -EINVAL; + return err; } *ppos += cnt; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 026be412f35..e03cbf1e38f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -215,7 +215,7 @@ static int __filter_add_pred(struct ftrace_event_call *call, } } - return -ENOMEM; + return -ENOSPC; } static int is_string_field(const char *type) @@ -319,7 +319,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system, } if (i == MAX_FILTER_PRED) - return -EINVAL; + return -ENOSPC; events_for_each(call) { int err; @@ -410,16 +410,22 @@ int filter_parse(char **pbuf, struct filter_pred *pred) } } + if (!val_str) { + pred->field_name = NULL; + return -EINVAL; + } + pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); if (!pred->field_name) return -ENOMEM; - pred->val = simple_strtoull(val_str, &tmp, 10); + pred->val = simple_strtoull(val_str, &tmp, 0); if (tmp == val_str) { pred->str_val = kstrdup(val_str, GFP_KERNEL); if (!pred->str_val) return -ENOMEM; - } + } else if (*tmp != '\0') + return -EINVAL; return 0; } diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 30743f7d411..d363c6672c6 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -105,10 +105,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ return 0; #undef __entry -#define __entry "REC" +#define __entry REC #undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) #undef TP_fast_assign #define TP_fast_assign(args...) args diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a2a3af29c94..5e579645ac8 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,5 +1,5 @@ +#include <trace/syscall.h> #include <linux/kernel.h> -#include <linux/ftrace.h> #include <asm/syscall.h> #include "trace_output.h" diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b6b966ce145..f71fb2a0895 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -966,20 +966,20 @@ undo: } #ifdef CONFIG_SMP -static struct workqueue_struct *work_on_cpu_wq __read_mostly; struct work_for_cpu { - struct work_struct work; + struct completion completion; long (*fn)(void *); void *arg; long ret; }; -static void do_work_for_cpu(struct work_struct *w) +static int do_work_for_cpu(void *_wfc) { - struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); - + struct work_for_cpu *wfc = _wfc; wfc->ret = wfc->fn(wfc->arg); + complete(&wfc->completion); + return 0; } /** @@ -990,17 +990,23 @@ static void do_work_for_cpu(struct work_struct *w) * * This will return the value @fn returns. * It is up to the caller to ensure that the cpu doesn't go offline. + * The caller must not hold any locks which would prevent @fn from completing. */ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { - struct work_for_cpu wfc; - - INIT_WORK(&wfc.work, do_work_for_cpu); - wfc.fn = fn; - wfc.arg = arg; - queue_work_on(cpu, work_on_cpu_wq, &wfc.work); - flush_work(&wfc.work); - + struct task_struct *sub_thread; + struct work_for_cpu wfc = { + .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), + .fn = fn, + .arg = arg, + }; + + sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); + if (IS_ERR(sub_thread)) + return PTR_ERR(sub_thread); + kthread_bind(sub_thread, cpu); + wake_up_process(sub_thread); + wait_for_completion(&wfc.completion); return wfc.ret; } EXPORT_SYMBOL_GPL(work_on_cpu); @@ -1016,8 +1022,4 @@ void __init init_workqueues(void) hotcpu_notifier(workqueue_cpu_callback, 0); keventd_wq = create_workqueue("events"); BUG_ON(!keventd_wq); -#ifdef CONFIG_SMP - work_on_cpu_wq = create_workqueue("work_on_cpu"); - BUG_ON(!work_on_cpu_wq); -#endif } |