From d172f4ef31bec924c6ebcb242c9d7d290811e1e5 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Sat, 22 Dec 2007 21:18:25 -0800
Subject: Modules: fix memory leak of module names

Due to the change in kobject name handling, the module kobject needs to have a null release function to ensure that the name it previously set will be properly cleaned up.

All of this weirdness goes away in 2.6.25 with the rework of the kobject name and cleanup logic, but this is required for 2.6.24.

Thanks to Alexey Dobriyan for finding the problem, and to Kay Sievers for pointing out the simple way to fix it after I tried many complex ways.

Cc: Alexey Dobriyan
Cc: Kay Sievers
Signed-off-by: Greg Kroah-Hartman
---
 kernel/params.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'kernel')

diff --git a/kernel/params.c b/kernel/params.c
index 2a4c51487e7..7686417ee00 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -697,8 +697,18 @@ static struct kset_uevent_ops module_uevent_ops = {
 decl_subsys(module, &module_ktype, &module_uevent_ops);
 int module_sysfs_initialized;
 
+static void module_release(struct kobject *kobj)
+{
+	/*
+	 * Stupid empty release function to allow the memory for the kobject to
+	 * be properly cleaned up. This will not need to be present for 2.6.25
+	 * with the upcoming kobject core rework.
+	 */
+}
+
 static struct kobj_type module_ktype = {
 	.sysfs_ops =	&module_sysfs_ops,
+	.release =	module_release,
 };
 
 /*
--
cgit v1.2.3


From fb445ee5f9bfc7cbef9e397556170c608dc02955 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Sat, 29 Dec 2007 01:19:49 -0800
Subject: [SERIAL]: Fix section mismatches in Sun serial console drivers.

We're exporting an __init function, oops :-)

The core issue here is that add_preferred_console() is marked as __init; this makes it impossible to invoke it from a driver probe routine, which is what the Sparc serial drivers need to do.

There is no harm in dropping the __init marker. This code will actually work properly when invoked from a modular driver, except that init will probably not pick up the console change without some other support code.

Then we can drop the __init from sunserial_console_match() and we're no longer exporting an __init function to modules.

Signed-off-by: David S. Miller
---
 kernel/printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index a30fe33de39..89011bf8c10 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -817,7 +817,7 @@ __setup("console=", console_setup);
  * commonly to provide a default console (ie from PROM variables) when
  * the user has not supplied one.
  */
-int __init add_preferred_console(char *name, int idx, char *options)
+int add_preferred_console(char *name, int idx, char *options)
 {
 	struct console_cmdline *c;
 	int i;
--
cgit v1.2.3


From 90b2628f1fe94a667330d425a7fb76ec8d2a49ec Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sun, 30 Dec 2007 17:24:35 +0100
Subject: sched: fix gcc warnings

Meelis Roos reported these warnings on sparc64:

  CC      kernel/sched.o
  In file included from kernel/sched.c:879:
  kernel/sched_debug.c: In function 'nsec_high':
  kernel/sched_debug.c:38: warning: comparison of distinct pointer types lacks a cast

The debug check in do_div() is over-eager here, because the long long is always positive in these places. Mark this by casting them to unsigned long long.
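For readers unfamiliar with do_div(): it is an architecture-specific kernel macro that expects an unsigned 64-bit lvalue, writes the quotient back into that argument, and returns the 32-bit remainder; the warning above comes from a compile-time type check inside the macro once a signed long long is handed to it. A rough stand-in for its contract (illustrative only, not the kernel implementation):

	/*
	 * Illustrative stand-in for the kernel's do_div(); the real thing is an
	 * arch-specific macro, but the contract is the same: the dividend must
	 * be an unsigned 64-bit lvalue, the quotient is written back into it,
	 * and the 32-bit remainder is returned.
	 */
	static inline unsigned int do_div_like(unsigned long long *n, unsigned int base)
	{
		unsigned int rem = (unsigned int)(*n % base);

		*n /= base;
		return rem;
	}

This is why nsec_high()/nsec_low() below keep the value unsigned for the division and only cast to long long to test the sign.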
no change in code output:

   text    data     bss     dec     hex filename
  51471    6582     376   58429    e43d sched.o.before
  51471    6582     376   58429    e43d sched.o.after

md5:
  7f7729c111f185bf3ccea4d542abc049  sched.o.before.asm
  7f7729c111f185bf3ccea4d542abc049  sched.o.after.asm

Signed-off-by: Ingo Molnar
---
 kernel/sched_debug.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index d30467b47dd..80fbbfc0429 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -31,9 +31,9 @@
 /*
  * Ease the printing of nsec fields:
  */
-static long long nsec_high(long long nsec)
+static long long nsec_high(unsigned long long nsec)
 {
-	if (nsec < 0) {
+	if ((long long)nsec < 0) {
 		nsec = -nsec;
 		do_div(nsec, 1000000);
 		return -nsec;
@@ -43,9 +43,9 @@ static long long nsec_high(long long nsec)
 	return nsec;
 }
 
-static unsigned long nsec_low(long long nsec)
+static unsigned long nsec_low(unsigned long long nsec)
 {
-	if (nsec < 0)
+	if ((long long)nsec < 0)
 		nsec = -nsec;
 
 	return do_div(nsec, 1000000);
--
cgit v1.2.3


From 831830b5a2b5d413407adf380ef62fe17d6fcbf2 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 2 Jan 2008 14:09:57 +0000
Subject: restrict reading from /proc/<pid>/maps to those who share ->mm or can ptrace pid

The contents of /proc/*/maps are sensitive and may become sensitive after open() (e.g. if the target originally shares our ->mm and later execs a suid-root binary).

Check at read() time (actually, at the ->start() of the iterator) that the mm_struct we'd grabbed and locked is
 - still the ->mm of the target, and
 - equal to the reader's ->mm, or the target is ptraceable by the reader.

Signed-off-by: Al Viro
Acked-by: Rik van Riel
Signed-off-by: Linus Torvalds
---
 kernel/ptrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 7c76f2ffaea..0c65d306f41 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -120,7 +120,7 @@ int ptrace_check_attach(struct task_struct *child, int kill)
 	return ret;
 }
 
-static int may_attach(struct task_struct *task)
+int __ptrace_may_attach(struct task_struct *task)
 {
 	/* May we inspect the given task?
 	 * This check is used both for attaching with ptrace
@@ -154,7 +154,7 @@ int ptrace_may_attach(struct task_struct *task)
 {
 	int err;
 	task_lock(task);
-	err = may_attach(task);
+	err = __ptrace_may_attach(task);
 	task_unlock(task);
 	return !err;
 }
--
cgit v1.2.3


From b8c9a18712f7b617fda66d878ce3759c9e575ba0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 2 Jan 2008 13:48:27 -0800
Subject: Fix kernel/ptrace.c compile problem (missing "may_attach()")

The previous commit missed one use of "may_attach()" that had been renamed to __ptrace_may_attach(). Tssk, tssk, Al.

Signed-off-by: Linus Torvalds
---
 kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0c65d306f41..c25db863081 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -196,7 +196,7 @@ repeat:
 	/* the same process cannot be attached many times */
 	if (task->ptrace & PT_PTRACED)
 		goto bad;
-	retval = may_attach(task);
+	retval = __ptrace_may_attach(task);
 	if (retval)
 		goto bad;
--
cgit v1.2.3


From b59f8197c5ddd0d5d74b663650be5449dacd34aa Mon Sep 17 00:00:00 2001
From: Roland McGrath
Date: Mon, 7 Jan 2008 14:23:34 -0800
Subject: acct: real_parent ppid

The ac_ppid field reported in process accounting records should match what getppid() would have returned to that process, regardless of whether a debugger is attached.
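As a concrete reference for what "what getppid() would have returned" means, here is a tiny userspace check (plain POSIX, nothing kernel-specific assumed); the value it prints is what ac_ppid should now record, whether or not a debugger happens to be attached to the child when it exits:

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t child = fork();

		if (child == 0) {
			/* Reports the real parent, not a ptrace-induced one. */
			printf("pid %d: getppid() = %d\n",
			       (int)getpid(), (int)getppid());
			_exit(0);
		}
		waitpid(child, NULL, 0);
		return 0;
	}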
Signed-off-by: Roland McGrath
Signed-off-by: Linus Torvalds
---
 kernel/acct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/acct.c b/kernel/acct.c
index cf19547cc9e..521dfa53cb9 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -482,7 +482,7 @@ static void do_acct_process(struct file *file)
 #endif
 #if ACCT_VERSION==3
 	ac.ac_pid = current->tgid;
-	ac.ac_ppid = current->parent->tgid;
+	ac.ac_ppid = current->real_parent->tgid;
 #endif
 
 	spin_lock_irq(&current->sighand->siglock);
--
cgit v1.2.3


From 83a08e7c6ed533a47631794e7f618a98094b4129 Mon Sep 17 00:00:00 2001
From: Ken'ichi Ohmichi
Date: Tue, 8 Jan 2008 15:33:05 -0800
Subject: vmcoreinfo: add the array length of "free_list" for filtering free pages

This patch adds the array length of "free_area.free_list" to the vmcoreinfo data so that makedumpfile (the dump filtering command) can exclude all free pages in linux-2.6.24.

makedumpfile creates a small dumpfile by excluding pages that are unnecessary for the analysis. To distinguish unnecessary pages, makedumpfile reads the vmcoreinfo data, which carries the minimum debugging information needed for dump filtering.

In 2.6.24-rc1 or later, free_area.free_list is an array with one list per migrate type instead of a single list, so makedumpfile needs the array length of "free_area.free_list" and the vmcoreinfo data should contain it.

Signed-off-by: Huang Ying
Tested-by: Ken'ichi Ohmichi
Acked-by: Simon Horman
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/kexec.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/kexec.c b/kernel/kexec.c
index aa74a1ef2da..9a26eec9eb0 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1404,6 +1404,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(list_head, next);
 	VMCOREINFO_OFFSET(list_head, prev);
 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
+	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
 
 	arch_crash_save_vmcoreinfo();
--
cgit v1.2.3


From cdf71a10c7b6432d9b48e292cca2c62a0b9fa6cf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 8 Jan 2008 19:47:38 +0100
Subject: futex: Prevent stale futex owner when interrupted/timeout

Roland Westrelin did a great analysis of a long-standing thinko in the return path of futex_lock_pi.

While we fixed the lock steal case long ago, which was easy to trigger, we never had a test case which exposed this problem and stupidly never thought about the reverse lock stealing scenario and the return to user space with a stale state.

When a blocked task returns from rt_mutex_timed_lock() without holding the rt_mutex (due to a signal or timeout) and at the same time the task holding the futex is releasing the futex and assigning the ownership of the futex to the returning task, then it might happen that a third task acquires the rt_mutex before the final rt_mutex_trylock() of the returning task happens under the futex hash bucket lock.

The returning task returns to user space with ETIMEDOUT or EINTR, but the user space futex value is assigned to this task. The task which acquired the rt_mutex fixes the user space futex value right after the hash bucket lock has been released by the returning task, but for a short period of time the user space value is wrong.
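For orientation while reading the description above and the fix below: the user-space word of a PI futex encodes the owner's TID plus two flag bits, so "the user space futex value is assigned to this task" means the TID field briefly names a task that does not actually own the rt_mutex. For reference (constants as defined in include/linux/futex.h; they are not part of this patch):

	/* PI-futex user-space word layout, reproduced here only as a reading aid. */
	#define FUTEX_WAITERS		0x80000000	/* waiters are queued in the kernel */
	#define FUTEX_OWNER_DIED	0x40000000	/* previous owner died without unlocking */
	#define FUTEX_TID_MASK		0x3fffffff	/* TID of the current owner */

	static inline unsigned int futex_owner_tid(unsigned int uval)
	{
		return uval & FUTEX_TID_MASK;
	}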
Detailed description is available at: https://bugzilla.redhat.com/show_bug.cgi?id=400541 The fix for this is the same as we do when the rt_mutex was acquired by a higher priority task via lock stealing from the designated new owner. In that case we already fix the user space value and the internal pi_state up before we return. This mechanism can be used to fixup the above corner case as well. When the returning task, which failed to acquire the rt_mutex, notices that it is the designated owner of the futex, then it fixes up the stale user space value and the pi_state, before returning to user space. This happens with the futex hash bucket lock held, so the task which acquired the rt_mutex is guaranteed to be blocked on the hash bucket lock. We can access the rt_mutex owner, which gives us the pid of the new owner, safely here as the owner is not able to modify (release) it while waiting on the hash bucket lock. Rename the "curr" argument of fixup_pi_state_owner() to "newowner" to avoid confusion with current and add the check for the stale state into the failure path of rt_mutex_trylock() in the return path of unlock_futex_pi(). If the situation is detected use fixup_pi_state_owner() to assign everything to the owner of the rt_mutex. Pointed-out-and-tested-by: Roland Westrelin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/futex.c | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 172a1aeeafd..db9824de8bf 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1097,15 +1097,15 @@ static void unqueue_me_pi(struct futex_q *q) } /* - * Fixup the pi_state owner with current. + * Fixup the pi_state owner with the new owner. * * Must be called with hash bucket lock held and mm->sem held for non * private futexes. */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *curr) + struct task_struct *newowner) { - u32 newtid = task_pid_vnr(curr) | FUTEX_WAITERS; + u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; u32 uval, curval, newval; int ret; @@ -1119,12 +1119,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, } else newtid |= FUTEX_OWNER_DIED; - pi_state->owner = curr; + pi_state->owner = newowner; - spin_lock_irq(&curr->pi_lock); + spin_lock_irq(&newowner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &curr->pi_state_list); - spin_unlock_irq(&curr->pi_lock); + list_add(&pi_state->list, &newowner->pi_state_list); + spin_unlock_irq(&newowner->pi_lock); /* * We own it, so we have to replace the pending owner @@ -1508,9 +1508,40 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, * when we were on the way back before we locked the * hash bucket. */ - if (q.pi_state->owner == curr && - rt_mutex_trylock(&q.pi_state->pi_mutex)) { - ret = 0; + if (q.pi_state->owner == curr) { + /* + * Try to get the rt_mutex now. This might + * fail as some other task acquired the + * rt_mutex after we removed ourself from the + * rt_mutex waiters list. + */ + if (rt_mutex_trylock(&q.pi_state->pi_mutex)) + ret = 0; + else { + /* + * pi_state is incorrect, some other + * task did a lock steal and we + * returned due to timeout or signal + * without taking the rt_mutex. Too + * late. 
We can access the + * rt_mutex_owner without locking, as + * the other task is now blocked on + * the hash bucket lock. Fix the state + * up. + */ + struct task_struct *owner; + int res; + + owner = rt_mutex_owner(&q.pi_state->pi_mutex); + res = fixup_pi_state_owner(uaddr, &q, owner); + + WARN_ON(rt_mutex_owner(&q.pi_state->pi_mutex) != + owner); + + /* propagate -EFAULT, if the fixup failed */ + if (res) + ret = res; + } } else { /* * Paranoia check. If we did not take the lock -- cgit v1.2.3 From fcfd50afb6e94c8cf121ca4e7e3e7166bae7c6aa Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 9 Jan 2008 00:03:23 -0800 Subject: show_task: real_parent The show_task function invoked by sysrq-t et al displays the pid and parent's pid of each task. It seems more useful to show the actual process hierarchy here than who is using ptrace on each process. Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 3df84ea6aba..37cf07aa416 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4918,7 +4918,7 @@ static void show_task(struct task_struct *p) } #endif printk(KERN_CONT "%5lu %5d %6d\n", free, - task_pid_nr(p), task_pid_nr(p->parent)); + task_pid_nr(p), task_pid_nr(p->real_parent)); if (state != TASK_RUNNING) show_stack(p, NULL); -- cgit v1.2.3 From 9f9adecd2d0e4f88fa0e8cb06c6ec207748df70a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Dec 2007 17:38:03 -0500 Subject: PM: ACPI and APM must not be enabled at the same time ACPI and APM used "pm_active" to guarantee that they would not be simultaneously active. But pm_active was recently moved under CONFIG_PM_LEGACY, so that without CONFIG_PM_LEGACY, pm_active became a NOP -- allowing ACPI and APM to both be simultaneously enabled. This caused unpredictable results, including boot hangs. Further, the code under CONFIG_PM_LEGACY is scheduled for removal. So replace pm_active with pm_flags. pm_flags depends only on CONFIG_PM, which is present for both CONFIG_APM and CONFIG_ACPI. http://bugzilla.kernel.org/show_bug.cgi?id=9194 Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 3 +++ kernel/power/pm.c | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 3cdf95b1dc9..f71c9504a5c 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -28,6 +28,9 @@ BLOCKING_NOTIFIER_HEAD(pm_chain_head); DEFINE_MUTEX(pm_mutex); +unsigned int pm_flags; +EXPORT_SYMBOL(pm_flags); + #ifdef CONFIG_SUSPEND /* This is just an arbitrary number */ diff --git a/kernel/power/pm.c b/kernel/power/pm.c index c50d15266c1..60c73fa670d 100644 --- a/kernel/power/pm.c +++ b/kernel/power/pm.c @@ -27,8 +27,6 @@ #include #include -int pm_active; - /* * Locking notes: * pm_devs_lock can be a semaphore providing pm ops are not called @@ -204,6 +202,4 @@ int pm_send_all(pm_request_t rqst, void *data) EXPORT_SYMBOL(pm_register); EXPORT_SYMBOL(pm_send_all); -EXPORT_SYMBOL(pm_active); - -- cgit v1.2.3 From 84427eaef1fb91704c7112bdb598c810003b99f3 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 10 Jan 2008 12:52:04 -0800 Subject: remove task_ppid_nr_ns task_ppid_nr_ns is called in three places. One of these should never have called it. In the other two, using it broke the existing semantics. This was presumably accidental. 
If the function had not been there, it would have been much more obvious to the eye that those patches were changing the behavior. We don't need this function. In task_state, the pid of the ptracer is not the ppid of the ptracer. In do_task_stat, ppid is the tgid of the real_parent, not its pid. I also moved the call outside of lock_task_sighand, since it doesn't need it. In sys_getppid, ppid is the tgid of the real_parent, not its pid. Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index d4527dcef1a..26671f4db07 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -978,7 +978,7 @@ asmlinkage long sys_getppid(void) int pid; rcu_read_lock(); - pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns); + pid = task_tgid_nr_ns(current->real_parent, current->nsproxy->pid_ns); rcu_read_unlock(); return pid; -- cgit v1.2.3 From cb2a52052cebe4716e83b9d2e53682ba00f67de6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 14 Jan 2008 00:55:03 -0800 Subject: modules: de-mutex more symbol lookup paths in the module code Kyle McMartin reports sysrq_timer_list_show() can hit the module mutex from hard interrupt context. These paths don't need to though, since we long ago changed all the module list manipulation to occur via stop_machine(). Disabling preemption is enough. Signed-off-by: Rusty Russell Cc: Ingo Molnar Cc: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 91fe6958b6e..c2e3e2e9880 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2214,29 +2214,34 @@ static const char *get_ksymbol(struct module *mod, /* For kallsyms to ask for address resolution. NULL means not found. We don't lock, as this is used for oops resolution and races are a lesser concern. 
*/ +/* FIXME: Risky: returns a pointer into a module w/o lock */ const char *module_address_lookup(unsigned long addr, unsigned long *size, unsigned long *offset, char **modname) { struct module *mod; + const char *ret = NULL; + preempt_disable(); list_for_each_entry(mod, &modules, list) { if (within(addr, mod->module_init, mod->init_size) || within(addr, mod->module_core, mod->core_size)) { if (modname) *modname = mod->name; - return get_ksymbol(mod, addr, size, offset); + ret = get_ksymbol(mod, addr, size, offset); + break; } } - return NULL; + preempt_enable(); + return ret; } int lookup_module_symbol_name(unsigned long addr, char *symname) { struct module *mod; - mutex_lock(&module_mutex); + preempt_disable(); list_for_each_entry(mod, &modules, list) { if (within(addr, mod->module_init, mod->init_size) || within(addr, mod->module_core, mod->core_size)) { @@ -2246,12 +2251,12 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) if (!sym) goto out; strlcpy(symname, sym, KSYM_NAME_LEN); - mutex_unlock(&module_mutex); + preempt_enable(); return 0; } } out: - mutex_unlock(&module_mutex); + preempt_enable(); return -ERANGE; } @@ -2260,7 +2265,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, { struct module *mod; - mutex_lock(&module_mutex); + preempt_disable(); list_for_each_entry(mod, &modules, list) { if (within(addr, mod->module_init, mod->init_size) || within(addr, mod->module_core, mod->core_size)) { @@ -2273,12 +2278,12 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, strlcpy(modname, mod->name, MODULE_NAME_LEN); if (name) strlcpy(name, sym, KSYM_NAME_LEN); - mutex_unlock(&module_mutex); + preempt_enable(); return 0; } } out: - mutex_unlock(&module_mutex); + preempt_enable(); return -ERANGE; } @@ -2287,7 +2292,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, { struct module *mod; - mutex_lock(&module_mutex); + preempt_disable(); list_for_each_entry(mod, &modules, list) { if (symnum < mod->num_symtab) { *value = mod->symtab[symnum].st_value; @@ -2296,12 +2301,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, mod); - mutex_unlock(&module_mutex); + preempt_enable(); return 0; } symnum -= mod->num_symtab; } - mutex_unlock(&module_mutex); + preempt_enable(); return -ERANGE; } @@ -2324,6 +2329,7 @@ unsigned long module_kallsyms_lookup_name(const char *name) unsigned long ret = 0; /* Don't lock: we're in enough trouble already. */ + preempt_disable(); if ((colon = strchr(name, ':')) != NULL) { *colon = '\0'; if ((mod = find_module(name)) != NULL) @@ -2334,6 +2340,7 @@ unsigned long module_kallsyms_lookup_name(const char *name) if ((ret = mod_find_symname(mod, name)) != 0) break; } + preempt_enable(); return ret; } #endif /* CONFIG_KALLSYMS */ -- cgit v1.2.3 From 5a26db5bd25cf4bf32ae9fa9f6136b6b6d5b45c5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 16 Jan 2008 09:51:58 +0100 Subject: lockdep: fix internal double unlock during self-test Lockdep, during self-test (when it was simulating double unlocks) was sometimes unconditionally unlocking a spinlock when it had not been locked. This won't work for ticket locks. 
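A minimal sketch of why it won't work (a toy lock for illustration only, not the kernel's spinlock implementation): in a ticket lock, unlock simply advances the owner counter, so an unlock that was never paired with a lock hands the lock to a ticket that nobody holds. That is why the patch below only calls graph_unlock() when graph_lock() actually succeeded.

	/* Toy ticket lock built on GCC atomics - illustration only. */
	struct ticket_lock {
		unsigned int next_ticket;
		unsigned int owner;
	};

	static void ticket_lock(struct ticket_lock *l)
	{
		unsigned int me = __atomic_fetch_add(&l->next_ticket, 1, __ATOMIC_RELAXED);

		while (__atomic_load_n(&l->owner, __ATOMIC_ACQUIRE) != me)
			;	/* spin until our ticket comes up */
	}

	static void ticket_unlock(struct ticket_lock *l)
	{
		/* An unlock without a matching lock still advances 'owner',
		 * silently corrupting the queue. */
		__atomic_fetch_add(&l->owner, 1, __ATOMIC_RELEASE);
	}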
Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra --- kernel/lockdep.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 723bd9f9255..4335f12a27c 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2943,9 +2943,10 @@ void lockdep_free_key_range(void *start, unsigned long size) struct list_head *head; unsigned long flags; int i; + int locked; raw_local_irq_save(flags); - graph_lock(); + locked = graph_lock(); /* * Unhash all classes that were created by this module: @@ -2959,7 +2960,8 @@ void lockdep_free_key_range(void *start, unsigned long size) zap_class(class); } - graph_unlock(); + if (locked) + graph_unlock(); raw_local_irq_restore(flags); } @@ -2969,6 +2971,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) struct list_head *head; unsigned long flags; int i, j; + int locked; raw_local_irq_save(flags); @@ -2987,7 +2990,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) * Debug check: in the end all mapped classes should * be gone. */ - graph_lock(); + locked = graph_lock(); for (i = 0; i < CLASSHASH_SIZE; i++) { head = classhash_table + i; if (list_empty(head)) @@ -3000,7 +3003,8 @@ void lockdep_reset_lock(struct lockdep_map *lock) } } } - graph_unlock(); + if (locked) + graph_unlock(); out_restore: raw_local_irq_restore(flags); -- cgit v1.2.3 From eb13ba873881abd5e15af784756a61af635e665e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 Jan 2008 09:51:58 +0100 Subject: lockdep: fix workqueue creation API lockdep interaction Dave Young reported warnings from lockdep that the workqueue API can sometimes try to register lockdep classes with the same key but different names. This is not permitted in lockdep. Unfortunately, I was unaware of that restriction when I wrote the code to debug workqueue problems with lockdep and used the workqueue name as the lockdep class name. This can obviously lead to the problem if the workqueue name is dynamic. This patch solves the problem by always using a constant name for the workqueue's lockdep class, namely either the constant name that was passed in or a string consisting of the variable name. Signed-off-by: Johannes Berg Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra --- kernel/workqueue.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 52d5e7c9a8e..8db0b597509 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -722,7 +722,8 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) struct workqueue_struct *__create_workqueue_key(const char *name, int singlethread, int freezeable, - struct lock_class_key *key) + struct lock_class_key *key, + const char *lock_name) { struct workqueue_struct *wq; struct cpu_workqueue_struct *cwq; @@ -739,7 +740,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, } wq->name = name; - lockdep_init_map(&wq->lockdep_map, name, key, 0); + lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); wq->singlethread = singlethread; wq->freezeable = freezeable; INIT_LIST_HEAD(&wq->list); -- cgit v1.2.3 From 784680336b616dcc4c17cbd25add3b49c555cdeb Mon Sep 17 00:00:00 2001 From: Nigel Cunningham Date: Thu, 17 Jan 2008 15:21:21 -0800 Subject: Fix unbalanced helper_lock in kernel/kmod.c call_usermodehelper_exec() has an exit path that can leave the helper_lock() call at the top of the routine unbalanced. 
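In shape, the problem looks like this (a condensed, hypothetical rendering; the helper names and structure below are illustrative, not the actual kmod.c code): one early exit returns directly instead of falling through to the unlock.

	/* Hypothetical condensed form of the imbalance, for illustration only. */
	int submit_helper(struct subprocess_info *info, int wait)
	{
		int retval = 0;

		helper_lock();
		if (info->path[0] == '\0')
			goto out;		/* ok: reaches helper_unlock() */

		queue_helper_work(info);
		if (wait == UMH_NO_WAIT)
			return 0;		/* bug: leaves helper_lock() unbalanced */

		wait_for_helper_completion(info);
	out:
		helper_unlock();
		return retval;
	}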
The attached patch fixes this issue. Signed-off-by: Nigel Cunningham Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index c6a4f8aebeb..bb7df2a28bd 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -451,13 +451,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, enum umh_wait wait) { DECLARE_COMPLETION_ONSTACK(done); - int retval; + int retval = 0; helper_lock(); - if (sub_info->path[0] == '\0') { - retval = 0; + if (sub_info->path[0] == '\0') goto out; - } if (!khelper_wq || usermodehelper_disabled) { retval = -EBUSY; @@ -468,13 +466,14 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, sub_info->wait = wait; queue_work(khelper_wq, &sub_info->work); - if (wait == UMH_NO_WAIT) /* task has freed sub_info */ - return 0; + if (wait == UMH_NO_WAIT) /* task has freed sub_info */ + goto unlock; wait_for_completion(&done); retval = sub_info->retval; - out: +out: call_usermodehelper_freeinfo(sub_info); +unlock: helper_unlock(); return retval; } -- cgit v1.2.3 From 0ec160dd48b666ddef39d639323d0da26d0b710d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Jan 2008 17:18:24 -0800 Subject: hrtimer: fix section mismatch Fix section mismatch in hrtimer.c: WARNING: vmlinux.o(.text+0x50c61): Section mismatch: reference to .init.text: (between 'hrtimer_cpu_notify' and 'down_read_trylock') Noticed by Johannes Berg and confirmed by Sam Ravnborg. Signed-off-by: Randy Dunlap Cc: Sam Ravnborg Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e65dd0b47cd..f994bb8065e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1378,7 +1378,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) /* * Functions related to boot-time initialization: */ -static void __devinit init_hrtimers_cpu(int cpu) +static void __cpuinit init_hrtimers_cpu(int cpu) { struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; -- cgit v1.2.3 From 48ccf3dac341118992b70ca89c47728e8b1d300b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Jan 2008 17:18:25 -0800 Subject: timer: fix section mismatch The caller is __cpuinit. Also, this code block and its caller are inside #ifdef CONFIG_HOTPLUG_CPU blocks, so this code should reflect that config symbol's usage. 
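For background on these annotations (paraphrased from memory of 2.6.24-era include/linux/init.h; check the tree for the exact definitions): __init code is always discarded after boot, while __cpuinit and __devinit code is kept only when the corresponding hotplug option is enabled, which is why an always-reachable CPU-notifier path must not reference __init or __devinit functions.

	/* Rough sketch of the relevant definitions (from memory; the real
	 * header has more variants such as __cpuinitdata and __meminit). */
	#ifdef CONFIG_HOTPLUG_CPU
	#define __cpuinit			/* kept: may be called on CPU hot-add */
	#else
	#define __cpuinit	__init		/* discarded after boot */
	#endif

	#ifdef CONFIG_HOTPLUG
	#define __devinit			/* kept: may be called on device hot-plug */
	#else
	#define __devinit	__init
	#endif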
WARNING: vmlinux.o(.text+0x4252f): Section mismatch: reference to .init.text: (between 'timer_cpu_notify' and 'msleep') Signed-off-by: Randy Dunlap Cc: Sam Ravnborg Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 26671f4db07..2a00c22203f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1289,7 +1289,7 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) } } -static void __devinit migrate_timers(int cpu) +static void __cpuinit migrate_timers(int cpu) { tvec_base_t *old_base; tvec_base_t *new_base; -- cgit v1.2.3 From c61935fd0e7f087a643827b4bf5ef646963c10fa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Jan 2008 11:24:58 +0100 Subject: sched: group scheduler, set uid share fix setting cpu share to 1 causes hangs, as reported in: http://bugzilla.kernel.org/show_bug.cgi?id=9779 as the default share is 1024, the values of 0 and 1 can indeed cause problems. Limit it to 2 or higher values. These values can only be set by the root user - but still it makes sense to protect against nonsensical values. Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 37cf07aa416..e76b11ca6df 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7153,6 +7153,14 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) { int i; + /* + * A weight of 0 or 1 can cause arithmetics problems. + * (The default weight is 1024 - so there's no practical + * limitation from this.) + */ + if (shares < 2) + shares = 2; + spin_lock(&tg->lock); if (tg->shares == shares) goto done; -- cgit v1.2.3 From 00e10776ff908a767b3d36a53d330db8fdc53a56 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 22 Jan 2008 03:31:39 -0800 Subject: rcu: fix section mismatch rcu_online_cpu() should be __cpuinit instead of __devinit. WARNING: vmlinux.o(.text+0x4b6d5): Section mismatch: reference to .init.text: (between 'rcu_cpu_notify' and 'wakeme_after_rcu') Signed-off-by: Randy Dunlap Cc: Sam Ravnborg Acked-by: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a66d4d1615f..f2c1a04e9b1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -549,7 +549,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->blimit = blimit; } -static void __devinit rcu_online_cpu(int cpu) +static void __cpuinit rcu_online_cpu(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_data, cpu); struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu); -- cgit v1.2.3