From d172f4ef31bec924c6ebcb242c9d7d290811e1e5 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman
Date: Sat, 22 Dec 2007 21:18:25 -0800
Subject: Modules: fix memory leak of module names

Due to the change in kobject name handling, the module kobject needs to have a null release function to ensure that the name it previously set will be properly cleaned up.

All of this weirdness goes away in 2.6.25 with the rework of the kobject name and cleanup logic, but this is required for 2.6.24.

Thanks to Alexey Dobriyan for finding the problem, and to Kay Sievers for pointing out the simple way to fix it after I tried many complex ways.

Cc: Alexey Dobriyan
Cc: Kay Sievers
Signed-off-by: Greg Kroah-Hartman
---
 kernel/params.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'kernel')

diff --git a/kernel/params.c b/kernel/params.c
index 2a4c51487e7..7686417ee00 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -697,8 +697,18 @@ static struct kset_uevent_ops module_uevent_ops = {
 decl_subsys(module, &module_ktype, &module_uevent_ops);
 int module_sysfs_initialized;
 
+static void module_release(struct kobject *kobj)
+{
+	/*
+	 * Stupid empty release function to allow the memory for the kobject to
+	 * be properly cleaned up. This will not need to be present for 2.6.25
+	 * with the upcoming kobject core rework.
+	 */
+}
+
 static struct kobj_type module_ktype = {
 	.sysfs_ops =	&module_sysfs_ops,
+	.release =	module_release,
 };
 
 /*
--
cgit v1.2.3


From fb445ee5f9bfc7cbef9e397556170c608dc02955 Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Sat, 29 Dec 2007 01:19:49 -0800
Subject: [SERIAL]: Fix section mismatches in Sun serial console drivers.

We're exporting an __init function, oops :-)

The core issue here is that add_preferred_console() is marked as __init; this makes it impossible to invoke it from a driver probe routine, which is what the Sparc serial drivers need to do.

There is no harm in dropping the __init marker. This code will actually work properly when invoked from a modular driver, except that init will probably not pick up the console change without some other support code.

Then we can drop the __init from sunserial_console_match() and we're no longer exporting an __init function to modules.

Signed-off-by: David S. Miller
---
 kernel/printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/printk.c b/kernel/printk.c
index a30fe33de39..89011bf8c10 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -817,7 +817,7 @@ __setup("console=", console_setup);
  * commonly to provide a default console (ie from PROM variables) when
  * the user has not supplied one.
  */
-int __init add_preferred_console(char *name, int idx, char *options)
+int add_preferred_console(char *name, int idx, char *options)
 {
 	struct console_cmdline *c;
 	int i;
--
cgit v1.2.3


From 90b2628f1fe94a667330d425a7fb76ec8d2a49ec Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sun, 30 Dec 2007 17:24:35 +0100
Subject: sched: fix gcc warnings

Meelis Roos reported these warnings on sparc64:

  CC      kernel/sched.o
  In file included from kernel/sched.c:879:
  kernel/sched_debug.c: In function 'nsec_high':
  kernel/sched_debug.c:38: warning: comparison of distinct pointer types lacks a cast

The debug check in do_div() is over-eager here, because the long long is always positive in these places. Mark this by casting them to unsigned long long.
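For readers unfamiliar with do_div(): it is an architecture-specific kernel macro that expects an unsigned 64-bit lvalue, writes the quotient back into that argument, and returns the 32-bit remainder; the warning above comes from a compile-time type check inside the macro once a signed long long is handed to it. A rough stand-in for its contract (illustrative only, not the kernel implementation):

	/*
	 * Illustrative stand-in for the kernel's do_div(); the real thing is an
	 * arch-specific macro, but the contract is the same: the dividend must
	 * be an unsigned 64-bit lvalue, the quotient is written back into it,
	 * and the 32-bit remainder is returned.
	 */
	static inline unsigned int do_div_like(unsigned long long *n, unsigned int base)
	{
		unsigned int rem = (unsigned int)(*n % base);

		*n /= base;
		return rem;
	}

This is why nsec_high()/nsec_low() below keep the value unsigned for the division and only cast to long long to test the sign.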
no change in code output:

   text    data     bss     dec     hex filename
  51471    6582     376   58429    e43d sched.o.before
  51471    6582     376   58429    e43d sched.o.after

md5:
  7f7729c111f185bf3ccea4d542abc049  sched.o.before.asm
  7f7729c111f185bf3ccea4d542abc049  sched.o.after.asm

Signed-off-by: Ingo Molnar
---
 kernel/sched_debug.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index d30467b47dd..80fbbfc0429 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -31,9 +31,9 @@
 /*
  * Ease the printing of nsec fields:
  */
-static long long nsec_high(long long nsec)
+static long long nsec_high(unsigned long long nsec)
 {
-	if (nsec < 0) {
+	if ((long long)nsec < 0) {
 		nsec = -nsec;
 		do_div(nsec, 1000000);
 		return -nsec;
@@ -43,9 +43,9 @@ static long long nsec_high(long long nsec)
 	return nsec;
 }
 
-static unsigned long nsec_low(long long nsec)
+static unsigned long nsec_low(unsigned long long nsec)
 {
-	if (nsec < 0)
+	if ((long long)nsec < 0)
 		nsec = -nsec;
 
 	return do_div(nsec, 1000000);
--
cgit v1.2.3


From 831830b5a2b5d413407adf380ef62fe17d6fcbf2 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Wed, 2 Jan 2008 14:09:57 +0000
Subject: restrict reading from /proc/<pid>/maps to those who share ->mm or can ptrace pid

The contents of /proc/*/maps are sensitive and may become sensitive after open() (e.g. if the target originally shares our ->mm and later execs a suid-root binary).

Check at read() time (actually, at the ->start() of the iterator) that the mm_struct we'd grabbed and locked is
 - still the ->mm of the target, and
 - equal to the reader's ->mm, or the target is ptraceable by the reader.

Signed-off-by: Al Viro
Acked-by: Rik van Riel
Signed-off-by: Linus Torvalds
---
 kernel/ptrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 7c76f2ffaea..0c65d306f41 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -120,7 +120,7 @@ int ptrace_check_attach(struct task_struct *child, int kill)
 	return ret;
 }
 
-static int may_attach(struct task_struct *task)
+int __ptrace_may_attach(struct task_struct *task)
 {
 	/* May we inspect the given task?
 	 * This check is used both for attaching with ptrace
@@ -154,7 +154,7 @@ int ptrace_may_attach(struct task_struct *task)
 {
 	int err;
 	task_lock(task);
-	err = may_attach(task);
+	err = __ptrace_may_attach(task);
 	task_unlock(task);
 	return !err;
 }
--
cgit v1.2.3


From b8c9a18712f7b617fda66d878ce3759c9e575ba0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 2 Jan 2008 13:48:27 -0800
Subject: Fix kernel/ptrace.c compile problem (missing "may_attach()")

The previous commit missed one use of "may_attach()" that had been renamed to __ptrace_may_attach(). Tssk, tssk, Al.

Signed-off-by: Linus Torvalds
---
 kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0c65d306f41..c25db863081 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -196,7 +196,7 @@ repeat:
 	/* the same process cannot be attached many times */
 	if (task->ptrace & PT_PTRACED)
 		goto bad;
-	retval = may_attach(task);
+	retval = __ptrace_may_attach(task);
 	if (retval)
 		goto bad;
--
cgit v1.2.3


From b59f8197c5ddd0d5d74b663650be5449dacd34aa Mon Sep 17 00:00:00 2001
From: Roland McGrath
Date: Mon, 7 Jan 2008 14:23:34 -0800
Subject: acct: real_parent ppid

The ac_ppid field reported in process accounting records should match what getppid() would have returned to that process, regardless of whether a debugger is attached.
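As a concrete reference for what "what getppid() would have returned" means, here is a tiny userspace check (plain POSIX, nothing kernel-specific assumed); the value it prints is what ac_ppid should now record, whether or not a debugger happens to be attached to the child when it exits:

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		pid_t child = fork();

		if (child == 0) {
			/* Reports the real parent, not a ptrace-induced one. */
			printf("pid %d: getppid() = %d\n",
			       (int)getpid(), (int)getppid());
			_exit(0);
		}
		waitpid(child, NULL, 0);
		return 0;
	}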
Signed-off-by: Roland McGrath
Signed-off-by: Linus Torvalds
---
 kernel/acct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/acct.c b/kernel/acct.c
index cf19547cc9e..521dfa53cb9 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -482,7 +482,7 @@ static void do_acct_process(struct file *file)
 #endif
 #if ACCT_VERSION==3
 	ac.ac_pid = current->tgid;
-	ac.ac_ppid = current->parent->tgid;
+	ac.ac_ppid = current->real_parent->tgid;
 #endif
 
 	spin_lock_irq(&current->sighand->siglock);
--
cgit v1.2.3


From 83a08e7c6ed533a47631794e7f618a98094b4129 Mon Sep 17 00:00:00 2001
From: Ken'ichi Ohmichi
Date: Tue, 8 Jan 2008 15:33:05 -0800
Subject: vmcoreinfo: add the array length of "free_list" for filtering free pages

This patch adds the array length of "free_area.free_list" to the vmcoreinfo data so that makedumpfile (the dump filtering command) can exclude all free pages in linux-2.6.24.

makedumpfile creates a small dumpfile by excluding pages that are unnecessary for the analysis. To distinguish unnecessary pages, makedumpfile reads the vmcoreinfo data, which carries the minimum debugging information needed for dump filtering.

In 2.6.24-rc1 or later, free_area.free_list is an array with one list per migrate type instead of a single list, so makedumpfile needs the array length of "free_area.free_list" and the vmcoreinfo data should contain it.

Signed-off-by: Huang Ying
Tested-by: Ken'ichi Ohmichi
Acked-by: Simon Horman
Cc: David Rientjes
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/kexec.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/kexec.c b/kernel/kexec.c
index aa74a1ef2da..9a26eec9eb0 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1404,6 +1404,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_OFFSET(list_head, next);
 	VMCOREINFO_OFFSET(list_head, prev);
 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
+	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
 
 	arch_crash_save_vmcoreinfo();
--
cgit v1.2.3


From cdf71a10c7b6432d9b48e292cca2c62a0b9fa6cf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 8 Jan 2008 19:47:38 +0100
Subject: futex: Prevent stale futex owner when interrupted/timeout

Roland Westrelin did a great analysis of a long-standing thinko in the return path of futex_lock_pi.

While we fixed the lock steal case long ago, which was easy to trigger, we never had a test case which exposed this problem and stupidly never thought about the reverse lock stealing scenario and the return to user space with a stale state.

When a blocked task returns from rt_mutex_timed_lock() without holding the rt_mutex (due to a signal or timeout) and at the same time the task holding the futex is releasing the futex and assigning the ownership of the futex to the returning task, then it might happen that a third task acquires the rt_mutex before the final rt_mutex_trylock() of the returning task happens under the futex hash bucket lock.

The returning task returns to user space with ETIMEDOUT or EINTR, but the user space futex value is assigned to this task. The task which acquired the rt_mutex fixes the user space futex value right after the hash bucket lock has been released by the returning task, but for a short period of time the user space value is wrong.
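For orientation while reading the description above and the fix below: the user-space word of a PI futex encodes the owner's TID plus two flag bits, so "the user space futex value is assigned to this task" means the TID field briefly names a task that does not actually own the rt_mutex. For reference (constants as defined in include/linux/futex.h; they are not part of this patch):

	/* PI-futex user-space word layout, reproduced here only as a reading aid. */
	#define FUTEX_WAITERS		0x80000000	/* waiters are queued in the kernel */
	#define FUTEX_OWNER_DIED	0x40000000	/* previous owner died without unlocking */
	#define FUTEX_TID_MASK		0x3fffffff	/* TID of the current owner */

	static inline unsigned int futex_owner_tid(unsigned int uval)
	{
		return uval & FUTEX_TID_MASK;
	}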
Detailed description is available at: https://bugzilla.redhat.com/show_bug.cgi?id=400541 The fix for this is the same as we do when the rt_mutex was acquired by a higher priority task via lock stealing from the designated new owner. In that case we already fix the user space value and the internal pi_state up before we return. This mechanism can be used to fixup the above corner case as well. When the returning task, which failed to acquire the rt_mutex, notices that it is the designated owner of the futex, then it fixes up the stale user space value and the pi_state, before returning to user space. This happens with the futex hash bucket lock held, so the task which acquired the rt_mutex is guaranteed to be blocked on the hash bucket lock. We can access the rt_mutex owner, which gives us the pid of the new owner, safely here as the owner is not able to modify (release) it while waiting on the hash bucket lock. Rename the "curr" argument of fixup_pi_state_owner() to "newowner" to avoid confusion with current and add the check for the stale state into the failure path of rt_mutex_trylock() in the return path of unlock_futex_pi(). If the situation is detected use fixup_pi_state_owner() to assign everything to the owner of the rt_mutex. Pointed-out-and-tested-by: Roland Westrelin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/futex.c | 51 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 172a1aeeafd..db9824de8bf 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1097,15 +1097,15 @@ static void unqueue_me_pi(struct futex_q *q) } /* - * Fixup the pi_state owner with current. + * Fixup the pi_state owner with the new owner. * * Must be called with hash bucket lock held and mm->sem held for non * private futexes. */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *curr) + struct task_struct *newowner) { - u32 newtid = task_pid_vnr(curr) | FUTEX_WAITERS; + u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; u32 uval, curval, newval; int ret; @@ -1119,12 +1119,12 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, } else newtid |= FUTEX_OWNER_DIED; - pi_state->owner = curr; + pi_state->owner = newowner; - spin_lock_irq(&curr->pi_lock); + spin_lock_irq(&newowner->pi_lock); WARN_ON(!list_empty(&pi_state->list)); - list_add(&pi_state->list, &curr->pi_state_list); - spin_unlock_irq(&curr->pi_lock); + list_add(&pi_state->list, &newowner->pi_state_list); + spin_unlock_irq(&newowner->pi_lock); /* * We own it, so we have to replace the pending owner @@ -1508,9 +1508,40 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, * when we were on the way back before we locked the * hash bucket. */ - if (q.pi_state->owner == curr && - rt_mutex_trylock(&q.pi_state->pi_mutex)) { - ret = 0; + if (q.pi_state->owner == curr) { + /* + * Try to get the rt_mutex now. This might + * fail as some other task acquired the + * rt_mutex after we removed ourself from the + * rt_mutex waiters list. + */ + if (rt_mutex_trylock(&q.pi_state->pi_mutex)) + ret = 0; + else { + /* + * pi_state is incorrect, some other + * task did a lock steal and we + * returned due to timeout or signal + * without taking the rt_mutex. Too + * late. 
We can access the + * rt_mutex_owner without locking, as + * the other task is now blocked on + * the hash bucket lock. Fix the state + * up. + */ + struct task_struct *owner; + int res; + + owner = rt_mutex_owner(&q.pi_state->pi_mutex); + res = fixup_pi_state_owner(uaddr, &q, owner); + + WARN_ON(rt_mutex_owner(&q.pi_state->pi_mutex) != + owner); + + /* propagate -EFAULT, if the fixup failed */ + if (res) + ret = res; + } } else { /* * Paranoia check. If we did not take the lock -- cgit v1.2.3 From fcfd50afb6e94c8cf121ca4e7e3e7166bae7c6aa Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Wed, 9 Jan 2008 00:03:23 -0800 Subject: show_task: real_parent The show_task function invoked by sysrq-t et al displays the pid and parent's pid of each task. It seems more useful to show the actual process hierarchy here than who is using ptrace on each process. Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 3df84ea6aba..37cf07aa416 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4918,7 +4918,7 @@ static void show_task(struct task_struct *p) } #endif printk(KERN_CONT "%5lu %5d %6d\n", free, - task_pid_nr(p), task_pid_nr(p->parent)); + task_pid_nr(p), task_pid_nr(p->real_parent)); if (state != TASK_RUNNING) show_stack(p, NULL); -- cgit v1.2.3 From 9f9adecd2d0e4f88fa0e8cb06c6ec207748df70a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 13 Dec 2007 17:38:03 -0500 Subject: PM: ACPI and APM must not be enabled at the same time ACPI and APM used "pm_active" to guarantee that they would not be simultaneously active. But pm_active was recently moved under CONFIG_PM_LEGACY, so that without CONFIG_PM_LEGACY, pm_active became a NOP -- allowing ACPI and APM to both be simultaneously enabled. This caused unpredictable results, including boot hangs. Further, the code under CONFIG_PM_LEGACY is scheduled for removal. So replace pm_active with pm_flags. pm_flags depends only on CONFIG_PM, which is present for both CONFIG_APM and CONFIG_ACPI. http://bugzilla.kernel.org/show_bug.cgi?id=9194 Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 3 +++ kernel/power/pm.c | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/power/main.c b/kernel/power/main.c index 3cdf95b1dc9..f71c9504a5c 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -28,6 +28,9 @@ BLOCKING_NOTIFIER_HEAD(pm_chain_head); DEFINE_MUTEX(pm_mutex); +unsigned int pm_flags; +EXPORT_SYMBOL(pm_flags); + #ifdef CONFIG_SUSPEND /* This is just an arbitrary number */ diff --git a/kernel/power/pm.c b/kernel/power/pm.c index c50d15266c1..60c73fa670d 100644 --- a/kernel/power/pm.c +++ b/kernel/power/pm.c @@ -27,8 +27,6 @@ #include #include -int pm_active; - /* * Locking notes: * pm_devs_lock can be a semaphore providing pm ops are not called @@ -204,6 +202,4 @@ int pm_send_all(pm_request_t rqst, void *data) EXPORT_SYMBOL(pm_register); EXPORT_SYMBOL(pm_send_all); -EXPORT_SYMBOL(pm_active); - -- cgit v1.2.3 From 84427eaef1fb91704c7112bdb598c810003b99f3 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 10 Jan 2008 12:52:04 -0800 Subject: remove task_ppid_nr_ns task_ppid_nr_ns is called in three places. One of these should never have called it. In the other two, using it broke the existing semantics. This was presumably accidental. 
If the function had not been there, it would have been much more obvious to the eye that those patches were changing the behavior. We don't need this function. In task_state, the pid of the ptracer is not the ppid of the ptracer. In do_task_stat, ppid is the tgid of the real_parent, not its pid. I also moved the call outside of lock_task_sighand, since it doesn't need it. In sys_getppid, ppid is the tgid of the real_parent, not its pid. Signed-off-by: Roland McGrath Signed-off-by: Linus Torvalds --- kernel/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index d4527dcef1a..26671f4db07 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -978,7 +978,7 @@ asmlinkage long sys_getppid(void) int pid; rcu_read_lock(); - pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns); + pid = task_tgid_nr_ns(current->real_parent, current->nsproxy->pid_ns); rcu_read_unlock(); return pid; -- cgit v1.2.3 From cb2a52052cebe4716e83b9d2e53682ba00f67de6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 14 Jan 2008 00:55:03 -0800 Subject: modules: de-mutex more symbol lookup paths in the module code Kyle McMartin reports sysrq_timer_list_show() can hit the module mutex from hard interrupt context. These paths don't need to though, since we long ago changed all the module list manipulation to occur via stop_machine(). Disabling preemption is enough. Signed-off-by: Rusty Russell Cc: Ingo Molnar Cc: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/module.c b/kernel/module.c index 91fe6958b6e..c2e3e2e9880 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2214,29 +2214,34 @@ static const char *get_ksymbol(struct module *mod, /* For kallsyms to ask for address resolution. NULL means not found. We don't lock, as this is used for oops resolution and races are a lesser concern. 
*/ +/* FIXME: Risky: returns a pointer into a module w/o lock */ const char *module_address_lookup(unsigned long addr, unsigned long *size, unsigned long *offset, char **modname) { struct module *mod; + const char *ret = NULL; + preempt_disable(); list_for_each_entry(mod, &modules, list) { if (within(addr, mod->module_init, mod->init_size) || within(addr, mod->module_core, mod->core_size)) { if (modname) *modname = mod->name; - return get_ksymbol(mod, addr, size, offset); + ret = get_ksymbol(mod, addr, size, offset); + break; } } - return NULL; + preempt_enable(); + return ret; } int lookup_module_symbol_name(unsigned long addr, char *symname) { struct module *mod; - mutex_lock(&module_mutex); + preempt_disable(); list_for_each_entry(mod, &modules, list) { if (within(addr, mod->module_init, mod->init_size) || within(addr, mod->module_core, mod->core_size)) { @@ -2246,12 +2251,12 @@ int lookup_module_symbol_name(unsigned long addr, char *symname) if (!sym) goto out; strlcpy(symname, sym, KSYM_NAME_LEN); - mutex_unlock(&module_mutex); + preempt_enable(); return 0; } } out: - mutex_unlock(&module_mutex); + preempt_enable(); return -ERANGE; } @@ -2260,7 +2265,7 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, { struct module *mod; - mutex_lock(&module_mutex); + preempt_disable(); list_for_each_entry(mod, &modules, list) { if (within(addr, mod->module_init, mod->init_size) || within(addr, mod->module_core, mod->core_size)) { @@ -2273,12 +2278,12 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, strlcpy(modname, mod->name, MODULE_NAME_LEN); if (name) strlcpy(name, sym, KSYM_NAME_LEN); - mutex_unlock(&module_mutex); + preempt_enable(); return 0; } } out: - mutex_unlock(&module_mutex); + preempt_enable(); return -ERANGE; } @@ -2287,7 +2292,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, { struct module *mod; - mutex_lock(&module_mutex); + preempt_disable(); list_for_each_entry(mod, &modules, list) { if (symnum < mod->num_symtab) { *value = mod->symtab[symnum].st_value; @@ -2296,12 +2301,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, mod); - mutex_unlock(&module_mutex); + preempt_enable(); return 0; } symnum -= mod->num_symtab; } - mutex_unlock(&module_mutex); + preempt_enable(); return -ERANGE; } @@ -2324,6 +2329,7 @@ unsigned long module_kallsyms_lookup_name(const char *name) unsigned long ret = 0; /* Don't lock: we're in enough trouble already. */ + preempt_disable(); if ((colon = strchr(name, ':')) != NULL) { *colon = '\0'; if ((mod = find_module(name)) != NULL) @@ -2334,6 +2340,7 @@ unsigned long module_kallsyms_lookup_name(const char *name) if ((ret = mod_find_symname(mod, name)) != 0) break; } + preempt_enable(); return ret; } #endif /* CONFIG_KALLSYMS */ -- cgit v1.2.3 From 5a26db5bd25cf4bf32ae9fa9f6136b6b6d5b45c5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 16 Jan 2008 09:51:58 +0100 Subject: lockdep: fix internal double unlock during self-test Lockdep, during self-test (when it was simulating double unlocks) was sometimes unconditionally unlocking a spinlock when it had not been locked. This won't work for ticket locks. 
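A minimal sketch of why it won't work (a toy lock for illustration only, not the kernel's spinlock implementation): in a ticket lock, unlock simply advances the owner counter, so an unlock that was never paired with a lock hands the lock to a ticket that nobody holds. That is why the patch below only calls graph_unlock() when graph_lock() actually succeeded.

	/* Toy ticket lock built on GCC atomics - illustration only. */
	struct ticket_lock {
		unsigned int next_ticket;
		unsigned int owner;
	};

	static void ticket_lock(struct ticket_lock *l)
	{
		unsigned int me = __atomic_fetch_add(&l->next_ticket, 1, __ATOMIC_RELAXED);

		while (__atomic_load_n(&l->owner, __ATOMIC_ACQUIRE) != me)
			;	/* spin until our ticket comes up */
	}

	static void ticket_unlock(struct ticket_lock *l)
	{
		/* An unlock without a matching lock still advances 'owner',
		 * silently corrupting the queue. */
		__atomic_fetch_add(&l->owner, 1, __ATOMIC_RELEASE);
	}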
Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra --- kernel/lockdep.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 723bd9f9255..4335f12a27c 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2943,9 +2943,10 @@ void lockdep_free_key_range(void *start, unsigned long size) struct list_head *head; unsigned long flags; int i; + int locked; raw_local_irq_save(flags); - graph_lock(); + locked = graph_lock(); /* * Unhash all classes that were created by this module: @@ -2959,7 +2960,8 @@ void lockdep_free_key_range(void *start, unsigned long size) zap_class(class); } - graph_unlock(); + if (locked) + graph_unlock(); raw_local_irq_restore(flags); } @@ -2969,6 +2971,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) struct list_head *head; unsigned long flags; int i, j; + int locked; raw_local_irq_save(flags); @@ -2987,7 +2990,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) * Debug check: in the end all mapped classes should * be gone. */ - graph_lock(); + locked = graph_lock(); for (i = 0; i < CLASSHASH_SIZE; i++) { head = classhash_table + i; if (list_empty(head)) @@ -3000,7 +3003,8 @@ void lockdep_reset_lock(struct lockdep_map *lock) } } } - graph_unlock(); + if (locked) + graph_unlock(); out_restore: raw_local_irq_restore(flags); -- cgit v1.2.3 From eb13ba873881abd5e15af784756a61af635e665e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 Jan 2008 09:51:58 +0100 Subject: lockdep: fix workqueue creation API lockdep interaction Dave Young reported warnings from lockdep that the workqueue API can sometimes try to register lockdep classes with the same key but different names. This is not permitted in lockdep. Unfortunately, I was unaware of that restriction when I wrote the code to debug workqueue problems with lockdep and used the workqueue name as the lockdep class name. This can obviously lead to the problem if the workqueue name is dynamic. This patch solves the problem by always using a constant name for the workqueue's lockdep class, namely either the constant name that was passed in or a string consisting of the variable name. Signed-off-by: Johannes Berg Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra --- kernel/workqueue.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 52d5e7c9a8e..8db0b597509 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -722,7 +722,8 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) struct workqueue_struct *__create_workqueue_key(const char *name, int singlethread, int freezeable, - struct lock_class_key *key) + struct lock_class_key *key, + const char *lock_name) { struct workqueue_struct *wq; struct cpu_workqueue_struct *cwq; @@ -739,7 +740,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, } wq->name = name; - lockdep_init_map(&wq->lockdep_map, name, key, 0); + lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); wq->singlethread = singlethread; wq->freezeable = freezeable; INIT_LIST_HEAD(&wq->list); -- cgit v1.2.3 From 784680336b616dcc4c17cbd25add3b49c555cdeb Mon Sep 17 00:00:00 2001 From: Nigel Cunningham Date: Thu, 17 Jan 2008 15:21:21 -0800 Subject: Fix unbalanced helper_lock in kernel/kmod.c call_usermodehelper_exec() has an exit path that can leave the helper_lock() call at the top of the routine unbalanced. 
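In shape, the problem looks like this (a condensed, hypothetical rendering; the helper names and structure below are illustrative, not the actual kmod.c code): one early exit returns directly instead of falling through to the unlock.

	/* Hypothetical condensed form of the imbalance, for illustration only. */
	int submit_helper(struct subprocess_info *info, int wait)
	{
		int retval = 0;

		helper_lock();
		if (info->path[0] == '\0')
			goto out;		/* ok: reaches helper_unlock() */

		queue_helper_work(info);
		if (wait == UMH_NO_WAIT)
			return 0;		/* bug: leaves helper_lock() unbalanced */

		wait_for_helper_completion(info);
	out:
		helper_unlock();
		return retval;
	}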
The attached patch fixes this issue. Signed-off-by: Nigel Cunningham Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kmod.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index c6a4f8aebeb..bb7df2a28bd 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -451,13 +451,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, enum umh_wait wait) { DECLARE_COMPLETION_ONSTACK(done); - int retval; + int retval = 0; helper_lock(); - if (sub_info->path[0] == '\0') { - retval = 0; + if (sub_info->path[0] == '\0') goto out; - } if (!khelper_wq || usermodehelper_disabled) { retval = -EBUSY; @@ -468,13 +466,14 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, sub_info->wait = wait; queue_work(khelper_wq, &sub_info->work); - if (wait == UMH_NO_WAIT) /* task has freed sub_info */ - return 0; + if (wait == UMH_NO_WAIT) /* task has freed sub_info */ + goto unlock; wait_for_completion(&done); retval = sub_info->retval; - out: +out: call_usermodehelper_freeinfo(sub_info); +unlock: helper_unlock(); return retval; } -- cgit v1.2.3 From 0ec160dd48b666ddef39d639323d0da26d0b710d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Jan 2008 17:18:24 -0800 Subject: hrtimer: fix section mismatch Fix section mismatch in hrtimer.c: WARNING: vmlinux.o(.text+0x50c61): Section mismatch: reference to .init.text: (between 'hrtimer_cpu_notify' and 'down_read_trylock') Noticed by Johannes Berg and confirmed by Sam Ravnborg. Signed-off-by: Randy Dunlap Cc: Sam Ravnborg Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e65dd0b47cd..f994bb8065e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1378,7 +1378,7 @@ sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) /* * Functions related to boot-time initialization: */ -static void __devinit init_hrtimers_cpu(int cpu) +static void __cpuinit init_hrtimers_cpu(int cpu) { struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); int i; -- cgit v1.2.3 From 48ccf3dac341118992b70ca89c47728e8b1d300b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 21 Jan 2008 17:18:25 -0800 Subject: timer: fix section mismatch The caller is __cpuinit. Also, this code block and its caller are inside #ifdef CONFIG_HOTPLUG_CPU blocks, so this code should reflect that config symbol's usage. 
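For background on these annotations (paraphrased from memory of 2.6.24-era include/linux/init.h; check the tree for the exact definitions): __init code is always discarded after boot, while __cpuinit and __devinit code is kept only when the corresponding hotplug option is enabled, which is why an always-reachable CPU-notifier path must not reference __init or __devinit functions.

	/* Rough sketch of the relevant definitions (from memory; the real
	 * header has more variants such as __cpuinitdata and __meminit). */
	#ifdef CONFIG_HOTPLUG_CPU
	#define __cpuinit			/* kept: may be called on CPU hot-add */
	#else
	#define __cpuinit	__init		/* discarded after boot */
	#endif

	#ifdef CONFIG_HOTPLUG
	#define __devinit			/* kept: may be called on device hot-plug */
	#else
	#define __devinit	__init
	#endif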
WARNING: vmlinux.o(.text+0x4252f): Section mismatch: reference to .init.text: (between 'timer_cpu_notify' and 'msleep') Signed-off-by: Randy Dunlap Cc: Sam Ravnborg Cc: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/timer.c b/kernel/timer.c index 26671f4db07..2a00c22203f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1289,7 +1289,7 @@ static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head) } } -static void __devinit migrate_timers(int cpu) +static void __cpuinit migrate_timers(int cpu) { tvec_base_t *old_base; tvec_base_t *new_base; -- cgit v1.2.3 From c61935fd0e7f087a643827b4bf5ef646963c10fa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Jan 2008 11:24:58 +0100 Subject: sched: group scheduler, set uid share fix setting cpu share to 1 causes hangs, as reported in: http://bugzilla.kernel.org/show_bug.cgi?id=9779 as the default share is 1024, the values of 0 and 1 can indeed cause problems. Limit it to 2 or higher values. These values can only be set by the root user - but still it makes sense to protect against nonsensical values. Signed-off-by: Ingo Molnar --- kernel/sched.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 37cf07aa416..e76b11ca6df 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7153,6 +7153,14 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) { int i; + /* + * A weight of 0 or 1 can cause arithmetics problems. + * (The default weight is 1024 - so there's no practical + * limitation from this.) + */ + if (shares < 2) + shares = 2; + spin_lock(&tg->lock); if (tg->shares == shares) goto done; -- cgit v1.2.3 From 00e10776ff908a767b3d36a53d330db8fdc53a56 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 22 Jan 2008 03:31:39 -0800 Subject: rcu: fix section mismatch rcu_online_cpu() should be __cpuinit instead of __devinit. WARNING: vmlinux.o(.text+0x4b6d5): Section mismatch: reference to .init.text: (between 'rcu_cpu_notify' and 'wakeme_after_rcu') Signed-off-by: Randy Dunlap Cc: Sam Ravnborg Acked-by: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcupdate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a66d4d1615f..f2c1a04e9b1 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -549,7 +549,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->blimit = blimit; } -static void __devinit rcu_online_cpu(int cpu) +static void __cpuinit rcu_online_cpu(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_data, cpu); struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu); -- cgit v1.2.3