From 32bd671d6cbeda60dc73be77fa2b9037d9a9bfa0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 5 Feb 2009 12:24:15 +0100
Subject: signal: re-add dead task accumulation stats.

We're going to split the process wide cpu accounting into two parts:

 - clocks; which can take all the time they want since they run
           from user context.

 - timers; which need constant time tracing but can affort the overhead
           because they're default off -- and rare.

The clock readout will go back to a full sum of the thread group, for this
we need to re-add the exit stats that were removed in the initial itimer
rework (f06febc9: timers: fix itimer/many thread hang).

Furthermore, since that full sum can be rather slow for large thread groups
and we have the complete dead task stats, revert the do_notify_parent time
computation.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2127e959e0f..2e0646a3031 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -559,7 +559,7 @@ struct signal_struct {
 	 * Live threads maintain their own counters and add to these
 	 * in __exit_signal, except for the group leader.
 	 */
-	cputime_t cutime, cstime;
+	cputime_t utime, stime, cutime, cstime;
 	cputime_t gtime;
 	cputime_t cgtime;
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -567,6 +567,14 @@ struct signal_struct {
 	unsigned long inblock, oublock, cinblock, coublock;
 	struct task_io_accounting ioac;
 
+	/*
+	 * Cumulative ns of schedule CPU time fo dead threads in the
+	 * group, not including a zombie group leader, (This only differs
+	 * from jiffies_to_ns(utime + stime) if sched_clock uses something
+	 * other than jiffies.)
+	 */
+	unsigned long long sum_sched_runtime;
+
 	/*
 	 * We don't bother to synchronize most readers of this at all,
 	 * because there is no reader checking a limit that actually needs
-- 
cgit v1.2.3


From 4cd4c1b40d40447fb5e7ba80746c6d7ba91d7a53 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 5 Feb 2009 12:24:16 +0100
Subject: timers: split process wide cpu clocks/timers

Change the process wide cpu timers/clocks so that we:

 1) don't mess up the kernel with too many threads,
 2) don't have a per-cpu allocation for each process,
 3) have no impact when not used.

In order to accomplish this we're going to split it into two parts:

 - clocks; which can take all the time they want since they run
           from user context -- ie. sys_clock_gettime(CLOCK_PROCESS_CPUTIME_ID)

 - timers; which need constant time sampling but since they're
           explicity used, the user can pay the overhead.

The clock readout will go back to a full sum of the thread group, while the
timers will run of a global 'clock' that only runs when needed, so only
programs that make use of the facility pay the price.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/init_task.h | 11 +++++-----
 include/linux/sched.h     | 54 +++++++++++++++++++++++++++--------------------
 2 files changed, 36 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index ea0ea1a4c36..e752d973fa2 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -48,12 +48,11 @@ extern struct fs_struct init_fs;
 	.posix_timers	 = LIST_HEAD_INIT(sig.posix_timers),		\
 	.cpu_timers	= INIT_CPU_TIMERS(sig.cpu_timers),		\
 	.rlim		= INIT_RLIMITS,					\
-	.cputime	= { .totals = {					\
-		.utime = cputime_zero,					\
-		.stime = cputime_zero,					\
-		.sum_exec_runtime = 0,					\
-		.lock = __SPIN_LOCK_UNLOCKED(sig.cputime.totals.lock),	\
-	}, },								\
+	.cputimer	= { 						\
+		.cputime = INIT_CPUTIME,				\
+		.running = 0,						\
+		.lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock),	\
+	},								\
 }
 
 extern struct nsproxy init_nsproxy;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2e0646a3031..082d7619b3a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -443,7 +443,6 @@ struct pacct_struct {
  * @utime:		time spent in user mode, in &cputime_t units
  * @stime:		time spent in kernel mode, in &cputime_t units
  * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
- * @lock:		lock for fields in this struct
  *
  * This structure groups together three kinds of CPU time that are
  * tracked for threads and thread groups.  Most things considering
@@ -454,23 +453,33 @@ struct task_cputime {
 	cputime_t utime;
 	cputime_t stime;
 	unsigned long long sum_exec_runtime;
-	spinlock_t lock;
 };
 /* Alternate field names when used to cache expirations. */
 #define prof_exp	stime
 #define virt_exp	utime
 #define sched_exp	sum_exec_runtime
 
+#define INIT_CPUTIME	\
+	(struct task_cputime) {					\
+		.utime = cputime_zero,				\
+		.stime = cputime_zero,				\
+		.sum_exec_runtime = 0,				\
+	}
+
 /**
- * struct thread_group_cputime - thread group interval timer counts
- * @totals:		thread group interval timers; substructure for
- *			uniprocessor kernel, per-cpu for SMP kernel.
+ * struct thread_group_cputimer - thread group interval timer counts
+ * @cputime:		thread group interval timers.
+ * @running:		non-zero when there are timers running and
+ * 			@cputime receives updates.
+ * @lock:		lock for fields in this struct.
  *
  * This structure contains the version of task_cputime, above, that is
- * used for thread group CPU clock calculations.
+ * used for thread group CPU timer calculations.
  */
-struct thread_group_cputime {
-	struct task_cputime totals;
+struct thread_group_cputimer {
+	struct task_cputime cputime;
+	int running;
+	spinlock_t lock;
 };
 
 /*
@@ -519,10 +528,10 @@ struct signal_struct {
 	cputime_t it_prof_incr, it_virt_incr;
 
 	/*
-	 * Thread group totals for process CPU clocks.
-	 * See thread_group_cputime(), et al, for details.
+	 * Thread group totals for process CPU timers.
+	 * See thread_group_cputimer(), et al, for details.
 	 */
-	struct thread_group_cputime cputime;
+	struct thread_group_cputimer cputimer;
 
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
@@ -2191,27 +2200,26 @@ static inline int spin_needbreak(spinlock_t *lock)
 /*
  * Thread group CPU time accounting.
  */
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 
 static inline
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 {
-	struct task_cputime *totals = &tsk->signal->cputime.totals;
+	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 	unsigned long flags;
 
-	spin_lock_irqsave(&totals->lock, flags);
-	*times = *totals;
-	spin_unlock_irqrestore(&totals->lock, flags);
+	WARN_ON(!cputimer->running);
+
+	spin_lock_irqsave(&cputimer->lock, flags);
+	*times = cputimer->cputime;
+	spin_unlock_irqrestore(&cputimer->lock, flags);
 }
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
-	sig->cputime.totals = (struct task_cputime){
-		.utime = cputime_zero,
-		.stime = cputime_zero,
-		.sum_exec_runtime = 0,
-	};
-
-	spin_lock_init(&sig->cputime.totals.lock);
+	sig->cputimer.cputime = INIT_CPUTIME;
+	spin_lock_init(&sig->cputimer.lock);
+	sig->cputimer.running = 0;
 }
 
 static inline void thread_group_cputime_free(struct signal_struct *sig)
-- 
cgit v1.2.3


From 7d8e23df69820e6be42bcc41d441f4860e8c76f7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 6 Feb 2009 14:57:51 +0100
Subject: timers: split process wide cpu clocks/timers, remove spurious warning

Mike Galbraith reported that the new warning in thread_group_cputimer()
triggers en masse with Amarok running.

Oleg Nesterov observed:

  Can't fastpath_timer_check()->thread_group_cputimer() have the
  false warning too? Suppose we had the timer, then posix_cpu_timer_del()
  removes this timer, but task_cputime_zero(&sig->cputime_expires) still
  not true.

Remove the spurious debug warning.

Reported-by: Mike Galbraith <efault@gmx.de>
Explained-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 082d7619b3a..79392916d6c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2208,8 +2208,6 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 	unsigned long flags;
 
-	WARN_ON(!cputimer->running);
-
 	spin_lock_irqsave(&cputimer->lock, flags);
 	*times = cputimer->cputime;
 	spin_unlock_irqrestore(&cputimer->lock, flags);
-- 
cgit v1.2.3


From 704126ad81b8cb7d3d70adb9ecb143f4d3fb38af Mon Sep 17 00:00:00 2001
From: Yu Zhao <yu.zhao@intel.com>
Date: Sun, 4 Jan 2009 16:28:52 +0800
Subject: VT-d: handle Invalidation Queue Error to avoid system hang

When hardware detects any error with a descriptor from the invalidation
queue, it stops fetching new descriptors from the queue until software
clears the Invalidation Queue Error bit in the Fault Status register.
Following fix handles the IQE so the kernel won't be trapped in an
infinite loop.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/intel-iommu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index c4f6c101dbc..d2e3cbfba14 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -194,6 +194,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 /* FSTS_REG */
 #define DMA_FSTS_PPF ((u32)2)
 #define DMA_FSTS_PFO ((u32)1)
+#define DMA_FSTS_IQE (1 << 4)
 #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
 
 /* FRCD_REG, 32 bits access */
@@ -328,7 +329,7 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
 			  unsigned int size_order, u64 type,
 			  int non_present_entry_flush);
 
-extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
+extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 
 extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t);
 extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t);
-- 
cgit v1.2.3


From 43a990765a9e874350bae1009366d00809dbc9d8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 10 Feb 2009 00:00:22 +0100
Subject: sound: Remove OSSlib stuff from linux/soundcard.h

Removed OSSlib stuff from linux/soundcard.h to fix the warnings for
'make headers_check'.

This patch breaks building against OSSlib with the kernel headers
instead of its own headers. It should still work with any
version of the library from the 2003 onwards which provide
their own headers for the latest interface.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/soundcard.h | 74 +++++++++++++++--------------------------------
 1 file changed, 23 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/soundcard.h b/include/linux/soundcard.h
index 523d069c862..1904afedb82 100644
--- a/include/linux/soundcard.h
+++ b/include/linux/soundcard.h
@@ -1045,50 +1045,36 @@ typedef struct mixer_vol_table {
  */
 #define LOCL_STARTAUDIO		1
 
-#if (!defined(__KERNEL__) && !defined(KERNEL) && !defined(INKERNEL) && !defined(_KERNEL)) || defined(USE_SEQ_MACROS) 
+#if !defined(__KERNEL__) || defined(USE_SEQ_MACROS)
 /*
  *	Some convenience macros to simplify programming of the
  *	/dev/sequencer interface
  *
- *	These macros define the API which should be used when possible.
+ *	This is a legacy interface for applications written against
+ *	the OSSlib-3.8 style interface. It is no longer possible
+ *	to actually link against OSSlib with this header, but we
+ *	still provide these macros for programs using them.
+ *
+ *	If you want to use OSSlib, it is recommended that you get
+ *	the GPL version of OSS-4.x and build against that version
+ *	of the header.
+ *
+ *	We redefine the extern keyword so that make headers_check
+ *	does not complain about SEQ_USE_EXTBUF.
  */
 #define SEQ_DECLAREBUF()		SEQ_USE_EXTBUF()
 
 void seqbuf_dump(void);	/* This function must be provided by programs */
 
-extern int OSS_init(int seqfd, int buflen);
-extern void OSS_seqbuf_dump(int fd, unsigned char *buf, int buflen);
-extern void OSS_seq_advbuf(int len, int fd, unsigned char *buf, int buflen);
-extern void OSS_seq_needbuf(int len, int fd, unsigned char *buf, int buflen);
-extern void OSS_patch_caching(int dev, int chn, int patch,
-			      int fd, unsigned char *buf, int buflen);
-extern void OSS_drum_caching(int dev, int chn, int patch,
-			      int fd, unsigned char *buf, int buflen);
-extern void OSS_write_patch(int fd, unsigned char *buf, int len);
-extern int OSS_write_patch2(int fd, unsigned char *buf, int len);
-
 #define SEQ_PM_DEFINES int __foo_bar___
-#ifdef OSSLIB
-#  define SEQ_USE_EXTBUF() \
-		extern unsigned char *_seqbuf; \
-		extern int _seqbuflen;extern int _seqbufptr
-#  define SEQ_DEFINEBUF(len) SEQ_USE_EXTBUF();static int _requested_seqbuflen=len
-#  define _SEQ_ADVBUF(len) OSS_seq_advbuf(len, seqfd, _seqbuf, _seqbuflen)
-#  define _SEQ_NEEDBUF(len) OSS_seq_needbuf(len, seqfd, _seqbuf, _seqbuflen)
-#  define SEQ_DUMPBUF() OSS_seqbuf_dump(seqfd, _seqbuf, _seqbuflen)
-
-#  define SEQ_LOAD_GMINSTR(dev, instr) \
-		OSS_patch_caching(dev, -1, instr, seqfd, _seqbuf, _seqbuflen)
-#  define SEQ_LOAD_GMDRUM(dev, drum) \
-		OSS_drum_caching(dev, -1, drum, seqfd, _seqbuf, _seqbuflen)
-#else /* !OSSLIB */
-
-#  define SEQ_LOAD_GMINSTR(dev, instr)
-#  define SEQ_LOAD_GMDRUM(dev, drum)
-
-#  define SEQ_USE_EXTBUF() \
-		extern unsigned char _seqbuf[]; \
-		extern int _seqbuflen;extern int _seqbufptr
+
+#define SEQ_LOAD_GMINSTR(dev, instr)
+#define SEQ_LOAD_GMDRUM(dev, drum)
+
+#define _SEQ_EXTERN extern
+#define SEQ_USE_EXTBUF() \
+		_SEQ_EXTERN unsigned char _seqbuf[]; \
+		_SEQ_EXTERN int _seqbuflen; _SEQ_EXTERN int _seqbufptr
 
 #ifndef USE_SIMPLE_MACROS
 /* Sample seqbuf_dump() implementation:
@@ -1131,7 +1117,6 @@ extern int OSS_write_patch2(int fd, unsigned char *buf, int len);
  */
 #define _SEQ_NEEDBUF(len)	/* empty */
 #endif
-#endif /* !OSSLIB */
 
 #define SEQ_VOLUME_MODE(dev, mode)	{_SEQ_NEEDBUF(8);\
 					_seqbuf[_seqbufptr] = SEQ_EXTENDED;\
@@ -1215,14 +1200,8 @@ extern int OSS_write_patch2(int fd, unsigned char *buf, int len);
 		_CHN_COMMON(dev, MIDI_CHN_PRESSURE, chn, pressure, 0, 0)
 
 #define SEQ_SET_PATCH SEQ_PGM_CHANGE
-#ifdef OSSLIB
-#   define SEQ_PGM_CHANGE(dev, chn, patch) \
-		{OSS_patch_caching(dev, chn, patch, seqfd, _seqbuf, _seqbuflen); \
-		 _CHN_COMMON(dev, MIDI_PGM_CHANGE, chn, patch, 0, 0);}
-#else
-#   define SEQ_PGM_CHANGE(dev, chn, patch) \
+#define SEQ_PGM_CHANGE(dev, chn, patch) \
 		_CHN_COMMON(dev, MIDI_PGM_CHANGE, chn, patch, 0, 0)
-#endif
 
 #define SEQ_CONTROL(dev, chn, controller, value) \
 		_CHN_COMMON(dev, MIDI_CTL_CHANGE, chn, controller, 0, value)
@@ -1300,19 +1279,12 @@ extern int OSS_write_patch2(int fd, unsigned char *buf, int len);
 /*
  * Patch loading.
  */
-#ifdef OSSLIB
-#   define SEQ_WRPATCH(patchx, len) \
-		OSS_write_patch(seqfd, (char*)(patchx), len)
-#   define SEQ_WRPATCH2(patchx, len) \
-		OSS_write_patch2(seqfd, (char*)(patchx), len)
-#else
-#   define SEQ_WRPATCH(patchx, len) \
+#define SEQ_WRPATCH(patchx, len) \
 		{if (_seqbufptr) SEQ_DUMPBUF();\
 		 if (write(seqfd, (char*)(patchx), len)==-1) \
 		    perror("Write patch: /dev/sequencer");}
-#   define SEQ_WRPATCH2(patchx, len) \
+#define SEQ_WRPATCH2(patchx, len) \
 		(SEQ_DUMPBUF(), write(seqfd, (char*)(patchx), len))
-#endif
 
 #endif
 #endif
-- 
cgit v1.2.3


From 7f5aa215088b817add9c71914b83650bdd49f8a9 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 10 Feb 2009 11:15:34 -0500
Subject: jbd2: Avoid possible NULL dereference in
 jbd2_journal_begin_ordered_truncate()

If we race with commit code setting i_transaction to NULL, we could
possibly dereference it.  Proper locking requires the journal pointer
(to access journal->j_list_lock), which we don't have.  So we have to
change the prototype of the function so that filesystem passes us the
journal pointer.  Also add a more detailed comment about why the
function jbd2_journal_begin_ordered_truncate() does what it does and
how it should be used.

Thanks to Dan Carpenter <error27@gmail.com> for pointing to the
suspitious code.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Acked-by: Joel Becker <joel.becker@oracle.com>
CC: linux-ext4@vger.kernel.org
CC: ocfs2-devel@oss.oracle.com
CC: mfasheh@suse.de
CC: Dan Carpenter <error27@gmail.com>
---
 include/linux/jbd2.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index b28b37eb11c..4d248b3f132 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1150,7 +1150,8 @@ extern int	   jbd2_journal_clear_err  (journal_t *);
 extern int	   jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int	   jbd2_journal_force_commit(journal_t *);
 extern int	   jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
-extern int	   jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
+extern int	   jbd2_journal_begin_ordered_truncate(journal_t *journal,
+				struct jbd2_inode *inode, loff_t new_size);
 extern void	   jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
 extern void	   jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
 
-- 
cgit v1.2.3


From e672f7db767156bf71adf9c592cfe81b339523d6 Mon Sep 17 00:00:00 2001
From: Chuck Ebbert <cebbert@redhat.com>
Date: Tue, 10 Feb 2009 17:18:17 -0800
Subject: pkt_sched: type should be __u32 in header

Using u32 in this header breaks the build of iptables.

Signed-off-by: Chuck Ebbert <cebbert@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index b2648e8e498..d51a2b3e221 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -515,7 +515,7 @@ enum
 
 struct tc_drr_stats
 {
-	u32	deficit;
+	__u32	deficit;
 };
 
 #endif
-- 
cgit v1.2.3


From 3fccfd67df79c6351a156eb25a7a514e5f39c4d9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 10 Feb 2009 16:37:31 +0100
Subject: timers: split process wide cpu clocks/timers, fix

To decrease the chance of a missed enable, always enable the timer when we
sample it, we'll always disable it when we find that there are no active timers
in the jiffy tick.

This fixes a flood of warnings reported by Mike Galbraith.

Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 79392916d6c..5d10fa0b600 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2209,6 +2209,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 	unsigned long flags;
 
 	spin_lock_irqsave(&cputimer->lock, flags);
+	cputimer->running = 1;
 	*times = cputimer->cputime;
 	spin_unlock_irqrestore(&cputimer->lock, flags);
 }
-- 
cgit v1.2.3


From 4da94d49b2ecb0a26e716a8811c3ecc542c2a65d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 11 Feb 2009 11:30:27 +0100
Subject: timers: fix TIMER_ABSTIME for process wide cpu timers

The POSIX timer interface allows for absolute time expiry values through the
TIMER_ABSTIME flag, therefore we have to synchronize the timer to the clock
every time we start it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5d10fa0b600..8981e52c714 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2201,18 +2201,7 @@ static inline int spin_needbreak(spinlock_t *lock)
  * Thread group CPU time accounting.
  */
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-
-static inline
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
-{
-	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cputimer->lock, flags);
-	cputimer->running = 1;
-	*times = cputimer->cputime;
-	spin_unlock_irqrestore(&cputimer->lock, flags);
-}
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 
 static inline void thread_group_cputime_init(struct signal_struct *sig)
 {
-- 
cgit v1.2.3


From 9f339e7028e2855717af3193c938f9960ad13b38 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Wed, 11 Feb 2009 15:10:27 +0100
Subject: x86, ptrace, mm: fix double-free on race

Ptrace_detach() races with __ptrace_unlink() if the traced task is
reaped while detaching. This might cause a double-free of the BTS
buffer.

Change the ptrace_detach() path to only do the memory accounting in
ptrace_bts_detach() and leave the buffer free to ptrace_bts_untrace()
which will be called from __ptrace_unlink().

The fix follows a proposal from Oleg Nesterov.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e8ddc98b840..3d7fb44d7d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1305,5 +1305,6 @@ void vmemmap_populate_print_last(void);
 
 extern void *alloc_locked_buffer(size_t size);
 extern void free_locked_buffer(void *buffer, size_t size);
+extern void release_locked_buffer(void *buffer, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
-- 
cgit v1.2.3


From cfebe563bd0a3ff97e1bc167123120d59c7a84db Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 11 Feb 2009 13:04:36 -0800
Subject: cgroups: fix lockdep subclasses overflow

I enabled all cgroup subsystems when compiling kernel, and then:
 # mount -t cgroup -o net_cls xxx /mnt
 # mkdir /mnt/0

This showed up immediately:
 BUG: MAX_LOCKDEP_SUBCLASSES too low!
 turning off the locking correctness validator.

It's caused by the cgroup hierarchy lock:
	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
		struct cgroup_subsys *ss = subsys[i];
		if (ss->root == root)
			mutex_lock_nested(&ss->hierarchy_mutex, i);
	}

Now we have 9 cgroup subsystems, and the above 'i' for net_cls is 8, but
MAX_LOCKDEP_SUBCLASSES is 8.

This patch uses different lockdep keys for different subsystems.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e4e8e117d27..499900d0cee 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -378,6 +378,7 @@ struct cgroup_subsys {
 	 * - initiating hotplug events
 	 */
 	struct mutex hierarchy_mutex;
+	struct lock_class_key subsys_key;
 
 	/*
 	 * Link to parent, and list entry in parent's children.
-- 
cgit v1.2.3


From 6c5979631b4b03c9288776562c18036765e398c1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 11 Feb 2009 13:04:38 -0800
Subject: syscall define: fix uml compile bug

With the new system call defines we get this on uml:

arch/um/sys-i386/built-in.o: In function `sys_call_table':
(.rodata+0x308): undefined reference to `sys_sigprocmask'

Reason for this is that uml passes the preprocessor option
-Dsigprocmask=kernel_sigprocmask to gcc when compiling the kernel.
This causes SYSCALL_DEFINE3(sigprocmask, ...) to be expanded to
SYSCALL_DEFINEx(3, kernel_sigprocmask, ...) and finally to a system
call named sys_kernel_sigprocmask.  However sys_sigprocmask is missing
because of this.

To avoid macro expansion for the system call name just concatenate the
name at first define instead of carrying it through severel levels.
This was pointed out by Al Viro.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Reviewed-by: WANG Cong <wangcong@zeuux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0eda02ff241..f9f900cfd06 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -95,13 +95,13 @@ struct old_linux_dirent;
 #define __SC_TEST5(t5, a5, ...)	__SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
 #define __SC_TEST6(t6, a6, ...)	__SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
 
-#define SYSCALL_DEFINE0(name)   asmlinkage long sys_##name(void)
-#define SYSCALL_DEFINE1(...)    SYSCALL_DEFINEx(1, __VA_ARGS__)
-#define SYSCALL_DEFINE2(...)    SYSCALL_DEFINEx(2, __VA_ARGS__)
-#define SYSCALL_DEFINE3(...)    SYSCALL_DEFINEx(3, __VA_ARGS__)
-#define SYSCALL_DEFINE4(...)    SYSCALL_DEFINEx(4, __VA_ARGS__)
-#define SYSCALL_DEFINE5(...)    SYSCALL_DEFINEx(5, __VA_ARGS__)
-#define SYSCALL_DEFINE6(...)    SYSCALL_DEFINEx(6, __VA_ARGS__)
+#define SYSCALL_DEFINE0(name)	   asmlinkage long sys_##name(void)
+#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
+#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
+#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
+#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
+#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
+#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
 
 #ifdef CONFIG_PPC64
 #define SYSCALL_ALIAS(alias, name)					\
@@ -121,21 +121,21 @@ struct old_linux_dirent;
 
 #define SYSCALL_DEFINE(name) static inline long SYSC_##name
 #define SYSCALL_DEFINEx(x, name, ...)					\
-	asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__));		\
-	static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__));	\
-	asmlinkage long SyS_##name(__SC_LONG##x(__VA_ARGS__))		\
+	asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__));		\
+	static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__));	\
+	asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__))		\
 	{								\
 		__SC_TEST##x(__VA_ARGS__);				\
-		return (long) SYSC_##name(__SC_CAST##x(__VA_ARGS__));	\
+		return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__));	\
 	}								\
-	SYSCALL_ALIAS(sys_##name, SyS_##name);				\
-	static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__))
+	SYSCALL_ALIAS(sys##name, SyS##name);				\
+	static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__))
 
 #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */
 
 #define SYSCALL_DEFINE(name) asmlinkage long sys_##name
 #define SYSCALL_DEFINEx(x, name, ...)					\
-	asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__))
+	asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))
 
 #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */
 
-- 
cgit v1.2.3


From 1d93e52eb48df986a3c4d5ad8a520bf1f6837367 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <jw@emlix.com>
Date: Wed, 11 Feb 2009 08:47:19 -0700
Subject: dmaengine: update kerneldoc

Some of the kerneldoc comments in the dmaengine header describe
already removed structure members.  Remove them.

Also add a short description for dma_device->device_is_tx_complete.

Signed-off-by: Johannes Weiner <jw@emlix.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 3e68469c188..087e79acf8c 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -97,7 +97,6 @@ typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
 
 /**
  * struct dma_chan_percpu - the per-CPU part of struct dma_chan
- * @refcount: local_t used for open-coded "bigref" counting
  * @memcpy_count: transaction counter
  * @bytes_transferred: byte counter
  */
@@ -114,9 +113,6 @@ struct dma_chan_percpu {
  * @cookie: last cookie value returned to client
  * @chan_id: channel ID for sysfs
  * @dev: class device for sysfs
- * @refcount: kref, used in "bigref" slow-mode
- * @slow_ref: indicates that the DMA channel is free
- * @rcu: the DMA channel's RCU head
  * @device_node: used to add this to the device chan list
  * @local: per-cpu pointer to a struct dma_chan_percpu
  * @client-count: how many clients are using this channel
@@ -211,8 +207,6 @@ struct dma_async_tx_descriptor {
  * @global_node: list_head for global dma_device_list
  * @cap_mask: one or more dma_capability flags
  * @max_xor: maximum number of xor sources, 0 if no capability
- * @refcount: reference count
- * @done: IO completion struct
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @device_alloc_chan_resources: allocate resources and return the
@@ -225,6 +219,7 @@ struct dma_async_tx_descriptor {
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
  * @device_terminate_all: terminate all pending operations
+ * @device_is_tx_complete: poll for transaction completion
  * @device_issue_pending: push pending transactions to hardware
  */
 struct dma_device {
-- 
cgit v1.2.3


From 7a0eb1960e8ddcb68ea631caf16815485af0e228 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 19 Jan 2009 14:57:52 +0200
Subject: KVM: Avoid using CONFIG_ in userspace visible headers

Kconfig symbols are not available in userspace, and are not stripped by
headers-install.  Avoid their use by adding #defines in <asm/kvm.h> to
suit each architecture.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 5715f190760..0424326f167 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -58,10 +58,10 @@ struct kvm_irqchip {
 	__u32 pad;
         union {
 		char dummy[512];  /* reserving space */
-#ifdef CONFIG_X86
+#ifdef __KVM_HAVE_PIT
 		struct kvm_pic_state pic;
 #endif
-#if defined(CONFIG_X86) || defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_IOAPIC
 		struct kvm_ioapic_state ioapic;
 #endif
 	} chip;
@@ -384,16 +384,16 @@ struct kvm_trace_rec {
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#if defined(CONFIG_X86)||defined(CONFIG_IA64)
+#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
 #endif
 #define KVM_CAP_IOMMU 18
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_MSI
 #define KVM_CAP_DEVICE_MSI 20
 #endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#if defined(CONFIG_X86)
+#ifdef __KVM_HAVE_USER_NMI
 #define KVM_CAP_USER_NMI 22
 #endif
 
-- 
cgit v1.2.3


From ad8ba2cd44d4d39fb3fe55d5dcc565b19fc3a7fb Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Tue, 6 Jan 2009 10:03:02 +0800
Subject: KVM: Add kvm_arch_sync_events to sync with asynchronize events

kvm_arch_sync_events is introduced to quiet down all other events may happen
contemporary with VM destroy process, like IRQ handler and work struct for
assigned device.

For kvm_arch_sync_events is called at the very beginning of kvm_destroy_vm(), so
the state of KVM here is legal and can provide a environment to quiet down other
events.

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec49d0be7f5..bf6f703642f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -285,6 +285,7 @@ void kvm_free_physmem(struct kvm *kvm);
 struct  kvm *kvm_arch_create_vm(void);
 void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_free_all_assigned_devices(struct kvm *kvm);
+void kvm_arch_sync_events(struct kvm *kvm);
 
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
-- 
cgit v1.2.3


From 5955c7a2cfb6a35429adea5dc480002b15ca8cfc Mon Sep 17 00:00:00 2001
From: Zlatko Calusic <zlatko.calusic@iskon.hr>
Date: Wed, 18 Feb 2009 01:33:34 +0100
Subject: Add support for VT6415 PCIE PATA IDE Host Controller

Signed-off-by: Zlatko Calusic <zlatko.calusic@iskon.hr>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 52a9fe08451..918391b4b10 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1312,6 +1312,7 @@
 #define PCI_DEVICE_ID_VIA_VT3351	0x0351
 #define PCI_DEVICE_ID_VIA_VT3364	0x0364
 #define PCI_DEVICE_ID_VIA_8371_0	0x0391
+#define PCI_DEVICE_ID_VIA_6415		0x0415
 #define PCI_DEVICE_ID_VIA_8501_0	0x0501
 #define PCI_DEVICE_ID_VIA_82C561	0x0561
 #define PCI_DEVICE_ID_VIA_82C586_1	0x0571
-- 
cgit v1.2.3


From 92a0acce186cde8ead56c6915d9479773673ea1a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 17 Feb 2009 21:24:05 -0800
Subject: net: Kill skb_truesize_check(), it only catches false-positives.

A long time ago we had bugs, primarily in TCP, where we would modify
skb->truesize (for TSO queue collapsing) in ways which would corrupt
the socket memory accounting.

skb_truesize_check() was added in order to try and catch this error
more systematically.

However this debugging check has morphed into a Frankenstein of sorts
and these days it does nothing other than catch false-positives.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cf2cb50f77d..9dcf956ad18 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -416,15 +416,6 @@ extern void	      skb_over_panic(struct sk_buff *skb, int len,
 				     void *here);
 extern void	      skb_under_panic(struct sk_buff *skb, int len,
 				      void *here);
-extern void	      skb_truesize_bug(struct sk_buff *skb);
-
-static inline void skb_truesize_check(struct sk_buff *skb)
-{
-	int len = sizeof(struct sk_buff) + skb->len;
-
-	if (unlikely((int)skb->truesize < len))
-		skb_truesize_bug(skb);
-}
 
 extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			int getfrag(void *from, char *to, int offset,
-- 
cgit v1.2.3


From 93dbb393503d53cd226e5e1f0088fe8f4dbaa2b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 16 Feb 2009 10:25:40 +0100
Subject: block: fix bad definition of BIO_RW_SYNC

We can't OR shift values, so get rid of BIO_RW_SYNC and use BIO_RW_SYNCIO
and BIO_RW_UNPLUG explicitly. This brings back the behaviour from before
213d9417fec62ef4c3675621b9364a667954d4dd.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bio.h          | 2 --
 include/linux/blktrace_api.h | 1 +
 include/linux/fs.h           | 6 +++---
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2aa283ab062..1b16108a541 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -171,8 +171,6 @@ struct bio {
 #define BIO_RW_FAILFAST_TRANSPORT	8
 #define BIO_RW_FAILFAST_DRIVER		9
 
-#define BIO_RW_SYNC	(BIO_RW_SYNCIO | BIO_RW_UNPLUG)
-
 #define bio_rw_flagged(bio, flag)	((bio)->bi_rw & (1 << (flag)))
 
 /*
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 25379cba237..6e915878e88 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -15,6 +15,7 @@ enum blktrace_cat {
 	BLK_TC_WRITE	= 1 << 1,	/* writes */
 	BLK_TC_BARRIER	= 1 << 2,	/* barrier */
 	BLK_TC_SYNC	= 1 << 3,	/* sync IO */
+	BLK_TC_SYNCIO	= BLK_TC_SYNC,
 	BLK_TC_QUEUE	= 1 << 4,	/* queueing/merging */
 	BLK_TC_REQUEUE	= 1 << 5,	/* requeueing */
 	BLK_TC_ISSUE	= 1 << 6,	/* issue */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6022f44043f..67857dc1636 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -87,10 +87,10 @@ struct inodes_stat_t {
 #define WRITE 1
 #define READA 2		/* read-ahead  - don't block if no resources */
 #define SWRITE 3	/* for ll_rw_block() - wait for buffer lock */
-#define READ_SYNC	(READ | (1 << BIO_RW_SYNC))
+#define READ_SYNC	(READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
 #define READ_META	(READ | (1 << BIO_RW_META))
-#define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
-#define SWRITE_SYNC	(SWRITE | (1 << BIO_RW_SYNC))
+#define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
+#define SWRITE_SYNC	(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
 #define WRITE_BARRIER	(WRITE | (1 << BIO_RW_BARRIER))
 #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
 #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
-- 
cgit v1.2.3


From 5ca431f9ae8db8c6edb9c64bebe6d6521077afd6 Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 18 Feb 2009 15:29:23 +0100
Subject: netfilter: nfnetlink_log: fix per-rule qthreshold override

In NFLOG the per-rule qthreshold should overrides per-instance only
it is set. With current code, the per-rule qthreshold is 1 if not set
and it overrides the per-instance qthreshold.

This patch modifies the default xt_NFLOG threshold from 1 to
0. Thus a value of 0 means there is no per-rule setting and the instance
parameter has to apply.

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_NFLOG.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_NFLOG.h b/include/linux/netfilter/xt_NFLOG.h
index cdcd0ed58f7..4b36aeb46a1 100644
--- a/include/linux/netfilter/xt_NFLOG.h
+++ b/include/linux/netfilter/xt_NFLOG.h
@@ -2,7 +2,7 @@
 #define _XT_NFLOG_TARGET
 
 #define XT_NFLOG_DEFAULT_GROUP		0x1
-#define XT_NFLOG_DEFAULT_THRESHOLD	1
+#define XT_NFLOG_DEFAULT_THRESHOLD	0
 
 #define XT_NFLOG_MASK			0x0
 
-- 
cgit v1.2.3


From c296861291669f305deef19b78042330d7135017 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 18 Feb 2009 14:48:12 -0800
Subject: vmalloc: add __get_vm_area_caller()

We have get_vm_area_caller() and __get_vm_area() but not
__get_vm_area_caller()

On powerpc, I use __get_vm_area() to separate the ranges of addresses
given to vmalloc vs.  ioremap (various good reasons for that) so in order
to be able to implement the new caller tracking in /proc/vmallocinfo, I
need a "_caller" variant of it.

(akpm: needed for ongoing powerpc development, so merge it early)

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 506e7620a98..9c0890c7a06 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -84,6 +84,10 @@ extern struct vm_struct *get_vm_area_caller(unsigned long size,
 					unsigned long flags, void *caller);
 extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 					unsigned long start, unsigned long end);
+extern struct vm_struct *__get_vm_area_caller(unsigned long size,
+					unsigned long flags,
+					unsigned long start, unsigned long end,
+					void *caller);
 extern struct vm_struct *get_vm_area_node(unsigned long size,
 					  unsigned long flags, int node,
 					  gfp_t gfp_mask);
-- 
cgit v1.2.3


From 55ec82176eca52e4e0530a82a0eb59160a1a95a1 Mon Sep 17 00:00:00 2001
From: Paul Turner <pjt@google.com>
Date: Wed, 18 Feb 2009 14:48:15 -0800
Subject: vfs: separate FMODE_PREAD/FMODE_PWRITE into separate flags

Separate FMODE_PREAD and FMODE_PWRITE into separate flags to reflect the
reality that the read and write paths may have independent restrictions.

A git grep verifies that these flags are always cleared together so this
new behavior will only apply to interfaces that change to clear flags
individually.

This is required for "seq_file: properly cope with pread", a post-2.6.25
regression fix.

[akpm@linux-foundation.org: add comment]
Signed-off-by: Paul Turner <pjt@google.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc:  Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@kernel.org>		[2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6022f44043f..5852bd6afbe 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -54,24 +54,30 @@ struct inodes_stat_t {
 #define MAY_ACCESS 16
 #define MAY_OPEN 32
 
+/*
+ * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
+ * to O_WRONLY and O_RDWR via the strange trick in __dentry_open()
+ */
+
 /* file is open for reading */
 #define FMODE_READ		((__force fmode_t)1)
 /* file is open for writing */
 #define FMODE_WRITE		((__force fmode_t)2)
 /* file is seekable */
 #define FMODE_LSEEK		((__force fmode_t)4)
-/* file can be accessed using pread/pwrite */
+/* file can be accessed using pread */
 #define FMODE_PREAD		((__force fmode_t)8)
-#define FMODE_PWRITE		FMODE_PREAD	/* These go hand in hand */
+/* file can be accessed using pwrite */
+#define FMODE_PWRITE		((__force fmode_t)16)
 /* File is opened for execution with sys_execve / sys_uselib */
-#define FMODE_EXEC		((__force fmode_t)16)
+#define FMODE_EXEC		((__force fmode_t)32)
 /* File is opened with O_NDELAY (only set for block devices) */
-#define FMODE_NDELAY		((__force fmode_t)32)
+#define FMODE_NDELAY		((__force fmode_t)64)
 /* File is opened with O_EXCL (only set for block devices) */
-#define FMODE_EXCL		((__force fmode_t)64)
+#define FMODE_EXCL		((__force fmode_t)128)
 /* File is opened using open(.., 3, ..) and is writeable only for ioctls
    (specialy hack for floppy.c) */
-#define FMODE_WRITE_IOCTL	((__force fmode_t)128)
+#define FMODE_WRITE_IOCTL	((__force fmode_t)256)
 
 /*
  * Don't update ctime and mtime.
-- 
cgit v1.2.3


From 8f19d472935c83d823fa4cf02bcc0a7b9952db30 Mon Sep 17 00:00:00 2001
From: Eric Biederman <ebiederm@xmission.com>
Date: Wed, 18 Feb 2009 14:48:16 -0800
Subject: seq_file: properly cope with pread

Currently seq_read assumes that the offset passed to it is always the
offset it passed to user space.  In the case pread this assumption is
broken and we do the wrong thing when presented with pread.

To solve this I introduce an offset cache inside of struct seq_file so we
know where our logical file position is.  Then in seq_read if we try to
read from another offset we reset our data structures and attempt to go to
the offset user space wanted.

[akpm@linux-foundation.org: restore FMODE_PWRITE]
[pjt@google.com: seq_open needs its fmode opened up to take advantage of this]
Signed-off-by: Eric Biederman <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Paul Turner <pjt@google.com>
Cc: <stable@kernel.org>		[2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/seq_file.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 40ea5058c2e..f616f31576d 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -19,6 +19,7 @@ struct seq_file {
 	size_t from;
 	size_t count;
 	loff_t index;
+	loff_t read_pos;
 	u64 version;
 	struct mutex lock;
 	const struct seq_operations *op;
-- 
cgit v1.2.3


From 610d18f4128ebbd88845d0fc60cce67b49af881e Mon Sep 17 00:00:00 2001
From: Davide Libenzi <davidel@xmailserver.org>
Date: Wed, 18 Feb 2009 14:48:18 -0800
Subject: timerfd: add flags check

As requested by Michael, add a missing check for valid flags in
timerfd_settime(), and make it return EINVAL in case some extra bits are
set.

Michael said:
If this is to be any use to userland apps that want to check flag
support (perhaps it is too late already), then the sooner we get it
into the kernel the better: 2.6.29 would be good; earlier stables as
well would be even better.

[akpm@linux-foundation.org: remove unused TFD_FLAGS_SET]
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Davide Libenzi <davidel@xmailserver.org>
Cc: <stable@kernel.org>		[2.6.27.x, 2.6.28.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/timerfd.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index 86cb0501d3e..2d0792983f8 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -11,13 +11,21 @@
 /* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/fcntl.h>
 
-/* Flags for timerfd_settime.  */
+/*
+ * CAREFUL: Check include/asm-generic/fcntl.h when defining
+ * new flags, since they might collide with O_* ones. We want
+ * to re-use O_* flags that couldn't possibly have a meaning
+ * from eventfd, in order to leave a free define-space for
+ * shared O_* flags.
+ */
 #define TFD_TIMER_ABSTIME (1 << 0)
-
-/* Flags for timerfd_create.  */
 #define TFD_CLOEXEC O_CLOEXEC
 #define TFD_NONBLOCK O_NONBLOCK
 
+#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
+/* Flags for timerfd_create.  */
+#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
+/* Flags for timerfd_settime.  */
+#define TFD_SETTIME_FLAGS TFD_TIMER_ABSTIME
 
 #endif /* _LINUX_TIMERFD_H */
-
-- 
cgit v1.2.3


From 1cf6e7d83bf334cc5916137862c920a97aabc018 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Wed, 18 Feb 2009 14:48:18 -0800
Subject: mm: task dirty accounting fix

YAMAMOTO-san noticed that task_dirty_inc doesn't seem to be called properly for
cases where set_page_dirty is not used to dirty a page (eg. mark_buffer_dirty).

Additionally, there is some inconsistency about when task_dirty_inc is
called.  It is used for dirty balancing, however it even gets called for
__set_page_dirty_no_writeback.

So rather than increment it in a set_page_dirty wrapper, move it down to
exactly where the dirty page accounting stats are incremented.

Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7dc04ff5ab8..10074212a35 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1159,6 +1159,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
+void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
 #define VM_MAX_READAHEAD	128	/* kbytes */
-- 
cgit v1.2.3


From 287d859222e0adbc67666a6154aaf42d7d5bbb54 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 18 Feb 2009 14:48:26 -0800
Subject: atmel-mci: fix initialization of dma slave data

The conversion of atmel-mci to dma_request_channel missed the
initialization of the channel dma_slave information.  The filter_fn passed
to dma_request_channel is responsible for initializing the channel's
private data.  This implementation has the additional benefit of enabling
a generic client-channel data passing mechanism.

Reviewed-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dmaengine.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 3e68469c188..f0413845f20 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -121,6 +121,7 @@ struct dma_chan_percpu {
  * @local: per-cpu pointer to a struct dma_chan_percpu
  * @client-count: how many clients are using this channel
  * @table_count: number of appearances in the mem-to-mem allocation table
+ * @private: private data for certain client-channel associations
  */
 struct dma_chan {
 	struct dma_device *device;
@@ -134,6 +135,7 @@ struct dma_chan {
 	struct dma_chan_percpu *local;
 	int client_count;
 	int table_count;
+	void *private;
 };
 
 /**
-- 
cgit v1.2.3


From f2dbcfa738368c8a40d4a5f0b65dc9879577cb21 Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 18 Feb 2009 14:48:32 -0800
Subject: mm: clean up for early_pfn_to_nid()

What's happening is that the assertion in mm/page_alloc.c:move_freepages()
is triggering:

	BUG_ON(page_zone(start_page) != page_zone(end_page));

Once I knew this is what was happening, I added some annotations:

	if (unlikely(page_zone(start_page) != page_zone(end_page))) {
		printk(KERN_ERR "move_freepages: Bogus zones: "
		       "start_page[%p] end_page[%p] zone[%p]\n",
		       start_page, end_page, zone);
		printk(KERN_ERR "move_freepages: "
		       "start_zone[%p] end_zone[%p]\n",
		       page_zone(start_page), page_zone(end_page));
		printk(KERN_ERR "move_freepages: "
		       "start_pfn[0x%lx] end_pfn[0x%lx]\n",
		       page_to_pfn(start_page), page_to_pfn(end_page));
		printk(KERN_ERR "move_freepages: "
		       "start_nid[%d] end_nid[%d]\n",
		       page_to_nid(start_page), page_to_nid(end_page));
 ...

And here's what I got:

	move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00]
	move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00]
	move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff]
	move_freepages: start_nid[1] end_nid[0]

My memory layout on this box is:

[    0.000000] Zone PFN ranges:
[    0.000000]   Normal   0x00000000 -> 0x0081ff5d
[    0.000000] Movable zone start PFN for each node
[    0.000000] early_node_map[8] active PFN ranges
[    0.000000]     0: 0x00000000 -> 0x00020000
[    0.000000]     1: 0x00800000 -> 0x0081f7ff
[    0.000000]     1: 0x0081f800 -> 0x0081fe50
[    0.000000]     1: 0x0081fed1 -> 0x0081fed8
[    0.000000]     1: 0x0081feda -> 0x0081fedb
[    0.000000]     1: 0x0081fedd -> 0x0081fee5
[    0.000000]     1: 0x0081fee7 -> 0x0081ff51
[    0.000000]     1: 0x0081ff59 -> 0x0081ff5d

So it's a block move in that 0x81f600-->0x81f7ff region which triggers
the problem.

This patch:

Declaration of early_pfn_to_nid() is scattered over per-arch include
files, and it seems it's complicated to know when the declaration is used.
 I think it makes fix-for-memmap-init not easy.

This patch moves all declaration to include/linux/mm.h

After this,
  if !CONFIG_NODES_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
     -> Use static definition in include/linux/mm.h
  else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
     -> Use generic definition in mm/page_alloc.c
  else
     -> per-arch back end function will be called.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Reported-by: David Miller <davem@davemlloft.net>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: <stable@kernel.org>		[2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 10074212a35..065cdf8c09f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1041,10 +1041,23 @@ extern void free_bootmem_with_active_regions(int nid,
 typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
 extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
 extern void sparse_memory_present_with_active_regions(int nid);
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-extern int early_pfn_to_nid(unsigned long pfn);
-#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
+#if !defined(CONFIG_ARCH_POPULATES_NODE_MAP) && \
+    !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
+static inline int __early_pfn_to_nid(unsigned long pfn)
+{
+	return 0;
+}
+#else
+/* please see mm/page_alloc.c */
+extern int __meminit early_pfn_to_nid(unsigned long pfn);
+#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+/* there is a per-arch backend function. */
+extern int __meminit __early_pfn_to_nid(unsigned long pfn);
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+#endif
+
 extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long,
 				unsigned long, enum memmap_context);
-- 
cgit v1.2.3


From cc2559bccc72767cb446f79b071d96c30c26439b Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Wed, 18 Feb 2009 14:48:33 -0800
Subject: mm: fix memmap init for handling memory hole

Now, early_pfn_in_nid(PFN, NID) may returns false if PFN is a hole.
and memmap initialization was not done. This was a trouble for
sparc boot.

To fix this, the PFN should be initialized and marked as PG_reserved.
This patch changes early_pfn_in_nid() return true if PFN is a hole.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reported-by: David Miller <davem@davemlloft.net>
Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: <stable@kernel.org>		[2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 09c14e213b6..1aca6cebbb7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1071,7 +1071,7 @@ void sparse_init(void);
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_NODES_SPAN_OTHER_NODES
-#define early_pfn_in_nid(pfn, nid)	(early_pfn_to_nid(pfn) == (nid))
+bool early_pfn_in_nid(unsigned long pfn, int nid);
 #else
 #define early_pfn_in_nid(pfn, nid)	(1)
 #endif
-- 
cgit v1.2.3


From ffa7525c13eb3db0fd19a3e1cffe2ce6f561f5f3 Mon Sep 17 00:00:00 2001
From: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Date: Wed, 18 Feb 2009 14:48:34 -0800
Subject: jsm: additional device support

I have a Digi Neo 8 PCI card (114f:00b1) Serial controller: Digi
International Digi Neo 8 (rev 05)

that works with the jsm driver after using the following patch.

Signed-off-by: Adam Lackorzynski <adam@os.inf.tu-dresden.de>
Cc: Scott H Kilau <Scott_Kilau@digi.com>
Cc: Wendy Xiong <wendyx@us.ibm.com>
Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 918391b4b10..114b8192eab 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1445,6 +1445,7 @@
 #define PCI_DEVICE_ID_DIGI_DF_M_E	0x0071
 #define PCI_DEVICE_ID_DIGI_DF_M_IOM2_A	0x0072
 #define PCI_DEVICE_ID_DIGI_DF_M_A	0x0073
+#define PCI_DEVICE_ID_DIGI_NEO_8	0x00B1
 #define PCI_DEVICE_ID_NEO_2DB9          0x00C8
 #define PCI_DEVICE_ID_NEO_2DB9PRI       0x00C9
 #define PCI_DEVICE_ID_NEO_2RJ45         0x00CA
-- 
cgit v1.2.3


From 97bef7dd05563807539122c488a5dd93ed327722 Mon Sep 17 00:00:00 2001
From: Bernhard Walle <bernhard.walle@gmx.de>
Date: Wed, 18 Feb 2009 14:48:40 -0800
Subject: Bernhard has moved

Since I don't work for SUSE any more and the bwalle@suse.de address is
invalid, correct it in the copyright headers and documentation.

Signed-off-by: Bernhard Walle <bernhard.walle@gmx.de>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/firmware-map.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h
index 6e199c8dfac..cca686b3912 100644
--- a/include/linux/firmware-map.h
+++ b/include/linux/firmware-map.h
@@ -1,7 +1,7 @@
 /*
  * include/linux/firmware-map.h:
  *  Copyright (C) 2008 SUSE LINUX Products GmbH
- *  by Bernhard Walle <bwalle@suse.de>
+ *  by Bernhard Walle <bernhard.walle@gmx.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License v2.0 as published by
-- 
cgit v1.2.3


From e4dd61882e2cfe47ea72ecd825671e8e5ae29038 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 18 Feb 2009 23:31:11 -0800
Subject: vlan: Update skb->mac_header in __vlan_put_tag().

After moving mac addresses in __vlan_put_tag() skb->mac_header needs
to be updated.

Reported-by: Karl Hiramoto <karl@hiramoto.org>
Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index f8ff918c208..e1ff5b14310 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -210,6 +210,7 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 
 	/* Move the mac addresses to the beginning of the new header. */
 	memmove(skb->data, skb->data + VLAN_HLEN, 2 * VLAN_ETH_ALEN);
+	skb->mac_header -= VLAN_HLEN;
 
 	/* first, the ethernet type */
 	veth->h_vlan_proto = htons(ETH_P_8021Q);
-- 
cgit v1.2.3


From 3ef0e5ba467366125f04b423f4638baca54a4fc1 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 20 Feb 2009 15:38:41 -0800
Subject: slab: introduce kzfree()

kzfree() is a wrapper for kfree() that additionally zeroes the underlying
memory before releasing it to the slab allocator.

Currently there is code which memset()s the memory region of an object
before releasing it back to the slab allocator to make sure
security-sensitive data are really zeroed out after use.

These callsites can then just use kzfree() which saves some code, makes
users greppable and allows for a stupid destructor that isn't necessarily
aware of the actual object size.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Matt Mackall <mpm@selenic.com>
Acked-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index f96d13c281e..24c5602bee9 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -127,6 +127,7 @@ int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
 void * __must_check __krealloc(const void *, size_t, gfp_t);
 void * __must_check krealloc(const void *, size_t, gfp_t);
 void kfree(const void *);
+void kzfree(const void *);
 size_t ksize(const void *);
 
 /*
-- 
cgit v1.2.3


From 01b24fee285b098c74318a8be801ef3711ec18d7 Mon Sep 17 00:00:00 2001
From: Michael Buesch <mb@bu3sch.de>
Date: Fri, 20 Feb 2009 15:38:49 -0800
Subject: spi_bitbang: add more lowlevel function documentation

This adds more documentation of the lowlevel API to avoid future bugs.

Signed-off-by: Michael Buesch <mb@bu3sch.de>
Acked-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/spi/spi_bitbang.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index bf8de281b4e..eed4254bd50 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -83,6 +83,13 @@ extern int spi_bitbang_stop(struct spi_bitbang *spi);
  *  int getmiso(struct spi_device *);
  *  void spidelay(unsigned);
  *
+ * setsck()'s is_on parameter is a zero/nonzero boolean.
+ *
+ * setmosi()'s is_on parameter is a zero/nonzero boolean.
+ *
+ * getmiso() is required to return 0 or 1 only. Any other value is invalid
+ * and will result in improper operation.
+ *
  * A non-inlined routine would call bitbang_txrx_*() routines.  The
  * main loop could easily compile down to a handful of instructions,
  * especially if the delay is a NOP (to run at peak speed).
-- 
cgit v1.2.3


From b6adea334c6c89d5e6c94f9196bbf3a279cb53bd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 20 Feb 2009 15:38:52 -0800
Subject: 8250: fix boot hang with serial console when using with Serial Over
 Lan port

Intel 8257x Ethernet boards have a feature called Serial Over Lan.

This feature works by emulating a serial port, and it is detected by
kernel as a normal 8250 port.  However, this emulation is not perfect, as
also noticed on changeset 7500b1f602aad75901774a67a687ee985d85893f.

Before this patch, the kernel were trying to check if the serial TX is
capable of work using IRQ's.

This were done with a code similar this:

        serial_outp(up, UART_IER, UART_IER_THRI);
        lsr = serial_in(up, UART_LSR);
        iir = serial_in(up, UART_IIR);
        serial_outp(up, UART_IER, 0);

        if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT)
		up->bugs |= UART_BUG_TXEN;

This works fine for other 8250 ports, but, on 8250-emulated SoL port, the
chip is a little lazy to down UART_IIR_NO_INT at UART_IIR register.

Due to that, UART_BUG_TXEN is sometimes enabled.  However, as TX IRQ keeps
working, and the TX polling is now enabled, the driver miss-interprets the
IRQ received later, hanging up the machine until a key is pressed at the
serial console.

This is the 6 version of this patch.  Previous versions were trying to
introduce a large enough delay between serial_outp and serial_in(up,
UART_IIR), but not taking forever.  However, the needed delay couldn't be
safely determined.

At the experimental tests, a delay of 1us solves most of the cases, but
still hangs sometimes.  Increasing the delay to 5us was better, but still
doesn't solve.  A very high delay of 50 ms seemed to work every time.

However, poking around with delays and pray for it to be enough doesn't
seem to be a good approach, even for a quirk.

So, instead of playing with random large arbitrary delays, let's just
disable UART_BUG_TXEN for all SoL ports.

[akpm@linux-foundation.org: fix warnings]
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pci_ids.h     | 3 +++
 include/linux/serial_core.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 114b8192eab..aca8c458aa8 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2323,6 +2323,9 @@
 #define PCI_DEVICE_ID_INTEL_82378	0x0484
 #define PCI_DEVICE_ID_INTEL_I960	0x0960
 #define PCI_DEVICE_ID_INTEL_I960RM	0x0962
+#define PCI_DEVICE_ID_INTEL_8257X_SOL	0x1062
+#define PCI_DEVICE_ID_INTEL_82573E_SOL	0x1085
+#define PCI_DEVICE_ID_INTEL_82573L_SOL	0x108F
 #define PCI_DEVICE_ID_INTEL_82815_MC	0x1130
 #define PCI_DEVICE_ID_INTEL_82815_CGC	0x1132
 #define PCI_DEVICE_ID_INTEL_82092AA_0	0x1221
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 90bbbf0b116..df9245c7bd3 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -296,6 +296,7 @@ struct uart_port {
 #define UPF_HARDPPS_CD		((__force upf_t) (1 << 11))
 #define UPF_LOW_LATENCY		((__force upf_t) (1 << 13))
 #define UPF_BUGGY_UART		((__force upf_t) (1 << 14))
+#define UPF_NO_TXEN_TEST	((__force upf_t) (1 << 15))
 #define UPF_MAGIC_MULTIPLIER	((__force upf_t) (1 << 16))
 #define UPF_CONS_FLOW		((__force upf_t) (1 << 23))
 #define UPF_SHARE_IRQ		((__force upf_t) (1 << 24))
-- 
cgit v1.2.3


From 216773a787c3c46ef26bf1742c1fdba37d26be45 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 14 Feb 2009 01:59:06 +0100
Subject: Consolidate driver_probe_done() loops into one place

there's a few places that currently loop over driver_probe_done(), and
I'm about to add another one. This patch abstracts it into a helper
to reduce duplication.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Len Brown <lenb@kernel.org>
Acked-by: Greg KH <gregkh@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 45e5b1921fb..47f343c7bdd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -147,6 +147,8 @@ extern void put_driver(struct device_driver *drv);
 extern struct device_driver *driver_find(const char *name,
 					 struct bus_type *bus);
 extern int driver_probe_done(void);
+extern int wait_for_device_probe(void);
+
 
 /* sysfs interface for exporting driver attributes */
 
-- 
cgit v1.2.3


From 770824bdc421ff58a64db608294323571c949f4c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 22 Feb 2009 18:38:50 +0100
Subject: PM: Split up sysdev_[suspend|resume] from device_power_[down|up]

Move the sysdev_suspend/resume from the callee to the callers, with
no real change in semantics, so that we can rework the disabling of
interrupts during suspend/hibernation.

This is based on an earlier patch from Linus.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pm.h b/include/linux/pm.h
index de2e0a8f672..24ba5f67b3a 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -381,10 +381,12 @@ struct dev_pm_info {
 
 #ifdef CONFIG_PM_SLEEP
 extern void device_pm_lock(void);
+extern int sysdev_resume(void);
 extern void device_power_up(pm_message_t state);
 extern void device_resume(pm_message_t state);
 
 extern void device_pm_unlock(void);
+extern int sysdev_suspend(pm_message_t state);
 extern int device_power_down(pm_message_t state);
 extern int device_suspend(pm_message_t state);
 extern int device_prepare_suspend(pm_message_t state);
-- 
cgit v1.2.3


From cd97f39b7cdf1c8a9c9f52865eec795b7f0c811d Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 24 Feb 2009 19:19:49 +0100
Subject: i2c-dev: Clarify the unit of ioctl I2C_TIMEOUT

The unit in which user-space can set the bus timeout value is jiffies
for historical reasons (back when HZ was always 100.) This is however
not good because user-space doesn't know how long a jiffy lasts. The
timeout value should instead be set in a fixed time unit. Given the
original value of HZ, this unit should be 10 ms, for compatibility.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Wolfram Sang <w.sang@pengutronix.de>
---
 include/linux/i2c-dev.h | 2 +-
 include/linux/i2c.h     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-dev.h b/include/linux/i2c-dev.h
index 311315b56b6..fd53bfd2647 100644
--- a/include/linux/i2c-dev.h
+++ b/include/linux/i2c-dev.h
@@ -33,7 +33,7 @@
  */
 #define I2C_RETRIES	0x0701	/* number of times a device address should
 				   be polled when not acknowledging */
-#define I2C_TIMEOUT	0x0702	/* set timeout in jiffies - call with int */
+#define I2C_TIMEOUT	0x0702	/* set timeout in units of 10 ms */
 
 /* NOTE: Slave address is 7 or 10 bits, but 10-bit addresses
  * are NOT supported! (due to code brokenness)
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index fcfbfea3af7..c86c3b07604 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -361,7 +361,7 @@ struct i2c_adapter {
 	struct mutex bus_lock;
 	struct mutex clist_lock;
 
-	int timeout;
+	int timeout;			/* in jiffies */
 	int retries;
 	struct device dev;		/* the adapter device */
 
-- 
cgit v1.2.3


From 4ab0d47d0ab311eb181532c1ecb6d02905685071 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Tue, 24 Feb 2009 17:35:12 -0800
Subject: gpu/drm, x86, PAT: io_mapping_create_wc and resource_size_t

io_mapping_create_wc should take a resource_size_t parameter in place of
unsigned long. With unsigned long, there will be no way to map greater than 4GB
address in i386/32 bit.

On x86, greater than 4GB addresses cannot be mapped on i386 without PAE. Return
error for such a case.

Patch also adds a structure for io_mapping, that saves the base, size and
type on HAVE_ATOMIC_IOMAP archs, that can be used to verify the offset on
io_mapping_map calls.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Eric Anholt <eric@anholt.net>
Cc: Keith Packard <keithp@keithp.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/io-mapping.h | 46 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index 82df31726a5..cbc2f0cd631 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -30,11 +30,14 @@
  * See Documentation/io_mapping.txt
  */
 
-/* this struct isn't actually defined anywhere */
-struct io_mapping;
-
 #ifdef CONFIG_HAVE_ATOMIC_IOMAP
 
+struct io_mapping {
+	resource_size_t base;
+	unsigned long size;
+	pgprot_t prot;
+};
+
 /*
  * For small address space machines, mapping large objects
  * into the kernel virtual space isn't practical. Where
@@ -43,23 +46,40 @@ struct io_mapping;
  */
 
 static inline struct io_mapping *
-io_mapping_create_wc(unsigned long base, unsigned long size)
+io_mapping_create_wc(resource_size_t base, unsigned long size)
 {
-	return (struct io_mapping *) base;
+	struct io_mapping *iomap;
+
+	if (!is_io_mapping_possible(base, size))
+		return NULL;
+
+	iomap = kmalloc(sizeof(*iomap), GFP_KERNEL);
+	if (!iomap)
+		return NULL;
+
+	iomap->base = base;
+	iomap->size = size;
+	iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL));
+	return iomap;
 }
 
 static inline void
 io_mapping_free(struct io_mapping *mapping)
 {
+	kfree(mapping);
 }
 
 /* Atomic map/unmap */
 static inline void *
 io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset)
 {
-	offset += (unsigned long) mapping;
-	return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0,
-				     __pgprot(__PAGE_KERNEL_WC));
+	resource_size_t phys_addr;
+	unsigned long pfn;
+
+	BUG_ON(offset >= mapping->size);
+	phys_addr = mapping->base + offset;
+	pfn = (unsigned long) (phys_addr >> PAGE_SHIFT);
+	return iomap_atomic_prot_pfn(pfn, KM_USER0, mapping->prot);
 }
 
 static inline void
@@ -71,8 +91,9 @@ io_mapping_unmap_atomic(void *vaddr)
 static inline void *
 io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset)
 {
-	offset += (unsigned long) mapping;
-	return ioremap_wc(offset, PAGE_SIZE);
+	BUG_ON(offset >= mapping->size);
+	resource_size_t phys_addr = mapping->base + offset;
+	return ioremap_wc(phys_addr, PAGE_SIZE);
 }
 
 static inline void
@@ -83,9 +104,12 @@ io_mapping_unmap(void *vaddr)
 
 #else
 
+/* this struct isn't actually defined anywhere */
+struct io_mapping;
+
 /* Create the io_mapping object*/
 static inline struct io_mapping *
-io_mapping_create_wc(unsigned long base, unsigned long size)
+io_mapping_create_wc(resource_size_t base, unsigned long size)
 {
 	return (struct io_mapping *) ioremap_wc(base, size);
 }
-- 
cgit v1.2.3


From 8fed43684174b68f04d01d1210fd00536af790df Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Wed, 25 Feb 2009 20:28:24 +0100
Subject: ide: fix refcounting in device drivers

During host driver module removal del_gendisk() results in a final
put on drive->gendev and freeing the drive by drive_release_dev().

Convert device drivers from using struct kref to use struct device
so device driver's object holds reference on ->gendev and prevents
drive from prematurely going away.

Also fix ->remove methods to not erroneously drop reference on a
host driver by using only put_device() instead of ide*_put().

Reported-by: Stanislaw Gruszka <stf_xl@wp.pl>
Tested-by: Stanislaw Gruszka <stf_xl@wp.pl>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 194da5a4b0d..fe235b65207 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -663,7 +663,7 @@ typedef struct ide_drive_s ide_drive_t;
 #define to_ide_device(dev)		container_of(dev, ide_drive_t, gendev)
 
 #define to_ide_drv(obj, cont_type)	\
-	container_of(obj, struct cont_type, kref)
+	container_of(obj, struct cont_type, dev)
 
 #define ide_drv_g(disk, cont_type)	\
 	container_of((disk)->private_data, struct cont_type, driver)
-- 
cgit v1.2.3


From a682604838763981613e42015cd0e39f2989d6bb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 25 Feb 2009 18:03:42 -0800
Subject: rcu: Teach RCU that idle task is not quiscent state at boot

This patch fixes a bug located by Vegard Nossum with the aid of
kmemcheck, updated based on review comments from Nick Piggin,
Ingo Molnar, and Andrew Morton.  And cleans up the variable-name
and function-name language.  ;-)

The boot CPU runs in the context of its idle thread during boot-up.
During this time, idle_cpu(0) will always return nonzero, which will
fool Classic and Hierarchical RCU into deciding that a large chunk of
the boot-up sequence is a big long quiescent state.  This in turn causes
RCU to prematurely end grace periods during this time.

This patch changes the rcutree.c and rcuclassic.c rcu_check_callbacks()
function to ignore the idle task as a quiescent state until the
system has started up the scheduler in rest_init(), introducing a
new non-API function rcu_idle_now_means_idle() to inform RCU of this
transition.  RCU maintains an internal rcu_idle_cpu_truthful variable
to track this state, which is then used by rcu_check_callback() to
determine if it should believe idle_cpu().

Because this patch has the effect of disallowing RCU grace periods
during long stretches of the boot-up sequence, this patch also introduces
Josh Triplett's UP-only optimization that makes synchronize_rcu() be a
no-op if num_online_cpus() returns 1.  This allows boot-time code that
calls synchronize_rcu() to proceed normally.  Note, however, that RCU
callbacks registered by call_rcu() will likely queue up until later in
the boot sequence.  Although rcuclassic and rcutree can also use this
same optimization after boot completes, rcupreempt must restrict its
use of this optimization to the portion of the boot sequence before the
scheduler starts up, given that an rcupreempt RCU read-side critical
section may be preeempted.

In addition, this patch takes Nick Piggin's suggestion to make the
system_state global variable be __read_mostly.

Changes since v4:

o	Changes the name of the introduced function and variable to
	be less emotional.  ;-)

Changes since v3:

o	WARN_ON(nr_context_switches() > 0) to verify that RCU
	switches out of boot-time mode before the first context
	switch, as suggested by Nick Piggin.

Changes since v2:

o	Created rcu_blocking_is_gp() internal-to-RCU API that
	determines whether a call to synchronize_rcu() is itself
	a grace period.

o	The definition of rcu_blocking_is_gp() for rcuclassic and
	rcutree checks to see if but a single CPU is online.

o	The definition of rcu_blocking_is_gp() for rcupreempt
	checks to see both if but a single CPU is online and if
	the system is still in early boot.

	This allows rcupreempt to again work correctly if running
	on a single CPU after booting is complete.

o	Added check to rcupreempt's synchronize_sched() for there
	being but one online CPU.

Tested all three variants both SMP and !SMP, booted fine, passed a short
rcutorture test on both x86 and Power.

Located-by: Vegard Nossum <vegard.nossum@gmail.com>
Tested-by: Vegard Nossum <vegard.nossum@gmail.com>
Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcuclassic.h |  6 ++++++
 include/linux/rcupdate.h   |  4 ++++
 include/linux/rcupreempt.h | 15 +++++++++++++++
 include/linux/rcutree.h    |  6 ++++++
 4 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index f3f697df1d7..80044a4f3ab 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -181,4 +181,10 @@ extern long rcu_batches_completed_bh(void);
 #define rcu_enter_nohz()	do { } while (0)
 #define rcu_exit_nohz()		do { } while (0)
 
+/* A context switch is a grace period for rcuclassic. */
+static inline int rcu_blocking_is_gp(void)
+{
+	return num_online_cpus() == 1;
+}
+
 #endif /* __LINUX_RCUCLASSIC_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 921340a7b71..528343e6da5 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -52,6 +52,9 @@ struct rcu_head {
 	void (*func)(struct rcu_head *head);
 };
 
+/* Internal to kernel, but needed by rcupreempt.h. */
+extern int rcu_scheduler_active;
+
 #if defined(CONFIG_CLASSIC_RCU)
 #include <linux/rcuclassic.h>
 #elif defined(CONFIG_TREE_RCU)
@@ -265,6 +268,7 @@ extern void rcu_barrier_sched(void);
 
 /* Internal to kernel */
 extern void rcu_init(void);
+extern void rcu_scheduler_starting(void);
 extern int rcu_needs_cpu(int cpu);
 
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index 3e05c09b54a..74304b4538d 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -142,4 +142,19 @@ static inline void rcu_exit_nohz(void)
 #define rcu_exit_nohz()		do { } while (0)
 #endif /* CONFIG_NO_HZ */
 
+/*
+ * A context switch is a grace period for rcupreempt synchronize_rcu()
+ * only during early boot, before the scheduler has been initialized.
+ * So, how the heck do we get a context switch?  Well, if the caller
+ * invokes synchronize_rcu(), they are willing to accept a context
+ * switch, so we simply pretend that one happened.
+ *
+ * After boot, there might be a blocked or preempted task in an RCU
+ * read-side critical section, so we cannot then take the fastpath.
+ */
+static inline int rcu_blocking_is_gp(void)
+{
+	return num_online_cpus() == 1 && !rcu_scheduler_active;
+}
+
 #endif /* __LINUX_RCUPREEMPT_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d4368b7975c..a722fb67bb2 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -326,4 +326,10 @@ static inline void rcu_exit_nohz(void)
 }
 #endif /* CONFIG_NO_HZ */
 
+/* A context switch is a grace period for rcutree. */
+static inline int rcu_blocking_is_gp(void)
+{
+	return num_online_cpus() == 1;
+}
+
 #endif /* __LINUX_RCUTREE_H */
-- 
cgit v1.2.3


From 1e42807918d17e8c93bf14fbb74be84b141334c1 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Mon, 23 Feb 2009 09:03:10 +0100
Subject: block: reduce stack footprint of blk_recount_segments()

blk_recalc_rq_segments() requires a request structure passed in, which
we don't have from blk_recount_segments(). So the latter allocates one on
the stack, using > 400 bytes of stack for that. This can cause us to spill
over one page of stack from ext4 at least:

 0)     4560     400   blk_recount_segments+0x43/0x62
 1)     4160      32   bio_phys_segments+0x1c/0x24
 2)     4128      32   blk_rq_bio_prep+0x2a/0xf9
 3)     4096      32   init_request_from_bio+0xf9/0xfe
 4)     4064     112   __make_request+0x33c/0x3f6
 5)     3952     144   generic_make_request+0x2d1/0x321
 6)     3808      64   submit_bio+0xb9/0xc3
 7)     3744      48   submit_bh+0xea/0x10e
 8)     3696     368   ext4_mb_init_cache+0x257/0xa6a [ext4]
 9)     3328     288   ext4_mb_regular_allocator+0x421/0xcd9 [ext4]
10)     3040     160   ext4_mb_new_blocks+0x211/0x4b4 [ext4]
11)     2880     336   ext4_ext_get_blocks+0xb61/0xd45 [ext4]
12)     2544      96   ext4_get_blocks_wrap+0xf2/0x200 [ext4]
13)     2448      80   ext4_da_get_block_write+0x6e/0x16b [ext4]
14)     2368     352   mpage_da_map_blocks+0x7e/0x4b3 [ext4]
15)     2016     352   ext4_da_writepages+0x2ce/0x43c [ext4]
16)     1664      32   do_writepages+0x2d/0x3c
17)     1632     144   __writeback_single_inode+0x162/0x2cd
18)     1488      96   generic_sync_sb_inodes+0x1e3/0x32b
19)     1392      16   sync_sb_inodes+0xe/0x10
20)     1376      48   writeback_inodes+0x69/0xb3
21)     1328     208   balance_dirty_pages_ratelimited_nr+0x187/0x2f9
22)     1120     224   generic_file_buffered_write+0x1d4/0x2c4
23)      896     176   __generic_file_aio_write_nolock+0x35f/0x393
24)      720      80   generic_file_aio_write+0x6c/0xc8
25)      640      80   ext4_file_write+0xa9/0x137 [ext4]
26)      560     320   do_sync_write+0xf0/0x137
27)      240      48   vfs_write+0xb3/0x13c
28)      192      64   sys_write+0x4c/0x74
29)      128     128   system_call_fastpath+0x16/0x1b

Split the segment counting out into a __blk_recalc_rq_segments() helper
to avoid allocating an onstack request just for checking the physical
segment count.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/blkdev.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dcaa0fd84b0..465d6babc84 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -708,6 +708,8 @@ struct req_iterator {
 };
 
 /* This should not be used directly - use rq_for_each_segment */
+#define for_each_bio(_bio)		\
+	for (; _bio; _bio = _bio->bi_next)
 #define __rq_for_each_bio(_bio, rq)	\
 	if ((rq->bio))			\
 		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
-- 
cgit v1.2.3


From 54e991242850edc8c53f71fa5aa3ba7a93ce38f5 Mon Sep 17 00:00:00 2001
From: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Date: Fri, 27 Feb 2009 15:13:54 +0530
Subject: sched: don't allow setuid to succeed if the user does not have rt
 bandwidth

Impact: fix hung task with certain (non-default) rt-limit settings

Corey Hickey reported that on using setuid to change the uid of a
rt process, the process would be unkillable and not be running.
This is because there was no rt runtime for that user group. Add
in a check to see if a user can attach an rt task to its task group.
On failure, return EINVAL, which is also returned in
CONFIG_CGROUP_SCHED.

Reported-by: Corey Hickey <bugfood-ml@fatooh.org>
Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8981e52c714..8c216e057c9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2291,9 +2291,13 @@ extern long sched_group_rt_runtime(struct task_group *tg);
 extern int sched_group_set_rt_period(struct task_group *tg,
 				      long rt_period_us);
 extern long sched_group_rt_period(struct task_group *tg);
+extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
 #endif
 #endif
 
+extern int task_can_switch_user(struct user_struct *up,
+					struct task_struct *tsk);
+
 #ifdef CONFIG_TASK_XACCT
 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
 {
-- 
cgit v1.2.3


From 5170836679185357dc1b7660bad13287b39e1e33 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 27 Feb 2009 14:03:03 -0800
Subject: Fix recursive lock in free_uid()/free_user_ns()

free_uid() and free_user_ns() are corecursive when CONFIG_USER_SCHED=n,
but free_user_ns() is called from free_uid() by way of uid_hash_remove(),
which requires uidhash_lock to be held.  free_user_ns() then calls
free_uid() to complete the destruction.

Fix this by deferring the destruction of the user_namespace.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/user_namespace.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 315bcd37522..cc4f45361db 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -13,6 +13,7 @@ struct user_namespace {
 	struct kref		kref;
 	struct hlist_head	uidhash_table[UIDHASH_SZ];
 	struct user_struct	*creator;
+	struct work_struct	destroyer;
 };
 
 extern struct user_namespace init_user_ns;
-- 
cgit v1.2.3


From 5c2522218059ca1f4174a568923b988aad3ddfda Mon Sep 17 00:00:00 2001
From: Chris Leech <christopher.leech@intel.com>
Date: Fri, 27 Feb 2009 10:01:36 +0000
Subject: net headers: cleanup dcbnl.h

1) add an include for <linux/types.h>
2) change dcbmsg.dcb_family from unsigned char to __u8 to be more
   consistent with use of kernel types

Signed-off-by: Chris Leech <christopher.leech@intel.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index b0ef274e003..7d2e1000618 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -20,10 +20,12 @@
 #ifndef __LINUX_DCBNL_H__
 #define __LINUX_DCBNL_H__
 
+#include <linux/types.h>
+
 #define DCB_PROTO_VERSION 1
 
 struct dcbmsg {
-	unsigned char      dcb_family;
+	__u8               dcb_family;
 	__u8               cmd;
 	__u16              dcb_pad;
 };
-- 
cgit v1.2.3


From 709ab3261e3ed789c0bb31c6ab53c9eccb276522 Mon Sep 17 00:00:00 2001
From: Chris Leech <christopher.leech@intel.com>
Date: Fri, 27 Feb 2009 10:01:42 +0000
Subject: net headers: export dcbnl.h

The DCB netlink interface is required for building the userspace tools
available at e1000.sourceforge.net

Signed-off-by: Chris Leech <christopher.leech@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index b97cdc516a8..106c3ba5084 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -52,6 +52,7 @@ header-y += const.h
 header-y += cgroupstats.h
 header-y += cramfs_fs.h
 header-y += cycx_cfm.h
+header-y += dcbnl.h
 header-y += dlmconstants.h
 header-y += dlm_device.h
 header-y += dlm_netlink.h
-- 
cgit v1.2.3


From 5ce04e3de8c36ba37c56e94e3c4dc7973c7f546c Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Sun, 1 Mar 2009 08:53:27 -0800
Subject: fix warning in io_mapping_map_wc()

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/io-mapping.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index cbc2f0cd631..0adb0f91568 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -91,8 +91,11 @@ io_mapping_unmap_atomic(void *vaddr)
 static inline void *
 io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset)
 {
+	resource_size_t phys_addr;
+
 	BUG_ON(offset >= mapping->size);
-	resource_size_t phys_addr = mapping->base + offset;
+	phys_addr = mapping->base + offset;
+
 	return ioremap_wc(phys_addr, PAGE_SIZE);
 }
 
-- 
cgit v1.2.3


From 9d40bbda599def1e1d155d7f7dca14fe8744bd2b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 4 Mar 2009 23:46:25 -0800
Subject: vlan: Fix vlan-in-vlan crashes.

As analyzed by Patrick McHardy, vlan needs to reset it's
netdev_ops pointer in it's ->init() function but this
leaves the compat method pointers stale.

Add a netdev_resync_ops() and call it from the vlan code.

Any other driver which changes ->netdev_ops after register_netdevice()
will need to call this new function after doing so too.

With help from Patrick McHardy.

Tested-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ec54785d34f..659366734f3 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1079,6 +1079,7 @@ extern void		synchronize_net(void);
 extern int 		register_netdevice_notifier(struct notifier_block *nb);
 extern int		unregister_netdevice_notifier(struct notifier_block *nb);
 extern int		init_dummy_netdev(struct net_device *dev);
+extern void		netdev_resync_ops(struct net_device *dev);
 
 extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
 extern struct net_device	*dev_get_by_index(struct net *net, int ifindex);
-- 
cgit v1.2.3


From e7d3ef13d52a126438f687a1a32da65ff926ed57 Mon Sep 17 00:00:00 2001
From: Stuart Hayes <stuart_hayes@dell.com>
Date: Wed, 4 Mar 2009 11:59:46 -0800
Subject: libata: change drive ready wait after hard reset to 5s

This fixes problems during resume with drives that take longer than 1s to
be ready.  The ATA-6 spec appears to allow 5 seconds for a drive to be
ready.

On one affected system, this patch changes "PM: resume devices took..."
message from 17 seconds to 4 seconds, and gets rid of a lot of ugly
timeout/error messages.

Without this patch, the libata code moves on after 1s, tries to send a
soft reset (which the drive doesn't see because it isn't ready) which also
times out, then an IDENTIFY command is sent to the drive which times out,
and finally the error handler will try to send another hard reset which
will finally get things working.

Signed-off-by: Stuart Hayes <stuart_hayes@dell.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/libata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5d87bc09a1f..dd818c7decd 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -275,7 +275,7 @@ enum {
 	 * advised to wait only for the following duration before
 	 * doing SRST.
 	 */
-	ATA_TMOUT_PMP_SRST_WAIT	= 1000,
+	ATA_TMOUT_PMP_SRST_WAIT	= 5000,
 
 	/* ATA bus states */
 	BUS_UNKNOWN		= 0,
-- 
cgit v1.2.3


From 5825627c9463581fd9e70f8285685889ae5bb9bb Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 27 Feb 2009 17:35:43 +0900
Subject: libata: fix dma_unmap_sg misuse

libata passes the returned value of dma_map_sg() to
dma_unmap_sg(),which is the misuse of dma_unmap_sg().

DMA-mapping.txt says:

To unmap a scatterlist, just call:

	pci_unmap_sg(pdev, sglist, nents, direction);

Again, make sure DMA activity has already finished.

PLEASE NOTE:  The 'nents' argument to the pci_unmap_sg call must be
              the _same_ one you passed into the pci_map_sg call,
	      it should _NOT_ be the 'count' value _returned_ from the
              pci_map_sg call.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index dd818c7decd..fbf064e13ad 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -530,6 +530,7 @@ struct ata_queued_cmd {
 	unsigned long		flags;		/* ATA_QCFLAG_xxx */
 	unsigned int		tag;
 	unsigned int		n_elem;
+	unsigned int		orig_n_elem;
 
 	int			dma_dir;
 
-- 
cgit v1.2.3


From 84bda12af31f930e4200c5244aa111de2485d7b0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 2 Mar 2009 18:53:26 +0900
Subject: libata: align ap->sector_buf

ap->sector_buf is used as DMA target and should at least be aligned on
cacheline.  This caused problems on some embedded machines.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 include/linux/libata.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index fbf064e13ad..dc18b87ed72 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -751,7 +751,8 @@ struct ata_port {
 	acpi_handle		acpi_handle;
 	struct ata_acpi_gtm	__acpi_init_gtm; /* use ata_acpi_init_gtm() */
 #endif
-	u8			sector_buf[ATA_SECT_SIZE]; /* owned by EH */
+	/* owned by EH */
+	u8			sector_buf[ATA_SECT_SIZE] ____cacheline_aligned;
 };
 
 /* The following initializer overrides a method to NULL whether one of
-- 
cgit v1.2.3


From 849d7130001ab740a5a4778a561049841fdd77c9 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stf_xl@wp.pl>
Date: Thu, 5 Mar 2009 16:10:57 +0100
Subject: ide: allow to wrap interrupt handler

Signed-off-by: Stanislaw Gruszka <stf_xl@wp.pl>
Cc: Andrew Victor <linux@maxim.org.za>
[bart: minor checkpatch.pl / CodingStyle fixups]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index fe235b65207..e0cedfe9fad 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -866,6 +866,7 @@ struct ide_host {
 	unsigned int	n_ports;
 	struct device	*dev[2];
 	unsigned int	(*init_chipset)(struct pci_dev *);
+	irq_handler_t	irq_handler;
 	unsigned long	host_flags;
 	void		*host_priv;
 	ide_hwif_t	*cur_port;	/* for hosts requiring serialization */
-- 
cgit v1.2.3


From ebcad5aaea26da3cb2ca90b7f31a67a027eb60db Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 5 Mar 2009 16:10:59 +0100
Subject: remove stale comment from <linux/hdreg.h>

HDIO_GET_IDENTITY returns 256 words currently.

Noticed by Norman Diamond.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/hdreg.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hdreg.h b/include/linux/hdreg.h
index c37e9241fae..ed21bd3dbd2 100644
--- a/include/linux/hdreg.h
+++ b/include/linux/hdreg.h
@@ -511,7 +511,6 @@ struct hd_driveid {
 	unsigned short	words69_70[2];	/* reserved words 69-70
 					 * future command overlap and queuing
 					 */
-	/* HDIO_GET_IDENTITY currently returns only words 0 through 70 */
 	unsigned short	words71_74[4];	/* reserved words 71-74
 					 * for IDENTIFY PACKET DEVICE command
 					 */
-- 
cgit v1.2.3


From d42ad15b759d05a87f22b484af63987eff38ea88 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Thu, 5 Mar 2009 17:20:55 +0100
Subject: ata: add CFA specific identify data words

Declare CFA specific identify data words 162 and 163 for future use.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
[bart: update patch summary/description]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ata.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 08a86d5cdf1..9a061accd8b 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -89,6 +89,8 @@ enum {
 	ATA_ID_DLF		= 128,
 	ATA_ID_CSFO		= 129,
 	ATA_ID_CFA_POWER	= 160,
+	ATA_ID_CFA_KEY_MGMT	= 162,
+	ATA_ID_CFA_MODES	= 163,
 	ATA_ID_ROT_SPEED	= 217,
 	ATA_ID_PIO4		= (1 << 1),
 
-- 
cgit v1.2.3


From ab96ddec7213004b632d24dc2cdcd2df5f16f50b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 7 Mar 2009 13:39:22 -0800
Subject: Input: serio - fix protocol number for TouchIT213

Protocol 0x37 has been reserved for iNexio devices and Sahara
was supposed to get 0x38.

Reported-by: Claudio Nieder <private@claudio.ch>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 include/linux/serio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/serio.h b/include/linux/serio.h
index 1bcb357a01a..e0417e4d3f1 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -212,7 +212,7 @@ static inline void serio_unpin_driver(struct serio *serio)
 #define SERIO_FUJITSU	0x35
 #define SERIO_ZHENHUA	0x36
 #define SERIO_INEXIO	0x37
-#define SERIO_TOUCHIT213	0x37
+#define SERIO_TOUCHIT213	0x38
 #define SERIO_W8001	0x39
 
 #endif
-- 
cgit v1.2.3


From 129f8ae9b1b5be94517da76009ea956e89104ce8 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Mon, 9 Mar 2009 15:07:33 -0400
Subject: Revert "[CPUFREQ] Disable sysfs ui for p4-clockmod."

This reverts commit e088e4c9cdb618675874becb91b2fd581ee707e6.

Removing the sysfs interface for p4-clockmod was flagged as a
regression in bug 12826.

Course of action:
 - Find out the remaining causes of overheating, and fix them
   if possible. ACPI should be doing the right thing automatically.
   If it isn't, we need to fix that.
 - mark p4-clockmod ui as deprecated
 - try again with the removal in six months.

It's not really feasible to printk about the deprecation, because
it needs to happen at all the sysfs entry points, which means adding
a lot of strcmp("p4-clockmod".. calls to the core, which.. bleuch.

Signed-off-by: Dave Jones <davej@redhat.com>
---
 include/linux/cpufreq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 384b38d3e8e..161042746af 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -234,7 +234,6 @@ struct cpufreq_driver {
 	int	(*suspend)	(struct cpufreq_policy *policy, pm_message_t pmsg);
 	int	(*resume)	(struct cpufreq_policy *policy);
 	struct freq_attr	**attr;
-	bool			hide_interface;
 };
 
 /* flags */
-- 
cgit v1.2.3


From ae46141ff08f1965b17c531b571953c39ce8b9e2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 10 Mar 2009 20:33:18 -0400
Subject: NFSv3: Fix posix ACL code

Fix a memory leak due to allocation in the XDR layer. In cases where the
RPC call needs to be retransmitted, we end up allocating new pages without
clearing the old ones. Fix this by moving the allocation into
nfs3_proc_setacls().

Also fix an issue discovered by Kevin Rudd, whereby the amount of memory
reserved for the acls in the xdr_buf->head was miscalculated, and causing
corruption.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 2 ++
 include/linux/nfsacl.h  | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a550b528319..2e5f00066af 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -406,6 +406,8 @@ struct nfs3_setaclargs {
 	int			mask;
 	struct posix_acl *	acl_access;
 	struct posix_acl *	acl_default;
+	size_t			len;
+	unsigned int		npages;
 	struct page **		pages;
 };
 
diff --git a/include/linux/nfsacl.h b/include/linux/nfsacl.h
index 54487a99beb..43011b69297 100644
--- a/include/linux/nfsacl.h
+++ b/include/linux/nfsacl.h
@@ -37,6 +37,9 @@
 #define NFSACL_MAXPAGES		((2*(8+12*NFS_ACL_MAX_ENTRIES) + PAGE_SIZE-1) \
 				 >> PAGE_SHIFT)
 
+#define NFS_ACL_MAX_ENTRIES_INLINE	(5)
+#define NFS_ACL_INLINE_BUFSIZE	((2*(2+3*NFS_ACL_MAX_ENTRIES_INLINE)) << 2)
+
 static inline unsigned int
 nfsacl_size(struct posix_acl *acl_access, struct posix_acl *acl_default)
 {
-- 
cgit v1.2.3


From 78851e1aa4c3b796d5f0bb11b445016726302b44 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 10 Mar 2009 20:33:19 -0400
Subject: NLM: Shrink the IPv4-only version of nlm_cmp_addr()

Clean up/micro-optimatization:  Make the AF_INET-only version of
nlm_cmp_addr() smaller.  This matches the style of
nlm_privileged_requester(), and makes the AF_INET-only version of
nlm_cmp_addr() nearly the same size as it was before IPv6 support.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/lockd/lockd.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index aa6fe7026de..51855dfd8ad 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -346,6 +346,7 @@ static inline int __nlm_cmp_addr4(const struct sockaddr *sap1,
 	return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
 				  const struct sockaddr *sap2)
 {
@@ -353,6 +354,13 @@ static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
 	const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2;
 	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
 }
+#else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+static inline int __nlm_cmp_addr6(const struct sockaddr *sap1,
+				  const struct sockaddr *sap2)
+{
+	return 0;
+}
+#endif	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
 
 /*
  * Compare two host addresses
-- 
cgit v1.2.3


From 76e6eee03353f01bfca707d4dbb1f10a4ee27dc0 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Mar 2009 14:35:43 -0600
Subject: cpumask: tsk_cpumask for accessing the struct task_struct's
 cpus_allowed.

This allows us to change the representation (to a dangling bitmap or
cpumask_var_t) without breaking all the callers: they can use
tsk_cpumask() now and won't see a difference as the changes roll into
linux-next.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/sched.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8c216e057c9..011db2f4c94 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1419,6 +1419,9 @@ struct task_struct {
 #endif
 };
 
+/* Future-safe accessor for struct task_struct's cpus_allowed. */
+#define tsk_cpumask(tsk) (&(tsk)->cpus_allowed)
+
 /*
  * Priority of a process goes from 0..MAX_PRIO-1, valid RT
  * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
-- 
cgit v1.2.3


From 45e575ab9bfada5a5ef1b6174f8e749b1ecf0864 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Thu, 12 Mar 2009 14:35:44 -0600
Subject: cpumask: mm_cpumask for accessing the struct mm_struct's cpu_vm_mask.

This allows us to change the representation (to a dangling bitmap or
cpumask_var_t) without breaking all the callers: they can use
mm_cpumask() now and won't see a difference as the changes roll into
linux-next.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/mm_types.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 92915e81443..d84feb7bdbf 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -276,4 +276,7 @@ struct mm_struct {
 #endif
 };
 
+/* Future-safe accessor for struct mm_struct's cpu_vm_mask. */
+#define mm_cpumask(mm) (&(mm)->cpu_vm_mask)
+
 #endif /* _LINUX_MM_TYPES_H */
-- 
cgit v1.2.3


From 446c92b2901bedb3725d29b4e73def8aba623ffc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Thu, 12 Mar 2009 18:03:16 +0100
Subject: [ARM] 5421/1: ftrace: fix crash due to tracing of __naked functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a fix for the following crash observed in 2.6.29-rc3:
http://lkml.org/lkml/2009/1/29/150

On ARM it doesn't make sense to trace a naked function because then
mcount is called without stack and frame pointer being set up and there
is no chance to restore the lr register to the value before mcount was
called.

Reported-by: Matthias Kaehlcke <matthias@kaehlcke.net>
Tested-by: Matthias Kaehlcke <matthias@kaehlcke.net>

Cc: Abhishek Sagar <sagar.abhishek@gmail.com>
Cc: Steven Rostedt <rostedt@home.goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/compiler-gcc.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 1514d534dee..a3ed7cb8ca3 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -52,7 +52,15 @@
 #define __deprecated			__attribute__((deprecated))
 #define __packed			__attribute__((packed))
 #define __weak				__attribute__((weak))
-#define __naked				__attribute__((naked))
+
+/*
+ * it doesn't make sense on ARM (currently the only user of __naked) to trace
+ * naked functions because then mcount is called without stack and frame pointer
+ * being set up and there is no chance to restore the lr register to the value
+ * before mcount was called.
+ */
+#define __naked				__attribute__((naked)) notrace
+
 #define __noreturn			__attribute__((noreturn))
 
 /*
-- 
cgit v1.2.3


From 5d82720a7f41f0c877e026c7d17e3bf20ccdbae0 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 13 Mar 2009 21:16:13 +0100
Subject: ide: save the returned value of dma_map_sg

dma_map_sg could return a value different to 'nents' argument of
dma_map_sg so the ide stack needs to save it for the later usage
(e.g. for_each_sg).

The ide stack also needs to save the original sg_nents value for
pci_unmap_sg.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
[bart: backport to Linus' tree]
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index e0cedfe9fad..25087aead65 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -797,6 +797,7 @@ typedef struct hwif_s {
 	struct scatterlist *sg_table;
 	int sg_max_nents;		/* Maximum number of entries in it */
 	int sg_nents;			/* Current number of entries in it */
+	int orig_sg_nents;
 	int sg_dma_direction;		/* dma transfer direction */
 
 	/* data phase of the active command (currently only valid for PIO/DMA) */
-- 
cgit v1.2.3


From 87092698c665e0a358caf9825ae13114343027e8 Mon Sep 17 00:00:00 2001
From: un'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Mon, 9 Mar 2009 10:40:52 +0100
Subject: block: Add gfp_mask parameter to bio_integrity_clone()

Stricter gfp_mask might be required for clone allocation.
For example, request-based dm may clone bio in interrupt context
so it has to use GFP_ATOMIC.

Signed-off-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/bio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 1b16108a541..d8bd43bfdcf 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -531,7 +531,7 @@ extern void bio_integrity_endio(struct bio *, int);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
 extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
-extern int bio_integrity_clone(struct bio *, struct bio *, struct bio_set *);
+extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *);
 extern int bioset_integrity_create(struct bio_set *, int);
 extern void bioset_integrity_free(struct bio_set *);
 extern void bio_integrity_init_slab(void);
@@ -542,7 +542,7 @@ extern void bio_integrity_init_slab(void);
 #define bioset_integrity_create(a, b)	(0)
 #define bio_integrity_prep(a)		(0)
 #define bio_integrity_enabled(a)	(0)
-#define bio_integrity_clone(a, b, c)	(0)
+#define bio_integrity_clone(a, b, c,d )	(0)
 #define bioset_integrity_free(a)	do { } while (0)
 #define bio_integrity_free(a, b)	do { } while (0)
 #define bio_integrity_endio(a, b)	do { } while (0)
-- 
cgit v1.2.3


From 76a67ec6fb79ff3570dcb5342142c16098299911 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Mon, 16 Mar 2009 18:34:20 -0400
Subject: nfsd: nfsd should drop CAP_MKNOD for non-root

Since creating a device node is normally an operation requiring special
privilege, Igor Zhbanov points out that it is surprising (to say the
least) that a client can, for example, create a device node on a
filesystem exported with root_squash.

So, make sure CAP_MKNOD is among the capabilities dropped when an nfsd
thread handles a request from a non-root user.

Reported-by: Igor Zhbanov <izh1979@gmail.com>
Cc: stable@kernel.org
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/capability.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 1b987255613..4864a43b2b4 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -393,8 +393,10 @@ struct cpu_vfs_cap_data {
 # define CAP_FULL_SET     ((kernel_cap_t){{ ~0, ~0 }})
 # define CAP_INIT_EFF_SET ((kernel_cap_t){{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }})
 # define CAP_FS_SET       ((kernel_cap_t){{ CAP_FS_MASK_B0, CAP_FS_MASK_B1 } })
-# define CAP_NFSD_SET     ((kernel_cap_t){{ CAP_FS_MASK_B0|CAP_TO_MASK(CAP_SYS_RESOURCE), \
-					CAP_FS_MASK_B1 } })
+# define CAP_NFSD_SET     ((kernel_cap_t){{ CAP_FS_MASK_B0 \
+					    | CAP_TO_MASK(CAP_SYS_RESOURCE) \
+					    | CAP_TO_MASK(CAP_MKNOD), \
+					    CAP_FS_MASK_B1 } })
 
 #endif /* _KERNEL_CAPABILITY_U32S != 2 */
 
-- 
cgit v1.2.3