-rw-r--r--Documentation/DocBook/uio-howto.tmpl4
-rw-r--r--Documentation/fb/pvr2fb.txt22
-rw-r--r--Documentation/i386/zero-page.txt10
-rw-r--r--Documentation/kbuild/kconfig-language.txt9
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--Documentation/lguest/Makefile4
-rw-r--r--Documentation/memory-hotplug.txt322
-rw-r--r--Documentation/sched-design-CFS.txt2
-rw-r--r--Documentation/sched-nice-design.txt108
-rw-r--r--Documentation/sysrq.txt4
-rw-r--r--Documentation/vm/slabinfo.c2
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/alpha/kernel/sys_titan.c27
-rw-r--r--arch/blackfin/kernel/init_task.c1
-rw-r--r--arch/blackfin/kernel/process.c2
-rw-r--r--arch/blackfin/kernel/sys_bfin.c1
-rw-r--r--arch/blackfin/kernel/traps.c1
-rw-r--r--arch/cris/Kconfig2
-rw-r--r--arch/frv/kernel/entry.S1
-rw-r--r--arch/i386/kernel/alternative.c37
-rw-r--r--arch/i386/kernel/apic.c10
-rw-r--r--arch/i386/kernel/cpu/amd.c7
-rw-r--r--arch/i386/kernel/doublefault.c13
-rw-r--r--arch/i386/kernel/head.S4
-rw-r--r--arch/i386/kernel/paravirt.c52
-rw-r--r--arch/i386/kernel/vmi.c35
-rw-r--r--arch/i386/mm/pageattr.c2
-rw-r--r--arch/i386/pci/common.c23
-rw-r--r--arch/i386/pci/fixup.c6
-rw-r--r--arch/i386/pci/irq.c5
-rw-r--r--arch/i386/pci/legacy.c2
-rw-r--r--arch/i386/pci/mmconfig.c14
-rw-r--r--arch/i386/pci/numa.c15
-rw-r--r--arch/i386/pci/pci.h43
-rw-r--r--arch/i386/pci/visws.c4
-rw-r--r--arch/i386/xen/enlighten.c12
-rw-r--r--arch/sparc/kernel/prom.c15
-rw-r--r--arch/sparc64/kernel/cpu.c36
-rw-r--r--arch/sparc64/kernel/head.S111
-rw-r--r--arch/sparc64/kernel/hvtramp.S7
-rw-r--r--arch/sparc64/kernel/irq.c74
-rw-r--r--arch/sparc64/kernel/mdesc.c24
-rw-r--r--arch/sparc64/kernel/prom.c15
-rw-r--r--arch/sparc64/kernel/smp.c5
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c1
-rw-r--r--arch/sparc64/kernel/trampoline.S7
-rw-r--r--arch/sparc64/kernel/vio.c19
-rw-r--r--arch/sparc64/lib/GENbzero.S160
-rw-r--r--arch/sparc64/lib/GENcopy_from_user.S34
-rw-r--r--arch/sparc64/lib/GENcopy_to_user.S38
-rw-r--r--arch/sparc64/lib/GENmemcpy.S121
-rw-r--r--arch/sparc64/lib/GENpage.S77
-rw-r--r--arch/sparc64/lib/GENpatch.S33
-rw-r--r--arch/sparc64/lib/Makefile4
-rw-r--r--arch/x86_64/boot/compressed/head.S7
-rw-r--r--arch/x86_64/kernel/apic.c6
-rw-r--r--arch/x86_64/kernel/pci-calgary.c13
-rw-r--r--arch/x86_64/mm/pageattr.c3
-rw-r--r--arch/x86_64/pci/mmconfig.c12
-rw-r--r--arch/x86_64/vdso/.gitignore1
-rw-r--r--block/ll_rw_blk.c4
-rw-r--r--drivers/acpi/resources/rsxface.c2
-rw-r--r--drivers/block/cciss.c16
-rw-r--r--drivers/block/cpqarray.c78
-rw-r--r--drivers/block/viodasd.c1
-rw-r--r--drivers/block/xsysace.c29
-rw-r--r--drivers/char/hvc_lguest.c1
-rw-r--r--drivers/char/pcmcia/cm4000_cs.c5
-rw-r--r--drivers/char/pcmcia/cm4040_cs.c2
-rw-r--r--drivers/char/tty_io.c56
-rw-r--r--drivers/lguest/core.c5
-rw-r--r--drivers/lguest/interrupts_and_traps.c9
-rw-r--r--drivers/lguest/lguest.c18
-rw-r--r--drivers/lguest/lguest_bus.c1
-rw-r--r--drivers/lguest/segments.c62
-rw-r--r--drivers/lguest/switcher.S15
-rw-r--r--drivers/md/dm.c4
-rw-r--r--drivers/mmc/card/queue.c14
-rw-r--r--drivers/mmc/host/at91_mci.c8
-rw-r--r--drivers/mmc/host/wbsd.c10
-rw-r--r--drivers/mtd/mtdchar.c1
-rw-r--r--drivers/net/atl1/atl1_main.c4
-rw-r--r--drivers/net/ehea/ehea.h2
-rw-r--r--drivers/net/ehea/ehea_main.c44
-rw-r--r--drivers/net/ibmveth.c27
-rw-r--r--drivers/net/ibmveth.h3
-rw-r--r--drivers/net/phy/phy.c4
-rw-r--r--drivers/net/r8169.c24
-rw-r--r--drivers/net/sis190.c3
-rw-r--r--drivers/net/smc91x.h4
-rw-r--r--drivers/net/ucc_geth_ethtool.c1
-rw-r--r--drivers/net/ucc_geth_mii.c3
-rw-r--r--drivers/net/wireless/bcm43xx/bcm43xx_phy.c2
-rw-r--r--drivers/net/wireless/rtl8187_dev.c2
-rw-r--r--drivers/net/wireless/zd1211rw/zd_mac.c2
-rw-r--r--drivers/spi/spi_mpc83xx.c25
-rw-r--r--drivers/spi/spidev.c4
-rw-r--r--drivers/video/console/fbcon.c14
-rw-r--r--drivers/video/matrox/g450_pll.c23
-rw-r--r--drivers/video/matrox/matroxfb_DAC1064.h15
-rw-r--r--drivers/video/matrox/matroxfb_base.h1
-rw-r--r--drivers/video/matrox/matroxfb_misc.c1
-rw-r--r--drivers/video/pvr2fb.c16
-rw-r--r--drivers/video/stifb.c19
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/direct-io.c1
-rw-r--r--fs/ecryptfs/inode.c4
-rw-r--r--fs/ecryptfs/main.c18
-rw-r--r--fs/nfs/delegation.c21
-rw-r--r--fs/nfs/inode.c24
-rw-r--r--fs/nfs/namespace.c6
-rw-r--r--fs/nfs/nfs4proc.c16
-rw-r--r--fs/nfs/nfs4renewd.c5
-rw-r--r--fs/nfs/nfs4state.c5
-rw-r--r--fs/ocfs2/alloc.c4
-rw-r--r--fs/ocfs2/cluster/tcp.c24
-rw-r--r--fs/ocfs2/file.c28
-rw-r--r--fs/ocfs2/namei.c16
-rw-r--r--fs/ocfs2/ocfs2.h8
-rw-r--r--fs/ocfs2/super.c69
-rw-r--r--fs/ocfs2/super.h2
-rw-r--r--include/asm-alpha/fcntl.h1
-rw-r--r--include/asm-frv/unistd.h3
-rw-r--r--include/asm-generic/pgtable.h73
-rw-r--r--include/asm-i386/apic.h2
-rw-r--r--include/asm-i386/cpufeature.h2
-rw-r--r--include/asm-i386/paravirt.h16
-rw-r--r--include/asm-i386/pci.h3
-rw-r--r--include/asm-sparc/prom.h1
-rw-r--r--include/asm-sparc64/oplib.h7
-rw-r--r--include/asm-sparc64/prom.h1
-rw-r--r--include/asm-sparc64/spitfire.h7
-rw-r--r--include/asm-sparc64/xor.h6
-rw-r--r--include/asm-x86_64/pci.h2
-rw-r--r--include/linux/bio.h3
-rw-r--r--include/linux/blktrace_api.h3
-rw-r--r--include/linux/clockchips.h1
-rw-r--r--include/linux/fs.h1
-rw-r--r--include/linux/init.h2
-rw-r--r--include/linux/kernel.h4
-rw-r--r--include/linux/nfs_fs.h2
-rw-r--r--include/linux/proc_fs.h1
-rw-r--r--include/linux/rcupdate.h1
-rw-r--r--include/linux/sched.h20
-rw-r--r--include/linux/tty.h2
-rw-r--r--include/net/netfilter/ipv4/nf_conntrack_ipv4.h2
-rw-r--r--kernel/auditsc.c22
-rw-r--r--kernel/irq/resend.c9
-rw-r--r--kernel/kprobes.c5
-rw-r--r--kernel/power/snapshot.c3
-rw-r--r--kernel/profile.c4
-rw-r--r--kernel/sched.c354
-rw-r--r--kernel/sched_debug.c18
-rw-r--r--kernel/sched_fair.c213
-rw-r--r--kernel/sched_idletask.c10
-rw-r--r--kernel/sched_rt.c48
-rw-r--r--kernel/sysctl.c2
-rw-r--r--kernel/time/clockevents.c10
-rw-r--r--lib/hexdump.c6
-rw-r--r--mm/filemap.c22
-rw-r--r--mm/slub.c68
-rw-r--r--net/core/utils.c1
-rw-r--r--net/ieee80211/softmac/ieee80211softmac_wx.c11
-rw-r--r--net/ipv4/netfilter/ipt_recent.c7
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c2
-rw-r--r--net/ipv4/tcp_htcp.c4
-rw-r--r--net/mac80211/debugfs_netdev.c8
-rw-r--r--net/mac80211/debugfs_sta.c2
-rw-r--r--net/mac80211/ieee80211.c1
-rw-r--r--net/mac80211/ieee80211_ioctl.c19
-rw-r--r--net/netfilter/nf_conntrack_netlink.c17
-rw-r--r--net/netlabel/netlabel_domainhash.c10
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c9
-rw-r--r--net/sunrpc/cache.c3
-rw-r--r--net/sunrpc/rpc_pipe.c3
-rw-r--r--net/sunrpc/sched.c57
-rwxr-xr-xscripts/checkpatch.pl102
178 files changed, 2480 insertions, 1213 deletions
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
index e3bb29a8d8d..c119484258b 100644
--- a/Documentation/DocBook/uio-howto.tmpl
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -133,10 +133,6 @@ interested in translating it, please email me
<para>updates of your driver can take place without recompiling
the kernel.</para>
</listitem>
-<listitem>
- <para>if you need to keep some parts of your driver closed source,
- you can do so without violating the GPL license on the kernel.</para>
-</listitem>
</itemizedlist>
<sect1 id="how_uio_works">
diff --git a/Documentation/fb/pvr2fb.txt b/Documentation/fb/pvr2fb.txt
index 2bf6c2321c2..36bdeff585e 100644
--- a/Documentation/fb/pvr2fb.txt
+++ b/Documentation/fb/pvr2fb.txt
@@ -9,14 +9,13 @@ one found in the Dreamcast.
Advantages:
* It provides a nice large console (128 cols + 48 lines with 1024x768)
- without using tiny, unreadable fonts.
+ without using tiny, unreadable fonts (NOT on the Dreamcast)
* You can run XF86_FBDev on top of /dev/fb0
* Most important: boot logo :-)
Disadvantages:
- * Driver is currently limited to the Dreamcast PowerVR 2 implementation
- at the time of this writing.
+ * Driver is largely untested on non-Dreamcast systems.
Configuration
=============
@@ -29,11 +28,16 @@ Accepted options:
font:X - default font to use. All fonts are supported, including the
SUN12x22 font which is very nice at high resolutions.
-mode:X - default video mode. The following video modes are supported:
- 640x240-60, 640x480-60.
+mode:X - default video mode in the format [xres]x[yres]-<bpp>@<refresh rate>.
+	 The following video modes are supported:
+	 640x640-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
+	 defaults to 640x480-16@60. At the time of writing the
+	 24bpp and 32bpp modes function poorly. Work to fix that is
+	 ongoing.
+
Note: the 640x240 mode is currently broken, and should not be
- used for any reason. It is only mentioned as a reference.
+ used for any reason. It is only mentioned here as a reference.
inverse - invert colors on screen (for LCD displays)
@@ -52,10 +56,10 @@ output:X - output type. This can be any of the following: pal, ntsc, and
X11
===
-XF86_FBDev should work, in theory. At the time of this writing it is
-totally untested and may or may not even portray the beginnings of
-working. If you end up testing this, please let me know!
+XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
+on any 2.6 series kernel.
--
Paul Mundt <lethal@linuxdc.org>
+Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>
diff --git a/Documentation/i386/zero-page.txt b/Documentation/i386/zero-page.txt
index 75b3680c41e..6c0817c4568 100644
--- a/Documentation/i386/zero-page.txt
+++ b/Documentation/i386/zero-page.txt
@@ -1,3 +1,13 @@
+---------------------------------------------------------------------------
+!!!!!!!!!!!!!!!WARNING!!!!!!!!
+The zero page is a kernel internal data structure, not a stable ABI. It might change
+without warning and the kernel has no way to detect old versions of it.
+If you're writing external code such as a boot loader, you should only use
+the stable, versioned real-mode boot protocol described in boot.txt. Otherwise the
+kernel might break you at any time.
+!!!!!!!!!!!!!WARNING!!!!!!!!!!!
+----------------------------------------------------------------------------
+
Summary of boot_params layout (kernel point of view)
( collected by Hans Lermen and Martin Mares )
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 536d5bfbdb8..fe8b0c4892c 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -98,6 +98,15 @@ applicable everywhere (see syntax).
times, the limit is set to the largest selection.
Reverse dependencies can only be used with boolean or tristate
symbols.
+ Note:
+	  select is evil.... select will by brute force set a symbol
+	  equal to 'y' without visiting the dependencies. So by abusing
+	  select you are able to select a symbol FOO even if FOO depends
+	  on BAR which is not set. In general use select only for
+	  non-visible symbols (no prompts anywhere) and for symbols with
+	  no dependencies. That limits its usefulness but, on the other
+	  hand, avoids the illegal configurations all over. kconfig
+	  should one day warn about such things.
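+	  An illustrative sketch (hypothetical symbols, not from the
+	  tree):
+
+	  config FOO
+	  	bool "Foo support"
+	  	select BAR
+
+	  Here BAR is forced to 'y' whenever FOO is set, even if BAR's
+	  own dependencies are not met.
+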
- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
This allows to limit the range of possible input values for int
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index efdb42fd3fb..a326487a3ab 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1922,7 +1922,7 @@ and is between 256 and 4096 characters. It is defined in the file
See header of drivers/scsi/wd7000.c.
wdt= [WDT] Watchdog
- See Documentation/watchdog/watchdog.txt.
+ See Documentation/watchdog/wdt.txt.
xd= [HW,XT] Original XT pre-IDE (RLL encoded) disks.
xd_geo= See header of drivers/block/xd.c.
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index 31e794ef5f9..c0b7a455639 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -13,7 +13,9 @@ LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000)
CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds
LDLIBS:=-lz
-
+# Removing this works for some versions of ld.so (eg. Ubuntu Feisty) and
+# not others (eg. FC7).
+LDFLAGS+=-static
all: lguest.lds lguest
# The linker script on x86 is so complex the only way of creating one
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
new file mode 100644
index 00000000000..5fbcc22c98e
--- /dev/null
+++ b/Documentation/memory-hotplug.txt
@@ -0,0 +1,322 @@
+==============
+Memory Hotplug
+==============
+
+Last Updated: Jul 28 2007
+
+This document is about memory hotplug, including how to use it and its current
+status. Because memory hotplug is still under development, the contents of this
+text will change often.
+
+1. Introduction
+ 1.1 purpose of memory hotplug
+ 1.2. Phases of memory hotplug
+ 1.3. Unit of Memory online/offline operation
+2. Kernel Configuration
+3. sysfs files for memory hotplug
+4. Physical memory hot-add phase
+ 4.1 Hardware(Firmware) Support
+ 4.2 Notify memory hot-add event by hand
+5. Logical Memory hot-add phase
+ 5.1. State of memory
+ 5.2. How to online memory
+6. Logical memory remove
+ 6.1 Memory offline and ZONE_MOVABLE
+ 6.2. How to offline memory
+7. Physical memory remove
+8. Future Work List
+
+Note(1): x86_64 has a special implementation for memory hotplug.
+         This text does not describe it.
+Note(2): This text assumes that sysfs is mounted at /sys.
+
+
+---------------
+1. Introduction
+---------------
+
+1.1 purpose of memory hotplug
+------------
+Memory Hotplug allows users to increase/decrease the amount of memory.
+Generally, there are two purposes.
+
+(A) For changing the amount of memory.
+ This is to allow a feature like capacity on demand.
+(B) For installing/removing DIMMs or NUMA-nodes physically.
+ This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
+
+(A) is required by highly virtualized environments and (B) is required by
+hardware which supports memory power management.
+
+Linux memory hotplug is designed for both purposes.
+
+
+1.2. Phases of memory hotplug
+---------------
+There are 2 phases in Memory Hotplug.
+ 1) Physical Memory Hotplug phase
+ 2) Logical Memory Hotplug phase.
+
+The first phase is to communicate with the hardware/firmware and to set up or
+tear down the environment for the hotplugged memory. Basically, this phase is
+necessary for purpose (B), but it is also a good phase for communication in
+highly virtualized environments.
+
+When memory is hotplugged, the kernel recognizes the new memory, creates new
+memory management tables, and creates sysfs files for operating on the new memory.
+
+If the firmware supports notifying the OS of the connection of new memory,
+this phase is triggered automatically. ACPI can notify this event. If not,
+the "probe" operation (performed by the system administrator) is used instead
+(see Section 4).
+
+The Logical Memory Hotplug phase changes the memory state to
+available/unavailable for users. The amount of memory seen by the user is
+changed by this phase. The kernel makes all memory in the range available as
+free pages when the memory range is onlined.
+
+In this document, this phase is described as online/offline.
+
+The Logical Memory Hotplug phase is triggered by a write to a sysfs file by the
+system administrator. For the hot-add case, it must be executed by hand after
+the Physical Hotplug phase.
+(However, if you write udev hotplug scripts for memory hotplug, these
+ phases can be executed seamlessly.)
+
+
+1.3. Unit of Memory online/offline operation
+------------
+Memory hotplug uses the SPARSEMEM memory model. SPARSEMEM divides the whole memory
+into chunks of the same size. The chunk is called a "section". The size of
+a section is architecture dependent. For example, power uses 16MiB, ia64 uses
+1GiB. The unit of online/offline operation is "one section". (see Section 3.)
+
+To determine the size of sections, please read this file:
+
+/sys/devices/system/memory/block_size_bytes
+
+This file shows the size of sections in bytes.
+
+-----------------------
+2. Kernel Configuration
+-----------------------
+To use the memory hotplug feature, the kernel must be compiled with the
+following config options.
+
+- For all memory hotplug
+ Memory model -> Sparse Memory (CONFIG_SPARSEMEM)
+ Allow for memory hot-add (CONFIG_MEMORY_HOTPLUG)
+
+- To enable memory removal, the following are also necessary
+ Allow for memory hot remove (CONFIG_MEMORY_HOTREMOVE)
+ Page Migration (CONFIG_MIGRATION)
+
+- For ACPI memory hotplug, the following are also necessary
+  Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
+  This option can be built as a kernel module.
+
+- As a related configuration, if your box supports NUMA-node hotplug
+  via ACPI, then this option is necessary too.
+  ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
+  (CONFIG_ACPI_CONTAINER).
+  This option can be built as a kernel module too.
+
+--------------------------------
+3. sysfs files for memory hotplug
+--------------------------------
+All sections have their device information under /sys/devices/system/memory as
+
+/sys/devices/system/memory/memoryXXX
+(XXX is section id.)
+
+Now, XXX is defined as start_address_of_section / section_size.
+
+For example, assume a 1GiB section size. A device for memory starting at
+0x100000000 is /sys/devices/system/memory/memory4
+(0x100000000 / 1GiB = 4)
+This device covers the address range [0x100000000 ... 0x140000000)
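+
+(Illustrative only: with the 1GiB section size assumed above, the same id
+ can be reproduced with shell arithmetic:
+
+ % echo $((0x100000000 / 0x40000000))
+ 4
+)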
+
+Under each section, you can see 3 files.
+
+/sys/devices/system/memory/memoryXXX/phys_index
+/sys/devices/system/memory/memoryXXX/phys_device
+/sys/devices/system/memory/memoryXXX/state
+
+'phys_index' : read-only and contains section id, same as XXX.
+'state' : read-write
+ at read: contains online/offline state of memory.
+ at write: user can specify "online", "offline" command
+'phys_device': read-only: designed to show the name of the physical memory
+               device. This is not well implemented at present.
+
+NOTE:
+ These directories/files appear after physical memory hotplug phase.
+
+
+--------------------------------
+4. Physical memory hot-add phase
+--------------------------------
+
+4.1 Hardware(Firmware) Support
+------------
+On the x86_64 and ia64 platforms, memory hotplug via ACPI is supported.
+
+In general, firmware (ACPI) which supports memory hotplug defines a memory
+class object with _HID "PNP0C80". When a notify is asserted to PNP0C80,
+Linux's ACPI handler hot-adds the memory to the system and calls a hotplug udev
+script. This is done automatically.
+
+But scripts for memory hotplug are not (currently) contained in the generic
+udev package. You may have to write them yourself, or online/offline the
+memory by hand. Please see "How to online memory" and "How to offline memory"
+in this text.
+
+If the firmware supports NUMA-node hotplug and defines an object with _HID
+"ACPI0004", "PNP0A05", or "PNP0A06", a notification is asserted to it, and the
+ACPI handler calls the hotplug code for all of the objects defined under it.
+If a memory device is found, the memory hotplug code is called.
+
+
+4.2 Notify memory hot-add event by hand
+------------
+In some environments, especially virtualized environments, the firmware will
+not notify the kernel of memory hotplug events. For such environments, a
+"probe" interface is supported. This interface depends on
+CONFIG_ARCH_MEMORY_PROBE.
+
+Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc, but the code is
+not highly architecture-specific. Please add the config option to your
+architecture if you need the "probe" interface.
+
+The probe interface is located at
+/sys/devices/system/memory/probe
+
+You can tell the kernel the physical address of the new memory by
+
+% echo start_address_of_new_memory > /sys/devices/system/memory/probe
+
+Then, the memory range
+[start_address_of_new_memory, start_address_of_new_memory + section_size)
+is hot-added. In this case, the hotplug script is not called (in the
+current implementation). You'll have to online the memory yourself.
+Please see "How to online memory" in this text.
+
+
+
+------------------------------
+5. Logical Memory hot-add phase
+------------------------------
+
+5.1. State of memory
+------------
+To see the (online/offline) state of a memory section, read the 'state' file.
+
+% cat /sys/devices/system/memory/memoryXXX/state
+
+
+If the memory section is online, you'll read "online".
+If the memory section is offline, you'll read "offline".
+
+
+5.2. How to online memory
+------------
+Even if the memory is hot-added, it is not in a ready-to-use state.
+To use newly added memory, you have to "online" the memory section.
+
+To online a section, write "online" to its 'state' file as:
+
+% echo online > /sys/devices/system/memory/memoryXXX/state
+
+After this, section memoryXXX's state will be 'online' and the amount of
+available memory will be increased.
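+
+(A convenience sketch, not part of the kernel interface: all currently
+ offline sections can be onlined with a small shell loop, e.g.
+
+ % for f in /sys/devices/system/memory/memory*/state; do
+     grep -q offline "$f" && echo online > "$f"; done
+)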
+
+Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
+This may be changed in the future.
+
+
+
+------------------------
+6. Logical memory remove
+------------------------
+
+6.1 Memory offline and ZONE_MOVABLE
+------------
+Memory offlining is more complicated than memory onlining. Because memory
+offlining has to make the whole memory section unused, it can fail if
+the section includes memory which cannot be freed.
+
+In general, memory offline can use 2 techniques.
+
+(1) reclaim and free all memory in the section.
+(2) migrate all pages in the section.
+
+In the current implementation, Linux's memory offline uses method (2), freeing
+all pages in the section by page migration. But not all pages are
+migratable. Under current Linux, migratable pages are anonymous pages and
+page caches. For offlining a section by migration, the kernel has to guarantee
+that the section contains only migratable pages.
+
+Now, a boot option for creating sections which consist of migratable pages is
+supported. By specifying the "kernelcore=" or "movablecore=" boot option, you
+can create ZONE_MOVABLE, a zone which is used only for movable pages.
+(See also Documentation/kernel-parameters.txt)
+
+Assume the system has "TOTAL" amount of memory at boot time, this boot option
+creates ZONE_MOVABLE as following.
+
+1) When kernelcore=YYYY boot option is used,
+ Size of memory not for movable pages (not for offline) is YYYY.
+ Size of memory for movable pages (for offline) is TOTAL-YYYY.
+
+2) When movablecore=ZZZZ boot option is used,
+ Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
+ Size of memory for movable pages (for offline) is ZZZZ.
+
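+(Illustrative example: on a machine with TOTAL = 4G of memory, booting with
+ "kernelcore=2G" leaves 4G - 2G = 2G of memory for movable pages, i.e.
+ available for offline.)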
+
+Note) Unfortunately, there is no information to show which section belongs
+to ZONE_MOVABLE. This is TBD.
+
+
+6.2. How to offline memory
+------------
+You can offline a section by using the same sysfs interface that was used in
+memory onlining.
+
+% echo offline > /sys/devices/system/memory/memoryXXX/state
+
+If offlining succeeds, the state of the memory section is changed to "offline".
+If it fails, an error code (like -EBUSY) will be returned by the kernel.
+Even if a section does not belong to ZONE_MOVABLE, you can try to offline it;
+if it doesn't contain 'unmovable' memory, the offline will succeed.
+
+A section under ZONE_MOVABLE is expected to be easy to offline. But when the
+system is busy, offlining may still return -EBUSY. Even if a memory section
+cannot be offlined due to -EBUSY, retrying may eventually succeed, because the
+condition can be transient.
+(For example, a page may be referenced by some kernel-internal call and
+ released soon afterwards.)
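+
+(A user-space retry sketch, reusing the hypothetical memory4 section from
+ Section 3:
+
+ % until echo offline > /sys/devices/system/memory/memory4/state 2>/dev/null
+   do sleep 1; done
+)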
+
+Consideration:
+The design direction of memory hotplug is to increase the likelihood that
+memory offlining succeeds and to guarantee that memory can be unplugged in any
+situation. But this needs more work. Returning -EBUSY in some situations may
+be good, because the user can then decide whether to retry. Currently, the
+memory offlining code retries internally with a 120 second timeout.
+
+-------------------------
+7. Physical memory remove
+-------------------------
+More implementation is still needed....
+ - Notifying the firmware that the OS has completed the remove work.
+ - Guarding against removal while it is not yet safe.
+
+--------------
+8. Future Work
+--------------
+ - allowing memory hot-add to ZONE_MOVABLE. Maybe we need some switch like a
+   sysctl or a new control file.
+ - showing the relationship between memory sections and physical devices.
+ - showing the relationship between memory sections and nodes (maybe good for NUMA).
+ - showing whether a memory section is under ZONE_MOVABLE or not.
+ - testing and improving memory offlining.
+ - supporting HugeTLB page migration and offlining.
+ - removing the memmap at memory offline.
+ - physically removing memory.
+
diff --git a/Documentation/sched-design-CFS.txt b/Documentation/sched-design-CFS.txt
index 16feebb7bdc..84901e7c050 100644
--- a/Documentation/sched-design-CFS.txt
+++ b/Documentation/sched-design-CFS.txt
@@ -83,7 +83,7 @@ Some implementation details:
CFS uses nanosecond granularity accounting and does not rely on any
jiffies or other HZ detail. Thus the CFS scheduler has no notion of
'timeslices' and has no heuristics whatsoever. There is only one
- central tunable:
+ central tunable (you have to switch on CONFIG_SCHED_DEBUG):
/proc/sys/kernel/sched_granularity_ns
diff --git a/Documentation/sched-nice-design.txt b/Documentation/sched-nice-design.txt
new file mode 100644
index 00000000000..e2bae5a577e
--- /dev/null
+++ b/Documentation/sched-nice-design.txt
@@ -0,0 +1,108 @@
+This document explains the thinking about the revamped and streamlined
+nice-levels implementation in the new Linux scheduler.
+
+Nice levels were always pretty weak under Linux and people continuously
+pestered us to make nice +19 tasks use up much less CPU time.
+
+Unfortunately that was not that easy to implement under the old
+scheduler (otherwise we'd have done it long ago), because nice level
+support was historically coupled to timeslice length, and timeslice
+units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
+
+In the O(1) scheduler (in 2003) we changed negative nice levels to be
+much stronger than they were before in 2.4 (and people were happy about
+that change), and we also intentionally calibrated the linear timeslice
+rule so that nice +19 level would be _exactly_ 1 jiffy. To better
+understand it, the timeslice graph went like this (cheesy ASCII art
+alert!):
+
+
+ A
+ \ | [timeslice length]
+ \ |
+ \ |
+ \ |
+ \ |
+ \|___100msecs
+ |^ . _
+ | ^ . _
+ | ^ . _
+ -*----------------------------------*-----> [nice level]
+ -20 | +19
+ |
+ |
+
+So that if someone wanted to really renice tasks, +19 would give a much
+bigger hit than the normal linear rule would do. (The solution of
+changing the ABI to extend priorities was discarded early on.)
+
+This approach worked to some degree for some time, but later on with
+HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage, which
+we felt was a bit excessive. Excessive _not_ because it's too small of
+a CPU utilization, but because it causes too frequent (once per
+millisec) rescheduling. (and would thus trash the cache, etc. Remember,
+this was long ago when hardware was weaker and caches were smaller, and
+people were running number crunching apps at nice +19.)
+
+So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
+right minimal granularity - and this translates to 5% CPU utilization.
+But the fundamental HZ-sensitive property for nice+19 still remained,
+and we never got a single complaint about nice +19 being too _weak_ in
+terms of CPU utilization, we only got complaints about it (still) being
+too _strong_ :-)
+
+To sum it up: we always wanted to make nice levels more consistent, but
+within the constraints of HZ and jiffies and their nasty design level
+coupling to timeslices and granularity it was not really viable.
+
+The second (less frequent but still periodically occurring) complaint
+about Linux's nice level support was its asymmetry around the origin
+(which you can see demonstrated in the picture above), or more
+accurately: the fact that nice level behavior depended on the _absolute_
+nice level as well, while the nice API itself is fundamentally
+"relative":
+
+ int nice(int inc);
+
+ asmlinkage long sys_nice(int increment)
+
+(the first one is the glibc API, the second one is the syscall API.)
+Note that the 'inc' is relative to the current nice level. Tools like
+bash's "nice" command mirror this relative API.
+
+With the old scheduler, if you for example started a niced task with +1
+and another task with +2, the CPU split between the two tasks would
+depend on the nice level of the parent shell - if it was at nice -10 the
+CPU split was different than if it was at +5 or +10.
+
+A third complaint against Linux's nice level support was that negative
+nice levels were not 'punchy enough', so lots of people had to resort to
+running audio (and other multimedia) apps under RT priorities such as
+SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
+proof, and a buggy SCHED_FIFO app can also lock up the system for good.
+
+The new scheduler in v2.6.23 addresses all three types of complaints:
+
+To address the first complaint (of nice levels being not "punchy"
+enough), the scheduler was decoupled from 'time slice' and HZ concepts
+(and granularity was made a separate concept from nice levels) and thus
+it was possible to implement better and more consistent nice +19
+support: with the new scheduler nice +19 tasks get a HZ-independent
+1.5%, instead of the variable 3%-5%-9% range they got in the old
+scheduler.
+
+To address the second complaint (of nice levels not being consistent),
+the new scheduler makes nice(1) have the same CPU utilization effect on
+tasks, regardless of their absolute nice levels. So on the new
+scheduler, running a nice +10 and a nice +11 task has the same CPU
+utilization "split" between them as running a nice -5 and a nice -4
+task. (one will get 55% of the CPU, the other 45%.) That is why nice
+levels were changed to be "multiplicative" (or exponential) - that way
+it does not matter which nice level you start out from, the 'relative
+result' will always be the same.
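+
+(A worked example, assuming the roughly 1.25x weight ratio between adjacent
+ nice levels used by the new scheduler: two runnable tasks one nice level
+ apart split the CPU as 1.25/(1 + 1.25) ~= 55% for the lower-nice task and
+ 1/(1 + 1.25) ~= 45% for the higher-nice one, independently of the absolute
+ levels.)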
+
+The third complaint (of negative nice levels not being "punchy" enough
+and forcing audio apps to run under the more dangerous SCHED_FIFO
+scheduling policy) is addressed by the new scheduler almost
+automatically: stronger negative nice levels are an automatic
+side-effect of the recalibrated dynamic range of nice levels.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index ba328f25541..ef19142896c 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -1,6 +1,6 @@
Linux Magic System Request Key Hacks
Documentation for sysrq.c
-Last update: 2007-MAR-14
+Last update: 2007-AUG-04
* What is the magic SysRq key?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -78,7 +78,7 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
'g' - Used by kgdb on ppc and sh platforms.
'h' - Will display help (actually any other key than those listed
- above will display help. but 'h' is easy to remember :-)
+ here will display help, but 'h' is easy to remember :-)
'i' - Send a SIGKILL to all processes, except for init.
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index d4f21ffd140..1af7bd5a218 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -396,7 +396,7 @@ void report(struct slabinfo *s)
if (strcmp(s->name, "*") == 0)
return;
- printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %d\n",
+ printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %lu\n",
s->name, s->aliases, s->order, s->objects);
if (s->hwcache_align)
printf("** Hardware cacheline aligned\n");
diff --git a/MAINTAINERS b/MAINTAINERS
index fa52a307d89..d3a0684945b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -673,7 +673,7 @@ S: Maintained
AUDIT SUBSYSTEM
P: David Woodhouse
M: dwmw2@infradead.org
-L: linux-audit@redhat.com
+L: linux-audit@redhat.com (subscribers-only)
W: http://people.redhat.com/sgrubb/audit/
T: git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git
S: Maintained
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 1d3c1398c42..52c91ccc164 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -271,6 +271,19 @@ titan_dispatch_irqs(u64 mask)
* Titan Family
*/
static void __init
+titan_request_irq(unsigned int irq, irq_handler_t handler,
+ unsigned long irqflags, const char *devname,
+ void *dev_id)
+{
+ int err;
+ err = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (err) {
+ printk("titan_request_irq for IRQ %d returned %d; ignoring\n",
+ irq, err);
+ }
+}
+
+static void __init
titan_late_init(void)
{
/*
@@ -278,15 +291,15 @@ titan_late_init(void)
* all reported to the kernel as machine checks, so the handler
* is a nop so it can be called to count the individual events.
*/
- request_irq(63+16, titan_intr_nop, IRQF_DISABLED,
+ titan_request_irq(63+16, titan_intr_nop, IRQF_DISABLED,
"CChip Error", NULL);
- request_irq(62+16, titan_intr_nop, IRQF_DISABLED,
+ titan_request_irq(62+16, titan_intr_nop, IRQF_DISABLED,
"PChip 0 H_Error", NULL);
- request_irq(61+16, titan_intr_nop, IRQF_DISABLED,
+ titan_request_irq(61+16, titan_intr_nop, IRQF_DISABLED,
"PChip 1 H_Error", NULL);
- request_irq(60+16, titan_intr_nop, IRQF_DISABLED,
+ titan_request_irq(60+16, titan_intr_nop, IRQF_DISABLED,
"PChip 0 C_Error", NULL);
- request_irq(59+16, titan_intr_nop, IRQF_DISABLED,
+ titan_request_irq(59+16, titan_intr_nop, IRQF_DISABLED,
"PChip 1 C_Error", NULL);
/*
@@ -345,9 +358,9 @@ privateer_init_pci(void)
* Hook a couple of extra err interrupts that the
* common titan code won't.
*/
- request_irq(53+16, titan_intr_nop, IRQF_DISABLED,
+ titan_request_irq(53+16, titan_intr_nop, IRQF_DISABLED,
"NMI", NULL);
- request_irq(50+16, titan_intr_nop, IRQF_DISABLED,
+ titan_request_irq(50+16, titan_intr_nop, IRQF_DISABLED,
"Temperature Warning", NULL);
/*
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index b45188f8512..673c860ffc2 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -31,6 +31,7 @@
#include <linux/module.h>
#include <linux/init_task.h>
#include <linux/mqueue.h>
+#include <linux/fs.h>
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 5a51dd6ab28..6a7aefe4834 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -33,6 +33,8 @@
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/err.h>
#include <asm/blackfin.h>
#include <asm/fixed_code.h>
diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c
index f5e1ae3d170..abcd14817d0 100644
--- a/arch/blackfin/kernel/sys_bfin.c
+++ b/arch/blackfin/kernel/sys_bfin.c
@@ -37,6 +37,7 @@
#include <linux/syscalls.h>
#include <linux/mman.h>
#include <linux/file.h>
+#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/ipc.h>
#include <linux/unistd.h>
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 8766bd612b4..792a8416fe1 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -31,6 +31,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
+#include <linux/fs.h>
#include <asm/traps.h>
#include <asm/cacheflush.h>
#include <asm/blackfin.h>
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 4b41248b61a..6b4d026a00a 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -180,8 +180,6 @@ source "drivers/isdn/Kconfig"
source "drivers/telephony/Kconfig"
-source "drivers/cdrom/Kconfig"
-
#
# input before char - char/joystick depends on it. As does USB.
#
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 275673c192a..1e74f3c5cee 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1496,6 +1496,7 @@ sys_call_table:
.long sys_signalfd
.long sys_timerfd
.long sys_eventfd
+ .long sys_fallocate
syscall_table_size = (. - sys_call_table)
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index c85598acb8f..1b66d5c70ea 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -11,6 +11,8 @@
#include <asm/mce.h>
#include <asm/nmi.h>
+#define MAX_PATCH_LEN (255-1)
+
#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;
@@ -148,7 +150,8 @@ static unsigned char** find_nop_table(void)
#endif /* CONFIG_X86_64 */
-static void nop_out(void *insns, unsigned int len)
+/* Use this to add nops to a buffer, then text_poke the whole buffer. */
+static void add_nops(void *insns, unsigned int len)
{
unsigned char **noptable = find_nop_table();
@@ -156,7 +159,7 @@ static void nop_out(void *insns, unsigned int len)
unsigned int noplen = len;
if (noplen > ASM_NOP_MAX)
noplen = ASM_NOP_MAX;
- text_poke(insns, noptable[noplen], noplen);
+ memcpy(insns, noptable[noplen], noplen);
insns += noplen;
len -= noplen;
}
@@ -174,15 +177,15 @@ extern u8 *__smp_locks[], *__smp_locks_end[];
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
struct alt_instr *a;
- u8 *instr;
- int diff;
+ char insnbuf[MAX_PATCH_LEN];
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
for (a = start; a < end; a++) {
+ u8 *instr = a->instr;
BUG_ON(a->replacementlen > a->instrlen);
+ BUG_ON(a->instrlen > sizeof(insnbuf));
if (!boot_cpu_has(a->cpuid))
continue;
- instr = a->instr;
#ifdef CONFIG_X86_64
/* vsyscall code is not mapped yet. resolve it manually. */
if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
@@ -191,9 +194,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
__FUNCTION__, a->instr, instr);
}
#endif
- memcpy(instr, a->replacement, a->replacementlen);
- diff = a->instrlen - a->replacementlen;
- nop_out(instr + a->replacementlen, diff);
+ memcpy(insnbuf, a->replacement, a->replacementlen);
+ add_nops(insnbuf + a->replacementlen,
+ a->instrlen - a->replacementlen);
+ text_poke(instr, insnbuf, a->instrlen);
}
}
@@ -215,16 +219,18 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
u8 **ptr;
+ char insn[1];
if (noreplace_smp)
return;
+ add_nops(insn, 1);
for (ptr = start; ptr < end; ptr++) {
if (*ptr < text)
continue;
if (*ptr > text_end)
continue;
- nop_out(*ptr, 1);
+ text_poke(*ptr, insn, 1);
};
}
@@ -351,6 +357,7 @@ void apply_paravirt(struct paravirt_patch_site *start,
struct paravirt_patch_site *end)
{
struct paravirt_patch_site *p;
+ char insnbuf[MAX_PATCH_LEN];
if (noreplace_paravirt)
return;
@@ -358,13 +365,15 @@ void apply_paravirt(struct paravirt_patch_site *start,
for (p = start; p < end; p++) {
unsigned int used;
- used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
- p->len);
+ BUG_ON(p->len > MAX_PATCH_LEN);
+ used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf,
+ (unsigned long)p->instr, p->len);
BUG_ON(used > p->len);
/* Pad the rest with nops */
- nop_out(p->instr + used, p->len - used);
+ add_nops(insnbuf + used, p->len - used);
+ text_poke(p->instr, insnbuf, p->len);
}
}
extern struct paravirt_patch_site __start_parainstructions[],
@@ -379,7 +388,7 @@ void __init alternative_instructions(void)
that might execute the to be patched code.
Other CPUs are not running. */
stop_nmi();
-#ifdef CONFIG_MCE
+#ifdef CONFIG_X86_MCE
stop_mce();
#endif
@@ -417,7 +426,7 @@ void __init alternative_instructions(void)
local_irq_restore(flags);
restart_nmi();
-#ifdef CONFIG_MCE
+#ifdef CONFIG_X86_MCE
restart_mce();
#endif
}
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index bfc6cb7df7e..f9fff29e01a 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -61,8 +61,9 @@ static int enable_local_apic __initdata = 0;
/* Local APIC timer verification ok */
static int local_apic_timer_verify_ok;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int local_apic_timer_disabled;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk
+ or using CPU MSR check */
+int local_apic_timer_disabled;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -370,12 +371,9 @@ void __init setup_boot_APIC_clock(void)
long delta, deltapm;
int pm_referenced = 0;
- if (boot_cpu_has(X86_FEATURE_LAPIC_TIMER_BROKEN))
- local_apic_timer_disabled = 1;
-
/*
* The local apic timer can be disabled via the kernel
- * commandline or from the test above. Register the lapic
+ * commandline or from the CPU detection code. Register the lapic
* timer as a dummy clock event source on SMP systems, so the
* broadcast mechanism is used. On UP systems simply ignore it.
*/
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index c7ba455d5ac..dcf6bbb1c7c 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -3,6 +3,7 @@
#include <linux/mm.h>
#include <asm/io.h>
#include <asm/processor.h>
+#include <asm/apic.h>
#include "cpu.h"
@@ -22,6 +23,7 @@
extern void vide(void);
__asm__(".align 4\nvide: ret");
+#ifdef CONFIG_X86_LOCAL_APIC
#define ENABLE_C1E_MASK 0x18000000
#define CPUID_PROCESSOR_SIGNATURE 1
#define CPUID_XFAM 0x0ff00000
@@ -52,6 +54,7 @@ static __cpuinit int amd_apic_timer_broken(void)
}
return 0;
}
+#endif
int force_mwait __cpuinitdata;
@@ -282,8 +285,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
num_cache_leaves = 3;
}
+#ifdef CONFIG_X86_LOCAL_APIC
if (amd_apic_timer_broken())
- set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
+ local_apic_timer_disabled = 1;
+#endif
if (c->x86 == 0x10 && !force_mwait)
clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index 265c5597efb..40978af630e 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -13,7 +13,7 @@
static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
+#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
static void doublefault_fn(void)
{
@@ -23,23 +23,23 @@ static void doublefault_fn(void)
store_gdt(&gdt_desc);
gdt = gdt_desc.address;
- printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
+ printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
if (ptr_ok(gdt)) {
gdt += GDT_ENTRY_TSS << 3;
tss = *(u16 *)(gdt+2);
tss += *(u8 *)(gdt+4) << 16;
tss += *(u8 *)(gdt+7) << 24;
- printk("double fault, tss at %08lx\n", tss);
+ printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
if (ptr_ok(tss)) {
struct i386_hw_tss *t = (struct i386_hw_tss *)tss;
- printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
+ printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp);
- printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
+ printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
t->eax, t->ebx, t->ecx, t->edx);
- printk("esi = %08lx, edi = %08lx\n",
+ printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
t->esi, t->edi);
}
}
@@ -63,6 +63,7 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
.cs = __KERNEL_CS,
.ss = __KERNEL_DS,
.ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,
.__cr3 = __pa(swapper_pg_dir)
}
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 7c52b222207..8f0382161c9 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -162,9 +162,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
* which will be freed later
*/
-#ifdef CONFIG_HOTPLUG_CPU
-.section .text,"ax",@progbits
-#else
+#ifndef CONFIG_HOTPLUG_CPU
.section .init.text,"ax",@progbits
#endif
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index ea962c0667d..739cfb207dd 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -69,7 +69,8 @@ DEF_NATIVE(read_tsc, "rdtsc");
DEF_NATIVE(ud2a, "ud2a");
-static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
+static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
+ unsigned long addr, unsigned len)
{
const unsigned char *start, *end;
unsigned ret;
@@ -90,7 +91,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
#undef SITE
patch_site:
- ret = paravirt_patch_insns(insns, len, start, end);
+ ret = paravirt_patch_insns(ibuf, len, start, end);
break;
case PARAVIRT_PATCH(make_pgd):
@@ -107,7 +108,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
break;
default:
- ret = paravirt_patch_default(type, clobbers, insns, len);
+ ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
}
@@ -129,68 +130,67 @@ struct branch {
u32 delta;
} __attribute__((packed));
-unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
- void *site, u16 site_clobbers,
+unsigned paravirt_patch_call(void *insnbuf,
+ const void *target, u16 tgt_clobbers,
+ unsigned long addr, u16 site_clobbers,
unsigned len)
{
- unsigned char *call = site;
- unsigned long delta = (unsigned long)target - (unsigned long)(call+5);
- struct branch b;
+ struct branch *b = insnbuf;
+ unsigned long delta = (unsigned long)target - (addr+5);
if (tgt_clobbers & ~site_clobbers)
return len; /* target would clobber too much for this site */
if (len < 5)
return len; /* call too long for patch site */
- b.opcode = 0xe8; /* call */
- b.delta = delta;
- BUILD_BUG_ON(sizeof(b) != 5);
- text_poke(call, (unsigned char *)&b, 5);
+ b->opcode = 0xe8; /* call */
+ b->delta = delta;
+ BUILD_BUG_ON(sizeof(*b) != 5);
return 5;
}
-unsigned paravirt_patch_jmp(void *target, void *site, unsigned len)
+unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
+ unsigned long addr, unsigned len)
{
- unsigned char *jmp = site;
- unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5);
- struct branch b;
+ struct branch *b = insnbuf;
+ unsigned long delta = (unsigned long)target - (addr+5);
if (len < 5)
return len; /* call too long for patch site */
- b.opcode = 0xe9; /* jmp */
- b.delta = delta;
- text_poke(jmp, (unsigned char *)&b, 5);
+ b->opcode = 0xe9; /* jmp */
+ b->delta = delta;
return 5;
}
-unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len)
+unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
+ unsigned long addr, unsigned len)
{
void *opfunc = *((void **)&paravirt_ops + type);
unsigned ret;
if (opfunc == NULL)
/* If there's no function, patch it with a ud2a (BUG) */
- ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a);
+ ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a);
else if (opfunc == paravirt_nop)
/* If the operation is a nop, then nop the callsite */
ret = paravirt_patch_nop();
else if (type == PARAVIRT_PATCH(iret) ||
type == PARAVIRT_PATCH(irq_enable_sysexit))
/* If operation requires a jmp, then jmp */
- ret = paravirt_patch_jmp(opfunc, site, len);
+ ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len);
else
/* Otherwise call the function; assume target could
clobber any caller-save reg */
- ret = paravirt_patch_call(opfunc, CLBR_ANY,
- site, clobbers, len);
+ ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY,
+ addr, clobbers, len);
return ret;
}
-unsigned paravirt_patch_insns(void *site, unsigned len,
+unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
const char *start, const char *end)
{
unsigned insn_len = end - start;
@@ -198,7 +198,7 @@ unsigned paravirt_patch_insns(void *site, unsigned len,
if (insn_len > len || start == NULL)
insn_len = len;
else
- memcpy(site, start, insn_len);
+ memcpy(insnbuf, start, insn_len);
return insn_len;
}
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index 72042bb7ec9..18673e0f193 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -87,12 +87,14 @@ struct vmi_timer_ops vmi_timer_ops;
#define IRQ_PATCH_INT_MASK 0
#define IRQ_PATCH_DISABLE 5
-static inline void patch_offset(unsigned char *eip, unsigned char *dest)
+static inline void patch_offset(void *insnbuf,
+ unsigned long eip, unsigned long dest)
{
- *(unsigned long *)(eip+1) = dest-eip-5;
+ *(unsigned long *)(insnbuf+1) = dest-eip-5;
}
-static unsigned patch_internal(int call, unsigned len, void *insns)
+static unsigned patch_internal(int call, unsigned len, void *insnbuf,
+ unsigned long eip)
{
u64 reloc;
struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
@@ -100,14 +102,14 @@ static unsigned patch_internal(int call, unsigned len, void *insns)
switch(rel->type) {
case VMI_RELOCATION_CALL_REL:
BUG_ON(len < 5);
- *(char *)insns = MNEM_CALL;
- patch_offset(insns, rel->eip);
+ *(char *)insnbuf = MNEM_CALL;
+ patch_offset(insnbuf, eip, (unsigned long)rel->eip);
return 5;
case VMI_RELOCATION_JUMP_REL:
BUG_ON(len < 5);
- *(char *)insns = MNEM_JMP;
- patch_offset(insns, rel->eip);
+ *(char *)insnbuf = MNEM_JMP;
+ patch_offset(insnbuf, eip, (unsigned long)rel->eip);
return 5;
case VMI_RELOCATION_NOP:
@@ -128,21 +130,26 @@ static unsigned patch_internal(int call, unsigned len, void *insns)
* Apply patch if appropriate, return length of new instruction
* sequence. The callee does nop padding for us.
*/
-static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len)
+static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
+ unsigned long eip, unsigned len)
{
switch (type) {
case PARAVIRT_PATCH(irq_disable):
- return patch_internal(VMI_CALL_DisableInterrupts, len, insns);
+ return patch_internal(VMI_CALL_DisableInterrupts, len,
+ insns, eip);
case PARAVIRT_PATCH(irq_enable):
- return patch_internal(VMI_CALL_EnableInterrupts, len, insns);
+ return patch_internal(VMI_CALL_EnableInterrupts, len,
+ insns, eip);
case PARAVIRT_PATCH(restore_fl):
- return patch_internal(VMI_CALL_SetInterruptMask, len, insns);
+ return patch_internal(VMI_CALL_SetInterruptMask, len,
+ insns, eip);
case PARAVIRT_PATCH(save_fl):
- return patch_internal(VMI_CALL_GetInterruptMask, len, insns);
+ return patch_internal(VMI_CALL_GetInterruptMask, len,
+ insns, eip);
case PARAVIRT_PATCH(iret):
- return patch_internal(VMI_CALL_IRET, len, insns);
+ return patch_internal(VMI_CALL_IRET, len, insns, eip);
case PARAVIRT_PATCH(irq_enable_sysexit):
- return patch_internal(VMI_CALL_SYSEXIT, len, insns);
+ return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
default:
break;
}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 8927222b3ab..4241a74d16c 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -82,7 +82,7 @@ static void flush_kernel_map(void *arg)
struct page *p;
/* High level code is not ready for clflush yet */
- if (cpu_has_clflush) {
+ if (0 && cpu_has_clflush) {
list_for_each_entry (p, lh, lru)
cache_flush_page(p);
} else if (boot_cpu_data.x86_model >= 4)
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 85503deeda4..ebc6f3c6634 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -455,3 +455,26 @@ void pcibios_disable_device (struct pci_dev *dev)
if (!dev->msi_enabled && pcibios_disable_irq)
pcibios_disable_irq(dev);
}
+
+struct pci_bus *pci_scan_bus_with_sysdata(int busno)
+{
+ struct pci_bus *bus = NULL;
+ struct pci_sysdata *sd;
+
+ /*
+ * Allocate per-root-bus (not per bus) arch-specific data.
+ * TODO: leak; this memory is never freed.
+ * It's arguable whether it's worth the trouble to care.
+ */
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+ if (!sd) {
+ printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno);
+ return NULL;
+ }
+ sd->node = -1;
+ bus = pci_scan_bus(busno, &pci_root_ops, sd);
+ if (!bus)
+ kfree(sd);
+
+ return bus;
+}
diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c
index e7306dbf6c4..c82cbf4c722 100644
--- a/arch/i386/pci/fixup.c
+++ b/arch/i386/pci/fixup.c
@@ -25,9 +25,9 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
pci_read_config_byte(d, reg++, &subb);
DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
if (busno)
- pci_scan_bus(busno, &pci_root_ops, NULL); /* Bus A */
+ pci_scan_bus_with_sysdata(busno); /* Bus A */
if (suba < subb)
- pci_scan_bus(suba+1, &pci_root_ops, NULL); /* Bus B */
+ pci_scan_bus_with_sysdata(suba+1); /* Bus B */
}
pcibios_last_bus = -1;
}
@@ -42,7 +42,7 @@ static void __devinit pci_fixup_i450gx(struct pci_dev *d)
u8 busno;
pci_read_config_byte(d, 0x4a, &busno);
printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno);
- pci_scan_bus(busno, &pci_root_ops, NULL);
+ pci_scan_bus_with_sysdata(busno);
pcibios_last_bus = -1;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx);
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index f2cb942f828..665db063a40 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -138,8 +138,9 @@ static void __init pirq_peer_trick(void)
for(i = 1; i < 256; i++) {
if (!busmap[i] || pci_find_bus(0, i))
continue;
- if (pci_scan_bus(i, &pci_root_ops, NULL))
- printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
+ if (pci_scan_bus_with_sysdata(i))
+ printk(KERN_INFO "PCI: Discovered primary peer "
+ "bus %02x [IRQ]\n", i);
}
pcibios_last_bus = -1;
}
diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c
index 149a9588c25..5565d7016b7 100644
--- a/arch/i386/pci/legacy.c
+++ b/arch/i386/pci/legacy.c
@@ -26,7 +26,7 @@ static void __devinit pcibios_fixup_peer_bridges(void)
l != 0x0000 && l != 0xffff) {
DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
- pci_scan_bus(n, &pci_root_ops, NULL);
+ pci_scan_bus_with_sysdata(n);
break;
}
}
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index bb1afd9e589..0d46b7a88b3 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -82,16 +82,15 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
switch (len) {
case 1:
- *value = readb(mmcfg_virt_addr + reg);
+ *value = mmio_config_readb(mmcfg_virt_addr + reg);
break;
case 2:
- *value = readw(mmcfg_virt_addr + reg);
+ *value = mmio_config_readw(mmcfg_virt_addr + reg);
break;
case 4:
- *value = readl(mmcfg_virt_addr + reg);
+ *value = mmio_config_readl(mmcfg_virt_addr + reg);
break;
}
-
spin_unlock_irqrestore(&pci_config_lock, flags);
return 0;
@@ -116,16 +115,15 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
switch (len) {
case 1:
- writeb(value, mmcfg_virt_addr + reg);
+ mmio_config_writeb(mmcfg_virt_addr + reg, value);
break;
case 2:
- writew(value, mmcfg_virt_addr + reg);
+ mmio_config_writew(mmcfg_virt_addr + reg, value);
break;
case 4:
- writel(value, mmcfg_virt_addr + reg);
+ mmio_config_writel(mmcfg_virt_addr + reg, value);
break;
}
-
spin_unlock_irqrestore(&pci_config_lock, flags);
return 0;
diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c
index adbe17a38f6..f5f165f69e0 100644
--- a/arch/i386/pci/numa.c
+++ b/arch/i386/pci/numa.c
@@ -96,10 +96,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
pci_read_config_byte(d, reg++, &suba);
pci_read_config_byte(d, reg++, &subb);
DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
- if (busno)
- pci_scan_bus(QUADLOCAL2BUS(quad,busno), &pci_root_ops, NULL); /* Bus A */
- if (suba < subb)
- pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), &pci_root_ops, NULL); /* Bus B */
+ if (busno) {
+ /* Bus A */
+ pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno));
+ }
+ if (suba < subb) {
+ /* Bus B */
+ pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1));
+ }
}
pcibios_last_bus = -1;
}
@@ -123,8 +127,7 @@ static int __init pci_numa_init(void)
continue;
printk("Scanning PCI bus %d for quad %d\n",
QUADLOCAL2BUS(quad,0), quad);
- pci_scan_bus(QUADLOCAL2BUS(quad,0),
- &pci_root_ops, NULL);
+ pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0));
}
return 0;
}
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index e58bae2076a..8c66f275756 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -104,3 +104,46 @@ extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
unsigned int devfn);
extern int __init pci_mmcfg_arch_init(void);
+
+/*
+ * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
+ * on their northbridge except through the %eax register. As such, you MUST
+ * NOT use normal IOMEM accesses, you need to only use the magic mmio-config
+ * accessor functions.
+ * In fact just use pci_config_*, nothing else please.
+ */
+static inline unsigned char mmio_config_readb(void __iomem *pos)
+{
+ u8 val;
+ asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
+ return val;
+}
+
+static inline unsigned short mmio_config_readw(void __iomem *pos)
+{
+ u16 val;
+ asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
+ return val;
+}
+
+static inline unsigned int mmio_config_readl(void __iomem *pos)
+{
+ u32 val;
+ asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
+ return val;
+}
+
+static inline void mmio_config_writeb(void __iomem *pos, u8 val)
+{
+ asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory");
+}
+
+static inline void mmio_config_writew(void __iomem *pos, u16 val)
+{
+ asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory");
+}
+
+static inline void mmio_config_writel(void __iomem *pos, u32 val)
+{
+ asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory");
+}
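
The "=a"/"a" asm constraints are the whole point of these helpers: a plain
readl()/writel() leaves the compiler free to address the window through any
register, which the Fam10h northbridge mishandles. A hedged usage sketch
(offset 0x00 is the standard PCI vendor/device ID word):

    static u32 mmcfg_vendor_device(void __iomem *dev_base)
    {
        /* Guaranteed to go through %eax by the accessor above. */
        return mmio_config_readl(dev_base + 0x00);
    }
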
diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c
index f1b486d4190..8ecb1c72259 100644
--- a/arch/i386/pci/visws.c
+++ b/arch/i386/pci/visws.c
@@ -101,8 +101,8 @@ static int __init pcibios_init(void)
"bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0);
raw_pci_ops = &pci_direct_conf1;
- pci_scan_bus(pci_bus0, &pci_root_ops, NULL);
- pci_scan_bus(pci_bus1, &pci_root_ops, NULL);
+ pci_scan_bus_with_sysdata(pci_bus0);
+ pci_scan_bus_with_sysdata(pci_bus1);
pci_fixup_irqs(visws_swizzle, visws_map_irq);
pcibios_resource_survey();
return 0;
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
index 9a8c1181c00..f0c37511d8d 100644
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -842,7 +842,8 @@ void __init xen_setup_vcpu_info_placement(void)
}
}
-static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
+static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
+ unsigned long addr, unsigned len)
{
char *start, *end, *reloc;
unsigned ret;
@@ -869,7 +870,7 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
if (start == NULL || (end-start) > len)
goto default_patch;
- ret = paravirt_patch_insns(insns, len, start, end);
+ ret = paravirt_patch_insns(insnbuf, len, start, end);
/* Note: because reloc is assigned from something that
appears to be an array, gcc assumes it's non-null,
@@ -877,8 +878,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
end. */
if (reloc > start && reloc < end) {
int reloc_off = reloc - start;
- long *relocp = (long *)(insns + reloc_off);
- long delta = start - (char *)insns;
+ long *relocp = (long *)(insnbuf + reloc_off);
+ long delta = start - (char *)addr;
*relocp += delta;
}
@@ -886,7 +887,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
default_patch:
default:
- ret = paravirt_patch_default(type, clobbers, insns, len);
+ ret = paravirt_patch_default(type, clobbers, insnbuf,
+ addr, len);
break;
}
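
Why the extra parameter: the patch body is now assembled into a scratch
buffer (insnbuf) and only later installed at the call site (addr), so any
address embedded in the template must be re-based against addr rather than
the buffer. The arithmetic, restated as a hedged stand-alone helper (names
illustrative):

    /* A word that was correct when the template executed at
     * template_start needs the displacement to final_addr added
     * once the copied code runs there instead. */
    static long rebase_reloc(long word, char *template_start,
                             unsigned long final_addr)
    {
        return word + (template_start - (char *)final_addr);
    }
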
diff --git a/arch/sparc/kernel/prom.c b/arch/sparc/kernel/prom.c
index 39fbd3c8ab0..cd4fb79aa3a 100644
--- a/arch/sparc/kernel/prom.c
+++ b/arch/sparc/kernel/prom.c
@@ -102,6 +102,21 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
}
EXPORT_SYMBOL(of_set_property);
+int of_find_in_proplist(const char *list, const char *match, int len)
+{
+ while (len > 0) {
+ int l;
+
+ if (!strcmp(list, match))
+ return 1;
+ l = strlen(list) + 1;
+ list += l;
+ len -= l;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(of_find_in_proplist);
+
static unsigned int prom_early_allocated;
static void * __init prom_early_alloc(unsigned long size)
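
Properties such as "compatible" pack several NUL-terminated strings into a
single buffer, which is why the helper advances by strlen() + 1 per entry.
A hedged usage sketch (the node and property values are illustrative):

    static int node_is_mdesc(struct device_node *dp)
    {
        const char *compat;
        int len;

        /* Property bytes look like "SUNW,n2-mdesc\0SUNW,mdesc\0". */
        compat = of_get_property(dp, "compatible", &len);
        return compat && of_find_in_proplist(compat, "SUNW,mdesc", len);
    }
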
diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c
index 7eb81d3954d..e43db73f2b9 100644
--- a/arch/sparc64/kernel/cpu.c
+++ b/arch/sparc64/kernel/cpu.c
@@ -1,7 +1,7 @@
/* cpu.c: Dinky routines to look for the kind of Sparc cpu
* we are on.
*
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
*/
#include <linux/kernel.h>
@@ -13,6 +13,7 @@
#include <asm/fpumacro.h>
#include <asm/cpudata.h>
#include <asm/spitfire.h>
+#include <asm/oplib.h>
DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
@@ -61,21 +62,40 @@ struct cpu_iu_info linux_sparc_chips[] = {
#define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips)
-char *sparc_cpu_type = "cpu-oops";
-char *sparc_fpu_type = "fpu-oops";
+char *sparc_cpu_type;
+char *sparc_fpu_type;
unsigned int fsr_storage;
+static void __init sun4v_cpu_probe(void)
+{
+ switch (sun4v_chip_type) {
+ case SUN4V_CHIP_NIAGARA1:
+ sparc_cpu_type = "UltraSparc T1 (Niagara)";
+ sparc_fpu_type = "UltraSparc T1 integrated FPU";
+ break;
+
+ case SUN4V_CHIP_NIAGARA2:
+ sparc_cpu_type = "UltraSparc T2 (Niagara2)";
+ sparc_fpu_type = "UltraSparc T2 integrated FPU";
+ break;
+
+ default:
+ printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
+ prom_cpu_compatible);
+ sparc_cpu_type = "Unknown SUN4V CPU";
+ sparc_fpu_type = "Unknown SUN4V FPU";
+ break;
+ }
+}
+
void __init cpu_probe(void)
{
unsigned long ver, fpu_vers, manuf, impl, fprs;
int i;
- if (tlb_type == hypervisor) {
- sparc_cpu_type = "UltraSparc T1 (Niagara)";
- sparc_fpu_type = "UltraSparc T1 integrated FPU";
- return;
- }
+ if (tlb_type == hypervisor)
+ return sun4v_cpu_probe();
fprs = fprs_read();
fprs_write(FPRS_FEF);
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 9dbd833d79d..ac18bd8e273 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -97,7 +97,8 @@ sparc64_boot:
.globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
.globl prom_boot_mapped_pc, prom_boot_mapping_mode
.globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
- .globl is_sun4v
+ .globl prom_compatible_name, prom_cpu_path, prom_cpu_compatible
+ .globl is_sun4v, sun4v_chip_type
prom_peer_name:
.asciz "peer"
prom_compatible_name:
@@ -106,6 +107,8 @@ prom_finddev_name:
.asciz "finddevice"
prom_chosen_path:
.asciz "/chosen"
+prom_cpu_path:
+ .asciz "/cpu"
prom_getprop_name:
.asciz "getprop"
prom_mmu_name:
@@ -120,9 +123,13 @@ prom_unmap_name:
.asciz "unmap"
prom_sun4v_name:
.asciz "sun4v"
+prom_niagara_prefix:
+ .asciz "SUNW,UltraSPARC-T"
.align 4
prom_root_compatible:
.skip 64
+prom_cpu_compatible:
+ .skip 64
prom_root_node:
.word 0
prom_mmu_ihandle_cache:
@@ -138,6 +145,8 @@ prom_boot_mapping_phys_low:
.xword 0
is_sun4v:
.word 0
+sun4v_chip_type:
+ .word SUN4V_CHIP_INVALID
1:
rd %pc, %l0
@@ -296,13 +305,13 @@ is_sun4v:
sethi %hi(prom_sun4v_name), %g7
or %g7, %lo(prom_sun4v_name), %g7
mov 5, %g3
-1: ldub [%g7], %g2
+90: ldub [%g7], %g2
ldub [%g1], %g4
cmp %g2, %g4
- bne,pn %icc, 2f
+ bne,pn %icc, 80f
add %g7, 1, %g7
subcc %g3, 1, %g3
- bne,pt %xcc, 1b
+ bne,pt %xcc, 90b
add %g1, 1, %g1
sethi %hi(is_sun4v), %g1
@@ -310,7 +319,80 @@ is_sun4v:
mov 1, %g7
stw %g7, [%g1]
-2:
+ /* cpu_node = prom_finddevice("/cpu") */
+ mov (1b - prom_finddev_name), %l1
+ mov (1b - prom_cpu_path), %l2
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %sp, (192 + 128), %sp
+
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice"
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/cpu"
+ stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ ldx [%sp + 2047 + 128 + 0x20], %l4 ! cpu device node
+
+ mov (1b - prom_getprop_name), %l1
+ mov (1b - prom_compatible_name), %l2
+ mov (1b - prom_cpu_compatible), %l5
+ sub %l0, %l1, %l1
+ sub %l0, %l2, %l2
+ sub %l0, %l5, %l5
+
+ /* prom_getproperty(cpu_node, "compatible",
+ * &prom_cpu_compatible, 64)
+ */
+ stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
+ mov 4, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
+ mov 1, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
+ stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, cpu_node
+ stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible"
+ stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_cpu_compatible
+ mov 64, %l3
+ stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size
+ stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
+ call %l7
+ add %sp, (2047 + 128), %o0 ! argument array
+
+ add %sp, (192 + 128), %sp
+
+ sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ sethi %hi(prom_niagara_prefix), %g7
+ or %g7, %lo(prom_niagara_prefix), %g7
+ mov 17, %g3
+90: ldub [%g7], %g2
+ ldub [%g1], %g4
+ cmp %g2, %g4
+ bne,pn %icc, 4f
+ add %g7, 1, %g7
+ subcc %g3, 1, %g3
+ bne,pt %xcc, 90b
+ add %g1, 1, %g1
+
+ sethi %hi(prom_cpu_compatible), %g1
+ or %g1, %lo(prom_cpu_compatible), %g1
+ ldub [%g1 + 17], %g2
+ cmp %g2, '1'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA1, %g4
+ cmp %g2, '2'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_NIAGARA2, %g4
+4:
+ mov SUN4V_CHIP_UNKNOWN, %g4
+5: sethi %hi(sun4v_chip_type), %g2
+ or %g2, %lo(sun4v_chip_type), %g2
+ stw %g4, [%g2]
+
+80:
BRANCH_IF_SUN4V(g1, jump_to_sun4u_init)
BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
@@ -414,6 +496,24 @@ niagara_tlb_fixup:
stw %g2, [%g1 + %lo(tlb_type)]
/* Patch copy/clear ops. */
+ sethi %hi(sun4v_chip_type), %g1
+ lduw [%g1 + %lo(sun4v_chip_type)], %g1
+ cmp %g1, SUN4V_CHIP_NIAGARA1
+ be,pt %xcc, niagara_patch
+ cmp %g1, SUN4V_CHIP_NIAGARA2
+ be,pt %xcc, niagara_patch
+ nop
+
+ call generic_patch_copyops
+ nop
+ call generic_patch_bzero
+ nop
+ call generic_patch_pageops
+ nop
+
+ ba,a,pt %xcc, 80f
+
+niagara_patch:
call niagara_patch_copyops
nop
call niagara_patch_bzero
@@ -421,6 +521,7 @@ niagara_tlb_fixup:
call niagara_patch_pageops
nop
+80:
/* Patch TLB/cache ops. */
call hypervisor_patch_cachetlbops
nop
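
For reference, the stx blocks in the sun4v probing code above build the
OpenBoot client-interface cell array on the stack (2047 is the sparc64
stack bias, 128 skips the register save area) and call through the PROM
entry point cached in %l7. In rough C terms the array is laid out like
this (the struct is purely illustrative, not a kernel type):

    struct obp_args {
        const char *service;        /* +0x00: "finddevice", "getprop" */
        unsigned long num_args;     /* +0x08: number of input cells */
        unsigned long num_rets;     /* +0x10: number of output cells */
        unsigned long cells[5];     /* +0x18: inputs, then outputs */
    };
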
diff --git a/arch/sparc64/kernel/hvtramp.S b/arch/sparc64/kernel/hvtramp.S
index a55c252e18c..b692e044a46 100644
--- a/arch/sparc64/kernel/hvtramp.S
+++ b/arch/sparc64/kernel/hvtramp.S
@@ -115,11 +115,8 @@ hv_cpu_startup:
call hard_smp_processor_id
nop
- mov %o0, %o1
- mov 0, %o0
- mov 0, %o2
- call sun4v_init_mondo_queues
- mov 1, %o3
+ call sun4v_register_mondo_queues
+ nop
call init_cur_cpu_trap
mov %g6, %o0
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index db31bf6b42d..384abf410cf 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -929,7 +929,7 @@ static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type
}
}
-static void __cpuinit sun4v_register_mondo_queues(int this_cpu)
+void __cpuinit sun4v_register_mondo_queues(int this_cpu)
{
struct trap_per_cpu *tb = &trap_block[this_cpu];
@@ -943,20 +943,10 @@ static void __cpuinit sun4v_register_mondo_queues(int this_cpu)
tb->nonresum_qmask);
}
-static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem)
+static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
{
unsigned long size = PAGE_ALIGN(qmask + 1);
- unsigned long order = get_order(size);
- void *p = NULL;
-
- if (use_bootmem) {
- p = __alloc_bootmem_low(size, size, 0);
- } else {
- struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order);
- if (page)
- p = page_address(page);
- }
-
+ void *p = __alloc_bootmem_low(size, size, 0);
if (!p) {
prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
prom_halt();
@@ -965,19 +955,10 @@ static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask
*pa_ptr = __pa(p);
}
-static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem)
+static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
{
unsigned long size = PAGE_ALIGN(qmask + 1);
- unsigned long order = get_order(size);
- void *p = NULL;
-
- if (use_bootmem) {
- p = __alloc_bootmem_low(size, size, 0);
- } else {
- struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order);
- if (page)
- p = page_address(page);
- }
+ void *p = __alloc_bootmem_low(size, size, 0);
if (!p) {
prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
@@ -987,18 +968,14 @@ static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask,
*pa_ptr = __pa(p);
}
-static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem)
+static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
{
#ifdef CONFIG_SMP
void *page;
BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
- if (use_bootmem)
- page = alloc_bootmem_low_pages(PAGE_SIZE);
- else
- page = (void *) get_zeroed_page(GFP_ATOMIC);
-
+ page = alloc_bootmem_low_pages(PAGE_SIZE);
if (!page) {
prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
prom_halt();
@@ -1009,30 +986,27 @@ static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_
#endif
}
-/* Allocate and register the mondo and error queues for this cpu. */
-void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load)
+/* Allocate mondo and error queues for all possible cpus. */
+static void __init sun4v_init_mondo_queues(void)
{
- struct trap_per_cpu *tb = &trap_block[cpu];
+ int cpu;
- if (alloc) {
- alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask, use_bootmem);
- alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask, use_bootmem);
- alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask, use_bootmem);
- alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask, use_bootmem);
- alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask, use_bootmem);
- alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, tb->nonresum_qmask, use_bootmem);
+ for_each_possible_cpu(cpu) {
+ struct trap_per_cpu *tb = &trap_block[cpu];
- init_cpu_send_mondo_info(tb, use_bootmem);
- }
+ alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
+ alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
+ alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
+ alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
+ alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
+ alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
+ tb->nonresum_qmask);
- if (load) {
- if (cpu != hard_smp_processor_id()) {
- prom_printf("SUN4V: init mondo on cpu %d not %d\n",
- cpu, hard_smp_processor_id());
- prom_halt();
- }
- sun4v_register_mondo_queues(cpu);
+ init_cpu_send_mondo_info(tb);
}
+
+ /* Load up the boot cpu's entries. */
+ sun4v_register_mondo_queues(hard_smp_processor_id());
}
static struct irqaction timer_irq_action = {
@@ -1047,7 +1021,7 @@ void __init init_IRQ(void)
memset(&ivector_table[0], 0, sizeof(ivector_table));
if (tlb_type == hypervisor)
- sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1);
+ sun4v_init_mondo_queues();
/* We need to clear any IRQ's pending in the soft interrupt
* registers, a spurious one could be left around from the
diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c
index cce4d0ddf5d..95059c2ec41 100644
--- a/arch/sparc64/kernel/mdesc.c
+++ b/arch/sparc64/kernel/mdesc.c
@@ -568,20 +568,6 @@ static void __init report_platform_properties(void)
mdesc_release(hp);
}
-static int inline find_in_proplist(const char *list, const char *match, int len)
-{
- while (len > 0) {
- int l;
-
- if (!strcmp(list, match))
- return 1;
- l = strlen(list) + 1;
- list += l;
- len -= l;
- }
- return 0;
-}
-
static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
struct mdesc_handle *hp,
u64 mp)
@@ -596,10 +582,10 @@ static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
switch (*level) {
case 1:
- if (find_in_proplist(type, "instn", type_len)) {
+ if (of_find_in_proplist(type, "instn", type_len)) {
c->icache_size = *size;
c->icache_line_size = *line_size;
- } else if (find_in_proplist(type, "data", type_len)) {
+ } else if (of_find_in_proplist(type, "data", type_len)) {
c->dcache_size = *size;
c->dcache_line_size = *line_size;
}
@@ -677,7 +663,7 @@ static void __devinit set_core_ids(struct mdesc_handle *hp)
continue;
type = mdesc_get_property(hp, mp, "type", &len);
- if (!find_in_proplist(type, "instn", len))
+ if (!of_find_in_proplist(type, "instn", len))
continue;
mark_core_ids(hp, mp, idx);
@@ -718,8 +704,8 @@ static void __devinit __set_proc_ids(struct mdesc_handle *hp,
int len;
type = mdesc_get_property(hp, mp, "type", &len);
- if (!find_in_proplist(type, "int", len) &&
- !find_in_proplist(type, "integer", len))
+ if (!of_find_in_proplist(type, "int", len) &&
+ !of_find_in_proplist(type, "integer", len))
continue;
mark_proc_ids(hp, mp, idx);
diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c
index f4e0a9ad9be..d1a78c976ce 100644
--- a/arch/sparc64/kernel/prom.c
+++ b/arch/sparc64/kernel/prom.c
@@ -107,6 +107,21 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
}
EXPORT_SYMBOL(of_set_property);
+int of_find_in_proplist(const char *list, const char *match, int len)
+{
+ while (len > 0) {
+ int l;
+
+ if (!strcmp(list, match))
+ return 1;
+ l = strlen(list) + 1;
+ list += l;
+ len -= l;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(of_find_in_proplist);
+
static unsigned int prom_early_allocated;
static void * __init prom_early_alloc(unsigned long size)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index b448d33321c..b84c49e3697 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -334,8 +334,6 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
}
#endif
-extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
-
extern unsigned long sparc64_cpu_startup;
/* The OBP cpu startup callback truncates the 3rd arg cookie to
@@ -359,9 +357,6 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
cpu_new_thread = task_thread_info(p);
if (tlb_type == hypervisor) {
- /* Alloc the mondo queues, cpu will load them. */
- sun4v_init_mondo_queues(0, cpu, 1, 0);
-
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
if (ldom_domaining_enabled)
ldom_startcpu_cpuid(cpu,
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index d270c2f0be0..23fad7ebdd0 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -168,6 +168,7 @@ EXPORT_SYMBOL(change_bit);
EXPORT_SYMBOL(__flushw_user);
EXPORT_SYMBOL(tlb_type);
+EXPORT_SYMBOL(sun4v_chip_type);
EXPORT_SYMBOL(get_fb_unmapped_area);
EXPORT_SYMBOL(flush_icache_range);
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index a4dc01a3d23..9448595f906 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -366,11 +366,8 @@ after_lock_tlb:
call hard_smp_processor_id
nop
- mov %o0, %o1
- mov 0, %o0
- mov 0, %o2
- call sun4v_init_mondo_queues
- mov 1, %o3
+ call sun4v_register_mondo_queues
+ nop
1: call init_cur_cpu_trap
ldx [%l0], %o0
diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c
index 3685daf5157..1550ac5673d 100644
--- a/arch/sparc64/kernel/vio.c
+++ b/arch/sparc64/kernel/vio.c
@@ -16,21 +16,6 @@
#include <asm/mdesc.h>
#include <asm/vio.h>
-static inline int find_in_proplist(const char *list, const char *match,
- int len)
-{
- while (len > 0) {
- int l;
-
- if (!strcmp(list, match))
- return 1;
- l = strlen(list) + 1;
- list += l;
- len -= l;
- }
- return 0;
-}
-
static const struct vio_device_id *vio_match_device(
const struct vio_device_id *matches,
const struct vio_dev *dev)
@@ -49,7 +34,7 @@ static const struct vio_device_id *vio_match_device(
if (matches->compat[0]) {
match &= len &&
- find_in_proplist(compat, matches->compat, len);
+ of_find_in_proplist(compat, matches->compat, len);
}
if (match)
return matches;
@@ -406,7 +391,7 @@ static int __init vio_init(void)
"property\n");
goto out_release;
}
- if (!find_in_proplist(compat, channel_devices_compat, len)) {
+ if (!of_find_in_proplist(compat, channel_devices_compat, len)) {
printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
"compat entry.\n", channel_devices_compat);
goto out_release;
diff --git a/arch/sparc64/lib/GENbzero.S b/arch/sparc64/lib/GENbzero.S
new file mode 100644
index 00000000000..f9c71d64eba
--- /dev/null
+++ b/arch/sparc64/lib/GENbzero.S
@@ -0,0 +1,160 @@
+/* GENbzero.S: Generic sparc64 memset/clear_user.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <asm/asi.h>
+
+#define EX_ST(x,y) \
+98: x,y; \
+ .section .fixup; \
+ .align 4; \
+99: retl; \
+ mov %o1, %o0; \
+ .section __ex_table; \
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+ .align 32
+ .text
+
+ .globl GENmemset
+ .type GENmemset, #function
+GENmemset: /* %o0=buf, %o1=pat, %o2=len */
+ and %o1, 0xff, %o3
+ mov %o2, %o1
+ sllx %o3, 8, %g1
+ or %g1, %o3, %o2
+ sllx %o2, 16, %g1
+ or %g1, %o2, %o2
+ sllx %o2, 32, %g1
+ ba,pt %xcc, 1f
+ or %g1, %o2, %o2
+
+ .globl GENbzero
+ .type GENbzero, #function
+GENbzero:
+ clr %o2
+1: brz,pn %o1, GENbzero_return
+ mov %o0, %o3
+
+ /* %o5: saved %asi, restored at GENbzero_done
+ * %o4: store %asi to use
+ */
+ rd %asi, %o5
+ mov ASI_P, %o4
+ wr %o4, 0x0, %asi
+
+GENbzero_from_clear_user:
+ cmp %o1, 15
+ bl,pn %icc, GENbzero_tiny
+ andcc %o0, 0x7, %g1
+ be,pt %xcc, 2f
+ mov 8, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: EX_ST(stba %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 1, %g1
+ bne,pt %xcc, 1b
+ add %o0, 1, %o0
+2: cmp %o1, 128
+ bl,pn %icc, GENbzero_medium
+ andcc %o0, (64 - 1), %g1
+ be,pt %xcc, GENbzero_pre_loop
+ mov 64, %g2
+ sub %g2, %g1, %g1
+ sub %o1, %g1, %o1
+1: EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 8, %g1
+ bne,pt %xcc, 1b
+ add %o0, 8, %o0
+
+GENbzero_pre_loop:
+ andn %o1, (64 - 1), %g1
+ sub %o1, %g1, %o1
+GENbzero_loop:
+ EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x08] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x10] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x18] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x20] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x28] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x30] %asi)
+ EX_ST(stxa %o2, [%o0 + 0x38] %asi)
+ subcc %g1, 64, %g1
+ bne,pt %xcc, GENbzero_loop
+ add %o0, 64, %o0
+
+ membar #Sync
+ wr %o4, 0x0, %asi
+ brz,pn %o1, GENbzero_done
+GENbzero_medium:
+ andncc %o1, 0x7, %g1
+ be,pn %xcc, 2f
+ sub %o1, %g1, %o1
+1: EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+ subcc %g1, 8, %g1
+ bne,pt %xcc, 1b
+ add %o0, 8, %o0
+2: brz,pt %o1, GENbzero_done
+ nop
+
+GENbzero_tiny:
+1: EX_ST(stba %o2, [%o0 + 0x00] %asi)
+ subcc %o1, 1, %o1
+ bne,pt %icc, 1b
+ add %o0, 1, %o0
+
+ /* fallthrough */
+
+GENbzero_done:
+ wr %o5, 0x0, %asi
+
+GENbzero_return:
+ retl
+ mov %o3, %o0
+ .size GENbzero, .-GENbzero
+ .size GENmemset, .-GENmemset
+
+ .globl GENclear_user
+ .type GENclear_user, #function
+GENclear_user: /* %o0=buf, %o1=len */
+ rd %asi, %o5
+ brz,pn %o1, GENbzero_done
+ clr %o3
+ cmp %o5, ASI_AIUS
+ bne,pn %icc, GENbzero
+ clr %o2
+ ba,pt %xcc, GENbzero_from_clear_user
+ mov ASI_AIUS, %o4
+ .size GENclear_user, .-GENclear_user
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define GEN_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl generic_patch_bzero
+ .type generic_patch_bzero,#function
+generic_patch_bzero:
+ GEN_DO_PATCH(memset, GENmemset)
+ GEN_DO_PATCH(__bzero, GENbzero)
+ GEN_DO_PATCH(__clear_user, GENclear_user)
+ retl
+ nop
+ .size generic_patch_bzero,.-generic_patch_bzero
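
GEN_DO_PATCH (repeated in GENpage.S and GENpatch.S below) overwrites the
first two instructions of OLD with "ba,pt %xcc, NEW; nop". The sll-by-11 /
srl-by-13 pair converts the byte distance into the branch's 19-bit word
displacement. A hedged C restatement of that encoding:

    #include <stdint.h>

    #define BRANCH_ALWAYS 0x10680000u  /* ba,pt %xcc with empty disp19 */

    static uint32_t branch_always_to(uint32_t old_addr, uint32_t new_addr)
    {
        uint32_t delta = new_addr - old_addr;

        /* Drop the low two bits (instructions are word-aligned) and
         * truncate to 19 bits, exactly what "sll %g1, 11; srl %g1,
         * 11 + 2" does to the low 32 bits of the register. */
        return BRANCH_ALWAYS | ((delta << 11) >> 13);
    }
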
diff --git a/arch/sparc64/lib/GENcopy_from_user.S b/arch/sparc64/lib/GENcopy_from_user.S
new file mode 100644
index 00000000000..2b9df99e87f
--- /dev/null
+++ b/arch/sparc64/lib/GENcopy_from_user.S
@@ -0,0 +1,34 @@
+/* GENcopy_from_user.S: Generic sparc64 copy from userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_LD(x) \
+98: x; \
+ .section .fixup; \
+ .align 4; \
+99: retl; \
+ mov 1, %o0; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS 0x11
+#endif
+
+#define FUNC_NAME GENcopy_from_user
+#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest
+#define EX_RETVAL(x) 0
+
+#ifdef __KERNEL__
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, memcpy_user_stub; \
+ nop
+#endif
+
+#include "GENmemcpy.S"
diff --git a/arch/sparc64/lib/GENcopy_to_user.S b/arch/sparc64/lib/GENcopy_to_user.S
new file mode 100644
index 00000000000..bb3f7084daf
--- /dev/null
+++ b/arch/sparc64/lib/GENcopy_to_user.S
@@ -0,0 +1,38 @@
+/* GENcopy_to_user.S: Generic sparc64 copy to userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_ST(x) \
+98: x; \
+ .section .fixup; \
+ .align 4; \
+99: retl; \
+ mov 1, %o0; \
+ .section __ex_table,"a";\
+ .align 4; \
+ .word 98b, 99b; \
+ .text; \
+ .align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS 0x11
+#endif
+
+#define FUNC_NAME GENcopy_to_user
+#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
+#define EX_RETVAL(x) 0
+
+#ifdef __KERNEL__
+ /* Writing to %asi is _expensive_ so we hardcode it.
+ * Reading %asi to check for KERNEL_DS is comparatively
+ * cheap.
+ */
+#define PREAMBLE \
+ rd %asi, %g1; \
+ cmp %g1, ASI_AIUS; \
+ bne,pn %icc, memcpy_user_stub; \
+ nop
+#endif
+
+#include "GENmemcpy.S"
diff --git a/arch/sparc64/lib/GENmemcpy.S b/arch/sparc64/lib/GENmemcpy.S
new file mode 100644
index 00000000000..89358ee9485
--- /dev/null
+++ b/arch/sparc64/lib/GENmemcpy.S
@@ -0,0 +1,121 @@
+/* GENmemcpy.S: Generic sparc64 memcpy.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#define GLOBAL_SPARE %g7
+#else
+#define GLOBAL_SPARE %g5
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x) x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x) x
+#endif
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x) x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest) type [addr], dest
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr) type src, [addr]
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME GENmemcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+ .register %g2,#scratch
+ .register %g3,#scratch
+
+ .text
+ .align 64
+
+ .globl FUNC_NAME
+ .type FUNC_NAME,#function
+FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
+ srlx %o2, 31, %g2
+ cmp %g2, 0
+ tne %XCC, 5
+ PREAMBLE
+ mov %o0, GLOBAL_SPARE
+
+ cmp %o2, 0
+ be,pn %XCC, 85f
+ or %o0, %o1, %o3
+ cmp %o2, 16
+ blu,a,pn %XCC, 80f
+ or %o3, %o2, %o3
+
+ xor %o0, %o1, %o4
+ andcc %o4, 0x7, %g0
+ bne,a,pn %XCC, 90f
+ sub %o0, %o1, %o3
+
+ and %o0, 0x7, %o4
+ sub %o4, 0x8, %o4
+ sub %g0, %o4, %o4
+ sub %o2, %o4, %o2
+1: subcc %o4, 1, %o4
+ EX_LD(LOAD(ldub, %o1, %g1))
+ EX_ST(STORE(stb, %g1, %o0))
+ add %o1, 1, %o1
+ bne,pt %XCC, 1b
+ add %o0, 1, %o0
+
+ andn %o2, 0x7, %g1
+ sub %o2, %g1, %o2
+1: subcc %g1, 0x8, %g1
+ EX_LD(LOAD(ldx, %o1, %g2))
+ EX_ST(STORE(stx, %g2, %o0))
+ add %o1, 0x8, %o1
+ bne,pt %XCC, 1b
+ add %o0, 0x8, %o0
+
+ brz,pt %o2, 85f
+ sub %o0, %o1, %o3
+ ba,a,pt %XCC, 90f
+
+ .align 64
+80: /* 0 < len <= 16 */
+ andcc %o3, 0x3, %g0
+ bne,pn %XCC, 90f
+ sub %o0, %o1, %o3
+
+1:
+ subcc %o2, 4, %o2
+ EX_LD(LOAD(lduw, %o1, %g1))
+ EX_ST(STORE(stw, %g1, %o1 + %o3))
+ bgu,pt %XCC, 1b
+ add %o1, 4, %o1
+
+85: retl
+ mov EX_RETVAL(GLOBAL_SPARE), %o0
+
+ .align 32
+90:
+ subcc %o2, 1, %o2
+ EX_LD(LOAD(ldub, %o1, %g1))
+ EX_ST(STORE(stb, %g1, %o1 + %o3))
+ bgu,pt %XCC, 90b
+ add %o1, 1, %o1
+ retl
+ mov EX_RETVAL(GLOBAL_SPARE), %o0
+
+ .size FUNC_NAME, .-FUNC_NAME
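
GENmemcpy.S is a template: GENcopy_from_user.S and GENcopy_to_user.S
define FUNC_NAME, LOAD/STORE and the EX_* fault wrappers, then #include
this body, so one copy loop yields three routines. The same trick in
miniature, as stand-alone C (all names hypothetical):

    #include <stdio.h>
    #include <stddef.h>

    /* "Template" body, written once against LOAD/STORE hooks. */
    #define DEFINE_COPY(name, LOAD, STORE)                        \
    static void name(char *dst, const char *src, size_t len)     \
    {                                                             \
        while (len--)                                             \
            STORE(dst++, LOAD(src++));                            \
    }

    #define PLAIN_LOAD(p)      (*(p))
    #define PLAIN_STORE(p, v)  (*(p) = (v))

    DEFINE_COPY(demo_memcpy, PLAIN_LOAD, PLAIN_STORE)

    int main(void)
    {
        char buf[8];

        demo_memcpy(buf, "sparc64", sizeof(buf));
        puts(buf);
        return 0;
    }
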
diff --git a/arch/sparc64/lib/GENpage.S b/arch/sparc64/lib/GENpage.S
new file mode 100644
index 00000000000..2ef9d05f21b
--- /dev/null
+++ b/arch/sparc64/lib/GENpage.S
@@ -0,0 +1,79 @@
+/* GENpage.S: Generic clear and copy page.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <asm/page.h>
+
+ .text
+ .align 32
+
+GENcopy_user_page:
+ set PAGE_SIZE, %g7
+1: ldx [%o1 + 0x00], %o2
+ ldx [%o1 + 0x08], %o3
+ ldx [%o1 + 0x10], %o4
+ ldx [%o1 + 0x18], %o5
+ stx %o2, [%o0 + 0x00]
+ stx %o3, [%o0 + 0x08]
+ stx %o4, [%o0 + 0x10]
+ stx %o5, [%o0 + 0x18]
+ ldx [%o1 + 0x20], %o2
+ ldx [%o1 + 0x28], %o3
+ ldx [%o1 + 0x30], %o4
+ ldx [%o1 + 0x38], %o5
+ stx %o2, [%o0 + 0x20]
+ stx %o3, [%o0 + 0x28]
+ stx %o4, [%o0 + 0x30]
+ stx %o5, [%o0 + 0x38]
+ subcc %g7, 64, %g7
+ add %o1, 64, %o1
+ bne,pt %xcc, 1b
+ add %o0, 64, %o0
+ retl
+ nop
+
+GENclear_page:
+GENclear_user_page:
+ set PAGE_SIZE, %g7
+1: stx %g0, [%o0 + 0x00]
+ stx %g0, [%o0 + 0x08]
+ stx %g0, [%o0 + 0x10]
+ stx %g0, [%o0 + 0x18]
+ stx %g0, [%o0 + 0x20]
+ stx %g0, [%o0 + 0x28]
+ stx %g0, [%o0 + 0x30]
+ stx %g0, [%o0 + 0x38]
+ subcc %g7, 64, %g7
+ bne,pt %xcc, 1b
+	 add %o0, 64, %o0
+	retl
+	 nop
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define GEN_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl generic_patch_pageops
+ .type generic_patch_pageops,#function
+generic_patch_pageops:
+ GEN_DO_PATCH(copy_user_page, GENcopy_user_page)
+ GEN_DO_PATCH(_clear_page, GENclear_page)
+ GEN_DO_PATCH(clear_user_page, GENclear_user_page)
+ retl
+ nop
+ .size generic_patch_pageops,.-generic_patch_pageops
diff --git a/arch/sparc64/lib/GENpatch.S b/arch/sparc64/lib/GENpatch.S
new file mode 100644
index 00000000000..fab9e89f16b
--- /dev/null
+++ b/arch/sparc64/lib/GENpatch.S
@@ -0,0 +1,33 @@
+/* GENpatch.S: Patch Ultra-I routines with generic variant.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+
+#define BRANCH_ALWAYS 0x10680000
+#define NOP 0x01000000
+#define GEN_DO_PATCH(OLD, NEW) \
+ sethi %hi(NEW), %g1; \
+ or %g1, %lo(NEW), %g1; \
+ sethi %hi(OLD), %g2; \
+ or %g2, %lo(OLD), %g2; \
+ sub %g1, %g2, %g1; \
+ sethi %hi(BRANCH_ALWAYS), %g3; \
+ sll %g1, 11, %g1; \
+ srl %g1, 11 + 2, %g1; \
+ or %g3, %lo(BRANCH_ALWAYS), %g3; \
+ or %g3, %g1, %g3; \
+ stw %g3, [%g2]; \
+ sethi %hi(NOP), %g3; \
+ or %g3, %lo(NOP), %g3; \
+ stw %g3, [%g2 + 0x4]; \
+ flush %g2;
+
+ .globl generic_patch_copyops
+ .type generic_patch_copyops,#function
+generic_patch_copyops:
+ GEN_DO_PATCH(memcpy, GENmemcpy)
+ GEN_DO_PATCH(___copy_from_user, GENcopy_from_user)
+ GEN_DO_PATCH(___copy_to_user, GENcopy_to_user)
+ retl
+ nop
+ .size generic_patch_copyops,.-generic_patch_copyops
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index c4a6d6e7d03..f95fbfa3eeb 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -1,4 +1,4 @@
-# $Id: Makefile,v 1.25 2000/12/14 22:57:25 davem Exp $
+#
# Makefile for Sparc64 library files..
#
@@ -13,6 +13,8 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \
NGpage.o NGbzero.o \
+ GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o GENpatch.o \
+ GENpage.o GENbzero.o \
copy_in_user.o user_fixup.o memmove.o \
mcount.o ipcsum.o rwsem.o xor.o
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index 1312bfaff30..9fd8030cc54 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -195,6 +195,11 @@ ENTRY(startup_64)
movl %eax, %ds
movl %eax, %es
movl %eax, %ss
+ movl %eax, %fs
+ movl %eax, %gs
+ lldt %ax
+ movl $0x20, %eax
+ ltr %ax
/* Compute the decompressed kernel start address. It is where
* we were loaded at aligned to a 2M boundary. %rbp contains the
@@ -295,6 +300,8 @@ gdt:
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
+ .quad 0x0080890000000000 /* TS descriptor */
+ .quad 0x0000000000000000 /* TS continued */
gdt_end:
.bss
/* Stack for uncompression */
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 900ff38d68d..925758dbca0 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -791,10 +791,8 @@ static void setup_APIC_timer(unsigned int clocks)
/* wait for irq slice */
if (hpet_address && hpet_use_timer) {
- int trigger = hpet_readl(HPET_T0_CMP);
- while (hpet_readl(HPET_COUNTER) >= trigger)
- /* do nothing */ ;
- while (hpet_readl(HPET_COUNTER) < trigger)
+ u32 trigger = hpet_readl(HPET_T0_CMP);
+ while (hpet_readl(HPET_T0_CMP) == trigger)
/* do nothing */ ;
} else {
int c1, c2;
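
The old loop compared the free-running main counter against a captured
comparator value, which goes wrong once the 32-bit counter has already
passed the comparator or wraps mid-wait. In periodic mode the hardware
advances T0_CMP by one period at each timer event, so waiting for the
comparator itself to change is a wrap-safe way to catch the start of the
next IRQ slice. A sketch of the new wait (a restatement, not extra kernel
code):

    static void hpet_wait_for_slice(void)
    {
        u32 trigger = hpet_readl(HPET_T0_CMP);

        while (hpet_readl(HPET_T0_CMP) == trigger)
            cpu_relax();    /* do nothing */
    }
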
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index ba16c968ca3..71da01e73f0 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -367,16 +367,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
pdev = to_pci_dev(dev);
- /* is the device behind a bridge? */
- if (unlikely(pdev->bus->parent))
- pbus = pdev->bus->parent;
- else
- pbus = pdev->bus;
+ pbus = pdev->bus;
+
+ /* is the device behind a bridge? Look for the root bus */
+ while (pbus->parent)
+ pbus = pbus->parent;
tbl = pci_iommu(pbus);
- BUG_ON(pdev->bus->parent &&
- (tbl->it_busno != pdev->bus->parent->number));
+ BUG_ON(tbl && (tbl->it_busno != pbus->number));
return tbl;
}
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 7e161c698af..10b9809ce82 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -75,7 +75,8 @@ static void flush_kernel_map(void *arg)
/* When clflush is available always use it because it is
much cheaper than WBINVD. */
- if (!cpu_has_clflush)
+ /* clflush is still broken. Disable for now. */
+ if (1 || !cpu_has_clflush)
asm volatile("wbinvd" ::: "memory");
else list_for_each_entry(pg, l, lru) {
void *adr = page_address(pg);
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 65d82736987..4095e4d66a1 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -66,13 +66,13 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
switch (len) {
case 1:
- *value = readb(addr + reg);
+ *value = mmio_config_readb(addr + reg);
break;
case 2:
- *value = readw(addr + reg);
+ *value = mmio_config_readw(addr + reg);
break;
case 4:
- *value = readl(addr + reg);
+ *value = mmio_config_readl(addr + reg);
break;
}
@@ -94,13 +94,13 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
switch (len) {
case 1:
- writeb(value, addr + reg);
+ mmio_config_writeb(addr + reg, value);
break;
case 2:
- writew(value, addr + reg);
+ mmio_config_writew(addr + reg, value);
break;
case 4:
- writel(value, addr + reg);
+ mmio_config_writel(addr + reg, value);
break;
}
diff --git a/arch/x86_64/vdso/.gitignore b/arch/x86_64/vdso/.gitignore
new file mode 100644
index 00000000000..f8b69d84238
--- /dev/null
+++ b/arch/x86_64/vdso/.gitignore
@@ -0,0 +1 @@
+vdso.lds
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 8c2caff87cc..a15845c164f 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3047,6 +3047,10 @@ static inline void blk_partition_remap(struct bio *bio)
bio->bi_sector += p->start_sect;
bio->bi_bdev = bdev->bd_contains;
+
+ blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
+ bdev->bd_dev, bio->bi_sector,
+ bio->bi_sector - p->start_sect);
}
}
diff --git a/drivers/acpi/resources/rsxface.c b/drivers/acpi/resources/rsxface.c
index f63813a358c..4c3fd4cdaf7 100644
--- a/drivers/acpi/resources/rsxface.c
+++ b/drivers/acpi/resources/rsxface.c
@@ -474,8 +474,6 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context)
return (AE_CTRL_TERMINATE);
}
-ACPI_EXPORT_SYMBOL(acpi_rs_match_vendor_resource)
-
/*******************************************************************************
*
* FUNCTION: acpi_walk_resources
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a11b2bd54bb..084358a828e 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1977,12 +1977,13 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
{
ReadCapdata_struct *buf;
int return_code;
- buf = kmalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
- if (buf == NULL) {
+
+ buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
+ if (!buf) {
printk(KERN_WARNING "cciss: out of memory\n");
return;
}
- memset(buf, 0, sizeof(ReadCapdata_struct));
+
if (withirq)
return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
ctlr, buf, sizeof(ReadCapdata_struct),
@@ -2003,7 +2004,6 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
printk(KERN_INFO " blocks= %llu block_size= %d\n",
(unsigned long long)*total_size+1, *block_size);
kfree(buf);
- return;
}
static void
@@ -2011,12 +2011,13 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
{
ReadCapdata_struct_16 *buf;
int return_code;
- buf = kmalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
- if (buf == NULL) {
+
+ buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
+ if (!buf) {
printk(KERN_WARNING "cciss: out of memory\n");
return;
}
- memset(buf, 0, sizeof(ReadCapdata_struct_16));
+
if (withirq) {
return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
ctlr, buf, sizeof(ReadCapdata_struct_16),
@@ -2038,7 +2039,6 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
printk(KERN_INFO " blocks= %llu block_size= %d\n",
(unsigned long long)*total_size+1, *block_size);
kfree(buf);
- return;
}
static int cciss_revalidate(struct gendisk *disk)
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index be4e3477d83..eb9799acf65 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -420,18 +420,17 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
goto Enomem2;
}
- hba[i]->cmd_pool = (cmdlist_t *)pci_alloc_consistent(
+ hba[i]->cmd_pool = pci_alloc_consistent(
hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t),
&(hba[i]->cmd_pool_dhandle));
- hba[i]->cmd_pool_bits = kmalloc(
- ((NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG)*sizeof(unsigned long),
+ hba[i]->cmd_pool_bits = kcalloc(
+ (NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long),
GFP_KERNEL);
if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool)
goto Enomem1;
memset(hba[i]->cmd_pool, 0, NR_CMDS * sizeof(cmdlist_t));
- memset(hba[i]->cmd_pool_bits, 0, ((NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG)*sizeof(unsigned long));
printk(KERN_INFO "cpqarray: Finding drives on %s",
hba[i]->devname);
@@ -1660,45 +1659,30 @@ static void getgeometry(int ctlr)
info_p->log_drv_map = 0;
- id_ldrive = kmalloc(sizeof(id_log_drv_t), GFP_KERNEL);
- if(id_ldrive == NULL)
- {
+ id_ldrive = kzalloc(sizeof(id_log_drv_t), GFP_KERNEL);
+ if (!id_ldrive) {
printk( KERN_ERR "cpqarray: out of memory.\n");
- return;
+ goto err_0;
}
- id_ctlr_buf = kmalloc(sizeof(id_ctlr_t), GFP_KERNEL);
- if(id_ctlr_buf == NULL)
- {
- kfree(id_ldrive);
+ id_ctlr_buf = kzalloc(sizeof(id_ctlr_t), GFP_KERNEL);
+ if (!id_ctlr_buf) {
printk( KERN_ERR "cpqarray: out of memory.\n");
- return;
+ goto err_1;
}
- id_lstatus_buf = kmalloc(sizeof(sense_log_drv_stat_t), GFP_KERNEL);
- if(id_lstatus_buf == NULL)
- {
- kfree(id_ctlr_buf);
- kfree(id_ldrive);
+ id_lstatus_buf = kzalloc(sizeof(sense_log_drv_stat_t), GFP_KERNEL);
+ if (!id_lstatus_buf) {
printk( KERN_ERR "cpqarray: out of memory.\n");
- return;
+ goto err_2;
}
- sense_config_buf = kmalloc(sizeof(config_t), GFP_KERNEL);
- if(sense_config_buf == NULL)
- {
- kfree(id_lstatus_buf);
- kfree(id_ctlr_buf);
- kfree(id_ldrive);
+ sense_config_buf = kzalloc(sizeof(config_t), GFP_KERNEL);
+ if (!sense_config_buf) {
printk( KERN_ERR "cpqarray: out of memory.\n");
- return;
+ goto err_3;
}
- memset(id_ldrive, 0, sizeof(id_log_drv_t));
- memset(id_ctlr_buf, 0, sizeof(id_ctlr_t));
- memset(id_lstatus_buf, 0, sizeof(sense_log_drv_stat_t));
- memset(sense_config_buf, 0, sizeof(config_t));
-
info_p->phys_drives = 0;
info_p->log_drv_map = 0;
info_p->drv_assign_map = 0;
@@ -1712,13 +1696,8 @@ static void getgeometry(int ctlr)
* so the idastubopen will fail on all logical drives
* on the controller.
*/
- /* Free all the buffers and return */
printk(KERN_ERR "cpqarray: error sending ID controller\n");
- kfree(sense_config_buf);
- kfree(id_lstatus_buf);
- kfree(id_ctlr_buf);
- kfree(id_ldrive);
- return;
+ goto err_4;
}
info_p->log_drives = id_ctlr_buf->nr_drvs;
@@ -1764,12 +1743,7 @@ static void getgeometry(int ctlr)
" failed to report status of logical drive %d\n"
"Access to this controller has been disabled\n",
ctlr, log_unit);
- /* Free all the buffers and return */
- kfree(sense_config_buf);
- kfree(id_lstatus_buf);
- kfree(id_ctlr_buf);
- kfree(id_ldrive);
- return;
+ goto err_4;
}
/*
Make sure the logical drive is configured
@@ -1798,14 +1772,8 @@ static void getgeometry(int ctlr)
sizeof(config_t), 0, 0, log_unit);
if (ret_code == IO_ERROR) {
info_p->log_drv_map = 0;
- /* Free all the buffers and return */
printk(KERN_ERR "cpqarray: error sending sense config\n");
- kfree(sense_config_buf);
- kfree(id_lstatus_buf);
- kfree(id_ctlr_buf);
- kfree(id_ldrive);
- return;
-
+ goto err_4;
}
info_p->phys_drives =
@@ -1820,12 +1788,18 @@ static void getgeometry(int ctlr)
log_index = log_index + 1;
} /* end of if logical drive configured */
} /* end of for log_unit */
+
+ /* Free all the buffers and return */
+err_4:
kfree(sense_config_buf);
- kfree(id_ldrive);
+err_3:
kfree(id_lstatus_buf);
+err_2:
kfree(id_ctlr_buf);
+err_1:
+ kfree(id_ldrive);
+err_0:
return;
-
}
static void __exit cpqarray_exit(void)
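
The rewritten getgeometry() replaces four copies of the free-everything
block with a single unwind ladder: each err_N label frees only what was
allocated before the failing step, and the success path falls through the
same kfree() chain. The shape, reduced to a hedged minimal example (buffer
names and sizes are made up):

    #include <linux/errno.h>
    #include <linux/slab.h>

    static int demo_alloc_ladder(void)
    {
        void *a, *b, *c;
        int ret = -ENOMEM;

        a = kzalloc(64, GFP_KERNEL);
        if (!a)
            goto err_0;
        b = kzalloc(64, GFP_KERNEL);
        if (!b)
            goto err_1;
        c = kzalloc(64, GFP_KERNEL);
        if (!c)
            goto err_2;

        ret = 0;            /* ... real work with a, b and c ... */

        kfree(c);           /* success path falls into the ladder */
    err_2:
        kfree(b);
    err_1:
        kfree(a);
    err_0:
        return ret;
    }
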
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 85916e2665d..af3969a9c96 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -41,7 +41,6 @@
#include <linux/dma-mapping.h>
#include <linux/completion.h>
#include <linux/device.h>
-#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <asm/vio.h>
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index cb27e8863d7..3ede0b63da1 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -902,26 +902,17 @@ static int ace_release(struct inode *inode, struct file *filp)
return 0;
}
-static int ace_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
- struct ace_device *ace = inode->i_bdev->bd_disk->private_data;
- struct hd_geometry __user *geo = (struct hd_geometry __user *)arg;
- struct hd_geometry g;
- dev_dbg(ace->dev, "ace_ioctl()\n");
-
- switch (cmd) {
- case HDIO_GETGEO:
- g.heads = ace->cf_id.heads;
- g.sectors = ace->cf_id.sectors;
- g.cylinders = ace->cf_id.cyls;
- g.start = 0;
- return copy_to_user(geo, &g, sizeof(g)) ? -EFAULT : 0;
+ struct ace_device *ace = bdev->bd_disk->private_data;
- default:
- return -ENOTTY;
- }
- return -ENOTTY;
+ dev_dbg(ace->dev, "ace_getgeo()\n");
+
+ geo->heads = ace->cf_id.heads;
+ geo->sectors = ace->cf_id.sectors;
+ geo->cylinders = ace->cf_id.cyls;
+
+ return 0;
}
static struct block_device_operations ace_fops = {
@@ -930,7 +921,7 @@ static struct block_device_operations ace_fops = {
.release = ace_release,
.media_changed = ace_media_changed,
.revalidate_disk = ace_revalidate_disk,
- .ioctl = ace_ioctl,
+ .getgeo = ace_getgeo,
};
/* --------------------------------------------------------------------
diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c
index feeccbaec43..3d6bd0baa56 100644
--- a/drivers/char/hvc_lguest.c
+++ b/drivers/char/hvc_lguest.c
@@ -35,6 +35,7 @@
#include <linux/err.h>
#include <linux/init.h>
#include <linux/lguest_bus.h>
+#include <asm/paravirt.h>
#include "hvc_console.h"
/*D:340 This is our single console input buffer, with associated "struct
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index fee58e03dbe..4177f6db83e 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1629,7 +1629,7 @@ static int cmm_open(struct inode *inode, struct file *filp)
{
struct cm4000_dev *dev;
struct pcmcia_device *link;
- int rc, minor = iminor(inode);
+ int minor = iminor(inode);
if (minor >= CM4000_MAX_DEV)
return -ENODEV;
@@ -1668,7 +1668,6 @@ static int cmm_open(struct inode *inode, struct file *filp)
start_monitor(dev);
link->open = 1; /* only one open per device */
- rc = 0;
DEBUGP(2, dev, "<- cmm_open\n");
return nonseekable_open(inode, filp);
@@ -1824,7 +1823,7 @@ static int cm4000_resume(struct pcmcia_device *link)
static void cm4000_release(struct pcmcia_device *link)
{
- cmm_cm4000_release(link->priv); /* delay release until device closed */
+ cmm_cm4000_release(link); /* delay release until device closed */
pcmcia_disable_device(link);
}
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index af88181a17f..b24a3e7bbb9 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -599,7 +599,7 @@ cs_release:
static void reader_release(struct pcmcia_device *link)
{
- cm4040_reader_release(link->priv);
+ cm4040_reader_release(link);
pcmcia_disable_device(link);
}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index de37ebc3a4c..51ea93cab6c 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -369,25 +369,54 @@ static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)
}
/**
- * tty_buffer_flush - flush full tty buffers
+ * __tty_buffer_flush - flush full tty buffers
* @tty: tty to flush
*
- * flush all the buffers containing receive data
+ * flush all the buffers containing receive data. Caller must
+ * hold the buffer lock and must have ensured no parallel flush to
+ * ldisc is running.
*
- * Locking: none
+ * Locking: Caller must hold tty->buf.lock
*/
-static void tty_buffer_flush(struct tty_struct *tty)
+static void __tty_buffer_flush(struct tty_struct *tty)
{
struct tty_buffer *thead;
- unsigned long flags;
- spin_lock_irqsave(&tty->buf.lock, flags);
while((thead = tty->buf.head) != NULL) {
tty->buf.head = thead->next;
tty_buffer_free(tty, thead);
}
tty->buf.tail = NULL;
+}
+
+/**
+ * tty_buffer_flush - flush full tty buffers
+ * @tty: tty to flush
+ *
+ * flush all the buffers containing receive data. If the buffer is
+ * being processed by flush_to_ldisc then we defer the processing
+ * to that function
+ *
+ * Locking: none
+ */
+
+static void tty_buffer_flush(struct tty_struct *tty)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&tty->buf.lock, flags);
+
+ /* If the data is being pushed to the tty layer then we can't
+ process it here. Instead set a flag and the flush_to_ldisc
+ path will process the flush request before it exits */
+ if (test_bit(TTY_FLUSHING, &tty->flags)) {
+ set_bit(TTY_FLUSHPENDING, &tty->flags);
+ spin_unlock_irqrestore(&tty->buf.lock, flags);
+ wait_event(tty->read_wait,
+ test_bit(TTY_FLUSHPENDING, &tty->flags) == 0);
+ return;
+ } else
+ __tty_buffer_flush(tty);
spin_unlock_irqrestore(&tty->buf.lock, flags);
}
@@ -3594,6 +3623,7 @@ static void flush_to_ldisc(struct work_struct *work)
return;
spin_lock_irqsave(&tty->buf.lock, flags);
+ set_bit(TTY_FLUSHING, &tty->flags); /* So we know a flush is running */
head = tty->buf.head;
if (head != NULL) {
tty->buf.head = NULL;
@@ -3607,6 +3637,11 @@ static void flush_to_ldisc(struct work_struct *work)
tty_buffer_free(tty, tbuf);
continue;
}
+			/* Ldisc or user is trying to flush the buffers
+			   we are feeding to the ldisc; stop feeding the
+			   line discipline so we can empty the queue */
+ if (test_bit(TTY_FLUSHPENDING, &tty->flags))
+ break;
if (!tty->receive_room) {
schedule_delayed_work(&tty->buf.work, 1);
break;
@@ -3620,8 +3655,17 @@ static void flush_to_ldisc(struct work_struct *work)
disc->receive_buf(tty, char_buf, flag_buf, count);
spin_lock_irqsave(&tty->buf.lock, flags);
}
+ /* Restore the queue head */
tty->buf.head = head;
}
+	/* We may have a deferred request to flush the input buffer;
+	   if so, pull the chain under the lock and empty the queue */
+ if (test_bit(TTY_FLUSHPENDING, &tty->flags)) {
+ __tty_buffer_flush(tty);
+ clear_bit(TTY_FLUSHPENDING, &tty->flags);
+ wake_up(&tty->read_wait);
+ }
+ clear_bit(TTY_FLUSHING, &tty->flags);
spin_unlock_irqrestore(&tty->buf.lock, flags);
tty_ldisc_deref(disc);
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 0a46e8837d9..4a315f08a56 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -453,6 +453,11 @@ static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
* lguest_pages". */
copy_in_guest_info(lg, pages);
+ /* Set the trap number to 256 (impossible value). If we fault while
+ * switching to the Guest (bad segment registers or bug), this will
+ * cause us to abort the Guest. */
+ lg->regs->trapnum = 256;
+
/* Now: we push the "eflags" register on the stack, then do an "lcall".
* This is how we change from using the kernel code segment to using
* the dedicated lguest code segment, as well as jumping into the
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 49787e964a0..49aa55577d0 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -195,13 +195,16 @@ static int has_err(unsigned int trap)
/* deliver_trap() returns true if it could deliver the trap. */
int deliver_trap(struct lguest *lg, unsigned int num)
{
- u32 lo = lg->idt[num].a, hi = lg->idt[num].b;
+ /* Trap numbers are always 8 bit, but we set an impossible trap number
+ * for traps inside the Switcher, so check that here. */
+ if (num >= ARRAY_SIZE(lg->idt))
+ return 0;
/* Early on the Guest hasn't set the IDT entries (or maybe it put a
* bogus one in): if we fail here, the Guest will be killed. */
- if (!idt_present(lo, hi))
+ if (!idt_present(lg->idt[num].a, lg->idt[num].b))
return 0;
- set_guest_interrupt(lg, lo, hi, has_err(num));
+ set_guest_interrupt(lg, lg->idt[num].a, lg->idt[num].b, has_err(num));
return 1;
}
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 1bc1546c7fd..6e135ac0834 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -323,9 +323,12 @@ static void lguest_write_gdt_entry(struct desc_struct *dt,
* __thread variables). So we have a hypercall specifically for this case. */
static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
{
+ /* There's one problem which normal hardware doesn't have: the Host
+ * can't handle us removing entries we're currently using. So we clear
+ * the GS register here: if it's needed it'll be reloaded anyway. */
+ loadsegment(gs, 0);
lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
}
-/*:*/
/*G:038 That's enough excitement for now, back to ploughing through each of
* the paravirt_ops (we're about 1/3 of the way through).
@@ -687,7 +690,8 @@ static struct clocksource lguest_clock = {
.rating = 400,
.read = lguest_clock_read,
.mask = CLOCKSOURCE_MASK(64),
- .mult = 1,
+ .mult = 1 << 22,
+ .shift = 22,
};
/* The "scheduler clock" is just our real clock, adjusted to start at zero */
@@ -770,7 +774,6 @@ static void lguest_time_init(void)
* way, the "rating" is initialized so high that it's always chosen
* over any other clocksource. */
if (lguest_data.tsc_khz) {
- lguest_clock.shift = 22;
lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
lguest_clock.shift);
lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS;
@@ -933,23 +936,24 @@ static const struct lguest_insns
/* Now our patch routine is fairly simple (based on the native one in
* paravirt.c). If we have a replacement, we copy it in and return how much of
* the available space we used. */
-static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len)
+static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
+ unsigned long addr, unsigned len)
{
unsigned int insn_len;
/* Don't do anything special if we don't have a replacement */
if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
- return paravirt_patch_default(type, clobber, insns, len);
+ return paravirt_patch_default(type, clobber, ibuf, addr, len);
insn_len = lguest_insns[type].end - lguest_insns[type].start;
/* Similarly if we can't fit replacement (shouldn't happen, but let's
* be thorough). */
if (len < insn_len)
- return paravirt_patch_default(type, clobber, insns, len);
+ return paravirt_patch_default(type, clobber, ibuf, addr, len);
/* Copy in our instructions. */
- memcpy(insns, lguest_insns[type].start, insn_len);
+ memcpy(ibuf, lguest_insns[type].start, insn_len);
return insn_len;
}
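
A clocksource converts counter deltas with ns = (cycles * mult) >> shift.
Setting mult = 1 << 22 with shift = 22 makes the default an exact identity
(the hypercall-fed lguest clock already counts nanoseconds), and the TSC
path then only recomputes mult via clocksource_khz2mult(tsc_khz, 22).
A quick stand-alone check of the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t cyc2ns(uint64_t cycles, uint32_t mult, uint32_t shift)
    {
        return (cycles * mult) >> shift;
    }

    int main(void)
    {
        /* Identity mapping: 12345 "cycles" -> 12345 ns. */
        printf("%llu\n", (unsigned long long)cyc2ns(12345, 1u << 22, 22));

        /* A hypothetical 2 GHz TSC: khz2mult gives roughly 1 << 21,
         * so 2000 cycles -> 1000 ns. */
        printf("%llu\n", (unsigned long long)cyc2ns(2000, 1u << 21, 22));
        return 0;
    }
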
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c
index 55a7940ca73..9e7752cc800 100644
--- a/drivers/lguest/lguest_bus.c
+++ b/drivers/lguest/lguest_bus.c
@@ -5,6 +5,7 @@
#include <linux/bootmem.h>
#include <linux/lguest_bus.h>
#include <asm/io.h>
+#include <asm/paravirt.h>
static ssize_t type_show(struct device *_dev,
struct device_attribute *attr, char *buf)
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index f675a41a80d..9b81119f46e 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -43,22 +43,6 @@
* begin.
*/
-/* Is the descriptor the Guest wants us to put in OK?
- *
- * The flag which Intel says must be zero: must be zero. The descriptor must
- * be present, (this is actually checked earlier but is here for thorougness),
- * and the descriptor type must be 1 (a memory segment). */
-static int desc_ok(const struct desc_struct *gdt)
-{
- return ((gdt->b & 0x00209000) == 0x00009000);
-}
-
-/* Is the segment present? (Otherwise it can't be used by the Guest). */
-static int segment_present(const struct desc_struct *gdt)
-{
- return gdt->b & 0x8000;
-}
-
/* There are several entries we don't let the Guest set. The TSS entry is the
* "Task State Segment" which controls all kinds of delicate things. The
* LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the
@@ -71,37 +55,11 @@ static int ignored_gdt(unsigned int num)
|| num == GDT_ENTRY_DOUBLEFAULT_TSS);
}
-/* If the Guest asks us to remove an entry from the GDT, we have to be careful.
- * If one of the segment registers is pointing at that entry the Switcher will
- * crash when it tries to reload the segment registers for the Guest.
- *
- * It doesn't make much sense for the Guest to try to remove its own code, data
- * or stack segments while they're in use: assume that's a Guest bug. If it's
- * one of the lesser segment registers using the removed entry, we simply set
- * that register to 0 (unusable). */
-static void check_segment_use(struct lguest *lg, unsigned int desc)
-{
- /* GDT entries are 8 bytes long, so we divide to get the index and
- * ignore the bottom bits. */
- if (lg->regs->gs / 8 == desc)
- lg->regs->gs = 0;
- if (lg->regs->fs / 8 == desc)
- lg->regs->fs = 0;
- if (lg->regs->es / 8 == desc)
- lg->regs->es = 0;
- if (lg->regs->ds / 8 == desc
- || lg->regs->cs / 8 == desc
- || lg->regs->ss / 8 == desc)
- kill_guest(lg, "Removed live GDT entry %u", desc);
-}
-/*:*/
-/*M:009 We wouldn't need to check for removal of in-use segments if we handled
- * faults in the Switcher. However, it's probably not a worthwhile
- * optimization. :*/
-
-/*H:610 Once the GDT has been changed, we look through the changed entries and
- * see if they're OK. If not, we'll call kill_guest() and the Guest will never
- * get to use the invalid entries. */
+/*H:610 Once the GDT has been changed, we fix the new entries up a little. We
+ * don't care if they're invalid: the worst that can happen is a General
+ * Protection Fault in the Switcher when it restores a Guest segment register
+ * which tries to use that entry. Then we kill the Guest for causing such a
+ * mess: the message will be "unhandled trap 256". */
static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
{
unsigned int i;
@@ -112,16 +70,6 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
if (ignored_gdt(i))
continue;
- /* We could fault in switch_to_guest if they are using
- * a removed segment. */
- if (!segment_present(&lg->gdt[i])) {
- check_segment_use(lg, i);
- continue;
- }
-
- if (!desc_ok(&lg->gdt[i]))
- kill_guest(lg, "Bad GDT descriptor %i", i);
-
/* Segment descriptors contain a privilege level: the Guest is
* sometimes careless and leaves this as 0, even though it's
* running at privilege level 1. If so, we fix it here. */
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S
index d418179ea6b..7c9c230cc84 100644
--- a/drivers/lguest/switcher.S
+++ b/drivers/lguest/switcher.S
@@ -47,6 +47,7 @@
// Down here in the depths of assembler code.
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/page.h>
#include "lg.h"
// We mark the start of the code to copy
@@ -182,13 +183,15 @@ ENTRY(switch_to_guest)
movl $(LGUEST_DS), %eax; \
movl %eax, %ds; \
/* So where are we? Which CPU, which struct? \
- * The stack is our clue: our TSS sets \
- * It at the end of "struct lguest_pages" \
- * And we then pushed and pushed and pushed Guest regs: \
- * Now stack points atop the "struct lguest_regs". \
- * Subtract that offset, and we find our struct. */ \
+ * The stack is our clue: our TSS starts \
+ * It at the end of "struct lguest_pages". \
+ * Or we may have stumbled while restoring \
+ * Our Guest segment regs while in switch_to_guest, \
+ * The fault pushed atop that part-unwound stack. \
+ * If we round the stack down to the page start \
+ * We're at the start of "struct lguest_pages". */ \
movl %esp, %eax; \
- subl $LGUEST_PAGES_regs, %eax; \
+ andl $(~(1 << PAGE_SHIFT - 1)), %eax; \
/* Save our trap number: the switch will obscure it \
* (The Guest regs are not mapped here in the Host) \
* %ebx holds it safe for deliver_to_host */ \
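
The replaced subl/andl pair is the heart of this hunk: after a fault the stack may be part-unwound, so the fixed LGUEST_PAGES_regs offset no longer holds, but rounding %esp down to its page still lands at the start of "struct lguest_pages". GAS gives shifts higher precedence than subtraction, so the immediate works out to ~(PAGE_SIZE - 1). A small C model of the same masking (the address is made up):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)

int main(void)
{
        uintptr_t esp = 0xc01f2f40;     /* somewhere inside the mapped page */

        /* Equivalent of "andl $(~(1 << PAGE_SHIFT - 1)), %eax". */
        uintptr_t base = esp & ~(PAGE_SIZE - 1);

        printf("%#lx -> %#lx\n", (unsigned long)esp, (unsigned long)base);
        return 0;
}
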
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 141ff9fa296..2120155929a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -580,8 +580,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
/* the bio has been remapped so dispatch it */
blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
- tio->io->bio->bi_bdev->bd_dev, sector,
- clone->bi_sector);
+ tio->io->bio->bi_bdev->bd_dev,
+ clone->bi_sector, sector);
generic_make_request(clone);
} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index c9a289c6c13..b0abc7d9280 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -117,7 +117,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
struct mmc_host *host = card->host;
u64 limit = BLK_BOUNCE_HIGH;
int ret;
- unsigned int bouncesz;
if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
limit = *mmc_dev(host)->dma_mask;
@@ -134,6 +133,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
#ifdef CONFIG_MMC_BLOCK_BOUNCE
if (host->max_hw_segs == 1) {
+ unsigned int bouncesz;
+
bouncesz = MMC_QUEUE_BOUNCESZ;
if (bouncesz > host->max_req_size)
@@ -156,14 +157,14 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
GFP_KERNEL);
if (!mq->sg) {
ret = -ENOMEM;
- goto free_bounce_buf;
+ goto cleanup_queue;
}
mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
bouncesz / 512, GFP_KERNEL);
if (!mq->bounce_sg) {
ret = -ENOMEM;
- goto free_sg;
+ goto cleanup_queue;
}
}
}
@@ -197,14 +198,13 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
if (mq->bounce_sg)
kfree(mq->bounce_sg);
mq->bounce_sg = NULL;
- free_sg:
- kfree(mq->sg);
+ cleanup_queue:
+ if (mq->sg)
+ kfree(mq->sg);
mq->sg = NULL;
- free_bounce_buf:
if (mq->bounce_buf)
kfree(mq->bounce_buf);
mq->bounce_buf = NULL;
- cleanup_queue:
blk_cleanup_queue(mq->queue);
return ret;
}
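
The error-path rework above collapses the per-resource labels into a single cleanup_queue label that frees whatever was allocated so far; since kfree(NULL) is a no-op, one label suffices. A runnable sketch of the same idiom in plain C (names and sizes are illustrative):

#include <stdlib.h>

struct queue {
        void *sg;
        void *bounce_sg;
};

static int init_queue(struct queue *q)
{
        *q = (struct queue){ 0 };       /* lets cleanup free unconditionally */

        q->sg = malloc(128);
        if (!q->sg)
                goto cleanup;

        q->bounce_sg = malloc(64);
        if (!q->bounce_sg)
                goto cleanup;

        return 0;

cleanup:
        free(q->bounce_sg);     /* free(NULL), like kfree(NULL), is a no-op */
        free(q->sg);
        return -1;
}

int main(void)
{
        struct queue q;

        return init_queue(&q);
}
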
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index 62564ccde03..bfebd2fa7ad 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -83,7 +83,7 @@
#define AT91_MCI_ERRORS (AT91_MCI_RINDE | AT91_MCI_RDIRE | AT91_MCI_RCRCE \
| AT91_MCI_RENDE | AT91_MCI_RTOE | AT91_MCI_DCRCE \
- | AT91_MCI_DTOE | AT91_MCI_OVRE | AT91_MCI_UNRE)
+ | AT91_MCI_DTOE | AT91_MCI_OVRE | AT91_MCI_UNRE)
#define at91_mci_read(host, reg) __raw_readl((host)->baseaddr + (reg))
#define at91_mci_write(host, reg, val) __raw_writel((val), (host)->baseaddr + (reg))
@@ -676,15 +676,15 @@ static irqreturn_t at91_mci_irq(int irq, void *devid)
int_status = at91_mci_read(host, AT91_MCI_SR);
int_mask = at91_mci_read(host, AT91_MCI_IMR);
-
+
pr_debug("MCI irq: status = %08X, %08X, %08X\n", int_status, int_mask,
int_status & int_mask);
-
+
int_status = int_status & int_mask;
if (int_status & AT91_MCI_ERRORS) {
completed = 1;
-
+
if (int_status & AT91_MCI_UNRE)
pr_debug("MMC: Underrun error\n");
if (int_status & AT91_MCI_OVRE)
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index e0c9808fd42..9bf2a877113 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -1266,7 +1266,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev)
return 0;
}
-static void __devexit wbsd_free_mmc(struct device *dev)
+static void wbsd_free_mmc(struct device *dev)
{
struct mmc_host *mmc;
struct wbsd_host *host;
@@ -1358,7 +1358,7 @@ static int __devinit wbsd_request_region(struct wbsd_host *host, int base)
return 0;
}
-static void __devexit wbsd_release_regions(struct wbsd_host *host)
+static void wbsd_release_regions(struct wbsd_host *host)
{
if (host->base)
release_region(host->base, 8);
@@ -1434,7 +1434,7 @@ err:
"Falling back on FIFO.\n", dma);
}
-static void __devexit wbsd_release_dma(struct wbsd_host *host)
+static void wbsd_release_dma(struct wbsd_host *host)
{
if (host->dma_addr) {
dma_unmap_single(mmc_dev(host->mmc), host->dma_addr,
@@ -1484,7 +1484,7 @@ static int __devinit wbsd_request_irq(struct wbsd_host *host, int irq)
return 0;
}
-static void __devexit wbsd_release_irq(struct wbsd_host *host)
+static void wbsd_release_irq(struct wbsd_host *host)
{
if (!host->irq)
return;
@@ -1535,7 +1535,7 @@ static int __devinit wbsd_request_resources(struct wbsd_host *host,
* Release all resources for the host.
*/
-static void __devexit wbsd_release_resources(struct wbsd_host *host)
+static void wbsd_release_resources(struct wbsd_host *host)
{
wbsd_release_dma(host);
wbsd_release_irq(host);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 8c86b802f21..d091b2430b4 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -7,6 +7,7 @@
#include <linux/device.h>
#include <linux/fs.h>
+#include <linux/mm.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 56f6389a300..3c1984ecf36 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1704,10 +1704,8 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
}
}
- local_irq_save(flags);
- if (!spin_trylock(&adapter->lock)) {
+ if (!spin_trylock_irqsave(&adapter->lock, flags)) {
/* Can't get lock - tell upper layer to requeue */
- local_irq_restore(flags);
dev_printk(KERN_DEBUG, &adapter->pdev->dev, "tx locked\n");
return NETDEV_TX_LOCKED;
}
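
The atl1 change folds the open-coded local_irq_save()/spin_trylock() pair into spin_trylock_irqsave(), which restores interrupt state itself when the lock is busy, removing the easy-to-forget local_irq_restore() on the failure path. A toy, runnable model of why the combined primitive is safer (none of this is the kernel's implementation):

#include <stdbool.h>
#include <stdio.h>

static bool irqs_on = true;
static bool lock_held;

static bool save_irq(void)      { bool f = irqs_on; irqs_on = false; return f; }
static void restore_irq(bool f) { irqs_on = f; }
static bool trylock(void)       { return lock_held ? false : (lock_held = true); }

/* Compose "disable interrupts" with "try the lock", undoing the
 * first step automatically when the lock is contended. */
static bool trylock_irqsave(bool *flags)
{
        *flags = save_irq();
        if (trylock())
                return true;
        restore_irq(*flags);
        return false;
}

int main(void)
{
        bool flags;

        lock_held = true;       /* simulate contention */
        if (!trylock_irqsave(&flags))
                printf("lock busy, irqs back on: %d\n", irqs_on);
        return 0;
}
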
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index 8ee2c2c86b4..d67f97bfa3a 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -39,7 +39,7 @@
#include <asm/io.h>
#define DRV_NAME "ehea"
-#define DRV_VERSION "EHEA_0072"
+#define DRV_VERSION "EHEA_0073"
/* eHEA capability flags */
#define DLPAR_PORT_ADD_REM 1
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 58702f54c3f..9756211e83c 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1326,7 +1326,6 @@ static void write_swqe2_TSO(struct sk_buff *skb,
u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0];
int skb_data_size = skb->len - skb->data_len;
int headersize;
- u64 tmp_addr;
/* Packet is TCP with TSO enabled */
swqe->tx_control |= EHEA_SWQE_TSO;
@@ -1347,9 +1346,8 @@ static void write_swqe2_TSO(struct sk_buff *skb,
/* set sg1entry data */
sg1entry->l_key = lkey;
sg1entry->len = skb_data_size - headersize;
-
- tmp_addr = (u64)(skb->data + headersize);
- sg1entry->vaddr = ehea_map_vaddr(tmp_addr);
+ sg1entry->vaddr =
+ ehea_map_vaddr(skb->data + headersize);
swqe->descriptors++;
}
} else
@@ -1362,7 +1360,6 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
int skb_data_size = skb->len - skb->data_len;
u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0];
struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry;
- u64 tmp_addr;
/* Packet is any nonTSO type
*
@@ -1379,8 +1376,8 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
/* copy sg1entry data */
sg1entry->l_key = lkey;
sg1entry->len = skb_data_size - SWQE2_MAX_IMM;
- tmp_addr = (u64)(skb->data + SWQE2_MAX_IMM);
- sg1entry->vaddr = ehea_map_vaddr(tmp_addr);
+ sg1entry->vaddr =
+ ehea_map_vaddr(skb->data + SWQE2_MAX_IMM);
swqe->descriptors++;
}
} else {
@@ -1395,7 +1392,6 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev,
struct ehea_vsgentry *sg_list, *sg1entry, *sgentry;
skb_frag_t *frag;
int nfrags, sg1entry_contains_frag_data, i;
- u64 tmp_addr;
nfrags = skb_shinfo(skb)->nr_frags;
sg1entry = &swqe->u.immdata_desc.sg_entry;
@@ -1417,9 +1413,9 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev,
/* copy sg1entry data */
sg1entry->l_key = lkey;
sg1entry->len = frag->size;
- tmp_addr = (u64)(page_address(frag->page)
- + frag->page_offset);
- sg1entry->vaddr = ehea_map_vaddr(tmp_addr);
+ sg1entry->vaddr =
+ ehea_map_vaddr(page_address(frag->page)
+ + frag->page_offset);
swqe->descriptors++;
sg1entry_contains_frag_data = 1;
}
@@ -1431,10 +1427,9 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev,
sgentry->l_key = lkey;
sgentry->len = frag->size;
-
- tmp_addr = (u64)(page_address(frag->page)
- + frag->page_offset);
- sgentry->vaddr = ehea_map_vaddr(tmp_addr);
+ sgentry->vaddr =
+ ehea_map_vaddr(page_address(frag->page)
+ + frag->page_offset);
swqe->descriptors++;
}
}
@@ -2165,24 +2160,18 @@ static int ehea_clean_all_portres(struct ehea_port *port)
return ret;
}
-static void ehea_remove_adapter_mr (struct ehea_adapter *adapter)
+static void ehea_remove_adapter_mr(struct ehea_adapter *adapter)
{
- int i;
-
- for (i=0; i < EHEA_MAX_PORTS; i++)
- if (adapter->port[i])
- return;
+ if (adapter->active_ports)
+ return;
ehea_rem_mr(&adapter->mr);
}
-static int ehea_add_adapter_mr (struct ehea_adapter *adapter)
+static int ehea_add_adapter_mr(struct ehea_adapter *adapter)
{
- int i;
-
- for (i=0; i < EHEA_MAX_PORTS; i++)
- if (adapter->port[i])
- return 0;
+ if (adapter->active_ports)
+ return 0;
return ehea_reg_kernel_mr(adapter, &adapter->mr);
}
@@ -3099,6 +3088,7 @@ out:
static void __exit ehea_module_exit(void)
{
+ destroy_workqueue(ehea_driver_wq);
driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities);
ibmebus_unregister_driver(&ehea_driver);
ehea_destroy_busmap();
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index d96eb722954..acba90f1638 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -963,7 +963,7 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_
{
int rc, i;
struct net_device *netdev;
- struct ibmveth_adapter *adapter = NULL;
+ struct ibmveth_adapter *adapter;
unsigned char *mac_addr_p;
unsigned int *mcastFilterSize_p;
@@ -997,7 +997,6 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_
SET_MODULE_OWNER(netdev);
adapter = netdev->priv;
- memset(adapter, 0, sizeof(adapter));
dev->dev.driver_data = netdev;
adapter->vdev = dev;
@@ -1280,24 +1279,28 @@ const char * buf, size_t count)
int i;
/* Make sure there is a buffer pool with buffers that
can hold a packet of the size of the MTU */
- for(i = 0; i<IbmVethNumBufferPools; i++) {
+ for (i = 0; i < IbmVethNumBufferPools; i++) {
if (pool == &adapter->rx_buff_pool[i])
continue;
if (!adapter->rx_buff_pool[i].active)
continue;
- if (mtu < adapter->rx_buff_pool[i].buff_size) {
- pool->active = 0;
- h_free_logical_lan_buffer(adapter->
- vdev->
- unit_address,
- pool->
- buff_size);
- }
+ if (mtu <= adapter->rx_buff_pool[i].buff_size)
+ break;
}
- if (pool->active) {
+
+ if (i == IbmVethNumBufferPools) {
ibmveth_error_printk("no active pool >= MTU\n");
return -EPERM;
}
+
+ pool->active = 0;
+ if (netif_running(netdev)) {
+ adapter->pool_config = 1;
+ ibmveth_close(netdev);
+ adapter->pool_config = 0;
+ if ((rc = ibmveth_open(netdev)))
+ return rc;
+ }
}
} else if (attr == &veth_num_attr) {
if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
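
The reworked MTU check above first scans for some other active pool large enough for the new MTU, and only then deactivates this one (bouncing the interface if it is running). The "loop index equals the count" sentinel is the load-bearing idiom; a runnable miniature with made-up pool sizes:

#include <stdio.h>

#define NUM_POOLS 5

int main(void)
{
        int buff_size[NUM_POOLS] = { 512, 1024, 2048, 4096, 9000 };
        int active[NUM_POOLS]    = { 1, 1, 0, 1, 0 };
        int mtu = 1500, i;

        for (i = 0; i < NUM_POOLS; i++) {
                if (!active[i])
                        continue;
                if (mtu <= buff_size[i])
                        break;  /* found a pool that can hold a packet */
        }

        if (i == NUM_POOLS)
                printf("no active pool >= MTU\n");
        else
                printf("pool %d (%d bytes) fits MTU %d\n",
                       i, buff_size[i], mtu);
        return 0;
}
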
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index bb69ccae8ac..72cc15a6cab 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -73,9 +73,6 @@ static inline long h_send_logical_lan(unsigned long unit_address,
#define h_change_logical_lan_mac(ua, mac) \
plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac)
-#define h_free_logical_lan_buffer(ua, bufsize) \
- plpar_hcall_norets(H_FREE_LOGICAL_LAN_BUFFER, ua, bufsize)
-
#define IbmVethNumBufferPools 5
#define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
#define IBMVETH_MAX_MTU 68
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index f71dab34766..e323efd4ed1 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -261,7 +261,7 @@ void phy_sanitize_settings(struct phy_device *phydev)
/* Sanitize settings based on PHY capabilities */
if ((features & SUPPORTED_Autoneg) == 0)
- phydev->autoneg = 0;
+ phydev->autoneg = AUTONEG_DISABLE;
idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex),
features);
@@ -374,7 +374,7 @@ int phy_mii_ioctl(struct phy_device *phydev,
if (mii_data->phy_id == phydev->addr) {
switch(mii_data->reg_num) {
case MII_BMCR:
- if (val & (BMCR_RESET|BMCR_ANENABLE))
+ if ((val & (BMCR_RESET|BMCR_ANENABLE)) == 0)
phydev->autoneg = AUTONEG_DISABLE;
else
phydev->autoneg = AUTONEG_ENABLE;
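
Both phy.c hunks fix inverted auto-negotiation logic; the MII_BMCR one is the trickier: a register write disables autoneg only when BMCR_ANENABLE is clear and no reset is being issued. A runnable check using the standard MII bit values:

#include <stdio.h>

#define BMCR_ANENABLE 0x1000    /* auto-negotiation enable */
#define BMCR_RESET    0x8000    /* PHY reset */

int main(void)
{
        unsigned int val = 0x2100;      /* 100 Mb/s full duplex, ANENABLE off */

        /* The patched test: neither bit set means autoneg really is off. */
        if ((val & (BMCR_RESET | BMCR_ANENABLE)) == 0)
                printf("AUTONEG_DISABLE\n");
        else
                printf("AUTONEG_ENABLE\n");
        return 0;
}
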
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index c9333b9dd51..b85ab4a8f2a 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -725,6 +725,12 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+ if (tp->mac_version == RTL_GIGA_MAC_VER_12) {
+ /* Vendor specific (0x1f) and reserved (0x0e) MII registers. */
+ mdio_write(ioaddr, 0x1f, 0x0000);
+ mdio_write(ioaddr, 0x0e, 0x0000);
+ }
+
tp->phy_auto_nego_reg = auto_nego;
tp->phy_1000_ctrl_reg = giga_ctrl;
@@ -2760,14 +2766,16 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
rtl8169_check_link_status(dev, tp, ioaddr);
#ifdef CONFIG_R8169_NAPI
- RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
- tp->intr_mask = ~tp->napi_event;
-
- if (likely(netif_rx_schedule_prep(dev)))
- __netif_rx_schedule(dev);
- else if (netif_msg_intr(tp)) {
- printk(KERN_INFO "%s: interrupt %04x taken in poll\n",
- dev->name, status);
+ if (status & tp->napi_event) {
+ RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
+ tp->intr_mask = ~tp->napi_event;
+
+ if (likely(netif_rx_schedule_prep(dev)))
+ __netif_rx_schedule(dev);
+ else if (netif_msg_intr(tp)) {
+ printk(KERN_INFO "%s: interrupt %04x in poll\n",
+ dev->name, status);
+ }
}
break;
#else
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index ec2ad9f0efa..d470b19c081 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -1593,6 +1593,9 @@ static int __devinit sis190_get_mac_addr_from_apc(struct pci_dev *pdev,
pci_name(pdev));
isa_bridge = pci_get_device(PCI_VENDOR_ID_SI, 0x0965, NULL);
+ if (!isa_bridge)
+ isa_bridge = pci_get_device(PCI_VENDOR_ID_SI, 0x0966, NULL);
+
if (!isa_bridge) {
net_probe(tp, KERN_INFO "%s: Can not find ISA bridge.\n",
pci_name(pdev));
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
index f8429449dc1..6ff3a1627af 100644
--- a/drivers/net/smc91x.h
+++ b/drivers/net/smc91x.h
@@ -299,7 +299,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
#define SMC_CAN_USE_8BIT 1
#define SMC_CAN_USE_16BIT 1
-#define SMC_CAN_USE_32BIT 1
+#define SMC_CAN_USE_32BIT 0
#define SMC_inb(a, r) inb((a) + (r))
#define SMC_inw(a, r) inw((a) + (r))
@@ -310,8 +310,6 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
#endif /* BOARDS */
-#define set_irq_type(irq, type) do {} while (0)
-
#elif defined(CONFIG_M32R)
#define SMC_CAN_USE_8BIT 0
diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c
index a8994c7b858..64bef7c1236 100644
--- a/drivers/net/ucc_geth_ethtool.c
+++ b/drivers/net/ucc_geth_ethtool.c
@@ -379,7 +379,6 @@ static const struct ethtool_ops uec_ethtool_ops = {
.get_stats_count = uec_get_stats_count,
.get_strings = uec_get_strings,
.get_ethtool_stats = uec_get_ethtool_stats,
- .get_perm_addr = ethtool_op_get_perm_addr,
};
void uec_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c
index 5f8c2d30a32..6c257b88ce5 100644
--- a/drivers/net/ucc_geth_mii.c
+++ b/drivers/net/ucc_geth_mii.c
@@ -272,7 +272,8 @@ int __init uec_mdio_init(void)
return of_register_platform_driver(&uec_mdio_driver);
}
-void __exit uec_mdio_exit(void)
+/* called from __init ucc_geth_init, therefore cannot be __exit */
+void uec_mdio_exit(void)
{
of_unregister_platform_driver(&uec_mdio_driver);
}
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_phy.c b/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
index d779199c30d..b37f1e34870 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
@@ -1638,7 +1638,7 @@ void bcm43xx_phy_set_baseband_attenuation(struct bcm43xx_private *bcm,
return;
}
- if (phy->analog == 1) {
+ if (phy->analog > 1) {
value = bcm43xx_phy_read(bcm, 0x0060) & ~0x003C;
value |= (baseband_attenuation << 2) & 0x003C;
} else {
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index cea85894b7f..e61c6d5ba1a 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -466,7 +466,7 @@ static int rtl8187_add_interface(struct ieee80211_hw *dev,
return -EOPNOTSUPP;
}
- priv->hwaddr = conf->mac_addr;
+ priv->hwaddr = conf->mac_addr ? conf->mac_addr : dev->wiphy->perm_addr;
return 0;
}
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index f6c487aa824..26869d107e5 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -822,7 +822,7 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs,
cs->control |= ZD_CS_MULTICAST;
/* PS-POLL */
- if (stype == IEEE80211_STYPE_PSPOLL)
+ if (ftype == IEEE80211_FTYPE_CTL && stype == IEEE80211_STYPE_PSPOLL)
cs->control |= ZD_CS_PS_POLL_FRAME;
/* Unicast data frames over the threshold should have RTS */
diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c
index 0c16a2b39b4..2adf856e44c 100644
--- a/drivers/spi/spi_mpc83xx.c
+++ b/drivers/spi/spi_mpc83xx.c
@@ -86,7 +86,7 @@ struct mpc83xx_spi {
unsigned nsecs; /* (clock cycle time)/2 */
- u32 sysclk;
+ u32 spibrg; /* SPIBRG input clock */
u32 rx_shift; /* RX data reg shift when in qe mode */
u32 tx_shift; /* TX data reg shift when in qe mode */
@@ -148,6 +148,8 @@ static void mpc83xx_spi_chipselect(struct spi_device *spi, int value)
if (value == BITBANG_CS_ACTIVE) {
u32 regval = mpc83xx_spi_read_reg(&mpc83xx_spi->base->mode);
u32 len = spi->bits_per_word;
+ u8 pm;
+
if (len == 32)
len = 0;
else
@@ -169,17 +171,20 @@ static void mpc83xx_spi_chipselect(struct spi_device *spi, int value)
regval |= SPMODE_LEN(len);
- if ((mpc83xx_spi->sysclk / spi->max_speed_hz) >= 64) {
- u8 pm = mpc83xx_spi->sysclk / (spi->max_speed_hz * 64);
+ if ((mpc83xx_spi->spibrg / spi->max_speed_hz) >= 64) {
+ pm = mpc83xx_spi->spibrg / (spi->max_speed_hz * 64) - 1;
if (pm > 0x0f) {
- printk(KERN_WARNING "MPC83xx SPI: SPICLK can't be less then a SYSCLK/1024!\n"
- "Requested SPICLK is %d Hz. Will use %d Hz instead.\n",
- spi->max_speed_hz, mpc83xx_spi->sysclk / 1024);
+ dev_err(&spi->dev, "Requested speed is too "
+ "low: %d Hz. Will use %d Hz instead.\n",
+ spi->max_speed_hz,
+ mpc83xx_spi->spibrg / 1024);
pm = 0x0f;
}
regval |= SPMODE_PM(pm) | SPMODE_DIV16;
} else {
- u8 pm = mpc83xx_spi->sysclk / (spi->max_speed_hz * 4);
+ pm = mpc83xx_spi->spibrg / (spi->max_speed_hz * 4);
+ if (pm)
+ pm--;
regval |= SPMODE_PM(pm);
}
@@ -429,13 +434,17 @@ static int __init mpc83xx_spi_probe(struct platform_device *dev)
mpc83xx_spi->bitbang.chipselect = mpc83xx_spi_chipselect;
mpc83xx_spi->bitbang.setup_transfer = mpc83xx_spi_setup_transfer;
mpc83xx_spi->bitbang.txrx_bufs = mpc83xx_spi_bufs;
- mpc83xx_spi->sysclk = pdata->sysclk;
mpc83xx_spi->activate_cs = pdata->activate_cs;
mpc83xx_spi->deactivate_cs = pdata->deactivate_cs;
mpc83xx_spi->qe_mode = pdata->qe_mode;
mpc83xx_spi->get_rx = mpc83xx_spi_rx_buf_u8;
mpc83xx_spi->get_tx = mpc83xx_spi_tx_buf_u8;
+ if (mpc83xx_spi->qe_mode)
+ mpc83xx_spi->spibrg = pdata->sysclk / 2;
+ else
+ mpc83xx_spi->spibrg = pdata->sysclk;
+
mpc83xx_spi->rx_shift = 0;
mpc83xx_spi->tx_shift = 0;
if (mpc83xx_spi->qe_mode) {
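
The spi_mpc83xx change both renames sysclk to spibrg (halved in QE mode, as the probe hunk shows) and corrects the prescale modulus by the "-1" the hardware expects. Assuming, as the driver's math implies, SPICLK = SPIBRG / (4 * (PM + 1)) with a further /16 when DIV16 is set, the computation works out like this (a runnable sketch with made-up clock numbers):

#include <stdio.h>

int main(void)
{
        unsigned int spibrg = 128000000;  /* e.g. 256 MHz sysclk / 2, QE mode */
        unsigned int want = 1000000;      /* spi->max_speed_hz */
        unsigned int pm;

        if (spibrg / want >= 64) {        /* need the /16 prescaler */
                pm = spibrg / (want * 64) - 1;
                if (pm > 0x0f)
                        pm = 0x0f;        /* clamp: can't divide further */
                printf("DIV16, PM=%u -> %u Hz\n", pm,
                       spibrg / (16 * 4 * (pm + 1)));
        } else {
                pm = spibrg / (want * 4);
                if (pm)
                        pm--;
                printf("PM=%u -> %u Hz\n", pm, spibrg / (4 * (pm + 1)));
        }
        return 0;
}
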
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 630f781aeb1..c55459c592b 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -183,7 +183,9 @@ static int spidev_message(struct spidev_data *spidev,
if (u_tmp->rx_buf) {
k_tmp->rx_buf = buf;
- if (!access_ok(VERIFY_WRITE, u_tmp->rx_buf, u_tmp->len))
+ if (!access_ok(VERIFY_WRITE, (u8 __user *)
+ (ptrdiff_t) u_tmp->rx_buf,
+ u_tmp->len))
goto done;
}
if (u_tmp->tx_buf) {
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index decfdc8eb9c..e58c87b3e3a 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -127,8 +127,20 @@ static int last_fb_vc = MAX_NR_CONSOLES - 1;
static int fbcon_is_default = 1;
static int fbcon_has_exited;
static int primary_device = -1;
+
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY
static int map_override;
+static inline void fbcon_map_override(void)
+{
+ map_override = 1;
+}
+#else
+static inline void fbcon_map_override(void)
+{
+}
+#endif /* CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY */
+
/* font data */
static char fontname[40];
@@ -506,7 +518,7 @@ static int __init fb_console_setup(char *this_opt)
(options[j++]-'0') % FB_MAX;
}
- map_override = 1;
+ fbcon_map_override();
}
return 1;
diff --git a/drivers/video/matrox/g450_pll.c b/drivers/video/matrox/g450_pll.c
index 7c76e079ca7..d42346e7fdd 100644
--- a/drivers/video/matrox/g450_pll.c
+++ b/drivers/video/matrox/g450_pll.c
@@ -331,16 +331,19 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
tmp |= M1064_XPIXCLKCTRL_PLL_UP;
}
matroxfb_DAC_out(PMINFO M1064_XPIXCLKCTRL, tmp);
-#ifdef __powerpc__
- /* This is necessary to avoid jitter on PowerPC
- * (OpenFirmware) systems, but apparently
- * introduces jitter, at least on a x86-64
- * using DVI.
- * A simple workaround is disable for non-PPC.
- */
- matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL, 0);
-#endif /* __powerpc__ */
- matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, xpwrctrl);
+ /* DVI PLL preferred for frequencies up to
+ panel link max, standard PLL otherwise */
+ if (fout >= MINFO->max_pixel_clock_panellink)
+ tmp = 0;
+ else tmp =
+ M1064_XDVICLKCTRL_DVIDATAPATHSEL |
+ M1064_XDVICLKCTRL_C1DVICLKSEL |
+ M1064_XDVICLKCTRL_C1DVICLKEN |
+ M1064_XDVICLKCTRL_DVILOOPCTL |
+ M1064_XDVICLKCTRL_P1LOOPBWDTCTL;
+ matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL,tmp);
+ matroxfb_DAC_out(PMINFO M1064_XPWRCTRL,
+ xpwrctrl);
matroxfb_DAC_unlock_irqrestore(flags);
}
diff --git a/drivers/video/matrox/matroxfb_DAC1064.h b/drivers/video/matrox/matroxfb_DAC1064.h
index df39c319373..7a98ce8043d 100644
--- a/drivers/video/matrox/matroxfb_DAC1064.h
+++ b/drivers/video/matrox/matroxfb_DAC1064.h
@@ -33,6 +33,21 @@ void DAC1064_global_restore(WPMINFO2);
#define M1064_XCURCTRL_3COLOR 0x01 /* transparent, 0, 1, 2 */
#define M1064_XCURCTRL_XGA 0x02 /* 0, 1, transparent, complement */
#define M1064_XCURCTRL_XWIN 0x03 /* transparent, transparent, 0, 1 */
+ /* drive DVI by standard(0)/DVI(1) PLL */
+ /* if set(1), C?DVICLKEN and C?DVICLKSEL must be set(1) */
+#define M1064_XDVICLKCTRL_DVIDATAPATHSEL 0x01
+ /* drive CRTC1 by standard(0)/DVI(1) PLL */
+#define M1064_XDVICLKCTRL_C1DVICLKSEL 0x02
+ /* drive CRTC2 by standard(0)/DVI(1) PLL */
+#define M1064_XDVICLKCTRL_C2DVICLKSEL 0x04
+ /* pixel clock allowed to(0)/blocked from(1) driving CRTC1 */
+#define M1064_XDVICLKCTRL_C1DVICLKEN 0x08
+ /* DVI PLL loop filter bandwidth selection bits */
+#define M1064_XDVICLKCTRL_DVILOOPCTL 0x30
+ /* CRTC2 pixel clock allowed to(0)/blocked from(1) driving CRTC2 */
+#define M1064_XDVICLKCTRL_C2DVICLKEN 0x40
+ /* P1PLL loop filter bandwidth selection */
+#define M1064_XDVICLKCTRL_P1LOOPBWDTCTL 0x80
#define M1064_XCURCOL0RED 0x08
#define M1064_XCURCOL0GREEN 0x09
#define M1064_XCURCOL0BLUE 0x0A
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index d59577c8de8..f3107ad7e54 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -424,6 +424,7 @@ struct matrox_fb_info {
} mmio;
unsigned int max_pixel_clock;
+ unsigned int max_pixel_clock_panellink;
struct matrox_switch* hw_switch;
diff --git a/drivers/video/matrox/matroxfb_misc.c b/drivers/video/matrox/matroxfb_misc.c
index 5948e54b9ef..ab7fb50bc1d 100644
--- a/drivers/video/matrox/matroxfb_misc.c
+++ b/drivers/video/matrox/matroxfb_misc.c
@@ -658,6 +658,7 @@ static int parse_pins5(WPMINFO const struct matrox_bios* bd) {
MINFO->values.reg.mctlwtst_core = (MINFO->values.reg.mctlwtst & ~7) |
wtst_xlat[MINFO->values.reg.mctlwtst & 7];
}
+ MINFO->max_pixel_clock_panellink = bd->pins[47] * 4000;
return 0;
}
diff --git a/drivers/video/pvr2fb.c b/drivers/video/pvr2fb.c
index f9300266044..7d6c29800d1 100644
--- a/drivers/video/pvr2fb.c
+++ b/drivers/video/pvr2fb.c
@@ -94,6 +94,7 @@
#define DISP_DIWCONF (DISP_BASE + 0xe8)
#define DISP_DIWHSTRT (DISP_BASE + 0xec)
#define DISP_DIWVSTRT (DISP_BASE + 0xf0)
+#define DISP_PIXDEPTH (DISP_BASE + 0x108)
/* Pixel clocks, one for TV output, doubled for VGA output */
#define TV_CLK 74239
@@ -143,6 +144,7 @@ static struct pvr2fb_par {
unsigned char is_lowres; /* Is horizontal pixel-doubling enabled? */
unsigned long mmio_base; /* MMIO base */
+ u32 palette[16];
} *currentpar;
static struct fb_info *fb_info;
@@ -599,6 +601,7 @@ static void pvr2_init_display(struct fb_info *info)
/* bits per pixel */
fb_writel(fb_readl(DISP_DIWMODE) | (--bytesperpixel << 2), DISP_DIWMODE);
+ fb_writel(bytesperpixel << 2, DISP_PIXDEPTH);
/* video enable, color sync, interlace,
* hsync and vsync polarity (currently unused) */
@@ -790,7 +793,7 @@ static int __devinit pvr2fb_common_init(void)
fb_info->fbops = &pvr2fb_ops;
fb_info->fix = pvr2_fix;
fb_info->par = currentpar;
- fb_info->pseudo_palette = (void *)(fb_info->par + 1);
+ fb_info->pseudo_palette = currentpar->palette;
fb_info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
if (video_output == VO_VGA)
@@ -807,6 +810,8 @@ static int __devinit pvr2fb_common_init(void)
if (register_framebuffer(fb_info) < 0)
goto out_err;
+ /* Must write PIXDEPTH to register before anything is displayed - so force init */
+ pvr2_init_display(fb_info);
modememused = get_line_length(fb_info->var.xres_virtual,
fb_info->var.bits_per_pixel);
@@ -1082,14 +1087,15 @@ static int __init pvr2fb_init(void)
#endif
size = sizeof(struct fb_info) + sizeof(struct pvr2fb_par) + 16 * sizeof(u32);
- fb_info = kzalloc(size, GFP_KERNEL);
+ fb_info = framebuffer_alloc(sizeof(struct pvr2fb_par), NULL);
+
if (!fb_info) {
printk(KERN_ERR "Failed to allocate memory for fb_info\n");
return -ENOMEM;
}
- currentpar = (struct pvr2fb_par *)(fb_info + 1);
+ currentpar = fb_info->par;
for (i = 0; i < ARRAY_SIZE(board_driver); i++) {
struct pvr2_board *pvr_board = board_driver + i;
@@ -1102,7 +1108,7 @@ static int __init pvr2fb_init(void)
if (ret != 0) {
printk(KERN_ERR "pvr2fb: Failed init of %s device\n",
pvr_board->name);
- kfree(fb_info);
+ framebuffer_release(fb_info);
break;
}
}
@@ -1126,7 +1132,7 @@ static void __exit pvr2fb_exit(void)
#endif
unregister_framebuffer(fb_info);
- kfree(fb_info);
+ framebuffer_release(fb_info);
}
module_init(pvr2fb_init);
diff --git a/drivers/video/stifb.c b/drivers/video/stifb.c
index c97709ecbad..e7c8db2eb49 100644
--- a/drivers/video/stifb.c
+++ b/drivers/video/stifb.c
@@ -1100,13 +1100,18 @@ stifb_init_fb(struct sti_struct *sti, int bpp_pref)
/* only supported cards are allowed */
switch (fb->id) {
case CRT_ID_VISUALIZE_EG:
- /* look for a double buffering device like e.g. the
- "INTERNAL_EG_DX1024" in the RDI precisionbook laptop
- which won't work. The same device in non-double
- buffering mode returns "INTERNAL_EG_X1024". */
- if (strstr(sti->outptr.dev_name, "EG_DX")) {
- printk(KERN_WARNING
- "stifb: ignoring '%s'. Disable double buffering in IPL menu.\n",
+ /* Visualize cards can run either in "double buffer" or
+ "standard" mode. Depending on the mode, the card reports
+ a different device name, e.g. "INTERNAL_EG_DX1024" in double
+ buffer mode and "INTERNAL_EG_X1024" in standard mode.
+ Since this driver only supports standard mode, we check
+ if the device name contains the string "DX" and tell the
+ user how to reconfigure the card. */
+ if (strstr(sti->outptr.dev_name, "DX")) {
+ printk(KERN_WARNING "WARNING: stifb framebuffer driver does not "
+ "support '%s' in double-buffer mode.\n"
+ KERN_WARNING "WARNING: Please disable the double-buffer mode "
+ "in IPL menu (the PARISC-BIOS).\n",
sti->outptr.dev_name);
goto out_err0;
}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 2bc1428d621..a6c9078af12 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -3161,12 +3161,9 @@ COMPATIBLE_IOCTL(SIOCSIWSENS)
COMPATIBLE_IOCTL(SIOCGIWSENS)
COMPATIBLE_IOCTL(SIOCSIWRANGE)
COMPATIBLE_IOCTL(SIOCSIWPRIV)
-COMPATIBLE_IOCTL(SIOCGIWPRIV)
COMPATIBLE_IOCTL(SIOCSIWSTATS)
-COMPATIBLE_IOCTL(SIOCGIWSTATS)
COMPATIBLE_IOCTL(SIOCSIWAP)
COMPATIBLE_IOCTL(SIOCGIWAP)
-COMPATIBLE_IOCTL(SIOCSIWSCAN)
COMPATIBLE_IOCTL(SIOCSIWRATE)
COMPATIBLE_IOCTL(SIOCGIWRATE)
COMPATIBLE_IOCTL(SIOCSIWRTS)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 52bb2638f7a..6874785bb65 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -974,6 +974,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
dio->get_block = get_block;
dio->end_io = end_io;
dio->map_bh.b_private = NULL;
+ dio->map_bh.b_state = 0;
dio->final_block_in_bio = -1;
dio->next_block_for_io = -1;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 0a50942b437..131954b3fb9 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -353,6 +353,10 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n");
goto out;
}
+ if (special_file(lower_inode->i_mode)) {
+ ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n");
+ goto out;
+ }
if (!nd) {
ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
"as we *think* we are about to unlink\n");
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e557a676692..a98497264fe 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -813,6 +813,15 @@ out:
return rc;
}
+static void do_sysfs_unregistration(void)
+{
+ sysfs_remove_file(&ecryptfs_subsys.kobj,
+ &sysfs_attr_version.attr);
+ sysfs_remove_file(&ecryptfs_subsys.kobj,
+ &sysfs_attr_version_str.attr);
+ subsystem_unregister(&ecryptfs_subsys);
+}
+
static int __init ecryptfs_init(void)
{
int rc;
@@ -851,6 +860,9 @@ static int __init ecryptfs_init(void)
if (rc) {
ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
"initialize the eCryptfs netlink socket\n");
+ do_sysfs_unregistration();
+ unregister_filesystem(&ecryptfs_fs_type);
+ ecryptfs_free_kmem_caches();
}
out:
return rc;
@@ -858,11 +870,7 @@ out:
static void __exit ecryptfs_exit(void)
{
- sysfs_remove_file(&ecryptfs_subsys.kobj,
- &sysfs_attr_version.attr);
- sysfs_remove_file(&ecryptfs_subsys.kobj,
- &sysfs_attr_version_str.attr);
- subsystem_unregister(&ecryptfs_subsys);
+ do_sysfs_unregistration();
ecryptfs_release_messaging(ecryptfs_transport);
unregister_filesystem(&ecryptfs_fs_type);
ecryptfs_free_kmem_caches();
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 20ac403469a..c55a761c22b 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -20,10 +20,8 @@
#include "delegation.h"
#include "internal.h"
-static void nfs_free_delegation(struct nfs_delegation *delegation)
+static void nfs_do_free_delegation(struct nfs_delegation *delegation)
{
- if (delegation->cred)
- put_rpccred(delegation->cred);
kfree(delegation);
}
@@ -31,7 +29,18 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
{
struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
- nfs_free_delegation(delegation);
+ nfs_do_free_delegation(delegation);
+}
+
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+ struct rpc_cred *cred;
+
+ cred = rcu_dereference(delegation->cred);
+ rcu_assign_pointer(delegation->cred, NULL);
+ call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+ if (cred)
+ put_rpccred(cred);
}
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
@@ -166,7 +175,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
int res = 0;
res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
- call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+ nfs_free_delegation(delegation);
return res;
}
@@ -448,7 +457,7 @@ restart:
spin_unlock(&clp->cl_lock);
rcu_read_unlock();
if (delegation != NULL)
- call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+ nfs_free_delegation(delegation);
goto restart;
}
rcu_read_unlock();
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bca6cdcb9f0..71a49c3acab 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -468,7 +468,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
ctx->lockowner = current->files;
ctx->error = 0;
ctx->dir_cookie = 0;
- kref_init(&ctx->kref);
+ atomic_set(&ctx->count, 1);
}
return ctx;
}
@@ -476,21 +476,18 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
{
if (ctx != NULL)
- kref_get(&ctx->kref);
+ atomic_inc(&ctx->count);
return ctx;
}
-static void nfs_free_open_context(struct kref *kref)
+void put_nfs_open_context(struct nfs_open_context *ctx)
{
- struct nfs_open_context *ctx = container_of(kref,
- struct nfs_open_context, kref);
+ struct inode *inode = ctx->path.dentry->d_inode;
- if (!list_empty(&ctx->list)) {
- struct inode *inode = ctx->path.dentry->d_inode;
- spin_lock(&inode->i_lock);
- list_del(&ctx->list);
- spin_unlock(&inode->i_lock);
- }
+ if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
+ return;
+ list_del(&ctx->list);
+ spin_unlock(&inode->i_lock);
if (ctx->state != NULL)
nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
if (ctx->cred != NULL)
@@ -500,11 +497,6 @@ static void nfs_free_open_context(struct kref *kref)
kfree(ctx);
}
-void put_nfs_open_context(struct nfs_open_context *ctx)
-{
- kref_put(&ctx->kref, nfs_free_open_context);
-}
-
/*
* Ensure that mmap has a recent RPC credential for use when writing out
* shared pages
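
The kref-to-atomic_dec_and_lock() conversion above exists because the final put must take inode->i_lock atomically with the count reaching zero, before the context is unlinked, which kref_put() cannot express. A single-threaded toy of the idiom (the real primitive does the decrement and the lock acquisition atomically):

#include <stdbool.h>
#include <stdio.h>

struct ctx {
        int count;
        bool on_list;
};

/* Models atomic_dec_and_lock(): returns true, "holding the lock",
 * only when the count reaches zero. */
static bool dec_and_lock(struct ctx *c)
{
        return --c->count == 0;
}

static void put_ctx(struct ctx *c)
{
        if (!dec_and_lock(c))
                return;                 /* fast path: no lock taken */
        c->on_list = false;             /* list_del() under the lock */
        printf("unlinked and freed\n"); /* spin_unlock(), then kfree() */
}

int main(void)
{
        struct ctx c = { .count = 2, .on_list = true };

        put_ctx(&c);    /* 2 -> 1: nothing to do */
        put_ctx(&c);    /* 1 -> 0: unlink and free */
        return 0;
}
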
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 7f86e65182e..aea76d0e5fb 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -175,10 +175,8 @@ static void nfs_expire_automounts(struct work_struct *work)
void nfs_release_automount_timer(void)
{
- if (list_empty(&nfs_automount_list)) {
- cancel_delayed_work(&nfs_automount_task);
- flush_scheduled_work();
- }
+ if (list_empty(&nfs_automount_list))
+ cancel_delayed_work_sync(&nfs_automount_task);
}
/*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6ca2795ccd9..62b3ae28031 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -332,11 +332,9 @@ static int can_open_cached(struct nfs4_state *state, int mode)
switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
case FMODE_READ:
ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
- ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
break;
case FMODE_WRITE:
ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
- ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
break;
case FMODE_READ|FMODE_WRITE:
ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
@@ -1260,7 +1258,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
switch (task->tk_status) {
case 0:
- nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
+ nfs_set_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
break;
case -NFS4ERR_STALE_STATEID:
@@ -1286,23 +1284,19 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
.rpc_cred = state->owner->so_cred,
};
int clear_rd, clear_wr, clear_rdwr;
- int mode;
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
return;
- mode = FMODE_READ|FMODE_WRITE;
clear_rd = clear_wr = clear_rdwr = 0;
spin_lock(&state->owner->so_lock);
/* Calculate the change in open mode */
if (state->n_rdwr == 0) {
if (state->n_rdonly == 0) {
- mode &= ~FMODE_READ;
clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
}
if (state->n_wronly == 0) {
- mode &= ~FMODE_WRITE;
clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
}
@@ -1314,9 +1308,13 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
return;
}
nfs_fattr_init(calldata->res.fattr);
- if (mode != 0)
+ if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
- calldata->arg.open_flags = mode;
+ calldata->arg.open_flags = FMODE_READ;
+ } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
+ msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+ calldata->arg.open_flags = FMODE_WRITE;
+ }
calldata->timestamp = jiffies;
rpc_call_setup(task, &msg, 0);
}
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 0505ca12403..3ea352d82eb 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -127,16 +127,15 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
void
nfs4_renewd_prepare_shutdown(struct nfs_server *server)
{
- flush_scheduled_work();
+ cancel_delayed_work(&server->nfs_client->cl_renewd);
}
void
nfs4_kill_renewd(struct nfs_client *clp)
{
down_read(&clp->cl_sem);
- cancel_delayed_work(&clp->cl_renewd);
+ cancel_delayed_work_sync(&clp->cl_renewd);
up_read(&clp->cl_sem);
- flush_scheduled_work();
}
/*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e9662ba81d8..3e4adf8c831 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -341,8 +341,6 @@ nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode)
else
list_move_tail(&state->open_states, &state->owner->so_states);
}
- if (mode == 0)
- list_del_init(&state->inode_states);
state->state = mode;
}
@@ -415,8 +413,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
return;
spin_lock(&inode->i_lock);
- if (!list_empty(&state->inode_states))
- list_del(&state->inode_states);
+ list_del(&state->inode_states);
list_del(&state->open_states);
spin_unlock(&inode->i_lock);
spin_unlock(&owner->so_lock);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f5e11f4fa95..4f517665c9a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3731,7 +3731,6 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
{
int status;
struct buffer_head *last_eb_bh = NULL;
- struct buffer_head *bh = NULL;
struct ocfs2_insert_type insert = {0, };
struct ocfs2_extent_rec rec;
@@ -3783,9 +3782,6 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
ocfs2_extent_map_insert_rec(inode, &rec);
bail:
- if (bh)
- brelse(bh);
-
if (last_eb_bh)
brelse(last_eb_bh);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index f0bdfd944c4..685c18065c8 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -854,17 +854,25 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
ssize_t ret;
-
- mutex_lock(&sc->sc_send_lock);
- ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
- virt_to_page(kmalloced_virt),
- (long)kmalloced_virt & ~PAGE_MASK,
- size, MSG_DONTWAIT);
- mutex_unlock(&sc->sc_send_lock);
- if (ret != size) {
+ while (1) {
+ mutex_lock(&sc->sc_send_lock);
+ ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
+ virt_to_page(kmalloced_virt),
+ (long)kmalloced_virt & ~PAGE_MASK,
+ size, MSG_DONTWAIT);
+ mutex_unlock(&sc->sc_send_lock);
+ if (ret == size)
+ break;
+ if (ret == (ssize_t)-EAGAIN) {
+ mlog(0, "sendpage of size %zu to " SC_NODEF_FMT
+ " returned EAGAIN\n", size, SC_NODEF_ARGS(sc));
+ cond_resched();
+ continue;
+ }
mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
" failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
o2net_ensure_shutdown(nn, sc, 0);
+ break;
}
}
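
The o2net_sendpage() rework treats a -EAGAIN short send as retryable (after yielding) instead of shutting the connection down. The retry shape, as a runnable toy where fake_sendpage() fails twice before succeeding:

#include <errno.h>
#include <stdio.h>

static int attempts;

static long fake_sendpage(long size)
{
        return ++attempts < 3 ? -EAGAIN : size;
}

int main(void)
{
        long size = 4096, ret;

        for (;;) {
                ret = fake_sendpage(size);
                if (ret == size)
                        break;
                if (ret == -EAGAIN) {
                        printf("EAGAIN, retrying\n");   /* cond_resched() */
                        continue;
                }
                printf("fatal %ld, shutting down\n", ret);
                return 1;
        }
        printf("sent after %d attempts\n", attempts);
        return 0;
}
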
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c4034f693e7..4ffa715be09 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -187,6 +187,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
handle_t *handle;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;
mlog_entry_void();
@@ -197,11 +198,27 @@ int ocfs2_update_inode_atime(struct inode *inode,
goto out;
}
+ ret = ocfs2_journal_access(handle, inode, bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ /*
+ * Don't use ocfs2_mark_inode_dirty() here as we don't always
+ * have i_mutex to guard against concurrent changes to other
+ * inode fields.
+ */
inode->i_atime = CURRENT_TIME;
- ret = ocfs2_mark_inode_dirty(handle, inode, bh);
+ di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
+ di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+
+ ret = ocfs2_journal_dirty(handle, bh);
if (ret < 0)
mlog_errno(ret);
+out_commit:
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
mlog_exit(ret);
@@ -1011,6 +1028,11 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
if (size_change && attr->ia_size != i_size_read(inode)) {
+ if (attr->ia_size > sb->s_maxbytes) {
+ status = -EFBIG;
+ goto bail_unlock;
+ }
+
if (i_size_read(inode) > attr->ia_size)
status = ocfs2_truncate_file(inode, bh, attr->ia_size);
else
@@ -1516,7 +1538,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *di_bh = NULL;
handle_t *handle;
- unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits);
+ unsigned long long max_off = inode->i_sb->s_maxbytes;
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
@@ -1942,7 +1964,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
}
dst = kmap_atomic(page, KM_USER0);
- memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
+ memcpy(dst + (pos & (loff_t)(PAGE_CACHE_SIZE - 1)), buf, bytes);
kunmap_atomic(dst, KM_USER0);
flush_dcache_page(page);
ocfs2_put_write_source(user_page);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d430fdab16e..701e6d04ed5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1080,6 +1080,7 @@ static int ocfs2_rename(struct inode *old_dir,
struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
// this is the 1st dirent bh
nlink_t old_dir_nlink = old_dir->i_nlink;
+ struct ocfs2_dinode *old_di;
/* At some point it might be nice to break this function up a
* bit. */
@@ -1354,7 +1355,20 @@ static int ocfs2_rename(struct inode *old_dir,
old_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(old_inode);
- ocfs2_mark_inode_dirty(handle, old_inode, old_inode_bh);
+
+ status = ocfs2_journal_access(handle, old_inode, old_inode_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status >= 0) {
+ old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
+
+ old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
+ old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
+
+ status = ocfs2_journal_dirty(handle, old_inode_bh);
+ if (status < 0)
+ mlog_errno(status);
+ } else
+ mlog_errno(status);
/* now that the name has been added to new_dir, remove the old name */
status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 5cc90a40b3c..58307853fb4 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -494,16 +494,16 @@ static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb,
/*
* Find the 1st page index which covers the given clusters.
*/
-static inline unsigned long ocfs2_align_clusters_to_page_index(struct super_block *sb,
+static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb,
u32 clusters)
{
unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
- unsigned long index = clusters;
+ pgoff_t index = clusters;
if (PAGE_CACHE_SHIFT > cbits) {
- index = clusters >> (PAGE_CACHE_SHIFT - cbits);
+ index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits);
} else if (PAGE_CACHE_SHIFT < cbits) {
- index = clusters << (cbits - PAGE_CACHE_SHIFT);
+ index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT);
}
return index;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 200c7d4790d..f2fc9a795de 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -316,39 +316,51 @@ static void ocfs2_destroy_inode(struct inode *inode)
kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
}
-/* From xfs_super.c:xfs_max_file_offset
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.
- */
-unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
+static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
+ unsigned int cbits)
{
- unsigned int pagefactor = 1;
- unsigned int bitshift = BITS_PER_LONG - 1;
-
- /* Figure out maximum filesize, on Linux this can depend on
- * the filesystem blocksize (on 32 bit platforms).
- * __block_prepare_write does this in an [unsigned] long...
- * page->index << (PAGE_CACHE_SHIFT - bbits)
- * So, for page sized blocks (4K on 32 bit platforms),
- * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
- * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
- * but for smaller blocksizes it is less (bbits = log2 bsize).
- * Note1: get_block_t takes a long (implicit cast from above)
- * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
- * can optionally convert the [unsigned] long from above into
- * an [unsigned] long long.
+ unsigned int bytes = 1 << cbits;
+ unsigned int trim = bytes;
+ unsigned int bitshift = 32;
+
+ /*
+ * i_size and all block offsets in ocfs2 are always 64 bits
+ * wide. i_clusters is 32 bits, in cluster-sized units. So on
+ * 64 bit platforms, cluster size will be the limiting factor.
*/
#if BITS_PER_LONG == 32
# if defined(CONFIG_LBD)
BUILD_BUG_ON(sizeof(sector_t) != 8);
- pagefactor = PAGE_CACHE_SIZE;
- bitshift = BITS_PER_LONG;
+ /*
+ * We might be limited by page cache size.
+ */
+ if (bytes > PAGE_CACHE_SIZE) {
+ bytes = PAGE_CACHE_SIZE;
+ trim = 1;
+ /*
+ * Shift by 31 here so that we don't get larger than
+ * MAX_LFS_FILESIZE
+ */
+ bitshift = 31;
+ }
# else
- pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+ /*
+ * We are limited by the size of sector_t. Use block size, as
+ * that's what we expose to the VFS.
+ */
+ bytes = 1 << bbits;
+ trim = 1;
+ bitshift = 31;
# endif
#endif
- return (((unsigned long long)pagefactor) << bitshift) - 1;
+ /*
+ * Trim by a whole cluster when we can actually approach the
+ * on-disk limits. Otherwise we can overflow i_clusters when
+ * an extent start is at the max offset.
+ */
+ return (((unsigned long long)bytes) << bitshift) - trim;
}
static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
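
For the common 64-bit case the new ocfs2_max_file_offset() reduces to (cluster_size << 32) minus one cluster of slack, since i_clusters is 32 bits of cluster-sized units. A runnable transcription of just that case (the 32-bit clamps in the diff are omitted):

#include <stdio.h>

static unsigned long long max_off(unsigned int cbits)
{
        unsigned long long bytes = 1ULL << cbits;       /* cluster size */

        /* Trim a whole cluster so an extent starting at the limit
         * can't overflow i_clusters. */
        return (bytes << 32) - bytes;
}

int main(void)
{
        printf("4K clusters: %llu bytes\n", max_off(12));  /* ~16 TiB */
        printf("1M clusters: %llu bytes\n", max_off(20));  /* ~4 PiB */
        return 0;
}
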
@@ -1259,8 +1271,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
int sector_size)
{
int status = 0;
- int i;
- struct ocfs2_dinode *di = NULL;
+ int i, cbits, bbits;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
struct inode *inode = NULL;
struct buffer_head *bitmap_bh = NULL;
struct ocfs2_journal *journal;
@@ -1279,9 +1291,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
sb->s_fs_info = osb;
sb->s_op = &ocfs2_sops;
sb->s_export_op = &ocfs2_export_ops;
+ sb->s_time_gran = 1;
sb->s_flags |= MS_NOATIME;
/* this is needed to support O_LARGEFILE */
- sb->s_maxbytes = ocfs2_max_file_offset(sb->s_blocksize_bits);
+ cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
+ bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
+ sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
osb->sb = sb;
/* Save off for ocfs2_rw_direct */
@@ -1341,8 +1356,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail;
}
- di = (struct ocfs2_dinode *)bh->b_data;
-
osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index 3b9cb3d0b00..783f5270f2a 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -45,6 +45,4 @@ void __ocfs2_abort(struct super_block *sb,
#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
-unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
-
#endif /* OCFS2_SUPER_H */
diff --git a/include/asm-alpha/fcntl.h b/include/asm-alpha/fcntl.h
index 87f2cf459e2..25da0017ec8 100644
--- a/include/asm-alpha/fcntl.h
+++ b/include/asm-alpha/fcntl.h
@@ -16,6 +16,7 @@
#define O_LARGEFILE 0400000 /* will be set by the kernel on every open */
#define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */
#define O_NOATIME 04000000
+#define O_CLOEXEC 010000000 /* set close_on_exec */
#define F_GETLK 7
#define F_SETLK 8
diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h
index 7306c71a892..cd84f1771e3 100644
--- a/include/asm-frv/unistd.h
+++ b/include/asm-frv/unistd.h
@@ -330,10 +330,11 @@
#define __NR_signalfd 321
#define __NR_timerfd 322
#define __NR_eventfd 323
+#define __NR_fallocate 324
#ifdef __KERNEL__
-#define NR_syscalls 324
+#define NR_syscalls 325
#define __ARCH_WANT_IPC_PARSE_VERSION
/* #define __ARCH_WANT_OLD_READDIR */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f605e8d0eed..5f0d797d33f 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -2,6 +2,7 @@
#define _ASM_GENERIC_PGTABLE_H
#ifndef __ASSEMBLY__
+#ifdef CONFIG_MMU
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
/*
@@ -133,41 +134,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
#endif
/*
- * A facility to provide lazy MMU batching. This allows PTE updates and
- * page invalidations to be delayed until a call to leave lazy MMU mode
- * is issued. Some architectures may benefit from doing this, and it is
- * beneficial for both shadow and direct mode hypervisors, which may batch
- * the PTE updates which happen during this window. Note that using this
- * interface requires that read hazards be removed from the code. A read
- * hazard could result in the direct mode hypervisor case, since the actual
- * write to the page tables may not yet have taken place, so reads though
- * a raw PTE pointer after it has been modified are not guaranteed to be
- * up to date. This mode can only be entered and left under the protection of
- * the page table locks for all page tables which may be modified. In the UP
- * case, this is required so that preemption is disabled, and in the SMP case,
- * it must synchronize the delayed page table writes properly on other CPUs.
- */
-#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-#define arch_enter_lazy_mmu_mode() do {} while (0)
-#define arch_leave_lazy_mmu_mode() do {} while (0)
-#define arch_flush_lazy_mmu_mode() do {} while (0)
-#endif
-
-/*
- * A facility to provide batching of the reload of page tables with the
- * actual context switch code for paravirtualized guests. By convention,
- * only one of the lazy modes (CPU, MMU) should be active at any given
- * time, entry should never be nested, and entry and exits should always
- * be paired. This is for sanity of maintaining and reasoning about the
- * kernel code.
- */
-#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-#define arch_enter_lazy_cpu_mode() do {} while (0)
-#define arch_leave_lazy_cpu_mode() do {} while (0)
-#define arch_flush_lazy_cpu_mode() do {} while (0)
-#endif
-
-/*
* When walking page tables, get the address of the next boundary,
* or the end address of the range if that comes earlier. Although no
* vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
@@ -233,6 +199,43 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd)
}
return 0;
}
+#endif /* CONFIG_MMU */
+
+/*
+ * A facility to provide lazy MMU batching. This allows PTE updates and
+ * page invalidations to be delayed until a call to leave lazy MMU mode
+ * is issued. Some architectures may benefit from doing this, and it is
+ * beneficial for both shadow and direct mode hypervisors, which may batch
+ * the PTE updates which happen during this window. Note that using this
+ * interface requires that read hazards be removed from the code. A read
+ * hazard could result in the direct mode hypervisor case, since the actual
+ * write to the page tables may not yet have taken place, so reads though
+ * a raw PTE pointer after it has been modified are not guaranteed to be
+ * up to date. This mode can only be entered and left under the protection of
+ * the page table locks for all page tables which may be modified. In the UP
+ * case, this is required so that preemption is disabled, and in the SMP case,
+ * it must synchronize the delayed page table writes properly on other CPUs.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+#define arch_enter_lazy_mmu_mode() do {} while (0)
+#define arch_leave_lazy_mmu_mode() do {} while (0)
+#define arch_flush_lazy_mmu_mode() do {} while (0)
+#endif
+
+/*
+ * A facility to provide batching of the reload of page tables with the
+ * actual context switch code for paravirtualized guests. By convention,
+ * only one of the lazy modes (CPU, MMU) should be active at any given
+ * time, entry should never be nested, and entry and exits should always
+ * be paired. This is for sanity of maintaining and reasoning about the
+ * kernel code.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
+#define arch_enter_lazy_cpu_mode() do {} while (0)
+#define arch_leave_lazy_cpu_mode() do {} while (0)
+#define arch_flush_lazy_cpu_mode() do {} while (0)
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_GENERIC_PGTABLE_H */
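
With the lazy-MMU hooks now visible even on !CONFIG_MMU builds, the pairing the comment above requires looks like this in use. A kernel-context sketch, not a standalone program; remap_range() is an invented name:

/* Batch a run of PTE updates between enter/leave, under the page
 * table lock, so a hypervisor back end can coalesce them. */
static void remap_range(struct mm_struct *mm, pte_t *ptep,
                        unsigned long addr, int npages, pgprot_t prot)
{
        int i;

        arch_enter_lazy_mmu_mode();
        for (i = 0; i < npages; i++, ptep++, addr += PAGE_SIZE)
                set_pte_at(mm, addr, ptep,
                           pfn_pte(addr >> PAGE_SHIFT, prot));
        arch_leave_lazy_mmu_mode();
}
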
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index 1e8f6f252dd..4091b33dcb1 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -116,6 +116,8 @@ extern void enable_NMI_through_LVT0 (void * dummy);
extern int timer_over_8254;
extern int local_apic_timer_c2_ok;
+extern int local_apic_timer_disabled;
+
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index c961c03cf1e..7b3aa28ebc6 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -79,7 +79,7 @@
#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 */
+/* 14 free */
#define X86_FEATURE_SYNC_RDTSC (3*32+15) /* RDTSC synchronizes the CPU */
#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 7df88be2dd9..9fa3fa9e62d 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -47,7 +47,8 @@ struct paravirt_ops
* The patch function should return the number of bytes of code
* generated, as we nop pad the rest in generic code.
*/
- unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len);
+ unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
+ unsigned long addr, unsigned len);
/* Basic arch-specific setup */
void (*arch_setup)(void);
@@ -253,13 +254,16 @@ extern struct paravirt_ops paravirt_ops;
unsigned paravirt_patch_nop(void);
unsigned paravirt_patch_ignore(unsigned len);
-unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
- void *site, u16 site_clobbers,
+unsigned paravirt_patch_call(void *insnbuf,
+ const void *target, u16 tgt_clobbers,
+ unsigned long addr, u16 site_clobbers,
unsigned len);
-unsigned paravirt_patch_jmp(void *target, void *site, unsigned len);
-unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len);
+unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
+ unsigned long addr, unsigned len);
+unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
+ unsigned long addr, unsigned len);
-unsigned paravirt_patch_insns(void *site, unsigned len,
+unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
const char *start, const char *end);
int paravirt_disable_iospace(void);
diff --git a/include/asm-i386/pci.h b/include/asm-i386/pci.h
index d790343e998..4fcacc71138 100644
--- a/include/asm-i386/pci.h
+++ b/include/asm-i386/pci.h
@@ -8,6 +8,9 @@ struct pci_sysdata {
int node; /* NUMA node */
};
+/* scan a bus after allocating a pci_sysdata for it */
+extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
+
#include <linux/mm.h> /* for struct page */
/* Can be used to override the logic in pci_scan_bus for skipping
diff --git a/include/asm-sparc/prom.h b/include/asm-sparc/prom.h
index 350676c589f..71f2a199832 100644
--- a/include/asm-sparc/prom.h
+++ b/include/asm-sparc/prom.h
@@ -67,6 +67,7 @@ extern int of_set_property(struct device_node *node, const char *name, void *val
extern int of_getintprop_default(struct device_node *np,
const char *name,
int def);
+extern int of_find_in_proplist(const char *list, const char *match, int len);
extern void prom_build_devicetree(void);
diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h
index 3f23c5dc5f2..86dc5c018a1 100644
--- a/include/asm-sparc64/oplib.h
+++ b/include/asm-sparc64/oplib.h
@@ -1,8 +1,7 @@
-/* $Id: oplib.h,v 1.14 2001/12/19 00:29:51 davem Exp $
- * oplib.h: Describes the interface and available routines in the
+/* oplib.h: Describes the interface and available routines in the
* Linux Prom library.
*
- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
* Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
@@ -31,8 +30,10 @@ extern int prom_chosen_node;
extern const char prom_peer_name[];
extern const char prom_compatible_name[];
extern const char prom_root_compatible[];
+extern const char prom_cpu_compatible[];
extern const char prom_finddev_name[];
extern const char prom_chosen_path[];
+extern const char prom_cpu_path[];
extern const char prom_getprop_name[];
extern const char prom_mmu_name[];
extern const char prom_callmethod_name[];
diff --git a/include/asm-sparc64/prom.h b/include/asm-sparc64/prom.h
index 31dcb92fbae..07843f9f05d 100644
--- a/include/asm-sparc64/prom.h
+++ b/include/asm-sparc64/prom.h
@@ -76,6 +76,7 @@ extern int of_set_property(struct device_node *node, const char *name, void *val
extern int of_getintprop_default(struct device_node *np,
const char *name,
int def);
+extern int of_find_in_proplist(const char *list, const char *match, int len);
extern void prom_build_devicetree(void);
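of_find_in_proplist(), added for both sparc and sparc64 above, is presumably a helper for OF string-list properties such as "compatible", which are stored as consecutive NUL-terminated strings packed into a byte array. A standalone sketch of that search, with the semantics assumed rather than taken from the kernel source:

    #include <stdio.h>
    #include <string.h>

    /* 'list' is 'len' bytes of packed NUL-terminated strings. */
    static int find_in_proplist(const char *list, const char *match, int len)
    {
        while (len > 0) {
            int l = strlen(list) + 1;

            if (!strcmp(list, match))
                return 1;
            list += l;
            len -= l;
        }
        return 0;
    }

    int main(void)
    {
        static const char props[] = "SUNW,UltraSPARC-T1\0sun4v-cpu";

        printf("%d\n", find_in_proplist(props, "sun4v-cpu",
                                        sizeof(props)));
        return 0;
    }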
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
index 23ad8a7987a..cf7807813e8 100644
--- a/include/asm-sparc64/spitfire.h
+++ b/include/asm-sparc64/spitfire.h
@@ -38,6 +38,11 @@
#define L1DCACHE_SIZE 0x4000
+#define SUN4V_CHIP_INVALID 0x00
+#define SUN4V_CHIP_NIAGARA1 0x01
+#define SUN4V_CHIP_NIAGARA2 0x02
+#define SUN4V_CHIP_UNKNOWN 0xff
+
#ifndef __ASSEMBLY__
enum ultra_tlb_layout {
@@ -49,6 +54,8 @@ enum ultra_tlb_layout {
extern enum ultra_tlb_layout tlb_type;
+extern int sun4v_chip_type;
+
extern int cheetah_pcache_forced_on;
extern void cheetah_enable_pcache(void);
diff --git a/include/asm-sparc64/xor.h b/include/asm-sparc64/xor.h
index 8ce3f1813e2..a0233884fc9 100644
--- a/include/asm-sparc64/xor.h
+++ b/include/asm-sparc64/xor.h
@@ -63,4 +63,8 @@ static struct xor_block_template xor_block_niagara = {
/* For VIS for everything except Niagara. */
#define XOR_SELECT_TEMPLATE(FASTEST) \
- (tlb_type == hypervisor ? &xor_block_niagara : &xor_block_VIS)
+ ((tlb_type == hypervisor && \
+ (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \
+ sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \
+ &xor_block_niagara : \
+ &xor_block_VIS)
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
index 88926eb44f5..5da8cb0c059 100644
--- a/include/asm-x86_64/pci.h
+++ b/include/asm-x86_64/pci.h
@@ -10,6 +10,8 @@ struct pci_sysdata {
void* iommu; /* IOMMU private data */
};
+extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
+
#ifdef CONFIG_CALGARY_IOMMU
static inline void* pci_iommu(struct pci_bus *bus)
{
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4d85262b4fa..1ddef34f43c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -24,6 +24,8 @@
#include <linux/mempool.h>
#include <linux/ioprio.h>
+#ifdef CONFIG_BLOCK
+
/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
#include <asm/io.h>
@@ -361,4 +363,5 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
__bio_kmap_irq((bio), (bio)->bi_idx, (flags))
#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
+#endif /* CONFIG_BLOCK */
#endif /* __LINUX_BIO_H */
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 90874a5d7d7..7b5d56b82b5 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -105,7 +105,7 @@ struct blk_io_trace {
*/
struct blk_io_trace_remap {
__be32 device;
- u32 __pad;
+ __be32 device_from;
__be64 sector;
};
@@ -272,6 +272,7 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
return;
r.device = cpu_to_be32(dev);
+ r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
r.sector = cpu_to_be64(to);
__blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
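With the padding replaced by device_from, a remap event now records both ends of a remapping, so a trace consumer can follow a bio from the logical device down to the underlying one. Both fields stay big-endian on the wire; a minimal decoding sketch (glibc be32toh()/be64toh(), example device numbers only):

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Wire layout mirrored from blk_io_trace_remap above. */
    struct remap_payload {
        uint32_t device;        /* __be32: destination device */
        uint32_t device_from;   /* __be32: originating device */
        uint64_t sector;        /* __be64: destination sector */
    };

    int main(void)
    {
        struct remap_payload r = {
            .device      = htobe32(0x800010),
            .device_from = htobe32(0xfe00000),
            .sector      = htobe64(2048),
        };

        printf("remap: dev 0x%x -> dev 0x%x, sector %llu\n",
               (unsigned)be32toh(r.device_from), (unsigned)be32toh(r.device),
               (unsigned long long)be64toh(r.sector));
        return 0;
    }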
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index e0bd46eb241..def5a659b8a 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -123,7 +123,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old,
extern void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode);
extern int clockevents_register_notifier(struct notifier_block *nb);
-extern void clockevents_unregister_notifier(struct notifier_block *nb);
extern int clockevents_program_event(struct clock_event_device *dev,
ktime_t expires, ktime_t now);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6bf13956294..16421f662a7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1659,7 +1659,6 @@ extern int sb_min_blocksize(struct super_block *, int);
extern int generic_file_mmap(struct file *, struct vm_area_struct *);
extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
-extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
diff --git a/include/linux/init.h b/include/linux/init.h
index 1a4a283d19a..74b1f43bf98 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -43,7 +43,7 @@
#define __init __attribute__ ((__section__ (".init.text"))) __cold
#define __initdata __attribute__ ((__section__ (".init.data")))
#define __exitdata __attribute__ ((__section__(".exit.data")))
-#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) __cold
+#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit")))
/* modpost check for section mismatches during the kernel build.
* A section mismatch happens when there are references from a
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4300bb462d2..f592df74b3c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -224,9 +224,9 @@ extern void hex_dump_to_buffer(const void *buf, size_t len,
char *linebuf, size_t linebuflen, bool ascii);
extern void print_hex_dump(const char *level, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
- void *buf, size_t len, bool ascii);
+ const void *buf, size_t len, bool ascii);
extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
- void *buf, size_t len);
+ const void *buf, size_t len);
#define hex_asc(x) "0123456789abcdef"[x]
#ifdef DEBUG
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9ba4aec37c5..157dcb055b5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -71,7 +71,7 @@ struct nfs_access_entry {
struct nfs4_state;
struct nfs_open_context {
- struct kref kref;
+ atomic_t count;
struct path path;
struct rpc_cred *cred;
struct nfs4_state *state;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 28e3664fdf1..cd13a78c5db 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -75,7 +75,6 @@ struct proc_dir_entry {
write_proc_t *write_proc;
atomic_t count; /* use count */
int deleted; /* delete flag */
- void *set;
int pde_users; /* number of callers into module in progress */
spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
struct completion *pde_unload_completion;
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index c6b7485eac7..fe17d7d750c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -281,7 +281,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head,
extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
void (*func)(struct rcu_head *head)));
extern void synchronize_rcu(void);
-void synchronize_idle(void);
extern void rcu_barrier(void);
#endif /* __KERNEL__ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 17249fae501..682ef87da6e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -139,7 +139,7 @@ struct cfs_rq;
extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
extern void proc_sched_set_task(struct task_struct *p);
extern void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now);
+print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
#else
static inline void
proc_sched_show_task(struct task_struct *p, struct seq_file *m)
@@ -149,7 +149,7 @@ static inline void proc_sched_set_task(struct task_struct *p)
{
}
static inline void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
+print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
}
#endif
@@ -855,26 +855,24 @@ struct sched_domain;
struct sched_class {
struct sched_class *next;
- void (*enqueue_task) (struct rq *rq, struct task_struct *p,
- int wakeup, u64 now);
- void (*dequeue_task) (struct rq *rq, struct task_struct *p,
- int sleep, u64 now);
+ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
void (*yield_task) (struct rq *rq, struct task_struct *p);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
- struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
- void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
+ struct task_struct * (*pick_next_task) (struct rq *rq);
+ void (*put_prev_task) (struct rq *rq, struct task_struct *p);
- int (*load_balance) (struct rq *this_rq, int this_cpu,
+ unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, unsigned long *total_load_moved);
+ int *all_pinned, int *this_best_prio);
void (*set_curr_task) (struct rq *rq);
void (*task_tick) (struct rq *rq, struct task_struct *p);
- void (*task_new) (struct rq *rq, struct task_struct *p, u64 now);
+ void (*task_new) (struct rq *rq, struct task_struct *p);
};
struct load_weight {
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 691a1748d9d..6570719eafd 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -274,6 +274,8 @@ struct tty_struct {
#define TTY_PTY_LOCK 16 /* pty private */
#define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */
#define TTY_HUPPED 18 /* Post driver->hangup() */
+#define TTY_FLUSHING 19 /* Flushing to ldisc in progress */
+#define TTY_FLUSHPENDING 20 /* Queued buffer flush pending */
#define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 7a671603fca..9bf059817ae 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -21,4 +21,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
extern int nf_conntrack_ipv4_compat_init(void);
extern void nf_conntrack_ipv4_compat_fini(void);
+extern void need_ipv4_conntrack(void);
+
#endif /*_NF_CONNTRACK_IPV4_H*/
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a777d376141..3401293359e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1992,19 +1992,19 @@ int __audit_signal_info(int sig, struct task_struct *t)
extern uid_t audit_sig_uid;
extern u32 audit_sig_sid;
- if (audit_pid && t->tgid == audit_pid &&
- (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1)) {
- audit_sig_pid = tsk->pid;
- if (ctx)
- audit_sig_uid = ctx->loginuid;
- else
- audit_sig_uid = tsk->uid;
- selinux_get_task_sid(tsk, &audit_sig_sid);
+ if (audit_pid && t->tgid == audit_pid) {
+ if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
+ audit_sig_pid = tsk->pid;
+ if (ctx)
+ audit_sig_uid = ctx->loginuid;
+ else
+ audit_sig_uid = tsk->uid;
+ selinux_get_task_sid(tsk, &audit_sig_sid);
+ }
+ if (!audit_signals || audit_dummy_context())
+ return 0;
}
- if (!audit_signals) /* audit_context checked in wrapper */
- return 0;
-
/* optimize the common case by putting first signal recipient directly
* in audit_context */
if (!ctx->target_pid) {
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index c3827274688..5bfeaed7e48 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -62,15 +62,6 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq)
*/
desc->chip->enable(irq);
- /*
- * Temporary hack to figure out more about the problem, which
- * is causing the ancient network cards to die.
- */
- if (desc->handle_irq != handle_edge_irq) {
- WARN_ON_ONCE(1);
- return;
- }
-
if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3e9f513a728..4b8a4493c54 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1063,6 +1063,11 @@ EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
+#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(jprobe_return);
+#endif
+
+#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
+#endif
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index a3b7854b8f7..a686590d88c 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -709,7 +709,8 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
region->end_pfn << PAGE_SHIFT);
for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
- memory_bm_set_bit(bm, pfn);
+ if (pfn_valid(pfn))
+ memory_bm_set_bit(bm, pfn);
}
}
diff --git a/kernel/profile.c b/kernel/profile.c
index 5b20fe977be..cb1e37d2dac 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -199,11 +199,11 @@ EXPORT_SYMBOL_GPL(register_timer_hook);
EXPORT_SYMBOL_GPL(unregister_timer_hook);
EXPORT_SYMBOL_GPL(task_handoff_register);
EXPORT_SYMBOL_GPL(task_handoff_unregister);
+EXPORT_SYMBOL_GPL(profile_event_register);
+EXPORT_SYMBOL_GPL(profile_event_unregister);
#endif /* CONFIG_PROFILING */
-EXPORT_SYMBOL_GPL(profile_event_register);
-EXPORT_SYMBOL_GPL(profile_event_unregister);
#ifdef CONFIG_SMP
/*
diff --git a/kernel/sched.c b/kernel/sched.c
index 72bb9483d94..6247e4a8350 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -263,6 +263,7 @@ struct rq {
unsigned int clock_warps, clock_overflows;
unsigned int clock_unstable_events;
+ u64 tick_timestamp;
atomic_t nr_iowait;
@@ -318,15 +319,19 @@ static inline int cpu_of(struct rq *rq)
}
/*
- * Per-runqueue clock, as finegrained as the platform can give us:
+ * Update the per-runqueue clock, as finegrained as the platform can give
+ * us, but without assuming monotonicity, etc.:
*/
-static unsigned long long __rq_clock(struct rq *rq)
+static void __update_rq_clock(struct rq *rq)
{
u64 prev_raw = rq->prev_clock_raw;
u64 now = sched_clock();
s64 delta = now - prev_raw;
u64 clock = rq->clock;
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+#endif
/*
* Protect against sched_clock() occasionally going backwards:
*/
@@ -337,8 +342,11 @@ static unsigned long long __rq_clock(struct rq *rq)
/*
* Catch too large forward jumps too:
*/
- if (unlikely(delta > 2*TICK_NSEC)) {
- clock++;
+ if (unlikely(clock + delta > rq->tick_timestamp + TICK_NSEC)) {
+ if (clock < rq->tick_timestamp + TICK_NSEC)
+ clock = rq->tick_timestamp + TICK_NSEC;
+ else
+ clock++;
rq->clock_overflows++;
} else {
if (unlikely(delta > rq->clock_max_delta))
@@ -349,18 +357,12 @@ static unsigned long long __rq_clock(struct rq *rq)
rq->prev_clock_raw = now;
rq->clock = clock;
-
- return clock;
}
-static inline unsigned long long rq_clock(struct rq *rq)
+static void update_rq_clock(struct rq *rq)
{
- int this_cpu = smp_processor_id();
-
- if (this_cpu == cpu_of(rq))
- return __rq_clock(rq);
-
- return rq->clock;
+ if (likely(smp_processor_id() == cpu_of(rq)))
+ __update_rq_clock(rq);
}
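The rewritten clock update drops the cross-CPU rq_clock() read and adds two clamps: a backward warp of sched_clock() only bumps the clock by one, and a forward jump can push the clock at most one tick past the last scheduler tick. The arithmetic in isolation, as a runnable sketch (TICK_NSEC value chosen for illustration):

    #include <stdint.h>
    #include <stdio.h>

    #define TICK_NSEC 1000000ULL        /* 1 ms tick, illustrative value */

    struct rq_clk {
        uint64_t prev_raw;              /* last raw sched_clock() sample */
        uint64_t clock;                 /* filtered per-runqueue clock */
        uint64_t tick_timestamp;        /* clock value at the last tick */
    };

    /* Standalone model of the clamping in __update_rq_clock(). */
    static void update_clock(struct rq_clk *rq, uint64_t now)
    {
        int64_t delta = (int64_t)(now - rq->prev_raw);
        uint64_t clock = rq->clock;

        if (delta < 0) {
            clock++;                    /* raw clock warped backwards */
        } else if (clock + delta > rq->tick_timestamp + TICK_NSEC) {
            if (clock < rq->tick_timestamp + TICK_NSEC)
                clock = rq->tick_timestamp + TICK_NSEC;
            else
                clock++;                /* too-large forward jump */
        } else {
            clock += delta;
        }

        rq->prev_raw = now;
        rq->clock = clock;
    }

    int main(void)
    {
        struct rq_clk rq = { 0, 0, 0 };

        update_clock(&rq, 500000);      /* sane delta: advances by 500us */
        update_clock(&rq, 10000000);    /* 9.5ms jump: clamped to one tick */
        printf("clock=%llu\n", (unsigned long long)rq.clock);
        return 0;
    }

With these inputs the 9.5 ms jump is clamped, so the sketch prints clock=1000000 rather than 10000000.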
/*
@@ -386,9 +388,12 @@ unsigned long long cpu_clock(int cpu)
{
unsigned long long now;
unsigned long flags;
+ struct rq *rq;
local_irq_save(flags);
- now = rq_clock(cpu_rq(cpu));
+ rq = cpu_rq(cpu);
+ update_rq_clock(rq);
+ now = rq->clock;
local_irq_restore(flags);
return now;
@@ -637,6 +642,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
#define WMULT_SHIFT 32
+/*
+ * Shift right and round:
+ */
+#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+
static unsigned long
calc_delta_mine(unsigned long delta_exec, unsigned long weight,
struct load_weight *lw)
@@ -644,18 +654,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
u64 tmp;
if (unlikely(!lw->inv_weight))
- lw->inv_weight = WMULT_CONST / lw->weight;
+ lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
tmp = (u64)delta_exec * weight;
/*
* Check whether we'd overflow the 64-bit multiplication:
*/
- if (unlikely(tmp > WMULT_CONST)) {
- tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight)
- >> (WMULT_SHIFT/2);
- } else {
- tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
- }
+ if (unlikely(tmp > WMULT_CONST))
+ tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+ WMULT_SHIFT/2);
+ else
+ tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
}
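RSR() makes the fixed-point divide round to nearest instead of truncating, and the inv_weight seed gets the same treatment: (WMULT_CONST - w/2)/w + 1 is a rounded 2^32/w rather than a floored one. A quick standalone check against the nice -5 entries of the tables below:

    #include <stdint.h>
    #include <stdio.h>

    #define WMULT_CONST (1ULL << 32)    /* ~2^32, as in sched.c */
    #define WMULT_SHIFT 32

    /* Shift right and round, instead of truncating: */
    #define RSR(x, y) (((x) + (1ULL << ((y) - 1))) >> (y))

    int main(void)
    {
        unsigned long weight = 3121;    /* nice -5 weight, from the table below */
        uint64_t inv, tmp, delta_exec = 1000000;

        /* Rounded inverse: matches the prio_to_wmult[] entry of 1376151. */
        inv = (WMULT_CONST - weight / 2) / weight + 1;
        printf("inv_weight = %llu\n", (unsigned long long)inv);

        /* delta * weight / weight should round-trip; tmp <= WMULT_CONST
         * here, so only the single-RSR branch is exercised. */
        tmp = delta_exec * weight;
        printf("rounded:   %llu\n", (unsigned long long)RSR(tmp * inv, WMULT_SHIFT));
        printf("truncated: %llu\n", (unsigned long long)((tmp * inv) >> WMULT_SHIFT));
        return 0;
    }

The rounded path recovers the exact 1000000 round-trip where plain truncation comes up one short (999999).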
@@ -703,11 +712,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
* the relative distance between them is ~25%.)
*/
static const int prio_to_weight[40] = {
-/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
-/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280,
-/* 0 */ NICE_0_LOAD /* 1024 */,
-/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137,
-/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
+ /* -20 */ 88761, 71755, 56483, 46273, 36291,
+ /* -15 */ 29154, 23254, 18705, 14949, 11916,
+ /* -10 */ 9548, 7620, 6100, 4904, 3906,
+ /* -5 */ 3121, 2501, 1991, 1586, 1277,
+ /* 0 */ 1024, 820, 655, 526, 423,
+ /* 5 */ 335, 272, 215, 172, 137,
+ /* 10 */ 110, 87, 70, 56, 45,
+ /* 15 */ 36, 29, 23, 18, 15,
};
/*
@@ -718,14 +730,14 @@ static const int prio_to_weight[40] = {
* into multiplications:
*/
static const u32 prio_to_wmult[40] = {
-/* -20 */ 48356, 60446, 75558, 94446, 118058,
-/* -15 */ 147573, 184467, 230589, 288233, 360285,
-/* -10 */ 450347, 562979, 703746, 879575, 1099582,
-/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443,
-/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518,
-/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126,
-/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717,
-/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+ /* -20 */ 48388, 59856, 76040, 92818, 118348,
+ /* -15 */ 147320, 184698, 229616, 287308, 360437,
+ /* -10 */ 449829, 563644, 704093, 875809, 1099582,
+ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
+ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
+ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
+ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
+ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
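The regenerated tables are tied together by prio_to_wmult[i] ~= 2^32 / prio_to_weight[i], and adjacent weights now sit a uniform ~25% apart, which is what gives each nice level its roughly 10% CPU effect against a nice-0 task. A two-line sanity check of the nice 0 and nice 1 entries:

    #include <stdio.h>

    int main(void)
    {
        unsigned long w0 = 1024, w1 = 820;      /* nice 0, nice 1 */

        /* wmult ~= 2^32 / weight: 4294967296 / 1024 = 4194304 */
        printf("2^32/1024 = %lu (table: 4194304)\n",
               (unsigned long)(4294967296ULL / w0));

        /* adjacent weights ~25%% apart: 1024/820 ~= 1.249 */
        printf("step ratio = %.3f\n", (double)w0 / (double)w1);
        return 0;
    }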
@@ -745,8 +757,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
- int this_best_prio, int best_prio, int best_prio_seen,
- struct rq_iterator *iterator);
+ int *this_best_prio, struct rq_iterator *iterator);
#include "sched_stats.h"
#include "sched_rt.c"
@@ -782,14 +793,14 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
* This function is called /before/ updating rq->ls.load
* and when switching tasks.
*/
-static void update_curr_load(struct rq *rq, u64 now)
+static void update_curr_load(struct rq *rq)
{
struct load_stat *ls = &rq->ls;
u64 start;
start = ls->load_update_start;
- ls->load_update_start = now;
- ls->delta_stat += now - start;
+ ls->load_update_start = rq->clock;
+ ls->delta_stat += rq->clock - start;
/*
* Stagger updates to ls->delta_fair. Very frequent updates
* can be expensive.
@@ -798,30 +809,28 @@ static void update_curr_load(struct rq *rq, u64 now)
__update_curr_load(rq, ls);
}
-static inline void
-inc_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
{
- update_curr_load(rq, now);
+ update_curr_load(rq);
update_load_add(&rq->ls.load, p->se.load.weight);
}
-static inline void
-dec_load(struct rq *rq, const struct task_struct *p, u64 now)
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
{
- update_curr_load(rq, now);
+ update_curr_load(rq);
update_load_sub(&rq->ls.load, p->se.load.weight);
}
-static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
{
rq->nr_running++;
- inc_load(rq, p, now);
+ inc_load(rq, p);
}
-static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
{
rq->nr_running--;
- dec_load(rq, p, now);
+ dec_load(rq, p);
}
static void set_load_weight(struct task_struct *p)
@@ -848,18 +857,16 @@ static void set_load_weight(struct task_struct *p)
p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
}
-static void
-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
{
sched_info_queued(p);
- p->sched_class->enqueue_task(rq, p, wakeup, now);
+ p->sched_class->enqueue_task(rq, p, wakeup);
p->se.on_rq = 1;
}
-static void
-dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
{
- p->sched_class->dequeue_task(rq, p, sleep, now);
+ p->sched_class->dequeue_task(rq, p, sleep);
p->se.on_rq = 0;
}
@@ -914,13 +921,11 @@ static int effective_prio(struct task_struct *p)
*/
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
- u64 now = rq_clock(rq);
-
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible--;
- enqueue_task(rq, p, wakeup, now);
- inc_nr_running(p, rq, now);
+ enqueue_task(rq, p, wakeup);
+ inc_nr_running(p, rq);
}
/*
@@ -928,13 +933,13 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
*/
static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
{
- u64 now = rq_clock(rq);
+ update_rq_clock(rq);
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible--;
- enqueue_task(rq, p, 0, now);
- inc_nr_running(p, rq, now);
+ enqueue_task(rq, p, 0);
+ inc_nr_running(p, rq);
}
/*
@@ -942,13 +947,11 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
*/
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
{
- u64 now = rq_clock(rq);
-
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible++;
- dequeue_task(rq, p, sleep, now);
- dec_nr_running(p, rq, now);
+ dequeue_task(rq, p, sleep);
+ dec_nr_running(p, rq);
}
/**
@@ -1516,6 +1519,7 @@ out_set_cpu:
out_activate:
#endif /* CONFIG_SMP */
+ update_rq_clock(rq);
activate_task(rq, p, 1);
/*
* Sync wakeups (i.e. those types of wakeups where the waker
@@ -1647,12 +1651,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
unsigned long flags;
struct rq *rq;
int this_cpu;
- u64 now;
rq = task_rq_lock(p, &flags);
BUG_ON(p->state != TASK_RUNNING);
this_cpu = smp_processor_id(); /* parent's CPU */
- now = rq_clock(rq);
+ update_rq_clock(rq);
p->prio = effective_prio(p);
@@ -1666,8 +1669,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
* Let the scheduling class do new task startup
* management (if any):
*/
- p->sched_class->task_new(rq, p, now);
- inc_nr_running(p, rq, now);
+ p->sched_class->task_new(rq, p);
+ inc_nr_running(p, rq);
}
check_preempt_curr(rq, p);
task_rq_unlock(rq, &flags);
@@ -1954,7 +1957,6 @@ static void update_cpu_load(struct rq *this_rq)
unsigned long total_load = this_rq->ls.load.weight;
unsigned long this_load = total_load;
struct load_stat *ls = &this_rq->ls;
- u64 now = __rq_clock(this_rq);
int i, scale;
this_rq->nr_load_updates++;
@@ -1962,7 +1964,7 @@ static void update_cpu_load(struct rq *this_rq)
goto do_avg;
/* Update delta_fair/delta_exec fields first */
- update_curr_load(this_rq, now);
+ update_curr_load(this_rq);
fair_delta64 = ls->delta_fair + 1;
ls->delta_fair = 0;
@@ -1970,8 +1972,8 @@ static void update_cpu_load(struct rq *this_rq)
exec_delta64 = ls->delta_exec + 1;
ls->delta_exec = 0;
- sample_interval64 = now - ls->load_update_last;
- ls->load_update_last = now;
+ sample_interval64 = this_rq->clock - ls->load_update_last;
+ ls->load_update_last = this_rq->clock;
if ((s64)sample_interval64 < (s64)TICK_NSEC)
sample_interval64 = TICK_NSEC;
@@ -2026,6 +2028,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
spin_lock(&rq1->lock);
}
}
+ update_rq_clock(rq1);
+ update_rq_clock(rq2);
}
/*
@@ -2166,8 +2170,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
- int this_best_prio, int best_prio, int best_prio_seen,
- struct rq_iterator *iterator)
+ int *this_best_prio, struct rq_iterator *iterator)
{
int pulled = 0, pinned = 0, skip_for_load;
struct task_struct *p;
@@ -2192,12 +2195,8 @@ next:
*/
skip_for_load = (p->se.load.weight >> 1) > rem_load_move +
SCHED_LOAD_SCALE_FUZZ;
- if (skip_for_load && p->prio < this_best_prio)
- skip_for_load = !best_prio_seen && p->prio == best_prio;
- if (skip_for_load ||
+ if ((skip_for_load && p->prio >= *this_best_prio) ||
!can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
-
- best_prio_seen |= p->prio == best_prio;
p = iterator->next(iterator->arg);
goto next;
}
@@ -2211,8 +2210,8 @@ next:
* and the prescribed amount of weighted load.
*/
if (pulled < max_nr_move && rem_load_move > 0) {
- if (p->prio < this_best_prio)
- this_best_prio = p->prio;
+ if (p->prio < *this_best_prio)
+ *this_best_prio = p->prio;
p = iterator->next(iterator->arg);
goto next;
}
@@ -2231,32 +2230,52 @@ out:
}
/*
- * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
- * load from busiest to this_rq, as part of a balancing operation within
- * "domain". Returns the number of tasks moved.
+ * move_tasks tries to move up to max_load_move weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd".
+ * Returns 1 if successful and 0 otherwise.
*
* Called with both runqueues locked.
*/
static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
- unsigned long max_nr_move, unsigned long max_load_move,
+ unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned)
{
struct sched_class *class = sched_class_highest;
- unsigned long load_moved, total_nr_moved = 0, nr_moved;
- long rem_load_move = max_load_move;
+ unsigned long total_load_moved = 0;
+ int this_best_prio = this_rq->curr->prio;
do {
- nr_moved = class->load_balance(this_rq, this_cpu, busiest,
- max_nr_move, (unsigned long)rem_load_move,
- sd, idle, all_pinned, &load_moved);
- total_nr_moved += nr_moved;
- max_nr_move -= nr_moved;
- rem_load_move -= load_moved;
+ total_load_moved +=
+ class->load_balance(this_rq, this_cpu, busiest,
+ ULONG_MAX, max_load_move - total_load_moved,
+ sd, idle, all_pinned, &this_best_prio);
class = class->next;
- } while (class && max_nr_move && rem_load_move > 0);
+ } while (class && max_load_move > total_load_moved);
+
+ return total_load_moved > 0;
+}
- return total_nr_moved;
+/*
+ * move_one_task tries to move exactly one task from busiest to this_rq, as
+ * part of active balancing operations within "domain".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+ struct sched_domain *sd, enum cpu_idle_type idle)
+{
+ struct sched_class *class;
+ int this_best_prio = MAX_PRIO;
+
+ for (class = sched_class_highest; class; class = class->next)
+ if (class->load_balance(this_rq, this_cpu, busiest,
+ 1, ULONG_MAX, sd, idle, NULL,
+ &this_best_prio))
+ return 1;
+
+ return 0;
}
/*
@@ -2588,11 +2607,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
*/
#define MAX_PINNED_INTERVAL 512
-static inline unsigned long minus_1_or_zero(unsigned long n)
-{
- return n > 0 ? n - 1 : 0;
-}
-
/*
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
@@ -2601,7 +2615,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
int *balance)
{
- int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+ int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group;
unsigned long imbalance;
struct rq *busiest;
@@ -2642,18 +2656,17 @@ redo:
schedstat_add(sd, lb_imbalance[idle], imbalance);
- nr_moved = 0;
+ ld_moved = 0;
if (busiest->nr_running > 1) {
/*
* Attempt to move tasks. If find_busiest_group has found
* an imbalance but busiest->nr_running <= 1, the group is
- * still unbalanced. nr_moved simply stays zero, so it is
+ * still unbalanced. ld_moved simply stays zero, so it is
* correctly treated as an imbalance.
*/
local_irq_save(flags);
double_rq_lock(this_rq, busiest);
- nr_moved = move_tasks(this_rq, this_cpu, busiest,
- minus_1_or_zero(busiest->nr_running),
+ ld_moved = move_tasks(this_rq, this_cpu, busiest,
imbalance, sd, idle, &all_pinned);
double_rq_unlock(this_rq, busiest);
local_irq_restore(flags);
@@ -2661,7 +2674,7 @@ redo:
/*
* some other cpu did the load balance for us.
*/
- if (nr_moved && this_cpu != smp_processor_id())
+ if (ld_moved && this_cpu != smp_processor_id())
resched_cpu(this_cpu);
/* All tasks on this runqueue were pinned by CPU affinity */
@@ -2673,7 +2686,7 @@ redo:
}
}
- if (!nr_moved) {
+ if (!ld_moved) {
schedstat_inc(sd, lb_failed[idle]);
sd->nr_balance_failed++;
@@ -2722,10 +2735,10 @@ redo:
sd->balance_interval *= 2;
}
- if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+ if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
return -1;
- return nr_moved;
+ return ld_moved;
out_balanced:
schedstat_inc(sd, lb_balanced[idle]);
@@ -2757,7 +2770,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
struct sched_group *group;
struct rq *busiest = NULL;
unsigned long imbalance;
- int nr_moved = 0;
+ int ld_moved = 0;
int sd_idle = 0;
int all_pinned = 0;
cpumask_t cpus = CPU_MASK_ALL;
@@ -2792,12 +2805,13 @@ redo:
schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
- nr_moved = 0;
+ ld_moved = 0;
if (busiest->nr_running > 1) {
/* Attempt to move tasks */
double_lock_balance(this_rq, busiest);
- nr_moved = move_tasks(this_rq, this_cpu, busiest,
- minus_1_or_zero(busiest->nr_running),
+ /* this_rq->clock is already updated */
+ update_rq_clock(busiest);
+ ld_moved = move_tasks(this_rq, this_cpu, busiest,
imbalance, sd, CPU_NEWLY_IDLE,
&all_pinned);
spin_unlock(&busiest->lock);
@@ -2809,7 +2823,7 @@ redo:
}
}
- if (!nr_moved) {
+ if (!ld_moved) {
schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
!test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
@@ -2817,7 +2831,7 @@ redo:
} else
sd->nr_balance_failed = 0;
- return nr_moved;
+ return ld_moved;
out_balanced:
schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
@@ -2894,6 +2908,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
/* move a task from busiest_rq to target_rq */
double_lock_balance(busiest_rq, target_rq);
+ update_rq_clock(busiest_rq);
+ update_rq_clock(target_rq);
/* Search for an sd spanning us and the target CPU. */
for_each_domain(target_cpu, sd) {
@@ -2905,8 +2921,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
if (likely(sd)) {
schedstat_inc(sd, alb_cnt);
- if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
- ULONG_MAX, sd, CPU_IDLE, NULL))
+ if (move_one_task(target_rq, target_cpu, busiest_rq,
+ sd, CPU_IDLE))
schedstat_inc(sd, alb_pushed);
else
schedstat_inc(sd, alb_failed);
@@ -3175,8 +3191,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
- int this_best_prio, int best_prio, int best_prio_seen,
- struct rq_iterator *iterator)
+ int *this_best_prio, struct rq_iterator *iterator)
{
*load_moved = 0;
@@ -3202,7 +3217,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
rq = task_rq_lock(p, &flags);
ns = p->se.sum_exec_runtime;
if (rq->curr == p) {
- delta_exec = rq_clock(rq) - p->se.exec_start;
+ update_rq_clock(rq);
+ delta_exec = rq->clock - p->se.exec_start;
if ((s64)delta_exec > 0)
ns += delta_exec;
}
@@ -3296,11 +3312,19 @@ void scheduler_tick(void)
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
+ u64 next_tick = rq->tick_timestamp + TICK_NSEC;
spin_lock(&rq->lock);
+ __update_rq_clock(rq);
+ /*
+ * Let rq->clock advance by at least TICK_NSEC:
+ */
+ if (unlikely(rq->clock < next_tick))
+ rq->clock = next_tick;
+ rq->tick_timestamp = rq->clock;
+ update_cpu_load(rq);
if (curr != rq->idle) /* FIXME: needed? */
curr->sched_class->task_tick(rq, curr);
- update_cpu_load(rq);
spin_unlock(&rq->lock);
#ifdef CONFIG_SMP
@@ -3382,7 +3406,7 @@ static inline void schedule_debug(struct task_struct *prev)
* Pick up the highest-prio task:
*/
static inline struct task_struct *
-pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
+pick_next_task(struct rq *rq, struct task_struct *prev)
{
struct sched_class *class;
struct task_struct *p;
@@ -3392,14 +3416,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
* the fair class we can call that function directly:
*/
if (likely(rq->nr_running == rq->cfs.nr_running)) {
- p = fair_sched_class.pick_next_task(rq, now);
+ p = fair_sched_class.pick_next_task(rq);
if (likely(p))
return p;
}
class = sched_class_highest;
for ( ; ; ) {
- p = class->pick_next_task(rq, now);
+ p = class->pick_next_task(rq);
if (p)
return p;
/*
@@ -3418,7 +3442,6 @@ asmlinkage void __sched schedule(void)
struct task_struct *prev, *next;
long *switch_count;
struct rq *rq;
- u64 now;
int cpu;
need_resched:
@@ -3436,6 +3459,7 @@ need_resched_nonpreemptible:
spin_lock_irq(&rq->lock);
clear_tsk_need_resched(prev);
+ __update_rq_clock(rq);
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
@@ -3450,9 +3474,8 @@ need_resched_nonpreemptible:
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
- now = __rq_clock(rq);
- prev->sched_class->put_prev_task(rq, prev, now);
- next = pick_next_task(rq, prev, now);
+ prev->sched_class->put_prev_task(rq, prev);
+ next = pick_next_task(rq, prev);
sched_info_switch(prev, next);
@@ -3895,17 +3918,16 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
unsigned long flags;
int oldprio, on_rq;
struct rq *rq;
- u64 now;
BUG_ON(prio < 0 || prio > MAX_PRIO);
rq = task_rq_lock(p, &flags);
- now = rq_clock(rq);
+ update_rq_clock(rq);
oldprio = p->prio;
on_rq = p->se.on_rq;
if (on_rq)
- dequeue_task(rq, p, 0, now);
+ dequeue_task(rq, p, 0);
if (rt_prio(prio))
p->sched_class = &rt_sched_class;
@@ -3915,7 +3937,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
p->prio = prio;
if (on_rq) {
- enqueue_task(rq, p, 0, now);
+ enqueue_task(rq, p, 0);
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
@@ -3938,7 +3960,6 @@ void set_user_nice(struct task_struct *p, long nice)
int old_prio, delta, on_rq;
unsigned long flags;
struct rq *rq;
- u64 now;
if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
return;
@@ -3947,7 +3968,7 @@ void set_user_nice(struct task_struct *p, long nice)
* the task might be in the middle of scheduling on another CPU.
*/
rq = task_rq_lock(p, &flags);
- now = rq_clock(rq);
+ update_rq_clock(rq);
/*
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
@@ -3960,8 +3981,8 @@ void set_user_nice(struct task_struct *p, long nice)
}
on_rq = p->se.on_rq;
if (on_rq) {
- dequeue_task(rq, p, 0, now);
- dec_load(rq, p, now);
+ dequeue_task(rq, p, 0);
+ dec_load(rq, p);
}
p->static_prio = NICE_TO_PRIO(nice);
@@ -3971,8 +3992,8 @@ void set_user_nice(struct task_struct *p, long nice)
delta = p->prio - old_prio;
if (on_rq) {
- enqueue_task(rq, p, 0, now);
- inc_load(rq, p, now);
+ enqueue_task(rq, p, 0);
+ inc_load(rq, p);
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU:
@@ -4208,6 +4229,7 @@ recheck:
spin_unlock_irqrestore(&p->pi_lock, flags);
goto recheck;
}
+ update_rq_clock(rq);
on_rq = p->se.on_rq;
if (on_rq)
deactivate_task(rq, p, 0);
@@ -4463,10 +4485,8 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
out_unlock:
read_unlock(&tasklist_lock);
mutex_unlock(&sched_hotcpu_mutex);
- if (retval)
- return retval;
- return 0;
+ return retval;
}
/**
@@ -4966,6 +4986,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
on_rq = p->se.on_rq;
if (on_rq)
deactivate_task(rq_src, p, 0);
+
set_task_cpu(p, dest_cpu);
if (on_rq) {
activate_task(rq_dest, p, 0);
@@ -5198,7 +5219,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
for ( ; ; ) {
if (!rq->nr_running)
break;
- next = pick_next_task(rq, rq->curr, rq_clock(rq));
+ update_rq_clock(rq);
+ next = pick_next_task(rq, rq->curr);
if (!next)
break;
migrate_dead(dead_cpu, next);
@@ -5210,12 +5232,19 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
static struct ctl_table sd_ctl_dir[] = {
- {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, },
+ {
+ .procname = "sched_domain",
+ .mode = 0755,
+ },
{0,},
};
static struct ctl_table sd_ctl_root[] = {
- {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, },
+ {
+ .procname = "kernel",
+ .mode = 0755,
+ .child = sd_ctl_dir,
+ },
{0,},
};
@@ -5231,11 +5260,10 @@ static struct ctl_table *sd_alloc_ctl_entry(int n)
}
static void
-set_table_entry(struct ctl_table *entry, int ctl_name,
+set_table_entry(struct ctl_table *entry,
const char *procname, void *data, int maxlen,
mode_t mode, proc_handler *proc_handler)
{
- entry->ctl_name = ctl_name;
entry->procname = procname;
entry->data = data;
entry->maxlen = maxlen;
@@ -5248,28 +5276,28 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
{
struct ctl_table *table = sd_alloc_ctl_entry(14);
- set_table_entry(&table[0], 1, "min_interval", &sd->min_interval,
+ set_table_entry(&table[0], "min_interval", &sd->min_interval,
sizeof(long), 0644, proc_doulongvec_minmax);
- set_table_entry(&table[1], 2, "max_interval", &sd->max_interval,
+ set_table_entry(&table[1], "max_interval", &sd->max_interval,
sizeof(long), 0644, proc_doulongvec_minmax);
- set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx,
+ set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx,
+ set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx,
+ set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx,
+ set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx,
+ set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor,
+ set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
+ set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[10], 11, "cache_nice_tries",
+ set_table_entry(&table[10], "cache_nice_tries",
&sd->cache_nice_tries,
sizeof(int), 0644, proc_dointvec_minmax);
- set_table_entry(&table[12], 13, "flags", &sd->flags,
+ set_table_entry(&table[12], "flags", &sd->flags,
sizeof(int), 0644, proc_dointvec_minmax);
return table;
@@ -5289,7 +5317,6 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
i = 0;
for_each_domain(cpu, sd) {
snprintf(buf, 32, "domain%d", i);
- entry->ctl_name = i + 1;
entry->procname = kstrdup(buf, GFP_KERNEL);
entry->mode = 0755;
entry->child = sd_alloc_ctl_domain_table(sd);
@@ -5310,7 +5337,6 @@ static void init_sched_domain_sysctl(void)
for (i = 0; i < cpu_num; i++, entry++) {
snprintf(buf, 32, "cpu%d", i);
- entry->ctl_name = i + 1;
entry->procname = kstrdup(buf, GFP_KERNEL);
entry->mode = 0755;
entry->child = sd_alloc_ctl_cpu_table(i);
@@ -5379,6 +5405,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
rq->migration_thread = NULL;
/* Idle task back to normal (off runqueue, low prio) */
rq = task_rq_lock(rq->idle, &flags);
+ update_rq_clock(rq);
deactivate_task(rq, rq->idle, 0);
rq->idle->static_prio = MAX_PRIO;
__setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
@@ -6616,12 +6643,13 @@ void normalize_rt_tasks(void)
goto out_unlock;
#endif
+ update_rq_clock(rq);
on_rq = p->se.on_rq;
if (on_rq)
- deactivate_task(task_rq(p), p, 0);
+ deactivate_task(rq, p, 0);
__setscheduler(rq, p, SCHED_NORMAL, 0);
if (on_rq) {
- activate_task(task_rq(p), p, 0);
+ activate_task(rq, p, 0);
resched_task(rq->curr);
}
#ifdef CONFIG_SMP
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 8421b9399e1..87e524762b8 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -29,7 +29,7 @@
} while (0)
static void
-print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now)
+print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
if (rq->curr == p)
SEQ_printf(m, "R");
@@ -56,7 +56,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now)
#endif
}
-static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now)
+static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
struct task_struct *g, *p;
@@ -77,7 +77,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now)
if (!p->se.on_rq || task_cpu(p) != rq_cpu)
continue;
- print_task(m, rq, p, now);
+ print_task(m, rq, p);
} while_each_thread(g, p);
read_unlock_irq(&tasklist_lock);
@@ -106,9 +106,9 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
(long long)wait_runtime_rq_sum);
}
-void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
+void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
- SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq);
+ SEQ_printf(m, "\ncfs_rq\n");
#define P(x) \
SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))
@@ -124,7 +124,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
}
-static void print_cpu(struct seq_file *m, int cpu, u64 now)
+static void print_cpu(struct seq_file *m, int cpu)
{
struct rq *rq = &per_cpu(runqueues, cpu);
@@ -166,9 +166,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
P(cpu_load[4]);
#undef P
- print_cfs_stats(m, cpu, now);
+ print_cfs_stats(m, cpu);
- print_rq(m, rq, cpu, now);
+ print_rq(m, rq, cpu);
}
static int sched_debug_show(struct seq_file *m, void *v)
@@ -184,7 +184,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now);
for_each_online_cpu(cpu)
- print_cpu(m, cpu, now);
+ print_cpu(m, cpu);
SEQ_printf(m, "\n");
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6f579ff5a9b..c5af38948a1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -222,21 +222,25 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity)
{
u64 tmp;
+ if (likely(curr->load.weight == NICE_0_LOAD))
+ return granularity;
/*
- * Negative nice levels get the same granularity as nice-0:
+ * Positive nice levels get the same granularity as nice-0:
*/
- if (likely(curr->load.weight >= NICE_0_LOAD))
- return granularity;
+ if (likely(curr->load.weight < NICE_0_LOAD)) {
+ tmp = curr->load.weight * (u64)granularity;
+ return (long) (tmp >> NICE_0_SHIFT);
+ }
/*
- * Positive nice level tasks get linearly finer
+ * Negative nice level tasks get linearly finer
* granularity:
*/
- tmp = curr->load.weight * (u64)granularity;
+ tmp = curr->load.inv_weight * (u64)granularity;
/*
* It will always fit into 'long':
*/
- return (long) (tmp >> NICE_0_SHIFT);
+ return (long) (tmp >> WMULT_SHIFT);
}
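With the weight-table values from sched.c above, the corrected scaling is easy to quantify: a positive-nice (light) task scales the granularity by weight/1024, while a negative-nice (heavy) task now multiplies by inv_weight and shifts by 32, i.e. divides by roughly its weight. A standalone check (granularity chosen arbitrarily):

    #include <stdint.h>
    #include <stdio.h>

    #define NICE_0_SHIFT 10
    #define WMULT_SHIFT  32

    int main(void)
    {
        unsigned long granularity = 2000000;    /* 2 ms, for illustration */
        uint64_t tmp;

        /* nice +5: weight 335, scaled by weight/1024 */
        tmp = 335ULL * granularity;
        printf("nice +5: %ld ns\n", (long)(tmp >> NICE_0_SHIFT));

        /* nice -5: weight 3121, inv_weight 1376151, scaled by ~1/weight */
        tmp = 1376151ULL * granularity;
        printf("nice -5: %ld ns\n", (long)(tmp >> WMULT_SHIFT));
        return 0;
    }

So a nice +5 task gets about a third of the base granularity and a nice -5 task a few hundred nanoseconds, where the old code handed heavy tasks the full nice-0 granularity.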
static inline void
@@ -281,26 +285,25 @@ add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
* are not in our scheduling class.
*/
static inline void
-__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now)
+__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
- unsigned long delta, delta_exec, delta_fair;
- long delta_mine;
+ unsigned long delta, delta_exec, delta_fair, delta_mine;
struct load_weight *lw = &cfs_rq->load;
unsigned long load = lw->weight;
- if (unlikely(!load))
- return;
-
delta_exec = curr->delta_exec;
schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
curr->sum_exec_runtime += delta_exec;
cfs_rq->exec_clock += delta_exec;
+ if (unlikely(!load))
+ return;
+
delta_fair = calc_delta_fair(delta_exec, lw);
delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
- if (cfs_rq->sleeper_bonus > sysctl_sched_stat_granularity) {
+ if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) {
delta = calc_delta_mine(cfs_rq->sleeper_bonus,
curr->load.weight, lw);
if (unlikely(delta > cfs_rq->sleeper_bonus))
@@ -321,7 +324,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now)
add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec);
}
-static void update_curr(struct cfs_rq *cfs_rq, u64 now)
+static void update_curr(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq_curr(cfs_rq);
unsigned long delta_exec;
@@ -334,22 +337,22 @@ static void update_curr(struct cfs_rq *cfs_rq, u64 now)
* since the last time we changed load (this cannot
* overflow on 32 bits):
*/
- delta_exec = (unsigned long)(now - curr->exec_start);
+ delta_exec = (unsigned long)(rq_of(cfs_rq)->clock - curr->exec_start);
curr->delta_exec += delta_exec;
if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) {
- __update_curr(cfs_rq, curr, now);
+ __update_curr(cfs_rq, curr);
curr->delta_exec = 0;
}
- curr->exec_start = now;
+ curr->exec_start = rq_of(cfs_rq)->clock;
}
static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
se->wait_start_fair = cfs_rq->fair_clock;
- schedstat_set(se->wait_start, now);
+ schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
}
/*
@@ -377,8 +380,7 @@ calc_weighted(unsigned long delta, unsigned long weight, int shift)
/*
* Task is being enqueued - update stats:
*/
-static void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
s64 key;
@@ -387,7 +389,7 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
* a dequeue/enqueue event is a NOP)
*/
if (se != cfs_rq_curr(cfs_rq))
- update_stats_wait_start(cfs_rq, se, now);
+ update_stats_wait_start(cfs_rq, se);
/*
* Update the key:
*/
@@ -407,7 +409,8 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
(WMULT_SHIFT - NICE_0_SHIFT);
} else {
tmp = se->wait_runtime;
- key -= (tmp * se->load.weight) >> NICE_0_SHIFT;
+ key -= (tmp * se->load.inv_weight) >>
+ (WMULT_SHIFT - NICE_0_SHIFT);
}
}
@@ -418,11 +421,12 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
* Note: must be called with a freshly updated rq->fair_clock.
*/
static inline void
-__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long delta_fair = se->delta_fair_run;
- schedstat_set(se->wait_max, max(se->wait_max, now - se->wait_start));
+ schedstat_set(se->wait_max, max(se->wait_max,
+ rq_of(cfs_rq)->clock - se->wait_start));
if (unlikely(se->load.weight != NICE_0_LOAD))
delta_fair = calc_weighted(delta_fair, se->load.weight,
@@ -432,7 +436,7 @@ __update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
}
static void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long delta_fair;
@@ -442,7 +446,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
se->delta_fair_run += delta_fair;
if (unlikely(abs(se->delta_fair_run) >=
sysctl_sched_stat_granularity)) {
- __update_stats_wait_end(cfs_rq, se, now);
+ __update_stats_wait_end(cfs_rq, se);
se->delta_fair_run = 0;
}
@@ -451,34 +455,34 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
}
static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- update_curr(cfs_rq, now);
+ update_curr(cfs_rq);
/*
* Mark the end of the wait period if dequeueing a
* waiting task:
*/
if (se != cfs_rq_curr(cfs_rq))
- update_stats_wait_end(cfs_rq, se, now);
+ update_stats_wait_end(cfs_rq, se);
}
/*
* We are picking a new current task - update its stats:
*/
static inline void
-update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
* We are starting a new run period:
*/
- se->exec_start = now;
+ se->exec_start = rq_of(cfs_rq)->clock;
}
/*
* We are descheduling a task - update its stats:
*/
static inline void
-update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
se->exec_start = 0;
}
@@ -487,8 +491,7 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
* Scheduling class queueing methods:
*/
-static void
-__enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long load = cfs_rq->load.weight, delta_fair;
long prev_runtime;
@@ -522,8 +525,7 @@ __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
}
-static void
-enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct task_struct *tsk = task_of(se);
unsigned long delta_fair;
@@ -538,7 +540,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
se->delta_fair_sleep += delta_fair;
if (unlikely(abs(se->delta_fair_sleep) >=
sysctl_sched_stat_granularity)) {
- __enqueue_sleeper(cfs_rq, se, now);
+ __enqueue_sleeper(cfs_rq, se);
se->delta_fair_sleep = 0;
}
@@ -546,7 +548,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
#ifdef CONFIG_SCHEDSTATS
if (se->sleep_start) {
- u64 delta = now - se->sleep_start;
+ u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
if ((s64)delta < 0)
delta = 0;
@@ -558,7 +560,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
se->sum_sleep_runtime += delta;
}
if (se->block_start) {
- u64 delta = now - se->block_start;
+ u64 delta = rq_of(cfs_rq)->clock - se->block_start;
if ((s64)delta < 0)
delta = 0;
@@ -573,26 +575,24 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
}
static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
- int wakeup, u64 now)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
{
/*
* Update the fair clock.
*/
- update_curr(cfs_rq, now);
+ update_curr(cfs_rq);
if (wakeup)
- enqueue_sleeper(cfs_rq, se, now);
+ enqueue_sleeper(cfs_rq, se);
- update_stats_enqueue(cfs_rq, se, now);
+ update_stats_enqueue(cfs_rq, se);
__enqueue_entity(cfs_rq, se);
}
static void
-dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
- int sleep, u64 now)
+dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
- update_stats_dequeue(cfs_rq, se, now);
+ update_stats_dequeue(cfs_rq, se);
if (sleep) {
se->sleep_start_fair = cfs_rq->fair_clock;
#ifdef CONFIG_SCHEDSTATS
@@ -600,9 +600,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
struct task_struct *tsk = task_of(se);
if (tsk->state & TASK_INTERRUPTIBLE)
- se->sleep_start = now;
+ se->sleep_start = rq_of(cfs_rq)->clock;
if (tsk->state & TASK_UNINTERRUPTIBLE)
- se->block_start = now;
+ se->block_start = rq_of(cfs_rq)->clock;
}
cfs_rq->wait_runtime -= se->wait_runtime;
#endif
@@ -629,7 +629,7 @@ __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
}
static inline void
-set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
+set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
/*
 * Any task has to be enqueued before it gets to execute on
 * a CPU. So account for the time it spent waiting on the
 * runqueue. (note, here we rely on pick_next_task() having
 * done a put_prev_task_fair() shortly before this, which
* done a put_prev_task_fair() shortly before this, which
* updated rq->fair_clock - used by update_stats_wait_end())
*/
- update_stats_wait_end(cfs_rq, se, now);
- update_stats_curr_start(cfs_rq, se, now);
+ update_stats_wait_end(cfs_rq, se);
+ update_stats_curr_start(cfs_rq, se);
set_cfs_rq_curr(cfs_rq, se);
}
-static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq, u64 now)
+static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
struct sched_entity *se = __pick_next_entity(cfs_rq);
- set_next_entity(cfs_rq, se, now);
+ set_next_entity(cfs_rq, se);
return se;
}
-static void
-put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev, u64 now)
+static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
{
/*
* If still on the runqueue then deactivate_task()
* was not called and update_curr() has to be done:
*/
if (prev->on_rq)
- update_curr(cfs_rq, now);
+ update_curr(cfs_rq);
- update_stats_curr_end(cfs_rq, prev, now);
+ update_stats_curr_end(cfs_rq, prev);
if (prev->on_rq)
- update_stats_wait_start(cfs_rq, prev, now);
+ update_stats_wait_start(cfs_rq, prev);
set_cfs_rq_curr(cfs_rq, NULL);
}
static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
- struct rq *rq = rq_of(cfs_rq);
struct sched_entity *next;
- u64 now = __rq_clock(rq);
/*
* Dequeue and enqueue the task to update its
* position within the tree:
*/
- dequeue_entity(cfs_rq, curr, 0, now);
- enqueue_entity(cfs_rq, curr, 0, now);
+ dequeue_entity(cfs_rq, curr, 0);
+ enqueue_entity(cfs_rq, curr, 0);
/*
* Reschedule if another task tops the current one.
@@ -785,8 +782,7 @@ static inline int is_same_group(struct task_struct *curr, struct task_struct *p)
* increased. Here we update the fair scheduling stats and
* then put the task into the rbtree:
*/
-static void
-enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
@@ -795,7 +791,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
if (se->on_rq)
break;
cfs_rq = cfs_rq_of(se);
- enqueue_entity(cfs_rq, se, wakeup, now);
+ enqueue_entity(cfs_rq, se, wakeup);
}
}
@@ -804,15 +800,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
* decreased. We remove the task from the rbtree and
* update the fair scheduling stats:
*/
-static void
-dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- dequeue_entity(cfs_rq, se, sleep, now);
+ dequeue_entity(cfs_rq, se, sleep);
/* Don't dequeue parent if it has other entities besides us */
if (cfs_rq->load.weight)
break;
@@ -825,14 +820,14 @@ dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now)
static void yield_task_fair(struct rq *rq, struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
- u64 now = __rq_clock(rq);
+ __update_rq_clock(rq);
/*
* Dequeue and enqueue the task to update its
* position within the tree:
*/
- dequeue_entity(cfs_rq, &p->se, 0, now);
- enqueue_entity(cfs_rq, &p->se, 0, now);
+ dequeue_entity(cfs_rq, &p->se, 0);
+ enqueue_entity(cfs_rq, &p->se, 0);
}
/*
@@ -845,7 +840,8 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
unsigned long gran;
if (unlikely(rt_prio(p->prio))) {
- update_curr(cfs_rq, rq_clock(rq));
+ update_rq_clock(rq);
+ update_curr(cfs_rq);
resched_task(curr);
return;
}
@@ -861,7 +857,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran);
}
-static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
+static struct task_struct *pick_next_task_fair(struct rq *rq)
{
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
@@ -870,7 +866,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
return NULL;
do {
- se = pick_next_entity(cfs_rq, now);
+ se = pick_next_entity(cfs_rq);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);
@@ -880,14 +876,14 @@ static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
/*
* Account for a descheduled task:
*/
-static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now)
+static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
{
struct sched_entity *se = &prev->se;
struct cfs_rq *cfs_rq;
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
- put_prev_entity(cfs_rq, se, now);
+ put_prev_entity(cfs_rq, se);
}
}
@@ -930,6 +926,7 @@ static struct task_struct *load_balance_next_fair(void *arg)
return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
}
+#ifdef CONFIG_FAIR_GROUP_SCHED
static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr;
@@ -943,12 +940,13 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
return p->prio;
}
+#endif
-static int
+static unsigned long
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
- unsigned long max_nr_move, unsigned long max_load_move,
- struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, unsigned long *total_load_moved)
+ unsigned long max_nr_move, unsigned long max_load_move,
+ struct sched_domain *sd, enum cpu_idle_type idle,
+ int *all_pinned, int *this_best_prio)
{
struct cfs_rq *busy_cfs_rq;
unsigned long load_moved, total_nr_moved = 0, nr_moved;
@@ -959,15 +957,14 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
cfs_rq_iterator.next = load_balance_next_fair;
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
struct cfs_rq *this_cfs_rq;
long imbalance;
unsigned long maxload;
- int this_best_prio, best_prio, best_prio_seen = 0;
this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
- imbalance = busy_cfs_rq->load.weight -
- this_cfs_rq->load.weight;
+ imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
if (imbalance <= 0)
continue;
@@ -976,27 +973,17 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
imbalance /= 2;
maxload = min(rem_load_move, imbalance);
- this_best_prio = cfs_rq_best_prio(this_cfs_rq);
- best_prio = cfs_rq_best_prio(busy_cfs_rq);
-
- /*
- * Enable handling of the case where there is more than one task
- * with the best priority. If the current running task is one
- * of those with prio==best_prio we know it won't be moved
- * and therefore it's safe to override the skip (based on load)
- * of any task we find with that prio.
- */
- if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se)
- best_prio_seen = 1;
-
+ *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+# define maxload rem_load_move
+#endif
/* pass busy_cfs_rq argument into
* load_balance_[start|next]_fair iterators
*/
cfs_rq_iterator.arg = busy_cfs_rq;
nr_moved = balance_tasks(this_rq, this_cpu, busiest,
max_nr_move, maxload, sd, idle, all_pinned,
- &load_moved, this_best_prio, best_prio,
- best_prio_seen, &cfs_rq_iterator);
+ &load_moved, this_best_prio, &cfs_rq_iterator);
total_nr_moved += nr_moved;
max_nr_move -= nr_moved;
@@ -1006,9 +993,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
break;
}
- *total_load_moved = max_load_move - rem_load_move;
-
- return total_nr_moved;
+ return max_load_move - rem_load_move;
}
/*
@@ -1032,14 +1017,14 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr)
* monopolize the CPU. Note: the parent runqueue is locked,
* the child is not running yet.
*/
-static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now)
+static void task_new_fair(struct rq *rq, struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
struct sched_entity *se = &p->se;
sched_info_queued(p);
- update_stats_enqueue(cfs_rq, se, now);
+ update_stats_enqueue(cfs_rq, se);
/*
* Child runs first: we let it run before the parent
* until it reschedules once. We set up the key so that
@@ -1072,15 +1057,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now)
*/
static void set_curr_task_fair(struct rq *rq)
{
- struct task_struct *curr = rq->curr;
- struct sched_entity *se = &curr->se;
- u64 now = rq_clock(rq);
- struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &rq->curr->se;
- for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
- set_next_entity(cfs_rq, se, now);
- }
+ for_each_sched_entity(se)
+ set_next_entity(cfs_rq_of(se), se);
}
#else
static void set_curr_task_fair(struct rq *rq)
@@ -1109,12 +1089,11 @@ struct sched_class fair_sched_class __read_mostly = {
};
#ifdef CONFIG_SCHED_DEBUG
-void print_cfs_stats(struct seq_file *m, int cpu, u64 now)
+static void print_cfs_stats(struct seq_file *m, int cpu)
{
- struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq;
- for_each_leaf_cfs_rq(rq, cfs_rq)
- print_cfs_rq(m, cpu, cfs_rq, now);
+ for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+ print_cfs_rq(m, cpu, cfs_rq);
}
#endif
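
The change running through sched_fair.c above is mechanical: instead of threading a u64 now snapshot through every call chain, the caller refreshes the per-runqueue clock once (via update_rq_clock()/__update_rq_clock()) and the helpers read rq->clock, or rq_of(cfs_rq)->clock, directly. A minimal self-contained C sketch of the two shapes (names here are illustrative, not the real scheduler types):

#include <stdint.h>
#include <stdio.h>

struct rq { uint64_t clock; };

/* old shape: the timestamp is a parameter on every helper */
static void charge_old(uint64_t *sum, uint64_t start, uint64_t now)
{
	*sum += now - start;
}

/* new shape: the caller refreshes rq->clock once, helpers read it */
static void charge_new(struct rq *rq, uint64_t *sum, uint64_t start)
{
	*sum += rq->clock - start;
}

int main(void)
{
	struct rq rq = { .clock = 1000 };
	uint64_t sum = 0;

	charge_old(&sum, 400, rq.clock);
	charge_new(&rq, &sum, 400);
	printf("%llu\n", (unsigned long long)sum);
	return 0;
}
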
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 41841e741c4..3503fb2d9f9 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -13,7 +13,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
resched_task(rq->idle);
}
-static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
+static struct task_struct *pick_next_task_idle(struct rq *rq)
{
schedstat_inc(rq, sched_goidle);
@@ -25,7 +25,7 @@ static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
* message if some code attempts to do it:
*/
static void
-dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
{
spin_unlock_irq(&rq->lock);
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
@@ -33,15 +33,15 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
spin_lock_irq(&rq->lock);
}
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
-static int
+static unsigned long
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, unsigned long *total_load_moved)
+ int *all_pinned, int *this_best_prio)
{
return 0;
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 002fcf8d3f6..dcdcad632fd 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -7,7 +7,7 @@
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
*/
-static inline void update_curr_rt(struct rq *rq, u64 now)
+static inline void update_curr_rt(struct rq *rq)
{
struct task_struct *curr = rq->curr;
u64 delta_exec;
@@ -15,18 +15,17 @@ static inline void update_curr_rt(struct rq *rq, u64 now)
if (!task_has_rt_policy(curr))
return;
- delta_exec = now - curr->se.exec_start;
+ delta_exec = rq->clock - curr->se.exec_start;
if (unlikely((s64)delta_exec < 0))
delta_exec = 0;
schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
curr->se.sum_exec_runtime += delta_exec;
- curr->se.exec_start = now;
+ curr->se.exec_start = rq->clock;
}
-static void
-enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
+static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
{
struct rt_prio_array *array = &rq->rt.active;
@@ -37,12 +36,11 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
/*
* Adding/removing a task to/from a priority array:
*/
-static void
-dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now)
+static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
{
struct rt_prio_array *array = &rq->rt.active;
- update_curr_rt(rq, now);
+ update_curr_rt(rq);
list_del(&p->run_list);
if (list_empty(array->queue + p->prio))
@@ -75,7 +73,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
resched_task(rq->curr);
}
-static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
+static struct task_struct *pick_next_task_rt(struct rq *rq)
{
struct rt_prio_array *array = &rq->rt.active;
struct task_struct *next;
@@ -89,14 +87,14 @@ static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
queue = array->queue + idx;
next = list_entry(queue->next, struct task_struct, run_list);
- next->se.exec_start = now;
+ next->se.exec_start = rq->clock;
return next;
}
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
- update_curr_rt(rq, now);
+ update_curr_rt(rq);
p->se.exec_start = 0;
}
@@ -172,28 +170,15 @@ static struct task_struct *load_balance_next_rt(void *arg)
return p;
}
-static int
+static unsigned long
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *all_pinned, unsigned long *load_moved)
+ int *all_pinned, int *this_best_prio)
{
- int this_best_prio, best_prio, best_prio_seen = 0;
int nr_moved;
struct rq_iterator rt_rq_iterator;
-
- best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
- this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
-
- /*
- * Enable handling of the case where there is more than one task
- * with the best priority. If the current running task is one
- * of those with prio==best_prio we know it won't be moved
- * and therefore it's safe to override the skip (based on load)
- * of any task we find with that prio.
- */
- if (busiest->curr->prio == best_prio)
- best_prio_seen = 1;
+ unsigned long load_moved;
rt_rq_iterator.start = load_balance_start_rt;
rt_rq_iterator.next = load_balance_next_rt;
@@ -203,11 +188,10 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
rt_rq_iterator.arg = busiest;
nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
- max_load_move, sd, idle, all_pinned, load_moved,
- this_best_prio, best_prio, best_prio_seen,
- &rt_rq_iterator);
+ max_load_move, sd, idle, all_pinned, &load_moved,
+ this_best_prio, &rt_rq_iterator);
- return nr_moved;
+ return load_moved;
}
static void task_tick_rt(struct rq *rq, struct task_struct *p)
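
Both load_balance_fair() and load_balance_rt() above switch from returning a task count (with the moved load in an out-parameter) to returning the moved load directly, and the per-call best_prio/best_prio_seen computation is replaced by an int *this_best_prio that the core balancer threads through every scheduling class. A sketch of the new callback shape as it can be inferred from these hunks (the struct types are left opaque):

struct rq;
struct sched_domain;
enum cpu_idle_type { CPU_IDLE, CPU_NOT_IDLE, CPU_NEWLY_IDLE };

/* returns weighted load moved; *this_best_prio is shared state the
 * caller carries across all scheduling classes */
typedef unsigned long (*load_balance_fn)(struct rq *this_rq, int this_cpu,
		struct rq *busiest,
		unsigned long max_nr_move, unsigned long max_load_move,
		struct sched_domain *sd, enum cpu_idle_type idle,
		int *all_pinned, int *this_best_prio);
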
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79c891e6266..8bdb8c07e04 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1023,6 +1023,7 @@ static ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &proc_doulongvec_minmax,
},
+#endif
#ifdef CONFIG_NUMA
{
.ctl_name = CTL_UNNUMBERED,
@@ -1034,7 +1035,6 @@ static ctl_table vm_table[] = {
.strategy = &sysctl_string,
},
#endif
-#endif
#if defined(CONFIG_X86_32) || \
(defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
{
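
The sysctl.c hunks fix conditional nesting rather than behaviour: the CONFIG_NUMA entry had ended up inside the preceding option's #ifdef block, so it silently disappeared whenever that unrelated option was off. Moving the #endif up restores independent conditions; schematically (CONFIG_FOO is a placeholder for the outer option):

/* before: NUMA entry nested under an unrelated option */
#ifdef CONFIG_FOO
	{ /* FOO entry */ },
#ifdef CONFIG_NUMA
	{ /* NUMA entry */ },
#endif
#endif

/* after: each entry guarded only by its own option */
#ifdef CONFIG_FOO
	{ /* FOO entry */ },
#endif
#ifdef CONFIG_NUMA
	{ /* NUMA entry */ },
#endif
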
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 2ad1c37b8df..41dd3105ce7 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -113,16 +113,6 @@ int clockevents_register_notifier(struct notifier_block *nb)
return ret;
}
-/**
- * clockevents_unregister_notifier - unregister a clock events change listener
- */
-void clockevents_unregister_notifier(struct notifier_block *nb)
-{
- spin_lock(&clockevents_lock);
- raw_notifier_chain_unregister(&clockevents_chain, nb);
- spin_unlock(&clockevents_lock);
-}
-
/*
* Notify about a clock event change. Called with clockevents_lock
* held.
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 473f5aed6ca..bd5edaeaa80 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -145,9 +145,9 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
*/
void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
int rowsize, int groupsize,
- void *buf, size_t len, bool ascii)
+ const void *buf, size_t len, bool ascii)
{
- u8 *ptr = buf;
+ const u8 *ptr = buf;
int i, linelen, remaining = len;
unsigned char linebuf[200];
@@ -189,7 +189,7 @@ EXPORT_SYMBOL(print_hex_dump);
* rowsize of 16, groupsize of 1, and ASCII output included.
*/
void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
- void *buf, size_t len)
+ const void *buf, size_t len)
{
print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
buf, len, 1);
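
The hexdump change is pure const-correctness: dumping a buffer never writes to it, so read-only callers previously needed a cast. With the const-qualified signatures a const buffer can be passed directly; a minimal kernel-side usage sketch:

#include <linux/types.h>
#include <linux/kernel.h>

static const u8 magic[] = { 0xde, 0xad, 0xbe, 0xef };

static void dump_magic(void)
{
	/* no cast needed now that buf is 'const void *' */
	print_hex_dump(KERN_DEBUG, "magic: ", DUMP_PREFIX_OFFSET,
		       16, 1, magic, sizeof(magic), true);
	print_hex_dump_bytes("magic: ", DUMP_PREFIX_NONE,
			     magic, sizeof(magic));
}
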
diff --git a/mm/filemap.c b/mm/filemap.c
index 6cf700d4184..90b657b50f8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -843,7 +843,7 @@ static void shrink_readahead_size_eio(struct file *filp,
/**
* do_generic_mapping_read - generic file read routine
* @mapping: address_space to be read
- * @ra: file's readahead state
+ * @_ra: file's readahead state
* @filp: the file to read
* @ppos: current file position
* @desc: read_descriptor
@@ -1218,26 +1218,6 @@ out:
}
EXPORT_SYMBOL(generic_file_aio_read);
-int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
-{
- ssize_t written;
- unsigned long count = desc->count;
- struct file *file = desc->arg.data;
-
- if (size > count)
- size = count;
-
- written = file->f_op->sendpage(file, page, offset,
- size, &file->f_pos, size<count);
- if (written < 0) {
- desc->error = written;
- written = 0;
- }
- desc->count = count - written;
- desc->written += written;
- return written;
-}
-
static ssize_t
do_readahead(struct address_space *mapping, struct file *filp,
unsigned long index, unsigned long nr)
diff --git a/mm/slub.c b/mm/slub.c
index 6c6d74ff069..69d02e3e439 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -211,7 +211,8 @@ static inline void ClearSlabDebug(struct page *page)
#define MAX_OBJECTS_PER_SLAB 65535
/* Internal SLUB flags */
-#define __OBJECT_POISON 0x80000000 /* Poison object */
+#define __OBJECT_POISON 0x80000000 /* Poison object */
+#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
/* Not all arches define cache_line_size */
#ifndef cache_line_size
@@ -2277,10 +2278,26 @@ panic:
}
#ifdef CONFIG_ZONE_DMA
+
+static void sysfs_add_func(struct work_struct *w)
+{
+ struct kmem_cache *s;
+
+ down_write(&slub_lock);
+ list_for_each_entry(s, &slab_caches, list) {
+ if (s->flags & __SYSFS_ADD_DEFERRED) {
+ s->flags &= ~__SYSFS_ADD_DEFERRED;
+ sysfs_slab_add(s);
+ }
+ }
+ up_write(&slub_lock);
+}
+
+static DECLARE_WORK(sysfs_add_work, sysfs_add_func);
+
static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
{
struct kmem_cache *s;
- struct kmem_cache *x;
char *text;
size_t realsize;
@@ -2289,22 +2306,36 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
return s;
/* Dynamically create dma cache */
- x = kmalloc(kmem_size, flags & ~SLUB_DMA);
- if (!x)
- panic("Unable to allocate memory for dma cache\n");
+ if (flags & __GFP_WAIT)
+ down_write(&slub_lock);
+ else {
+ if (!down_write_trylock(&slub_lock))
+ goto out;
+ }
+
+ if (kmalloc_caches_dma[index])
+ goto unlock_out;
realsize = kmalloc_caches[index].objsize;
- text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
- (unsigned int)realsize);
- s = create_kmalloc_cache(x, text, realsize, flags);
- down_write(&slub_lock);
- if (!kmalloc_caches_dma[index]) {
- kmalloc_caches_dma[index] = s;
- up_write(&slub_lock);
- return s;
+ text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize);
+ s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+
+ if (!s || !text || !kmem_cache_open(s, flags, text,
+ realsize, ARCH_KMALLOC_MINALIGN,
+ SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) {
+ kfree(s);
+ kfree(text);
+ goto unlock_out;
}
+
+ list_add(&s->list, &slab_caches);
+ kmalloc_caches_dma[index] = s;
+
+ schedule_work(&sysfs_add_work);
+
+unlock_out:
up_write(&slub_lock);
- kmem_cache_destroy(s);
+out:
return kmalloc_caches_dma[index];
}
#endif
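
The rewritten dma_kmalloc_cache() can be reached from contexts where sleeping is not allowed (hence the down_write_trylock() fallback), but sysfs registration needs process context. The __SYSFS_ADD_DEFERRED flag plus the work item split the job: create the cache immediately, register it with sysfs later from keventd. The defer pattern in isolation (condensed from the hunk above, not the full slub.c logic):

#include <linux/slab.h>
#include <linux/workqueue.h>

static void sysfs_add_func(struct work_struct *w)
{
	/* process context: safe to take slub_lock and call
	 * sysfs_slab_add() for every cache still flagged */
}

static DECLARE_WORK(sysfs_add_work, sysfs_add_func);

static void cache_created_in_atomic_context(struct kmem_cache *s)
{
	s->flags |= __SYSFS_ADD_DEFERRED;	/* remember the debt */
	schedule_work(&sysfs_add_work);		/* pay it in keventd */
}
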
@@ -2500,15 +2531,11 @@ int kmem_cache_shrink(struct kmem_cache *s)
slab_unlock(page);
discard_slab(s, page);
} else {
- if (n->nr_partial > MAX_PARTIAL)
- list_move(&page->lru,
- slabs_by_inuse + page->inuse);
+ list_move(&page->lru,
+ slabs_by_inuse + page->inuse);
}
}
- if (n->nr_partial <= MAX_PARTIAL)
- goto out;
-
/*
* Rebuild the partial list with the slabs filled up most
* first and the least used slabs at the end.
@@ -2516,7 +2543,6 @@ int kmem_cache_shrink(struct kmem_cache *s)
for (i = s->objects - 1; i >= 0; i--)
list_splice(slabs_by_inuse + i, n->partial.prev);
- out:
spin_unlock_irqrestore(&n->list_lock, flags);
}
diff --git a/net/core/utils.c b/net/core/utils.c
index 2030bb8c2d3..0bf17da40d5 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -25,6 +25,7 @@
#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <net/sock.h>
#include <asm/byteorder.h>
#include <asm/system.h>
diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c
index f13937bf9e8..d054e9224b3 100644
--- a/net/ieee80211/softmac/ieee80211softmac_wx.c
+++ b/net/ieee80211/softmac/ieee80211softmac_wx.c
@@ -74,8 +74,8 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
struct ieee80211softmac_auth_queue_item *authptr;
int length = 0;
+check_assoc_again:
mutex_lock(&sm->associnfo.mutex);
-
/* Check if we're already associating to this or another network
* If it's another network, cancel and start over with our new network
* If it's our network, ignore the change, we're already doing it!
@@ -98,13 +98,18 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
cancel_delayed_work(&authptr->work);
sm->associnfo.bssvalid = 0;
sm->associnfo.bssfixed = 0;
- flush_scheduled_work();
sm->associnfo.associating = 0;
sm->associnfo.associated = 0;
+ /* We must unlock to avoid deadlocks with the assoc workqueue
+ * on the associnfo.mutex */
+ mutex_unlock(&sm->associnfo.mutex);
+ flush_scheduled_work();
+ /* Avoid race! Check assoc status again. Maybe someone started an
+ * association while we flushed. */
+ goto check_assoc_again;
}
}
-
sm->associnfo.static_essid = 0;
sm->associnfo.assoc_wait = 0;
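
The softmac fix addresses a classic flush-under-lock deadlock: flush_scheduled_work() waits for work items that themselves take associnfo.mutex, so flushing while holding that mutex can never finish. The patch drops the lock, flushes, then jumps back to retake the lock and revalidate the association state, since it may have changed during the flush. The same shape in self-contained userspace C (illustrative names only):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int work_pending;

static void flush_worker(void)
{
	/* waits for a worker that also takes 'lock' internally */
}

static void reconfigure(void)
{
again:
	pthread_mutex_lock(&lock);
	if (work_pending) {
		work_pending = 0;
		/* must not flush with 'lock' held: deadlock */
		pthread_mutex_unlock(&lock);
		flush_worker();
		/* state may have changed while unlocked: recheck */
		goto again;
	}
	/* ... proceed under 'lock' ... */
	pthread_mutex_unlock(&lock);
}
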
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 32180431565..6d0c0f7364a 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -387,12 +387,17 @@ static int recent_seq_open(struct inode *inode, struct file *file)
st = kzalloc(sizeof(*st), GFP_KERNEL);
if (st == NULL)
return -ENOMEM;
+
ret = seq_open(file, &recent_seq_ops);
- if (ret)
+ if (ret) {
kfree(st);
+ goto out;
+ }
+
st->table = pde->data;
seq = file->private_data;
seq->private = st;
+out:
return ret;
}
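
The ipt_recent hunk is an error-path fix with a subtlety: on seq_open() failure the private state must be freed, but on success control still has to reach the lines that publish it through seq->private, hence the goto past them rather than returning ret early. The seq_file private-data idiom, condensed (the state type and ops name follow the hunk):

#include <linux/seq_file.h>
#include <linux/slab.h>

static int recent_open_shape(struct inode *inode, struct file *file)
{
	struct recent_iter_state *st;
	struct seq_file *seq;
	int ret;

	st = kzalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	ret = seq_open(file, &recent_seq_ops);
	if (ret) {
		kfree(st);	/* don't leak the state on failure */
		goto out;
	}

	seq = file->private_data;	/* installed by seq_open() */
	seq->private = st;		/* publish only on success */
out:
	return ret;
}
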
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 64552afd01c..d9b5177989c 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -509,3 +509,9 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
+
+void need_ipv4_conntrack(void)
+{
+ return;
+}
+EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
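
need_ipv4_conntrack() is deliberately empty: its only job is to be an exported symbol that NAT references. Because nf_nat_standalone_init() (next hunk) now calls it instead of the generic need_conntrack(), symbol resolution forces nf_conntrack_ipv4 itself, not just the core conntrack module, to be loaded first. The trick in general form:

/* provider module: a no-op whose existence encodes the dependency */
void need_ipv4_conntrack(void)
{
}
EXPORT_SYMBOL_GPL(need_ipv4_conntrack);

/* consumer module: the mere call makes modprobe/depmod pull in and
 * pin the provider */
static int __init consumer_init(void)
{
	need_ipv4_conntrack();
	return 0;
}
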
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 332814dac50..46cc99def16 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -328,7 +328,7 @@ static int __init nf_nat_standalone_init(void)
{
int ret = 0;
- need_conntrack();
+ need_ipv4_conntrack();
#ifdef CONFIG_XFRM
BUG_ON(ip_nat_decode_session != NULL);
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index b66556c0a5b..5215691f276 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -79,7 +79,6 @@ static u32 htcp_cwnd_undo(struct sock *sk)
static inline void measure_rtt(struct sock *sk, u32 srtt)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- const struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
/* keep track of minimum RTT seen so far, minRTT is zero at first */
@@ -87,8 +86,7 @@ static inline void measure_rtt(struct sock *sk, u32 srtt)
ca->minRTT = srtt;
/* max RTT */
- if (icsk->icsk_ca_state == TCP_CA_Open
- && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
+ if (icsk->icsk_ca_state == TCP_CA_Open) {
if (ca->maxRTT < ca->minRTT)
ca->maxRTT = ca->minRTT;
if (ca->maxRTT < srtt
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 799a9208c4b..095be91829c 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -271,9 +271,11 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
}
}
-#define DEBUGFS_DEL(name, type)\
- debugfs_remove(sdata->debugfs.type.name);\
- sdata->debugfs.type.name = NULL;
+#define DEBUGFS_DEL(name, type) \
+ do { \
+ debugfs_remove(sdata->debugfs.type.name); \
+ sdata->debugfs.type.name = NULL; \
+ } while (0)
static void del_sta_files(struct ieee80211_sub_if_data *sdata)
{
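
The DEBUGFS_DEL rewrite is the standard do { ... } while (0) hardening for multi-statement macros: without it, only the first statement lands inside an unbraced if, and a following else fails to parse. A self-contained illustration (FREE_AND_CLEAR is a hypothetical stand-in for DEBUGFS_DEL):

#include <stdlib.h>

#define FREE_AND_CLEAR(p)	\
	do {			\
		free(p);	\
		(p) = NULL;	\
	} while (0)

int main(void)
{
	char *buf = malloc(16);

	if (buf)
		FREE_AND_CLEAR(buf);	/* expands to one statement */
	else
		return 1;		/* 'else' still binds correctly */
	return 0;
}
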
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index d41e696f398..da34ea70276 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -157,7 +157,7 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
struct sta_info *sta = file->private_data;
for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
p += scnprintf(p, sizeof(buf)+buf-p, "%x ",
- sta->last_seq_ctrl[i]);
+ le16_to_cpu(sta->last_seq_ctrl[i]));
p += scnprintf(p, sizeof(buf)+buf-p, "\n");
return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
}
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index c944b17d0fc..8ec5ed192b5 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -1650,6 +1650,7 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb,
if (skb_headroom(skb) < headroom) {
if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) {
dev_kfree_skb(skb);
+ dev_put(odev);
return 0;
}
}
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index d0e1ab5589d..e7904db5532 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -697,17 +697,24 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
if (!netif_running(dev))
return -ENETDOWN;
- if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
- if (sdata->type == IEEE80211_IF_TYPE_STA ||
- sdata->type == IEEE80211_IF_TYPE_IBSS) {
+ switch (sdata->type) {
+ case IEEE80211_IF_TYPE_STA:
+ case IEEE80211_IF_TYPE_IBSS:
+ if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
ssid = sdata->u.sta.ssid;
ssid_len = sdata->u.sta.ssid_len;
- } else if (sdata->type == IEEE80211_IF_TYPE_AP) {
+ }
+ break;
+ case IEEE80211_IF_TYPE_AP:
+ if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
ssid = sdata->u.ap.ssid;
ssid_len = sdata->u.ap.ssid_len;
- } else
- return -EINVAL;
+ }
+ break;
+ default:
+ return -EOPNOTSUPP;
}
+
return ieee80211_sta_req_scan(dev, ssid, ssid_len);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6f89b105a20..2863e72b409 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1052,17 +1052,18 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
/* implicit 'else' */
- /* we only allow nat config for new conntracks */
- if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
- err = -EINVAL;
- goto out_unlock;
- }
-
/* We manipulate the conntrack inside the global conntrack table lock,
* so there's no need to increase the refcount */
err = -EEXIST;
- if (!(nlh->nlmsg_flags & NLM_F_EXCL))
- err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda);
+ if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
+ /* we only allow nat config for new conntracks */
+ if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h),
+ cda);
+ }
out_unlock:
write_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f46a0aeec44..b6c844b7e1c 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -126,7 +126,9 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
if (domain != NULL) {
bkt = netlbl_domhsh_hash(domain);
- list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list)
+ list_for_each_entry_rcu(iter,
+ &rcu_dereference(netlbl_domhsh)->tbl[bkt],
+ list)
if (iter->valid && strcmp(iter->domain, domain) == 0)
return iter;
}
@@ -227,7 +229,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
spin_lock(&netlbl_domhsh_lock);
if (netlbl_domhsh_search(entry->domain, 0) == NULL)
list_add_tail_rcu(&entry->list,
- &netlbl_domhsh->tbl[bkt]);
+ &rcu_dereference(netlbl_domhsh)->tbl[bkt]);
else
ret_val = -EEXIST;
spin_unlock(&netlbl_domhsh_lock);
@@ -423,8 +425,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
iter_bkt < rcu_dereference(netlbl_domhsh)->size;
iter_bkt++, chain_cnt = 0) {
list_for_each_entry_rcu(iter_entry,
- &netlbl_domhsh->tbl[iter_bkt],
- list)
+ &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt],
+ list)
if (iter_entry->valid) {
if (chain_cnt++ < *skip_chain)
continue;
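
The netlabel hunks make every read-side dereference of the netlbl_domhsh pointer go through rcu_dereference(), not just the loop bounds: each access must use the same annotated load so the pointer read is ordered against reads of the table behind it. The read-side shape in isolation (the table type here is illustrative):

#include <linux/types.h>
#include <linux/list.h>
#include <linux/rcupdate.h>

struct dom_table {
	u32 size;
	struct list_head *tbl;
};

static struct dom_table *domhsh;	/* updated via rcu_assign_pointer() */

static void walk_bucket(u32 bkt)
{
	struct dom_table *t;

	rcu_read_lock();
	t = rcu_dereference(domhsh);	/* one annotated load ... */
	/* ... and every use of the table goes through 't', never
	 * through a second bare load of 'domhsh' */
	(void)&t->tbl[bkt];
	rcu_read_unlock();
}
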
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4bbc59cc237..53995af9ca4 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -736,9 +736,6 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
dprintk("RPC: gss_free_ctx\n");
- if (ctx->gc_gss_ctx)
- gss_delete_sec_context(&ctx->gc_gss_ctx);
-
kfree(ctx->gc_wire_ctx.data);
kfree(ctx);
}
@@ -753,7 +750,13 @@ gss_free_ctx_callback(struct rcu_head *head)
static void
gss_free_ctx(struct gss_cl_ctx *ctx)
{
+ struct gss_ctx *gc_gss_ctx;
+
+ gc_gss_ctx = rcu_dereference(ctx->gc_gss_ctx);
+ rcu_assign_pointer(ctx->gc_gss_ctx, NULL);
call_rcu(&ctx->gc_rcu, gss_free_ctx_callback);
+ if (gc_gss_ctx)
+ gss_delete_sec_context(&gc_gss_ctx);
}
static void
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 01c3c410520..ebe344f34d1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -371,8 +371,7 @@ int cache_unregister(struct cache_detail *cd)
}
if (list_empty(&cache_list)) {
/* module must be being unloaded so it's safe to kill the worker */
- cancel_delayed_work(&cache_cleaner);
- flush_scheduled_work();
+ cancel_delayed_work_sync(&cache_cleaner);
}
return 0;
}
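
cancel_delayed_work() only removes a not-yet-running timer; an already queued or running instance keeps going, which is why the old shutdown path had to follow it with a global flush. cancel_delayed_work_sync() waits for exactly this work item instead, so unloading no longer depends on flushing unrelated work (the rpc_pipe.c hunk below makes the same substitution). A minimal usage sketch:

#include <linux/workqueue.h>

static void cleaner_fn(struct work_struct *w)
{
	/* periodic cleanup would go here */
}

static DECLARE_DELAYED_WORK(cache_cleaner, cleaner_fn);

static void shutdown(void)
{
	/* cancels if pending, waits if running; on return the work
	 * is guaranteed not to be queued or executing anywhere */
	cancel_delayed_work_sync(&cache_cleaner);
}
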
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 650af064ff8..669e12a4ed1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -132,8 +132,7 @@ rpc_close_pipes(struct inode *inode)
rpci->nwriters = 0;
if (ops->release_pipe)
ops->release_pipe(inode);
- cancel_delayed_work(&rpci->queue_timeout);
- flush_workqueue(rpciod_workqueue);
+ cancel_delayed_work_sync(&rpci->queue_timeout);
}
rpc_inode_setowner(inode, NULL);
mutex_unlock(&inode->i_mutex);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b5723c262a3..954d7ec86c7 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -50,8 +50,6 @@ static RPC_WAITQ(delay_queue, "delayq");
/*
* rpciod-related stuff
*/
-static DEFINE_MUTEX(rpciod_mutex);
-static atomic_t rpciod_users = ATOMIC_INIT(0);
struct workqueue_struct *rpciod_workqueue;
/*
@@ -961,60 +959,49 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
spin_unlock(&clnt->cl_lock);
}
+int rpciod_up(void)
+{
+ return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
+}
+
+void rpciod_down(void)
+{
+ module_put(THIS_MODULE);
+}
+
/*
- * Start up the rpciod process if it's not already running.
+ * Start up the rpciod workqueue.
*/
-int
-rpciod_up(void)
+static int rpciod_start(void)
{
struct workqueue_struct *wq;
- int error = 0;
-
- if (atomic_inc_not_zero(&rpciod_users))
- return 0;
-
- mutex_lock(&rpciod_mutex);
- /* Guard against races with rpciod_down() */
- if (rpciod_workqueue != NULL)
- goto out_ok;
/*
* Create the rpciod thread and wait for it to start.
*/
dprintk("RPC: creating workqueue rpciod\n");
- error = -ENOMEM;
wq = create_workqueue("rpciod");
- if (wq == NULL)
- goto out;
-
rpciod_workqueue = wq;
- error = 0;
-out_ok:
- atomic_inc(&rpciod_users);
-out:
- mutex_unlock(&rpciod_mutex);
- return error;
+ return rpciod_workqueue != NULL;
}
-void
-rpciod_down(void)
+static void rpciod_stop(void)
{
- if (!atomic_dec_and_test(&rpciod_users))
- return;
+ struct workqueue_struct *wq = NULL;
- mutex_lock(&rpciod_mutex);
+ if (rpciod_workqueue == NULL)
+ return;
dprintk("RPC: destroying workqueue rpciod\n");
- if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) {
- destroy_workqueue(rpciod_workqueue);
- rpciod_workqueue = NULL;
- }
- mutex_unlock(&rpciod_mutex);
+ wq = rpciod_workqueue;
+ rpciod_workqueue = NULL;
+ destroy_workqueue(wq);
}
void
rpc_destroy_mempool(void)
{
+ rpciod_stop();
if (rpc_buffer_mempool)
mempool_destroy(rpc_buffer_mempool);
if (rpc_task_mempool)
@@ -1048,6 +1035,8 @@ rpc_init_mempool(void)
rpc_buffer_slabp);
if (!rpc_buffer_mempool)
goto err_nomem;
+ if (!rpciod_start())
+ goto err_nomem;
return 0;
err_nomem:
rpc_destroy_mempool();
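
The rpciod rework inverts ownership: the workqueue is created once from rpc_init_mempool() and destroyed from rpc_destroy_mempool(), so it lives exactly as long as the sunrpc module, while rpciod_up()/rpciod_down() shrink to module reference counting. A condensed module-lifecycle view of the same idea (the init/exit wrapper names are illustrative):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *rpciod_workqueue;

static int __init sunrpc_shape_init(void)
{
	rpciod_workqueue = create_workqueue("rpciod");
	return rpciod_workqueue ? 0 : -ENOMEM;
}

static void __exit sunrpc_shape_exit(void)
{
	destroy_workqueue(rpciod_workqueue);
}

/* users no longer start/stop the queue; they pin its owner */
int rpciod_up(void)
{
	return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
}

void rpciod_down(void)
{
	module_put(THIS_MODULE);
}
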
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 73751ab6ec0..dae7d30dca0 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -9,7 +9,7 @@ use strict;
my $P = $0;
$P =~ s@.*/@@g;
-my $V = '0.08';
+my $V = '0.09';
use Getopt::Long qw(:config no_auto_abbrev);
@@ -311,7 +311,7 @@ sub process {
my $Ident = qr{[A-Za-z\d_]+};
my $Storage = qr{extern|static};
- my $Sparse = qr{__user|__kernel|__force|__iomem};
+ my $Sparse = qr{__user|__kernel|__force|__iomem|__must_check|__init_refok};
my $NonptrType = qr{
\b
(?:const\s+)?
@@ -325,6 +325,7 @@ sub process {
unsigned|
float|
double|
+ bool|
long\s+int|
long\s+long|
long\s+long\s+int|
@@ -340,7 +341,8 @@ sub process {
}x;
my $Type = qr{
\b$NonptrType\b
- (?:\s*\*+\s*const|\s*\*+)?
+ (?:\s*\*+\s*const|\s*\*+|(?:\s*\[\s*\])+)?
+ (?:\s+$Sparse)*
}x;
my $Declare = qr{(?:$Storage\s+)?$Type};
my $Attribute = qr{const|__read_mostly|__init|__initdata|__meminit};
@@ -494,16 +496,15 @@ sub process {
ERROR("use tabs not spaces\n" . $herevet);
}
- #
- # The rest of our checks refer specifically to C style
- # only apply those _outside_ comments.
- #
- next if ($in_comment);
-
# Remove comments from the line before processing.
- $line =~ s@/\*.*\*/@@g;
- $line =~ s@/\*.*@@;
- $line =~ s@.*\*/@@;
+ my $comment_edge = ($line =~ s@/\*.*\*/@@g) +
+ ($line =~ s@/\*.*@@) +
+ ($line =~ s@^(.).*\*/@$1@);
+
+# The rest of our checks refer specifically to C style
+# only apply those _outside_ comments. Only skip
+# lines in the middle of comments.
+ next if (!$comment_edge && $in_comment);
# Standardise the strings and chars within the input to simplify matching.
$line = sanitise_line($line);
@@ -599,7 +600,7 @@ sub process {
if (($prevline !~ /^}/) &&
($prevline !~ /^\+}/) &&
($prevline !~ /^ }/) &&
- ($prevline !~ /\s$name(?:\s+$Attribute)?\s*(?:;|=)/)) {
+ ($prevline !~ /\b\Q$name\E(?:\s+$Attribute)?\s*(?:;|=)/)) {
WARN("EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr);
}
}
@@ -680,9 +681,9 @@ sub process {
# check for spaces between functions and their parentheses.
if ($line =~ /($Ident)\s+\(/ &&
- $1 !~ /^(?:if|for|while|switch|return|volatile)$/ &&
+ $1 !~ /^(?:if|for|while|switch|return|volatile|__volatile__|__attribute__|format|__extension__|Copyright)$/ &&
$line !~ /$Type\s+\(/ && $line !~ /^.\#\s*define\b/) {
- ERROR("no space between function name and open parenthesis '('\n" . $herecurr);
+ WARN("no space between function name and open parenthesis '('\n" . $herecurr);
}
# Check operator spacing.
# Note we expand the line with the leading + as the real
@@ -712,6 +713,7 @@ sub process {
$c = 'W' if ($elements[$n + 2] =~ /^\s/);
$c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/);
$c = 'O' if ($elements[$n + 2] eq '');
+ $c = 'E' if ($elements[$n + 2] =~ /\s*\\$/);
} else {
$c = 'E';
}
@@ -812,7 +814,11 @@ sub process {
# All the others need spaces both sides.
} elsif ($ctx !~ /[EW]x[WE]/) {
- ERROR("need spaces around that '$op' $at\n" . $hereptr);
+ # Ignore email addresses <foo@bar>
+ if (!($op eq '<' && $cb =~ /$;\S+\@\S+>/) &&
+ !($op eq '>' && $cb =~ /<\S+\@\S+$;/)) {
+ ERROR("need spaces around that '$op' $at\n" . $hereptr);
+ }
}
$off += length($elements[$n + 1]);
}
@@ -823,15 +829,24 @@ sub process {
WARN("multiple assignments should be avoided\n" . $herecurr);
}
-# check for multiple declarations, allowing for a function declaration
-# continuation.
- if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ &&
- $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) {
- WARN("declaring multiple variables together should be avoided\n" . $herecurr);
- }
+## # check for multiple declarations, allowing for a function declaration
+## # continuation.
+## if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ &&
+## $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) {
+##
+## # Remove any bracketed sections to ensure we do not
+## # falsely report the parameters of functions.
+## my $ln = $line;
+## while ($ln =~ s/\([^\(\)]*\)//g) {
+## }
+## if ($ln =~ /,/) {
+## WARN("declaring multiple variables together should be avoided\n" . $herecurr);
+## }
+## }
#need space before brace following if, while, etc
- if ($line =~ /\(.*\){/ || $line =~ /do{/) {
+ if (($line =~ /\(.*\){/ && $line !~ /\($Type\){/) ||
+ $line =~ /do{/) {
ERROR("need a space before the open brace '{'\n" . $herecurr);
}
@@ -841,6 +856,22 @@ sub process {
ERROR("need a space after that close brace '}'\n" . $herecurr);
}
+# check spacing on square brackets
+ if ($line =~ /\[\s/ && $line !~ /\[\s*$/) {
+ ERROR("no space after that open square bracket '['\n" . $herecurr);
+ }
+ if ($line =~ /\s\]/) {
+ ERROR("no space before that close square bracket ']'\n" . $herecurr);
+ }
+
+# check spacing on parentheses
+ if ($line =~ /\(\s/ && $line !~ /\(\s*$/) {
+ ERROR("no space after that open parenthesis '('\n" . $herecurr);
+ }
+ if ($line =~ /\s\)/) {
+ ERROR("no space before that close parenthesis ')'\n" . $herecurr);
+ }
+
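
The new square-bracket and parenthesis checks reduce to four regexes (/\[\s/, /\s\]/, /\(\s/ and /\s\)/, with an exemption when the open bracket is the last thing on the line). In C terms they flag the spaced forms and accept the tight ones:

extern int frob(int a, int b);

void style_examples(int *a, int i)
{
	int x;

	x = a[ i ];		/* ERROR: space after '[', space before ']' */
	x = a[i];		/* ok */

	frob( x, i );		/* ERROR: space after '(' and before ')' */
	frob(x, i);		/* ok */

	frob(
		x, i);		/* ok: '(' ending the line is exempted */
}
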
#goto labels aren't indented, allow a single space however
if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
!($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
@@ -910,7 +941,7 @@ sub process {
# grabbing the statement after the identifier
$prevline =~ m{^(.#\s*define\s*$Ident(?:\([^\)]*\))?\s*)(.*)\\\s*$};
##print "1<$1> 2<$2>\n";
- if ($2 ne '') {
+ if (defined $2 && $2 ne '') {
$off = length($1);
$ln--;
$cnt++;
@@ -950,8 +981,10 @@ sub process {
my ($lvl, @block) = ctx_block_level($nr, $cnt);
my $stmt = join(' ', @block);
- $stmt =~ s/^[^{]*{//;
- $stmt =~ s/}[^}]*$//;
+ $stmt =~ s/(^[^{]*){//;
+ my $before = $1;
+ $stmt =~ s/}([^}]*$)//;
+ my $after = $1;
#print "block<" . join(' ', @block) . "><" . scalar(@block) . ">\n";
#print "stmt<$stmt>\n\n";
@@ -963,12 +996,14 @@ sub process {
# Also nested if's often require braces to
# disambiguate the else binding so shhh there.
my @semi = ($stmt =~ /;/g);
+ push(@semi, "/**/") if ($stmt =~ m@/\*@);
##print "semi<" . scalar(@semi) . ">\n";
if ($lvl == 0 && scalar(@semi) < 2 &&
- $stmt !~ /{/ && $stmt !~ /\bif\b/) {
+ $stmt !~ /{/ && $stmt !~ /\bif\b/ &&
+ $before !~ /}/ && $after !~ /{/) {
my $herectx = "$here\n" . join("\n", @control, @block[1 .. $#block]) . "\n";
shift(@block);
- ERROR("braces {} are not necessary for single statement blocks\n" . $herectx);
+ WARN("braces {} are not necessary for single statement blocks\n" . $herectx);
}
}
}
@@ -1013,6 +1048,11 @@ sub process {
# $clean = 0;
# }
+# warn about spacing in #ifdefs
+ if ($line =~ /^.#\s*(ifdef|ifndef|elif)\s\s+/) {
+ ERROR("exactly one space required after that #$1\n" . $herecurr);
+ }
+
# check for spinlock_t definitions without a comment.
if ($line =~ /^.\s*(struct\s+mutex|spinlock_t)\s+\S+;/) {
my $which = $1;
@@ -1027,14 +1067,14 @@ sub process {
}
}
# check of hardware specific defines
- if ($line =~ m@^.#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@) {
+ if ($line =~ m@^.#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
CHK("architecture specific defines should be avoided\n" . $herecurr);
}
# check the location of the inline attribute, that it is between
# storage class and type.
- if ($line =~ /$Type\s+(?:inline|__always_inline)\b/ ||
- $line =~ /\b(?:inline|always_inline)\s+$Storage/) {
+ if ($line =~ /$Type\s+(?:inline|__always_inline|noinline)\b/ ||
+ $line =~ /\b(?:inline|__always_inline|noinline)\s+$Storage/) {
ERROR("inline keyword should sit between storage class and type\n" . $herecurr);
}