850 files changed, 28516 insertions, 24638 deletions
diff --git a/CREDITS b/CREDITS
index cf919aaacc9..08feda2667d 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2702,7 +2702,7 @@ S: Canada K2P 0X8
 
 N: Mikael Pettersson
 E: mikpe@it.uu.se
-W: http://www.csd.uu.se/~mikpe/
+W: http://user.it.uu.se/~mikpe/linux/
 D: Miscellaneous fixes
 
 N: Reed H. Petty
diff --git a/Documentation/DMA-mapping.txt b/Documentation/DMA-mapping.txt
index e07f2530326..3c8ae020b6a 100644
--- a/Documentation/DMA-mapping.txt
+++ b/Documentation/DMA-mapping.txt
@@ -514,7 +514,7 @@ With scatterlists, you map a region gathered from several regions by:
 	int i, count = pci_map_sg(dev, sglist, nents, direction);
 	struct scatterlist *sg;
 
-	for (i = 0, sg = sglist; i < count; i++, sg++) {
+	for_each_sg(sglist, sg, count, i) {
 		hw_address[i] = sg_dma_address(sg);
 		hw_len[i] = sg_dma_len(sg);
 	}
@@ -782,5 +782,5 @@ following people:
 	Jay Estabrook <Jay.Estabrook@compaq.com>
 	Thomas Sailer <sailer@ife.ee.ethz.ch>
 	Andrea Arcangeli <andrea@suse.de>
-	Jens Axboe <axboe@suse.de>
+	Jens Axboe <jens.axboe@oracle.com>
 	David Mosberger-Tang <davidm@hpl.hp.com>
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index c64e969dc33..dceb3092149 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -330,7 +330,7 @@ Here is a list of some of the different kernel trees available:
     - ACPI development tree, Len Brown <len.brown@intel.com>
 	git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
 
-    - Block development tree, Jens Axboe <axboe@suse.de>
+    - Block development tree, Jens Axboe <jens.axboe@oracle.com>
 	git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
 
     - DRM development tree, Dave Airlie <airlied@linux.ie>
diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
new file mode 100644
index 00000000000..961a0513f8c
--- /dev/null
+++ b/Documentation/block/00-INDEX
@@ -0,0 +1,20 @@
+00-INDEX
+	- This file
+as-iosched.txt
+	- Anticipatory IO scheduler
+barrier.txt
+	- I/O Barriers
+biodoc.txt
+	- Notes on the Generic Block Layer Rewrite in Linux 2.5
+capability.txt
+	- Generic Block Device Capability (/sys/block/<disk>/capability)
+deadline-iosched.txt
+	- Deadline IO scheduler tunables
+ioprio.txt
+	- Block io priorities (in CFQ scheduler)
+request.txt
+	- The members of struct request (in include/linux/blkdev.h)
+stat.txt
+	- Block layer statistics in /sys/block/<dev>/stat
+switching-sched.txt
+	- Switching I/O schedulers at runtime
diff --git a/Documentation/block/as-iosched.txt b/Documentation/block/as-iosched.txt
index a598fe10a29..738b72be128 100644
--- a/Documentation/block/as-iosched.txt
+++ b/Documentation/block/as-iosched.txt
@@ -20,15 +20,10 @@ actually has a head for each physical device in the logical RAID device.
 However, setting the antic_expire (see tunable parameters below) produces
 very similar behavior to the deadline IO scheduler.
 
-
 Selecting IO schedulers
 -----------------------
-To choose IO schedulers at boot time, use the argument 'elevator=deadline'.
-'noop', 'as' and 'cfq' (the default) are also available. IO schedulers are
-assigned globally at boot time only presently. It's also possible to change
-the IO scheduler for a determined device on the fly, as described in
-Documentation/block/switching-sched.txt.
-
+Refer to Documentation/block/switching-sched.txt for information on
+selecting an io scheduler on a per-device basis.
 
 Anticipatory IO scheduler Policies
 ----------------------------------
@@ -115,7 +110,7 @@ statistics (average think time, average seek distance) on the process
 that submitted the just completed request are examined.  If it seems
 likely that that process will submit another request soon, and that
 request is likely to be near the just completed request, then the IO
-scheduler will stop dispatching more read requests for up time (antic_expire)
+scheduler will stop dispatching more read requests for up to (antic_expire)
 milliseconds, hoping that process will submit a new request near the one
 that just completed.  If such a request is made, then it is dispatched
 immediately.  If the antic_expire wait time expires, then the IO scheduler
@@ -165,3 +160,13 @@ The parameters are:
     for big seek time devices though not a linear correspondence - most
     processes have only a few ms thinktime.
 
+In addition to the tunables above there is a read-only file named est_time
+which, when read, will show:
+
+    - The probability of a task exiting without a cooperating task
+      submitting an anticipated IO.
+
+    - The current mean think time.
+
+    - The seek distance used to determine if an incoming IO is better.
+
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index dc3f49e3e53..93f223b9723 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -2,7 +2,7 @@
 	=====================================================
 
 Notes Written on Jan 15, 2002:
-	Jens Axboe <axboe@suse.de>
+	Jens Axboe <jens.axboe@oracle.com>
 	Suparna Bhattacharya <suparna@in.ibm.com>
 
 Last Updated May 2, 2002
@@ -21,7 +21,7 @@ Credits:
 ---------
 
 2.5 bio rewrite:
-	Jens Axboe <axboe@suse.de>
+	Jens Axboe <jens.axboe@oracle.com>
 
 Many aspects of the generic block layer redesign were driven by and evolved
 over discussions, prior patches and the collective experience of several
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
index be08ffd1e9b..c23cab13c3d 100644
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.txt
@@ -5,16 +5,10 @@ This little file attempts to document how the deadline io scheduler works.
 In particular, it will clarify the meaning of the exposed tunables that may be
 of interest to power users.
 
-Each io queue has a set of io scheduler tunables associated with it. These
-tunables control how the io scheduler works. You can find these entries
-in:
-
-/sys/block/<device>/queue/iosched
-
-assuming that you have sysfs mounted on /sys. If you don't have sysfs mounted,
-you can do so by typing:
-
-# mount none /sys -t sysfs
+Selecting IO schedulers
+-----------------------
+Refer to Documentation/block/switching-sched.txt for information on
+selecting an io scheduler on a per-device basis.
 
 
 ********************************************************************************
@@ -41,14 +35,11 @@ fifo_batch
 
 When a read request expires its deadline, we must move some requests from
 the sorted io scheduler list to the block device dispatch queue. fifo_batch
-controls how many requests we move, based on the cost of each request. A
-request is either qualified as a seek or a stream. The io scheduler knows
-the last request that was serviced by the drive (or will be serviced right
-before this one). See seek_cost and stream_unit.
+controls how many requests we move.
 
 
-write_starved	(number of dispatches)
--------------
+writes_starved	(number of dispatches)
+--------------
 
 When we have to move requests from the io scheduler queue to the block
 device dispatch queue, we always give a preference to reads. However, we
@@ -73,6 +64,6 @@ that comes at basically 0 cost we leave that on. We simply disable the
 rbtree front sector lookup when the io scheduler merge function is called.
 
 
-Nov 11 2002, Jens Axboe <axboe@suse.de>
+Nov 11 2002, Jens Axboe <jens.axboe@oracle.com>
 
 
diff --git a/Documentation/block/ioprio.txt b/Documentation/block/ioprio.txt
index 35e516b0b8a..8ed8c59380b 100644
--- a/Documentation/block/ioprio.txt
+++ b/Documentation/block/ioprio.txt
@@ -180,4 +180,4 @@ int main(int argc, char *argv[])
 ---> snip ionice.c tool <---
 
 
-March 11 2005, Jens Axboe <axboe@suse.de>
+March 11 2005, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/request.txt b/Documentation/block/request.txt
index fff58acb40a..754e104ed36 100644
--- a/Documentation/block/request.txt
+++ b/Documentation/block/request.txt
@@ -1,7 +1,7 @@
 
 struct request documentation
 
-Jens Axboe <axboe@suse.de> 27/05/02
+Jens Axboe <jens.axboe@oracle.com> 27/05/02
 
 1.0
 Index
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
index 5fa130a6753..634c952e196 100644
--- a/Documentation/block/switching-sched.txt
+++ b/Documentation/block/switching-sched.txt
@@ -1,3 +1,18 @@
+To choose IO schedulers at boot time, use the argument 'elevator=deadline'.
+'noop', 'as' and 'cfq' (the default) are also available. IO schedulers are
+assigned globally at boot time only presently.
+
+Each io queue has a set of io scheduler tunables associated with it. These
+tunables control how the io scheduler works. You can find these entries
+in:
+
+/sys/block/<device>/queue/iosched
+
+assuming that you have sysfs mounted on /sys. If you don't have sysfs mounted,
+you can do so by typing:
+
+# mount none /sys -t sysfs
+
 As of the Linux 2.6.10 kernel, it is now possible to change the
 IO scheduler for a given block device on the fly (thus making it possible,
 for instance, to set the CFQ scheduler for the system default, but
@@ -20,3 +35,9 @@ noop anticipatory deadline [cfq]
 # echo anticipatory > /sys/block/hda/queue/scheduler
 # cat /sys/block/hda/queue/scheduler
 noop [anticipatory] deadline cfq
+
+Each io queue has a set of io scheduler tunables associated with it. These
+tunables control how the io scheduler works. You can find these entries
+in:
+
+/sys/block/<device>/queue/iosched
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 866b7613942..552cabac060 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -133,12 +133,6 @@ changes occur:
 	The ia64 sn2 platform is one example of a platform
 	that uses this interface.
 
-8) void lazy_mmu_prot_update(pte_t pte)
-	This interface is called whenever the protection on
-	any user PTEs change.  This interface provides a notification
-	to architecture specific code to take appropriate action.
-
-
 Next, we have the cache flushing interfaces.  In general, when Linux
 is changing an existing virtual-->physical mapping to a new value,
 the sequence will be in one of the following forms:
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index f2c0a684293..ec9de6917f0 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -35,7 +35,8 @@ CONTENTS:
 ----------------------
 
 Cpusets provide a mechanism for assigning a set of CPUs and Memory
-Nodes to a set of tasks.
+Nodes to a set of tasks.   In this document "Memory Node" refers to
+an on-line node that contains memory.
 
 Cpusets constrain the CPU and Memory placement of tasks to only
 the resources within a tasks current cpuset.  They form a nested
@@ -86,9 +87,6 @@ This can be especially valuable on:
       and a database), or
     * NUMA systems running large HPC applications with demanding
       performance characteristics.
-    * Also cpu_exclusive cpusets are useful for servers running orthogonal
-      workloads such as RT applications requiring low latency and HPC
-      applications that are throughput sensitive
 
 These subsets, or "soft partitions" must be able to be dynamically
 adjusted, as the job mix changes, without impacting other concurrently
@@ -131,8 +129,6 @@ Cpusets extends these two mechanisms as follows:
  - A cpuset may be marked exclusive, which ensures that no other
    cpuset (except direct ancestors and descendents) may contain
    any overlapping CPUs or Memory Nodes.
-   Also a cpu_exclusive cpuset would be associated with a sched
-   domain.
  - You can list all the tasks (by pid) attached to any cpuset.
 
 The implementation of cpusets requires a few, simple hooks
@@ -144,9 +140,6 @@ into the rest of the kernel, none in performance critical paths:
    allowed in that tasks cpuset.
  - in sched.c migrate_all_tasks(), to keep migrating tasks within
    the CPUs allowed by their cpuset, if possible.
- - in sched.c, a new API partition_sched_domains for handling
-   sched domain changes associated with cpu_exclusive cpusets
-   and related changes in both sched.c and arch/ia64/kernel/domain.c
  - in the mbind and set_mempolicy system calls, to mask the requested
    Memory Nodes by what's allowed in that tasks cpuset.
  - in page_alloc.c, to restrict memory to allowed nodes.
@@ -220,8 +213,8 @@ and name space for cpusets, with a minimum of additional kernel code.
 The cpus and mems files in the root (top_cpuset) cpuset are
 read-only.  The cpus file automatically tracks the value of
 cpu_online_map using a CPU hotplug notifier, and the mems file
-automatically tracks the value of node_online_map using the
-cpuset_track_online_nodes() hook.
+automatically tracks the value of node_states[N_MEMORY]--i.e.,
+nodes with memory--using the cpuset_track_online_nodes() hook.
 
 
 1.4 What are exclusive cpusets ?
@@ -231,15 +224,6 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than
 a direct ancestor or descendent, may share any of the same CPUs or
 Memory Nodes.
 
-A cpuset that is cpu_exclusive has a scheduler (sched) domain
-associated with it.  The sched domain consists of all CPUs in the
-current cpuset that are not part of any exclusive child cpusets.
-This ensures that the scheduler load balancing code only balances
-against the CPUs that are in the sched domain as defined above and
-not all of the CPUs in the system. This removes any overhead due to
-load balancing code trying to pull tasks outside of the cpu_exclusive
-cpuset only to be prevented by the tasks' cpus_allowed mask.
-
 A cpuset that is mem_exclusive restricts kernel allocations for
 page, buffer and other data commonly shared by the kernel across
 multiple users.  All cpusets, whether mem_exclusive or not, restrict
diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX
index 92e89aeef52..caabbd395e6 100644
--- a/Documentation/fb/00-INDEX
+++ b/Documentation/fb/00-INDEX
@@ -5,21 +5,49 @@ please mail me.
 
 00-INDEX
 	- this file
+arkfb.txt
+	- info on the fbdev driver for ARK Logic chips.
+aty128fb.txt
+	- info on the ATI Rage128 frame buffer driver.
+cirrusfb.txt
+	- info on the driver for Cirrus Logic chipsets.
+cyblafb/
+	- directory with documentation files related to the cyblafb driver.
+deferred_io.txt
+	- an introduction to deferred IO.
+fbcon.txt
+	- intro to and usage guide for the framebuffer console (fbcon).
 framebuffer.txt
-	- introduction to frame buffer devices
+	- introduction to frame buffer devices.
+imacfb.txt
+	- info on the generic EFI platform driver for Intel based Macs.
+intel810.txt
+	- documentation for the Intel 810/815 framebuffer driver.
+intelfb.txt
+	- docs for Intel 830M/845G/852GM/855GM/865G/915G/945G fb driver.
 internals.txt
-	- quick overview of frame buffer device internals
+	- quick overview of frame buffer device internals.
+matroxfb.txt
+	- info on the Matrox framebuffer driver for Alpha, Intel and PPC.
 modedb.txt
-	- info on the video mode database
-aty128fb.txt
-	- info on the ATI Rage128 frame buffer driver
-clgenfb.txt
-	- info on the Cirrus Logic frame buffer driver
+	- info on the video mode database.
 matroxfb.txt
-	- info on the Matrox frame buffer driver
+	- info on the Matrox frame buffer driver.
 pvr2fb.txt
-	- info on the PowerVR 2 frame buffer driver
+	- info on the PowerVR 2 frame buffer driver.
+pxafb.txt
+	- info on the driver for the PXA25x LCD controller.
+s3fb.txt
+	- info on the fbdev driver for S3 Trio/Virge chips.
+sa1100fb.txt
+	- information about the driver for the SA-1100 LCD controller.
+sisfb.txt
+	- info on the framebuffer device driver for various SiS chips.
+sstfb.txt
+	- info on the frame buffer driver for 3dfx' Voodoo Graphics boards.
 tgafb.txt
 	- info on the TGA (DECChip 21030) frame buffer driver
 vesafb.txt
 	- info on the VESA frame buffer device
+vt8623fb.txt
+	- info on the fb driver for the graphics core in VIA VT8623 chipsets.
diff --git a/Documentation/fb/uvesafb.txt b/Documentation/fb/uvesafb.txt
new file mode 100644
index 00000000000..bcfc233a008
--- /dev/null
+++ b/Documentation/fb/uvesafb.txt
@@ -0,0 +1,188 @@
+
+uvesafb - A Generic Driver for VBE2+ compliant video cards
+==========================================================
+
+1. Requirements
+---------------
+
+uvesafb should work with any video card that has a Video BIOS compliant
+with the VBE 2.0 standard.
+
+Unlike other drivers, uvesafb makes use of a userspace helper called
+v86d.  v86d is used to run the x86 Video BIOS code in a simulated and
+controlled environment.  This allows uvesafb to function on arches other
+than x86.  Check the v86d documentation for a list of currently supported
+arches.
+
+v86d source code can be downloaded from the following website:
+  http://dev.gentoo.org/~spock/projects/uvesafb
+
+Please refer to the v86d documentation for detailed configuration and
+installation instructions.
+
+Note that the v86d userspace helper has to be available at all times in
+order for uvesafb to work properly.  If you want to use uvesafb during
+early boot, you will have to include v86d in an initramfs image, and
+either compile it into the kernel or use it as an initrd.
+
+2. Caveats and limitations
+--------------------------
+
+uvesafb is a _generic_ driver which supports a wide variety of video
+cards, but which is ultimately limited by the Video BIOS interface.
+The most important limitations are:
+
+- Lack of any type of acceleration.
+- A strict and limited set of supported video modes.  Often the native
+  or most optimal resolution/refresh rate for your setup will not work
+  with uvesafb, simply because the Video BIOS doesn't support the
+  video mode you want to use.  This can be especially painful with
+  widescreen panels, where native video modes don't have the 4:3 aspect
+  ratio, which is what most BIOS-es are limited to.
+- Adjusting the refresh rate is only possible with a VBE 3.0 compliant
+  Video BIOS.  Note that many nVidia Video BIOS-es claim to be VBE 3.0
+  compliant, while they simply ignore any refresh rate settings.
+
+3. Configuration
+----------------
+
+uvesafb can be compiled either as a module, or directly into the kernel.
+In both cases it supports the same set of configuration options, which
+are either given on the kernel command line or as module parameters, e.g.:
+
+ video=uvesafb:1024x768-32,mtrr:3,ywrap (compiled into the kernel)
+
+ # modprobe uvesafb mode=1024x768-32 mtrr=3 scroll=ywrap  (module)
+
+Accepted options:
+
+ypan    Enable display panning using the VESA protected mode
+        interface.  The visible screen is just a window of the
+        video memory, console scrolling is done by changing the
+        start of the window.  Available on x86 only.
+
+ywrap   Same as ypan, but assumes your gfx board can wrap-around
+        the video memory (i.e. starts reading from top if it
+        reaches the end of video memory).  Faster than ypan.
+        Available on x86 only.
+
+redraw  Scroll by redrawing the affected part of the screen, this
+        is the safe (and slow) default.
+
+(If you're using uvesafb as a module, the above three options are
+ given as the value of the scroll option, e.g. scroll=ypan.)
+
+vgapal  Use the standard VGA registers for palette changes.
+
+pmipal  Use the protected mode interface for palette changes.
+        This is the default if the protected mode interface is
+        available.  Available on x86 only.
+
+mtrr:n  Setup memory type range registers for the framebuffer
+        where n:
+              0 - disabled (equivalent to nomtrr) (default)
+              1 - uncachable
+              2 - write-back
+              3 - write-combining
+              4 - write-through
+
+        If you see the following in dmesg, choose the type that matches
+        the old one.  In this example, use "mtrr:2".
+...
+mtrr: type mismatch for e0000000,8000000 old: write-back new: write-combining
+...
+
+nomtrr  Do not use memory type range registers.
+
+vremap:n
+        Remap 'n' MiB of video RAM.  If 0 or not specified, remap memory
+        according to video mode.
+
+vtotal:n
+        If the video BIOS of your card incorrectly determines the total
+        amount of video RAM, use this option to override the BIOS (in MiB).
+
+<mode>  The mode you want to set, in the standard modedb format.  Refer to
+        modedb.txt for a detailed description.  When uvesafb is compiled as
+        a module, the mode string should be provided as a value of the
+        'mode' option.
+
+vbemode:x
+        Force the use of VBE mode x.  The mode will only be set if it's
+        found in the VBE-provided list of supported modes.
+        NOTE: The mode number 'x' should be specified in VESA mode number
+        notation, not the Linux kernel one (e.g. 257 instead of 769).
+        HINT: If you use this option because the normal <mode> parameter does
+        not work for you and you use an X server, you'll probably want to
+        set the 'nocrtc' option to ensure that the video mode is properly
+        restored after console <-> X switches.
+
+nocrtc  Do not use CRTC timings while setting the video mode.  This option
+        has an effect only if the Video BIOS is VBE 3.0 compliant.  Use it
+        if you have problems with modes set the standard way.  Note that
+        using this option implies that any refresh rate adjustments will
+        be ignored and the refresh rate will stay at your BIOS default (60 Hz).
+
+noedid  Do not try to fetch and use EDID-provided modes.
+
+noblank Disable hardware blanking.
+
+v86d:path
+        Set path to the v86d executable. This option is only available as
+        a module parameter, and not as a part of the video= string.  If you
+        need to use it and have uvesafb built into the kernel, use
+        uvesafb.v86d="path".
+
+Additionally, the following parameters may be provided.  They all override the
+EDID-provided values and BIOS defaults.  Refer to your monitor's specs to get
+the correct values for maxhf, maxvf and maxclk for your hardware.
+
+maxhf:n     Maximum horizontal frequency (in kHz).
+maxvf:n     Maximum vertical frequency (in Hz).
+maxclk:n    Maximum pixel clock (in MHz).
+
+4. The sysfs interface
+----------------------
+
+uvesafb provides several sysfs nodes for configurable parameters and
+additional information.
+
+Driver attributes:
+
+/sys/bus/platform/drivers/uvesafb
+  - v86d (default: /sbin/v86d)
+    Path to the v86d executable. v86d is started by uvesafb
+    if an instance of the daemon isn't already running.
+
+Device attributes:
+
+/sys/bus/platform/drivers/uvesafb/uvesafb.0
+  - nocrtc
+    Use the default refresh rate (60 Hz) if set to 1.
+
+  - oem_product_name
+  - oem_product_rev
+  - oem_string
+  - oem_vendor
+    Information about the card and its maker.
+
+  - vbe_modes
+    A list of video modes supported by the Video BIOS along with their
+    VBE mode numbers in hex.
+
+  - vbe_version
+    A BCD value indicating the implemented VBE standard.
+
+5. Miscellaneous
+----------------
+
+Uvesafb will set a video mode with the default refresh rate and timings
+from the Video BIOS if you set pixclock to 0 in fb_var_screeninfo.
+
+
+--
+ Michal Januszewski <spock@gentoo.org>
+ Last updated: 2007-06-16
+
+ Documentation of the uvesafb options is loosely based on vesafb.txt.
+
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index f0f825808ca..fe26cc97852 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -178,15 +178,18 @@ prototypes:
 locking rules:
 	All except set_page_dirty may block
 
-			BKL	PageLocked(page)
+			BKL	PageLocked(page)	i_sem
 writepage:		no	yes, unlocks (see below)
 readpage:		no	yes, unlocks
 sync_page:		no	maybe
 writepages:		no
 set_page_dirty		no	no
 readpages:		no
-prepare_write:		no	yes
-commit_write:		no	yes
+prepare_write:		no	yes			yes
+commit_write:		no	yes			yes
+write_begin:		no	locks the page		yes
+write_end:		no	yes, unlocks		yes
+perform_write:		no	n/a			yes
 bmap:			yes
 invalidatepage:		no	yes
 releasepage:		no	yes
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 045f3e055a2..6f8e16e3d6c 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -537,6 +537,12 @@ struct address_space_operations {
 			struct list_head *pages, unsigned nr_pages);
 	int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
 	int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
+	int (*write_begin)(struct file *, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata);
+	int (*write_end)(struct file *, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata);
 	sector_t (*bmap)(struct address_space *, sector_t);
 	int (*invalidatepage) (struct page *, unsigned long);
 	int (*releasepage) (struct page *, int);
@@ -615,11 +621,7 @@ struct address_space_operations {
   	any basic-blocks on storage, then those blocks should be
   	pre-read (if they haven't been read already) so that the
   	updated blocks can be written out properly.
-	The page will be locked.  If prepare_write wants to unlock the
-  	page it, like readpage, may do so and return
-  	AOP_TRUNCATED_PAGE.
-	In this case the prepare_write will be retried one the lock is
-  	regained.
+	The page will be locked.
 
 	Note: the page _must not_ be marked uptodate in this function
 	(or anywhere else) unless it actually is uptodate right now. As
@@ -633,6 +635,45 @@ struct address_space_operations {
         operations.  It should avoid returning an error if possible -
         errors should have been handled by prepare_write.
 
+  write_begin: This is intended as a replacement for prepare_write. The
+	key differences being that:
+		- it returns a locked page (in *pagep) rather than being
+		  given a pre-locked page;
+		- it must be able to cope with short writes (where the
+		  length passed to write_begin is greater than the number
+		  of bytes copied into the page).
+
+	Called by the generic buffered write code to ask the filesystem to
+	prepare to write len bytes at the given offset in the file. The
+	address_space should check that the write will be able to complete,
+	by allocating space if necessary and doing any other internal
+	housekeeping.  If the write will update parts of any basic-blocks on
+	storage, then those blocks should be pre-read (if they haven't been
+	read already) so that the updated blocks can be written out properly.
+
+        The filesystem must return the locked pagecache page for the specified
+	offset, in *pagep, for the caller to write into.
+
+	flags is a field for AOP_FLAG_xxx flags, described in
+	include/linux/fs.h.
+
+        A void * may be returned in fsdata, which then gets passed into
+        write_end.
+
+        Returns 0 on success; < 0 on failure (which is the error code), in
+	which case write_end is not called.
+
+  write_end: After a successful write_begin, and data copy, write_end must
+        be called. len is the original len passed to write_begin, and copied
+        is the amount that was able to be copied (copied == len is always true
+	if write_begin was called with the AOP_FLAG_UNINTERRUPTIBLE flag).
+
+        The filesystem must take care of unlocking the page and releasing its
+        refcount, and updating i_size.
+
+        Returns < 0 on failure, otherwise the number of bytes (<= 'copied')
+        that were able to be copied into pagecache.
+
   bmap: called by the VFS to map a logical block offset within object to
   	physical block number. This method is used by the FIBMAP
   	ioctl and for working with swap-files.  To be able to swap to
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 085e4a095ea..eb247997f67 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -349,6 +349,11 @@ and is between 256 and 4096 characters. It is defined in the file
 	blkmtd_bs=
 	blkmtd_count=
 
+	boot_delay=	Milliseconds to delay each printk during boot.
+			Values larger than 10 seconds (10000) are changed to
+			no delay (0).
+			Format: integer
+
 	bttv.card=	[HW,V4L] bttv (bt848 + bt878 based grabber cards)
 	bttv.radio=	Most important insmod options are available as
 			kernel args too.
@@ -906,6 +911,11 @@ and is between 256 and 4096 characters. It is defined in the file
 			n must be a power of two.  The default size
 			is set in the kernel config file.
 
+	logo.nologo	[FB] Disables display of the built-in Linux logo.
+			This may be used to provide more screen space for
+			kernel log messages and is useful when debugging
+			kernel boot problems.
+
 	lp=0		[LP]	Specify parallel ports to use, e.g,
 	lp=port[,port...]	lp=none,parport0 (lp0 not configured, lp1 uses
 	lp=reset		first parallel port). 'lp=0' disables the
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
index 76ea6c837be..8861e47e5a2 100644
--- a/Documentation/spi/spi-summary
+++ b/Documentation/spi/spi-summary
@@ -156,21 +156,29 @@ using the driver model to connect controller and protocol drivers using
 device tables provided by board specific initialization code.  SPI
 shows up in sysfs in several locations:
 
+   /sys/devices/.../CTLR ... physical node for a given SPI controller
+
    /sys/devices/.../CTLR/spiB.C ... spi_device on bus "B",
 	chipselect C, accessed through CTLR.
 
+   /sys/bus/spi/devices/spiB.C ... symlink to that physical
+   	.../CTLR/spiB.C device
+
    /sys/devices/.../CTLR/spiB.C/modalias ... identifies the driver
 	that should be used with this device (for hotplug/coldplug)
 
-   /sys/bus/spi/devices/spiB.C ... symlink to the physical
-   	spiB.C device
-
    /sys/bus/spi/drivers/D ... driver for one or more spi*.* devices
 
-   /sys/class/spi_master/spiB ... class device for the controller
-	managing bus "B".  All the spiB.* devices share the same
+   /sys/class/spi_master/spiB ... symlink (or actual device node) to
+	a logical node which could hold class related state for the
+	controller managing bus "B".  All spiB.* devices share one
 	physical SPI bus segment, with SCLK, MOSI, and MISO.
 
+Note that the actual location of the controller's class state depends
+on whether you enabled CONFIG_SYSFS_DEPRECATED or not.  At this time,
+the only class-specific state is the bus number ("B" in "spiB"), so
+those /sys/class entries are only useful to quickly identify busses.
+
 
 How does board-specific init code declare SPI devices?
 ------------------------------------------------------
@@ -337,7 +345,8 @@ SPI protocol drivers somewhat resemble platform device drivers:
 
 The driver core will autmatically attempt to bind this driver to any SPI
 device whose board_info gave a modalias of "CHIP".  Your probe() code
-might look like this unless you're creating a class_device:
+might look like this unless you're creating a device which is managing
+a bus (appearing under /sys/class/spi_master).
 
 	static int __devinit CHIP_probe(struct spi_device *spi)
 	{
@@ -442,7 +451,7 @@ An SPI controller will probably be registered on the platform_bus; write
 a driver to bind to the device, whichever bus is involved.
 
 The main task of this type of driver is to provide an "spi_master".
-Use spi_alloc_master() to allocate the master, and class_get_devdata()
+Use spi_alloc_master() to allocate the master, and spi_master_get_devdata()
 to get the driver-private data allocated for that device.
 
 	struct spi_master	*master;
@@ -452,7 +461,7 @@ to get the driver-private data allocated for that device.
 	if (!master)
 		return -ENODEV;
 
-	c = class_get_devdata(&master->cdev);
+	c = spi_master_get_devdata(master);
 
 The driver will initialize the fields of that spi_master, including the
 bus number (maybe the same as the platform device ID) and three methods
diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c
index 218e8621529..cf0e3ce0d52 100644
--- a/Documentation/spi/spidev_test.c
+++ b/Documentation/spi/spidev_test.c
@@ -29,7 +29,7 @@ static void pabort(const char *s)
 	abort();
 }
 
-static char *device = "/dev/spidev1.1";
+static const char *device = "/dev/spidev1.1";
 static uint8_t mode;
 static uint8_t bits = 8;
 static uint32_t speed = 500000;
@@ -69,7 +69,7 @@ static void transfer(int fd)
 	puts("");
 }
 
-void print_usage(char *prog)
+void print_usage(const char *prog)
 {
 	printf("Usage: %s [-DsbdlHOLC3]\n", prog);
 	puts("  -D --device   device to use (default /dev/spidev1.1)\n"
@@ -88,7 +88,7 @@ void print_usage(char *prog)
 void parse_opts(int argc, char *argv[])
 {
 	while (1) {
-		static struct option lopts[] = {
+		static const struct option lopts[] = {
 			{ "device",  1, 0, 'D' },
 			{ "speed",   1, 0, 's' },
 			{ "delay",   1, 0, 'd' },
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
index 8242f52d0f2..dd498649799 100644
--- a/Documentation/vm/numa_memory_policy.txt
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -302,31 +302,30 @@ MEMORY POLICIES AND CPUSETS
 
 Memory policies work within cpusets as described above.  For memory policies
 that require a node or set of nodes, the nodes are restricted to the set of
-nodes whose memories are allowed by the cpuset constraints.  If the
-intersection of the set of nodes specified for the policy and the set of nodes
-allowed by the cpuset is the empty set, the policy is considered invalid and
-cannot be installed.
+nodes whose memories are allowed by the cpuset constraints.  If the nodemask
+specified for the policy contains nodes that are not allowed by the cpuset, or
+the intersection of the set of nodes specified for the policy and the set of
+nodes with memory is the empty set, the policy is considered invalid
+and cannot be installed.
 
 The interaction of memory policies and cpusets can be problematic for a
 couple of reasons:
 
-1) the memory policy APIs take physical node id's as arguments.  However, the
-   memory policy APIs do not provide a way to determine what nodes are valid
-   in the context where the application is running.  An application MAY consult
-   the cpuset file system [directly or via an out of tree, and not generally
-   available, libcpuset API] to obtain this information, but then the
-   application must be aware that it is running in a cpuset and use what are
-   intended primarily as administrative APIs.
-
-   However, as long as the policy specifies at least one node that is valid
-   in the controlling cpuset, the policy can be used.
+1) the memory policy APIs take physical node id's as arguments.  As mentioned
+   above, it is illegal to specify nodes that are not allowed in the cpuset.
+   The application must query the allowed nodes using the get_mempolicy()
+   API with the MPOL_F_MEMS_ALLOWED flag to determine the allowed nodes and
+   restrict itself to those nodes.  However, the resources available to a
+   cpuset can be changed by the system administrator, or a workload manager
+   application, at any time.  So, a task may still get errors attempting to
+   specify policy nodes, and must query the allowed memories again.
 
 2) when tasks in two cpusets share access to a memory region, such as shared
    memory segments created by shmget() of mmap() with the MAP_ANONYMOUS and
    MAP_SHARED flags, and any of the tasks install shared policy on the region,
    only nodes whose memories are allowed in both cpusets may be used in the
-   policies.  Again, obtaining this information requires "stepping outside"
-   the memory policy APIs, as well as knowing in what cpusets other task might
-   be attaching to the shared region, to use the cpuset information.
+   policies.  Obtaining this information requires "stepping outside" the
+   memory policy APIs to use the cpuset information and requires that one
+   know in what cpusets other tasks might be attaching to the shared region.
    Furthermore, if the cpusets' allowed memory sets are disjoint, "local"
    allocation is the only valid policy.
diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86_64/mm.txt
index f42798ed1c5..b89b6d2bebf 100644
--- a/Documentation/x86_64/mm.txt
+++ b/Documentation/x86_64/mm.txt
@@ -9,6 +9,7 @@ ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
 ffff810000000000 - ffffc0ffffffffff (=46 bits) direct mapping of all phys. memory
 ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole
 ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space
+ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffffff80000000 - ffffffff82800000 (=40 MB)   kernel text mapping, from phys 0
 ... unused hole ...
diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile
index 1b704ee54bf..d1004b4d942 100644
--- a/arch/alpha/Makefile
+++ b/arch/alpha/Makefile
@@ -12,73 +12,22 @@ NM := $(NM) -B
 
 LDFLAGS_vmlinux	:= -static -N #-relax
 CHECKFLAGS	+= -D__alpha__ -m64
-cflags-y	:= -pipe -mno-fp-regs -ffixed-8
-
-# Determine if we can use the BWX instructions with GAS.
-old_gas := $(shell if $(AS) --version 2>&1 | grep 'version 2.7' > /dev/null; then echo y; else echo n; fi)
-
-ifeq ($(old_gas),y)
-$(error The assembler '$(AS)' does not support the BWX instruction)
-endif
-
-# Determine if GCC understands the -mcpu= option.
-have_mcpu        := $(call cc-option-yn, -mcpu=ev5)
-have_mcpu_pca56  := $(call cc-option-yn, -mcpu=pca56)
-have_mcpu_ev6    := $(call cc-option-yn, -mcpu=ev6)
-have_mcpu_ev67   := $(call cc-option-yn, -mcpu=ev67)
-have_msmall_data := $(call cc-option-yn, -msmall-data)
-
-cflags-$(have_msmall_data) += -msmall-data
-
-# Turn on the proper cpu optimizations.
-ifeq ($(have_mcpu),y)
-  mcpu_done := n
-  # If GENERIC, make sure to turn off any instruction set extensions that
-  # the host compiler might have on by default.  Given that EV4 and EV5
-  # have the same instruction set, prefer EV5 because an EV5 schedule is
-  # more likely to keep an EV4 processor busy than vice-versa.
-  ifeq ($(CONFIG_ALPHA_GENERIC),y)
-    mcpu := ev5
-    mcpu_done := y
-  endif
-  ifeq ($(mcpu_done)$(CONFIG_ALPHA_SX164)$(have_mcpu_pca56),nyy)
-    mcpu := pca56
-    mcpu_done := y
-  endif
-  ifeq ($(mcpu_done)$(CONFIG_ALPHA_POLARIS)$(have_mcpu_pca56),nyy)
-    mcpu := pca56
-    mcpu_done := y
-  endif
-  ifeq ($(mcpu_done)$(CONFIG_ALPHA_EV4),ny)
-    mcpu := ev4
-    mcpu_done := y
-  endif
-  ifeq ($(mcpu_done)$(CONFIG_ALPHA_EV56),ny)
-    mcpu := ev56
-    mcpu_done := y
-  endif
-  ifeq ($(mcpu_done)$(CONFIG_ALPHA_EV5),ny)
-    mcpu := ev5
-    mcpu_done := y
-  endif
-  ifeq ($(mcpu_done)$(CONFIG_ALPHA_EV67)$(have_mcpu_ev67),nyy)
-    mcpu := ev67
-    mcpu_done := y
-  endif
-  ifeq ($(mcpu_done)$(CONFIG_ALPHA_EV6),ny)
-    ifeq ($(have_mcpu_ev6),y)
-      mcpu := ev6
-    else
-      ifeq ($(have_mcpu_pca56),y)
-        mcpu := pca56
-      else
-        mcpu=ev56
-      endif
-    endif
-    mcpu_done := y
-  endif
-  cflags-$(mcpu_done) += -mcpu=$(mcpu)
-endif
+cflags-y	:= -pipe -mno-fp-regs -ffixed-8 -msmall-data
+
+cpuflags-$(CONFIG_ALPHA_EV67)		:= -mcpu=ev67
+cpuflags-$(CONFIG_ALPHA_EV6)		:= -mcpu=ev6
+cpuflags-$(CONFIG_ALPHA_POLARIS)	:= -mcpu=pca56
+cpuflags-$(CONFIG_ALPHA_SX164)		:= -mcpu=pca56
+cpuflags-$(CONFIG_ALPHA_EV56)		:= -mcpu=ev56
+cpuflags-$(CONFIG_ALPHA_EV5)		:= -mcpu=ev5
+cpuflags-$(CONFIG_ALPHA_EV4)		:= -mcpu=ev4
+# If GENERIC, make sure to turn off any instruction set extensions that
+# the host compiler might have on by default.  Given that EV4 and EV5
+# have the same instruction set, prefer EV5 because an EV5 schedule is
+# more likely to keep an EV4 processor busy than vice-versa.
+cpuflags-$(CONFIG_ALPHA_GENERIC)	:= -mcpu=ev5
+
+cflags-y				+= $(cpuflags-y)
 
 
 # For TSUNAMI, we must have the assembler not emulate our instructions.
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index debc8f03886..5fc61e281ac 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -917,15 +917,6 @@ sys_pipe:
 .end sys_pipe
 
 	.align	4
-	.globl	sys_ptrace
-	.ent	sys_ptrace
-sys_ptrace:
-	.prologue 0
-	mov	$sp, $20
-	jmp	$31, do_sys_ptrace
-.end sys_ptrace
-
-	.align	4
 	.globl	sys_execve
 	.ent	sys_execve
 sys_execve:
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index 83a78184226..1e9ad52c460 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -260,38 +260,12 @@ void ptrace_disable(struct task_struct *child)
 	ptrace_cancel_bpt(child);
 }
 
-asmlinkage long
-do_sys_ptrace(long request, long pid, long addr, long data,
-	      struct pt_regs *regs)
+long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-	struct task_struct *child;
 	unsigned long tmp;
 	size_t copied;
 	long ret;
 
-	lock_kernel();
-	DBG(DBG_MEM, ("request=%ld pid=%ld addr=0x%lx data=0x%lx\n",
-		      request, pid, addr, data));
-	if (request == PTRACE_TRACEME) {
-		ret = ptrace_traceme();
-		goto out_notsk;
-	}
-
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child)) {
-		ret = PTR_ERR(child);
-		goto out_notsk;
-	}
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		goto out;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out;
-
 	switch (request) {
 	/* When I and D space are separate, these will need to be fixed.  */
 	case PTRACE_PEEKTEXT: /* read word at location addr. */
@@ -301,13 +275,13 @@ do_sys_ptrace(long request, long pid, long addr, long data,
 		if (copied != sizeof(tmp))
 			break;
 		
-		regs->r0 = 0;	/* special return: no errors */
+		force_successful_syscall_return();
 		ret = tmp;
 		break;
 
 	/* Read register number ADDR. */
 	case PTRACE_PEEKUSR:
-		regs->r0 = 0;	/* special return: no errors */
+		force_successful_syscall_return();
 		ret = get_reg(child, addr);
 		DBG(DBG_MEM, ("peek $%ld->%#lx\n", addr, ret));
 		break;
@@ -353,7 +327,7 @@ do_sys_ptrace(long request, long pid, long addr, long data,
 		/* make sure single-step breakpoint is gone. */
 		ptrace_cancel_bpt(child);
 		wake_up_process(child);
-		goto out;
+		break;
 
 	case PTRACE_SINGLESTEP:  /* execute single instruction. */
 		ret = -EIO;
@@ -366,20 +340,12 @@ do_sys_ptrace(long request, long pid, long addr, long data,
 		wake_up_process(child);
 		/* give it a chance to run. */
 		ret = 0;
-		goto out;
-
-	case PTRACE_DETACH:	 /* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		goto out;
+		break;
 
 	default:
 		ret = ptrace_request(child, request, addr, data);
-		goto out;
+		break;
 	}
- out:
-	put_task_struct(child);
- out_notsk:
-	unlock_kernel();
 	return ret;
 }
 
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index 7af07d3ad5f..55c05b511f4 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -1,4 +1,5 @@
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
 
 OUTPUT_FORMAT("elf64-alpha")
 OUTPUT_ARCH(alpha)
@@ -8,138 +9,145 @@ jiffies = jiffies_64;
 SECTIONS
 {
 #ifdef CONFIG_ALPHA_LEGACY_START_ADDRESS
-  . = 0xfffffc0000310000;
+	. = 0xfffffc0000310000;
 #else
-  . = 0xfffffc0001010000;
+	. = 0xfffffc0001010000;
 #endif
 
-  _text = .;					/* Text and read-only data */
-  .text : { 
+	_text = .;	/* Text and read-only data */
+	.text : {
 	*(.text.head)
-	TEXT_TEXT
-	SCHED_TEXT
-	LOCK_TEXT
-	*(.fixup)
-	*(.gnu.warning)
-  } :kernel
-  _etext = .;					/* End of text section */
-
-  . = ALIGN(16);
-  __start___ex_table = .;			/* Exception table */
-  __ex_table : { *(__ex_table) }
-  __stop___ex_table = .;
-
-  NOTES :kernel :note
-  .dummy : { *(.dummy) } :kernel
-
-  RODATA
-
-  /* Will be freed after init */
-  . = ALIGN(8192);				/* Init code and data */
-  __init_begin = .;
-  .init.text : { 
-	_sinittext = .;
-	*(.init.text)
-	_einittext = .;
-  }
-  .init.data : { *(.init.data) }
-
-  . = ALIGN(16);
-  __setup_start = .;
-  .init.setup : { *(.init.setup) }
-  __setup_end = .;
-
-  . = ALIGN(8);
-  __initcall_start = .;
-  .initcall.init : {
-	INITCALLS
-  }
-  __initcall_end = .;
+		TEXT_TEXT
+		SCHED_TEXT
+		LOCK_TEXT
+		*(.fixup)
+		*(.gnu.warning)
+	} :kernel
+	_etext = .;	/* End of text section */
+
+	/* Exception table */
+	. = ALIGN(16);
+	__ex_table : {
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+	}
+
+	NOTES :kernel :note
+	.dummy : {
+		*(.dummy)
+	} :kernel
+
+	RODATA
+
+	/* Will be freed after init */
+	. = ALIGN(PAGE_SIZE);
+	/* Init code and data */
+	__init_begin = .;
+	.init.text : {
+		_sinittext = .;
+		*(.init.text)
+		_einittext = .;
+	}
+	.init.data : {
+		*(.init.data)
+	}
+
+	. = ALIGN(16);
+	.init.setup : {
+		__setup_start = .;
+		*(.init.setup)
+		__setup_end = .;
+	}
+
+	. = ALIGN(8);
+	.initcall.init : {
+		__initcall_start = .;
+		INITCALLS
+		__initcall_end = .;
+	}
 
 #ifdef CONFIG_BLK_DEV_INITRD
-  . = ALIGN(8192);
-  __initramfs_start = .;
-  .init.ramfs : { *(.init.ramfs) }
-  __initramfs_end = .;
+	. = ALIGN(PAGE_SIZE);
+	.init.ramfs : {
+		__initramfs_start = .;
+		*(.init.ramfs)
+		__initramfs_end = .;
+	}
 #endif
 
-  . = ALIGN(8);
-  .con_initcall.init : {
-	__con_initcall_start = .;
-	*(.con_initcall.init)
-	__con_initcall_end = .;
-  }
-
-  . = ALIGN(8);
-  SECURITY_INIT
-
-  PERCPU(8192)
-
-  . = ALIGN(2*8192);
-  __init_end = .;
-  /* Freed after init ends here */
-
-  /* Note 2 page alignment above.  */
-  .data.init_thread : { *(.data.init_thread) }
-
-  . = ALIGN(8192);
-  .data.page_aligned : { *(.data.page_aligned) }
-
-  . = ALIGN(64);
-  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
-
-  _data = .;
-  .data : {					/* Data */
-	DATA_DATA
-	CONSTRUCTORS
-  }
-
-  .got : { *(.got) }
-  .sdata : { *(.sdata) }
-
-  _edata = .;					/* End of data section */
-
-  __bss_start = .;
-  .sbss : { *(.sbss) *(.scommon) }
-  .bss : { *(.bss) *(COMMON) }
-  __bss_stop = .;
-
-  _end = .;
-
-  /* Sections to be discarded */
-  /DISCARD/ : { *(.exit.text) *(.exit.data) *(.exitcall.exit) }
-
-  .mdebug 0 : { *(.mdebug) }
-  .note 0 : { *(.note) }
-  .comment 0 : { *(.comment) }
-
-  /* Stabs debugging sections */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  /* DWARF 1 */
-  .debug          0 : { *(.debug) }
-  .line           0 : { *(.line) }
-  /* GNU DWARF 1 extensions */
-  .debug_srcinfo  0 : { *(.debug_srcinfo) }
-  .debug_sfnames  0 : { *(.debug_sfnames) }
-  /* DWARF 1.1 and DWARF 2 */
-  .debug_aranges  0 : { *(.debug_aranges) }
-  .debug_pubnames 0 : { *(.debug_pubnames) }
-  /* DWARF 2 */
-  .debug_info     0 : { *(.debug_info) }
-  .debug_abbrev   0 : { *(.debug_abbrev) }
-  .debug_line     0 : { *(.debug_line) }
-  .debug_frame    0 : { *(.debug_frame) }
-  .debug_str      0 : { *(.debug_str) }
-  .debug_loc      0 : { *(.debug_loc) }
-  .debug_macinfo  0 : { *(.debug_macinfo) }
-  /* SGI/MIPS DWARF 2 extensions */
-  .debug_weaknames 0 : { *(.debug_weaknames) }
-  .debug_funcnames 0 : { *(.debug_funcnames) }
-  .debug_typenames 0 : { *(.debug_typenames) }
-  .debug_varnames  0 : { *(.debug_varnames) }
+	. = ALIGN(8);
+	.con_initcall.init : {
+		__con_initcall_start = .;
+		*(.con_initcall.init)
+		__con_initcall_end = .;
+	}
+
+	. = ALIGN(8);
+	SECURITY_INIT
+
+	PERCPU(PAGE_SIZE)
+
+	. = ALIGN(2 * PAGE_SIZE);
+	__init_end = .;
+	/* Freed after init ends here */
+
+	/* Note 2 page alignment above.  */
+	.data.init_thread : {
+		*(.data.init_thread)
+	}
+
+	. = ALIGN(PAGE_SIZE);
+	.data.page_aligned : {
+		*(.data.page_aligned)
+	}
+
+	. = ALIGN(64);
+	.data.cacheline_aligned : {
+		*(.data.cacheline_aligned)
+	}
+
+	_data = .;
+	/* Data */
+	.data : {
+		DATA_DATA
+		CONSTRUCTORS
+	}
+
+	.got : {
+		*(.got)
+	}
+	.sdata : {
+		*(.sdata)
+	}
+	_edata = .;	/* End of data section */
+
+	__bss_start = .;
+	.sbss : {
+		*(.sbss)
+		*(.scommon)
+	}
+	.bss : {
+		*(.bss)
+		*(COMMON)
+	}
+	__bss_stop = .;
+	_end = .;
+
+	/* Sections to be discarded */
+	/DISCARD/ : {
+		*(.exit.text)
+		*(.exit.data)
+		*(.exitcall.exit)
+	}
+
+	.mdebug 0 : {
+		*(.mdebug)
+	}
+	.note 0 : {
+		*(.note)
+	}
+
+	STABS_DEBUG
+	DWARF_DEBUG
 }
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
index a0e18da594d..25154df3055 100644
--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -197,7 +197,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 	       current->comm, current->pid);
 	if (!user_mode(regs))
 		goto no_context;
-	do_exit(SIGKILL);
+	do_group_exit(SIGKILL);
 
  do_sigbus:
 	/* Send a sigbus, regardless of whether we were in kernel
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 78c9f1a3d41..5feee722ea9 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -731,10 +731,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			ret = 0;
 			break;
 
-		case PTRACE_DETACH:
-			ret = ptrace_detach(child, data);
-			break;
-
 		case PTRACE_GETREGS:
 			ret = ptrace_getregs(child, (void __user *)data);
 			break;
diff --git a/arch/arm/mach-s3c2410/mach-amlm5900.c b/arch/arm/mach-s3c2410/mach-amlm5900.c
index 43bb5e10630..a67a0685664 100644
--- a/arch/arm/mach-s3c2410/mach-amlm5900.c
+++ b/arch/arm/mach-s3c2410/mach-amlm5900.c
@@ -168,13 +168,31 @@ static void __init amlm5900_map_io(void)
 }
 
 #ifdef CONFIG_FB_S3C2410
-static struct s3c2410fb_mach_info __initdata amlm5900_lcd_info = {
+static struct s3c2410fb_display __initdata amlm5900_lcd_info = {
 	.width		= 160,
 	.height		= 160,
 
-/* commented out until stn patch is submitted
-*	.type		= S3C2410_LCDCON1_STN4,
-*/
+	.type		= S3C2410_LCDCON1_STN4,
+
+	.pixclock	= 680000, /* HCLK = 100MHz */
+	.xres		= 160,
+	.yres		= 160,
+	.bpp		= 4,
+	.left_margin	= 1 << (4 + 3),
+	.right_margin	= 8 << 3,
+	.hsync_len	= 48,
+	.upper_margin	= 0,
+	.lower_margin	= 0,
+
+	.lcdcon5	= 0x00000001,
+};
+
+static struct s3c2410fb_mach_info __initdata amlm5900_fb_info = {
+
+	.displays = &amlm5900_lcd_info,
+	.num_displays = 1,
+	.default_display = 0,
+
 	.gpccon =	0xaaaaaaaa,
 	.gpccon_mask =	0xffffffff,
 	.gpcup =	0x0000ffff,
@@ -184,32 +202,6 @@ static struct s3c2410fb_mach_info __initdata amlm5900_lcd_info = {
 	.gpdcon_mask =	0xffffffff,
 	.gpdup =	0x0000ffff,
 	.gpdup_mask =	0xffffffff,
-
-	.xres		= {
-		.min		= 160,
-		.max		= 160,
-		.defval		= 160,
-	},
-
-	.yres		= {
-		.min		= 160,
-		.max	        = 160,
-		.defval		= 160,
-	},
-
-	.bpp		= {
-		.min		= 4,
-		.max		= 4,
-		.defval		= 4,
-	},
-
-	.regs		= {
-		.lcdcon1	= 0x00008225,
-		.lcdcon2	= 0x0027c000,
-		.lcdcon3	= 0x00182708,
-		.lcdcon4	= 0x00000002,
-		.lcdcon5	= 0x00000001,
-	}
 };
 #endif
 
@@ -239,7 +231,7 @@ static void __init amlm5900_init(void)
 {
 	amlm5900_init_pm();
 #ifdef CONFIG_FB_S3C2410
-	s3c24xx_fb_set_platdata(&amlm5900_lcd_info);
+	s3c24xx_fb_set_platdata(&amlm5900_fb_info);
 #endif
 	platform_add_devices(amlm5900_devices, ARRAY_SIZE(amlm5900_devices));
 }
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index bc926992b4e..587864fe25f 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -467,35 +467,70 @@ static struct platform_device bast_device_axpp = {
 
 /* LCD/VGA controller */
 
-static struct s3c2410fb_mach_info __initdata bast_lcd_info = {
-	.width		= 640,
-	.height		= 480,
-
-	.xres		= {
-		.min		= 320,
-		.max		= 1024,
-		.defval		= 640,
-	},
+static struct s3c2410fb_display __initdata bast_lcd_info[] = {
+	{
+		.type		= S3C2410_LCDCON1_TFT,
+		.width		= 640,
+		.height		= 480,
+
+		.pixclock	= 33333,
+		.xres		= 640,
+		.yres		= 480,
+		.bpp		= 4,
+		.left_margin	= 40,
+		.right_margin	= 20,
+		.hsync_len	= 88,
+		.upper_margin	= 30,
+		.lower_margin	= 32,
+		.vsync_len	= 3,
 
-	.yres		= {
-		.min		= 240,
-		.max	        = 600,
-		.defval		= 480,
+		.lcdcon5	= 0x00014b02,
 	},
+	{
+		.type		= S3C2410_LCDCON1_TFT,
+		.width		= 640,
+		.height		= 480,
+
+		.pixclock	= 33333,
+		.xres		= 640,
+		.yres		= 480,
+		.bpp		= 8,
+		.left_margin	= 40,
+		.right_margin	= 20,
+		.hsync_len	= 88,
+		.upper_margin	= 30,
+		.lower_margin	= 32,
+		.vsync_len	= 3,
 
-	.bpp		= {
-		.min		= 4,
-		.max		= 16,
-		.defval		= 8,
+		.lcdcon5	= 0x00014b02,
 	},
+	{
+		.type		= S3C2410_LCDCON1_TFT,
+		.width		= 640,
+		.height		= 480,
+
+		.pixclock	= 33333,
+		.xres		= 640,
+		.yres		= 480,
+		.bpp		= 16,
+		.left_margin	= 40,
+		.right_margin	= 20,
+		.hsync_len	= 88,
+		.upper_margin	= 30,
+		.lower_margin	= 32,
+		.vsync_len	= 3,
 
-	.regs		= {
-		.lcdcon1	= 0x00000176,
-		.lcdcon2	= 0x1d77c7c2,
-		.lcdcon3	= 0x013a7f13,
-		.lcdcon4	= 0x00000057,
 		.lcdcon5	= 0x00014b02,
-	}
+	},
+};
+
+/* LCD/VGA controller */
+
+static struct s3c2410fb_mach_info __initdata bast_fb_info = {
+
+	.displays = bast_lcd_info,
+	.num_displays = ARRAY_SIZE(bast_lcd_info),
+	.default_display = 1,	/* 8bpp entry, the old bpp default */
+};
 
 /* Standard BAST devices */
@@ -552,7 +587,7 @@ static void __init bast_map_io(void)
 
 static void __init bast_init(void)
 {
-	s3c24xx_fb_set_platdata(&bast_lcd_info);
+	s3c24xx_fb_set_platdata(&bast_fb_info);
 	platform_add_devices(bast_devices, ARRAY_SIZE(bast_devices));
 }
 
diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c
index 9a172b4ad72..7c1145e87c1 100644
--- a/arch/arm/mach-s3c2410/mach-h1940.c
+++ b/arch/arm/mach-s3c2410/mach-h1940.c
@@ -133,29 +133,31 @@ static struct s3c2410_udc_mach_info h1940_udc_cfg __initdata = {
 /**
  * Set lcd on or off
  **/
-static struct s3c2410fb_mach_info h1940_lcdcfg __initdata = {
-	.fixed_syncs=		1,
-	.regs={
-		.lcdcon1=	S3C2410_LCDCON1_TFT16BPP | \
-				S3C2410_LCDCON1_TFT | \
-				S3C2410_LCDCON1_CLKVAL(0x0C),
-
-		.lcdcon2=	S3C2410_LCDCON2_VBPD(7) | \
-				S3C2410_LCDCON2_LINEVAL(319) | \
-				S3C2410_LCDCON2_VFPD(6) | \
-				S3C2410_LCDCON2_VSPW(0),
-
-		.lcdcon3=	S3C2410_LCDCON3_HBPD(19) | \
-				S3C2410_LCDCON3_HOZVAL(239) | \
-				S3C2410_LCDCON3_HFPD(7),
-
-		.lcdcon4=	S3C2410_LCDCON4_MVAL(0) | \
-				S3C2410_LCDCON4_HSPW(3),
-
-		.lcdcon5=	S3C2410_LCDCON5_FRM565 | \
-				S3C2410_LCDCON5_INVVLINE | \
-				S3C2410_LCDCON5_HWSWP,
-	},
+static struct s3c2410fb_display h1940_lcd __initdata = {
+	.lcdcon5=	S3C2410_LCDCON5_FRM565 | \
+			S3C2410_LCDCON5_INVVLINE | \
+			S3C2410_LCDCON5_HWSWP,
+
+	.type =		S3C2410_LCDCON1_TFT,
+	.width =	240,
+	.height =	320,
+	.pixclock =	260000,
+	.xres =		240,
+	.yres =		320,
+	.bpp =		16,
+	.left_margin =	20,
+	.right_margin =	8,
+	.hsync_len =	4,
+	.upper_margin =	8,
+	.lower_margin = 7,
+	.vsync_len =	1,
+};
+
+static struct s3c2410fb_mach_info h1940_fb_info __initdata = {
+	.displays = &h1940_lcd,
+	.num_displays = 1,
+	.default_display = 0,
+
 	.lpcsel=	0x02,
 	.gpccon=	0xaa940659,
 	.gpccon_mask=	0xffffffff,
@@ -165,12 +167,6 @@ static struct s3c2410fb_mach_info h1940_lcdcfg __initdata = {
 	.gpdcon_mask=	0xffffffff,
 	.gpdup=		0x0000faff,
 	.gpdup_mask=	0xffffffff,
-
-	.width=		240,
-	.height=	320,
-	.xres=		{240,240,240},
-	.yres=		{320,320,320},
-	.bpp=		{16,16,16},
 };
 
 static struct platform_device s3c_device_leds = {
@@ -217,7 +213,7 @@ static void __init h1940_init(void)
 {
 	u32 tmp;
 
-	s3c24xx_fb_set_platdata(&h1940_lcdcfg);
+	s3c24xx_fb_set_platdata(&h1940_fb_info);
  	s3c24xx_udc_set_platdata(&h1940_udc_cfg);
 
 	/* Turn off suspend on both USB ports, and switch the
diff --git a/arch/arm/mach-s3c2410/mach-qt2410.c b/arch/arm/mach-s3c2410/mach-qt2410.c
index e670b1e1631..a1caf4b0ada 100644
--- a/arch/arm/mach-s3c2410/mach-qt2410.c
+++ b/arch/arm/mach-s3c2410/mach-qt2410.c
@@ -95,157 +95,83 @@ static struct s3c2410_uartcfg smdk2410_uartcfgs[] = {
 
 /* LCD driver info */
 
-/* Configuration for 640x480 SHARP LQ080V3DG01 */
-static struct s3c2410fb_mach_info qt2410_biglcd_cfg __initdata = {
-	.regs	= {
-
-		.lcdcon1	= S3C2410_LCDCON1_TFT16BPP |
-				  S3C2410_LCDCON1_TFT |
-				  S3C2410_LCDCON1_CLKVAL(0x01),	/* HCLK/4 */
-
-		.lcdcon2	= S3C2410_LCDCON2_VBPD(18) |	/* 19 */
-				  S3C2410_LCDCON2_LINEVAL(479) |
-				  S3C2410_LCDCON2_VFPD(10) |	/* 11 */
-				  S3C2410_LCDCON2_VSPW(14),	/* 15 */
-
-		.lcdcon3	= S3C2410_LCDCON3_HBPD(43) |	/* 44 */
-				  S3C2410_LCDCON3_HOZVAL(639) |	/* 640 */
-				  S3C2410_LCDCON3_HFPD(115),	/* 116 */
-
-		.lcdcon4	= S3C2410_LCDCON4_MVAL(0) |
-				  S3C2410_LCDCON4_HSPW(95),	/* 96 */
-
-		.lcdcon5	= S3C2410_LCDCON5_FRM565 |
-				  S3C2410_LCDCON5_INVVLINE |
-				  S3C2410_LCDCON5_INVVFRAME |
-				  S3C2410_LCDCON5_PWREN |
-				  S3C2410_LCDCON5_HWSWP,
+static struct s3c2410fb_display qt2410_lcd_cfg[] __initdata = {
+	{
+		/* Configuration for 640x480 SHARP LQ080V3DG01 */
+		.lcdcon5 = S3C2410_LCDCON5_FRM565 |
+			   S3C2410_LCDCON5_INVVLINE |
+			   S3C2410_LCDCON5_INVVFRAME |
+			   S3C2410_LCDCON5_PWREN |
+			   S3C2410_LCDCON5_HWSWP,
+
+		.type		= S3C2410_LCDCON1_TFT,
+		.width		= 640,
+		.height		= 480,
+
+		.pixclock	= 40000, /* HCLK/4 */
+		.xres		= 640,
+		.yres		= 480,
+		.bpp		= 16,
+		.left_margin	= 44,
+		.right_margin	= 116,
+		.hsync_len	= 96,
+		.upper_margin	= 19,
+		.lower_margin	= 11,
+		.vsync_len	= 15,
 	},
-
-	.lpcsel		= ((0xCE6) & ~7) | 1<<4,
-
-	.width		= 640,
-	.height		= 480,
-
-	.xres		= {
-		.min	= 640,
-		.max	= 640,
-		.defval	= 640,
-	},
-
-	.yres		= {
-		.min	= 480,
-		.max	= 480,
-		.defval = 480,
-	},
-
-	.bpp		= {
-		.min	= 16,
-		.max	= 16,
-		.defval = 16,
-	},
-};
-
-/* Configuration for 480x640 toppoly TD028TTEC1 */
-static struct s3c2410fb_mach_info qt2410_prodlcd_cfg __initdata = {
-	.regs	= {
-
-		.lcdcon1	= S3C2410_LCDCON1_TFT16BPP |
-				  S3C2410_LCDCON1_TFT |
-				  S3C2410_LCDCON1_CLKVAL(0x01),	/* HCLK/4 */
-
-		.lcdcon2	= S3C2410_LCDCON2_VBPD(1) |	/* 2 */
-				  S3C2410_LCDCON2_LINEVAL(639) |/* 640 */
-				  S3C2410_LCDCON2_VFPD(3) |	/* 4 */
-				  S3C2410_LCDCON2_VSPW(1),	/* 2 */
-
-		.lcdcon3	= S3C2410_LCDCON3_HBPD(7) |	/* 8 */
-				  S3C2410_LCDCON3_HOZVAL(479) |	/* 479 */
-				  S3C2410_LCDCON3_HFPD(23),	/* 24 */
-
-		.lcdcon4	= S3C2410_LCDCON4_MVAL(0) |
-				  S3C2410_LCDCON4_HSPW(7),	/* 8 */
-
-		.lcdcon5	= S3C2410_LCDCON5_FRM565 |
-				  S3C2410_LCDCON5_INVVLINE |
-				  S3C2410_LCDCON5_INVVFRAME |
-				  S3C2410_LCDCON5_PWREN |
-				  S3C2410_LCDCON5_HWSWP,
-	},
-
-	.lpcsel		= ((0xCE6) & ~7) | 1<<4,
-
-	.width		= 480,
-	.height		= 640,
-
-	.xres		= {
-		.min	= 480,
-		.max	= 480,
-		.defval	= 480,
+	{
+		/* Configuration for 480x640 toppoly TD028TTEC1 */
+		.lcdcon5 = S3C2410_LCDCON5_FRM565 |
+			   S3C2410_LCDCON5_INVVLINE |
+			   S3C2410_LCDCON5_INVVFRAME |
+			   S3C2410_LCDCON5_PWREN |
+			   S3C2410_LCDCON5_HWSWP,
+
+		.type		= S3C2410_LCDCON1_TFT,
+		.width		= 480,
+		.height		= 640,
+		.pixclock	= 40000, /* HCLK/4 */
+		.xres		= 480,
+		.yres		= 640,
+		.bpp		= 16,
+		.left_margin	= 8,
+		.right_margin	= 24,
+		.hsync_len	= 8,
+		.upper_margin	= 2,
+		.lower_margin	= 4,
+		.vsync_len	= 2,
 	},
-
-	.yres		= {
-		.min	= 640,
-		.max	= 640,
-		.defval = 640,
-	},
-
-	.bpp		= {
-		.min	= 16,
-		.max	= 16,
-		.defval = 16,
+	{
+		/* Config for 240x320 LCD */
+		.lcdcon5 = S3C2410_LCDCON5_FRM565 |
+			   S3C2410_LCDCON5_INVVLINE |
+			   S3C2410_LCDCON5_INVVFRAME |
+			   S3C2410_LCDCON5_PWREN |
+			   S3C2410_LCDCON5_HWSWP,
+
+		.type		= S3C2410_LCDCON1_TFT,
+		.width		= 240,
+		.height		= 320,
+		.pixclock	= 100000, /* HCLK/10 */
+		.xres		= 240,
+		.yres		= 320,
+		.bpp		= 16,
+		.left_margin	= 13,
+		.right_margin	= 8,
+		.hsync_len	= 4,
+		.upper_margin	= 2,
+		.lower_margin	= 7,
+		.vsync_len	= 4,
 	},
 };
 
-/* Config for 240x320 LCD */
-static struct s3c2410fb_mach_info qt2410_lcd_cfg __initdata = {
-	.regs	= {
-
-		.lcdcon1	= S3C2410_LCDCON1_TFT16BPP |
-				  S3C2410_LCDCON1_TFT |
-				  S3C2410_LCDCON1_CLKVAL(0x04),
-
-		.lcdcon2	= S3C2410_LCDCON2_VBPD(1) |
-				  S3C2410_LCDCON2_LINEVAL(319) |
-				  S3C2410_LCDCON2_VFPD(6) |
-				  S3C2410_LCDCON2_VSPW(3),
-
-		.lcdcon3	= S3C2410_LCDCON3_HBPD(12) |
-				  S3C2410_LCDCON3_HOZVAL(239) |
-				  S3C2410_LCDCON3_HFPD(7),
 
-		.lcdcon4	= S3C2410_LCDCON4_MVAL(0) |
-				  S3C2410_LCDCON4_HSPW(3),
-
-		.lcdcon5	= S3C2410_LCDCON5_FRM565 |
-				  S3C2410_LCDCON5_INVVLINE |
-				  S3C2410_LCDCON5_INVVFRAME |
-				  S3C2410_LCDCON5_PWREN |
-				  S3C2410_LCDCON5_HWSWP,
-	},
+static struct s3c2410fb_mach_info qt2410_fb_info __initdata = {
+	.displays 	= qt2410_lcd_cfg,
+	.num_displays 	= ARRAY_SIZE(qt2410_lcd_cfg),
+	.default_display = 0,
 
 	.lpcsel		= ((0xCE6) & ~7) | 1<<4,
-
-	.width		= 240,
-	.height		= 320,
-
-	.xres		= {
-		.min	= 240,
-		.max	= 240,
-		.defval	= 240,
-	},
-
-	.yres		= {
-		.min	= 320,
-		.max	= 320,
-		.defval = 320,
-	},
-
-	.bpp		= {
-		.min	= 16,
-		.max	= 16,
-		.defval = 16,
-	},
 };
 
 /* CS8900 */
@@ -408,16 +334,17 @@ static void __init qt2410_machine_init(void)
 
 	switch (tft_type) {
 	case 'p': /* production */
-		s3c24xx_fb_set_platdata(&qt2410_prodlcd_cfg);
+		qt2410_fb_info.default_display = 1;
 		break;
 	case 'b': /* big */
-		s3c24xx_fb_set_platdata(&qt2410_biglcd_cfg);
+		qt2410_fb_info.default_display = 0;
 		break;
 	case 's': /* small */
 	default:
-		s3c24xx_fb_set_platdata(&qt2410_lcd_cfg);
+		qt2410_fb_info.default_display = 2;
 		break;
 	}
+	s3c24xx_fb_set_platdata(&qt2410_fb_info);
 
 	s3c2410_gpio_cfgpin(S3C2410_GPB0, S3C2410_GPIO_OUTPUT);
 	s3c2410_gpio_setpin(S3C2410_GPB0, 1);
diff --git a/arch/arm/mach-s3c2440/mach-rx3715.c b/arch/arm/mach-s3c2440/mach-rx3715.c
index b59e6d39f2f..bac40c4878a 100644
--- a/arch/arm/mach-s3c2440/mach-rx3715.c
+++ b/arch/arm/mach-s3c2440/mach-rx3715.c
@@ -110,28 +110,32 @@ static struct s3c2410_uartcfg rx3715_uartcfgs[] = {
 
 /* framebuffer lcd controller information */
 
-static struct s3c2410fb_mach_info rx3715_lcdcfg __initdata = {
-	.regs	= {
-		.lcdcon1 =	S3C2410_LCDCON1_TFT16BPP | \
-				S3C2410_LCDCON1_TFT | \
-				S3C2410_LCDCON1_CLKVAL(0x0C),
-
-		.lcdcon2 =	S3C2410_LCDCON2_VBPD(5) | \
-				S3C2410_LCDCON2_LINEVAL(319) | \
-				S3C2410_LCDCON2_VFPD(6) | \
-				S3C2410_LCDCON2_VSPW(2),
-
-		.lcdcon3 =	S3C2410_LCDCON3_HBPD(35) | \
-				S3C2410_LCDCON3_HOZVAL(239) | \
-				S3C2410_LCDCON3_HFPD(35),
-
-		.lcdcon4 =	S3C2410_LCDCON4_MVAL(0) | \
-				S3C2410_LCDCON4_HSPW(7),
-
-		.lcdcon5 =	S3C2410_LCDCON5_INVVLINE |
-				S3C2410_LCDCON5_FRM565 |
-				S3C2410_LCDCON5_HWSWP,
-	},
+static struct s3c2410fb_display rx3715_lcdcfg __initdata = {
+	.lcdcon5 =	S3C2410_LCDCON5_INVVLINE |
+			S3C2410_LCDCON5_FRM565 |
+			S3C2410_LCDCON5_HWSWP,
+
+	.type		= S3C2410_LCDCON1_TFT,
+	.width		= 240,
+	.height		= 320,
+
+	.pixclock	= 260000,
+	.xres		= 240,
+	.yres		= 320,
+	.bpp		= 16,
+	.left_margin	= 36,
+	.right_margin	= 36,
+	.hsync_len	= 8,
+	.upper_margin	= 6,
+	.lower_margin	= 7,
+	.vsync_len	= 3,
+};
+
+static struct s3c2410fb_mach_info rx3715_fb_info __initdata = {
+
+	.displays =	&rx3715_lcdcfg,
+	.num_displays =	1,
+	.default_display = 0,
 
 	.lpcsel =	0xf82,
 
@@ -144,28 +148,6 @@ static struct s3c2410fb_mach_info rx3715_lcdcfg __initdata = {
 	.gpdcon_mask =	0xffc0fff0,
 	.gpdup =	0x0000faff,
 	.gpdup_mask =	0xffffffff,
-
-	.fixed_syncs =	1,
-	.width  =	240,
-	.height =	320,
-
-	.xres	= {
-		.min =		240,
-		.max =		240,
-		.defval =	240,
-	},
-
-	.yres	= {
-		.max =		320,
-		.min =		320,
-		.defval	=	320,
-	},
-
-	.bpp	= {
-		.min =		16,
-		.max =		16,
-		.defval =	16,
-	},
 };
 
 static struct mtd_partition rx3715_nand_part[] = {
@@ -224,7 +206,7 @@ static void __init rx3715_init_machine(void)
 #endif
 	s3c2410_pm_init();
 
-	s3c24xx_fb_set_platdata(&rx3715_lcdcfg);
+	s3c24xx_fb_set_platdata(&rx3715_fb_info);
 	platform_add_devices(rx3715_devices, ARRAY_SIZE(rx3715_devices));
 }
 
diff --git a/arch/arm/mach-s3c2440/mach-smdk2440.c b/arch/arm/mach-s3c2440/mach-smdk2440.c
index 670115b8a12..4552828bf80 100644
--- a/arch/arm/mach-s3c2440/mach-smdk2440.c
+++ b/arch/arm/mach-s3c2440/mach-smdk2440.c
@@ -103,31 +103,35 @@ static struct s3c2410_uartcfg smdk2440_uartcfgs[] __initdata = {
 
 /* LCD driver info */
 
-static struct s3c2410fb_mach_info smdk2440_lcd_cfg __initdata = {
-	.regs	= {
-
-		.lcdcon1	= S3C2410_LCDCON1_TFT16BPP |
-				  S3C2410_LCDCON1_TFT |
-				  S3C2410_LCDCON1_CLKVAL(0x04),
-
-		.lcdcon2	= S3C2410_LCDCON2_VBPD(7) |
-				  S3C2410_LCDCON2_LINEVAL(319) |
-				  S3C2410_LCDCON2_VFPD(6) |
-				  S3C2410_LCDCON2_VSPW(3),
-
-		.lcdcon3	= S3C2410_LCDCON3_HBPD(19) |
-				  S3C2410_LCDCON3_HOZVAL(239) |
-				  S3C2410_LCDCON3_HFPD(7),
-
-		.lcdcon4	= S3C2410_LCDCON4_MVAL(0) |
-				  S3C2410_LCDCON4_HSPW(3),
-
-		.lcdcon5	= S3C2410_LCDCON5_FRM565 |
-				  S3C2410_LCDCON5_INVVLINE |
-				  S3C2410_LCDCON5_INVVFRAME |
-				  S3C2410_LCDCON5_PWREN |
-				  S3C2410_LCDCON5_HWSWP,
-	},
+static struct s3c2410fb_display smdk2440_lcd_cfg __initdata = {
+
+	.lcdcon5	= S3C2410_LCDCON5_FRM565 |
+			  S3C2410_LCDCON5_INVVLINE |
+			  S3C2410_LCDCON5_INVVFRAME |
+			  S3C2410_LCDCON5_PWREN |
+			  S3C2410_LCDCON5_HWSWP,
+
+	.type		= S3C2410_LCDCON1_TFT,
+
+	.width		= 240,
+	.height		= 320,
+
+	.pixclock	= 166667, /* HCLK 60 MHz, divisor 10 */
+	.xres		= 240,
+	.yres		= 320,
+	.bpp		= 16,
+	.left_margin	= 20,
+	.right_margin	= 8,
+	.hsync_len	= 4,
+	.upper_margin	= 8,
+	.lower_margin	= 7,
+	.vsync_len	= 4,
+};
+
+static struct s3c2410fb_mach_info smdk2440_fb_info __initdata = {
+	.displays	= &smdk2440_lcd_cfg,
+	.num_displays	= 1,
+	.default_display = 0,
 
 #if 0
 	/* currently setup by downloader */
@@ -142,28 +146,6 @@ static struct s3c2410fb_mach_info smdk2440_lcd_cfg __initdata = {
 #endif
 
 	.lpcsel		= ((0xCE6) & ~7) | 1<<4,
-	.type		= S3C2410_LCDCON1_TFT16BPP,
-
-	.width		= 240,
-	.height		= 320,
-
-	.xres		= {
-		.min	= 240,
-		.max	= 240,
-		.defval	= 240,
-	},
-
-	.yres		= {
-		.min	= 320,
-		.max	= 320,
-		.defval = 320,
-	},
-
-	.bpp		= {
-		.min	= 16,
-		.max	= 16,
-		.defval = 16,
-	},
 };
 
 static struct platform_device *smdk2440_devices[] __initdata = {
@@ -183,7 +165,7 @@ static void __init smdk2440_map_io(void)
 
 static void __init smdk2440_machine_init(void)
 {
-	s3c24xx_fb_set_platdata(&smdk2440_lcd_cfg);
+	s3c24xx_fb_set_platdata(&smdk2440_fb_info);
 
 	platform_add_devices(smdk2440_devices, ARRAY_SIZE(smdk2440_devices));
 	smdk_machine_init();
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 846cce48e2b..59ed1d05b71 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -266,7 +266,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		 * the page fault gracefully.
 		 */
 		printk("VM: killing process %s\n", tsk->comm);
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 		return 0;
 	}
 	if (fault & VM_FAULT_SIGBUS) {
diff --git a/arch/avr32/kernel/kprobes.c b/arch/avr32/kernel/kprobes.c
index 4942ee662e0..20b1c9d8f94 100644
--- a/arch/avr32/kernel/kprobes.c
+++ b/arch/avr32/kernel/kprobes.c
@@ -22,6 +22,8 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe);
 static unsigned long kprobe_status;
 static struct pt_regs jprobe_saved_regs;
 
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	int ret = 0;
diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c
index 39060cbeb2a..9e16b8a447f 100644
--- a/arch/avr32/kernel/ptrace.c
+++ b/arch/avr32/kernel/ptrace.c
@@ -227,11 +227,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = 0;
 		break;
 
-	/* Detach a process that was attached */
-	case PTRACE_DETACH:
-		ret = ptrace_detach(child, data);
-		break;
-
 	case PTRACE_GETREGS:
 		ret = ptrace_getregs(child, (void __user *)data);
 		break;
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index ae2d2c593b2..11472f8701b 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -216,7 +216,7 @@ out_of_memory:
 	}
 	printk("VM: Killing process %s\n", tsk->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index 64ce5fea860..85caf9b711a 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -385,12 +385,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			break;
 		}
 
-	case PTRACE_DETACH:
-		{		/* detach a process that was attached. */
-			ret = ptrace_detach(child, data);
-			break;
-		}
-
 	case PTRACE_GETREGS:
 		{
 
diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c
index f4f9db698b4..b570ae9b6ca 100644
--- a/arch/cris/arch-v10/kernel/ptrace.c
+++ b/arch/cris/arch-v10/kernel/ptrace.c
@@ -177,10 +177,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			ret = 0;
 			break;
 
-		case PTRACE_DETACH:
-			ret = ptrace_detach(child, data);
-			break;
-
 		/* Get all GP registers from the child. */
 		case PTRACE_GETREGS: {
 		  	int i;
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 077e973c33f..575a14bb110 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -254,8 +254,12 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  * it needs to be IRQF_DISABLED to make the jiffies update work properly
  */
 
-static struct irqaction irq2  = { timer_interrupt, IRQF_SHARED | IRQF_DISABLED,
-				  CPU_MASK_NONE, "timer", NULL, NULL};
+static struct irqaction irq2  = {
+	.handler = timer_interrupt,
+	.flags = IRQF_SHARED | IRQF_DISABLED,
+	.mask = CPU_MASK_NONE,
+	.name = "timer",
+};
 
 void __init
 time_init(void)
diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c
index 38ece0cd47c..2df60529a8a 100644
--- a/arch/cris/arch-v32/kernel/ptrace.c
+++ b/arch/cris/arch-v32/kernel/ptrace.c
@@ -245,10 +245,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			break;
 
 		}
-		case PTRACE_DETACH:
-			ret = ptrace_detach(child, data);
-			break;
-
 		/* Get all GP registers from the child. */
 		case PTRACE_GETREGS: {
 		  	int i;
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
index 77e655f2656..697494bc2de 100644
--- a/arch/cris/arch-v32/kernel/smp.c
+++ b/arch/cris/arch-v32/kernel/smp.c
@@ -63,8 +63,12 @@ static unsigned long irq_regs[NR_CPUS] =
 
 static irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id, struct pt_regs *regs);
 static int send_ipi(int vector, int wait, cpumask_t cpu_mask);
-static struct irqaction irq_ipi  = { crisv32_ipi_interrupt, IRQF_DISABLED,
-                                     CPU_MASK_NONE, "ipi", NULL, NULL};
+static struct irqaction irq_ipi  = {
+	.handler = crisv32_ipi_interrupt,
+	.flags = IRQF_DISABLED,
+	.mask = CPU_MASK_NONE,
+	.name = "ipi",
+};
 
 extern void cris_mmu_init(void);
 extern void cris_timer_init(void);
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 8672ab7d797..8aab8143069 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -360,7 +360,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
 	up_read(&mm->mmap_sem);
 	printk("VM: killing process %s\n", tsk->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
  do_sigbus:
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index ed588d73d7d..e83e0bccfab 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -43,7 +43,10 @@ unsigned long __delay_loops_MHz;
 static irqreturn_t timer_interrupt(int irq, void *dummy);
 
 static struct irqaction timer_irq  = {
-	timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
+	.handler = timer_interrupt,
+	.flags = IRQF_DISABLED,
+	.mask = CPU_MASK_NONE,
+	.name = "timer",
 };
 
 static inline int set_rtc_mmss(unsigned long nowtime)
diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c
index 6798fa0257b..05093d41d98 100644
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -259,7 +259,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
 	up_read(&mm->mmap_sem);
 	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(__frame))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
  do_sigbus:
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2e6310b8eab..59b91ac861a 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -54,6 +54,11 @@ config ARCH_HAS_ILOG2_U64
 	bool
 	default n
 
+config HUGETLB_PAGE_SIZE_VARIABLE
+	bool
+	depends on HUGETLB_PAGE
+	default y
+
 config GENERIC_FIND_NEXT_BIT
 	bool
 	default y
@@ -300,6 +305,9 @@ config HOTPLUG_CPU
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+
 config SCHED_SMT
 	bool "SMT scheduler support"
 	depends on SMP
@@ -348,6 +356,7 @@ config ARCH_FLATMEM_ENABLE
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on ARCH_DISCONTIGMEM_ENABLE
+	select SPARSEMEM_VMEMMAP_ENABLE
 
 config ARCH_DISCONTIGMEM_DEFAULT
 	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB)
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index e980e7aa230..4338f4123f3 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -396,7 +396,7 @@ sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
 		printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
 		       startsg->dma_address, startsg->dma_length,
 		       sba_sg_address(startsg));
-		startsg++;
+		startsg = sg_next(startsg);
 	}
 }
 
@@ -409,7 +409,7 @@ sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
 	while (the_nents-- > 0) {
 		if (sba_sg_address(the_sg) == 0x0UL)
 			sba_dump_sg(NULL, startsg, nents);
-		the_sg++;
+		the_sg = sg_next(the_sg);
 	}
 }
 
@@ -1201,7 +1201,7 @@ sba_fill_pdir(
 			u32 pide = startsg->dma_address & ~PIDE_FLAG;
 			dma_offset = (unsigned long) pide & ~iovp_mask;
 			startsg->dma_address = 0;
-			dma_sg++;
+			dma_sg = sg_next(dma_sg);
 			dma_sg->dma_address = pide | ioc->ibase;
 			pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
 			n_mappings++;
@@ -1228,7 +1228,7 @@ sba_fill_pdir(
 				pdirp++;
 			} while (cnt > 0);
 		}
-		startsg++;
+		startsg = sg_next(startsg);
 	}
 	/* force pdir update */
 	wmb();
@@ -1297,7 +1297,7 @@ sba_coalesce_chunks( struct ioc *ioc,
 		while (--nents > 0) {
 			unsigned long vaddr;	/* tmp */
 
-			startsg++;
+			startsg = sg_next(startsg);
 
 			/* PARANOID */
 			startsg->dma_address = startsg->dma_length = 0;
@@ -1407,7 +1407,7 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
 #ifdef ALLOW_IOV_BYPASS_SG
 	ASSERT(to_pci_dev(dev)->dma_mask);
 	if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
-		for (sg = sglist ; filled < nents ; filled++, sg++){
+		for_each_sg(sglist, sg, nents, filled) {
 			sg->dma_length = sg->length;
 			sg->dma_address = virt_to_phys(sba_sg_address(sg));
 		}
@@ -1501,7 +1501,7 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in
 	while (nents && sglist->dma_length) {
 
 		sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
-		sglist++;
+		sglist = sg_next(sglist);
 		nents--;
 	}
 
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index d62fa76e5a7..a3a558a0675 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -360,6 +360,7 @@ static struct scsi_host_template driver_template = {
 	.max_sectors		= 1024,
 	.cmd_per_lun		= SIMSCSI_REQ_QUEUE_LEN,
 	.use_clustering		= DISABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 static int __init
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
index 6d198339bf8..44817d97ab4 100644
--- a/arch/ia64/kernel/gate.lds.S
+++ b/arch/ia64/kernel/gate.lds.S
@@ -1,7 +1,8 @@
 /*
- * Linker script for gate DSO.  The gate pages are an ELF shared object prelinked to its
- * virtual address, with only one read-only segment and one execute-only segment (both fit
- * in one page).  This script controls its layout.
+ * Linker script for gate DSO.  The gate pages are an ELF shared object
+ * prelinked to its virtual address, with only one read-only segment and
+ * one execute-only segment (both fit in one page).  This script controls
+ * its layout.
  */
 
 
@@ -9,72 +10,80 @@
 
 SECTIONS
 {
-  . = GATE_ADDR + SIZEOF_HEADERS;
-
-  .hash				: { *(.hash) }				:readable
-  .gnu.hash			: { *(.gnu.hash) }
-  .dynsym			: { *(.dynsym) }
-  .dynstr			: { *(.dynstr) }
-  .gnu.version			: { *(.gnu.version) }
-  .gnu.version_d		: { *(.gnu.version_d) }
-  .gnu.version_r		: { *(.gnu.version_r) }
-  .dynamic			: { *(.dynamic) }			:readable :dynamic
-
-  /*
-   * This linker script is used both with -r and with -shared.  For the layouts to match,
-   * we need to skip more than enough space for the dynamic symbol table et al.  If this
-   * amount is insufficient, ld -shared will barf.  Just increase it here.
-   */
-  . = GATE_ADDR + 0x500;
-
-  .data.patch			: {
-				    __start_gate_mckinley_e9_patchlist = .;
-				    *(.data.patch.mckinley_e9)
-				    __end_gate_mckinley_e9_patchlist = .;
-
-				    __start_gate_vtop_patchlist = .;
-				    *(.data.patch.vtop)
-				    __end_gate_vtop_patchlist = .;
-
-				    __start_gate_fsyscall_patchlist = .;
-				    *(.data.patch.fsyscall_table)
-				    __end_gate_fsyscall_patchlist = .;
-
-				    __start_gate_brl_fsys_bubble_down_patchlist = .;
-				    *(.data.patch.brl_fsys_bubble_down)
-				    __end_gate_brl_fsys_bubble_down_patchlist = .;
-  }									:readable
-  .IA_64.unwind_info		: { *(.IA_64.unwind_info*) }
-  .IA_64.unwind			: { *(.IA_64.unwind*) }			:readable :unwind
+	. = GATE_ADDR + SIZEOF_HEADERS;
+
+	.hash			: { *(.hash) }		:readable
+	.gnu.hash		: { *(.gnu.hash) }
+	.dynsym			: { *(.dynsym) }
+	.dynstr			: { *(.dynstr) }
+	.gnu.version		: { *(.gnu.version) }
+	.gnu.version_d		: { *(.gnu.version_d) }
+	.gnu.version_r		: { *(.gnu.version_r) }
+
+	.dynamic		: { *(.dynamic) }	:readable	:dynamic
+
+	/*
+	 * This linker script is used both with -r and with -shared.  For
+	 * the layouts to match, we need to skip more than enough space for
+	 * the dynamic symbol table et al.  If this amount is insufficient,
+	 * ld -shared will barf.  Just increase it here.
+	 */
+	. = GATE_ADDR + 0x500;
+
+	.data.patch		: {
+		__start_gate_mckinley_e9_patchlist = .;
+		*(.data.patch.mckinley_e9)
+		__end_gate_mckinley_e9_patchlist = .;
+
+		__start_gate_vtop_patchlist = .;
+		*(.data.patch.vtop)
+		__end_gate_vtop_patchlist = .;
+
+		__start_gate_fsyscall_patchlist = .;
+		*(.data.patch.fsyscall_table)
+		__end_gate_fsyscall_patchlist = .;
+
+		__start_gate_brl_fsys_bubble_down_patchlist = .;
+		*(.data.patch.brl_fsys_bubble_down)
+		__end_gate_brl_fsys_bubble_down_patchlist = .;
+	}						:readable
+
+	.IA_64.unwind_info	: { *(.IA_64.unwind_info*) }
+	.IA_64.unwind		: { *(.IA_64.unwind*) }	:readable	:unwind
 #ifdef HAVE_BUGGY_SEGREL
-  .text (GATE_ADDR + PAGE_SIZE)	: { *(.text) *(.text.*) }		:readable
+	.text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) }	:readable
 #else
-  . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
-  .text				: { *(.text) *(.text.*) }		:epc
+	. = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
+	.text			: { *(.text) *(.text.*) }	:epc
 #endif
 
-  /DISCARD/			: {
-  	*(.got.plt) *(.got)
-	*(.data .data.* .gnu.linkonce.d.*)
-	*(.dynbss)
-	*(.bss .bss.* .gnu.linkonce.b.*)
-	*(__ex_table)
-	*(__mca_table)
-  }
+	/DISCARD/		: {
+		*(.got.plt) *(.got)
+		*(.data .data.* .gnu.linkonce.d.*)
+		*(.dynbss)
+		*(.bss .bss.* .gnu.linkonce.b.*)
+		*(__ex_table)
+		*(__mca_table)
+	}
 }
 
 /*
+ * ld does not recognize this name token; use the constant.
+ */
+#define	PT_IA_64_UNWIND	0x70000001
+
+/*
  * We must supply the ELF program headers explicitly to get just one
  * PT_LOAD segment, and set the flags explicitly to make segments read-only.
  */
 PHDRS
 {
-  readable  PT_LOAD	FILEHDR	PHDRS	FLAGS(4);	/* PF_R */
+	readable	PT_LOAD	FILEHDR	PHDRS	FLAGS(4);	/* PF_R */
 #ifndef HAVE_BUGGY_SEGREL
-  epc	    PT_LOAD	FILEHDR PHDRS	FLAGS(1);	/* PF_X */
+	epc		PT_LOAD	FILEHDR PHDRS	FLAGS(1);	/* PF_X */
 #endif
-  dynamic   PT_DYNAMIC			FLAGS(4);	/* PF_R */
-  unwind    0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
+	dynamic		PT_DYNAMIC		FLAGS(4);	/* PF_R */
+	unwind		PT_IA_64_UNWIND;
 }
 
 /*
@@ -82,14 +91,14 @@ PHDRS
  */
 VERSION
 {
-  LINUX_2.5 {
-    global:
-	__kernel_syscall_via_break;
-	__kernel_syscall_via_epc;
-	__kernel_sigtramp;
-
-    local: *;
-  };
+	LINUX_2.5 {
+	global:
+		__kernel_syscall_via_break;
+		__kernel_syscall_via_epc;
+		__kernel_sigtramp;
+
+	local: *;
+	};
 }
 
 /* The ELF entry point can be used to set the AT_SYSINFO value.  */
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 5dc98b5abcf..5fd65d8302c 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -40,6 +40,8 @@ extern void jprobe_inst_return(void);
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
 enum instruction_type {A, I, M, F, B, L, X, u};
 static enum instruction_type bundle_encoding[32][3] = {
   { M, I, I },				/* 00 */
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 9e392a30d19..777c8d8bd5e 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -528,10 +528,6 @@ setup_arch (char **cmdline_p)
 
 #ifdef CONFIG_SMP
 	cpu_physical_id(0) = hard_smp_processor_id();
-
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
-
 	check_for_logical_procs();
 	if (smp_num_cpucores > 1)
 		printk(KERN_INFO
@@ -873,6 +869,14 @@ cpu_init (void)
 	void *cpu_data;
 
 	cpu_data = per_cpu_init();
+	/*
+	 * insert boot cpu into sibling and core maps
+	 * (must be done after per_cpu area is setup)
+	 */
+	if (smp_processor_id() == 0) {
+		cpu_set(0, per_cpu(cpu_sibling_map, 0));
+		cpu_set(0, cpu_core_map[0]);
+	}
 
 	/*
 	 * We set ar.k3 so that assembly code in MCA handler can compute
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 308772f7cdd..c57dbce25c1 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -138,7 +138,9 @@ cpumask_t cpu_possible_map = CPU_MASK_NONE;
 EXPORT_SYMBOL(cpu_possible_map);
 
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
+DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+
 int smp_num_siblings = 1;
 int smp_num_cpucores = 1;
 
@@ -650,12 +652,12 @@ clear_cpu_sibling_map(int cpu)
 {
 	int i;
 
-	for_each_cpu_mask(i, cpu_sibling_map[cpu])
-		cpu_clear(cpu, cpu_sibling_map[i]);
+	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
 	for_each_cpu_mask(i, cpu_core_map[cpu])
 		cpu_clear(cpu, cpu_core_map[i]);
 
-	cpu_sibling_map[cpu] = cpu_core_map[cpu] = CPU_MASK_NONE;
+	per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE;
 }
 
 static void
@@ -666,7 +668,7 @@ remove_siblinginfo(int cpu)
 	if (cpu_data(cpu)->threads_per_core == 1 &&
 	    cpu_data(cpu)->cores_per_socket == 1) {
 		cpu_clear(cpu, cpu_core_map[cpu]);
-		cpu_clear(cpu, cpu_sibling_map[cpu]);
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, cpu));
 		return;
 	}
 
@@ -807,8 +809,8 @@ set_cpu_sibling_map(int cpu)
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(cpu, cpu_core_map[i]);
 			if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) {
-				cpu_set(i, cpu_sibling_map[cpu]);
-				cpu_set(cpu, cpu_sibling_map[i]);
+				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
 			}
 		}
 	}
@@ -839,7 +841,7 @@ __cpu_up (unsigned int cpu)
 
 	if (cpu_data(cpu)->threads_per_core == 1 &&
 	    cpu_data(cpu)->cores_per_socket == 1) {
-		cpu_set(cpu, cpu_sibling_map[cpu]);
+		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 		cpu_set(cpu, cpu_core_map[cpu]);
 		return 0;
 	}
diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c
index c58e933694d..a7be4f20342 100644
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@@ -196,7 +196,7 @@ unsigned long uncached_alloc_page(int starting_nid)
 	nid = starting_nid;
 
 	do {
-		if (!node_online(nid))
+		if (!node_state(nid, N_HIGH_MEMORY))
 			continue;
 		uc_pool = &uncached_pools[nid];
 		if (uc_pool->pool == NULL)
@@ -268,7 +268,7 @@ static int __init uncached_init(void)
 {
 	int nid;
 
-	for_each_online_node(nid) {
+	for_each_node_state(nid, N_ONLINE) {
 		uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid);
 		mutex_init(&uncached_pools[nid].add_chunk_mutex);
 	}
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 0d34585058c..5628067a74d 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -715,3 +715,11 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
 	scatter_node_data();
 }
 #endif
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit vmemmap_populate(struct page *start_page,
+						unsigned long size, int node)
+{
+	return vmemmap_populate_basepages(start_page, size, node);
+}
+#endif
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 9150ffaff9e..32f26253c4e 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -281,6 +281,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 	}
 	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 }
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index a9ff685aea2..d3ce8f3bcaa 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -194,6 +194,6 @@ static int __init hugetlb_setup_sz(char *str)
 	 * override here with new page shift.
 	 */
 	ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
-	return 1;
+	return 0;
 }
-__setup("hugepagesz=", hugetlb_setup_sz);
+early_param("hugepagesz", hugetlb_setup_sz);
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index c14abefabaf..3e10152abbf 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -54,15 +54,12 @@ struct page *zero_page_memmap_ptr;	/* map entry for zero page */
 EXPORT_SYMBOL(zero_page_memmap_ptr);
 
 void
-lazy_mmu_prot_update (pte_t pte)
+__ia64_sync_icache_dcache (pte_t pte)
 {
 	unsigned long addr;
 	struct page *page;
 	unsigned long order;
 
-	if (!pte_exec(pte))
-		return;				/* not an executable page... */
-
 	page = pte_page(pte);
 	addr = (unsigned long) page_address(page);
 
@@ -721,10 +718,21 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
 	return ret;
 }
-
+#ifdef CONFIG_MEMORY_HOTREMOVE
 int remove_memory(u64 start, u64 size)
 {
-	return -EINVAL;
+	unsigned long start_pfn, end_pfn;
+	unsigned long timeout = 120 * HZ;
+	int ret;
+	start_pfn = start >> PAGE_SHIFT;
+	end_pfn = start_pfn + (size >> PAGE_SHIFT);
+	ret = offline_pages(start_pfn, end_pfn, timeout);
+	if (ret)
+		goto out;
+	/* we can free mem_map at this point */
+out:
+	return ret;
 }
 EXPORT_SYMBOL_GPL(remove_memory);
+#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index d79ddacfba2..ecd8a52b9b9 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -218,16 +218,17 @@ EXPORT_SYMBOL(sn_dma_unmap_single);
  *
  * Unmap a set of streaming mode DMA translations.
  */
-void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
 		     int nhwentries, int direction)
 {
 	int i;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+	struct scatterlist *sg;
 
 	BUG_ON(dev->bus != &pci_bus_type);
 
-	for (i = 0; i < nhwentries; i++, sg++) {
+	for_each_sg(sgl, sg, nhwentries, i) {
 		provider->dma_unmap(pdev, sg->dma_address, direction);
 		sg->dma_address = (dma_addr_t) NULL;
 		sg->dma_length = 0;
@@ -244,11 +245,11 @@ EXPORT_SYMBOL(sn_dma_unmap_sg);
  *
  * Maps each entry of @sg for DMA.
  */
-int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
+int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
 		  int direction)
 {
 	unsigned long phys_addr;
-	struct scatterlist *saved_sg = sg;
+	struct scatterlist *saved_sg = sgl, *sg;
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
 	int i;
@@ -258,7 +259,7 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
 	/*
 	 * Setup a DMA address for each entry in the scatterlist.
 	 */
-	for (i = 0; i < nhwentries; i++, sg++) {
+	for_each_sg(sgl, sg, nhwentries, i) {
 		phys_addr = SG_ENT_PHYS_ADDRESS(sg);
 		sg->dma_address = provider->dma_map(pdev,
 						    phys_addr, sg->length,
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 62a51429306..ed4d0756c5d 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -570,7 +570,7 @@ withdraw_debug_trap(struct pt_regs *regs)
 	}
 }
 
-static void
+void
 init_debug_traps(struct task_struct *child)
 {
 	struct debug_trap *p = &child->thread.debug_trap;
@@ -593,8 +593,8 @@ void ptrace_disable(struct task_struct *child)
 	/* nothing to do.. */
 }
 
-static int
-do_ptrace(long request, struct task_struct *child, long addr, long data)
+long
+arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	int ret;
 
@@ -704,14 +704,6 @@ do_ptrace(long request, struct task_struct *child, long addr, long data)
 		break;
 	}
 
-	/*
-	 * detach a process that was attached.
-	 */
-	case PTRACE_DETACH:
-		ret = 0;
-		ret = ptrace_detach(child, data);
-		break;
-
 	case PTRACE_GETREGS:
 		ret = ptrace_getregs(child, (void __user *)data);
 		break;
@@ -728,42 +720,6 @@ do_ptrace(long request, struct task_struct *child, long addr, long data)
 	return ret;
 }
 
-asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
-{
-	struct task_struct *child;
-	int ret;
-
-	lock_kernel();
-	if (request == PTRACE_TRACEME) {
-		ret = ptrace_traceme();
-		goto out;
-	}
-
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child)) {
-		ret = PTR_ERR(child);
-		goto out;
-	}
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		if (ret == 0)
-			init_debug_traps(child);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret == 0)
-		ret = do_ptrace(request, child, addr, data);
-
-out_tsk:
-	put_task_struct(child);
-out:
-	unlock_kernel();
-
-	return ret;
-}
-
 /* notification of system call entry/exit
  * - triggered by current->work.syscall_trace
  */
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index 3858c9f39ba..994cc155635 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -228,8 +228,12 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE,
-			  "MFT2", NULL, NULL };
+struct irqaction irq0 = {
+	.handler = timer_interrupt,
+	.flags = IRQF_DISABLED,
+	.mask = CPU_MASK_NONE,
+	.name = "MFT2",
+};
 
 void __init time_init(void)
 {
diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c
index 676a1c443d2..70a766aad3e 100644
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -278,7 +278,7 @@ out_of_memory:
 	}
 	printk("VM: killing process %s\n", tsk->comm);
 	if (error_code & ACE_USERMODE)
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index e792d3cba4c..2075543c2d9 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -226,10 +226,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		wake_up_process(child);
 		break;
 
-	case PTRACE_DETACH:	/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
 	case PTRACE_GETREGS:	/* Get all gp regs from the child. */
 		for (i = 0; i < 19; i++) {
 			tmp = get_reg(child, i);
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index 578b48f47b9..eaa61868115 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -188,7 +188,7 @@ out_of_memory:
 
 	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 
 no_context:
 	current->thread.signo = SIGBUS;
diff --git a/arch/mips/jmr3927/rbhma3100/setup.c b/arch/mips/jmr3927/rbhma3100/setup.c
index 7f14f70a1b8..0c7aee1682c 100644
--- a/arch/mips/jmr3927/rbhma3100/setup.c
+++ b/arch/mips/jmr3927/rbhma3100/setup.c
@@ -425,7 +425,7 @@ static int __init jmr3927_rtc_init(void)
 		.flags	= IORESOURCE_MEM,
 	};
 	struct platform_device *dev;
-	dev = platform_device_register_simple("ds1742", -1, &res, 1);
+	dev = platform_device_register_simple("rtc-ds1742", -1, &res, 1);
 	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
 }
 device_initcall(jmr3927_rtc_init);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 58aa6fec114..999f7853de2 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -435,10 +435,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		wake_up_process(child);
 		break;
 
-	case PTRACE_DETACH: /* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
 	case PTRACE_GET_THREAD_AREA:
 		ret = put_user(task_thread_info(child)->tp_value,
 				(unsigned long __user *) data);
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 521771b373d..5699c7713e2 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -180,7 +180,7 @@ out_of_memory:
 	}
 	printk("VM: killing process %s\n", tsk->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
index acaf613358c..b97102a1c63 100644
--- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
+++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
@@ -963,7 +963,7 @@ static int __init toshiba_rbtx4927_rtc_init(void)
 		.flags	= IORESOURCE_MEM,
 	};
 	struct platform_device *dev =
-		platform_device_register_simple("ds1742", -1, &res, 1);
+		platform_device_register_simple("rtc-ds1742", -1, &res, 1);
 	return IS_ERR(dev) ? PTR_ERR(dev) : 0;
 }
 device_initcall(toshiba_rbtx4927_rtc_init);
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 26ec774c502..49c63797078 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -329,10 +329,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		/* give it a chance to run. */
 		goto out_wake;
 
-	case PTRACE_DETACH:
-		ret = ptrace_detach(child, data);
-		goto out_tsk;
-
 	case PTRACE_GETEVENTMSG:
                 ret = put_user(child->ptrace_message, (unsigned int __user *) data);
 		goto out_tsk;
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 7899ab87785..1c091b415cd 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -263,6 +263,6 @@ no_context:
 	up_read(&mm->mmap_sem);
 	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 }
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 037664d496d..5e001ad588a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -295,6 +295,7 @@ config ARCH_FLATMEM_ENABLE
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on PPC64
+	select SPARSEMEM_VMEMMAP_ENABLE
 
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
index 7b0e754383c..9001104b56b 100644
--- a/arch/powerpc/kernel/dma_64.c
+++ b/arch/powerpc/kernel/dma_64.c
@@ -154,12 +154,13 @@ static void dma_direct_unmap_single(struct device *dev, dma_addr_t dma_addr,
 {
 }
 
-static int dma_direct_map_sg(struct device *dev, struct scatterlist *sg,
+static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 			     int nents, enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
 	int i;
 
-	for (i = 0; i < nents; i++, sg++) {
+	for_each_sg(sgl, sg, nents, i) {
 		sg->dma_address = (page_to_phys(sg->page) + sg->offset) |
 			dma_direct_offset;
 		sg->dma_length = sg->length;
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 53bf64623bd..2e16ca5778a 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -87,15 +87,16 @@ static void ibmebus_unmap_single(struct device *dev,
 }
 
 static int ibmebus_map_sg(struct device *dev,
-			  struct scatterlist *sg,
+			  struct scatterlist *sgl,
 			  int nents, enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
 	int i;
 
-	for (i = 0; i < nents; i++) {
-		sg[i].dma_address = (dma_addr_t)page_address(sg[i].page)
-			+ sg[i].offset;
-		sg[i].dma_length = sg[i].length;
+	for_each_sg(sgl, sg, nents, i) {
+		sg->dma_address = (dma_addr_t)page_address(sg->page)
+			+ sg->offset;
+		sg->dma_length = sg->length;
 	}
 
 	return nents;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index e4ec6eee81a..306a6f75b6c 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -277,7 +277,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 	dma_addr_t dma_next = 0, dma_addr;
 	unsigned long flags;
 	struct scatterlist *s, *outs, *segstart;
-	int outcount, incount;
+	int outcount, incount, i;
 	unsigned long handle;
 
 	BUG_ON(direction == DMA_NONE);
@@ -297,7 +297,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 
 	spin_lock_irqsave(&(tbl->it_lock), flags);
 
-	for (s = outs; nelems; nelems--, s++) {
+	for_each_sg(sglist, s, nelems, i) {
 		unsigned long vaddr, npages, entry, slen;
 
 		slen = s->length;
@@ -341,7 +341,8 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 			if (novmerge || (dma_addr != dma_next)) {
 				/* Can't merge: create a new segment */
 				segstart = s;
-				outcount++; outs++;
+				outcount++;
+				outs = sg_next(outs);
 				DBG("    can't merge, new segment.\n");
 			} else {
 				outs->dma_length += s->length;
@@ -374,7 +375,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 	 * next entry of the sglist if we didn't fill the list completely
 	 */
 	if (outcount < incount) {
-		outs++;
+		outs = sg_next(outs);
 		outs->dma_address = DMA_ERROR_CODE;
 		outs->dma_length = 0;
 	}
@@ -385,7 +386,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 	return outcount;
 
  failure:
-	for (s = &sglist[0]; s <= outs; s++) {
+	for_each_sg(sglist, s, nelems, i) {
 		if (s->dma_length != 0) {
 			unsigned long vaddr, npages;
 
@@ -395,6 +396,8 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 			s->dma_address = DMA_ERROR_CODE;
 			s->dma_length = 0;
 		}
+		if (s == outs)
+			break;
 	}
 	spin_unlock_irqrestore(&(tbl->it_lock), flags);
 	return 0;
@@ -404,6 +407,7 @@ int iommu_map_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 		int nelems, enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
 	unsigned long flags;
 
 	BUG_ON(direction == DMA_NONE);
@@ -413,15 +417,16 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 
 	spin_lock_irqsave(&(tbl->it_lock), flags);
 
+	sg = sglist;
 	while (nelems--) {
 		unsigned int npages;
-		dma_addr_t dma_handle = sglist->dma_address;
+		dma_addr_t dma_handle = sg->dma_address;
 
-		if (sglist->dma_length == 0)
+		if (sg->dma_length == 0)
 			break;
-		npages = iommu_num_pages(dma_handle,sglist->dma_length);
+		npages = iommu_num_pages(dma_handle, sg->dma_length);
 		__iommu_free(tbl, dma_handle, npages);
-		sglist++;
+		sg = sg_next(sg);
 	}
 
 	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 440f5a87271..5338e485571 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -38,6 +38,8 @@
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	int ret = 0;
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index cf7732cdd6c..3e17d154d0d 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -505,10 +505,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = ptrace_set_debugreg(child, addr, data);
 		break;
 
-	case PTRACE_DETACH:
-		ret = ptrace_detach(child, data);
-		break;
-
 #ifdef CONFIG_PPC64
 	case PTRACE_GETREGS64:
 #endif
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 36c90ba2d31..2de00f870ed 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -413,16 +413,28 @@ void __init smp_setup_cpu_maps(void)
 		of_node_put(dn);
 	}
 
+	vdso_data->processorCount = num_present_cpus();
+#endif /* CONFIG_PPC64 */
+}
+
+/*
+ * cpu_sibling_map is now a per_cpu variable, so it cannot be
+ * initialized until the per_cpu areas have been created.  This
+ * function is therefore called from setup_per_cpu_areas().
+ */
+void __init smp_setup_cpu_sibling_map(void)
+{
+#if defined(CONFIG_PPC64)
+	int cpu;
+
 	/*
 	 * Do the sibling map; assume only two threads per processor.
 	 */
 	for_each_possible_cpu(cpu) {
-		cpu_set(cpu, cpu_sibling_map[cpu]);
+		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 		if (cpu_has_feature(CPU_FTR_SMT))
-			cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]);
+			cpu_set(cpu ^ 0x1, per_cpu(cpu_sibling_map, cpu));
 	}
-
-	vdso_data->processorCount = num_present_cpus();
 #endif /* CONFIG_PPC64 */
 }
 #endif /* CONFIG_SMP */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 008ab6823b0..0e014550b83 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -597,6 +597,9 @@ void __init setup_per_cpu_areas(void)
 		paca[i].data_offset = ptr - __per_cpu_start;
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 	}
+
+	/* Now that per_cpu is set up, initialize cpu_sibling_map */
+	smp_setup_cpu_sibling_map();
 }
 #endif
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index d30f08fa029..338950aeb6f 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -61,11 +61,11 @@ struct thread_info *secondary_ti;
 
 cpumask_t cpu_possible_map = CPU_MASK_NONE;
 cpumask_t cpu_online_map = CPU_MASK_NONE;
-cpumask_t cpu_sibling_map[NR_CPUS] = { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 
 EXPORT_SYMBOL(cpu_online_map);
 EXPORT_SYMBOL(cpu_possible_map);
-EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 
 /* SMP operations for this machine */
 struct smp_ops_t *smp_ops;
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 26e138c4ce1..9352ab5200e 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -1,130 +1,147 @@
-
 /*
  * This is the infamous ld script for the 32 bits vdso
  * library
  */
 #include <asm/vdso.h>
 
-/* Default link addresses for the vDSOs */
 OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
 OUTPUT_ARCH(powerpc:common)
 ENTRY(_start)
 
 SECTIONS
 {
-  . = VDSO32_LBASE + SIZEOF_HEADERS;
-  .hash           : { *(.hash) }			:text
-  .gnu.hash       : { *(.gnu.hash) }
-  .dynsym         : { *(.dynsym) }
-  .dynstr         : { *(.dynstr) }
-  .gnu.version    : { *(.gnu.version) }
-  .gnu.version_d  : { *(.gnu.version_d) }
-  .gnu.version_r  : { *(.gnu.version_r) }
-
-  .note		  : { *(.note.*) } 			:text	:note
-
-  . = ALIGN (16);
-  .text :
-  {
-    *(.text .stub .text.* .gnu.linkonce.t.*)
-  }
-  PROVIDE (__etext = .);
-  PROVIDE (_etext = .);
-  PROVIDE (etext = .);
-
-  . = ALIGN(8);
-  __ftr_fixup : {
-    *(__ftr_fixup)
-  }
+	. = VDSO32_LBASE + SIZEOF_HEADERS;
+
+	.hash          	: { *(.hash) }			:text
+	.gnu.hash      	: { *(.gnu.hash) }
+	.dynsym        	: { *(.dynsym) }
+	.dynstr        	: { *(.dynstr) }
+	.gnu.version   	: { *(.gnu.version) }
+	.gnu.version_d 	: { *(.gnu.version_d) }
+	.gnu.version_r 	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+	.text		: {
+		*(.text .stub .text.* .gnu.linkonce.t.*)
+	}
+	PROVIDE(__etext = .);
+	PROVIDE(_etext = .);
+	PROVIDE(etext = .);
+
+	. = ALIGN(8);
+	__ftr_fixup	: { *(__ftr_fixup) }
 
 #ifdef CONFIG_PPC64
-  . = ALIGN(8);
-  __fw_ftr_fixup : {
-    *(__fw_ftr_fixup)
-  }
+	. = ALIGN(8);
+	__fw_ftr_fixup	: { *(__fw_ftr_fixup) }
 #endif
 
-  /* Other stuff is appended to the text segment: */
-  .rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
-  .rodata1		: { *(.rodata1) }
-
-  .eh_frame_hdr		: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
-  .eh_frame		: { KEEP (*(.eh_frame)) }	:text
-  .gcc_except_table	: { *(.gcc_except_table) }
-  .fixup		: { *(.fixup) }
-
-  .dynamic		: { *(.dynamic) }		:text	:dynamic
-  .got : { *(.got) }
-  .plt : { *(.plt) }
-
-  _end = .;
-  __end = .;
-  PROVIDE (end = .);
-
-
-  /* Stabs debugging sections are here too
-   */
-  .stab 0 : { *(.stab) }
-  .stabstr 0 : { *(.stabstr) }
-  .stab.excl 0 : { *(.stab.excl) }
-  .stab.exclstr 0 : { *(.stab.exclstr) }
-  .stab.index 0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  .comment 0 : { *(.comment) }
-  .debug 0 : { *(.debug) }
-  .line 0 : { *(.line) }
-
-  .debug_srcinfo 0 : { *(.debug_srcinfo) }
-  .debug_sfnames 0 : { *(.debug_sfnames) }
-
-  .debug_aranges 0 : { *(.debug_aranges) }
-  .debug_pubnames 0 : { *(.debug_pubnames) }
-
-  .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
-  .debug_abbrev 0 : { *(.debug_abbrev) }
-  .debug_line 0 : { *(.debug_line) }
-  .debug_frame 0 : { *(.debug_frame) }
-  .debug_str 0 : { *(.debug_str) }
-  .debug_loc 0 : { *(.debug_loc) }
-  .debug_macinfo 0 : { *(.debug_macinfo) }
-
-  .debug_weaknames 0 : { *(.debug_weaknames) }
-  .debug_funcnames 0 : { *(.debug_funcnames) }
-  .debug_typenames 0 : { *(.debug_typenames) }
-  .debug_varnames 0 : { *(.debug_varnames) }
-
-  /DISCARD/ : { *(.note.GNU-stack) }
-  /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) }
-  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+	/*
+	 * Other stuff is appended to the text segment:
+	 */
+	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+	.rodata1	: { *(.rodata1) }
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.gcc_except_table : { *(.gcc_except_table) }
+	.fixup		: { *(.fixup) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+	.got		: { *(.got) }
+	.plt		: { *(.plt) }
+
+	_end = .;
+	__end = .;
+	PROVIDE(end = .);
+
+	/*
+	 * Stabs debugging sections are here too.
+	 */
+	.stab 0 : { *(.stab) }
+	.stabstr 0 : { *(.stabstr) }
+	.stab.excl 0 : { *(.stab.excl) }
+	.stab.exclstr 0 : { *(.stab.exclstr) }
+	.stab.index 0 : { *(.stab.index) }
+	.stab.indexstr 0 : { *(.stab.indexstr) }
+	.comment       0 : { *(.comment) }
+
+	/*
+	 * DWARF debug sections.
+	 * Symbols in the DWARF debugging sections are relative to the beginning
+	 * of the section so we begin them at 0.
+	 */
+	/* DWARF 1 */
+	.debug          0 : { *(.debug) }
+	.line           0 : { *(.line) }
+	/* GNU DWARF 1 extensions */
+	.debug_srcinfo  0 : { *(.debug_srcinfo) }
+	.debug_sfnames  0 : { *(.debug_sfnames) }
+	/* DWARF 1.1 and DWARF 2 */
+	.debug_aranges  0 : { *(.debug_aranges) }
+	.debug_pubnames 0 : { *(.debug_pubnames) }
+	/* DWARF 2 */
+	.debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
+	.debug_abbrev   0 : { *(.debug_abbrev) }
+	.debug_line     0 : { *(.debug_line) }
+	.debug_frame    0 : { *(.debug_frame) }
+	.debug_str      0 : { *(.debug_str) }
+	.debug_loc      0 : { *(.debug_loc) }
+	.debug_macinfo  0 : { *(.debug_macinfo) }
+	/* SGI/MIPS DWARF 2 extensions */
+	.debug_weaknames 0 : { *(.debug_weaknames) }
+	.debug_funcnames 0 : { *(.debug_funcnames) }
+	.debug_typenames 0 : { *(.debug_typenames) }
+	.debug_varnames  0 : { *(.debug_varnames) }
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
 }
 
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
 
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
 PHDRS
 {
-  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
-  note PT_NOTE FLAGS(4); /* PF_R */
-  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
-  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
 }
 
-
 /*
  * This controls what symbols we export from the DSO.
  */
 VERSION
 {
-  VDSO_VERSION_STRING {
-    global:
-	__kernel_datapage_offset; /* Has to be there for the kernel to find */
-	__kernel_get_syscall_map;
-	__kernel_gettimeofday;
-	__kernel_clock_gettime;
-	__kernel_clock_getres;
-	__kernel_get_tbfreq;
-	__kernel_sync_dicache;
-	__kernel_sync_dicache_p5;
-	__kernel_sigtramp32;
-	__kernel_sigtramp_rt32;
-    local: *;
-  };
+	VDSO_VERSION_STRING {
+	global:
+		/*
+		 * Has to be there for the kernel to find
+		 */
+		__kernel_datapage_offset;
+
+		__kernel_get_syscall_map;
+		__kernel_gettimeofday;
+		__kernel_clock_gettime;
+		__kernel_clock_getres;
+		__kernel_get_tbfreq;
+		__kernel_sync_dicache;
+		__kernel_sync_dicache_p5;
+		__kernel_sigtramp32;
+		__kernel_sigtramp_rt32;
+
+	local: *;
+	};
 }
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 2d70f35d50b..932b3fdb34b 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -10,100 +10,114 @@ ENTRY(_start)
 
 SECTIONS
 {
-  . = VDSO64_LBASE + SIZEOF_HEADERS;
-  .hash           : { *(.hash) }		:text
-  .gnu.hash       : { *(.gnu.hash) }
-  .dynsym         : { *(.dynsym) }
-  .dynstr         : { *(.dynstr) }
-  .gnu.version    : { *(.gnu.version) }
-  .gnu.version_d  : { *(.gnu.version_d) }
-  .gnu.version_r  : { *(.gnu.version_r) }
-
-  .note		  : { *(.note.*) }		:text	:note
-
-  . = ALIGN (16);
-  .text           :
-  {
-    *(.text .stub .text.* .gnu.linkonce.t.*)
-    *(.sfpr .glink)
-  }						:text
-  PROVIDE (__etext = .);
-  PROVIDE (_etext = .);
-  PROVIDE (etext = .);
-
-  . = ALIGN(8);
-  __ftr_fixup : {
-    *(__ftr_fixup)
-  }
-
-  . = ALIGN(8);
-  __fw_ftr_fixup : {
-    *(__fw_ftr_fixup)
-  }
-
-  /* Other stuff is appended to the text segment: */
-  .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
-  .rodata1        : { *(.rodata1) }
-  .eh_frame_hdr   : { *(.eh_frame_hdr) }	:text	:eh_frame_hdr
-  .eh_frame       : { KEEP (*(.eh_frame)) }	:text
-  .gcc_except_table   : { *(.gcc_except_table) }
-
-  .opd           ALIGN(8) : { KEEP (*(.opd)) }
-  .got		 ALIGN(8) : { *(.got .toc) }
-  .rela.dyn	 ALIGN(8) : { *(.rela.dyn) }
-
-  .dynamic        : { *(.dynamic) }		:text	:dynamic
-
-  _end = .;
-  PROVIDE (end = .);
-
-  /* Stabs debugging sections are here too
-   */
-  .stab          0 : { *(.stab) }
-  .stabstr       0 : { *(.stabstr) }
-  .stab.excl     0 : { *(.stab.excl) }
-  .stab.exclstr  0 : { *(.stab.exclstr) }
-  .stab.index    0 : { *(.stab.index) }
-  .stab.indexstr 0 : { *(.stab.indexstr) }
-  .comment       0 : { *(.comment) }
-  /* DWARF debug sectio/ns.
-     Symbols in the DWARF debugging sections are relative to the beginning
-     of the section so we begin them at 0.  */
-  /* DWARF 1 */
-  .debug          0 : { *(.debug) }
-  .line           0 : { *(.line) }
-  /* GNU DWARF 1 extensions */
-  .debug_srcinfo  0 : { *(.debug_srcinfo) }
-  .debug_sfnames  0 : { *(.debug_sfnames) }
-  /* DWARF 1.1 and DWARF 2 */
-  .debug_aranges  0 : { *(.debug_aranges) }
-  .debug_pubnames 0 : { *(.debug_pubnames) }
-  /* DWARF 2 */
-  .debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
-  .debug_abbrev   0 : { *(.debug_abbrev) }
-  .debug_line     0 : { *(.debug_line) }
-  .debug_frame    0 : { *(.debug_frame) }
-  .debug_str      0 : { *(.debug_str) }
-  .debug_loc      0 : { *(.debug_loc) }
-  .debug_macinfo  0 : { *(.debug_macinfo) }
-  /* SGI/MIPS DWARF 2 extensions */
-  .debug_weaknames 0 : { *(.debug_weaknames) }
-  .debug_funcnames 0 : { *(.debug_funcnames) }
-  .debug_typenames 0 : { *(.debug_typenames) }
-  .debug_varnames  0 : { *(.debug_varnames) }
-
-  /DISCARD/ : { *(.note.GNU-stack) }
-  /DISCARD/ : { *(.branch_lt) }
-  /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.*) }
-  /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) }
+	. = VDSO64_LBASE + SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.note		: { *(.note.*) }		:text	:note
+
+	. = ALIGN(16);
+	.text		: {
+		*(.text .stub .text.* .gnu.linkonce.t.*)
+		*(.sfpr .glink)
+	}						:text
+	PROVIDE(__etext = .);
+	PROVIDE(_etext = .);
+	PROVIDE(etext = .);
+
+	. = ALIGN(8);
+	__ftr_fixup	: { *(__ftr_fixup) }
+
+	. = ALIGN(8);
+	__fw_ftr_fixup	: { *(__fw_ftr_fixup) }
+
+	/*
+	 * Other stuff is appended to the text segment:
+	 */
+	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+	.rodata1	: { *(.rodata1) }
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.gcc_except_table : { *(.gcc_except_table) }
+
+	.opd ALIGN(8)	: { KEEP (*(.opd)) }
+	.got ALIGN(8)	: { *(.got .toc) }
+	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	_end = .;
+	PROVIDE(end = .);
+
+	/*
+	 * Stabs debugging sections are here too.
+	 */
+	.stab          0 : { *(.stab) }
+	.stabstr       0 : { *(.stabstr) }
+	.stab.excl     0 : { *(.stab.excl) }
+	.stab.exclstr  0 : { *(.stab.exclstr) }
+	.stab.index    0 : { *(.stab.index) }
+	.stab.indexstr 0 : { *(.stab.indexstr) }
+	.comment       0 : { *(.comment) }
+
+	/*
+	 * DWARF debug sections.
+	 * Symbols in the DWARF debugging sections are relative to the beginning
+	 * of the section so we begin them at 0.
+	 */
+	/* DWARF 1 */
+	.debug          0 : { *(.debug) }
+	.line           0 : { *(.line) }
+	/* GNU DWARF 1 extensions */
+	.debug_srcinfo  0 : { *(.debug_srcinfo) }
+	.debug_sfnames  0 : { *(.debug_sfnames) }
+	/* DWARF 1.1 and DWARF 2 */
+	.debug_aranges  0 : { *(.debug_aranges) }
+	.debug_pubnames 0 : { *(.debug_pubnames) }
+	/* DWARF 2 */
+	.debug_info     0 : { *(.debug_info .gnu.linkonce.wi.*) }
+	.debug_abbrev   0 : { *(.debug_abbrev) }
+	.debug_line     0 : { *(.debug_line) }
+	.debug_frame    0 : { *(.debug_frame) }
+	.debug_str      0 : { *(.debug_str) }
+	.debug_loc      0 : { *(.debug_loc) }
+	.debug_macinfo  0 : { *(.debug_macinfo) }
+	/* SGI/MIPS DWARF 2 extensions */
+	.debug_weaknames 0 : { *(.debug_weaknames) }
+	.debug_funcnames 0 : { *(.debug_funcnames) }
+	.debug_typenames 0 : { *(.debug_typenames) }
+	.debug_varnames  0 : { *(.debug_varnames) }
+
+	/DISCARD/	: {
+		*(.note.GNU-stack)
+		*(.branch_lt)
+		*(.data .data.* .gnu.linkonce.d.* .sdata*)
+		*(.bss .sbss .dynbss .dynsbss)
+	}
 }
 
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
 PHDRS
 {
-  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
-  note PT_NOTE FLAGS(4); /* PF_R */
-  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
-  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
 }
 
 /*
@@ -111,17 +125,22 @@ PHDRS
  */
 VERSION
 {
-  VDSO_VERSION_STRING {
-    global:
-	__kernel_datapage_offset; /* Has to be there for the kernel to find */
-	__kernel_get_syscall_map;
-    	__kernel_gettimeofday;
-	__kernel_clock_gettime;
-	__kernel_clock_getres;
-	__kernel_get_tbfreq;
-	__kernel_sync_dicache;
-	__kernel_sync_dicache_p5;
-	__kernel_sigtramp_rt64;
-    local: *;
-  };
+	VDSO_VERSION_STRING {
+	global:
+		/*
+		 * Has to be there for the kernel to find
+		 */
+		__kernel_datapage_offset;
+
+		__kernel_get_syscall_map;
+		__kernel_gettimeofday;
+		__kernel_clock_gettime;
+		__kernel_clock_getres;
+		__kernel_get_tbfreq;
+		__kernel_sync_dicache;
+		__kernel_sync_dicache_p5;
+		__kernel_sigtramp_rt64;
+
+	local: *;
+	};
 }
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fa90f6561b9..29ed495444f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -183,3 +183,70 @@ void pgtable_cache_init(void)
 						     zero_ctor);
 	}
 }
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * Given @page, an address (not a struct page pointer) within the vmemmap,
+ * return the pfn of the first page of the section that address falls in.
+ * Integer arithmetic is used because the address may not be struct page
+ * aligned, and subtracting misaligned pointers produces undefined results.
+ */
+unsigned long __meminit vmemmap_section_start(unsigned long page)
+{
+	unsigned long offset = page - ((unsigned long)(vmemmap));
+
+	/* struct page index, masked down to the start of its section. */
+	return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
+}
+
+/*
+ * Check whether the vmemmap page [start, start + page_size) is already
+ * initialised.  Stepping one section's worth of struct pages at a time,
+ * return 1 as soon as a section start pfn is valid, otherwise return 0.
+ */
+int __meminit vmemmap_populated(unsigned long start, int page_size)
+{
+	unsigned long end = start + page_size;
+
+	for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
+		if (pfn_valid(vmemmap_section_start(start)))
+			return 1;
+
+	return 0;
+}
+
+/*
+ * Allocate and map (via htab_bolt_mapping()) every not-yet-populated page of
+ * the vmemmap covering start_page[0..nr_pages); returns 0 or -ENOMEM.
+ */
+int __meminit vmemmap_populate(struct page *start_page,
+					unsigned long nr_pages, int node)
+{
+	unsigned long start = (unsigned long)start_page;
+	unsigned long end = (unsigned long)(start_page + nr_pages);
+	unsigned long page_size = 1UL << mmu_psize_defs[mmu_linear_psize].shift;
+	unsigned long mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY |
+					_PAGE_COHERENT | PP_RWXX;
+
+	/* Align to the page size of the linear mapping. */
+	start = _ALIGN_DOWN(start, page_size);
+	for (; start < end; start += page_size) {
+		int mapped;
+		void *p;
+
+		if (vmemmap_populated(start, page_size))
+			continue;
+
+		p = vmemmap_alloc_block(page_size, node);
+		if (!p)
+			return -ENOMEM;
+
+		printk(KERN_WARNING "vmemmap %08lx allocated at %p, "
+					"physical %08lx.\n", start, p, __pa(p));
+		mapped = htab_bolt_mapping(start, start + page_size,
+					__pa(p), mode_rw, mmu_linear_psize);
+		BUG_ON(mapped < 0);
+	}
+	return 0;
+}
+#endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 32dcfc9b008..81eb96ec13b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -129,51 +129,6 @@ int __devinit arch_add_memory(int nid, u64 start, u64 size)
 	return __add_pages(zone, start_pfn, nr_pages);
 }
 
-/*
- * First pass at this code will check to determine if the remove
- * request is within the RMO.  Do not allow removal within the RMO.
- */
-int __devinit remove_memory(u64 start, u64 size)
-{
-	struct zone *zone;
-	unsigned long start_pfn, end_pfn, nr_pages;
-
-	start_pfn = start >> PAGE_SHIFT;
-	nr_pages = size >> PAGE_SHIFT;
-	end_pfn = start_pfn + nr_pages;
-
-	printk("%s(): Attempting to remove memoy in range "
-			"%lx to %lx\n", __func__, start, start+size);
-	/*
-	 * check for range within RMO
-	 */
-	zone = page_zone(pfn_to_page(start_pfn));
-
-	printk("%s(): memory will be removed from "
-			"the %s zone\n", __func__, zone->name);
-
-	/*
-	 * not handling removing memory ranges that
-	 * overlap multiple zones yet
-	 */
-	if (end_pfn > (zone->zone_start_pfn + zone->spanned_pages))
-		goto overlap;
-
-	/* make sure it is NOT in RMO */
-	if ((start < lmb.rmo_size) || ((start+size) < lmb.rmo_size)) {
-		printk("%s(): range to be removed must NOT be in RMO!\n",
-			__func__);
-		goto in_rmo;
-	}
-
-	return __remove_pages(zone, start_pfn, nr_pages);
-
-overlap:
-	printk("%s(): memory range to be removed overlaps "
-		"multiple zones!!!\n", __func__);
-in_rmo:
-	return -1;
-}
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 void show_mem(void)
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c
index 5123e9d4164..13d5a87f13b 100644
--- a/arch/powerpc/platforms/cell/cbe_cpufreq.c
+++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c
@@ -117,7 +117,7 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	policy->cur = cbe_freqs[cur_pmode].frequency;
 
 #ifdef CONFIG_SMP
-	policy->cpus = cpu_sibling_map[policy->cpu];
+	policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
 #endif
 
 	cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 190ff4b59a5..07e64b48e7f 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -616,17 +616,18 @@ static void ps3_unmap_single(struct device *_dev, dma_addr_t dma_addr,
 	}
 }
 
-static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sg, int nents,
-	enum dma_data_direction direction)
+static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl,
+	int nents, enum dma_data_direction direction)
 {
 #if defined(CONFIG_PS3_DYNAMIC_DMA)
 	BUG_ON("do");
 	return -EPERM;
 #else
 	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	struct scatterlist *sg;
 	int i;
 
-	for (i = 0; i < nents; i++, sg++) {
+	for_each_sg(sgl, sg, nents, i) {
 		int result = ps3_dma_map(dev->d_region,
 			page_to_phys(sg->page) + sg->offset, sg->length,
 					 &sg->dma_address, 0);
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
index b98244e277f..94913ddcf76 100644
--- a/arch/ppc/mm/fault.c
+++ b/arch/ppc/mm/fault.c
@@ -297,7 +297,7 @@ out_of_memory:
 	}
 	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	return SIGKILL;
 
 do_sigbus:
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index e40373d9fbc..c5549a20628 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -33,6 +33,8 @@
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	/* Make sure the probe isn't going on a difficult instruction */
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index f4503ca2763..1d81bf9488a 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -683,11 +683,6 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
 		wake_up_process(child);
 		return 0;
 
-	case PTRACE_DETACH:
-		/* detach a process that was attached. */
-		return ptrace_detach(child, data);
-
-
 	/* Do requests that differ for 31/64 bit */
 	default:
 #ifdef CONFIG_COMPAT
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 4c1ac341ec8..14c241ccdd4 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -218,7 +218,7 @@ static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
 	}
 	printk("VM: killing process %s\n", tsk->comm);
 	if (regs->psw.mask & PSW_MASK_PSTATE)
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	do_no_context(regs, error_code, address);
 	return 0;
 }
diff --git a/arch/sh/kernel/ptrace.c b/arch/sh/kernel/ptrace.c
index f64a2d2416d..ac725f0aeb7 100644
--- a/arch/sh/kernel/ptrace.c
+++ b/arch/sh/kernel/ptrace.c
@@ -211,10 +211,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 	}
 
-	case PTRACE_DETACH: /* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
 #ifdef CONFIG_SH_DSP
 	case PTRACE_GETDSPREGS: {
 		unsigned long dp;
diff --git a/arch/sh/kernel/vsyscall/vsyscall.lds.S b/arch/sh/kernel/vsyscall/vsyscall.lds.S
index b13c3d439fe..c9bf2af35d3 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.lds.S
+++ b/arch/sh/kernel/vsyscall/vsyscall.lds.S
@@ -17,45 +17,52 @@ ENTRY(__kernel_vsyscall);
 
 SECTIONS
 {
-  . = SIZEOF_HEADERS;
+	. = SIZEOF_HEADERS;
 
-  .hash           : { *(.hash) }		:text
-  .gnu.hash       : { *(.gnu.hash) }
-  .dynsym         : { *(.dynsym) }
-  .dynstr         : { *(.dynstr) }
-  .gnu.version    : { *(.gnu.version) }
-  .gnu.version_d  : { *(.gnu.version_d) }
-  .gnu.version_r  : { *(.gnu.version_r) }
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
 
-  /* This linker script is used both with -r and with -shared.
-     For the layouts to match, we need to skip more than enough
-     space for the dynamic symbol table et al.  If this amount
-     is insufficient, ld -shared will barf.  Just increase it here.  */
-  . = 0x400;
+	/*
+	 * This linker script is used both with -r and with -shared.
+	 * For the layouts to match, we need to skip more than enough
+	 * space for the dynamic symbol table et al.  If this amount
+	 * is insufficient, ld -shared will barf.  Just increase it here.
+	 */
+	. = 0x400;
 
-  .text           : { *(.text) }		:text =0x90909090
-  .note		  : { *(.note.*) }		:text :note
-  .eh_frame_hdr   : { *(.eh_frame_hdr) }	:text :eh_frame_hdr
-  .eh_frame       : { KEEP (*(.eh_frame)) }	:text
-  .dynamic        : { *(.dynamic) }		:text :dynamic
-  .useless        : {
-  	*(.got.plt) *(.got)
-	*(.data .data.* .gnu.linkonce.d.*)
-	*(.dynbss)
-	*(.bss .bss.* .gnu.linkonce.b.*)
-  }						:text
+	.text		: { *(.text) } 			:text	=0x90909090
+	.note		: { *(.note.*) }		:text	:note
+	.eh_frame_hdr	: { *(.eh_frame_hdr ) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+	.useless	: {
+	      *(.got.plt) *(.got)
+	      *(.data .data.* .gnu.linkonce.d.*)
+	      *(.dynbss)
+	      *(.bss .bss.* .gnu.linkonce.b.*)
+	}						:text
 }
 
 /*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
  * We must supply the ELF program headers explicitly to get just one
  * PT_LOAD segment, and set the flags explicitly to make segments read-only.
  */
 PHDRS
 {
-  text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
-  dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
-  note PT_NOTE FLAGS(4); /* PF_R */
-  eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
+	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
+	note		PT_NOTE FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
 }
 
 /*
@@ -63,12 +70,12 @@ PHDRS
  */
 VERSION
 {
-  LINUX_2.6 {
-    global:
-    	__kernel_vsyscall;
-    	__kernel_sigreturn;
-    	__kernel_rt_sigreturn;
+	LINUX_2.6 {
+	global:
+		__kernel_vsyscall;
+		__kernel_sigreturn;
+		__kernel_rt_sigreturn;
 
-    local: *;
-  };
+	local: *;
+	};
 }
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 04a39aa7f1f..4729668ce5b 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -214,7 +214,7 @@ out_of_memory:
 	}
 	printk("VM: killing process %s\n", tsk->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 82b68c789a5..d5e160da64b 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -294,12 +294,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
-int remove_memory(u64 start, u64 size)
-{
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
-
 #ifdef CONFIG_NUMA
 int memory_add_physaddr_to_nid(u64 addr)
 {
diff --git a/arch/sh64/kernel/ptrace.c b/arch/sh64/kernel/ptrace.c
index df06c647746..8a2d339cf76 100644
--- a/arch/sh64/kernel/ptrace.c
+++ b/arch/sh64/kernel/ptrace.c
@@ -244,10 +244,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 	}
 
-	case PTRACE_DETACH: /* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		break;
diff --git a/arch/sh64/mm/fault.c b/arch/sh64/mm/fault.c
index 0d069d82141..dd81c669c79 100644
--- a/arch/sh64/mm/fault.c
+++ b/arch/sh64/mm/fault.c
@@ -334,7 +334,7 @@ out_of_memory:
 	}
 	printk("VM: killing process %s\n", tsk->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 62182d2d7b0..9c3ed88853f 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/pci.h>		/* struct pci_dev */
 #include <linux/proc_fs.h>
+#include <linux/scatterlist.h>
 
 #include <asm/io.h>
 #include <asm/vaddrs.h>
@@ -717,19 +718,19 @@ void pci_unmap_page(struct pci_dev *hwdev,
  * Device ownership issues as mentioned above for pci_map_single are
  * the same here.
  */
-int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents,
+int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents,
     int direction)
 {
+	struct scatterlist *sg;
 	int n;
 
 	BUG_ON(direction == PCI_DMA_NONE);
 	/* IIep is write-through, not flushing. */
-	for (n = 0; n < nents; n++) {
+	for_each_sg(sgl, sg, nents, n) {
 		BUG_ON(page_address(sg->page) == NULL);
 		sg->dvma_address =
 			virt_to_phys(page_address(sg->page)) + sg->offset;
 		sg->dvma_length = sg->length;
-		sg++;
 	}
 	return nents;
 }
@@ -738,19 +739,19 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents,
  * Again, cpu read rules concerning calls here are the same as for
  * pci_unmap_single() above.
  */
-void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents,
+void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents,
     int direction)
 {
+	struct scatterlist *sg;
 	int n;
 
 	BUG_ON(direction == PCI_DMA_NONE);
 	if (direction != PCI_DMA_TODEVICE) {
-		for (n = 0; n < nents; n++) {
+		for_each_sg(sgl, sg, nents, n) {
 			BUG_ON(page_address(sg->page) == NULL);
 			mmu_inval_dma_area(
 			    (unsigned long) page_address(sg->page),
 			    (sg->length + PAGE_SIZE-1) & PAGE_MASK);
-			sg++;
 		}
 	}
 }
@@ -789,34 +790,34 @@ void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t ba, size_t
  * The same as pci_dma_sync_single_* but for a scatter-gather list,
  * same rules and usage.
  */
-void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction)
 {
+	struct scatterlist *sg;
 	int n;
 
 	BUG_ON(direction == PCI_DMA_NONE);
 	if (direction != PCI_DMA_TODEVICE) {
-		for (n = 0; n < nents; n++) {
+		for_each_sg(sgl, sg, nents, n) {
 			BUG_ON(page_address(sg->page) == NULL);
 			mmu_inval_dma_area(
 			    (unsigned long) page_address(sg->page),
 			    (sg->length + PAGE_SIZE-1) & PAGE_MASK);
-			sg++;
 		}
 	}
 }
 
-void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction)
+void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction)
 {
+	struct scatterlist *sg;
 	int n;
 
 	BUG_ON(direction == PCI_DMA_NONE);
 	if (direction != PCI_DMA_TODEVICE) {
-		for (n = 0; n < nents; n++) {
+		for_each_sg(sgl, sg, nents, n) {
 			BUG_ON(page_address(sg->page) == NULL);
 			mmu_inval_dma_area(
 			    (unsigned long) page_address(sg->page),
 			    (sg->length + PAGE_SIZE-1) & PAGE_MASK);
-			sg++;
 		}
 	}
 }
diff --git a/arch/sparc/mm/fault.c b/arch/sparc/mm/fault.c
index 50747fe4435..e4d9c8e19df 100644
--- a/arch/sparc/mm/fault.c
+++ b/arch/sparc/mm/fault.c
@@ -369,7 +369,7 @@ out_of_memory:
 	up_read(&mm->mmap_sem);
 	printk("VM: killing process %s\n", tsk->comm);
 	if (from_user)
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
index 7c89893b1fe..375b4db6370 100644
--- a/arch/sparc/mm/io-unit.c
+++ b/arch/sparc/mm/io-unit.c
@@ -11,8 +11,8 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>	/* pte_offset_map => kmap_atomic */
 #include <linux/bitops.h>
+#include <linux/scatterlist.h>
 
-#include <asm/scatterlist.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/sbus.h>
@@ -144,8 +144,9 @@ static void iounit_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus
 	spin_lock_irqsave(&iounit->lock, flags);
 	while (sz != 0) {
 		--sz;
-		sg[sz].dvma_address = iounit_get_area(iounit, (unsigned long)page_address(sg[sz].page) + sg[sz].offset, sg[sz].length);
-		sg[sz].dvma_length = sg[sz].length;
+		sg->dvma_address = iounit_get_area(iounit, (unsigned long)page_address(sg->page) + sg->offset, sg->length);
+		sg->dvma_length = sg->length;
+		sg = sg_next(sg);
 	}
 	spin_unlock_irqrestore(&iounit->lock, flags);
 }
@@ -173,11 +174,12 @@ static void iounit_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_
 	spin_lock_irqsave(&iounit->lock, flags);
 	while (sz != 0) {
 		--sz;
-		len = ((sg[sz].dvma_address & ~PAGE_MASK) + sg[sz].length + (PAGE_SIZE-1)) >> PAGE_SHIFT;
-		vaddr = (sg[sz].dvma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT;
+		len = ((sg->dvma_address & ~PAGE_MASK) + sg->length + (PAGE_SIZE-1)) >> PAGE_SHIFT;
+		vaddr = (sg->dvma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT;
 		IOD(("iounit_release %08lx-%08lx\n", (long)vaddr, (long)len+vaddr));
 		for (len += vaddr; vaddr < len; vaddr++)
 			clear_bit(vaddr, iounit->bmap);
+		sg = sg_next(sg);
 	}
 	spin_unlock_irqrestore(&iounit->lock, flags);
 }
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index 52e907af9d2..283656d9f6e 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -12,8 +12,8 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/highmem.h>	/* pte_offset_map => kmap_atomic */
+#include <linux/scatterlist.h>
 
-#include <asm/scatterlist.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/sbus.h>
@@ -240,7 +240,7 @@ static void iommu_get_scsi_sgl_noflush(struct scatterlist *sg, int sz, struct sb
 		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
 		sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
 		sg->dvma_length = (__u32) sg->length;
-		sg++;
+		sg = sg_next(sg);
 	}
 }
 
@@ -254,7 +254,7 @@ static void iommu_get_scsi_sgl_gflush(struct scatterlist *sg, int sz, struct sbu
 		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
 		sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
 		sg->dvma_length = (__u32) sg->length;
-		sg++;
+		sg = sg_next(sg);
 	}
 }
 
@@ -285,7 +285,7 @@ static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct sbu
 
 		sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
 		sg->dvma_length = (__u32) sg->length;
-		sg++;
+		sg = sg_next(sg);
 	}
 }
 
@@ -325,7 +325,7 @@ static void iommu_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_b
 		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
 		iommu_release_one(sg->dvma_address & PAGE_MASK, n, sbus);
 		sg->dvma_address = 0x21212121;
-		sg++;
+		sg = sg_next(sg);
 	}
 }
 
diff --git a/arch/sparc/mm/sun4c.c b/arch/sparc/mm/sun4c.c
index 005a3e72d4f..ee6708fc449 100644
--- a/arch/sparc/mm/sun4c.c
+++ b/arch/sparc/mm/sun4c.c
@@ -17,8 +17,8 @@
 #include <linux/highmem.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
+#include <linux/scatterlist.h>
 
-#include <asm/scatterlist.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
@@ -1228,8 +1228,9 @@ static void sun4c_get_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus *
 {
 	while (sz != 0) {
 		--sz;
-		sg[sz].dvma_address = (__u32)sun4c_lockarea(page_address(sg[sz].page) + sg[sz].offset, sg[sz].length);
-		sg[sz].dvma_length = sg[sz].length;
+		sg->dvma_address = (__u32)sun4c_lockarea(page_address(sg->page) + sg->offset, sg->length);
+		sg->dvma_length = sg->length;
+		sg = sg_next(sg);
 	}
 }
 
@@ -1244,7 +1245,8 @@ static void sun4c_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_b
 {
 	while (sz != 0) {
 		--sz;
-		sun4c_unlockarea((char *)sg[sz].dvma_address, sg[sz].length);
+		sun4c_unlockarea((char *)sg->dvma_address, sg->length);
+		sg = sg_next(sg);
 	}
 }
 
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 33dabf588bd..2f22fa90461 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -240,10 +240,10 @@ config ARCH_SELECT_MEMORY_MODEL
 
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
+	select SPARSEMEM_VMEMMAP_ENABLE
 
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
-	select SPARSEMEM_STATIC
 
 source "mm/Kconfig"
 
diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c
index b35a62167e9..db3ffcf7a12 100644
--- a/arch/sparc64/kernel/iommu.c
+++ b/arch/sparc64/kernel/iommu.c
@@ -10,6 +10,7 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
+#include <linux/scatterlist.h>
 
 #ifdef CONFIG_PCI
 #include <linux/pci.h>
@@ -480,7 +481,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
 			   unsigned long iopte_protection)
 {
 	struct scatterlist *dma_sg = sg;
-	struct scatterlist *sg_end = sg + nelems;
+	struct scatterlist *sg_end = sg_last(sg, nelems);
 	int i;
 
 	for (i = 0; i < nused; i++) {
@@ -515,7 +516,7 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
 					len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
 					break;
 				}
-				sg++;
+				sg = sg_next(sg);
 			}
 
 			pteval = iopte_protection | (pteval & IOPTE_PAGE);
@@ -528,24 +529,24 @@ static inline void fill_sg(iopte_t *iopte, struct scatterlist *sg,
 			}
 
 			pteval = (pteval & IOPTE_PAGE) + len;
-			sg++;
+			sg = sg_next(sg);
 
 			/* Skip over any tail mappings we've fully mapped,
 			 * adjusting pteval along the way.  Stop when we
 			 * detect a page crossing event.
 			 */
-			while (sg < sg_end &&
+			while (sg != sg_end &&
 			       (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
 			       (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
 			       ((pteval ^
 				 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
 				pteval += sg->length;
-				sg++;
+				sg = sg_next(sg);
 			}
 			if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
 				pteval = ~0UL;
 		} while (dma_npages != 0);
-		dma_sg++;
+		dma_sg = sg_next(dma_sg);
 	}
 }
 
@@ -606,7 +607,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
 	sgtmp = sglist;
 	while (used && sgtmp->dma_length) {
 		sgtmp->dma_address += dma_base;
-		sgtmp++;
+		sgtmp = sg_next(sgtmp);
 		used--;
 	}
 	used = nelems - used;
@@ -642,6 +643,7 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	struct strbuf *strbuf;
 	iopte_t *base;
 	unsigned long flags, ctx, i, npages;
+	struct scatterlist *sg, *sgprv;
 	u32 bus_addr;
 
 	if (unlikely(direction == DMA_NONE)) {
@@ -654,11 +656,14 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
 
 	bus_addr = sglist->dma_address & IO_PAGE_MASK;
 
-	for (i = 1; i < nelems; i++)
-		if (sglist[i].dma_length == 0)
+	sgprv = NULL;
+	for_each_sg(sglist, sg, nelems, i) {
+		if (sg->dma_length == 0)
 			break;
-	i--;
-	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
+		sgprv = sg;
+	}
+
+	npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length) -
 		  bus_addr) >> IO_PAGE_SHIFT;
 
 	base = iommu->page_table +
@@ -730,6 +735,7 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
 	struct iommu *iommu;
 	struct strbuf *strbuf;
 	unsigned long flags, ctx, npages, i;
+	struct scatterlist *sg, *sgprv;
 	u32 bus_addr;
 
 	iommu = dev->archdata.iommu;
@@ -753,11 +759,14 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev,
 
 	/* Step 2: Kick data out of streaming buffers. */
 	bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
-	for(i = 1; i < nelems; i++)
-		if (!sglist[i].dma_length)
+	sgprv = NULL;
+	for_each_sg(sglist, sg, nelems, i) {
+		if (sg->dma_length == 0)
 			break;
-	i--;
-	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
+		sgprv = sg;
+	}
+
+	npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length)
 		  - bus_addr) >> IO_PAGE_SHIFT;
 	strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index c93a15b785f..d94f901d321 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -42,6 +42,8 @@
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
 	p->ainsn.insn[0] = *p->addr;
diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S
index d4024ac0d61..964527d2ffa 100644
--- a/arch/sparc64/kernel/ktlb.S
+++ b/arch/sparc64/kernel/ktlb.S
@@ -226,6 +226,15 @@ kvmap_dtlb_load:
 	ba,pt		%xcc, sun4v_dtlb_load
 	 mov		%g5, %g3
 
+kvmap_vmemmap:				/* entered from kvmap_dtlb_nonlinear when %g4 >= the base built in %g5 */
+	sub		%g4, %g5, %g5	/* %g5 = %g4 - %g5: byte offset above that base */
+	srlx		%g5, 22, %g5	/* offset >> 22: index of the 4MB chunk */
+	sethi		%hi(vmemmap_table), %g1
+	sllx		%g5, 3, %g5	/* index * 8: byte offset into vmemmap_table */
+	or		%g1, %lo(vmemmap_table), %g1	/* %g1 = address of vmemmap_table */
+	ba,pt		%xcc, kvmap_dtlb_load
+	 ldx		[%g1 + %g5], %g5	/* branch delay slot: %g5 = the table entry */
+
 kvmap_dtlb_nonlinear:
 	/* Catch kernel NULL pointer derefs.  */
 	sethi		%hi(PAGE_SIZE), %g5
@@ -233,6 +242,13 @@ kvmap_dtlb_nonlinear:
 	bleu,pn		%xcc, kvmap_dtlb_longpath
 	 nop
 
+	/* Do not use the TSB for vmemmap.  */
+	mov		(VMEMMAP_BASE >> 24), %g5
+	sllx		%g5, 24, %g5
+	cmp		%g4,%g5
+	bgeu,pn		%xcc, kvmap_vmemmap
+	 nop
+
 	KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
 
 kvmap_dtlb_tsbmiss:
diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c
index 95de1444ee6..cacacfae545 100644
--- a/arch/sparc64/kernel/pci_sun4v.c
+++ b/arch/sparc64/kernel/pci_sun4v.c
@@ -13,6 +13,7 @@
 #include <linux/irq.h>
 #include <linux/msi.h>
 #include <linux/log2.h>
+#include <linux/scatterlist.h>
 
 #include <asm/iommu.h>
 #include <asm/irq.h>
@@ -373,7 +374,7 @@ static inline long fill_sg(long entry, struct device *dev,
 			   int nused, int nelems, unsigned long prot)
 {
 	struct scatterlist *dma_sg = sg;
-	struct scatterlist *sg_end = sg + nelems;
+	struct scatterlist *sg_end = sg_last(sg, nelems);
 	unsigned long flags;
 	int i;
 
@@ -413,7 +414,7 @@ static inline long fill_sg(long entry, struct device *dev,
 					len -= (IO_PAGE_SIZE - (tmp & (IO_PAGE_SIZE - 1UL)));
 					break;
 				}
-				sg++;
+				sg = sg_next(sg);
 			}
 
 			pteval = (pteval & IOPTE_PAGE);
@@ -431,24 +432,25 @@ static inline long fill_sg(long entry, struct device *dev,
 			}
 
 			pteval = (pteval & IOPTE_PAGE) + len;
-			sg++;
+			sg = sg_next(sg);
 
 			/* Skip over any tail mappings we've fully mapped,
 			 * adjusting pteval along the way.  Stop when we
 			 * detect a page crossing event.
 			 */
-			while (sg < sg_end &&
-			       (pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
+			while ((pteval << (64 - IO_PAGE_SHIFT)) != 0UL &&
 			       (pteval == SG_ENT_PHYS_ADDRESS(sg)) &&
 			       ((pteval ^
 				 (SG_ENT_PHYS_ADDRESS(sg) + sg->length - 1UL)) >> IO_PAGE_SHIFT) == 0UL) {
 				pteval += sg->length;
-				sg++;
+				if (sg == sg_end)
+					break;
+				sg = sg_next(sg);
 			}
 			if ((pteval << (64 - IO_PAGE_SHIFT)) == 0UL)
 				pteval = ~0UL;
 		} while (dma_npages != 0);
-		dma_sg++;
+		dma_sg = sg_next(dma_sg);
 	}
 
 	if (unlikely(iommu_batch_end() < 0L))
@@ -510,7 +512,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
 	sgtmp = sglist;
 	while (used && sgtmp->dma_length) {
 		sgtmp->dma_address += dma_base;
-		sgtmp++;
+		sgtmp = sg_next(sgtmp);
 		used--;
 	}
 	used = nelems - used;
@@ -545,6 +547,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	struct pci_pbm_info *pbm;
 	struct iommu *iommu;
 	unsigned long flags, i, npages;
+	struct scatterlist *sg, *sgprv;
 	long entry;
 	u32 devhandle, bus_addr;
 
@@ -558,12 +561,15 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	devhandle = pbm->devhandle;
 	
 	bus_addr = sglist->dma_address & IO_PAGE_MASK;
-
-	for (i = 1; i < nelems; i++)
-		if (sglist[i].dma_length == 0)
+	sgprv = NULL;
+	for_each_sg(sglist, sg, nelems, i) {
+		if (sg->dma_length == 0)
 			break;
-	i--;
-	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length) -
+
+		sgprv = sg;
+	}
+
+	npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length) -
 		  bus_addr) >> IO_PAGE_SHIFT;
 
 	entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index c73b7a48b03..407d74a8a54 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -52,14 +52,13 @@ int sparc64_multi_core __read_mostly;
 
 cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
 cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
-	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
 	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
 
 EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_SYMBOL(cpu_online_map);
-EXPORT_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_SYMBOL(cpu_core_map);
 
 static cpumask_t smp_commenced_mask;
@@ -1261,16 +1260,16 @@ void __devinit smp_fill_in_sib_core_maps(void)
 	for_each_present_cpu(i) {
 		unsigned int j;
 
-		cpus_clear(cpu_sibling_map[i]);
+		cpus_clear(per_cpu(cpu_sibling_map, i));
 		if (cpu_data(i).proc_id == -1) {
-			cpu_set(i, cpu_sibling_map[i]);
+			cpu_set(i, per_cpu(cpu_sibling_map, i));
 			continue;
 		}
 
 		for_each_present_cpu(j) {
 			if (cpu_data(i).proc_id ==
 			    cpu_data(j).proc_id)
-				cpu_set(j, cpu_sibling_map[i]);
+				cpu_set(j, per_cpu(cpu_sibling_map, i));
 		}
 	}
 }
@@ -1342,9 +1341,9 @@ int __cpu_disable(void)
 		cpu_clear(cpu, cpu_core_map[i]);
 	cpus_clear(cpu_core_map[cpu]);
 
-	for_each_cpu_mask(i, cpu_sibling_map[cpu])
-		cpu_clear(cpu, cpu_sibling_map[i]);
-	cpus_clear(cpu_sibling_map[cpu]);
+	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
+	cpus_clear(per_cpu(cpu_sibling_map, cpu));
 
 	c = &cpu_data(cpu);
 
diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c
index 9f7740eee8d..e2027f27c0f 100644
--- a/arch/sparc64/mm/fault.c
+++ b/arch/sparc64/mm/fault.c
@@ -463,7 +463,7 @@ out_of_memory:
 	up_read(&mm->mmap_sem);
 	printk("VM: killing process %s\n", current->comm);
 	if (!(regs->tstate & TSTATE_PRIV))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto handle_kernel_fault;
 
 intr_or_no_mm:
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index f0ab9aab308..100c4456ed1 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1645,6 +1645,58 @@ EXPORT_SYMBOL(_PAGE_E);
 unsigned long _PAGE_CACHE __read_mostly;
 EXPORT_SYMBOL(_PAGE_CACHE);
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+
+#define VMEMMAP_CHUNK_SHIFT	22	/* each table entry covers a 4MB chunk */
+#define VMEMMAP_CHUNK		(1UL << VMEMMAP_CHUNK_SHIFT)
+#define VMEMMAP_CHUNK_MASK	(~(VMEMMAP_CHUNK - 1UL))
+#define VMEMMAP_ALIGN(x)	(((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK)
+/* Chunks needed to hold one struct page (not a pointer) per possible pfn. */
+#define VMEMMAP_SIZE	((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \
+			  sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT)
+unsigned long vmemmap_table[VMEMMAP_SIZE];
+
+/*
+ * Back the struct pages [start, start + nr) with 4MB blocks allocated on
+ * @node, recording one PTE per chunk in vmemmap_table.  Chunks whose entry
+ * is already valid are left alone.  Returns 0, or -ENOMEM on failure.
+ */
+int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
+{
+	unsigned long phys_start = (unsigned long) start - VMEMMAP_BASE;
+	unsigned long phys_end = (unsigned long) (start + nr) - VMEMMAP_BASE;
+	unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK;
+	unsigned long end = VMEMMAP_ALIGN(phys_end);
+	unsigned long pte_base;
+
+	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U | _PAGE_CP_4U |
+		    _PAGE_CV_4U | _PAGE_P_4U | _PAGE_W_4U);
+	if (tlb_type == hypervisor)	/* the hypervisor case uses the _4V bits */
+		pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V | _PAGE_CP_4V |
+			    _PAGE_CV_4V | _PAGE_P_4V | _PAGE_W_4V);
+
+	for (; addr < end; addr += VMEMMAP_CHUNK) {
+		unsigned long *vmem_pp =
+			vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT);
+		void *block;
+
+		if (*vmem_pp & _PAGE_VALID)
+			continue;
+
+		block = vmemmap_alloc_block(VMEMMAP_CHUNK, node);
+		if (!block)
+			return -ENOMEM;
+
+		*vmem_pp = pte_base | __pa(block);
+		/* VMEMMAP_SIZE is already a count of entries; do not shift it. */
+		printk(KERN_INFO "[%p-%p] page_structs=%lu "
+		       "node=%d entry=%lu/%lu\n", start, block, nr, node,
+		       addr >> VMEMMAP_CHUNK_SHIFT, VMEMMAP_SIZE);
+	}
+	return 0;
+}
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
 static void prot_init_common(unsigned long page_none,
 			     unsigned long page_shared,
 			     unsigned long page_copy,
@@ -1909,9 +1961,4 @@ void online_page(struct page *page)
 	num_physpages++;
 }
 
-int remove_memory(u64 start, u64 size)
-{
-	return -EINVAL;
-}
-
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index e6ff3026654..740d8a922e4 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -55,6 +55,14 @@ config GENERIC_BUG
 	default y
 	depends on BUG
 
+config GENERIC_TIME
+	bool
+	default y
+
+config GENERIC_CLOCKEVENTS
+	bool
+	default y
+
 # Used in kernel/irq/manage.c and include/linux/irq.h
 config IRQ_RELEASE_METHOD
 	bool
@@ -62,63 +70,25 @@ config IRQ_RELEASE_METHOD
 
 menu "UML-specific options"
 
-config MODE_TT
-	bool "Tracing thread support (DEPRECATED)"
-	default n
-	depends on BROKEN
-	help
-	This option controls whether tracing thread support is compiled
-	into UML. This option is largely obsolete, given that skas0 provides
-	skas security and performance without needing to patch the host.
-	It is safe to say 'N' here; saying 'Y' may cause additional problems
-	with the resulting binary even if you run UML in SKAS mode, and running
-	in TT mode is strongly *NOT RECOMMENDED*.
-
 config STATIC_LINK
 	bool "Force a static link"
 	default n
-	depends on !MODE_TT
 	help
-	If CONFIG_MODE_TT is disabled, then this option gives you the ability
-	to force a static link of UML.  Normally, if only skas mode is built
-	in to UML, it will be linked as a shared binary.  This is inconvenient
-	for use in a chroot jail.  So, if you intend to run UML inside a
-	chroot, and you disable CONFIG_MODE_TT, you probably want to say Y
-	here.
-	Additionally, this option enables using higher memory spaces (up to
-	2.75G) for UML - disabling CONFIG_MODE_TT and enabling this option leads
-	to best results for this.
-
-config KERNEL_HALF_GIGS
-	int "Kernel address space size (in .5G units)"
-	default "1"
-	depends on MODE_TT
-	help
-        This determines the amount of address space that UML will allocate for
-        its own, measured in half Gigabyte units.  The default is 1.
-        Change this only if you need to boot UML with an unusually large amount
-        of physical memory.
-
-config MODE_SKAS
-	bool "Separate Kernel Address Space support" if MODE_TT
-	default y
-	help
-	This option controls whether skas (separate kernel address space)
-	support is compiled in.
-	Unless you have specific needs to use TT mode (which applies almost only
-	to developers), you should say Y here.
-	SKAS mode will make use of the SKAS3 patch if it is applied on the host
-	(and your UML will run in SKAS3 mode), but if no SKAS patch is applied
-	on the host it will run in SKAS0 mode, which is anyway faster than TT
-	mode.
+	  This option gives you the ability to force a static link of UML.
+	  Normally, UML is linked as a shared binary.  This is inconvenient for
+	  use in a chroot jail.  So, if you intend to run UML inside a chroot,
+	  you probably want to say Y here.
+	  Additionally, this option enables using higher memory spaces (up to
+	  2.75G) for UML.
 
 source "arch/um/Kconfig.arch"
 source "mm/Kconfig"
+source "kernel/time/Kconfig"
 
 config LD_SCRIPT_STATIC
 	bool
 	default y
-	depends on MODE_TT || STATIC_LINK
+	depends on STATIC_LINK
 
 config LD_SCRIPT_DYN
 	bool
@@ -128,18 +98,18 @@ config LD_SCRIPT_DYN
 config NET
 	bool "Networking support"
 	help
-	Unless you really know what you are doing, you should say Y here.
-	The reason is that some programs need kernel networking support even
-	when running on a stand-alone machine that isn't connected to any
-	other computer. If you are upgrading from an older kernel, you
-	should consider updating your networking tools too because changes
-	in the kernel and the tools often go hand in hand. The tools are
-	contained in the package net-tools, the location and version number
-	of which are given in <file:Documentation/Changes>.
+	  Unless you really know what you are doing, you should say Y here.
+	  The reason is that some programs need kernel networking support even
+	  when running on a stand-alone machine that isn't connected to any
+	  other computer. If you are upgrading from an older kernel, you
+	  should consider updating your networking tools too because changes
+	  in the kernel and the tools often go hand in hand. The tools are
+	  contained in the package net-tools, the location and version number
+	  of which are given in <file:Documentation/Changes>.
 
-	For a general introduction to Linux networking, it is highly
-	recommended to read the NET-HOWTO, available from
-	<http://www.tldp.org/docs.html#howto>.
+	  For a general introduction to Linux networking, it is highly
+	  recommended to read the NET-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
 
 
 source "fs/Kconfig.binfmt"
@@ -147,99 +117,99 @@ source "fs/Kconfig.binfmt"
 config HOSTFS
 	tristate "Host filesystem"
 	help
-        While the User-Mode Linux port uses its own root file system for
-        booting and normal file access, this module lets the UML user
-        access files stored on the host.  It does not require any
-        network connection between the Host and UML.  An example use of
-        this might be:
+          While the User-Mode Linux port uses its own root file system for
+          booting and normal file access, this module lets the UML user
+          access files stored on the host.  It does not require any
+          network connection between the Host and UML.  An example use of
+          this might be:
 
-        mount none /tmp/fromhost -t hostfs -o /tmp/umlshare
+          mount none /tmp/fromhost -t hostfs -o /tmp/umlshare
 
-        where /tmp/fromhost is an empty directory inside UML and
-        /tmp/umlshare is a directory on the host with files the UML user
-        wishes to access.
+          where /tmp/fromhost is an empty directory inside UML and
+          /tmp/umlshare is a directory on the host with files the UML user
+          wishes to access.
 
-        For more information, see
-        <http://user-mode-linux.sourceforge.net/hostfs.html>.
+          For more information, see
+          <http://user-mode-linux.sourceforge.net/hostfs.html>.
 
-        If you'd like to be able to work with files stored on the host,
-        say Y or M here; otherwise say N.
+          If you'd like to be able to work with files stored on the host,
+          say Y or M here; otherwise say N.
 
 config HPPFS
 	tristate "HoneyPot ProcFS (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
 	help
-	hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
-	entries to be overridden, removed, or fabricated from the host.
-	Its purpose is to allow a UML to appear to be a physical machine
-	by removing or changing anything in /proc which gives away the
-	identity of a UML.
+	  hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
+	  entries to be overridden, removed, or fabricated from the host.
+	  Its purpose is to allow a UML to appear to be a physical machine
+	  by removing or changing anything in /proc which gives away the
+	  identity of a UML.
 
-	See <http://user-mode-linux.sf.net/hppfs.html> for more information.
+	  See <http://user-mode-linux.sf.net/hppfs.html> for more information.
 
-	You only need this if you are setting up a UML honeypot.  Otherwise,
-	it is safe to say 'N' here.
+	  You only need this if you are setting up a UML honeypot.  Otherwise,
+	  it is safe to say 'N' here.
 
 config MCONSOLE
 	bool "Management console"
 	default y
 	help
-        The user mode linux management console is a low-level interface to
-        the kernel, somewhat like the i386 SysRq interface.  Since there is
-        a full-blown operating system running under every user mode linux
-        instance, there is much greater flexibility possible than with the
-        SysRq mechanism.
+          The user mode linux management console is a low-level interface to
+          the kernel, somewhat like the i386 SysRq interface.  Since there is
+          a full-blown operating system running under every user mode linux
+          instance, there is much greater flexibility possible than with the
+          SysRq mechanism.
 
-        If you answer 'Y' to this option, to use this feature, you need the
-        mconsole client (called uml_mconsole) which is present in CVS in
-        2.4.5-9um and later (path /tools/mconsole), and is also in the
-        distribution RPM package in 2.4.6 and later.
+          If you answer 'Y' to this option, to use this feature, you need the
+          mconsole client (called uml_mconsole) which is present in CVS in
+          2.4.5-9um and later (path /tools/mconsole), and is also in the
+          distribution RPM package in 2.4.6 and later.
 
-        It is safe to say 'Y' here.
+          It is safe to say 'Y' here.
 
 config MAGIC_SYSRQ
 	bool "Magic SysRq key"
 	depends on MCONSOLE
-	---help---
-	If you say Y here, you will have some control over the system even
-	if the system crashes for example during kernel debugging (e.g., you
-	will be able to flush the buffer cache to disk, reboot the system
-	immediately or dump some status information). A key for each of the
-	possible requests is provided.
+	help
+	  If you say Y here, you will have some control over the system even
+	  if the system crashes for example during kernel debugging (e.g., you
+	  will be able to flush the buffer cache to disk, reboot the system
+	  immediately or dump some status information). A key for each of the
+	  possible requests is provided.
 
-	This is the feature normally accomplished by pressing a key
-	while holding SysRq (Alt+PrintScreen).
+	  This is the feature normally accomplished by pressing a key
+	  while holding SysRq (Alt+PrintScreen).
 
-	On UML, this is accomplished by sending a "sysrq" command with
-	mconsole, followed by the letter for the requested command.
+	  On UML, this is accomplished by sending a "sysrq" command with
+	  mconsole, followed by the letter for the requested command.
 
-	The keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
-	unless you really know what this hack does.
+	  The keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
+	  unless you really know what this hack does.
 
 config SMP
 	bool "Symmetric multi-processing support (EXPERIMENTAL)"
 	default n
 	#SMP_BROKEN is for x86_64.
-	depends on MODE_TT && EXPERIMENTAL && (!SMP_BROKEN || (BROKEN && SMP_BROKEN))
+	depends on EXPERIMENTAL && (!SMP_BROKEN || (BROKEN && SMP_BROKEN))
 	help
-	This option enables UML SMP support.
-	It is NOT related to having a real SMP box. Not directly, at least.
+	  This option enables UML SMP support.
+	  It is NOT related to having a real SMP box. Not directly, at least.
 
-	UML implements virtual SMP by allowing as many processes to run
-	simultaneously on the host as there are virtual processors configured.
+	  UML implements virtual SMP by allowing as many processes to run
+	  simultaneously on the host as there are virtual processors configured.
 
-	Obviously, if the host is a uniprocessor, those processes will
-	timeshare, but, inside UML, will appear to be running simultaneously.
-	If the host is a multiprocessor, then UML processes may run
-	simultaneously, depending on the host scheduler.
+	  Obviously, if the host is a uniprocessor, those processes will
+	  timeshare, but, inside UML, will appear to be running simultaneously.
+	  If the host is a multiprocessor, then UML processes may run
+	  simultaneously, depending on the host scheduler.
 
-	This, however, is supported only in TT mode. So, if you use the SKAS
-	patch on your host, switching to TT mode and enabling SMP usually gives
-	you worse performances.
-	Also, since the support for SMP has been under-developed, there could
-	be some bugs being exposed by enabling SMP.
+	  This, however, is supported only in TT mode. So, if you use the SKAS
+	  patch on your host, switching to TT mode and enabling SMP usually
+	  gives you worse performance.
+	  Also, since the support for SMP has been under-developed, there could
+	  be some bugs being exposed by enabling SMP.
 
-	If you don't know what to do, say N.
+	  If you don't know what to do, say N.
 
 config NR_CPUS
 	int "Maximum number of CPUs (2-32)"
@@ -251,29 +221,24 @@ config NEST_LEVEL
 	int "Nesting level"
 	default "0"
 	help
-        This is set to the number of layers of UMLs that this UML will be run
-        in.  Normally, this is zero, meaning that it will run directly on the
-        host.  Setting it to one will build a UML that can run inside a UML
-        that is running on the host.  Generally, if you intend this UML to run
-        inside another UML, set CONFIG_NEST_LEVEL to one more than the host
-        UML.
-
-        Note that if the hosting UML has its CONFIG_KERNEL_HALF_GIGS set to
-        greater than one, then the guest UML should have its CONFIG_NEST_LEVEL
-        set to the host's CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS.
-        Only change this if you are running nested UMLs.
+          This is set to the number of layers of UMLs that this UML will be run
+          in.  Normally, this is zero, meaning that it will run directly on the
+          host.  Setting it to one will build a UML that can run inside a UML
+          that is running on the host.  Generally, if you intend this UML to run
+          inside another UML, set CONFIG_NEST_LEVEL to one more than the host
+          UML.
 
 config HIGHMEM
 	bool "Highmem support (EXPERIMENTAL)"
 	depends on !64BIT && EXPERIMENTAL
 	default n
 	help
-	This was used to allow UML to run with big amounts of memory.
-	Currently it is unstable, so if unsure say N.
+	  This was used to allow UML to run with big amounts of memory.
+	  Currently it is unstable, so if unsure say N.
 
-	To use big amounts of memory, it is recommended to disable TT mode (i.e.
-	CONFIG_MODE_TT) and enable static linking (i.e. CONFIG_STATIC_LINK) -
-	this should allow the guest to use up to 2.75G of memory.
+	  To use big amounts of memory, it is recommended to enable static
+	  linking (i.e. CONFIG_STATIC_LINK) - this should allow the
+	  guest to use up to 2.75G of memory.
 
 config KERNEL_STACK_ORDER
 	int "Kernel stack size order"
@@ -281,20 +246,9 @@ config KERNEL_STACK_ORDER
 	range 1 10 if 64BIT
 	default 0 if !64BIT
 	help
-	This option determines the size of UML kernel stacks.  They will
-	be 1 << order pages.  The default is OK unless you're running Valgrind
-	on UML, in which case, set this to 3.
-
-config UML_REAL_TIME_CLOCK
-	bool "Real-time Clock"
-	default y
-	help
-	This option makes UML time deltas match wall clock deltas.  This should
-	normally be enabled.  The exception would be if you are debugging with
-	UML and spend long times with UML stopped at a breakpoint.  In this
-	case, when UML is restarted, it will call the timer enough times to make
-	up for the time spent at the breakpoint.  This could result in a
-	noticeable lag.  If this is a problem, then disable this option.
+	  This option determines the size of UML kernel stacks.  They will
+	  be 1 << order pages.  The default is OK unless you're running Valgrind
+	  on UML, in which case, set this to 3.
 
 endmenu
 
diff --git a/arch/um/Kconfig.char b/arch/um/Kconfig.char
index a5b079d5e86..9a78d354f0b 100644
--- a/arch/um/Kconfig.char
+++ b/arch/um/Kconfig.char
@@ -5,7 +5,7 @@ config STDERR_CONSOLE
 	bool "stderr console"
 	default y
 	help
-	console driver which dumps all printk messages to stderr.
+	  console driver which dumps all printk messages to stderr.
 
 config STDIO_CONSOLE
 	bool
@@ -14,60 +14,58 @@ config STDIO_CONSOLE
 config SSL
 	bool "Virtual serial line"
 	help
-        The User-Mode Linux environment allows you to create virtual serial
-        lines on the UML that are usually made to show up on the host as
-        ttys or ptys.
+          The User-Mode Linux environment allows you to create virtual serial
+          lines on the UML that are usually made to show up on the host as
+          ttys or ptys.
 
-        See <http://user-mode-linux.sourceforge.net/input.html> for more
-        information and command line examples of how to use this facility.
+          See <http://user-mode-linux.sourceforge.net/input.html> for more
+          information and command line examples of how to use this facility.
 
-        Unless you have a specific reason for disabling this, say Y.
+          Unless you have a specific reason for disabling this, say Y.
 
 config NULL_CHAN
 	bool "null channel support"
 	help
-        This option enables support for attaching UML consoles and serial
-        lines to a device similar to /dev/null.  Data written to it disappears
-        and there is never any data to be read.
+          This option enables support for attaching UML consoles and serial
+          lines to a device similar to /dev/null.  Data written to it disappears
+          and there is never any data to be read.
 
 config PORT_CHAN
 	bool "port channel support"
 	help
-        This option enables support for attaching UML consoles and serial
-        lines to host portals.  They may be accessed with 'telnet <host>
-        <port number>'.  Any number of consoles and serial lines may be
-        attached to a single portal, although what UML device you get when
-        you telnet to that portal will be unpredictable.
-        It is safe to say 'Y' here.
+          This option enables support for attaching UML consoles and serial
+          lines to host portals.  They may be accessed with 'telnet <host>
+          <port number>'.  Any number of consoles and serial lines may be
+          attached to a single portal, although what UML device you get when
+          you telnet to that portal will be unpredictable.
+          It is safe to say 'Y' here.
 
 config PTY_CHAN
 	bool "pty channel support"
 	help
-        This option enables support for attaching UML consoles and serial
-        lines to host pseudo-terminals.  Access to both traditional
-        pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled
-        with this option.  The assignment of UML devices to host devices
-        will be announced in the kernel message log.
-        It is safe to say 'Y' here.
+          This option enables support for attaching UML consoles and serial
+          lines to host pseudo-terminals.  Access to both traditional
+          pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled
+          with this option.  The assignment of UML devices to host devices
+          will be announced in the kernel message log.
+          It is safe to say 'Y' here.
 
 config TTY_CHAN
 	bool "tty channel support"
 	help
-        This option enables support for attaching UML consoles and serial
-        lines to host terminals.  Access to both virtual consoles
-        (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and
-        /dev/pts/*) are controlled by this option.
-        It is safe to say 'Y' here.
+          This option enables support for attaching UML consoles and serial
+          lines to host terminals.  Access to both virtual consoles
+          (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and
+          /dev/pts/*) are controlled by this option.
+          It is safe to say 'Y' here.
 
 config XTERM_CHAN
 	bool "xterm channel support"
 	help
-        This option enables support for attaching UML consoles and serial
-        lines to xterms.  Each UML device so assigned will be brought up in
-        its own xterm.
-        If you disable this option, then CONFIG_PT_PROXY will be disabled as
-        well, since UML's gdb currently requires an xterm.
-        It is safe to say 'Y' here.
+          This option enables support for attaching UML consoles and serial
+          lines to xterms.  Each UML device so assigned will be brought up in
+          its own xterm.
+          It is safe to say 'Y' here.
 
 config NOCONFIG_CHAN
 	bool
@@ -77,39 +75,39 @@ config CON_ZERO_CHAN
 	string "Default main console channel initialization"
 	default "fd:0,fd:1"
 	help
-        This is the string describing the channel to which the main console
-        will be attached by default.  This value can be overridden from the
-        command line.  The default value is "fd:0,fd:1", which attaches the
-        main console to stdin and stdout.
-        It is safe to leave this unchanged.
+          This is the string describing the channel to which the main console
+          will be attached by default.  This value can be overridden from the
+          command line.  The default value is "fd:0,fd:1", which attaches the
+          main console to stdin and stdout.
+          It is safe to leave this unchanged.
 
 config CON_CHAN
 	string "Default console channel initialization"
 	default "xterm"
 	help
-        This is the string describing the channel to which all consoles
-        except the main console will be attached by default.  This value can
-        be overridden from the command line.  The default value is "xterm",
-        which brings them up in xterms.
-        It is safe to leave this unchanged, although you may wish to change
-        this if you expect the UML that you build to be run in environments
-        which don't have X or xterm available.
+          This is the string describing the channel to which all consoles
+          except the main console will be attached by default.  This value can
+          be overridden from the command line.  The default value is "xterm",
+          which brings them up in xterms.
+          It is safe to leave this unchanged, although you may wish to change
+          this if you expect the UML that you build to be run in environments
+          which don't have X or xterm available.
 
 config SSL_CHAN
 	string "Default serial line channel initialization"
 	default "pty"
 	help
-        This is the string describing the channel to which the serial lines
-        will be attached by default.  This value can be overridden from the
-        command line.  The default value is "pty", which attaches them to
-        traditional pseudo-terminals.
-        It is safe to leave this unchanged, although you may wish to change
-        this if you expect the UML that you build to be run in environments
-        which don't have a set of /dev/pty* devices.
+          This is the string describing the channel to which the serial lines
+          will be attached by default.  This value can be overridden from the
+          command line.  The default value is "pty", which attaches them to
+          traditional pseudo-terminals.
+          It is safe to leave this unchanged, although you may wish to change
+          this if you expect the UML that you build to be run in environments
+          which don't have a set of /dev/pty* devices.
 
 config UNIX98_PTYS
 	bool "Unix98 PTY support"
-	---help---
+	help
 	  A pseudo terminal (PTY) is a software device consisting of two
 	  halves: a master and a slave. The slave device behaves identical to
 	  a physical terminal; the master device is used by a process to
@@ -132,7 +130,7 @@ config UNIX98_PTYS
 config LEGACY_PTYS
 	bool "Legacy (BSD) PTY support"
 	default y
-	---help---
+	help
 	  A pseudo terminal (PTY) is a software device consisting of two
 	  halves: a master and a slave. The slave device behaves identical to
 	  a physical terminal; the master device is used by a process to
@@ -170,7 +168,7 @@ config LEGACY_PTY_COUNT
 	int "Maximum number of legacy PTY in use"
 	depends on LEGACY_PTYS
 	default "256"
-	---help---
+	help
 	  The maximum number of legacy PTYs that can be used at any one time.
 	  The default is 256, and should be more than enough.  Embedded
 	  systems may want to reduce this to save memory.
@@ -196,10 +194,10 @@ config UML_WATCHDOG
 config UML_SOUND
 	tristate "Sound support"
 	help
-        This option enables UML sound support.  If enabled, it will pull in
-        soundcore and the UML hostaudio relay, which acts as a intermediary
-        between the host's dsp and mixer devices and the UML sound system.
-        It is safe to say 'Y' here.
+          This option enables UML sound support.  If enabled, it will pull in
+          soundcore and the UML hostaudio relay, which acts as an intermediary
+          between the host's dsp and mixer devices and the UML sound system.
+          It is safe to say 'Y' here.
 
 config SOUND
 	tristate
@@ -217,22 +215,21 @@ config HW_RANDOM
 config UML_RANDOM
 	tristate "Hardware random number generator"
 	help
-	This option enables UML's "hardware" random number generator.  It
-	attaches itself to the host's /dev/random, supplying as much entropy
-	as the host has, rather than the small amount the UML gets from its
-	own drivers.  It registers itself as a standard hardware random number
-	generator, major 10, minor 183, and the canonical device name is
-	/dev/hwrng.
-	The way to make use of this is to install the rng-tools package
-	(check your distro, or download from
-	http://sourceforge.net/projects/gkernel/).  rngd periodically reads
-	/dev/hwrng and injects the entropy into /dev/random.
+	  This option enables UML's "hardware" random number generator.  It
+	  attaches itself to the host's /dev/random, supplying as much entropy
+	  as the host has, rather than the small amount the UML gets from its
+	  own drivers.  It registers itself as a standard hardware random number
+	  generator, major 10, minor 183, and the canonical device name is
+	  /dev/hwrng.
+	  The way to make use of this is to install the rng-tools package
+	  (check your distro, or download from
+	  http://sourceforge.net/projects/gkernel/).  rngd periodically reads
+	  /dev/hwrng and injects the entropy into /dev/random.
 
 config MMAPPER
 	tristate "iomem emulation driver"
 	help
-	This driver allows a host file to be used as emulated IO memory inside
-	UML.
+	  This driver allows a host file to be used as emulated IO memory inside
+	  UML.
 
 endmenu
-
diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug
index c86f5eb29fd..1f6462ffd3e 100644
--- a/arch/um/Kconfig.debug
+++ b/arch/um/Kconfig.debug
@@ -2,50 +2,31 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-config CMDLINE_ON_HOST
-	bool "Show command line arguments on the host in TT mode"
-	depends on MODE_TT
-	default !DEBUG_INFO
-	help
-	This controls whether arguments in guest processes should be shown on
-	the host's ps output.
-	Enabling this option hinders debugging on some recent GDB versions
-	(because GDB gets "confused" when we do an execvp()). So probably you
-	should disable it.
-
-config PT_PROXY
-	bool "Enable ptrace proxy"
-	depends on XTERM_CHAN && DEBUG_INFO && MODE_TT
-	help
-	This option enables a debugging interface which allows gdb to debug
-	the kernel without needing to actually attach to kernel threads.
-	If you want to do kernel debugging, say Y here; otherwise say N.
-
 config GPROF
 	bool "Enable gprof support"
-	depends on DEBUG_INFO && MODE_SKAS && !MODE_TT
+	depends on DEBUG_INFO
 	help
-        This allows profiling of a User-Mode Linux kernel with the gprof
-        utility.
+	  This allows profiling of a User-Mode Linux kernel with the gprof
+	  utility.
 
-        See <http://user-mode-linux.sourceforge.net/gprof.html> for more
-        details.
+	  See <http://user-mode-linux.sourceforge.net/gprof.html> for more
+	  details.
 
-        If you're involved in UML kernel development and want to use gprof,
-        say Y.  If you're unsure, say N.
+	  If you're involved in UML kernel development and want to use gprof,
+	  say Y.  If you're unsure, say N.
 
 config GCOV
 	bool "Enable gcov support"
-	depends on DEBUG_INFO && MODE_SKAS
+	depends on DEBUG_INFO
 	help
-        This option allows developers to retrieve coverage data from a UML
-        session.
+	  This option allows developers to retrieve coverage data from a UML
+	  session.
 
-        See <http://user-mode-linux.sourceforge.net/gprof.html> for more
-        details.
+	  See <http://user-mode-linux.sourceforge.net/gprof.html> for more
+	  details.
 
-        If you're involved in UML kernel development and want to use gcov,
-        say Y.  If you're unsure, say N.
+	  If you're involved in UML kernel development and want to use gcov,
+	  say Y.  If you're unsure, say N.
 
 config DEBUG_STACK_USAGE
 	bool "Stack utilization instrumentation"
diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386
index d6cffb27fff..9876d80d85d 100644
--- a/arch/um/Kconfig.i386
+++ b/arch/um/Kconfig.i386
@@ -65,20 +65,6 @@ config 3_LEVEL_PGTABLES
 	However, this it experimental on 32-bit architectures, so if unsure say
 	N (on x86-64 it's automatically enabled, instead, as it's safe there).
 
-config STUB_CODE
-	hex
-	default 0xbfffe000 if !HOST_VMSPLIT_2G
-	default 0x7fffe000 if HOST_VMSPLIT_2G
-
-config STUB_DATA
-	hex
-	default 0xbffff000 if !HOST_VMSPLIT_2G
-	default 0x7ffff000 if HOST_VMSPLIT_2G
-
-config STUB_START
-	hex
-	default STUB_CODE
-
 config ARCH_HAS_SC_SIGNALS
 	bool
 	default y
diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index 14a04ebdeae..66e50026ade 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -108,6 +108,28 @@ config UML_NET_DAEMON
         more than one without conflict.  If you don't need UML networking,
         say N.
 
+config UML_NET_VDE
+	bool "VDE transport"
+	depends on UML_NET
+	help
+	This User-Mode Linux network transport allows one or more running
+	UMLs on a single host to communicate with each other and also
+	with the rest of the world using Virtual Distributed Ethernet,
+	an improved fork of uml_switch.
+
+	You must have libvdeplug installed in order to build the vde
+	transport into UML.
+
+	To use this form of networking, you will need to run vde_switch
+	on the host.
+
+	For more information, see <http://wiki.virtualsquare.org/>
+	That site has a good overview of what VDE is and also examples
+	of the UML command line to use to enable VDE networking.
+
+	If you need UML networking with VDE,
+	say Y.
+
 config UML_NET_MCAST
 	bool "Multicast transport"
 	depends on UML_NET
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index f60e9e50642..d632e9a89cc 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -17,24 +17,12 @@ config SEMAPHORE_SLEEPERS
 
 config TOP_ADDR
  	hex
-	default 0x80000000
+	default 0x7fc0000000
 
 config 3_LEVEL_PGTABLES
        bool
        default y
 
-config STUB_CODE
-	hex
-	default 0x7fbfffe000
-
-config STUB_DATA
-	hex
-	default 0x7fbffff000
-
-config STUB_START
-	hex
-	default STUB_CODE
-
 config ARCH_HAS_SC_SIGNALS
 	bool
 	default n
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 0666729eb97..ab22fdeedf2 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -2,7 +2,7 @@
 # This file is included by the global makefile so that you can add your own
 # architecture-specific flags and dependencies.
 #
-# Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 # Licensed under the GPL
 #
 
@@ -31,18 +31,9 @@ SYMLINK_HEADERS := $(foreach header,$(SYMLINK_HEADERS),include/asm-um/$(header))
 ARCH_SYMLINKS = include/asm-um/arch $(ARCH_DIR)/include/sysdep $(ARCH_DIR)/os \
 	$(SYMLINK_HEADERS) $(ARCH_DIR)/include/uml-config.h
 
-um-modes-$(CONFIG_MODE_TT) += tt
-um-modes-$(CONFIG_MODE_SKAS) += skas
+MODE_INCLUDE	+= -I$(srctree)/$(ARCH_DIR)/include/skas
 
-MODE_INCLUDE	+= $(foreach mode,$(um-modes-y),\
-		   -I$(srctree)/$(ARCH_DIR)/include/$(mode))
-
-MAKEFILES-INCL	+= $(foreach mode,$(um-modes-y),\
-		   $(srctree)/$(ARCH_DIR)/Makefile-$(mode))
-
-ifneq ($(MAKEFILES-INCL),)
-  include $(MAKEFILES-INCL)
-endif
+include $(srctree)/$(ARCH_DIR)/Makefile-skas
 
 ARCH_INCLUDE	:= -I$(ARCH_DIR)/include
 ifneq ($(KBUILD_SRC),)
@@ -60,7 +51,8 @@ SYS_DIR		:= $(ARCH_DIR)/include/sysdep-$(SUBARCH)
 
 CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\"	\
 	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap	\
-	-Din6addr_loopback=kernel_in6addr_loopback
+	-Din6addr_loopback=kernel_in6addr_loopback \
+	-Din6addr_any=kernel_in6addr_any
 
 AFLAGS += $(ARCH_INCLUDE)
 
@@ -88,9 +80,8 @@ CFLAGS += $(call cc-option,-fno-unit-at-a-time,)
 # included; the values here are meaningless
 
 CONFIG_NEST_LEVEL ?= 0
-CONFIG_KERNEL_HALF_GIGS ?= 0
 
-SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000)
+SIZE = ($(CONFIG_NEST_LEVEL) * 0x20000000)
 
 PHONY += linux
 
@@ -123,7 +114,6 @@ CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,)
 	$(call cc-option, -fno-stack-protector,) \
 	$(call cc-option, -fno-stack-protector-all,)
 
-CPP_MODE-$(CONFIG_MODE_TT) := -DMODE_TT
 CONFIG_KERNEL_STACK_ORDER ?= 2
 STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
 
@@ -131,13 +121,10 @@ ifndef START
   START = $(shell echo $$[ $(TOP_ADDR) - $(SIZE) ] )
 endif
 
-CPPFLAGS_vmlinux.lds = -U$(SUBARCH) \
-	-DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
-	-DELF_FORMAT="$(ELF_FORMAT)" $(CPP_MODE-y) \
-	-DKERNEL_STACK_SIZE=$(STACK_SIZE) \
-	-DUNMAP_PATH=arch/um/sys-$(SUBARCH)/unmap.o
+CPPFLAGS_vmlinux.lds = -U$(SUBARCH) -DSTART=$(START) -DELF_ARCH=$(ELF_ARCH) \
+	-DELF_FORMAT="$(ELF_FORMAT)" -DKERNEL_STACK_SIZE=$(STACK_SIZE)
 
-#The wrappers will select whether using "malloc" or the kernel allocator.
+# The wrappers will select whether using "malloc" or the kernel allocator.
 LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
 
 CFLAGS_vmlinux := $(LINK-y) $(LINK_WRAPS)
@@ -150,8 +137,8 @@ define cmd_vmlinux__
 	FORCE ,$^) ; rm -f linux
 endef
 
-#When cleaning we don't include .config, so we don't include
-#TT or skas makefiles and don't clean skas_ptregs.h.
+# When cleaning we don't include .config, so we don't include
+# TT or skas makefiles and don't clean skas_ptregs.h.
 CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/include/uml-config.h \
 	$(ARCH_DIR)/include/user_constants.h \
 	$(ARCH_DIR)/include/kern_constants.h $(ARCH_DIR)/Kconfig.arch
diff --git a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
index 60107ed4905..ae61e3c271e 100644
--- a/arch/um/Makefile-i386
+++ b/arch/um/Makefile-i386
@@ -2,11 +2,7 @@ core-y += arch/um/sys-i386/ arch/x86/crypto/
 
 TOP_ADDR := $(CONFIG_TOP_ADDR)
 
-ifeq ($(CONFIG_MODE_SKAS),y)
-  ifneq ($(CONFIG_MODE_TT),y)
-     START := 0x8048000
-  endif
-endif
+START := 0x8048000
 
 LDFLAGS			+= -m elf_i386
 ELF_ARCH		:= $(SUBARCH)
diff --git a/arch/um/defconfig b/arch/um/defconfig
index 1e0f677c2f4..f609edede06 100644
--- a/arch/um/defconfig
+++ b/arch/um/defconfig
@@ -12,9 +12,7 @@ CONFIG_IRQ_RELEASE_METHOD=y
 #
 # UML-specific options
 #
-# CONFIG_MODE_TT is not set
 # CONFIG_STATIC_LINK is not set
-CONFIG_MODE_SKAS=y
 
 #
 # Host processor type and features
@@ -61,9 +59,6 @@ CONFIG_SEMAPHORE_SLEEPERS=y
 # CONFIG_HOST_2G_2G is not set
 CONFIG_TOP_ADDR=0xc0000000
 # CONFIG_3_LEVEL_PGTABLES is not set
-CONFIG_STUB_CODE=0xbfffe000
-CONFIG_STUB_DATA=0xbffff000
-CONFIG_STUB_START=0xbfffe000
 CONFIG_ARCH_HAS_SC_SIGNALS=y
 CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA=y
 CONFIG_GENERIC_HWEIGHT=y
@@ -75,6 +70,9 @@ CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 # CONFIG_SPARSEMEM_STATIC is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LD_SCRIPT_DYN=y
 CONFIG_NET=y
 CONFIG_BINFMT_ELF=y
@@ -82,11 +80,10 @@ CONFIG_BINFMT_MISC=m
 # CONFIG_HOSTFS is not set
 # CONFIG_HPPFS is not set
 CONFIG_MCONSOLE=y
-# CONFIG_MAGIC_SYSRQ is not set
+CONFIG_MAGIC_SYSRQ=y
 CONFIG_NEST_LEVEL=0
 # CONFIG_HIGHMEM is not set
 CONFIG_KERNEL_STACK_ORDER=0
-CONFIG_UML_REAL_TIME_CLOCK=y
 
 #
 # Code maturity level options
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index de17d4c6e02..634968150bd 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -19,10 +19,16 @@ harddog-objs := harddog_kern.o harddog_user.o
 
 LDFLAGS_pcap.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libpcap.a)
 
-targets := pcap_kern.o pcap_user.o
+LDFLAGS_vde.o := -r $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
+
+targets := pcap_kern.o pcap_user.o vde_kern.o vde_user.o
 
 $(obj)/pcap.o: $(obj)/pcap_kern.o $(obj)/pcap_user.o
 	$(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_pcap.o)
+
+$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
+	$(LD) -r -dp -o $@ $^ $(LDFLAGS) $(LDFLAGS_vde.o)
+
 #XXX: The call below does not work because the flags are added before the
 # object name, so nothing from the library gets linked.
 #$(call if_changed,ld)
@@ -37,6 +43,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
 obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o 
+obj-$(CONFIG_UML_NET_VDE) += vde.o
 obj-$(CONFIG_UML_NET_MCAST) += mcast.o 
 obj-$(CONFIG_UML_NET_PCAP) += pcap.o
 obj-$(CONFIG_UML_NET) += net.o 
@@ -54,6 +61,6 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 
 # pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
 
 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 629b00e3b0b..db3082b4da4 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -1,28 +1,19 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/tty.h>
-#include <linux/string.h>
 #include <linux/tty_flip.h>
-#include <asm/irq.h>
 #include "chan_kern.h"
-#include "kern.h"
-#include "irq_user.h"
-#include "sigio.h"
-#include "line.h"
 #include "os.h"
 
 #ifdef CONFIG_NOCONFIG_CHAN
 static void *not_configged_init(char *str, int device,
 				const struct chan_opts *opts)
 {
-	printk("Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return NULL;
 }
@@ -30,34 +21,34 @@ static void *not_configged_init(char *str, int device,
 static int not_configged_open(int input, int output, int primary, void *data,
 			      char **dev_out)
 {
-	printk("Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return -ENODEV;
 }
 
 static void not_configged_close(int fd, void *data)
 {
-	printk("Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 }
 
 static int not_configged_read(int fd, char *c_out, void *data)
 {
-	printk("Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return -EIO;
 }
 
 static int not_configged_write(int fd, const char *buf, int len, void *data)
 {
-	printk("Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return -EIO;
 }
 
 static int not_configged_console_write(int fd, const char *buf, int len)
 {
-	printk("Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return -EIO;
 }
@@ -65,14 +56,14 @@ static int not_configged_console_write(int fd, const char *buf, int len)
 static int not_configged_window_size(int fd, void *data, unsigned short *rows,
 				     unsigned short *cols)
 {
-	printk("Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 	return -ENODEV;
 }
 
 static void not_configged_free(void *data)
 {
-	printk("Using a channel type which is configured out of "
+	printk(KERN_ERR "Using a channel type which is configured out of "
 	       "UML\n");
 }
 
@@ -89,64 +80,17 @@ static const struct chan_ops not_configged_ops = {
 };
 #endif /* CONFIG_NOCONFIG_CHAN */
 
-void generic_close(int fd, void *unused)
-{
-	os_close_file(fd);
-}
-
-int generic_read(int fd, char *c_out, void *unused)
-{
-	int n;
-
-	n = os_read_file(fd, c_out, sizeof(*c_out));
-
-	if(n == -EAGAIN)
-		return 0;
-	else if(n == 0)
-		return -EIO;
-	return n;
-}
-
-/* XXX Trivial wrapper around os_write_file */
-
-int generic_write(int fd, const char *buf, int n, void *unused)
-{
-	return os_write_file(fd, buf, n);
-}
-
-int generic_window_size(int fd, void *unused, unsigned short *rows_out,
-			unsigned short *cols_out)
-{
-	int rows, cols;
-	int ret;
-
-	ret = os_window_size(fd, &rows, &cols);
-	if(ret < 0)
-		return ret;
-
-	ret = ((*rows_out != rows) || (*cols_out != cols));
-
-	*rows_out = rows;
-	*cols_out = cols;
-
-	return ret;
-}
-
-void generic_free(void *data)
-{
-	kfree(data);
-}
-
 static void tty_receive_char(struct tty_struct *tty, char ch)
 {
-	if(tty == NULL) return;
+	if (tty == NULL)
+		return;
 
-	if(I_IXON(tty) && !I_IXOFF(tty) && !tty->raw) {
-		if(ch == STOP_CHAR(tty)){
+	if (I_IXON(tty) && !I_IXOFF(tty) && !tty->raw) {
+		if (ch == STOP_CHAR(tty)) {
 			stop_tty(tty);
 			return;
 		}
-		else if(ch == START_CHAR(tty)){
+		else if (ch == START_CHAR(tty)) {
 			start_tty(tty);
 			return;
 		}
@@ -159,14 +103,14 @@ static int open_one_chan(struct chan *chan)
 {
 	int fd, err;
 
-	if(chan->opened)
+	if (chan->opened)
 		return 0;
 
-	if(chan->ops->open == NULL)
+	if (chan->ops->open == NULL)
 		fd = 0;
 	else fd = (*chan->ops->open)(chan->input, chan->output, chan->primary,
 				     chan->data, &chan->dev);
-	if(fd < 0)
+	if (fd < 0)
 		return fd;
 
 	err = os_set_fd_block(fd, 0);
@@ -187,10 +131,10 @@ int open_chan(struct list_head *chans)
 	struct chan *chan;
 	int ret, err = 0;
 
-	list_for_each(ele, chans){
+	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
 		ret = open_one_chan(chan);
-		if(chan->primary)
+		if (chan->primary)
 			err = ret;
 	}
 	return err;
@@ -201,9 +145,9 @@ void chan_enable_winch(struct list_head *chans, struct tty_struct *tty)
 	struct list_head *ele;
 	struct chan *chan;
 
-	list_for_each(ele, chans){
+	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
-		if(chan->primary && chan->output && chan->ops->winch){
+		if (chan->primary && chan->output && chan->ops->winch) {
 			register_winch(chan->fd, tty);
 			return;
 		}
@@ -216,7 +160,7 @@ int enable_chan(struct line *line)
 	struct chan *chan;
 	int err;
 
-	list_for_each(ele, &line->chan_list){
+	list_for_each(ele, &line->chan_list) {
 		chan = list_entry(ele, struct chan, list);
 		err = open_one_chan(chan);
 		if (err) {
@@ -226,7 +170,7 @@ int enable_chan(struct line *line)
 			continue;
 		}
 
-		if(chan->enabled)
+		if (chan->enabled)
 			continue;
 		err = line_setup_irq(chan->fd, chan->input, chan->output, line,
 				     chan);
@@ -263,12 +207,12 @@ void free_irqs(void)
 	list_splice_init(&irqs_to_free, &list);
 	spin_unlock_irqrestore(&irqs_to_free_lock, flags);
 
-	list_for_each(ele, &list){
+	list_for_each(ele, &list) {
 		chan = list_entry(ele, struct chan, free_list);
 
-		if(chan->input)
+		if (chan->input)
 			free_irq(chan->line->driver->read_irq, chan);
-		if(chan->output)
+		if (chan->output)
 			free_irq(chan->line->driver->write_irq, chan);
 		chan->enabled = 0;
 	}
@@ -278,22 +222,22 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
 {
 	unsigned long flags;
 
-	if(!chan->opened)
+	if (!chan->opened)
 		return;
 
-	if(delay_free_irq){
+	if (delay_free_irq) {
 		spin_lock_irqsave(&irqs_to_free_lock, flags);
 		list_add(&chan->free_list, &irqs_to_free);
 		spin_unlock_irqrestore(&irqs_to_free_lock, flags);
 	}
 	else {
-		if(chan->input)
+		if (chan->input)
 			free_irq(chan->line->driver->read_irq, chan);
-		if(chan->output)
+		if (chan->output)
 			free_irq(chan->line->driver->write_irq, chan);
 		chan->enabled = 0;
 	}
-	if(chan->ops->close != NULL)
+	if (chan->ops->close != NULL)
 		(*chan->ops->close)(chan->fd, chan->data);
 
 	chan->opened = 0;
@@ -322,7 +266,7 @@ void deactivate_chan(struct list_head *chans, int irq)
 	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
 
-		if(chan->enabled && chan->input)
+		if (chan->enabled && chan->input)
 			deactivate_fd(chan->fd, irq);
 	}
 }
@@ -335,7 +279,7 @@ void reactivate_chan(struct list_head *chans, int irq)
 	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
 
-		if(chan->enabled && chan->input)
+		if (chan->enabled && chan->input)
 			reactivate_fd(chan->fd, irq);
 	}
 }
@@ -347,10 +291,14 @@ int write_chan(struct list_head *chans, const char *buf, int len,
 	struct chan *chan = NULL;
 	int n, ret = 0;
 
+	if (len == 0)
+		return 0;
+
 	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
 		if (!chan->output || (chan->ops->write == NULL))
 			continue;
+
 		n = chan->ops->write(chan->fd, buf, len, chan->data);
 		if (chan->primary) {
 			ret = n;
@@ -367,12 +315,14 @@ int console_write_chan(struct list_head *chans, const char *buf, int len)
 	struct chan *chan;
 	int n, ret = 0;
 
-	list_for_each(ele, chans){
+	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
-		if(!chan->output || (chan->ops->console_write == NULL))
+		if (!chan->output || (chan->ops->console_write == NULL))
 			continue;
+
 		n = chan->ops->console_write(chan->fd, buf, len);
-		if(chan->primary) ret = n;
+		if (chan->primary)
+			ret = n;
 	}
 	return ret;
 }
@@ -382,10 +332,11 @@ int console_open_chan(struct line *line, struct console *co)
 	int err;
 
 	err = open_chan(&line->chan_list);
-	if(err)
+	if (err)
 		return err;
 
-	printk("Console initialized on /dev/%s%d\n", co->name, co->index);
+	printk(KERN_INFO "Console initialized on /dev/%s%d\n", co->name,
+	       co->index);
 	return 0;
 }
 
@@ -395,10 +346,10 @@ int chan_window_size(struct list_head *chans, unsigned short *rows_out,
 	struct list_head *ele;
 	struct chan *chan;
 
-	list_for_each(ele, chans){
+	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
-		if(chan->primary){
-			if(chan->ops->window_size == NULL)
+		if (chan->primary) {
+			if (chan->ops->window_size == NULL)
 				return 0;
 			return chan->ops->window_size(chan->fd, chan->data,
 						      rows_out, cols_out);
@@ -413,10 +364,11 @@ static void free_one_chan(struct chan *chan, int delay_free_irq)
 
 	close_one_chan(chan, delay_free_irq);
 
-	if(chan->ops->free != NULL)
+	if (chan->ops->free != NULL)
 		(*chan->ops->free)(chan->data);
 
-	if(chan->primary && chan->output) ignore_sigio_fd(chan->fd);
+	if (chan->primary && chan->output)
+		ignore_sigio_fd(chan->fd);
 	kfree(chan);
 }
 
@@ -425,7 +377,7 @@ static void free_chan(struct list_head *chans, int delay_free_irq)
 	struct list_head *ele, *next;
 	struct chan *chan;
 
-	list_for_each_safe(ele, next, chans){
+	list_for_each_safe(ele, next, chans) {
 		chan = list_entry(ele, struct chan, list);
 		free_one_chan(chan, delay_free_irq);
 	}
@@ -436,14 +388,14 @@ static int one_chan_config_string(struct chan *chan, char *str, int size,
 {
 	int n = 0;
 
-	if(chan == NULL){
+	if (chan == NULL) {
 		CONFIG_CHUNK(str, size, n, "none", 1);
 		return n;
 	}
 
 	CONFIG_CHUNK(str, size, n, chan->ops->type, 0);
 
-	if(chan->dev == NULL){
+	if (chan->dev == NULL) {
 		CONFIG_CHUNK(str, size, n, "", 1);
 		return n;
 	}
@@ -463,7 +415,7 @@ static int chan_pair_config_string(struct chan *in, struct chan *out,
 	str += n;
 	size -= n;
 
-	if(in == out){
+	if (in == out) {
 		CONFIG_CHUNK(str, size, n, "", 1);
 		return n;
 	}
@@ -483,13 +435,13 @@ int chan_config_string(struct list_head *chans, char *str, int size,
 	struct list_head *ele;
 	struct chan *chan, *in = NULL, *out = NULL;
 
-	list_for_each(ele, chans){
+	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
-		if(!chan->primary)
+		if (!chan->primary)
 			continue;
-		if(chan->input)
+		if (chan->input)
 			in = chan;
-		if(chan->output)
+		if (chan->output)
 			out = chan;
 	}
 
@@ -548,27 +500,27 @@ static struct chan *parse_chan(struct line *line, char *str, int device,
 
 	ops = NULL;
 	data = NULL;
-	for(i = 0; i < ARRAY_SIZE(chan_table); i++){
+	for(i = 0; i < ARRAY_SIZE(chan_table); i++) {
 		entry = &chan_table[i];
-		if(!strncmp(str, entry->key, strlen(entry->key))){
+		if (!strncmp(str, entry->key, strlen(entry->key))) {
 			ops = entry->ops;
 			str += strlen(entry->key);
 			break;
 		}
 	}
-	if(ops == NULL){
+	if (ops == NULL) {
 		*error_out = "No match for configured backends";
 		return NULL;
 	}
 
 	data = (*ops->init)(str, device, opts);
-	if(data == NULL){
+	if (data == NULL) {
 		*error_out = "Configuration failed";
 		return NULL;
 	}
 
 	chan = kmalloc(sizeof(*chan), GFP_ATOMIC);
-	if(chan == NULL){
+	if (chan == NULL) {
 		*error_out = "Memory allocation failed";
 		return NULL;
 	}
@@ -594,26 +546,26 @@ int parse_chan_pair(char *str, struct line *line, int device,
 	struct chan *new, *chan;
 	char *in, *out;
 
-	if(!list_empty(chans)){
+	if (!list_empty(chans)) {
 		chan = list_entry(chans->next, struct chan, list);
 		free_chan(chans, 0);
 		INIT_LIST_HEAD(chans);
 	}
 
 	out = strchr(str, ',');
-	if(out != NULL){
+	if (out != NULL) {
 		in = str;
 		*out = '\0';
 		out++;
 		new = parse_chan(line, in, device, opts, error_out);
-		if(new == NULL)
+		if (new == NULL)
 			return -1;
 
 		new->input = 1;
 		list_add(&new->list, chans);
 
 		new = parse_chan(line, out, device, opts, error_out);
-		if(new == NULL)
+		if (new == NULL)
 			return -1;
 
 		list_add(&new->list, chans);
@@ -621,7 +573,7 @@ int parse_chan_pair(char *str, struct line *line, int device,
 	}
 	else {
 		new = parse_chan(line, str, device, opts, error_out);
-		if(new == NULL)
+		if (new == NULL)
 			return -1;
 
 		list_add(&new->list, chans);
@@ -636,9 +588,9 @@ int chan_out_fd(struct list_head *chans)
 	struct list_head *ele;
 	struct chan *chan;
 
-	list_for_each(ele, chans){
+	list_for_each(ele, chans) {
 		chan = list_entry(ele, struct chan, list);
-		if(chan->primary && chan->output)
+		if (chan->primary && chan->output)
 			return chan->fd;
 	}
 	return -1;
@@ -652,23 +604,25 @@ void chan_interrupt(struct list_head *chans, struct delayed_work *task,
 	int err;
 	char c;
 
-	list_for_each_safe(ele, next, chans){
+	list_for_each_safe(ele, next, chans) {
 		chan = list_entry(ele, struct chan, list);
-		if(!chan->input || (chan->ops->read == NULL)) continue;
+		if (!chan->input || (chan->ops->read == NULL))
+			continue;
 		do {
 			if (tty && !tty_buffer_request_room(tty, 1)) {
 				schedule_delayed_work(task, 1);
 				goto out;
 			}
 			err = chan->ops->read(chan->fd, &c, chan->data);
-			if(err > 0)
+			if (err > 0)
 				tty_receive_char(tty, c);
-		} while(err > 0);
+		} while (err > 0);
 
-		if(err == 0) reactivate_fd(chan->fd, irq);
-		if(err == -EIO){
-			if(chan->primary){
-				if(tty != NULL)
+		if (err == 0)
+			reactivate_fd(chan->fd, irq);
+		if (err == -EIO) {
+			if (chan->primary) {
+				if (tty != NULL)
 					tty_hangup(tty);
 				close_chan(chans, 1);
 				return;
@@ -677,5 +631,6 @@ void chan_interrupt(struct list_head *chans, struct delayed_work *task,
 		}
 	}
  out:
-	if(tty) tty_flip_buffer_push(tty);
+	if (tty)
+		tty_flip_buffer_push(tty);
 }
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index 4d438f36ea2..b88e93b3a39 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -1,51 +1,107 @@
-/* 
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
-#include <unistd.h>
 #include <stdlib.h>
+#include <unistd.h>
 #include <errno.h>
-#include <termios.h>
-#include <string.h>
-#include <signal.h>
 #include <sched.h>
-#include <sys/stat.h>
+#include <signal.h>
+#include <termios.h>
 #include <sys/ioctl.h>
-#include <sys/socket.h>
-#include "kern_util.h"
 #include "chan_user.h"
-#include "user.h"
 #include "os.h"
-#include "choose-mode.h"
-#include "mode.h"
+#include "um_malloc.h"
+#include "user.h"
+
+void generic_close(int fd, void *unused)
+{
+	close(fd);
+}
+
+int generic_read(int fd, char *c_out, void *unused)
+{
+	int n;
+
+	n = read(fd, c_out, sizeof(*c_out));
+	if (n > 0)
+		return n;
+	else if (errno == EAGAIN)
+		return 0;
+	else if (n == 0)
+		return -EIO;
+	return -errno;
+}
+
+/* XXX Trivial wrapper around write */
+
+int generic_write(int fd, const char *buf, int n, void *unused)
+{
+	int err;
+
+	err = write(fd, buf, n);
+	if (err > 0)
+		return err;
+	else if (errno == EAGAIN)
+		return 0;
+	else if (err == 0)
+		return -EIO;
+	return -errno;
+}
+
+int generic_window_size(int fd, void *unused, unsigned short *rows_out,
+			unsigned short *cols_out)
+{
+	struct winsize size;
+	int ret;
+
+	if (ioctl(fd, TIOCGWINSZ, &size) < 0)
+		return -errno;
+
+	ret = ((*rows_out != size.ws_row) || (*cols_out != size.ws_col));
+
+	*rows_out = size.ws_row;
+	*cols_out = size.ws_col;
+
+	return ret;
+}
+
+void generic_free(void *data)
+{
+	kfree(data);
+}
 
 int generic_console_write(int fd, const char *buf, int n)
 {
 	struct termios save, new;
 	int err;
 
-	if(isatty(fd)){
+	if (isatty(fd)) {
 		CATCH_EINTR(err = tcgetattr(fd, &save));
 		if (err)
 			goto error;
 		new = save;
-		/* The terminal becomes a bit less raw, to handle \n also as
+		/*
+		 * The terminal becomes a bit less raw, to handle \n also as
 		 * "Carriage Return", not only as "New Line". Otherwise, the new
-		 * line won't start at the first column.*/
+		 * line won't start at the first column.
+		 */
 		new.c_oflag |= OPOST;
 		CATCH_EINTR(err = tcsetattr(fd, TCSAFLUSH, &new));
 		if (err)
 			goto error;
 	}
 	err = generic_write(fd, buf, n, NULL);
-	/* Restore raw mode, in any case; we *must* ignore any error apart
-	 * EINTR, except for debug.*/
-	if(isatty(fd))
+	/*
+	 * Restore raw mode, in any case; we *must* ignore any error apart
+	 * EINTR, except for debug.
+	 */
+	if (isatty(fd))
 		CATCH_EINTR(tcsetattr(fd, TCSAFLUSH, &save));
-	return(err);
+	return err;
 error:
-	return(-errno);
+	return -errno;
 }
 
 /*
@@ -82,62 +138,73 @@ static int winch_thread(void *arg)
 	struct winch_data *data = arg;
 	sigset_t sigs;
 	int pty_fd, pipe_fd;
-	int count, err;
+	int count;
 	char c = 1;
 
 	pty_fd = data->pty_fd;
 	pipe_fd = data->pipe_fd;
-	count = os_write_file(pipe_fd, &c, sizeof(c));
-	if(count != sizeof(c))
-		printk("winch_thread : failed to write synchronization "
-		       "byte, err = %d\n", -count);
+	count = write(pipe_fd, &c, sizeof(c));
+	if (count != sizeof(c))
+		printk(UM_KERN_ERR "winch_thread : failed to write "
+		       "synchronization byte, err = %d\n", -count);
 
-	/* We are not using SIG_IGN on purpose, so don't fix it as I thought to
+	/*
+	 * We are not using SIG_IGN on purpose, so don't fix it as I thought to
 	 * do! If using SIG_IGN, the sigsuspend() call below would not stop on
-	 * SIGWINCH. */
+	 * SIGWINCH.
+	 */
 
 	signal(SIGWINCH, winch_handler);
 	sigfillset(&sigs);
 	/* Block all signals possible. */
-	if(sigprocmask(SIG_SETMASK, &sigs, NULL) < 0){
-		printk("winch_thread : sigprocmask failed, errno = %d\n", 
-		       errno);
+	if (sigprocmask(SIG_SETMASK, &sigs, NULL) < 0) {
+		printk(UM_KERN_ERR "winch_thread : sigprocmask failed, "
+		       "errno = %d\n", errno);
 		exit(1);
 	}
 	/* In sigsuspend(), block anything else than SIGWINCH. */
 	sigdelset(&sigs, SIGWINCH);
 
-	if(setsid() < 0){
-		printk("winch_thread : setsid failed, errno = %d\n", errno);
+	if (setsid() < 0) {
+		printk(UM_KERN_ERR "winch_thread : setsid failed, errno = %d\n",
+		       errno);
+		exit(1);
+	}
+
+	if (ioctl(pty_fd, TIOCSCTTY, 0) < 0) {
+		printk(UM_KERN_ERR "winch_thread : TIOCSCTTY failed on "
+		       "fd %d err = %d\n", pty_fd, errno);
 		exit(1);
 	}
 
-	err = os_new_tty_pgrp(pty_fd, os_getpid());
-	if(err < 0){
-		printk("winch_thread : new_tty_pgrp failed on fd %d, "
-		       "err = %d\n", pty_fd, -err);
+	if (tcsetpgrp(pty_fd, os_getpid()) < 0) {
+		printk(UM_KERN_ERR "winch_thread : tcsetpgrp failed on "
+		       "fd %d err = %d\n", pty_fd, errno);
 		exit(1);
 	}
 
-	/* These are synchronization calls between various UML threads on the
+	/*
+	 * These are synchronization calls between various UML threads on the
 	 * host - since they are not different kernel threads, we cannot use
 	 * kernel semaphores. We don't use SysV semaphores because they are
-	 * persistent. */
-	count = os_read_file(pipe_fd, &c, sizeof(c));
-	if(count != sizeof(c))
-		printk("winch_thread : failed to read synchronization byte, "
-		       "err = %d\n", -count);
-
-	while(1){
-		/* This will be interrupted by SIGWINCH only, since
+	 * persistent.
+	 */
+	count = read(pipe_fd, &c, sizeof(c));
+	if (count != sizeof(c))
+		printk(UM_KERN_ERR "winch_thread : failed to read "
+		       "synchronization byte, err = %d\n", errno);
+
+	while(1) {
+		/*
+		 * This will be interrupted by SIGWINCH only, since
 		 * other signals are blocked.
 		 */
 		sigsuspend(&sigs);
 
-		count = os_write_file(pipe_fd, &c, sizeof(c));
-		if(count != sizeof(c))
-			printk("winch_thread : write failed, err = %d\n",
-			       -count);
+		count = write(pipe_fd, &c, sizeof(c));
+		if (count != sizeof(c))
+			printk(UM_KERN_ERR "winch_thread : write failed, "
+			       "err = %d\n", errno);
 	}
 }
 
@@ -149,44 +216,49 @@ static int winch_tramp(int fd, struct tty_struct *tty, int *fd_out,
 	char c;
 
 	err = os_pipe(fds, 1, 1);
-	if(err < 0){
-		printk("winch_tramp : os_pipe failed, err = %d\n", -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "winch_tramp : os_pipe failed, err = %d\n",
+		       -err);
 		goto out;
 	}
 
 	data = ((struct winch_data) { .pty_fd 		= fd,
 				      .pipe_fd 		= fds[1] } );
-	/* CLONE_FILES so this thread doesn't hold open files which are open
+	/*
+	 * CLONE_FILES so this thread doesn't hold open files which are open
 	 * now, but later closed in a different thread.  This is a
 	 * problem with /dev/net/tun, which if held open by this
 	 * thread, prevents the TUN/TAP device from being reused.
 	 */
 	err = run_helper_thread(winch_thread, &data, CLONE_FILES, stack_out);
-	if(err < 0){
-		printk("fork of winch_thread failed - errno = %d\n", -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "fork of winch_thread failed - errno = %d\n",
+		       -err);
 		goto out_close;
 	}
 
 	*fd_out = fds[0];
-	n = os_read_file(fds[0], &c, sizeof(c));
-	if(n != sizeof(c)){
-		printk("winch_tramp : failed to read synchronization byte\n");
-		printk("read failed, err = %d\n", -n);
-		printk("fd %d will not support SIGWINCH\n", fd);
-                err = -EINVAL;
+	n = read(fds[0], &c, sizeof(c));
+	if (n != sizeof(c)) {
+		printk(UM_KERN_ERR "winch_tramp : failed to read "
+		       "synchronization byte\n");
+		printk(UM_KERN_ERR "read failed, err = %d\n", errno);
+		printk(UM_KERN_ERR "fd %d will not support SIGWINCH\n", fd);
+		err = -EINVAL;
 		goto out_close;
 	}
 
 	if (os_set_fd_block(*fd_out, 0)) {
-		printk("winch_tramp: failed to set thread_fd non-blocking.\n");
+		printk(UM_KERN_ERR "winch_tramp: failed to set thread_fd "
+		       "non-blocking.\n");
 		goto out_close;
 	}
 
 	return err;
 
  out_close:
-	os_close_file(fds[1]);
-	os_close_file(fds[0]);
+	close(fds[1]);
+	close(fds[0]);
  out:
 	return err;
 }
@@ -197,21 +269,20 @@ void register_winch(int fd, struct tty_struct *tty)
 	int pid, thread, count, thread_fd = -1;
 	char c = 1;
 
-	if(!isatty(fd))
+	if (!isatty(fd))
 		return;
 
 	pid = tcgetpgrp(fd);
-	if (!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, tty) &&
-	    (pid == -1)) {
+	if (!is_skas_winch(pid, fd, tty) && (pid == -1)) {
 		thread = winch_tramp(fd, tty, &thread_fd, &stack);
 		if (thread < 0)
 			return;
 
 		register_winch_irq(thread_fd, fd, thread, tty, stack);
 
-		count = os_write_file(thread_fd, &c, sizeof(c));
-		if(count != sizeof(c))
-			printk("register_winch : failed to write "
-			       "synchronization byte, err = %d\n", -count);
+		count = write(thread_fd, &c, sizeof(c));
+		if (count != sizeof(c))
+			printk(UM_KERN_ERR "register_winch : failed to write "
+			       "synchronization byte, err = %d\n", errno);
 	}
 }
diff --git a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
index 0ec4052db9c..93f227a25ba 100644
--- a/arch/um/drivers/cow_user.c
+++ b/arch/um/drivers/cow_user.c
@@ -1,17 +1,18 @@
-#include <stddef.h>
-#include <string.h>
-#include <errno.h>
-/* _XOPEN_SOURCE is needed for pread, but we define _GNU_SOURCE, which defines
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
+ * Licensed under the GPL
+ */
+
+/*
+ * _XOPEN_SOURCE is needed for pread, but we define _GNU_SOURCE, which defines
  * that.
  */
 #include <unistd.h>
 #include <byteswap.h>
-#include <sys/time.h>
-#include <sys/param.h>
-#include <sys/user.h>
-
-#include "os.h"
-
+#include <errno.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <asm/types.h>
 #include "cow.h"
 #include "cow_sys.h"
 
@@ -28,7 +29,8 @@ struct cow_header_v1 {
 	__s32 sectorsize;
 } __attribute__((packed));
 
-/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in
+/*
+ * Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in
  * case other systems have different values for MAXPATHLEN.
  *
  * The same must hold for V2 - we want file format compatibility, not anything
@@ -46,7 +48,8 @@ struct cow_header_v2 {
 	__s32 sectorsize;
 } __attribute__((packed));
 
-/* Changes from V2 -
+/*
+ * Changes from V2 -
  *	PATH_LEN_V3 as described above
  *	Explicitly specify field bit lengths for systems with different
  *		lengths for the usual C types.  Not sure whether char or
@@ -70,7 +73,8 @@ struct cow_header_v2 {
  *	Fixed (finally!) the rounding bug
  */
 
-/* Until Dec2005, __attribute__((packed)) was left out from the below
+/*
+ * Until Dec2005, __attribute__((packed)) was left out from the below
  * definition, leading on 64-bit systems to 4 bytes of padding after mtime, to
  * align size to 8-byte alignment.  This shifted all fields above (no padding
  * was present on 32-bit, no other padding was added).
@@ -122,7 +126,7 @@ void cow_sizes(int version, __u64 size, int sectorsize, int align,
 	       int bitmap_offset, unsigned long *bitmap_len_out,
 	       int *data_offset_out)
 {
-	if(version < 3){
+	if (version < 3) {
 		*bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
 
 		*data_offset_out = bitmap_offset + *bitmap_len_out;
@@ -144,46 +148,46 @@ static int absolutize(char *to, int size, char *from)
 	char save_cwd[256], *slash;
 	int remaining;
 
-	if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
+	if (getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
 		cow_printf("absolutize : unable to get cwd - errno = %d\n",
 			   errno);
-		return(-1);
+		return -1;
 	}
 	slash = strrchr(from, '/');
-	if(slash != NULL){
+	if (slash != NULL) {
 		*slash = '\0';
-		if(chdir(from)){
+		if (chdir(from)) {
 			*slash = '/';
 			cow_printf("absolutize : Can't cd to '%s' - "
 				   "errno = %d\n", from, errno);
-			return(-1);
+			return -1;
 		}
 		*slash = '/';
-		if(getcwd(to, size) == NULL){
+		if (getcwd(to, size) == NULL) {
 			cow_printf("absolutize : unable to get cwd of '%s' - "
 			       "errno = %d\n", from, errno);
-			return(-1);
+			return -1;
 		}
 		remaining = size - strlen(to);
-		if(strlen(slash) + 1 > remaining){
+		if (strlen(slash) + 1 > remaining) {
 			cow_printf("absolutize : unable to fit '%s' into %d "
 			       "chars\n", from, size);
-			return(-1);
+			return -1;
 		}
 		strcat(to, slash);
 	}
 	else {
-		if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){
+		if (strlen(save_cwd) + 1 + strlen(from) + 1 > size) {
 			cow_printf("absolutize : unable to fit '%s' into %d "
 			       "chars\n", from, size);
-			return(-1);
+			return -1;
 		}
 		strcpy(to, save_cwd);
 		strcat(to, "/");
 		strcat(to, from);
 	}
 	chdir(save_cwd);
-	return(0);
+	return 0;
 }
 
 int write_cow_header(char *cow_file, int fd, char *backing_file,
@@ -194,22 +198,23 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
 	int err;
 
 	err = cow_seek_file(fd, 0);
-	if(err < 0){
+	if (err < 0) {
 		cow_printf("write_cow_header - lseek failed, err = %d\n", -err);
 		goto out;
 	}
 
 	err = -ENOMEM;
 	header = cow_malloc(sizeof(*header));
-	if(header == NULL){
-		cow_printf("write_cow_header - failed to allocate COW V3 header\n");
+	if (header == NULL) {
+		cow_printf("write_cow_header - failed to allocate COW V3 "
+			   "header\n");
 		goto out;
 	}
 	header->magic = htonl(COW_MAGIC);
 	header->version = htonl(COW_VERSION);
 
 	err = -EINVAL;
-	if(strlen(backing_file) > sizeof(header->backing_file) - 1){
+	if (strlen(backing_file) > sizeof(header->backing_file) - 1) {
 		/* Below, %zd is for a size_t value */
 		cow_printf("Backing file name \"%s\" is too long - names are "
 			   "limited to %zd characters\n", backing_file,
@@ -217,12 +222,12 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
 		goto out_free;
 	}
 
-	if(absolutize(header->backing_file, sizeof(header->backing_file),
+	if (absolutize(header->backing_file, sizeof(header->backing_file),
 		      backing_file))
 		goto out_free;
 
 	err = os_file_modtime(header->backing_file, &modtime);
-	if(err < 0){
+	if (err < 0) {
 		cow_printf("write_cow_header - backing file '%s' mtime "
 			   "request failed, err = %d\n", header->backing_file,
 			   -err);
@@ -230,7 +235,7 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
 	}
 
 	err = cow_file_size(header->backing_file, size);
-	if(err < 0){
+	if (err < 0) {
 		cow_printf("write_cow_header - couldn't get size of "
 			   "backing file '%s', err = %d\n",
 			   header->backing_file, -err);
@@ -244,7 +249,7 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
 	header->cow_format = COW_BITMAP;
 
 	err = cow_write_file(fd, header, sizeof(*header));
-	if(err != sizeof(*header)){
+	if (err != sizeof(*header)) {
 		cow_printf("write_cow_header - write of header to "
 			   "new COW file '%s' failed, err = %d\n", cow_file,
 			   -err);
@@ -254,14 +259,14 @@ int write_cow_header(char *cow_file, int fd, char *backing_file,
  out_free:
 	cow_free(header);
  out:
-	return(err);
+	return err;
 }
 
 int file_reader(__u64 offset, char *buf, int len, void *arg)
 {
 	int fd = *((int *) arg);
 
-	return(pread(fd, buf, len, offset));
+	return pread(fd, buf, len, offset);
 }
 
 /* XXX Need to sanity-check the values read from the header */
@@ -278,31 +283,29 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 	unsigned long version, magic;
 
 	header = cow_malloc(sizeof(*header));
-	if(header == NULL){
+	if (header == NULL) {
 	        cow_printf("read_cow_header - Failed to allocate header\n");
-		return(-ENOMEM);
+		return -ENOMEM;
 	}
 	err = -EINVAL;
 	n = (*reader)(0, (char *) header, sizeof(*header), arg);
-	if(n < offsetof(typeof(header->v1), backing_file)){
+	if (n < offsetof(typeof(header->v1), backing_file)) {
 		cow_printf("read_cow_header - short header\n");
 		goto out;
 	}
 
 	magic = header->v1.magic;
-	if(magic == COW_MAGIC) {
+	if (magic == COW_MAGIC)
 		version = header->v1.version;
-	}
-	else if(magic == ntohl(COW_MAGIC)){
+	else if (magic == ntohl(COW_MAGIC))
 		version = ntohl(header->v1.version);
-	}
 	/* No error printed because the non-COW case comes through here */
 	else goto out;
 
 	*version_out = version;
 
-	if(version == 1){
-		if(n < sizeof(header->v1)){
+	if (version == 1) {
+		if (n < sizeof(header->v1)) {
 			cow_printf("read_cow_header - failed to read V1 "
 				   "header\n");
 			goto out;
@@ -314,8 +317,8 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 		*align_out = *sectorsize_out;
 		file = header->v1.backing_file;
 	}
-	else if(version == 2){
-		if(n < sizeof(header->v2)){
+	else if (version == 2) {
+		if (n < sizeof(header->v2)) {
 			cow_printf("read_cow_header - failed to read V2 "
 				   "header\n");
 			goto out;
@@ -328,8 +331,8 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 		file = header->v2.backing_file;
 	}
 	/* This is very subtle - see above at union cow_header definition */
-	else if(version == 3 && (*((int*)header->v3.backing_file) != 0)){
-		if(n < sizeof(header->v3)){
+	else if (version == 3 && (*((int*)header->v3.backing_file) != 0)) {
+		if (n < sizeof(header->v3)) {
 			cow_printf("read_cow_header - failed to read V3 "
 				   "header\n");
 			goto out;
@@ -345,17 +348,18 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 		*bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out);
 		file = header->v3.backing_file;
 	}
-	else if(version == 3){
+	else if (version == 3) {
 		cow_printf("read_cow_header - broken V3 file with"
 			   " 64-bit layout - recovering content.\n");
 
-		if(n < sizeof(header->v3_b)){
+		if (n < sizeof(header->v3_b)) {
 			cow_printf("read_cow_header - failed to read V3 "
 				   "header\n");
 			goto out;
 		}
 
-		/* this was used until Dec2005 - 64bits are needed to represent
+		/*
+		 * this was used until Dec2005 - 64bits are needed to represent
 		 * 2038+. I.e. we can safely do this truncating cast.
 		 *
 		 * Additionally, we must use ntohl() instead of ntohll(), since
@@ -381,7 +385,7 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 	}
 	err = -ENOMEM;
 	*backing_file_out = cow_strdup(file);
-	if(*backing_file_out == NULL){
+	if (*backing_file_out == NULL) {
 		cow_printf("read_cow_header - failed to allocate backing "
 			   "file\n");
 		goto out;
@@ -389,7 +393,7 @@ int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
 	err = 0;
  out:
 	cow_free(header);
-	return(err);
+	return err;
 }
 
 int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
@@ -402,7 +406,7 @@ int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
 
 	err = write_cow_header(cow_file, fd, backing_file, sectorsize,
 			       alignment, &size);
-	if(err)
+	if (err)
 		goto out;
 
 	*bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment);
@@ -411,17 +415,18 @@ int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
 
 	offset = *data_offset_out + size - sizeof(zero);
 	err = cow_seek_file(fd, offset);
-	if(err < 0){
+	if (err < 0) {
 		cow_printf("cow bitmap lseek failed : err = %d\n", -err);
 		goto out;
 	}
 
-	/* does not really matter how much we write it is just to set EOF
+	/*
+	 * does not really matter how much we write it is just to set EOF
 	 * this also sets the entire COW bitmap
 	 * to zero without having to allocate it
 	 */
 	err = cow_write_file(fd, &zero, sizeof(zero));
-	if(err != sizeof(zero)){
+	if (err != sizeof(zero)) {
 		cow_printf("Write of bitmap to new COW file '%s' failed, "
 			   "err = %d\n", cow_file, -err);
 		if (err >= 0)
@@ -429,15 +434,7 @@ int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
 		goto out;
 	}
 
-	return(0);
-
+	return 0;
  out:
-	return(err);
+	return err;
 }
-
-/*
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h
index 3bc3cf6b94a..6e0e891f8a0 100644
--- a/arch/um/drivers/daemon.h
+++ b/arch/um/drivers/daemon.h
@@ -1,8 +1,11 @@
-/* 
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
+#ifndef __DAEMON_H__
+#define __DAEMON_H__
+
 #include "net_user.h"
 
 #define SWITCH_VERSION 3
@@ -20,16 +23,7 @@ struct daemon_data {
 
 extern const struct net_user_info daemon_user_info;
 
-extern int daemon_user_write(int fd, void *buf, int len, 
+extern int daemon_user_write(int fd, void *buf, int len,
 			     struct daemon_data *pri);
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+#endif
diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
index adeece11e59..d53ff52bb40 100644
--- a/arch/um/drivers/daemon_kern.c
+++ b/arch/um/drivers/daemon_kern.c
@@ -1,16 +1,14 @@
 /*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and 
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 by various other people who didn't put their name here.
  * Licensed under the GPL.
  */
 
-#include "linux/kernel.h"
 #include "linux/init.h"
-#include "linux/netdevice.h"
-#include "linux/etherdevice.h"
+#include <linux/netdevice.h>
 #include "net_kern.h"
-#include "net_user.h"
 #include "daemon.h"
 
 struct daemon_init {
@@ -36,25 +34,21 @@ static void daemon_init(struct net_device *dev, void *data)
 	dpri->data_addr = NULL;
 	dpri->local_addr = NULL;
 
-	printk("daemon backend (uml_switch version %d) - %s:%s", 
+	printk("daemon backend (uml_switch version %d) - %s:%s",
 	       SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock);
 	printk("\n");
 }
 
-static int daemon_read(int fd, struct sk_buff **skb, 
-		       struct uml_net_private *lp)
+static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
-	if(*skb == NULL) return(-ENOMEM);
-	return(net_recvfrom(fd, skb_mac_header(*skb),
-			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
+	return net_recvfrom(fd, skb_mac_header(skb),
+			    skb->dev->mtu + ETH_HEADER_OTHER);
 }
 
-static int daemon_write(int fd, struct sk_buff **skb,
-			struct uml_net_private *lp)
+static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	return(daemon_user_write(fd, (*skb)->data, (*skb)->len, 
-				 (struct daemon_data *) &lp->user));
+	return daemon_user_write(fd, skb->data, skb->len,
+				 (struct daemon_data *) &lp->user);
 }
 
 static const struct net_kern_info daemon_kern_info = {
@@ -72,14 +66,14 @@ static int daemon_setup(char *str, char **mac_out, void *data)
 	*init = ((struct daemon_init)
 		{ .sock_type 		= "unix",
 		  .ctl_sock 		= "/tmp/uml.ctl" });
-	
+
 	remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock,
 			       NULL);
-	if(remain != NULL)
+	if (remain != NULL)
 		printk(KERN_WARNING "daemon_setup : Ignoring data socket "
 		       "specification\n");
-	
-	return(1);
+
+	return 1;
 }
 
 static struct transport daemon_transport = {
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index 8d2008f0668..f23c109a055 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -1,24 +1,23 @@
 /*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and 
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
  * Licensed under the GPL.
  */
 
-#include <errno.h>
-#include <unistd.h>
 #include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
 #include <sys/socket.h>
-#include <sys/un.h>
 #include <sys/time.h>
-#include "net_user.h"
+#include <sys/un.h>
 #include "daemon.h"
-#include "kern_util.h"
-#include "user.h"
+#include "net_user.h"
 #include "os.h"
 #include "um_malloc.h"
-
-#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER)
+#include "user.h"
 
 enum request_type { REQ_NEW_CONTROL };
 
@@ -36,8 +35,9 @@ static struct sockaddr_un *new_addr(void *name, int len)
 	struct sockaddr_un *sun;
 
 	sun = kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
-	if(sun == NULL){
-		printk("new_addr: allocation of sockaddr_un failed\n");
+	if (sun == NULL) {
+		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
+		       "failed\n");
 		return NULL;
 	}
 	sun->sun_family = AF_UNIX;
@@ -54,38 +54,39 @@ static int connect_to_switch(struct daemon_data *pri)
 	int fd, n, err;
 
 	pri->control = socket(AF_UNIX, SOCK_STREAM, 0);
-	if(pri->control < 0){
+	if (pri->control < 0) {
 		err = -errno;
-		printk("daemon_open : control socket failed, errno = %d\n", 
-		       -err);
+		printk(UM_KERN_ERR "daemon_open : control socket failed, "
+		       "errno = %d\n", -err);
 		return err;
 	}
 
-	if(connect(pri->control, (struct sockaddr *) ctl_addr, 
-		   sizeof(*ctl_addr)) < 0){
+	if (connect(pri->control, (struct sockaddr *) ctl_addr,
+		   sizeof(*ctl_addr)) < 0) {
 		err = -errno;
-		printk("daemon_open : control connect failed, errno = %d\n",
-		       -err);
+		printk(UM_KERN_ERR "daemon_open : control connect failed, "
+		       "errno = %d\n", -err);
 		goto out;
 	}
 
 	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
-	if(fd < 0){
+	if (fd < 0) {
 		err = -errno;
-		printk("daemon_open : data socket failed, errno = %d\n",
-		       -err);
+		printk(UM_KERN_ERR "daemon_open : data socket failed, "
+		       "errno = %d\n", -err);
 		goto out;
 	}
-	if(bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0){
+	if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) {
 		err = -errno;
-		printk("daemon_open : data bind failed, errno = %d\n",
-		       -err);
+		printk(UM_KERN_ERR "daemon_open : data bind failed, "
+		       "errno = %d\n", -err);
 		goto out_close;
 	}
 
 	sun = kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
-	if(sun == NULL){
-		printk("new_addr: allocation of sockaddr_un failed\n");
+	if (sun == NULL) {
+		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
+		       "failed\n");
 		err = -ENOMEM;
 		goto out_close;
 	}
@@ -94,18 +95,18 @@ static int connect_to_switch(struct daemon_data *pri)
 	req.version = SWITCH_VERSION;
 	req.type = REQ_NEW_CONTROL;
 	req.sock = *local_addr;
-	n = os_write_file(pri->control, &req, sizeof(req));
-	if(n != sizeof(req)){
-		printk("daemon_open : control setup request failed, err = %d\n",
-		       -n);
+	n = write(pri->control, &req, sizeof(req));
+	if (n != sizeof(req)) {
+		printk(UM_KERN_ERR "daemon_open : control setup request "
+		       "failed, err = %d\n", -errno);
 		err = -ENOTCONN;
 		goto out_free;
 	}
 
-	n = os_read_file(pri->control, sun, sizeof(*sun));
-	if(n != sizeof(*sun)){
-		printk("daemon_open : read of data socket failed, err = %d\n",
-		       -n);
+	n = read(pri->control, sun, sizeof(*sun));
+	if (n != sizeof(*sun)) {
+		printk(UM_KERN_ERR "daemon_open : read of data socket failed, "
+		       "err = %d\n", -errno);
 		err = -ENOTCONN;
 		goto out_free;
 	}
@@ -116,9 +117,9 @@ static int connect_to_switch(struct daemon_data *pri)
  out_free:
 	kfree(sun);
  out_close:
-	os_close_file(fd);
+	close(fd);
  out:
-	os_close_file(pri->control);
+	close(pri->control);
 	return err;
 }
 
@@ -132,8 +133,8 @@ static int daemon_user_init(void *data, void *dev)
 		int usecs;
 	} name;
 
-	if(!strcmp(pri->sock_type, "unix"))
-		pri->ctl_addr = new_addr(pri->ctl_sock, 
+	if (!strcmp(pri->sock_type, "unix"))
+		pri->ctl_addr = new_addr(pri->ctl_sock,
 					 strlen(pri->ctl_sock) + 1);
 	name.zero = 0;
 	name.pid = os_getpid();
@@ -142,7 +143,7 @@ static int daemon_user_init(void *data, void *dev)
 	pri->local_addr = new_addr(&name, sizeof(name));
 	pri->dev = dev;
 	pri->fd = connect_to_switch(pri);
-	if(pri->fd < 0){
+	if (pri->fd < 0) {
 		kfree(pri->local_addr);
 		pri->local_addr = NULL;
 		return pri->fd;
@@ -161,9 +162,9 @@ static void daemon_remove(void *data)
 {
 	struct daemon_data *pri = data;
 
-	os_close_file(pri->fd);
+	close(pri->fd);
 	pri->fd = -1;
-	os_close_file(pri->control);
+	close(pri->control);
 	pri->control = -1;
 
 	kfree(pri->data_addr);
@@ -181,18 +182,13 @@ int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri)
 	return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
 }
 
-static int daemon_set_mtu(int mtu, void *data)
-{
-	return mtu;
-}
-
 const struct net_user_info daemon_user_info = {
 	.init		= daemon_user_init,
 	.open		= daemon_open,
 	.close	 	= NULL,
 	.remove	 	= daemon_remove,
-	.set_mtu	= daemon_set_mtu,
 	.add_address	= NULL,
 	.delete_address = NULL,
-	.max_packet	= MAX_PACKET - ETH_HEADER_OTHER
+	.mtu		= ETH_MAX_PACKET,
+	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
 };
diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c
index 39c01ffd45c..0a2bb5b64b8 100644
--- a/arch/um/drivers/fd.c
+++ b/arch/um/drivers/fd.c
@@ -1,17 +1,18 @@
-/* 
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <termios.h>
 #include <errno.h>
-#include "user.h"
+#include <termios.h>
 #include "chan_user.h"
+#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
+#include "user.h"
 
 struct fd_chan {
 	int fd;
@@ -26,22 +27,26 @@ static void *fd_init(char *str, int device, const struct chan_opts *opts)
 	char *end;
 	int n;
 
-	if(*str != ':'){
-		printk("fd_init : channel type 'fd' must specify a file "
-		       "descriptor\n");
-		return(NULL);
+	if (*str != ':') {
+		printk(UM_KERN_ERR "fd_init : channel type 'fd' must specify a "
+		       "file descriptor\n");
+		return NULL;
 	}
 	str++;
 	n = strtoul(str, &end, 0);
-	if((*end != '\0') || (end == str)){
-		printk("fd_init : couldn't parse file descriptor '%s'\n", str);
-		return(NULL);
+	if ((*end != '\0') || (end == str)) {
+		printk(UM_KERN_ERR "fd_init : couldn't parse file descriptor "
+		       "'%s'\n", str);
+		return NULL;
 	}
+
 	data = kmalloc(sizeof(*data), UM_GFP_KERNEL);
-	if(data == NULL) return(NULL);
+	if (data == NULL)
+		return NULL;
+
 	*data = ((struct fd_chan) { .fd  	= n,
 				    .raw  	= opts->raw });
-	return(data);
+	return data;
 }
 
 static int fd_open(int input, int output, int primary, void *d, char **dev_out)
@@ -49,18 +54,18 @@ static int fd_open(int input, int output, int primary, void *d, char **dev_out)
 	struct fd_chan *data = d;
 	int err;
 
-	if(data->raw && isatty(data->fd)){
+	if (data->raw && isatty(data->fd)) {
 		CATCH_EINTR(err = tcgetattr(data->fd, &data->tt));
-		if(err)
-			return(err);
+		if (err)
+			return err;
 
 		err = raw(data->fd);
-		if(err)
-			return(err);
+		if (err)
+			return err;
 	}
 	sprintf(data->str, "%d", data->fd);
 	*dev_out = data->str;
-	return(data->fd);
+	return data->fd;
 }
 
 static void fd_close(int fd, void *d)
@@ -68,13 +73,14 @@ static void fd_close(int fd, void *d)
 	struct fd_chan *data = d;
 	int err;
 
-	if(data->raw && isatty(fd)){
-		CATCH_EINTR(err = tcsetattr(fd, TCSAFLUSH, &data->tt));
-		if(err)
-			printk("Failed to restore terminal state - "
-			       "errno = %d\n", -err);
-		data->raw = 0;
-	}
+	if (!data->raw || !isatty(fd))
+		return;
+
+	CATCH_EINTR(err = tcsetattr(fd, TCSAFLUSH, &data->tt));
+	if (err)
+		printk(UM_KERN_ERR "Failed to restore terminal state - "
+		       "errno = %d\n", -err);
+	data->raw = 0;
 }
 
 const struct chan_ops fd_ops = {
@@ -89,14 +95,3 @@ const struct chan_ops fd_ops = {
 	.free		= generic_free,
 	.winch		= 1,
 };
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 55601687b3b..a9ad4bd6d95 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -69,7 +69,7 @@ static int harddog_open(struct inode *inode, struct file *file)
 	spin_lock(&lock);
 	if(timer_alive)
 		goto err;
-#ifdef CONFIG_HARDDOG_NOWAYOUT
+#ifdef CONFIG_WATCHDOG_NOWAYOUT
 	__module_get(THIS_MODULE);
 #endif
 
diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
index 1171790f742..b56f8e0196a 100644
--- a/arch/um/drivers/harddog_user.c
+++ b/arch/um/drivers/harddog_user.c
@@ -1,16 +1,13 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
-#include "user.h"
-#include "mconsole.h"
 #include "os.h"
-#include "choose-mode.h"
-#include "mode.h"
+#include "user.h"
 
 struct dog_data {
 	int stdin;
@@ -25,10 +22,10 @@ static void pre_exec(void *d)
 	dup2(data->stdin, 0);
 	dup2(data->stdout, 1);
 	dup2(data->stdout, 2);
-	os_close_file(data->stdin);
-	os_close_file(data->stdout);
-	os_close_file(data->close_me[0]);
-	os_close_file(data->close_me[1]);
+	close(data->stdin);
+	close(data->stdout);
+	close(data->close_me[0]);
+	close(data->close_me[1]);
 }
 
 int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock)
@@ -42,13 +39,13 @@ int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock)
 	char **args = NULL;
 
 	err = os_pipe(in_fds, 1, 0);
-	if(err < 0){
+	if (err < 0) {
 		printk("harddog_open - os_pipe failed, err = %d\n", -err);
 		goto out;
 	}
 
 	err = os_pipe(out_fds, 1, 0);
-	if(err < 0){
+	if (err < 0) {
 		printk("harddog_open - os_pipe failed, err = %d\n", -err);
 		goto out_close_in;
 	}
@@ -58,37 +55,37 @@ int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock)
 	data.close_me[0] = out_fds[1];
 	data.close_me[1] = in_fds[0];
 
-	if(sock != NULL){
+	if (sock != NULL) {
 		mconsole_args[2] = sock;
 		args = mconsole_args;
 	}
 	else {
 		/* XXX The os_getpid() is not SMP correct */
-		sprintf(pid_buf, "%d", CHOOSE_MODE(tracing_pid, os_getpid()));
+		sprintf(pid_buf, "%d", os_getpid());
 		args = pid_args;
 	}
 
 	pid = run_helper(pre_exec, &data, args);
 
-	os_close_file(out_fds[0]);
-	os_close_file(in_fds[1]);
+	close(out_fds[0]);
+	close(in_fds[1]);
 
-	if(pid < 0){
+	if (pid < 0) {
 		err = -pid;
 		printk("harddog_open - run_helper failed, errno = %d\n", -err);
 		goto out_close_out;
 	}
 
-	n = os_read_file(in_fds[0], &c, sizeof(c));
-	if(n == 0){
+	n = read(in_fds[0], &c, sizeof(c));
+	if (n == 0) {
 		printk("harddog_open - EOF on watchdog pipe\n");
 		helper_wait(pid);
 		err = -EIO;
 		goto out_close_out;
 	}
-	else if(n < 0){
+	else if (n < 0) {
+		err = -errno;
 		printk("harddog_open - read of watchdog pipe failed, "
-		       "err = %d\n", -n);
+		       "err = %d\n", -err);
 		helper_wait(pid);
-		err = n;
 		goto out_close_out;
@@ -98,19 +95,19 @@ int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock)
 	return 0;
 
  out_close_in:
-	os_close_file(in_fds[0]);
-	os_close_file(in_fds[1]);
+	close(in_fds[0]);
+	close(in_fds[1]);
  out_close_out:
-	os_close_file(out_fds[0]);
-	os_close_file(out_fds[1]);
+	close(out_fds[0]);
+	close(out_fds[1]);
  out:
 	return err;
 }
 
 void stop_watchdog(int in_fd, int out_fd)
 {
-	os_close_file(in_fd);
-	os_close_file(out_fd);
+	close(in_fd);
+	close(out_fd);
 }
 
 int ping_watchdog(int fd)
@@ -118,10 +115,11 @@ int ping_watchdog(int fd)
 	int n;
 	char c = '\n';
 
-	n = os_write_file(fd, &c, sizeof(c));
-	if(n != sizeof(c)){
-		printk("ping_watchdog - write failed, err = %d\n", -n);
-		if(n < 0)
+	n = write(fd, &c, sizeof(c));
+	if (n != sizeof(c)) {
+		printk("ping_watchdog - write failed, ret = %d, err = %d\n",
+		       n, errno);
+		if (n < 0)
 			return n;
 		return -EIO;
 	}
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 10e08a8c17c..ff1b22b69e9 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -1,16 +1,14 @@
-/* 
- * Copyright (C) 2002 Steve Schmidtke 
+/*
+ * Copyright (C) 2002 Steve Schmidtke
  * Licensed under the GPL
  */
 
+#include "linux/fs.h"
 #include "linux/module.h"
-#include "linux/init.h"
 #include "linux/slab.h"
-#include "linux/fs.h"
 #include "linux/sound.h"
 #include "linux/soundcard.h"
 #include "asm/uaccess.h"
-#include "kern_util.h"
 #include "init.h"
 #include "os.h"
 
@@ -25,7 +23,8 @@ struct hostmixer_state {
 #define HOSTAUDIO_DEV_DSP "/dev/sound/dsp"
 #define HOSTAUDIO_DEV_MIXER "/dev/sound/mixer"
 
-/* Changed either at boot time or module load time.  At boot, this is
+/*
+ * Changed either at boot time or module load time.  At boot, this is
  * single-threaded; at module load, multiple modules would each have
  * their own copy of these variables.
  */
@@ -44,7 +43,7 @@ static char *mixer = HOSTAUDIO_DEV_MIXER;
 static int set_dsp(char *name, int *add)
 {
 	dsp = name;
-	return(0);
+	return 0;
 }
 
 __uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP);
@@ -52,7 +51,7 @@ __uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP);
 static int set_mixer(char *name, int *add)
 {
 	mixer = name;
-	return(0);
+	return 0;
 }
 
 __uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP);
@@ -77,23 +76,23 @@ static ssize_t hostaudio_read(struct file *file, char __user *buffer,
 	int err;
 
 #ifdef DEBUG
-	printk("hostaudio: read called, count = %d\n", count);
+	printk(KERN_DEBUG "hostaudio: read called, count = %d\n", count);
 #endif
 
 	kbuf = kmalloc(count, GFP_KERNEL);
-	if(kbuf == NULL)
-		return(-ENOMEM);
+	if (kbuf == NULL)
+		return -ENOMEM;
 
 	err = os_read_file(state->fd, kbuf, count);
-	if(err < 0)
+	if (err < 0)
 		goto out;
 
-	if(copy_to_user(buffer, kbuf, err))
+	if (copy_to_user(buffer, kbuf, err))
 		err = -EFAULT;
 
 out:
 	kfree(kbuf);
-	return(err);
+	return err;
 }
 
 static ssize_t hostaudio_write(struct file *file, const char __user *buffer,
@@ -104,40 +103,40 @@ static ssize_t hostaudio_write(struct file *file, const char __user *buffer,
 	int err;
 
 #ifdef DEBUG
-	printk("hostaudio: write called, count = %d\n", count);
+	printk(KERN_DEBUG "hostaudio: write called, count = %d\n", count);
 #endif
 
 	kbuf = kmalloc(count, GFP_KERNEL);
-	if(kbuf == NULL)
-		return(-ENOMEM);
+	if (kbuf == NULL)
+		return -ENOMEM;
 
 	err = -EFAULT;
-	if(copy_from_user(kbuf, buffer, count))
+	if (copy_from_user(kbuf, buffer, count))
 		goto out;
 
 	err = os_write_file(state->fd, kbuf, count);
-	if(err < 0)
+	if (err < 0)
 		goto out;
 	*ppos += err;
 
  out:
 	kfree(kbuf);
-	return(err);
+	return err;
 }
 
-static unsigned int hostaudio_poll(struct file *file, 
+static unsigned int hostaudio_poll(struct file *file,
 				   struct poll_table_struct *wait)
 {
 	unsigned int mask = 0;
 
 #ifdef DEBUG
-	printk("hostaudio: poll called (unimplemented)\n");
+	printk(KERN_DEBUG "hostaudio: poll called (unimplemented)\n");
 #endif
 
-	return(mask);
+	return mask;
 }
 
-static int hostaudio_ioctl(struct inode *inode, struct file *file, 
+static int hostaudio_ioctl(struct inode *inode, struct file *file,
 			   unsigned int cmd, unsigned long arg)
 {
 	struct hostaudio_state *state = file->private_data;
@@ -145,7 +144,7 @@ static int hostaudio_ioctl(struct inode *inode, struct file *file,
 	int err;
 
 #ifdef DEBUG
-	printk("hostaudio: ioctl called, cmd = %u\n", cmd);
+	printk(KERN_DEBUG "hostaudio: ioctl called, cmd = %u\n", cmd);
 #endif
 	switch(cmd){
 	case SNDCTL_DSP_SPEED:
@@ -154,8 +153,8 @@ static int hostaudio_ioctl(struct inode *inode, struct file *file,
 	case SNDCTL_DSP_CHANNELS:
 	case SNDCTL_DSP_SUBDIVIDE:
 	case SNDCTL_DSP_SETFRAGMENT:
-		if(get_user(data, (int __user *) arg))
-			return(-EFAULT);
+		if (get_user(data, (int __user *) arg))
+			return -EFAULT;
 		break;
 	default:
 		break;
@@ -170,14 +169,14 @@ static int hostaudio_ioctl(struct inode *inode, struct file *file,
 	case SNDCTL_DSP_CHANNELS:
 	case SNDCTL_DSP_SUBDIVIDE:
 	case SNDCTL_DSP_SETFRAGMENT:
-		if(put_user(data, (int __user *) arg))
-			return(-EFAULT);
+		if (put_user(data, (int __user *) arg))
+			return -EFAULT;
 		break;
 	default:
 		break;
 	}
 
-	return(err);
+	return err;
 }
 
 static int hostaudio_open(struct inode *inode, struct file *file)
@@ -187,24 +186,26 @@ static int hostaudio_open(struct inode *inode, struct file *file)
 	int ret;
 
 #ifdef DEBUG
-	printk("hostaudio: open called (host: %s)\n", dsp);
+	printk(KERN_DEBUG "hostaudio: open called (host: %s)\n", dsp);
 #endif
 
 	state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL);
-	if(state == NULL)
-		return(-ENOMEM);
+	if (state == NULL)
+		return -ENOMEM;
 
-	if(file->f_mode & FMODE_READ) r = 1;
-	if(file->f_mode & FMODE_WRITE) w = 1;
+	if (file->f_mode & FMODE_READ)
+		r = 1;
+	if (file->f_mode & FMODE_WRITE)
+		w = 1;
 
 	ret = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
-	if(ret < 0){
+	if (ret < 0) {
 		kfree(state);
-		return(ret);
+		return ret;
 	}
 	state->fd = ret;
 	file->private_data = state;
-	return(0);
+	return 0;
 }
 
 static int hostaudio_release(struct inode *inode, struct file *file)
@@ -212,26 +213,26 @@ static int hostaudio_release(struct inode *inode, struct file *file)
 	struct hostaudio_state *state = file->private_data;
 
 #ifdef DEBUG
-	printk("hostaudio: release called\n");
+	printk(KERN_DEBUG "hostaudio: release called\n");
 #endif
 	os_close_file(state->fd);
 	kfree(state);
 
-	return(0);
+	return 0;
 }
 
 /* /dev/mixer file operations */
 
-static int hostmixer_ioctl_mixdev(struct inode *inode, struct file *file, 
+static int hostmixer_ioctl_mixdev(struct inode *inode, struct file *file,
 				  unsigned int cmd, unsigned long arg)
 {
 	struct hostmixer_state *state = file->private_data;
 
 #ifdef DEBUG
-	printk("hostmixer: ioctl called\n");
+	printk(KERN_DEBUG "hostmixer: ioctl called\n");
 #endif
 
-	return(os_ioctl_generic(state->fd, cmd, arg));
+	return os_ioctl_generic(state->fd, cmd, arg);
 }
 
 static int hostmixer_open_mixdev(struct inode *inode, struct file *file)
@@ -241,26 +242,29 @@ static int hostmixer_open_mixdev(struct inode *inode, struct file *file)
 	int ret;
 
 #ifdef DEBUG
-	printk("hostmixer: open called (host: %s)\n", mixer);
+	printk(KERN_DEBUG "hostmixer: open called (host: %s)\n", mixer);
 #endif
 
 	state = kmalloc(sizeof(struct hostmixer_state), GFP_KERNEL);
-	if(state == NULL) return(-ENOMEM);
+	if (state == NULL)
+		return -ENOMEM;
 
-	if(file->f_mode & FMODE_READ) r = 1;
-	if(file->f_mode & FMODE_WRITE) w = 1;
+	if (file->f_mode & FMODE_READ)
+		r = 1;
+	if (file->f_mode & FMODE_WRITE)
+		w = 1;
 
 	ret = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
-        
-	if(ret < 0){
-		printk("hostaudio_open_mixdev failed to open '%s', err = %d\n",
-		       dsp, -ret);
+
+	if (ret < 0) {
+		printk(KERN_ERR "hostaudio_open_mixdev failed to open '%s', "
+		       "err = %d\n", mixer, -ret);
 		kfree(state);
-		return(ret);
+		return ret;
 	}
 
 	file->private_data = state;
-	return(0);
+	return 0;
 }
 
 static int hostmixer_release(struct inode *inode, struct file *file)
@@ -268,13 +272,13 @@ static int hostmixer_release(struct inode *inode, struct file *file)
 	struct hostmixer_state *state = file->private_data;
 
 #ifdef DEBUG
-	printk("hostmixer: release called\n");
+	printk(KERN_DEBUG "hostmixer: release called\n");
 #endif
 
 	os_close_file(state->fd);
 	kfree(state);
 
-	return(0);
+	return 0;
 }
 
 /* kernel module operations */
@@ -314,13 +318,13 @@ static int __init hostaudio_init_module(void)
 	       dsp, mixer);
 
 	module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
-	if(module_data.dev_audio < 0){
+	if (module_data.dev_audio < 0) {
 		printk(KERN_ERR "hostaudio: couldn't register DSP device!\n");
 		return -ENODEV;
 	}
 
 	module_data.dev_mixer = register_sound_mixer(&hostmixer_fops, -1);
-	if(module_data.dev_mixer < 0){
+	if (module_data.dev_mixer < 0) {
 		printk(KERN_ERR "hostmixer: couldn't register mixer "
 		       "device!\n");
 		unregister_sound_dsp(module_data.dev_audio);
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 3e0b68e297f..76fe0b0da99 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -1,22 +1,14 @@
 /*
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/sched.h"
-#include "linux/slab.h"
-#include "linux/list.h"
+#include "linux/irqreturn.h"
 #include "linux/kd.h"
-#include "linux/interrupt.h"
-#include "asm/uaccess.h"
 #include "chan_kern.h"
+#include "irq_kern.h"
 #include "irq_user.h"
-#include "line.h"
-#include "kern.h"
-#include "kern_util.h"
 #include "os.h"
-#include "irq_kern.h"
 
 #define LINE_BUFSIZE 4096
 
@@ -35,12 +27,13 @@ static void line_timer_cb(struct work_struct *work)
 {
 	struct line *line = container_of(work, struct line, task.work);
 
-	if(!line->throttled)
+	if (!line->throttled)
 		chan_interrupt(&line->chan_list, &line->task, line->tty,
 			       line->driver->read_irq);
 }
 
-/* Returns the free space inside the ring buffer of this line.
+/*
+ * Returns the free space inside the ring buffer of this line.
  *
  * Should be called while holding line->lock (this does not modify datas).
  */
@@ -107,11 +100,12 @@ static int buffer_data(struct line *line, const char *buf, int len)
 {
 	int end, room;
 
-	if(line->buffer == NULL){
+	if (line->buffer == NULL) {
 		line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC);
 		if (line->buffer == NULL) {
-			printk("buffer_data - atomic allocation failed\n");
-			return(0);
+			printk(KERN_ERR "buffer_data - atomic allocation "
+			       "failed\n");
+			return 0;
 		}
 		line->head = line->buffer;
 		line->tail = line->buffer;
@@ -122,7 +116,7 @@ static int buffer_data(struct line *line, const char *buf, int len)
 
 	end = line->buffer + LINE_BUFSIZE - line->tail;
 
-	if (len < end){
+	if (len < end) {
 		memcpy(line->tail, buf, len);
 		line->tail += len;
 	}
@@ -162,8 +156,10 @@ static int flush_buffer(struct line *line)
 		if (n < 0)
 			return n;
 		if (n == count) {
-			/* We have flushed from ->head to buffer end, now we
-			 * must flush only from the beginning to ->tail.*/
+			/*
+			 * We have flushed from ->head to buffer end, now we
+			 * must flush only from the beginning to ->tail.
+			 */
 			line->head = line->buffer;
 		} else {
 			line->head += n;
@@ -175,7 +171,7 @@ static int flush_buffer(struct line *line)
 	n = write_chan(&line->chan_list, line->head, count,
 		       line->driver->write_irq);
 
-	if(n < 0)
+	if (n < 0)
 		return n;
 
 	line->head += n;
@@ -189,19 +185,18 @@ void line_flush_buffer(struct tty_struct *tty)
 	int err;
 
 	/*XXX: copied from line_write, verify if it is correct!*/
-	if(tty->stopped)
+	if (tty->stopped)
 		return;
 
 	spin_lock_irqsave(&line->lock, flags);
 	err = flush_buffer(line);
-	/*if (err == 1)
-		err = 0;*/
 	spin_unlock_irqrestore(&line->lock, flags);
-	//return err;
 }
 
-/* We map both ->flush_chars and ->put_char (which go in pair) onto ->flush_buffer
- * and ->write. Hope it's not that bad.*/
+/*
+ * We map both ->flush_chars and ->put_char (which go in pair) onto
+ * ->flush_buffer and ->write. Hope it's not that bad.
+ */
 void line_flush_chars(struct tty_struct *tty)
 {
 	line_flush_buffer(tty);
@@ -216,18 +211,15 @@ int line_write(struct tty_struct *tty, const unsigned char *buf, int len)
 {
 	struct line *line = tty->driver_data;
 	unsigned long flags;
-	int n, err, ret = 0;
+	int n, ret = 0;
 
-	if(tty->stopped)
+	if (tty->stopped)
 		return 0;
 
 	spin_lock_irqsave(&line->lock, flags);
-	if (line->head != line->tail) {
+	if (line->head != line->tail)
 		ret = buffer_data(line, buf, len);
-		err = flush_buffer(line);
-		if (err <= 0 && (err != -EAGAIN || !ret))
-			ret = err;
-	} else {
+	else {
 		n = write_chan(&line->chan_list, buf, len,
 			       line->driver->write_irq);
 		if (n < 0) {
@@ -257,17 +249,17 @@ static const struct {
 } tty_ioctls[] = {
 	/* don't print these, they flood the log ... */
 	{ TCGETS,      NULL,       "TCGETS"      },
-        { TCSETS,      NULL,       "TCSETS"      },
-        { TCSETSW,     NULL,       "TCSETSW"     },
-        { TCFLSH,      NULL,       "TCFLSH"      },
-        { TCSBRK,      NULL,       "TCSBRK"      },
+	{ TCSETS,      NULL,       "TCSETS"      },
+	{ TCSETSW,     NULL,       "TCSETSW"     },
+	{ TCFLSH,      NULL,       "TCFLSH"      },
+	{ TCSBRK,      NULL,       "TCSBRK"      },
 
 	/* general tty stuff */
-        { TCSETSF,     KERN_DEBUG, "TCSETSF"     },
-        { TCGETA,      KERN_DEBUG, "TCGETA"      },
-        { TIOCMGET,    KERN_DEBUG, "TIOCMGET"    },
-        { TCSBRKP,     KERN_DEBUG, "TCSBRKP"     },
-        { TIOCMSET,    KERN_DEBUG, "TIOCMSET"    },
+	{ TCSETSF,     KERN_DEBUG, "TCSETSF"     },
+	{ TCGETA,      KERN_DEBUG, "TCGETA"      },
+	{ TIOCMGET,    KERN_DEBUG, "TIOCMGET"    },
+	{ TCSBRKP,     KERN_DEBUG, "TCSBRKP"     },
+	{ TIOCMSET,    KERN_DEBUG, "TIOCMSET"    },
 
 	/* linux-specific ones */
 	{ TIOCLINUX,   KERN_INFO,  "TIOCLINUX"   },
@@ -324,12 +316,7 @@ int line_ioctl(struct tty_struct *tty, struct file * file,
 		for (i = 0; i < ARRAY_SIZE(tty_ioctls); i++)
 			if (cmd == tty_ioctls[i].cmd)
 				break;
-		if (i < ARRAY_SIZE(tty_ioctls)) {
-			if (NULL != tty_ioctls[i].level)
-				printk("%s%s: %s: ioctl %s called\n",
-				       tty_ioctls[i].level, __FUNCTION__,
-				       tty->name, tty_ioctls[i].name);
-		} else {
+		if (i == ARRAY_SIZE(tty_ioctls)) {
 			printk(KERN_ERR "%s: %s: unknown ioctl: 0x%x\n",
 			       __FUNCTION__, tty->name, cmd);
 		}
@@ -355,11 +342,12 @@ void line_unthrottle(struct tty_struct *tty)
 	chan_interrupt(&line->chan_list, &line->task, tty,
 		       line->driver->read_irq);
 
-	/* Maybe there is enough stuff pending that calling the interrupt
+	/*
+	 * Maybe there is enough stuff pending that calling the interrupt
 	 * throttles us again.  In this case, line->throttled will be 1
 	 * again and we shouldn't turn the interrupt back on.
 	 */
-	if(!line->throttled)
+	if (!line->throttled)
 		reactivate_chan(&line->chan_list, line->driver->read_irq);
 }
 
@@ -370,27 +358,30 @@ static irqreturn_t line_write_interrupt(int irq, void *data)
 	struct tty_struct *tty = line->tty;
 	int err;
 
-	/* Interrupts are disabled here because we registered the interrupt with
-	 * IRQF_DISABLED (see line_setup_irq).*/
+	/*
+	 * Interrupts are disabled here because we registered the interrupt with
+	 * IRQF_DISABLED (see line_setup_irq).
+	 */
 
 	spin_lock(&line->lock);
 	err = flush_buffer(line);
 	if (err == 0) {
 		return IRQ_NONE;
-	} else if(err < 0) {
+	} else if (err < 0) {
 		line->head = line->buffer;
 		line->tail = line->buffer;
 	}
 	spin_unlock(&line->lock);
 
-	if(tty == NULL)
+	if (tty == NULL)
 		return IRQ_NONE;
 
 	if (test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) &&
 	   (tty->ldisc.write_wakeup != NULL))
 		(tty->ldisc.write_wakeup)(tty);
 
-	/* BLOCKING mode
+	/*
+	 * BLOCKING mode
 	 * In blocking mode, everything sleeps on tty->write_wait.
 	 * Sleeping in the console driver would break non-blocking
 	 * writes.
@@ -420,7 +411,8 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
 	return err;
 }
 
-/* Normally, a driver like this can rely mostly on the tty layer
+/*
+ * Normally, a driver like this can rely mostly on the tty layer
  * locking, particularly when it comes to the driver structure.
  * However, in this case, mconsole requests can come in "from the
  * side", and race with opens and closes.
@@ -442,11 +434,11 @@ int line_open(struct line *lines, struct tty_struct *tty)
 	int err = -ENODEV;
 
 	spin_lock(&line->count_lock);
-	if(!line->valid)
+	if (!line->valid)
 		goto out_unlock;
 
 	err = 0;
-	if(tty->count > 1)
+	if (tty->count > 1)
 		goto out_unlock;
 
 	spin_unlock(&line->count_lock);
@@ -460,7 +452,7 @@ int line_open(struct line *lines, struct tty_struct *tty)
 
 	INIT_DELAYED_WORK(&line->task, line_timer_cb);
 
-	if(!line->sigio){
+	if (!line->sigio) {
 		chan_enable_winch(&line->chan_list, tty);
 		line->sigio = 1;
 	}
@@ -481,20 +473,21 @@ void line_close(struct tty_struct *tty, struct file * filp)
 {
 	struct line *line = tty->driver_data;
 
-	/* If line_open fails (and tty->driver_data is never set),
+	/*
+	 * If line_open fails (and tty->driver_data is never set),
 	 * tty_open will call line_close.  So just return in this case.
 	 */
-	if(line == NULL)
+	if (line == NULL)
 		return;
 
 	/* We ignore the error anyway! */
 	flush_buffer(line);
 
 	spin_lock(&line->count_lock);
-	if(!line->valid)
+	if (!line->valid)
 		goto out_unlock;
 
-	if(tty->count > 1)
+	if (tty->count > 1)
 		goto out_unlock;
 
 	spin_unlock(&line->count_lock);
@@ -502,10 +495,10 @@ void line_close(struct tty_struct *tty, struct file * filp)
 	line->tty = NULL;
 	tty->driver_data = NULL;
 
-	if(line->sigio){
+	if (line->sigio) {
 		unregister_winch(tty);
 		line->sigio = 0;
-        }
+	}
 
 	return;
 
@@ -529,12 +522,12 @@ static int setup_one_line(struct line *lines, int n, char *init, int init_prio,
 
 	spin_lock(&line->count_lock);
 
-	if(line->tty != NULL){
+	if (line->tty != NULL) {
 		*error_out = "Device is already open";
 		goto out;
 	}
 
-	if (line->init_pri <= init_prio){
+	if (line->init_pri <= init_prio) {
 		line->init_pri = init_prio;
 		if (!strcmp(init, "none"))
 			line->valid = 0;
@@ -549,7 +542,8 @@ out:
 	return err;
 }
 
-/* Common setup code for both startup command line and mconsole initialization.
+/*
+ * Common setup code for both startup command line and mconsole initialization.
  * @lines contains the array (of size @num) to modify;
  * @init is the setup string;
  * @error_out is an error string in the case of failure;
@@ -561,14 +555,16 @@ int line_setup(struct line *lines, unsigned int num, char *init,
 	int i, n, err;
 	char *end;
 
-	if(*init == '=') {
-		/* We said con=/ssl= instead of con#=, so we are configuring all
-		 * consoles at once.*/
+	if (*init == '=') {
+		/*
+		 * We said con=/ssl= instead of con#=, so we are configuring all
+		 * consoles at once.
+		 */
 		n = -1;
 	}
 	else {
 		n = simple_strtoul(init, &end, 0);
-		if(*end != '='){
+		if (*end != '=') {
 			*error_out = "Couldn't parse device number";
 			return -EINVAL;
 		}
@@ -580,16 +576,16 @@ int line_setup(struct line *lines, unsigned int num, char *init,
 		*error_out = "Device number out of range";
 		return -EINVAL;
 	}
-	else if (n >= 0){
+	else if (n >= 0) {
 		err = setup_one_line(lines, n, init, INIT_ONE, error_out);
-		if(err)
+		if (err)
 			return err;
 	}
 	else {
-		for(i = 0; i < num; i++){
+		for(i = 0; i < num; i++) {
 			err = setup_one_line(lines, i, init, INIT_ALL,
 					     error_out);
-			if(err)
+			if (err)
 				return err;
 		}
 	}
@@ -603,18 +599,18 @@ int line_config(struct line *lines, unsigned int num, char *str,
 	char *new;
 	int n;
 
-	if(*str == '='){
+	if (*str == '=') {
 		*error_out = "Can't configure all devices from mconsole";
 		return -EINVAL;
 	}
 
 	new = kstrdup(str, GFP_KERNEL);
-	if(new == NULL){
+	if (new == NULL) {
 		*error_out = "Failed to allocate memory";
 		return -ENOMEM;
 	}
 	n = line_setup(lines, num, new, error_out);
-	if(n < 0)
+	if (n < 0)
 		return n;
 
 	line = &lines[n];
@@ -629,12 +625,12 @@ int line_get_config(char *name, struct line *lines, unsigned int num, char *str,
 	int dev, n = 0;
 
 	dev = simple_strtoul(name, &end, 0);
-	if((*end != '\0') || (end == name)){
+	if ((*end != '\0') || (end == name)) {
 		*error_out = "line_get_config failed to parse device number";
 		return 0;
 	}
 
-	if((dev < 0) || (dev >= num)){
+	if ((dev < 0) || (dev >= num)) {
 		*error_out = "device number out of range";
 		return 0;
 	}
@@ -642,9 +638,9 @@ int line_get_config(char *name, struct line *lines, unsigned int num, char *str,
 	line = &lines[dev];
 
 	spin_lock(&line->count_lock);
-	if(!line->valid)
+	if (!line->valid)
 		CONFIG_CHUNK(str, size, n, "none", 1);
-	else if(line->tty == NULL)
+	else if (line->tty == NULL)
 		CONFIG_CHUNK(str, size, n, line->init_str, 1);
 	else n = chan_config_string(&line->chan_list, str, size, error_out);
 	spin_unlock(&line->count_lock);
@@ -655,16 +651,16 @@ int line_get_config(char *name, struct line *lines, unsigned int num, char *str,
 int line_id(char **str, int *start_out, int *end_out)
 {
 	char *end;
-        int n;
+	int n;
 
 	n = simple_strtoul(*str, &end, 0);
-	if((*end != '\0') || (end == *str))
-                return -1;
+	if ((*end != '\0') || (end == *str))
+		return -1;
 
-        *str = end;
-        *start_out = n;
-        *end_out = n;
-        return n;
+	*str = end;
+	*start_out = n;
+	*end_out = n;
+	return n;
 }
 
 int line_remove(struct line *lines, unsigned int num, int n, char **error_out)
@@ -674,7 +670,7 @@ int line_remove(struct line *lines, unsigned int num, int n, char **error_out)
 
 	sprintf(config, "%d=none", n);
 	err = line_setup(lines, num, config, error_out);
-	if(err >= 0)
+	if (err >= 0)
 		err = 0;
 	return err;
 }
@@ -700,14 +696,14 @@ struct tty_driver *register_lines(struct line_driver *line_driver,
 	tty_set_operations(driver, ops);
 
 	if (tty_register_driver(driver)) {
-		printk("%s: can't register %s driver\n",
-		       __FUNCTION__,line_driver->name);
+		printk(KERN_ERR "register_lines : can't register %s driver\n",
+		       line_driver->name);
 		put_tty_driver(driver);
 		return NULL;
 	}
 
-	for(i = 0; i < nlines; i++){
-		if(!lines[i].valid)
+	for(i = 0; i < nlines; i++) {
+		if (!lines[i].valid)
 			tty_unregister_device(driver, i);
 	}
 
@@ -724,20 +720,20 @@ void lines_init(struct line *lines, int nlines, struct chan_opts *opts)
 	char *error;
 	int i;
 
-	for(i = 0; i < nlines; i++){
+	for(i = 0; i < nlines; i++) {
 		line = &lines[i];
 		INIT_LIST_HEAD(&line->chan_list);
 
-		if(line->init_str == NULL)
+		if (line->init_str == NULL)
 			continue;
 
 		line->init_str = kstrdup(line->init_str, GFP_KERNEL);
-		if(line->init_str == NULL)
-			printk("lines_init - kstrdup returned NULL\n");
+		if (line->init_str == NULL)
+			printk(KERN_ERR "lines_init - kstrdup returned NULL\n");
 
-		if(parse_chan_pair(line->init_str, line, i, opts, &error)){
-			printk("parse_chan_pair failed for device %d : %s\n",
-			       i, error);
+		if (parse_chan_pair(line->init_str, line, i, opts, &error)) {
+			printk(KERN_ERR "parse_chan_pair failed for "
+			       "device %d : %s\n", i, error);
 			line->valid = 0;
 		}
 	}
@@ -775,14 +771,14 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 	int err;
 	char c;
 
-	if(winch->fd != -1){
+	if (winch->fd != -1) {
 		err = generic_read(winch->fd, &c, NULL);
-		if(err < 0){
-			if(err != -EAGAIN){
-				printk("winch_interrupt : read failed, "
-				       "errno = %d\n", -err);
-				printk("fd %d is losing SIGWINCH support\n",
-				       winch->tty_fd);
+		if (err < 0) {
+			if (err != -EAGAIN) {
+				printk(KERN_ERR "winch_interrupt : "
+				       "read failed, errno = %d\n", -err);
+				printk(KERN_ERR "fd %d is losing SIGWINCH "
+				       "support\n", winch->tty_fd);
 				free_winch(winch, 0);
 				return IRQ_HANDLED;
 			}
@@ -797,7 +793,7 @@ static irqreturn_t winch_interrupt(int irq, void *data)
 		kill_pgrp(tty->pgrp, SIGWINCH, 1);
 	}
  out:
-	if(winch->fd != -1)
+	if (winch->fd != -1)
 		reactivate_fd(winch->fd, WINCH_IRQ);
 	return IRQ_HANDLED;
 }
@@ -809,7 +805,7 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty,
 
 	winch = kmalloc(sizeof(*winch), GFP_KERNEL);
 	if (winch == NULL) {
-		printk("register_winch_irq - kmalloc failed\n");
+		printk(KERN_ERR "register_winch_irq - kmalloc failed\n");
 		goto cleanup;
 	}
 
@@ -823,7 +819,8 @@ void register_winch_irq(int fd, int tty_fd, int pid, struct tty_struct *tty,
 	if (um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt,
 			   IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM,
 			   "winch", winch) < 0) {
-		printk("register_winch_irq - failed to register IRQ\n");
+		printk(KERN_ERR "register_winch_irq - failed to register "
+		       "IRQ\n");
 		goto out_free;
 	}
 
@@ -849,13 +846,13 @@ static void unregister_winch(struct tty_struct *tty)
 
 	spin_lock(&winch_handler_lock);
 
-	list_for_each(ele, &winch_handlers){
+	list_for_each(ele, &winch_handlers) {
 		winch = list_entry(ele, struct winch, list);
-                if(winch->tty == tty){
+		if (winch->tty == tty) {
 			free_winch(winch, 1);
 			break;
-                }
-        }
+		}
+	}
 	spin_unlock(&winch_handler_lock);
 }
 
@@ -866,7 +863,7 @@ static void winch_cleanup(void)
 
 	spin_lock(&winch_handler_lock);
 
-	list_for_each_safe(ele, next, &winch_handlers){
+	list_for_each_safe(ele, next, &winch_handlers) {
 		winch = list_entry(ele, struct winch, list);
 		free_winch(winch, 1);
 	}
@@ -881,13 +878,13 @@ char *add_xterm_umid(char *base)
 	int len;
 
 	umid = get_umid();
-	if(*umid == '\0')
+	if (*umid == '\0')
 		return base;
 
 	len = strlen(base) + strlen(" ()") + strlen(umid) + 1;
 	title = kmalloc(len, GFP_KERNEL);
-	if(title == NULL){
-		printk("Failed to allocate buffer for xterm title\n");
+	if (title == NULL) {
+		printk(KERN_ERR "Failed to allocate buffer for xterm title\n");
 		return base;
 	}
 
diff --git a/arch/um/drivers/mcast.h b/arch/um/drivers/mcast.h
index bc56af9d3e5..6fa282e896b 100644
--- a/arch/um/drivers/mcast.h
+++ b/arch/um/drivers/mcast.h
@@ -1,8 +1,11 @@
 /* 
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
+#ifndef __DRIVERS_MCAST_H
+#define __DRIVERS_MCAST_H
+
 #include "net_user.h"
 
 struct mcast_data {
@@ -18,13 +21,4 @@ extern const struct net_user_info mcast_user_info;
 extern int mcast_user_write(int fd, void *buf, int len, 
 			    struct mcast_data *pri);
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+#endif
diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c
index e6b8e0dd72a..822092f149b 100644
--- a/arch/um/drivers/mcast_kern.c
+++ b/arch/um/drivers/mcast_kern.c
@@ -1,24 +1,20 @@
 /*
  * user-mode-linux networking multicast transport
  * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  *
  * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and 
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
  *
  * Licensed under the GPL.
  */
 
-#include "linux/kernel.h"
 #include "linux/init.h"
-#include "linux/netdevice.h"
-#include "linux/etherdevice.h"
-#include "linux/in.h"
-#include "linux/inet.h"
-#include "net_kern.h"
-#include "net_user.h"
+#include <linux/netdevice.h>
 #include "mcast.h"
+#include "net_kern.h"
 
 struct mcast_init {
 	char *addr;
@@ -39,26 +35,20 @@ static void mcast_init(struct net_device *dev, void *data)
 	dpri->ttl = init->ttl;
 	dpri->dev = dev;
 
-	printk("mcast backend ");
-	printk("multicast address: %s:%u, TTL:%u ",
+	printk("mcast backend multicast address: %s:%u, TTL:%u\n",
 	       dpri->addr, dpri->port, dpri->ttl);
-
-	printk("\n");
 }
 
-static int mcast_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
+static int mcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
-	if(*skb == NULL) return(-ENOMEM);
-	return(net_recvfrom(fd, skb_mac_header(*skb),
-			    (*skb)->dev->mtu + ETH_HEADER_OTHER));
+	return net_recvfrom(fd, skb_mac_header(skb),
+			    skb->dev->mtu + ETH_HEADER_OTHER);
 }
 
-static int mcast_write(int fd, struct sk_buff **skb,
-			struct uml_net_private *lp)
+static int mcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	return mcast_user_write(fd, (*skb)->data, (*skb)->len, 
-				 (struct mcast_data *) &lp->user);
+	return mcast_user_write(fd, skb->data, skb->len,
+				(struct mcast_data *) &lp->user);
 }
 
 static const struct net_kern_info mcast_kern_info = {
@@ -81,34 +71,34 @@ int mcast_setup(char *str, char **mac_out, void *data)
 
 	remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str,
 			       NULL);
-	if(remain != NULL){
+	if (remain != NULL) {
 		printk(KERN_ERR "mcast_setup - Extra garbage on "
 		       "specification : '%s'\n", remain);
-		return(0);
+		return 0;
 	}
-	
-	if(port_str != NULL){
+
+	if (port_str != NULL) {
 		init->port = simple_strtoul(port_str, &last, 10);
-		if((*last != '\0') || (last == port_str)){
-			printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", 
+		if ((*last != '\0') || (last == port_str)) {
+			printk(KERN_ERR "mcast_setup - Bad port : '%s'\n",
 			       port_str);
-			return(0);
+			return 0;
 		}
 	}
 
-	if(ttl_str != NULL){
+	if (ttl_str != NULL) {
 		init->ttl = simple_strtoul(ttl_str, &last, 10);
-		if((*last != '\0') || (last == ttl_str)){
-			printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n", 
+		if ((*last != '\0') || (last == ttl_str)) {
+			printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n",
 			       ttl_str);
-			return(0);
+			return 0;
 		}
 	}
 
 	printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr,
 	       init->port, init->ttl);
 
-	return(1);
+	return 1;
 }
 
 static struct transport mcast_transport = {
diff --git a/arch/um/drivers/mcast_user.c b/arch/um/drivers/mcast_user.c
index 236a3dfc297..5f647d7a729 100644
--- a/arch/um/drivers/mcast_user.c
+++ b/arch/um/drivers/mcast_user.c
@@ -1,9 +1,10 @@
 /*
  * user-mode-linux networking multicast transport
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
  *
  * based on the existing uml-networking code, which is
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and 
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
  *
@@ -11,28 +12,22 @@
  *
  */
 
-#include <errno.h>
 #include <unistd.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-#include <sys/time.h>
+#include <errno.h>
 #include <netinet/in.h>
-#include "net_user.h"
 #include "mcast.h"
-#include "kern_util.h"
-#include "user.h"
-#include "os.h"
+#include "net_user.h"
 #include "um_malloc.h"
-
-#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER)
+#include "user.h"
 
 static struct sockaddr_in *new_addr(char *addr, unsigned short port)
 {
 	struct sockaddr_in *sin;
 
 	sin = kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL);
-	if(sin == NULL){
-		printk("new_addr: allocation of sockaddr_in failed\n");
+	if (sin == NULL) {
+		printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in "
+		       "failed\n");
 		return NULL;
 	}
 	sin->sin_family = AF_INET;
@@ -71,17 +66,17 @@ static int mcast_open(void *data)
 
 	fd = socket(AF_INET, SOCK_DGRAM, 0);
 
-	if (fd < 0){
+	if (fd < 0) {
 		err = -errno;
-		printk("mcast_open : data socket failed, errno = %d\n", 
-		       errno);
+		printk(UM_KERN_ERR "mcast_open : data socket failed, "
+		       "errno = %d\n", errno);
 		goto out;
 	}
 
 	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
 		err = -errno;
-		printk("mcast_open: SO_REUSEADDR failed, errno = %d\n",
-			errno);
+		printk(UM_KERN_ERR "mcast_open: SO_REUSEADDR failed, "
+		       "errno = %d\n", errno);
 		goto out_close;
 	}
 
@@ -89,45 +84,46 @@ static int mcast_open(void *data)
 	if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl,
 		       sizeof(pri->ttl)) < 0) {
 		err = -errno;
-		printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n",
-			errno);
+		printk(UM_KERN_ERR "mcast_open: IP_MULTICAST_TTL failed, "
+		       "error = %d\n", errno);
 		goto out_close;
 	}
 
 	/* set LOOP, so data does get fed back to local sockets */
 	if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) {
 		err = -errno;
-		printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n",
-			errno);
+		printk(UM_KERN_ERR "mcast_open: IP_MULTICAST_LOOP failed, "
+		       "error = %d\n", errno);
 		goto out_close;
 	}
 
 	/* bind socket to mcast address */
 	if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) {
 		err = -errno;
-		printk("mcast_open : data bind failed, errno = %d\n", errno);
+		printk(UM_KERN_ERR "mcast_open : data bind failed, "
+		       "errno = %d\n", errno);
 		goto out_close;
 	}
 
 	/* subscribe to the multicast group */
 	mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr;
 	mreq.imr_interface.s_addr = 0;
-	if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, 
+	if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP,
 		       &mreq, sizeof(mreq)) < 0) {
 		err = -errno;
-		printk("mcast_open: IP_ADD_MEMBERSHIP failed, error = %d\n",
-			errno);
-		printk("There appears not to be a multicast-capable network "
-		       "interface on the host.\n");
-		printk("eth0 should be configured in order to use the "
-		       "multicast transport.\n");
+		printk(UM_KERN_ERR "mcast_open: IP_ADD_MEMBERSHIP failed, "
+		       "error = %d\n", errno);
+		printk(UM_KERN_ERR "There appears not to be a multicast-"
+		       "capable network interface on the host.\n");
+		printk(UM_KERN_ERR "eth0 should be configured in order to use "
+		       "the multicast transport.\n");
 		goto out_close;
 	}
 
 	return fd;
 
  out_close:
-	os_close_file(fd);
+	close(fd);
  out:
 	return err;
 }
@@ -142,11 +138,11 @@ static void mcast_close(int fd, void *data)
 	mreq.imr_interface.s_addr = 0;
 	if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP,
 		       &mreq, sizeof(mreq)) < 0) {
-		printk("mcast_open: IP_DROP_MEMBERSHIP failed, error = %d\n",
-			errno);
+		printk(UM_KERN_ERR "mcast_open: IP_DROP_MEMBERSHIP failed, "
+		       "error = %d\n", errno);
 	}
 
-	os_close_file(fd);
+	close(fd);
 }
 
 int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri)
@@ -156,18 +152,13 @@ int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri)
 	return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
 }
 
-static int mcast_set_mtu(int mtu, void *data)
-{
-	return mtu;
-}
-
 const struct net_user_info mcast_user_info = {
 	.init		= mcast_user_init,
 	.open		= mcast_open,
 	.close	 	= mcast_close,
 	.remove	 	= mcast_remove,
-	.set_mtu	= mcast_set_mtu,
 	.add_address	= NULL,
 	.delete_address = NULL,
-	.max_packet	= MAX_PACKET - ETH_HEADER_OTHER
+	.mtu		= ETH_MAX_PACKET,
+	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
 };
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index d8709050740..0f3c7d14a6e 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -1,44 +1,35 @@
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
- * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/slab.h"
-#include "linux/init.h"
-#include "linux/notifier.h"
-#include "linux/reboot.h"
-#include "linux/utsname.h"
+#include "linux/console.h"
 #include "linux/ctype.h"
 #include "linux/interrupt.h"
-#include "linux/sysrq.h"
-#include "linux/workqueue.h"
+#include "linux/list.h"
+#include "linux/mm.h"
 #include "linux/module.h"
-#include "linux/file.h"
-#include "linux/fs.h"
-#include "linux/namei.h"
+#include "linux/notifier.h"
+#include "linux/reboot.h"
 #include "linux/proc_fs.h"
+#include "linux/slab.h"
 #include "linux/syscalls.h"
-#include "linux/list.h"
-#include "linux/mm.h"
-#include "linux/console.h"
-#include "asm/irq.h"
+#include "linux/utsname.h"
+#include "linux/workqueue.h"
 #include "asm/uaccess.h"
+#include "init.h"
+#include "irq_kern.h"
+#include "irq_user.h"
 #include "kern_util.h"
-#include "kern.h"
 #include "mconsole.h"
 #include "mconsole_kern.h"
-#include "irq_user.h"
-#include "init.h"
 #include "os.h"
-#include "irq_kern.h"
-#include "choose-mode.h"
 
 static int do_unlink_socket(struct notifier_block *notifier,
 			    unsigned long what, void *data)
 {
-	return(mconsole_unlink_socket());
+	return mconsole_unlink_socket();
 }
 
 
@@ -59,10 +50,9 @@ static void mc_work_proc(struct work_struct *unused)
 	struct mconsole_entry *req;
 	unsigned long flags;
 
-	while(!list_empty(&mc_requests)){
+	while (!list_empty(&mc_requests)) {
 		local_irq_save(flags);
-		req = list_entry(mc_requests.next, struct mconsole_entry,
-				 list);
+		req = list_entry(mc_requests.next, struct mconsole_entry, list);
 		list_del(&req->list);
 		local_irq_restore(flags);
 		req->request.cmd->handler(&req->request);
@@ -80,12 +70,12 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id)
 	static struct mc_request req;	/* that's OK */
 
 	fd = (long) dev_id;
-	while (mconsole_get_request(fd, &req)){
-		if(req.cmd->context == MCONSOLE_INTR)
+	while (mconsole_get_request(fd, &req)) {
+		if (req.cmd->context == MCONSOLE_INTR)
 			(*req.cmd->handler)(&req);
 		else {
 			new = kmalloc(sizeof(*new), GFP_NOWAIT);
-			if(new == NULL)
+			if (new == NULL)
 				mconsole_reply(&req, "Out of memory", 1, 0);
 			else {
 				new->request = req;
@@ -94,10 +84,10 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id)
 			}
 		}
 	}
-	if(!list_empty(&mc_requests))
+	if (!list_empty(&mc_requests))
 		schedule_work(&mconsole_work);
 	reactivate_fd(fd, MCONSOLE_IRQ);
-	return(IRQ_HANDLED);
+	return IRQ_HANDLED;
 }
 
 void mconsole_version(struct mc_request *req)
@@ -105,8 +95,8 @@ void mconsole_version(struct mc_request *req)
 	char version[256];
 
 	sprintf(version, "%s %s %s %s %s", utsname()->sysname,
-		utsname()->nodename, utsname()->release,
-		utsname()->version, utsname()->machine);
+		utsname()->nodename, utsname()->release, utsname()->version,
+		utsname()->machine);
 	mconsole_reply(req, version, 0, 0);
 }
 
@@ -118,7 +108,7 @@ void mconsole_log(struct mc_request *req)
 	ptr += strlen("log ");
 
 	len = req->len - (ptr - req->request.data);
-	printk("%.*s", len, ptr);
+	printk(KERN_WARNING "%.*s", len, ptr);
 	mconsole_reply(req, "", 0, 0);
 }
 
@@ -137,17 +127,17 @@ void mconsole_proc(struct mc_request *req)
 	char *ptr = req->request.data, *buf;
 
 	ptr += strlen("proc");
-	while(isspace(*ptr)) ptr++;
+	while (isspace(*ptr)) ptr++;
 
 	proc = get_fs_type("proc");
-	if(proc == NULL){
+	if (proc == NULL) {
 		mconsole_reply(req, "procfs not registered", 1, 0);
 		goto out;
 	}
 
 	super = (*proc->get_sb)(proc, 0, NULL, NULL);
 	put_filesystem(proc);
-	if(super == NULL){
+	if (super == NULL) {
 		mconsole_reply(req, "Failed to get procfs superblock", 1, 0);
 		goto out;
 	}
@@ -162,29 +152,29 @@ void mconsole_proc(struct mc_request *req)
 	 * if commenting out these two calls + the below read cycle. To
 	 * make UML crash again, it was enough to readd either one.*/
 	err = link_path_walk(ptr, &nd);
-	if(err){
+	if (err) {
 		mconsole_reply(req, "Failed to look up file", 1, 0);
 		goto out_kill;
 	}
 
 	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
-	if(IS_ERR(file)){
+	if (IS_ERR(file)) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
 		goto out_kill;
 	}
 	/*END*/
 
 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if(buf == NULL){
+	if (buf == NULL) {
 		mconsole_reply(req, "Failed to allocate buffer", 1, 0);
 		goto out_fput;
 	}
 
-	if((file->f_op != NULL) && (file->f_op->read != NULL)){
+	if ((file->f_op != NULL) && (file->f_op->read != NULL)) {
 		do {
 			n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1,
 						&file->f_pos);
-			if(n >= 0){
+			if (n >= 0) {
 				buf[n] = '\0';
 				mconsole_reply(req, buf, 0, (n > 0));
 			}
@@ -193,7 +183,7 @@ void mconsole_proc(struct mc_request *req)
 					       1, 0);
 				goto out_free;
 			}
-		} while(n > 0);
+		} while (n > 0);
 	}
 	else mconsole_reply(req, "", 0, 0);
 
@@ -217,18 +207,19 @@ void mconsole_proc(struct mc_request *req)
 	char *ptr = req->request.data;
 
 	ptr += strlen("proc");
-	while(isspace(*ptr)) ptr++;
+	while (isspace(*ptr))
+		ptr++;
 	snprintf(path, sizeof(path), "/proc/%s", ptr);
 
 	fd = sys_open(path, 0, 0);
 	if (fd < 0) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
-		printk("open %s: %d\n",path,fd);
+		printk(KERN_ERR "open %s: %d\n",path,fd);
 		goto out;
 	}
 
 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if(buf == NULL){
+	if (buf == NULL) {
 		mconsole_reply(req, "Failed to allocate buffer", 1, 0);
 		goto out_close;
 	}
@@ -239,7 +230,7 @@ void mconsole_proc(struct mc_request *req)
 			mconsole_reply(req, "Read of file failed", 1, 0);
 			goto out_free;
 		}
-		/*Begin the file content on his own line.*/
+		/* Begin the file content on its own line. */
 		if (first_chunk) {
 			mconsole_reply(req, "\n", 0, 1);
 			first_chunk = 0;
@@ -351,12 +342,12 @@ static struct mc_device *mconsole_find_dev(char *name)
 	struct list_head *ele;
 	struct mc_device *dev;
 
-	list_for_each(ele, &mconsole_devices){
+	list_for_each(ele, &mconsole_devices) {
 		dev = list_entry(ele, struct mc_device, list);
-		if(!strncmp(name, dev->name, strlen(dev->name)))
-			return(dev);
+		if (!strncmp(name, dev->name, strlen(dev->name)))
+			return dev;
 	}
-	return(NULL);
+	return NULL;
 }
 
 #define UNPLUGGED_PER_PAGE \
@@ -378,15 +369,15 @@ static int mem_config(char *str, char **error_out)
 	int err = -EINVAL, i, add;
 	char *ret;
 
-	if(str[0] != '='){
+	if (str[0] != '=') {
 		*error_out = "Expected '=' after 'mem'";
 		goto out;
 	}
 
 	str++;
-	if(str[0] == '-')
+	if (str[0] == '-')
 		add = 0;
-	else if(str[0] == '+'){
+	else if (str[0] == '+') {
 		add = 1;
 	}
 	else {
@@ -396,7 +387,7 @@ static int mem_config(char *str, char **error_out)
 
 	str++;
 	diff = memparse(str, &ret);
-	if(*ret != '\0'){
+	if (*ret != '\0') {
 		*error_out = "Failed to parse memory increment";
 		goto out;
 	}
@@ -404,17 +395,17 @@ static int mem_config(char *str, char **error_out)
 	diff /= PAGE_SIZE;
 
 	down(&plug_mem_mutex);
-	for(i = 0; i < diff; i++){
+	for (i = 0; i < diff; i++) {
 		struct unplugged_pages *unplugged;
 		void *addr;
 
-		if(add){
-			if(list_empty(&unplugged_pages))
+		if (add) {
+			if (list_empty(&unplugged_pages))
 				break;
 
 			unplugged = list_entry(unplugged_pages.next,
 					       struct unplugged_pages, list);
-			if(unplug_index > 0)
+			if (unplug_index > 0)
 				addr = unplugged->pages[--unplug_index];
 			else {
 				list_del(&unplugged->list);
@@ -429,11 +420,11 @@ static int mem_config(char *str, char **error_out)
 			struct page *page;
 
 			page = alloc_page(GFP_ATOMIC);
-			if(page == NULL)
+			if (page == NULL)
 				break;
 
 			unplugged = page_address(page);
-			if(unplug_index == UNPLUGGED_PER_PAGE){
+			if (unplug_index == UNPLUGGED_PER_PAGE) {
 				list_add(&unplugged->list, &unplugged_pages);
 				unplug_index = 0;
 			}
@@ -445,9 +436,9 @@ static int mem_config(char *str, char **error_out)
 						       struct unplugged_pages,
 						       list);
 				err = os_drop_memory(addr, PAGE_SIZE);
-				if(err){
-					printk("Failed to release memory - "
-					       "errno = %d\n", err);
+				if (err) {
+					printk(KERN_ERR "Failed to release "
+					       "memory - errno = %d\n", err);
 					*error_out = "Failed to release memory";
 					goto out_unlock;
 				}
@@ -501,10 +492,10 @@ static struct mc_device mem_mc = {
 
 static int __init mem_mc_init(void)
 {
-	if(can_drop_memory())
+	if (can_drop_memory())
 		mconsole_register_dev(&mem_mc);
-	else printk("Can't release memory to the host - memory hotplug won't "
-		    "be supported\n");
+	else printk(KERN_ERR "Can't release memory to the host - memory "
+		    "hotplug won't be supported\n");
 	return 0;
 }
 
@@ -519,7 +510,7 @@ static void mconsole_get_config(int (*get_config)(char *, char *, int,
 	char default_buf[CONFIG_BUF_SIZE], *error, *buf;
 	int n, size;
 
-	if(get_config == NULL){
+	if (get_config == NULL) {
 		mconsole_reply(req, "No get_config routine defined", 1, 0);
 		return;
 	}
@@ -528,30 +519,30 @@ static void mconsole_get_config(int (*get_config)(char *, char *, int,
 	size = ARRAY_SIZE(default_buf);
 	buf = default_buf;
 
-	while(1){
+	while (1) {
 		n = (*get_config)(name, buf, size, &error);
-		if(error != NULL){
+		if (error != NULL) {
 			mconsole_reply(req, error, 1, 0);
 			goto out;
 		}
 
-		if(n <= size){
+		if (n <= size) {
 			mconsole_reply(req, buf, 0, 0);
 			goto out;
 		}
 
-		if(buf != default_buf)
+		if (buf != default_buf)
 			kfree(buf);
 
 		size = n;
 		buf = kmalloc(size, GFP_KERNEL);
-		if(buf == NULL){
+		if (buf == NULL) {
 			mconsole_reply(req, "Failed to allocate buffer", 1, 0);
 			return;
 		}
 	}
  out:
-	if(buf != default_buf)
+	if (buf != default_buf)
 		kfree(buf);
 }
 
@@ -562,19 +553,20 @@ void mconsole_config(struct mc_request *req)
 	int err;
 
 	ptr += strlen("config");
-	while(isspace(*ptr)) ptr++;
+	while (isspace(*ptr))
+		ptr++;
 	dev = mconsole_find_dev(ptr);
-	if(dev == NULL){
+	if (dev == NULL) {
 		mconsole_reply(req, "Bad configuration option", 1, 0);
 		return;
 	}
 
 	name = &ptr[strlen(dev->name)];
 	ptr = name;
-	while((*ptr != '=') && (*ptr != '\0'))
+	while ((*ptr != '=') && (*ptr != '\0'))
 		ptr++;
 
-	if(*ptr == '='){
+	if (*ptr == '=') {
 		err = (*dev->config)(name, &error_string);
 		mconsole_reply(req, error_string, err, 0);
 	}
@@ -589,9 +581,9 @@ void mconsole_remove(struct mc_request *req)
 	int err, start, end, n;
 
 	ptr += strlen("remove");
-	while(isspace(*ptr)) ptr++;
+	while (isspace(*ptr)) ptr++;
 	dev = mconsole_find_dev(ptr);
-	if(dev == NULL){
+	if (dev == NULL) {
 		mconsole_reply(req, "Bad remove option", 1, 0);
 		return;
 	}
@@ -600,11 +592,11 @@ void mconsole_remove(struct mc_request *req)
 
 	err = 1;
 	n = (*dev->id)(&ptr, &start, &end);
-	if(n < 0){
+	if (n < 0) {
 		err_msg = "Couldn't parse device number";
 		goto out;
 	}
-	else if((n < start) || (n > end)){
+	else if ((n < start) || (n > end)) {
 		sprintf(error, "Invalid device number - must be between "
 			"%d and %d", start, end);
 		err_msg = error;
@@ -613,16 +605,16 @@ void mconsole_remove(struct mc_request *req)
 
 	err_msg = NULL;
 	err = (*dev->remove)(n, &err_msg);
-	switch(err){
+	switch(err) {
 	case 0:
 		err_msg = "";
 		break;
 	case -ENODEV:
-		if(err_msg == NULL)
+		if (err_msg == NULL)
 			err_msg = "Device doesn't exist";
 		break;
 	case -EBUSY:
-		if(err_msg == NULL)
+		if (err_msg == NULL)
 			err_msg = "Device is currently open";
 		break;
 	default:
@@ -640,35 +632,28 @@ struct mconsole_output {
 static DEFINE_SPINLOCK(client_lock);
 static LIST_HEAD(clients);
 static char console_buf[MCONSOLE_MAX_DATA];
-static int console_index = 0;
 
 static void console_write(struct console *console, const char *string,
-			  unsigned len)
+			  unsigned int len)
 {
 	struct list_head *ele;
 	int n;
 
-	if(list_empty(&clients))
+	if (list_empty(&clients))
 		return;
 
-	while(1){
-		n = min((size_t) len, ARRAY_SIZE(console_buf) - console_index);
-		strncpy(&console_buf[console_index], string, n);
-		console_index += n;
+	while (len > 0) {
+		n = min((size_t) len, ARRAY_SIZE(console_buf));
+		strncpy(console_buf, string, n);
 		string += n;
 		len -= n;
-		if(len == 0)
-			return;
 
-		list_for_each(ele, &clients){
+		list_for_each(ele, &clients) {
 			struct mconsole_output *entry;
 
 			entry = list_entry(ele, struct mconsole_output, list);
-			mconsole_reply_len(entry->req, console_buf,
-					   console_index, 0, 1);
+			mconsole_reply_len(entry->req, console_buf, n, 0, 1);
 		}
-
-		console_index = 0;
 	}
 }
 
@@ -698,8 +683,7 @@ static void with_console(struct mc_request *req, void (*proc)(void *),
 
 	(*proc)(arg);
 
-	mconsole_reply_len(req, console_buf, console_index, 0, 0);
-	console_index = 0;
+	mconsole_reply_len(req, "", 0, 0, 0);
 
 	spin_lock_irqsave(&client_lock, flags);
 	list_del(&entry.list);
@@ -707,6 +691,9 @@ static void with_console(struct mc_request *req, void (*proc)(void *),
 }
 
 #ifdef CONFIG_MAGIC_SYSRQ
+
+#include <linux/sysrq.h>
+
 static void sysrq_proc(void *arg)
 {
 	char *op = arg;
@@ -718,12 +705,13 @@ void mconsole_sysrq(struct mc_request *req)
 	char *ptr = req->request.data;
 
 	ptr += strlen("sysrq");
-	while(isspace(*ptr)) ptr++;
+	while (isspace(*ptr)) ptr++;
 
-	/* With 'b', the system will shut down without a chance to reply,
+	/*
+	 * With 'b', the system will shut down without a chance to reply,
 	 * so in this case, we reply first.
 	 */
-	if(*ptr == 'b')
+	if (*ptr == 'b')
 		mconsole_reply(req, "", 0, 0);
 
 	with_console(req, sysrq_proc, ptr);
@@ -735,8 +723,6 @@ void mconsole_sysrq(struct mc_request *req)
 }
 #endif
 
-#ifdef CONFIG_MODE_SKAS
-
 static void stack_proc(void *arg)
 {
 	struct task_struct *from = current, *to = arg;
@@ -745,29 +731,34 @@ static void stack_proc(void *arg)
 	switch_to(from, to, from);
 }
 
-/* Mconsole stack trace
+/*
+ * Mconsole stack trace
  *  Added by Allan Graves, Jeff Dike
  *  Dumps a stacks registers to the linux console.
  *  Usage stack <pid>.
  */
-static void do_stack_trace(struct mc_request *req)
+void mconsole_stack(struct mc_request *req)
 {
 	char *ptr = req->request.data;
 	int pid_requested= -1;
 	struct task_struct *from = NULL;
 	struct task_struct *to = NULL;
 
-	/* Would be nice:
+	/*
+	 * Would be nice:
 	 * 1) Send showregs output to mconsole.
 	 * 2) Add a way to stack dump all pids.
 	 */
 
 	ptr += strlen("stack");
-	while(isspace(*ptr)) ptr++;
+	while (isspace(*ptr))
+		ptr++;
 
-	/* Should really check for multiple pids or reject bad args here */
+	/*
+	 * Should really check for multiple pids or reject bad args here
+	 */
 	/* What do the arguments in mconsole_reply mean? */
-	if(sscanf(ptr, "%d", &pid_requested) == 0){
+	if (sscanf(ptr, "%d", &pid_requested) == 0) {
 		mconsole_reply(req, "Please specify a pid", 1, 0);
 		return;
 	}
@@ -775,25 +766,15 @@ static void do_stack_trace(struct mc_request *req)
 	from = current;
 
 	to = find_task_by_pid(pid_requested);
-	if((to == NULL) || (pid_requested == 0)) {
+	if ((to == NULL) || (pid_requested == 0)) {
 		mconsole_reply(req, "Couldn't find that pid", 1, 0);
 		return;
 	}
 	with_console(req, stack_proc, to);
 }
-#endif /* CONFIG_MODE_SKAS */
 
-void mconsole_stack(struct mc_request *req)
-{
-	/* This command doesn't work in TT mode, so let's check and then
-	 * get out of here
-	 */
-	CHOOSE_MODE(mconsole_reply(req, "Sorry, this doesn't work in TT mode",
-				   1, 0),
-		    do_stack_trace(req));
-}
-
-/* Changed by mconsole_setup, which is __setup, and called before SMP is
+/*
+ * Changed by mconsole_setup, which is __setup, and called before SMP is
  * active.
  */
 static char *notify_socket = NULL;
@@ -805,13 +786,14 @@ static int __init mconsole_init(void)
 	int err;
 	char file[256];
 
-	if(umid_file_name("mconsole", file, sizeof(file))) return(-1);
+	if (umid_file_name("mconsole", file, sizeof(file)))
+		return -1;
 	snprintf(mconsole_socket_name, sizeof(file), "%s", file);
 
 	sock = os_create_unix_socket(file, sizeof(file), 1);
-	if (sock < 0){
-		printk("Failed to initialize management console\n");
-		return(1);
+	if (sock < 0) {
+		printk(KERN_ERR "Failed to initialize management console\n");
+		return 1;
 	}
 
 	register_reboot_notifier(&reboot_notifier);
@@ -819,14 +801,14 @@ static int __init mconsole_init(void)
 	err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt,
 			     IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM,
 			     "mconsole", (void *)sock);
-	if (err){
-		printk("Failed to get IRQ for management console\n");
-		return(1);
+	if (err) {
+		printk(KERN_ERR "Failed to get IRQ for management console\n");
+		return 1;
 	}
 
-	if(notify_socket != NULL){
+	if (notify_socket != NULL) {
 		notify_socket = kstrdup(notify_socket, GFP_KERNEL);
-		if(notify_socket != NULL)
+		if (notify_socket != NULL)
 			mconsole_notify(notify_socket, MCONSOLE_SOCKET,
 					mconsole_socket_name,
 					strlen(mconsole_socket_name) + 1);
@@ -834,9 +816,9 @@ static int __init mconsole_init(void)
 			    "string\n");
 	}
 
-	printk("mconsole (version %d) initialized on %s\n",
+	printk(KERN_INFO "mconsole (version %d) initialized on %s\n",
 	       MCONSOLE_VERSION, mconsole_socket_name);
-	return(0);
+	return 0;
 }
 
 __initcall(mconsole_init);
@@ -847,10 +829,10 @@ static int write_proc_mconsole(struct file *file, const char __user *buffer,
 	char *buf;
 
 	buf = kmalloc(count + 1, GFP_KERNEL);
-	if(buf == NULL)
-		return(-ENOMEM);
+	if (buf == NULL)
+		return -ENOMEM;
 
-	if(copy_from_user(buf, buffer, count)){
+	if (copy_from_user(buf, buffer, count)) {
 		count = -EFAULT;
 		goto out;
 	}
@@ -860,24 +842,26 @@ static int write_proc_mconsole(struct file *file, const char __user *buffer,
 	mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count);
  out:
 	kfree(buf);
-	return(count);
+	return count;
 }
 
 static int create_proc_mconsole(void)
 {
 	struct proc_dir_entry *ent;
 
-	if(notify_socket == NULL) return(0);
+	if (notify_socket == NULL)
+		return 0;
 
 	ent = create_proc_entry("mconsole", S_IFREG | 0200, NULL);
-	if(ent == NULL){
-		printk(KERN_INFO "create_proc_mconsole : create_proc_entry failed\n");
-		return(0);
+	if (ent == NULL) {
+		printk(KERN_INFO "create_proc_mconsole : create_proc_entry "
+		       "failed\n");
+		return 0;
 	}
 
 	ent->read_proc = NULL;
 	ent->write_proc = write_proc_mconsole;
-	return(0);
+	return 0;
 }
 
 static DEFINE_SPINLOCK(notify_spinlock);
@@ -894,19 +878,19 @@ void unlock_notify(void)
 
 __initcall(create_proc_mconsole);
 
-#define NOTIFY "=notify:"
+#define NOTIFY "notify:"
 
 static int mconsole_setup(char *str)
 {
-	if(!strncmp(str, NOTIFY, strlen(NOTIFY))){
+	if (!strncmp(str, NOTIFY, strlen(NOTIFY))) {
 		str += strlen(NOTIFY);
 		notify_socket = str;
 	}
 	else printk(KERN_ERR "mconsole_setup : Unknown option - '%s'\n", str);
-	return(1);
+	return 1;
 }
 
-__setup("mconsole", mconsole_setup);
+__setup("mconsole=", mconsole_setup);
 
 __uml_help(mconsole_setup,
 "mconsole=notify:<socket>\n"
@@ -921,11 +905,12 @@ static int notify_panic(struct notifier_block *self, unsigned long unused1,
 {
 	char *message = ptr;
 
-	if(notify_socket == NULL) return(0);
+	if (notify_socket == NULL)
+		return 0;
 
 	mconsole_notify(notify_socket, MCONSOLE_PANIC, message,
 			strlen(message) + 1);
-	return(0);
+	return 0;
 }
 
 static struct notifier_block panic_exit_notifier = {
@@ -938,14 +923,14 @@ static int add_notifier(void)
 {
 	atomic_notifier_chain_register(&panic_notifier_list,
 			&panic_exit_notifier);
-	return(0);
+	return 0;
 }
 
 __initcall(add_notifier);
 
 char *mconsole_notify_socket(void)
 {
-	return(notify_socket);
+	return notify_socket;
 }
 
 EXPORT_SYMBOL(mconsole_notify_socket);
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index f31e71546e5..430c024a19b 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -1,25 +1,22 @@
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
- * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stdio.h>
-#include <stdlib.h>
 #include <errno.h>
-#include <signal.h>
+#include <string.h>
+#include <unistd.h>
 #include <sys/socket.h>
-#include <sys/types.h>
 #include <sys/uio.h>
 #include <sys/un.h>
-#include <unistd.h>
-#include "user.h"
-#include "sysdep/ptrace.h"
+#include "kern_constants.h"
 #include "mconsole.h"
-#include "os.h"
+#include "user.h"
 
 static struct mconsole_command commands[] = {
-	/* With uts namespaces, uts information becomes process-specific, so
+	/*
+	 * With uts namespaces, uts information becomes process-specific, so
 	 * we need a process context.  If we try handling this in interrupt
 	 * context, we may hit an exiting process without a valid uts
 	 * namespace.
@@ -36,7 +33,7 @@ static struct mconsole_command commands[] = {
 	{ "go", mconsole_go, MCONSOLE_INTR },
 	{ "log", mconsole_log, MCONSOLE_INTR },
 	{ "proc", mconsole_proc, MCONSOLE_PROC },
-        { "stack", mconsole_stack, MCONSOLE_INTR },
+	{ "stack", mconsole_stack, MCONSOLE_INTR },
 };
 
 /* Initialized in mconsole_init, which is an initcall */
@@ -44,21 +41,21 @@ char mconsole_socket_name[256];
 
 int mconsole_reply_v0(struct mc_request *req, char *reply)
 {
-        struct iovec iov;
-        struct msghdr msg;
+	struct iovec iov;
+	struct msghdr msg;
 
-        iov.iov_base = reply;
-        iov.iov_len = strlen(reply);
+	iov.iov_base = reply;
+	iov.iov_len = strlen(reply);
 
-        msg.msg_name = &(req->origin);
-        msg.msg_namelen = req->originlen;
-        msg.msg_iov = &iov;
-        msg.msg_iovlen = 1;
-        msg.msg_control = NULL;
-        msg.msg_controllen = 0;
-        msg.msg_flags = 0;
+	msg.msg_name = &(req->origin);
+	msg.msg_namelen = req->originlen;
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
 
-        return sendmsg(req->originating_fd, &msg, 0);
+	return sendmsg(req->originating_fd, &msg, 0);
 }
 
 static struct mconsole_command *mconsole_parse(struct mc_request *req)
@@ -66,10 +63,10 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req)
 	struct mconsole_command *cmd;
 	int i;
 
-	for(i = 0; i < ARRAY_SIZE(commands); i++){
+	for (i = 0; i < ARRAY_SIZE(commands); i++) {
 		cmd = &commands[i];
-		if(!strncmp(req->request.data, cmd->command, 
-			    strlen(cmd->command))){
+		if (!strncmp(req->request.data, cmd->command,
+			    strlen(cmd->command))) {
 			return cmd;
 		}
 	}
@@ -94,9 +91,9 @@ int mconsole_get_request(int fd, struct mc_request *req)
 
 	req->originating_fd = fd;
 
-	if(req->request.magic != MCONSOLE_MAGIC){
+	if (req->request.magic != MCONSOLE_MAGIC) {
 		/* Unversioned request */
-		len = MIN(sizeof(req->request.data) - 1, 
+		len = MIN(sizeof(req->request.data) - 1,
 			  strlen((char *) &req->request));
 		memmove(req->request.data, &req->request, len);
 		req->request.data[len] = '\0';
@@ -107,32 +104,33 @@ int mconsole_get_request(int fd, struct mc_request *req)
 
 		mconsole_reply_v0(req, "ERR Version 0 mconsole clients are "
 				  "not supported by this driver");
-		return(0);
+		return 0;
 	}
 
-	if(req->request.len >= MCONSOLE_MAX_DATA){
+	if (req->request.len >= MCONSOLE_MAX_DATA) {
 		mconsole_reply(req, "Request too large", 1, 0);
-		return(0);
+		return 0;
 	}
-	if(req->request.version != MCONSOLE_VERSION){
-		mconsole_reply(req, "This driver only supports version " 
-                               STRING(MCONSOLE_VERSION) " clients", 1, 0);
+	if (req->request.version != MCONSOLE_VERSION) {
+		mconsole_reply(req, "This driver only supports version "
+			       STRING(MCONSOLE_VERSION) " clients", 1, 0);
 	}
-	
+
 	req->request.data[req->request.len] = '\0';
 	req->cmd = mconsole_parse(req);
-	if(req->cmd == NULL){
+	if (req->cmd == NULL) {
 		mconsole_reply(req, "Unknown command", 1, 0);
-		return(0);
+		return 0;
 	}
 
-	return(1);
+	return 1;
 }
 
 int mconsole_reply_len(struct mc_request *req, const char *str, int total,
 		       int err, int more)
 {
-	/* XXX This is a stack consumption problem.  It'd be nice to
+	/*
+	 * XXX This is a stack consumption problem.  It'd be nice to
 	 * make it global and serialize access to it, but there are a
 	 * ton of callers to this function.
 	 */
@@ -147,7 +145,7 @@ int mconsole_reply_len(struct mc_request *req, const char *str, int total,
 
 		len = MIN(total, MCONSOLE_MAX_DATA - 1);
 
-		if(len == total) reply.more = more;
+		if (len == total) reply.more = more;
 		else reply.more = 1;
 
 		memcpy(reply.data, str, len);
@@ -161,9 +159,10 @@ int mconsole_reply_len(struct mc_request *req, const char *str, int total,
 		n = sendto(req->originating_fd, &reply, len, 0,
 			   (struct sockaddr *) req->origin, req->originlen);
 
-		if(n < 0) return(-errno);
-	} while(total > 0);
-	return(0);
+		if (n < 0)
+			return -errno;
+	} while (total > 0);
+	return 0;
 }
 
 int mconsole_reply(struct mc_request *req, const char *str, int err, int more)
@@ -187,18 +186,18 @@ int mconsole_notify(char *sock_name, int type, const void *data, int len)
 	int n, err = 0;
 
 	lock_notify();
-	if(notify_sock < 0){
+	if (notify_sock < 0) {
 		notify_sock = socket(PF_UNIX, SOCK_DGRAM, 0);
-		if(notify_sock < 0){
+		if (notify_sock < 0) {
 			err = -errno;
-			printk("mconsole_notify - socket failed, errno = %d\n",
-			       err);
+			printk(UM_KERN_ERR "mconsole_notify - socket failed, "
+			       "errno = %d\n", errno);
 		}
 	}
 	unlock_notify();
-	
-	if(err)
-		return(err);
+
+	if (err)
+		return err;
 
 	target.sun_family = AF_UNIX;
 	strcpy(target.sun_path, sock_name);
@@ -212,22 +211,12 @@ int mconsole_notify(char *sock_name, int type, const void *data, int len)
 
 	err = 0;
 	len = sizeof(packet) + packet.len - sizeof(packet.data);
-	n = sendto(notify_sock, &packet, len, 0, (struct sockaddr *) &target, 
+	n = sendto(notify_sock, &packet, len, 0, (struct sockaddr *) &target,
 		   sizeof(target));
-	if(n < 0){
+	if (n < 0) {
 		err = -errno;
-		printk("mconsole_notify - sendto failed, errno = %d\n", errno);
+		printk(UM_KERN_ERR "mconsole_notify - sendto failed, "
+		       "errno = %d\n", errno);
 	}
-	return(err);
+	return err;
 }
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
index 867666a0233..67b2f55a602 100644
--- a/arch/um/drivers/mmapper_kern.c
+++ b/arch/um/drivers/mmapper_kern.c
@@ -9,27 +9,29 @@
  *
  */
 
-#include <linux/init.h> 
-#include <linux/module.h>
-#include <linux/mm.h> 
+#include <linux/stddef.h>
+#include <linux/types.h>
 #include <linux/fs.h>
+#include <linux/init.h>
 #include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mm.h>
 #include <asm/uaccess.h>
 #include "mem_user.h"
- 
+
 /* These are set in mmapper_init, which is called at boot time */
 static unsigned long mmapper_size;
-static unsigned long p_buf = 0;
-static char *v_buf = NULL;
+static unsigned long p_buf;
+static char *v_buf;
 
-static ssize_t
-mmapper_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t mmapper_read(struct file *file, char __user *buf, size_t count,
+			    loff_t *ppos)
 {
 	return simple_read_from_buffer(buf, count, ppos, v_buf, mmapper_size);
 }
 
-static ssize_t
-mmapper_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+static ssize_t mmapper_write(struct file *file, const char __user *buf,
+			     size_t count, loff_t *ppos)
 {
 	if (*ppos > mmapper_size)
 		return -EINVAL;
@@ -39,48 +41,46 @@ mmapper_write(struct file *file, const char __user *buf, size_t count, loff_t *p
 
 	if (copy_from_user(&v_buf[*ppos], buf, count))
 		return -EFAULT;
-	
+
 	return count;
 }
 
-static int 
-mmapper_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-	 unsigned long arg)
+static int mmapper_ioctl(struct inode *inode, struct file *file,
+			 unsigned int cmd, unsigned long arg)
 {
-	return(-ENOIOCTLCMD);
+	return -ENOIOCTLCMD;
 }
 
-static int 
-mmapper_mmap(struct file *file, struct vm_area_struct * vma)
+static int mmapper_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	int ret = -EINVAL;
 	int size;
 
 	if (vma->vm_pgoff != 0)
 		goto out;
-	
+
 	size = vma->vm_end - vma->vm_start;
-	if(size > mmapper_size) return(-EFAULT);
+	if (size > mmapper_size)
+		return -EFAULT;
 
-	/* XXX A comment above remap_pfn_range says it should only be
+	/*
+	 * XXX A comment above remap_pfn_range says it should only be
 	 * called when the mm semaphore is held
 	 */
 	if (remap_pfn_range(vma, vma->vm_start, p_buf >> PAGE_SHIFT, size,
-			     vma->vm_page_prot))
+			    vma->vm_page_prot))
 		goto out;
 	ret = 0;
 out:
 	return ret;
 }
 
-static int
-mmapper_open(struct inode *inode, struct file *file)
+static int mmapper_open(struct inode *inode, struct file *file)
 {
 	return 0;
 }
 
-static int 
-mmapper_release(struct inode *inode, struct file *file)
+static int mmapper_release(struct inode *inode, struct file *file)
 {
 	return 0;
 }
@@ -95,7 +95,9 @@ static const struct file_operations mmapper_fops = {
 	.release	= mmapper_release,
 };
 
-/* No locking needed - only used (and modified) by below initcall and exitcall. */
+/*
+ * No locking needed - only used (and modified) by below initcall and exitcall.
+ */
 static struct miscdevice mmapper_dev = {
 	.minor		= MISC_DYNAMIC_MINOR,
 	.name		= "mmapper",
@@ -109,13 +111,13 @@ static int __init mmapper_init(void)
 	printk(KERN_INFO "Mapper v0.1\n");
 
 	v_buf = (char *) find_iomem("mmapper", &mmapper_size);
-	if(mmapper_size == 0){
+	if (mmapper_size == 0) {
 		printk(KERN_ERR "mmapper_init - find_iomem failed\n");
 		goto out;
 	}
 
 	err = misc_register(&mmapper_dev);
-	if(err){
+	if (err) {
 		printk(KERN_ERR "mmapper - misc_register failed, err = %d\n",
 		       err);
 		goto out;
@@ -136,9 +138,3 @@ module_exit(mmapper_exit);
 
 MODULE_AUTHOR("Greg Lonnon <glonnon@ridgerun.com>");
 MODULE_DESCRIPTION("DSPLinux simulator mmapper driver");
-/*
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index d35d0c1ee7f..8c01fa81a1a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -1,33 +1,28 @@
 /*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
  * Licensed under the GPL.
  */
 
-#include "linux/kernel.h"
-#include "linux/netdevice.h"
-#include "linux/rtnetlink.h"
-#include "linux/skbuff.h"
-#include "linux/socket.h"
-#include "linux/spinlock.h"
-#include "linux/module.h"
-#include "linux/init.h"
-#include "linux/etherdevice.h"
-#include "linux/list.h"
-#include "linux/inetdevice.h"
-#include "linux/ctype.h"
-#include "linux/bootmem.h"
-#include "linux/ethtool.h"
-#include "linux/platform_device.h"
-#include "asm/uaccess.h"
-#include "kern_util.h"
-#include "net_kern.h"
-#include "net_user.h"
-#include "mconsole_kern.h"
+#include <linux/bootmem.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
 #include "init.h"
-#include "irq_user.h"
 #include "irq_kern.h"
+#include "irq_user.h"
+#include "mconsole_kern.h"
+#include "net_kern.h"
+#include "net_user.h"
 
 static inline void set_ether_mac(struct net_device *dev, unsigned char *addr)
 {
@@ -39,6 +34,46 @@ static inline void set_ether_mac(struct net_device *dev, unsigned char *addr)
 static DEFINE_SPINLOCK(opened_lock);
 static LIST_HEAD(opened);
 
+/*
+ * The drop_skb is used when we can't allocate an skb.  The
+ * packet is read into drop_skb in order to get the data off the
+ * connection to the host.
+ * It is reallocated whenever a maximum packet size is seen which is
+ * larger than any seen before.  update_drop_skb is called from
+ * eth_configure when a new interface is added.
+ */
+static DEFINE_SPINLOCK(drop_lock);
+static struct sk_buff *drop_skb;
+static int drop_max;
+
+static int update_drop_skb(int max)
+{
+	struct sk_buff *new;
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&drop_lock, flags);
+
+	if (max <= drop_max)
+		goto out;
+
+	err = -ENOMEM;
+	new = dev_alloc_skb(max);
+	if (new == NULL)
+		goto out;
+
+	skb_put(new, max);
+
+	kfree_skb(drop_skb);
+	drop_skb = new;
+	drop_max = max;
+	err = 0;
+out:
+	spin_unlock_irqrestore(&drop_lock, flags);
+
+	return err;
+}
+
 static int uml_net_rx(struct net_device *dev)
 {
 	struct uml_net_private *lp = dev->priv;
@@ -46,16 +81,19 @@ static int uml_net_rx(struct net_device *dev)
 	struct sk_buff *skb;
 
 	/* If we can't allocate memory, try again next round. */
-	skb = dev_alloc_skb(dev->mtu);
+	skb = dev_alloc_skb(lp->max_packet);
 	if (skb == NULL) {
+		drop_skb->dev = dev;
+		/* Read a packet into drop_skb and don't do anything with it. */
+		(*lp->read)(lp->fd, drop_skb, lp);
 		lp->stats.rx_dropped++;
 		return 0;
 	}
 
 	skb->dev = dev;
-	skb_put(skb, dev->mtu);
+	skb_put(skb, lp->max_packet);
 	skb_reset_mac_header(skb);
-	pkt_len = (*lp->read)(lp->fd, &skb, lp);
+	pkt_len = (*lp->read)(lp->fd, skb, lp);
 
 	if (pkt_len > 0) {
 		skb_trim(skb, pkt_len);
@@ -84,12 +122,12 @@ irqreturn_t uml_net_interrupt(int irq, void *dev_id)
 	struct uml_net_private *lp = dev->priv;
 	int err;
 
-	if(!netif_running(dev))
-		return(IRQ_NONE);
+	if (!netif_running(dev))
+		return IRQ_NONE;
 
 	spin_lock(&lp->lock);
-	while((err = uml_net_rx(dev)) > 0) ;
-	if(err < 0) {
+	while ((err = uml_net_rx(dev)) > 0) ;
+	if (err < 0) {
 		printk(KERN_ERR
 		       "Device '%s' read returned %d, shutting it down\n",
 		       dev->name, err);
@@ -115,20 +153,20 @@ static int uml_net_open(struct net_device *dev)
 	struct uml_net_private *lp = dev->priv;
 	int err;
 
-	if(lp->fd >= 0){
+	if (lp->fd >= 0) {
 		err = -ENXIO;
 		goto out;
 	}
 
 	lp->fd = (*lp->open)(&lp->user);
-	if(lp->fd < 0){
+	if (lp->fd < 0) {
 		err = lp->fd;
 		goto out;
 	}
 
 	err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt,
 			     IRQF_DISABLED | IRQF_SHARED, dev->name, dev);
-	if(err != 0){
+	if (err != 0) {
 		printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err);
 		err = -ENETUNREACH;
 		goto out_close;
@@ -141,7 +179,7 @@ static int uml_net_open(struct net_device *dev)
 	 * is full when we get here.  In this case, new data is never queued,
 	 * SIGIOs never arrive, and the net never works.
 	 */
-	while((err = uml_net_rx(dev)) > 0) ;
+	while ((err = uml_net_rx(dev)) > 0) ;
 
 	spin_lock(&opened_lock);
 	list_add(&lp->list, &opened);
@@ -149,7 +187,7 @@ static int uml_net_open(struct net_device *dev)
 
 	return 0;
 out_close:
-	if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user);
+	if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user);
 	lp->fd = -1;
 out:
 	return err;
@@ -162,7 +200,7 @@ static int uml_net_close(struct net_device *dev)
 	netif_stop_queue(dev);
 
 	free_irq(dev->irq, dev);
-	if(lp->close != NULL)
+	if (lp->close != NULL)
 		(*lp->close)(lp->fd, &lp->user);
 	lp->fd = -1;
 
@@ -183,9 +221,9 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	spin_lock_irqsave(&lp->lock, flags);
 
-	len = (*lp->write)(lp->fd, &skb, lp);
+	len = (*lp->write)(lp->fd, skb, lp);
 
-	if(len == skb->len) {
+	if (len == skb->len) {
 		lp->stats.tx_packets++;
 		lp->stats.tx_bytes += skb->len;
 		dev->trans_start = jiffies;
@@ -194,7 +232,7 @@ static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		/* this is normally done in the interrupt when tx finishes */
 		netif_wake_queue(dev);
 	}
-	else if(len == 0){
+	else if (len == 0) {
 		netif_start_queue(dev);
 		lp->stats.tx_dropped++;
 	}
@@ -218,8 +256,10 @@ static struct net_device_stats *uml_net_get_stats(struct net_device *dev)
 
 static void uml_net_set_multicast_list(struct net_device *dev)
 {
-	if (dev->flags & IFF_PROMISC) return;
-	else if (dev->mc_count)	dev->flags |= IFF_ALLMULTI;
+	if (dev->flags & IFF_PROMISC)
+		return;
+	else if (dev->mc_count)
+		dev->flags |= IFF_ALLMULTI;
 	else dev->flags &= ~IFF_ALLMULTI;
 }
 
@@ -243,22 +283,9 @@ static int uml_net_set_mac(struct net_device *dev, void *addr)
 
 static int uml_net_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct uml_net_private *lp = dev->priv;
-	int err = 0;
-
-	spin_lock_irq(&lp->lock);
-
-	new_mtu = (*lp->set_mtu)(new_mtu, &lp->user);
-	if(new_mtu < 0){
-		err = new_mtu;
-		goto out;
-	}
-
 	dev->mtu = new_mtu;
 
- out:
-	spin_unlock_irq(&lp->lock);
-	return err;
+	return 0;
 }
 
 static void uml_net_get_drvinfo(struct net_device *dev,
@@ -288,13 +315,13 @@ static void setup_etheraddr(char *str, unsigned char *addr, char *name)
 	char *end;
 	int i;
 
-	if(str == NULL)
+	if (str == NULL)
 		goto random;
 
-	for(i=0;i<6;i++){
+	for (i = 0;i < 6; i++) {
 		addr[i] = simple_strtoul(str, &end, 16);
-		if((end == str) ||
-		   ((*end != ':') && (*end != ',') && (*end != '\0'))){
+		if ((end == str) ||
+		   ((*end != ':') && (*end != ',') && (*end != '\0'))) {
 			printk(KERN_ERR
 			       "setup_etheraddr: failed to parse '%s' "
 			       "as an ethernet address\n", str);
@@ -349,7 +376,7 @@ static void net_device_release(struct device *dev)
 	struct net_device *netdev = device->dev;
 	struct uml_net_private *lp = netdev->priv;
 
-	if(lp->remove != NULL)
+	if (lp->remove != NULL)
 		(*lp->remove)(&lp->user);
 	list_del(&device->list);
 	kfree(device);
@@ -413,7 +440,7 @@ static void eth_configure(int n, void *init, char *mac,
 	device->pdev.name = DRIVER_NAME;
 	device->pdev.dev.release = net_device_release;
 	device->pdev.dev.driver_data = device;
-	if(platform_device_register(&device->pdev))
+	if (platform_device_register(&device->pdev))
 		goto out_free_netdev;
 	SET_NETDEV_DEV(dev,&device->pdev.dev);
 
@@ -430,6 +457,7 @@ static void eth_configure(int n, void *init, char *mac,
 		  .dev 			= dev,
 		  .fd 			= -1,
 		  .mac 			= { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0},
+		  .max_packet		= transport->user->max_packet,
 		  .protocol 		= transport->kern->protocol,
 		  .open 		= transport->user->open,
 		  .close 		= transport->user->close,
@@ -437,8 +465,7 @@ static void eth_configure(int n, void *init, char *mac,
 		  .read 		= transport->kern->read,
 		  .write 		= transport->kern->write,
 		  .add_address 		= transport->user->add_address,
-		  .delete_address  	= transport->user->delete_address,
-		  .set_mtu 		= transport->user->set_mtu });
+		  .delete_address  	= transport->user->delete_address });
 
 	init_timer(&lp->tl);
 	spin_lock_init(&lp->lock);
@@ -450,7 +477,7 @@ static void eth_configure(int n, void *init, char *mac,
 		goto out_unregister;
 
 	set_ether_mac(dev, device->mac);
-	dev->mtu = transport->user->max_packet;
+	dev->mtu = transport->user->mtu;
 	dev->open = uml_net_open;
 	dev->hard_start_xmit = uml_net_start_xmit;
 	dev->stop = uml_net_close;
@@ -463,6 +490,10 @@ static void eth_configure(int n, void *init, char *mac,
 	dev->watchdog_timeo = (HZ >> 1);
 	dev->irq = UM_ETH_IRQ;
 
+	err = update_drop_skb(lp->max_packet);
+	if (err)
+		goto out_undo_user_init;
+
 	rtnl_lock();
 	err = register_netdevice(dev);
 	rtnl_unlock();
@@ -493,9 +524,9 @@ static struct uml_net *find_device(int n)
 	struct list_head *ele;
 
 	spin_lock(&devices_lock);
-	list_for_each(ele, &devices){
+	list_for_each(ele, &devices) {
 		device = list_entry(ele, struct uml_net, list);
-		if(device->index == n)
+		if (device->index == n)
 			goto out;
 	}
 	device = NULL;
@@ -511,19 +542,19 @@ static int eth_parse(char *str, int *index_out, char **str_out,
 	int n, err = -EINVAL;;
 
 	n = simple_strtoul(str, &end, 0);
-	if(end == str){
+	if (end == str) {
 		*error_out = "Bad device number";
 		return err;
 	}
 
 	str = end;
-	if(*str != '='){
+	if (*str != '=') {
 		*error_out = "Expected '=' after device number";
 		return err;
 	}
 
 	str++;
-	if(find_device(n)){
+	if (find_device(n)) {
 		*error_out = "Device already configured";
 		return err;
 	}
@@ -551,20 +582,20 @@ static int check_transport(struct transport *transport, char *eth, int n,
 	int len;
 
 	len = strlen(transport->name);
-	if(strncmp(eth, transport->name, len))
+	if (strncmp(eth, transport->name, len))
 		return 0;
 
 	eth += len;
-	if(*eth == ',')
+	if (*eth == ',')
 		eth++;
-	else if(*eth != '\0')
+	else if (*eth != '\0')
 		return 0;
 
 	*init_out = kmalloc(transport->setup_size, GFP_KERNEL);
-	if(*init_out == NULL)
+	if (*init_out == NULL)
 		return 1;
 
-	if(!transport->setup(eth, mac_out, *init_out)){
+	if (!transport->setup(eth, mac_out, *init_out)) {
 		kfree(*init_out);
 		*init_out = NULL;
 	}
@@ -584,13 +615,13 @@ void register_transport(struct transport *new)
 	list_add(&new->list, &transports);
 	spin_unlock(&transports_lock);
 
-	list_for_each_safe(ele, next, &eth_cmd_line){
+	list_for_each_safe(ele, next, &eth_cmd_line) {
 		eth = list_entry(ele, struct eth_init, list);
 		match = check_transport(new, eth->init, eth->index, &init,
 					&mac);
-		if(!match)
+		if (!match)
 			continue;
-		else if(init != NULL){
+		else if (init != NULL) {
 			eth_configure(eth->index, init, mac, new);
 			kfree(init);
 		}
@@ -607,11 +638,11 @@ static int eth_setup_common(char *str, int index)
 	int found = 0;
 
 	spin_lock(&transports_lock);
-	list_for_each(ele, &transports){
+	list_for_each(ele, &transports) {
 		transport = list_entry(ele, struct transport, list);
-	        if(!check_transport(transport, str, index, &init, &mac))
+	        if (!check_transport(transport, str, index, &init, &mac))
 			continue;
-		if(init != NULL){
+		if (init != NULL) {
 			eth_configure(index, init, mac, transport);
 			kfree(init);
 		}
@@ -630,15 +661,15 @@ static int __init eth_setup(char *str)
 	int n, err;
 
 	err = eth_parse(str, &n, &str, &error);
-	if(err){
+	if (err) {
 		printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n",
 		       str, error);
 		return 1;
 	}
 
 	new = alloc_bootmem(sizeof(*new));
-	if (new == NULL){
-		printk("eth_init : alloc_bootmem failed\n");
+	if (new == NULL) {
+		printk(KERN_ERR "eth_init : alloc_bootmem failed\n");
 		return 1;
 	}
 
@@ -661,36 +692,36 @@ static int net_config(char *str, char **error_out)
 	int n, err;
 
 	err = eth_parse(str, &n, &str, error_out);
-	if(err)
+	if (err)
 		return err;
 
 	/* This string is broken up and the pieces used by the underlying
 	 * driver.  So, it is freed only if eth_setup_common fails.
 	 */
 	str = kstrdup(str, GFP_KERNEL);
-	if(str == NULL){
+	if (str == NULL) {
 	        *error_out = "net_config failed to strdup string";
 		return -ENOMEM;
 	}
 	err = !eth_setup_common(str, n);
-	if(err)
+	if (err)
 		kfree(str);
-	return(err);
+	return err;
 }
 
 static int net_id(char **str, int *start_out, int *end_out)
 {
-        char *end;
-        int n;
+	char *end;
+	int n;
 
 	n = simple_strtoul(*str, &end, 0);
-	if((*end != '\0') || (end == *str))
+	if ((*end != '\0') || (end == *str))
 		return -1;
 
-        *start_out = n;
-        *end_out = n;
-        *str = end;
-        return n;
+	*start_out = n;
+	*end_out = n;
+	*str = end;
+	return n;
 }
 
 static int net_remove(int n, char **error_out)
@@ -700,12 +731,12 @@ static int net_remove(int n, char **error_out)
 	struct uml_net_private *lp;
 
 	device = find_device(n);
-	if(device == NULL)
+	if (device == NULL)
 		return -ENODEV;
 
 	dev = device->dev;
 	lp = dev->priv;
-	if(lp->fd > 0)
+	if (lp->fd > 0)
 		return -EBUSY;
 	unregister_netdev(dev);
 	platform_device_unregister(&device->pdev);
@@ -731,13 +762,13 @@ static int uml_inetaddr_event(struct notifier_block *this, unsigned long event,
 	void (*proc)(unsigned char *, unsigned char *, void *);
 	unsigned char addr_buf[4], netmask_buf[4];
 
-	if(dev->open != uml_net_open)
+	if (dev->open != uml_net_open)
 		return NOTIFY_DONE;
 
 	lp = dev->priv;
 
 	proc = NULL;
-	switch (event){
+	switch (event) {
 	case NETDEV_UP:
 		proc = lp->add_address;
 		break;
@@ -745,7 +776,7 @@ static int uml_inetaddr_event(struct notifier_block *this, unsigned long event,
 		proc = lp->delete_address;
 		break;
 	}
-	if(proc != NULL){
+	if (proc != NULL) {
 		memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf));
 		memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf));
 		(*proc)(addr_buf, netmask_buf, &lp->user);
@@ -773,13 +804,13 @@ static int uml_net_init(void)
 	 * addresses which have already been set up get handled properly.
 	 */
 	spin_lock(&opened_lock);
-	list_for_each(ele, &opened){
+	list_for_each(ele, &opened) {
 		lp = list_entry(ele, struct uml_net_private, list);
 		ip = lp->dev->ip_ptr;
-		if(ip == NULL)
+		if (ip == NULL)
 			continue;
 		in = ip->ifa_list;
-		while(in != NULL){
+		while (in != NULL) {
 			uml_inetaddr_event(NULL, NETDEV_UP, in);
 			in = in->ifa_next;
 		}
@@ -797,12 +828,12 @@ static void close_devices(void)
 	struct uml_net_private *lp;
 
 	spin_lock(&opened_lock);
-	list_for_each(ele, &opened){
+	list_for_each(ele, &opened) {
 		lp = list_entry(ele, struct uml_net_private, list);
 		free_irq(lp->dev->irq, lp->dev);
-		if((lp->close != NULL) && (lp->fd >= 0))
+		if ((lp->close != NULL) && (lp->fd >= 0))
 			(*lp->close)(lp->fd, &lp->user);
-		if(lp->remove != NULL)
+		if (lp->remove != NULL)
 			(*lp->remove)(&lp->user);
 	}
 	spin_unlock(&opened_lock);
@@ -810,19 +841,6 @@ static void close_devices(void)
 
 __uml_exitcall(close_devices);
 
-struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra)
-{
-	if((skb != NULL) && (skb_tailroom(skb) < extra)){
-	  	struct sk_buff *skb2;
-
-		skb2 = skb_copy_expand(skb, 0, extra, GFP_ATOMIC);
-		dev_kfree_skb(skb);
-		skb = skb2;
-	}
-	if(skb != NULL) skb_put(skb, extra);
-	return(skb);
-}
-
 void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *,
 					void *),
 		    void *arg)
@@ -832,9 +850,9 @@ void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *,
 	struct in_ifaddr *in;
 	unsigned char address[4], netmask[4];
 
-	if(ip == NULL) return;
+	if (ip == NULL) return;
 	in = ip->ifa_list;
-	while(in != NULL){
+	while (in != NULL) {
 		memcpy(address, &in->ifa_address, sizeof(address));
 		memcpy(netmask, &in->ifa_mask, sizeof(netmask));
 		(*cb)(address, netmask, arg);
@@ -849,15 +867,15 @@ int dev_netmask(void *d, void *m)
 	struct in_ifaddr *in;
 	__be32 *mask_out = m;
 
-	if(ip == NULL)
-		return(1);
+	if (ip == NULL)
+		return 1;
 
 	in = ip->ifa_list;
-	if(in == NULL)
-		return(1);
+	if (in == NULL)
+		return 1;
 
 	*mask_out = in->ifa_mask;
-	return(0);
+	return 0;
 }
 
 void *get_output_buffer(int *len_out)
@@ -865,7 +883,7 @@ void *get_output_buffer(int *len_out)
 	void *ret;
 
 	ret = (void *) __get_free_pages(GFP_KERNEL, 0);
-	if(ret) *len_out = PAGE_SIZE;
+	if (ret) *len_out = PAGE_SIZE;
 	else *len_out = 0;
 	return ret;
 }
@@ -881,16 +899,16 @@ int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out,
 	char *remain;
 
 	remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL);
-	if(remain != NULL){
-		printk("tap_setup_common - Extra garbage on specification : "
-		       "'%s'\n", remain);
-		return(1);
+	if (remain != NULL) {
+		printk(KERN_ERR "tap_setup_common - Extra garbage on "
+		       "specification : '%s'\n", remain);
+		return 1;
 	}
 
-	return(0);
+	return 0;
 }
 
 unsigned short eth_protocol(struct sk_buff *skb)
 {
-	return(eth_type_trans(skb, skb->dev));
+	return eth_type_trans(skb, skb->dev);
 }
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index da946e3e1bf..90d7f2e8ead 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -1,34 +1,32 @@
-/* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stddef.h>
-#include <stdarg.h>
-#include <unistd.h>
 #include <stdio.h>
+#include <unistd.h>
+#include <stdarg.h>
 #include <errno.h>
-#include <stdlib.h>
+#include <stddef.h>
 #include <string.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
-#include <sys/time.h>
-#include "user.h"
-#include "kern_util.h"
 #include "net_user.h"
+#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "kern_constants.h"
+#include "user.h"
 
 int tap_open_common(void *dev, char *gate_addr)
 {
 	int tap_addr[4];
 
-	if(gate_addr == NULL)
+	if (gate_addr == NULL)
 		return 0;
-	if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], 
-		  &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){
-		printk("Invalid tap IP address - '%s'\n", gate_addr);
+	if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
+		  &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) {
+		printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n",
+		       gate_addr);
 		return -EINVAL;
 	}
 	return 0;
@@ -38,15 +36,15 @@ void tap_check_ips(char *gate_addr, unsigned char *eth_addr)
 {
 	int tap_addr[4];
 
-	if((gate_addr != NULL) && 
-	   (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], 
-		   &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) &&
-	   (eth_addr[0] == tap_addr[0]) && 
-	   (eth_addr[1] == tap_addr[1]) && 
-	   (eth_addr[2] == tap_addr[2]) && 
-	   (eth_addr[3] == tap_addr[3])){
-		printk("The tap IP address and the UML eth IP address"
-		       " must be different\n");
+	if ((gate_addr != NULL) &&
+	    (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
+		    &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) &&
+	    (eth_addr[0] == tap_addr[0]) &&
+	    (eth_addr[1] == tap_addr[1]) &&
+	    (eth_addr[2] == tap_addr[2]) &&
+	    (eth_addr[3] == tap_addr[3])) {
+		printk(UM_KERN_ERR "The tap IP address and the UML eth IP "
+		       "address must be different\n");
 	}
 }
 
@@ -57,24 +55,28 @@ void read_output(int fd, char *output, int len)
 	char c;
 	char *str;
 
-	if(output == NULL){
+	if (output == NULL) {
 		output = &c;
 		len = sizeof(c);
 	}
-		
+
 	*output = '\0';
-	ret = os_read_file(fd, &remain, sizeof(remain));
+	ret = read(fd, &remain, sizeof(remain));
 
 	if (ret != sizeof(remain)) {
+		if (ret < 0)
+			ret = -errno;
 		expected = sizeof(remain);
 		str = "length";
 		goto err;
 	}
 
-	while(remain != 0){
+	while (remain != 0) {
 		expected = (remain < len) ? remain : len;
-		ret = os_read_file(fd, output, expected);
+		ret = read(fd, output, expected);
 		if (ret != expected) {
+			if (ret < 0)
+				ret = -errno;
 			str = "data";
 			goto err;
 		}
@@ -85,20 +87,22 @@ void read_output(int fd, char *output, int len)
 
 err:
 	if (ret < 0)
-		printk("read_output - read of %s failed, errno = %d\n", str, -ret);
+		printk(UM_KERN_ERR "read_output - read of %s failed, "
+		       "errno = %d\n", str, -ret);
 	else
-		printk("read_output - read of %s failed, read only %d of %d bytes\n", str, ret, expected);
+		printk(UM_KERN_ERR "read_output - read of %s failed, read only "
+		       "%d of %d bytes\n", str, ret, expected);
 }
 
 int net_read(int fd, void *buf, int len)
 {
 	int n;
 
-	n = os_read_file(fd,  buf,  len);
+	n = read(fd,  buf,  len);
 
-	if(n == -EAGAIN)
+	if ((n < 0) && (errno == EAGAIN))
 		return 0;
-	else if(n == 0)
+	else if (n == 0)
 		return -ENOTCONN;
 	return n;
 }
@@ -108,12 +112,12 @@ int net_recvfrom(int fd, void *buf, int len)
 	int n;
 
 	CATCH_EINTR(n = recvfrom(fd,  buf,  len, 0, NULL, NULL));
-	if(n < 0){
-		if(errno == EAGAIN)
+	if (n < 0) {
+		if (errno == EAGAIN)
 			return 0;
 		return -errno;
 	}
-	else if(n == 0)
+	else if (n == 0)
 		return -ENOTCONN;
 	return n;
 }
@@ -122,11 +126,11 @@ int net_write(int fd, void *buf, int len)
 {
 	int n;
 
-	n = os_write_file(fd, buf, len);
+	n = write(fd, buf, len);
 
-	if(n == -EAGAIN)
+	if ((n < 0) && (errno == EAGAIN))
 		return 0;
-	else if(n == 0)
+	else if (n == 0)
 		return -ENOTCONN;
 	return n;
 }
@@ -136,12 +140,12 @@ int net_send(int fd, void *buf, int len)
 	int n;
 
 	CATCH_EINTR(n = send(fd, buf, len, 0));
-	if(n < 0){
-		if(errno == EAGAIN)
+	if (n < 0) {
+		if (errno == EAGAIN)
 			return 0;
 		return -errno;
 	}
-	else if(n == 0)
+	else if (n == 0)
 		return -ENOTCONN;
 	return n;
 }
@@ -152,12 +156,12 @@ int net_sendto(int fd, void *buf, int len, void *to, int sock_len)
 
 	CATCH_EINTR(n = sendto(fd, buf, len, 0, (struct sockaddr *) to,
 			       sock_len));
-	if(n < 0){
-		if(errno == EAGAIN)
+	if (n < 0) {
+		if (errno == EAGAIN)
 			return 0;
 		return -errno;
 	}
-	else if(n == 0)
+	else if (n == 0)
 		return -ENOTCONN;
 	return n;
 }
@@ -171,7 +175,7 @@ static void change_pre_exec(void *arg)
 {
 	struct change_pre_exec_data *data = arg;
 
-	os_close_file(data->close_me);
+	close(data->close_me);
 	dup2(data->stdout, 1);
 }
 
@@ -181,8 +185,9 @@ static int change_tramp(char **argv, char *output, int output_len)
 	struct change_pre_exec_data pe_data;
 
 	err = os_pipe(fds, 1, 0);
-	if(err < 0){
-		printk("change_tramp - pipe failed, err = %d\n", -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n",
+		       -err);
 		return err;
 	}
 	pe_data.close_me = fds[0];
@@ -192,8 +197,8 @@ static int change_tramp(char **argv, char *output, int output_len)
 	if (pid > 0)	/* Avoid hang as we won't get data in failure case. */
 		read_output(fds[0], output, output_len);
 
-	os_close_file(fds[0]);
-	os_close_file(fds[1]);
+	close(fds[0]);
+	close(fds[1]);
 
 	if (pid > 0)
 		CATCH_EINTR(err = waitpid(pid, NULL, 0));
@@ -206,25 +211,26 @@ static void change(char *dev, char *what, unsigned char *addr,
 	char addr_buf[sizeof("255.255.255.255\0")];
 	char netmask_buf[sizeof("255.255.255.255\0")];
 	char version[sizeof("nnnnn\0")];
-	char *argv[] = { "uml_net", version, what, dev, addr_buf, 
+	char *argv[] = { "uml_net", version, what, dev, addr_buf,
 			 netmask_buf, NULL };
 	char *output;
 	int output_len, pid;
 
 	sprintf(version, "%d", UML_NET_VERSION);
 	sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
-	sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], 
+	sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1],
 		netmask[2], netmask[3]);
 
 	output_len = UM_KERN_PAGE_SIZE;
 	output = kmalloc(output_len, UM_GFP_KERNEL);
-	if(output == NULL)
-		printk("change : failed to allocate output buffer\n");
+	if (output == NULL)
+		printk(UM_KERN_ERR "change : failed to allocate output "
+		       "buffer\n");
 
 	pid = change_tramp(argv, output, output_len);
-	if(pid < 0) return;
+	if (pid < 0) return;
 
-	if(output != NULL){
+	if (output != NULL) {
 		printk("%s", output);
 		kfree(output);
 	}
@@ -246,13 +252,13 @@ char *split_if_spec(char *str, ...)
 	va_list ap;
 
 	va_start(ap, str);
-	while((arg = va_arg(ap, char **)) != NULL){
-		if(*str == '\0')
+	while ((arg = va_arg(ap, char **)) != NULL) {
+		if (*str == '\0')
 			return NULL;
 		end = strchr(str, ',');
-		if(end != str)
+		if (end != str)
 			*arg = str;
-		if(end == NULL)
+		if (end == NULL)
 			return NULL;
 		*end++ = '\0';
 		str = end;
diff --git a/arch/um/drivers/null.c b/arch/um/drivers/null.c
index 9016c68beee..21ad3d7932b 100644
--- a/arch/um/drivers/null.c
+++ b/arch/um/drivers/null.c
@@ -1,10 +1,11 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
-#include <stdlib.h>
+#include <stddef.h>
 #include <errno.h>
+#include <fcntl.h>
 #include "chan_user.h"
 #include "os.h"
 
@@ -13,19 +14,23 @@ static int null_chan;
 
 static void *null_init(char *str, int device, const struct chan_opts *opts)
 {
-	return(&null_chan);
+	return &null_chan;
 }
 
 static int null_open(int input, int output, int primary, void *d,
 		     char **dev_out)
 {
+	int fd;
+
 	*dev_out = NULL;
-	return(os_open_file(DEV_NULL, of_rdwr(OPENFLAGS()), 0));
+
+	fd = open(DEV_NULL, O_RDWR);
+	return (fd < 0) ? -errno : fd;
 }
 
 static int null_read(int fd, char *c_out, void *unused)
 {
-	return(-ENODEV);
+	return -ENODEV;
 }
 
 static void null_free(void *data)
@@ -44,14 +49,3 @@ const struct chan_ops null_ops = {
 	.free		= null_free,
 	.winch		= 0,
 };
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
index c329931673d..3a750dd39be 100644
--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
@@ -1,13 +1,11 @@
 /*
- * Copyright (C) 2002 Jeff Dike <jdike@karaya.com>
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL.
  */
 
 #include "linux/init.h"
-#include "linux/netdevice.h"
-#include "linux/etherdevice.h"
+#include <linux/netdevice.h>
 #include "net_kern.h"
-#include "net_user.h"
 #include "pcap_user.h"
 
 struct pcap_init {
@@ -33,19 +31,14 @@ void pcap_init(struct net_device *dev, void *data)
 	printk("pcap backend, host interface %s\n", ppri->host_if);
 }
 
-static int pcap_read(int fd, struct sk_buff **skb,
-		       struct uml_net_private *lp)
+static int pcap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
-	if(*skb == NULL)
-		return -ENOMEM;
-
-	return pcap_user_read(fd, skb_mac_header(*skb),
-			      (*skb)->dev->mtu + ETH_HEADER_OTHER,
+	return pcap_user_read(fd, skb_mac_header(skb),
+			      skb->dev->mtu + ETH_HEADER_OTHER,
 			      (struct pcap_data *) &lp->user);
 }
 
-static int pcap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp)
+static int pcap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
 	return -EPERM;
 }
@@ -71,28 +64,29 @@ int pcap_setup(char *str, char **mac_out, void *data)
 
 	remain = split_if_spec(str, &host_if, &init->filter,
 			       &options[0], &options[1], mac_out, NULL);
-	if(remain != NULL){
+	if (remain != NULL) {
 		printk(KERN_ERR "pcap_setup - Extra garbage on "
 		       "specification : '%s'\n", remain);
 		return 0;
 	}
 
-	if(host_if != NULL)
+	if (host_if != NULL)
 		init->host_if = host_if;
 
-	for(i = 0; i < ARRAY_SIZE(options); i++){
-		if(options[i] == NULL)
+	for (i = 0; i < ARRAY_SIZE(options); i++) {
+		if (options[i] == NULL)
 			continue;
-		if(!strcmp(options[i], "promisc"))
+		if (!strcmp(options[i], "promisc"))
 			init->promisc = 1;
-		else if(!strcmp(options[i], "nopromisc"))
+		else if (!strcmp(options[i], "nopromisc"))
 			init->promisc = 0;
-		else if(!strcmp(options[i], "optimize"))
+		else if (!strcmp(options[i], "optimize"))
 			init->optimize = 1;
-		else if(!strcmp(options[i], "nooptimize"))
+		else if (!strcmp(options[i], "nooptimize"))
 			init->optimize = 0;
 		else {
-			printk("pcap_setup : bad option - '%s'\n", options[i]);
+			printk(KERN_ERR "pcap_setup : bad option - '%s'\n",
+			       options[i]);
 			return 0;
 		}
 	}
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
index 1316456e2a2..e9809356c53 100644
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
@@ -1,21 +1,17 @@
 /*
- * Copyright (C) 2002 Jeff Dike <jdike@karaya.com>
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL.
  */
 
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
 #include <errno.h>
 #include <pcap.h>
+#include <string.h>
 #include <asm/types.h>
 #include "net_user.h"
 #include "pcap_user.h"
-#include "user.h"
-#include "um_malloc.h"
 #include "kern_constants.h"
-
-#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER)
+#include "um_malloc.h"
+#include "user.h"
 
 #define PCAP_FD(p) (*(int *)(p))
 
@@ -25,8 +21,9 @@ static int pcap_user_init(void *data, void *dev)
 	pcap_t *p;
 	char errors[PCAP_ERRBUF_SIZE];
 
-	p = pcap_open_live(pri->host_if, MAX_PACKET, pri->promisc, 0, errors);
-	if(p == NULL){
+	p = pcap_open_live(pri->host_if, ETH_MAX_PACKET + ETH_HEADER_OTHER,
+			   pri->promisc, 0, errors);
+	if (p == NULL) {
 		printk(UM_KERN_ERR "pcap_user_init : pcap_open_live failed - "
 		       "'%s'\n", errors);
 		return -EINVAL;
@@ -43,50 +40,55 @@ static int pcap_open(void *data)
 	__u32 netmask;
 	int err;
 
-	if(pri->pcap == NULL)
+	if (pri->pcap == NULL)
 		return -ENODEV;
 
-	if(pri->filter != NULL){
+	if (pri->filter != NULL) {
 		err = dev_netmask(pri->dev, &netmask);
-		if(err < 0){
+		if (err < 0) {
 			printk(UM_KERN_ERR "pcap_open : dev_netmask failed\n");
 			return -EIO;
 		}
 
-		pri->compiled = kmalloc(sizeof(struct bpf_program), UM_GFP_KERNEL);
-		if(pri->compiled == NULL){
+		pri->compiled = kmalloc(sizeof(struct bpf_program),
+					UM_GFP_KERNEL);
+		if (pri->compiled == NULL) {
 			printk(UM_KERN_ERR "pcap_open : kmalloc failed\n");
 			return -ENOMEM;
 		}
 
-		err = pcap_compile(pri->pcap, 
-				   (struct bpf_program *) pri->compiled, 
+		err = pcap_compile(pri->pcap,
+				   (struct bpf_program *) pri->compiled,
 				   pri->filter, pri->optimize, netmask);
-		if(err < 0){
+		if (err < 0) {
 			printk(UM_KERN_ERR "pcap_open : pcap_compile failed - "
 			       "'%s'\n", pcap_geterr(pri->pcap));
-			return -EIO;
+			goto out;
 		}
 
 		err = pcap_setfilter(pri->pcap, pri->compiled);
-		if(err < 0){
+		if (err < 0) {
 			printk(UM_KERN_ERR "pcap_open : pcap_setfilter "
 			       "failed - '%s'\n", pcap_geterr(pri->pcap));
-			return -EIO;
+			goto out;
 		}
 	}
 
 	return PCAP_FD(pri->pcap);
+
+ out:
+	kfree(pri->compiled);
+	return -EIO;
 }
 
 static void pcap_remove(void *data)
 {
 	struct pcap_data *pri = data;
 
-	if(pri->compiled != NULL)
+	if (pri->compiled != NULL)
 		pcap_freecode(pri->compiled);
 
-	if(pri->pcap != NULL)
+	if (pri->pcap != NULL)
 		pcap_close(pri->pcap);
 }
 
@@ -95,7 +97,7 @@ struct pcap_handler_data {
 	int len;
 };
 
-static void handler(u_char *data, const struct pcap_pkthdr *header, 
+static void handler(u_char *data, const struct pcap_pkthdr *header,
 		    const u_char *packet)
 {
 	int len;
@@ -115,12 +117,12 @@ int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri)
 	int n;
 
 	n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata);
-	if(n < 0){
+	if (n < 0) {
 		printk(UM_KERN_ERR "pcap_dispatch failed - %s\n",
 		       pcap_geterr(pri->pcap));
 		return -EIO;
 	}
-	else if(n == 0) 
+	else if (n == 0)
 		return 0;
 	return hdata.len;
 }
@@ -130,8 +132,8 @@ const struct net_user_info pcap_user_info = {
 	.open		= pcap_open,
 	.close	 	= NULL,
 	.remove	 	= pcap_remove,
-	.set_mtu	= NULL,
 	.add_address	= NULL,
 	.delete_address = NULL,
-	.max_packet	= MAX_PACKET - ETH_HEADER_OTHER
+	.mtu		= ETH_MAX_PACKET,
+	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
 };
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 1c8efd95c42..330543b3129 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -1,24 +1,16 @@
 /*
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
-#include "linux/list.h"
-#include "linux/sched.h"
-#include "linux/slab.h"
+#include "linux/completion.h"
 #include "linux/interrupt.h"
-#include "linux/spinlock.h"
-#include "linux/errno.h"
+#include "linux/list.h"
 #include "asm/atomic.h"
-#include "asm/semaphore.h"
-#include "asm/errno.h"
-#include "kern_util.h"
-#include "kern.h"
-#include "irq_user.h"
-#include "irq_kern.h"
-#include "port.h"
 #include "init.h"
+#include "irq_kern.h"
 #include "os.h"
+#include "port.h"
 
 struct port_list {
 	struct list_head list;
@@ -53,8 +45,8 @@ static irqreturn_t pipe_interrupt(int irq, void *data)
 	int fd;
 
 	fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
-	if(fd < 0){
-		if(fd == -EAGAIN)
+	if (fd < 0) {
+		if (fd == -EAGAIN)
 			return IRQ_NONE;
 
 		printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n",
@@ -81,18 +73,18 @@ static irqreturn_t pipe_interrupt(int irq, void *data)
 static int port_accept(struct port_list *port)
 {
 	struct connection *conn;
-	int fd, socket[2], pid, ret = 0;
+	int fd, socket[2], pid;
 
 	fd = port_connection(port->fd, socket, &pid);
-	if(fd < 0){
-		if(fd != -EAGAIN)
+	if (fd < 0) {
+		if (fd != -EAGAIN)
 			printk(KERN_ERR "port_accept : port_connection "
 			       "returned %d\n", -fd);
 		goto out;
 	}
 
 	conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
-	if(conn == NULL){
+	if (conn == NULL) {
 		printk(KERN_ERR "port_accept : failed to allocate "
 		       "connection\n");
 		goto out_close;
@@ -104,17 +96,17 @@ static int port_accept(struct port_list *port)
 		  .telnetd_pid 	= pid,
 		  .port 	= port });
 
-	if(um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt,
+	if (um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt,
 			  IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM,
-			  "telnetd", conn)){
+			  "telnetd", conn)) {
 		printk(KERN_ERR "port_accept : failed to get IRQ for "
 		       "telnetd\n");
 		goto out_free;
 	}
 
-	if(atomic_read(&port->wait_count) == 0){
+	if (atomic_read(&port->wait_count) == 0) {
 		os_write_file(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG));
-		printk("No one waiting for port\n");
+		printk(KERN_ERR "No one waiting for port\n");
 	}
 	list_add(&conn->list, &port->pending);
 	return 1;
@@ -123,28 +115,29 @@ static int port_accept(struct port_list *port)
 	kfree(conn);
  out_close:
 	os_close_file(fd);
-	if(pid != -1)
-		os_kill_process(pid, 1);
+	os_kill_process(pid, 1);
  out:
-	return ret;
+	return 0;
 }
 
 static DECLARE_MUTEX(ports_sem);
 static LIST_HEAD(ports);
 
-void port_work_proc(struct work_struct *unused)
+static void port_work_proc(struct work_struct *unused)
 {
 	struct port_list *port;
 	struct list_head *ele;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	list_for_each(ele, &ports){
+	list_for_each(ele, &ports) {
 		port = list_entry(ele, struct port_list, list);
-		if(!port->has_connection)
+		if (!port->has_connection)
 			continue;
+
 		reactivate_fd(port->fd, ACCEPT_IRQ);
-		while(port_accept(port)) ;
+		while (port_accept(port))
+			;
 		port->has_connection = 0;
 	}
 	local_irq_restore(flags);
@@ -169,25 +162,27 @@ void *port_data(int port_num)
 	int fd;
 
 	down(&ports_sem);
-	list_for_each(ele, &ports){
+	list_for_each(ele, &ports) {
 		port = list_entry(ele, struct port_list, list);
-		if(port->port == port_num) goto found;
+		if (port->port == port_num)
+			goto found;
 	}
 	port = kmalloc(sizeof(struct port_list), GFP_KERNEL);
-	if(port == NULL){
+	if (port == NULL) {
 		printk(KERN_ERR "Allocation of port list failed\n");
 		goto out;
 	}
 
 	fd = port_listen_fd(port_num);
-	if(fd < 0){
+	if (fd < 0) {
 		printk(KERN_ERR "binding to port %d failed, errno = %d\n",
 		       port_num, -fd);
 		goto out_free;
 	}
-	if(um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt,
+
+	if (um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt,
 			  IRQF_DISABLED | IRQF_SHARED | IRQF_SAMPLE_RANDOM,
-			  "port", port)){
+			  "port", port)) {
 		printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num);
 		goto out_close;
 	}
@@ -206,7 +201,7 @@ void *port_data(int port_num)
 
  found:
 	dev = kmalloc(sizeof(struct port_dev), GFP_KERNEL);
-	if(dev == NULL){
+	if (dev == NULL) {
 		printk(KERN_ERR "Allocation of port device entry failed\n");
 		goto out;
 	}
@@ -216,10 +211,10 @@ void *port_data(int port_num)
 				    .telnetd_pid  	= -1 });
 	goto out;
 
- out_free:
-	kfree(port);
  out_close:
 	os_close_file(fd);
+ out_free:
+	kfree(port);
  out:
 	up(&ports_sem);
 	return dev;
@@ -233,9 +228,9 @@ int port_wait(void *data)
 	int fd;
 
 	atomic_inc(&port->wait_count);
-	while(1){
+	while (1) {
 		fd = -ERESTARTSYS;
-		if(wait_for_completion_interruptible(&port->done))
+		if (wait_for_completion_interruptible(&port->done))
 			goto out;
 
 		spin_lock(&port->lock);
@@ -258,7 +253,8 @@ int port_wait(void *data)
 		 */
 		free_irq(TELNETD_IRQ, conn);
 
-		if(conn->fd >= 0) break;
+		if (conn->fd >= 0)
+			break;
 		os_close_file(conn->fd);
 		kfree(conn);
 	}
@@ -276,9 +272,9 @@ void port_remove_dev(void *d)
 {
 	struct port_dev *dev = d;
 
-	if(dev->helper_pid != -1)
+	if (dev->helper_pid != -1)
 		os_kill_process(dev->helper_pid, 0);
-	if(dev->telnetd_pid != -1)
+	if (dev->telnetd_pid != -1)
 		os_kill_process(dev->telnetd_pid, 1);
 	dev->helper_pid = -1;
 	dev->telnetd_pid = -1;
@@ -297,7 +293,7 @@ static void free_port(void)
 	struct list_head *ele;
 	struct port_list *port;
 
-	list_for_each(ele, &ports){
+	list_for_each(ele, &ports) {
 		port = list_entry(ele, struct port_list, list);
 		free_irq_by_fd(port->fd);
 		os_close_file(port->fd);
diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
index c799b00012c..addd7590265 100644
--- a/arch/um/drivers/port_user.c
+++ b/arch/um/drivers/port_user.c
@@ -1,24 +1,20 @@
 /*
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
 #include <stdio.h>
-#include <stddef.h>
 #include <stdlib.h>
-#include <string.h>
 #include <errno.h>
-#include <unistd.h>
 #include <termios.h>
-#include <sys/socket.h>
-#include <sys/un.h>
+#include <unistd.h>
 #include <netinet/in.h>
-#include "kern_util.h"
-#include "user.h"
 #include "chan_user.h"
-#include "port.h"
+#include "kern_constants.h"
 #include "os.h"
+#include "port.h"
 #include "um_malloc.h"
+#include "user.h"
 
 struct port_chan {
 	int raw;
@@ -34,24 +30,25 @@ static void *port_init(char *str, int device, const struct chan_opts *opts)
 	char *end;
 	int port;
 
-	if(*str != ':'){
-		printk("port_init : channel type 'port' must specify a "
-		       "port number\n");
+	if (*str != ':') {
+		printk(UM_KERN_ERR "port_init : channel type 'port' must "
+		       "specify a port number\n");
 		return NULL;
 	}
 	str++;
 	port = strtoul(str, &end, 0);
-	if((*end != '\0') || (end == str)){
-		printk("port_init : couldn't parse port '%s'\n", str);
+	if ((*end != '\0') || (end == str)) {
+		printk(UM_KERN_ERR "port_init : couldn't parse port '%s'\n",
+		       str);
 		return NULL;
 	}
 
 	kern_data = port_data(port);
-	if(kern_data == NULL)
+	if (kern_data == NULL)
 		return NULL;
 
 	data = kmalloc(sizeof(*data), UM_GFP_KERNEL);
-	if(data == NULL)
+	if (data == NULL)
 		goto err;
 
 	*data = ((struct port_chan) { .raw  		= opts->raw,
@@ -79,13 +76,13 @@ static int port_open(int input, int output, int primary, void *d,
 	int fd, err;
 
 	fd = port_wait(data->kernel_data);
-	if((fd >= 0) && data->raw){
+	if ((fd >= 0) && data->raw) {
 		CATCH_EINTR(err = tcgetattr(fd, &data->tt));
-		if(err)
+		if (err)
 			return err;
 
 		err = raw(fd);
-		if(err)
+		if (err)
 			return err;
 	}
 	*dev_out = data->dev;
@@ -119,11 +116,11 @@ int port_listen_fd(int port)
 	int fd, err, arg;
 
 	fd = socket(PF_INET, SOCK_STREAM, 0);
-	if(fd == -1)
+	if (fd == -1)
 		return -errno;
 
 	arg = 1;
-	if(setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &arg, sizeof(arg)) < 0){
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &arg, sizeof(arg)) < 0) {
 		err = -errno;
 		goto out;
 	}
@@ -131,23 +128,23 @@ int port_listen_fd(int port)
 	addr.sin_family = AF_INET;
 	addr.sin_port = htons(port);
 	addr.sin_addr.s_addr = htonl(INADDR_ANY);
-	if(bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0){
+	if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
 		err = -errno;
 		goto out;
 	}
 
-	if(listen(fd, 1) < 0){
+	if (listen(fd, 1) < 0) {
 		err = -errno;
 		goto out;
 	}
 
 	err = os_set_fd_block(fd, 0);
-	if(err < 0)
+	if (err < 0)
 		goto out;
 
 	return fd;
  out:
-	os_close_file(fd);
+	close(fd);
 	return err;
 }
 
@@ -163,10 +160,10 @@ void port_pre_exec(void *arg)
 	dup2(data->sock_fd, 0);
 	dup2(data->sock_fd, 1);
 	dup2(data->sock_fd, 2);
-	os_close_file(data->sock_fd);
+	close(data->sock_fd);
 	dup2(data->pipe_fd, 3);
-	os_shutdown_socket(3, 1, 0);
-	os_close_file(data->pipe_fd);
+	shutdown(3, SHUT_RD);
+	close(data->pipe_fd);
 }
 
 int port_connection(int fd, int *socket, int *pid_out)
@@ -176,12 +173,12 @@ int port_connection(int fd, int *socket, int *pid_out)
 			 "/usr/lib/uml/port-helper", NULL };
 	struct port_pre_exec_data data;
 
-	new = os_accept_connection(fd);
-	if(new < 0)
-		return new;
+	new = accept(fd, NULL, 0);
+	if (new < 0)
+		return -errno;
 
 	err = os_pipe(socket, 0, 0);
-	if(err < 0)
+	if (err < 0)
 		goto out_close;
 
 	data = ((struct port_pre_exec_data)
@@ -189,18 +186,18 @@ int port_connection(int fd, int *socket, int *pid_out)
 		  .pipe_fd 		= socket[1] });
 
 	err = run_helper(port_pre_exec, &data, argv);
-	if(err < 0)
+	if (err < 0)
 		goto out_shutdown;
 
 	*pid_out = err;
 	return new;
 
  out_shutdown:
-	os_shutdown_socket(socket[0], 1, 1);
-	os_close_file(socket[0]);
-	os_shutdown_socket(socket[1], 1, 1);
-	os_close_file(socket[1]);
+	shutdown(socket[0], SHUT_RDWR);
+	close(socket[0]);
+	shutdown(socket[1], SHUT_RDWR);
+	close(socket[1]);
  out_close:
-	os_close_file(new);
+	close(new);
 	return err;
 }
diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c
index 1e3fd619a83..49c79dda604 100644
--- a/arch/um/drivers/pty.c
+++ b/arch/um/drivers/pty.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
@@ -6,16 +6,16 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <string.h>
-#include <fcntl.h>
 #include <errno.h>
+#include <fcntl.h>
+#include <string.h>
 #include <termios.h>
 #include <sys/stat.h>
 #include "chan_user.h"
-#include "os.h"
-#include "user.h"
 #include "kern_constants.h"
+#include "os.h"
 #include "um_malloc.h"
+#include "user.h"
 
 struct pty_chan {
 	void (*announce)(char *dev_name, int dev);
@@ -33,7 +33,7 @@ static void *pty_chan_init(char *str, int device, const struct chan_opts *opts)
 	if (data == NULL)
 		return NULL;
 
-	*data = ((struct pty_chan) { .announce  	= opts->announce, 
+	*data = ((struct pty_chan) { .announce  	= opts->announce,
 				     .dev  		= device,
 				     .raw  		= opts->raw });
 	return data;
@@ -56,11 +56,11 @@ static int pts_open(int input, int output, int primary, void *d,
 	if (data->raw) {
 		CATCH_EINTR(err = tcgetattr(fd, &data->tt));
 		if (err)
-			return err;
+			goto out_close;
 
 		err = raw(fd);
 		if (err)
-			return err;
+			goto out_close;
 	}
 
 	dev = ptsname(fd);
@@ -71,6 +71,10 @@ static int pts_open(int input, int output, int primary, void *d,
 		(*data->announce)(dev, data->dev);
 
 	return fd;
+
+out_close:
+	close(fd);
+	return err;
 }
 
 static int getmaster(char *line)
@@ -97,7 +101,7 @@ static int getmaster(char *line)
 				*tp = 't';
 				err = access(line, R_OK | W_OK);
 				*tp = 'p';
-				if(!err)
+				if (!err)
 					return master;
 				close(master);
 			}
@@ -119,12 +123,14 @@ static int pty_open(int input, int output, int primary, void *d,
 	if (fd < 0)
 		return fd;
 
-	if(data->raw){
+	if (data->raw) {
 		err = raw(fd);
-		if (err)
+		if (err) {
+			close(fd);
 			return err;
+		}
 	}
-	
+
 	if (data->announce)
 		(*data->announce)(dev, data->dev);
 
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index 125c44f7763..ae67e7158e7 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -1,11 +1,12 @@
-#include "linux/kernel.h"
-#include "linux/stddef.h"
-#include "linux/init.h"
-#include "linux/netdevice.h"
-#include "linux/if_arp.h"
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL.
+ */
+
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
 #include "net_kern.h"
-#include "net_user.h"
-#include "kern.h"
 #include "slip.h"
 
 struct slip_init {
@@ -43,21 +44,19 @@ void slip_init(struct net_device *dev, void *data)
 
 static unsigned short slip_protocol(struct sk_buff *skbuff)
 {
-	return(htons(ETH_P_IP));
+	return htons(ETH_P_IP);
 }
 
-static int slip_read(int fd, struct sk_buff **skb, 
-		       struct uml_net_private *lp)
+static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	return(slip_user_read(fd, skb_mac_header(*skb), (*skb)->dev->mtu,
-			      (struct slip_data *) &lp->user));
+	return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
+			      (struct slip_data *) &lp->user);
 }
 
-static int slip_write(int fd, struct sk_buff **skb,
-		      struct uml_net_private *lp)
+static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	return(slip_user_write(fd, (*skb)->data, (*skb)->len, 
-			       (struct slip_data *) &lp->user));
+	return slip_user_write(fd, skb->data, skb->len,
+			       (struct slip_data *) &lp->user);
 }
 
 const struct net_kern_info slip_kern_info = {
@@ -71,12 +70,11 @@ static int slip_setup(char *str, char **mac_out, void *data)
 {
 	struct slip_init *init = data;
 
-	*init = ((struct slip_init)
-		{ .gate_addr 		= NULL });
+	*init = ((struct slip_init) { .gate_addr = NULL });
 
-	if(str[0] != '\0') 
+	if (str[0] != '\0')
 		init->gate_addr = str;
-	return(1);
+	return 1;
 }
 
 static struct transport slip_transport = {
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index c0b73c28cff..5f06204d687 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -1,21 +1,22 @@
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL.
+ */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <stddef.h>
-#include <sched.h>
-#include <string.h>
 #include <errno.h>
+#include <fcntl.h>
+#include <string.h>
 #include <sys/termios.h>
 #include <sys/wait.h>
-#include <sys/signal.h>
-#include "kern_util.h"
-#include "user.h"
+#include "kern_constants.h"
 #include "net_user.h"
-#include "slip.h"
-#include "slip_common.h"
 #include "os.h"
+#include "slip.h"
 #include "um_malloc.h"
-#include "kern_constants.h"
+#include "user.h"
 
 static int slip_user_init(void *data, void *dev)
 {
@@ -31,8 +32,9 @@ static int set_up_tty(int fd)
 	struct termios tios;
 
 	if (tcgetattr(fd, &tios) < 0) {
-		printk("could not get initial terminal attributes\n");
-		return(-1);
+		printk(UM_KERN_ERR "could not get initial terminal "
+		       "attributes\n");
+		return -1;
 	}
 
 	tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL;
@@ -48,10 +50,10 @@ static int set_up_tty(int fd)
 	cfsetispeed(&tios, B38400);
 
 	if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) {
-		printk("failed to set terminal attributes\n");
-		return(-1);
+		printk(UM_KERN_ERR "failed to set terminal attributes\n");
+		return -1;
 	}
-	return(0);
+	return 0;
 }
 
 struct slip_pre_exec_data {
@@ -64,9 +66,11 @@ static void slip_pre_exec(void *arg)
 {
 	struct slip_pre_exec_data *data = arg;
 
-	if(data->stdin >= 0) dup2(data->stdin, 0);
+	if (data->stdin >= 0)
+		dup2(data->stdin, 0);
 	dup2(data->stdout, 1);
-	if(data->close_me >= 0) os_close_file(data->close_me);
+	if (data->close_me >= 0)
+		close(data->close_me);
 }
 
 static int slip_tramp(char **argv, int fd)
@@ -76,8 +80,9 @@ static int slip_tramp(char **argv, int fd)
 	int status, pid, fds[2], err, output_len;
 
 	err = os_pipe(fds, 1, 0);
-	if(err < 0){
-		printk("slip_tramp : pipe failed, err = %d\n", -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n",
+		       -err);
 		goto out;
 	}
 
@@ -86,41 +91,42 @@ static int slip_tramp(char **argv, int fd)
 	pe_data.stdout = fds[1];
 	pe_data.close_me = fds[0];
 	err = run_helper(slip_pre_exec, &pe_data, argv);
-	if(err < 0)
+	if (err < 0)
 		goto out_close;
 	pid = err;
 
 	output_len = UM_KERN_PAGE_SIZE;
 	output = kmalloc(output_len, UM_GFP_KERNEL);
-	if(output == NULL){
-		printk("slip_tramp : failed to allocate output buffer\n");
+	if (output == NULL) {
+		printk(UM_KERN_ERR "slip_tramp : failed to allocate output "
+		       "buffer\n");
 		os_kill_process(pid, 1);
 		err = -ENOMEM;
-		goto out_free;
+		goto out_close;
 	}
 
-	os_close_file(fds[1]);
+	close(fds[1]);
 	read_output(fds[0], output, output_len);
 	printk("%s", output);
 
 	CATCH_EINTR(err = waitpid(pid, &status, 0));
-	if(err < 0)
-		err = errno;
-	else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){
-		printk("'%s' didn't exit with status 0\n", argv[0]);
+	if (err < 0)
+		err = -errno;
+	else if (!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) {
+		printk(UM_KERN_ERR "'%s' didn't exit with status 0\n", argv[0]);
 		err = -EINVAL;
 	}
 	else err = 0;
 
-	os_close_file(fds[0]);
+	close(fds[0]);
 
-out_free:
+	/* Both pipe ends are closed by now; only the buffer is left. */
 	kfree(output);
 	return err;
 
 out_close:
-	os_close_file(fds[0]);
-	os_close_file(fds[1]);
+	close(fds[0]);
+	close(fds[1]);
 out:
 	return err;
 }
@@ -130,60 +136,64 @@ static int slip_open(void *data)
 	struct slip_data *pri = data;
 	char version_buf[sizeof("nnnnn\0")];
 	char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
-	char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, 
+	char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf,
 			 NULL };
 	int sfd, mfd, err;
 
 	err = get_pty();
-	if(err < 0){
-		printk("slip-open : Failed to open pty, err = %d\n", -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n",
+		       -err);
 		goto out;
 	}
 	mfd = err;
 
-	err = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0);
-	if(err < 0){
-		printk("Couldn't open tty for slip line, err = %d\n", -err);
+	sfd = open(ptsname(mfd), O_RDWR, 0);
+	if (sfd < 0) {
+		err = -errno;	/* open() reports the cause via errno */
+		printk(UM_KERN_ERR "Couldn't open tty for slip line, "
+		       "err = %d\n", -err);
 		goto out_close;
 	}
-	sfd = err;
 
-	if(set_up_tty(sfd))
+	err = set_up_tty(sfd);	/* keep the failure code for the return */
+	if (err)
 		goto out_close2;
 
 	pri->slave = sfd;
 	pri->slip.pos = 0;
 	pri->slip.esc = 0;
-	if(pri->gate_addr != NULL){
+	if (pri->gate_addr != NULL) {
 		sprintf(version_buf, "%d", UML_NET_VERSION);
 		strcpy(gate_buf, pri->gate_addr);
 
 		err = slip_tramp(argv, sfd);
 
-		if(err < 0){
-			printk("slip_tramp failed - err = %d\n", -err);
+		if (err < 0) {
+			printk(UM_KERN_ERR "slip_tramp failed - err = %d\n",
+			       -err);
 			goto out_close2;
 		}
 		err = os_get_ifname(pri->slave, pri->name);
-		if(err < 0){
-			printk("get_ifname failed, err = %d\n", -err);
+		if (err < 0) {
+			printk(UM_KERN_ERR "get_ifname failed, err = %d\n",
+			       -err);
 			goto out_close2;
 		}
 		iter_addresses(pri->dev, open_addr, pri->name);
-	}
-	else {
+	} else {
 		err = os_set_slip(sfd);
-		if(err < 0){
-			printk("Failed to set slip discipline encapsulation - "
-			       "err = %d\n", -err);
+		if (err < 0) {
+			printk(UM_KERN_ERR "Failed to set slip discipline "
+			       "encapsulation - err = %d\n", -err);
 			goto out_close2;
 		}
 	}
-	return(mfd);
+	return mfd;
 out_close2:
-	os_close_file(sfd);
+	close(sfd);
 out_close:
-	os_close_file(mfd);
+	close(mfd);
 out:
 	return err;
 }
@@ -192,21 +202,21 @@ static void slip_close(int fd, void *data)
 {
 	struct slip_data *pri = data;
 	char version_buf[sizeof("nnnnn\0")];
-	char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, 
+	char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name,
 			 NULL };
 	int err;
 
-	if(pri->gate_addr != NULL)
+	if (pri->gate_addr != NULL)
 		iter_addresses(pri->dev, close_addr, pri->name);
 
 	sprintf(version_buf, "%d", UML_NET_VERSION);
 
 	err = slip_tramp(argv, pri->slave);
 
-	if(err != 0)
-		printk("slip_tramp failed - errno = %d\n", -err);
-	os_close_file(fd);
-	os_close_file(pri->slave);
+	if (err != 0)
+		printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err);
+	close(fd);
+	close(pri->slave);
 	pri->slave = -1;
 }
 
@@ -220,17 +230,13 @@ int slip_user_write(int fd, void *buf, int len, struct slip_data *pri)
 	return slip_proto_write(fd, buf, len, &pri->slip);
 }
 
-static int slip_set_mtu(int mtu, void *data)
-{
-	return(mtu);
-}
-
 static void slip_add_addr(unsigned char *addr, unsigned char *netmask,
 			  void *data)
 {
 	struct slip_data *pri = data;
 
-	if(pri->slave < 0) return;
+	if (pri->slave < 0)
+		return;
 	open_addr(addr, netmask, pri->name);
 }
 
@@ -239,7 +245,8 @@ static void slip_del_addr(unsigned char *addr, unsigned char *netmask,
 {
 	struct slip_data *pri = data;
 
-	if(pri->slave < 0) return;
+	if (pri->slave < 0)
+		return;
 	close_addr(addr, netmask, pri->name);
 }
 
@@ -248,8 +255,8 @@ const struct net_user_info slip_user_info = {
 	.open		= slip_open,
 	.close	 	= slip_close,
 	.remove	 	= NULL,
-	.set_mtu	= slip_set_mtu,
 	.add_address	= slip_add_addr,
 	.delete_address = slip_del_addr,
-	.max_packet	= BUF_SIZE
+	.mtu		= BUF_SIZE,
+	.max_packet	= BUF_SIZE,
 };
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index 0a0324a6d29..240ee650865 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -1,11 +1,14 @@
-#include "linux/kernel.h"
-#include "linux/stddef.h"
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL.
+ */
+
+#include <linux/if_arp.h>
 #include "linux/init.h"
-#include "linux/netdevice.h"
-#include "linux/if_arp.h"
+#include <linux/netdevice.h>
+#include <linux/string.h>
 #include "net_kern.h"
 #include "net_user.h"
-#include "kern.h"
 #include "slirp.h"
 
 struct slirp_init {
@@ -39,29 +42,26 @@ void slirp_init(struct net_device *dev, void *data)
 	dev->tx_queue_len = 256;
 	dev->flags = IFF_NOARP;
 	printk("SLIRP backend - command line:");
-	for(i=0;spri->argw.argv[i]!=NULL;i++) {
+	for (i = 0; spri->argw.argv[i] != NULL; i++)
 		printk(" '%s'",spri->argw.argv[i]);
-	}
 	printk("\n");
 }
 
 static unsigned short slirp_protocol(struct sk_buff *skbuff)
 {
-	return(htons(ETH_P_IP));
+	return htons(ETH_P_IP);
 }
 
-static int slirp_read(int fd, struct sk_buff **skb, 
-		       struct uml_net_private *lp)
+static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	return(slirp_user_read(fd, skb_mac_header(*skb), (*skb)->dev->mtu,
-			      (struct slirp_data *) &lp->user));
+	return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
+			       (struct slirp_data *) &lp->user);
 }
 
-static int slirp_write(int fd, struct sk_buff **skb,
-		      struct uml_net_private *lp)
+static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	return(slirp_user_write(fd, (*skb)->data, (*skb)->len, 
-			       (struct slirp_data *) &lp->user));
+	return slirp_user_write(fd, skb->data, skb->len,
+				(struct slirp_data *) &lp->user);
 }
 
 const struct net_kern_info slirp_kern_info = {
@@ -76,31 +76,32 @@ static int slirp_setup(char *str, char **mac_out, void *data)
 	struct slirp_init *init = data;
 	int i=0;
 
-	*init = ((struct slirp_init)
-		{ .argw = { { "slirp", NULL  } } });
+	*init = ((struct slirp_init) { .argw = { { "slirp", NULL  } } });
 
 	str = split_if_spec(str, mac_out, NULL);
 
-	if(str == NULL) { /* no command line given after MAC addr */
-		return(1);
-	}
+	if (str == NULL) /* no command line given after MAC addr */
+		return 1;
 
 	do {
-		if(i>=SLIRP_MAX_ARGS-1) {
-			printk("slirp_setup: truncating slirp arguments\n");
+		if (i >= SLIRP_MAX_ARGS - 1) {
+			printk(KERN_WARNING "slirp_setup: truncating slirp "
+			       "arguments\n");
 			break;
 		}
 		init->argw.argv[i++] = str;
 		while(*str && *str!=',') {
-			if(*str=='_') *str=' ';
+			if (*str == '_')
+				*str=' ';
 			str++;
 		}
-		if(*str!=',')
+		if (*str != ',')
 			break;
-		*str++='\0';
-	} while(1);
-	init->argw.argv[i]=NULL;
-	return(1);
+		*str++ = '\0';
+	} while (1);
+
+	init->argw.argv[i] = NULL;
+	return 1;
 }
 
 static struct transport slirp_transport = {
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
index 0e462f64f22..1865089ff41 100644
--- a/arch/um/drivers/slirp_user.c
+++ b/arch/um/drivers/slirp_user.c
@@ -1,18 +1,17 @@
-#include <stdio.h>
-#include <stdlib.h>
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL.
+ */
+
 #include <unistd.h>
-#include <stddef.h>
-#include <sched.h>
-#include <string.h>
 #include <errno.h>
+#include <string.h>
 #include <sys/wait.h>
-#include <sys/signal.h>
-#include "kern_util.h"
-#include "user.h"
+#include "kern_constants.h"
 #include "net_user.h"
-#include "slirp.h"
-#include "slip_common.h"
 #include "os.h"
+#include "slirp.h"
+#include "user.h"
 
 static int slirp_user_init(void *data, void *dev)
 {
@@ -31,8 +30,10 @@ static void slirp_pre_exec(void *arg)
 {
 	struct slirp_pre_exec_data *data = arg;
 
-	if(data->stdin != -1) dup2(data->stdin, 0);
-	if(data->stdout != -1) dup2(data->stdout, 1);
+	if (data->stdin != -1)
+		dup2(data->stdin, 0);
+	if (data->stdout != -1)
+		dup2(data->stdout, 1);
 }
 
 static int slirp_tramp(char **argv, int fd)
@@ -44,7 +45,7 @@ static int slirp_tramp(char **argv, int fd)
 	pe_data.stdout = fd;
 	pid = run_helper(slirp_pre_exec, &pe_data, argv);
 
-	return(pid);
+	return pid;
 }
 
 static int slirp_open(void *data)
@@ -53,12 +54,12 @@ static int slirp_open(void *data)
 	int fds[2], pid, err;
 
 	err = os_pipe(fds, 1, 1);
-	if(err)
-		return(err);
+	if (err)
+		return err;
 
 	err = slirp_tramp(pri->argw.argv, fds[1]);
-	if(err < 0){
-		printk("slirp_tramp failed - errno = %d\n", -err);
+	if (err < 0) {
+		printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err);
 		goto out;
 	}
 	pid = err;
@@ -68,10 +69,10 @@ static int slirp_open(void *data)
 	pri->slip.esc = 0;
 	pri->pid = err;
 
-	return(fds[0]);
+	return fds[0];
 out:
-	os_close_file(fds[0]);
-	os_close_file(fds[1]);
+	close(fds[0]);
+	close(fds[1]);
 	return err;
 }
 
@@ -80,31 +81,33 @@ static void slirp_close(int fd, void *data)
 	struct slirp_data *pri = data;
 	int status,err;
 
-	os_close_file(fd);
-	os_close_file(pri->slave);
+	close(fd);
+	close(pri->slave);
 
 	pri->slave = -1;
 
-	if(pri->pid<1) {
-		printk("slirp_close: no child process to shut down\n");
+	if (pri->pid<1) {
+		printk(UM_KERN_ERR "slirp_close: no child process to shut "
+		       "down\n");
 		return;
 	}
 
 #if 0
-	if(kill(pri->pid, SIGHUP)<0) {
-		printk("slirp_close: sending hangup to %d failed (%d)\n",
-			pri->pid, errno);
+	if (kill(pri->pid, SIGHUP)<0) {
+		printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed "
+		       "(%d)\n", pri->pid, errno);
 	}
 #endif
 
 	CATCH_EINTR(err = waitpid(pri->pid, &status, WNOHANG));
-	if(err < 0) {
-		printk("slirp_close: waitpid returned %d\n", errno);
+	if (err < 0) {
+		printk(UM_KERN_ERR "slirp_close: waitpid returned %d\n", errno);
 		return;
 	}
 
-	if(err == 0) {
-		printk("slirp_close: process %d has not exited\n", pri->pid);
+	if (err == 0) {
+		printk(UM_KERN_ERR "slirp_close: process %d has not exited\n",
+		       pri->pid);
 		return;
 	}
 
@@ -121,18 +124,13 @@ int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri)
 	return slip_proto_write(fd, buf, len, &pri->slip);
 }
 
-static int slirp_set_mtu(int mtu, void *data)
-{
-	return(mtu);
-}
-
 const struct net_user_info slirp_user_info = {
 	.init		= slirp_user_init,
 	.open		= slirp_open,
 	.close	 	= slirp_close,
 	.remove	 	= NULL,
-	.set_mtu	= slirp_set_mtu,
 	.add_address	= NULL,
 	.delete_address = NULL,
-	.max_packet	= BUF_SIZE
+	.mtu		= BUF_SIZE,
+	.max_packet	= BUF_SIZE,
 };
diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c
index a9f87e19c5b..c930fedc517 100644
--- a/arch/um/drivers/tty.c
+++ b/arch/um/drivers/tty.c
@@ -1,16 +1,16 @@
 /*
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
-#include <stdio.h>
-#include <termios.h>
 #include <errno.h>
-#include <unistd.h>
+#include <fcntl.h>
+#include <termios.h>
 #include "chan_user.h"
-#include "user.h"
+#include "kern_constants.h"
 #include "os.h"
 #include "um_malloc.h"
+#include "user.h"
 
 struct tty_chan {
 	char *dev;
@@ -22,15 +22,15 @@ static void *tty_chan_init(char *str, int device, const struct chan_opts *opts)
 {
 	struct tty_chan *data;
 
-	if(*str != ':'){
-		printk("tty_init : channel type 'tty' must specify "
+	if (*str != ':') {
+		printk(UM_KERN_ERR "tty_init : channel type 'tty' must specify "
 		       "a device\n");
 		return NULL;
 	}
 	str++;
 
 	data = kmalloc(sizeof(*data), UM_GFP_KERNEL);
-	if(data == NULL)
+	if (data == NULL)
 		return NULL;
 	*data = ((struct tty_chan) { .dev 	= str,
 				     .raw 	= opts->raw });
@@ -42,19 +42,26 @@ static int tty_open(int input, int output, int primary, void *d,
 		    char **dev_out)
 {
 	struct tty_chan *data = d;
-	int fd, err;
+	int fd, err, mode = 0;
+
+	if (input && output)
+		mode = O_RDWR;
+	else if (input)
+		mode = O_RDONLY;
+	else if (output)
+		mode = O_WRONLY;
 
-	fd = os_open_file(data->dev, of_set_rw(OPENFLAGS(), input, output), 0);
-	if(fd < 0)
-		return fd;
+	fd = open(data->dev, mode);
+	if (fd < 0)
+		return -errno;
 
-	if(data->raw){
+	if (data->raw) {
 		CATCH_EINTR(err = tcgetattr(fd, &data->tt));
-		if(err)
+		if (err)
 			return err;
 
 		err = raw(fd);
-		if(err)
+		if (err)
 			return err;
 	}
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 0eabe73c964..25b248a0250 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -615,7 +615,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 		blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
 
 		err = -ENOMEM;
-		ubd_dev->cow.bitmap = (void *) vmalloc(ubd_dev->cow.bitmap_len);
+		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 		if(ubd_dev->cow.bitmap == NULL){
 			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 			goto error;
diff --git a/arch/um/drivers/vde.h b/arch/um/drivers/vde.h
new file mode 100644
index 00000000000..fc3a05902ba
--- /dev/null
+++ b/arch/um/drivers/vde.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
+ * Licensed under the GPL.
+ */
+
+#ifndef __UM_VDE_H__
+#define __UM_VDE_H__
+
+struct vde_data {		/* per-device state of the vde transport */
+	char *vde_switch;	/* switch socket path, NULL for the default */
+	char *descr;		/* description string handed to vde_open() */
+	void *args;		/* kmalloc'ed struct vde_open_args, or NULL */
+	void *conn;		/* VDECONN * from vde_open(), NULL if closed */
+	void *dev;		/* device pointer given to the init hooks */
+};
+
+struct vde_init {		/* settings parsed from the ethN=vde,... spec */
+	char *vde_switch;	/* switch socket path, NULL if not given */
+	char *descr;		/* description, NULL if not given */
+	int port;		/* parsed as decimal, 0 if not given */
+	char *group;		/* copied into vde_open_args.group */
+	int mode;		/* parsed as octal, 0 if not given */
+};
+
+extern const struct net_user_info vde_user_info;
+
+extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init);
+
+extern int vde_user_read(void *conn, void *buf, int len);
+extern int vde_user_write(void *conn, void *buf, int len);
+
+#endif
diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c
new file mode 100644
index 00000000000..add7e722def
--- /dev/null
+++ b/arch/um/drivers/vde_kern.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
+ * Licensed under the GPL.
+ *
+ * Transport usage:
+ *  ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description>
+ *
+ */
+
+#include "linux/init.h"
+#include <linux/netdevice.h>
+#include "net_kern.h"
+#include "net_user.h"
+#include "vde.h"
+
+/* Fill in the vde private data from the parsed setup and log the backend. */
+static void vde_init(struct net_device *dev, void *data)
+{
+	struct uml_net_private *lp = dev->priv;
+	struct vde_data *vpri = (struct vde_data *) lp->user;
+	struct vde_init *init = data;
+	char *sock_name;
+
+	vpri->vde_switch = init->vde_switch;
+	vpri->descr = "UML vde_transport";
+	if (init->descr)
+		vpri->descr = init->descr;
+	vpri->args = NULL;
+	vpri->conn = NULL;
+	vpri->dev = dev;
+
+	/* One log line: switch name here, port from vde_init_libstuff(). */
+	sock_name = vpri->vde_switch ? vpri->vde_switch : "(default socket)";
+	printk("vde backend - %s, ", sock_name);
+	vde_init_libstuff(vpri, init);
+	printk("\n");
+}
+
+/* Receive one frame into skb's MAC header area; -EBADF without a VDECONN. */
+static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
+{
+	struct vde_data *pri = (struct vde_data *) &lp->user;
+
+	if (pri->conn != NULL)
+		return vde_user_read(pri->conn, skb_mac_header(skb),
+				     skb->dev->mtu + ETH_HEADER_OTHER);
+	printk(KERN_ERR "vde_read - we have no VDECONN to read from\n");
+	return -EBADF;
+}
+
+/* Send skb's data to the switch connection; -EBADF without a VDECONN. */
+static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
+{
+	struct vde_data *pri = (struct vde_data *) &lp->user;
+
+	if (pri->conn != NULL)
+		return vde_user_write(pri->conn, skb->data, skb->len);
+
+	printk(KERN_ERR "vde_write - we have no VDECONN to write to\n");
+	return -EBADF;
+}
+
+static const struct net_kern_info vde_kern_info = {	/* kernel-side ops */
+	.write		= vde_write,
+	.read		= vde_read,
+	.protocol	= eth_protocol,
+	.init		= vde_init,
+};
+
+/* Parse "<switch>,<mac>,<port>,<group>,<mode>,<descr>"; 1 = ok, 0 = bad */
+static int vde_setup(char *str, char **mac_out, void *data)
+{
+	struct vde_init *init = data;
+	char *port_str = NULL, *mode_str = NULL;
+	char *remain, *last;
+
+	/* Members not named in the compound literal are zeroed as well. */
+	*init = ((struct vde_init) { .vde_switch = NULL });
+
+	remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str,
+				&init->group, &mode_str, &init->descr, NULL);
+
+	if (remain != NULL)
+		printk(KERN_WARNING "vde_setup - Ignoring extra data :"
+		       "'%s'\n", remain);
+
+	/* The port is decimal and has to be consumed completely. */
+	if (port_str != NULL) {
+		init->port = simple_strtoul(port_str, &last, 10);
+		if ((last == port_str) || (*last != '\0')) {
+			printk(KERN_ERR "vde_setup - Bad port : '%s'\n",
+						port_str);
+			return 0;
+		}
+	}
+
+	/* The mode is parsed the same way, but as an octal number. */
+	if (mode_str != NULL) {
+		init->mode = simple_strtoul(mode_str, &last, 8);
+		if ((last == mode_str) || (*last != '\0')) {
+			printk(KERN_ERR "vde_setup - Bad mode : '%s'\n",
+						mode_str);
+			return 0;
+		}
+	}
+
+	printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ?
+	       init->vde_switch : "(default socket)");
+
+	return 1;
+}
+
+static struct transport vde_transport = {	/* registered as "vde" */
+	.name		= "vde",
+	.list		= LIST_HEAD_INIT(vde_transport.list),
+	.setup		= vde_setup,
+	.setup_size	= sizeof(struct vde_init),
+	.private_size	= sizeof(struct vde_data),
+	.kern		= &vde_kern_info,
+	.user		= &vde_user_info,
+};
+
+/* Initcall: hand the "vde" transport to register_transport(); returns 0. */
+static int register_vde(void)
+{
+	register_transport(&vde_transport);
+	return 0;
+}
+late_initcall(register_vde);
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
new file mode 100644
index 00000000000..d9941fe5f93
--- /dev/null
+++ b/arch/um/drivers/vde_user.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
+ * Licensed under the GPL.
+ */
+
+#include <stddef.h>
+#include <errno.h>
+#include <libvdeplug.h>
+#include "kern_constants.h"
+#include "net_user.h"
+#include "um_malloc.h"
+#include "user.h"
+#include "vde.h"
+
+/* Connect to the VDE switch; returns 0, or -errno when vde_open() fails. */
+static int vde_user_init(void *data, void *dev)
+{
+	struct vde_data *pri = data;
+	VDECONN *conn;
+	int saved_errno;
+
+	pri->dev = dev;
+
+	conn = vde_open(pri->vde_switch, pri->descr, pri->args);
+
+	if (conn != NULL) {
+		printk(UM_KERN_INFO "vde backend - connection opened\n");
+		pri->conn = conn;
+		return 0;
+	}
+
+	/* Latch errno before anything else gets a chance to change it. */
+	saved_errno = errno;
+	printk(UM_KERN_ERR "vde_user_init: vde_open failed, "
+	       "errno = %d\n", saved_errno);
+	return -saved_errno;
+}
+
+/* Return vde_datafd() of the open connection, -EINVAL if there is none. */
+static int vde_user_open(void *data)
+{
+	struct vde_data *pri = data;
+
+	if (pri->conn != NULL)
+		return vde_datafd(pri->conn);
+	printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open\n");
+	return -EINVAL;
+}
+
+/* Close the switch connection and free its open args, if connected. */
+static void vde_remove(void *data)
+{
+	struct vde_data *pri = data;
+
+	if (pri->conn != NULL) {
+		printk(UM_KERN_INFO "vde backend - closing connection\n");
+		vde_close(pri->conn);
+		pri->conn = NULL;
+		kfree(pri->args);
+		pri->args = NULL;
+		return;
+	}
+	printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove\n");
+}
+
+const struct net_user_info vde_user_info = {
+	.init		= vde_user_init,
+	.open		= vde_user_open,
+	.remove		= vde_remove,
+	.close		= NULL,		/* hooks this transport leaves unset */
+	.add_address	= NULL,
+	.delete_address	= NULL,
+	.mtu		= ETH_MAX_PACKET,
+	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
+};
+
+/* Allocate and fill the vde_open_args; logs the port on vde_init()'s line. */
+void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init)
+{
+	struct vde_open_args *args;
+
+	vpri->args = kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL);
+	if (vpri->args == NULL) {
+		printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args "
+		       "allocation failed");
+		return;
+	}
+	args = vpri->args;
+	args->port = init->port;
+	args->group = init->group;
+	args->mode = init->mode ? init->mode : 0700;
+
+	/* Continues the caller's partial line, so no log level here. */
+	args->port ?  printk("port %d", args->port) :
+		printk("undefined port");
+}
+
+/* Receive one frame: its length, 0 if no conn or EAGAIN, else -errno. */
+int vde_user_read(void *conn, void *buf, int len)
+{
+	VDECONN *vconn = conn;
+	int nread;
+
+	if (vconn == NULL)
+		return 0;
+
+	nread = vde_recv(vconn, buf, len, 0);
+	if (nread > 0)
+		return nread;
+
+	/* A zero-length result is reported as a dropped connection. */
+	if (nread == 0)
+		return -ENOTCONN;
+
+	return (errno == EAGAIN) ? 0 : -errno;
+}
+
+/* Send len bytes of buf via vde_send(); returns 0 when conn is NULL. */
+int vde_user_write(void *conn, void *buf, int len)
+{
+	VDECONN *vconn = conn;
+
+	if (vconn != NULL)
+		return vde_send(vconn, buf, len, 0);
+	return 0;
+}
+
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index fd817e54154..8a1c18a9b24 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -1,20 +1,21 @@
-/* 
+/*
  * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stdlib.h>
+#include <stddef.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
-#include <string.h>
 #include <errno.h>
+#include <string.h>
 #include <termios.h>
 #include "chan_user.h"
+#include "kern_constants.h"
 #include "os.h"
-#include "init.h"
+#include "um_malloc.h"
 #include "user.h"
 #include "xterm.h"
-#include "kern_constants.h"
 
 struct xterm_chan {
 	int pid;
@@ -29,7 +30,7 @@ static void *xterm_init(char *str, int device, const struct chan_opts *opts)
 {
 	struct xterm_chan *data;
 
-	data = malloc(sizeof(*data));
+	data = kmalloc(sizeof(*data), UM_GFP_KERNEL);
 	if (data == NULL)
 		return NULL;
 	*data = ((struct xterm_chan) { .pid 		= -1,
@@ -95,8 +96,10 @@ static int xterm_open(int input, int output, int primary, void *d,
 	if (access(argv[4], X_OK) < 0)
 		argv[4] = "port-helper";
 
-	/* Check that DISPLAY is set, this doesn't guarantee the xterm
-	 * will work but w/o it we can be pretty sure it won't. */
+	/*
+	 * Check that DISPLAY is set, this doesn't guarantee the xterm
+	 * will work but w/o it we can be pretty sure it won't.
+	 */
 	if (getenv("DISPLAY") == NULL) {
 		printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n");
 		return -ENODEV;
@@ -195,7 +198,7 @@ static int xterm_open(int input, int output, int primary, void *d,
 static void xterm_close(int fd, void *d)
 {
 	struct xterm_chan *data = d;
-	
+
 	if (data->pid != -1)
 		os_kill_process(data->pid, 1);
 	data->pid = -1;
@@ -207,11 +210,6 @@ static void xterm_close(int fd, void *d)
 	os_close_file(fd);
 }
 
-static void xterm_free(void *d)
-{
-	free(d);
-}
-
 const struct chan_ops xterm_ops = {
 	.type		= "xterm",
 	.init		= xterm_init,
@@ -221,6 +219,6 @@ const struct chan_ops xterm_ops = {
 	.write		= generic_write,
 	.console_write	= generic_console_write,
 	.window_size	= generic_window_size,
-	.free		= xterm_free,
+	.free		= generic_free,
 	.winch		= 1,
 };
diff --git a/arch/um/include/arch.h b/arch/um/include/arch.h
index 10ad52daa8c..49c601ff2ba 100644
--- a/arch/um/include/arch.h
+++ b/arch/um/include/arch.h
@@ -9,7 +9,7 @@
 #include "sysdep/ptrace.h"
 
 extern void arch_check_bugs(void);
-extern int arch_fixup(unsigned long address, union uml_pt_regs *regs);
-extern int arch_handle_signal(int sig, union uml_pt_regs *regs);
+extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs);
+extern int arch_handle_signal(int sig, struct uml_pt_regs *regs);
 
 #endif
diff --git a/arch/um/include/as-layout.h b/arch/um/include/as-layout.h
index fccf187bf4e..a5cdf953e04 100644
--- a/arch/um/include/as-layout.h
+++ b/arch/um/include/as-layout.h
@@ -6,6 +6,28 @@
 #ifndef __START_H__
 #define __START_H__
 
+#include "uml-config.h"
+#include "kern_constants.h"
+
+/*
+ * Assembly doesn't want any casting, but C does, so define these
+ * without casts here, and define new symbols with casts inside the C
+ * section.
+ */
+#define ASM_STUB_CODE (UML_CONFIG_TOP_ADDR - 2 * UM_KERN_PAGE_SIZE)
+#define ASM_STUB_DATA (UML_CONFIG_TOP_ADDR - UM_KERN_PAGE_SIZE)
+#define ASM_STUB_START ASM_STUB_CODE
+
+/*
+ * This file is included by the assembly stubs, which just want the
+ * definitions above.
+ */
+#ifndef __ASSEMBLY__
+
+#define STUB_CODE ((unsigned long) ASM_STUB_CODE)
+#define STUB_DATA ((unsigned long) ASM_STUB_DATA)
+#define STUB_START ((unsigned long) ASM_STUB_START)
+
 #include "sysdep/ptrace.h"
 
 struct cpu_task {
@@ -28,8 +50,9 @@ extern unsigned long _unprotected_end;
 extern unsigned long brk_start;
 
 extern int linux_main(int argc, char **argv);
-extern void set_cmdline(char *cmd);
 
-extern void (*sig_info[])(int, union uml_pt_regs *);
+extern void (*sig_info[])(int, struct uml_pt_regs *);
+
+#endif
 
 #endif
diff --git a/arch/um/include/choose-mode.h b/arch/um/include/choose-mode.h
deleted file mode 100644
index b87b36a87d9..00000000000
--- a/arch/um/include/choose-mode.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __CHOOSE_MODE_H__
-#define __CHOOSE_MODE_H__
-
-#include "uml-config.h"
-
-#if defined(UML_CONFIG_MODE_TT) && defined(UML_CONFIG_MODE_SKAS)
-#define CHOOSE_MODE(tt, skas) (mode_tt ? (tt) : (skas))
-
-extern int mode_tt;
-static inline void *__choose_mode(void *tt, void *skas) {
-	return mode_tt ? tt : skas;
-}
-
-#define __CHOOSE_MODE(tt, skas) (*( (typeof(tt) *) __choose_mode(&(tt), &(skas))))
-
-#elif defined(UML_CONFIG_MODE_SKAS)
-#define CHOOSE_MODE(tt, skas) (skas)
-
-#elif defined(UML_CONFIG_MODE_TT)
-#define CHOOSE_MODE(tt, skas) (tt)
-
-#else
-#error CONFIG_MODE_SKAS and CONFIG_MODE_TT are both disabled
-#endif
-
-#define CHOOSE_MODE_PROC(tt, skas, args...) \
-	CHOOSE_MODE(tt(args), skas(args))
-
-#ifndef __CHOOSE_MODE
-#define __CHOOSE_MODE(tt, skas) CHOOSE_MODE(tt, skas)
-#endif
-
-#endif
diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h
index 6eee343e53e..0edab695ed4 100644
--- a/arch/um/include/common-offsets.h
+++ b/arch/um/include/common-offsets.h
@@ -1,15 +1,13 @@
 /* for use by sys-$SUBARCH/kernel-offsets.c */
 
 DEFINE(KERNEL_MADV_REMOVE, MADV_REMOVE);
-#ifdef CONFIG_MODE_TT
-OFFSET(HOST_TASK_EXTERN_PID, task_struct, thread.mode.tt.extern_pid);
-#endif
 
 OFFSET(HOST_TASK_REGS, task_struct, thread.regs);
 OFFSET(HOST_TASK_PID, task_struct, pid);
 
 DEFINE(UM_KERN_PAGE_SIZE, PAGE_SIZE);
 DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
+DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
 DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
 
 DEFINE_STR(UM_KERN_EMERG, KERN_EMERG);
@@ -34,3 +32,8 @@ DEFINE(UM_GFP_ATOMIC, GFP_ATOMIC);
 DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
 
 DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
+
+DEFINE(UM_HZ, HZ);
+
+DEFINE(UM_USEC_PER_SEC, USEC_PER_SEC);
+DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
diff --git a/arch/um/include/irq_user.h b/arch/um/include/irq_user.h
index 15d311b9be9..884a9c17eea 100644
--- a/arch/um/include/irq_user.h
+++ b/arch/um/include/irq_user.h
@@ -1,12 +1,12 @@
 /*
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #ifndef __IRQ_USER_H__
 #define __IRQ_USER_H__
 
-#include "uml-config.h"
+#include "sysdep/ptrace.h"
 
 struct irq_fd {
 	struct irq_fd *next;
@@ -21,7 +21,7 @@ struct irq_fd {
 
 enum { IRQ_READ, IRQ_WRITE };
 
-extern void sigio_handler(int sig, union uml_pt_regs *regs);
+extern void sigio_handler(int sig, struct uml_pt_regs *regs);
 extern int activate_fd(int irq, int fd, int type, void *dev_id);
 extern void free_irq_by_irq_and_dev(unsigned int irq, void *dev_id);
 extern void free_irq_by_fd(int fd);
@@ -30,8 +30,4 @@ extern void deactivate_fd(int fd, int irqnum);
 extern int deactivate_all_fds(void);
 extern int activate_ipi(int fd, int pid);
 
-#ifdef CONFIG_MODE_TT
-extern void forward_interrupts(int pid);
-#endif
-
 #endif
diff --git a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
index 6c2be26f1d7..74ce8e5370a 100644
--- a/arch/um/include/kern_util.h
+++ b/arch/um/include/kern_util.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -8,9 +8,8 @@
 
 #include "sysdep/ptrace.h"
 #include "sysdep/faultinfo.h"
-#include "uml-config.h"
 
-typedef void (*kern_hndl)(int, union uml_pt_regs *);
+typedef void (*kern_hndl)(int, struct uml_pt_regs *);
 
 struct kern_handlers {
 	kern_hndl relay_signal;
@@ -34,9 +33,6 @@ extern int nsyscalls;
 	UML_ROUND_DOWN(((unsigned long) addr) + PAGE_SIZE - 1)
 
 extern int kernel_fork(unsigned long flags, int (*fn)(void *), void * arg);
-#ifdef UML_CONFIG_MODE_TT
-extern unsigned long stack_sp(unsigned long page);
-#endif
 extern int kernel_thread_proc(void *data);
 extern void syscall_segv(int sig);
 extern int current_pid(void);
@@ -44,7 +40,7 @@ extern unsigned long alloc_stack(int order, int atomic);
 extern int do_signal(void);
 extern int is_stack_fault(unsigned long sp);
 extern unsigned long segv(struct faultinfo fi, unsigned long ip,
-			  int is_user, union uml_pt_regs *regs);
+			  int is_user, struct uml_pt_regs *regs);
 extern int handle_page_fault(unsigned long address, unsigned long ip,
 			     int is_write, int is_user, int *code_out);
 extern void syscall_ready(void);
@@ -57,7 +53,7 @@ extern int need_finish_fork(void);
 extern void free_stack(unsigned long stack, int order);
 extern void add_input_request(int op, void (*proc)(int), void *arg);
 extern char *current_cmd(void);
-extern void timer_handler(int sig, union uml_pt_regs *regs);
+extern void timer_handler(int sig, struct uml_pt_regs *regs);
 extern int set_signals(int enable);
 extern int pid_to_processor_id(int pid);
 extern void deliver_signals(void *t);
@@ -67,9 +63,8 @@ extern void finish_fork(void);
 extern void paging_init(void);
 extern void init_flush_vm(void);
 extern void *syscall_sp(void *t);
-extern void syscall_trace(union uml_pt_regs *regs, int entryexit);
-extern int hz(void);
-extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs);
+extern void syscall_trace(struct uml_pt_regs *regs, int entryexit);
+extern unsigned int do_IRQ(int irq, struct uml_pt_regs *regs);
 extern void interrupt_end(void);
 extern void initial_thread_cb(void (*proc)(void *), void *arg);
 extern int debugger_signal(int status, int pid);
@@ -79,10 +74,9 @@ extern int init_ptrace_proxy(int idle_pid, int startup, int stop);
 extern int init_parent_proxy(int pid);
 extern int singlestepping(void *t);
 extern void check_stack_overflow(void *ptr);
-extern void relay_signal(int sig, union uml_pt_regs *regs);
+extern void relay_signal(int sig, struct uml_pt_regs *regs);
 extern int user_context(unsigned long sp);
-extern void timer_irq(union uml_pt_regs *regs);
-extern void unprotect_stack(unsigned long stack);
+extern void timer_irq(struct uml_pt_regs *regs);
 extern void do_uml_exitcalls(void);
 extern int attach_debugger(int idle_pid, int pid, int stop);
 extern int config_gdb(char *str);
@@ -113,11 +107,9 @@ extern void time_init_kern(void);
 
 /* Are we disallowed to sleep? Used to choose between GFP_KERNEL and GFP_ATOMIC. */
 extern int __cant_sleep(void);
-extern void sigio_handler(int sig, union uml_pt_regs *regs);
-
-extern void copy_sc(union uml_pt_regs *regs, void *from);
-
+extern void sigio_handler(int sig, struct uml_pt_regs *regs);
+extern void copy_sc(struct uml_pt_regs *regs, void *from);
 extern unsigned long to_irq_stack(unsigned long *mask_out);
 unsigned long from_irq_stack(int nested);
-
+extern int start_uml(void);
 #endif
diff --git a/arch/um/include/mconsole.h b/arch/um/include/mconsole.h
index b282839c162..c139ae1d682 100644
--- a/arch/um/include/mconsole.h
+++ b/arch/um/include/mconsole.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -63,7 +63,7 @@ struct mc_request
 
 	struct mconsole_request request;
 	struct mconsole_command *cmd;
-	union uml_pt_regs regs;
+	struct uml_pt_regs regs;
 };
 
 extern char mconsole_socket_name[];
@@ -96,14 +96,3 @@ extern void lock_notify(void);
 extern void unlock_notify(void);
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/mem.h b/arch/um/include/mem.h
index e8ff0d8fa61..5cd40e99e8d 100644
--- a/arch/um/include/mem.h
+++ b/arch/um/include/mem.h
@@ -1,18 +1,12 @@
 /* 
- * Copyright (C) 2002, 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #ifndef __MEM_H__
 #define __MEM_H__
 
-#include "linux/types.h"
-
-extern int phys_mapping(unsigned long phys, __u64 *offset_out);
-extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w);
-extern int is_remapped(void *virt);
-extern int physmem_remove_mapping(void *virt);
-extern void physmem_forget_descriptor(int fd);
+extern int phys_mapping(unsigned long phys, unsigned long long *offset_out);
 
 extern unsigned long uml_physmem;
 static inline unsigned long to_phys(void *virt)
@@ -26,14 +20,3 @@ static inline void *to_virt(unsigned long phys)
 }
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/mode.h b/arch/um/include/mode.h
deleted file mode 100644
index 786cf563eb0..00000000000
--- a/arch/um/include/mode.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __MODE_H__
-#define __MODE_H__
-
-#include "uml-config.h"
-
-#ifdef UML_CONFIG_MODE_TT
-#include "mode-tt.h"
-#endif
-
-#ifdef UML_CONFIG_MODE_SKAS
-#include "mode-skas.h"
-#endif
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/mode_kern.h b/arch/um/include/mode_kern.h
deleted file mode 100644
index 88e5e77bf51..00000000000
--- a/arch/um/include/mode_kern.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __MODE_KERN_H__
-#define __MODE_KERN_H__
-
-#ifdef CONFIG_MODE_TT
-#include "mode_kern_tt.h"
-#endif
-
-#ifdef CONFIG_MODE_SKAS
-#include "mode_kern_skas.h"
-#endif
-
-#endif
diff --git a/arch/um/include/net_kern.h b/arch/um/include/net_kern.h
index 9237056b910..d843c7924a7 100644
--- a/arch/um/include/net_kern.h
+++ b/arch/um/include/net_kern.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -30,24 +30,24 @@ struct uml_net_private {
 	struct work_struct work;
 	int fd;
 	unsigned char mac[ETH_ALEN];
+	int max_packet;
 	unsigned short (*protocol)(struct sk_buff *);
 	int (*open)(void *);
 	void (*close)(int, void *);
 	void (*remove)(void *);
-	int (*read)(int, struct sk_buff **skb, struct uml_net_private *);
-	int (*write)(int, struct sk_buff **skb, struct uml_net_private *);
+	int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
+	int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
 
 	void (*add_address)(unsigned char *, unsigned char *, void *);
 	void (*delete_address)(unsigned char *, unsigned char *, void *);
-	int (*set_mtu)(int mtu, void *);
 	char user[0];
 };
 
 struct net_kern_info {
 	void (*init)(struct net_device *, void *);
 	unsigned short (*protocol)(struct sk_buff *);
-	int (*read)(int, struct sk_buff **skb, struct uml_net_private *);
-	int (*write)(int, struct sk_buff **skb, struct uml_net_private *);
+	int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
+	int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
 };
 
 struct transport {
@@ -62,7 +62,6 @@ struct transport {
 
 extern struct net_device *ether_init(int);
 extern unsigned short ether_protocol(struct sk_buff *);
-extern struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra);
 extern int tap_setup_common(char *str, char *type, char **dev_name,
 			    char **mac_out, char **gate_addr);
 extern void register_transport(struct transport *new);
diff --git a/arch/um/include/net_user.h b/arch/um/include/net_user.h
index cfe7c50634b..63bee158cd8 100644
--- a/arch/um/include/net_user.h
+++ b/arch/um/include/net_user.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -18,10 +18,10 @@ struct net_user_info {
 	int (*open)(void *);
 	void (*close)(int, void *);
 	void (*remove)(void *);
-	int (*set_mtu)(int mtu, void *);
 	void (*add_address)(unsigned char *, unsigned char *, void *);
 	void (*delete_address)(unsigned char *, unsigned char *, void *);
 	int max_packet;
+	int mtu;
 };
 
 extern void ether_user_init(void *data, void *dev);
diff --git a/arch/um/include/os.h b/arch/um/include/os.h
index 930b261ea48..fbf0a87c6ea 100644
--- a/arch/um/include/os.h
+++ b/arch/um/include/os.h
@@ -1,20 +1,18 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #ifndef __OS_H__
 #define __OS_H__
 
-#include "uml-config.h"
-#include "asm/types.h"
-#include "../os/include/file.h"
-#include "sysdep/ptrace.h"
-#include "kern_util.h"
-#include "skas/mm_id.h"
+#include <stdarg.h>
 #include "irq_user.h"
+#include "kern_util.h"
+#include "longjmp.h"
+#include "mm_id.h"
 #include "sysdep/tls.h"
-#include "sysdep/archsetjmp.h"
+#include "../os/include/file.h"
 
 #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
 
@@ -130,18 +128,15 @@ static inline struct openflags of_cloexec(struct openflags flags)
 extern int os_stat_file(const char *file_name, struct uml_stat *buf);
 extern int os_stat_fd(const int fd, struct uml_stat *buf);
 extern int os_access(const char *file, int mode);
-extern void os_print_error(int error, const char* str);
 extern int os_get_exec_close(int fd, int *close_on_exec);
-extern int os_set_exec_close(int fd, int close_on_exec);
+extern int os_set_exec_close(int fd);
 extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
-extern int os_window_size(int fd, int *rows, int *cols);
-extern int os_new_tty_pgrp(int fd, int pid);
 extern int os_get_ifname(int fd, char *namebuf);
 extern int os_set_slip(int fd);
 extern int os_set_owner(int fd, int pid);
 extern int os_mode_fd(int fd, int mode);
 
-extern int os_seek_file(int fd, __u64 offset);
+extern int os_seek_file(int fd, unsigned long long offset);
 extern int os_open_file(char *file, struct openflags flags, int mode);
 extern int os_read_file(int fd, void *buf, int len);
 extern int os_write_file(int fd, const void *buf, int count);
@@ -179,11 +174,7 @@ extern void check_host_supports_tls(int *supports_tls, int *tls_min);
 
 /* Make sure they are clear when running in TT mode. Required by
  * SEGV_MAYBE_FIXABLE */
-#ifdef UML_CONFIG_MODE_SKAS
 #define clear_can_do_skas() do { ptrace_faultinfo = proc_mm = 0; } while (0)
-#else
-#define clear_can_do_skas() do {} while (0)
-#endif
 
 /* mem.c */
 extern int create_mem_file(unsigned long long len);
@@ -194,20 +185,13 @@ extern int os_process_parent(int pid);
 extern void os_stop_process(int pid);
 extern void os_kill_process(int pid, int reap_child);
 extern void os_kill_ptraced_process(int pid, int reap_child);
-#ifdef UML_CONFIG_MODE_TT
-extern void os_usr1_process(int pid);
-#endif
 extern long os_ptrace_ldt(long pid, long addr, long data);
 
 extern int os_getpid(void);
 extern int os_getpgrp(void);
 
-#ifdef UML_CONFIG_MODE_TT
-extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int));
-extern void stop(void);
-#endif
 extern void init_new_thread_signals(void);
-extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr);
+extern int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr);
 
 extern int os_map_memory(void *virt, int fd, unsigned long long off,
 			 unsigned long len, int r, int w, int x);
@@ -218,21 +202,9 @@ extern int os_drop_memory(void *addr, int length);
 extern int can_drop_memory(void);
 extern void os_flush_stdout(void);
 
-/* tt.c
- * for tt mode only (will be deleted in future...)
- */
-extern void forward_ipi(int fd, int pid);
-extern void kill_child_dead(int pid);
-extern int wait_for_stop(int pid, int sig, int cont_type, void *relay);
-extern int protect_memory(unsigned long addr, unsigned long len,
-			  int r, int w, int x, int must_succeed);
-extern void forward_pending_sigio(int target);
-extern int start_fork_tramp(void *arg, unsigned long temp_stack,
-			    int clone_flags, int (*tramp)(void *));
-
 /* uaccess.c */
 extern unsigned long __do_user_copy(void *to, const void *from, int n,
-				    void **fault_addr, void **fault_catcher,
+				    void **fault_addr, jmp_buf **fault_catcher,
 				    void (*op)(void *to, const void *from,
 					       int n), int *faulted_out);
 
@@ -255,6 +227,7 @@ extern int set_umid(char *name);
 extern char *get_umid(void);
 
 /* signal.c */
+extern void timer_init(void);
 extern void set_sigstack(void *sig_stack, int size);
 extern void remove_sigstack(void);
 extern void set_handler(int sig, void (*handler)(int), int flags, ...);
@@ -266,7 +239,6 @@ extern int set_signals(int enable);
 
 /* trap.c */
 extern void os_fill_handlinfo(struct kern_handlers h);
-extern void do_longjmp(void *p, int val);
 
 /* util.c */
 extern void stack_protections(unsigned long address);
@@ -277,17 +249,12 @@ extern int setjmp_wrapper(void (*proc)(void *, void *), ...);
 extern void os_dump_core(void);
 
 /* time.c */
-#define BILLION (1000 * 1000 * 1000)
-
-extern void switch_timers(int to_real);
-extern void idle_sleep(int secs);
-extern int set_interval(int is_virtual);
-#ifdef CONFIG_MODE_TT
-extern void enable_timer(void);
-#endif
-extern void disable_timer(void);
+extern void idle_sleep(unsigned long long nsecs);
+extern int set_interval(void);
+extern int timer_one_shot(int ticks);
+extern long long disable_timer(void);
 extern void uml_idle_timer(void);
-extern unsigned long long os_nsecs(void);
+extern long long os_nsecs(void);
 
 /* skas/mem.c */
 extern long run_syscall_stub(struct mm_id * mm_idp,
@@ -308,7 +275,9 @@ extern int protect(struct mm_id * mm_idp, unsigned long addr,
 extern int is_skas_winch(int pid, int fd, void *data);
 extern int start_userspace(unsigned long stub_stack);
 extern int copy_context_skas0(unsigned long stack, int pid);
-extern void userspace(union uml_pt_regs *regs);
+extern void save_registers(int pid, struct uml_pt_regs *regs);
+extern void restore_registers(int pid, struct uml_pt_regs *regs);
+extern void userspace(struct uml_pt_regs *regs);
 extern void map_stub_pages(int fd, unsigned long code,
 			   unsigned long data, unsigned long stack);
 extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
diff --git a/arch/um/include/registers.h b/arch/um/include/registers.h
index f845b3629a6..0e27406a43a 100644
--- a/arch/um/include/registers.h
+++ b/arch/um/include/registers.h
@@ -9,13 +9,15 @@
 #include "sysdep/ptrace.h"
 #include "sysdep/archsetjmp.h"
 
-extern void init_thread_registers(union uml_pt_regs *to);
+extern void init_thread_registers(struct uml_pt_regs *to);
 extern int save_fp_registers(int pid, unsigned long *fp_regs);
 extern int restore_fp_registers(int pid, unsigned long *fp_regs);
-extern void save_registers(int pid, union uml_pt_regs *regs);
-extern void restore_registers(int pid, union uml_pt_regs *regs);
+extern int save_fpx_registers(int pid, unsigned long *fp_regs);
+extern int restore_fpx_registers(int pid, unsigned long *fp_regs);
+extern void save_registers(int pid, struct uml_pt_regs *regs);
+extern void restore_registers(int pid, struct uml_pt_regs *regs);
 extern void init_registers(int pid);
-extern void get_safe_registers(unsigned long * regs, unsigned long * fp_regs);
+extern void get_safe_registers(unsigned long *regs);
 extern unsigned long get_thread_reg(int reg, jmp_buf *buf);
 
 #endif
diff --git a/arch/um/include/skas/mmu-skas.h b/arch/um/include/skas/mmu-skas.h
deleted file mode 100644
index b26986c0c3d..00000000000
--- a/arch/um/include/skas/mmu-skas.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_MMU_H
-#define __SKAS_MMU_H
-
-#include "mm_id.h"
-#include "asm/ldt.h"
-
-struct mmu_context_skas {
-	struct mm_id id;
-	unsigned long last_page_table;
-#ifdef CONFIG_3_LEVEL_PGTABLES
-	unsigned long last_pmd;
-#endif
-	uml_ldt_t ldt;
-};
-
-extern void switch_mm_skas(struct mm_id * mm_idp);
-
-#endif
diff --git a/arch/um/include/skas/mode-skas.h b/arch/um/include/skas/mode-skas.h
index 8bc6916bbbb..e065feb000d 100644
--- a/arch/um/include/skas/mode-skas.h
+++ b/arch/um/include/skas/mode-skas.h
@@ -1,18 +1,11 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{linux.intel,addtoit}.com)
  * Licensed under the GPL
  */
 
 #ifndef __MODE_SKAS_H__
 #define __MODE_SKAS_H__
 
-#include <sysdep/ptrace.h>
-
-extern unsigned long exec_regs[];
-extern unsigned long exec_fp_regs[];
-extern unsigned long exec_fpx_regs[];
-extern int have_fpx_regs;
-
 extern void kill_off_processes_skas(void);
 
 #endif
diff --git a/arch/um/include/skas/mode_kern_skas.h b/arch/um/include/skas/mode_kern_skas.h
deleted file mode 100644
index 8ee6285dfac..00000000000
--- a/arch/um/include/skas/mode_kern_skas.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_MODE_KERN_H__
-#define __SKAS_MODE_KERN_H__
-
-#include "linux/sched.h"
-#include "asm/page.h"
-#include "asm/ptrace.h"
-
-extern void flush_thread_skas(void);
-extern void switch_to_skas(void *prev, void *next);
-extern void start_thread_skas(struct pt_regs *regs, unsigned long eip,
-			      unsigned long esp);
-extern int copy_thread_skas(int nr, unsigned long clone_flags,
-			    unsigned long sp, unsigned long stack_top,
-			    struct task_struct *p, struct pt_regs *regs);
-extern void release_thread_skas(struct task_struct *task);
-extern void init_idle_skas(void);
-extern void flush_tlb_kernel_range_skas(unsigned long start,
-					unsigned long end);
-extern void flush_tlb_kernel_vm_skas(void);
-extern void __flush_tlb_one_skas(unsigned long addr);
-extern void flush_tlb_range_skas(struct vm_area_struct *vma,
-				 unsigned long start, unsigned long end);
-extern void flush_tlb_mm_skas(struct mm_struct *mm);
-extern void force_flush_all_skas(void);
-extern long execute_syscall_skas(void *r);
-extern void before_mem_skas(unsigned long unused);
-extern unsigned long set_task_sizes_skas(unsigned long *task_size_out);
-extern int start_uml_skas(void);
-extern int external_pid_skas(struct task_struct *task);
-extern int thread_pid_skas(struct task_struct *task);
-extern void flush_tlb_page_skas(struct vm_area_struct *vma,
-				unsigned long address);
-
-#define kmem_end_skas (host_task_size - 1024 * 1024)
-
-#endif
diff --git a/arch/um/include/skas/skas.h b/arch/um/include/skas/skas.h
index e88926b1607..b073f8a86bd 100644
--- a/arch/um/include/skas/skas.h
+++ b/arch/um/include/skas/skas.h
@@ -1,12 +1,11 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #ifndef __SKAS_H
 #define __SKAS_H
 
-#include "mm_id.h"
 #include "sysdep/ptrace.h"
 
 extern int userspace_pid[];
@@ -15,7 +14,7 @@ extern int skas_needs_stub;
 
 extern int user_thread(unsigned long stack, int flags);
 extern void new_thread_handler(void);
-extern void handle_syscall(union uml_pt_regs *regs);
+extern void handle_syscall(struct uml_pt_regs *regs);
 extern int new_mm(unsigned long stack);
 extern void get_skas_faultinfo(int pid, struct faultinfo * fi);
 extern long execute_syscall_skas(void *r);
diff --git a/arch/um/include/skas/uaccess-skas.h b/arch/um/include/skas/uaccess-skas.h
deleted file mode 100644
index 224a75f4c02..00000000000
--- a/arch/um/include/skas/uaccess-skas.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SKAS_UACCESS_H
-#define __SKAS_UACCESS_H
-
-#include "asm/errno.h"
-
-/* No SKAS-specific checking. */
-#define access_ok_skas(type, addr, size) 0
-
-extern int copy_from_user_skas(void *to, const void __user *from, int n);
-extern int copy_to_user_skas(void __user *to, const void *from, int n);
-extern int strncpy_from_user_skas(char *dst, const char __user *src, int count);
-extern int __clear_user_skas(void __user *mem, int len);
-extern int clear_user_skas(void __user *mem, int len);
-extern int strnlen_user_skas(const void __user *str, int len);
-
-#endif
diff --git a/arch/um/include/sysdep-i386/kernel-offsets.h b/arch/um/include/sysdep-i386/kernel-offsets.h
index 97ec9d894d7..5868526b5ee 100644
--- a/arch/um/include/sysdep-i386/kernel-offsets.h
+++ b/arch/um/include/sysdep-i386/kernel-offsets.h
@@ -17,6 +17,5 @@
 
 void foo(void)
 {
-	OFFSET(HOST_TASK_DEBUGREGS, task_struct, thread.arch.debugregs);
 #include <common-offsets.h>
 }
diff --git a/arch/um/include/sysdep-i386/ptrace.h b/arch/um/include/sysdep-i386/ptrace.h
index 52b398bcafc..11c08969d13 100644
--- a/arch/um/include/sysdep-i386/ptrace.h
+++ b/arch/um/include/sysdep-i386/ptrace.h
@@ -1,5 +1,5 @@
-/* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -9,17 +9,11 @@
 #include "uml-config.h"
 #include "user_constants.h"
 #include "sysdep/faultinfo.h"
-#include "choose-mode.h"
 
 #define MAX_REG_NR (UM_FRAME_SIZE / sizeof(unsigned long))
 #define MAX_REG_OFFSET (UM_FRAME_SIZE)
 
-#ifdef UML_CONFIG_PT_PROXY
-extern void update_debugregs(int seq);
-#else
 static inline void update_debugregs(int seq) {}
-#endif
-
 
 /* syscall emulation path in ptrace */
 
@@ -31,12 +25,6 @@ void set_using_sysemu(int value);
 int get_using_sysemu(void);
 extern int sysemu_supported;
 
-#ifdef UML_CONFIG_MODE_TT
-#include "sysdep/sc.h"
-#endif
-
-#ifdef UML_CONFIG_MODE_SKAS
-
 #include "skas_ptregs.h"
 
 #define REGS_IP(r) ((r)[HOST_IP])
@@ -60,70 +48,36 @@ extern int sysemu_supported;
 
 #define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r))
 
-#endif
 #ifndef PTRACE_SYSEMU_SINGLESTEP
 #define PTRACE_SYSEMU_SINGLESTEP 32
 #endif
 
-union uml_pt_regs {
-#ifdef UML_CONFIG_MODE_TT
-	struct tt_regs {
-		long syscall;
-		void *sc;
-                struct faultinfo faultinfo;
-	} tt;
-#endif
-#ifdef UML_CONFIG_MODE_SKAS
-	struct skas_regs {
-		unsigned long regs[MAX_REG_NR];
-		unsigned long fp[HOST_FP_SIZE];
-		unsigned long xfp[HOST_XFP_SIZE];
-                struct faultinfo faultinfo;
-		long syscall;
-		int is_user;
-	} skas;
-#endif
+struct uml_pt_regs {
+	unsigned long gp[MAX_REG_NR];
+	struct faultinfo faultinfo;
+	long syscall;
+	int is_user;
 };
 
 #define EMPTY_UML_PT_REGS { }
 
-extern int mode_tt;
-
-#define UPT_SC(r) ((r)->tt.sc)
-#define UPT_IP(r) \
-	__CHOOSE_MODE(SC_IP(UPT_SC(r)), REGS_IP((r)->skas.regs))
-#define UPT_SP(r) \
-	__CHOOSE_MODE(SC_SP(UPT_SC(r)), REGS_SP((r)->skas.regs))
-#define UPT_EFLAGS(r) \
-	__CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs))
-#define UPT_EAX(r) \
-	__CHOOSE_MODE(SC_EAX(UPT_SC(r)), REGS_EAX((r)->skas.regs))
-#define UPT_EBX(r) \
-	__CHOOSE_MODE(SC_EBX(UPT_SC(r)), REGS_EBX((r)->skas.regs))
-#define UPT_ECX(r) \
-	__CHOOSE_MODE(SC_ECX(UPT_SC(r)), REGS_ECX((r)->skas.regs))
-#define UPT_EDX(r) \
-	__CHOOSE_MODE(SC_EDX(UPT_SC(r)), REGS_EDX((r)->skas.regs))
-#define UPT_ESI(r) \
-	__CHOOSE_MODE(SC_ESI(UPT_SC(r)), REGS_ESI((r)->skas.regs))
-#define UPT_EDI(r) \
-	__CHOOSE_MODE(SC_EDI(UPT_SC(r)), REGS_EDI((r)->skas.regs))
-#define UPT_EBP(r) \
-	__CHOOSE_MODE(SC_EBP(UPT_SC(r)), REGS_EBP((r)->skas.regs))
-#define UPT_ORIG_EAX(r) \
-	__CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall)
-#define UPT_CS(r) \
-	__CHOOSE_MODE(SC_CS(UPT_SC(r)), REGS_CS((r)->skas.regs))
-#define UPT_SS(r) \
-	__CHOOSE_MODE(SC_SS(UPT_SC(r)), REGS_SS((r)->skas.regs))
-#define UPT_DS(r) \
-	__CHOOSE_MODE(SC_DS(UPT_SC(r)), REGS_DS((r)->skas.regs))
-#define UPT_ES(r) \
-	__CHOOSE_MODE(SC_ES(UPT_SC(r)), REGS_ES((r)->skas.regs))
-#define UPT_FS(r) \
-	__CHOOSE_MODE(SC_FS(UPT_SC(r)), REGS_FS((r)->skas.regs))
-#define UPT_GS(r) \
-	__CHOOSE_MODE(SC_GS(UPT_SC(r)), REGS_GS((r)->skas.regs))
+#define UPT_IP(r) REGS_IP((r)->gp)
+#define UPT_SP(r) REGS_SP((r)->gp)
+#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
+#define UPT_EAX(r) REGS_EAX((r)->gp)
+#define UPT_EBX(r) REGS_EBX((r)->gp)
+#define UPT_ECX(r) REGS_ECX((r)->gp)
+#define UPT_EDX(r) REGS_EDX((r)->gp)
+#define UPT_ESI(r) REGS_ESI((r)->gp)
+#define UPT_EDI(r) REGS_EDI((r)->gp)
+#define UPT_EBP(r) REGS_EBP((r)->gp)
+#define UPT_ORIG_EAX(r) ((r)->syscall)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_SS(r) REGS_SS((r)->gp)
+#define UPT_DS(r) REGS_DS((r)->gp)
+#define UPT_ES(r) REGS_ES((r)->gp)
+#define UPT_FS(r) REGS_FS((r)->gp)
+#define UPT_GS(r) REGS_GS((r)->gp)
 
 #define UPT_SYSCALL_ARG1(r) UPT_EBX(r)
 #define UPT_SYSCALL_ARG2(r) UPT_ECX(r)
@@ -134,20 +88,19 @@ extern int mode_tt;
 
 extern int user_context(unsigned long sp);
 
-#define UPT_IS_USER(r) \
-	CHOOSE_MODE(user_context(UPT_SP(r)), (r)->skas.is_user)
+#define UPT_IS_USER(r) ((r)->is_user)
 
 struct syscall_args {
 	unsigned long args[6];
 };
 
 #define SYSCALL_ARGS(r) ((struct syscall_args) \
-                        { .args = { UPT_SYSCALL_ARG1(r), \
-                                    UPT_SYSCALL_ARG2(r), \
- 			            UPT_SYSCALL_ARG3(r), \
-                                    UPT_SYSCALL_ARG4(r), \
-		                    UPT_SYSCALL_ARG5(r), \
-                                    UPT_SYSCALL_ARG6(r) } } )
+			 { .args = { UPT_SYSCALL_ARG1(r),	\
+				     UPT_SYSCALL_ARG2(r),	\
+				     UPT_SYSCALL_ARG3(r),	\
+				     UPT_SYSCALL_ARG4(r),	\
+				     UPT_SYSCALL_ARG5(r),	\
+				     UPT_SYSCALL_ARG6(r) } } )
 
 #define UPT_REG(regs, reg) \
 	({	unsigned long val; \
@@ -175,7 +128,6 @@ struct syscall_args {
 		} \
 	        val; \
 	})
-	
 
 #define UPT_SET(regs, reg, val) \
 	do { \
@@ -204,29 +156,16 @@ struct syscall_args {
 	} while (0)
 
 #define UPT_SET_SYSCALL_RETURN(r, res) \
-	CHOOSE_MODE(SC_SET_SYSCALL_RETURN(UPT_SC(r), (res)), \
-                    REGS_SET_SYSCALL_RETURN((r)->skas.regs, (res)))
+	REGS_SET_SYSCALL_RETURN((r)->gp, (res))
 
-#define UPT_RESTART_SYSCALL(r) \
-	CHOOSE_MODE(SC_RESTART_SYSCALL(UPT_SC(r)), \
-		    REGS_RESTART_SYSCALL((r)->skas.regs))
+#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
 
 #define UPT_ORIG_SYSCALL(r) UPT_EAX(r)
 #define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r)
 #define UPT_SYSCALL_RET(r) UPT_EAX(r)
 
-#define UPT_FAULTINFO(r) \
-        CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo))
+#define UPT_FAULTINFO(r) (&(r)->faultinfo)
 
-#endif
+extern void arch_init_registers(int pid);
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+#endif
diff --git a/arch/um/include/sysdep-i386/sigcontext.h b/arch/um/include/sysdep-i386/sigcontext.h
index 23fd2644d7e..67e77122aa4 100644
--- a/arch/um/include/sysdep-i386/sigcontext.h
+++ b/arch/um/include/sysdep-i386/sigcontext.h
@@ -1,19 +1,15 @@
 /* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #ifndef __SYS_SIGCONTEXT_I386_H
 #define __SYS_SIGCONTEXT_I386_H
 
-#include "uml-config.h"
-#include <sysdep/sc.h>
+#include "sysdep/sc.h"
 
 #define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
 
-#define SC_RESTART_SYSCALL(sc) IP_RESTART_SYSCALL(SC_IP(sc))
-#define SC_SET_SYSCALL_RETURN(sc, result) SC_EAX(sc) = (result)
-
 #define GET_FAULTINFO_FROM_SC(fi,sc) \
 	{ \
 		(fi).cr2 = SC_CR2(sc); \
@@ -21,32 +17,10 @@
 		(fi).trap_no = SC_TRAPNO(sc); \
 	}
 
-/* ptrace expects that, at the start of a system call, %eax contains
- * -ENOSYS, so this makes it so.
- */
-#define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0)
-
 /* This is Page Fault */
 #define SEGV_IS_FIXABLE(fi)	((fi)->trap_no == 14)
 
 /* SKAS3 has no trap_no on i386, but get_skas_faultinfo() sets it to 0. */
-#ifdef UML_CONFIG_MODE_SKAS
 #define SEGV_MAYBE_FIXABLE(fi)	((fi)->trap_no == 0 && ptrace_faultinfo)
-#else
-#define SEGV_MAYBE_FIXABLE(fi)	0
-#endif
-
-extern unsigned long *sc_sigmask(void *sc_ptr);
-extern int sc_get_fpregs(unsigned long buf, void *sc_ptr);
 
 #endif
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/sysdep-i386/stub.h b/arch/um/include/sysdep-i386/stub.h
index 4fffae75ba5..8c097b87fca 100644
--- a/arch/um/include/sysdep-i386/stub.h
+++ b/arch/um/include/sysdep-i386/stub.h
@@ -9,7 +9,7 @@
 #include <sys/mman.h>
 #include <asm/ptrace.h>
 #include <asm/unistd.h>
-#include <asm/page.h>
+#include "as-layout.h"
 #include "stub-data.h"
 #include "kern_constants.h"
 #include "uml-config.h"
@@ -19,7 +19,7 @@ extern void stub_clone_handler(void);
 
 #define STUB_SYSCALL_RET EAX
 #define STUB_MMAP_NR __NR_mmap2
-#define MMAP_OFFSET(o) ((o) >> PAGE_SHIFT)
+#define MMAP_OFFSET(o) ((o) >> UM_KERN_PAGE_SHIFT)
 
 static inline long stub_syscall0(long syscall)
 {
@@ -90,12 +90,12 @@ static inline void remap_stack(int fd, unsigned long offset)
 {
 	__asm__ volatile ("movl %%eax,%%ebp ; movl %0,%%eax ; int $0x80 ;"
 			  "movl %7, %%ebx ; movl %%eax, (%%ebx)"
-			  : : "g" (STUB_MMAP_NR), "b" (UML_CONFIG_STUB_DATA), 
-			    "c" (UM_KERN_PAGE_SIZE), 
+			  : : "g" (STUB_MMAP_NR), "b" (STUB_DATA),
+			    "c" (UM_KERN_PAGE_SIZE),
 			    "d" (PROT_READ | PROT_WRITE),
-			    "S" (MAP_FIXED | MAP_SHARED), "D" (fd), 
-			    "a" (offset), 
-			    "i" (&((struct stub_data *) UML_CONFIG_STUB_DATA)->err) 
+			    "S" (MAP_FIXED | MAP_SHARED), "D" (fd),
+			    "a" (offset),
+			    "i" (&((struct stub_data *) STUB_DATA)->err)
 			  : "memory");
 }
 
diff --git a/arch/um/include/sysdep-i386/thread.h b/arch/um/include/sysdep-i386/thread.h
deleted file mode 100644
index 243fed44d78..00000000000
--- a/arch/um/include/sysdep-i386/thread.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef __UM_THREAD_H
-#define __UM_THREAD_H
-
-#include <kern_constants.h>
-
-#define TASK_DEBUGREGS(task) ((unsigned long *) &(((char *) (task))[HOST_TASK_DEBUGREGS]))
-#ifdef UML_CONFIG_MODE_TT
-#define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[HOST_TASK_EXTERN_PID]))
-#endif
-
-#endif
diff --git a/arch/um/include/sysdep-x86_64/ptrace.h b/arch/um/include/sysdep-x86_64/ptrace.h
index 62403bd9966..9ea44d111f3 100644
--- a/arch/um/include/sysdep-x86_64/ptrace.h
+++ b/arch/um/include/sysdep-x86_64/ptrace.h
@@ -1,5 +1,6 @@
 /*
  * Copyright 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  *
  * Licensed under the GPL
  */
@@ -14,11 +15,6 @@
 #define MAX_REG_OFFSET (UM_FRAME_SIZE)
 #define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
 
-#ifdef UML_CONFIG_MODE_TT
-#include "sysdep/sc.h"
-#endif
-
-#ifdef UML_CONFIG_MODE_SKAS
 #include "skas_ptregs.h"
 
 #define REGS_IP(r) ((r)[HOST_IP])
@@ -88,78 +84,51 @@
 
 #define REGS_ERR(r) ((r)->fault_type)
 
-#endif
-
-#include "choose-mode.h"
-
-/* XXX */
-union uml_pt_regs {
-#ifdef UML_CONFIG_MODE_TT
-	struct tt_regs {
-		long syscall;
-		unsigned long orig_rax;
-		void *sc;
-                struct faultinfo faultinfo;
-	} tt;
-#endif
-#ifdef UML_CONFIG_MODE_SKAS
-	struct skas_regs {
-		unsigned long regs[MAX_REG_NR];
-		unsigned long fp[HOST_FP_SIZE];
-                struct faultinfo faultinfo;
-		long syscall;
-		int is_user;
-	} skas;
-#endif
+struct uml_pt_regs {
+	unsigned long gp[MAX_REG_NR];
+	struct faultinfo faultinfo;
+	long syscall;
+	int is_user;
 };
 
 #define EMPTY_UML_PT_REGS { }
 
-/* XXX */
-extern int mode_tt;
-
-#define UPT_RBX(r) __CHOOSE_MODE(SC_RBX(UPT_SC(r)), REGS_RBX((r)->skas.regs))
-#define UPT_RCX(r) __CHOOSE_MODE(SC_RCX(UPT_SC(r)), REGS_RCX((r)->skas.regs))
-#define UPT_RDX(r) __CHOOSE_MODE(SC_RDX(UPT_SC(r)), REGS_RDX((r)->skas.regs))
-#define UPT_RSI(r) __CHOOSE_MODE(SC_RSI(UPT_SC(r)), REGS_RSI((r)->skas.regs))
-#define UPT_RDI(r) __CHOOSE_MODE(SC_RDI(UPT_SC(r)), REGS_RDI((r)->skas.regs))
-#define UPT_RBP(r) __CHOOSE_MODE(SC_RBP(UPT_SC(r)), REGS_RBP((r)->skas.regs))
-#define UPT_RAX(r) __CHOOSE_MODE(SC_RAX(UPT_SC(r)), REGS_RAX((r)->skas.regs))
-#define UPT_R8(r) __CHOOSE_MODE(SC_R8(UPT_SC(r)), REGS_R8((r)->skas.regs))
-#define UPT_R9(r) __CHOOSE_MODE(SC_R9(UPT_SC(r)), REGS_R9((r)->skas.regs))
-#define UPT_R10(r) __CHOOSE_MODE(SC_R10(UPT_SC(r)), REGS_R10((r)->skas.regs))
-#define UPT_R11(r) __CHOOSE_MODE(SC_R11(UPT_SC(r)), REGS_R11((r)->skas.regs))
-#define UPT_R12(r) __CHOOSE_MODE(SC_R12(UPT_SC(r)), REGS_R12((r)->skas.regs))
-#define UPT_R13(r) __CHOOSE_MODE(SC_R13(UPT_SC(r)), REGS_R13((r)->skas.regs))
-#define UPT_R14(r) __CHOOSE_MODE(SC_R14(UPT_SC(r)), REGS_R14((r)->skas.regs))
-#define UPT_R15(r) __CHOOSE_MODE(SC_R15(UPT_SC(r)), REGS_R15((r)->skas.regs))
-#define UPT_CS(r) __CHOOSE_MODE(SC_CS(UPT_SC(r)), REGS_CS((r)->skas.regs))
-#define UPT_FS_BASE(r) \
-	__CHOOSE_MODE(SC_FS_BASE(UPT_SC(r)), REGS_FS_BASE((r)->skas.regs))
-#define UPT_FS(r) __CHOOSE_MODE(SC_FS(UPT_SC(r)), REGS_FS((r)->skas.regs))
-#define UPT_GS_BASE(r) \
-	__CHOOSE_MODE(SC_GS_BASE(UPT_SC(r)), REGS_GS_BASE((r)->skas.regs))
-#define UPT_GS(r) __CHOOSE_MODE(SC_GS(UPT_SC(r)), REGS_GS((r)->skas.regs))
-#define UPT_DS(r) __CHOOSE_MODE(SC_DS(UPT_SC(r)), REGS_DS((r)->skas.regs))
-#define UPT_ES(r) __CHOOSE_MODE(SC_ES(UPT_SC(r)), REGS_ES((r)->skas.regs))
-#define UPT_CS(r) __CHOOSE_MODE(SC_CS(UPT_SC(r)), REGS_CS((r)->skas.regs))
-#define UPT_SS(r) __CHOOSE_MODE(SC_SS(UPT_SC(r)), REGS_SS((r)->skas.regs))
-#define UPT_ORIG_RAX(r) \
-	__CHOOSE_MODE((r)->tt.orig_rax, REGS_ORIG_RAX((r)->skas.regs))
-
-#define UPT_IP(r) __CHOOSE_MODE(SC_IP(UPT_SC(r)), REGS_IP((r)->skas.regs))
-#define UPT_SP(r) __CHOOSE_MODE(SC_SP(UPT_SC(r)), REGS_SP((r)->skas.regs))
-
-#define UPT_EFLAGS(r) \
-	__CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs))
-#define UPT_SC(r) ((r)->tt.sc)
-#define UPT_SYSCALL_NR(r) __CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall)
+#define UPT_RBX(r) REGS_RBX((r)->gp)
+#define UPT_RCX(r) REGS_RCX((r)->gp)
+#define UPT_RDX(r) REGS_RDX((r)->gp)
+#define UPT_RSI(r) REGS_RSI((r)->gp)
+#define UPT_RDI(r) REGS_RDI((r)->gp)
+#define UPT_RBP(r) REGS_RBP((r)->gp)
+#define UPT_RAX(r) REGS_RAX((r)->gp)
+#define UPT_R8(r) REGS_R8((r)->gp)
+#define UPT_R9(r) REGS_R9((r)->gp)
+#define UPT_R10(r) REGS_R10((r)->gp)
+#define UPT_R11(r) REGS_R11((r)->gp)
+#define UPT_R12(r) REGS_R12((r)->gp)
+#define UPT_R13(r) REGS_R13((r)->gp)
+#define UPT_R14(r) REGS_R14((r)->gp)
+#define UPT_R15(r) REGS_R15((r)->gp)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_FS_BASE(r) REGS_FS_BASE((r)->gp)
+#define UPT_FS(r) REGS_FS((r)->gp)
+#define UPT_GS_BASE(r) REGS_GS_BASE((r)->gp)
+#define UPT_GS(r) REGS_GS((r)->gp)
+#define UPT_DS(r) REGS_DS((r)->gp)
+#define UPT_ES(r) REGS_ES((r)->gp)
+#define UPT_CS(r) REGS_CS((r)->gp)
+#define UPT_SS(r) REGS_SS((r)->gp)
+#define UPT_ORIG_RAX(r) REGS_ORIG_RAX((r)->gp)
+
+#define UPT_IP(r) REGS_IP((r)->gp)
+#define UPT_SP(r) REGS_SP((r)->gp)
+
+#define UPT_EFLAGS(r) REGS_EFLAGS((r)->gp)
+#define UPT_SYSCALL_NR(r) ((r)->syscall)
 #define UPT_SYSCALL_RET(r) UPT_RAX(r)
 
 extern int user_context(unsigned long sp);
 
-#define UPT_IS_USER(r) \
-	CHOOSE_MODE(user_context(UPT_SP(r)), (r)->skas.is_user)
+#define UPT_IS_USER(r) ((r)->is_user)
 
 #define UPT_SYSCALL_ARG1(r) UPT_RDI(r)
 #define UPT_SYSCALL_ARG2(r) UPT_RSI(r)
@@ -173,101 +142,99 @@ struct syscall_args {
 };
 
 #define SYSCALL_ARGS(r) ((struct syscall_args) \
-                        { .args = { UPT_SYSCALL_ARG1(r), \
-                                    UPT_SYSCALL_ARG2(r), \
- 			            UPT_SYSCALL_ARG3(r), \
-                                    UPT_SYSCALL_ARG4(r), \
-		                    UPT_SYSCALL_ARG5(r), \
-                                    UPT_SYSCALL_ARG6(r) } } )
+			 { .args = { UPT_SYSCALL_ARG1(r),	 \
+				     UPT_SYSCALL_ARG2(r),	 \
+				     UPT_SYSCALL_ARG3(r),	 \
+				     UPT_SYSCALL_ARG4(r),	 \
+				     UPT_SYSCALL_ARG5(r),	 \
+				     UPT_SYSCALL_ARG6(r) } } )
 
 #define UPT_REG(regs, reg) \
-        ({      unsigned long val; \
-                switch(reg){ \
-		case R8: val = UPT_R8(regs); break; \
-		case R9: val = UPT_R9(regs); break; \
-		case R10: val = UPT_R10(regs); break; \
-		case R11: val = UPT_R11(regs); break; \
-		case R12: val = UPT_R12(regs); break; \
-		case R13: val = UPT_R13(regs); break; \
-		case R14: val = UPT_R14(regs); break; \
-		case R15: val = UPT_R15(regs); break; \
-                case RIP: val = UPT_IP(regs); break; \
-                case RSP: val = UPT_SP(regs); break; \
-                case RAX: val = UPT_RAX(regs); break; \
-                case RBX: val = UPT_RBX(regs); break; \
-                case RCX: val = UPT_RCX(regs); break; \
-                case RDX: val = UPT_RDX(regs); break; \
-                case RSI: val = UPT_RSI(regs); break; \
-                case RDI: val = UPT_RDI(regs); break; \
-                case RBP: val = UPT_RBP(regs); break; \
-                case ORIG_RAX: val = UPT_ORIG_RAX(regs); break; \
-                case CS: val = UPT_CS(regs); break; \
-                case SS: val = UPT_SS(regs); break; \
-		case FS_BASE: val = UPT_FS_BASE(regs); break; \
-                case GS_BASE: val = UPT_GS_BASE(regs); break; \
-                case DS: val = UPT_DS(regs); break; \
-                case ES: val = UPT_ES(regs); break; \
-                case FS : val = UPT_FS (regs); break; \
-		case GS: val = UPT_GS(regs); break;	    \
-                case EFLAGS: val = UPT_EFLAGS(regs); break; \
-                default :  \
-                        panic("Bad register in UPT_REG : %d\n", reg);  \
-                        val = -1; \
-                } \
-                val; \
-        })
+	({      unsigned long val;		\
+		switch(reg){						\
+		case R8: val = UPT_R8(regs); break;			\
+		case R9: val = UPT_R9(regs); break;			\
+		case R10: val = UPT_R10(regs); break;			\
+		case R11: val = UPT_R11(regs); break;			\
+		case R12: val = UPT_R12(regs); break;			\
+		case R13: val = UPT_R13(regs); break;			\
+		case R14: val = UPT_R14(regs); break;			\
+		case R15: val = UPT_R15(regs); break;			\
+		case RIP: val = UPT_IP(regs); break;			\
+		case RSP: val = UPT_SP(regs); break;			\
+		case RAX: val = UPT_RAX(regs); break;			\
+		case RBX: val = UPT_RBX(regs); break;			\
+		case RCX: val = UPT_RCX(regs); break;			\
+		case RDX: val = UPT_RDX(regs); break;			\
+		case RSI: val = UPT_RSI(regs); break;			\
+		case RDI: val = UPT_RDI(regs); break;			\
+		case RBP: val = UPT_RBP(regs); break;			\
+		case ORIG_RAX: val = UPT_ORIG_RAX(regs); break;		\
+		case CS: val = UPT_CS(regs); break;			\
+		case SS: val = UPT_SS(regs); break;			\
+		case FS_BASE: val = UPT_FS_BASE(regs); break;		\
+		case GS_BASE: val = UPT_GS_BASE(regs); break;		\
+		case DS: val = UPT_DS(regs); break;			\
+		case ES: val = UPT_ES(regs); break;			\
+		case FS : val = UPT_FS (regs); break;			\
+		case GS: val = UPT_GS(regs); break;			\
+		case EFLAGS: val = UPT_EFLAGS(regs); break;		\
+		default :						\
+			panic("Bad register in UPT_REG : %d\n", reg);	\
+			val = -1;					\
+		}							\
+		val;							\
+	})
 
 
 #define UPT_SET(regs, reg, val) \
-        ({      unsigned long __upt_val = val; \
-                switch(reg){ \
-                case R8: UPT_R8(regs) = __upt_val; break; \
-                case R9: UPT_R9(regs) = __upt_val; break; \
-                case R10: UPT_R10(regs) = __upt_val; break; \
-                case R11: UPT_R11(regs) = __upt_val; break; \
-                case R12: UPT_R12(regs) = __upt_val; break; \
-                case R13: UPT_R13(regs) = __upt_val; break; \
-                case R14: UPT_R14(regs) = __upt_val; break; \
-                case R15: UPT_R15(regs) = __upt_val; break; \
-                case RIP: UPT_IP(regs) = __upt_val; break; \
-                case RSP: UPT_SP(regs) = __upt_val; break; \
-                case RAX: UPT_RAX(regs) = __upt_val; break; \
-                case RBX: UPT_RBX(regs) = __upt_val; break; \
-                case RCX: UPT_RCX(regs) = __upt_val; break; \
-                case RDX: UPT_RDX(regs) = __upt_val; break; \
-                case RSI: UPT_RSI(regs) = __upt_val; break; \
-                case RDI: UPT_RDI(regs) = __upt_val; break; \
-                case RBP: UPT_RBP(regs) = __upt_val; break; \
-                case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break; \
-                case CS: UPT_CS(regs) = __upt_val; break; \
-                case SS: UPT_SS(regs) = __upt_val; break; \
-                case FS_BASE: UPT_FS_BASE(regs) = __upt_val; break; \
-                case GS_BASE: UPT_GS_BASE(regs) = __upt_val; break; \
-                case DS: UPT_DS(regs) = __upt_val; break; \
-                case ES: UPT_ES(regs) = __upt_val; break; \
-                case FS: UPT_FS(regs) = __upt_val; break; \
-                case GS: UPT_GS(regs) = __upt_val; break; \
-                case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break; \
-                default :  \
-                        panic("Bad register in UPT_SET : %d\n", reg);  \
-			break; \
-                } \
-                __upt_val; \
-        })
+	({      unsigned long __upt_val = val;	\
+		switch(reg){						\
+		case R8: UPT_R8(regs) = __upt_val; break;		\
+		case R9: UPT_R9(regs) = __upt_val; break;		\
+		case R10: UPT_R10(regs) = __upt_val; break;		\
+		case R11: UPT_R11(regs) = __upt_val; break;		\
+		case R12: UPT_R12(regs) = __upt_val; break;		\
+		case R13: UPT_R13(regs) = __upt_val; break;		\
+		case R14: UPT_R14(regs) = __upt_val; break;		\
+		case R15: UPT_R15(regs) = __upt_val; break;		\
+		case RIP: UPT_IP(regs) = __upt_val; break;		\
+		case RSP: UPT_SP(regs) = __upt_val; break;		\
+		case RAX: UPT_RAX(regs) = __upt_val; break;		\
+		case RBX: UPT_RBX(regs) = __upt_val; break;		\
+		case RCX: UPT_RCX(regs) = __upt_val; break;		\
+		case RDX: UPT_RDX(regs) = __upt_val; break;		\
+		case RSI: UPT_RSI(regs) = __upt_val; break;		\
+		case RDI: UPT_RDI(regs) = __upt_val; break;		\
+		case RBP: UPT_RBP(regs) = __upt_val; break;		\
+		case ORIG_RAX: UPT_ORIG_RAX(regs) = __upt_val; break;	\
+		case CS: UPT_CS(regs) = __upt_val; break;		\
+		case SS: UPT_SS(regs) = __upt_val; break;		\
+		case FS_BASE: UPT_FS_BASE(regs) = __upt_val; break;	\
+		case GS_BASE: UPT_GS_BASE(regs) = __upt_val; break;	\
+		case DS: UPT_DS(regs) = __upt_val; break;		\
+		case ES: UPT_ES(regs) = __upt_val; break;		\
+		case FS: UPT_FS(regs) = __upt_val; break;		\
+		case GS: UPT_GS(regs) = __upt_val; break;		\
+		case EFLAGS: UPT_EFLAGS(regs) = __upt_val; break;	\
+		default :						\
+			panic("Bad register in UPT_SET : %d\n", reg);	\
+			break;						\
+		}							\
+		__upt_val;						\
+	})
 
 #define UPT_SET_SYSCALL_RETURN(r, res) \
-	CHOOSE_MODE(SC_SET_SYSCALL_RETURN(UPT_SC(r), (res)), \
-                    REGS_SET_SYSCALL_RETURN((r)->skas.regs, (res)))
+	REGS_SET_SYSCALL_RETURN((r)->gp, (res))
+
+#define UPT_RESTART_SYSCALL(r) REGS_RESTART_SYSCALL((r)->gp)
 
-#define UPT_RESTART_SYSCALL(r) \
-	CHOOSE_MODE(SC_RESTART_SYSCALL(UPT_SC(r)), \
-		    REGS_RESTART_SYSCALL((r)->skas.regs))
+#define UPT_SEGV_IS_FIXABLE(r) REGS_SEGV_IS_FIXABLE(r)
 
-#define UPT_SEGV_IS_FIXABLE(r) \
-	CHOOSE_MODE(SC_SEGV_IS_FIXABLE(UPT_SC(r)), \
-                    REGS_SEGV_IS_FIXABLE(&r->skas))
+#define UPT_FAULTINFO(r) (&(r)->faultinfo)
 
-#define UPT_FAULTINFO(r) \
-        CHOOSE_MODE((&(r)->tt.faultinfo), (&(r)->skas.faultinfo))
+static inline void arch_init_registers(int pid)
+{
+}
 
 #endif
diff --git a/arch/um/include/sysdep-x86_64/sigcontext.h b/arch/um/include/sysdep-x86_64/sigcontext.h
index 41073235e7a..0155133b145 100644
--- a/arch/um/include/sysdep-x86_64/sigcontext.h
+++ b/arch/um/include/sysdep-x86_64/sigcontext.h
@@ -11,43 +11,17 @@
 
 #define IP_RESTART_SYSCALL(ip) ((ip) -= 2)
 
-#define SC_RESTART_SYSCALL(sc) IP_RESTART_SYSCALL(SC_IP(sc))
-#define SC_SET_SYSCALL_RETURN(sc, result) SC_RAX(sc) = (result)
-
-#define SC_FAULT_ADDR(sc) SC_CR2(sc)
-#define SC_FAULT_TYPE(sc) SC_ERR(sc)
-
-#define GET_FAULTINFO_FROM_SC(fi,sc) \
+#define GET_FAULTINFO_FROM_SC(fi, sc) \
 	{ \
 		(fi).cr2 = SC_CR2(sc); \
 		(fi).error_code = SC_ERR(sc); \
 		(fi).trap_no = SC_TRAPNO(sc); \
 	}
 
-/* ptrace expects that, at the start of a system call, %eax contains
- * -ENOSYS, so this makes it so.
- */
-
-#define SC_START_SYSCALL(sc) do SC_RAX(sc) = -ENOSYS; while(0)
-
 /* This is Page Fault */
 #define SEGV_IS_FIXABLE(fi)	((fi)->trap_no == 14)
 
 /* No broken SKAS API, which doesn't pass trap_no, here. */
 #define SEGV_MAYBE_FIXABLE(fi)	0
 
-extern unsigned long *sc_sigmask(void *sc_ptr);
-
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
-
diff --git a/arch/um/include/sysdep-x86_64/stub.h b/arch/um/include/sysdep-x86_64/stub.h
index 92e989f8176..655f9c2de3a 100644
--- a/arch/um/include/sysdep-x86_64/stub.h
+++ b/arch/um/include/sysdep-x86_64/stub.h
@@ -9,6 +9,7 @@
 #include <sys/mman.h>
 #include <asm/unistd.h>
 #include <sysdep/ptrace_user.h>
+#include "as-layout.h"
 #include "stub-data.h"
 #include "kern_constants.h"
 #include "uml-config.h"
@@ -94,13 +95,13 @@ static inline void remap_stack(long fd, unsigned long offset)
 {
 	__asm__ volatile ("movq %4,%%r10 ; movq %5,%%r8 ; "
 			  "movq %6, %%r9; " __syscall "; movq %7, %%rbx ; "
-			  "movq %%rax, (%%rbx)": 
-			  : "a" (STUB_MMAP_NR), "D" (UML_CONFIG_STUB_DATA), 
-			    "S" (UM_KERN_PAGE_SIZE), 
-			    "d" (PROT_READ | PROT_WRITE), 
-                            "g" (MAP_FIXED | MAP_SHARED), "g" (fd), 
+			  "movq %%rax, (%%rbx)":
+			  : "a" (STUB_MMAP_NR), "D" (STUB_DATA),
+			    "S" (UM_KERN_PAGE_SIZE),
+			    "d" (PROT_READ | PROT_WRITE),
+                            "g" (MAP_FIXED | MAP_SHARED), "g" (fd),
 			    "g" (offset),
-			    "i" (&((struct stub_data *) UML_CONFIG_STUB_DATA)->err)
+			    "i" (&((struct stub_data *) STUB_DATA)->err)
 			  : __syscall_clobber, "r10", "r8", "r9" );
 }
 
diff --git a/arch/um/include/sysdep-x86_64/thread.h b/arch/um/include/sysdep-x86_64/thread.h
deleted file mode 100644
index cbef3e1697f..00000000000
--- a/arch/um/include/sysdep-x86_64/thread.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __UM_THREAD_H
-#define __UM_THREAD_H
-
-#include <kern_constants.h>
-
-#ifdef UML_CONFIG_MODE_TT
-#define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[HOST_TASK_EXTERN_PID]))
-#endif
-
-#endif
diff --git a/arch/um/include/task.h b/arch/um/include/task.h
index 6375ba7203c..3fe726b3cf4 100644
--- a/arch/um/include/task.h
+++ b/arch/um/include/task.h
@@ -3,7 +3,7 @@
 
 #include <kern_constants.h>
 
-#define TASK_REGS(task) ((union uml_pt_regs *) &(((char *) (task))[HOST_TASK_REGS]))
+#define TASK_REGS(task) ((struct uml_pt_regs *) &(((char *) (task))[HOST_TASK_REGS]))
 #define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
 
 #endif
diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h
index bcd1a4afb84..ecd2265b301 100644
--- a/arch/um/include/tlb.h
+++ b/arch/um/include/tlb.h
@@ -8,34 +8,7 @@
 
 #include "um_mmu.h"
 
-struct host_vm_op {
-	enum { NONE, MMAP, MUNMAP, MPROTECT } type;
-	union {
-		struct {
-			unsigned long addr;
-			unsigned long len;
-			unsigned int prot;
-			int fd;
-			__u64 offset;
-		} mmap;
-		struct {
-			unsigned long addr;
-			unsigned long len;
-		} munmap;
-		struct {
-			unsigned long addr;
-			unsigned long len;
-			unsigned int prot;
-		} mprotect;
-	} u;
-};
-
 extern void force_flush_all(void);
-extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-                             unsigned long end_addr, int force,
-			     int (*do_ops)(union mm_context *,
-					   struct host_vm_op *, int, int,
-					   void **));
 extern int flush_tlb_kernel_range_common(unsigned long start,
 					 unsigned long end);
 
diff --git a/arch/um/include/tt/debug.h b/arch/um/include/tt/debug.h
deleted file mode 100644
index 9778fa83829..00000000000
--- a/arch/um/include/tt/debug.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002  Jeff Dike (jdike@karaya.com) and
- * Lars Brinkhoff.
- * Licensed under the GPL
- */
-
-#ifndef __UML_TT_DEBUG_H
-#define __UML_TT_DEBUG_H
-
-extern int debugger_proxy(int status, pid_t pid);
-extern void child_proxy(pid_t pid, int status);
-extern void init_proxy (pid_t pid, int waiting, int status);
-extern int start_debugger(char *prog, int startup, int stop, int *debugger_fd);
-extern void fake_child_exit(void);
-extern int gdb_config(char *str);
-extern int gdb_remove(int unused);
-
-#endif
diff --git a/arch/um/include/tt/mmu-tt.h b/arch/um/include/tt/mmu-tt.h
deleted file mode 100644
index 572a78b2258..00000000000
--- a/arch/um/include/tt/mmu-tt.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __TT_MMU_H
-#define __TT_MMU_H
-
-struct mmu_context_tt {
-};
-
-#endif
diff --git a/arch/um/include/tt/mode-tt.h b/arch/um/include/tt/mode-tt.h
deleted file mode 100644
index 2823cd56eea..00000000000
--- a/arch/um/include/tt/mode-tt.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __MODE_TT_H__
-#define __MODE_TT_H__
-
-#include "sysdep/ptrace.h"
-
-enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB };
-
-extern int tracing_pid;
-
-extern int tracer(int (*init_proc)(void *), void *sp);
-extern void sig_handler_common_tt(int sig, void *sc);
-extern void syscall_handler_tt(int sig, union uml_pt_regs *regs);
-extern void reboot_tt(void);
-extern void halt_tt(void);
-extern int is_tracer_winch(int pid, int fd, void *data);
-extern void kill_off_processes_tt(void);
-
-#endif
diff --git a/arch/um/include/tt/mode_kern_tt.h b/arch/um/include/tt/mode_kern_tt.h
deleted file mode 100644
index a4fc6305719..00000000000
--- a/arch/um/include/tt/mode_kern_tt.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __TT_MODE_KERN_H__
-#define __TT_MODE_KERN_H__
-
-#include "linux/sched.h"
-#include "asm/page.h"
-#include "asm/ptrace.h"
-#include "asm/uaccess.h"
-
-extern void switch_to_tt(void *prev, void *next);
-extern void flush_thread_tt(void);
-extern void start_thread_tt(struct pt_regs *regs, unsigned long eip,
-			   unsigned long esp);
-extern int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp,
-			  unsigned long stack_top, struct task_struct *p,
-			  struct pt_regs *regs);
-extern void release_thread_tt(struct task_struct *task);
-extern void initial_thread_cb_tt(void (*proc)(void *), void *arg);
-extern void init_idle_tt(void);
-extern void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end);
-extern void flush_tlb_kernel_vm_tt(void);
-extern void __flush_tlb_one_tt(unsigned long addr);
-extern void flush_tlb_range_tt(struct vm_area_struct *vma,
-			       unsigned long start, unsigned long end);
-extern void flush_tlb_mm_tt(struct mm_struct *mm);
-extern void force_flush_all_tt(void);
-extern long execute_syscall_tt(void *r);
-extern void before_mem_tt(unsigned long brk_start);
-extern unsigned long set_task_sizes_tt(unsigned long *task_size_out);
-extern int start_uml_tt(void);
-extern int external_pid_tt(struct task_struct *task);
-extern int thread_pid_tt(struct task_struct *task);
-
-#define kmem_end_tt (host_task_size - ABOVE_KMEM)
-
-#endif
diff --git a/arch/um/include/tt/tt.h b/arch/um/include/tt/tt.h
deleted file mode 100644
index acb8356e1f9..00000000000
--- a/arch/um/include/tt/tt.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __TT_H__
-#define __TT_H__
-
-#include "sysdep/ptrace.h"
-
-extern int gdb_pid;
-extern int debug;
-extern int debug_stop;
-extern int debug_trace;
-
-extern int honeypot;
-
-extern int fork_tramp(void *sig_stack);
-extern int do_proc_op(void *t, int proc_id);
-extern int tracer(int (*init_proc)(void *), void *sp);
-extern void attach_process(int pid);
-extern void tracer_panic(char *format, ...)
-	__attribute__ ((format (printf, 1, 2)));
-extern void set_init_pid(int pid);
-extern int set_user_mode(void *task);
-extern void set_tracing(void *t, int tracing);
-extern int is_tracing(void *task);
-extern void syscall_handler(int sig, union uml_pt_regs *regs);
-extern void exit_kernel(int pid, void *task);
-extern void do_syscall(void *task, int pid, int local_using_sysemu);
-extern void do_sigtrap(void *task);
-extern int is_valid_pid(int pid);
-extern void remap_data(void *segment_start, void *segment_end, int w);
-extern long execute_syscall_tt(void *r);
-
-#endif
-
diff --git a/arch/um/include/tt/uaccess-tt.h b/arch/um/include/tt/uaccess-tt.h
deleted file mode 100644
index 13a64f61fcf..00000000000
--- a/arch/um/include/tt/uaccess-tt.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#ifndef __TT_UACCESS_H
-#define __TT_UACCESS_H
-
-#include "linux/string.h"
-#include "linux/sched.h"
-#include "asm/processor.h"
-#include "asm/errno.h"
-#include "asm/current.h"
-#include "asm/a.out.h"
-#include "uml_uaccess.h"
-
-#define ABOVE_KMEM (16 * 1024 * 1024)
-
-extern unsigned long end_vm;
-extern unsigned long uml_physmem;
-
-#define is_stack(addr, size) \
-	(((unsigned long) (addr) < STACK_TOP) && \
-	 ((unsigned long) (addr) >= STACK_TOP - ABOVE_KMEM) && \
-	 (((unsigned long) (addr) + (size)) <= STACK_TOP))
-
-#define access_ok_tt(type, addr, size) \
-	(is_stack(addr, size))
-
-extern int __do_copy_from_user(void *to, const void *from, int n,
-			       void **fault_addr, void **fault_catcher);
-extern int __do_strncpy_from_user(char *dst, const char *src, size_t n,
-				  void **fault_addr, void **fault_catcher);
-extern int __do_clear_user(void *mem, size_t len, void **fault_addr,
-			   void **fault_catcher);
-extern int __do_strnlen_user(const char *str, unsigned long n,
-			     void **fault_addr, void **fault_catcher);
-
-extern int copy_from_user_tt(void *to, const void __user *from, int n);
-extern int copy_to_user_tt(void __user *to, const void *from, int n);
-extern int strncpy_from_user_tt(char *dst, const char __user *src, int count);
-extern int __clear_user_tt(void __user *mem, int len);
-extern int clear_user_tt(void __user *mem, int len);
-extern int strnlen_user_tt(const void __user *str, int len);
-
-#endif
diff --git a/arch/um/include/um_mmu.h b/arch/um/include/um_mmu.h
index 0fa64323830..8855d8df512 100644
--- a/arch/um/include/um_mmu.h
+++ b/arch/um/include/um_mmu.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -7,34 +7,22 @@
 #define __ARCH_UM_MMU_H
 
 #include "uml-config.h"
-#include "choose-mode.h"
+#include "mm_id.h"
+#include "asm/ldt.h"
 
-#ifdef UML_CONFIG_MODE_TT
-#include "mmu-tt.h"
+typedef struct mm_context {
+	struct mm_id id;
+	unsigned long last_page_table;
+#ifdef CONFIG_3_LEVEL_PGTABLES
+	unsigned long last_pmd;
 #endif
+	struct uml_ldt ldt;
+} mm_context_t;
 
-#ifdef UML_CONFIG_MODE_SKAS
-#include "mmu-skas.h"
-#endif
+extern void __switch_mm(struct mm_id * mm_idp);
 
-typedef union mm_context {
-#ifdef UML_CONFIG_MODE_TT
-	struct mmu_context_tt tt;
-#endif
-#ifdef UML_CONFIG_MODE_SKAS
-	struct mmu_context_skas skas;
-#endif
-} mm_context_t;
+/* Avoid tangled inclusion with asm/ldt.h */
+extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
+extern void free_ldt(struct mm_context *mm);
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h
index 5126a99b596..fdfc06b8560 100644
--- a/arch/um/include/um_uaccess.h
+++ b/arch/um/include/um_uaccess.h
@@ -1,26 +1,16 @@
 /* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #ifndef __ARCH_UM_UACCESS_H
 #define __ARCH_UM_UACCESS_H
 
-#include "choose-mode.h"
-
-#ifdef CONFIG_MODE_TT
-#include "uaccess-tt.h"
-#endif
-
-#ifdef CONFIG_MODE_SKAS
-#include "uaccess-skas.h"
-#endif
-
 #include "asm/fixmap.h"
 
 #define __under_task_size(addr, size) \
 	(((unsigned long) (addr) < TASK_SIZE) && \
-         (((unsigned long) (addr) + (size)) < TASK_SIZE))
+	 (((unsigned long) (addr) + (size)) < TASK_SIZE))
 
 #define __access_ok_vsyscall(type, addr, size) \
 	 ((type == VERIFY_READ) && \
@@ -35,20 +25,14 @@
 	(__addr_range_nowrap(addr, size) && \
 	 (__under_task_size(addr, size) || \
 	  __access_ok_vsyscall(type, addr, size) || \
-	  segment_eq(get_fs(), KERNEL_DS) || \
-	  CHOOSE_MODE_PROC(access_ok_tt, access_ok_skas, type, addr, size)))
+	  segment_eq(get_fs(), KERNEL_DS)))
 
-static inline int copy_from_user(void *to, const void __user *from, int n)
-{
-	return(CHOOSE_MODE_PROC(copy_from_user_tt, copy_from_user_skas, to,
-				from, n));
-}
+extern int copy_from_user(void *to, const void __user *from, int n);
+extern int copy_to_user(void __user *to, const void *from, int n);
 
-static inline int copy_to_user(void __user *to, const void *from, int n)
-{
-	return(CHOOSE_MODE_PROC(copy_to_user_tt, copy_to_user_skas, to, 
-				from, n));
-}
+extern int __do_copy_to_user(void *to, const void *from, int n,
+			     void **fault_addr, jmp_buf **fault_catcher);
+extern void __do_copy(void *to, const void *from, int n);
 
 /*
  * strncpy_from_user: - Copy a NUL terminated string from userspace.
@@ -69,11 +53,7 @@ static inline int copy_to_user(void __user *to, const void *from, int n)
  * and returns @count.
  */
 
-static inline int strncpy_from_user(char *dst, const char __user *src, int count)
-{
-	return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas,
-				dst, src, count));
-}
+extern int strncpy_from_user(char *dst, const char __user *src, int count);
 
 /*
  * __clear_user: - Zero a block of memory in user space, with less checking.
@@ -86,10 +66,7 @@ static inline int strncpy_from_user(char *dst, const char __user *src, int count
  * Returns number of bytes that could not be cleared.
  * On success, this will be zero.
  */
-static inline int __clear_user(void *mem, int len)
-{
-	return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len));
-}
+extern int __clear_user(void __user *mem, int len);
 
 /*
  * clear_user: - Zero a block of memory in user space.
@@ -101,10 +78,7 @@ static inline int __clear_user(void *mem, int len)
  * Returns number of bytes that could not be cleared.
  * On success, this will be zero.
  */
-static inline int clear_user(void __user *mem, int len)
-{
-	return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len));
-}
+extern int clear_user(void __user *mem, int len);
 
 /*
  * strlen_user: - Get the size of a string in user space.
@@ -117,20 +91,6 @@ static inline int clear_user(void __user *mem, int len)
  * On exception, returns 0.
  * If the string is too long, returns a value greater than @n.
  */
-static inline int strnlen_user(const void __user *str, long len)
-{
-	return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len));
-}
+extern int strnlen_user(const void __user *str, int len);
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/uml_uaccess.h b/arch/um/include/uml_uaccess.h
deleted file mode 100644
index c0df11d06f5..00000000000
--- a/arch/um/include/uml_uaccess.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __UML_UACCESS_H__
-#define __UML_UACCESS_H__
-
-extern int __do_copy_to_user(void *to, const void *from, int n,
-			     void **fault_addr, void **fault_catcher);
-void __do_copy(void *to, const void *from, int n);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/include/user.h b/arch/um/include/user.h
index d380e6d91a9..99033ff28a7 100644
--- a/arch/um/include/user.h
+++ b/arch/um/include/user.h
@@ -14,10 +14,12 @@
  */
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
-/*
- * This will provide the size_t definition in both kernel and userspace builds
- */
+/* This is to get size_t */
+#ifdef __KERNEL__
 #include <linux/types.h>
+#else
+#include <stddef.h>
+#endif
 
 extern void panic(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index c5cf4a0827b..499e5e95e60 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 # Licensed under the GPL
 #
 
@@ -9,15 +9,12 @@ clean-files :=
 obj-y = config.o exec.o exitcode.o init_task.o irq.o ksyms.o mem.o \
 	physmem.o process.o ptrace.o reboot.o sigio.o \
 	signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o uaccess.o \
-	um_arch.o umid.o
+	um_arch.o umid.o skas/
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
 obj-$(CONFIG_GCOV)	+= gmon_syms.o
 
-obj-$(CONFIG_MODE_TT) += tt/
-obj-$(CONFIG_MODE_SKAS) += skas/
-
 USER_OBJS := config.o
 
 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index 41850906116..3866f4960f0 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -10,8 +10,6 @@ SECTIONS
   PROVIDE (__executable_start = START);
   . = START + SIZEOF_HEADERS;
   .interp         : { *(.interp) }
-  /* Used in arch/um/kernel/mem.c. Any memory between START and __binary_start
-   * is remapped.*/
   __binary_start = .;
   . = ALIGN(4096);		/* Init code and data */
   _text = .;
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index ce6828fd396..8196450451c 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -1,35 +1,44 @@
 /*
- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/slab.h"
+#include "linux/stddef.h"
+#include "linux/fs.h"
 #include "linux/smp_lock.h"
 #include "linux/ptrace.h"
-#include "linux/fs.h"
-#include "asm/ptrace.h"
-#include "asm/pgtable.h"
-#include "asm/tlbflush.h"
+#include "linux/sched.h"
+#include "asm/current.h"
+#include "asm/processor.h"
 #include "asm/uaccess.h"
-#include "kern_util.h"
 #include "as-layout.h"
 #include "mem_user.h"
-#include "kern.h"
-#include "irq_user.h"
-#include "tlb.h"
+#include "skas.h"
 #include "os.h"
-#include "choose-mode.h"
-#include "mode_kern.h"
 
 void flush_thread(void)
 {
+	void *data = NULL;
+	unsigned long end = proc_mm ? task_size : STUB_START;
+	int ret;
+
 	arch_flush_thread(&current->thread.arch);
-	CHOOSE_MODE(flush_thread_tt(), flush_thread_skas());
+
+	ret = unmap(&current->mm->context.id, 0, end, 1, &data);
+	if (ret) {
+		printk(KERN_ERR "flush_thread - clearing address space failed, "
+		       "err = %d\n", ret);
+		force_sig(SIGKILL, current);
+	}
+
+	__switch_mm(&current->mm->context.id);
 }
 
 void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 {
-	CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp);
+	set_fs(USER_DS);
+	PT_REGS_IP(regs) = eip;
+	PT_REGS_SP(regs) = esp;
 }
 
 #ifdef CONFIG_TTY_LOG
@@ -39,7 +48,7 @@ extern void log_exec(char **argv, void *tty);
 static long execve1(char *file, char __user * __user *argv,
 		    char __user *__user *env)
 {
-        long error;
+	long error;
 #ifdef CONFIG_TTY_LOG
 	struct tty_struct *tty;
 
@@ -49,17 +58,16 @@ static long execve1(char *file, char __user * __user *argv,
 		log_exec(argv, tty);
 	mutex_unlock(&tty_mutex);
 #endif
-        error = do_execve(file, argv, env, &current->thread.regs);
-        if (error == 0){
+	error = do_execve(file, argv, env, &current->thread.regs);
+	if (error == 0) {
 		task_lock(current);
-                current->ptrace &= ~PT_DTRACE;
+		current->ptrace &= ~PT_DTRACE;
 #ifdef SUBARCH_EXECVE1
 		SUBARCH_EXECVE1(&current->thread.regs.regs);
 #endif
 		task_unlock(current);
-                set_cmdline(current_cmd());
-        }
-        return(error);
+	}
+	return error;
 }
 
 long um_execve(char *file, char __user *__user *argv, char __user *__user *env)
@@ -67,9 +75,9 @@ long um_execve(char *file, char __user *__user *argv, char __user *__user *env)
 	long err;
 
 	err = execve1(file, argv, env);
-	if(!err)
-		do_longjmp(current->thread.exec_buf, 1);
-	return(err);
+	if (!err)
+		UML_LONGJMP(current->thread.exec_buf, 1);
+	return err;
 }
 
 long sys_execve(char __user *file, char __user *__user *argv,
@@ -86,5 +94,5 @@ long sys_execve(char __user *file, char __user *__user *argv,
 	putname(filename);
  out:
 	unlock_kernel();
-	return(error);
+	return error;
 }
diff --git a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
index cba516e6c99..dcfceca9505 100644
--- a/arch/um/kernel/init_task.c
+++ b/arch/um/kernel/init_task.c
@@ -3,16 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/mm.h"
-#include "linux/fs.h"
-#include "linux/module.h"
 #include "linux/sched.h"
 #include "linux/init_task.h"
+#include "linux/fs.h"
+#include "linux/module.h"
 #include "linux/mqueue.h"
 #include "asm/uaccess.h"
-#include "asm/pgtable.h"
-#include "mem_user.h"
-#include "os.h"
 
 static struct fs_struct init_fs = INIT_FS;
 struct mm_struct init_mm = INIT_MM(init_mm);
@@ -46,8 +42,3 @@ union thread_union init_thread_union
 union thread_union cpu0_irqstack
 	__attribute__((__section__(".data.init_irqstack"))) =
 		{ INIT_THREAD_INFO(init_task) };
-
-void unprotect_stack(unsigned long stack)
-{
-	os_protect_memory((void *) stack, THREAD_SIZE, 1, 1, 0);
-}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index cf0dd9cf8c4..277fce17b08 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,37 +1,19 @@
 /*
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  */
 
-#include "linux/kernel.h"
-#include "linux/module.h"
-#include "linux/smp.h"
-#include "linux/kernel_stat.h"
+#include "linux/cpumask.h"
+#include "linux/hardirq.h"
 #include "linux/interrupt.h"
-#include "linux/random.h"
-#include "linux/slab.h"
-#include "linux/file.h"
-#include "linux/proc_fs.h"
-#include "linux/init.h"
+#include "linux/kernel_stat.h"
+#include "linux/module.h"
 #include "linux/seq_file.h"
-#include "linux/profile.h"
-#include "linux/hardirq.h"
-#include "asm/irq.h"
-#include "asm/hw_irq.h"
-#include "asm/atomic.h"
-#include "asm/signal.h"
-#include "asm/system.h"
-#include "asm/errno.h"
-#include "asm/uaccess.h"
+#include "as-layout.h"
 #include "kern_util.h"
-#include "irq_user.h"
-#include "irq_kern.h"
 #include "os.h"
-#include "sigio.h"
-#include "misc_constants.h"
-#include "as-layout.h"
 
 /*
  * Generic, controller-independent functions:
@@ -71,9 +53,8 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_putc(p, '\n');
 skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
-	} else if (i == NR_IRQS) {
+	} else if (i == NR_IRQS)
 		seq_putc(p, '\n');
-	}
 
 	return 0;
 }
@@ -91,7 +72,7 @@ static struct irq_fd **last_irq_ptr = &active_fds;
 
 extern void free_irqs(void);
 
-void sigio_handler(int sig, union uml_pt_regs *regs)
+void sigio_handler(int sig, struct uml_pt_regs *regs)
 {
 	struct irq_fd *irq_fd;
 	int n;
@@ -102,11 +83,13 @@ void sigio_handler(int sig, union uml_pt_regs *regs)
 	while (1) {
 		n = os_waiting_for_events(active_fds);
 		if (n <= 0) {
-			if(n == -EINTR) continue;
+			if (n == -EINTR)
+				continue;
 			else break;
 		}
 
-		for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
+		for (irq_fd = active_fds; irq_fd != NULL;
+		     irq_fd = irq_fd->next) {
 			if (irq_fd->current_events != 0) {
 				irq_fd->current_events = 0;
 				do_IRQ(irq_fd->irq, regs);
@@ -138,8 +121,7 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 
 	if (type == IRQ_READ)
 		events = UM_POLLIN | UM_POLLPRI;
-	else
-		events = UM_POLLOUT;
+	else events = UM_POLLOUT;
 	*new_fd = ((struct irq_fd) { .next  		= NULL,
 				     .id 		= dev_id,
 				     .fd 		= fd,
@@ -153,9 +135,10 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 	spin_lock_irqsave(&irq_lock, flags);
 	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
 		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-			printk("Registering fd %d twice\n", fd);
-			printk("Irqs : %d, %d\n", irq_fd->irq, irq);
-			printk("Ids : 0x%p, 0x%p\n", irq_fd->id, dev_id);
+			printk(KERN_ERR "Registering fd %d twice\n", fd);
+			printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
+			printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
+			       dev_id);
 			goto out_unlock;
 		}
 	}
@@ -171,7 +154,8 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 		if (n == 0)
 			break;
 
-		/* n > 0
+		/*
+		 * n > 0
 		 * It means we couldn't put new pollfd to current pollfds
 		 * and tmp_fds is NULL or too small for new pollfds array.
 		 * Needed size is equal to n as minimum.
@@ -197,7 +181,8 @@ int activate_fd(int irq, int fd, int type, void *dev_id)
 
 	spin_unlock_irqrestore(&irq_lock, flags);
 
-	/* This calls activate_fd, so it has to be outside the critical
+	/*
+	 * This calls activate_fd, so it has to be outside the critical
 	 * section.
 	 */
 	maybe_sigio_broken(fd, (type == IRQ_READ));
@@ -264,13 +249,14 @@ static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
 		i++;
 	}
 	if (irq == NULL) {
-		printk("find_irq_by_fd doesn't have descriptor %d\n", fd);
+		printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
+		       fd);
 		goto out;
 	}
 	fdi = os_get_pollfd(i);
 	if ((fdi != -1) && (fdi != fd)) {
-		printk("find_irq_by_fd - mismatch between active_fds and "
-		       "pollfds, fd %d vs %d, need %d\n", irq->fd,
+		printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
+		       "and pollfds, fd %d vs %d, need %d\n", irq->fd,
 		       fdi, fd);
 		irq = NULL;
 		goto out;
@@ -306,7 +292,7 @@ void deactivate_fd(int fd, int irqnum)
 
 	spin_lock_irqsave(&irq_lock, flags);
 	irq = find_irq_by_fd(fd, irqnum, &i);
-	if(irq == NULL){
+	if (irq == NULL) {
 		spin_unlock_irqrestore(&irq_lock, flags);
 		return;
 	}
@@ -339,36 +325,12 @@ int deactivate_all_fds(void)
 	return 0;
 }
 
-#ifdef CONFIG_MODE_TT
-void forward_interrupts(int pid)
-{
-	struct irq_fd *irq;
-	unsigned long flags;
-	int err;
-
-	spin_lock_irqsave(&irq_lock, flags);
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		err = os_set_owner(irq->fd, pid);
-		if (err < 0) {
-			/* XXX Just remove the irq rather than
-			 * print out an infinite stream of these
-			 */
-			printk("Failed to forward %d to pid %d, err = %d\n",
-			       irq->fd, pid, -err);
-		}
-
-		irq->pid = pid;
-	}
-	spin_unlock_irqrestore(&irq_lock, flags);
-}
-#endif
-
 /*
  * do_IRQ handles all normal device IRQ's (the special
  * SMP cross-CPU interrupts have their own specific
  * handlers).
  */
-unsigned int do_IRQ(int irq, union uml_pt_regs *regs)
+unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
 	irq_enter();
@@ -396,8 +358,10 @@ int um_request_irq(unsigned int irq, int fd, int type,
 EXPORT_SYMBOL(um_request_irq);
 EXPORT_SYMBOL(reactivate_fd);
 
-/* hw_interrupt_type must define (startup || enable) &&
- * (shutdown || disable) && end */
+/*
+ * hw_interrupt_type must define (startup || enable) &&
+ * (shutdown || disable) && end
+ */
 static void dummy(unsigned int irq)
 {
 }
@@ -446,7 +410,8 @@ int init_aio_irq(int irq, char *name, irq_handler_t handler)
 
 	err = os_pipe(fds, 1, 1);
 	if (err) {
-		printk("init_aio_irq - os_pipe failed, err = %d\n", -err);
+		printk(KERN_ERR "init_aio_irq - os_pipe failed, err = %d\n",
+		       -err);
 		goto out;
 	}
 
@@ -454,7 +419,8 @@ int init_aio_irq(int irq, char *name, irq_handler_t handler)
 			     IRQF_DISABLED | IRQF_SAMPLE_RANDOM, name,
 			     (void *) (long) fds[0]);
 	if (err) {
-		printk("init_aio_irq - : um_request_irq failed, err = %d\n",
+		printk(KERN_ERR "init_aio_irq - : um_request_irq failed, "
+		       "err = %d\n",
 		       err);
 		goto out_close;
 	}
@@ -525,8 +491,9 @@ unsigned long to_irq_stack(unsigned long *mask_out)
 	int nested;
 
 	mask = xchg(&pending_mask, *mask_out);
-	if(mask != 0){
-		/* If any interrupts come in at this point, we want to
+	if (mask != 0) {
+		/*
+		 * If any interrupts come in at this point, we want to
 		 * make sure that their bits aren't lost by our
 		 * putting our bit in.  So, this loop accumulates bits
 		 * until xchg returns the same value that we put in.
@@ -538,13 +505,13 @@ unsigned long to_irq_stack(unsigned long *mask_out)
 		do {
 			old |= mask;
 			mask = xchg(&pending_mask, old);
-		} while(mask != old);
+		} while (mask != old);
 		return 1;
 	}
 
 	ti = current_thread_info();
 	nested = (ti->real_thread != NULL);
-	if(!nested){
+	if (!nested) {
 		struct task_struct *task;
 		struct thread_info *tti;
 
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 7b3e53fb807..1b388b41d95 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -1,22 +1,15 @@
 /* 
- * Copyright (C) 2001 - 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include "linux/module.h"
-#include "linux/string.h"
-#include "linux/smp_lock.h"
-#include "linux/spinlock.h"
-#include "linux/highmem.h"
-#include "asm/current.h"
-#include "asm/processor.h"
-#include "asm/unistd.h"
-#include "asm/pgalloc.h"
-#include "asm/pgtable.h"
-#include "asm/page.h"
+#include "linux/syscalls.h"
+#include "asm/a.out.h"
 #include "asm/tlbflush.h"
-#include "kern_util.h"
+#include "asm/uaccess.h"
 #include "as-layout.h"
+#include "kern_util.h"
 #include "mem_user.h"
 #include "os.h"
 
@@ -34,30 +27,19 @@ EXPORT_SYMBOL(get_kmem_end);
 EXPORT_SYMBOL(high_physmem);
 EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(um_virt_to_phys);
-EXPORT_SYMBOL(mode_tt);
 EXPORT_SYMBOL(handle_page_fault);
 EXPORT_SYMBOL(find_iomem);
 
-#ifdef CONFIG_MODE_TT
-EXPORT_SYMBOL(stop);
-EXPORT_SYMBOL(strncpy_from_user_tt);
-EXPORT_SYMBOL(copy_from_user_tt);
-EXPORT_SYMBOL(copy_to_user_tt);
-#endif
-
-#ifdef CONFIG_MODE_SKAS
-EXPORT_SYMBOL(strnlen_user_skas);
-EXPORT_SYMBOL(strncpy_from_user_skas);
-EXPORT_SYMBOL(copy_to_user_skas);
-EXPORT_SYMBOL(copy_from_user_skas);
-EXPORT_SYMBOL(clear_user_skas);
-#endif
+EXPORT_SYMBOL(strnlen_user);
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(clear_user);
 EXPORT_SYMBOL(uml_strdup);
 
 EXPORT_SYMBOL(os_stat_fd);
 EXPORT_SYMBOL(os_stat_file);
 EXPORT_SYMBOL(os_access);
-EXPORT_SYMBOL(os_print_error);
 EXPORT_SYMBOL(os_get_exec_close);
 EXPORT_SYMBOL(os_set_exec_close);
 EXPORT_SYMBOL(os_getpid);
@@ -85,9 +67,6 @@ EXPORT_SYMBOL(run_helper);
 EXPORT_SYMBOL(start_thread);
 EXPORT_SYMBOL(dump_thread);
 
-EXPORT_SYMBOL(do_gettimeofday);
-EXPORT_SYMBOL(do_settimeofday);
-
 #ifdef CONFIG_SMP
 
 /* required for SMP */
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index d2b11f24269..8456397f5f4 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -17,7 +17,7 @@
 #include "as-layout.h"
 #include "kern.h"
 #include "mem_user.h"
-#include "uml_uaccess.h"
+#include "um_uaccess.h"
 #include "os.h"
 #include "linux/types.h"
 #include "linux/string.h"
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 5ee7e851bbc..e66432f4248 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -1,25 +1,17 @@
 /*
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/mm.h"
-#include "linux/rbtree.h"
-#include "linux/slab.h"
-#include "linux/vmalloc.h"
 #include "linux/bootmem.h"
-#include "linux/module.h"
+#include "linux/mm.h"
 #include "linux/pfn.h"
-#include "asm/types.h"
-#include "asm/pgtable.h"
-#include "kern_util.h"
+#include "asm/page.h"
 #include "as-layout.h"
-#include "mode_kern.h"
-#include "mem.h"
+#include "init.h"
+#include "kern.h"
 #include "mem_user.h"
 #include "os.h"
-#include "kern.h"
-#include "init.h"
 
 static int physmem_fd = -1;
 
@@ -49,10 +41,10 @@ int __init init_maps(unsigned long physmem, unsigned long iomem,
 	total_len = phys_len + iomem_len + highmem_len;
 
 	map = alloc_bootmem_low_pages(total_len);
-	if(map == NULL)
+	if (map == NULL)
 		return -ENOMEM;
 
-	for(i = 0; i < total_pages; i++){
+	for (i = 0; i < total_pages; i++) {
 		p = &map[i];
 		memset(p, 0, sizeof(struct page));
 		SetPageReserved(p);
@@ -68,8 +60,8 @@ static unsigned long kmem_top = 0;
 
 unsigned long get_kmem_end(void)
 {
-	if(kmem_top == 0)
-		kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas);
+	if (kmem_top == 0)
+		kmem_top = host_task_size - 1024 * 1024;
 	return kmem_top;
 }
 
@@ -81,9 +73,9 @@ void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
 
 	fd = phys_mapping(phys, &offset);
 	err = os_map_memory((void *) virt, fd, offset, len, r, w, x);
-	if(err) {
-		if(err == -ENOMEM)
-			printk("try increasing the host's "
+	if (err) {
+		if (err == -ENOMEM)
+			printk(KERN_ERR "try increasing the host's "
 			       "/proc/sys/vm/max_map_count to <physical "
 			       "memory size>/4096\n");
 		panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, "
@@ -105,13 +97,16 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
 
 	offset = uml_reserved - uml_physmem;
 	err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
-			    len - offset, 1, 1, 0);
-	if(err < 0){
-		os_print_error(err, "Mapping memory");
+			    len - offset, 1, 1, 1);
+	if (err < 0) {
+		printf("setup_physmem - mapping %ld bytes of memory at 0x%p "
+		       "failed - errno = %d\n", len - offset,
+		       (void *) uml_reserved, err);
 		exit(1);
 	}
 
-	/* Special kludge - This page will be mapped in to userspace processes
+	/*
+	 * Special kludge - This page will be mapped in to userspace processes
 	 * from physmem_fd, so it needs to be written out there.
 	 */
 	os_seek_file(physmem_fd, __pa(&__syscall_stub_start));
@@ -122,20 +117,20 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
 		     len - bootmap_size - reserve);
 }
 
-int phys_mapping(unsigned long phys, __u64 *offset_out)
+int phys_mapping(unsigned long phys, unsigned long long *offset_out)
 {
 	int fd = -1;
 
-	if(phys < physmem_size){
+	if (phys < physmem_size) {
 		fd = physmem_fd;
 		*offset_out = phys;
 	}
-	else if(phys < __pa(end_iomem)){
+	else if (phys < __pa(end_iomem)) {
 		struct iomem_region *region = iomem_regions;
 
-		while(region != NULL){
-			if((phys >= region->phys) &&
-			   (phys < region->phys + region->size)){
+		while (region != NULL) {
+			if ((phys >= region->phys) &&
+			    (phys < region->phys + region->size)) {
 				fd = region->fd;
 				*offset_out = phys - region->phys;
 				break;
@@ -143,7 +138,7 @@ int phys_mapping(unsigned long phys, __u64 *offset_out)
 			region = region->next;
 		}
 	}
-	else if(phys < __pa(end_iomem) + highmem){
+	else if (phys < __pa(end_iomem) + highmem) {
 		fd = physmem_fd;
 		*offset_out = phys - iomem_size;
 	}
@@ -188,8 +183,8 @@ unsigned long find_iomem(char *driver, unsigned long *len_out)
 {
 	struct iomem_region *region = iomem_regions;
 
-	while(region != NULL){
-		if(!strcmp(region->driver, driver)){
+	while (region != NULL) {
+		if (!strcmp(region->driver, driver)) {
 			*len_out = region->size;
 			return region->virt;
 		}
@@ -206,12 +201,12 @@ int setup_iomem(void)
 	unsigned long iomem_start = high_physmem + PAGE_SIZE;
 	int err;
 
-	while(region != NULL){
+	while (region != NULL) {
 		err = os_map_memory((void *) iomem_start, region->fd, 0,
 				    region->size, 1, 1, 0);
-		if(err)
-			printk("Mapping iomem region for driver '%s' failed, "
-			       "errno = %d\n", region->driver, -err);
+		if (err)
+			printk(KERN_ERR "Mapping iomem region for driver '%s' "
+			       "failed, errno = %d\n", region->driver, -err);
 		else {
 			region->virt = iomem_start;
 			region->phys = __pa(region->virt);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index bfa52f206bb..0eae00b3e58 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -1,53 +1,30 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright 2003 PathScale, Inc.
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/sched.h"
-#include "linux/interrupt.h"
-#include "linux/string.h"
+#include "linux/stddef.h"
+#include "linux/err.h"
+#include "linux/hardirq.h"
 #include "linux/mm.h"
-#include "linux/slab.h"
-#include "linux/utsname.h"
-#include "linux/fs.h"
-#include "linux/utime.h"
-#include "linux/smp_lock.h"
-#include "linux/module.h"
-#include "linux/init.h"
-#include "linux/capability.h"
-#include "linux/vmalloc.h"
-#include "linux/spinlock.h"
+#include "linux/personality.h"
 #include "linux/proc_fs.h"
 #include "linux/ptrace.h"
 #include "linux/random.h"
-#include "linux/personality.h"
-#include "asm/unistd.h"
-#include "asm/mman.h"
-#include "asm/segment.h"
-#include "asm/stat.h"
+#include "linux/sched.h"
+#include "linux/tick.h"
+#include "linux/threads.h"
 #include "asm/pgtable.h"
-#include "asm/processor.h"
-#include "asm/tlbflush.h"
 #include "asm/uaccess.h"
-#include "asm/user.h"
-#include "kern_util.h"
 #include "as-layout.h"
-#include "kern.h"
-#include "signal_kern.h"
-#include "init.h"
-#include "irq_user.h"
-#include "mem_user.h"
-#include "tlb.h"
-#include "frame_kern.h"
-#include "sigcontext.h"
+#include "kern_util.h"
 #include "os.h"
-#include "mode.h"
-#include "mode_kern.h"
-#include "choose-mode.h"
+#include "skas.h"
+#include "tlb.h"
 
-/* This is a per-cpu array.  A processor only modifies its entry and it only
+/*
+ * This is a per-cpu array.  A processor only modifies its entry and it only
  * cares about its entry, so it's OK if another processor is modifying its
  * entry.
  */
@@ -55,15 +32,16 @@ struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } };
 
 static inline int external_pid(struct task_struct *task)
 {
-	return CHOOSE_MODE_PROC(external_pid_tt, external_pid_skas, task);
+	/* FIXME: Need to look up userspace_pid by cpu */
+	return userspace_pid[0];
 }
 
 int pid_to_processor_id(int pid)
 {
 	int i;
 
-	for(i = 0; i < ncpus; i++){
-		if(cpu_tasks[i].pid == pid)
+	for(i = 0; i < ncpus; i++) {
+		if (cpu_tasks[i].pid == pid)
 			return i;
 	}
 	return -1;
@@ -82,9 +60,9 @@ unsigned long alloc_stack(int order, int atomic)
 	if (atomic)
 		flags = GFP_ATOMIC;
 	page = __get_free_pages(flags, order);
-	if(page == 0)
+	if (page == 0)
 		return 0;
-	stack_protections(page);
+
 	return page;
 }
 
@@ -105,6 +83,8 @@ static inline void set_current(struct task_struct *task)
 		{ external_pid(task), task });
 }
 
+extern void arch_switch_to(struct task_struct *from, struct task_struct *to);
+
 void *_switch_to(void *prev, void *next, void *last)
 {
 	struct task_struct *from = prev;
@@ -114,9 +94,14 @@ void *_switch_to(void *prev, void *next, void *last)
 	set_current(to);
 
 	do {
-		current->thread.saved_task = NULL ;
-		CHOOSE_MODE_PROC(switch_to_tt, switch_to_skas, prev, next);
-		if(current->thread.saved_task)
+		current->thread.saved_task = NULL;
+
+		switch_threads(&from->thread.switch_buf,
+			       &to->thread.switch_buf);
+
+		arch_switch_to(current->thread.prev_sched, current);
+
+		if (current->thread.saved_task)
 			show_regs(&(current->thread.regs));
 		next= current->thread.saved_task;
 		prev= current;
@@ -128,20 +113,14 @@ void *_switch_to(void *prev, void *next, void *last)
 
 void interrupt_end(void)
 {
-	if(need_resched())
+	if (need_resched())
 		schedule();
-	if(test_tsk_thread_flag(current, TIF_SIGPENDING))
+	if (test_tsk_thread_flag(current, TIF_SIGPENDING))
 		do_signal();
 }
 
-void release_thread(struct task_struct *task)
-{
-	CHOOSE_MODE(release_thread_tt(task), release_thread_skas(task));
-}
-
 void exit_thread(void)
 {
-	unprotect_stack((unsigned long) current_thread);
 }
 
 void *get_current(void)
@@ -149,28 +128,99 @@ void *get_current(void)
 	return current;
 }
 
+extern void schedule_tail(struct task_struct *prev);
+
+/*
+ * This is called magically, by its address being stuffed in a jmp_buf
+ * and being longjmp-d to.
+ */
+void new_thread_handler(void)
+{
+	int (*fn)(void *), n;
+	void *arg;
+
+	if (current->thread.prev_sched != NULL)
+		schedule_tail(current->thread.prev_sched);
+	current->thread.prev_sched = NULL;
+
+	fn = current->thread.request.u.thread.proc;
+	arg = current->thread.request.u.thread.arg;
+
+	/*
+	 * The return value is 1 if the kernel thread execs a process,
+	 * 0 if it just exits
+	 */
+	n = run_kernel_thread(fn, arg, &current->thread.exec_buf);
+	if (n == 1) {
+		/* Handle any immediate reschedules or signals */
+		interrupt_end();
+		userspace(&current->thread.regs.regs);
+	}
+	else do_exit(0);
+}
+
+/* Called magically, see new_thread_handler above */
+void fork_handler(void)
+{
+	force_flush_all();
+	if (current->thread.prev_sched == NULL)
+		panic("blech");
+
+	schedule_tail(current->thread.prev_sched);
+
+	/*
+	 * XXX: if interrupt_end() calls schedule, this call to
+	 * arch_switch_to isn't needed. We may want to take advantage of
+	 * that to improve performance. -bb
+	 */
+	arch_switch_to(current->thread.prev_sched, current);
+
+	current->thread.prev_sched = NULL;
+
+	/* Handle any immediate reschedules or signals */
+	interrupt_end();
+
+	userspace(&current->thread.regs.regs);
+}
+
 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		unsigned long stack_top, struct task_struct * p,
 		struct pt_regs *regs)
 {
-	int ret;
+	void (*handler)(void);
+	int ret = 0;
 
 	p->thread = (struct thread_struct) INIT_THREAD;
-	ret = CHOOSE_MODE_PROC(copy_thread_tt, copy_thread_skas, nr,
-				clone_flags, sp, stack_top, p, regs);
 
-	if (ret || !current->thread.forking)
-		goto out;
+	if (current->thread.forking) {
+	  	memcpy(&p->thread.regs.regs, &regs->regs,
+		       sizeof(p->thread.regs.regs));
+		REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.gp, 0);
+		if (sp != 0)
+			REGS_SP(p->thread.regs.regs.gp) = sp;
 
-	clear_flushed_tls(p);
+		handler = fork_handler;
 
-	/*
-	 * Set a new TLS for the child thread?
-	 */
-	if (clone_flags & CLONE_SETTLS)
-		ret = arch_copy_tls(p);
+		arch_copy_thread(&current->thread.arch, &p->thread.arch);
+	}
+	else {
+		init_thread_registers(&p->thread.regs.regs);
+		p->thread.request.u.thread = current->thread.request.u.thread;
+		handler = new_thread_handler;
+	}
+
+	new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
+
+	if (current->thread.forking) {
+		clear_flushed_tls(p);
+
+		/*
+		 * Set a new TLS for the child thread?
+		 */
+		if (clone_flags & CLONE_SETTLS)
+			ret = arch_copy_tls(p);
+	}
 
-out:
 	return ret;
 }
 
@@ -179,39 +229,35 @@ void initial_thread_cb(void (*proc)(void *), void *arg)
 	int save_kmalloc_ok = kmalloc_ok;
 
 	kmalloc_ok = 0;
-	CHOOSE_MODE_PROC(initial_thread_cb_tt, initial_thread_cb_skas, proc,
-			 arg);
+	initial_thread_cb_skas(proc, arg);
 	kmalloc_ok = save_kmalloc_ok;
 }
 
-#ifdef CONFIG_MODE_TT
-unsigned long stack_sp(unsigned long page)
-{
-	return page + PAGE_SIZE - sizeof(void *);
-}
-#endif
-
 void default_idle(void)
 {
-	CHOOSE_MODE(uml_idle_timer(), (void) 0);
+	unsigned long long nsecs;
 
-	while(1){
+	while(1) {
 		/* endless idle loop with no priority at all */
 
 		/*
 		 * although we are an idle CPU, we do not want to
 		 * get into the scheduler unnecessarily.
 		 */
-		if(need_resched())
+		if (need_resched())
 			schedule();
 
-		idle_sleep(10);
+		tick_nohz_stop_sched_tick();
+		nsecs = disable_timer();
+		idle_sleep(nsecs);
+		tick_nohz_restart_sched_tick();
 	}
 }
 
 void cpu_idle(void)
 {
-	CHOOSE_MODE(init_idle_tt(), init_idle_skas());
+	cpu_tasks[current_thread->cpu].pid = os_getpid();
+	default_idle();
 }
 
 void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
@@ -223,26 +269,26 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
 	pte_t *pte;
 	pte_t ptent;
 
-	if(task->mm == NULL)
+	if (task->mm == NULL)
 		return ERR_PTR(-EINVAL);
 	pgd = pgd_offset(task->mm, addr);
-	if(!pgd_present(*pgd))
+	if (!pgd_present(*pgd))
 		return ERR_PTR(-EINVAL);
 
 	pud = pud_offset(pgd, addr);
-	if(!pud_present(*pud))
+	if (!pud_present(*pud))
 		return ERR_PTR(-EINVAL);
 
 	pmd = pmd_offset(pud, addr);
-	if(!pmd_present(*pmd))
+	if (!pmd_present(*pmd))
 		return ERR_PTR(-EINVAL);
 
 	pte = pte_offset_kernel(pmd, addr);
 	ptent = *pte;
-	if(!pte_present(ptent))
+	if (!pte_present(ptent))
 		return ERR_PTR(-EINVAL);
 
-	if(pte_out != NULL)
+	if (pte_out != NULL)
 		*pte_out = ptent;
 	return (void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK);
 }
@@ -315,7 +361,7 @@ int smp_sigio_handler(void)
 #ifdef CONFIG_SMP
 	int cpu = current_thread->cpu;
 	IPI_handler(cpu);
-	if(cpu != 0)
+	if (cpu != 0)
 		return 1;
 #endif
 	return 0;
@@ -343,7 +389,8 @@ int get_using_sysemu(void)
 
 static int proc_read_sysemu(char *buf, char **start, off_t offset, int size,int *eof, void *data)
 {
-	if (snprintf(buf, size, "%d\n", get_using_sysemu()) < size) /*No overflow*/
+	if (snprintf(buf, size, "%d\n", get_using_sysemu()) < size)
+		/* No overflow */
 		*eof = 1;
 
 	return strlen(buf);
@@ -358,7 +405,8 @@ static int proc_write_sysemu(struct file *file,const char __user *buf, unsigned
 
 	if (tmp[0] >= '0' && tmp[0] <= '2')
 		set_using_sysemu(tmp[0] - '0');
-	return count; /*We use the first char, but pretend to write everything*/
+	/* We use the first char, but pretend to write everything */
+	return count;
 }
 
 int __init make_proc_sysemu(void)
@@ -388,10 +436,10 @@ int singlestepping(void * t)
 	struct task_struct *task = t ? t : current;
 
 	if ( ! (task->ptrace & PT_DTRACE) )
-		return(0);
+		return 0;
 
 	if (task->thread.singlestep_syscall)
-		return(1);
+		return 1;
 
 	return 2;
 }
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 6916c8888db..a0eba083306 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -1,35 +1,27 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/mm.h"
-#include "linux/errno.h"
-#include "linux/smp_lock.h"
-#include "linux/security.h"
-#include "linux/ptrace.h"
 #include "linux/audit.h"
+#include "linux/ptrace.h"
+#include "linux/sched.h"
+#include "asm/uaccess.h"
 #ifdef CONFIG_PROC_MM
-#include "linux/proc_mm.h"
+#include "proc_mm.h"
 #endif
-#include "asm/ptrace.h"
-#include "asm/uaccess.h"
-#include "kern_util.h"
 #include "skas_ptrace.h"
-#include "sysdep/ptrace.h"
-#include "os.h"
 
 static inline void set_singlestepping(struct task_struct *child, int on)
 {
-        if (on)
-                child->ptrace |= PT_DTRACE;
-        else
-                child->ptrace &= ~PT_DTRACE;
-        child->thread.singlestep_syscall = 0;
+	if (on)
+		child->ptrace |= PT_DTRACE;
+	else
+		child->ptrace &= ~PT_DTRACE;
+	child->thread.singlestep_syscall = 0;
 
 #ifdef SUBARCH_SET_SINGLESTEPPING
-        SUBARCH_SET_SINGLESTEPPING(child, on);
+	SUBARCH_SET_SINGLESTEPPING(child, on);
 #endif
 }
 
@@ -37,8 +29,8 @@ static inline void set_singlestepping(struct task_struct *child, int on)
  * Called by kernel/ptrace.c when detaching..
  */
 void ptrace_disable(struct task_struct *child)
-{ 
-        set_singlestepping(child,0);
+{
+	set_singlestepping(child,0);
 }
 
 extern int peek_user(struct task_struct * child, long addr, long data);
@@ -50,40 +42,40 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	unsigned long __user *p = (void __user *)(unsigned long)data;
 
 	switch (request) {
-		/* when I and D space are separate, these will need to be fixed. */
-	case PTRACE_PEEKTEXT: /* read word at location addr. */ 
+	/* read word at location addr. */
+	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKDATA:
 		ret = generic_ptrace_peekdata(child, addr, data);
 		break;
 
 	/* read the word at location addr in the USER area. */
-        case PTRACE_PEEKUSR:
-                ret = peek_user(child, addr, data);
-                break;
+	case PTRACE_PEEKUSR:
+		ret = peek_user(child, addr, data);
+		break;
 
-	/* when I and D space are separate, this will have to be fixed. */
-	case PTRACE_POKETEXT: /* write the word at location addr. */
+	/* write the word at location addr. */
+	case PTRACE_POKETEXT:
 	case PTRACE_POKEDATA:
 		ret = generic_ptrace_pokedata(child, addr, data);
 		break;
 
-	case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
-                ret = poke_user(child, addr, data);
-                break;
+	/* write the word at location addr in the USER area */
+	case PTRACE_POKEUSR:
+		ret = poke_user(child, addr, data);
+		break;
 
-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-	case PTRACE_CONT: { /* restart after signal. */
+	/* continue and stop at next (return from) syscall */
+	case PTRACE_SYSCALL:
+	/* restart after signal. */
+	case PTRACE_CONT: {
 		ret = -EIO;
 		if (!valid_signal(data))
 			break;
 
-                set_singlestepping(child, 0);
-		if (request == PTRACE_SYSCALL) {
+		set_singlestepping(child, 0);
+		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		}
-		else {
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		}
+		else clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		child->exit_code = data;
 		wake_up_process(child);
 		ret = 0;
@@ -91,8 +83,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	}
 
 /*
- * make the child exit.  Best I can do is send it a sigkill. 
- * perhaps it should be put in the status that it wants to 
+ * make the child exit.  Best I can do is send it a sigkill.
+ * perhaps it should be put in the status that it wants to
  * exit.
  */
 	case PTRACE_KILL: {
@@ -100,7 +92,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
 			break;
 
-                set_singlestepping(child, 0);
+		set_singlestepping(child, 0);
 		child->exit_code = SIGKILL;
 		wake_up_process(child);
 		break;
@@ -111,7 +103,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-                set_singlestepping(child, 1);
+		set_singlestepping(child, 1);
 		child->exit_code = data;
 		/* give it a chance to run. */
 		wake_up_process(child);
@@ -119,11 +111,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 	}
 
-	case PTRACE_DETACH:
-		/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
- 		break;
-
 #ifdef PTRACE_GETREGS
 	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
 		if (!access_ok(VERIFY_WRITE, p, MAX_REG_OFFSET)) {
@@ -156,22 +143,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 #endif
 #ifdef PTRACE_GETFPREGS
 	case PTRACE_GETFPREGS: /* Get the child FPU state. */
-		ret = get_fpregs(data, child);
+		ret = get_fpregs((struct user_i387_struct __user *) data,
+				 child);
 		break;
 #endif
 #ifdef PTRACE_SETFPREGS
 	case PTRACE_SETFPREGS: /* Set the child FPU state. */
-	        ret = set_fpregs(data, child);
-		break;
-#endif
-#ifdef PTRACE_GETFPXREGS
-	case PTRACE_GETFPXREGS: /* Get the child FPU state. */
-		ret = get_fpxregs(data, child);
-		break;
-#endif
-#ifdef PTRACE_SETFPXREGS
-	case PTRACE_SETFPXREGS: /* Set the child FPU state. */
-		ret = set_fpxregs(data, child);
+	        ret = set_fpregs((struct user_i387_struct __user *) data,
+				 child);
 		break;
 #endif
 	case PTRACE_GET_THREAD_AREA:
@@ -185,14 +164,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 
 	case PTRACE_FAULTINFO: {
-		/* Take the info from thread->arch->faultinfo,
+		/*
+		 * Take the info from thread->arch->faultinfo,
 		 * but transfer max. sizeof(struct ptrace_faultinfo).
 		 * On i386, ptrace_faultinfo is smaller!
 		 */
 		ret = copy_to_user(p, &child->thread.arch.faultinfo,
 				   sizeof(struct ptrace_faultinfo));
-		if(ret)
-			break;
 		break;
 	}
 
@@ -200,12 +178,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	case PTRACE_LDT: {
 		struct ptrace_ldt ldt;
 
-		if(copy_from_user(&ldt, p, sizeof(ldt))){
+		if (copy_from_user(&ldt, p, sizeof(ldt))) {
 			ret = -EIO;
 			break;
 		}
 
-		/* This one is confusing, so just punt and return -EIO for 
+		/*
+		 * This one is confusing, so just punt and return -EIO for
 		 * now
 		 */
 		ret = -EIO;
@@ -217,7 +196,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		struct mm_struct *old = child->mm;
 		struct mm_struct *new = proc_mm_get_mm(data);
 
-		if(IS_ERR(new)){
+		if (IS_ERR(new)) {
 			ret = PTR_ERR(new);
 			break;
 		}
@@ -231,20 +210,22 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 	}
 #endif
 #ifdef PTRACE_ARCH_PRCTL
-        case PTRACE_ARCH_PRCTL:
-                /* XXX Calls ptrace on the host - needs some SMP thinking */
-                ret = arch_prctl_skas(child, data, (void *) addr);
-                break;
+	case PTRACE_ARCH_PRCTL:
+		/* XXX Calls ptrace on the host - needs some SMP thinking */
+		ret = arch_prctl(child, data, (void *) addr);
+		break;
 #endif
 	default:
 		ret = ptrace_request(child, request, addr, data);
+		if (ret == -EIO)
+			ret = subarch_ptrace(child, request, addr, data);
 		break;
 	}
 
 	return ret;
 }
 
-void send_sigtrap(struct task_struct *tsk, union uml_pt_regs *regs,
+void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
 		  int error_code)
 {
 	struct siginfo info;
@@ -260,10 +241,11 @@ void send_sigtrap(struct task_struct *tsk, union uml_pt_regs *regs,
 	force_sig_info(SIGTRAP, &info, tsk);
 }
 
-/* XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
+/*
+ * XXX Check PT_DTRACE vs TIF_SINGLESTEP for singlestepping check and
  * PT_PTRACED vs TIF_SYSCALL_TRACE for syscall tracing check
  */
-void syscall_trace(union uml_pt_regs *regs, int entryexit)
+void syscall_trace(struct uml_pt_regs *regs, int entryexit)
 {
 	int is_singlestep = (current->ptrace & PT_DTRACE) && entryexit;
 	int tracesysgood;
@@ -277,7 +259,7 @@ void syscall_trace(union uml_pt_regs *regs, int entryexit)
 					    UPT_SYSCALL_ARG3(regs),
 					    UPT_SYSCALL_ARG4(regs));
 		else audit_syscall_exit(AUDITSC_RESULT(UPT_SYSCALL_RET(regs)),
-                                        UPT_SYSCALL_RET(regs));
+					UPT_SYSCALL_RET(regs));
 	}
 
 	/* Fake a debug trap */
@@ -290,15 +272,18 @@ void syscall_trace(union uml_pt_regs *regs, int entryexit)
 	if (!(current->ptrace & PT_PTRACED))
 		return;
 
-	/* the 0x80 provides a way for the tracing parent to distinguish
-	   between a syscall stop and SIGTRAP delivery */
+	/*
+	 * the 0x80 provides a way for the tracing parent to distinguish
+	 * between a syscall stop and SIGTRAP delivery
+	 */
 	tracesysgood = (current->ptrace & PT_TRACESYSGOOD);
 	ptrace_notify(SIGTRAP | (tracesysgood ? 0x80 : 0));
 
 	if (entryexit) /* force do_signal() --> is_syscall() */
 		set_thread_flag(TIF_SIGPENDING);
 
-	/* this isn't the same as continuing with a signal, but it will do
+	/*
+	 * this isn't the same as continuing with a signal, but it will do
 	 * for normal use.  strace only continues with a signal if the
 	 * stopping signal is not SIGTRAP.  -brl
 	 */
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 7e4305a1fd3..04cebcf0679 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -1,60 +1,53 @@
 /* 
- * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/module.h"
 #include "linux/sched.h"
-#include "asm/smp.h"
-#include "kern_util.h"
-#include "kern.h"
 #include "os.h"
-#include "mode.h"
-#include "choose-mode.h"
+#include "skas.h"
 
 void (*pm_power_off)(void);
 
-#ifdef CONFIG_SMP
-static void kill_idlers(int me)
-{
-#ifdef CONFIG_MODE_TT
-	struct task_struct *p;
-	int i;
-
-	for(i = 0; i < ARRAY_SIZE(idle_threads); i++){
-		p = idle_threads[i];
-		if((p != NULL) && (p->thread.mode.tt.extern_pid != me))
-			os_kill_process(p->thread.mode.tt.extern_pid, 0);
-	}
-#endif
-}
-#endif
-
 static void kill_off_processes(void)
 {
-	CHOOSE_MODE(kill_off_processes_tt(), kill_off_processes_skas());
-#ifdef CONFIG_SMP
-	kill_idlers(os_getpid());
-#endif
+	if (proc_mm)
+		/*
+		 * FIXME: need to loop over userspace_pids
+		 */
+		os_kill_ptraced_process(userspace_pid[0], 1);
+	else {
+		struct task_struct *p;
+		int pid;
+
+		/* Tasks with no mm have no host pid to look up; skip them */
+		for_each_process(p) {
+			if (p->mm == NULL)
+				continue;
+
+			pid = p->mm->context.id.u.pid;
+			os_kill_ptraced_process(pid, 1);
+		}
+	}
 }
 
 void uml_cleanup(void)
 {
-        kmalloc_ok = 0;
+	kmalloc_ok = 0;
 	do_uml_exitcalls();
 	kill_off_processes();
 }
 
 void machine_restart(char * __unused)
 {
-        uml_cleanup();
-	CHOOSE_MODE(reboot_tt(), reboot_skas());
+	uml_cleanup();
+	reboot_skas();
 }
 
 void machine_power_off(void)
 {
-        uml_cleanup();
-	CHOOSE_MODE(halt_tt(), halt_skas());
+	uml_cleanup();
+	halt_skas();
 }
 
 void machine_halt(void)
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index c4020c3d785..19cb9773393 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -1,29 +1,17 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/stddef.h"
-#include "linux/sys.h"
-#include "linux/sched.h"
-#include "linux/wait.h"
-#include "linux/kernel.h"
-#include "linux/smp_lock.h"
 #include "linux/module.h"
-#include "linux/slab.h"
-#include "linux/tty.h"
-#include "linux/binfmts.h"
 #include "linux/ptrace.h"
+#include "linux/sched.h"
+#include "asm/siginfo.h"
 #include "asm/signal.h"
-#include "asm/uaccess.h"
 #include "asm/unistd.h"
-#include "asm/ucontext.h"
-#include "kern_util.h"
-#include "signal_kern.h"
-#include "kern.h"
 #include "frame_kern.h"
+#include "kern_util.h"
 #include "sigcontext.h"
-#include "mode.h"
 
 EXPORT_SYMBOL(block_signals);
 EXPORT_SYMBOL(unblock_signals);
@@ -46,9 +34,9 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr,
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
 	/* Did we come from a system call? */
-	if(PT_REGS_SYSCALL_NR(regs) >= 0){
+	if (PT_REGS_SYSCALL_NR(regs) >= 0) {
 		/* If so, check system call restarting.. */
-		switch(PT_REGS_SYSCALL_RET(regs)){
+		switch(PT_REGS_SYSCALL_RET(regs)) {
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTARTNOHAND:
 			PT_REGS_SYSCALL_RET(regs) = -EINTR;
@@ -68,17 +56,17 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr,
 	}
 
 	sp = PT_REGS_SP(regs);
-	if((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
+	if ((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
 		sp = current->sas_ss_sp + current->sas_ss_size;
 
 #ifdef CONFIG_ARCH_HAS_SC_SIGNALS
-	if(!(ka->sa.sa_flags & SA_SIGINFO))
+	if (!(ka->sa.sa_flags & SA_SIGINFO))
 		err = setup_signal_stack_sc(sp, signr, ka, regs, oldset);
 	else
 #endif
 		err = setup_signal_stack_si(sp, signr, ka, regs, info, oldset);
 
-	if(err){
+	if (err) {
 		spin_lock_irq(&current->sighand->siglock);
 		current->blocked = *oldset;
 		recalc_sigpending();
@@ -88,7 +76,7 @@ static int handle_signal(struct pt_regs *regs, unsigned long signr,
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked, &current->blocked,
 			  &ka->sa.sa_mask);
-		 if(!(ka->sa.sa_flags & SA_NODEFER))
+		if (!(ka->sa.sa_flags & SA_NODEFER))
 			sigaddset(&current->blocked, signr);
 		recalc_sigpending();
 		spin_unlock_irq(&current->sighand->siglock);
@@ -109,14 +97,16 @@ static int kern_do_signal(struct pt_regs *regs)
 	else
 		oldset = &current->blocked;
 
-	while((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0){
+	while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) {
 		handled_sig = 1;
 		/* Whee!  Actually deliver the signal.  */
-		if(!handle_signal(regs, sig, &ka_copy, &info, oldset)){
-			/* a signal was successfully delivered; the saved
+		if (!handle_signal(regs, sig, &ka_copy, &info, oldset)) {
+			/*
+			 * a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
-			 * clear the TIF_RESTORE_SIGMASK flag */
+			 * clear the TIF_RESTORE_SIGMASK flag
+			 */
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
 				clear_thread_flag(TIF_RESTORE_SIGMASK);
 			break;
@@ -124,9 +114,9 @@ static int kern_do_signal(struct pt_regs *regs)
 	}
 
 	/* Did we come from a system call? */
-	if(!handled_sig && (PT_REGS_SYSCALL_NR(regs) >= 0)){
+	if (!handled_sig && (PT_REGS_SYSCALL_NR(regs) >= 0)) {
 		/* Restart the system call - no handlers present */
-		switch(PT_REGS_SYSCALL_RET(regs)){
+		switch(PT_REGS_SYSCALL_RET(regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
@@ -137,22 +127,25 @@ static int kern_do_signal(struct pt_regs *regs)
 			PT_REGS_ORIG_SYSCALL(regs) = __NR_restart_syscall;
 			PT_REGS_RESTART_SYSCALL(regs);
 			break;
- 		}
+		}
 	}
 
-	/* This closes a way to execute a system call on the host.  If
+	/*
+	 * This closes a way to execute a system call on the host.  If
 	 * you set a breakpoint on a system call instruction and singlestep
 	 * from it, the tracing thread used to PTRACE_SINGLESTEP the process
 	 * rather than PTRACE_SYSCALL it, allowing the system call to execute
 	 * on the host.  The tracing thread will check this flag and
 	 * PTRACE_SYSCALL if necessary.
 	 */
-	if(current->ptrace & PT_DTRACE)
+	if (current->ptrace & PT_DTRACE)
 		current->thread.singlestep_syscall =
 			is_syscall(PT_REGS_IP(&current->thread.regs));
 
-	/* if there's no signal to deliver, we just put the saved sigmask
-	 * back */
+	/*
+	 * if there's no signal to deliver, we just put the saved sigmask
+	 * back
+	 */
 	if (!handled_sig && test_thread_flag(TIF_RESTORE_SIGMASK)) {
 		clear_thread_flag(TIF_RESTORE_SIGMASK);
 		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
diff --git a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
index 3e3fa7e7e3c..0b76d8869c9 100644
--- a/arch/um/kernel/skas/Makefile
+++ b/arch/um/kernel/skas/Makefile
@@ -1,9 +1,9 @@
 #
-# Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com)
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 # Licensed under the GPL
 #
 
-obj-y := clone.o exec.o mem.o mmu.o process.o syscall.o tlb.o uaccess.o
+obj-y := clone.o mmu.o process.o syscall.o uaccess.o
 
 # clone.o is in the stub, so it can't be built with profiling
 # GCC hardened also auto-enables -fpic, but we need %ebx so it can't work ->
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index 47b812b3bca..d119f4f7d89 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -4,6 +4,7 @@
 #include <sys/time.h>
 #include <asm/unistd.h>
 #include <asm/page.h>
+#include "as-layout.h"
 #include "ptrace_user.h"
 #include "skas.h"
 #include "stub-data.h"
@@ -21,12 +22,11 @@
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_clone_handler(void)
 {
-	struct stub_data *data = (struct stub_data *) UML_CONFIG_STUB_DATA;
+	struct stub_data *data = (struct stub_data *) STUB_DATA;
 	long err;
 
 	err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
-			    UML_CONFIG_STUB_DATA + UM_KERN_PAGE_SIZE / 2 -
-			    sizeof(void *));
+			    STUB_DATA + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
 	if(err != 0)
 		goto out;
 
diff --git a/arch/um/kernel/skas/exec.c b/arch/um/kernel/skas/exec.c
deleted file mode 100644
index 580eb646894..00000000000
--- a/arch/um/kernel/skas/exec.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include "linux/kernel.h"
-#include "asm/current.h"
-#include "asm/page.h"
-#include "asm/signal.h"
-#include "asm/ptrace.h"
-#include "asm/uaccess.h"
-#include "asm/mmu_context.h"
-#include "tlb.h"
-#include "skas.h"
-#include "um_mmu.h"
-#include "os.h"
-
-void flush_thread_skas(void)
-{
-	void *data = NULL;
-	unsigned long end = proc_mm ? task_size : CONFIG_STUB_START;
-	int ret;
-
-	ret = unmap(&current->mm->context.skas.id, 0, end, 1, &data);
-	if(ret){
-		printk("flush_thread_skas - clearing address space failed, "
-		       "err = %d\n", ret);
-		force_sig(SIGKILL, current);
-	}
-
-	switch_mm_skas(&current->mm->context.skas.id);
-}
-
-void start_thread_skas(struct pt_regs *regs, unsigned long eip,
-		       unsigned long esp)
-{
-	set_fs(USER_DS);
-	PT_REGS_IP(regs) = eip;
-	PT_REGS_SP(regs) = esp;
-}
diff --git a/arch/um/kernel/skas/mem.c b/arch/um/kernel/skas/mem.c
deleted file mode 100644
index 7c18dfcd7d8..00000000000
--- a/arch/um/kernel/skas/mem.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include "linux/mm.h"
-#include "asm/pgtable.h"
-#include "mem_user.h"
-#include "skas.h"
-
-unsigned long set_task_sizes_skas(unsigned long *task_size_out)
-{
-	/* Round up to the nearest 4M */
-	unsigned long host_task_size = ROUND_4M((unsigned long)
-						&host_task_size);
-
-	if (!skas_needs_stub)
-		*task_size_out = host_task_size;
-	else *task_size_out = CONFIG_STUB_START & PGDIR_MASK;
-
-	return host_task_size;
-}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 2c6d090a2e8..f859ec306cd 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -1,20 +1,13 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/list.h"
-#include "linux/spinlock.h"
-#include "linux/slab.h"
-#include "linux/errno.h"
 #include "linux/mm.h"
-#include "asm/current.h"
-#include "asm/segment.h"
-#include "asm/mmu.h"
+#include "linux/sched.h"
 #include "asm/pgalloc.h"
 #include "asm/pgtable.h"
-#include "asm/ldt.h"
+#include "as-layout.h"
 #include "os.h"
 #include "skas.h"
 
@@ -41,10 +34,11 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	if (!pte)
 		goto out_pte;
 
-	/* There's an interaction between the skas0 stub pages, stack
+	/*
+	 * There's an interaction between the skas0 stub pages, stack
 	 * randomization, and the BUG at the end of exit_mmap.  exit_mmap
-         * checks that the number of page tables freed is the same as had
-         * been allocated.  If the stack is on the last page table page,
+	 * checks that the number of page tables freed is the same as had
+	 * been allocated.  If the stack is on the last page table page,
 	 * then the stack pte page will be freed, and if not, it won't.  To
 	 * avoid having to know where the stack is, or if the process mapped
 	 * something at the top of its address space for some other reason,
@@ -54,76 +48,77 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	 * destroy_context_skas.
 	 */
 
-        mm->context.skas.last_page_table = pmd_page_vaddr(*pmd);
+	mm->context.last_page_table = pmd_page_vaddr(*pmd);
 #ifdef CONFIG_3_LEVEL_PGTABLES
-        mm->context.skas.last_pmd = (unsigned long) __va(pud_val(*pud));
+	mm->context.last_pmd = (unsigned long) __va(pud_val(*pud));
 #endif
 
 	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
 	*pte = pte_mkread(*pte);
-	return(0);
+	return 0;
 
  out_pmd:
 	pud_free(pud);
  out_pte:
 	pmd_free(pmd);
  out:
-	return(-ENOMEM);
+	return -ENOMEM;
 }
 
-int init_new_context_skas(struct task_struct *task, struct mm_struct *mm)
+int init_new_context(struct task_struct *task, struct mm_struct *mm)
 {
- 	struct mmu_context_skas *from_mm = NULL;
-	struct mmu_context_skas *to_mm = &mm->context.skas;
+	struct mm_context *from_mm = NULL;
+	struct mm_context *to_mm = &mm->context;
 	unsigned long stack = 0;
 	int ret = -ENOMEM;
 
-	if(skas_needs_stub){
+	if (skas_needs_stub) {
 		stack = get_zeroed_page(GFP_KERNEL);
-		if(stack == 0)
+		if (stack == 0)
 			goto out;
 
-		/* This zeros the entry that pgd_alloc didn't, needed since
+		/*
+		 * This zeros the entry that pgd_alloc didn't, needed since
 		 * we are about to reinitialize it, and want mm.nr_ptes to
 		 * be accurate.
 		 */
 		mm->pgd[USER_PTRS_PER_PGD] = __pgd(0);
 
-		ret = init_stub_pte(mm, CONFIG_STUB_CODE,
+		ret = init_stub_pte(mm, STUB_CODE,
 				    (unsigned long) &__syscall_stub_start);
-		if(ret)
+		if (ret)
 			goto out_free;
 
-		ret = init_stub_pte(mm, CONFIG_STUB_DATA, stack);
-		if(ret)
+		ret = init_stub_pte(mm, STUB_DATA, stack);
+		if (ret)
 			goto out_free;
 
 		mm->nr_ptes--;
 	}
 
 	to_mm->id.stack = stack;
-	if(current->mm != NULL && current->mm != &init_mm)
-		from_mm = &current->mm->context.skas;
+	if (current->mm != NULL && current->mm != &init_mm)
+		from_mm = &current->mm->context;
 
-	if(proc_mm){
+	if (proc_mm) {
 		ret = new_mm(stack);
-		if(ret < 0){
-			printk("init_new_context_skas - new_mm failed, "
-			       "errno = %d\n", ret);
+		if (ret < 0) {
+			printk(KERN_ERR "init_new_context - "
+			       "new_mm failed, errno = %d\n", ret);
 			goto out_free;
 		}
 		to_mm->id.u.mm_fd = ret;
 	}
 	else {
-		if(from_mm)
+		if (from_mm)
 			to_mm->id.u.pid = copy_context_skas0(stack,
 							     from_mm->id.u.pid);
 		else to_mm->id.u.pid = start_userspace(stack);
 	}
 
 	ret = init_new_ldt(to_mm, from_mm);
-	if(ret < 0){
-		printk("init_new_context_skas - init_ldt"
+	if (ret < 0) {
+		printk(KERN_ERR "init_new_context - init_new_ldt"
 		       " failed, errno = %d\n", ret);
 		goto out_free;
 	}
@@ -131,22 +126,22 @@ int init_new_context_skas(struct task_struct *task, struct mm_struct *mm)
 	return 0;
 
  out_free:
-	if(to_mm->id.stack != 0)
+	if (to_mm->id.stack != 0)
 		free_page(to_mm->id.stack);
  out:
 	return ret;
 }
 
-void destroy_context_skas(struct mm_struct *mm)
+void destroy_context(struct mm_struct *mm)
 {
-	struct mmu_context_skas *mmu = &mm->context.skas;
+	struct mm_context *mmu = &mm->context;
 
-	if(proc_mm)
+	if (proc_mm)
 		os_close_file(mmu->id.u.mm_fd);
 	else
 		os_kill_ptraced_process(mmu->id.u.pid, 1);
 
-	if(!proc_mm || !ptrace_faultinfo){
+	if (!proc_mm || !ptrace_faultinfo) {
 		free_page(mmu->id.stack);
 		pte_lock_deinit(virt_to_page(mmu->last_page_table));
 		pte_free_kernel((pte_t *) mmu->last_page_table);
@@ -155,4 +150,6 @@ void destroy_context_skas(struct mm_struct *mm)
 		pmd_free((pmd_t *) mmu->last_pmd);
 #endif
 	}
+
+	free_ldt(mmu);
 }
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 48051a98525..fce389c2342 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -1,146 +1,26 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/slab.h"
-#include "linux/ptrace.h"
-#include "linux/proc_fs.h"
-#include "linux/file.h"
-#include "linux/errno.h"
 #include "linux/init.h"
-#include "asm/uaccess.h"
-#include "asm/atomic.h"
-#include "kern_util.h"
+#include "linux/sched.h"
 #include "as-layout.h"
-#include "skas.h"
 #include "os.h"
-#include "tlb.h"
-#include "kern.h"
-#include "mode.h"
-#include "registers.h"
-
-void switch_to_skas(void *prev, void *next)
-{
-	struct task_struct *from, *to;
-
-	from = prev;
-	to = next;
-
-	/* XXX need to check runqueues[cpu].idle */
-	if(current->pid == 0)
-		switch_timers(0);
-
-	switch_threads(&from->thread.mode.skas.switch_buf,
-		       &to->thread.mode.skas.switch_buf);
-
-	arch_switch_to_skas(current->thread.prev_sched, current);
-
-	if(current->pid == 0)
-		switch_timers(1);
-}
-
-extern void schedule_tail(struct task_struct *prev);
-
-/* This is called magically, by its address being stuffed in a jmp_buf
- * and being longjmp-d to.
- */
-void new_thread_handler(void)
-{
-	int (*fn)(void *), n;
-	void *arg;
-
-	if(current->thread.prev_sched != NULL)
-		schedule_tail(current->thread.prev_sched);
-	current->thread.prev_sched = NULL;
-
-	fn = current->thread.request.u.thread.proc;
-	arg = current->thread.request.u.thread.arg;
-
-	/* The return value is 1 if the kernel thread execs a process,
-	 * 0 if it just exits
-	 */
-	n = run_kernel_thread(fn, arg, &current->thread.exec_buf);
-	if(n == 1){
-		/* Handle any immediate reschedules or signals */
-		interrupt_end();
-		userspace(&current->thread.regs.regs);
-	}
-	else do_exit(0);
-}
-
-void release_thread_skas(struct task_struct *task)
-{
-}
-
-/* Called magically, see new_thread_handler above */
-void fork_handler(void)
-{
-	force_flush_all();
-	if(current->thread.prev_sched == NULL)
-		panic("blech");
-
-	schedule_tail(current->thread.prev_sched);
-
-	/* XXX: if interrupt_end() calls schedule, this call to
-	 * arch_switch_to_skas isn't needed. We could want to apply this to
-	 * improve performance. -bb */
-	arch_switch_to_skas(current->thread.prev_sched, current);
-
-	current->thread.prev_sched = NULL;
-
-/* Handle any immediate reschedules or signals */
-	interrupt_end();
-
-	userspace(&current->thread.regs.regs);
-}
-
-int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp,
-		     unsigned long stack_top, struct task_struct * p,
-		     struct pt_regs *regs)
-{
-	void (*handler)(void);
-
-	if(current->thread.forking){
-	  	memcpy(&p->thread.regs.regs.skas, &regs->regs.skas,
-		       sizeof(p->thread.regs.regs.skas));
-		REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.skas.regs, 0);
-		if(sp != 0) REGS_SP(p->thread.regs.regs.skas.regs) = sp;
-
-		handler = fork_handler;
-
-		arch_copy_thread(&current->thread.arch, &p->thread.arch);
-	}
-	else {
-		init_thread_registers(&p->thread.regs.regs);
-		p->thread.request.u.thread = current->thread.request.u.thread;
-		handler = new_thread_handler;
-	}
-
-	new_thread(task_stack_page(p), &p->thread.mode.skas.switch_buf,
-		   handler);
-	return(0);
-}
+#include "skas.h"
 
 int new_mm(unsigned long stack)
 {
 	int fd;
 
 	fd = os_open_file("/proc/mm", of_cloexec(of_write(OPENFLAGS())), 0);
-	if(fd < 0)
-		return(fd);
+	if (fd < 0)
+		return fd;
 
-	if(skas_needs_stub)
-		map_stub_pages(fd, CONFIG_STUB_CODE, CONFIG_STUB_DATA, stack);
+	if (skas_needs_stub)
+		map_stub_pages(fd, STUB_CODE, STUB_DATA, stack);
 
-	return(fd);
-}
-
-void init_idle_skas(void)
-{
-	cpu_tasks[current_thread->cpu].pid = os_getpid();
-	default_idle();
+	return fd;
 }
 
 extern void start_kernel(void);
@@ -158,67 +38,32 @@ static int __init start_kernel_proc(void *unused)
 	cpu_online_map = cpumask_of_cpu(0);
 #endif
 	start_kernel();
-	return(0);
+	return 0;
 }
 
 extern int userspace_pid[];
 
 extern char cpu0_irqstack[];
 
-int __init start_uml_skas(void)
+int __init start_uml(void)
 {
 	stack_protections((unsigned long) &cpu0_irqstack);
 	set_sigstack(cpu0_irqstack, THREAD_SIZE);
-	if(proc_mm)
+	if (proc_mm)
 		userspace_pid[0] = start_userspace(0);
 
 	init_new_thread_signals();
 
 	init_task.thread.request.u.thread.proc = start_kernel_proc;
 	init_task.thread.request.u.thread.arg = NULL;
-	return(start_idle_thread(task_stack_page(&init_task),
-				 &init_task.thread.mode.skas.switch_buf));
-}
-
-int external_pid_skas(struct task_struct *task)
-{
-	/* FIXME: Need to look up userspace_pid by cpu */
-	return(userspace_pid[0]);
-}
-
-int thread_pid_skas(struct task_struct *task)
-{
-	/* FIXME: Need to look up userspace_pid by cpu */
-	return(userspace_pid[0]);
-}
-
-void kill_off_processes_skas(void)
-{
-	if(proc_mm)
-		/*
-		 * FIXME: need to loop over userspace_pids in
-		 * kill_off_processes_skas
-		 */
-		os_kill_ptraced_process(userspace_pid[0], 1);
-	else {
-		struct task_struct *p;
-		int pid, me;
-
-		me = os_getpid();
-		for_each_process(p){
-			if(p->mm == NULL)
-				continue;
-
-			pid = p->mm->context.skas.id.u.pid;
-			os_kill_ptraced_process(pid, 1);
-		}
-	}
+	return start_idle_thread(task_stack_page(&init_task),
+				 &init_task.thread.switch_buf);
 }
 
 unsigned long current_stub_stack(void)
 {
-	if(current->mm == NULL)
-		return(0);
+	if (current->mm == NULL)
+		return 0;
 
-	return(current->mm->context.skas.id.stack);
+	return current->mm->context.id.stack;
 }
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index 0ae4eea21be..50b476f2b38 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -1,19 +1,15 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/sys.h"
+#include "linux/kernel.h"
 #include "linux/ptrace.h"
-#include "asm/errno.h"
-#include "asm/unistd.h"
-#include "asm/ptrace.h"
-#include "asm/current.h"
-#include "sysdep/syscalls.h"
 #include "kern_util.h"
-#include "syscall.h"
+#include "sysdep/ptrace.h"
+#include "sysdep/syscalls.h"
 
-void handle_syscall(union uml_pt_regs *r)
+void handle_syscall(struct uml_pt_regs *r)
 {
 	struct pt_regs *regs = container_of(r, struct pt_regs, regs);
 	long result;
@@ -24,7 +20,8 @@ void handle_syscall(union uml_pt_regs *r)
 	current->thread.nsyscalls++;
 	nsyscalls++;
 
-	/* This should go in the declaration of syscall, but when I do that,
+	/*
+	 * This should go in the declaration of syscall, but when I do that,
 	 * strace -f -c bash -c 'ls ; ls' breaks, sometimes not tracing
 	 * children at all, sometimes hanging when bash doesn't see the first
 	 * ls exit.
@@ -33,11 +30,11 @@ void handle_syscall(union uml_pt_regs *r)
 	 * in case it's a compiler bug.
 	 */
 	syscall = UPT_SYSCALL_NR(r);
-	if((syscall >= NR_syscalls) || (syscall < 0))
+	if ((syscall >= NR_syscalls) || (syscall < 0))
 		result = -ENOSYS;
 	else result = EXECUTE_SYSCALL(syscall, regs);
 
-	REGS_SET_SYSCALL_RETURN(r->skas.regs, result);
+	REGS_SET_SYSCALL_RETURN(r->gp, result);
 
 	syscall_trace(r, 1);
 }
diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c
deleted file mode 100644
index c0f0693743b..00000000000
--- a/arch/um/kernel/skas/tlb.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
- */
-
-#include "linux/stddef.h"
-#include "linux/sched.h"
-#include "linux/mm.h"
-#include "asm/page.h"
-#include "asm/pgtable.h"
-#include "asm/mmu.h"
-#include "mem_user.h"
-#include "mem.h"
-#include "skas.h"
-#include "os.h"
-#include "tlb.h"
-
-static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last,
-		  int finished, void **flush)
-{
-	struct host_vm_op *op;
-        int i, ret = 0;
-
-        for(i = 0; i <= last && !ret; i++){
-		op = &ops[i];
-		switch(op->type){
-		case MMAP:
-			ret = map(&mmu->skas.id, op->u.mmap.addr,
-				  op->u.mmap.len, op->u.mmap.prot,
-				  op->u.mmap.fd, op->u.mmap.offset, finished,
-				  flush);
-			break;
-		case MUNMAP:
-			ret = unmap(&mmu->skas.id, op->u.munmap.addr,
-				    op->u.munmap.len, finished, flush);
-			break;
-		case MPROTECT:
-			ret = protect(&mmu->skas.id, op->u.mprotect.addr,
-				      op->u.mprotect.len, op->u.mprotect.prot,
-				      finished, flush);
-			break;
-		default:
-			printk("Unknown op type %d in do_ops\n", op->type);
-			break;
-		}
-	}
-
-	return ret;
-}
-
-extern int proc_mm;
-
-static void fix_range(struct mm_struct *mm, unsigned long start_addr,
-		      unsigned long end_addr, int force)
-{
-        if(!proc_mm && (end_addr > CONFIG_STUB_START))
-                end_addr = CONFIG_STUB_START;
-
-        fix_range_common(mm, start_addr, end_addr, force, do_ops);
-}
-
-void __flush_tlb_one_skas(unsigned long addr)
-{
-        flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
-}
-
-void flush_tlb_range_skas(struct vm_area_struct *vma, unsigned long start, 
-		     unsigned long end)
-{
-        if(vma->vm_mm == NULL)
-                flush_tlb_kernel_range_common(start, end);
-        else fix_range(vma->vm_mm, start, end, 0);
-}
-
-void flush_tlb_mm_skas(struct mm_struct *mm)
-{
-	unsigned long end;
-
-	/* Don't bother flushing if this address space is about to be
-         * destroyed.
-         */
-        if(atomic_read(&mm->mm_users) == 0)
-                return;
-
-	end = proc_mm ? task_size : CONFIG_STUB_START;
-        fix_range(mm, 0, end, 0);
-}
-
-void force_flush_all_skas(void)
-{
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma = mm->mmap;
-
-	while(vma != NULL) {
-		fix_range(mm, vma->vm_start, vma->vm_end, 1);
-		vma = vma->vm_next;
-	}
-}
-
-void flush_tlb_page_skas(struct vm_area_struct *vma, unsigned long address)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	struct mm_struct *mm = vma->vm_mm;
-	void *flush = NULL;
-	int r, w, x, prot, err = 0;
-	struct mm_id *mm_id;
-
-	pgd = pgd_offset(mm, address);
-	if(!pgd_present(*pgd))
-		goto kill;
-
-	pud = pud_offset(pgd, address);
-	if(!pud_present(*pud))
-		goto kill;
-
-	pmd = pmd_offset(pud, address);
-	if(!pmd_present(*pmd))
-		goto kill;
-
-	pte = pte_offset_kernel(pmd, address);
-
-	r = pte_read(*pte);
-	w = pte_write(*pte);
-	x = pte_exec(*pte);
-	if (!pte_young(*pte)) {
-		r = 0;
-		w = 0;
-	} else if (!pte_dirty(*pte)) {
-		w = 0;
-	}
-
-	mm_id = &mm->context.skas.id;
-	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
-		(x ? UM_PROT_EXEC : 0));
-	if(pte_newpage(*pte)){
-		if(pte_present(*pte)){
-			unsigned long long offset;
-			int fd;
-
-			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
-			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
-				  1, &flush);
-		}
-		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
-	}
-	else if(pte_newprot(*pte))
-		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
-
-	if(err)
-		goto kill;
-
-	*pte = pte_mkuptodate(*pte);
-
-	return;
-
-kill:
-	printk("Failed to flush page for address 0x%lx\n", address);
-	force_sig(SIGKILL, current);
-}
-
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 8912cec0fe4..1d8b119f2d0 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -1,18 +1,14 @@
 /*
- * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/compiler.h"
-#include "linux/stddef.h"
-#include "linux/kernel.h"
-#include "linux/string.h"
-#include "linux/fs.h"
-#include "linux/hardirq.h"
+#include "linux/err.h"
 #include "linux/highmem.h"
+#include "linux/mm.h"
+#include "asm/current.h"
 #include "asm/page.h"
 #include "asm/pgtable.h"
-#include "asm/uaccess.h"
 #include "kern_util.h"
 #include "os.h"
 
@@ -27,16 +23,16 @@ static unsigned long maybe_map(unsigned long virt, int is_write)
 	void *phys = um_virt_to_phys(current, virt, &pte);
 	int dummy_code;
 
-	if(IS_ERR(phys) || (is_write && !pte_write(pte))){
+	if (IS_ERR(phys) || (is_write && !pte_write(pte))) {
 		err = handle_page_fault(virt, 0, is_write, 1, &dummy_code);
-		if(err)
-			return(-1UL);
+		if (err)
+			return -1UL;
 		phys = um_virt_to_phys(current, virt, NULL);
 	}
-        if(IS_ERR(phys))
-                phys = (void *) -1;
+	if (IS_ERR(phys))
+		phys = (void *) -1;
 
-	return((unsigned long) phys);
+	return (unsigned long) phys;
 }
 
 static int do_op_one_page(unsigned long addr, int len, int is_write,
@@ -46,17 +42,18 @@ static int do_op_one_page(unsigned long addr, int len, int is_write,
 	int n;
 
 	addr = maybe_map(addr, is_write);
-	if(addr == -1UL)
-		return(-1);
+	if (addr == -1UL)
+		return -1;
 
 	page = phys_to_page(addr);
-	addr = (unsigned long) kmap_atomic(page, KM_UML_USERCOPY) + (addr & ~PAGE_MASK);
+	addr = (unsigned long) kmap_atomic(page, KM_UML_USERCOPY) +
+		(addr & ~PAGE_MASK);
 
 	n = (*op)(addr, len, arg);
 
 	kunmap_atomic(page, KM_UML_USERCOPY);
 
-	return(n);
+	return n;
 }
 
 static void do_buffer_op(void *jmpbuf, void *arg_ptr)
@@ -81,21 +78,21 @@ static void do_buffer_op(void *jmpbuf, void *arg_ptr)
 
 	current->thread.fault_catcher = jmpbuf;
 	n = do_op_one_page(addr, size, is_write, op, arg);
-	if(n != 0){
+	if (n != 0) {
 		*res = (n < 0 ? remain : 0);
 		goto out;
 	}
 
 	addr += size;
 	remain -= size;
-	if(remain == 0){
+	if (remain == 0) {
 		*res = 0;
 		goto out;
 	}
 
-	while(addr < ((addr + remain) & PAGE_MASK)){
+	while(addr < ((addr + remain) & PAGE_MASK)) {
 		n = do_op_one_page(addr, PAGE_SIZE, is_write, op, arg);
-		if(n != 0){
+		if (n != 0) {
 			*res = (n < 0 ? remain : 0);
 			goto out;
 		}
@@ -103,13 +100,13 @@ static void do_buffer_op(void *jmpbuf, void *arg_ptr)
 		addr += PAGE_SIZE;
 		remain -= PAGE_SIZE;
 	}
-	if(remain == 0){
+	if (remain == 0) {
 		*res = 0;
 		goto out;
 	}
 
 	n = do_op_one_page(addr, remain, is_write, op, arg);
-	if(n != 0)
+	if (n != 0)
 		*res = (n < 0 ? remain : 0);
 	else *res = 0;
  out:
@@ -124,10 +121,10 @@ static int buffer_op(unsigned long addr, int len, int is_write,
 
 	faulted = setjmp_wrapper(do_buffer_op, addr, len, is_write, op, arg,
 				 &res);
-	if(!faulted)
-		return(res);
+	if (!faulted)
+		return res;
 
-	return(addr + len - (unsigned long) current->thread.fault_addr);
+	return addr + len - (unsigned long) current->thread.fault_addr;
 }
 
 static int copy_chunk_from_user(unsigned long from, int len, void *arg)
@@ -136,19 +133,19 @@ static int copy_chunk_from_user(unsigned long from, int len, void *arg)
 
 	memcpy((void *) to, (void *) from, len);
 	*to_ptr += len;
-	return(0);
+	return 0;
 }
 
-int copy_from_user_skas(void *to, const void __user *from, int n)
+int copy_from_user(void *to, const void __user *from, int n)
 {
-	if(segment_eq(get_fs(), KERNEL_DS)){
+	if (segment_eq(get_fs(), KERNEL_DS)) {
 		memcpy(to, (__force void*)from, n);
-		return(0);
+		return 0;
 	}
 
-	return(access_ok(VERIFY_READ, from, n) ?
+	return access_ok(VERIFY_READ, from, n) ?
 	       buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to):
-	       n);
+	       n;
 }
 
 static int copy_chunk_to_user(unsigned long to, int len, void *arg)
@@ -157,19 +154,19 @@ static int copy_chunk_to_user(unsigned long to, int len, void *arg)
 
 	memcpy((void *) to, (void *) from, len);
 	*from_ptr += len;
-	return(0);
+	return 0;
 }
 
-int copy_to_user_skas(void __user *to, const void *from, int n)
+int copy_to_user(void __user *to, const void *from, int n)
 {
-	if(segment_eq(get_fs(), KERNEL_DS)){
-		memcpy((__force void*)to, from, n);
-		return(0);
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		memcpy((__force void *) to, from, n);
+		return 0;
 	}
 
-	return(access_ok(VERIFY_WRITE, to, n) ?
+	return access_ok(VERIFY_WRITE, to, n) ?
 	       buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) :
-	       n);
+	       n;
 }
 
 static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
@@ -181,51 +178,51 @@ static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
 	n = strnlen(to, len);
 	*to_ptr += n;
 
-	if(n < len)
-	        return(1);
-	return(0);
+	if (n < len)
+	        return 1;
+	return 0;
 }
 
-int strncpy_from_user_skas(char *dst, const char __user *src, int count)
+int strncpy_from_user(char *dst, const char __user *src, int count)
 {
 	int n;
 	char *ptr = dst;
 
-	if(segment_eq(get_fs(), KERNEL_DS)){
-		strncpy(dst, (__force void*)src, count);
-		return(strnlen(dst, count));
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		strncpy(dst, (__force void *) src, count);
+		return strnlen(dst, count);
 	}
 
-	if(!access_ok(VERIFY_READ, src, 1))
-		return(-EFAULT);
+	if (!access_ok(VERIFY_READ, src, 1))
+		return -EFAULT;
 
 	n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user,
 		      &ptr);
-	if(n != 0)
-		return(-EFAULT);
-	return(strnlen(dst, count));
+	if (n != 0)
+		return -EFAULT;
+	return strnlen(dst, count);
 }
 
 static int clear_chunk(unsigned long addr, int len, void *unused)
 {
 	memset((void *) addr, 0, len);
-	return(0);
+	return 0;
 }
 
-int __clear_user_skas(void __user *mem, int len)
+int __clear_user(void __user *mem, int len)
 {
-	return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL));
+	return buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL);
 }
 
-int clear_user_skas(void __user *mem, int len)
+int clear_user(void __user *mem, int len)
 {
-	if(segment_eq(get_fs(), KERNEL_DS)){
+	if (segment_eq(get_fs(), KERNEL_DS)) {
 		memset((__force void*)mem, 0, len);
-		return(0);
+		return 0;
 	}
 
-	return(access_ok(VERIFY_WRITE, mem, len) ?
-	       buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len);
+	return access_ok(VERIFY_WRITE, mem, len) ?
+	       buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len;
 }
 
 static int strnlen_chunk(unsigned long str, int len, void *arg)
@@ -235,31 +232,20 @@ static int strnlen_chunk(unsigned long str, int len, void *arg)
 	n = strnlen((void *) str, len);
 	*len_ptr += n;
 
-	if(n < len)
-		return(1);
-	return(0);
+	if (n < len)
+		return 1;
+	return 0;
 }
 
-int strnlen_user_skas(const void __user *str, int len)
+int strnlen_user(const void __user *str, int len)
 {
 	int count = 0, n;
 
-	if(segment_eq(get_fs(), KERNEL_DS))
-		return(strnlen((__force char*)str, len) + 1);
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return strnlen((__force char*)str, len) + 1;
 
 	n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count);
-	if(n == 0)
-		return(count + 1);
-	return(-EFAULT);
+	if (n == 0)
+		return count + 1;
+	return -EFAULT;
 }
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index e6a7778006a..36d89cf8d20 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -56,12 +56,12 @@ void smp_send_stop(void)
 	int i;
 
 	printk(KERN_INFO "Stopping all CPUs...");
-	for(i = 0; i < num_online_cpus(); i++){
-		if(i == current_thread->cpu)
+	for (i = 0; i < num_online_cpus(); i++) {
+		if (i == current_thread->cpu)
 			continue;
 		os_write_file(cpu_data[i].ipi_pipe[1], "S", 1);
 	}
-	printk("done\n");
+	printk(KERN_INFO "done\n");
 }
 
 static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
@@ -72,7 +72,7 @@ static int idle_proc(void *cpup)
 	int cpu = (int) cpup, err;
 
 	err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1);
-	if(err < 0)
+	if (err < 0)
 		panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
 
 	os_set_fd_async(cpu_data[cpu].ipi_pipe[0],
@@ -80,7 +80,7 @@ static int idle_proc(void *cpup)
 
 	wmb();
 	if (cpu_test_and_set(cpu, cpu_callin_map)) {
-		printk("huh, CPU#%d already present??\n", cpu);
+		printk(KERN_ERR "huh, CPU#%d already present??\n", cpu);
 		BUG();
 	}
 
@@ -95,12 +95,11 @@ static int idle_proc(void *cpup)
 static struct task_struct *idle_thread(int cpu)
 {
 	struct task_struct *new_task;
-	unsigned char c;
 
 	current->thread.request.u.thread.proc = idle_proc;
 	current->thread.request.u.thread.arg = (void *) cpu;
 	new_task = fork_idle(cpu);
-	if(IS_ERR(new_task))
+	if (IS_ERR(new_task))
 		panic("copy_process failed in idle_thread, error = %ld",
 		      PTR_ERR(new_task));
 
@@ -108,9 +107,7 @@ static struct task_struct *idle_thread(int cpu)
 		          { .pid = 	new_task->thread.mode.tt.extern_pid,
 			    .task = 	new_task } );
 	idle_threads[cpu] = new_task;
-	CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c,
-				  sizeof(c)),
-		    ({ panic("skas mode doesn't support SMP"); }));
+	panic("skas mode doesn't support SMP");
 	return new_task;
 }
 
@@ -129,14 +126,14 @@ void smp_prepare_cpus(unsigned int maxcpus)
 	cpu_set(me, cpu_callin_map);
 
 	err = os_pipe(cpu_data[me].ipi_pipe, 1, 1);
-	if(err < 0)
+	if (err < 0)
 		panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
 
 	os_set_fd_async(cpu_data[me].ipi_pipe[0],
 		     current->thread.mode.tt.extern_pid);
 
-	for(cpu = 1; cpu < ncpus; cpu++){
-		printk("Booting processor %d...\n", cpu);
+	for (cpu = 1; cpu < ncpus; cpu++) {
+		printk(KERN_INFO "Booting processor %d...\n", cpu);
 
 		idle = idle_thread(cpu);
 
@@ -147,8 +144,8 @@ void smp_prepare_cpus(unsigned int maxcpus)
 			cpu_relax();
 
 		if (cpu_isset(cpu, cpu_callin_map))
-			printk("done\n");
-		else printk("failed\n");
+			printk(KERN_INFO "done\n");
+		else printk(KERN_INFO "failed\n");
 	}
 }
 
@@ -190,13 +187,14 @@ void IPI_handler(int cpu)
 			break;
 
 		case 'S':
-			printk("CPU#%d stopping\n", cpu);
-			while(1)
+			printk(KERN_INFO "CPU#%d stopping\n", cpu);
+			while (1)
 				pause();
 			break;
 
 		default:
-			printk("CPU#%d received unknown IPI [%c]!\n", cpu, c);
+			printk(KERN_ERR "CPU#%d received unknown IPI [%c]!\n",
+			       cpu, c);
 			break;
 		}
 	}
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index 7b3b67333ff..b9d92b2089a 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -1,27 +1,17 @@
 /*
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
 #include "linux/file.h"
-#include "linux/smp_lock.h"
-#include "linux/mm.h"
 #include "linux/fs.h"
+#include "linux/mm.h"
+#include "linux/sched.h"
 #include "linux/utsname.h"
-#include "linux/msg.h"
-#include "linux/shm.h"
-#include "linux/sys.h"
-#include "linux/syscalls.h"
-#include "linux/unistd.h"
-#include "linux/slab.h"
-#include "linux/utime.h"
+#include "asm/current.h"
 #include "asm/mman.h"
 #include "asm/uaccess.h"
-#include "kern_util.h"
-#include "sysdep/syscalls.h"
-#include "mode_kern.h"
-#include "choose-mode.h"
+#include "asm/unistd.h"
 
 /*  Unlocked, I don't care if this is a bit off */
 int nsyscalls = 0;
@@ -34,7 +24,7 @@ long sys_fork(void)
 	ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
 	current->thread.forking = 0;
-	return(ret);
+	return ret;
 }
 
 long sys_vfork(void)
@@ -46,7 +36,7 @@ long sys_vfork(void)
 		      UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
 	current->thread.forking = 0;
-	return(ret);
+	return ret;
 }
 
 /* common code for old and new mmaps */
@@ -92,15 +82,15 @@ long old_mmap(unsigned long addr, unsigned long len,
  */
 long sys_pipe(unsigned long __user * fildes)
 {
-        int fd[2];
-        long error;
+	int fd[2];
+	long error;
 
-        error = do_pipe(fd);
-        if (!error) {
+	error = do_pipe(fd);
+	if (!error) {
 		if (copy_to_user(fildes, fd, sizeof(fd)))
-                        error = -EFAULT;
-        }
-        return error;
+			error = -EFAULT;
+	}
+	return error;
 }
 
 
@@ -124,7 +114,7 @@ long sys_olduname(struct oldold_utsname __user * name)
 	if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
 		return -EFAULT;
 
-  	down_read(&uts_sem);
+	down_read(&uts_sem);
 
 	error = __copy_to_user(&name->sysname, &utsname()->sysname,
 			       __OLD_UTS_LEN);
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 259c49da7ff..1ac746a9eae 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -1,189 +1,126 @@
 /*
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/module.h"
-#include "linux/unistd.h"
-#include "linux/stddef.h"
-#include "linux/spinlock.h"
-#include "linux/time.h"
-#include "linux/sched.h"
+#include "linux/clockchips.h"
 #include "linux/interrupt.h"
-#include "linux/init.h"
-#include "linux/delay.h"
-#include "linux/hrtimer.h"
+#include "linux/jiffies.h"
+#include "linux/threads.h"
 #include "asm/irq.h"
 #include "asm/param.h"
-#include "asm/current.h"
 #include "kern_util.h"
-#include "mode.h"
 #include "os.h"
 
-int hz(void)
-{
-	return(HZ);
-}
-
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
 unsigned long long sched_clock(void)
 {
-	return (unsigned long long)jiffies_64 * (1000000000 / HZ);
+	return (unsigned long long)jiffies_64 * (NSEC_PER_SEC / HZ);
 }
 
-#ifdef CONFIG_UML_REAL_TIME_CLOCK
-static unsigned long long prev_nsecs[NR_CPUS];
-static long long delta[NR_CPUS];		/* Deviation per interval */
-#endif
+void timer_handler(int sig, struct uml_pt_regs *regs)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	do_IRQ(TIMER_IRQ, regs);
+	local_irq_restore(flags);
+}
 
-void timer_irq(union uml_pt_regs *regs)
+static void itimer_set_mode(enum clock_event_mode mode,
+			    struct clock_event_device *evt)
 {
-	unsigned long long ticks = 0;
-#ifdef CONFIG_UML_REAL_TIME_CLOCK
-	int c = cpu();
-	if(prev_nsecs[c]){
-		/* We've had 1 tick */
-		unsigned long long nsecs = os_nsecs();
-
-		delta[c] += nsecs - prev_nsecs[c];
-		prev_nsecs[c] = nsecs;
-
-		/* Protect against the host clock being set backwards */
-		if(delta[c] < 0)
-			delta[c] = 0;
-
-		ticks += (delta[c] * HZ) / BILLION;
-		delta[c] -= (ticks * BILLION) / HZ;
-	}
-	else prev_nsecs[c] = os_nsecs();
-#else
-	ticks = 1;
-#endif
-	while(ticks > 0){
-		do_IRQ(TIMER_IRQ, regs);
-		ticks--;
+	switch(mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		set_interval();
+		break;
+
+	case CLOCK_EVT_MODE_SHUTDOWN:
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_ONESHOT:
+		disable_timer();
+		break;
+
+	case CLOCK_EVT_MODE_RESUME:
+		break;
 	}
 }
 
-/* Protects local_offset */
-static DEFINE_SPINLOCK(timer_spinlock);
-static unsigned long long local_offset = 0;
-
-static inline unsigned long long get_time(void)
+static int itimer_next_event(unsigned long delta,
+			     struct clock_event_device *evt)
 {
-	unsigned long long nsecs;
-	unsigned long flags;
-
-	spin_lock_irqsave(&timer_spinlock, flags);
-	nsecs = os_nsecs();
-	nsecs += local_offset;
-	spin_unlock_irqrestore(&timer_spinlock, flags);
-
-	return nsecs;
+	return timer_one_shot(delta + 1);
 }
 
-irqreturn_t um_timer(int irq, void *dev)
+static struct clock_event_device itimer_clockevent = {
+	.name		= "itimer",
+	.rating		= 250,
+	.cpumask	= CPU_MASK_ALL,
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+	.set_mode	= itimer_set_mode,
+	.set_next_event = itimer_next_event,
+	.shift		= 32,
+	.irq		= 0,
+};
+
+static irqreturn_t um_timer(int irq, void *dev)
 {
-	unsigned long long nsecs;
-	unsigned long flags;
-
-	write_seqlock_irqsave(&xtime_lock, flags);
-
-	do_timer(1);
-
-#ifdef CONFIG_UML_REAL_TIME_CLOCK
-	nsecs = get_time();
-#else
-	nsecs = (unsigned long long) xtime.tv_sec * BILLION + xtime.tv_nsec +
-		BILLION / HZ;
-#endif
-	xtime.tv_sec = nsecs / NSEC_PER_SEC;
-	xtime.tv_nsec = nsecs - xtime.tv_sec * NSEC_PER_SEC;
-
-	write_sequnlock_irqrestore(&xtime_lock, flags);
+	(*itimer_clockevent.event_handler)(&itimer_clockevent);
 
 	return IRQ_HANDLED;
 }
 
-static void register_timer(void)
+static cycle_t itimer_read(void)
+{
+	return os_nsecs();
+}
+
+static struct clocksource itimer_clocksource = {
+	.name		= "itimer",
+	.rating		= 300,
+	.read		= itimer_read,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.mult		= 1,
+	.shift		= 0,
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static void __init setup_itimer(void)
 {
 	int err;
 
 	err = request_irq(TIMER_IRQ, um_timer, IRQF_DISABLED, "timer", NULL);
-	if(err != 0)
+	if (err != 0)
 		printk(KERN_ERR "register_timer : request_irq failed - "
 		       "errno = %d\n", -err);
 
-	err = set_interval(1);
-	if(err != 0)
-		printk(KERN_ERR "register_timer : set_interval failed - "
-		       "errno = %d\n", -err);
+	itimer_clockevent.mult = div_sc(HZ, NSEC_PER_SEC, 32);
+	itimer_clockevent.max_delta_ns =
+		clockevent_delta2ns(60 * HZ, &itimer_clockevent);
+	itimer_clockevent.min_delta_ns =
+		clockevent_delta2ns(1, &itimer_clockevent);
+	err = clocksource_register(&itimer_clocksource);
+	if (err) {
+		printk(KERN_ERR "clocksource_register returned %d\n", err);
+		return;
+	}
+	clockevents_register_device(&itimer_clockevent);
 }
 
 extern void (*late_time_init)(void);
 
-void time_init(void)
+void __init time_init(void)
 {
 	long long nsecs;
 
-	nsecs = os_nsecs();
-	set_normalized_timespec(&wall_to_monotonic, -nsecs / BILLION,
-				-nsecs % BILLION);
-	set_normalized_timespec(&xtime, nsecs / BILLION, nsecs % BILLION);
-	late_time_init = register_timer;
-}
-
-void do_gettimeofday(struct timeval *tv)
-{
-#ifdef CONFIG_UML_REAL_TIME_CLOCK
-	unsigned long long nsecs = get_time();
-#else
-	unsigned long long nsecs = (unsigned long long) xtime.tv_sec * BILLION +
-		xtime.tv_nsec;
-#endif
-	tv->tv_sec = nsecs / NSEC_PER_SEC;
-	/* Careful about calculations here - this was originally done as
-	 * (nsecs - tv->tv_sec * NSEC_PER_SEC) / NSEC_PER_USEC
-	 * which gave bogus (> 1000000) values.  Dunno why, suspect gcc
-	 * (4.0.0) miscompiled it, or there's a subtle 64/32-bit conversion
-	 * problem that I missed.
-	 */
-	nsecs -= tv->tv_sec * NSEC_PER_SEC;
-	tv->tv_usec = (unsigned long) nsecs / NSEC_PER_USEC;
-}
-
-static inline void set_time(unsigned long long nsecs)
-{
-	unsigned long long now;
-	unsigned long flags;
-
-	spin_lock_irqsave(&timer_spinlock, flags);
-	now = os_nsecs();
-	local_offset = nsecs - now;
-	spin_unlock_irqrestore(&timer_spinlock, flags);
-
-	clock_was_set();
-}
-
-int do_settimeofday(struct timespec *tv)
-{
-	set_time((unsigned long long) tv->tv_sec * NSEC_PER_SEC + tv->tv_nsec);
-
-	return 0;
-}
+	timer_init();
 
-void timer_handler(int sig, union uml_pt_regs *regs)
-{
-	if(current_thread->cpu == 0)
-		timer_irq(regs);
-	local_irq_disable();
-	irq_enter();
-	update_process_times(CHOOSE_MODE(
-	                     (UPT_SC(regs) && user_context(UPT_SP(regs))),
-			     (regs)->skas.is_user));
-	irq_exit();
-	local_irq_enable();
+	nsecs = os_nsecs();
+	set_normalized_timespec(&wall_to_monotonic, -nsecs / NSEC_PER_SEC,
+				-nsecs % NSEC_PER_SEC);
+	set_normalized_timespec(&xtime, nsecs / NSEC_PER_SEC,
+				nsecs % NSEC_PER_SEC);
+	late_time_init = setup_itimer;
 }
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 8a8d5285144..f4a0e407eee 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -1,130 +1,182 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include "linux/mm.h"
-#include "asm/page.h"
-#include "asm/pgalloc.h"
 #include "asm/pgtable.h"
 #include "asm/tlbflush.h"
-#include "choose-mode.h"
-#include "mode_kern.h"
 #include "as-layout.h"
-#include "tlb.h"
-#include "mem.h"
 #include "mem_user.h"
 #include "os.h"
+#include "skas.h"
+#include "tlb.h"
+
+struct host_vm_change {
+	struct host_vm_op {
+		enum { NONE, MMAP, MUNMAP, MPROTECT } type;
+		union {
+			struct {
+				unsigned long addr;
+				unsigned long len;
+				unsigned int prot;
+				int fd;
+				__u64 offset;
+			} mmap;
+			struct {
+				unsigned long addr;
+				unsigned long len;
+			} munmap;
+			struct {
+				unsigned long addr;
+				unsigned long len;
+				unsigned int prot;
+			} mprotect;
+		} u;
+	} ops[1];
+	int index;
+	struct mm_id *id;
+	void *data;
+	int force;
+};
+
+#define INIT_HVC(mm, force) \
+	((struct host_vm_change) \
+	 { .ops		= { { .type = NONE } },	\
+	   .id		= &mm->context.id, \
+       	   .data	= NULL, \
+	   .index	= 0, \
+	   .force	= force })
+
+static int do_ops(struct host_vm_change *hvc, int end,
+		  int finished)
+{
+	struct host_vm_op *op;
+	int i, ret = 0;
+
+	for (i = 0; i < end && !ret; i++) {
+		op = &hvc->ops[i];
+		switch(op->type) {
+		case MMAP:
+			ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
+				  op->u.mmap.prot, op->u.mmap.fd,
+				  op->u.mmap.offset, finished, &hvc->data);
+			break;
+		case MUNMAP:
+			ret = unmap(hvc->id, op->u.munmap.addr,
+				    op->u.munmap.len, finished, &hvc->data);
+			break;
+		case MPROTECT:
+			ret = protect(hvc->id, op->u.mprotect.addr,
+				      op->u.mprotect.len, op->u.mprotect.prot,
+				      finished, &hvc->data);
+			break;
+		default:
+			printk(KERN_ERR "Unknown op type %d in do_ops\n",
+			       op->type);
+			break;
+		}
+	}
+
+	return ret;
+}
 
 static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
-		    unsigned int prot, struct host_vm_op *ops, int *index,
-		    int last_filled, union mm_context *mmu, void **flush,
-		    int (*do_ops)(union mm_context *, struct host_vm_op *,
-				  int, int, void **))
+		    unsigned int prot, struct host_vm_change *hvc)
 {
 	__u64 offset;
 	struct host_vm_op *last;
 	int fd, ret = 0;
 
 	fd = phys_mapping(phys, &offset);
-	if(*index != -1){
-		last = &ops[*index];
-		if((last->type == MMAP) &&
+	if (hvc->index != 0) {
+		last = &hvc->ops[hvc->index - 1];
+		if ((last->type == MMAP) &&
 		   (last->u.mmap.addr + last->u.mmap.len == virt) &&
 		   (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
-		   (last->u.mmap.offset + last->u.mmap.len == offset)){
+		   (last->u.mmap.offset + last->u.mmap.len == offset)) {
 			last->u.mmap.len += len;
 			return 0;
 		}
 	}
 
-	if(*index == last_filled){
-		ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
-		*index = -1;
+	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
+		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
+		hvc->index = 0;
 	}
 
-	ops[++*index] = ((struct host_vm_op) { .type	= MMAP,
-			     			.u = { .mmap = {
-						       .addr	= virt,
-						       .len	= len,
-						       .prot	= prot,
-						       .fd	= fd,
-						       .offset	= offset }
+	hvc->ops[hvc->index++] = ((struct host_vm_op)
+				  { .type	= MMAP,
+				    .u = { .mmap = { .addr	= virt,
+						     .len	= len,
+						     .prot	= prot,
+						     .fd	= fd,
+						     .offset	= offset }
 			   } });
 	return ret;
 }
 
 static int add_munmap(unsigned long addr, unsigned long len,
-		      struct host_vm_op *ops, int *index, int last_filled,
-		      union mm_context *mmu, void **flush,
-		      int (*do_ops)(union mm_context *, struct host_vm_op *,
-				    int, int, void **))
+		      struct host_vm_change *hvc)
 {
 	struct host_vm_op *last;
 	int ret = 0;
 
-	if(*index != -1){
-		last = &ops[*index];
-		if((last->type == MUNMAP) &&
-		   (last->u.munmap.addr + last->u.mmap.len == addr)){
+	if (hvc->index != 0) {
+		last = &hvc->ops[hvc->index - 1];
+		if ((last->type == MUNMAP) &&
+		   (last->u.munmap.addr + last->u.mmap.len == addr)) {
 			last->u.munmap.len += len;
 			return 0;
 		}
 	}
 
-	if(*index == last_filled){
-		ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
-		*index = -1;
+	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
+		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
+		hvc->index = 0;
 	}
 
-	ops[++*index] = ((struct host_vm_op) { .type	= MUNMAP,
-			     		       .u = { .munmap = {
-						        .addr	= addr,
-							.len	= len } } });
+	hvc->ops[hvc->index++] = ((struct host_vm_op)
+				  { .type	= MUNMAP,
+			     	    .u = { .munmap = { .addr	= addr,
+						       .len	= len } } });
 	return ret;
 }
 
 static int add_mprotect(unsigned long addr, unsigned long len,
-			unsigned int prot, struct host_vm_op *ops, int *index,
-			int last_filled, union mm_context *mmu, void **flush,
-			int (*do_ops)(union mm_context *, struct host_vm_op *,
-				      int, int, void **))
+			unsigned int prot, struct host_vm_change *hvc)
 {
 	struct host_vm_op *last;
 	int ret = 0;
 
-	if(*index != -1){
-		last = &ops[*index];
-		if((last->type == MPROTECT) &&
+	if (hvc->index != 0) {
+		last = &hvc->ops[hvc->index - 1];
+		if ((last->type == MPROTECT) &&
 		   (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
-		   (last->u.mprotect.prot == prot)){
+		   (last->u.mprotect.prot == prot)) {
 			last->u.mprotect.len += len;
 			return 0;
 		}
 	}
 
-	if(*index == last_filled){
-		ret = (*do_ops)(mmu, ops, last_filled, 0, flush);
-		*index = -1;
+	if (hvc->index == ARRAY_SIZE(hvc->ops)) {
+		ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
+		hvc->index = 0;
 	}
 
-	ops[++*index] = ((struct host_vm_op) { .type	= MPROTECT,
-			     		       .u = { .mprotect = {
-						       .addr	= addr,
-						       .len	= len,
-						       .prot	= prot } } });
+	hvc->ops[hvc->index++] = ((struct host_vm_op)
+				  { .type	= MPROTECT,
+			     	    .u = { .mprotect = { .addr	= addr,
+							 .len	= len,
+							 .prot	= prot } } });
 	return ret;
 }
 
 #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
 
 static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
-				   unsigned long end, struct host_vm_op *ops,
-				   int last_op, int *op_index, int force,
-				   union mm_context *mmu, void **flush,
-				   int (*do_ops)(union mm_context *,
-						 struct host_vm_op *, int, int,
-						 void **))
+				   unsigned long end,
+				   struct host_vm_change *hvc)
 {
 	pte_t *pte;
 	int r, w, x, prot, ret = 0;
@@ -142,29 +194,22 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
 		}
 		prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
 			(x ? UM_PROT_EXEC : 0));
-		if(force || pte_newpage(*pte)){
-			if(pte_present(*pte))
+		if (hvc->force || pte_newpage(*pte)) {
+			if (pte_present(*pte))
 				ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
-					       PAGE_SIZE, prot, ops, op_index,
-					       last_op, mmu, flush, do_ops);
-			else ret = add_munmap(addr, PAGE_SIZE, ops, op_index,
-					      last_op, mmu, flush, do_ops);
+					       PAGE_SIZE, prot, hvc);
+			else ret = add_munmap(addr, PAGE_SIZE, hvc);
 		}
-		else if(pte_newprot(*pte))
-			ret = add_mprotect(addr, PAGE_SIZE, prot, ops, op_index,
-					   last_op, mmu, flush, do_ops);
+		else if (pte_newprot(*pte))
+			ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
 		*pte = pte_mkuptodate(*pte);
 	} while (pte++, addr += PAGE_SIZE, ((addr != end) && !ret));
 	return ret;
 }
 
 static inline int update_pmd_range(pud_t *pud, unsigned long addr,
-				   unsigned long end, struct host_vm_op *ops,
-				   int last_op, int *op_index, int force,
-				   union mm_context *mmu, void **flush,
-				   int (*do_ops)(union mm_context *,
-						 struct host_vm_op *, int, int,
-						 void **))
+				   unsigned long end,
+				   struct host_vm_change *hvc)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -173,28 +218,20 @@ static inline int update_pmd_range(pud_t *pud, unsigned long addr,
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if(!pmd_present(*pmd)){
-			if(force || pmd_newpage(*pmd)){
-				ret = add_munmap(addr, next - addr, ops,
-						 op_index, last_op, mmu,
-						 flush, do_ops);
+		if (!pmd_present(*pmd)) {
+			if (hvc->force || pmd_newpage(*pmd)) {
+				ret = add_munmap(addr, next - addr, hvc);
 				pmd_mkuptodate(*pmd);
 			}
 		}
-		else ret = update_pte_range(pmd, addr, next, ops, last_op,
-					    op_index, force, mmu, flush,
-					    do_ops);
+		else ret = update_pte_range(pmd, addr, next, hvc);
 	} while (pmd++, addr = next, ((addr != end) && !ret));
 	return ret;
 }
 
 static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
-				   unsigned long end, struct host_vm_op *ops,
-				   int last_op, int *op_index, int force,
-				   union mm_context *mmu, void **flush,
-				   int (*do_ops)(union mm_context *,
-						 struct host_vm_op *, int, int,
-						 void **))
+				   unsigned long end,
+				   struct host_vm_change *hvc)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -203,56 +240,45 @@ static inline int update_pud_range(pgd_t *pgd, unsigned long addr,
 	pud = pud_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if(!pud_present(*pud)){
-			if(force || pud_newpage(*pud)){
-				ret = add_munmap(addr, next - addr, ops,
-						 op_index, last_op, mmu,
-						 flush, do_ops);
+		if (!pud_present(*pud)) {
+			if (hvc->force || pud_newpage(*pud)) {
+				ret = add_munmap(addr, next - addr, hvc);
 				pud_mkuptodate(*pud);
 			}
 		}
-		else ret = update_pmd_range(pud, addr, next, ops, last_op,
-					    op_index, force, mmu, flush,
-					    do_ops);
+		else ret = update_pmd_range(pud, addr, next, hvc);
 	} while (pud++, addr = next, ((addr != end) && !ret));
 	return ret;
 }
 
 void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
-		      unsigned long end_addr, int force,
-		      int (*do_ops)(union mm_context *, struct host_vm_op *,
-				    int, int, void **))
+		      unsigned long end_addr, int force)
 {
 	pgd_t *pgd;
-	union mm_context *mmu = &mm->context;
-	struct host_vm_op ops[1];
+	struct host_vm_change hvc;
 	unsigned long addr = start_addr, next;
-	int ret = 0, last_op = ARRAY_SIZE(ops) - 1, op_index = -1;
-	void *flush = NULL;
+	int ret = 0;
 
-	ops[0].type = NONE;
+	hvc = INIT_HVC(mm, force);
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end_addr);
-		if(!pgd_present(*pgd)){
-			if (force || pgd_newpage(*pgd)){
-				ret = add_munmap(addr, next - addr, ops,
-						 &op_index, last_op, mmu,
-						 &flush, do_ops);
+		if (!pgd_present(*pgd)) {
+			if (force || pgd_newpage(*pgd)) {
+				ret = add_munmap(addr, next - addr, &hvc);
 				pgd_mkuptodate(*pgd);
 			}
 		}
-		else ret = update_pud_range(pgd, addr, next, ops, last_op,
-					    &op_index, force, mmu, &flush,
-					    do_ops);
+		else ret = update_pud_range(pgd, addr, next, &hvc);
 	} while (pgd++, addr = next, ((addr != end_addr) && !ret));
 
-	if(!ret)
-		ret = (*do_ops)(mmu, ops, op_index, 1, &flush);
+	if (!ret)
+		ret = do_ops(&hvc, hvc.index, 1);
 
 	/* This is not an else because ret is modified above */
-	if(ret) {
-		printk("fix_range_common: failed, killing current process\n");
+	if (ret) {
+		printk(KERN_ERR "fix_range_common: failed, killing current "
+		       "process\n");
 		force_sig(SIGKILL, current);
 	}
 }
@@ -268,17 +294,17 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 	int updated = 0, err;
 
 	mm = &init_mm;
-	for(addr = start; addr < end;){
+	for (addr = start; addr < end;) {
 		pgd = pgd_offset(mm, addr);
-		if(!pgd_present(*pgd)){
+		if (!pgd_present(*pgd)) {
 			last = ADD_ROUND(addr, PGDIR_SIZE);
-			if(last > end)
+			if (last > end)
 				last = end;
-			if(pgd_newpage(*pgd)){
+			if (pgd_newpage(*pgd)) {
 				updated = 1;
 				err = os_unmap_memory((void *) addr,
 						      last - addr);
-				if(err < 0)
+				if (err < 0)
 					panic("munmap failed, errno = %d\n",
 					      -err);
 			}
@@ -287,15 +313,15 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 		}
 
 		pud = pud_offset(pgd, addr);
-		if(!pud_present(*pud)){
+		if (!pud_present(*pud)) {
 			last = ADD_ROUND(addr, PUD_SIZE);
-			if(last > end)
+			if (last > end)
 				last = end;
-			if(pud_newpage(*pud)){
+			if (pud_newpage(*pud)) {
 				updated = 1;
 				err = os_unmap_memory((void *) addr,
 						      last - addr);
-				if(err < 0)
+				if (err < 0)
 					panic("munmap failed, errno = %d\n",
 					      -err);
 			}
@@ -304,15 +330,15 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 		}
 
 		pmd = pmd_offset(pud, addr);
-		if(!pmd_present(*pmd)){
+		if (!pmd_present(*pmd)) {
 			last = ADD_ROUND(addr, PMD_SIZE);
-			if(last > end)
+			if (last > end)
 				last = end;
-			if(pmd_newpage(*pmd)){
+			if (pmd_newpage(*pmd)) {
 				updated = 1;
 				err = os_unmap_memory((void *) addr,
 						      last - addr);
-				if(err < 0)
+				if (err < 0)
 					panic("munmap failed, errno = %d\n",
 					      -err);
 			}
@@ -321,45 +347,110 @@ int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
 		}
 
 		pte = pte_offset_kernel(pmd, addr);
-		if(!pte_present(*pte) || pte_newpage(*pte)){
+		if (!pte_present(*pte) || pte_newpage(*pte)) {
 			updated = 1;
 			err = os_unmap_memory((void *) addr,
 					      PAGE_SIZE);
-			if(err < 0)
+			if (err < 0)
 				panic("munmap failed, errno = %d\n",
 				      -err);
-			if(pte_present(*pte))
+			if (pte_present(*pte))
 				map_memory(addr,
 					   pte_val(*pte) & PAGE_MASK,
 					   PAGE_SIZE, 1, 1, 1);
 		}
-		else if(pte_newprot(*pte)){
+		else if (pte_newprot(*pte)) {
 			updated = 1;
 			os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
 		}
 		addr += PAGE_SIZE;
 	}
-	return(updated);
+	return updated;
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	struct mm_struct *mm = vma->vm_mm;
+	void *flush = NULL;
+	int r, w, x, prot, err = 0;
+	struct mm_id *mm_id;
+
+	address &= PAGE_MASK;
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		goto kill;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		goto kill;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		goto kill;
+
+	pte = pte_offset_kernel(pmd, address);
+
+	r = pte_read(*pte);
+	w = pte_write(*pte);
+	x = pte_exec(*pte);
+	if (!pte_young(*pte)) {
+		r = 0;
+		w = 0;
+	} else if (!pte_dirty(*pte)) {
+		w = 0;
+	}
+
+	mm_id = &mm->context.id;
+	prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
+		(x ? UM_PROT_EXEC : 0));
+	if (pte_newpage(*pte)) {
+		if (pte_present(*pte)) {
+			unsigned long long offset;
+			int fd;
+
+			fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
+			err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
+				  1, &flush);
+		}
+		else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
+	}
+	else if (pte_newprot(*pte))
+		err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
+
+	if (err)
+		goto kill;
+
+	*pte = pte_mkuptodate(*pte);
+
+	return;
+
+kill:
+	printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
+	force_sig(SIGKILL, current);
 }
 
 pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address)
 {
-	return(pgd_offset(mm, address));
+	return pgd_offset(mm, address);
 }
 
 pud_t *pud_offset_proc(pgd_t *pgd, unsigned long address)
 {
-	return(pud_offset(pgd, address));
+	return pud_offset(pgd, address);
 }
 
 pmd_t *pmd_offset_proc(pud_t *pud, unsigned long address)
 {
-	return(pmd_offset(pud, address));
+	return pmd_offset(pud, address);
 }
 
 pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address)
 {
-	return(pte_offset_kernel(pmd, address));
+	return pte_offset_kernel(pmd, address);
 }
 
 pte_t *addr_pte(struct task_struct *task, unsigned long addr)
@@ -368,7 +459,7 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr)
 	pud_t *pud = pud_offset(pgd, addr);
 	pmd_t *pmd = pmd_offset(pud, addr);
 
-	return(pte_offset_map(pmd, addr));
+	return pte_offset_map(pmd, addr);
 }
 
 void flush_tlb_all(void)
@@ -378,35 +469,58 @@ void flush_tlb_all(void)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	CHOOSE_MODE_PROC(flush_tlb_kernel_range_tt,
-			 flush_tlb_kernel_range_common, start, end);
+	flush_tlb_kernel_range_common(start, end);
 }
 
 void flush_tlb_kernel_vm(void)
 {
-	CHOOSE_MODE(flush_tlb_kernel_vm_tt(),
-		    flush_tlb_kernel_range_common(start_vm, end_vm));
+	flush_tlb_kernel_range_common(start_vm, end_vm);
 }
 
 void __flush_tlb_one(unsigned long addr)
 {
-	CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr);
+	flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
+}
+
+static void fix_range(struct mm_struct *mm, unsigned long start_addr,
+		      unsigned long end_addr, int force)
+{
+	if (!proc_mm && (end_addr > STUB_START))
+		end_addr = STUB_START;
+
+	fix_range_common(mm, start_addr, end_addr, force);
 }
 
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		     unsigned long end)
 {
-	CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, vma, start,
-			 end);
+	if (vma->vm_mm == NULL)
+		flush_tlb_kernel_range_common(start, end);
+	else fix_range(vma->vm_mm, start, end, 0);
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm);
+	unsigned long end;
+
+	/*
+	 * Don't bother flushing if this address space is about to be
+	 * destroyed.
+	 */
+	if (atomic_read(&mm->mm_users) == 0)
+		return;
+
+	end = proc_mm ? task_size : STUB_START;
+	fix_range(mm, 0, end, 0);
 }
 
 void force_flush_all(void)
 {
-	CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas());
-}
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma = mm->mmap;
 
+	while (vma != NULL) {
+		fix_range(mm, vma->vm_start, vma->vm_end, 1);
+		vma = vma->vm_next;
+	}
+}
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 3850d53f79f..bd060551e61 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -1,40 +1,24 @@
 /*
- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "asm/errno.h"
-#include "linux/sched.h"
-#include "linux/mm.h"
-#include "linux/spinlock.h"
-#include "linux/init.h"
-#include "linux/ptrace.h"
-#include "asm/semaphore.h"
-#include "asm/pgtable.h"
-#include "asm/pgalloc.h"
-#include "asm/tlbflush.h"
-#include "asm/a.out.h"
-#include "asm/current.h"
-#include "asm/irq.h"
-#include "sysdep/sigcontext.h"
-#include "kern_util.h"
-#include "as-layout.h"
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 #include "arch.h"
-#include "kern.h"
-#include "chan_kern.h"
-#include "mconsole_kern.h"
-#include "mem.h"
-#include "mem_kern.h"
-#include "sysdep/sigcontext.h"
-#include "sysdep/ptrace.h"
-#include "os.h"
-#ifdef CONFIG_MODE_SKAS
-#include "skas.h"
-#endif
+#include "as-layout.h"
+#include "kern_util.h"
 #include "os.h"
+#include "sysdep/sigcontext.h"
 
-/* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by segv(). */
+/*
+ * Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
+ * segv().
+ */
 int handle_page_fault(unsigned long address, unsigned long ip,
 		      int is_write, int is_user, int *code_out)
 {
@@ -48,31 +32,33 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 
 	*code_out = SEGV_MAPERR;
 
-	/* If the fault was during atomic operation, don't take the fault, just
-	 * fail. */
+	/*
+	 * If the fault was during atomic operation, don't take the fault, just
+	 * fail.
+	 */
 	if (in_atomic())
 		goto out_nosemaphore;
 
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
-	if(!vma)
+	if (!vma)
 		goto out;
-	else if(vma->vm_start <= address)
+	else if (vma->vm_start <= address)
 		goto good_area;
-	else if(!(vma->vm_flags & VM_GROWSDOWN))
+	else if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto out;
-	else if(is_user && !ARCH_IS_STACKGROW(address))
+	else if (is_user && !ARCH_IS_STACKGROW(address))
 		goto out;
-	else if(expand_stack(vma, address))
+	else if (expand_stack(vma, address))
 		goto out;
 
 good_area:
 	*code_out = SEGV_ACCERR;
-	if(is_write && !(vma->vm_flags & VM_WRITE))
+	if (is_write && !(vma->vm_flags & VM_WRITE))
 		goto out;
 
 	/* Don't require VM_READ|VM_EXEC for write faults! */
-	if(!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
+	if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
 		goto out;
 
 	do {
@@ -98,9 +84,10 @@ survive:
 		pud = pud_offset(pgd, address);
 		pmd = pmd_offset(pud, address);
 		pte = pte_offset_kernel(pmd, address);
-	} while(!pte_present(*pte));
+	} while (!pte_present(*pte));
 	err = 0;
-	/* The below warning was added in place of
+	/*
+	 * The below warning was added in place of
 	 *	pte_mkyoung(); if (is_write) pte_mkdirty();
 	 * If it's triggered, we'd see normally a hang here (a clean pte is
 	 * marked read-only to emulate the dirty bit).
@@ -114,7 +101,7 @@ survive:
 out:
 	up_read(&mm->mmap_sem);
 out_nosemaphore:
-	return(err);
+	return err;
 
 /*
  * We ran out of memory, or some other thing happened to us that made
@@ -141,11 +128,11 @@ static void bad_segv(struct faultinfo fi, unsigned long ip)
 	force_sig_info(SIGSEGV, &si, current);
 }
 
-static void segv_handler(int sig, union uml_pt_regs *regs)
+static void segv_handler(int sig, struct uml_pt_regs *regs)
 {
 	struct faultinfo * fi = UPT_FAULTINFO(regs);
 
-	if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){
+	if (UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)) {
 		bad_segv(*fi, UPT_IP(regs));
 		return;
 	}
@@ -159,45 +146,49 @@ static void segv_handler(int sig, union uml_pt_regs *regs)
  * give us bad data!
  */
 unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
-		   union uml_pt_regs *regs)
+		   struct uml_pt_regs *regs)
 {
 	struct siginfo si;
-	void *catcher;
+	jmp_buf *catcher;
 	int err;
 	int is_write = FAULT_WRITE(fi);
 	unsigned long address = FAULT_ADDRESS(fi);
 
-	if(!is_user && (address >= start_vm) && (address < end_vm)){
+	if (!is_user && (address >= start_vm) && (address < end_vm)) {
 		flush_tlb_kernel_vm();
 		return 0;
 	}
-	else if(current->mm == NULL) {
+	else if (current->mm == NULL) {
 		show_regs(container_of(regs, struct pt_regs, regs));
-  		panic("Segfault with no mm");
+		panic("Segfault with no mm");
 	}
 
 	if (SEGV_IS_FIXABLE(&fi) || SEGV_MAYBE_FIXABLE(&fi))
-		err = handle_page_fault(address, ip, is_write, is_user, &si.si_code);
+		err = handle_page_fault(address, ip, is_write, is_user,
+					&si.si_code);
 	else {
 		err = -EFAULT;
-		/* A thread accessed NULL, we get a fault, but CR2 is invalid.
-		 * This code is used in __do_copy_from_user() of TT mode. */
+		/*
+		 * A thread accessed NULL, we get a fault, but CR2 is invalid.
+		 * This code is used in __do_copy_from_user() of TT mode.
+		 * XXX tt mode is gone, so maybe this isn't needed any more
+		 */
 		address = 0;
 	}
 
 	catcher = current->thread.fault_catcher;
-	if(!err)
+	if (!err)
 		return 0;
-	else if(catcher != NULL){
+	else if (catcher != NULL) {
 		current->thread.fault_addr = (void *) address;
-		do_longjmp(catcher, 1);
+		UML_LONGJMP(catcher, 1);
 	}
-	else if(current->thread.fault_addr != NULL)
+	else if (current->thread.fault_addr != NULL)
 		panic("fault_addr set but no fault catcher");
-	else if(!is_user && arch_fixup(ip, regs))
+	else if (!is_user && arch_fixup(ip, regs))
 		return 0;
 
-	if(!is_user) {
+	if (!is_user) {
 		show_regs(container_of(regs, struct pt_regs, regs));
 		panic("Kernel mode fault at addr 0x%lx, ip 0x%lx",
 		      address, ip);
@@ -211,7 +202,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 		current->thread.arch.faultinfo = fi;
 		force_sig_info(SIGBUS, &si, current);
 	} else if (err == -ENOMEM) {
-		printk("VM: killing process %s\n", current->comm);
+		printk(KERN_INFO "VM: killing process %s\n", current->comm);
 		do_exit(SIGKILL);
 	} else {
 		BUG_ON(err != -EFAULT);
@@ -223,15 +214,15 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
 	return 0;
 }
 
-void relay_signal(int sig, union uml_pt_regs *regs)
+void relay_signal(int sig, struct uml_pt_regs *regs)
 {
-	if(arch_handle_signal(sig, regs))
+	if (arch_handle_signal(sig, regs))
 		return;
 
-	if(!UPT_IS_USER(regs)){
-		if(sig == SIGBUS)
-			printk("Bus error - the host /dev/shm or /tmp mount "
-			       "likely just ran out of space\n");
+	if (!UPT_IS_USER(regs)) {
+		if (sig == SIGBUS)
+			printk(KERN_ERR "Bus error - the host /dev/shm or /tmp "
+			       "mount likely just ran out of space\n");
 		panic("Kernel mode signal %d", sig);
 	}
 
@@ -239,14 +230,14 @@ void relay_signal(int sig, union uml_pt_regs *regs)
 	force_sig(sig, current);
 }
 
-static void bus_handler(int sig, union uml_pt_regs *regs)
+static void bus_handler(int sig, struct uml_pt_regs *regs)
 {
-	if(current->thread.fault_catcher != NULL)
-		do_longjmp(current->thread.fault_catcher, 1);
+	if (current->thread.fault_catcher != NULL)
+		UML_LONGJMP(current->thread.fault_catcher, 1);
 	else relay_signal(sig, regs);
 }
 
-static void winch(int sig, union uml_pt_regs *regs)
+static void winch(int sig, struct uml_pt_regs *regs)
 {
 	do_IRQ(WINCH_IRQ, regs);
 }
diff --git a/arch/um/kernel/tt/Makefile b/arch/um/kernel/tt/Makefile
deleted file mode 100644
index 6939e5af847..00000000000
--- a/arch/um/kernel/tt/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# 
-# Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
-# Licensed under the GPL
-#
-
-obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \
-	syscall_kern.o syscall_user.o tlb.o tracer.o trap_user.o \
-	uaccess.o uaccess_user.o
-
-obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/
-
-USER_OBJS := gdb.o tracer.o
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/tt/exec_kern.c b/arch/um/kernel/tt/exec_kern.c
deleted file mode 100644
index 40126cb5180..00000000000
--- a/arch/um/kernel/tt/exec_kern.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include "linux/kernel.h"
-#include "linux/mm.h"
-#include "asm/signal.h"
-#include "asm/ptrace.h"
-#include "asm/uaccess.h"
-#include "asm/pgalloc.h"
-#include "asm/tlbflush.h"
-#include "kern_util.h"
-#include "irq_user.h"
-#include "mem_user.h"
-#include "os.h"
-#include "tlb.h"
-#include "mode.h"
-
-static int exec_tramp(void *sig_stack)
-{
-	init_new_thread_stack(sig_stack, NULL);
-	init_new_thread_signals();
-	os_stop_process(os_getpid());
-	return(0);
-}
-
-void flush_thread_tt(void)
-{
-	unsigned long stack;
-	int new_pid;
-
-	stack = alloc_stack(0, 0);
-	if(stack == 0){
-		printk(KERN_ERR 
-		       "flush_thread : failed to allocate temporary stack\n");
-		do_exit(SIGKILL);
-	}
-		
-	new_pid = start_fork_tramp(task_stack_page(current), stack, 0, exec_tramp);
-	if(new_pid < 0){
-		printk(KERN_ERR 
-		       "flush_thread : new thread failed, errno = %d\n",
-		       -new_pid);
-		do_exit(SIGKILL);
-	}
-
-	if(current_thread->cpu == 0)
-		forward_interrupts(new_pid);
-	current->thread.request.op = OP_EXEC;
-	current->thread.request.u.exec.pid = new_pid;
-	unprotect_stack((unsigned long) current_thread);
-	os_usr1_process(os_getpid());
-	change_sig(SIGUSR1, 1);
-
-	change_sig(SIGUSR1, 0);
-	enable_timer();
-	free_page(stack);
-	protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1);
-	stack_protections((unsigned long) current_thread);
-	force_flush_all();
-	unblock_signals();
-}
-
-void start_thread_tt(struct pt_regs *regs, unsigned long eip, 
-		     unsigned long esp)
-{
-	set_fs(USER_DS);
-	flush_tlb_mm(current->mm);
-	PT_REGS_IP(regs) = eip;
-	PT_REGS_SP(regs) = esp;
-	PT_FIX_EXEC_STACK(esp);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/exec_user.c b/arch/um/kernel/tt/exec_user.c
deleted file mode 100644
index 7b5f2181cf5..00000000000
--- a/arch/um/kernel/tt/exec_user.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <sched.h>
-#include <errno.h>
-#include <sys/wait.h>
-#include <signal.h>
-#include "kern_util.h"
-#include "user.h"
-#include "ptrace_user.h"
-#include "os.h"
-
-void do_exec(int old_pid, int new_pid)
-{
-	unsigned long regs[FRAME_SIZE];
-	int err;
-
-	if((ptrace(PTRACE_ATTACH, new_pid, 0, 0) < 0) ||
-	   (ptrace(PTRACE_CONT, new_pid, 0, 0) < 0))
-		tracer_panic("do_exec failed to attach proc - errno = %d",
-			     errno);
-
-	CATCH_EINTR(err = waitpid(new_pid, 0, WUNTRACED));
-	if (err < 0)
-		tracer_panic("do_exec failed to attach proc in waitpid - errno = %d",
-			     errno);
-
-	if(ptrace_getregs(old_pid, regs) < 0)
-		tracer_panic("do_exec failed to get registers - errno = %d",
-			     errno);
-
-	os_kill_ptraced_process(old_pid, 0);
-
-	if (ptrace(PTRACE_OLDSETOPTIONS, new_pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0)
-		tracer_panic("do_exec: PTRACE_SETOPTIONS failed, errno = %d", errno);
-
-	if(ptrace_setregs(new_pid, regs) < 0)
-		tracer_panic("do_exec failed to start new proc - errno = %d",
-			     errno);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/gdb.c b/arch/um/kernel/tt/gdb.c
deleted file mode 100644
index 030e4658f36..00000000000
--- a/arch/um/kernel/tt/gdb.c
+++ /dev/null
@@ -1,280 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <signal.h>
-#include <sys/types.h>
-#include "ptrace_user.h"
-#include "uml-config.h"
-#include "kern_constants.h"
-#include "chan_user.h"
-#include "init.h"
-#include "user.h"
-#include "debug.h"
-#include "kern_util.h"
-#include "tt.h"
-#include "sysdep/thread.h"
-#include "os.h"
-
-extern int debugger_pid;
-extern int debugger_fd;
-extern int debugger_parent;
-
-int detach(int pid, int sig)
-{
-	return(ptrace(PTRACE_DETACH, pid, 0, sig));
-}
-
-int attach(int pid)
-{
-	int err;
-
-	err = ptrace(PTRACE_ATTACH, pid, 0, 0);
-	if(err < 0) return(-errno);
-	else return(err);
-}
-
-int cont(int pid)
-{
-	return(ptrace(PTRACE_CONT, pid, 0, 0));
-}
-
-#ifdef UML_CONFIG_PT_PROXY
-
-int debugger_signal(int status, pid_t pid)
-{
-	return(debugger_proxy(status, pid));
-}
-
-void child_signal(pid_t pid, int status)
-{
-	child_proxy(pid, status);
-}
-
-static void gdb_announce(char *dev_name, int dev)
-{
-	printf("gdb assigned device '%s'\n", dev_name);
-}
-
-static struct chan_opts opts = {
-	.announce  	= gdb_announce,
-	.xterm_title 	= "UML kernel debugger",
-	.raw 		= 0,
-	.tramp_stack 	= 0,
-	.in_kernel  	= 0,
-};
-
-/* Accessed by the tracing thread, which automatically serializes access */
-static void *xterm_data;
-static int xterm_fd;
-
-extern void *xterm_init(char *, int, struct chan_opts *);
-extern int xterm_open(int, int, int, void *, char **);
-extern void xterm_close(int, void *);
-
-int open_gdb_chan(void)
-{
-	char stack[UM_KERN_PAGE_SIZE], *dummy;
-
-	opts.tramp_stack = (unsigned long) stack;
-	xterm_data = xterm_init("", 0, &opts);
-	xterm_fd = xterm_open(1, 1, 1, xterm_data, &dummy);
-	return(xterm_fd);
-}
-
-static void exit_debugger_cb(void *unused)
-{
-	if(debugger_pid != -1){
-		if(gdb_pid != -1){
-			fake_child_exit();
-			gdb_pid = -1;
-		}
-		else kill_child_dead(debugger_pid);
-		debugger_pid = -1;
-		if(debugger_parent != -1)
-			detach(debugger_parent, SIGINT);
-	}
-	if(xterm_data != NULL) xterm_close(xterm_fd, xterm_data);
-}
-
-static void exit_debugger(void)
-{
-	initial_thread_cb(exit_debugger_cb, NULL);
-}
-
-__uml_exitcall(exit_debugger);
-
-struct gdb_data {
-	char *str;
-	int err;
-};
-
-extern char *linux_prog;
-
-static void config_gdb_cb(void *arg)
-{
-	struct gdb_data *data = arg;
-	void *task;
-	int pid;
-
-	data->err = -1;
-	if(debugger_pid != -1) exit_debugger_cb(NULL);
-	if(!strncmp(data->str, "pid,", strlen("pid,"))){
-		data->str += strlen("pid,");
-		pid = strtoul(data->str, NULL, 0);
-		task = cpu_tasks[0].task;
-		debugger_pid = attach_debugger(TASK_EXTERN_PID(task), pid, 0);
-		if(debugger_pid != -1){
-			data->err = 0;
-			gdb_pid = pid;
-		}
-		return;
-	}
-	data->err = 0;
-	debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd);
-	init_proxy(debugger_pid, 0, 0);
-}
-
-int gdb_config(char *str, char **error_out)
-{
-	struct gdb_data data;
-
-	if(*str++ != '=') return(-1);
-	data.str = str;
-	initial_thread_cb(config_gdb_cb, &data);
-	return(data.err);
-}
-
-void remove_gdb_cb(void *unused)
-{
-	exit_debugger_cb(NULL);
-}
-
-int gdb_remove(int unused, char **error_out)
-{
-	initial_thread_cb(remove_gdb_cb, NULL);
-        return 0;
-}
-
-void signal_usr1(int sig)
-{
-	if(debugger_pid != -1){
-		printf("The debugger is already running\n");
-		return;
-	}
-	debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd);
-	init_proxy(debugger_pid, 0, 0);
-}
-
-int init_ptrace_proxy(int idle_pid, int startup, int stop)
-{
-	int pid, status;
-
-	pid = start_debugger(linux_prog, startup, stop, &debugger_fd);
-	status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT, NULL);
- 	if(pid < 0){
-		cont(idle_pid);
-		return(-1);
-	}
-	init_proxy(pid, 1, status);
-	return(pid);
-}
-
-int attach_debugger(int idle_pid, int pid, int stop)
-{
-	int status = 0, err;
-
-	err = attach(pid);
-	if(err < 0){
-		printf("Failed to attach pid %d, errno = %d\n", pid, -err);
-		return(-1);
-	}
-	if(stop) status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT, NULL);
-	init_proxy(pid, 1, status);
-	return(pid);
-}
-
-#ifdef notdef /* Put this back in when it does something useful */
-static int __init uml_gdb_init_setup(char *line, int *add)
-{
-	gdb_init = uml_strdup(line);
-	return 0;
-}
-
-__uml_setup("gdb=", uml_gdb_init_setup, 
-"gdb=<channel description>\n\n"
-);
-#endif
-
-static int __init uml_gdb_pid_setup(char *line, int *add)
-{
-	gdb_pid = strtoul(line, NULL, 0);
-	*add = 0;
-	return 0;
-}
-
-__uml_setup("gdb-pid=", uml_gdb_pid_setup, 
-"gdb-pid=<pid>\n"
-"    gdb-pid is used to attach an external debugger to UML.  This may be\n"
-"    an already-running gdb or a debugger-like process like strace.\n\n"
-);
-
-#else
-
-int debugger_signal(int status, pid_t pid){ return(0); }
-void child_signal(pid_t pid, int status){ }
-int init_ptrace_proxy(int idle_pid, int startup, int stop)
-{
-	printf("debug requested when CONFIG_PT_PROXY is off\n");
-	kill_child_dead(idle_pid);
-	exit(1);
-}
-
-void signal_usr1(int sig)
-{
-	printf("debug requested when CONFIG_PT_PROXY is off\n");
-}
-
-int attach_debugger(int idle_pid, int pid, int stop)
-{
-	printf("attach_debugger called when CONFIG_PT_PROXY "
-	       "is off\n");
-	return(-1);
-}
-
-int config_gdb(char *str)
-{
-	return(-1);
-}
-
-int remove_gdb(void)
-{
-	return(-1);
-}
-
-int init_parent_proxy(int pid)
-{
-	return(-1);
-}
-
-void debugger_parent_signal(int status, int pid)
-{
-}
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/gdb_kern.c b/arch/um/kernel/tt/gdb_kern.c
deleted file mode 100644
index 03b06bc0077..00000000000
--- a/arch/um/kernel/tt/gdb_kern.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include "linux/init.h"
-#include "mconsole_kern.h"
-
-#ifdef CONFIG_MCONSOLE
-
-extern int gdb_config(char *str, char **error_out);
-extern int gdb_remove(int n, char **error_out);
-
-static struct mc_device gdb_mc = {
-	.list		= INIT_LIST_HEAD(gdb_mc.list),
-	.name		= "gdb",
-	.config		= gdb_config,
-	.remove		= gdb_remove,
-};
-
-int gdb_mc_init(void)
-{
-	mconsole_register_dev(&gdb_mc);
-	return(0);
-}
-
-__initcall(gdb_mc_init);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/include/mode-tt.h b/arch/um/kernel/tt/include/mode-tt.h
deleted file mode 100644
index e171e15fead..00000000000
--- a/arch/um/kernel/tt/include/mode-tt.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __MODE_TT_H__
-#define __MODE_TT_H__
-
-#include "sysdep/ptrace.h"
-
-enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB };
-
-extern int tracing_pid;
-
-extern int tracer(int (*init_proc)(void *), void *sp);
-extern void sig_handler_common_tt(int sig, void *sc);
-extern void syscall_handler_tt(int sig, union uml_pt_regs *regs);
-extern void reboot_tt(void);
-extern void halt_tt(void);
-extern int is_tracer_winch(int pid, int fd, void *data);
-extern void kill_off_processes_tt(void);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/ksyms.c b/arch/um/kernel/tt/ksyms.c
deleted file mode 100644
index 84a9385a8fe..00000000000
--- a/arch/um/kernel/tt/ksyms.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include "linux/module.h"
-#include "asm/uaccess.h"
-#include "mode.h"
-
-EXPORT_SYMBOL(__do_copy_from_user);
-EXPORT_SYMBOL(__do_copy_to_user);
-EXPORT_SYMBOL(__do_strncpy_from_user);
-EXPORT_SYMBOL(__do_strnlen_user); 
-EXPORT_SYMBOL(__do_clear_user);
-EXPORT_SYMBOL(clear_user_tt);
-
-EXPORT_SYMBOL(tracing_pid);
-EXPORT_SYMBOL(honeypot);
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/mem.c b/arch/um/kernel/tt/mem.c
deleted file mode 100644
index d0c3c4975f2..00000000000
--- a/arch/um/kernel/tt/mem.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include "linux/stddef.h"
-#include "linux/mm.h"
-#include "asm/uaccess.h"
-#include "mem_user.h"
-#include "kern_util.h"
-#include "kern.h"
-#include "tt.h"
-
-void before_mem_tt(unsigned long brk_start)
-{
-	if(debug)
-		remap_data(UML_ROUND_DOWN(&_stext), UML_ROUND_UP(&_etext), 1);
-	remap_data(UML_ROUND_DOWN(&_sdata), UML_ROUND_UP(&_edata), 1);
-	remap_data(UML_ROUND_DOWN(&__bss_start), UML_ROUND_UP(&_end), 1);
-}
-
-#define SIZE ((CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS) * 0x20000000)
-#define START (CONFIG_TOP_ADDR - SIZE)
-
-unsigned long set_task_sizes_tt(unsigned long *task_size_out)
-{
-	unsigned long host_task_size;
-
-	/* Round up to the nearest 4M */
-	host_task_size = ROUND_4M((unsigned long) &host_task_size);
-	*task_size_out = START;
-
-	return host_task_size;
-}
diff --git a/arch/um/kernel/tt/mem_user.c b/arch/um/kernel/tt/mem_user.c
deleted file mode 100644
index 9774f6360c3..00000000000
--- a/arch/um/kernel/tt/mem_user.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/mman.h>
-#include "tt.h"
-#include "mem_user.h"
-#include "os.h"
-
-void remap_data(void *segment_start, void *segment_end, int w)
-{
-	void *addr;
-	unsigned long size;
-	int data, prot;
-
-	if(w) prot = PROT_WRITE;
-	else prot = 0;
-	prot |= PROT_READ | PROT_EXEC;
-	size = (unsigned long) segment_end - 
-		(unsigned long) segment_start;
-	data = create_mem_file(size);
-	addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, data, 0);
-	if(addr == MAP_FAILED){
-		perror("mapping new data segment");
-		exit(1);
-	}
-	memcpy(addr, segment_start, size);
-	if(switcheroo(data, prot, addr, segment_start, size) < 0){
-		printf("switcheroo failed\n");
-		exit(1);
-	}
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
deleted file mode 100644
index 74347adf81b..00000000000
--- a/arch/um/kernel/tt/process_kern.c
+++ /dev/null
@@ -1,461 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include "linux/sched.h"
-#include "linux/signal.h"
-#include "linux/kernel.h"
-#include "linux/interrupt.h"
-#include "linux/ptrace.h"
-#include "asm/system.h"
-#include "asm/pgalloc.h"
-#include "asm/ptrace.h"
-#include "asm/tlbflush.h"
-#include "irq_user.h"
-#include "kern_util.h"
-#include "os.h"
-#include "kern.h"
-#include "sigcontext.h"
-#include "mem_user.h"
-#include "tlb.h"
-#include "mode.h"
-#include "mode_kern.h"
-#include "init.h"
-#include "tt.h"
-
-void switch_to_tt(void *prev, void *next)
-{
-	struct task_struct *from, *to, *prev_sched;
-	unsigned long flags;
-	int err, vtalrm, alrm, prof, cpu;
-	char c;
-
-	from = prev;
-	to = next;
-
-	cpu = task_thread_info(from)->cpu;
-	if(cpu == 0)
-		forward_interrupts(to->thread.mode.tt.extern_pid);
-#ifdef CONFIG_SMP
-	forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid);
-#endif
-	local_irq_save(flags);
-
-	vtalrm = change_sig(SIGVTALRM, 0);
-	alrm = change_sig(SIGALRM, 0);
-	prof = change_sig(SIGPROF, 0);
-
-	forward_pending_sigio(to->thread.mode.tt.extern_pid);
-
-	c = 0;
-
-	/* Notice that here we "up" the semaphore on which "to" is waiting, and
-	 * below (the read) we wait on this semaphore (which is implemented by
-	 * switch_pipe) and go sleeping. Thus, after that, we have resumed in
-	 * "to", and can't use any more the value of "from" (which is outdated),
-	 * nor the value in "to" (since it was the task which stole us the CPU,
-	 * which we don't care about). */
-
-	err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
-	if(err != sizeof(c))
-		panic("write of switch_pipe failed, err = %d", -err);
-
-	if(from->thread.mode.tt.switch_pipe[0] == -1)
-		os_kill_process(os_getpid(), 0);
-
-	err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c,
-			     sizeof(c));
-	if(err != sizeof(c))
-		panic("read of switch_pipe failed, errno = %d", -err);
-
-	/* If the process that we have just scheduled away from has exited,
-	 * then it needs to be killed here.  The reason is that, even though
-	 * it will kill itself when it next runs, that may be too late.  Its
-	 * stack will be freed, possibly before then, and if that happens,
-	 * we have a use-after-free situation.  So, it gets killed here
-	 * in case it has not already killed itself.
-	 */
-	prev_sched = current->thread.prev_sched;
-        if(prev_sched->thread.mode.tt.switch_pipe[0] == -1)
-		os_kill_process(prev_sched->thread.mode.tt.extern_pid, 1);
-
-	change_sig(SIGVTALRM, vtalrm);
-	change_sig(SIGALRM, alrm);
-	change_sig(SIGPROF, prof);
-
-	arch_switch_to_tt(prev_sched, current);
-
-	flush_tlb_all();
-	local_irq_restore(flags);
-}
-
-void release_thread_tt(struct task_struct *task)
-{
-	int pid = task->thread.mode.tt.extern_pid;
-
-	/*
-         * We first have to kill the other process, before
-         * closing its switch_pipe. Else it might wake up
-         * and receive "EOF" before we could kill it.
-         */
-	if(os_getpid() != pid)
-		os_kill_process(pid, 0);
-
-        os_close_file(task->thread.mode.tt.switch_pipe[0]);
-        os_close_file(task->thread.mode.tt.switch_pipe[1]);
-	/* use switch_pipe as flag: thread is released */
-        task->thread.mode.tt.switch_pipe[0] = -1;
-}
-
-void suspend_new_thread(int fd)
-{
-	int err;
-	char c;
-
-	os_stop_process(os_getpid());
-	err = os_read_file(fd, &c, sizeof(c));
-	if(err != sizeof(c))
-		panic("read failed in suspend_new_thread, err = %d", -err);
-}
-
-void schedule_tail(struct task_struct *prev);
-
-static void new_thread_handler(int sig)
-{
-	unsigned long disable;
-	int (*fn)(void *);
-	void *arg;
-
-	fn = current->thread.request.u.thread.proc;
-	arg = current->thread.request.u.thread.arg;
-
-	UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
-	disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) |
-		(1 << (SIGIO - 1)) | (1 << (SIGPROF - 1));
-	SC_SIGMASK(UPT_SC(&current->thread.regs.regs)) &= ~disable;
-
-	suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
-
-	force_flush_all();
-	if(current->thread.prev_sched != NULL)
-		schedule_tail(current->thread.prev_sched);
-	current->thread.prev_sched = NULL;
-
-	init_new_thread_signals();
-	enable_timer();
-	free_page(current->thread.temp_stack);
-	set_cmdline("(kernel thread)");
-
-	change_sig(SIGUSR1, 1);
-	change_sig(SIGPROF, 1);
-	local_irq_enable();
-	if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
-		do_exit(0);
-
-	/* XXX No set_user_mode here because a newly execed process will
-	 * immediately segfault on its non-existent IP, coming straight back
-	 * to the signal handler, which will call set_user_mode on its way
-	 * out.  This should probably change since it's confusing.
-	 */
-}
-
-static int new_thread_proc(void *stack)
-{
-	/* local_irq_disable is needed to block out signals until this thread is
-	 * properly scheduled.  Otherwise, the tracing thread will get mighty
-	 * upset about any signals that arrive before that.
-	 * This has the complication that it sets the saved signal mask in
-	 * the sigcontext to block signals.  This gets restored when this
-	 * thread (or a descendant, since they get a copy of this sigcontext)
-	 * returns to userspace.
-	 * So, this is compensated for elsewhere.
-	 * XXX There is still a small window until local_irq_disable() actually
-	 * finishes where signals are possible - shouldn't be a problem in
-	 * practice since SIGIO hasn't been forwarded here yet, and the
-	 * local_irq_disable should finish before a SIGVTALRM has time to be
-	 * delivered.
-	 */
-
-	local_irq_disable();
-	init_new_thread_stack(stack, new_thread_handler);
-	os_usr1_process(os_getpid());
-	change_sig(SIGUSR1, 1);
-	return(0);
-}
-
-/* Signal masking - signals are blocked at the start of fork_tramp.  They
- * are re-enabled when finish_fork_handler is entered by fork_tramp hitting
- * itself with a SIGUSR1.  set_user_mode has to be run with SIGUSR1 off,
- * so it is blocked before it's called.  They are re-enabled on sigreturn
- * despite the fact that they were blocked when the SIGUSR1 was issued because
- * copy_thread copies the parent's sigcontext, including the signal mask
- * onto the signal frame.
- */
-
-void finish_fork_handler(int sig)
-{
- 	UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
-	suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
-
-	force_flush_all();
-	if(current->thread.prev_sched != NULL)
-		schedule_tail(current->thread.prev_sched);
-	current->thread.prev_sched = NULL;
-
-	enable_timer();
-	change_sig(SIGVTALRM, 1);
-	local_irq_enable();
-	if(current->mm != current->parent->mm)
-		protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 
-			       1, 0, 1);
-	stack_protections((unsigned long) current_thread);
-
-	free_page(current->thread.temp_stack);
-	local_irq_disable();
-	change_sig(SIGUSR1, 0);
-	set_user_mode(current);
-}
-
-int fork_tramp(void *stack)
-{
-	local_irq_disable();
-	arch_init_thread();
-	init_new_thread_stack(stack, finish_fork_handler);
-
-	os_usr1_process(os_getpid());
-	change_sig(SIGUSR1, 1);
-	return(0);
-}
-
-int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp,
-		   unsigned long stack_top, struct task_struct * p, 
-		   struct pt_regs *regs)
-{
-	int (*tramp)(void *);
-	int new_pid, err;
-	unsigned long stack;
-	
-	if(current->thread.forking)
-		tramp = fork_tramp;
-	else {
-		tramp = new_thread_proc;
-		p->thread.request.u.thread = current->thread.request.u.thread;
-	}
-
-	err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1);
-	if(err < 0){
-		printk("copy_thread : pipe failed, err = %d\n", -err);
-		return(err);
-	}
-
-	stack = alloc_stack(0, 0);
-	if(stack == 0){
-		printk(KERN_ERR "copy_thread : failed to allocate "
-		       "temporary stack\n");
-		return(-ENOMEM);
-	}
-
-	clone_flags &= CLONE_VM;
-	p->thread.temp_stack = stack;
-	new_pid = start_fork_tramp(task_stack_page(p), stack, clone_flags, tramp);
-	if(new_pid < 0){
-		printk(KERN_ERR "copy_thread : clone failed - errno = %d\n", 
-		       -new_pid);
-		return(new_pid);
-	}
-
-	if(current->thread.forking){
-		sc_to_sc(UPT_SC(&p->thread.regs.regs), UPT_SC(&regs->regs));
-		SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0);
-		if(sp != 0)
-			SC_SP(UPT_SC(&p->thread.regs.regs)) = sp;
-	}
-	p->thread.mode.tt.extern_pid = new_pid;
-
-	current->thread.request.op = OP_FORK;
-	current->thread.request.u.fork.pid = new_pid;
-	os_usr1_process(os_getpid());
-
-	/* Enable the signal and then disable it to ensure that it is handled
-	 * here, and nowhere else.
-	 */
-	change_sig(SIGUSR1, 1);
-
-	change_sig(SIGUSR1, 0);
-	err = 0;
-	return(err);
-}
-
-void reboot_tt(void)
-{
-	current->thread.request.op = OP_REBOOT;
-	os_usr1_process(os_getpid());
-	change_sig(SIGUSR1, 1);
-}
-
-void halt_tt(void)
-{
-	current->thread.request.op = OP_HALT;
-	os_usr1_process(os_getpid());
-	change_sig(SIGUSR1, 1);
-}
-
-void kill_off_processes_tt(void)
-{
-	struct task_struct *p;
-	int me;
-
-	me = os_getpid();
-        for_each_process(p){
-		if(p->thread.mode.tt.extern_pid != me) 
-			os_kill_process(p->thread.mode.tt.extern_pid, 0);
-	}
-	if(init_task.thread.mode.tt.extern_pid != me) 
-		os_kill_process(init_task.thread.mode.tt.extern_pid, 0);
-}
-
-void initial_thread_cb_tt(void (*proc)(void *), void *arg)
-{
-	if(os_getpid() == tracing_pid){
-		(*proc)(arg);
-	}
-	else {
-		current->thread.request.op = OP_CB;
-		current->thread.request.u.cb.proc = proc;
-		current->thread.request.u.cb.arg = arg;
-		os_usr1_process(os_getpid());
-		change_sig(SIGUSR1, 1);
-
-		change_sig(SIGUSR1, 0);
-	}
-}
-
-int do_proc_op(void *t, int proc_id)
-{
-	struct task_struct *task;
-	struct thread_struct *thread;
-	int op, pid;
-
-	task = t;
-	thread = &task->thread;
-	op = thread->request.op;
-	switch(op){
-	case OP_NONE:
-	case OP_TRACE_ON:
-		break;
-	case OP_EXEC:
-		pid = thread->request.u.exec.pid;
-		do_exec(thread->mode.tt.extern_pid, pid);
-		thread->mode.tt.extern_pid = pid;
-		cpu_tasks[task_thread_info(task)->cpu].pid = pid;
-		break;
-	case OP_FORK:
-		attach_process(thread->request.u.fork.pid);
-		break;
-	case OP_CB:
-		(*thread->request.u.cb.proc)(thread->request.u.cb.arg);
-		break;
-	case OP_REBOOT:
-	case OP_HALT:
-		break;
-	default:
-		tracer_panic("Bad op in do_proc_op");
-		break;
-	}
-	thread->request.op = OP_NONE;
-	return(op);
-}
-
-void init_idle_tt(void)
-{
-	default_idle();
-}
-
-extern void start_kernel(void);
-
-static int start_kernel_proc(void *unused)
-{
-	int pid;
-
-	block_signals();
-	pid = os_getpid();
-
-	cpu_tasks[0].pid = pid;
-	cpu_tasks[0].task = current;
-#ifdef CONFIG_SMP
- 	cpu_online_map = cpumask_of_cpu(0);
-#endif
-	if(debug) os_stop_process(pid);
-	start_kernel();
-	return(0);
-}
-
-void set_tracing(void *task, int tracing)
-{
-	((struct task_struct *) task)->thread.mode.tt.tracing = tracing;
-}
-
-int is_tracing(void *t)
-{
-	return (((struct task_struct *) t)->thread.mode.tt.tracing);
-}
-
-int set_user_mode(void *t)
-{
-	struct task_struct *task;
-
-	task = t ? t : current;
-	if(task->thread.mode.tt.tracing) 
-		return(1);
-	task->thread.request.op = OP_TRACE_ON;
-	os_usr1_process(os_getpid());
-	return(0);
-}
-
-void set_init_pid(int pid)
-{
-	int err;
-
-	init_task.thread.mode.tt.extern_pid = pid;
-	err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1);
-	if(err)
-		panic("Can't create switch pipe for init_task, errno = %d",
-		      -err);
-}
-
-int start_uml_tt(void)
-{
-	void *sp;
-	int pages;
-
-	pages = (1 << CONFIG_KERNEL_STACK_ORDER);
-	sp = task_stack_page(&init_task) +
-		pages * PAGE_SIZE - sizeof(unsigned long);
-	return(tracer(start_kernel_proc, sp));
-}
-
-int external_pid_tt(struct task_struct *task)
-{
-	return(task->thread.mode.tt.extern_pid);
-}
-
-int thread_pid_tt(struct task_struct *task)
-{
-	return(task->thread.mode.tt.extern_pid);
-}
-
-int is_valid_pid(int pid)
-{
-	struct task_struct *task;
-
-        read_lock(&tasklist_lock);
-        for_each_process(task){
-                if(task->thread.mode.tt.extern_pid == pid){
-			read_unlock(&tasklist_lock);
-			return(1);
-                }
-        }
-	read_unlock(&tasklist_lock);
-	return(0);
-}
diff --git a/arch/um/kernel/tt/ptproxy/Makefile b/arch/um/kernel/tt/ptproxy/Makefile
deleted file mode 100644
index 3ad5b774de5..00000000000
--- a/arch/um/kernel/tt/ptproxy/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# 
-# Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
-# Licensed under the GPL
-#
-
-obj-y = proxy.o ptrace.o sysdep.o wait.o
-
-USER_OBJS := $(obj-y)
-
-include arch/um/scripts/Makefile.rules
diff --git a/arch/um/kernel/tt/ptproxy/proxy.c b/arch/um/kernel/tt/ptproxy/proxy.c
deleted file mode 100644
index 420c23f311f..00000000000
--- a/arch/um/kernel/tt/ptproxy/proxy.c
+++ /dev/null
@@ -1,377 +0,0 @@
-/**********************************************************************
-proxy.c
-
-Copyright (C) 1999 Lars Brinkhoff.  See the file COPYING for licensing
-terms and conditions.
-
-Jeff Dike (jdike@karaya.com) : Modified for integration into uml
-**********************************************************************/
-
-/* XXX This file shouldn't refer to CONFIG_* */
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <string.h>
-#include <termios.h>
-#include <sys/wait.h>
-#include <sys/types.h>
-#include <sys/ioctl.h>
-#include <asm/unistd.h>
-#include "ptrace_user.h"
-
-#include "ptproxy.h"
-#include "sysdep.h"
-#include "wait.h"
-
-#include "user.h"
-#include "os.h"
-#include "tempfile.h"
-
-static int debugger_wait(debugger_state *debugger, int *status, int options,
-			 int (*syscall)(debugger_state *debugger, pid_t child),
-			 int (*normal_return)(debugger_state *debugger, 
-					      pid_t unused),
-			 int (*wait_return)(debugger_state *debugger, 
-					    pid_t unused))
-{
-	if(debugger->real_wait){
-		debugger->handle_trace = normal_return;
-		syscall_continue(debugger->pid);
-		debugger->real_wait = 0;
-		return(1);
-	}
-	debugger->wait_status_ptr = status;
-	debugger->wait_options = options;
-	if((debugger->debugee != NULL) && debugger->debugee->event){
-		syscall_continue(debugger->pid);
-		wait_for_stop(debugger->pid, SIGTRAP, PTRACE_SYSCALL,
-			      NULL);
-		(*wait_return)(debugger, -1);
-		return(0);
-	}
-	else if(debugger->wait_options & WNOHANG){
-		syscall_cancel(debugger->pid, 0);
-		debugger->handle_trace = syscall;
-		return(0);
-	}
-	else {
-		syscall_pause(debugger->pid);
-		debugger->handle_trace = wait_return;
-		debugger->waiting = 1;
-	}
-	return(1);
-}
-
-/*
- * Handle debugger trap, i.e. syscall.
- */
-
-int debugger_syscall(debugger_state *debugger, pid_t child)
-{
-	long arg1, arg2, arg3, arg4, arg5, result;
-	int syscall, ret = 0;
-
-	syscall = get_syscall(debugger->pid, &arg1, &arg2, &arg3, &arg4, 
-			      &arg5);
-
-	switch(syscall){
-	case __NR_execve:
-		/* execve never returns */
-		debugger->handle_trace = debugger_syscall; 
-		break;
-
-	case __NR_ptrace:
-		if(debugger->debugee->pid != 0) arg2 = debugger->debugee->pid;
-		if(!debugger->debugee->in_context) 
-			child = debugger->debugee->pid;
-		result = proxy_ptrace(debugger, arg1, arg2, arg3, arg4, child,
-				      &ret);
-		syscall_cancel(debugger->pid, result);
-		debugger->handle_trace = debugger_syscall;
-		return(ret);
-
-#ifdef __NR_waitpid
-	case __NR_waitpid:
-#endif
-	case __NR_wait4:
-		if(!debugger_wait(debugger, (int *) arg2, arg3, 
-				  debugger_syscall, debugger_normal_return, 
-				  proxy_wait_return))
-			return(0);
-		break;
-
-	case __NR_kill:
-		if(!debugger->debugee->in_context) 
-			child = debugger->debugee->pid;
-		if(arg1 == debugger->debugee->pid){
-			result = kill(child, arg2);
-			syscall_cancel(debugger->pid, result);
-			debugger->handle_trace = debugger_syscall;
-			return(0);
-		}
-		else debugger->handle_trace = debugger_normal_return;
-		break;
-
-	default:
-		debugger->handle_trace = debugger_normal_return;
-	}
-
-	syscall_continue(debugger->pid);
-	return(0);
-}
-
-/* Used by the tracing thread */
-static debugger_state parent;
-static int parent_syscall(debugger_state *debugger, int pid);
-
-int init_parent_proxy(int pid)
-{
-	parent = ((debugger_state) { .pid 		= pid,
-				     .wait_options 	= 0,
-				     .wait_status_ptr 	= NULL,
-				     .waiting 		= 0,
-				     .real_wait 	= 0,
-				     .expecting_child 	= 0,
-				     .handle_trace  	= parent_syscall,
-				     .debugee 		= NULL } );
-	return(0);
-}
-
-int parent_normal_return(debugger_state *debugger, pid_t unused)
-{
-	debugger->handle_trace = parent_syscall;
-	syscall_continue(debugger->pid);
-	return(0);
-}
-
-static int parent_syscall(debugger_state *debugger, int pid)
-{
-	long arg1, arg2, arg3, arg4, arg5;
-	int syscall;
-
-	syscall = get_syscall(pid, &arg1, &arg2, &arg3, &arg4, &arg5);
-		
-	if((syscall == __NR_wait4)
-#ifdef __NR_waitpid
-	   || (syscall == __NR_waitpid)
-#endif
-	){
-		debugger_wait(&parent, (int *) arg2, arg3, parent_syscall,
-			      parent_normal_return, parent_wait_return);
-	}
-	else ptrace(PTRACE_SYSCALL, pid, 0, 0);
-	return(0);
-}
-
-int debugger_normal_return(debugger_state *debugger, pid_t unused)
-{
-	debugger->handle_trace = debugger_syscall;
-	syscall_continue(debugger->pid);
-	return(0);
-}
-
-void debugger_cancelled_return(debugger_state *debugger, int result)
-{
-	debugger->handle_trace = debugger_syscall;
-	syscall_set_result(debugger->pid, result);
-	syscall_continue(debugger->pid);
-}
-
-/* Used by the tracing thread */
-static debugger_state debugger;
-static debugee_state debugee;
-
-void init_proxy (pid_t debugger_pid, int stopped, int status)
-{
-	debugger.pid = debugger_pid;
-	debugger.handle_trace = debugger_syscall;
-	debugger.debugee = &debugee;
-	debugger.waiting = 0;
-	debugger.real_wait = 0;
-	debugger.expecting_child = 0;
-
-	debugee.pid = 0;
-	debugee.traced = 0;
-	debugee.stopped = stopped;
-	debugee.event = 0;
-	debugee.zombie = 0;
-	debugee.died = 0;
-	debugee.wait_status = status;
-	debugee.in_context = 1;
-}
-
-int debugger_proxy(int status, int pid)
-{
-	int ret = 0, sig;
-
-	if(WIFSTOPPED(status)){
-		sig = WSTOPSIG(status);
-		if (sig == SIGTRAP)
-			ret = (*debugger.handle_trace)(&debugger, pid);
-						       
-		else if(sig == SIGCHLD){
-			if(debugger.expecting_child){
-				ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig);
-				debugger.expecting_child = 0;
-			}
-			else if(debugger.waiting)
-				real_wait_return(&debugger);
-			else {
-				ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig);
-				debugger.real_wait = 1;
-			}
-		}
-		else ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig);
-	}
-	else if(WIFEXITED(status)){
-		tracer_panic("debugger (pid %d) exited with status %d", 
-			     debugger.pid, WEXITSTATUS(status));
-	}
-	else if(WIFSIGNALED(status)){
-		tracer_panic("debugger (pid %d) exited with signal %d", 
-			     debugger.pid, WTERMSIG(status));
-	}
-	else {
-		tracer_panic("proxy got unknown status (0x%x) on debugger "
-			     "(pid %d)", status, debugger.pid);
-	}
-	return(ret);
-}
-
-void child_proxy(pid_t pid, int status)
-{
-	debugee.event = 1;
-	debugee.wait_status = status;
-
-	if(WIFSTOPPED(status)){
-		debugee.stopped = 1;
-		debugger.expecting_child = 1;
-		kill(debugger.pid, SIGCHLD);
-	}
-	else if(WIFEXITED(status) || WIFSIGNALED(status)){
-		debugee.zombie = 1;
-		debugger.expecting_child = 1;
-		kill(debugger.pid, SIGCHLD);
-	}
-	else panic("proxy got unknown status (0x%x) on child (pid %d)", 
-		   status, pid);
-}
-
-void debugger_parent_signal(int status, int pid)
-{
-	int sig;
-
-	if(WIFSTOPPED(status)){
-		sig = WSTOPSIG(status);
-		if(sig == SIGTRAP) (*parent.handle_trace)(&parent, pid);
-		else ptrace(PTRACE_SYSCALL, pid, 0, sig);
-	}
-}
-
-void fake_child_exit(void)
-{
-	int status, pid;
-
-	child_proxy(1, W_EXITCODE(0, 0));
-	while(debugger.waiting == 1){
-		CATCH_EINTR(pid = waitpid(debugger.pid, &status, WUNTRACED));
-		if(pid != debugger.pid){
-			printk("fake_child_exit - waitpid failed, "
-			       "errno = %d\n", errno);
-			return;
-		}
-		debugger_proxy(status, debugger.pid);
-	}
-	CATCH_EINTR(pid = waitpid(debugger.pid, &status, WUNTRACED));
-	if(pid != debugger.pid){
-		printk("fake_child_exit - waitpid failed, "
-		       "errno = %d\n", errno);
-		return;
-	}
-	if(ptrace(PTRACE_DETACH, debugger.pid, 0, SIGCONT) < 0)
-		printk("fake_child_exit - PTRACE_DETACH failed, errno = %d\n",
-		       errno);
-}
-
-char gdb_init_string[] = 
-"att 1 \n\
-b panic \n\
-b stop \n\
-handle SIGWINCH nostop noprint pass \n\
-";
-
-int start_debugger(char *prog, int startup, int stop, int *fd_out)
-{
-	int slave, child;
-
-	slave = open_gdb_chan();
-	child = fork();
-	if(child == 0){
-		char *tempname = NULL;
-		int fd;
-
-	        if(setsid() < 0) perror("setsid");
-		if((dup2(slave, 0) < 0) || (dup2(slave, 1) < 0) || 
-		   (dup2(slave, 2) < 0)){
-			printk("start_debugger : dup2 failed, errno = %d\n",
-			       errno);
-			exit(1);
-		}
-		if(ioctl(0, TIOCSCTTY, 0) < 0){
-			printk("start_debugger : TIOCSCTTY failed, "
-			       "errno = %d\n", errno);
-			exit(1);
-		}
-		if(tcsetpgrp (1, os_getpid()) < 0){
-			printk("start_debugger : tcsetpgrp failed, "
-			       "errno = %d\n", errno);
-#ifdef notdef
-			exit(1);
-#endif
-		}
-		fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0);
-		if(fd < 0){
-			printk("start_debugger : make_tempfile failed,"
-			       "err = %d\n", -fd);
-			exit(1);
-		}
-		os_write_file(fd, gdb_init_string,
-			      sizeof(gdb_init_string) - 1);
-		if(startup){
-			if(stop){
-				os_write_file(fd, "b start_kernel\n",
-						strlen("b start_kernel\n"));
-			}
-			os_write_file(fd, "c\n", strlen("c\n"));
-		}
-		if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){
-			printk("start_debugger :  PTRACE_TRACEME failed, "
-			       "errno = %d\n", errno);
-			exit(1);
-		}
-		execlp("gdb", "gdb", "--command", tempname, prog, NULL);
-		printk("start_debugger : exec of gdb failed, errno = %d\n",
-		       errno);
-	}
-	if(child < 0){
-		printk("start_debugger : fork for gdb failed, errno = %d\n",
-		       errno);
-		return(-1);
-	}
-	*fd_out = slave;
-	return(child);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/ptproxy/ptproxy.h b/arch/um/kernel/tt/ptproxy/ptproxy.h
deleted file mode 100644
index 5eb0285b196..00000000000
--- a/arch/um/kernel/tt/ptproxy/ptproxy.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/**********************************************************************
-ptproxy.h
-
-Copyright (C) 1999 Lars Brinkhoff.  See the file COPYING for licensing
-terms and conditions.
-**********************************************************************/
-
-#ifndef __PTPROXY_H
-#define __PTPROXY_H
-
-#include <sys/types.h>
-
-typedef struct debugger debugger_state;
-typedef struct debugee debugee_state;
-
-struct debugger
-{
-	pid_t pid;
-	int wait_options;
-	int *wait_status_ptr;
-	unsigned int waiting : 1;
-	unsigned int real_wait : 1;
-	unsigned int expecting_child : 1;
-	int (*handle_trace) (debugger_state *, pid_t);
-
-	debugee_state *debugee;
-};
-
-struct debugee
-{
-	pid_t pid;
-	int wait_status;
-	unsigned int died : 1;
-	unsigned int event : 1;
-	unsigned int stopped : 1;
-	unsigned int trace_singlestep : 1;
-	unsigned int trace_syscall : 1;
-	unsigned int traced : 1;
-	unsigned int zombie : 1;
-	unsigned int in_context : 1;
-};
-
-extern int debugger_syscall(debugger_state *debugger, pid_t pid);
-extern int debugger_normal_return (debugger_state *debugger, pid_t unused);
-
-extern long proxy_ptrace (struct debugger *, int, pid_t, long, long, pid_t,
-			  int *strace_out);
-extern void debugger_cancelled_return(debugger_state *debugger, int result);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/ptproxy/ptrace.c b/arch/um/kernel/tt/ptproxy/ptrace.c
deleted file mode 100644
index 4b4f6179b21..00000000000
--- a/arch/um/kernel/tt/ptproxy/ptrace.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/**********************************************************************
-ptrace.c
-
-Copyright (C) 1999 Lars Brinkhoff.  See the file COPYING for licensing
-terms and conditions.
-
-Jeff Dike (jdike@karaya.com) : Modified for integration into uml
-**********************************************************************/
-
-#include <errno.h>
-#include <unistd.h>
-#include <signal.h>
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-
-#include "ptproxy.h"
-#include "debug.h"
-#include "kern_util.h"
-#include "ptrace_user.h"
-#include "tt.h"
-#include "os.h"
-
-long proxy_ptrace(struct debugger *debugger, int arg1, pid_t arg2,
-		  long arg3, long arg4, pid_t child, int *ret)
-{
-	sigset_t relay;
-	long result;
-	int status;
-
-	*ret = 0;
-	if(debugger->debugee->died) return(-ESRCH);
-
-	switch(arg1){
-	case PTRACE_ATTACH:
-		if(debugger->debugee->traced) return(-EPERM);
-
-		debugger->debugee->pid = arg2;
-		debugger->debugee->traced = 1;
-
-		if(is_valid_pid(arg2) && (arg2 != child)){
-			debugger->debugee->in_context = 0;
-			kill(arg2, SIGSTOP);
-			debugger->debugee->event = 1;
-			debugger->debugee->wait_status = W_STOPCODE(SIGSTOP);
-		}
-		else {
-			debugger->debugee->in_context = 1;
-			if(debugger->debugee->stopped) 
-				child_proxy(child, W_STOPCODE(SIGSTOP));
-			else kill(child, SIGSTOP);
-		}
-
-		return(0);
-
-	case PTRACE_DETACH:
-		if(!debugger->debugee->traced) return(-EPERM);
-		
-		debugger->debugee->traced = 0;
-		debugger->debugee->pid = 0;
-		if(!debugger->debugee->in_context)
-			kill(child, SIGCONT);
-
-		return(0);
-
-	case PTRACE_CONT:
-		if(!debugger->debugee->in_context) return(-EPERM);
-		*ret = PTRACE_CONT;
-		return(ptrace(PTRACE_CONT, child, arg3, arg4));
-
-#ifdef UM_HAVE_GETFPREGS
-	case PTRACE_GETFPREGS:
-	{
-		long regs[FP_FRAME_SIZE];
-		int i, result;
-
-		result = ptrace(PTRACE_GETFPREGS, child, 0, regs);
-		if(result == -1) return(-errno);
-		
-		for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++)
-			ptrace(PTRACE_POKEDATA, debugger->pid, arg4 + 4 * i,
-			       regs[i]);
-		return(result);
-	}
-#endif
-
-#ifdef UM_HAVE_GETFPXREGS
-	case PTRACE_GETFPXREGS:
-	{
-		long regs[FPX_FRAME_SIZE];
-		int i, result;
-
-		result = ptrace(PTRACE_GETFPXREGS, child, 0, regs);
-		if(result == -1) return(-errno);
-		
-		for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++)
-			ptrace(PTRACE_POKEDATA, debugger->pid, arg4 + 4 * i,
-			       regs[i]);
-		return(result);
-	}
-#endif
-
-#ifdef UM_HAVE_GETREGS
-	case PTRACE_GETREGS:
-	{
-		long regs[FRAME_SIZE];
-		int i, result;
-
-		result = ptrace(PTRACE_GETREGS, child, 0, regs);
-		if(result == -1) return(-errno);
-
-		for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++)
-			ptrace (PTRACE_POKEDATA, debugger->pid,
-				arg4 + 4 * i, regs[i]);
-		return(result);
-	}
-	break;
-#endif
-
-	case PTRACE_KILL:
-		result = ptrace(PTRACE_KILL, child, arg3, arg4);
-		if(result == -1) return(-errno);
-
-		return(result);
-
-	case PTRACE_PEEKDATA:
-	case PTRACE_PEEKTEXT:
-	case PTRACE_PEEKUSR:
-		/* The value being read out could be -1, so we have to 
-		 * check errno to see if there's an error, and zero it
-		 * beforehand so we're not faked out by an old error
-		 */
-
-		errno = 0;
-		result = ptrace(arg1, child, arg3, 0);
-		if((result == -1) && (errno != 0)) return(-errno);
-
-		result = ptrace(PTRACE_POKEDATA, debugger->pid, arg4, result);
-		if(result == -1) return(-errno);
-			
-		return(result);
-
-	case PTRACE_POKEDATA:
-	case PTRACE_POKETEXT:
-	case PTRACE_POKEUSR:
-		result = ptrace(arg1, child, arg3, arg4);
-		if(result == -1) return(-errno);
-
-		if(arg1 == PTRACE_POKEUSR) ptrace_pokeuser(arg3, arg4);
-		return(result);
-
-#ifdef UM_HAVE_SETFPREGS
-	case PTRACE_SETFPREGS:
-	{
-		long regs[FP_FRAME_SIZE];
-		int i;
-
-		for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++)
-			regs[i] = ptrace (PTRACE_PEEKDATA, debugger->pid,
-					  arg4 + 4 * i, 0);
-		result = ptrace(PTRACE_SETFPREGS, child, 0, regs);
-		if(result == -1) return(-errno);
-
-		return(result);
-	}
-#endif
-
-#ifdef UM_HAVE_SETFPXREGS
-	case PTRACE_SETFPXREGS:
-	{
-		long regs[FPX_FRAME_SIZE];
-		int i;
-
-		for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++)
-			regs[i] = ptrace (PTRACE_PEEKDATA, debugger->pid,
-					  arg4 + 4 * i, 0);
-		result = ptrace(PTRACE_SETFPXREGS, child, 0, regs);
-		if(result == -1) return(-errno);
-
-		return(result);
-	}
-#endif
-
-#ifdef UM_HAVE_SETREGS
-	case PTRACE_SETREGS:
-	{
-		long regs[FRAME_SIZE];
-		int i;
-
-		for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++)
-			regs[i] = ptrace(PTRACE_PEEKDATA, debugger->pid,
-					 arg4 + 4 * i, 0);
-		result = ptrace(PTRACE_SETREGS, child, 0, regs);
-		if(result == -1) return(-errno);
-
-		return(result);
-	}
-#endif
-
-	case PTRACE_SINGLESTEP:
-		if(!debugger->debugee->in_context) return(-EPERM);
-		sigemptyset(&relay);
-		sigaddset(&relay, SIGSEGV);
-		sigaddset(&relay, SIGILL);
-		sigaddset(&relay, SIGBUS);
-		result = ptrace(PTRACE_SINGLESTEP, child, arg3, arg4);
-		if(result == -1) return(-errno);
-		
-		status = wait_for_stop(child, SIGTRAP, PTRACE_SINGLESTEP,
-				       &relay);
-		child_proxy(child, status);
-		return(result);
-
-	case PTRACE_SYSCALL:
-		if(!debugger->debugee->in_context) return(-EPERM);
-		result = ptrace(PTRACE_SYSCALL, child, arg3, arg4);
-		if(result == -1) return(-errno);
-
-		*ret = PTRACE_SYSCALL;
-		return(result);
-
-	case PTRACE_TRACEME:
-	default:
-		return(-EINVAL);
-	}
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/ptproxy/sysdep.c b/arch/um/kernel/tt/ptproxy/sysdep.c
deleted file mode 100644
index e0e1ab0588a..00000000000
--- a/arch/um/kernel/tt/ptproxy/sysdep.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/**********************************************************************
-sysdep.c
-
-Copyright (C) 1999 Lars Brinkhoff.  See the file COPYING for licensing
-terms and conditions.
-**********************************************************************/
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <linux/unistd.h>
-#include "ptrace_user.h"
-#include "user.h"
-#include "os.h"
-
-int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, long *arg4, 
-		long *arg5)
-{
-	*arg1 = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_ARG1_OFFSET, 0);
-	*arg2 = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_ARG2_OFFSET, 0);
-	*arg3 = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_ARG3_OFFSET, 0);
-	*arg4 = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_ARG4_OFFSET, 0);
-	*arg5 = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_ARG5_OFFSET, 0);
-	return(ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET, 0));
-}
-
-void syscall_cancel(pid_t pid, int result)
-{
-	if((ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
-		   __NR_getpid) < 0) ||
-	   (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) ||
-	   (wait_for_stop(pid, SIGTRAP, PTRACE_SYSCALL, NULL) < 0) ||
-	   (ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, result) < 0) ||
-	   (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0))
-		printk("ptproxy: couldn't cancel syscall: errno = %d\n", 
-		       errno);
-}
-
-void syscall_set_result(pid_t pid, long result)
-{
-	ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, result);
-}
-
-void syscall_continue(pid_t pid)
-{
-	ptrace(PTRACE_SYSCALL, pid, 0, 0);
-}
-
-int syscall_pause(pid_t pid) 
-{
-	if(ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET, __NR_pause) < 0){
-		printk("syscall_change - ptrace failed, errno = %d\n", errno);
-		return(-1);
-	}
-	return(0);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/ptproxy/sysdep.h b/arch/um/kernel/tt/ptproxy/sysdep.h
deleted file mode 100644
index 735f488049a..00000000000
--- a/arch/um/kernel/tt/ptproxy/sysdep.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/**********************************************************************
-sysdep.h
-
-Copyright (C) 1999 Lars Brinkhoff.
-Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
-See the file COPYING for licensing terms and conditions.
-**********************************************************************/
-
-extern int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, 
-		       long *arg4, long *arg5);
-extern void syscall_cancel (pid_t pid, long result);
-extern void syscall_set_result (pid_t pid, long result);
-extern void syscall_continue (pid_t pid);
-extern int syscall_pause(pid_t pid);
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/ptproxy/wait.c b/arch/um/kernel/tt/ptproxy/wait.c
deleted file mode 100644
index bdd4af4b65f..00000000000
--- a/arch/um/kernel/tt/ptproxy/wait.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/**********************************************************************
-wait.c
-
-Copyright (C) 1999 Lars Brinkhoff.  See the file COPYING for licensing
-terms and conditions.
-
-**********************************************************************/
-
-#include <errno.h>
-#include <signal.h>
-#include <sys/wait.h>
-
-#include "ptproxy.h"
-#include "sysdep.h"
-#include "wait.h"
-#include "ptrace_user.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/sigcontext.h"
-
-int proxy_wait_return(struct debugger *debugger, pid_t unused)
-{
-	debugger->waiting = 0;
-
-	if(debugger->debugee->died || (debugger->wait_options & __WCLONE)){
-		debugger_cancelled_return(debugger, -ECHILD);
-		return(0);
-	}
-
-	if(debugger->debugee->zombie && debugger->debugee->event)
-		debugger->debugee->died = 1;
-
-	if(debugger->debugee->event){
-		debugger->debugee->event = 0;
-		ptrace(PTRACE_POKEDATA, debugger->pid,
-		       debugger->wait_status_ptr, 
-		       debugger->debugee->wait_status);
-		/* if (wait4)
-		   ptrace (PTRACE_POKEDATA, pid, rusage_ptr, ...); */
-		debugger_cancelled_return(debugger, debugger->debugee->pid);
-		return(0);
-	}
-
-	/* pause will return -EINTR, which happens to be right for wait */
-	debugger_normal_return(debugger, -1);
-	return(0);
-}
-
-int parent_wait_return(struct debugger *debugger, pid_t unused)
-{
-	return(debugger_normal_return(debugger, -1));
-}
-
-int real_wait_return(struct debugger *debugger)
-{
-	unsigned long ip;
-	int pid;
-
-	pid = debugger->pid;
-
-	ip = ptrace(PTRACE_PEEKUSR, pid, PT_IP_OFFSET, 0);
-	IP_RESTART_SYSCALL(ip);
-
-	if(ptrace(PTRACE_POKEUSR, pid, PT_IP_OFFSET, ip) < 0)
-		tracer_panic("real_wait_return : Failed to restart system "
-			     "call, errno = %d\n", errno);
-
-	if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) ||
-	   (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) ||
-	   (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) ||
-	   debugger_normal_return(debugger, -1))
-		tracer_panic("real_wait_return : gdb failed to wait, "
-			     "errno = %d\n", errno);
-	return(0);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/ptproxy/wait.h b/arch/um/kernel/tt/ptproxy/wait.h
deleted file mode 100644
index 542e73ee2ce..00000000000
--- a/arch/um/kernel/tt/ptproxy/wait.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/**********************************************************************
-wait.h
-
-Copyright (C) 1999 Lars Brinkhoff.  See the file COPYING for licensing
-terms and conditions.
-**********************************************************************/
-
-#ifndef __PTPROXY_WAIT_H
-#define __PTPROXY_WAIT_H
-
-extern int proxy_wait_return(struct debugger *debugger, pid_t unused);
-extern int real_wait_return(struct debugger *debugger);
-extern int parent_wait_return(struct debugger *debugger, pid_t unused);
-
-#endif
diff --git a/arch/um/kernel/tt/syscall_kern.c b/arch/um/kernel/tt/syscall_kern.c
deleted file mode 100644
index 293caa6d0c2..00000000000
--- a/arch/um/kernel/tt/syscall_kern.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#include "linux/types.h"
-#include "linux/utime.h"
-#include "linux/sys.h"
-#include "linux/ptrace.h"
-#include "asm/unistd.h"
-#include "asm/ptrace.h"
-#include "asm/uaccess.h"
-#include "asm/stat.h"
-#include "sysdep/syscalls.h"
-#include "sysdep/sigcontext.h"
-#include "kern_util.h"
-#include "syscall.h"
-
-void syscall_handler_tt(int sig, struct pt_regs *regs)
-{
-	void *sc;
-	long result;
-	int syscall;
-
-	sc = UPT_SC(&regs->regs);
-	SC_START_SYSCALL(sc);
-
-	syscall = UPT_SYSCALL_NR(&regs->regs);
-	syscall_trace(&regs->regs, 0);
-
-	current->thread.nsyscalls++;
-	nsyscalls++;
-
-	if((syscall >= NR_syscalls) || (syscall < 0))
-		result = -ENOSYS;
-	else result = EXECUTE_SYSCALL(syscall, regs);
-
-	/* regs->sc may have changed while the system call ran (there may
-	 * have been an interrupt or segfault), so it needs to be refreshed.
-	 */
-	UPT_SC(&regs->regs) = sc;
-
-	SC_SET_SYSCALL_RETURN(sc, result);
-
-	syscall_trace(&regs->regs, 1);
-}
diff --git a/arch/um/kernel/tt/syscall_user.c b/arch/um/kernel/tt/syscall_user.c
deleted file mode 100644
index f52b47aff1d..00000000000
--- a/arch/um/kernel/tt/syscall_user.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <unistd.h>
-#include <signal.h>
-#include <errno.h>
-#include <asm/unistd.h>
-#include "sysdep/ptrace.h"
-#include "sigcontext.h"
-#include "ptrace_user.h"
-#include "task.h"
-#include "kern_util.h"
-#include "syscall.h"
-#include "tt.h"
-
-void do_sigtrap(void *task)
-{
-	UPT_SYSCALL_NR(TASK_REGS(task)) = -1;
-}
-
-void do_syscall(void *task, int pid, int local_using_sysemu)
-{
-	unsigned long proc_regs[FRAME_SIZE];
-
-	if(ptrace_getregs(pid, proc_regs) < 0)
-		tracer_panic("Couldn't read registers");
-
-	UPT_SYSCALL_NR(TASK_REGS(task)) = PT_SYSCALL_NR(proc_regs);
-
-#ifdef UPT_ORIGGPR2
-        UPT_ORIGGPR2(TASK_REGS(task)) = REGS_ORIGGPR2(proc_regs);
-#endif
-
-	if(((unsigned long *) PT_IP(proc_regs) >= &_stext) &&
-	   ((unsigned long *) PT_IP(proc_regs) <= &_etext))
-		tracer_panic("I'm tracing myself and I can't get out");
-
-	/* advanced sysemu mode set syscall number to -1 automatically */
-	if (local_using_sysemu==2)
-		return;
-
-	/* syscall number -1 in sysemu skips syscall restarting in host */
-	if(ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
-		  local_using_sysemu ? -1 : __NR_getpid) < 0)
-		tracer_panic("do_syscall : Nullifying syscall failed, "
-			     "errno = %d", errno);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c
deleted file mode 100644
index 7caa24fe05d..00000000000
--- a/arch/um/kernel/tt/tlb.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Copyright 2003 PathScale, Inc.
- * Licensed under the GPL
- */
-
-#include "linux/stddef.h"
-#include "linux/kernel.h"
-#include "linux/sched.h"
-#include "linux/mm.h"
-#include "asm/page.h"
-#include "asm/pgtable.h"
-#include "asm/uaccess.h"
-#include "asm/tlbflush.h"
-#include "mem_user.h"
-#include "os.h"
-#include "tlb.h"
-
-static int do_ops(union mm_context *mmu, struct host_vm_op *ops, int last,
-		    int finished, void **flush)
-{
-	struct host_vm_op *op;
-        int i, ret=0;
-
-        for(i = 0; i <= last && !ret; i++){
-		op = &ops[i];
-		switch(op->type){
-		case MMAP:
-                        ret = os_map_memory((void *) op->u.mmap.addr,
-                                            op->u.mmap.fd, op->u.mmap.offset,
-                                            op->u.mmap.len, op->u.mmap.r,
-                                            op->u.mmap.w, op->u.mmap.x);
-			break;
-		case MUNMAP:
-                        ret = os_unmap_memory((void *) op->u.munmap.addr,
-                                              op->u.munmap.len);
-			break;
-		case MPROTECT:
-                        ret = protect_memory(op->u.mprotect.addr,
-                                             op->u.munmap.len,
-                                             op->u.mprotect.r,
-                                             op->u.mprotect.w,
-                                             op->u.mprotect.x, 1);
-			protect_memory(op->u.mprotect.addr, op->u.munmap.len,
-				       op->u.mprotect.r, op->u.mprotect.w,
-				       op->u.mprotect.x, 1);
-			break;
-		default:
-			printk("Unknown op type %d in do_ops\n", op->type);
-			break;
-		}
-	}
-
-	return ret;
-}
-
-static void fix_range(struct mm_struct *mm, unsigned long start_addr, 
-		      unsigned long end_addr, int force)
-{
-        if((current->thread.mode.tt.extern_pid != -1) &&
-           (current->thread.mode.tt.extern_pid != os_getpid()))
-                panic("fix_range fixing wrong address space, current = 0x%p",
-                      current);
-
-        fix_range_common(mm, start_addr, end_addr, force, do_ops);
-}
-
-atomic_t vmchange_seq = ATOMIC_INIT(1);
-
-void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end)
-{
-        if(flush_tlb_kernel_range_common(start, end))
-                atomic_inc(&vmchange_seq);
-}
-
-void flush_tlb_kernel_vm_tt(void)
-{
-        flush_tlb_kernel_range(start_vm, end_vm);
-}
-
-void __flush_tlb_one_tt(unsigned long addr)
-{
-        flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-}
-  
-void flush_tlb_range_tt(struct vm_area_struct *vma, unsigned long start, 
-		     unsigned long end)
-{
-	if(vma->vm_mm != current->mm) return;
-
-	/* Assumes that the range start ... end is entirely within
-	 * either process memory or kernel vm
-	 */
-	if((start >= start_vm) && (start < end_vm)){
-		if(flush_tlb_kernel_range_common(start, end))
-			atomic_inc(&vmchange_seq);
-	}
-	else fix_range(vma->vm_mm, start, end, 0);
-}
-
-void flush_tlb_mm_tt(struct mm_struct *mm)
-{
-	unsigned long seq;
-
-	if(mm != current->mm) return;
-
-	fix_range(mm, 0, STACK_TOP, 0);
-
-	seq = atomic_read(&vmchange_seq);
-	if(current->thread.mode.tt.vm_seq == seq)
-		return;
-	current->thread.mode.tt.vm_seq = seq;
-	flush_tlb_kernel_range_common(start_vm, end_vm);
-}
-
-void force_flush_all_tt(void)
-{
-	fix_range(current->mm, 0, STACK_TOP, 1);
-	flush_tlb_kernel_range_common(start_vm, end_vm);
-}
diff --git a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c
deleted file mode 100644
index c23588393f6..00000000000
--- a/arch/um/kernel/tt/tracer.c
+++ /dev/null
@@ -1,461 +0,0 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <signal.h>
-#include <errno.h>
-#include <sched.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include "user.h"
-#include "sysdep/ptrace.h"
-#include "sigcontext.h"
-#include "sysdep/sigcontext.h"
-#include "os.h"
-#include "mem_user.h"
-#include "process.h"
-#include "kern_util.h"
-#include "chan_user.h"
-#include "ptrace_user.h"
-#include "irq_user.h"
-#include "mode.h"
-#include "tt.h"
-
-static int tracer_winch[2];
-
-int is_tracer_winch(int pid, int fd, void *data)
-{
-	if(pid != os_getpgrp())
-		return(0);
-
-	register_winch_irq(tracer_winch[0], fd, -1, data);
-	return(1);
-}
-
-static void tracer_winch_handler(int sig)
-{
-	int n;
-	char c = 1;
-
-	n = os_write_file(tracer_winch[1], &c, sizeof(c));
-	if(n != sizeof(c))
-		printk("tracer_winch_handler - write failed, err = %d\n", -n);
-}
-
-/* Called only by the tracing thread during initialization */
-
-static void setup_tracer_winch(void)
-{
-	int err;
-
-	err = os_pipe(tracer_winch, 1, 1);
-	if(err < 0){
-		printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err);
-		return;
-	}
-	signal(SIGWINCH, tracer_winch_handler);
-}
-
-void attach_process(int pid)
-{
-	if((ptrace(PTRACE_ATTACH, pid, 0, 0) < 0) ||
-	   (ptrace(PTRACE_CONT, pid, 0, 0) < 0))
-		tracer_panic("OP_FORK failed to attach pid");
-	wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL);
-	if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0)
-		tracer_panic("OP_FORK: PTRACE_SETOPTIONS failed, errno = %d", errno);
-	if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
-		tracer_panic("OP_FORK failed to continue process");
-}
-
-void tracer_panic(char *format, ...)
-{
-	va_list ap;
-
-	va_start(ap, format);
-	vprintf(format, ap);
-	va_end(ap);
-	printf("\n");
-	while(1) pause();
-}
-
-static void tracer_segv(int sig, struct sigcontext sc)
-{
-        struct faultinfo fi;
-        GET_FAULTINFO_FROM_SC(fi, &sc);
-	printf("Tracing thread segfault at address 0x%lx, ip 0x%lx\n",
-               FAULT_ADDRESS(fi), SC_IP(&sc));
-	while(1)
-		pause();
-}
-
-/* Changed early in boot, and then only read */
-int debug = 0;
-int debug_stop = 1;
-int debug_parent = 0;
-int honeypot = 0;
-
-static int signal_tramp(void *arg)
-{
-	int (*proc)(void *);
-
-	if(honeypot && munmap((void *) (host_task_size - 0x10000000),
-			      0x10000000)) 
-		panic("Unmapping stack failed");
-	if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0)
-		panic("ptrace PTRACE_TRACEME failed");
-	os_stop_process(os_getpid());
-	change_sig(SIGWINCH, 0);
-	signal(SIGUSR1, SIG_IGN);
-	change_sig(SIGCHLD, 0);
-	signal(SIGSEGV, (__sighandler_t) sig_handler);
-	set_cmdline("(idle thread)");
-	set_init_pid(os_getpid());
-	init_irq_signals(0);
-	proc = arg;
-	return((*proc)(NULL));
-}
-
-static void sleeping_process_signal(int pid, int sig)
-{
-	switch(sig){
-	/* These two result from UML being ^Z-ed and bg-ed.  PTRACE_CONT is
-	 * right because the process must be in the kernel already.
-	 */
-	case SIGCONT:
-	case SIGTSTP:
-		if(ptrace(PTRACE_CONT, pid, 0, sig) < 0)
-			tracer_panic("sleeping_process_signal : Failed to "
-				     "continue pid %d, signal = %d, "
-				     "errno = %d\n", pid, sig, errno);
-		break;
-
-	/* This happens when the debugger (e.g. strace) is doing system call 
-	 * tracing on the kernel.  During a context switch, the current task
-	 * will be set to the incoming process and the outgoing process will
-	 * hop into write and then read.  Since it's not the current process
-	 * any more, the trace of those will land here.  So, we need to just 
-	 * PTRACE_SYSCALL it.
-	 */
-	case (SIGTRAP + 0x80):
-		if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
-			tracer_panic("sleeping_process_signal : Failed to "
-				     "PTRACE_SYSCALL pid %d, errno = %d\n",
-				     pid, errno);
-		break;
-	case SIGSTOP:
-		break;
-	default:
-		tracer_panic("sleeping process %d got unexpected "
-			     "signal : %d\n", pid, sig);
-		break;
-	}
-}
-
-/* Accessed only by the tracing thread */
-int debugger_pid = -1;
-int debugger_parent = -1;
-int debugger_fd = -1;
-int gdb_pid = -1;
-
-struct {
-	int pid;
-	int signal;
-	unsigned long addr;
-	struct timeval time;
-} signal_record[1024][32];
-
-int signal_index[32];
-int nsignals = 0;
-int debug_trace = 0;
-
-extern void signal_usr1(int sig);
-
-int tracing_pid = -1;
-
-int tracer(int (*init_proc)(void *), void *sp)
-{
-	void *task = NULL;
-	int status, pid = 0, sig = 0, cont_type, tracing = 0, op = 0;
-	int proc_id = 0, n, err, old_tracing = 0, strace = 0;
-	int local_using_sysemu = 0;
-
-	signal(SIGPIPE, SIG_IGN);
-	setup_tracer_winch();
-	tracing_pid = os_getpid();
-	printf("tracing thread pid = %d\n", tracing_pid);
-
-	pid = clone(signal_tramp, sp, CLONE_FILES | SIGCHLD, init_proc);
-	CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
-	if(n < 0){
-		printf("waitpid on idle thread failed, errno = %d\n", errno);
-		exit(1);
-	}
-	if (ptrace(PTRACE_OLDSETOPTIONS, pid, 0, (void *)PTRACE_O_TRACESYSGOOD) < 0) {
-		printf("Failed to PTRACE_SETOPTIONS for idle thread, errno = %d\n", errno);
-		exit(1);
-	}
-	if((ptrace(PTRACE_CONT, pid, 0, 0) < 0)){
-		printf("Failed to continue idle thread, errno = %d\n", errno);
-		exit(1);
-	}
-
-	signal(SIGSEGV, (sighandler_t) tracer_segv);
-	signal(SIGUSR1, signal_usr1);
-	if(debug_trace){
-		printf("Tracing thread pausing to be attached\n");
-		stop();
-	}
-	if(debug){
-		if(gdb_pid != -1) 
-			debugger_pid = attach_debugger(pid, gdb_pid, 1);
-		else debugger_pid = init_ptrace_proxy(pid, 1, debug_stop);
-		if(debug_parent){
-			debugger_parent = os_process_parent(debugger_pid);
-			init_parent_proxy(debugger_parent);
-			err = attach(debugger_parent);
-			if(err){
-				printf("Failed to attach debugger parent %d, "
-				       "errno = %d\n", debugger_parent, -err);
-				debugger_parent = -1;
-			}
-			else {
-				if(ptrace(PTRACE_SYSCALL, debugger_parent, 
-					  0, 0) < 0){
-					printf("Failed to continue debugger "
-					       "parent, errno = %d\n", errno);
-					debugger_parent = -1;
-				}
-			}
-		}
-	}
-	set_cmdline("(tracing thread)");
-	while(1){
-		CATCH_EINTR(pid = waitpid(-1, &status, WUNTRACED));
-		if(pid <= 0){
-			if(errno != ECHILD){
-				printf("wait failed - errno = %d\n", errno);
-			}
-			continue;
-		}
-		if(pid == debugger_pid){
-			int cont = 0;
-
-			if(WIFEXITED(status) || WIFSIGNALED(status))
-				debugger_pid = -1;
-			/* XXX Figure out how to deal with gdb and SMP */
-			else cont = debugger_signal(status, cpu_tasks[0].pid);
-			if(cont == PTRACE_SYSCALL) strace = 1;
-			continue;
-		}
-		else if(pid == debugger_parent){
-			debugger_parent_signal(status, pid);
-			continue;
-		}
-		nsignals++;
-		if(WIFEXITED(status)) ;
-#ifdef notdef
-		{
-			printf("Child %d exited with status %d\n", pid, 
-			       WEXITSTATUS(status));
-		}
-#endif
-		else if(WIFSIGNALED(status)){
-			sig = WTERMSIG(status);
-			if(sig != 9){
-				printf("Child %d exited with signal %d\n", pid,
-				       sig);
-			}
-		}
-		else if(WIFSTOPPED(status)){
-			proc_id = pid_to_processor_id(pid);
-			sig = WSTOPSIG(status);
-			if(proc_id == -1){
-				sleeping_process_signal(pid, sig);
-				continue;
-			}
-
-			task = cpu_tasks[proc_id].task;
-			tracing = is_tracing(task);
-			old_tracing = tracing;
-
-			/* Assume: no syscall, when coming from user */
-			if ( tracing )
-				do_sigtrap(task);
-
-			switch(sig){
-			case SIGUSR1:
-				sig = 0;
-				op = do_proc_op(task, proc_id);
-				switch(op){
-				/*
-				 * This is called when entering user mode; after
-				 * this, we start intercepting syscalls.
-				 *
-				 * In fact, a process is started in kernel mode,
-				 * so with is_tracing() == 0 (and that is reset
-				 * when executing syscalls, since UML kernel has
-				 * the right to do syscalls);
-				 */
-				case OP_TRACE_ON:
-					arch_leave_kernel(task, pid);
-					tracing = 1;
-					break;
-				case OP_REBOOT:
-				case OP_HALT:
-					unmap_physmem();
-					kmalloc_ok = 0;
-					os_kill_ptraced_process(pid, 0);
-					/* Now let's reap remaining zombies */
-					errno = 0;
-					do {
-						waitpid(-1, &status,
-							WUNTRACED);
-					} while (errno != ECHILD);
-					return(op == OP_REBOOT);
-				case OP_NONE:
-					printf("Detaching pid %d\n", pid);
-					detach(pid, SIGSTOP);
-					continue;
-				default:
-					break;
-				}
-				/* OP_EXEC switches host processes on us,
-				 * we want to continue the new one.
-				 */
-				pid = cpu_tasks[proc_id].pid;
-				break;
-			case (SIGTRAP + 0x80):
-				if(!tracing && (debugger_pid != -1)){
-					child_signal(pid, status & 0x7fff);
-					continue;
-				}
-				tracing = 0;
-				/* local_using_sysemu has been already set
-				 * below, since if we are here, is_tracing() on
-				 * the traced task was 1, i.e. the process had
-				 * already run through one iteration of the
-				 * loop which executed a OP_TRACE_ON request.*/
-				do_syscall(task, pid, local_using_sysemu);
-				sig = SIGUSR2;
-				break;
-			case SIGTRAP:
-				if(!tracing && (debugger_pid != -1)){
-					child_signal(pid, status);
-					continue;
-				}
-				tracing = 0;
-				break;
-			case SIGPROF:
-				if(tracing) sig = 0;
-				break;
-			case SIGCHLD:
-			case SIGHUP:
-				sig = 0;
-				break;
-			case SIGSEGV:
-			case SIGIO:
-			case SIGALRM:
-			case SIGVTALRM:
-			case SIGFPE:
-			case SIGBUS:
-			case SIGILL:
-			case SIGWINCH:
-
-			default:
-				tracing = 0;
-				break;
-			}
-			set_tracing(task, tracing);
-
-			if(!tracing && old_tracing)
-				arch_enter_kernel(task, pid);
-
-			if(!tracing && (debugger_pid != -1) && (sig != 0) &&
-				(sig != SIGALRM) && (sig != SIGVTALRM) &&
-				(sig != SIGSEGV) && (sig != SIGTRAP) &&
-				(sig != SIGUSR2) && (sig != SIGIO) &&
-				(sig != SIGFPE)){
-				child_signal(pid, status);
-				continue;
-			}
-
-			local_using_sysemu = get_using_sysemu();
-
-			if(tracing)
-				cont_type = SELECT_PTRACE_OPERATION(local_using_sysemu,
-				                                    singlestepping(task));
-			else if((debugger_pid != -1) && strace)
-				cont_type = PTRACE_SYSCALL;
-			else
-				cont_type = PTRACE_CONT;
-
-			if(ptrace(cont_type, pid, 0, sig) != 0){
-				tracer_panic("ptrace failed to continue "
-					     "process - errno = %d\n", 
-					     errno);
-			}
-		}
-	}
-	return(0);
-}
-
-static int __init uml_debug_setup(char *line, int *add)
-{
-	char *next;
-
-	debug = 1;
-	*add = 0;
-	if(*line != '=') return(0);
-	line++;
-
-	while(line != NULL){
-		next = strchr(line, ',');
-		if(next) *next++ = '\0';
-		
-		if(!strcmp(line, "go"))	debug_stop = 0;
-		else if(!strcmp(line, "parent")) debug_parent = 1;
-		else printf("Unknown debug option : '%s'\n", line);
-
-		line = next;
-	}
-	return(0);
-}
-
-__uml_setup("debug", uml_debug_setup,
-"debug\n"
-"    Starts up the kernel under the control of gdb. See the \n"
-"    kernel debugging tutorial and the debugging session pages\n"
-"    at http://user-mode-linux.sourceforge.net/ for more information.\n\n"
-);
-
-static int __init uml_debugtrace_setup(char *line, int *add)
-{
-	debug_trace = 1;
-	return 0;
-}
-__uml_setup("debugtrace", uml_debugtrace_setup,
-"debugtrace\n"
-"    Causes the tracing thread to pause until it is attached by a\n"
-"    debugger and continued.  This is mostly for debugging crashes\n"
-"    early during boot, and should be pretty much obsoleted by\n"
-"    the debug switch.\n\n"
-);
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/trap_user.c b/arch/um/kernel/tt/trap_user.c
deleted file mode 100644
index 3032eb5e246..00000000000
--- a/arch/um/kernel/tt/trap_user.c
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdlib.h>
-#include <errno.h>
-#include <signal.h>
-#include "sysdep/ptrace.h"
-#include "sysdep/sigcontext.h"
-#include "kern_util.h"
-#include "task.h"
-#include "tt.h"
-#include "os.h"
-
-void sig_handler_common_tt(int sig, void *sc_ptr)
-{
-	struct sigcontext *sc = sc_ptr;
-	struct tt_regs save_regs, *r;
-	int save_errno = errno, is_user = 0;
-	void (*handler)(int, union uml_pt_regs *);
-
-	/* This is done because to allow SIGSEGV to be delivered inside a SEGV
-	 * handler.  This can happen in copy_user, and if SEGV is disabled,
-	 * the process will die.
-	 */
-	if(sig == SIGSEGV)
-		change_sig(SIGSEGV, 1);
-
-	r = &TASK_REGS(get_current())->tt;
-        if ( sig == SIGFPE || sig == SIGSEGV ||
-             sig == SIGBUS || sig == SIGILL ||
-             sig == SIGTRAP ) {
-                GET_FAULTINFO_FROM_SC(r->faultinfo, sc);
-        }
-	save_regs = *r;
-	if (sc)
-		is_user = user_context(SC_SP(sc));
-	r->sc = sc;
-	if(sig != SIGUSR2) 
-		r->syscall = -1;
-
-	handler = sig_info[sig];
-
-	/* unblock SIGALRM, SIGVTALRM, SIGIO if sig isn't IRQ signal */
-	if (sig != SIGIO && sig != SIGWINCH &&
-	    sig != SIGVTALRM && sig != SIGALRM)
-		unblock_signals();
-
-	handler(sig, (union uml_pt_regs *) r);
-
-	if(is_user){
-		interrupt_end();
-		block_signals();
-		set_user_mode(NULL);
-	}
-	*r = save_regs;
-	errno = save_errno;
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/uaccess.c b/arch/um/kernel/tt/uaccess.c
deleted file mode 100644
index 1cb60726567..00000000000
--- a/arch/um/kernel/tt/uaccess.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
- * Licensed under the GPL
- */
-
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-
-int copy_from_user_tt(void *to, const void __user *from, int n)
-{
-	if(!access_ok(VERIFY_READ, from, n))
-		return(n);
-
-	return(__do_copy_from_user(to, from, n, &current->thread.fault_addr,
-				   &current->thread.fault_catcher));
-}
-
-int copy_to_user_tt(void __user *to, const void *from, int n)
-{
-	if(!access_ok(VERIFY_WRITE, to, n))
-		return(n);
-
-	return(__do_copy_to_user(to, from, n, &current->thread.fault_addr,
-				 &current->thread.fault_catcher));
-}
-
-int strncpy_from_user_tt(char *dst, const char __user *src, int count)
-{
-	int n;
-
-	if(!access_ok(VERIFY_READ, src, 1))
-		return(-EFAULT);
-
-	n = __do_strncpy_from_user(dst, src, count,
-				   &current->thread.fault_addr,
-				   &current->thread.fault_catcher);
-	if(n < 0) return(-EFAULT);
-	return(n);
-}
-
-int __clear_user_tt(void __user *mem, int len)
-{
-	return(__do_clear_user(mem, len,
-			       &current->thread.fault_addr,
-			       &current->thread.fault_catcher));
-}
-
-int clear_user_tt(void __user *mem, int len)
-{
-	if(!access_ok(VERIFY_WRITE, mem, len))
-		return(len);
-
-	return(__do_clear_user(mem, len, &current->thread.fault_addr,
-			       &current->thread.fault_catcher));
-}
-
-int strnlen_user_tt(const void __user *str, int len)
-{
-	return(__do_strnlen_user(str, len,
-				 &current->thread.fault_addr,
-				 &current->thread.fault_catcher));
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/tt/uaccess_user.c b/arch/um/kernel/tt/uaccess_user.c
deleted file mode 100644
index 0e5c82c5e5b..00000000000
--- a/arch/um/kernel/tt/uaccess_user.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/* 
- * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk)
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <string.h>
-#include "uml_uaccess.h"
-#include "task.h"
-#include "kern_util.h"
-#include "os.h"
-#include "longjmp.h"
-
-int __do_copy_from_user(void *to, const void *from, int n,
-			void **fault_addr, void **fault_catcher)
-{
-	struct tt_regs save = TASK_REGS(get_current())->tt;
-	unsigned long fault;
-	int faulted;
-
-	fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
-			       __do_copy, &faulted);
-	TASK_REGS(get_current())->tt = save;
-
-	if(!faulted)
-		return 0;
-	else if (fault)
-		return n - (fault - (unsigned long) from);
-	else
-		/* In case of a general protection fault, we don't have the
-		 * fault address, so NULL is used instead. Pretend we didn't
-		 * copy anything. */
-		return n;
-}
-
-static void __do_strncpy(void *dst, const void *src, int count)
-{
-	strncpy(dst, src, count);
-}	
-
-int __do_strncpy_from_user(char *dst, const char *src, unsigned long count,
-			   void **fault_addr, void **fault_catcher)
-{
-	struct tt_regs save = TASK_REGS(get_current())->tt;
-	unsigned long fault;
-	int faulted;
-
-	fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher,
-			       __do_strncpy, &faulted);
-	TASK_REGS(get_current())->tt = save;
-
-	if(!faulted) return(strlen(dst));
-	else return(-1);
-}
-
-static void __do_clear(void *to, const void *from, int n)
-{
-	memset(to, 0, n);
-}	
-
-int __do_clear_user(void *mem, unsigned long len,
-		    void **fault_addr, void **fault_catcher)
-{
-	struct tt_regs save = TASK_REGS(get_current())->tt;
-	unsigned long fault;
-	int faulted;
-
-	fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher,
-			       __do_clear, &faulted);
-	TASK_REGS(get_current())->tt = save;
-
-	if(!faulted) return(0);
-	else return(len - (fault - (unsigned long) mem));
-}
-
-int __do_strnlen_user(const char *str, unsigned long n,
-		      void **fault_addr, void **fault_catcher)
-{
-	struct tt_regs save = TASK_REGS(get_current())->tt;
-	int ret;
-	unsigned long *faddrp = (unsigned long *)fault_addr;
-	jmp_buf jbuf;
-
-	*fault_catcher = &jbuf;
-	if(UML_SETJMP(&jbuf) == 0)
-		ret = strlen(str) + 1;
-	else ret = *faddrp - (unsigned long) str;
-
-	*fault_addr = NULL;
-	*fault_catcher = NULL;
-
-	TASK_REGS(get_current())->tt = save;
-	return ret;
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/kernel/uaccess.c b/arch/um/kernel/uaccess.c
index 054e3de0784..d7436aacd26 100644
--- a/arch/um/kernel/uaccess.c
+++ b/arch/um/kernel/uaccess.c
@@ -18,7 +18,7 @@ void __do_copy(void *to, const void *from, int n)
 
 
 int __do_copy_to_user(void *to, const void *from, int n,
-		      void **fault_addr, void **fault_catcher)
+		      void **fault_addr, jmp_buf **fault_catcher)
 {
 	unsigned long fault;
 	int faulted;
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index ecc458fe51b..f1c71393f57 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -1,46 +1,24 @@
 /*
- * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/sched.h"
-#include "linux/notifier.h"
-#include "linux/mm.h"
-#include "linux/types.h"
-#include "linux/tty.h"
-#include "linux/init.h"
-#include "linux/bootmem.h"
-#include "linux/spinlock.h"
-#include "linux/utsname.h"
-#include "linux/sysrq.h"
-#include "linux/seq_file.h"
 #include "linux/delay.h"
+#include "linux/mm.h"
 #include "linux/module.h"
+#include "linux/seq_file.h"
+#include "linux/string.h"
 #include "linux/utsname.h"
-#include "asm/page.h"
 #include "asm/pgtable.h"
-#include "asm/ptrace.h"
-#include "asm/elf.h"
-#include "asm/user.h"
+#include "asm/processor.h"
 #include "asm/setup.h"
-#include "ubd_user.h"
-#include "asm/current.h"
-#include "kern_util.h"
-#include "as-layout.h"
 #include "arch.h"
+#include "as-layout.h"
+#include "init.h"
 #include "kern.h"
 #include "mem_user.h"
-#include "mem.h"
-#include "initrd.h"
-#include "init.h"
 #include "os.h"
-#include "choose-mode.h"
-#include "mode_kern.h"
-#include "mode.h"
-#ifdef UML_CONFIG_MODE_SKAS
 #include "skas.h"
-#endif
 
 #define DEFAULT_COMMAND_LINE "root=98:0"
 
@@ -53,7 +31,7 @@ static void __init add_arg(char *arg)
 		printf("add_arg: Too many command line arguments!\n");
 		exit(1);
 	}
-	if(strlen(command_line) > 0)
+	if (strlen(command_line) > 0)
 		strcat(command_line, " ");
 	strcat(command_line, arg);
 }
@@ -70,8 +48,8 @@ struct cpuinfo_um boot_cpu_data = {
 
 unsigned long thread_saved_pc(struct task_struct *task)
 {
-	return os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas,
-					      task));
+	/* FIXME: Need to look up userspace_pid by cpu */
+	return os_process_pc(userspace_pid[0]);
 }
 
 /* Changed in setup_arch, which is called in early boot */
@@ -90,7 +68,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	seq_printf(m, "processor\t: %d\n", index);
 	seq_printf(m, "vendor_id\t: User Mode Linux\n");
 	seq_printf(m, "model name\t: UML\n");
-	seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas"));
+	seq_printf(m, "mode\t\t: skas\n");
 	seq_printf(m, "host\t\t: %s\n", host_info);
 	seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
 		   loops_per_jiffy/(500000/HZ),
@@ -132,44 +110,13 @@ unsigned long end_vm;
 /* Set in uml_ncpus_setup */
 int ncpus = 1;
 
-#ifdef CONFIG_CMDLINE_ON_HOST
-/* Pointer set in linux_main, the array itself is private to each thread,
- * and changed at address space creation time so this poses no concurrency
- * problems.
- */
-static char *argv1_begin = NULL;
-static char *argv1_end = NULL;
-#endif
-
 /* Set in early boot */
 static int have_root __initdata = 0;
 
 /* Set in uml_mem_setup and modified in linux_main */
 long long physmem_size = 32 * 1024 * 1024;
 
-void set_cmdline(char *cmd)
-{
-#ifdef CONFIG_CMDLINE_ON_HOST
-	char *umid, *ptr;
-
-	if(CHOOSE_MODE(honeypot, 0)) return;
-
-	umid = get_umid();
-	if(*umid != '\0'){
-		snprintf(argv1_begin, 
-			 (argv1_end - argv1_begin) * sizeof(*ptr), 
-			 "(%s) ", umid);
-		ptr = &argv1_begin[strlen(argv1_begin)];
-	}
-	else ptr = argv1_begin;
-
-	snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd);
-	memset(argv1_begin + strlen(argv1_begin), '\0', 
-	       argv1_end - argv1_begin - strlen(argv1_begin));
-#endif
-}
-
-static char *usage_string = 
+static char *usage_string =
 "User Mode Linux v%s\n"
 "	available at http://user-mode-linux.sourceforge.net/\n\n";
 
@@ -201,13 +148,10 @@ __uml_setup("root=", uml_root_setup,
 "        root=/dev/ubd5\n\n"
 );
 
-#ifndef CONFIG_MODE_TT
-
 static int __init no_skas_debug_setup(char *line, int *add)
 {
 	printf("'debug' is not necessary to gdb UML in skas mode - run \n");
-	printf("'gdb linux' and disable CONFIG_CMDLINE_ON_HOST if gdb \n");
-	printf("doesn't work as expected\n");
+	printf("'gdb linux'");
 
 	return 0;
 }
@@ -217,8 +161,6 @@ __uml_setup("debug", no_skas_debug_setup,
 "    this flag is not needed to run gdb on UML in skas mode\n\n"
 );
 
-#endif
-
 #ifdef CONFIG_SMP
 static int __init uml_ncpus_setup(char *line, int *add)
 {
@@ -232,56 +174,10 @@ static int __init uml_ncpus_setup(char *line, int *add)
 
 __uml_setup("ncpus=", uml_ncpus_setup,
 "ncpus=<# of desired CPUs>\n"
-"    This tells an SMP kernel how many virtual processors to start.\n\n" 
+"    This tells an SMP kernel how many virtual processors to start.\n\n"
 );
 #endif
 
-static int force_tt = 0;
-
-#if defined(CONFIG_MODE_TT) && defined(CONFIG_MODE_SKAS)
-#define DEFAULT_TT 0
-
-static int __init mode_tt_setup(char *line, int *add)
-{
-	force_tt = 1;
-	return 0;
-}
-
-#else
-#ifdef CONFIG_MODE_SKAS
-
-#define DEFAULT_TT 0
-
-static int __init mode_tt_setup(char *line, int *add)
-{
-	printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n");
-	return 0;
-}
-
-#else
-#ifdef CONFIG_MODE_TT
-
-#define DEFAULT_TT 1
-
-static int __init mode_tt_setup(char *line, int *add)
-{
-	printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n");
-	return 0;
-}
-
-#endif
-#endif
-#endif
-
-__uml_setup("mode=tt", mode_tt_setup,
-"mode=tt\n"
-"    When both CONFIG_MODE_TT and CONFIG_MODE_SKAS are enabled, this option\n"
-"    forces UML to run in tt (tracing thread) mode.  It is not the default\n"
-"    because it's slower and less secure than skas mode.\n\n"
-);
-
-int mode_tt = DEFAULT_TT;
-
 static int __init Usage(char *line, int *add)
 {
 	const char **p;
@@ -310,9 +206,8 @@ static int __init uml_checksetup(char *line, int *add)
 		int n;
 
 		n = strlen(p->str);
-		if(!strncmp(line, p->str, n)){
-			if (p->setup_func(line + n, add)) return 1;
-		}
+		if (!strncmp(line, p->str, n) && p->setup_func(line + n, add))
+			return 1;
 		p++;
 	}
 	return 0;
@@ -323,7 +218,7 @@ static void __init uml_postsetup(void)
 	initcall_t *p;
 
 	p = &__uml_postsetup_start;
-	while(p < &__uml_postsetup_end){
+	while(p < &__uml_postsetup_end) {
 		(*p)();
 		p++;
 	}
@@ -339,6 +234,20 @@ EXPORT_SYMBOL(end_iomem);
 
 extern char __binary_start;
 
+static unsigned long set_task_sizes_skas(unsigned long *task_size_out)
+{
+	/* Round up to the nearest 4M */
+	unsigned long host_task_size = ROUND_4M((unsigned long)
+						&host_task_size);
+
+	if (!skas_needs_stub)
+		*task_size_out = host_task_size;
+	else
+		*task_size_out = STUB_START & PGDIR_MASK;
+
+	return host_task_size;
+}
+
 int __init linux_main(int argc, char **argv)
 {
 	unsigned long avail, diff;
@@ -346,45 +255,30 @@ int __init linux_main(int argc, char **argv)
 	unsigned int i, add;
 	char * mode;
 
-	for (i = 1; i < argc; i++){
-		if((i == 1) && (argv[i][0] == ' ')) continue;
+	for (i = 1; i < argc; i++) {
+		if ((i == 1) && (argv[i][0] == ' '))
+			continue;
 		add = 1;
 		uml_checksetup(argv[i], &add);
 		if (add)
 			add_arg(argv[i]);
 	}
-	if(have_root == 0)
+	if (have_root == 0)
 		add_arg(DEFAULT_COMMAND_LINE);
 
+	/* OS sanity checks that need to happen before the kernel runs */
 	os_early_checks();
-	if (force_tt)
-		clear_can_do_skas();
-	mode_tt = force_tt ? 1 : !can_do_skas();
-#ifndef CONFIG_MODE_TT
-	if (mode_tt) {
-		/*Since CONFIG_MODE_TT is #undef'ed, force_tt cannot be 1. So,
-		 * can_do_skas() returned 0, and the message is correct. */
-		printf("Support for TT mode is disabled, and no SKAS support is present on the host.\n");
-		exit(1);
-	}
-#endif
 
-#ifndef CONFIG_MODE_SKAS
-	mode = "TT";
-#else
-	/* Show to the user the result of selection */
-	if (mode_tt)
-		mode = "TT";
-	else if (proc_mm && ptrace_faultinfo)
+	can_do_skas();
+
+	if (proc_mm && ptrace_faultinfo)
 		mode = "SKAS3";
 	else
 		mode = "SKAS0";
-#endif
 
 	printf("UML running in %s mode\n", mode);
 
-	host_task_size = CHOOSE_MODE_PROC(set_task_sizes_tt,
-					  set_task_sizes_skas, &task_size);
+	host_task_size = set_task_sizes_skas(&task_size);
 
 	/*
 	 * Setting up handlers to 'sig_info' struct
@@ -392,13 +286,15 @@ int __init linux_main(int argc, char **argv)
 	os_fill_handlinfo(handlinfo_kern);
 
 	brk_start = (unsigned long) sbrk(0);
-	CHOOSE_MODE_PROC(before_mem_tt, before_mem_skas, brk_start);
-	/* Increase physical memory size for exec-shield users
-	so they actually get what they asked for. This should
-	add zero for non-exec shield users */
+
+	/*
+	 * Increase physical memory size for exec-shield users
+	 * so they actually get what they asked for. This should
+	 * add zero for non-exec shield users
+	 */
 
 	diff = UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
-	if(diff > 1024 * 1024){
+	if (diff > 1024 * 1024) {
 		printf("Adding %ld bytes to physical memory to account for "
 		       "exec-shield gap\n", diff);
 		physmem_size += UML_ROUND_UP(brk_start) - UML_ROUND_UP(&_end);
@@ -411,20 +307,16 @@ int __init linux_main(int argc, char **argv)
 
 	setup_machinename(init_utsname()->machine);
 
-#ifdef CONFIG_CMDLINE_ON_HOST
-	argv1_begin = argv[1];
-	argv1_end = &argv[1][strlen(argv[1])];
-#endif
-
 	highmem = 0;
 	iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
 	max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC;
 
-	/* Zones have to begin on a 1 << MAX_ORDER page boundary,
+	/*
+	 * Zones have to begin on a 1 << MAX_ORDER page boundary,
 	 * so this makes sure that's true for highmem
 	 */
 	max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1);
-	if(physmem_size + iomem_size > max_physmem){
+	if (physmem_size + iomem_size > max_physmem) {
 		highmem = physmem_size + iomem_size - max_physmem;
 		physmem_size -= highmem;
 #ifndef CONFIG_HIGHMEM
@@ -441,7 +333,7 @@ int __init linux_main(int argc, char **argv)
 	start_vm = VMALLOC_START;
 
 	setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
-	if(init_maps(physmem_size, iomem_size, highmem)){
+	if (init_maps(physmem_size, iomem_size, highmem)) {
 		printf("Failed to allocate mem_map for %Lu bytes of physical "
 		       "memory and %Lu bytes of highmem\n", physmem_size,
 		       highmem);
@@ -450,10 +342,11 @@ int __init linux_main(int argc, char **argv)
 
 	virtmem_size = physmem_size;
 	avail = get_kmem_end() - start_vm;
-	if(physmem_size > avail) virtmem_size = avail;
+	if (physmem_size > avail)
+		virtmem_size = avail;
 	end_vm = start_vm + virtmem_size;
 
-	if(virtmem_size < physmem_size)
+	if (virtmem_size < physmem_size)
 		printf("Kernel virtual memory size shrunk to %lu bytes\n",
 		       virtmem_size);
 
@@ -462,7 +355,7 @@ int __init linux_main(int argc, char **argv)
 	stack_protections((unsigned long) &init_thread_info);
 	os_flush_stdout();
 
-	return CHOOSE_MODE(start_uml_tt(), start_uml_skas());
+	return start_uml();
 }
 
 extern int uml_exitcode;
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index 81acdc24348..13df191e2b4 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -18,13 +18,6 @@ SECTIONS
 
   . = START + SIZEOF_HEADERS;
 
-#ifdef MODE_TT
-  .remap_data : { UNMAP_PATH (.data .bss) }
-  .remap : { UNMAP_PATH (.text) }
-
-  . = ALIGN(4096);		/* Init code and data */
-#endif
-
   _text = .;
   _stext = .;
   __init_begin = .;
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 2f8c7946401..8e129af8170 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -1,23 +1,18 @@
 # 
-# Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 # Licensed under the GPL
 #
 
 obj-y = aio.o elf_aux.o execvp.o file.o helper.o irq.o main.o mem.o process.o \
-	sigio.o signal.o start_up.o time.o trap.o tty.o uaccess.o umid.o tls.o \
-	user_syms.o util.o drivers/ sys-$(SUBARCH)/
-
-obj-$(CONFIG_MODE_SKAS) += skas/
-
-obj-$(CONFIG_MODE_TT) += tt.o
-user-objs-$(CONFIG_MODE_TT) += tt.o
+	registers.o sigio.o signal.o start_up.o time.o trap.o tty.o uaccess.o \
+	umid.o tls.o user_syms.o util.o drivers/ sys-$(SUBARCH)/ skas/
 
 obj-$(CONFIG_TTY_LOG) += tty_log.o
 user-objs-$(CONFIG_TTY_LOG) += tty_log.o
 
 USER_OBJS := $(user-objs-y) aio.o elf_aux.o execvp.o file.o helper.o irq.o \
-	main.o mem.o process.o sigio.o signal.o start_up.o time.o trap.o tty.o \
-	tls.o uaccess.o umid.o util.o
+	main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
+	trap.o tty.o tls.o uaccess.o umid.o util.o
 
 CFLAGS_user_syms.o += -DSUBARCH_$(SUBARCH)
 
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index 59348359f9a..4158118c4a5 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -1,20 +1,19 @@
 /*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stdlib.h>
 #include <unistd.h>
+#include <sched.h>
 #include <signal.h>
 #include <errno.h>
-#include <sched.h>
-#include <sys/syscall.h>
-#include "os.h"
+#include <sys/time.h>
+#include <asm/unistd.h>
 #include "aio.h"
 #include "init.h"
-#include "user.h"
-#include "mode.h"
 #include "kern_constants.h"
+#include "os.h"
+#include "user.h"
 
 struct aio_thread_req {
 	enum aio_type type;
@@ -28,7 +27,8 @@ struct aio_thread_req {
 #if defined(HAVE_AIO_ABI)
 #include <linux/aio_abi.h>
 
-/* If we have the headers, we are going to build with AIO enabled.
+/*
+ * If we have the headers, we are going to build with AIO enabled.
  * If we don't have aio in libc, we define the necessary stubs here.
  */
 
@@ -52,7 +52,8 @@ static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
 
 #endif
 
-/* The AIO_MMAP cases force the mmapped page into memory here
+/*
+ * The AIO_MMAP cases force the mmapped page into memory here
  * rather than in whatever place first touches the data.  I used
  * to do this by touching the page, but that's delicate because
  * gcc is prone to optimizing that away.  So, what's done here
@@ -106,12 +107,12 @@ static int aio_thread(void *arg)
 
 	signal(SIGWINCH, SIG_IGN);
 
-	while(1){
+	while (1) {
 		n = io_getevents(ctx, 1, 1, &event, NULL);
-		if(n < 0){
-			if(errno == EINTR)
+		if (n < 0) {
+			if (errno == EINTR)
 				continue;
-			printk("aio_thread - io_getevents failed, "
+			printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
 			       "errno = %d\n", errno);
 		}
 		else {
@@ -120,9 +121,9 @@ static int aio_thread(void *arg)
 						.err	= event.res });
 			reply_fd = ((struct aio_context *) reply.data)->reply_fd;
 			err = write(reply_fd, &reply, sizeof(reply));
-			if(err != sizeof(reply))
-				printk("aio_thread - write failed, fd = %d, "
-				       "err = %d\n", reply_fd, errno);
+			if (err != sizeof(reply))
+				printk(UM_KERN_ERR "aio_thread - write failed, "
+				       "fd = %d, err = %d\n", reply_fd, errno);
 		}
 	}
 	return 0;
@@ -137,10 +138,10 @@ static int do_not_aio(struct aio_thread_req *req)
 	int n;
 
 	actual = lseek64(req->io_fd, req->offset, SEEK_SET);
-	if(actual != req->offset)
+	if (actual != req->offset)
 		return -errno;
 
-	switch(req->type){
+	switch(req->type) {
 	case AIO_READ:
 		n = read(req->io_fd, req->buf, req->len);
 		break;
@@ -151,11 +152,12 @@ static int do_not_aio(struct aio_thread_req *req)
 		n = read(req->io_fd, &c, sizeof(c));
 		break;
 	default:
-		printk("do_not_aio - bad request type : %d\n", req->type);
+		printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
+		       req->type);
 		return -EINVAL;
 	}
 
-	if(n < 0)
+	if (n < 0)
 		return -errno;
 	return 0;
 }
@@ -173,16 +175,18 @@ static int not_aio_thread(void *arg)
 	int err;
 
 	signal(SIGWINCH, SIG_IGN);
-	while(1){
+	while (1) {
 		err = read(aio_req_fd_r, &req, sizeof(req));
-		if(err != sizeof(req)){
-			if(err < 0)
-				printk("not_aio_thread - read failed, "
-				       "fd = %d, err = %d\n", aio_req_fd_r,
+		if (err != sizeof(req)) {
+			if (err < 0)
+				printk(UM_KERN_ERR "not_aio_thread - "
+				       "read failed, fd = %d, err = %d\n",
+				       aio_req_fd_r,
 				       errno);
 			else {
-				printk("not_aio_thread - short read, fd = %d, "
-				       "length = %d\n", aio_req_fd_r, err);
+				printk(UM_KERN_ERR "not_aio_thread - short "
+				       "read, fd = %d, length = %d\n",
+				       aio_req_fd_r, err);
 			}
 			continue;
 		}
@@ -190,9 +194,9 @@ static int not_aio_thread(void *arg)
 		reply = ((struct aio_thread_reply) { .data 	= req.aio,
 						     .err	= err });
 		err = write(req.aio->reply_fd, &reply, sizeof(reply));
-		if(err != sizeof(reply))
-			printk("not_aio_thread - write failed, fd = %d, "
-			       "err = %d\n", req.aio->reply_fd, errno);
+		if (err != sizeof(reply))
+			printk(UM_KERN_ERR "not_aio_thread - write failed, "
+			       "fd = %d, err = %d\n", req.aio->reply_fd, errno);
 	}
 
 	return 0;
@@ -203,35 +207,36 @@ static int init_aio_24(void)
 	int fds[2], err;
 
 	err = os_pipe(fds, 1, 1);
-	if(err)
+	if (err)
 		goto out;
 
 	aio_req_fd_w = fds[0];
 	aio_req_fd_r = fds[1];
 
 	err = os_set_fd_block(aio_req_fd_w, 0);
-	if(err)
+	if (err)
 		goto out_close_pipe;
 
 	err = run_helper_thread(not_aio_thread, NULL,
 				CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
-	if(err < 0)
+	if (err < 0)
 		goto out_close_pipe;
 
 	aio_pid = err;
 	goto out;
 
 out_close_pipe:
-	os_close_file(fds[0]);
-	os_close_file(fds[1]);
+	close(fds[0]);
+	close(fds[1]);
 	aio_req_fd_w = -1;
 	aio_req_fd_r = -1;
 out:
 #ifndef HAVE_AIO_ABI
-	printk("/usr/include/linux/aio_abi.h not present during build\n");
+	printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
+	       "build\n");
 #endif
-	printk("2.6 host AIO support not used - falling back to I/O "
-	       "thread\n");
+	printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
+	       "I/O thread\n");
 	return 0;
 }
 
@@ -241,21 +246,21 @@ static int init_aio_26(void)
 {
 	int err;
 
-	if(io_setup(256, &ctx)){
+	if (io_setup(256, &ctx)) {
 		err = -errno;
-		printk("aio_thread failed to initialize context, err = %d\n",
-		       errno);
+		printk(UM_KERN_ERR "aio_thread failed to initialize context, "
+		       "err = %d\n", errno);
 		return err;
 	}
 
 	err = run_helper_thread(aio_thread, NULL,
 				CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
-	if(err < 0)
+	if (err < 0)
 		return err;
 
 	aio_pid = err;
 
-	printk("Using 2.6 host AIO\n");
+	printk(UM_KERN_INFO "Using 2.6 host AIO\n");
 	return 0;
 }
 
@@ -266,13 +271,13 @@ static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
 	int err;
 
 	err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
-	if(err){
+	if (err) {
 		reply = ((struct aio_thread_reply) { .data = aio,
 					 .err  = err });
 		err = write(aio->reply_fd, &reply, sizeof(reply));
-		if(err != sizeof(reply)){
+		if (err != sizeof(reply)) {
 			err = -errno;
-			printk("submit_aio_26 - write failed, "
+			printk(UM_KERN_ERR "submit_aio_26 - write failed, "
 			       "fd = %d, err = %d\n", aio->reply_fd, -err);
 		}
 		else err = 0;
@@ -320,28 +325,24 @@ static int init_aio(void)
 {
 	int err;
 
-	CHOOSE_MODE(({ if(!aio_24){
-			    printk("Disabling 2.6 AIO in tt mode\n");
-			    aio_24 = 1;
-		    } }), (void) 0);
-
-	if(!aio_24){
+	if (!aio_24) {
 		err = init_aio_26();
-		if(err && (errno == ENOSYS)){
-			printk("2.6 AIO not supported on the host - "
-			       "reverting to 2.4 AIO\n");
+		if (err && (errno == ENOSYS)) {
+			printk(UM_KERN_INFO "2.6 AIO not supported on the "
+			       "host - reverting to 2.4 AIO\n");
 			aio_24 = 1;
 		}
 		else return err;
 	}
 
-	if(aio_24)
+	if (aio_24)
 		return init_aio_24();
 
 	return 0;
 }
 
-/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
+/*
+ * The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
  * needs to be called when the kernel is running because it calls run_helper,
  * which needs get_free_page.  exit_aio is a __uml_exitcall because the generic
  * kernel does not run __exitcalls on shutdown, and can't because many of them
@@ -372,7 +373,7 @@ static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
 	int err;
 
 	err = write(aio_req_fd_w, &req, sizeof(req));
-	if(err == sizeof(req))
+	if (err == sizeof(req))
 		err = 0;
 	else err = -errno;
 
@@ -384,9 +385,8 @@ int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
 	       struct aio_context *aio)
 {
 	aio->reply_fd = reply_fd;
-	if(aio_24)
+	if (aio_24)
 		return submit_aio_24(type, io_fd, buf, len, offset, aio);
-	else {
+	else
 		return submit_aio_26(type, io_fd, buf, len, offset, aio);
-	}
 }
diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h
index 57ecdaf2f67..ddffd41c3f3 100644
--- a/arch/um/os-Linux/drivers/etap.h
+++ b/arch/um/os-Linux/drivers/etap.h
@@ -1,8 +1,11 @@
 /* 
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
+#ifndef __DRIVERS_ETAP_H
+#define __DRIVERS_ETAP_H
+
 #include "net_user.h"
 
 struct ethertap_data {
@@ -15,13 +18,4 @@ struct ethertap_data {
 
 extern const struct net_user_info ethertap_user_info;
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+#endif
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
index 12689141414..04f11b9f1ac 100644
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ b/arch/um/os-Linux/drivers/ethertap_kern.c
@@ -1,16 +1,15 @@
 /*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and 
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 by various other people who didn't put their name here.
  * Licensed under the GPL.
  */
 
 #include "linux/init.h"
-#include "linux/netdevice.h"
-#include "linux/etherdevice.h"
-#include "net_kern.h"
-#include "net_user.h"
+#include <linux/netdevice.h>
 #include "etap.h"
+#include "net_kern.h"
 
 struct ethertap_init {
 	char *dev_name;
@@ -37,32 +36,24 @@ static void etap_init(struct net_device *dev, void *data)
 	printk("\n");
 }
 
-static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp)
+static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
 	int len;
 
-	*skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP);
-	if(*skb == NULL) return(-ENOMEM);
-	len = net_recvfrom(fd, skb_mac_header(*skb),
-			   (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP);
-	if(len <= 0) return(len);
-	skb_pull(*skb, 2);
+	len = net_recvfrom(fd, skb_mac_header(skb),
+			   skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP);
+	if (len <= 0)
+		return(len);
+
+	skb_pull(skb, 2);
 	len -= 2;
-	return(len);
+	return len;
 }
 
-static int etap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp)
+static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	if(skb_headroom(*skb) < 2){
-	  	struct sk_buff *skb2;
-
-		skb2 = skb_realloc_headroom(*skb, 2);
-		dev_kfree_skb(*skb);
-		if (skb2 == NULL) return(-ENOMEM);
-		*skb = skb2;
-	}
-	skb_push(*skb, 2);
-	return(net_send(fd, (*skb)->data, (*skb)->len));
+	skb_push(skb, 2);
+	return net_send(fd, skb->data, skb->len);
 }
 
 const struct net_kern_info ethertap_kern_info = {
@@ -79,15 +70,15 @@ int ethertap_setup(char *str, char **mac_out, void *data)
 	*init = ((struct ethertap_init)
 		{ .dev_name 	= NULL,
 		  .gate_addr 	= NULL });
-	if(tap_setup_common(str, "ethertap", &init->dev_name, mac_out,
+	if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out,
 			    &init->gate_addr))
-		return(0);
-	if(init->dev_name == NULL){
-		printk("ethertap_setup : Missing tap device name\n");
-		return(0);
+		return 0;
+	if (init->dev_name == NULL) {
+		printk(KERN_ERR "ethertap_setup : Missing tap device name\n");
+		return 0;
 	}
 
-	return(1);
+	return 1;
 }
 
 static struct transport ethertap_transport = {
@@ -97,6 +88,7 @@ static struct transport ethertap_transport = {
 	.user 		= &ethertap_user_info,
 	.kern 		= &ethertap_kern_info,
 	.private_size 	= sizeof(struct ethertap_data),
+	.setup_size 	= sizeof(struct ethertap_init),
 };
 
 static int register_ethertap(void)
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
index 61d3953c7ac..4ff55360344 100644
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ b/arch/um/os-Linux/drivers/ethertap_user.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
  * James Leu (jleu@mindspring.net).
  * Copyright (C) 2001 by various other people who didn't put their name here.
@@ -7,20 +8,16 @@
 
 #include <stdio.h>
 #include <unistd.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <sys/errno.h>
+#include <errno.h>
+#include <string.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
-#include <sys/un.h>
-#include <net/if.h>
-#include "user.h"
-#include "kern_util.h"
-#include "net_user.h"
 #include "etap.h"
+#include "kern_constants.h"
 #include "os.h"
+#include "net_user.h"
 #include "um_malloc.h"
-#include "kern_constants.h"
+#include "user.h"
 
 #define MAX_PACKET ETH_MAX_PACKET
 
@@ -49,16 +46,18 @@ static void etap_change(int op, unsigned char *addr, unsigned char *netmask,
 	memcpy(change.addr, addr, sizeof(change.addr));
 	memcpy(change.netmask, netmask, sizeof(change.netmask));
 	CATCH_EINTR(n = write(fd, &change, sizeof(change)));
-	if(n != sizeof(change)){
-		printk("etap_change - request failed, err = %d\n", errno);
+	if (n != sizeof(change)) {
+		printk(UM_KERN_ERR "etap_change - request failed, err = %d\n",
+		       errno);
 		return;
 	}
 
 	output = kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL);
-	if(output == NULL)
-		printk("etap_change : Failed to allocate output buffer\n");
+	if (output == NULL)
+		printk(UM_KERN_ERR "etap_change : Failed to allocate output "
+		       "buffer\n");
 	read_output(fd, output, UM_KERN_PAGE_SIZE);
-	if(output != NULL){
+	if (output != NULL) {
 		printk("%s", output);
 		kfree(output);
 	}
@@ -87,11 +86,11 @@ static void etap_pre_exec(void *arg)
 	struct etap_pre_exec_data *data = arg;
 
 	dup2(data->control_remote, 1);
-	os_close_file(data->data_me);
-	os_close_file(data->control_me);
+	close(data->data_me);
+	close(data->control_me);
 }
 
-static int etap_tramp(char *dev, char *gate, int control_me, 
+static int etap_tramp(char *dev, char *gate, int control_me,
 		      int control_remote, int data_me, int data_remote)
 {
 	struct etap_pre_exec_data pe_data;
@@ -101,13 +100,13 @@ static int etap_tramp(char *dev, char *gate, int control_me,
 	char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
 	char *setup_args[] = { "uml_net", version_buf, "ethertap", dev,
 			       data_fd_buf, gate_buf, NULL };
-	char *nosetup_args[] = { "uml_net", version_buf, "ethertap", 
+	char *nosetup_args[] = { "uml_net", version_buf, "ethertap",
 				 dev, data_fd_buf, NULL };
 	char **args, c;
 
 	sprintf(data_fd_buf, "%d", data_remote);
 	sprintf(version_buf, "%d", UML_NET_VERSION);
-	if(gate != NULL){
+	if (gate != NULL) {
 		strcpy(gate_buf, gate);
 		args = setup_args;
 	}
@@ -119,24 +118,26 @@ static int etap_tramp(char *dev, char *gate, int control_me,
 	pe_data.data_me = data_me;
 	pid = run_helper(etap_pre_exec, &pe_data, args);
 
-	if(pid < 0)
+	if (pid < 0)
 		err = pid;
-	os_close_file(data_remote);
-	os_close_file(control_remote);
+	close(data_remote);
+	close(control_remote);
 	CATCH_EINTR(n = read(control_me, &c, sizeof(c)));
-	if(n != sizeof(c)){
+	if (n != sizeof(c)) {
 		err = -errno;
-		printk("etap_tramp : read of status failed, err = %d\n", -err);
+		printk(UM_KERN_ERR "etap_tramp : read of status failed, "
+		       "err = %d\n", -err);
 		return err;
 	}
-	if(c != 1){
-		printk("etap_tramp : uml_net failed\n");
+	if (c != 1) {
+		printk(UM_KERN_ERR "etap_tramp : uml_net failed\n");
 		err = -EINVAL;
 		CATCH_EINTR(n = waitpid(pid, &status, 0));
-		if(n < 0)
+		if (n < 0)
 			err = -errno;
-		else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1))
-			printk("uml_net didn't exit with status 1\n");
+		else if (!WIFEXITED(status) || (WEXITSTATUS(status) != 1))
+			printk(UM_KERN_ERR "uml_net didn't exit with "
+			       "status 1\n");
 	}
 	return err;
 }
@@ -148,43 +149,56 @@ static int etap_open(void *data)
 	int data_fds[2], control_fds[2], err, output_len;
 
 	err = tap_open_common(pri->dev, pri->gate_addr);
-	if(err)
+	if (err)
 		return err;
 
-	err = os_pipe(data_fds, 0, 0);
-	if(err < 0){
-		printk("data os_pipe failed - err = %d\n", -err);
+	err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds);
+	if (err) {
+		err = -errno;
+		printk(UM_KERN_ERR "etap_open - data socketpair failed - "
+		       "err = %d\n", errno);
 		return err;
 	}
 
-	err = os_pipe(control_fds, 1, 0);
-	if(err < 0){
-		printk("control os_pipe failed - err = %d\n", -err);
-		return err;
+	err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds);
+	if (err) {
+		err = -errno;
+		printk(UM_KERN_ERR "etap_open - control socketpair failed - "
+		       "err = %d\n", errno);
+		goto out_close_data;
 	}
 
-	err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], 
+	err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0],
 			 control_fds[1], data_fds[0], data_fds[1]);
 	output_len = UM_KERN_PAGE_SIZE;
 	output = kmalloc(output_len, UM_GFP_KERNEL);
 	read_output(control_fds[0], output, output_len);
 
-	if(output == NULL)
-		printk("etap_open : failed to allocate output buffer\n");
+	if (output == NULL)
+		printk(UM_KERN_ERR "etap_open : failed to allocate output "
+		       "buffer\n");
 	else {
 		printk("%s", output);
 		kfree(output);
 	}
 
-	if(err < 0){
-		printk("etap_tramp failed - err = %d\n", -err);
-		return err;
+	if (err < 0) {
+		printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err);
+		goto out_close_control;
 	}
 
 	pri->data_fd = data_fds[0];
 	pri->control_fd = control_fds[0];
 	iter_addresses(pri->dev, etap_open_addr, &pri->control_fd);
 	return data_fds[0];
+
+out_close_control:
+	close(control_fds[0]);
+	close(control_fds[1]);
+out_close_data:
+	close(data_fds[0]);
+	close(data_fds[1]);
+	return err;
 }
 
 static void etap_close(int fd, void *data)
@@ -192,37 +206,41 @@ static void etap_close(int fd, void *data)
 	struct ethertap_data *pri = data;
 
 	iter_addresses(pri->dev, etap_close_addr, &pri->control_fd);
-	os_close_file(fd);
-	os_shutdown_socket(pri->data_fd, 1, 1);
-	os_close_file(pri->data_fd);
+	close(fd);
+
+	if (shutdown(pri->data_fd, SHUT_RDWR) < 0)
+		printk(UM_KERN_ERR "etap_close - shutdown data socket failed, "
+		       "errno = %d\n", errno);
+
+	if (shutdown(pri->control_fd, SHUT_RDWR) < 0)
+		printk(UM_KERN_ERR "etap_close - shutdown control socket "
+		       "failed, errno = %d\n", errno);
+
+	close(pri->data_fd);
 	pri->data_fd = -1;
-	os_close_file(pri->control_fd);
+	close(pri->control_fd);
 	pri->control_fd = -1;
 }
 
-static int etap_set_mtu(int mtu, void *data)
-{
-	return mtu;
-}
-
 static void etap_add_addr(unsigned char *addr, unsigned char *netmask,
 			  void *data)
 {
 	struct ethertap_data *pri = data;
 
 	tap_check_ips(pri->gate_addr, addr);
-	if(pri->control_fd == -1)
+	if (pri->control_fd == -1)
 		return;
 	etap_open_addr(addr, netmask, &pri->control_fd);
 }
 
-static void etap_del_addr(unsigned char *addr, unsigned char *netmask, 
+static void etap_del_addr(unsigned char *addr, unsigned char *netmask,
 			  void *data)
 {
 	struct ethertap_data *pri = data;
 
-	if(pri->control_fd == -1)
+	if (pri->control_fd == -1)
 		return;
+
 	etap_close_addr(addr, netmask, &pri->control_fd);
 }
 
@@ -231,8 +249,8 @@ const struct net_user_info ethertap_user_info = {
 	.open		= etap_open,
 	.close	 	= etap_close,
 	.remove	 	= NULL,
-	.set_mtu	= etap_set_mtu,
 	.add_address	= etap_add_addr,
 	.delete_address = etap_del_addr,
-	.max_packet	= MAX_PACKET - ETH_HEADER_ETHERTAP
+	.mtu		= ETH_MAX_PACKET,
+	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_ETHERTAP,
 };
diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h
index d3e8d3af624..f17c31586c8 100644
--- a/arch/um/os-Linux/drivers/tuntap.h
+++ b/arch/um/os-Linux/drivers/tuntap.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -19,14 +19,3 @@ struct tuntap_data {
 extern const struct net_user_info tuntap_user_info;
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
index f1714e7fb1d..9d384807b07 100644
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ b/arch/um/os-Linux/drivers/tuntap_kern.c
@@ -1,16 +1,13 @@
-/* 
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/stddef.h"
-#include "linux/netdevice.h"
-#include "linux/etherdevice.h"
-#include "linux/skbuff.h"
-#include "linux/init.h"
-#include "asm/errno.h"
+#include <linux/netdevice.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <asm/errno.h>
 #include "net_kern.h"
-#include "net_user.h"
 #include "tuntap.h"
 
 struct tuntap_init {
@@ -38,19 +35,15 @@ static void tuntap_init(struct net_device *dev, void *data)
 	printk("\n");
 }
 
-static int tuntap_read(int fd, struct sk_buff **skb, 
-		       struct uml_net_private *lp)
+static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	*skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER);
-	if(*skb == NULL) return(-ENOMEM);
-	return(net_read(fd, skb_mac_header(*skb),
-			(*skb)->dev->mtu + ETH_HEADER_OTHER));
+	return net_read(fd, skb_mac_header(skb),
+			skb->dev->mtu + ETH_HEADER_OTHER);
 }
 
-static int tuntap_write(int fd, struct sk_buff **skb, 
-			struct uml_net_private *lp)
+static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 {
-	return(net_write(fd, (*skb)->data, (*skb)->len));
+	return net_write(fd, skb->data, skb->len);
 }
 
 const struct net_kern_info tuntap_kern_info = {
@@ -67,11 +60,11 @@ int tuntap_setup(char *str, char **mac_out, void *data)
 	*init = ((struct tuntap_init)
 		{ .dev_name 	= NULL,
 		  .gate_addr 	= NULL });
-	if(tap_setup_common(str, "tuntap", &init->dev_name, mac_out,
+	if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out,
 			    &init->gate_addr))
-		return(0);
+		return 0;
 
-	return(1);
+	return 1;
 }
 
 static struct transport tuntap_transport = {
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
index f848b4ea934..6c55d3c8ead 100644
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ b/arch/um/os-Linux/drivers/tuntap_user.c
@@ -1,27 +1,22 @@
 /* 
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <stdio.h>
-#include <stddef.h>
-#include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
-#include <sys/wait.h>
+#include <string.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
 #include <sys/socket.h>
-#include <sys/un.h>
+#include <sys/wait.h>
 #include <sys/uio.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <linux/if_tun.h>
-#include "net_user.h"
+#include "kern_constants.h"
+#include "os.h"
 #include "tuntap.h"
-#include "kern_util.h"
 #include "user.h"
-#include "os.h"
-
-#define MAX_PACKET ETH_MAX_PACKET
 
 static int tuntap_user_init(void *data, void *dev)
 {
@@ -37,7 +32,7 @@ static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask,
 	struct tuntap_data *pri = data;
 
 	tap_check_ips(pri->gate_addr, addr);
-	if((pri->fd == -1) || pri->fixed_config)
+	if ((pri->fd == -1) || pri->fixed_config)
 		return;
 	open_addr(addr, netmask, pri->dev_name);
 }
@@ -47,7 +42,7 @@ static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask,
 {
 	struct tuntap_data *pri = data;
 
-	if((pri->fd == -1) || pri->fixed_config)
+	if ((pri->fd == -1) || pri->fixed_config)
 		return;
 	close_addr(addr, netmask, pri->dev_name);
 }
@@ -62,7 +57,7 @@ static void tuntap_pre_exec(void *arg)
 	struct tuntap_pre_exec_data *data = arg;
 
 	dup2(data->stdout, 1);
-	os_close_file(data->close_me);
+	close(data->close_me);
 }
 
 static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
@@ -85,14 +80,14 @@ static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
 
 	pid = run_helper(tuntap_pre_exec, &data, argv);
 
-	if(pid < 0)
+	if (pid < 0)
 		return -pid;
 
-	os_close_file(remote);
+	close(remote);
 
 	msg.msg_name = NULL;
 	msg.msg_namelen = 0;
-	if(buffer != NULL){
+	if (buffer != NULL) {
 		iov = ((struct iovec) { buffer, buffer_len });
 		msg.msg_iov = &iov;
 		msg.msg_iovlen = 1;
@@ -106,26 +101,28 @@ static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
 	msg.msg_flags = 0;
 	n = recvmsg(me, &msg, 0);
 	*used_out = n;
-	if(n < 0){
+	if (n < 0) {
 		err = -errno;
-		printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", 
-		       errno);
+		printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - "
+		       "errno = %d\n", errno);
 		return err;
 	}
 	CATCH_EINTR(waitpid(pid, NULL, 0));
 
 	cmsg = CMSG_FIRSTHDR(&msg);
-	if(cmsg == NULL){
-		printk("tuntap_open_tramp : didn't receive a message\n");
+	if (cmsg == NULL) {
+		printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
+		       "message\n");
 		return -EINVAL;
 	}
-	if((cmsg->cmsg_level != SOL_SOCKET) || 
-	   (cmsg->cmsg_type != SCM_RIGHTS)){
-		printk("tuntap_open_tramp : didn't receive a descriptor\n");
+	if ((cmsg->cmsg_level != SOL_SOCKET) ||
+	   (cmsg->cmsg_type != SCM_RIGHTS)) {
+		printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
+		       "descriptor\n");
 		return -EINVAL;
 	}
 	*fd_out = ((int *) CMSG_DATA(cmsg))[0];
-	os_set_exec_close(*fd_out, 1);
+	os_set_exec_close(*fd_out);
 	return 0;
 }
 
@@ -137,47 +134,51 @@ static int tuntap_open(void *data)
 	int err, fds[2], len, used;
 
 	err = tap_open_common(pri->dev, pri->gate_addr);
-	if(err < 0)
+	if (err < 0)
 		return err;
 
-	if(pri->fixed_config){
+	if (pri->fixed_config) {
 		pri->fd = os_open_file("/dev/net/tun",
 				       of_cloexec(of_rdwr(OPENFLAGS())), 0);
-		if(pri->fd < 0){
-			printk("Failed to open /dev/net/tun, err = %d\n",
-			       -pri->fd);
+		if (pri->fd < 0) {
+			printk(UM_KERN_ERR "Failed to open /dev/net/tun, "
+			       "err = %d\n", -pri->fd);
 			return pri->fd;
 		}
 		memset(&ifr, 0, sizeof(ifr));
 		ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
 		strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
-		if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){
+		if (ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0) {
 			err = -errno;
-			printk("TUNSETIFF failed, errno = %d\n", errno);
-			os_close_file(pri->fd);
+			printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
+			       errno);
+			close(pri->fd);
 			return err;
 		}
 	}
 	else {
-		err = os_pipe(fds, 0, 0);
-		if(err < 0){
-			printk("tuntap_open : os_pipe failed - err = %d\n",
-			       -err);
+		err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
+		if (err) {
+			err = -errno;
+			printk(UM_KERN_ERR "tuntap_open : socketpair failed - "
+			       "errno = %d\n", errno);
 			return err;
 		}
 
 		buffer = get_output_buffer(&len);
-		if(buffer != NULL) len--;
+		if (buffer != NULL)
+			len--;
 		used = 0;
 
 		err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0],
 					fds[1], buffer, len, &used);
 
 		output = buffer;
-		if(err < 0) {
+		if (err < 0) {
 			printk("%s", output);
 			free_output_buffer(buffer);
-			printk("tuntap_open_tramp failed - err = %d\n", -err);
+			printk(UM_KERN_ERR "tuntap_open_tramp failed - "
+			       "err = %d\n", -err);
 			return err;
 		}
 
@@ -186,7 +187,7 @@ static int tuntap_open(void *data)
 		printk("%s", output);
 		free_output_buffer(buffer);
 
-		os_close_file(fds[0]);
+		close(fds[0]);
 		iter_addresses(pri->dev, open_addr, pri->dev_name);
 	}
 
@@ -197,24 +198,19 @@ static void tuntap_close(int fd, void *data)
 {
 	struct tuntap_data *pri = data;
 
-	if(!pri->fixed_config) 
+	if (!pri->fixed_config)
 		iter_addresses(pri->dev, close_addr, pri->dev_name);
-	os_close_file(fd);
+	close(fd);
 	pri->fd = -1;
 }
 
-static int tuntap_set_mtu(int mtu, void *data)
-{
-	return mtu;
-}
-
 const struct net_user_info tuntap_user_info = {
 	.init		= tuntap_user_init,
 	.open		= tuntap_open,
 	.close	 	= tuntap_close,
 	.remove	 	= NULL,
-	.set_mtu	= tuntap_set_mtu,
 	.add_address	= tuntap_add_addr,
 	.delete_address = tuntap_del_addr,
-	.max_packet	= MAX_PACKET
+	.mtu		= ETH_MAX_PACKET,
+	.max_packet	= ETH_MAX_PACKET + ETH_HEADER_OTHER,
 };
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c3ecc2a84e0..b542a3a021b 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -82,13 +82,6 @@ int os_access(const char* file, int mode)
 	return 0;
 }
 
-void os_print_error(int error, const char* str)
-{
-	errno = error < 0 ? -error : error;
-
-	perror(str);
-}
-
 /* FIXME? required only by hostaudio (because it passes ioctls verbatim) */
 int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg)
 {
@@ -101,30 +94,6 @@ int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-int os_window_size(int fd, int *rows, int *cols)
-{
-	struct winsize size;
-
-	if(ioctl(fd, TIOCGWINSZ, &size) < 0)
-		return -errno;
-
-	*rows = size.ws_row;
-	*cols = size.ws_col;
-
-	return 0;
-}
-
-int os_new_tty_pgrp(int fd, int pid)
-{
-	if(ioctl(fd, TIOCSCTTY, 0) < 0)
-		return -errno;
-
-	if(tcsetpgrp(fd, pid) < 0)
-		return -errno;
-
-	return 0;
-}
-
 /* FIXME: ensure namebuf in os_get_if_name is big enough */
 int os_get_ifname(int fd, char* namebuf)
 {
@@ -205,19 +174,19 @@ int os_file_mode(char *file, struct openflags *mode_out)
 
 	*mode_out = OPENFLAGS();
 
-	err = os_access(file, OS_ACC_W_OK);
-	if((err < 0) && (err != -EACCES))
-		return(err);
-
-	*mode_out = of_write(*mode_out);
-
-	err = os_access(file, OS_ACC_R_OK);
-	if((err < 0) && (err != -EACCES))
-		return(err);
+	err = access(file, W_OK);
+	if(err && (errno != EACCES))
+		return -errno;
+	else if(!err)
+		*mode_out = of_write(*mode_out);
 
-	*mode_out = of_read(*mode_out);
+	err = access(file, R_OK);
+	if(err && (errno != EACCES))
+		return -errno;
+	else if(!err)
+		*mode_out = of_read(*mode_out);
 
-	return(0);
+	return err;
 }
 
 int os_open_file(char *file, struct openflags flags, int mode)
@@ -236,15 +205,15 @@ int os_open_file(char *file, struct openflags flags, int mode)
 
 	fd = open64(file, f, mode);
 	if(fd < 0)
-		return(-errno);
+		return -errno;
 
 	if(flags.cl && fcntl(fd, F_SETFD, 1)){
 		err = -errno;
-		os_close_file(fd);
+		close(fd);
 		return err;
 	}
 
-	return(fd);
+	return fd;
 }
 
 int os_connect_socket(char *name)
@@ -280,9 +249,9 @@ void os_close_file(int fd)
 	close(fd);
 }
 
-int os_seek_file(int fd, __u64 offset)
+int os_seek_file(int fd, unsigned long long offset)
 {
-	__u64 actual;
+	unsigned long long actual;
 
 	actual = lseek64(fd, offset, SEEK_SET);
 	if(actual != offset)
@@ -316,31 +285,33 @@ int os_file_size(char *file, unsigned long long *size_out)
 	err = os_stat_file(file, &buf);
 	if(err < 0){
 		printk("Couldn't stat \"%s\" : err = %d\n", file, -err);
-		return(err);
+		return err;
 	}
 
 	if(S_ISBLK(buf.ust_mode)){
 		int fd;
 		long blocks;
 
-		fd = os_open_file(file, of_read(OPENFLAGS()), 0);
-		if(fd < 0){
-			printk("Couldn't open \"%s\", errno = %d\n", file, -fd);
-			return(fd);
+		fd = open(file, O_RDONLY, 0);
+		if(fd < 0) {
+			err = -errno;
+			printk("Couldn't open \"%s\", errno = %d\n", file,
+			       errno);
+			return err;
 		}
 		if(ioctl(fd, BLKGETSIZE, &blocks) < 0){
 			err = -errno;
 			printk("Couldn't get the block size of \"%s\", "
 			       "errno = %d\n", file, errno);
-			os_close_file(fd);
-			return(err);
+			close(fd);
+			return err;
 		}
 		*size_out = ((long long) blocks) * 512;
-		os_close_file(fd);
-		return(0);
+		close(fd);
 	}
-	*size_out = buf.ust_size;
-	return(0);
+	else *size_out = buf.ust_size;
+
+	return 0;
 }
 
 int os_file_modtime(char *file, unsigned long *modtime)
@@ -358,35 +329,28 @@ int os_file_modtime(char *file, unsigned long *modtime)
 	return 0;
 }
 
-int os_get_exec_close(int fd, int* close_on_exec)
+int os_get_exec_close(int fd, int *close_on_exec)
 {
 	int ret;
 
-	do {
-		ret = fcntl(fd, F_GETFD);
-	} while((ret < 0) && (errno == EINTR)) ;
+	CATCH_EINTR(ret = fcntl(fd, F_GETFD));
 
 	if(ret < 0)
-		return(-errno);
+		return -errno;
 
-	*close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0;
-	return(ret);
+	*close_on_exec = (ret & FD_CLOEXEC) ? 1 : 0;
+	return ret;
 }
 
-int os_set_exec_close(int fd, int close_on_exec)
+int os_set_exec_close(int fd)
 {
-	int flag, err;
-
-	if(close_on_exec) flag = FD_CLOEXEC;
-	else flag = 0;
+	int err;
 
-	do {
-		err = fcntl(fd, F_SETFD, flag);
-	} while((err < 0) && (errno == EINTR)) ;
+	CATCH_EINTR(err = fcntl(fd, F_SETFD, FD_CLOEXEC));
 
 	if(err < 0)
-		return(-errno);
-	return(err);
+		return -errno;
+	return err;
 }
 
 int os_pipe(int *fds, int stream, int close_on_exec)
@@ -395,16 +359,16 @@ int os_pipe(int *fds, int stream, int close_on_exec)
 
 	err = socketpair(AF_UNIX, type, 0, fds);
 	if(err < 0)
-		return(-errno);
+		return -errno;
 
 	if(!close_on_exec)
-		return(0);
+		return 0;
 
-	err = os_set_exec_close(fds[0], 1);
+	err = os_set_exec_close(fds[0]);
 	if(err < 0)
 		goto error;
 
-	err = os_set_exec_close(fds[1], 1);
+	err = os_set_exec_close(fds[1]);
 	if(err < 0)
 		goto error;
 
@@ -412,9 +376,9 @@ int os_pipe(int *fds, int stream, int close_on_exec)
 
  error:
 	printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err);
-	os_close_file(fds[1]);
-	os_close_file(fds[0]);
-	return(err);
+	close(fds[1]);
+	close(fds[0]);
+	return err;
 }
 
 int os_set_fd_async(int fd, int owner)
@@ -561,7 +525,7 @@ int os_create_unix_socket(char *file, int len, int close_on_exec)
 		return -errno;
 
 	if(close_on_exec) {
-		err = os_set_exec_close(sock, 1);
+		err = os_set_exec_close(sock);
 		if(err < 0)
 			printk("create_unix_socket : close_on_exec failed, "
 		       "err = %d", -err);
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index d81af7b8587..7a72dbb61b0 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -11,6 +11,7 @@
 #include <limits.h>
 #include <sys/signal.h>
 #include <sys/wait.h>
+#include <sys/socket.h>
 #include "user.h"
 #include "kern_util.h"
 #include "os.h"
@@ -54,13 +55,14 @@ int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv)
 	if (stack == 0)
 		return -ENOMEM;
 
-	ret = os_pipe(fds, 1, 0);
+	ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
 	if (ret < 0) {
-		printk("run_helper : pipe failed, ret = %d\n", -ret);
+		ret = -errno;
+		printk("run_helper : socketpair failed, errno = %d\n", errno);
 		goto out_free;
 	}
 
-	ret = os_set_exec_close(fds[1], 1);
+	ret = os_set_exec_close(fds[1]);
 	if (ret < 0) {
 		printk("run_helper : setting FD_CLOEXEC failed, ret = %d\n",
 		       -ret);
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index a633fa8e0a9..6aa6f95d652 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -145,11 +145,7 @@ void init_irq_signals(int on_sigstack)
 
 	flags = on_sigstack ? SA_ONSTACK : 0;
 
-	set_handler(SIGVTALRM, (__sighandler_t) alarm_handler,
-		    flags | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1);
-	set_handler(SIGALRM, (__sighandler_t) alarm_handler,
-		    flags | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1);
 	set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
 	signal(SIGWINCH, SIG_IGN);
 }
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index e85f4995a01..82c3778627b 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -1,33 +1,21 @@
 /*
- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
-#include <signal.h>
+#include <unistd.h>
 #include <errno.h>
+#include <signal.h>
+#include <string.h>
 #include <sys/resource.h>
-#include <sys/mman.h>
-#include <sys/user.h>
-#include <asm/page.h>
-#include "kern_util.h"
 #include "as-layout.h"
-#include "mem_user.h"
-#include "irq_user.h"
-#include "user.h"
 #include "init.h"
-#include "mode.h"
-#include "choose-mode.h"
-#include "uml-config.h"
+#include "kern_constants.h"
+#include "kern_util.h"
 #include "os.h"
 #include "um_malloc.h"
-#include "kern_constants.h"
-
-/* Set in main, unchanged thereafter */
-char *linux_prog;
 
 #define PGD_BOUND (4 * 1024 * 1024)
 #define STACKSIZE (8 * 1024 * 1024)
@@ -37,13 +25,13 @@ static void set_stklim(void)
 {
 	struct rlimit lim;
 
-	if(getrlimit(RLIMIT_STACK, &lim) < 0){
+	if (getrlimit(RLIMIT_STACK, &lim) < 0) {
 		perror("getrlimit");
 		exit(1);
 	}
-	if((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)){
+	if ((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)) {
 		lim.rlim_cur = STACKSIZE;
-		if(setrlimit(RLIMIT_STACK, &lim) < 0){
+		if (setrlimit(RLIMIT_STACK, &lim) < 0) {
 			perror("setrlimit");
 			exit(1);
 		}
@@ -55,7 +43,7 @@ static __init void do_uml_initcalls(void)
 	initcall_t *call;
 
 	call = &__uml_initcall_start;
-	while (call < &__uml_initcall_end){
+	while (call < &__uml_initcall_end) {
 		(*call)();
 		call++;
 	}
@@ -74,7 +62,8 @@ static void install_fatal_handler(int sig)
 	/* All signals are enabled in this handler ... */
 	sigemptyset(&action.sa_mask);
 
-	/* ... including the signal being handled, plus we want the
+	/*
+	 * ... including the signal being handled, plus we want the
 	 * handler reset to the default behavior, so that if an exit
 	 * handler is hanging for some reason, the UML will just die
 	 * after this signal is sent a second time.
@@ -82,7 +71,7 @@ static void install_fatal_handler(int sig)
 	action.sa_flags = SA_RESETHAND | SA_NODEFER;
 	action.sa_restorer = NULL;
 	action.sa_handler = last_ditch_exit;
-	if(sigaction(sig, &action, NULL) < 0){
+	if (sigaction(sig, &action, NULL) < 0) {
 		printf("failed to install handler for signal %d - errno = %d\n",
 		       errno);
 		exit(1);
@@ -98,7 +87,8 @@ static void setup_env_path(void)
 	int path_len = 0;
 
 	old_path = getenv("PATH");
-	/* if no PATH variable is set or it has an empty value
+	/*
+	 * if no PATH variable is set or it has an empty value
 	 * just use the default + /usr/lib/uml
 	 */
 	if (!old_path || (path_len = strlen(old_path)) == 0) {
@@ -126,93 +116,68 @@ int __init main(int argc, char **argv, char **envp)
 	char **new_argv;
 	int ret, i, err;
 
-#ifdef UML_CONFIG_CMDLINE_ON_HOST
-	/* Allocate memory for thread command lines */
-	if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){
-
-		char padding[THREAD_NAME_LEN] = {
-			[ 0 ...  THREAD_NAME_LEN - 2] = ' ', '\0'
-		};
-
-		new_argv = malloc((argc + 2) * sizeof(char*));
-		if(!new_argv) {
-			perror("Allocating extended argv");
-			exit(1);
-		}
-
-		new_argv[0] = argv[0];
-		new_argv[1] = padding;
-
-		for(i = 2; i <= argc; i++)
-			new_argv[i] = argv[i - 1];
-		new_argv[argc + 1] = NULL;
-
-		execvp(new_argv[0], new_argv);
-		perror("execing with extended args");
-		exit(1);
-	}
-#endif
-
-	linux_prog = argv[0];
-
 	set_stklim();
 
 	setup_env_path();
 
 	new_argv = malloc((argc + 1) * sizeof(char *));
-	if(new_argv == NULL){
+	if (new_argv == NULL) {
 		perror("Mallocing argv");
 		exit(1);
 	}
-	for(i=0;i<argc;i++){
+	for (i = 0; i < argc; i++) {
 		new_argv[i] = strdup(argv[i]);
-		if(new_argv[i] == NULL){
+		if (new_argv[i] == NULL) {
 			perror("Mallocing an arg");
 			exit(1);
 		}
 	}
 	new_argv[argc] = NULL;
 
-	/* Allow these signals to bring down a UML if all other
+	/*
+	 * Allow these signals to bring down a UML if all other
 	 * methods of control fail.
 	 */
 	install_fatal_handler(SIGINT);
 	install_fatal_handler(SIGTERM);
 	install_fatal_handler(SIGHUP);
 
-	scan_elf_aux( envp);
+	scan_elf_aux(envp);
 
 	do_uml_initcalls();
 	ret = linux_main(argc, argv);
 
-	/* Disable SIGPROF - I have no idea why libc doesn't do this or turn
+	/*
+	 * Disable SIGPROF - I have no idea why libc doesn't do this or turn
 	 * off the profiling time, but UML dies with a SIGPROF just before
 	 * exiting when profiling is active.
 	 */
 	change_sig(SIGPROF, 0);
 
-	/* This signal stuff used to be in the reboot case.  However,
+	/*
+	 * This signal stuff used to be in the reboot case.  However,
 	 * sometimes a SIGVTALRM can come in when we're halting (reproducably
 	 * when writing out gcov information, presumably because that takes
 	 * some time) and cause a segfault.
 	 */
 
-	/* stop timers and set SIG*ALRM to be ignored */
+	/* stop timers and set SIGVTALRM to be ignored */
 	disable_timer();
 
 	/* disable SIGIO for the fds and set SIGIO to be ignored */
 	err = deactivate_all_fds();
-	if(err)
+	if (err)
 		printf("deactivate_all_fds failed, errno = %d\n", -err);
 
-	/* Let any pending signals fire now.  This ensures
+	/*
+	 * Let any pending signals fire now.  This ensures
 	 * that they won't be delivered after the exec, when
 	 * they are definitely not expected.
 	 */
 	unblock_signals();
 
 	/* Reboot */
-	if(ret){
+	if (ret) {
 		printf("\n");
 		execvp(new_argv[0], new_argv);
 		perror("Failed to exec kernel");
@@ -222,26 +187,24 @@ int __init main(int argc, char **argv, char **envp)
 	return uml_exitcode;
 }
 
-#define CAN_KMALLOC() \
-	(kmalloc_ok && CHOOSE_MODE((os_getpid() != tracing_pid), 1))
-
 extern void *__real_malloc(int);
 
 void *__wrap_malloc(int size)
 {
 	void *ret;
 
-	if(!CAN_KMALLOC())
+	if (!kmalloc_ok)
 		return __real_malloc(size);
-	else if(size <= UM_KERN_PAGE_SIZE)
+	else if (size <= UM_KERN_PAGE_SIZE)
 		/* finding contiguous pages can be hard*/
 		ret = kmalloc(size, UM_GFP_KERNEL);
 	else ret = vmalloc(size);
 
-	/* glibc people insist that if malloc fails, errno should be
+	/*
+	 * glibc people insist that if malloc fails, errno should be
 	 * set by malloc as well. So we do.
 	 */
-	if(ret == NULL)
+	if (ret == NULL)
 		errno = ENOMEM;
 
 	return ret;
@@ -251,7 +214,7 @@ void *__wrap_calloc(int n, int size)
 {
 	void *ptr = __wrap_malloc(n * size);
 
-	if(ptr == NULL)
+	if (ptr == NULL)
 		return NULL;
 	memset(ptr, 0, n * size);
 	return ptr;
@@ -265,7 +228,8 @@ void __wrap_free(void *ptr)
 {
 	unsigned long addr = (unsigned long) ptr;
 
-	/* We need to know how the allocation happened, so it can be correctly
+	/*
+	 * We need to know how the allocation happened, so it can be correctly
 	 * freed.  This is done by seeing what region of memory the pointer is
 	 * in -
 	 * 	physical memory - kmalloc/kfree
@@ -283,12 +247,12 @@ void __wrap_free(void *ptr)
 	 * there is a possibility for memory leaks.
 	 */
 
-	if((addr >= uml_physmem) && (addr < high_physmem)){
-		if(CAN_KMALLOC())
+	if ((addr >= uml_physmem) && (addr < high_physmem)) {
+		if (kmalloc_ok)
 			kfree(ptr);
 	}
-	else if((addr >= start_vm) && (addr < end_vm)){
-		if(CAN_KMALLOC())
+	else if ((addr >= start_vm) && (addr < end_vm)) {
+		if (kmalloc_ok)
 			vfree(ptr);
 	}
 	else __real_free(ptr);
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index c6378c6d10d..436f8d20b20 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -218,7 +218,7 @@ int __init create_tmp_file(unsigned long long len)
 
 	err = fchmod(fd, 0777);
 	if(err < 0){
-		perror("os_mode_fd");
+		perror("fchmod");
 		exit(1);
 	}
 
@@ -226,7 +226,7 @@ int __init create_tmp_file(unsigned long long len)
 	 * increase the file size by one byte, to the desired length.
 	 */
 	if (lseek64(fd, len - 1, SEEK_SET) < 0) {
-		perror("os_seek_file");
+		perror("lseek64");
 		exit(1);
 	}
 
@@ -247,7 +247,7 @@ int __init create_mem_file(unsigned long long len)
 
 	fd = create_tmp_file(len);
 
-	err = os_set_exec_close(fd, 1);
+	err = os_set_exec_close(fd);
 	if(err < 0){
 		errno = -err;
 		perror("exec_close");
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index e9c14329751..37781db4cec 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -1,27 +1,24 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com)
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <unistd.h>
 #include <stdio.h>
+#include <unistd.h>
 #include <errno.h>
 #include <signal.h>
+#include <fcntl.h>
 #include <sys/mman.h>
+#include <sys/ptrace.h>
 #include <sys/wait.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include "ptrace_user.h"
+#include <asm/unistd.h>
+#include "init.h"
+#include "kern_constants.h"
+#include "longjmp.h"
 #include "os.h"
-#include "user.h"
 #include "process.h"
-#include "irq_user.h"
-#include "kern_util.h"
-#include "longjmp.h"
 #include "skas_ptrace.h"
-#include "kern_constants.h"
-#include "uml-config.h"
-#include "init.h"
+#include "user.h"
 
 #define ARBITRARY_ADDR -1
 #define FAILURE_PID    -1
@@ -32,30 +29,32 @@
 unsigned long os_process_pc(int pid)
 {
 	char proc_stat[STAT_PATH_LEN], buf[256];
-	unsigned long pc;
+	unsigned long pc = ARBITRARY_ADDR;
 	int fd, err;
 
 	sprintf(proc_stat, "/proc/%d/stat", pid);
-	fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0);
-	if(fd < 0){
-		printk("os_process_pc - couldn't open '%s', err = %d\n",
-		       proc_stat, -fd);
-		return ARBITRARY_ADDR;
+	fd = open(proc_stat, O_RDONLY, 0);
+	if (fd < 0) {
+		printk(UM_KERN_ERR "os_process_pc - couldn't open '%s', "
+		       "errno = %d\n", proc_stat, errno);
+		goto out;
 	}
 	CATCH_EINTR(err = read(fd, buf, sizeof(buf)));
-	if(err < 0){
-		printk("os_process_pc - couldn't read '%s', err = %d\n",
-		       proc_stat, errno);
-		os_close_file(fd);
-		return ARBITRARY_ADDR;
+	if (err < 0) {
+		printk(UM_KERN_ERR "os_process_pc - couldn't read '%s', "
+		       "err = %d\n", proc_stat, errno);
+		goto out_close;
 	}
-	os_close_file(fd);
+	/* fd is closed exactly once, at out_close below */
 	pc = ARBITRARY_ADDR;
-	if(sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d "
-		  "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d "
-		  "%*d %*d %*d %*d %*d %lu", &pc) != 1){
-		printk("os_process_pc - couldn't find pc in '%s'\n", buf);
-	}
+	if (sscanf(buf, "%*d " COMM_SCANF " %*c %*d %*d %*d %*d %*d %*d %*d "
+		   "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d "
+		   "%*d %*d %*d %*d %*d %lu", &pc) != 1)
+		printk(UM_KERN_ERR "os_process_pc - couldn't find pc in '%s'\n",
+		       buf);
+ out_close:
+	close(fd);
+ out:
 	return pc;
 }
 
@@ -63,30 +62,32 @@ int os_process_parent(int pid)
 {
 	char stat[STAT_PATH_LEN];
 	char data[256];
-	int parent, n, fd;
+	int parent = FAILURE_PID, n, fd;
 
-	if(pid == -1)
-		return -1;
+	if (pid == -1)
+		return parent;
 
 	snprintf(stat, sizeof(stat), "/proc/%d/stat", pid);
-	fd = os_open_file(stat, of_read(OPENFLAGS()), 0);
-	if(fd < 0){
-		printk("Couldn't open '%s', err = %d\n", stat, -fd);
-		return FAILURE_PID;
+	fd = open(stat, O_RDONLY, 0);
+	if (fd < 0) {
+		printk(UM_KERN_ERR "Couldn't open '%s', errno = %d\n", stat,
+		       errno);
+		return parent;
 	}
 
 	CATCH_EINTR(n = read(fd, data, sizeof(data)));
-	os_close_file(fd);
+	close(fd);
 
-	if(n < 0){
-		printk("Couldn't read '%s', err = %d\n", stat, errno);
-		return FAILURE_PID;
+	if (n < 0) {
+		printk(UM_KERN_ERR "Couldn't read '%s', errno = %d\n", stat,
+		       errno);
+		return parent;
 	}
 
 	parent = FAILURE_PID;
 	n = sscanf(data, "%*d " COMM_SCANF " %*c %d", &parent);
-	if(n != 1)
-		printk("Failed to scan '%s'\n", data);
+	if (n != 1)
+		printk(UM_KERN_ERR "Failed to scan '%s'\n", data);
 
 	return parent;
 }
@@ -99,9 +100,8 @@ void os_stop_process(int pid)
 void os_kill_process(int pid, int reap_child)
 {
 	kill(pid, SIGKILL);
-	if(reap_child)
+	if (reap_child)
 		CATCH_EINTR(waitpid(pid, NULL, 0));
-		
 }
 
 /* This is here uniquely to have access to the userspace errno, i.e. the one
@@ -129,17 +129,10 @@ void os_kill_ptraced_process(int pid, int reap_child)
 	kill(pid, SIGKILL);
 	ptrace(PTRACE_KILL, pid);
 	ptrace(PTRACE_CONT, pid);
-	if(reap_child)
+	if (reap_child)
 		CATCH_EINTR(waitpid(pid, NULL, 0));
 }
 
-#ifdef UML_CONFIG_MODE_TT
-void os_usr1_process(int pid)
-{
-	kill(pid, SIGUSR1);
-}
-#endif
-
 /* Don't use the glibc version, which caches the result in TLS. It misses some
  * syscalls, and also breaks with clone(), which does not unshare the TLS.
  */
@@ -160,34 +153,35 @@ int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len,
 	void *loc;
 	int prot;
 
-	prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | 
+	prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
 		(x ? PROT_EXEC : 0);
 
 	loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED,
 		     fd, off);
-	if(loc == MAP_FAILED)
+	if (loc == MAP_FAILED)
 		return -errno;
 	return 0;
 }
 
 int os_protect_memory(void *addr, unsigned long len, int r, int w, int x)
 {
-        int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | 
+	int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
 		    (x ? PROT_EXEC : 0));
 
-        if(mprotect(addr, len, prot) < 0)
+	if (mprotect(addr, len, prot) < 0)
 		return -errno;
-        return 0;
+
+	return 0;
 }
 
 int os_unmap_memory(void *addr, int len)
 {
-        int err;
+	int err;
 
-        err = munmap(addr, len);
-	if(err < 0)
+	err = munmap(addr, len);
+	if (err < 0)
 		return -errno;
-        return 0;
+	return 0;
 }
 
 #ifndef MADV_REMOVE
@@ -199,7 +193,7 @@ int os_drop_memory(void *addr, int length)
 	int err;
 
 	err = madvise(addr, length, MADV_REMOVE);
-	if(err < 0)
+	if (err < 0)
 		err = -errno;
 	return err;
 }
@@ -209,22 +203,24 @@ int __init can_drop_memory(void)
 	void *addr;
 	int fd, ok = 0;
 
-	printk("Checking host MADV_REMOVE support...");
+	printk(UM_KERN_INFO "Checking host MADV_REMOVE support...");
 	fd = create_mem_file(UM_KERN_PAGE_SIZE);
-	if(fd < 0){
-		printk("Creating test memory file failed, err = %d\n", -fd);
+	if (fd < 0) {
+		printk(UM_KERN_ERR "Creating test memory file failed, "
+		       "err = %d\n", -fd);
 		goto out;
 	}
 
 	addr = mmap64(NULL, UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
 		      MAP_SHARED, fd, 0);
-	if(addr == MAP_FAILED){
-		printk("Mapping test memory file failed, err = %d\n", -errno);
+	if (addr == MAP_FAILED) {
+		printk(UM_KERN_ERR "Mapping test memory file failed, "
+		       "err = %d\n", -errno);
 		goto out_close;
 	}
 
-	if(madvise(addr, UM_KERN_PAGE_SIZE, MADV_REMOVE) != 0){
-		printk("MADV_REMOVE failed, err = %d\n", -errno);
+	if (madvise(addr, UM_KERN_PAGE_SIZE, MADV_REMOVE) != 0) {
+		printk(UM_KERN_ERR "MADV_REMOVE failed, err = %d\n", -errno);
 		goto out_unmap;
 	}
 
@@ -239,58 +235,31 @@ out:
 	return ok;
 }
 
-#ifdef UML_CONFIG_MODE_TT
-void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
-{
-	int flags = 0, pages;
-
-	if(sig_stack != NULL){
-		pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER);
-		set_sigstack(sig_stack, pages * UM_KERN_PAGE_SIZE);
-		flags = SA_ONSTACK;
-	}
-	if(usr1_handler){
-		struct sigaction sa;
-
-		sa.sa_handler = usr1_handler;
-		sigemptyset(&sa.sa_mask);
-		sa.sa_flags = flags;
-		sa.sa_restorer = NULL;
-		if(sigaction(SIGUSR1, &sa, NULL) < 0)
-			panic("init_new_thread_stack - sigaction failed - "
-			      "errno = %d\n", errno);
-	}
-}
-#endif
-
 void init_new_thread_signals(void)
 {
 	set_handler(SIGSEGV, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
 	set_handler(SIGTRAP, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
 	set_handler(SIGFPE, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
 	set_handler(SIGILL, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
 	set_handler(SIGBUS, (__sighandler_t) sig_handler, SA_ONSTACK,
-		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
-	set_handler(SIGUSR2, (__sighandler_t) sig_handler,
-		    SA_ONSTACK, SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM,
-		    -1);
+		    SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
 	signal(SIGHUP, SIG_IGN);
 
 	init_irq_signals(1);
 }
 
-int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr)
+int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr)
 {
 	jmp_buf buf;
 	int n;
 
 	*jmp_ptr = &buf;
 	n = UML_SETJMP(&buf);
-	if(n != 0)
+	if (n != 0)
 		return n;
 	(*fn)(arg);
 	return 0;
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
new file mode 100644
index 00000000000..a32ba6ab121
--- /dev/null
+++ b/arch/um/os-Linux/registers.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2004 PathScale, Inc
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/ptrace.h>
+#include "sysdep/ptrace.h"
+#include "user.h"
+
+/* This is set once at boot time and not changed thereafter */
+
+static unsigned long exec_regs[MAX_REG_NR];
+
+void init_thread_registers(struct uml_pt_regs *to)
+{
+	memcpy(to->gp, exec_regs, sizeof(to->gp));
+}
+
+void save_registers(int pid, struct uml_pt_regs *regs)
+{
+	int err;
+
+	err = ptrace(PTRACE_GETREGS, pid, 0, regs->gp);
+	if (err < 0)
+		panic("save_registers - saving registers failed, errno = %d\n",
+		      errno);
+}
+
+void restore_registers(int pid, struct uml_pt_regs *regs)
+{
+	int err;
+
+	err = ptrace(PTRACE_SETREGS, pid, 0, regs->gp);
+	if (err < 0)
+		panic("restore_registers - restoring registers failed, "
+		      "errno = %d\n", errno);
+}
+
+void init_registers(int pid)
+{
+	int err;
+
+	err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs);
+	if (err)
+		panic("init_registers : PTRACE_GETREGS failed, errno = %d\n",
+		      errno);
+
+	arch_init_registers(pid);
+}
+
+void get_safe_registers(unsigned long *regs)
+{
+	memcpy(regs, exec_regs, sizeof(exec_regs));
+}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index b98f7ea2d2f..e9800b0b568 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -1,26 +1,21 @@
 /*
  * Copyright (C) 2004 PathScale, Inc
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <signal.h>
-#include <stdio.h>
-#include <unistd.h>
 #include <stdlib.h>
-#include <errno.h>
 #include <stdarg.h>
-#include <string.h>
-#include <sys/mman.h>
-#include "user.h"
-#include "signal_kern.h"
-#include "sysdep/sigcontext.h"
-#include "sysdep/barrier.h"
-#include "sigcontext.h"
-#include "mode.h"
+#include <errno.h>
+#include <signal.h>
+#include <strings.h>
 #include "os.h"
+#include "sysdep/barrier.h"
+#include "sysdep/sigcontext.h"
+#include "user.h"
 
-/* These are the asynchronous signals.  SIGVTALRM and SIGARLM are handled
- * together under SIGVTALRM_BIT.  SIGPROF is excluded because we want to
+/*
+ * These are the asynchronous signals.  SIGPROF is excluded because we want to
  * be able to profile all of UML, not just the non-critical sections.  If
  * profiling is not thread-safe, then that is not my problem.  We can disable
  * profiling when SMP is enabled in that case.
@@ -31,10 +26,8 @@
 #define SIGVTALRM_BIT 1
 #define SIGVTALRM_MASK (1 << SIGVTALRM_BIT)
 
-#define SIGALRM_BIT 2
-#define SIGALRM_MASK (1 << SIGALRM_BIT)
-
-/* These are used by both the signal handlers and
+/*
+ * These are used by both the signal handlers and
  * block/unblock_signals.  I don't want modifications cached in a
  * register - they must go straight to memory.
  */
@@ -46,34 +39,27 @@ void sig_handler(int sig, struct sigcontext *sc)
 	int enabled;
 
 	enabled = signals_enabled;
-	if(!enabled && (sig == SIGIO)){
+	if (!enabled && (sig == SIGIO)) {
 		pending |= SIGIO_MASK;
 		return;
 	}
 
 	block_signals();
 
-	CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas,
-			 sig, sc);
+	sig_handler_common_skas(sig, sc);
 
 	set_signals(enabled);
 }
 
-static void real_alarm_handler(int sig, struct sigcontext *sc)
+static void real_alarm_handler(struct sigcontext *sc)
 {
-	union uml_pt_regs regs;
+	struct uml_pt_regs regs;
 
-	if(sig == SIGALRM)
-		switch_timers(0);
-
-	if(sc != NULL)
+	if (sc != NULL)
 		copy_sc(&regs, sc);
-	regs.skas.is_user = 0;
+	regs.is_user = 0;
 	unblock_signals();
-	timer_handler(sig, &regs);
-
-	if(sig == SIGALRM)
-		switch_timers(1);
+	timer_handler(SIGVTALRM, &regs);
 }
 
 void alarm_handler(int sig, struct sigcontext *sc)
@@ -81,27 +67,30 @@ void alarm_handler(int sig, struct sigcontext *sc)
 	int enabled;
 
 	enabled = signals_enabled;
-	if(!signals_enabled){
-		if(sig == SIGVTALRM)
-			pending |= SIGVTALRM_MASK;
-		else pending |= SIGALRM_MASK;
-
+	if (!signals_enabled) {
+		pending |= SIGVTALRM_MASK;
 		return;
 	}
 
 	block_signals();
 
-	real_alarm_handler(sig, sc);
+	real_alarm_handler(sc);
 	set_signals(enabled);
 }
 
+void timer_init(void)
+{
+	set_handler(SIGVTALRM, (__sighandler_t) alarm_handler,
+		    SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, -1);
+}
+
 void set_sigstack(void *sig_stack, int size)
 {
 	stack_t stack = ((stack_t) { .ss_flags	= 0,
 				     .ss_sp	= (__ptr_t) sig_stack,
 				     .ss_size 	= size - sizeof(void *) });
 
-	if(sigaltstack(&stack, NULL) != 0)
+	if (sigaltstack(&stack, NULL) != 0)
 		panic("enabling signal stack failed, errno = %d\n", errno);
 }
 
@@ -111,7 +100,7 @@ void remove_sigstack(void)
 				     .ss_sp	= NULL,
 				     .ss_size	= 0 });
 
-	if(sigaltstack(&stack, NULL) != 0)
+	if (sigaltstack(&stack, NULL) != 0)
 		panic("disabling signal stack failed, errno = %d\n", errno);
 }
 
@@ -135,26 +124,27 @@ void handle_signal(int sig, struct sigcontext *sc)
 		 * with this interrupt.
 		 */
 		bail = to_irq_stack(&pending);
-		if(bail)
+		if (bail)
 			return;
 
 		nested = pending & 1;
 		pending &= ~1;
 
-		while((sig = ffs(pending)) != 0){
+		while ((sig = ffs(pending)) != 0){
 			sig--;
 			pending &= ~(1 << sig);
 			(*handlers[sig])(sig, sc);
 		}
 
-		/* Again, pending comes back with a mask of signals
+		/*
+		 * Again, pending comes back with a mask of signals
 		 * that arrived while tearing down the stack.  If this
 		 * is non-zero, we just go back, set up the stack
 		 * again, and handle the new interrupts.
 		 */
-		if(!nested)
+		if (!nested)
 			pending = from_irq_stack(nested);
-	} while(pending);
+	} while (pending);
 }
 
 extern void hard_handler(int sig);
@@ -172,18 +162,18 @@ void set_handler(int sig, void (*handler)(int), int flags, ...)
 	sigemptyset(&action.sa_mask);
 
 	va_start(ap, flags);
-	while((mask = va_arg(ap, int)) != -1)
+	while ((mask = va_arg(ap, int)) != -1)
 		sigaddset(&action.sa_mask, mask);
 	va_end(ap);
 
 	action.sa_flags = flags;
 	action.sa_restorer = NULL;
-	if(sigaction(sig, &action, NULL) < 0)
+	if (sigaction(sig, &action, NULL) < 0)
 		panic("sigaction failed - errno = %d\n", errno);
 
 	sigemptyset(&sig_mask);
 	sigaddset(&sig_mask, sig);
-	if(sigprocmask(SIG_UNBLOCK, &sig_mask, NULL) < 0)
+	if (sigprocmask(SIG_UNBLOCK, &sig_mask, NULL) < 0)
 		panic("sigprocmask failed - errno = %d\n", errno);
 }
 
@@ -194,13 +184,14 @@ int change_sig(int signal, int on)
 	sigemptyset(&sigset);
 	sigaddset(&sigset, signal);
 	sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old);
-	return(!sigismember(&old, signal));
+	return !sigismember(&old, signal);
 }
 
 void block_signals(void)
 {
 	signals_enabled = 0;
-	/* This must return with signals disabled, so this barrier
+	/*
+	 * This must return with signals disabled, so this barrier
 	 * ensures that writes are flushed out before the return.
 	 * This might matter if gcc figures out how to inline this and
 	 * decides to shuffle this code into the caller.
@@ -212,27 +203,31 @@ void unblock_signals(void)
 {
 	int save_pending;
 
-	if(signals_enabled == 1)
+	if (signals_enabled == 1)
 		return;
 
-	/* We loop because the IRQ handler returns with interrupts off.  So,
+	/*
+	 * We loop because the IRQ handler returns with interrupts off.  So,
 	 * interrupts may have arrived and we need to re-enable them and
 	 * recheck pending.
 	 */
-	while(1){
-		/* Save and reset save_pending after enabling signals.  This
+	while(1) {
+		/*
+		 * Save and reset save_pending after enabling signals.  This
 		 * way, pending won't be changed while we're reading it.
 		 */
 		signals_enabled = 1;
 
-		/* Setting signals_enabled and reading pending must
+		/*
+		 * Setting signals_enabled and reading pending must
 		 * happen in this order.
 		 */
 		mb();
 
 		save_pending = pending;
-		if(save_pending == 0){
-			/* This must return with signals enabled, so
+		if (save_pending == 0) {
+			/*
+			 * This must return with signals enabled, so
 			 * this barrier ensures that writes are
 			 * flushed out before the return.  This might
 			 * matter if gcc figures out how to inline
@@ -245,26 +240,24 @@ void unblock_signals(void)
 
 		pending = 0;
 
-		/* We have pending interrupts, so disable signals, as the
+		/*
+		 * We have pending interrupts, so disable signals, as the
 		 * handlers expect them off when they are called.  They will
 		 * be enabled again above.
 		 */
 
 		signals_enabled = 0;
 
-		/* Deal with SIGIO first because the alarm handler might
+		/*
+		 * Deal with SIGIO first because the alarm handler might
 		 * schedule, leaving the pending SIGIO stranded until we come
 		 * back here.
 		 */
-		if(save_pending & SIGIO_MASK)
-			CHOOSE_MODE_PROC(sig_handler_common_tt,
-					 sig_handler_common_skas, SIGIO, NULL);
-
-		if(save_pending & SIGALRM_MASK)
-			real_alarm_handler(SIGALRM, NULL);
+		if (save_pending & SIGIO_MASK)
+			sig_handler_common_skas(SIGIO, NULL);
 
-		if(save_pending & SIGVTALRM_MASK)
-			real_alarm_handler(SIGVTALRM, NULL);
+		if (save_pending & SIGVTALRM_MASK)
+			real_alarm_handler(NULL);
 	}
 }
 
@@ -276,11 +269,11 @@ int get_signals(void)
 int set_signals(int enable)
 {
 	int ret;
-	if(signals_enabled == enable)
+	if (signals_enabled == enable)
 		return enable;
 
 	ret = signals_enabled;
-	if(enable)
+	if (enable)
 		unblock_signals();
 	else block_signals();
 
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 0f7df4eb903..484e68f9f7a 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -1,31 +1,26 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <signal.h>
+#include <stddef.h>
+#include <unistd.h>
 #include <errno.h>
 #include <string.h>
-#include <unistd.h>
 #include <sys/mman.h>
-#include <sys/wait.h>
-#include <asm/page.h>
-#include <asm/unistd.h>
-#include "mem_user.h"
-#include "mem.h"
-#include "skas.h"
-#include "user.h"
+#include "init.h"
+#include "kern_constants.h"
+#include "as-layout.h"
+#include "mm_id.h"
 #include "os.h"
 #include "proc_mm.h"
 #include "ptrace_user.h"
-#include "kern_util.h"
-#include "task.h"
 #include "registers.h"
-#include "uml-config.h"
+#include "skas.h"
+#include "user.h"
 #include "sysdep/ptrace.h"
 #include "sysdep/stub.h"
-#include "init.h"
-#include "kern_constants.h"
+#include "uml-config.h"
 
 extern unsigned long batch_syscall_stub, __syscall_stub_start;
 
@@ -34,7 +29,7 @@ extern void wait_stub_done(int pid);
 static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
 					      unsigned long *stack)
 {
-	if(stack == NULL) {
+	if (stack == NULL) {
 		stack = (unsigned long *) mm_idp->stack + 2;
 		*stack = 0;
 	}
@@ -45,8 +40,8 @@ static unsigned long syscall_regs[MAX_REG_NR];
 
 static int __init init_syscall_regs(void)
 {
-	get_safe_registers(syscall_regs, NULL);
-	syscall_regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE +
+	get_safe_registers(syscall_regs);
+	syscall_regs[REGS_IP_INDEX] = STUB_CODE +
 		((unsigned long) &batch_syscall_stub -
 		 (unsigned long) &__syscall_stub_start);
 	return 0;
@@ -68,29 +63,30 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 	unsigned long * syscall;
 	int err, pid = mm_idp->u.pid;
 
-	if(proc_mm)
+	if (proc_mm)
 		/* FIXME: Need to look up userspace_pid by cpu */
 		pid = userspace_pid[0];
 
 	multi_count++;
 
 	n = ptrace_setregs(pid, syscall_regs);
-	if(n < 0){
-		printk("Registers - \n");
-		for(i = 0; i < MAX_REG_NR; i++)
-			printk("\t%d\t0x%lx\n", i, syscall_regs[i]);
+	if (n < 0) {
+		printk(UM_KERN_ERR "Registers - \n");
+		for (i = 0; i < MAX_REG_NR; i++)
+			printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
 		panic("do_syscall_stub : PTRACE_SETREGS failed, errno = %d\n",
 		      -n);
 	}
 
 	err = ptrace(PTRACE_CONT, pid, 0, 0);
-	if(err)
+	if (err)
 		panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
 		      errno);
 
 	wait_stub_done(pid);
 
-	/* When the stub stops, we find the following values on the
+	/*
+	 * When the stub stops, we find the following values on the
 	 * beginning of the stack:
 	 * (long )return_value
 	 * (long )offset to failed sycall-data (0, if no error)
@@ -98,26 +94,26 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
 	ret = *((unsigned long *) mm_idp->stack);
 	offset = *((unsigned long *) mm_idp->stack + 1);
 	if (offset) {
-		data = (unsigned long *)(mm_idp->stack +
-					 offset - UML_CONFIG_STUB_DATA);
-		printk("do_syscall_stub : ret = %ld, offset = %ld, "
+		data = (unsigned long *)(mm_idp->stack + offset - STUB_DATA);
+		printk(UM_KERN_ERR "do_syscall_stub : ret = %ld, offset = %ld, "
 		       "data = %p\n", ret, offset, data);
 		syscall = (unsigned long *)((unsigned long)data + data[0]);
-		printk("do_syscall_stub: syscall %ld failed, return value = "
-		       "0x%lx, expected return value = 0x%lx\n",
+		printk(UM_KERN_ERR "do_syscall_stub: syscall %ld failed, "
+		       "return value = 0x%lx, expected return value = 0x%lx\n",
 		       syscall[0], ret, syscall[7]);
-		printk("    syscall parameters: "
+		printk(UM_KERN_ERR "    syscall parameters: "
 		       "0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
 		       syscall[1], syscall[2], syscall[3],
 		       syscall[4], syscall[5], syscall[6]);
-		for(n = 1; n < data[0]/sizeof(long); n++) {
-			if(n == 1)
-				printk("    additional syscall data:");
-			if(n % 4 == 1)
-				printk("\n      ");
+		for (n = 1; n < data[0]/sizeof(long); n++) {
+			if (n == 1)
+				printk(UM_KERN_ERR "    additional syscall "
+				       "data:");
+			if (n % 4 == 1)
+				printk("\n" UM_KERN_ERR "      ");
 			printk("  0x%lx", data[n]);
 		}
-		if(n > 1)
+		if (n > 1)
 			printk("\n");
 	}
 	else ret = 0;
@@ -133,7 +129,7 @@ long run_syscall_stub(struct mm_id * mm_idp, int syscall,
 {
 	unsigned long *stack = check_init_stack(mm_idp, *addr);
 
-	if(done && *addr == NULL)
+	if (done && *addr == NULL)
 		single_count++;
 
 	*stack += sizeof(long);
@@ -150,8 +146,8 @@ long run_syscall_stub(struct mm_id * mm_idp, int syscall,
 	*stack = 0;
 	multi_op_count++;
 
-	if(!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) <
-		     UM_KERN_PAGE_SIZE - 10 * sizeof(long))){
+	if (!done && ((((unsigned long) stack) & ~UM_KERN_PAGE_MASK) <
+		     UM_KERN_PAGE_SIZE - 10 * sizeof(long))) {
 		*addr = stack;
 		return 0;
 	}
@@ -166,14 +162,15 @@ long syscall_stub_data(struct mm_id * mm_idp,
 	unsigned long *stack;
 	int ret = 0;
 
-	/* If *addr still is uninitialized, it *must* contain NULL.
+	/*
+	 * If *addr still is uninitialized, it *must* contain NULL.
 	 * Thus in this case do_syscall_stub correctly won't be called.
 	 */
-	if((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >=
+	if ((((unsigned long) *addr) & ~UM_KERN_PAGE_MASK) >=
 	   UM_KERN_PAGE_SIZE - (10 + data_count) * sizeof(long)) {
 		ret = do_syscall_stub(mm_idp, addr);
 		/* in case of error, don't overwrite data on stack */
-		if(ret)
+		if (ret)
 			return ret;
 	}
 
@@ -185,7 +182,7 @@ long syscall_stub_data(struct mm_id * mm_idp,
 	memcpy(stack + 1, data, data_count * sizeof(long));
 
 	*stub_addr = (void *)(((unsigned long)(stack + 1) &
-			       ~UM_KERN_PAGE_MASK) + UML_CONFIG_STUB_DATA);
+			       ~UM_KERN_PAGE_MASK) + STUB_DATA);
 
 	return 0;
 }
@@ -195,7 +192,7 @@ int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot,
 {
 	int ret;
 
-	if(proc_mm){
+	if (proc_mm) {
 		struct proc_mm_op map;
 		int fd = mm_idp->u.mm_fd;
 
@@ -211,9 +208,10 @@ int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len, int prot,
 					   .offset= offset
 					 } } } );
 		CATCH_EINTR(ret = write(fd, &map, sizeof(map)));
-		if(ret != sizeof(map)){
+		if (ret != sizeof(map)) {
 			ret = -errno;
-			printk("map : /proc/mm map failed, err = %d\n", -ret);
+			printk(UM_KERN_ERR "map : /proc/mm map failed, "
+			       "err = %d\n", -ret);
 		}
 		else ret = 0;
 	}
@@ -234,7 +232,7 @@ int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
 {
 	int ret;
 
-	if(proc_mm){
+	if (proc_mm) {
 		struct proc_mm_op unmap;
 		int fd = mm_idp->u.mm_fd;
 
@@ -245,9 +243,10 @@ int unmap(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
 					     (unsigned long) addr,
 					     .len		= len } } } );
 		CATCH_EINTR(ret = write(fd, &unmap, sizeof(unmap)));
-		if(ret != sizeof(unmap)){
+		if (ret != sizeof(unmap)) {
 			ret = -errno;
-			printk("unmap - proc_mm write returned %d\n", ret);
+			printk(UM_KERN_ERR "unmap - proc_mm write returned "
+			       "%d\n", ret);
 		}
 		else ret = 0;
 	}
@@ -268,7 +267,7 @@ int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
 	struct proc_mm_op protect;
 	int ret;
 
-	if(proc_mm){
+	if (proc_mm) {
 		int fd = mm_idp->u.mm_fd;
 
 		protect = ((struct proc_mm_op) { .op	= MM_MPROTECT,
@@ -280,9 +279,9 @@ int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
 					       .prot	= prot } } } );
 
 		CATCH_EINTR(ret = write(fd, &protect, sizeof(protect)));
-		if(ret != sizeof(protect)){
+		if (ret != sizeof(protect)) {
 			ret = -errno;
-			printk("protect failed, err = %d", -ret);
+			printk(UM_KERN_ERR "protect failed, err = %d\n", -ret);
 		}
 		else ret = 0;
 	}
@@ -295,7 +294,3 @@ int protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
 
 	return ret;
 }
-
-void before_mem_skas(unsigned long unused)
-{
-}
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index ba9af8d6205..d77c81d7068 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -1,48 +1,38 @@
 /*
- * Copyright (C) 2002- 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <stdlib.h>
-#include <string.h>
 #include <unistd.h>
-#include <errno.h>
-#include <signal.h>
 #include <sched.h>
-#include "ptrace_user.h"
-#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
 #include <sys/mman.h>
-#include <sys/user.h>
-#include <sys/time.h>
-#include <sys/syscall.h>
-#include <asm/types.h>
-#include "user.h"
-#include "sysdep/ptrace.h"
-#include "kern_util.h"
-#include "skas.h"
-#include "stub-data.h"
-#include "mm_id.h"
-#include "sysdep/sigcontext.h"
-#include "sysdep/stub.h"
-#include "os.h"
-#include "proc_mm.h"
-#include "skas_ptrace.h"
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <asm/unistd.h>
+#include "as-layout.h"
 #include "chan_user.h"
-#include "registers.h"
+#include "kern_constants.h"
 #include "mem.h"
-#include "uml-config.h"
+#include "os.h"
 #include "process.h"
-#include "longjmp.h"
-#include "kern_constants.h"
-#include "as-layout.h"
+#include "proc_mm.h"
+#include "ptrace_user.h"
+#include "registers.h"
+#include "skas.h"
+#include "skas_ptrace.h"
+#include "user.h"
+#include "sysdep/stub.h"
 
 int is_skas_winch(int pid, int fd, void *data)
 {
-	if(pid != os_getpgrp())
-		return(0);
+	if (pid != getpgrp())
+		return 0;
 
 	register_winch_irq(-1, fd, -1, data, 0);
-	return(1);
+	return 1;
 }
 
 static int ptrace_dump_regs(int pid)
@@ -50,13 +40,12 @@ static int ptrace_dump_regs(int pid)
         unsigned long regs[MAX_REG_NR];
         int i;
 
-        if(ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
+        if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
                 return -errno;
-        else {
-                printk("Stub registers -\n");
-                for(i = 0; i < ARRAY_SIZE(regs); i++)
-                        printk("\t%d - %lx\n", i, regs[i]);
-        }
+
+	printk(UM_KERN_ERR "Stub registers -\n");
+	for (i = 0; i < ARRAY_SIZE(regs); i++)
+		printk(UM_KERN_ERR "\t%d - %lx\n", i, regs[i]);
 
         return 0;
 }
@@ -74,27 +63,28 @@ void wait_stub_done(int pid)
 {
 	int n, status, err;
 
-	while(1){
+	while (1) {
 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
-		if((n < 0) || !WIFSTOPPED(status))
+		if ((n < 0) || !WIFSTOPPED(status))
 			goto bad_wait;
 
-		if(((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0)
+		if (((1 << WSTOPSIG(status)) & STUB_SIG_MASK) == 0)
 			break;
 
 		err = ptrace(PTRACE_CONT, pid, 0, 0);
-		if(err)
+		if (err)
 			panic("wait_stub_done : continue failed, errno = %d\n",
 			      errno);
 	}
 
-	if(((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0)
+	if (((1 << WSTOPSIG(status)) & STUB_DONE_MASK) != 0)
 		return;
 
 bad_wait:
 	err = ptrace_dump_regs(pid);
-	if(err)
-		printk("Failed to get registers from stub, errno = %d\n", -err);
+	if (err)
+		printk(UM_KERN_ERR "Failed to get registers from stub, "
+		       "errno = %d\n", -err);
 	panic("wait_stub_done : failed to wait for SIGUSR1/SIGTRAP, pid = %d, "
 	      "n = %d, errno = %d, status = 0x%x\n", pid, n, errno, status);
 }
@@ -105,9 +95,9 @@ void get_skas_faultinfo(int pid, struct faultinfo * fi)
 {
 	int err;
 
-	if(ptrace_faultinfo){
+	if (ptrace_faultinfo) {
 		err = ptrace(PTRACE_FAULTINFO, pid, 0, fi);
-		if(err)
+		if (err)
 			panic("get_skas_faultinfo - PTRACE_FAULTINFO failed, "
 			      "errno = %d\n", errno);
 
@@ -119,52 +109,57 @@ void get_skas_faultinfo(int pid, struct faultinfo * fi)
 	}
 	else {
 		err = ptrace(PTRACE_CONT, pid, 0, SIGSEGV);
-		if(err)
+		if (err)
 			panic("Failed to continue stub, pid = %d, errno = %d\n",
 			      pid, errno);
 		wait_stub_done(pid);
 
-		/* faultinfo is prepared by the stub-segv-handler at start of
+		/*
+		 * faultinfo is prepared by the stub-segv-handler at start of
 		 * the stub stack page. We just have to copy it.
 		 */
 		memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
 	}
 }
 
-static void handle_segv(int pid, union uml_pt_regs * regs)
+static void handle_segv(int pid, struct uml_pt_regs * regs)
 {
-	get_skas_faultinfo(pid, &regs->skas.faultinfo);
-	segv(regs->skas.faultinfo, 0, 1, NULL);
+	get_skas_faultinfo(pid, &regs->faultinfo);
+	segv(regs->faultinfo, 0, 1, NULL);
 }
 
-/*To use the same value of using_sysemu as the caller, ask it that value (in local_using_sysemu)*/
-static void handle_trap(int pid, union uml_pt_regs *regs, int local_using_sysemu)
+/*
+ * To use the same value of using_sysemu as the caller, the caller passes it
+ * in (as local_using_sysemu).
+ */
+static void handle_trap(int pid, struct uml_pt_regs *regs,
+			int local_using_sysemu)
 {
 	int err, status;
 
 	/* Mark this as a syscall */
-	UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->skas.regs);
+	UPT_SYSCALL_NR(regs) = PT_SYSCALL_NR(regs->gp);
 
 	if (!local_using_sysemu)
 	{
 		err = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
 			     __NR_getpid);
-		if(err < 0)
-			panic("handle_trap - nullifying syscall failed errno = %d\n",
-			      errno);
+		if (err < 0)
+			panic("handle_trap - nullifying syscall failed, "
+			      "errno = %d\n", errno);
 
 		err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
-		if(err < 0)
-			panic("handle_trap - continuing to end of syscall failed, "
-			      "errno = %d\n", errno);
+		if (err < 0)
+			panic("handle_trap - continuing to end of syscall "
+			      "failed, errno = %d\n", errno);
 
 		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED));
-		if((err < 0) || !WIFSTOPPED(status) ||
-		   (WSTOPSIG(status) != SIGTRAP + 0x80)){
+		if ((err < 0) || !WIFSTOPPED(status) ||
+		   (WSTOPSIG(status) != SIGTRAP + 0x80)) {
                         err = ptrace_dump_regs(pid);
-                        if(err)
-                                printk("Failed to get registers from process, "
-                                       "errno = %d\n", -err);
+                        if (err)
+                                printk(UM_KERN_ERR "Failed to get registers "
+				       "from process, errno = %d\n", -err);
 			panic("handle_trap - failed to wait at end of syscall, "
 			      "errno = %d, status = %d\n", errno, status);
                 }
@@ -182,63 +177,64 @@ static int userspace_tramp(void *stack)
 
 	ptrace(PTRACE_TRACEME, 0, 0, 0);
 
-	init_new_thread_signals();
-	err = set_interval(1);
-	if(err)
+	signal(SIGTERM, SIG_DFL);
+	err = set_interval();
+	if (err)
 		panic("userspace_tramp - setting timer failed, errno = %d\n",
 		      err);
 
-	if(!proc_mm){
-		/* This has a pte, but it can't be mapped in with the usual
+	if (!proc_mm) {
+		/*
+		 * This has a pte, but it can't be mapped in with the usual
 		 * tlb_flush mechanism because this is part of that mechanism
 		 */
 		int fd;
-		__u64 offset;
+		unsigned long long offset;
 		fd = phys_mapping(to_phys(&__syscall_stub_start), &offset);
-		addr = mmap64((void *) UML_CONFIG_STUB_CODE, UM_KERN_PAGE_SIZE,
+		addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
 			      PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
-		if(addr == MAP_FAILED){
-			printk("mapping mmap stub failed, errno = %d\n",
-			       errno);
+		if (addr == MAP_FAILED) {
+			printk(UM_KERN_ERR "mapping mmap stub at 0x%lx failed, "
+			       "errno = %d\n", STUB_CODE, errno);
 			exit(1);
 		}
 
-		if(stack != NULL){
+		if (stack != NULL) {
 			fd = phys_mapping(to_phys(stack), &offset);
-			addr = mmap((void *) UML_CONFIG_STUB_DATA,
+			addr = mmap((void *) STUB_DATA,
 				    UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
 				    MAP_FIXED | MAP_SHARED, fd, offset);
-			if(addr == MAP_FAILED){
-				printk("mapping segfault stack failed, "
-				       "errno = %d\n", errno);
+			if (addr == MAP_FAILED) {
+				printk(UM_KERN_ERR "mapping segfault stack "
+				       "at 0x%lx failed, errno = %d\n",
+				       STUB_DATA, errno);
 				exit(1);
 			}
 		}
 	}
-	if(!ptrace_faultinfo && (stack != NULL)){
+	if (!ptrace_faultinfo && (stack != NULL)) {
 		struct sigaction sa;
 
-		unsigned long v = UML_CONFIG_STUB_CODE +
+		unsigned long v = STUB_CODE +
 				  (unsigned long) stub_segv_handler -
 				  (unsigned long) &__syscall_stub_start;
 
-		set_sigstack((void *) UML_CONFIG_STUB_DATA, UM_KERN_PAGE_SIZE);
+		set_sigstack((void *) STUB_DATA, UM_KERN_PAGE_SIZE);
 		sigemptyset(&sa.sa_mask);
 		sigaddset(&sa.sa_mask, SIGIO);
 		sigaddset(&sa.sa_mask, SIGWINCH);
-		sigaddset(&sa.sa_mask, SIGALRM);
 		sigaddset(&sa.sa_mask, SIGVTALRM);
 		sigaddset(&sa.sa_mask, SIGUSR1);
 		sa.sa_flags = SA_ONSTACK;
 		sa.sa_handler = (void *) v;
 		sa.sa_restorer = NULL;
-		if(sigaction(SIGSEGV, &sa, NULL) < 0)
+		if (sigaction(SIGSEGV, &sa, NULL) < 0)
 			panic("userspace_tramp - setting SIGSEGV handler "
 			      "failed - errno = %d\n", errno);
 	}
 
-	os_stop_process(os_getpid());
-	return(0);
+	kill(os_getpid(), SIGSTOP);
+	return 0;
 }
 
 /* Each element set once, and only accessed by a single processor anyway */
@@ -255,44 +251,55 @@ int start_userspace(unsigned long stub_stack)
 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
 		     PROT_READ | PROT_WRITE | PROT_EXEC,
 		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if(stack == MAP_FAILED)
+	if (stack == MAP_FAILED)
 		panic("start_userspace : mmap failed, errno = %d", errno);
 	sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *);
 
 	flags = CLONE_FILES | SIGCHLD;
-	if(proc_mm) flags |= CLONE_VM;
+	if (proc_mm)
+		flags |= CLONE_VM;
+
 	pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
-	if(pid < 0)
+	if (pid < 0)
 		panic("start_userspace : clone failed, errno = %d", errno);
 
 	do {
 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
-		if(n < 0)
+		if (n < 0)
 			panic("start_userspace : wait failed, errno = %d",
 			      errno);
-	} while(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM));
+	} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM));
 
-	if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
+	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
 		panic("start_userspace : expected SIGSTOP, got status = %d",
 		      status);
 
-	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL, (void *)PTRACE_O_TRACESYSGOOD) < 0)
-		panic("start_userspace : PTRACE_OLDSETOPTIONS failed, errno=%d\n",
-		      errno);
+	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
+		   (void *) PTRACE_O_TRACESYSGOOD) < 0)
+		panic("start_userspace : PTRACE_OLDSETOPTIONS failed, "
+		      "errno = %d\n", errno);
 
-	if(munmap(stack, UM_KERN_PAGE_SIZE) < 0)
+	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0)
 		panic("start_userspace : munmap failed, errno = %d\n", errno);
 
-	return(pid);
+	return pid;
 }
 
-void userspace(union uml_pt_regs *regs)
+void userspace(struct uml_pt_regs *regs)
 {
+	struct itimerval timer;
+	unsigned long long nsecs, now;
 	int err, status, op, pid = userspace_pid[0];
 	/* To prevent races if using_sysemu changes under us.*/
 	int local_using_sysemu;
 
-	while(1){
+	if (getitimer(ITIMER_VIRTUAL, &timer))
+		printk(UM_KERN_ERR "Failed to get itimer, errno = %d\n", errno);
+	nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC +
+		timer.it_value.tv_usec * UM_NSEC_PER_USEC;
+	nsecs += os_nsecs();
+
+	while (1) {
 		restore_registers(pid, regs);
 
 		/* Now we set local_using_sysemu to be used for one loop */
@@ -302,26 +309,28 @@ void userspace(union uml_pt_regs *regs)
 					     singlestepping(NULL));
 
 		err = ptrace(op, pid, 0, 0);
-		if(err)
+		if (err)
 			panic("userspace - could not resume userspace process, "
 			      "pid=%d, ptrace operation = %d, errno = %d\n",
 			      pid, op, errno);
 
 		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED));
-		if(err < 0)
+		if (err < 0)
 			panic("userspace - waitpid failed, errno = %d\n",
 			      errno);
 
-		regs->skas.is_user = 1;
+		regs->is_user = 1;
 		save_registers(pid, regs);
 		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
 
-		if(WIFSTOPPED(status)){
+		if (WIFSTOPPED(status)) {
 			int sig = WSTOPSIG(status);
-		  	switch(sig){
+		  	switch(sig) {
 			case SIGSEGV:
-				if(PTRACE_FULL_FAULTINFO || !ptrace_faultinfo){
-					get_skas_faultinfo(pid, &regs->skas.faultinfo);
+				if (PTRACE_FULL_FAULTINFO ||
+				    !ptrace_faultinfo) {
+					get_skas_faultinfo(pid,
+							   &regs->faultinfo);
 					(*sig_info[SIGSEGV])(SIGSEGV, regs);
 				}
 				else handle_segv(pid, regs);
@@ -332,8 +341,20 @@ void userspace(union uml_pt_regs *regs)
 			case SIGTRAP:
 				relay_signal(SIGTRAP, regs);
 				break;
-			case SIGIO:
 			case SIGVTALRM:
+				now = os_nsecs();
+				if(now < nsecs)
+					break;
+				block_signals();
+				(*sig_info[sig])(sig, regs);
+				unblock_signals();
+				nsecs = timer.it_value.tv_sec *
+					UM_NSEC_PER_SEC +
+					timer.it_value.tv_usec *
+					UM_NSEC_PER_USEC;
+				nsecs += os_nsecs();
+				break;
+			case SIGIO:
 			case SIGILL:
 			case SIGBUS:
 			case SIGFPE:
@@ -343,30 +364,29 @@ void userspace(union uml_pt_regs *regs)
 				unblock_signals();
 				break;
 			default:
-			        printk("userspace - child stopped with signal "
-				       "%d\n", sig);
+			        printk(UM_KERN_ERR "userspace - child stopped "
+				       "with signal %d\n", sig);
 			}
 			pid = userspace_pid[0];
 			interrupt_end();
 
 			/* Avoid -ERESTARTSYS handling in host */
-			if(PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET)
-				PT_SYSCALL_NR(regs->skas.regs) = -1;
+			if (PT_SYSCALL_NR_OFFSET != PT_SYSCALL_RET_OFFSET)
+				PT_SYSCALL_NR(regs->gp) = -1;
 		}
 	}
 }
 
 static unsigned long thread_regs[MAX_REG_NR];
-static unsigned long thread_fp_regs[HOST_FP_SIZE];
 
 static int __init init_thread_regs(void)
 {
-	get_safe_registers(thread_regs, thread_fp_regs);
+	get_safe_registers(thread_regs);
 	/* Set parent's instruction pointer to start of clone-stub */
-	thread_regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE +
+	thread_regs[REGS_IP_INDEX] = STUB_CODE +
 				(unsigned long) stub_clone_handler -
 				(unsigned long) &__syscall_stub_start;
-	thread_regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA + UM_KERN_PAGE_SIZE -
+	thread_regs[REGS_SP_INDEX] = STUB_DATA + UM_KERN_PAGE_SIZE -
 		sizeof(void *);
 #ifdef __SIGNAL_FRAMESIZE
 	thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
@@ -378,53 +398,53 @@ __initcall(init_thread_regs);
 
 int copy_context_skas0(unsigned long new_stack, int pid)
 {
+	struct timeval tv = { .tv_sec = 0, .tv_usec = UM_USEC_PER_SEC / UM_HZ };
 	int err;
 	unsigned long current_stack = current_stub_stack();
 	struct stub_data *data = (struct stub_data *) current_stack;
 	struct stub_data *child_data = (struct stub_data *) new_stack;
-	__u64 new_offset;
+	unsigned long long new_offset;
 	int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset);
 
-	/* prepare offset and fd of child's stack as argument for parent's
+	/*
+	 * prepare offset and fd of child's stack as argument for parent's
 	 * and child's mmap2 calls
 	 */
 	*data = ((struct stub_data) { .offset	= MMAP_OFFSET(new_offset),
 				      .fd	= new_fd,
 				      .timer    = ((struct itimerval)
-					            { { 0, 1000000 / hz() },
-						      { 0, 1000000 / hz() }})});
+					           { .it_value = tv,
+						     .it_interval = tv }) });
+
 	err = ptrace_setregs(pid, thread_regs);
-	if(err < 0)
+	if (err < 0)
 		panic("copy_context_skas0 : PTRACE_SETREGS failed, "
 		      "pid = %d, errno = %d\n", pid, -err);
 
-	err = ptrace_setfpregs(pid, thread_fp_regs);
-	if(err < 0)
-		panic("copy_context_skas0 : PTRACE_SETFPREGS failed, "
-		      "pid = %d, errno = %d\n", pid, -err);
-
 	/* set a well known return code for detection of child write failure */
 	child_data->err = 12345678;
 
-	/* Wait, until parent has finished its work: read child's pid from
+	/*
+	 * Wait, until parent has finished its work: read child's pid from
 	 * parent's stack, and check, if bad result.
 	 */
 	err = ptrace(PTRACE_CONT, pid, 0, 0);
-	if(err)
+	if (err)
 		panic("Failed to continue new process, pid = %d, "
 		      "errno = %d\n", pid, errno);
 	wait_stub_done(pid);
 
 	pid = data->err;
-	if(pid < 0)
+	if (pid < 0)
 		panic("copy_context_skas0 - stub-parent reports error %d\n",
 		      -pid);
 
-	/* Wait, until child has finished too: read child's result from
+	/*
+	 * Wait, until child has finished too: read child's result from
 	 * child's stack and check it.
 	 */
 	wait_stub_done(pid);
-	if (child_data->err != UML_CONFIG_STUB_DATA)
+	if (child_data->err != STUB_DATA)
 		panic("copy_context_skas0 - stub-child reports error %ld\n",
 		      child_data->err);
 
@@ -446,7 +466,7 @@ void map_stub_pages(int fd, unsigned long code,
 {
 	struct proc_mm_op mmop;
 	int n;
-	__u64 code_offset;
+	unsigned long long code_offset;
 	int code_fd = phys_mapping(to_phys((void *) &__syscall_stub_start),
 				   &code_offset);
 
@@ -461,16 +481,17 @@ void map_stub_pages(int fd, unsigned long code,
 					  .offset  = code_offset
 	} } });
 	CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop)));
-	if(n != sizeof(mmop)){
+	if (n != sizeof(mmop)) {
 		n = errno;
-		printk("mmap args - addr = 0x%lx, fd = %d, offset = %llx\n",
-		       code, code_fd, (unsigned long long) code_offset);
+		printk(UM_KERN_ERR "mmap args - addr = 0x%lx, fd = %d, "
+		       "offset = %llx\n", code, code_fd,
+		       (unsigned long long) code_offset);
 		panic("map_stub_pages : /proc/mm map for code failed, "
 		      "err = %d\n", n);
 	}
 
-	if ( stack ) {
-		__u64 map_offset;
+	if (stack) {
+		unsigned long long map_offset;
 		int map_fd = phys_mapping(to_phys((void *)stack), &map_offset);
 		mmop = ((struct proc_mm_op)
 				{ .op        = MM_MMAP,
@@ -484,7 +505,7 @@ void map_stub_pages(int fd, unsigned long code,
 				      .offset  = map_offset
 		} } });
 		CATCH_EINTR(n = write(fd, &mmop, sizeof(mmop)));
-		if(n != sizeof(mmop))
+		if (n != sizeof(mmop))
 			panic("map_stub_pages : /proc/mm map for data failed, "
 			      "err = %d\n", errno);
 	}
@@ -504,7 +525,7 @@ void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
 
 void switch_threads(jmp_buf *me, jmp_buf *you)
 {
-	if(UML_SETJMP(me) == 0)
+	if (UML_SETJMP(me) == 0)
 		UML_LONGJMP(you, 1);
 }
 
@@ -520,8 +541,7 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
 	int n;
 
 	set_handler(SIGWINCH, (__sighandler_t) sig_handler,
-		    SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGALRM,
-		    SIGVTALRM, -1);
+		    SA_ONSTACK | SA_RESTART, SIGUSR1, SIGIO, SIGVTALRM, -1);
 
 	/*
 	 * Can't use UML_SETJMP or UML_LONGJMP here because they save
@@ -532,7 +552,7 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
 	 * after returning to the jumper.
 	 */
 	n = setjmp(initial_jmpbuf);
-	switch(n){
+	switch(n) {
 	case INIT_JMP_NEW_THREAD:
 		(*switch_buf)[0].JB_IP = (unsigned long) new_thread_handler;
 		(*switch_buf)[0].JB_SP = (unsigned long) stack +
@@ -544,10 +564,10 @@ int start_idle_thread(void *stack, jmp_buf *switch_buf)
 		break;
 	case INIT_JMP_HALT:
 		kmalloc_ok = 0;
-		return(0);
+		return 0;
 	case INIT_JMP_REBOOT:
 		kmalloc_ok = 0;
-		return(1);
+		return 1;
 	default:
 		panic("Bad sigsetjmp return in start_idle_thread - %d\n", n);
 	}
@@ -563,7 +583,7 @@ void initial_thread_cb_skas(void (*proc)(void *), void *arg)
 	cb_back = &here;
 
 	block_signals();
-	if(UML_SETJMP(&here) == 0)
+	if (UML_SETJMP(&here) == 0)
 		UML_LONGJMP(&initial_jmpbuf, INIT_JMP_CALLBACK);
 	unblock_signals();
 
@@ -584,16 +604,16 @@ void reboot_skas(void)
 	UML_LONGJMP(&initial_jmpbuf, INIT_JMP_REBOOT);
 }
 
-void switch_mm_skas(struct mm_id *mm_idp)
+void __switch_mm(struct mm_id *mm_idp)
 {
 	int err;
 
-	/* FIXME: need cpu pid in switch_mm_skas */
-	if(proc_mm){
+	/* FIXME: need cpu pid in __switch_mm */
+	if (proc_mm) {
 		err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0,
 			     mm_idp->u.mm_fd);
-		if(err)
-			panic("switch_mm_skas - PTRACE_SWITCH_MM failed, "
+		if (err)
+			panic("__switch_mm - PTRACE_SWITCH_MM failed, "
 			      "errno = %d\n", errno);
 	}
 	else userspace_pid[0] = mm_idp->u.pid;
diff --git a/arch/um/os-Linux/skas/trap.c b/arch/um/os-Linux/skas/trap.c
index 3b600c2e63b..3b1b9244f46 100644
--- a/arch/um/os-Linux/skas/trap.c
+++ b/arch/um/os-Linux/skas/trap.c
@@ -1,37 +1,43 @@
 /*
- * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <signal.h>
-#include <errno.h>
+#if 0
 #include "kern_util.h"
-#include "as-layout.h"
-#include "task.h"
-#include "sigcontext.h"
 #include "skas.h"
 #include "ptrace_user.h"
-#include "sysdep/ptrace.h"
 #include "sysdep/ptrace_user.h"
+#endif
+
+#include <errno.h>
+#include <signal.h>
+#include "sysdep/ptrace.h"
+#include "kern_constants.h"
+#include "as-layout.h"
 #include "os.h"
+#include "sigcontext.h"
+#include "task.h"
 
-static union uml_pt_regs ksig_regs[UM_NR_CPUS];
+static struct uml_pt_regs ksig_regs[UM_NR_CPUS];
 
 void sig_handler_common_skas(int sig, void *sc_ptr)
 {
 	struct sigcontext *sc = sc_ptr;
-	union uml_pt_regs *r;
-	void (*handler)(int, union uml_pt_regs *);
+	struct uml_pt_regs *r;
+	void (*handler)(int, struct uml_pt_regs *);
 	int save_user, save_errno = errno;
 
-	/* This is done because to allow SIGSEGV to be delivered inside a SEGV
+	/*
+	 * This is done to allow SIGSEGV to be delivered inside a SEGV
 	 * handler.  This can happen in copy_user, and if SEGV is disabled,
 	 * the process will die.
 	 * XXX Figure out why this is better than SA_NODEFER
 	 */
-	if(sig == SIGSEGV) {
+	if (sig == SIGSEGV) {
 		change_sig(SIGSEGV, 1);
-		/* For segfaults, we want the data from the
+		/*
+		 * For segfaults, we want the data from the
 		 * sigcontext.  In this case, we don't want to mangle
 		 * the process registers, so use a static set of
 		 * registers.  For other signals, the process
@@ -42,25 +48,22 @@ void sig_handler_common_skas(int sig, void *sc_ptr)
 	}
 	else r = TASK_REGS(get_current());
 
-	save_user = r->skas.is_user;
-	r->skas.is_user = 0;
-	if ( sig == SIGFPE || sig == SIGSEGV ||
-	     sig == SIGBUS || sig == SIGILL ||
-	     sig == SIGTRAP ) {
-		GET_FAULTINFO_FROM_SC(r->skas.faultinfo, sc);
-	}
+	save_user = r->is_user;
+	r->is_user = 0;
+	if ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) ||
+	    (sig == SIGILL) || (sig == SIGTRAP))
+		GET_FAULTINFO_FROM_SC(r->faultinfo, sc);
 
 	change_sig(SIGUSR1, 1);
 
 	handler = sig_info[sig];
 
-	/* unblock SIGALRM, SIGVTALRM, SIGIO if sig isn't IRQ signal */
-	if (sig != SIGIO && sig != SIGWINCH &&
-	    sig != SIGVTALRM && sig != SIGALRM)
+	/* unblock SIGVTALRM, SIGIO if sig isn't IRQ signal */
+	if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGVTALRM))
 		unblock_signals();
 
 	handler(sig, r);
 
 	errno = save_errno;
-	r->skas.is_user = save_user;
+	r->is_user = save_user;
 }
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 46f613975c1..7b81f6c08a5 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -1,75 +1,65 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <pty.h>
 #include <stdio.h>
-#include <stddef.h>
-#include <stdarg.h>
 #include <stdlib.h>
-#include <string.h>
+#include <stdarg.h>
 #include <unistd.h>
-#include <signal.h>
-#include <sched.h>
-#include <fcntl.h>
 #include <errno.h>
-#include <sys/time.h>
-#include <sys/wait.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <string.h>
 #include <sys/mman.h>
-#include <sys/resource.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
 #include <asm/unistd.h>
-#include <asm/page.h>
-#include <sys/types.h>
-#include "kern_util.h"
-#include "user.h"
-#include "signal_kern.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/sigcontext.h"
-#include "irq_user.h"
-#include "ptrace_user.h"
-#include "mem_user.h"
 #include "init.h"
-#include "os.h"
-#include "uml-config.h"
-#include "choose-mode.h"
-#include "mode.h"
-#include "tempfile.h"
 #include "kern_constants.h"
-
-#ifdef UML_CONFIG_MODE_SKAS
-#include "skas.h"
-#include "skas_ptrace.h"
+#include "os.h"
+#include "mem_user.h"
+#include "ptrace_user.h"
 #include "registers.h"
-#endif
+#include "skas_ptrace.h"
 
-static int ptrace_child(void *arg)
+static int ptrace_child(void)
 {
 	int ret;
+	/* Calling os_getpid because some libcs cached getpid incorrectly */
 	int pid = os_getpid(), ppid = getppid();
 	int sc_result;
 
 	change_sig(SIGWINCH, 0);
-	if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){
+	if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
 		perror("ptrace");
-		os_kill_process(pid, 0);
+		kill(pid, SIGKILL);
 	}
 	kill(pid, SIGSTOP);
 
-	/*This syscall will be intercepted by the parent. Don't call more than
-	 * once, please.*/
+	/*
+	 * This syscall will be intercepted by the parent. Don't call more than
+	 * once, please.
+	 */
 	sc_result = os_getpid();
 
 	if (sc_result == pid)
-		ret = 1; /*Nothing modified by the parent, we are running
-			   normally.*/
+		/* Nothing modified by the parent, we are running normally. */
+		ret = 1;
 	else if (sc_result == ppid)
-		ret = 0; /*Expected in check_ptrace and check_sysemu when they
-			   succeed in modifying the stack frame*/
+		/*
+		 * Expected in check_ptrace and check_sysemu when they succeed
+		 * in modifying the stack frame
+		 */
+		ret = 0;
 	else
-		ret = 2; /*Serious trouble! This could be caused by a bug in
-			   host 2.6 SKAS3/2.6 patch before release -V6, together
-			   with a bug in the UML code itself.*/
+		/* Serious trouble! This could be caused by a bug in host 2.6
+		 * SKAS3/2.6 patch before release -V6, together with a bug in
+		 * the UML code itself.
+		 */
+		ret = 2;
 	_exit(ret);
 }
 
@@ -101,29 +91,23 @@ static void non_fatal(char *fmt, ...)
 	fflush(stdout);
 }
 
-static int start_ptraced_child(void **stack_out)
+static int start_ptraced_child(void)
 {
-	void *stack;
-	unsigned long sp;
 	int pid, n, status;
 
-	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
-		     PROT_READ | PROT_WRITE | PROT_EXEC,
-		     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-	if(stack == MAP_FAILED)
-		fatal_perror("check_ptrace : mmap failed");
-	sp = (unsigned long) stack + UM_KERN_PAGE_SIZE - sizeof(void *);
-	pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL);
-	if(pid < 0)
-		fatal_perror("start_ptraced_child : clone failed");
+	pid = fork();
+	if (pid == 0)
+		ptrace_child();
+	else if (pid < 0)
+		fatal_perror("start_ptraced_child : fork failed");
+
 	CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
-	if(n < 0)
-		fatal_perror("check_ptrace : clone failed");
-	if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
+	if (n < 0)
+		fatal_perror("check_ptrace : waitpid failed");
+	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP))
 		fatal("check_ptrace : expected SIGSTOP, got status = %d",
 		      status);
 
-	*stack_out = stack;
 	return pid;
 }
 
@@ -133,15 +117,14 @@ static int start_ptraced_child(void **stack_out)
  * So only for SYSEMU features we test mustpanic, while normal host features
  * must work anyway!
  */
-static int stop_ptraced_child(int pid, void *stack, int exitcode,
-			      int mustexit)
+static int stop_ptraced_child(int pid, int exitcode, int mustexit)
 {
 	int status, n, ret = 0;
 
-	if(ptrace(PTRACE_CONT, pid, 0, 0) < 0)
+	if (ptrace(PTRACE_CONT, pid, 0, 0) < 0)
 		fatal_perror("stop_ptraced_child : ptrace failed");
 	CATCH_EINTR(n = waitpid(pid, &status, 0));
-	if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
+	if (!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) {
 		int exit_with = WEXITSTATUS(status);
 		if (exit_with == 2)
 			non_fatal("check_ptrace : child exited with status 2. "
@@ -154,8 +137,6 @@ static int stop_ptraced_child(int pid, void *stack, int exitcode,
 		ret = -1;
 	}
 
-	if(munmap(stack, UM_KERN_PAGE_SIZE) < 0)
-		fatal_perror("check_ptrace : munmap failed");
 	return ret;
 }
 
@@ -207,40 +188,39 @@ __uml_setup("nosysemu", nosysemu_cmd_param,
 
 static void __init check_sysemu(void)
 {
-	void *stack;
 	unsigned long regs[MAX_REG_NR];
 	int pid, n, status, count=0;
 
 	non_fatal("Checking syscall emulation patch for ptrace...");
 	sysemu_supported = 0;
-	pid = start_ptraced_child(&stack);
+	pid = start_ptraced_child();
 
-	if(ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
+	if (ptrace(PTRACE_SYSEMU, pid, 0, 0) < 0)
 		goto fail;
 
 	CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
 	if (n < 0)
 		fatal_perror("check_sysemu : wait failed");
-	if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
+	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP))
 		fatal("check_sysemu : expected SIGTRAP, got status = %d",
 		      status);
 
-	if(ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
+	if (ptrace(PTRACE_GETREGS, pid, 0, regs) < 0)
 		fatal_perror("check_sysemu : PTRACE_GETREGS failed");
-	if(PT_SYSCALL_NR(regs) != __NR_getpid){
+	if (PT_SYSCALL_NR(regs) != __NR_getpid) {
 		non_fatal("check_sysemu got system call number %d, "
 			  "expected %d...", PT_SYSCALL_NR(regs), __NR_getpid);
 		goto fail;
 	}
 
 	n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET, os_getpid());
-	if(n < 0){
+	if (n < 0) {
 		non_fatal("check_sysemu : failed to modify system call "
 			  "return");
 		goto fail;
 	}
 
-	if (stop_ptraced_child(pid, stack, 0, 0) < 0)
+	if (stop_ptraced_child(pid, 0, 0) < 0)
 		goto fail_stopped;
 
 	sysemu_supported = 1;
@@ -248,90 +228,90 @@ static void __init check_sysemu(void)
 	set_using_sysemu(!force_sysemu_disabled);
 
 	non_fatal("Checking advanced syscall emulation patch for ptrace...");
-	pid = start_ptraced_child(&stack);
+	pid = start_ptraced_child();
 
-	if((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+	if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0))
 		fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed");
 
-	while(1){
+	while (1) {
 		count++;
-		if(ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
+		if (ptrace(PTRACE_SYSEMU_SINGLESTEP, pid, 0, 0) < 0)
 			goto fail;
 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
-		if(n < 0)
+		if (n < 0)
 			fatal_perror("check_ptrace : wait failed");
 
-		if(WIFSTOPPED(status) && (WSTOPSIG(status) == (SIGTRAP|0x80))){
+		if (WIFSTOPPED(status) &&
+		    (WSTOPSIG(status) == (SIGTRAP|0x80))) {
 			if (!count)
 				fatal("check_ptrace : SYSEMU_SINGLESTEP "
 				      "doesn't singlestep");
 			n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_RET_OFFSET,
 				   os_getpid());
-			if(n < 0)
+			if (n < 0)
 				fatal_perror("check_sysemu : failed to modify "
 					     "system call return");
 			break;
 		}
-		else if(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP))
+		else if (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGTRAP))
 			count++;
 		else
 			fatal("check_ptrace : expected SIGTRAP or "
 			      "(SIGTRAP | 0x80), got status = %d", status);
 	}
-	if (stop_ptraced_child(pid, stack, 0, 0) < 0)
+	if (stop_ptraced_child(pid, 0, 0) < 0)
 		goto fail_stopped;
 
 	sysemu_supported = 2;
 	non_fatal("OK\n");
 
-	if ( !force_sysemu_disabled )
+	if (!force_sysemu_disabled)
 		set_using_sysemu(sysemu_supported);
 	return;
 
 fail:
-	stop_ptraced_child(pid, stack, 1, 0);
+	stop_ptraced_child(pid, 1, 0);
 fail_stopped:
 	non_fatal("missing\n");
 }
 
 static void __init check_ptrace(void)
 {
-	void *stack;
 	int pid, syscall, n, status;
 
 	non_fatal("Checking that ptrace can change system call numbers...");
-	pid = start_ptraced_child(&stack);
+	pid = start_ptraced_child();
 
-	if((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
+	if ((ptrace(PTRACE_OLDSETOPTIONS, pid, 0,
 		   (void *) PTRACE_O_TRACESYSGOOD) < 0))
 		fatal_perror("check_ptrace: PTRACE_OLDSETOPTIONS failed");
 
-	while(1){
-		if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
+	while (1) {
+		if (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
 			fatal_perror("check_ptrace : ptrace failed");
 
 		CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED));
-		if(n < 0)
+		if (n < 0)
 			fatal_perror("check_ptrace : wait failed");
 
-		if(!WIFSTOPPED(status) ||
+		if (!WIFSTOPPED(status) ||
 		   (WSTOPSIG(status) != (SIGTRAP | 0x80)))
 			fatal("check_ptrace : expected (SIGTRAP|0x80), "
 			       "got status = %d", status);
 
 		syscall = ptrace(PTRACE_PEEKUSR, pid, PT_SYSCALL_NR_OFFSET,
 				 0);
-		if(syscall == __NR_getpid){
+		if (syscall == __NR_getpid) {
 			n = ptrace(PTRACE_POKEUSR, pid, PT_SYSCALL_NR_OFFSET,
 				   __NR_getppid);
-			if(n < 0)
+			if (n < 0)
 				fatal_perror("check_ptrace : failed to modify "
 					     "system call");
 			break;
 		}
 	}
-	stop_ptraced_child(pid, stack, 0, 1);
+	stop_ptraced_child(pid, 0, 1);
 	non_fatal("OK\n");
 	check_sysemu();
 }
@@ -343,18 +323,18 @@ static void __init check_coredump_limit(void)
 	struct rlimit lim;
 	int err = getrlimit(RLIMIT_CORE, &lim);
 
-	if(err){
+	if (err) {
 		perror("Getting core dump limit");
 		return;
 	}
 
 	printf("Core dump limits :\n\tsoft - ");
-	if(lim.rlim_cur == RLIM_INFINITY)
+	if (lim.rlim_cur == RLIM_INFINITY)
 		printf("NONE\n");
 	else printf("%lu\n", lim.rlim_cur);
 
 	printf("\thard - ");
-	if(lim.rlim_max == RLIM_INFINITY)
+	if (lim.rlim_max == RLIM_INFINITY)
 		printf("NONE\n");
 	else printf("%lu\n", lim.rlim_max);
 }
@@ -408,20 +388,18 @@ __uml_setup("noptraceldt", noptraceldt_cmd_param,
 "    To support PTRACE_LDT, the host needs to be patched using\n"
 "    the current skas3 patch.\n\n");
 
-#ifdef UML_CONFIG_MODE_SKAS
 static inline void check_skas3_ptrace_faultinfo(void)
 {
 	struct ptrace_faultinfo fi;
-	void *stack;
 	int pid, n;
 
 	non_fatal("  - PTRACE_FAULTINFO...");
-	pid = start_ptraced_child(&stack);
+	pid = start_ptraced_child();
 
 	n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi);
 	if (n < 0) {
 		ptrace_faultinfo = 0;
-		if(errno == EIO)
+		if (errno == EIO)
 			non_fatal("not found\n");
 		else
 			perror("not found");
@@ -434,13 +412,12 @@ static inline void check_skas3_ptrace_faultinfo(void)
 	}
 
 	init_registers(pid);
-	stop_ptraced_child(pid, stack, 1, 1);
+	stop_ptraced_child(pid, 1, 1);
 }
 
 static inline void check_skas3_ptrace_ldt(void)
 {
 #ifdef PTRACE_LDT
-	void *stack;
 	int pid, n;
 	unsigned char ldtbuf[40];
 	struct ptrace_ldt ldt_op = (struct ptrace_ldt) {
@@ -449,11 +426,11 @@ static inline void check_skas3_ptrace_ldt(void)
 		.bytecount = sizeof(ldtbuf)};
 
 	non_fatal("  - PTRACE_LDT...");
-	pid = start_ptraced_child(&stack);
+	pid = start_ptraced_child();
 
 	n = ptrace(PTRACE_LDT, pid, 0, (unsigned long) &ldt_op);
 	if (n < 0) {
-		if(errno == EIO)
+		if (errno == EIO)
 			non_fatal("not found\n");
 		else {
 			perror("not found");
@@ -461,13 +438,13 @@ static inline void check_skas3_ptrace_ldt(void)
 		ptrace_ldt = 0;
 	}
 	else {
-		if(ptrace_ldt)
+		if (ptrace_ldt)
 			non_fatal("found\n");
 		else
 			non_fatal("found, but use is disabled\n");
 	}
 
-	stop_ptraced_child(pid, stack, 1, 1);
+	stop_ptraced_child(pid, 1, 1);
 #else
 	/* PTRACE_LDT might be disabled via cmdline option.
 	 * We want to override this, else we might use the stub
@@ -484,12 +461,9 @@ static inline void check_skas3_proc_mm(void)
 		proc_mm = 0;
 		perror("not found");
 	}
-	else {
-		if (!proc_mm)
-			non_fatal("found but disabled on command line\n");
-		else
-			non_fatal("found\n");
-	}
+	else if (!proc_mm)
+		non_fatal("found but disabled on command line\n");
+	else non_fatal("found\n");
 }
 
 int can_do_skas(void)
@@ -500,17 +474,11 @@ int can_do_skas(void)
 	check_skas3_ptrace_faultinfo();
 	check_skas3_ptrace_ldt();
 
-	if(!proc_mm || !ptrace_faultinfo || !ptrace_ldt)
+	if (!proc_mm || !ptrace_faultinfo || !ptrace_ldt)
 		skas_needs_stub = 1;
 
 	return 1;
 }
-#else
-int can_do_skas(void)
-{
-	return 0;
-}
-#endif
 
 int __init parse_iomem(char *str, int *add)
 {
@@ -521,25 +489,25 @@ int __init parse_iomem(char *str, int *add)
 
 	driver = str;
 	file = strchr(str,',');
-	if(file == NULL){
+	if (file == NULL) {
 		printf("parse_iomem : failed to parse iomem\n");
 		goto out;
 	}
 	*file = '\0';
 	file++;
 	fd = open(file, O_RDWR, 0);
-	if(fd < 0){
-		os_print_error(fd, "parse_iomem - Couldn't open io file");
+	if (fd < 0) {
+		perror("parse_iomem - Couldn't open io file");
 		goto out;
 	}
 
-	if(fstat64(fd, &buf) < 0){
+	if (fstat64(fd, &buf) < 0) {
 		perror("parse_iomem - cannot stat_fd file");
 		goto out_close;
 	}
 
 	new = malloc(sizeof(*new));
-	if(new == NULL){
+	if (new == NULL) {
 		perror("Couldn't allocate iomem_region struct");
 		goto out_close;
 	}
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile
index 37806621b25..a841262c594 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/um/os-Linux/sys-i386/Makefile
@@ -1,9 +1,9 @@
 #
-# Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 # Licensed under the GPL
 #
 
-obj-$(CONFIG_MODE_SKAS) = registers.o signal.o tls.o
+obj-y = registers.o signal.o tls.o
 
 USER_OBJS := $(obj-y)
 
diff --git a/arch/um/os-Linux/sys-i386/registers.c b/arch/um/os-Linux/sys-i386/registers.c
index 84b44f9cd42..d1997ca76e5 100644
--- a/arch/um/os-Linux/sys-i386/registers.c
+++ b/arch/um/os-Linux/sys-i386/registers.c
@@ -1,144 +1,73 @@
 /*
  * Copyright (C) 2004 PathScale, Inc
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <errno.h>
-#include <string.h>
-#include "sysdep/ptrace_user.h"
-#include "sysdep/ptrace.h"
-#include "uml-config.h"
-#include "skas_ptregs.h"
-#include "registers.h"
+#include "kern_constants.h"
 #include "longjmp.h"
 #include "user.h"
+#include "sysdep/ptrace_user.h"
 
-/* These are set once at boot time and not changed thereafter */
-
-static unsigned long exec_regs[MAX_REG_NR];
-static unsigned long exec_fp_regs[HOST_FP_SIZE];
-static unsigned long exec_fpx_regs[HOST_XFP_SIZE];
-static int have_fpx_regs = 1;
-
-void init_thread_registers(union uml_pt_regs *to)
-{
-	memcpy(to->skas.regs, exec_regs, sizeof(to->skas.regs));
-	memcpy(to->skas.fp, exec_fp_regs, sizeof(to->skas.fp));
-	if(have_fpx_regs)
-		memcpy(to->skas.xfp, exec_fpx_regs, sizeof(to->skas.xfp));
-}
-
-/* XXX These need to use [GS]ETFPXREGS and copy_sc_{to,from}_user_skas needs
- * to pass in a sufficiently large buffer
- */
 int save_fp_registers(int pid, unsigned long *fp_regs)
 {
-	if(ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
+	if (ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
 		return -errno;
 	return 0;
 }
 
 int restore_fp_registers(int pid, unsigned long *fp_regs)
 {
-	if(ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
+	if (ptrace(PTRACE_SETFPREGS, pid, 0, fp_regs) < 0)
 		return -errno;
 	return 0;
 }
 
-static int move_registers(int pid, int int_op, union uml_pt_regs *regs,
-			  int fp_op, unsigned long *fp_regs)
+int save_fpx_registers(int pid, unsigned long *fp_regs)
 {
-	if(ptrace(int_op, pid, 0, regs->skas.regs) < 0)
-		return -errno;
-
-	if(ptrace(fp_op, pid, 0, fp_regs) < 0)
+	if (ptrace(PTRACE_GETFPXREGS, pid, 0, fp_regs) < 0)
 		return -errno;
-
 	return 0;
 }
 
-void save_registers(int pid, union uml_pt_regs *regs)
+int restore_fpx_registers(int pid, unsigned long *fp_regs)
 {
-	unsigned long *fp_regs;
-	int err, fp_op;
-
-	if(have_fpx_regs){
-		fp_op = PTRACE_GETFPXREGS;
-		fp_regs = regs->skas.xfp;
-	}
-	else {
-		fp_op = PTRACE_GETFPREGS;
-		fp_regs = regs->skas.fp;
-	}
-
-	err = move_registers(pid, PTRACE_GETREGS, regs, fp_op, fp_regs);
-	if(err)
-		panic("save_registers - saving registers failed, errno = %d\n",
-		      -err);
+	if (ptrace(PTRACE_SETFPXREGS, pid, 0, fp_regs) < 0)
+		return -errno;
+	return 0;
 }
 
-void restore_registers(int pid, union uml_pt_regs *regs)
+unsigned long get_thread_reg(int reg, jmp_buf *buf)
 {
-	unsigned long *fp_regs;
-	int err, fp_op;
-
-	if(have_fpx_regs){
-		fp_op = PTRACE_SETFPXREGS;
-		fp_regs = regs->skas.xfp;
-	}
-	else {
-		fp_op = PTRACE_SETFPREGS;
-		fp_regs = regs->skas.fp;
+	switch (reg) {
+	case EIP:
+		return buf[0]->__eip;
+	case UESP:
+		return buf[0]->__esp;
+	case EBP:
+		return buf[0]->__ebp;
+	default:
+		printk(UM_KERN_ERR "get_thread_regs - unknown register %d\n",
+		       reg);
+		return 0;
 	}
-
-	err = move_registers(pid, PTRACE_SETREGS, regs, fp_op, fp_regs);
-	if(err)
-		panic("restore_registers - saving registers failed, "
-		      "errno = %d\n", -err);
 }
 
-void init_registers(int pid)
+int have_fpx_regs = 1;
+
+void arch_init_registers(int pid)
 {
+	unsigned long fpx_regs[HOST_XFP_SIZE];
 	int err;
 
-	memset(exec_regs, 0, sizeof(exec_regs));
-	err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs);
-	if(err)
-		panic("check_ptrace : PTRACE_GETREGS failed, errno = %d",
-		      errno);
-
-	errno = 0;
-	err = ptrace(PTRACE_GETFPXREGS, pid, 0, exec_fpx_regs);
+	err = ptrace(PTRACE_GETFPXREGS, pid, 0, fpx_regs);
 	if(!err)
 		return;
+
 	if(errno != EIO)
 		panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d",
 		      errno);
 
 	have_fpx_regs = 0;
-
-	err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs);
-	if(err)
-		panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d",
-		      errno);
-}
-
-void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
-{
-	memcpy(regs, exec_regs, sizeof(exec_regs));
-	if(fp_regs != NULL)
-		memcpy(fp_regs, exec_fp_regs,
-		       HOST_FP_SIZE * sizeof(unsigned long));
-}
-
-unsigned long get_thread_reg(int reg, jmp_buf *buf)
-{
-	switch(reg){
-	case EIP: return buf[0]->__eip;
-	case UESP: return buf[0]->__esp;
-	case EBP: return buf[0]->__ebp;
-	default:
-		printk("get_thread_regs - unknown register %d\n", reg);
-		return 0;
-	}
 }
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
index 7955e061a67..a42a4ef02e1 100644
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ b/arch/um/os-Linux/sys-x86_64/Makefile
@@ -1,9 +1,9 @@
 #
-# Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
+# Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 # Licensed under the GPL
 #
 
-obj-$(CONFIG_MODE_SKAS) = registers.o prctl.o signal.o
+obj-y = registers.o prctl.o signal.o
 
 USER_OBJS := $(obj-y)
 
diff --git a/arch/um/os-Linux/sys-x86_64/registers.c b/arch/um/os-Linux/sys-x86_64/registers.c
index 9467315b805..9bfa789992d 100644
--- a/arch/um/os-Linux/sys-x86_64/registers.c
+++ b/arch/um/os-Linux/sys-x86_64/registers.c
@@ -1,23 +1,15 @@
 /*
- * Copyright (C) 2004 PathScale, Inc
+ * Copyright (C) 2006-2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <errno.h>
 #include <sys/ptrace.h>
-#include <string.h>
-#include "ptrace_user.h"
-#include "uml-config.h"
-#include "skas_ptregs.h"
-#include "registers.h"
+#define __FRAME_OFFSETS
+#include <asm/ptrace.h>
 #include "longjmp.h"
 #include "user.h"
 
-/* These are set once at boot time and not changed thereafter */
-
-static unsigned long exec_regs[MAX_REG_NR];
-static unsigned long exec_fp_regs[HOST_FP_SIZE];
-
 int save_fp_registers(int pid, unsigned long *fp_regs)
 {
 	if(ptrace(PTRACE_GETFPREGS, pid, 0, fp_regs) < 0)
@@ -32,67 +24,6 @@ int restore_fp_registers(int pid, unsigned long *fp_regs)
 	return 0;
 }
 
-void init_thread_registers(union uml_pt_regs *to)
-{
-	memcpy(to->skas.regs, exec_regs, sizeof(to->skas.regs));
-	memcpy(to->skas.fp, exec_fp_regs, sizeof(to->skas.fp));
-}
-
-static int move_registers(int pid, int int_op, int fp_op,
-			  union uml_pt_regs *regs)
-{
-	if(ptrace(int_op, pid, 0, regs->skas.regs) < 0)
-		return -errno;
-
-	if(ptrace(fp_op, pid, 0, regs->skas.fp) < 0)
-		return -errno;
-
-	return 0;
-}
-
-void save_registers(int pid, union uml_pt_regs *regs)
-{
-	int err;
-
-	err = move_registers(pid, PTRACE_GETREGS, PTRACE_GETFPREGS, regs);
-	if(err)
-		panic("save_registers - saving registers failed, errno = %d\n",
-		      -err);
-}
-
-void restore_registers(int pid, union uml_pt_regs *regs)
-{
-	int err;
-
-	err = move_registers(pid, PTRACE_SETREGS, PTRACE_SETFPREGS, regs);
-	if(err)
-		panic("restore_registers - saving registers failed, "
-		      "errno = %d\n", -err);
-}
-
-void init_registers(int pid)
-{
-	int err;
-
-	err = ptrace(PTRACE_GETREGS, pid, 0, exec_regs);
-	if(err)
-		panic("check_ptrace : PTRACE_GETREGS failed, errno = %d",
-		      errno);
-
-	err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs);
-	if(err)
-		panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d",
-		      errno);
-}
-
-void get_safe_registers(unsigned long *regs, unsigned long *fp_regs)
-{
-	memcpy(regs, exec_regs, sizeof(exec_regs));
-	if(fp_regs != NULL)
-		memcpy(fp_regs, exec_fp_regs,
-		       HOST_FP_SIZE * sizeof(unsigned long));
-}
-
 unsigned long get_thread_reg(int reg, jmp_buf *buf)
 {
 	switch(reg){
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 5de169b168f..e34e1effe0f 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -1,101 +1,86 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
+#include <stddef.h>
+#include <errno.h>
+#include <signal.h>
 #include <time.h>
 #include <sys/time.h>
-#include <signal.h>
-#include <errno.h>
-#include "kern_util.h"
-#include "user.h"
-#include "process.h"
 #include "kern_constants.h"
 #include "os.h"
-#include "uml-config.h"
+#include "user.h"
 
-int set_interval(int is_virtual)
+int set_interval(void)
 {
-	int usec = 1000000/hz();
-	int timer_type = is_virtual ? ITIMER_VIRTUAL : ITIMER_REAL;
+	int usec = UM_USEC_PER_SEC / UM_HZ;
 	struct itimerval interval = ((struct itimerval) { { 0, usec },
 							  { 0, usec } });
 
-	if(setitimer(timer_type, &interval, NULL) == -1)
+	if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1)
 		return -errno;
 
 	return 0;
 }
 
-#ifdef UML_CONFIG_MODE_TT
-void enable_timer(void)
+int timer_one_shot(int ticks)
 {
-	set_interval(1);
-}
-#endif
+	unsigned long usec = ticks * UM_USEC_PER_SEC / UM_HZ;
+	unsigned long sec = usec / UM_USEC_PER_SEC;
+	struct itimerval interval;
 
-void disable_timer(void)
-{
-	struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
-	if((setitimer(ITIMER_VIRTUAL, &disable, NULL) < 0) ||
-	   (setitimer(ITIMER_REAL, &disable, NULL) < 0))
-		printk("disnable_timer - setitimer failed, errno = %d\n",
-		       errno);
-	/* If there are signals already queued, after unblocking ignore them */
-	signal(SIGALRM, SIG_IGN);
-	signal(SIGVTALRM, SIG_IGN);
+	usec %= UM_USEC_PER_SEC;
+	interval = ((struct itimerval) { { 0, 0 }, { sec, usec } });
+
+	if (setitimer(ITIMER_VIRTUAL, &interval, NULL) == -1)
+		return -errno;
+
+	return 0;
 }
 
-void switch_timers(int to_real)
+/**
+ * timeval_to_ns - Convert timeval to nanoseconds
+ * @tv:		pointer to the timeval variable to be converted
+ *
+ * Returns the scalar nanosecond representation of the timeval
+ * parameter.
+ *
+ * Ripped from linux/time.h because it's a kernel header, and thus
+ * unusable from here.
+ */
+static inline long long timeval_to_ns(const struct timeval *tv)
 {
-	struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }});
-	struct itimerval enable = ((struct itimerval) { { 0, 1000000/hz() },
-							{ 0, 1000000/hz() }});
-	int old, new;
-
-	if(to_real){
-		old = ITIMER_VIRTUAL;
-		new = ITIMER_REAL;
-	}
-	else {
-		old = ITIMER_REAL;
-		new = ITIMER_VIRTUAL;
-	}
-
-	if((setitimer(old, &disable, NULL) < 0) ||
-	   (setitimer(new, &enable, NULL)))
-		printk("switch_timers - setitimer failed, errno = %d\n",
-		       errno);
+	return ((long long) tv->tv_sec * UM_NSEC_PER_SEC) +
+		tv->tv_usec * UM_NSEC_PER_USEC;
 }
 
-#ifdef UML_CONFIG_MODE_TT
-void uml_idle_timer(void)
+long long disable_timer(void)
 {
-	if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR)
-		panic("Couldn't unset SIGVTALRM handler");
+	struct itimerval time = ((struct itimerval) { { 0, 0 }, { 0, 0 } });
+
+	if(setitimer(ITIMER_VIRTUAL, &time, &time) < 0)
+		printk(UM_KERN_ERR "disable_timer - setitimer failed, "
+		       "errno = %d\n", errno);
 
-	set_handler(SIGALRM, (__sighandler_t) alarm_handler,
-		    SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1);
-	set_interval(0);
+	return timeval_to_ns(&time.it_value);
 }
-#endif
 
-unsigned long long os_nsecs(void)
+long long os_nsecs(void)
 {
 	struct timeval tv;
 
 	gettimeofday(&tv, NULL);
-	return((unsigned long long) tv.tv_sec * BILLION + tv.tv_usec * 1000);
+	return timeval_to_ns(&tv);
 }
 
-void idle_sleep(int secs)
+extern void alarm_handler(int sig, struct sigcontext *sc);
+
+void idle_sleep(unsigned long long nsecs)
 {
-	struct timespec ts;
+	struct timespec ts = { .tv_sec	= nsecs / UM_NSEC_PER_SEC,
+			       .tv_nsec = nsecs % UM_NSEC_PER_SEC };
 
-	ts.tv_sec = secs;
-	ts.tv_nsec = 0;
-	nanosleep(&ts, NULL);
+	if (nanosleep(&ts, &ts) == 0)
+		alarm_handler(SIGVTALRM, NULL);
 }
diff --git a/arch/um/os-Linux/tls.c b/arch/um/os-Linux/tls.c
index 16215b99080..73277801ef1 100644
--- a/arch/um/os-Linux/tls.c
+++ b/arch/um/os-Linux/tls.c
@@ -1,18 +1,9 @@
 #include <errno.h>
-#include <unistd.h>
 #include <sys/ptrace.h>
-#include <sys/syscall.h>
-#include <asm/ldt.h>
 #include "sysdep/tls.h"
-#include "uml-config.h"
 
 /* TLS support - we basically rely on the host's one.*/
 
-/* In TT mode, this should be called only by the tracing thread, and makes sense
- * only for PTRACE_SET_THREAD_AREA. In SKAS mode, it's used normally.
- *
- */
-
 #ifndef PTRACE_GET_THREAD_AREA
 #define PTRACE_GET_THREAD_AREA 25
 #endif
@@ -32,8 +23,6 @@ int os_set_thread_area(user_desc_t *info, int pid)
 	return ret;
 }
 
-#ifdef UML_CONFIG_MODE_SKAS
-
 int os_get_thread_area(user_desc_t *info, int pid)
 {
 	int ret;
@@ -44,32 +33,3 @@ int os_get_thread_area(user_desc_t *info, int pid)
 		ret = -errno;
 	return ret;
 }
-
-#endif
-
-#ifdef UML_CONFIG_MODE_TT
-#include "linux/unistd.h"
-
-int do_set_thread_area_tt(user_desc_t *info)
-{
-	int ret;
-
-	ret = syscall(__NR_set_thread_area,info);
-	if (ret < 0) {
-		ret = -errno;
-	}
-	return ret;
-}
-
-int do_get_thread_area_tt(user_desc_t *info)
-{
-	int ret;
-
-	ret = syscall(__NR_get_thread_area,info);
-	if (ret < 0) {
-		ret = -errno;
-	}
-	return ret;
-}
-
-#endif /* UML_CONFIG_MODE_TT */
diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c
index 295da657931..2a1c9843e32 100644
--- a/arch/um/os-Linux/trap.c
+++ b/arch/um/os-Linux/trap.c
@@ -1,22 +1,14 @@
 /*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stdlib.h>
 #include <signal.h>
-#include "kern_util.h"
 #include "os.h"
-#include "mode.h"
-#include "longjmp.h"
-
-void usr2_handler(int sig, union uml_pt_regs *regs)
-{
-	CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0);
-}
+#include "sysdep/ptrace.h"
 
 /* Initialized from linux_main() */
-void (*sig_info[NSIG])(int, union uml_pt_regs *);
+void (*sig_info[NSIG])(int, struct uml_pt_regs *);
 
 void os_fill_handlinfo(struct kern_handlers h)
 {
@@ -28,13 +20,4 @@ void os_fill_handlinfo(struct kern_handlers h)
 	sig_info[SIGSEGV] = h.page_fault;
 	sig_info[SIGIO] = h.sigio_handler;
 	sig_info[SIGVTALRM] = h.timer_handler;
-	sig_info[SIGALRM] = h.timer_handler;
-	sig_info[SIGUSR2] = usr2_handler;
-}
-
-void do_longjmp(void *b, int val)
-{
-	jmp_buf *buf = b;
-
-	UML_LONGJMP(buf, val);
 }
diff --git a/arch/um/os-Linux/tt.c b/arch/um/os-Linux/tt.c
deleted file mode 100644
index bcf9359c4e9..00000000000
--- a/arch/um/os-Linux/tt.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <signal.h>
-#include <sched.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <sys/time.h>
-#include <sys/ptrace.h>
-#include <linux/ptrace.h>
-#include <sys/wait.h>
-#include <sys/mman.h>
-#include <asm/ptrace.h>
-#include <asm/unistd.h>
-#include <asm/page.h>
-#include "kern_util.h"
-#include "user.h"
-#include "signal_kern.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/sigcontext.h"
-#include "irq_user.h"
-#include "ptrace_user.h"
-#include "init.h"
-#include "os.h"
-#include "uml-config.h"
-#include "choose-mode.h"
-#include "mode.h"
-#include "tempfile.h"
-#include "kern_constants.h"
-
-int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x,
-		   int must_succeed)
-{
-	int err;
-
-	err = os_protect_memory((void *) addr, len, r, w, x);
-	if(err < 0){
-                if(must_succeed)
-			panic("protect failed, err = %d", -err);
-		else return(err);
-	}
-	return(0);
-}
-
-void kill_child_dead(int pid)
-{
-	kill(pid, SIGKILL);
-	kill(pid, SIGCONT);
-	do {
-		int n;
-		CATCH_EINTR(n = waitpid(pid, NULL, 0));
-		if (n > 0)
-			kill(pid, SIGCONT);
-		else
-			break;
-	} while(1);
-}
-
-void stop(void)
-{
-	while(1) sleep(1000000);
-}
-
-int wait_for_stop(int pid, int sig, int cont_type, void *relay)
-{
-	sigset_t *relay_signals = relay;
-	int status, ret;
-
-	while(1){
-		CATCH_EINTR(ret = waitpid(pid, &status, WUNTRACED));
-		if((ret < 0) ||
-		   !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){
-			if(ret < 0){
-				printk("wait failed, errno = %d\n",
-				       errno);
-			}
-			else if(WIFEXITED(status))
-				printk("process %d exited with status %d\n",
-				       pid, WEXITSTATUS(status));
-			else if(WIFSIGNALED(status))
-				printk("process %d exited with signal %d\n",
-				       pid, WTERMSIG(status));
-			else if((WSTOPSIG(status) == SIGVTALRM) ||
-				(WSTOPSIG(status) == SIGALRM) ||
-				(WSTOPSIG(status) == SIGIO) ||
-				(WSTOPSIG(status) == SIGPROF) ||
-				(WSTOPSIG(status) == SIGCHLD) ||
-				(WSTOPSIG(status) == SIGWINCH) ||
-				(WSTOPSIG(status) == SIGINT)){
-				ptrace(cont_type, pid, 0, WSTOPSIG(status));
-				continue;
-			}
-			else if((relay_signals != NULL) &&
-				sigismember(relay_signals, WSTOPSIG(status))){
-				ptrace(cont_type, pid, 0, WSTOPSIG(status));
-				continue;
-			}
-			else printk("process %d stopped with signal %d\n",
-				    pid, WSTOPSIG(status));
-			panic("wait_for_stop failed to wait for %d to stop "
-			      "with %d\n", pid, sig);
-		}
-		return(status);
-	}
-}
-
-void forward_ipi(int fd, int pid)
-{
-	int err;
-
-	err = os_set_owner(fd, pid);
-	if(err < 0)
-		printk("forward_ipi: set_owner failed, fd = %d, me = %d, "
-		       "target = %d, err = %d\n", fd, os_getpid(), pid, -err);
-}
-
-/*
- *-------------------------
- * only for tt mode (will be deleted in future...)
- *-------------------------
- */
-
-struct tramp {
-	int (*tramp)(void *);
-	void *tramp_data;
-	unsigned long temp_stack;
-	int flags;
-	int pid;
-};
-
-/* See above for why sigkill is here */
-
-int sigkill = SIGKILL;
-
-int outer_tramp(void *arg)
-{
-	struct tramp *t;
-	int sig = sigkill;
-
-	t = arg;
-	t->pid = clone(t->tramp, (void *) t->temp_stack + UM_KERN_PAGE_SIZE/2,
-		       t->flags, t->tramp_data);
-	if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL);
-	kill(os_getpid(), sig);
-	_exit(0);
-}
-
-int start_fork_tramp(void *thread_arg, unsigned long temp_stack,
-		     int clone_flags, int (*tramp)(void *))
-{
-	struct tramp arg;
-	unsigned long sp;
-	int new_pid, status, err;
-
-	/* The trampoline will run on the temporary stack */
-	sp = stack_sp(temp_stack);
-
-	clone_flags |= CLONE_FILES | SIGCHLD;
-
-	arg.tramp = tramp;
-	arg.tramp_data = thread_arg;
-	arg.temp_stack = temp_stack;
-	arg.flags = clone_flags;
-
-	/* Start the process and wait for it to kill itself */
-	new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg);
-	if(new_pid < 0)
-		return(new_pid);
-
-	CATCH_EINTR(err = waitpid(new_pid, &status, 0));
-	if(err < 0)
-		panic("Waiting for outer trampoline failed - errno = %d",
-		      errno);
-
-	if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL))
-		panic("outer trampoline didn't exit with SIGKILL, "
-		      "status = %d", status);
-
-	return(arg.pid);
-}
-
-void forward_pending_sigio(int target)
-{
-	sigset_t sigs;
-
-	if(sigpending(&sigs))
-		panic("forward_pending_sigio : sigpending failed");
-	if(sigismember(&sigs, SIGIO))
-		kill(target, SIGIO);
-}
-
diff --git a/arch/um/os-Linux/uaccess.c b/arch/um/os-Linux/uaccess.c
index bbb73a65037..8d27b6d1df9 100644
--- a/arch/um/os-Linux/uaccess.c
+++ b/arch/um/os-Linux/uaccess.c
@@ -8,7 +8,7 @@
 #include "longjmp.h"
 
 unsigned long __do_user_copy(void *to, const void *from, int n,
-			     void **fault_addr, void **fault_catcher,
+			     void **fault_addr, jmp_buf **fault_catcher,
 			     void (*op)(void *to, const void *from,
 					int n), int *faulted_out)
 {
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index b462863f717..106fa864155 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -1,17 +1,21 @@
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
 #include <stdio.h>
-#include <unistd.h>
 #include <stdlib.h>
-#include <string.h>
+#include <dirent.h>
 #include <errno.h>
+#include <fcntl.h>
 #include <signal.h>
-#include <dirent.h>
-#include <sys/fcntl.h>
+#include <string.h>
+#include <unistd.h>
 #include <sys/stat.h>
-#include <sys/param.h>
 #include "init.h"
+#include "kern_constants.h"
 #include "os.h"
 #include "user.h"
-#include "mode.h"
 
 #define UML_DIR "~/.uml/"
 
@@ -28,13 +32,13 @@ static int __init make_uml_dir(void)
 	char dir[512] = { '\0' };
 	int len, err;
 
-	if(*uml_dir == '~'){
+	if (*uml_dir == '~') {
 		char *home = getenv("HOME");
 
 		err = -ENOENT;
-		if(home == NULL){
-			printk("make_uml_dir : no value in environment for "
-			       "$HOME\n");
+		if (home == NULL) {
+			printk(UM_KERN_ERR "make_uml_dir : no value in "
+			       "environment for $HOME\n");
 			goto err;
 		}
 		strlcpy(dir, home, sizeof(dir));
@@ -53,7 +57,7 @@ static int __init make_uml_dir(void)
 	}
 	strcpy(uml_dir, dir);
 
-	if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){
+	if ((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)) {
 	        printf("Failed to mkdir '%s': %s\n", uml_dir, strerror(errno));
 		err = -errno;
 		goto err_free;
@@ -70,8 +74,8 @@ err:
 /*
  * Unlinks the files contained in @dir and then removes @dir.
  * Doesn't handle directory trees, so it's not like rm -rf, but almost such. We
- * ignore ENOENT errors for anything (they happen, strangely enough - possibly due
- * to races between multiple dying UML threads).
+ * ignore ENOENT errors for anything (they happen, strangely enough - possibly
+ * due to races between multiple dying UML threads).
  */
 static int remove_files_and_dir(char *dir)
 {
@@ -116,7 +120,8 @@ out:
 	return ret;
 }
 
-/* This says that there isn't already a user of the specified directory even if
+/*
+ * This says that there isn't already a user of the specified directory even if
  * there are errors during the checking.  This is because if these errors
  * happen, the directory is unusable by the pre-existing UML, so we might as
  * well take it over.  This could happen either by
@@ -134,44 +139,45 @@ static inline int is_umdir_used(char *dir)
 	int dead, fd, p, n, err;
 
 	n = snprintf(file, sizeof(file), "%s/pid", dir);
-	if(n >= sizeof(file)){
-		printk("is_umdir_used - pid filename too long\n");
+	if (n >= sizeof(file)) {
+		printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n");
 		err = -E2BIG;
 		goto out;
 	}
 
 	dead = 0;
 	fd = open(file, O_RDONLY);
-	if(fd < 0) {
+	if (fd < 0) {
 		fd = -errno;
-		if(fd != -ENOENT){
-			printk("is_umdir_used : couldn't open pid file '%s', "
-			       "err = %d\n", file, -fd);
+		if (fd != -ENOENT) {
+			printk(UM_KERN_ERR "is_umdir_used : couldn't open pid "
+			       "file '%s', err = %d\n", file, -fd);
 		}
 		goto out;
 	}
 
 	err = 0;
 	n = read(fd, pid, sizeof(pid));
-	if(n < 0){
-		printk("is_umdir_used : couldn't read pid file '%s', "
-		       "err = %d\n", file, errno);
+	if (n < 0) {
+		printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file "
+		       "'%s', err = %d\n", file, errno);
 		goto out_close;
-	} else if(n == 0){
-		printk("is_umdir_used : couldn't read pid file '%s', "
-		       "0-byte read\n", file);
+	} else if (n == 0) {
+		printk(UM_KERN_ERR "is_umdir_used : couldn't read pid file "
+		       "'%s', 0-byte read\n", file);
 		goto out_close;
 	}
 
 	p = strtoul(pid, &end, 0);
-	if(end == pid){
-		printk("is_umdir_used : couldn't parse pid file '%s', "
-		       "errno = %d\n", file, errno);
+	if (end == pid) {
+		printk(UM_KERN_ERR "is_umdir_used : couldn't parse pid file "
+		       "'%s', errno = %d\n", file, errno);
 		goto out_close;
 	}
 
-	if((kill(p, 0) == 0) || (errno != ESRCH)){
-		printk("umid \"%s\" is already in use by pid %d\n", umid, p);
+	if ((kill(p, 0) == 0) || (errno != ESRCH)) {
+		printk(UM_KERN_ERR "umid \"%s\" is already in use by pid %d\n",
+		       umid, p);
 		return 1;
 	}
 
@@ -195,8 +201,8 @@ static int umdir_take_if_dead(char *dir)
 
 	ret = remove_files_and_dir(dir);
 	if (ret) {
-		printk("is_umdir_used - remove_files_and_dir failed with "
-		       "err = %d\n", ret);
+		printk(UM_KERN_ERR "is_umdir_used - remove_files_and_dir "
+		       "failed with err = %d\n", ret);
 	}
 	return ret;
 }
@@ -207,27 +213,28 @@ static void __init create_pid_file(void)
 	char pid[sizeof("nnnnn\0")];
 	int fd, n;
 
-	if(umid_file_name("pid", file, sizeof(file)))
+	if (umid_file_name("pid", file, sizeof(file)))
 		return;
 
 	fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644);
-	if(fd < 0){
-		printk("Open of machine pid file \"%s\" failed: %s\n",
-		       file, strerror(errno));
+	if (fd < 0) {
+		printk(UM_KERN_ERR "Open of machine pid file \"%s\" failed: "
+		       "%s\n", file, strerror(errno));
 		return;
 	}
 
 	snprintf(pid, sizeof(pid), "%d\n", getpid());
 	n = write(fd, pid, strlen(pid));
-	if(n != strlen(pid))
-		printk("Write of pid file failed - err = %d\n", errno);
+	if (n != strlen(pid))
+		printk(UM_KERN_ERR "Write of pid file failed - err = %d\n",
+		       errno);
 
 	close(fd);
 }
 
 int __init set_umid(char *name)
 {
-	if(strlen(name) > UMID_LEN - 1)
+	if (strlen(name) > UMID_LEN - 1)
 		return -E2BIG;
 
 	strlcpy(umid, name, sizeof(umid));
@@ -243,18 +250,18 @@ int __init make_umid(void)
 	int fd, err;
 	char tmp[256];
 
-	if(umid_setup)
+	if (umid_setup)
 		return 0;
 
 	make_uml_dir();
 
-	if(*umid == '\0'){
+	if (*umid == '\0') {
 		strlcpy(tmp, uml_dir, sizeof(tmp));
 		strlcat(tmp, "XXXXXX", sizeof(tmp));
 		fd = mkstemp(tmp);
-		if(fd < 0){
-			printk("make_umid - mkstemp(%s) failed: %s\n",
-			       tmp, strerror(errno));
+		if (fd < 0) {
+			printk(UM_KERN_ERR "make_umid - mkstemp(%s) failed: "
+			       "%s\n", tmp, strerror(errno));
 			err = -errno;
 			goto err;
 		}
@@ -263,11 +270,12 @@ int __init make_umid(void)
 
 		set_umid(&tmp[strlen(uml_dir)]);
 
-		/* There's a nice tiny little race between this unlink and
+		/*
+		 * There's a nice tiny little race between this unlink and
 		 * the mkdir below.  It'd be nice if there were a mkstemp
 		 * for directories.
 		 */
-		if(unlink(tmp)){
+		if (unlink(tmp)) {
 			err = -errno;
 			goto err;
 		}
@@ -275,9 +283,9 @@ int __init make_umid(void)
 
 	snprintf(tmp, sizeof(tmp), "%s%s", uml_dir, umid);
 	err = mkdir(tmp, 0777);
-	if(err < 0){
+	if (err < 0) {
 		err = -errno;
-		if(err != -EEXIST)
+		if (err != -EEXIST)
 			goto err;
 
 		if (umdir_take_if_dead(tmp) < 0)
@@ -285,9 +293,10 @@ int __init make_umid(void)
 
 		err = mkdir(tmp, 0777);
 	}
-	if(err){
+	if (err) {
 		err = -errno;
-		printk("Failed to create '%s' - err = %d\n", umid, -errno);
+		printk(UM_KERN_ERR "Failed to create '%s' - err = %d\n", umid,
+		       errno);
 		goto err;
 	}
 
@@ -302,14 +311,15 @@ int __init make_umid(void)
 
 static int __init make_umid_init(void)
 {
-	if(!make_umid())
+	if (!make_umid())
 		return 0;
 
-	/* If initializing with the given umid failed, then try again with
+	/*
+	 * If initializing with the given umid failed, then try again with
 	 * a random one.
 	 */
-	printk("Failed to initialize umid \"%s\", trying with a random umid\n",
-	       umid);
+	printk(UM_KERN_ERR "Failed to initialize umid \"%s\", trying with a "
+	       "random umid\n", umid);
 	*umid = '\0';
 	make_umid();
 
@@ -323,12 +333,12 @@ int __init umid_file_name(char *name, char *buf, int len)
 	int n, err;
 
 	err = make_umid();
-	if(err)
+	if (err)
 		return err;
 
 	n = snprintf(buf, len, "%s%s/%s", uml_dir, umid, name);
-	if(n >= len){
-		printk("umid_file_name : buffer too short\n");
+	if (n >= len) {
+		printk(UM_KERN_ERR "umid_file_name : buffer too short\n");
 		return -E2BIG;
 	}
 
@@ -342,21 +352,22 @@ char *get_umid(void)
 
 static int __init set_uml_dir(char *name, int *add)
 {
-	if(*name == '\0'){
+	if (*name == '\0') {
 		printf("uml_dir can't be an empty string\n");
 		return 0;
 	}
 
-	if(name[strlen(name) - 1] == '/'){
+	if (name[strlen(name) - 1] == '/') {
 		uml_dir = name;
 		return 0;
 	}
 
 	uml_dir = malloc(strlen(name) + 2);
-	if(uml_dir == NULL){
+	if (uml_dir == NULL) {
 		printf("Failed to malloc uml_dir - error = %d\n", errno);
 
-		/* Return 0 here because do_initcalls doesn't look at
+		/*
+		 * Return 0 here because do_initcalls doesn't look at
 		 * the return value.
 		 */
 		return 0;
@@ -377,7 +388,7 @@ static void remove_umid_dir(void)
 
 	sprintf(dir, "%s%s", uml_dir, umid);
 	err = remove_files_and_dir(dir);
-	if(err)
+	if (err)
 		printf("remove_umid_dir - remove_files_and_dir failed with "
 		       "err = %d\n", err);
 }
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 7cbcf484e13..ef095436a78 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -105,6 +105,44 @@ int setjmp_wrapper(void (*proc)(void *, void *), ...)
 
 void os_dump_core(void)
 {
+	int pid;
+
 	signal(SIGSEGV, SIG_DFL);
+
+	/*
+	 * We are about to SIGTERM this entire process group to ensure that
+	 * nothing is around to run after the kernel exits.  The
+	 * kernel wants to abort, not die through SIGTERM, so we
+	 * ignore it here.
+	 */
+
+	signal(SIGTERM, SIG_IGN);
+	kill(0, SIGTERM);
+	/*
+	 * Most of the other processes associated with this UML are
+	 * likely stopped, so give them a SIGCONT so they see the
+	 * SIGTERM.
+	 */
+	kill(0, SIGCONT);
+
+	/*
+	 * Now, having sent signals to everyone but us, make sure they
+	 * die by ptrace.  Processes can survive what's been done to
+	 * them so far - the mechanism I understand is receiving a
+	 * SIGSEGV and segfaulting immediately upon return.  There is
+	 * always a SIGSEGV pending, and (I'm guessing) signals are
+	 * processed in numeric order so the SIGTERM (signal 15 vs
+	 * SIGSEGV being signal 11) is never handled.
+	 *
+	 * Run a waitpid loop until we get some kind of error.
+	 * Hopefully, it's ECHILD, but there's not a lot we can do if
+	 * it's something else.  Tell os_kill_ptraced_process not to
+	 * wait for the child to report its death because there's
+	 * nothing reasonable to do if that fails.
+	 */
+
+	while ((pid = waitpid(-1, NULL, WNOHANG)) > 0)
+		os_kill_ptraced_process(pid, 0);
+
 	abort();
 }
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index bf23dd3e24d..61107b68e05 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -21,7 +21,7 @@ $(UNPROFILE_OBJS:.o=.%): \
 $(UNPROFILE_OBJS) : CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ \
 	-Dunix -D__unix__ -D__$(SUBARCH)__ $(CF)
 
-# The stubs and unmap.o can't try to call mcount or update basic block data
+# The stubs can't try to call mcount or update basic block data
 define unprofile
 	$(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1)))
 endef
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index a4618b6b85b..964dc1a04c3 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -1,23 +1,21 @@
+#
+# Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+#
+
 obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
-	ptrace_user.o setjmp.o signal.o sigcontext.o syscalls.o sysrq.o \
+	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
 	sys_call_table.o tls.o
 
-obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o
-
 subarch-obj-y = lib/bitops_32.o lib/semaphore_32.o lib/string_32.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
 
-USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o
+USER_OBJS := bugs.o ptrace_user.o fault.o
 
 USER_OBJS += user-offsets.s
 extra-y += user-offsets.s
 
-extra-$(CONFIG_MODE_TT) += unmap.o
-
 UNPROFILE_OBJS := stub_segv.o
 CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
 
 include arch/um/scripts/Makefile.rules
-
-$(obj)/unmap.%: _c_flags = $(call unprofile,$(CFLAGS))
diff --git a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
index 0393e44813e..806895d73bc 100644
--- a/arch/um/sys-i386/bugs.c
+++ b/arch/um/sys-i386/bugs.c
@@ -1,18 +1,15 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <unistd.h>
 #include <errno.h>
+#include <signal.h>
 #include <string.h>
-#include <sys/signal.h>
-#include <asm/ldt.h>
-#include "kern_util.h"
-#include "user.h"
-#include "sysdep/ptrace.h"
-#include "task.h"
+#include "kern_constants.h"
 #include "os.h"
+#include "task.h"
+#include "user.h"
 
 #define MAXTOKEN 64
 
@@ -30,18 +27,20 @@ static char token(int fd, char *buf, int len, char stop)
 	do {
 		n = os_read_file(fd, ptr, sizeof(*ptr));
 		c = *ptr++;
-		if(n != sizeof(*ptr)){
-			if(n == 0)
+		if (n != sizeof(*ptr)) {
+			if (n == 0)
 				return 0;
-			printk("Reading /proc/cpuinfo failed, err = %d\n", -n);
-			if(n < 0)
+			printk(UM_KERN_ERR "Reading /proc/cpuinfo failed, "
+			       "err = %d\n", -n);
+			if (n < 0)
 				return n;
 			else return -EIO;
 		}
-	} while((c != '\n') && (c != stop) && (ptr < end));
+	} while ((c != '\n') && (c != stop) && (ptr < end));
 
-	if(ptr == end){
-		printk("Failed to find '%c' in /proc/cpuinfo\n", stop);
+	if (ptr == end) {
+		printk(UM_KERN_ERR "Failed to find '%c' in /proc/cpuinfo\n",
+		       stop);
 		return -1;
 	}
 	*(ptr - 1) = '\0';
@@ -54,26 +53,27 @@ static int find_cpuinfo_line(int fd, char *key, char *scratch, int len)
 	char c;
 
 	scratch[len - 1] = '\0';
-	while(1){
+	while (1) {
 		c = token(fd, scratch, len - 1, ':');
-		if(c <= 0)
+		if (c <= 0)
 			return 0;
-		else if(c != ':'){
-			printk("Failed to find ':' in /proc/cpuinfo\n");
+		else if (c != ':') {
+			printk(UM_KERN_ERR "Failed to find ':' in "
+			       "/proc/cpuinfo\n");
 			return 0;
 		}
 
-		if(!strncmp(scratch, key, strlen(key)))
+		if (!strncmp(scratch, key, strlen(key)))
 			return 1;
 
 		do {
 			n = os_read_file(fd, &c, sizeof(c));
-			if(n != sizeof(c)){
-				printk("Failed to find newline in "
+			if (n != sizeof(c)) {
+				printk(UM_KERN_ERR "Failed to find newline in "
 				       "/proc/cpuinfo, err = %d\n", -n);
 				return 0;
 			}
-		} while(c != '\n');
+		} while (c != '\n');
 	}
 	return 0;
 }
@@ -83,46 +83,50 @@ static int check_cpu_flag(char *feature, int *have_it)
 	char buf[MAXTOKEN], c;
 	int fd, len = ARRAY_SIZE(buf);
 
-	printk("Checking for host processor %s support...", feature);
+	printk(UM_KERN_INFO "Checking for host processor %s support...",
+	       feature);
 	fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0);
-	if(fd < 0){
-		printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd);
+	if (fd < 0) {
+		printk(UM_KERN_ERR "Couldn't open /proc/cpuinfo, err = %d\n",
+		       -fd);
 		return 0;
 	}
 
 	*have_it = 0;
-	if(!find_cpuinfo_line(fd, "flags", buf, ARRAY_SIZE(buf)))
+	if (!find_cpuinfo_line(fd, "flags", buf, ARRAY_SIZE(buf)))
 		goto out;
 
 	c = token(fd, buf, len - 1, ' ');
-	if(c < 0)
+	if (c < 0)
 		goto out;
-	else if(c != ' '){
-		printk("Failed to find ' ' in /proc/cpuinfo\n");
+	else if (c != ' ') {
+		printk(UM_KERN_ERR "Failed to find ' ' in /proc/cpuinfo\n");
 		goto out;
 	}
 
-	while(1){
+	while (1) {
 		c = token(fd, buf, len - 1, ' ');
-		if(c < 0)
+		if (c < 0)
 			goto out;
-		else if(c == '\n') break;
+		else if (c == '\n')
+			break;
 
-		if(!strcmp(buf, feature)){
+		if (!strcmp(buf, feature)) {
 			*have_it = 1;
 			goto out;
 		}
 	}
  out:
-	if(*have_it == 0)
+	if (*have_it == 0)
 		printk("No\n");
-	else if(*have_it == 1)
+	else if (*have_it == 1)
 		printk("Yes\n");
 	os_close_file(fd);
 	return 1;
 }
 
-#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems
+#if 0 /*
+       * This doesn't work in tt mode, plus it's causing compilation problems
        * for some people.
        */
 static void disable_lcall(void)
@@ -135,8 +139,9 @@ static void disable_lcall(void)
 	ldt.base_addr = 0;
 	ldt.limit = 0;
 	err = modify_ldt(1, &ldt, sizeof(ldt));
-	if(err)
-		printk("Failed to disable lcall7 - errno = %d\n", errno);
+	if (err)
+		printk(UM_KERN_ERR "Failed to disable lcall7 - errno = %d\n",
+		       errno);
 }
 #endif
 
@@ -151,40 +156,41 @@ void arch_check_bugs(void)
 {
 	int have_it;
 
-	if(os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0){
-		printk("/proc/cpuinfo not available - skipping CPU capability "
-		       "checks\n");
+	if (os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0) {
+		printk(UM_KERN_ERR "/proc/cpuinfo not available - skipping CPU "
+		       "capability checks\n");
 		return;
 	}
-	if(check_cpu_flag("cmov", &have_it))
+	if (check_cpu_flag("cmov", &have_it))
 		host_has_cmov = have_it;
-	if(check_cpu_flag("xmm", &have_it))
+	if (check_cpu_flag("xmm", &have_it))
 		host_has_xmm = have_it;
 }
 
-int arch_handle_signal(int sig, union uml_pt_regs *regs)
+int arch_handle_signal(int sig, struct uml_pt_regs *regs)
 {
 	unsigned char tmp[2];
 
-	/* This is testing for a cmov (0x0f 0x4x) instruction causing a
+	/*
+	 * This is testing for a cmov (0x0f 0x4x) instruction causing a
 	 * SIGILL in init.
 	 */
-	if((sig != SIGILL) || (TASK_PID(get_current()) != 1))
+	if ((sig != SIGILL) || (TASK_PID(get_current()) != 1))
 		return 0;
 
 	if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2))
 		panic("SIGILL in init, could not read instructions!\n");
-	if((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40))
+	if ((tmp[0] != 0x0f) || ((tmp[1] & 0xf0) != 0x40))
 		return 0;
 
-	if(host_has_cmov == 0)
+	if (host_has_cmov == 0)
 		panic("SIGILL caused by cmov, which this processor doesn't "
 		      "implement, boot a filesystem compiled for older "
 		      "processors");
-	else if(host_has_cmov == 1)
+	else if (host_has_cmov == 1)
 		panic("SIGILL caused by cmov, which this processor claims to "
 		      "implement");
-	else if(host_has_cmov == -1)
+	else if (host_has_cmov == -1)
 		panic("SIGILL caused by cmov, couldn't tell if this processor "
 		      "implements it, boot a filesystem compiled for older "
 		      "processors");
diff --git a/arch/um/sys-i386/fault.c b/arch/um/sys-i386/fault.c
index 745b4fd49e9..d670f68532f 100644
--- a/arch/um/sys-i386/fault.c
+++ b/arch/um/sys-i386/fault.c
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -15,14 +15,14 @@ struct exception_table_entry
 const struct exception_table_entry *search_exception_tables(unsigned long add);
 
 /* Compare this to arch/i386/mm/extable.c:fixup_exception() */
-int arch_fixup(unsigned long address, union uml_pt_regs *regs)
+int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
 {
 	const struct exception_table_entry *fixup;
 
 	fixup = search_exception_tables(address);
-	if(fixup != 0){
+	if (fixup != 0) {
 		UPT_IP(regs) = fixup->fixup;
-		return(1);
+		return 1;
 	}
-	return(0);
+	return 0;
 }
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index a939a7ef022..67c0958eb98 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -1,106 +1,30 @@
 /*
- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/slab.h"
-#include "linux/types.h"
-#include "linux/errno.h"
-#include "linux/spinlock.h"
-#include "asm/uaccess.h"
-#include "asm/smp.h"
-#include "asm/ldt.h"
+#include "linux/mm.h"
 #include "asm/unistd.h"
-#include "choose-mode.h"
-#include "kern.h"
-#include "mode_kern.h"
 #include "os.h"
-
-extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
-
-#ifdef CONFIG_MODE_TT
-
-static long do_modify_ldt_tt(int func, void __user *ptr,
-			      unsigned long bytecount)
-{
-	struct user_desc info;
-	int res = 0;
-	void *buf = NULL;
-	void *p = NULL; /* What we pass to host. */
-
-	switch(func){
-	case 1:
-	case 0x11: /* write_ldt */
-		/* Do this check now to avoid overflows. */
-		if (bytecount != sizeof(struct user_desc)) {
-			res = -EINVAL;
-			goto out;
-		}
-
-		if(copy_from_user(&info, ptr, sizeof(info))) {
-			res = -EFAULT;
-			goto out;
-		}
-
-		p = &info;
-		break;
-	case 0:
-	case 2: /* read_ldt */
-
-		/* The use of info avoids kmalloc on the write case, not on the
-		 * read one. */
-		buf = kmalloc(bytecount, GFP_KERNEL);
-		if (!buf) {
-			res = -ENOMEM;
-			goto out;
-		}
-		p = buf;
-		break;
-	default:
-		res = -ENOSYS;
-		goto out;
-	}
-
-	res = modify_ldt(func, p, bytecount);
-	if(res < 0)
-		goto out;
-
-	switch(func){
-	case 0:
-	case 2:
-		/* Modify_ldt was for reading and returned the number of read
-		 * bytes.*/
-		if(copy_to_user(ptr, p, res))
-			res = -EFAULT;
-		break;
-	}
-
-out:
-	kfree(buf);
-	return res;
-}
-
-#endif
-
-#ifdef CONFIG_MODE_SKAS
-
+#include "proc_mm.h"
 #include "skas.h"
 #include "skas_ptrace.h"
-#include "asm/mmu_context.h"
-#include "proc_mm.h"
+#include "sysdep/tls.h"
+
+extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
 
 long write_ldt_entry(struct mm_id * mm_idp, int func, struct user_desc * desc,
 		     void **addr, int done)
 {
 	long res;
 
-	if(proc_mm){
-		/* This is a special handling for the case, that the mm to
+	if (proc_mm) {
+		/*
+		 * This is a special handling for the case, that the mm to
 		 * modify isn't current->active_mm.
 		 * If this is called directly by modify_ldt,
 		 *     (current->active_mm->context.skas.u == mm_idp)
-		 * will be true. So no call to switch_mm_skas(mm_idp) is done.
+		 * will be true. So no call to __switch_mm(mm_idp) is done.
 		 * If this is called in case of init_new_ldt or PTRACE_LDT,
 		 * mm_idp won't belong to current->active_mm, but child->mm.
 		 * So we need to switch child's mm into our userspace, then
@@ -108,12 +32,12 @@ long write_ldt_entry(struct mm_id * mm_idp, int func, struct user_desc * desc,
 		 *
 		 * Note: I'm unsure: should interrupts be disabled here?
 		 */
-		if(!current->active_mm || current->active_mm == &init_mm ||
-		   mm_idp != &current->active_mm->context.skas.id)
-			switch_mm_skas(mm_idp);
+		if (!current->active_mm || current->active_mm == &init_mm ||
+		    mm_idp != &current->active_mm->context.id)
+			__switch_mm(mm_idp);
 	}
 
-	if(ptrace_ldt) {
+	if (ptrace_ldt) {
 		struct ptrace_ldt ldt_op = (struct ptrace_ldt) {
 			.func = func,
 			.ptr = desc,
@@ -121,7 +45,7 @@ long write_ldt_entry(struct mm_id * mm_idp, int func, struct user_desc * desc,
 		u32 cpu;
 		int pid;
 
-		if(!proc_mm)
+		if (!proc_mm)
 			pid = mm_idp->u.pid;
 		else {
 			cpu = get_cpu();
@@ -130,7 +54,7 @@ long write_ldt_entry(struct mm_id * mm_idp, int func, struct user_desc * desc,
 
 		res = os_ptrace_ldt(pid, 0, (unsigned long) &ldt_op);
 
-		if(proc_mm)
+		if (proc_mm)
 			put_cpu();
 	}
 	else {
@@ -139,7 +63,7 @@ long write_ldt_entry(struct mm_id * mm_idp, int func, struct user_desc * desc,
 					(sizeof(*desc) + sizeof(long) - 1) &
 					    ~(sizeof(long) - 1),
 					addr, &stub_addr);
-		if(!res){
+		if (!res) {
 			unsigned long args[] = { func,
 						 (unsigned long)stub_addr,
 						 sizeof(*desc),
@@ -149,13 +73,14 @@ long write_ldt_entry(struct mm_id * mm_idp, int func, struct user_desc * desc,
 		}
 	}
 
-	if(proc_mm){
-		/* This is the second part of special handling, that makes
+	if (proc_mm) {
+		/*
+		 * This is the second part of special handling, that makes
 		 * PTRACE_LDT possible to implement.
 		 */
-		if(current->active_mm && current->active_mm != &init_mm &&
-		   mm_idp != &current->active_mm->context.skas.id)
-			switch_mm_skas(&current->active_mm->context.skas.id);
+		if (current->active_mm && current->active_mm != &init_mm &&
+		    mm_idp != &current->active_mm->context.id)
+			__switch_mm(&current->active_mm->context.id);
 	}
 
 	return res;
@@ -170,21 +95,22 @@ static long read_ldt_from_host(void __user * ptr, unsigned long bytecount)
 			.ptr = kmalloc(bytecount, GFP_KERNEL)};
 	u32 cpu;
 
-	if(ptrace_ldt.ptr == NULL)
+	if (ptrace_ldt.ptr == NULL)
 		return -ENOMEM;
 
-	/* This is called from sys_modify_ldt only, so userspace_pid gives
+	/*
+	 * This is called from sys_modify_ldt only, so userspace_pid gives
 	 * us the right number
 	 */
 
 	cpu = get_cpu();
 	res = os_ptrace_ldt(userspace_pid[cpu], 0, (unsigned long) &ptrace_ldt);
 	put_cpu();
-	if(res < 0)
+	if (res < 0)
 		goto out;
 
 	n = copy_to_user(ptr, ptrace_ldt.ptr, res);
-	if(n != 0)
+	if (n != 0)
 		res = -EFAULT;
 
   out:
@@ -209,35 +135,34 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
 {
 	int i, err = 0;
 	unsigned long size;
-	uml_ldt_t * ldt = &current->mm->context.skas.ldt;
+	uml_ldt_t * ldt = &current->mm->context.ldt;
 
-	if(!ldt->entry_count)
+	if (!ldt->entry_count)
 		goto out;
-	if(bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
+	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
 		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
 	err = bytecount;
 
-	if(ptrace_ldt){
+	if (ptrace_ldt)
 		return read_ldt_from_host(ptr, bytecount);
-	}
 
 	down(&ldt->semaphore);
-	if(ldt->entry_count <= LDT_DIRECT_ENTRIES){
+	if (ldt->entry_count <= LDT_DIRECT_ENTRIES) {
 		size = LDT_ENTRY_SIZE*LDT_DIRECT_ENTRIES;
-		if(size > bytecount)
+		if (size > bytecount)
 			size = bytecount;
-		if(copy_to_user(ptr, ldt->u.entries, size))
+		if (copy_to_user(ptr, ldt->u.entries, size))
 			err = -EFAULT;
 		bytecount -= size;
 		ptr += size;
 	}
 	else {
-		for(i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
-			 i++){
+		for (i=0; i<ldt->entry_count/LDT_ENTRIES_PER_PAGE && bytecount;
+		     i++) {
 			size = PAGE_SIZE;
-			if(size > bytecount)
+			if (size > bytecount)
 				size = bytecount;
-			if(copy_to_user(ptr, ldt->u.pages[i], size)){
+			if (copy_to_user(ptr, ldt->u.pages[i], size)) {
 				err = -EFAULT;
 				break;
 			}
@@ -247,10 +172,10 @@ static int read_ldt(void __user * ptr, unsigned long bytecount)
 	}
 	up(&ldt->semaphore);
 
-	if(bytecount == 0 || err == -EFAULT)
+	if (bytecount == 0 || err == -EFAULT)
 		goto out;
 
-	if(clear_user(ptr, bytecount))
+	if (clear_user(ptr, bytecount))
 		err = -EFAULT;
 
 out:
@@ -261,15 +186,16 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount)
 {
 	int err;
 
-	if(bytecount > 5*LDT_ENTRY_SIZE)
+	if (bytecount > 5*LDT_ENTRY_SIZE)
 		bytecount = 5*LDT_ENTRY_SIZE;
 
 	err = bytecount;
-	/* UML doesn't support lcall7 and lcall27.
+	/*
+	 * UML doesn't support lcall7 and lcall27.
 	 * So, we don't really have a default ldt, but emulate
 	 * an empty ldt of common host default ldt size.
 	 */
-	if(clear_user(ptr, bytecount))
+	if (clear_user(ptr, bytecount))
 		err = -EFAULT;
 
 	return err;
@@ -277,60 +203,60 @@ static int read_default_ldt(void __user * ptr, unsigned long bytecount)
 
 static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
 {
-	uml_ldt_t * ldt = &current->mm->context.skas.ldt;
-	struct mm_id * mm_idp = &current->mm->context.skas.id;
+	uml_ldt_t * ldt = &current->mm->context.ldt;
+	struct mm_id * mm_idp = &current->mm->context.id;
 	int i, err;
 	struct user_desc ldt_info;
 	struct ldt_entry entry0, *ldt_p;
 	void *addr = NULL;
 
 	err = -EINVAL;
-	if(bytecount != sizeof(ldt_info))
+	if (bytecount != sizeof(ldt_info))
 		goto out;
 	err = -EFAULT;
-	if(copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+	if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
 		goto out;
 
 	err = -EINVAL;
-	if(ldt_info.entry_number >= LDT_ENTRIES)
+	if (ldt_info.entry_number >= LDT_ENTRIES)
 		goto out;
-	if(ldt_info.contents == 3){
+	if (ldt_info.contents == 3) {
 		if (func == 1)
 			goto out;
 		if (ldt_info.seg_not_present == 0)
 			goto out;
 	}
 
-        if(!ptrace_ldt)
-                down(&ldt->semaphore);
+	if (!ptrace_ldt)
+		down(&ldt->semaphore);
 
 	err = write_ldt_entry(mm_idp, func, &ldt_info, &addr, 1);
-	if(err)
+	if (err)
 		goto out_unlock;
-        else if(ptrace_ldt) {
-	/* With PTRACE_LDT available, this is used as a flag only */
-                ldt->entry_count = 1;
-                goto out;
-        }
-
-	if(ldt_info.entry_number >= ldt->entry_count &&
-	   ldt_info.entry_number >= LDT_DIRECT_ENTRIES){
-		for(i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
-		    i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
-		    i++){
-			if(i == 0)
+	else if (ptrace_ldt) {
+		/* With PTRACE_LDT available, this is used as a flag only */
+		ldt->entry_count = 1;
+		goto out;
+	}
+
+	if (ldt_info.entry_number >= ldt->entry_count &&
+	    ldt_info.entry_number >= LDT_DIRECT_ENTRIES) {
+		for (i=ldt->entry_count/LDT_ENTRIES_PER_PAGE;
+		     i*LDT_ENTRIES_PER_PAGE <= ldt_info.entry_number;
+		     i++) {
+			if (i == 0)
 				memcpy(&entry0, ldt->u.entries,
 				       sizeof(entry0));
 			ldt->u.pages[i] = (struct ldt_entry *)
 				__get_free_page(GFP_KERNEL|__GFP_ZERO);
-			if(!ldt->u.pages[i]){
+			if (!ldt->u.pages[i]) {
 				err = -ENOMEM;
 				/* Undo the change in host */
 				memset(&ldt_info, 0, sizeof(ldt_info));
 				write_ldt_entry(mm_idp, 1, &ldt_info, &addr, 1);
 				goto out_unlock;
 			}
-			if(i == 0) {
+			if (i == 0) {
 				memcpy(ldt->u.pages[0], &entry0,
 				       sizeof(entry0));
 				memcpy(ldt->u.pages[0]+1, ldt->u.entries+1,
@@ -339,17 +265,17 @@ static int write_ldt(void __user * ptr, unsigned long bytecount, int func)
 			ldt->entry_count = (i + 1) * LDT_ENTRIES_PER_PAGE;
 		}
 	}
-	if(ldt->entry_count <= ldt_info.entry_number)
+	if (ldt->entry_count <= ldt_info.entry_number)
 		ldt->entry_count = ldt_info.entry_number + 1;
 
-	if(ldt->entry_count <= LDT_DIRECT_ENTRIES)
+	if (ldt->entry_count <= LDT_DIRECT_ENTRIES)
 		ldt_p = ldt->u.entries + ldt_info.entry_number;
 	else
 		ldt_p = ldt->u.pages[ldt_info.entry_number/LDT_ENTRIES_PER_PAGE] +
 			ldt_info.entry_number%LDT_ENTRIES_PER_PAGE;
 
-	if(ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
-	   (func == 1 || LDT_empty(&ldt_info))){
+	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
+	   (func == 1 || LDT_empty(&ldt_info))) {
 		ldt_p->a = 0;
 		ldt_p->b = 0;
 	}
@@ -400,7 +326,7 @@ static void ldt_get_host_info(void)
 
 	spin_lock(&host_ldt_lock);
 
-	if(host_ldt_entries != NULL){
+	if (host_ldt_entries != NULL) {
 		spin_unlock(&host_ldt_lock);
 		return;
 	}
@@ -408,49 +334,49 @@ static void ldt_get_host_info(void)
 
 	spin_unlock(&host_ldt_lock);
 
-	for(i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++);
+	for (i = LDT_PAGES_MAX-1, order=0; i; i>>=1, order++)
+		;
 
 	ldt = (struct ldt_entry *)
 	      __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
-	if(ldt == NULL) {
-		printk("ldt_get_host_info: couldn't allocate buffer for host "
-		       "ldt\n");
+	if (ldt == NULL) {
+		printk(KERN_ERR "ldt_get_host_info: couldn't allocate buffer "
+		       "for host ldt\n");
 		return;
 	}
 
 	ret = modify_ldt(0, ldt, (1<<order)*PAGE_SIZE);
-	if(ret < 0) {
-		printk("ldt_get_host_info: couldn't read host ldt\n");
+	if (ret < 0) {
+		printk(KERN_ERR "ldt_get_host_info: couldn't read host ldt\n");
 		goto out_free;
 	}
-	if(ret == 0) {
+	if (ret == 0) {
 		/* default_ldt is active, simply write an empty entry 0 */
 		host_ldt_entries = dummy_list;
 		goto out_free;
 	}
 
-	for(i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++){
-		if(ldt[i].a != 0 || ldt[i].b != 0)
+	for (i=0, size=0; i<ret/LDT_ENTRY_SIZE; i++) {
+		if (ldt[i].a != 0 || ldt[i].b != 0)
 			size++;
 	}
 
-	if(size < ARRAY_SIZE(dummy_list))
+	if (size < ARRAY_SIZE(dummy_list))
 		host_ldt_entries = dummy_list;
 	else {
 		size = (size + 1) * sizeof(dummy_list[0]);
 		tmp = kmalloc(size, GFP_KERNEL);
-		if(tmp == NULL) {
-			printk("ldt_get_host_info: couldn't allocate host ldt "
-			       "list\n");
+		if (tmp == NULL) {
+			printk(KERN_ERR "ldt_get_host_info: couldn't allocate "
+			       "host ldt list\n");
 			goto out_free;
 		}
 		host_ldt_entries = tmp;
 	}
 
-	for(i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++){
-		if(ldt[i].a != 0 || ldt[i].b != 0) {
+	for (i=0, k=0; i<ret/LDT_ENTRY_SIZE; i++) {
+		if (ldt[i].a != 0 || ldt[i].b != 0)
 			host_ldt_entries[k++] = i;
-		}
 	}
 	host_ldt_entries[k] = -1;
 
@@ -458,8 +384,7 @@ out_free:
 	free_pages((unsigned long)ldt, order);
 }
 
-long init_new_ldt(struct mmu_context_skas * new_mm,
-		  struct mmu_context_skas * from_mm)
+long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
 {
 	struct user_desc desc;
 	short * num_p;
@@ -469,15 +394,15 @@ long init_new_ldt(struct mmu_context_skas * new_mm,
 	struct proc_mm_op copy;
 
 
-	if(!ptrace_ldt)
+	if (!ptrace_ldt)
 		init_MUTEX(&new_mm->ldt.semaphore);
 
-	if(!from_mm){
+	if (!from_mm) {
 		memset(&desc, 0, sizeof(desc));
 		/*
 		 * We have to initialize a clean ldt.
 		 */
-		if(proc_mm) {
+		if (proc_mm) {
 			/*
 			 * If the new mm was created using proc_mm, host's
 			 * default-ldt currently is assigned, which normally
@@ -485,8 +410,7 @@ long init_new_ldt(struct mmu_context_skas * new_mm,
 			 * To remove these gates, we simply write an empty
 			 * entry as number 0 to the host.
 			 */
-			err = write_ldt_entry(&new_mm->id, 1, &desc,
-					      &addr, 1);
+			err = write_ldt_entry(&new_mm->id, 1, &desc, &addr, 1);
 		}
 		else{
 			/*
@@ -495,11 +419,11 @@ long init_new_ldt(struct mmu_context_skas * new_mm,
 			 * will be reset in the following loop
 			 */
 			ldt_get_host_info();
-			for(num_p=host_ldt_entries; *num_p != -1; num_p++){
+			for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
 				desc.entry_number = *num_p;
 				err = write_ldt_entry(&new_mm->id, 1, &desc,
 						      &addr, *(num_p + 1) == -1);
-				if(err)
+				if (err)
 					break;
 			}
 		}
@@ -508,8 +432,9 @@ long init_new_ldt(struct mmu_context_skas * new_mm,
 		goto out;
 	}
 
-	if(proc_mm){
-		/* We have a valid from_mm, so we now have to copy the LDT of
+	if (proc_mm) {
+		/*
+		 * We have a valid from_mm, so we now have to copy the LDT of
 		 * from_mm to new_mm, because using proc_mm an new mm with
 		 * an empty/default LDT was created in new_mm()
 		 */
@@ -518,27 +443,27 @@ long init_new_ldt(struct mmu_context_skas * new_mm,
 					      { .copy_segments =
 							from_mm->id.u.mm_fd } } );
 		i = os_write_file(new_mm->id.u.mm_fd, &copy, sizeof(copy));
-		if(i != sizeof(copy))
-			printk("new_mm : /proc/mm copy_segments failed, "
-			       "err = %d\n", -i);
+		if (i != sizeof(copy))
+			printk(KERN_ERR "new_mm : /proc/mm copy_segments "
+			       "failed, err = %d\n", -i);
 	}
 
-	if(!ptrace_ldt) {
-		/* Our local LDT is used to supply the data for
+	if (!ptrace_ldt) {
+		/*
+		 * Our local LDT is used to supply the data for
 		 * modify_ldt(READLDT), if PTRACE_LDT isn't available,
 		 * i.e., we have to use the stub for modify_ldt, which
 		 * can't handle the big read buffer of up to 64kB.
 		 */
 		down(&from_mm->ldt.semaphore);
-		if(from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES){
+		if (from_mm->ldt.entry_count <= LDT_DIRECT_ENTRIES)
 			memcpy(new_mm->ldt.u.entries, from_mm->ldt.u.entries,
 			       sizeof(new_mm->ldt.u.entries));
-		}
-		else{
+		else {
 			i = from_mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE;
-			while(i-->0){
+			while (i-->0) {
 				page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
-				if (!page){
+				if (!page) {
 					err = -ENOMEM;
 					break;
 				}
@@ -557,22 +482,19 @@ long init_new_ldt(struct mmu_context_skas * new_mm,
 }
 
 
-void free_ldt(struct mmu_context_skas * mm)
+void free_ldt(struct mm_context *mm)
 {
 	int i;
 
-	if(!ptrace_ldt && mm->ldt.entry_count > LDT_DIRECT_ENTRIES){
+	if (!ptrace_ldt && mm->ldt.entry_count > LDT_DIRECT_ENTRIES) {
 		i = mm->ldt.entry_count / LDT_ENTRIES_PER_PAGE;
-		while(i-- > 0){
-			free_page((long )mm->ldt.u.pages[i]);
-		}
+		while (i-- > 0)
+			free_page((long) mm->ldt.u.pages[i]);
 	}
 	mm->ldt.entry_count = 0;
 }
-#endif
 
 int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
 {
-	return CHOOSE_MODE_PROC(do_modify_ldt_tt, do_modify_ldt_skas, func,
-	                        ptr, bytecount);
+	return do_modify_ldt_skas(func, ptr, bytecount);
 }
diff --git a/arch/um/sys-i386/ptrace.c b/arch/um/sys-i386/ptrace.c
index 28bf0115032..9657c89fdf3 100644
--- a/arch/um/sys-i386/ptrace.c
+++ b/arch/um/sys-i386/ptrace.c
@@ -1,35 +1,26 @@
-/* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <linux/compiler.h>
-#include "linux/sched.h"
 #include "linux/mm.h"
-#include "asm/elf.h"
-#include "asm/ptrace.h"
+#include "linux/sched.h"
 #include "asm/uaccess.h"
-#include "asm/unistd.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/sigcontext.h"
-#include "sysdep/sc.h"
+#include "skas.h"
 
-void arch_switch_to_tt(struct task_struct *from, struct task_struct *to)
-{
-	update_debugregs(to->thread.arch.debugregs_seq);
-	arch_switch_tls_tt(from, to);
-}
+extern int arch_switch_tls(struct task_struct *from, struct task_struct *to);
 
-void arch_switch_to_skas(struct task_struct *from, struct task_struct *to)
+void arch_switch_to(struct task_struct *from, struct task_struct *to)
 {
-	int err = arch_switch_tls_skas(from, to);
+	int err = arch_switch_tls(from, to);
 	if (!err)
 		return;
 
 	if (err != -EINVAL)
-		printk(KERN_WARNING "arch_switch_tls_skas failed, errno %d, not EINVAL\n", -err);
+		printk(KERN_WARNING "arch_switch_tls failed, errno %d, "
+		       "not EINVAL\n", -err);
 	else
-		printk(KERN_WARNING "arch_switch_tls_skas failed, errno = EINVAL\n");
+		printk(KERN_WARNING "arch_switch_tls failed, errno = EINVAL\n");
 }
 
 int is_syscall(unsigned long addr)
@@ -38,21 +29,21 @@ int is_syscall(unsigned long addr)
 	int n;
 
 	n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
-	if(n){
+	if (n) {
 		/* access_process_vm() grants access to vsyscall and stub,
 		 * while copy_from_user doesn't. Maybe access_process_vm is
 		 * slow, but that doesn't matter, since it will be called only
 		 * in case of singlestepping, if copy_from_user failed.
 		 */
 		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
-		if(n != sizeof(instr)) {
-			printk("is_syscall : failed to read instruction from "
-			       "0x%lx\n", addr);
-			return(1);
+		if (n != sizeof(instr)) {
+			printk(KERN_ERR "is_syscall : failed to read "
+			       "instruction from 0x%lx\n", addr);
+			return 1;
 		}
 	}
 	/* int 0x80 or sysenter */
-	return((instr == 0x80cd) || (instr == 0x340f));
+	return (instr == 0x80cd) || (instr == 0x340f);
 }
 
 /* determines which flags the user has access to. */
@@ -96,21 +87,21 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
 
 int poke_user(struct task_struct *child, long addr, long data)
 {
-        if ((addr & 3) || addr < 0)
-                return -EIO;
-
-        if (addr < MAX_REG_OFFSET)
-                return putreg(child, addr, data);
+	if ((addr & 3) || addr < 0)
+		return -EIO;
 
-        else if((addr >= offsetof(struct user, u_debugreg[0])) &&
-                (addr <= offsetof(struct user, u_debugreg[7]))){
-                addr -= offsetof(struct user, u_debugreg[0]);
-                addr = addr >> 2;
-                if((addr == 4) || (addr == 5)) return -EIO;
-                child->thread.arch.debugregs[addr] = data;
-                return 0;
-        }
-        return -EIO;
+	if (addr < MAX_REG_OFFSET)
+		return putreg(child, addr, data);
+	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+		 (addr <= offsetof(struct user, u_debugreg[7]))) {
+		addr -= offsetof(struct user, u_debugreg[0]);
+		addr = addr >> 2;
+		if ((addr == 4) || (addr == 5))
+			return -EIO;
+		child->thread.arch.debugregs[addr] = data;
+		return 0;
+	}
+	return -EIO;
 }
 
 unsigned long getreg(struct task_struct *child, int regno)
@@ -133,20 +124,20 @@ unsigned long getreg(struct task_struct *child, int regno)
 	return retval;
 }
 
+/* read the word at location addr in the USER area. */
 int peek_user(struct task_struct *child, long addr, long data)
 {
-/* read the word at location addr in the USER area. */
 	unsigned long tmp;
 
 	if ((addr & 3) || addr < 0)
 		return -EIO;
 
 	tmp = 0;  /* Default return condition */
-	if(addr < MAX_REG_OFFSET){
+	if (addr < MAX_REG_OFFSET) {
 		tmp = getreg(child, addr);
 	}
-	else if((addr >= offsetof(struct user, u_debugreg[0])) &&
-		(addr <= offsetof(struct user, u_debugreg[7]))){
+	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+		 (addr <= offsetof(struct user, u_debugreg[7]))) {
 		addr -= offsetof(struct user, u_debugreg[0]);
 		addr = addr >> 2;
 		tmp = child->thread.arch.debugregs[addr];
@@ -154,277 +145,68 @@ int peek_user(struct task_struct *child, long addr, long data)
 	return put_user(tmp, (unsigned long __user *) data);
 }
 
-struct i387_fxsave_struct {
-	unsigned short	cwd;
-	unsigned short	swd;
-	unsigned short	twd;
-	unsigned short	fop;
-	long	fip;
-	long	fcs;
-	long	foo;
-	long	fos;
-	long	mxcsr;
-	long	reserved;
-	long	st_space[32];	/* 8*16 bytes for each FP-reg = 128 bytes */
-	long	xmm_space[32];	/* 8*16 bytes for each XMM-reg = 128 bytes */
-	long	padding[56];
-};
-
-/*
- * FPU tag word conversions.
- */
-
-static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
+int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
-	unsigned int tmp; /* to avoid 16 bit prefixes in the code */
- 
-	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
-        tmp = ~twd;
-        tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
-        /* and move the valid bits to the lower byte. */
-        tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
-        tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
-        tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
-        return tmp;
-}
+	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+	long fpregs[HOST_FP_SIZE];
 
-static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
-{
-	struct _fpxreg *st = NULL;
-	unsigned long twd = (unsigned long) fxsave->twd;
-	unsigned long tag;
-	unsigned long ret = 0xffff0000;
-	int i;
+	BUG_ON(sizeof(*buf) != sizeof(fpregs));
+	err = save_fp_registers(userspace_pid[cpu], fpregs);
+	if (err)
+		return err;
 
-#define FPREG_ADDR(f, n)	((char *)&(f)->st_space + (n) * 16);
+	n = copy_to_user((void *) buf, fpregs, sizeof(fpregs));
+	if(n > 0)
+		return -EFAULT;
 
-	for ( i = 0 ; i < 8 ; i++ ) {
-		if ( twd & 0x1 ) {
-			st = (struct _fpxreg *) FPREG_ADDR( fxsave, i );
-
-			switch ( st->exponent & 0x7fff ) {
-			case 0x7fff:
-				tag = 2;		/* Special */
-				break;
-			case 0x0000:
-				if ( !st->significand[0] &&
-				     !st->significand[1] &&
-				     !st->significand[2] &&
-				     !st->significand[3] ) {
-					tag = 1;	/* Zero */
-				} else {
-					tag = 2;	/* Special */
-				}
-				break;
-			default:
-				if ( st->significand[3] & 0x8000 ) {
-					tag = 0;	/* Valid */
-				} else {
-					tag = 2;	/* Special */
-				}
-				break;
-			}
-		} else {
-			tag = 3;			/* Empty */
-		}
-		ret |= (tag << (2 * i));
-		twd = twd >> 1;
-	}
-	return ret;
+	return n;
 }
 
-/*
- * FXSR floating point environment conversions.
- */
-
-#ifdef CONFIG_MODE_TT
-static inline int convert_fxsr_to_user_tt(struct _fpstate __user *buf,
-					  struct pt_regs *regs)
+int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
-	struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs));
-	unsigned long env[7];
-	struct _fpreg __user *to;
-	struct _fpxreg *from;
-	int i;
+	int n, cpu = ((struct thread_info *) child->stack)->cpu;
+	long fpregs[HOST_FP_SIZE];
 
-	env[0] = (unsigned long)fxsave->cwd | 0xffff0000;
-	env[1] = (unsigned long)fxsave->swd | 0xffff0000;
-	env[2] = twd_fxsr_to_i387(fxsave);
-	env[3] = fxsave->fip;
-	env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
-	env[5] = fxsave->foo;
-	env[6] = fxsave->fos;
+	BUG_ON(sizeof(*buf) != sizeof(fpregs));
+	n = copy_from_user(fpregs, (void *) buf, sizeof(fpregs));
+	if (n > 0)
+		return -EFAULT;
 
-	if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) )
-		return 1;
-
-	to = &buf->_st[0];
-	from = (struct _fpxreg *) &fxsave->st_space[0];
-	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
-		if ( __copy_to_user( to, from, sizeof(*to) ) )
-			return 1;
-	}
-	return 0;
+	return restore_fp_registers(userspace_pid[cpu], fpregs);
 }
-#endif
 
-static inline int convert_fxsr_to_user(struct _fpstate __user *buf,
-				       struct pt_regs *regs)
+int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
 {
-	return(CHOOSE_MODE(convert_fxsr_to_user_tt(buf, regs), 0));
-}
+	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+	long fpregs[HOST_XFP_SIZE];
 
-#ifdef CONFIG_MODE_TT
-static inline int convert_fxsr_from_user_tt(struct pt_regs *regs,
-					    struct _fpstate __user *buf)
-{
-	struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs));
-	unsigned long env[7];
-	struct _fpxreg *to;
-	struct _fpreg __user *from;
-	int i;
-
-	if ( __copy_from_user( env, buf, 7 * sizeof(long) ) )
-		return 1;
+	BUG_ON(sizeof(*buf) != sizeof(fpregs));
+	err = save_fpx_registers(userspace_pid[cpu], fpregs);
+	if (err)
+		return err;
 
-	fxsave->cwd = (unsigned short)(env[0] & 0xffff);
-	fxsave->swd = (unsigned short)(env[1] & 0xffff);
-	fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
-	fxsave->fip = env[3];
-	fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16);
-	fxsave->fcs = (env[4] & 0xffff);
-	fxsave->foo = env[5];
-	fxsave->fos = env[6];
+	n = copy_to_user((void *) buf, fpregs, sizeof(fpregs));
+	if(n > 0)
+		return -EFAULT;
 
-	to = (struct _fpxreg *) &fxsave->st_space[0];
-	from = &buf->_st[0];
-	for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
-		if ( __copy_from_user( to, from, sizeof(*from) ) )
-			return 1;
-	}
-	return 0;
-}
-#endif
-
-static inline int convert_fxsr_from_user(struct pt_regs *regs, 
-					 struct _fpstate __user *buf)
-{
-	return(CHOOSE_MODE(convert_fxsr_from_user_tt(regs, buf), 0));
-}
-
-int get_fpregs(unsigned long buf, struct task_struct *child)
-{
-	int err;
-
-	err = convert_fxsr_to_user((struct _fpstate __user *) buf,
-				   &child->thread.regs);
-	if(err) return(-EFAULT);
-	else return(0);
-}
-
-int set_fpregs(unsigned long buf, struct task_struct *child)
-{
-	int err;
-
-	err = convert_fxsr_from_user(&child->thread.regs, 
-				     (struct _fpstate __user *) buf);
-	if(err) return(-EFAULT);
-	else return(0);
-}
-
-#ifdef CONFIG_MODE_TT
-int get_fpxregs_tt(unsigned long buf, struct task_struct *tsk)
-{
-	struct pt_regs *regs = &tsk->thread.regs;
-	struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs));
-	int err;
-
-	err = __copy_to_user((void __user *) buf, fxsave,
-			     sizeof(struct user_fxsr_struct));
-	if(err) return -EFAULT;
-	else return 0;
-}
-#endif
-
-int get_fpxregs(unsigned long buf, struct task_struct *tsk)
-{
-	return(CHOOSE_MODE(get_fpxregs_tt(buf, tsk), 0));
-}
-
-#ifdef CONFIG_MODE_TT
-int set_fpxregs_tt(unsigned long buf, struct task_struct *tsk)
-{
-	struct pt_regs *regs = &tsk->thread.regs;
-	struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs));
-	int err;
-
-	err = __copy_from_user(fxsave, (void __user *) buf,
-			       sizeof(struct user_fxsr_struct) );
-	if(err) return -EFAULT;
-	else return 0;
-}
-#endif
-
-int set_fpxregs(unsigned long buf, struct task_struct *tsk)
-{
-	return(CHOOSE_MODE(set_fpxregs_tt(buf, tsk), 0));
-}
-
-#ifdef notdef
-int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
-{
-	fpu->cwd = (((SC_FP_CW(PT_REGS_SC(regs)) & 0xffff) << 16) |
-		    (SC_FP_SW(PT_REGS_SC(regs)) & 0xffff));
-	fpu->swd = SC_FP_CSSEL(PT_REGS_SC(regs)) & 0xffff;
-	fpu->twd = SC_FP_IPOFF(PT_REGS_SC(regs));
-	fpu->fip = SC_FP_CSSEL(PT_REGS_SC(regs)) & 0xffff;
-	fpu->fcs = SC_FP_DATAOFF(PT_REGS_SC(regs));
-	fpu->foo = SC_FP_DATASEL(PT_REGS_SC(regs));
-	fpu->fos = 0;
-	memcpy(fpu->st_space, (void *) SC_FP_ST(PT_REGS_SC(regs)),
-	       sizeof(fpu->st_space));
-	return(1);
+	return n;
 }
-#endif
 
-#ifdef CONFIG_MODE_TT
-static inline void copy_fpu_fxsave_tt(struct pt_regs *regs,
-				      struct user_i387_struct *buf)
+int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
 {
-	struct i387_fxsave_struct *fpu = SC_FXSR_ENV(PT_REGS_SC(regs));
-	unsigned short *to;
-	unsigned short *from;
-	int i;
+	int n, cpu = ((struct thread_info *) child->stack)->cpu;
+	long fpregs[HOST_XFP_SIZE];
 
-	memcpy( buf, fpu, 7 * sizeof(long) );
+	BUG_ON(sizeof(*buf) != sizeof(fpregs));
+	n = copy_from_user(fpregs, (void *) buf, sizeof(fpregs));
+	if (n > 0)
+		return -EFAULT;
 
-	to = (unsigned short *) &buf->st_space[0];
-	from = (unsigned short *) &fpu->st_space[0];
-	for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
-		memcpy( to, from, 5 * sizeof(unsigned short) );
-	}
+	return restore_fpx_registers(userspace_pid[cpu], fpregs);
 }
-#endif
 
-static inline void copy_fpu_fxsave(struct pt_regs *regs,
-				   struct user_i387_struct *buf)
+long subarch_ptrace(struct task_struct *child, long request, long addr,
+		    long data)
 {
-	(void) CHOOSE_MODE(copy_fpu_fxsave_tt(regs, buf), 0);
+	return -EIO;
 }
-
-int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu )
-{
-	copy_fpu_fxsave(regs, (struct user_i387_struct *) fpu);
-	return(1);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c
index 40ff0c831bd..5cf97bc229b 100644
--- a/arch/um/sys-i386/ptrace_user.c
+++ b/arch/um/sys-i386/ptrace_user.c
@@ -1,20 +1,10 @@
 /*
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include <stdio.h>
-#include <stddef.h>
 #include <errno.h>
-#include <unistd.h>
-#include "ptrace_user.h"
-/* Grr, asm/user.h includes asm/ptrace.h, so has to follow ptrace_user.h */
-#include <asm/user.h>
-#include "kern_util.h"
-#include "sysdep/thread.h"
-#include "user.h"
-#include "os.h"
-#include "uml-config.h"
+#include <sys/ptrace.h>
 
 int ptrace_getregs(long pid, unsigned long *regs_out)
 {
@@ -43,89 +33,3 @@ int ptrace_setfpregs(long pid, unsigned long *regs)
 		return -errno;
 	return 0;
 }
-
-#ifdef UML_CONFIG_MODE_TT
-
-static void write_debugregs(int pid, unsigned long *regs)
-{
-	struct user *dummy;
-	int nregs, i;
-
-	dummy = NULL;
-	nregs = ARRAY_SIZE(dummy->u_debugreg);
-	for(i = 0; i < nregs; i++){
-		if((i == 4) || (i == 5)) continue;
-		if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i],
-			  regs[i]) < 0)
-			printk("write_debugregs - ptrace failed on "
-			       "register %d, value = 0x%lx, errno = %d\n", i,
-			       regs[i], errno);
-	}
-}
-
-static void read_debugregs(int pid, unsigned long *regs)
-{
-	struct user *dummy;
-	int nregs, i;
-
-	dummy = NULL;
-	nregs = ARRAY_SIZE(dummy->u_debugreg);
-	for(i = 0; i < nregs; i++){
-		regs[i] = ptrace(PTRACE_PEEKUSR, pid,
-				 &dummy->u_debugreg[i], 0);
-	}
-}
-
-/* Accessed only by the tracing thread */
-static unsigned long kernel_debugregs[8] = { [ 0 ... 7 ] = 0 };
-
-void arch_enter_kernel(void *task, int pid)
-{
-	read_debugregs(pid, TASK_DEBUGREGS(task));
-	write_debugregs(pid, kernel_debugregs);
-}
-
-void arch_leave_kernel(void *task, int pid)
-{
-	read_debugregs(pid, kernel_debugregs);
-	write_debugregs(pid, TASK_DEBUGREGS(task));
-}
-
-#ifdef UML_CONFIG_PT_PROXY
-/* Accessed only by the tracing thread */
-static int debugregs_seq;
-
-/* Only called by the ptrace proxy */
-void ptrace_pokeuser(unsigned long addr, unsigned long data)
-{
-	if((addr < offsetof(struct user, u_debugreg[0])) ||
-	   (addr > offsetof(struct user, u_debugreg[7])))
-		return;
-	addr -= offsetof(struct user, u_debugreg[0]);
-	addr = addr >> 2;
-	if(kernel_debugregs[addr] == data) return;
-
-	kernel_debugregs[addr] = data;
-	debugregs_seq++;
-}
-
-static void update_debugregs_cb(void *arg)
-{
-	int pid = *((int *) arg);
-
-	write_debugregs(pid, kernel_debugregs);
-}
-
-/* Optimized out in its header when not defined */
-void update_debugregs(int seq)
-{
-	int me;
-
-	if(seq == debugregs_seq) return;
-
-	me = os_getpid();
-	initial_thread_cb(update_debugregs_cb, &me);
-}
-#endif
-
-#endif
diff --git a/arch/um/sys-i386/sigcontext.c b/arch/um/sys-i386/sigcontext.c
deleted file mode 100644
index 467d489c31c..00000000000
--- a/arch/um/sys-i386/sigcontext.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stddef.h>
-#include <string.h>
-#include <asm/ptrace.h>
-#include <asm/sigcontext.h>
-#include "sysdep/ptrace.h"
-#include "kern_util.h"
-
-void sc_to_sc(void *to_ptr, void *from_ptr)
-{
-	struct sigcontext *to = to_ptr, *from = from_ptr;
-
-	memcpy(to, from, sizeof(*to) + sizeof(struct _fpstate));
-	if(from->fpstate != NULL)
-		to->fpstate = (struct _fpstate *) (to + 1);
-}
-
-unsigned long *sc_sigmask(void *sc_ptr)
-{
-	struct sigcontext *sc = sc_ptr;
-	return &sc->oldmask;
-}
-
-int sc_get_fpregs(unsigned long buf, void *sc_ptr)
-{
-	struct sigcontext *sc = sc_ptr;
-	struct _fpstate *from = sc->fpstate, *to = (struct _fpstate *) buf;
-	int err = 0;
-
-	if(from == NULL){
-		err |= clear_user_proc(&to->cw, sizeof(to->cw));
-		err |= clear_user_proc(&to->sw, sizeof(to->sw));
-		err |= clear_user_proc(&to->tag, sizeof(to->tag));
-		err |= clear_user_proc(&to->ipoff, sizeof(to->ipoff));
-		err |= clear_user_proc(&to->cssel, sizeof(to->cssel));
-		err |= clear_user_proc(&to->dataoff, sizeof(to->dataoff));
-		err |= clear_user_proc(&to->datasel, sizeof(to->datasel));
-		err |= clear_user_proc(&to->_st, sizeof(to->_st));
-	}
-	else {
-		err |= copy_to_user_proc(&to->cw, &from->cw, sizeof(to->cw));
-		err |= copy_to_user_proc(&to->sw, &from->sw, sizeof(to->sw));
-		err |= copy_to_user_proc(&to->tag, &from->tag, 
-					 sizeof(to->tag));
-		err |= copy_to_user_proc(&to->ipoff, &from->ipoff, 
-					 sizeof(to->ipoff));
-		err |= copy_to_user_proc(&to->cssel,& from->cssel, 
-					 sizeof(to->cssel));
-		err |= copy_to_user_proc(&to->dataoff, &from->dataoff, 
-				    sizeof(to->dataoff));
-		err |= copy_to_user_proc(&to->datasel, &from->datasel, 
-				    sizeof(to->datasel));
-		err |= copy_to_user_proc(to->_st, from->_st, sizeof(to->_st));
-	}
-	return(err);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/sys-i386/signal.c b/arch/um/sys-i386/signal.c
index 1cbf95f6858..0147227ce18 100644
--- a/arch/um/sys-i386/signal.c
+++ b/arch/um/sys-i386/signal.c
@@ -1,189 +1,293 @@
 /*
- * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
+ * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/signal.h"
 #include "linux/ptrace.h"
-#include "asm/current.h"
-#include "asm/ucontext.h"
-#include "asm/uaccess.h"
 #include "asm/unistd.h"
+#include "asm/uaccess.h"
+#include "asm/ucontext.h"
 #include "frame_kern.h"
-#include "sigcontext.h"
-#include "registers.h"
-#include "mode.h"
-
-#ifdef CONFIG_MODE_SKAS
-
 #include "skas.h"
 
-void copy_sc(union uml_pt_regs *regs, void *from)
+void copy_sc(struct uml_pt_regs *regs, void *from)
 {
 	struct sigcontext *sc = from;
 
-	REGS_GS(regs->skas.regs) = sc->gs;
-	REGS_FS(regs->skas.regs) = sc->fs;
-	REGS_ES(regs->skas.regs) = sc->es;
-	REGS_DS(regs->skas.regs) = sc->ds;
-	REGS_EDI(regs->skas.regs) = sc->edi;
-	REGS_ESI(regs->skas.regs) = sc->esi;
-	REGS_EBP(regs->skas.regs) = sc->ebp;
-	REGS_SP(regs->skas.regs) = sc->esp;
-	REGS_EBX(regs->skas.regs) = sc->ebx;
-	REGS_EDX(regs->skas.regs) = sc->edx;
-	REGS_ECX(regs->skas.regs) = sc->ecx;
-	REGS_EAX(regs->skas.regs) = sc->eax;
-	REGS_IP(regs->skas.regs) = sc->eip;
-	REGS_CS(regs->skas.regs) = sc->cs;
-	REGS_EFLAGS(regs->skas.regs) = sc->eflags;
-	REGS_SS(regs->skas.regs) = sc->ss;
+	REGS_GS(regs->gp) = sc->gs;
+	REGS_FS(regs->gp) = sc->fs;
+	REGS_ES(regs->gp) = sc->es;
+	REGS_DS(regs->gp) = sc->ds;
+	REGS_EDI(regs->gp) = sc->edi;
+	REGS_ESI(regs->gp) = sc->esi;
+	REGS_EBP(regs->gp) = sc->ebp;
+	REGS_SP(regs->gp) = sc->esp;
+	REGS_EBX(regs->gp) = sc->ebx;
+	REGS_EDX(regs->gp) = sc->edx;
+	REGS_ECX(regs->gp) = sc->ecx;
+	REGS_EAX(regs->gp) = sc->eax;
+	REGS_IP(regs->gp) = sc->eip;
+	REGS_CS(regs->gp) = sc->cs;
+	REGS_EFLAGS(regs->gp) = sc->eflags;
+	REGS_SS(regs->gp) = sc->ss;
 }
 
-static int copy_sc_from_user_skas(struct pt_regs *regs,
-				  struct sigcontext __user *from)
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
 {
-  	struct sigcontext sc;
-	unsigned long fpregs[HOST_FP_SIZE];
-	int err;
+	unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
+	tmp = ~twd;
+	tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+	/* and move the valid bits to the lower byte. */
+	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+	return tmp;
+}
 
-	err = copy_from_user(&sc, from, sizeof(sc));
-	err |= copy_from_user(fpregs, sc.fpstate, sizeof(fpregs));
-	if(err)
-		return err;
+static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
+{
+	struct _fpxreg *st = NULL;
+	unsigned long twd = (unsigned long) fxsave->twd;
+	unsigned long tag;
+	unsigned long ret = 0xffff0000;
+	int i;
+
+#define FPREG_ADDR(f, n)	((char *)&(f)->st_space + (n) * 16)
+
+	for (i = 0; i < 8; i++) {
+		if (twd & 0x1) {
+			st = (struct _fpxreg *) FPREG_ADDR(fxsave, i);
+
+			switch (st->exponent & 0x7fff) {
+			case 0x7fff:
+				tag = 2;		/* Special */
+				break;
+			case 0x0000:
+				if ( !st->significand[0] &&
+				     !st->significand[1] &&
+				     !st->significand[2] &&
+				     !st->significand[3] ) {
+					tag = 1;	/* Zero */
+				} else {
+					tag = 2;	/* Special */
+				}
+				break;
+			default:
+				if (st->significand[3] & 0x8000) {
+					tag = 0;	/* Valid */
+				} else {
+					tag = 2;	/* Special */
+				}
+				break;
+			}
+		} else {
+			tag = 3;			/* Empty */
+		}
+		ret |= (tag << (2 * i));
+		twd = twd >> 1;
+	}
+	return ret;
+}
 
-	copy_sc(&regs->regs, &sc);
+static int convert_fxsr_to_user(struct _fpstate __user *buf,
+				struct user_fxsr_struct *fxsave)
+{
+	unsigned long env[7];
+	struct _fpreg __user *to;
+	struct _fpxreg *from;
+	int i;
+
+	env[0] = (unsigned long)fxsave->cwd | 0xffff0000ul;
+	env[1] = (unsigned long)fxsave->swd | 0xffff0000ul;
+	env[2] = twd_fxsr_to_i387(fxsave);
+	env[3] = fxsave->fip;
+	env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16);
+	env[5] = fxsave->foo;
+	env[6] = fxsave->fos;
+
+	if (__copy_to_user(buf, env, 7 * sizeof(unsigned long)))
+		return 1;
 
-	err = restore_fp_registers(userspace_pid[0], fpregs);
-	if(err < 0) {
-	  	printk("copy_sc_from_user_skas - PTRACE_SETFPREGS failed, "
-		       "errno = %d\n", -err);
-		return err;
-	}
+	to = &buf->_st[0];
+	from = (struct _fpxreg *) &fxsave->st_space[0];
+	for (i = 0; i < 8; i++, to++, from++) {
+		unsigned long __user *t = (unsigned long __user *)to;
+		unsigned long *f = (unsigned long *)from;
 
+		if (__put_user(*f, t) ||
+				__put_user(*(f + 1), t + 1) ||
+				__put_user(from->exponent, &to->exponent))
+			return 1;
+	}
 	return 0;
 }
 
-int copy_sc_to_user_skas(struct sigcontext __user *to, struct _fpstate __user *to_fp,
-                         struct pt_regs *regs, unsigned long sp)
+static int convert_fxsr_from_user(struct user_fxsr_struct *fxsave,
+				  struct _fpstate __user *buf)
 {
-  	struct sigcontext sc;
-	unsigned long fpregs[HOST_FP_SIZE];
-	struct faultinfo * fi = &current->thread.arch.faultinfo;
-	int err;
+	unsigned long env[7];
+	struct _fpxreg *to;
+	struct _fpreg __user *from;
+	int i;
 
-	sc.gs = REGS_GS(regs->regs.skas.regs);
-	sc.fs = REGS_FS(regs->regs.skas.regs);
-	sc.es = REGS_ES(regs->regs.skas.regs);
-	sc.ds = REGS_DS(regs->regs.skas.regs);
-	sc.edi = REGS_EDI(regs->regs.skas.regs);
-	sc.esi = REGS_ESI(regs->regs.skas.regs);
-	sc.ebp = REGS_EBP(regs->regs.skas.regs);
-	sc.esp = sp;
-	sc.ebx = REGS_EBX(regs->regs.skas.regs);
-	sc.edx = REGS_EDX(regs->regs.skas.regs);
-	sc.ecx = REGS_ECX(regs->regs.skas.regs);
-	sc.eax = REGS_EAX(regs->regs.skas.regs);
-	sc.eip = REGS_IP(regs->regs.skas.regs);
-	sc.cs = REGS_CS(regs->regs.skas.regs);
-	sc.eflags = REGS_EFLAGS(regs->regs.skas.regs);
-	sc.esp_at_signal = regs->regs.skas.regs[UESP];
-	sc.ss = regs->regs.skas.regs[SS];
-        sc.cr2 = fi->cr2;
-        sc.err = fi->error_code;
-        sc.trapno = fi->trap_no;
-
-	err = save_fp_registers(userspace_pid[0], fpregs);
-	if(err < 0){
-	  	printk("copy_sc_to_user_skas - PTRACE_GETFPREGS failed, "
-		       "errno = %d\n", err);
+	if (copy_from_user(env, buf, sizeof(env)))
 		return 1;
-	}
-	to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1));
-	sc.fpstate = to_fp;
 
-	if(err)
-	  	return err;
-
-	return copy_to_user(to, &sc, sizeof(sc)) ||
-	       copy_to_user(to_fp, fpregs, sizeof(fpregs));
+	fxsave->cwd = (unsigned short)(env[0] & 0xffff);
+	fxsave->swd = (unsigned short)(env[1] & 0xffff);
+	fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
+	fxsave->fip = env[3];
+	fxsave->fop = (unsigned short)((env[4] & 0xffff0000ul) >> 16);
+	fxsave->fcs = (env[4] & 0xffff);
+	fxsave->foo = env[5];
+	fxsave->fos = env[6];
+
+	to = (struct _fpxreg *) &fxsave->st_space[0];
+	from = &buf->_st[0];
+	for (i = 0; i < 8; i++, to++, from++) {
+		unsigned long *t = (unsigned long *)to;
+		unsigned long __user *f = (unsigned long __user *)from;
+
+		if (__get_user(*t, f) ||
+		    __get_user(*(t + 1), f + 1) ||
+		    __get_user(to->exponent, &from->exponent))
+			return 1;
+	}
+	return 0;
 }
-#endif
 
-#ifdef CONFIG_MODE_TT
+extern int have_fpx_regs;
 
-/* These copy a sigcontext to/from userspace.  They copy the fpstate pointer,
- * blowing away the old, good one.  So, that value is saved, and then restored
- * after the sigcontext copy.  In copy_from, the variable holding the saved
- * fpstate pointer, and the sigcontext that it should be restored to are both
- * in the kernel, so we can just restore using an assignment.  In copy_to, the
- * saved pointer is in the kernel, but the sigcontext is in userspace, so we
- * copy_to_user it.
- */
-int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext __user *from,
-			 int fpsize)
+static int copy_sc_from_user(struct pt_regs *regs,
+			     struct sigcontext __user *from)
 {
-	struct _fpstate *to_fp;
-	struct _fpstate __user *from_fp;
-	unsigned long sigs;
+	struct sigcontext sc;
 	int err;
 
-	to_fp = to->fpstate;
-	sigs = to->oldmask;
-	err = copy_from_user(to, from, sizeof(*to));
-	from_fp = to->fpstate;
-	to->oldmask = sigs;
-	to->fpstate = to_fp;
-	if(to_fp != NULL)
-		err |= copy_from_user(to_fp, from_fp, fpsize);
-	return err;
+	err = copy_from_user(&sc, from, sizeof(sc));
+	if (err)
+		return err;
+
+	copy_sc(&regs->regs, &sc);
+	if (have_fpx_regs) {
+		struct user_fxsr_struct fpx;
+
+		err = copy_from_user(&fpx, &sc.fpstate->_fxsr_env[0],
+				     sizeof(struct user_fxsr_struct));
+		if (err)
+			return 1;
+
+		err = convert_fxsr_from_user(&fpx, sc.fpstate);
+		if (err)
+			return 1;
+
+		err = restore_fpx_registers(userspace_pid[current_thread->cpu],
+					    (unsigned long *) &fpx);
+		if (err < 0) {
+			printk(KERN_ERR "copy_sc_from_user - "
+			       "restore_fpx_registers failed, errno = %d\n",
+			       -err);
+			return 1;
+		}
+	}
+	else {
+		struct user_i387_struct fp;
+
+		err = copy_from_user(&fp, sc.fpstate,
+				     sizeof(struct user_i387_struct));
+		if (err)
+			return 1;
+
+		err = restore_fp_registers(userspace_pid[current_thread->cpu],
+					   (unsigned long *) &fp);
+		if (err < 0) {
+			printk(KERN_ERR "copy_sc_from_user - "
+			       "restore_fp_registers failed, errno = %d\n",
+			       -err);
+			return 1;
+		}
+	}
+
+	return 0;
 }
 
-int copy_sc_to_user_tt(struct sigcontext __user *to, struct _fpstate __user *fp,
-		       struct sigcontext *from, int fpsize, unsigned long sp)
+static int copy_sc_to_user(struct sigcontext __user *to,
+			   struct _fpstate __user *to_fp, struct pt_regs *regs,
+			   unsigned long sp)
 {
-	struct _fpstate __user *to_fp;
-	struct _fpstate *from_fp;
+	struct sigcontext sc;
+	struct faultinfo * fi = &current->thread.arch.faultinfo;
 	int err;
 
-	to_fp =	(fp ? fp : (struct _fpstate __user *) (to + 1));
-	from_fp = from->fpstate;
-	err = copy_to_user(to, from, sizeof(*to));
+	sc.gs = REGS_GS(regs->regs.gp);
+	sc.fs = REGS_FS(regs->regs.gp);
+	sc.es = REGS_ES(regs->regs.gp);
+	sc.ds = REGS_DS(regs->regs.gp);
+	sc.edi = REGS_EDI(regs->regs.gp);
+	sc.esi = REGS_ESI(regs->regs.gp);
+	sc.ebp = REGS_EBP(regs->regs.gp);
+	sc.esp = sp;
+	sc.ebx = REGS_EBX(regs->regs.gp);
+	sc.edx = REGS_EDX(regs->regs.gp);
+	sc.ecx = REGS_ECX(regs->regs.gp);
+	sc.eax = REGS_EAX(regs->regs.gp);
+	sc.eip = REGS_IP(regs->regs.gp);
+	sc.cs = REGS_CS(regs->regs.gp);
+	sc.eflags = REGS_EFLAGS(regs->regs.gp);
+	sc.esp_at_signal = regs->regs.gp[UESP];
+	sc.ss = regs->regs.gp[SS];
+	sc.cr2 = fi->cr2;
+	sc.err = fi->error_code;
+	sc.trapno = fi->trap_no;
 
-	/* The SP in the sigcontext is the updated one for the signal
-	 * delivery.  The sp passed in is the original, and this needs
-	 * to be restored, so we stick it in separately.
-	 */
-	err |= copy_to_user(&SC_SP(to), &sp, sizeof(sp));
+	to_fp = (to_fp ? to_fp : (struct _fpstate __user *) (to + 1));
+	sc.fpstate = to_fp;
 
-	if(from_fp != NULL){
-		err |= copy_to_user(&to->fpstate, &to_fp, sizeof(to->fpstate));
-		err |= copy_to_user(to_fp, from_fp, fpsize);
+	if (have_fpx_regs) {
+		struct user_fxsr_struct fpx;
+
+		err = save_fpx_registers(userspace_pid[current_thread->cpu],
+					 (unsigned long *) &fpx);
+		if (err < 0) {
+			printk(KERN_ERR "copy_sc_to_user - save_fpx_registers "
+			       "failed, errno = %d\n", -err);
+			return 1;
+		}
+
+		err = convert_fxsr_to_user(to_fp, &fpx);
+		if (err)
+			return 1;
+
+		err |= __put_user(fpx.swd, &to_fp->status);
+		err |= __put_user(X86_FXSR_MAGIC, &to_fp->magic);
+		if (err)
+			return 1;
+
+		if (copy_to_user(&to_fp->_fxsr_env[0], &fpx,
+				 sizeof(struct user_fxsr_struct)))
+			return 1;
 	}
-	return err;
-}
-#endif
-
-static int copy_sc_from_user(struct pt_regs *to, void __user *from)
-{
-	int ret;
+	else {
+		struct user_i387_struct fp;
 
-	ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from,
-					       sizeof(struct _fpstate)),
-			  copy_sc_from_user_skas(to, from));
-	return ret;
-}
+		err = save_fp_registers(userspace_pid[current_thread->cpu],
+					(unsigned long *) &fp);
+		if (err < 0 || copy_to_user(to_fp, &fp, sizeof(fp)))
+			return 1;
+	}
 
-static int copy_sc_to_user(struct sigcontext __user *to, struct _fpstate __user *fp,
-			   struct pt_regs *from, unsigned long sp)
-{
-	return CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs),
-					      sizeof(*fp), sp),
-                           copy_sc_to_user_skas(to, fp, from, sp));
+	return copy_to_user(to, &sc, sizeof(sc));
 }
 
-static int copy_ucontext_to_user(struct ucontext __user *uc, struct _fpstate __user *fp,
-				 sigset_t *set, unsigned long sp)
+static int copy_ucontext_to_user(struct ucontext __user *uc,
+				 struct _fpstate __user *fp, sigset_t *set,
+				 unsigned long sp)
 {
 	int err = 0;
 
@@ -233,7 +337,7 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 		return 1;
 
 	restorer = frame->retcode;
-	if(ka->sa.sa_flags & SA_RESTORER)
+	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
 	/* Update SP now because the page fault handler refuses to extend
@@ -265,7 +369,7 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 	err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2));
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
 
-	if(err)
+	if (err)
 		goto err;
 
 	PT_REGS_SP(regs) = (unsigned long) frame;
@@ -298,7 +402,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 		return 1;
 
 	restorer = frame->retcode;
-	if(ka->sa.sa_flags & SA_RESTORER)
+	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
 
 	/* See comment above about why this is here */
@@ -323,7 +427,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1));
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
 
-	if(err)
+	if (err)
 		goto err;
 
 	PT_REGS_IP(regs) = (unsigned long) ka->sa.sa_handler;
@@ -350,8 +454,8 @@ long sys_sigreturn(struct pt_regs regs)
 	unsigned long __user *extramask = frame->extramask;
 	int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
 
-	if(copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) ||
-	   copy_from_user(&set.sig[1], extramask, sig_size))
+	if (copy_from_user(&set.sig[0], oldmask, sizeof(set.sig[0])) ||
+	    copy_from_user(&set.sig[1], extramask, sig_size))
 		goto segfault;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
@@ -361,7 +465,7 @@ long sys_sigreturn(struct pt_regs regs)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	if(copy_sc_from_user(&current->thread.regs, sc))
+	if (copy_sc_from_user(&current->thread.regs, sc))
 		goto segfault;
 
 	/* Avoid ERESTART handling */
@@ -376,12 +480,13 @@ long sys_sigreturn(struct pt_regs regs)
 long sys_rt_sigreturn(struct pt_regs regs)
 {
 	unsigned long sp = PT_REGS_SP(&current->thread.regs);
-	struct rt_sigframe __user *frame = (struct rt_sigframe __user *) (sp - 4);
+	struct rt_sigframe __user *frame =
+		(struct rt_sigframe __user *) (sp - 4);
 	sigset_t set;
 	struct ucontext __user *uc = &frame->uc;
 	int sig_size = _NSIG_WORDS * sizeof(unsigned long);
 
-	if(copy_from_user(&set, &uc->uc_sigmask, sig_size))
+	if (copy_from_user(&set, &uc->uc_sigmask, sig_size))
 		goto segfault;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
@@ -391,7 +496,7 @@ long sys_rt_sigreturn(struct pt_regs regs)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	if(copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
+	if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
 		goto segfault;
 
 	/* Avoid ERESTART handling */
diff --git a/arch/um/sys-i386/stub.S b/arch/um/sys-i386/stub.S
index 6a70d9ab5c2..e730772c401 100644
--- a/arch/um/sys-i386/stub.S
+++ b/arch/um/sys-i386/stub.S
@@ -1,4 +1,5 @@
 #include "uml-config.h"
+#include "as-layout.h"
 
 	.globl syscall_stub
 .section .__syscall_stub, "x"
@@ -6,7 +7,7 @@
 	.globl batch_syscall_stub
 batch_syscall_stub:
 	/* load pointer to first operation */
-	mov	$(UML_CONFIG_STUB_DATA+8), %esp
+	mov	$(ASM_STUB_DATA+8), %esp
 
 again:
 	/* load length of additional data */
@@ -14,12 +15,12 @@ again:
 
 	/* if(length == 0) : end of list */
 	/* write possible 0 to header */
-	mov	%eax, UML_CONFIG_STUB_DATA+4
+	mov	%eax, ASM_STUB_DATA+4
 	cmpl	$0, %eax
 	jz	done
 
 	/* save current pointer */
-	mov	%esp, UML_CONFIG_STUB_DATA+4
+	mov	%esp, ASM_STUB_DATA+4
 
 	/* skip additional data */
 	add	%eax, %esp
@@ -45,7 +46,7 @@ again:
 
 done:
 	/* save return value */
-	mov	%eax, UML_CONFIG_STUB_DATA
+	mov	%eax, ASM_STUB_DATA
 
 	/* stop */
 	int3
diff --git a/arch/um/sys-i386/stub_segv.c b/arch/um/sys-i386/stub_segv.c
index 2355dc19c46..b3999cb76bf 100644
--- a/arch/um/sys-i386/stub_segv.c
+++ b/arch/um/sys-i386/stub_segv.c
@@ -6,6 +6,7 @@
 #include <signal.h>
 #include <sys/select.h> /* The only way I can see to get sigset_t */
 #include <asm/unistd.h>
+#include "as-layout.h"
 #include "uml-config.h"
 #include "sysdep/stub.h"
 #include "sysdep/sigcontext.h"
@@ -17,8 +18,7 @@ stub_segv_handler(int sig)
 	struct sigcontext *sc = (struct sigcontext *) (&sig + 1);
 	int pid;
 
-	GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA),
-			      sc);
+	GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA), sc);
 
 	pid = stub_syscall0(__NR_getpid);
 	stub_syscall2(__NR_kill, pid, SIGUSR1);
diff --git a/arch/um/sys-i386/tls.c b/arch/um/sys-i386/tls.c
index fea8e5e15cc..b02266ab5c5 100644
--- a/arch/um/sys-i386/tls.c
+++ b/arch/um/sys-i386/tls.c
@@ -3,25 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
+#include "linux/percpu.h"
 #include "linux/sched.h"
-#include "linux/slab.h"
-#include "linux/types.h"
 #include "asm/uaccess.h"
-#include "asm/ptrace.h"
-#include "asm/segment.h"
-#include "asm/smp.h"
-#include "asm/desc.h"
-#include "choose-mode.h"
-#include "kern.h"
-#include "kern_util.h"
-#include "mode_kern.h"
 #include "os.h"
-#include "mode.h"
-
-#ifdef CONFIG_MODE_SKAS
 #include "skas.h"
-#endif
+#include "sysdep/tls.h"
 
 /*
  * If needed we can detect when it's uninitialized.
@@ -31,8 +18,7 @@
 static int host_supports_tls = -1;
 int host_gdt_entry_tls_min;
 
-#ifdef CONFIG_MODE_SKAS
-int do_set_thread_area_skas(struct user_desc *info)
+int do_set_thread_area(struct user_desc *info)
 {
 	int ret;
 	u32 cpu;
@@ -43,7 +29,7 @@ int do_set_thread_area_skas(struct user_desc *info)
 	return ret;
 }
 
-int do_get_thread_area_skas(struct user_desc *info)
+int do_get_thread_area(struct user_desc *info)
 {
 	int ret;
 	u32 cpu;
@@ -53,7 +39,6 @@ int do_get_thread_area_skas(struct user_desc *info)
 	put_cpu();
 	return ret;
 }
-#endif
 
 /*
  * sys_get_thread_area: get a yet unused TLS descriptor index.
@@ -82,7 +67,8 @@ static inline void clear_user_desc(struct user_desc* info)
 	/* Postcondition: LDT_empty(info) returns true. */
 	memset(info, 0, sizeof(*info));
 
-	/* Check the LDT_empty or the i386 sys_get_thread_area code - we obtain
+	/*
+	 * Check the LDT_empty or the i386 sys_get_thread_area code - we obtain
 	 * indeed an empty user_desc.
 	 */
 	info->read_exec_only = 1;
@@ -97,10 +83,13 @@ static int load_TLS(int flags, struct task_struct *to)
 	int idx;
 
 	for (idx = GDT_ENTRY_TLS_MIN; idx < GDT_ENTRY_TLS_MAX; idx++) {
-		struct uml_tls_struct* curr = &to->thread.arch.tls_array[idx - GDT_ENTRY_TLS_MIN];
+		struct uml_tls_struct* curr =
+			&to->thread.arch.tls_array[idx - GDT_ENTRY_TLS_MIN];
 
-		/* Actually, now if it wasn't flushed it gets cleared and
-		 * flushed to the host, which will clear it.*/
+		/*
+		 * Actually, now if it wasn't flushed it gets cleared and
+		 * flushed to the host, which will clear it.
+		 */
 		if (!curr->present) {
 			if (!curr->flushed) {
 				clear_user_desc(&curr->tls);
@@ -124,7 +113,8 @@ out:
 	return ret;
 }
 
-/* Verify if we need to do a flush for the new process, i.e. if there are any
+/*
+ * Verify if we need to do a flush for the new process, i.e. if there are any
  * present desc's, only if they haven't been flushed.
  */
 static inline int needs_TLS_update(struct task_struct *task)
@@ -133,10 +123,13 @@ static inline int needs_TLS_update(struct task_struct *task)
 	int ret = 0;
 
 	for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) {
-		struct uml_tls_struct* curr = &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN];
+		struct uml_tls_struct* curr =
+			&task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN];
 
-		/* Can't test curr->present, we may need to clear a descriptor
-		 * which had a value. */
+		/*
+		 * Can't test curr->present, we may need to clear a descriptor
+		 * which had a value.
+		 */
 		if (curr->flushed)
 			continue;
 		ret = 1;
@@ -145,7 +138,8 @@ static inline int needs_TLS_update(struct task_struct *task)
 	return ret;
 }
 
-/* On a newly forked process, the TLS descriptors haven't yet been flushed. So
+/*
+ * On a newly forked process, the TLS descriptors haven't yet been flushed. So
  * we mark them as such and the first switch_to will do the job.
  */
 void clear_flushed_tls(struct task_struct *task)
@@ -153,10 +147,13 @@ void clear_flushed_tls(struct task_struct *task)
 	int i;
 
 	for (i = GDT_ENTRY_TLS_MIN; i < GDT_ENTRY_TLS_MAX; i++) {
-		struct uml_tls_struct* curr = &task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN];
+		struct uml_tls_struct* curr =
+			&task->thread.arch.tls_array[i - GDT_ENTRY_TLS_MIN];
 
-		/* Still correct to do this, if it wasn't present on the host it
-		 * will remain as flushed as it was. */
+		/*
+		 * Still correct to do this, if it wasn't present on the host it
+		 * will remain as flushed as it was.
+		 */
 		if (!curr->present)
 			continue;
 
@@ -164,40 +161,33 @@ void clear_flushed_tls(struct task_struct *task)
 	}
 }
 
-/* In SKAS0 mode, currently, multiple guest threads sharing the same ->mm have a
+/*
+ * In SKAS0 mode, currently, multiple guest threads sharing the same ->mm have a
  * common host process. So this is needed in SKAS0 too.
  *
  * However, if each thread had a different host process (and this was discussed
  * for SMP support) this won't be needed.
  *
  * And this will not need be used when (and if) we'll add support to the host
- * SKAS patch. */
+ * SKAS patch.
+ */
 
-int arch_switch_tls_skas(struct task_struct *from, struct task_struct *to)
+int arch_switch_tls(struct task_struct *from, struct task_struct *to)
 {
 	if (!host_supports_tls)
 		return 0;
 
-	/* We have no need whatsoever to switch TLS for kernel threads; beyond
+	/*
+	 * We have no need whatsoever to switch TLS for kernel threads; beyond
 	 * that, that would also result in us calling os_set_thread_area with
-	 * userspace_pid[cpu] == 0, which gives an error. */
+	 * userspace_pid[cpu] == 0, which gives an error.
+	 */
 	if (likely(to->mm))
 		return load_TLS(O_FORCE, to);
 
 	return 0;
 }
 
-int arch_switch_tls_tt(struct task_struct *from, struct task_struct *to)
-{
-	if (!host_supports_tls)
-		return 0;
-
-	if (needs_TLS_update(to))
-		return load_TLS(0, to);
-
-	return 0;
-}
-
 static int set_tls_entry(struct task_struct* task, struct user_desc *info,
 			 int idx, int flushed)
 {
@@ -251,17 +241,20 @@ static int get_tls_entry(struct task_struct* task, struct user_desc *info, int i
 	*info = t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].tls;
 
 out:
-	/* Temporary debugging check, to make sure that things have been
+	/*
+	 * Temporary debugging check, to make sure that things have been
 	 * flushed. This could be triggered if load_TLS() failed.
 	 */
-	if (unlikely(task == current && !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) {
+	if (unlikely(task == current &&
+		     !t->arch.tls_array[idx - GDT_ENTRY_TLS_MIN].flushed)) {
 		printk(KERN_ERR "get_tls_entry: task with pid %d got here "
 				"without flushed TLS.", current->pid);
 	}
 
 	return 0;
 clear:
-	/* When the TLS entry has not been set, the values read to user in the
+	/*
+	 * When the TLS entry has not been set, the values read to user in the
 	 * tls_array are 0 (because it's cleared at boot, see
 	 * arch/i386/kernel/head.S:cpu_gdt_table). Emulate that.
 	 */
@@ -293,7 +286,7 @@ asmlinkage int sys_set_thread_area(struct user_desc __user *user_desc)
 			return -EFAULT;
 	}
 
-	ret = CHOOSE_MODE_PROC(do_set_thread_area_tt, do_set_thread_area_skas, &info);
+	ret = do_set_thread_area(&info);
 	if (ret)
 		return ret;
 	return set_tls_entry(current, &info, idx, 1);
@@ -363,8 +356,10 @@ out:
 }
 
 
-/* XXX: This part is probably common to i386 and x86-64. Don't create a common
- * file for now, do that when implementing x86-64 support.*/
+/*
+ * XXX: This part is probably common to i386 and x86-64. Don't create a common
+ * file for now, do that when implementing x86-64 support.
+ */
 static int __init __setup_host_supports_tls(void)
 {
 	check_host_supports_tls(&host_supports_tls, &host_gdt_entry_tls_min);
diff --git a/arch/um/sys-i386/unmap.c b/arch/um/sys-i386/unmap.c
deleted file mode 100644
index 1b0ad0e4adc..00000000000
--- a/arch/um/sys-i386/unmap.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <linux/mman.h>
-#include <asm/unistd.h>
-
-static int errno;
-
-static inline _syscall2(int,munmap,void *,start,size_t,len)
-static inline _syscall6(void *,mmap2,void *,addr,size_t,len,int,prot,int,flags,int,fd,off_t,offset)
-int switcheroo(int fd, int prot, void *from, void *to, int size)
-{
-	if(munmap(to, size) < 0){
-		return(-1);
-	}
-	if(mmap2(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1 ){
-		return(-1);
-	}
-	if(munmap(from, size) < 0){
-		return(-1);
-	}
-	return(0);
-}
diff --git a/arch/um/sys-i386/user-offsets.c b/arch/um/sys-i386/user-offsets.c
index 29118cf5ff2..514241526a1 100644
--- a/arch/um/sys-i386/user-offsets.c
+++ b/arch/um/sys-i386/user-offsets.c
@@ -2,9 +2,9 @@
 #include <stddef.h>
 #include <signal.h>
 #include <sys/poll.h>
+#include <sys/user.h>
 #include <sys/mman.h>
 #include <asm/ptrace.h>
-#include <asm/user.h>
 
 #define DEFINE(sym, val) \
 	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -48,8 +48,8 @@ void foo(void)
 	OFFSET(HOST_SC_FP_ST, _fpstate, _st);
 	OFFSET(HOST_SC_FXSR_ENV, _fpstate, _fxsr_env);
 
-	DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_i387_struct));
-	DEFINE_LONGS(HOST_XFP_SIZE, sizeof(struct user_fxsr_struct));
+	DEFINE_LONGS(HOST_FP_SIZE, sizeof(struct user_fpregs_struct));
+	DEFINE_LONGS(HOST_XFP_SIZE, sizeof(struct user_fpxregs_struct));
 
 	DEFINE(HOST_IP, EIP);
 	DEFINE(HOST_SP, UESP);
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index ea8185d8540..3c22de53208 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -5,10 +5,9 @@
 #
 
 obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
-	setjmp.o sigcontext.o signal.o syscalls.o syscall_table.o sysrq.o \
-	ksyms.o tls.o
+	setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \
+	sysrq.o ksyms.o tls.o
 
-obj-$(CONFIG_MODE_SKAS) += stub.o stub_segv.o
 obj-$(CONFIG_MODULES) += um_module.o
 
 subarch-obj-y = lib/bitops_64.o lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
@@ -16,16 +15,12 @@ subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
 
 ldt-y = ../sys-i386/ldt.o
 
-USER_OBJS := ptrace_user.o sigcontext.o
+USER_OBJS := ptrace_user.o
 
 USER_OBJS += user-offsets.s
 extra-y += user-offsets.s
 
-extra-$(CONFIG_MODE_TT) += unmap.o
-
 UNPROFILE_OBJS := stub_segv.o
 CFLAGS_stub_segv.o := $(CFLAGS_NO_HARDENING)
 
 include arch/um/scripts/Makefile.rules
-
-$(obj)/unmap.%: _c_flags = $(call unprofile,$(CFLAGS))
diff --git a/arch/um/sys-x86_64/bugs.c b/arch/um/sys-x86_64/bugs.c
index 09547889037..506b6765bbc 100644
--- a/arch/um/sys-x86_64/bugs.c
+++ b/arch/um/sys-x86_64/bugs.c
@@ -14,7 +14,7 @@ void arch_check_bugs(void)
 {
 }
 
-int arch_handle_signal(int sig, union uml_pt_regs *regs)
+int arch_handle_signal(int sig, struct uml_pt_regs *regs)
 {
 	return 0;
 }
diff --git a/arch/um/sys-x86_64/fault.c b/arch/um/sys-x86_64/fault.c
index 4636b1465b6..ce85117fc64 100644
--- a/arch/um/sys-x86_64/fault.c
+++ b/arch/um/sys-x86_64/fault.c
@@ -14,14 +14,15 @@ struct exception_table_entry
 };
 
 const struct exception_table_entry *search_exception_tables(unsigned long add);
-int arch_fixup(unsigned long address, union uml_pt_regs *regs)
+
+int arch_fixup(unsigned long address, struct uml_pt_regs *regs)
 {
 	const struct exception_table_entry *fixup;
 
 	fixup = search_exception_tables(address);
-	if(fixup != 0){
+	if (fixup != 0) {
 		UPT_IP(regs) = fixup->fixup;
-		return(1);
+		return 1;
 	}
-	return(0);
+	return 0;
 }
diff --git a/arch/um/sys-x86_64/ptrace.c b/arch/um/sys-x86_64/ptrace.c
index 1970d78aa52..a3cfeed17af 100644
--- a/arch/um/sys-x86_64/ptrace.c
+++ b/arch/um/sys-x86_64/ptrace.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  *
  * Licensed under the GPL
  */
@@ -12,17 +13,10 @@
 #include <asm/uaccess.h>
 #include <asm/elf.h>
 
-/* XXX x86_64 */
-unsigned long not_ss;
-unsigned long not_ds;
-unsigned long not_es;
-
-#define SC_SS(r) (not_ss)
-#define SC_DS(r) (not_ds)
-#define SC_ES(r) (not_es)
-
-/* determines which flags the user has access to. */
-/* 1 = access 0 = no access */
+/*
+ * determines which flags the user has access to.
+ * 1 = access 0 = no access
+ */
 #define FLAG_MASK 0x44dd5UL
 
 int putreg(struct task_struct *child, int regno, unsigned long value)
@@ -66,20 +60,21 @@ int putreg(struct task_struct *child, int regno, unsigned long value)
 
 int poke_user(struct task_struct *child, long addr, long data)
 {
-        if ((addr & 3) || addr < 0)
-                return -EIO;
-
-        if (addr < MAX_REG_OFFSET)
-                return putreg(child, addr, data);
-        else if((addr >= offsetof(struct user, u_debugreg[0])) &&
-                (addr <= offsetof(struct user, u_debugreg[7]))){
-                addr -= offsetof(struct user, u_debugreg[0]);
-                addr = addr >> 2;
-                if((addr == 4) || (addr == 5)) return -EIO;
-                child->thread.arch.debugregs[addr] = data;
-                return 0;
-        }
-        return -EIO;
+	if ((addr & 3) || addr < 0)
+		return -EIO;
+
+	if (addr < MAX_REG_OFFSET)
+		return putreg(child, addr, data);
+	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+		(addr <= offsetof(struct user, u_debugreg[7]))){
+		addr -= offsetof(struct user, u_debugreg[0]);
+		addr = addr >> 2;
+		if ((addr == 4) || (addr == 5))
+			return -EIO;
+		child->thread.arch.debugregs[addr] = data;
+		return 0;
+	}
+	return -EIO;
 }
 
 unsigned long getreg(struct task_struct *child, int regno)
@@ -107,29 +102,22 @@ unsigned long getreg(struct task_struct *child, int regno)
 int peek_user(struct task_struct *child, long addr, long data)
 {
 	/* read the word at location addr in the USER area. */
-        unsigned long tmp;
-
-        if ((addr & 3) || addr < 0)
-                return -EIO;
-
-        tmp = 0;  /* Default return condition */
-        if(addr < MAX_REG_OFFSET){
-                tmp = getreg(child, addr);
-        }
-        else if((addr >= offsetof(struct user, u_debugreg[0])) &&
-                (addr <= offsetof(struct user, u_debugreg[7]))){
-                addr -= offsetof(struct user, u_debugreg[0]);
-                addr = addr >> 2;
-                tmp = child->thread.arch.debugregs[addr];
-        }
-        return put_user(tmp, (unsigned long *) data);
-}
+	unsigned long tmp;
 
-void arch_switch(void)
-{
-/* XXX
-	printk("arch_switch\n");
-*/
+	if ((addr & 3) || addr < 0)
+		return -EIO;
+
+	tmp = 0;  /* Default return condition */
+	if (addr < MAX_REG_OFFSET){
+		tmp = getreg(child, addr);
+	}
+	else if ((addr >= offsetof(struct user, u_debugreg[0])) &&
+		(addr <= offsetof(struct user, u_debugreg[7]))){
+		addr -= offsetof(struct user, u_debugreg[0]);
+		addr = addr >> 2;
+		tmp = child->thread.arch.debugregs[addr];
+	}
+	return put_user(tmp, (unsigned long *) data);
 }
 
 /* XXX Mostly copied from sys-i386 */
@@ -139,54 +127,68 @@ int is_syscall(unsigned long addr)
 	int n;
 
 	n = copy_from_user(&instr, (void __user *) addr, sizeof(instr));
-	if(n){
+	if (n){
 		/* access_process_vm() grants access to vsyscall and stub,
 		 * while copy_from_user doesn't. Maybe access_process_vm is
 		 * slow, but that doesn't matter, since it will be called only
 		 * in case of singlestepping, if copy_from_user failed.
 		 */
 		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
-		if(n != sizeof(instr)) {
+		if (n != sizeof(instr)) {
 			printk("is_syscall : failed to read instruction from "
 			       "0x%lx\n", addr);
-			return(1);
+			return 1;
 		}
 	}
 	/* sysenter */
-	return(instr == 0x050f);
+	return instr == 0x050f;
 }
 
-int get_fpregs(unsigned long buf, struct task_struct *child)
+int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
-	panic("get_fpregs");
-	return(0);
-}
+	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+	long fpregs[HOST_FP_SIZE];
 
-int set_fpregs(unsigned long buf, struct task_struct *child)
-{
-	panic("set_fpregs");
-	return(0);
+	BUG_ON(sizeof(*buf) != sizeof(fpregs));
+	err = save_fp_registers(userspace_pid[cpu], fpregs);
+	if (err)
+		return err;
+
+	n = copy_to_user((void *) buf, fpregs, sizeof(fpregs));
+	if(n > 0)
+		return -EFAULT;
+
+	return n;
 }
 
-int get_fpxregs(unsigned long buf, struct task_struct *tsk)
+int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
-	panic("get_fpxregs");
-	return(0);
+	int n, cpu = ((struct thread_info *) child->stack)->cpu;
+	long fpregs[HOST_FP_SIZE];
+
+	BUG_ON(sizeof(*buf) != sizeof(fpregs));
+	n = copy_from_user(fpregs, (void *) buf, sizeof(fpregs));
+	if (n > 0)
+		return -EFAULT;
+
+	return restore_fp_registers(userspace_pid[cpu], fpregs);
 }
 
-int set_fpxregs(unsigned long buf, struct task_struct *tsk)
+long subarch_ptrace(struct task_struct *child, long request, long addr,
+		    long data)
 {
-	panic("set_fxpregs");
-	return(0);
-}
+	int ret = -EIO;
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
+	switch (request) {
+	case PTRACE_GETFPXREGS: /* Get the child FPU state. */
+		ret = get_fpregs((struct user_i387_struct __user *) data,
+				 child);
+		break;
+	case PTRACE_SETFPXREGS: /* Set the child FPU state. */
+		ret = set_fpregs((struct user_i387_struct __user *) data,
+				 child);
+		break;
+	}
+
+	return ret;
+}
diff --git a/arch/um/sys-x86_64/sigcontext.c b/arch/um/sys-x86_64/sigcontext.c
deleted file mode 100644
index c88e64def6f..00000000000
--- a/arch/um/sys-x86_64/sigcontext.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <signal.h>
-#include "user.h"
-
-void sc_to_sc(void *to_ptr, void *from_ptr)
-{
-        struct sigcontext *to = to_ptr, *from = from_ptr;
-        int size = sizeof(*to); /* + sizeof(struct _fpstate); */
-
-        memcpy(to, from, size);
-        if(from->fpstate != NULL)
-		to->fpstate = (struct _fpstate *) (to + 1);
-
-	to->fpstate = NULL;
-}
-
-unsigned long *sc_sigmask(void *sc_ptr)
-{
-	struct sigcontext *sc = sc_ptr;
-
-	return(&sc->oldmask);
-}
-
-/* Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/sys-x86_64/signal.c b/arch/um/sys-x86_64/signal.c
index fe8ec04d35b..1778d33808f 100644
--- a/arch/um/sys-x86_64/signal.c
+++ b/arch/um/sys-x86_64/signal.c
@@ -1,111 +1,121 @@
 /*
  * Copyright (C) 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
-#include "linux/stddef.h"
-#include "linux/errno.h"
 #include "linux/personality.h"
 #include "linux/ptrace.h"
-#include "asm/current.h"
+#include "asm/unistd.h"
 #include "asm/uaccess.h"
-#include "asm/sigcontext.h"
-#include "asm/ptrace.h"
-#include "asm/arch/ucontext.h"
-#include "choose-mode.h"
-#include "sysdep/ptrace.h"
+#include "asm/ucontext.h"
 #include "frame_kern.h"
-
-#ifdef CONFIG_MODE_SKAS
-
 #include "skas.h"
 
-void copy_sc(union uml_pt_regs *regs, void *from)
+void copy_sc(struct uml_pt_regs *regs, void *from)
 {
 	struct sigcontext *sc = from;
 
-#define GETREG(regs, regno, sc, regname) \
-       (regs)->skas.regs[(regno) / sizeof(unsigned long)] = (sc)->regname
-
-       GETREG(regs, R8, sc, r8);
-       GETREG(regs, R9, sc, r9);
-       GETREG(regs, R10, sc, r10);
-       GETREG(regs, R11, sc, r11);
-       GETREG(regs, R12, sc, r12);
-       GETREG(regs, R13, sc, r13);
-       GETREG(regs, R14, sc, r14);
-       GETREG(regs, R15, sc, r15);
-       GETREG(regs, RDI, sc, rdi);
-       GETREG(regs, RSI, sc, rsi);
-       GETREG(regs, RBP, sc, rbp);
-       GETREG(regs, RBX, sc, rbx);
-       GETREG(regs, RDX, sc, rdx);
-       GETREG(regs, RAX, sc, rax);
-       GETREG(regs, RCX, sc, rcx);
-       GETREG(regs, RSP, sc, rsp);
-       GETREG(regs, RIP, sc, rip);
-       GETREG(regs, EFLAGS, sc, eflags);
-       GETREG(regs, CS, sc, cs);
+#define GETREG(regs, regno, sc, regname)				\
+	(regs)->gp[(regno) / sizeof(unsigned long)] = (sc)->regname
+
+	GETREG(regs, R8, sc, r8);
+	GETREG(regs, R9, sc, r9);
+	GETREG(regs, R10, sc, r10);
+	GETREG(regs, R11, sc, r11);
+	GETREG(regs, R12, sc, r12);
+	GETREG(regs, R13, sc, r13);
+	GETREG(regs, R14, sc, r14);
+	GETREG(regs, R15, sc, r15);
+	GETREG(regs, RDI, sc, rdi);
+	GETREG(regs, RSI, sc, rsi);
+	GETREG(regs, RBP, sc, rbp);
+	GETREG(regs, RBX, sc, rbx);
+	GETREG(regs, RDX, sc, rdx);
+	GETREG(regs, RAX, sc, rax);
+	GETREG(regs, RCX, sc, rcx);
+	GETREG(regs, RSP, sc, rsp);
+	GETREG(regs, RIP, sc, rip);
+	GETREG(regs, EFLAGS, sc, eflags);
+	GETREG(regs, CS, sc, cs);
 
 #undef GETREG
 }
 
-static int copy_sc_from_user_skas(struct pt_regs *regs,
-                                 struct sigcontext __user *from)
+static int copy_sc_from_user(struct pt_regs *regs,
+			     struct sigcontext __user *from,
+			     struct _fpstate __user *fpp)
 {
-       int err = 0;
-
-#define GETREG(regs, regno, sc, regname) \
-       __get_user((regs)->regs.skas.regs[(regno) / sizeof(unsigned long)], \
-                  &(sc)->regname)
-
-       err |= GETREG(regs, R8, from, r8);
-       err |= GETREG(regs, R9, from, r9);
-       err |= GETREG(regs, R10, from, r10);
-       err |= GETREG(regs, R11, from, r11);
-       err |= GETREG(regs, R12, from, r12);
-       err |= GETREG(regs, R13, from, r13);
-       err |= GETREG(regs, R14, from, r14);
-       err |= GETREG(regs, R15, from, r15);
-       err |= GETREG(regs, RDI, from, rdi);
-       err |= GETREG(regs, RSI, from, rsi);
-       err |= GETREG(regs, RBP, from, rbp);
-       err |= GETREG(regs, RBX, from, rbx);
-       err |= GETREG(regs, RDX, from, rdx);
-       err |= GETREG(regs, RAX, from, rax);
-       err |= GETREG(regs, RCX, from, rcx);
-       err |= GETREG(regs, RSP, from, rsp);
-       err |= GETREG(regs, RIP, from, rip);
-       err |= GETREG(regs, EFLAGS, from, eflags);
-       err |= GETREG(regs, CS, from, cs);
+	struct user_i387_struct fp;
+	int err = 0;
+
+#define GETREG(regs, regno, sc, regname)				\
+	__get_user((regs)->regs.gp[(regno) / sizeof(unsigned long)],	\
+		   &(sc)->regname)
+
+	err |= GETREG(regs, R8, from, r8);
+	err |= GETREG(regs, R9, from, r9);
+	err |= GETREG(regs, R10, from, r10);
+	err |= GETREG(regs, R11, from, r11);
+	err |= GETREG(regs, R12, from, r12);
+	err |= GETREG(regs, R13, from, r13);
+	err |= GETREG(regs, R14, from, r14);
+	err |= GETREG(regs, R15, from, r15);
+	err |= GETREG(regs, RDI, from, rdi);
+	err |= GETREG(regs, RSI, from, rsi);
+	err |= GETREG(regs, RBP, from, rbp);
+	err |= GETREG(regs, RBX, from, rbx);
+	err |= GETREG(regs, RDX, from, rdx);
+	err |= GETREG(regs, RAX, from, rax);
+	err |= GETREG(regs, RCX, from, rcx);
+	err |= GETREG(regs, RSP, from, rsp);
+	err |= GETREG(regs, RIP, from, rip);
+	err |= GETREG(regs, EFLAGS, from, eflags);
+	err |= GETREG(regs, CS, from, cs);
+	if (err)
+		return 1;
 
 #undef GETREG
 
-       return err;
+	err = copy_from_user(&fp, fpp, sizeof(struct user_i387_struct));
+	if (err)
+		return 1;
+
+	err = restore_fp_registers(userspace_pid[current_thread->cpu],
+				   (unsigned long *) &fp);
+	if (err < 0) {
+		printk(KERN_ERR "copy_sc_from_user - "
+		       "restore_fp_registers failed, errno = %d\n",
+		       -err);
+		return 1;
+	}
+
+	return 0;
 }
 
-int copy_sc_to_user_skas(struct sigcontext __user *to,
-			 struct _fpstate __user *to_fp,
-			 struct pt_regs *regs, unsigned long mask,
-			 unsigned long sp)
+static int copy_sc_to_user(struct sigcontext __user *to,
+			   struct _fpstate __user *to_fp, struct pt_regs *regs,
+			   unsigned long mask, unsigned long sp)
 {
-        struct faultinfo * fi = &current->thread.arch.faultinfo;
+	struct faultinfo * fi = &current->thread.arch.faultinfo;
+	struct user_i387_struct fp;
 	int err = 0;
 
 	err |= __put_user(0, &to->gs);
 	err |= __put_user(0, &to->fs);
 
-#define PUTREG(regs, regno, sc, regname) \
-       __put_user((regs)->regs.skas.regs[(regno) / sizeof(unsigned long)], \
-                  &(sc)->regname)
+#define PUTREG(regs, regno, sc, regname)				\
+	__put_user((regs)->regs.gp[(regno) / sizeof(unsigned long)],	\
+		   &(sc)->regname)
 
 	err |= PUTREG(regs, RDI, to, rdi);
 	err |= PUTREG(regs, RSI, to, rsi);
 	err |= PUTREG(regs, RBP, to, rbp);
-        /* Must use orignal RSP, which is passed in, rather than what's in
-         * the pt_regs, because that's already been updated to point at the
-         * signal frame.
-         */
+	/*
+	 * Must use original RSP, which is passed in, rather than what's in
+	 * the pt_regs, because that's already been updated to point at the
+	 * signal frame.
+	 */
 	err |= __put_user(sp, &to->rsp);
 	err |= PUTREG(regs, RBX, to, rbx);
 	err |= PUTREG(regs, RDX, to, rdx);
@@ -121,91 +131,38 @@ int copy_sc_to_user_skas(struct sigcontext __user *to,
 	err |= PUTREG(regs, R15, to, r15);
 	err |= PUTREG(regs, CS, to, cs); /* XXX x86_64 doesn't do this */
 
-        err |= __put_user(fi->cr2, &to->cr2);
-        err |= __put_user(fi->error_code, &to->err);
-        err |= __put_user(fi->trap_no, &to->trapno);
+	err |= __put_user(fi->cr2, &to->cr2);
+	err |= __put_user(fi->error_code, &to->err);
+	err |= __put_user(fi->trap_no, &to->trapno);
 
 	err |= PUTREG(regs, RIP, to, rip);
 	err |= PUTREG(regs, EFLAGS, to, eflags);
 #undef PUTREG
 
 	err |= __put_user(mask, &to->oldmask);
-
-	return(err);
-}
-
-#endif
-
-#ifdef CONFIG_MODE_TT
-int copy_sc_from_user_tt(struct sigcontext *to, struct sigcontext __user *from,
-			 int fpsize)
-{
-	struct _fpstate *to_fp;
-	struct _fpstate __user *from_fp;
-	unsigned long sigs;
-	int err;
-
-	to_fp = to->fpstate;
-	sigs = to->oldmask;
-	err = copy_from_user(to, from, sizeof(*to));
-	from_fp = to->fpstate;
-	to->fpstate = to_fp;
-	to->oldmask = sigs;
-	if(to_fp != NULL)
-		err |= copy_from_user(to_fp, from_fp, fpsize);
-	return(err);
-}
-
-int copy_sc_to_user_tt(struct sigcontext __user *to, struct _fpstate __user *fp,
-		       struct sigcontext *from, int fpsize, unsigned long sp)
-{
-	struct _fpstate __user *to_fp;
-	struct _fpstate *from_fp;
-	int err;
-
-	to_fp = (fp ? fp : (struct _fpstate __user *) (to + 1));
-	from_fp = from->fpstate;
-	err = copy_to_user(to, from, sizeof(*to));
-	/* The SP in the sigcontext is the updated one for the signal
-	 * delivery.  The sp passed in is the original, and this needs
-	 * to be restored, so we stick it in separately.
-	 */
-	err |= copy_to_user(&SC_SP(to), &sp, sizeof(sp));
-
-	if(from_fp != NULL){
-		err |= copy_to_user(&to->fpstate, &to_fp, sizeof(to->fpstate));
-		err |= copy_to_user(to_fp, from_fp, fpsize);
+	if (err)
+		return 1;
+
+	err = save_fp_registers(userspace_pid[current_thread->cpu],
+				(unsigned long *) &fp);
+	if (err < 0) {
+		printk(KERN_ERR "copy_sc_to_user - save_fp_registers "
+		       "failed, errno = %d\n", -err);
+		return 1;
 	}
-	return err;
-}
 
-#endif
-
-static int copy_sc_from_user(struct pt_regs *to, void __user *from)
-{
-       int ret;
-
-       ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from,
-                                              sizeof(struct _fpstate)),
-                         copy_sc_from_user_skas(to, from));
-       return(ret);
-}
+	if (copy_to_user(to_fp, &fp, sizeof(struct user_i387_struct)))
+		return 1;
 
-static int copy_sc_to_user(struct sigcontext __user *to,
-			   struct _fpstate __user *fp,
-			   struct pt_regs *from, unsigned long mask,
-			   unsigned long sp)
-{
-       return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs),
-                                             sizeof(*fp), sp),
-                          copy_sc_to_user_skas(to, fp, from, mask, sp)));
+	return err;
 }
 
 struct rt_sigframe
 {
-       char __user *pretcode;
-       struct ucontext uc;
-       struct siginfo info;
+	char __user *pretcode;
+	struct ucontext uc;
+	struct siginfo info;
+	struct _fpstate fpstate;
 };
 
 #define round_down(m, n) (((m) / (n)) * (n))
@@ -215,7 +172,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 			  siginfo_t *info, sigset_t *set)
 {
 	struct rt_sigframe __user *frame;
-	struct _fpstate __user *fp = NULL;
 	unsigned long save_sp = PT_REGS_RSP(regs);
 	int err = 0;
 	struct task_struct *me = current;
@@ -223,15 +179,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	frame = (struct rt_sigframe __user *)
 		round_down(stack_top - sizeof(struct rt_sigframe), 16);
 	/* Subtract 128 for a red zone and 8 for proper alignment */
-        frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
-
-	if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
-		goto out;
+	frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8);
 
-#if 0 /* XXX */
-	if (save_i387(fp) < 0)
-		err |= -1;
-#endif
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
 		goto out;
 
@@ -241,7 +190,8 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 			goto out;
 	}
 
-	/* Update SP now because the page fault handler refuses to extend
+	/*
+	 * Update SP now because the page fault handler refuses to extend
 	 * the stack if the faulting address is too far below the current
 	 * SP, which frame now certainly is.  If there's an error, the original
 	 * value is restored on the way out.
@@ -258,9 +208,9 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	err |= __put_user(sas_ss_flags(save_sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= copy_sc_to_user(&frame->uc.uc_mcontext, fp, regs, set->sig[0],
-		save_sp);
-	err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
+	err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
+			       set->sig[0], save_sp);
+	err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
 	if (sizeof(*set) == 16) {
 		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
 		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
@@ -269,8 +219,10 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 		err |= __copy_to_user(&frame->uc.uc_sigmask, set,
 				      sizeof(*set));
 
-	/* Set up to return from userspace.  If provided, use a stub
-	   already in userspace.  */
+	/*
+	 * Set up to return from userspace.  If provided, use a stub
+	 * already in userspace.
+	 */
 	/* x86-64 should always use SA_RESTORER. */
 	if (ka->sa.sa_flags & SA_RESTORER)
 		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
@@ -292,8 +244,10 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	/* In case the signal handler was declared without prototypes */
 	PT_REGS_RAX(regs) = 0;
 
-	/* This also works for non SA_SIGINFO handlers because they expect the
-	   next argument after the signal number on the stack. */
+	/*
+	 * This also works for non SA_SIGINFO handlers because they expect the
+	 * next argument after the signal number on the stack.
+	 */
 	PT_REGS_RSI(regs) = (unsigned long) &frame->info;
 	PT_REGS_RDX(regs) = (unsigned long) &frame->uc;
 	PT_REGS_RIP(regs) = (unsigned long) ka->sa.sa_handler;
@@ -313,7 +267,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	struct ucontext __user *uc = &frame->uc;
 	sigset_t set;
 
-	if(copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+	if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
 		goto segfault;
 
 	sigdelsetmask(&set, ~_BLOCKABLE);
@@ -323,24 +277,15 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
-	if(copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
+	if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext,
+			      &frame->fpstate))
 		goto segfault;
 
 	/* Avoid ERESTART handling */
 	PT_REGS_SYSCALL_NR(&current->thread.regs) = -1;
-	return(PT_REGS_SYSCALL_RET(&current->thread.regs));
+	return PT_REGS_SYSCALL_RET(&current->thread.regs);
 
  segfault:
 	force_sig(SIGSEGV, current);
 	return 0;
 }
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/arch/um/sys-x86_64/stub.S b/arch/um/sys-x86_64/stub.S
index 03c27973578..4afe204a6af 100644
--- a/arch/um/sys-x86_64/stub.S
+++ b/arch/um/sys-x86_64/stub.S
@@ -1,4 +1,5 @@
 #include "uml-config.h"
+#include "as-layout.h"
 
 	.globl syscall_stub
 .section .__syscall_stub, "x"
@@ -7,18 +8,18 @@ syscall_stub:
 	/* We don't have 64-bit constants, so this constructs the address
 	 * we need.
 	 */
-	movq	$(UML_CONFIG_STUB_DATA >> 32), %rbx
+	movq	$(ASM_STUB_DATA >> 32), %rbx
 	salq	$32, %rbx
-	movq	$(UML_CONFIG_STUB_DATA & 0xffffffff), %rcx
+	movq	$(ASM_STUB_DATA & 0xffffffff), %rcx
 	or	%rcx, %rbx
 	movq	%rax, (%rbx)
 	int3
 
 	.globl batch_syscall_stub
 batch_syscall_stub:
-	mov	$(UML_CONFIG_STUB_DATA >> 32), %rbx
+	mov	$(ASM_STUB_DATA >> 32), %rbx
 	sal	$32, %rbx
-	mov	$(UML_CONFIG_STUB_DATA & 0xffffffff), %rax
+	mov	$(ASM_STUB_DATA & 0xffffffff), %rax
 	or	%rax, %rbx
 	/* load pointer to first operation */
 	mov	%rbx, %rsp
diff --git a/arch/um/sys-x86_64/stub_segv.c b/arch/um/sys-x86_64/stub_segv.c
index 652fa34c2cd..3afb590f007 100644
--- a/arch/um/sys-x86_64/stub_segv.c
+++ b/arch/um/sys-x86_64/stub_segv.c
@@ -6,6 +6,7 @@
 #include <stddef.h>
 #include <signal.h>
 #include <asm/unistd.h>
+#include "as-layout.h"
 #include "uml-config.h"
 #include "sysdep/sigcontext.h"
 #include "sysdep/faultinfo.h"
@@ -33,7 +34,7 @@ stub_segv_handler(int sig)
         int pid;
 
 	__asm__ __volatile__("movq %%rdx, %0" : "=g" (uc) :);
-	GET_FAULTINFO_FROM_SC(*((struct faultinfo *) UML_CONFIG_STUB_DATA),
+	GET_FAULTINFO_FROM_SC(*((struct faultinfo *) STUB_DATA),
 			      &uc->uc_mcontext);
 
 	pid = stub_syscall0(__NR_getpid);
diff --git a/arch/um/sys-x86_64/syscalls.c b/arch/um/sys-x86_64/syscalls.c
index b3f6350cac4..86f6b18410e 100644
--- a/arch/um/sys-x86_64/syscalls.c
+++ b/arch/um/sys-x86_64/syscalls.c
@@ -1,70 +1,36 @@
 /*
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Copyright 2003 PathScale, Inc.
  *
  * Licensed under the GPL
  */
 
 #include "linux/linkage.h"
-#include "linux/slab.h"
-#include "linux/shm.h"
-#include "linux/utsname.h"
 #include "linux/personality.h"
-#include "asm/uaccess.h"
-#define __FRAME_OFFSETS
-#include "asm/ptrace.h"
-#include "asm/unistd.h"
+#include "linux/utsname.h"
 #include "asm/prctl.h" /* XXX This should get the constants from libc */
-#include "choose-mode.h"
-#include "kern.h"
+#include "asm/uaccess.h"
 #include "os.h"
 
 asmlinkage long sys_uname64(struct new_utsname __user * name)
 {
 	int err;
+
 	down_read(&uts_sem);
 	err = copy_to_user(name, utsname(), sizeof (*name));
 	up_read(&uts_sem);
+
 	if (personality(current->personality) == PER_LINUX32)
 		err |= copy_to_user(&name->machine, "i686", 5);
-	return err ? -EFAULT : 0;
-}
-
-#ifdef CONFIG_MODE_TT
-extern long arch_prctl(int code, unsigned long addr);
-
-static long arch_prctl_tt(int code, unsigned long addr)
-{
-	unsigned long tmp;
-	long ret;
-
-	switch(code){
-	case ARCH_SET_GS:
-	case ARCH_SET_FS:
-		ret = arch_prctl(code, addr);
-		break;
-	case ARCH_GET_FS:
-	case ARCH_GET_GS:
-		ret = arch_prctl(code, (unsigned long) &tmp);
-		if(!ret)
-			ret = put_user(tmp, (long __user *)addr);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
 
-	return(ret);
+	return err ? -EFAULT : 0;
 }
-#endif
-
-#ifdef CONFIG_MODE_SKAS
 
-long arch_prctl_skas(struct task_struct *task, int code,
-                     unsigned long __user *addr)
+long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
 {
-        unsigned long *ptr = addr, tmp;
+	unsigned long *ptr = addr, tmp;
 	long ret;
-	int pid = task->mm->context.skas.id.u.pid;
+	int pid = task->mm->context.id.u.pid;
 
 	/*
 	 * With ARCH_SET_FS (and ARCH_SET_GS is treated similarly to
@@ -79,52 +45,50 @@ long arch_prctl_skas(struct task_struct *task, int code,
 	 * arch_prctl is run on the host, then the registers are read
 	 * back.
 	 */
-	switch(code){
+	switch (code) {
 	case ARCH_SET_FS:
 	case ARCH_SET_GS:
-                restore_registers(pid, &current->thread.regs.regs);
-                break;
-        case ARCH_GET_FS:
-        case ARCH_GET_GS:
-                /*
-                 * With these two, we read to a local pointer and
-                 * put_user it to the userspace pointer that we were
-                 * given.  If addr isn't valid (because it hasn't been
-                 * faulted in or is just bogus), we want put_user to
-                 * fault it in (or return -EFAULT) instead of having
-                 * the host return -EFAULT.
-                 */
-                ptr = &tmp;
-        }
+		restore_registers(pid, &current->thread.regs.regs);
+		break;
+	case ARCH_GET_FS:
+	case ARCH_GET_GS:
+		/*
+		 * With these two, we read to a local pointer and
+		 * put_user it to the userspace pointer that we were
+		 * given.  If addr isn't valid (because it hasn't been
+		 * faulted in or is just bogus), we want put_user to
+		 * fault it in (or return -EFAULT) instead of having
+		 * the host return -EFAULT.
+		 */
+		ptr = &tmp;
+	}
 
-        ret = os_arch_prctl(pid, code, ptr);
-        if(ret)
-                return ret;
+	ret = os_arch_prctl(pid, code, ptr);
+	if (ret)
+		return ret;
 
-        switch(code){
+	switch (code) {
 	case ARCH_SET_FS:
 		current->thread.arch.fs = (unsigned long) ptr;
 		save_registers(pid, &current->thread.regs.regs);
 		break;
 	case ARCH_SET_GS:
-                save_registers(pid, &current->thread.regs.regs);
+		save_registers(pid, &current->thread.regs.regs);
 		break;
 	case ARCH_GET_FS:
 		ret = put_user(tmp, addr);
-	        break;
+		break;
 	case ARCH_GET_GS:
 		ret = put_user(tmp, addr);
-	        break;
+		break;
 	}
 
 	return ret;
 }
-#endif
 
 long sys_arch_prctl(int code, unsigned long addr)
 {
-	return CHOOSE_MODE_PROC(arch_prctl_tt, arch_prctl_skas, current, code,
-                                (unsigned long __user *) addr);
+	return arch_prctl(current, code, (unsigned long __user *) addr);
 }
 
 long sys_clone(unsigned long clone_flags, unsigned long newsp,
@@ -141,10 +105,10 @@ long sys_clone(unsigned long clone_flags, unsigned long newsp,
 	return ret;
 }
 
-void arch_switch_to_skas(struct task_struct *from, struct task_struct *to)
+void arch_switch_to(struct task_struct *from, struct task_struct *to)
 {
-        if((to->thread.arch.fs == 0) || (to->mm == NULL))
-                return;
+	if ((to->thread.arch.fs == 0) || (to->mm == NULL))
+		return;
 
-        arch_prctl_skas(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs);
+	arch_prctl(to, ARCH_SET_FS, (void __user *) to->thread.arch.fs);
 }
diff --git a/arch/um/sys-x86_64/tls.c b/arch/um/sys-x86_64/tls.c
index febbc94be25..f7ba46200ec 100644
--- a/arch/um/sys-x86_64/tls.c
+++ b/arch/um/sys-x86_64/tls.c
@@ -11,7 +11,7 @@ int arch_copy_tls(struct task_struct *t)
 	 * (which is argument 5, child_tid, of clone) so it can be set
 	 * during context switches.
 	 */
-	t->thread.arch.fs = t->thread.regs.regs.skas.regs[R8 / sizeof(long)];
+	t->thread.arch.fs = t->thread.regs.regs.gp[R8 / sizeof(long)];
 
-        return 0;
+	return 0;
 }
diff --git a/arch/um/sys-x86_64/unmap.c b/arch/um/sys-x86_64/unmap.c
deleted file mode 100644
index f4a4bffd8a1..00000000000
--- a/arch/um/sys-x86_64/unmap.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <linux/mman.h>
-#include <asm/unistd.h>
-
-static int errno;
-
-static inline _syscall2(int,munmap,void *,start,size_t,len)
-static inline _syscall6(void *,mmap,void *,addr,size_t,len,int,prot,int,flags,int,fd,off_t,offset)
-int switcheroo(int fd, int prot, void *from, void *to, int size)
-{
-	if(munmap(to, size) < 0){
-		return(-1);
-	}
-	if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1){
-		return(-1);
-	}
-	if(munmap(from, size) < 0){
-		return(-1);
-	}
-	return(0);
-}
diff --git a/arch/um/sys-x86_64/user-offsets.c b/arch/um/sys-x86_64/user-offsets.c
index 0d5fd764c21..f1ef2a8dfbc 100644
--- a/arch/um/sys-x86_64/user-offsets.c
+++ b/arch/um/sys-x86_64/user-offsets.c
@@ -3,17 +3,10 @@
 #include <signal.h>
 #include <sys/poll.h>
 #include <sys/mman.h>
+#include <sys/user.h>
 #define __FRAME_OFFSETS
 #include <asm/ptrace.h>
 #include <asm/types.h>
-/* For some reason, x86_64 defines u64 and u32 only in <pci/types.h>, which I
- * refuse to include here, even though they're used throughout the headers.
- * These are used in asm/user.h, and that include can't be avoided because of
- * the sizeof(struct user_regs_struct) below.
- */
-typedef __u64 u64;
-typedef __u32 u32;
-#include <asm/user.h>
 
 #define DEFINE(sym, val) \
         asm volatile("\n->" #sym " %0 " #val : : "i" (val))
diff --git a/arch/v850/kernel/fpga85e2c.c b/arch/v850/kernel/fpga85e2c.c
index 5c4923558a7..ab9cf16a85c 100644
--- a/arch/v850/kernel/fpga85e2c.c
+++ b/arch/v850/kernel/fpga85e2c.c
@@ -160,5 +160,8 @@ static void make_reg_snap (int irq, void *dummy, struct pt_regs *regs)
 
 static int reg_snap_dev_id;
 static struct irqaction reg_snap_action = {
-	make_reg_snap, 0, CPU_MASK_NONE, "reg_snap", &reg_snap_dev_id, 0
+	.handler = make_reg_snap,
+	.mask = CPU_MASK_NONE,
+	.name = "reg_snap",
+	.dev_id = &reg_snap_dev_id,
 };
diff --git a/arch/v850/kernel/time.c b/arch/v850/kernel/time.c
index f0905b03523..d810c93fe66 100644
--- a/arch/v850/kernel/time.c
+++ b/arch/v850/kernel/time.c
@@ -92,12 +92,11 @@ static irqreturn_t timer_interrupt (int irq, void *dummy, struct pt_regs *regs)
 
 static int timer_dev_id;
 static struct irqaction timer_irqaction = {
-	timer_interrupt,
-	IRQF_DISABLED,
-	CPU_MASK_NONE,
-	"timer",
-	&timer_dev_id,
-	NULL
+	.handler = timer_interrupt,
+	.flags = IRQF_DISABLED,
+	.mask = CPU_MASK_NONE,
+	.name = "timer",
+	.dev_id = &timer_dev_id,
 };
 
 void time_init (void)
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ffd01e5dcb5..2ca43ba32bc 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -595,7 +595,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	dmi_check_system(sw_any_bug_dmi_table);
 	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
 		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
-		policy->cpus = cpu_core_map[cpu];
+		policy->cpus = per_cpu(cpu_core_map, cpu);
 	}
 #endif
 
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 8eb414b906d..793eae854f4 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -200,7 +200,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy)
 	unsigned int i;
 
 #ifdef CONFIG_SMP
-	policy->cpus = cpu_sibling_map[policy->cpu];
+	policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
 #endif
 
 	/* Errata workaround */
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index b273b69cfdd..c06ac680c9c 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -57,7 +57,7 @@ static struct powernow_k8_data *powernow_data[NR_CPUS];
 static int cpu_family = CPU_OPTERON;
 
 #ifndef CONFIG_SMP
-static cpumask_t cpu_core_map[1];
+DEFINE_PER_CPU(cpumask_t, cpu_core_map);
 #endif
 
 /* Return a frequency in MHz, given an input fid */
@@ -667,7 +667,7 @@ static int fill_powernow_table(struct powernow_k8_data *data, struct pst_s *pst,
 
 	dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid);
 	data->powernow_table = powernow_table;
-	if (first_cpu(cpu_core_map[data->cpu]) == data->cpu)
+	if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
 		print_basics(data);
 
 	for (j = 0; j < data->numps; j++)
@@ -821,7 +821,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 
 	/* fill in data */
 	data->numps = data->acpi_data.state_count;
-	if (first_cpu(cpu_core_map[data->cpu]) == data->cpu)
+	if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
 		print_basics(data);
 	powernow_k8_acpi_pst_values(data, 0);
 
@@ -1214,7 +1214,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	if (cpu_family == CPU_HW_PSTATE)
 		pol->cpus = cpumask_of_cpu(pol->cpu);
 	else
-		pol->cpus = cpu_core_map[pol->cpu];
+		pol->cpus = per_cpu(cpu_core_map, pol->cpu);
 	data->available_cores = &(pol->cpus);
 
 	/* Take a crude guess here.
@@ -1281,7 +1281,7 @@ static unsigned int powernowk8_get (unsigned int cpu)
 	cpumask_t oldmask = current->cpus_allowed;
 	unsigned int khz = 0;
 
-	data = powernow_data[first_cpu(cpu_core_map[cpu])];
+	data = powernow_data[first_cpu(per_cpu(cpu_core_map, cpu))];
 
 	if (!data)
 		return -EINVAL;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 36685e8f7be..14d68aa301e 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -322,7 +322,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy)
 
 	/* only run on CPU to be set, or on its sibling */
 #ifdef CONFIG_SMP
-	policy->cpus = cpu_sibling_map[policy->cpu];
+	policy->cpus = per_cpu(cpu_sibling_map, policy->cpu);
 #endif
 
 	cpus_allowed = current->cpus_allowed;
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 1e31b6caffb..879a0f789b1 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -122,7 +122,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 #ifdef CONFIG_X86_HT
 	if (c->x86_max_cores * smp_num_siblings > 1) {
 		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n]));
+		seq_printf(m, "siblings\t: %d\n",
+				cpus_weight(per_cpu(cpu_core_map, n)));
 		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
 		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index e2f4a1c6854..4ee1e5ee9b5 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -378,7 +378,7 @@ static struct irq_cpu_info {
 
 #define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
 
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
 
 static cpumask_t balance_irq_affinity[NR_IRQS] = {
 	[0 ... NR_IRQS-1] = CPU_MASK_ALL
@@ -598,7 +598,7 @@ tryanotherirq:
 	 * (A+B)/2 vs B
 	 */
 	load = CPU_IRQ(min_loaded) >> 1;
-	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+	for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
 		if (load > CPU_IRQ(j)) {
 			/* This won't change cpu_sibling_map[min_loaded] */
 			load = CPU_IRQ(j);
diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index e7d0d3c2ef6..90f778c04b3 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -41,6 +41,13 @@ void jprobe_return_end(void);
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+struct kretprobe_blackpoint kretprobe_blacklist[] = {
+	{"__switch_to", }, /* This function switches only current task, but
+			     doesn't switch kernel stack.*/
+	{NULL, NULL}	/* Terminator */
+};
+const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
+
 /* insert a jmp code */
 static __always_inline void set_jmp_op(void *from, void *to)
 {
@@ -584,7 +591,7 @@ out:
 	return 1;
 }
 
-static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -666,7 +673,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 			ret = NOTIFY_STOP;
 		break;
 	case DIE_GPF:
-	case DIE_PAGE_FAULT:
 		/* kprobe_running() needs smp_processor_id() */
 		preempt_disable();
 		if (kprobe_running() &&
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index 62e28e52d78..681b801c5e2 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -48,6 +48,13 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p);
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
+struct kretprobe_blackpoint kretprobe_blacklist[] = {
+	{"__switch_to", }, /* This function switches only current task, but
+			      doesn't switch kernel stack.*/
+	{NULL, NULL}	/* Terminator */
+};
+const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
+
 /*
  * returns non-zero if opcode modifies the interrupt flag.
  */
@@ -657,7 +664,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 			ret = NOTIFY_STOP;
 		break;
 	case DIE_GPF:
-	case DIE_PAGE_FAULT:
 		/* kprobe_running() needs smp_processor_id() */
 		preempt_disable();
 		if (kprobe_running() &&
diff --git a/arch/x86/kernel/mce_amd_64.c b/arch/x86/kernel/mce_amd_64.c
index 2f8a7f18b0f..805b62b1e0d 100644
--- a/arch/x86/kernel/mce_amd_64.c
+++ b/arch/x86/kernel/mce_amd_64.c
@@ -472,7 +472,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
 #ifdef CONFIG_SMP
 	if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) {	/* symlink */
-		i = first_cpu(cpu_core_map[cpu]);
+		i = first_cpu(per_cpu(cpu_core_map, cpu));
 
 		/* first core not up yet */
 		if (cpu_data[i].cpu_core_id)
@@ -492,7 +492,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		if (err)
 			goto out;
 
-		b->cpus = cpu_core_map[cpu];
+		b->cpus = per_cpu(cpu_core_map, cpu);
 		per_cpu(threshold_banks, cpu)[bank] = b;
 		goto out;
 	}
@@ -509,7 +509,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 #ifndef CONFIG_SMP
 	b->cpus = CPU_MASK_ALL;
 #else
-	b->cpus = cpu_core_map[cpu];
+	b->cpus = per_cpu(cpu_core_map, cpu);
 #endif
 	err = kobject_register(&b->kobj);
 	if (err)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 71da01e73f0..a50b787b3bf 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -35,6 +35,7 @@
 #include <linux/pci_ids.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/scatterlist.h>
 #include <asm/iommu.h>
 #include <asm/calgary.h>
 #include <asm/tce.h>
@@ -384,31 +385,32 @@ static void calgary_unmap_sg(struct device *dev,
 	struct scatterlist *sglist, int nelems, int direction)
 {
 	struct iommu_table *tbl = find_iommu_table(dev);
+	struct scatterlist *s;
+	int i;
 
 	if (!translate_phb(to_pci_dev(dev)))
 		return;
 
-	while (nelems--) {
+	for_each_sg(sglist, s, nelems, i) {
 		unsigned int npages;
-		dma_addr_t dma = sglist->dma_address;
-		unsigned int dmalen = sglist->dma_length;
+		dma_addr_t dma = s->dma_address;
+		unsigned int dmalen = s->dma_length;
 
 		if (dmalen == 0)
 			break;
 
 		npages = num_dma_pages(dma, dmalen);
 		iommu_free(tbl, dma, npages);
-		sglist++;
 	}
 }
 
 static int calgary_nontranslate_map_sg(struct device* dev,
 	struct scatterlist *sg, int nelems, int direction)
 {
+	struct scatterlist *s;
 	int i;
 
-	for (i = 0; i < nelems; i++ ) {
-		struct scatterlist *s = &sg[i];
+	for_each_sg(sg, s, nelems, i) {
 		BUG_ON(!s->page);
 		s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
 		s->dma_length = s->length;
@@ -420,6 +422,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
 	int nelems, int direction)
 {
 	struct iommu_table *tbl = find_iommu_table(dev);
+	struct scatterlist *s;
 	unsigned long vaddr;
 	unsigned int npages;
 	unsigned long entry;
@@ -428,8 +431,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
 	if (!translate_phb(to_pci_dev(dev)))
 		return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
 
-	for (i = 0; i < nelems; i++ ) {
-		struct scatterlist *s = &sg[i];
+	for_each_sg(sg, s, nelems, i) {
 		BUG_ON(!s->page);
 
 		vaddr = (unsigned long)page_address(s->page) + s->offset;
@@ -454,9 +456,9 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
 	return nelems;
 error:
 	calgary_unmap_sg(dev, sg, nelems, direction);
-	for (i = 0; i < nelems; i++) {
-		sg[i].dma_address = bad_dma_address;
-		sg[i].dma_length = 0;
+	for_each_sg(sg, s, nelems, i) { /* mark every entry bad */
+		s->dma_address = bad_dma_address;
+		s->dma_length = 0;
 	}
 	return 0;
 }
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 4918c575d58..cfcc84e6c35 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/bitops.h>
 #include <linux/kdebug.h>
+#include <linux/scatterlist.h>
 #include <asm/atomic.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
@@ -278,10 +279,10 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
  */
 static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 {
+	struct scatterlist *s;
 	int i;
 
-	for (i = 0; i < nents; i++) {
-		struct scatterlist *s = &sg[i];
+	for_each_sg(sg, s, nents, i) {
 		if (!s->dma_length || !s->length)
 			break;
 		gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
@@ -292,14 +293,14 @@ static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
 static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 			       int nents, int dir)
 {
+	struct scatterlist *s;
 	int i;
 
 #ifdef CONFIG_IOMMU_DEBUG
 	printk(KERN_DEBUG "dma_map_sg overflow\n");
 #endif
 
- 	for (i = 0; i < nents; i++ ) {
-		struct scatterlist *s = &sg[i];
+	for_each_sg(sg, s, nents, i) {
 		unsigned long addr = page_to_phys(s->page) + s->offset; 
 		if (nonforced_iommu(dev, addr, s->length)) { 
 			addr = dma_map_area(dev, addr, s->length, dir);
@@ -319,23 +320,23 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 }
 
 /* Map multiple scatterlist entries continuous into the first. */
-static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
+static int __dma_map_cont(struct scatterlist *start, int nelems,
 		      struct scatterlist *sout, unsigned long pages)
 {
 	unsigned long iommu_start = alloc_iommu(pages);
 	unsigned long iommu_page = iommu_start; 
+	struct scatterlist *s;
 	int i;
 
 	if (iommu_start == -1)
 		return -1;
-	
-	for (i = start; i < stopat; i++) {
-		struct scatterlist *s = &sg[i];
+
+	for_each_sg(start, s, nelems, i) {
 		unsigned long pages, addr;
 		unsigned long phys_addr = s->dma_address;
 		
-		BUG_ON(i > start && s->offset);
-		if (i == start) {
+		BUG_ON(s != start && s->offset);
+		if (s == start) {
 			*sout = *s; 
 			sout->dma_address = iommu_bus_base;
 			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
@@ -357,17 +358,17 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
 	return 0;
 }
 
-static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
+static inline int dma_map_cont(struct scatterlist *start, int nelems,
 		      struct scatterlist *sout,
 		      unsigned long pages, int need)
 {
-	if (!need) { 
-		BUG_ON(stopat - start != 1);
-		*sout = sg[start]; 
-		sout->dma_length = sg[start].length; 
+	if (!need) {
+		BUG_ON(nelems != 1);
+		*sout = *start;
+		sout->dma_length = start->length;
 		return 0;
-	} 
-	return __dma_map_cont(sg, start, stopat, sout, pages);
+	}
+	return __dma_map_cont(start, nelems, sout, pages);
 }
 		
 /*
@@ -381,6 +382,7 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 	int start;
 	unsigned long pages = 0;
 	int need = 0, nextneed;
+	struct scatterlist *s, *ps, *start_sg, *sgmap;
 
 	if (nents == 0) 
 		return 0;
@@ -390,8 +392,9 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 
 	out = 0;
 	start = 0;
-	for (i = 0; i < nents; i++) {
-		struct scatterlist *s = &sg[i];
+	start_sg = sgmap = sg;
+	ps = NULL; /* shut up gcc */
+	for_each_sg(sg, s, nents, i) {
 		dma_addr_t addr = page_to_phys(s->page) + s->offset;
 		s->dma_address = addr;
 		BUG_ON(s->length == 0); 
@@ -400,29 +403,33 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 
 		/* Handle the previous not yet processed entries */
 		if (i > start) {
-			struct scatterlist *ps = &sg[i-1];
 			/* Can only merge when the last chunk ends on a page 
 			   boundary and the new one doesn't have an offset. */
 			if (!iommu_merge || !nextneed || !need || s->offset ||
-			    (ps->offset + ps->length) % PAGE_SIZE) { 
-				if (dma_map_cont(sg, start, i, sg+out, pages,
-						 need) < 0)
+			    (ps->offset + ps->length) % PAGE_SIZE) {
+				if (dma_map_cont(start_sg, i - start, sgmap,
+						  pages, need) < 0)
 					goto error;
 				out++;
+				sgmap = sg_next(sgmap);
 				pages = 0;
-				start = i;	
+				start = i;
+				start_sg = s;
 			}
 		}
 
 		need = nextneed;
 		pages += to_pages(s->offset, s->length);
+		ps = s;
 	}
-	if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
+	if (dma_map_cont(start_sg, i - start, sgmap, pages, need) < 0)
 		goto error;
 	out++;
 	flush_gart();
-	if (out < nents) 
-		sg[out].dma_length = 0; 
+	if (out < nents) {
+		sgmap = sg_next(sgmap);
+		sgmap->dma_length = 0;
+	}
 	return out;
 
 error:
@@ -437,8 +444,8 @@ error:
 	if (panic_on_overflow)
 		panic("dma_map_sg: overflow on %lu pages\n", pages);
 	iommu_full(dev, pages << PAGE_SHIFT, dir);
-	for (i = 0; i < nents; i++)
-		sg[i].dma_address = bad_dma_address;
+	for_each_sg(sg, s, nents, i)
+		s->dma_address = bad_dma_address;
 	return 0;
 } 
 
diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c
index 2a34c6c025a..e85d4360360 100644
--- a/arch/x86/kernel/pci-nommu_64.c
+++ b/arch/x86/kernel/pci-nommu_64.c
@@ -5,6 +5,7 @@
 #include <linux/pci.h>
 #include <linux/string.h>
 #include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
 
 #include <asm/iommu.h>
 #include <asm/processor.h>
@@ -57,10 +58,10 @@ static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
 static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
 	       int nents, int direction)
 {
+	struct scatterlist *s;
 	int i;
 
- 	for (i = 0; i < nents; i++ ) {
-		struct scatterlist *s = &sg[i];
+	for_each_sg(sg, s, nents, i) {
 		BUG_ON(!s->page);
 		s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
 		if (!check_addr("map_sg", hwdev, s->dma_address, s->length))
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7352d4b377e..6309b275cb9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -581,7 +581,7 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
  *
  * Kprobes not supported here. Set the probe on schedule instead.
  */
-__kprobes struct task_struct *
+struct task_struct *
 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
diff --git a/arch/x86/kernel/ptrace_32.c b/arch/x86/kernel/ptrace_32.c
index 0cecd7513c9..8622b9cd3e3 100644
--- a/arch/x86/kernel/ptrace_32.c
+++ b/arch/x86/kernel/ptrace_32.c
@@ -524,11 +524,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = 0;
 		break;
 
-	case PTRACE_DETACH:
-		/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
 	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
 	  	if (!access_ok(VERIFY_WRITE, datap, FRAME_SIZE*sizeof(long))) {
 			ret = -EIO;
diff --git a/arch/x86/kernel/ptrace_64.c b/arch/x86/kernel/ptrace_64.c
index c0cac42df3b..86321ee6da9 100644
--- a/arch/x86/kernel/ptrace_64.c
+++ b/arch/x86/kernel/ptrace_64.c
@@ -500,11 +500,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = 0;
 		break;
 
-	case PTRACE_DETACH:
-		/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
 	case PTRACE_GETREGS: { /* Get all gp regs from the child. */
 	  	if (!access_ok(VERIFY_WRITE, (unsigned __user *)data,
 			       sizeof(struct user_regs_struct))) {
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index b7da90e79c7..85b5b6310ac 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -1070,7 +1070,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	if (smp_num_siblings * c->x86_max_cores > 1) {
 		int cpu = c - cpu_data;
 		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
-		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
+		seq_printf(m, "siblings\t: %d\n",
+			       cpus_weight(per_cpu(cpu_core_map, cpu)));
 		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
 		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index e4f61d1c624..31fc08bd15e 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -70,12 +70,12 @@ EXPORT_SYMBOL(smp_num_siblings);
 int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
 
 /* representing HT siblings of each logical CPU */
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 
 /* representing HT and core siblings of each logical CPU */
-cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
 /* bitmap of online cpus */
 cpumask_t cpu_online_map __read_mostly;
@@ -300,7 +300,7 @@ cpumask_t cpu_coregroup_map(int cpu)
 	 * And for power savings, we return cpu_core_map
 	 */
 	if (sched_mc_power_savings || sched_smt_power_savings)
-		return cpu_core_map[cpu];
+		return per_cpu(cpu_core_map, cpu);
 	else
 		return c->llc_shared_map;
 }
@@ -319,22 +319,22 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
 			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
 			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
-				cpu_set(i, cpu_sibling_map[cpu]);
-				cpu_set(cpu, cpu_sibling_map[i]);
-				cpu_set(i, cpu_core_map[cpu]);
-				cpu_set(cpu, cpu_core_map[i]);
+				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+				cpu_set(i, per_cpu(cpu_core_map, cpu));
+				cpu_set(cpu, per_cpu(cpu_core_map, i));
 				cpu_set(i, c[cpu].llc_shared_map);
 				cpu_set(cpu, c[i].llc_shared_map);
 			}
 		}
 	} else {
-		cpu_set(cpu, cpu_sibling_map[cpu]);
+		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 	}
 
 	cpu_set(cpu, c[cpu].llc_shared_map);
 
 	if (current_cpu_data.x86_max_cores == 1) {
-		cpu_core_map[cpu] = cpu_sibling_map[cpu];
+		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
 		c[cpu].booted_cores = 1;
 		return;
 	}
@@ -346,17 +346,17 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 			cpu_set(cpu, c[i].llc_shared_map);
 		}
 		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
-			cpu_set(i, cpu_core_map[cpu]);
-			cpu_set(cpu, cpu_core_map[i]);
+			cpu_set(i, per_cpu(cpu_core_map, cpu));
+			cpu_set(cpu, per_cpu(cpu_core_map, i));
 			/*
 			 *  Does this new cpu bringup a new core?
 			 */
-			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+			if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
 				/*
 				 * for each core in package, increment
 				 * the booted_cores for this new cpu
 				 */
-				if (first_cpu(cpu_sibling_map[i]) == i)
+				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
 					c[cpu].booted_cores++;
 				/*
 				 * increment the core count for all
@@ -983,8 +983,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 			printk(KERN_NOTICE "Local APIC not detected."
 					   " Using dummy APIC emulation.\n");
 		map_cpu_to_logical_apicid();
-		cpu_set(0, cpu_sibling_map[0]);
-		cpu_set(0, cpu_core_map[0]);
+		cpu_set(0, per_cpu(cpu_sibling_map, 0));
+		cpu_set(0, per_cpu(cpu_core_map, 0));
 		return;
 	}
 
@@ -1008,8 +1008,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
 		smpboot_clear_io_apic_irqs();
 		phys_cpu_present_map = physid_mask_of_physid(0);
-		cpu_set(0, cpu_sibling_map[0]);
-		cpu_set(0, cpu_core_map[0]);
+		cpu_set(0, per_cpu(cpu_sibling_map, 0));
+		cpu_set(0, per_cpu(cpu_core_map, 0));
 		return;
 	}
 
@@ -1023,8 +1023,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
 		smpboot_clear_io_apic_irqs();
 		phys_cpu_present_map = physid_mask_of_physid(0);
-		cpu_set(0, cpu_sibling_map[0]);
-		cpu_set(0, cpu_core_map[0]);
+		cpu_set(0, per_cpu(cpu_sibling_map, 0));
+		cpu_set(0, per_cpu(cpu_core_map, 0));
 		return;
 	}
 
@@ -1102,16 +1102,16 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 	Dprintk("Boot done.\n");
 
 	/*
-	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
+	 * construct cpu_sibling_map, so that we can tell sibling CPUs
 	 * efficiently.
 	 */
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-		cpus_clear(cpu_sibling_map[cpu]);
-		cpus_clear(cpu_core_map[cpu]);
+		cpus_clear(per_cpu(cpu_sibling_map, cpu));
+		cpus_clear(per_cpu(cpu_core_map, cpu));
 	}
 
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
+	cpu_set(0, per_cpu(cpu_sibling_map, 0));
+	cpu_set(0, per_cpu(cpu_core_map, 0));
 
 	smpboot_setup_io_apic();
 
@@ -1148,19 +1148,19 @@ void remove_siblinginfo(int cpu)
 	int sibling;
 	struct cpuinfo_x86 *c = cpu_data;
 
-	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
-		cpu_clear(cpu, cpu_core_map[sibling]);
-		/*
+	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
+		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
+		/*
 		 * last thread sibling in this cpu core going down
 		 */
-		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
 			c[sibling].booted_cores--;
 	}
 			
-	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
-		cpu_clear(cpu, cpu_sibling_map[sibling]);
-	cpus_clear(cpu_sibling_map[cpu]);
-	cpus_clear(cpu_core_map[cpu]);
+	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
+	cpus_clear(per_cpu(cpu_sibling_map, cpu));
+	cpus_clear(per_cpu(cpu_core_map, cpu));
 	c[cpu].phys_proc_id = 0;
 	c[cpu].cpu_core_id = 0;
 	cpu_clear(cpu, cpu_sibling_setup_map);
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
index 720a7d1f886..0faa0a0af27 100644
--- a/arch/x86/kernel/smpboot_64.c
+++ b/arch/x86/kernel/smpboot_64.c
@@ -91,12 +91,12 @@ EXPORT_SYMBOL(cpu_data);
 int smp_threads_ready;
 
 /* representing HT siblings of each logical CPU */
-cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 
 /* representing HT and core siblings of each logical CPU */
-cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
 /*
  * Trampoline 80x86 program as an array.
@@ -243,7 +243,7 @@ cpumask_t cpu_coregroup_map(int cpu)
 	 * And for power savings, we return cpu_core_map
 	 */
 	if (sched_mc_power_savings || sched_smt_power_savings)
-		return cpu_core_map[cpu];
+		return per_cpu(cpu_core_map, cpu);
 	else
 		return c->llc_shared_map;
 }
@@ -262,22 +262,22 @@ static inline void set_cpu_sibling_map(int cpu)
 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
 			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
 			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
-				cpu_set(i, cpu_sibling_map[cpu]);
-				cpu_set(cpu, cpu_sibling_map[i]);
-				cpu_set(i, cpu_core_map[cpu]);
-				cpu_set(cpu, cpu_core_map[i]);
+				cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+				cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+				cpu_set(i, per_cpu(cpu_core_map, cpu));
+				cpu_set(cpu, per_cpu(cpu_core_map, i));
 				cpu_set(i, c[cpu].llc_shared_map);
 				cpu_set(cpu, c[i].llc_shared_map);
 			}
 		}
 	} else {
-		cpu_set(cpu, cpu_sibling_map[cpu]);
+		cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
 	}
 
 	cpu_set(cpu, c[cpu].llc_shared_map);
 
 	if (current_cpu_data.x86_max_cores == 1) {
-		cpu_core_map[cpu] = cpu_sibling_map[cpu];
+		per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
 		c[cpu].booted_cores = 1;
 		return;
 	}
@@ -289,17 +289,17 @@ static inline void set_cpu_sibling_map(int cpu)
 			cpu_set(cpu, c[i].llc_shared_map);
 		}
 		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
-			cpu_set(i, cpu_core_map[cpu]);
-			cpu_set(cpu, cpu_core_map[i]);
+			cpu_set(i, per_cpu(cpu_core_map, cpu));
+			cpu_set(cpu, per_cpu(cpu_core_map, i));
 			/*
 			 *  Does this new cpu bringup a new core?
 			 */
-			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+			if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
 				/*
 				 * for each core in package, increment
 				 * the booted_cores for this new cpu
 				 */
-				if (first_cpu(cpu_sibling_map[i]) == i)
+				if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
 					c[cpu].booted_cores++;
 				/*
 				 * increment the core count for all
@@ -735,8 +735,8 @@ static __init void disable_smp(void)
 		phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
 	else
 		phys_cpu_present_map = physid_mask_of_physid(0);
-	cpu_set(0, cpu_sibling_map[0]);
-	cpu_set(0, cpu_core_map[0]);
+	cpu_set(0, per_cpu(cpu_sibling_map, 0));
+	cpu_set(0, per_cpu(cpu_core_map, 0));
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -971,19 +971,19 @@ static void remove_siblinginfo(int cpu)
 	int sibling;
 	struct cpuinfo_x86 *c = cpu_data;
 
-	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
-		cpu_clear(cpu, cpu_core_map[sibling]);
+	for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
+		cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
 		/*
 		 * last thread sibling in this cpu core going down
 		 */
-		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+		if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
 			c[sibling].booted_cores--;
 	}
 			
-	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
-		cpu_clear(cpu, cpu_sibling_map[sibling]);
-	cpus_clear(cpu_sibling_map[cpu]);
-	cpus_clear(cpu_core_map[cpu]);
+	for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
+		cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
+	cpus_clear(per_cpu(cpu_sibling_map, cpu));
+	cpus_clear(per_cpu(cpu_core_map, cpu));
 	c[cpu].phys_proc_id = 0;
 	c[cpu].cpu_core_id = 0;
 	cpu_clear(cpu, cpu_sibling_setup_map);
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index fcb38e7f354..c686ae20fd6 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -25,6 +25,7 @@
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
+#include <linux/kprobes.h>
 
 #include <asm/system.h>
 #include <asm/desc.h>
@@ -32,33 +33,27 @@
 
 extern void die(const char *,struct pt_regs *,long);
 
-static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-
-int register_page_fault_notifier(struct notifier_block *nb)
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs)
 {
-	vmalloc_sync_all();
-	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_page_fault_notifier);
+	int ret = 0;
+
+	/* kprobe_running() needs smp_processor_id() */
+	if (!user_mode_vm(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, 14))
+			ret = 1;
+		preempt_enable();
+	}
 
-int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
-
-static inline int notify_page_fault(struct pt_regs *regs, long err)
+#else
+static inline int notify_page_fault(struct pt_regs *regs)
 {
-	struct die_args args = {
-		.regs = regs,
-		.str = "page fault",
-		.err = err,
-		.trapnr = 14,
-		.signr = SIGSEGV
-	};
-	return atomic_notifier_call_chain(&notify_page_fault_chain,
-	                                  DIE_PAGE_FAULT, &args);
+	return 0;
 }
+#endif
 
 /*
  * Return EIP plus the CS segment base.  The segment limit is also
@@ -331,7 +326,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 	if (unlikely(address >= TASK_SIZE)) {
 		if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
 			return;
-		if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
+		if (notify_page_fault(regs))
 			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -340,7 +335,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
 		goto bad_area_nosemaphore;
 	}
 
-	if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
+	if (notify_page_fault(regs))
 		return;
 
 	/* It's safe to allow irq's after cr2 has been saved and the vmalloc
@@ -598,7 +593,7 @@ out_of_memory:
 	}
 	printk("VM: killing process %s\n", tsk->comm);
 	if (error_code & 4)
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	goto no_context;
 
 do_sigbus:
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 54816adb8e9..5e0e54906c4 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -25,6 +25,7 @@
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
+#include <linux/kprobes.h>
 
 #include <asm/system.h>
 #include <asm/pgalloc.h>
@@ -40,34 +41,27 @@
 #define PF_RSVD	(1<<3)
 #define PF_INSTR	(1<<4)
 
-static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
-
-/* Hook to register for page fault notifications */
-int register_page_fault_notifier(struct notifier_block *nb)
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs)
 {
-	vmalloc_sync_all();
-	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-}
-EXPORT_SYMBOL_GPL(register_page_fault_notifier);
+	int ret = 0;
+
+	/* kprobe_running() needs smp_processor_id() */
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, 14))
+			ret = 1;
+		preempt_enable();
+	}
 
-int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+	return ret;
 }
-EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
-
-static inline int notify_page_fault(struct pt_regs *regs, long err)
+#else
+static inline int notify_page_fault(struct pt_regs *regs)
 {
-	struct die_args args = {
-		.regs = regs,
-		.str = "page fault",
-		.err = err,
-		.trapnr = 14,
-		.signr = SIGSEGV
-	};
-	return atomic_notifier_call_chain(&notify_page_fault_chain,
-	                                  DIE_PAGE_FAULT, &args);
+	return 0;
 }
+#endif
 
 /* Sometimes the CPU reports invalid exceptions on prefetch.
    Check that here and ignore.
@@ -345,7 +339,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 			if (vmalloc_fault(address) >= 0)
 				return;
 		}
-		if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
+		if (notify_page_fault(regs))
 			return;
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
@@ -354,7 +348,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 		goto bad_area_nosemaphore;
 	}
 
-	if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
+	if (notify_page_fault(regs))
 		return;
 
 	if (likely(regs->eflags & X86_EFLAGS_IF))
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 730a5b177b1..dda4e83649a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -735,11 +735,6 @@ int arch_add_memory(int nid, u64 start, u64 size)
 	return __add_pages(zone, start_pfn, nr_pages);
 }
 
-int remove_memory(u64 start, u64 size)
-{
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
 #endif
 
 struct kmem_cache *pmd_cache;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 458893b376f..1e3862e4106 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -474,12 +474,6 @@ error:
 }
 EXPORT_SYMBOL_GPL(arch_add_memory);
 
-int remove_memory(u64 start, u64 size)
-{
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(remove_memory);
-
 #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
 int memory_add_physaddr_to_nid(u64 start)
 {
@@ -748,3 +742,48 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 		return "[vsyscall]";
 	return NULL;
 }
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
+ */
+int __meminit vmemmap_populate(struct page *start_page,
+						unsigned long size, int node)
+{
+	unsigned long addr = (unsigned long)start_page;
+	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long next;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	for (; addr < end; addr = next) {
+		next = pmd_addr_end(addr, end);
+
+		pgd = vmemmap_pgd_populate(addr, node);
+		if (!pgd)
+			return -ENOMEM;
+		pud = vmemmap_pud_populate(pgd, addr, node);
+		if (!pud)
+			return -ENOMEM;
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd)) {
+			pte_t entry;
+			void *p = vmemmap_alloc_block(PMD_SIZE, node);
+			if (!p)
+				return -ENOMEM;
+
+			entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+			mk_pte_huge(entry);
+			set_pmd(pmd, __pmd(pte_val(entry)));
+
+			printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
+				addr, addr + PMD_SIZE - 1, p, node);
+		} else
+			vmemmap_verify((pte_t *)pmd, node, addr, next);
+	}
+
+	return 0;
+}
+#endif
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c
index 47925927b12..56b4757a1f4 100644
--- a/arch/x86/oprofile/op_model_p4.c
+++ b/arch/x86/oprofile/op_model_p4.c
@@ -379,7 +379,7 @@ static unsigned int get_stagger(void)
 {
 #ifdef CONFIG_SMP
 	int cpu = smp_processor_id();
-	return (cpu != first_cpu(cpu_sibling_map[cpu]));
+	return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu)));
 #endif	
 	return 0;
 }
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 557b8e24706..4fa33c27ccb 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -147,8 +147,13 @@ void __init xen_smp_prepare_boot_cpu(void)
 	make_lowmem_page_readwrite(&per_cpu__gdt_page);
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-		cpus_clear(cpu_sibling_map[cpu]);
-		cpus_clear(cpu_core_map[cpu]);
+		cpus_clear(per_cpu(cpu_sibling_map, cpu));
+		/*
+		 * cpu_core_map lives in a per cpu area that is cleared
+		 * when the per cpu array is allocated.
+		 *
+		 * cpus_clear(per_cpu(cpu_core_map, cpu));
+		 */
 	}
 
 	xen_setup_vcpu_info_placement();
@@ -159,8 +164,13 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 	unsigned cpu;
 
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-		cpus_clear(cpu_sibling_map[cpu]);
-		cpus_clear(cpu_core_map[cpu]);
+		cpus_clear(per_cpu(cpu_sibling_map, cpu));
+		/*
+		 * cpu_core_map will be zeroed when the per
+		 * cpu area is allocated.
+		 *
+		 * cpus_clear(per_cpu(cpu_core_map, cpu));
+		 */
 	}
 
 	smp_store_cpu_info(0);
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index cf013cb85ea..8c83dbe4c4d 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -409,6 +409,7 @@ config ARCH_DISCONTIGMEM_DEFAULT
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	depends on (NUMA || EXPERIMENTAL)
+	select SPARSEMEM_VMEMMAP_ENABLE
 
 config ARCH_MEMORY_PROBE
 	def_bool y
diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index 06a13d9b69d..5533c7850d5 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -304,10 +304,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		ret = put_user(sizeof(elf_fpregset_t), (unsigned long *) data);
 		break;
 
-	case PTRACE_DETACH: /* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		break;
-
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		goto out;
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 45d28f217c0..2f842859948 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -152,7 +152,7 @@ out_of_memory:
 	}
 	printk("VM: killing process %s\n", current->comm);
 	if (user_mode(regs))
-		do_exit(SIGKILL);
+		do_group_exit(SIGKILL);
 	bad_page_fault(regs, address, SIGKILL);
 	return;
 
diff --git a/block/blktrace.c b/block/blktrace.c
index 775471ef84a..d00ac3993c1 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -550,7 +550,7 @@ static void blk_trace_set_ht_offsets(void)
 	for_each_online_cpu(cpu) {
 		unsigned long long *cpu_off, *sibling_off;
 
-		for_each_cpu_mask(i, cpu_sibling_map[cpu]) {
+		for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu)) {
 			if (i == cpu)
 				continue;
 
diff --git a/block/bsg.c b/block/bsg.c
index b8ddfc66f21..8e181ab3afb 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -908,7 +908,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	}
 }
 
-static struct file_operations bsg_fops = {
+static const struct file_operations bsg_fops = {
 	.read		=	bsg_read,
 	.write		=	bsg_write,
 	.poll		=	bsg_poll,
diff --git a/block/elevator.c b/block/elevator.c
index b9c518afe1f..952aee04a68 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -712,6 +712,14 @@ struct request *elv_next_request(struct request_queue *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
+		/*
+		 * Kill the empty barrier place holder, the driver must
+		 * not ever see it.
+		 */
+		if (blk_empty_barrier(rq)) {
+			end_queued_request(rq, 1);
+			continue;
+		}
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
@@ -751,15 +759,8 @@ struct request *elv_next_request(struct request_queue *q)
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL) {
-			int nr_bytes = rq->hard_nr_sectors << 9;
-
-			if (!nr_bytes)
-				nr_bytes = rq->data_len;
-
-			blkdev_dequeue_request(rq);
 			rq->cmd_flags |= REQ_QUIET;
-			end_that_request_chunk(rq, 0, nr_bytes);
-			end_that_request_last(rq, 0);
+			end_queued_request(rq, 0);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
 								ret);
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index d875673e76c..9eabac95fbe 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
+#include <linux/scatterlist.h>
 
 /*
  * for max sense size
@@ -304,23 +305,6 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered,
 
 EXPORT_SYMBOL(blk_queue_ordered);
 
-/**
- * blk_queue_issue_flush_fn - set function for issuing a flush
- * @q:     the request queue
- * @iff:   the function to be called issuing the flush
- *
- * Description:
- *   If a driver supports issuing a flush command, the support is notified
- *   to the block layer by defining it through this call.
- *
- **/
-void blk_queue_issue_flush_fn(struct request_queue *q, issue_flush_fn *iff)
-{
-	q->issue_flush_fn = iff;
-}
-
-EXPORT_SYMBOL(blk_queue_issue_flush_fn);
-
 /*
  * Cache flushing for ordered writes handling
  */
@@ -377,10 +361,12 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	/*
 	 * Okay, sequence complete.
 	 */
-	rq = q->orig_bar_rq;
-	uptodate = q->orderr ? q->orderr : 1;
+	uptodate = 1;
+	if (q->orderr)
+		uptodate = q->orderr;
 
 	q->ordseq = 0;
+	rq = q->orig_bar_rq;
 
 	end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
 	end_that_request_last(rq, uptodate);
@@ -445,7 +431,8 @@ static inline struct request *start_ordered(struct request_queue *q,
 	rq_init(q, rq);
 	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
 		rq->cmd_flags |= REQ_RW;
-	rq->cmd_flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+	if (q->ordered & QUEUE_ORDERED_FUA)
+		rq->cmd_flags |= REQ_FUA;
 	rq->elevator_private = NULL;
 	rq->elevator_private2 = NULL;
 	init_request_from_bio(rq, q->orig_bar_rq->bio);
@@ -455,9 +442,12 @@ static inline struct request *start_ordered(struct request_queue *q,
 	 * Queue ordered sequence.  As we stack them at the head, we
 	 * need to queue in reverse order.  Note that we rely on that
 	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence.
+	 * request gets inbetween ordered sequence. If this request is
+	 * an empty barrier, we don't need to do a postflush ever since
+	 * there will be no data written between the pre and post flush.
+	 * Hence a single flush will suffice.
 	 */
-	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
 		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
 	else
 		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
@@ -481,7 +471,7 @@ static inline struct request *start_ordered(struct request_queue *q,
 int blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
 	struct request *rq = *rqp;
-	int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
+	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
 	if (!q->ordseq) {
 		if (!is_barrier)
@@ -1329,9 +1319,10 @@ static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
  * must make sure sg can hold rq->nr_phys_segments entries
  */
 int blk_rq_map_sg(struct request_queue *q, struct request *rq,
-		  struct scatterlist *sg)
+		  struct scatterlist *sglist)
 {
 	struct bio_vec *bvec, *bvprv;
+	struct scatterlist *next_sg, *sg;
 	struct req_iterator iter;
 	int nsegs, cluster;
 
@@ -1342,11 +1333,12 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 	 * for each bio in rq
 	 */
 	bvprv = NULL;
+	sg = next_sg = &sglist[0];
 	rq_for_each_segment(bvec, rq, iter) {
 		int nbytes = bvec->bv_len;
 
 		if (bvprv && cluster) {
-			if (sg[nsegs - 1].length + nbytes > q->max_segment_size)
+			if (sg->length + nbytes > q->max_segment_size)
 				goto new_segment;
 
 			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
@@ -1354,14 +1346,15 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
 				goto new_segment;
 
-			sg[nsegs - 1].length += nbytes;
+			sg->length += nbytes;
 		} else {
 new_segment:
-			memset(&sg[nsegs],0,sizeof(struct scatterlist));
-			sg[nsegs].page = bvec->bv_page;
-			sg[nsegs].length = nbytes;
-			sg[nsegs].offset = bvec->bv_offset;
+			sg = next_sg;
+			next_sg = sg_next(sg);
 
+			sg->page = bvec->bv_page;
+			sg->length = nbytes;
+			sg->offset = bvec->bv_offset;
 			nsegs++;
 		}
 		bvprv = bvec;
@@ -2660,6 +2653,14 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
 
 EXPORT_SYMBOL(blk_execute_rq);
 
+static void bio_end_empty_barrier(struct bio *bio, int err)
+{
+	if (err)
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+
+	complete(bio->bi_private);
+}
+
 /**
  * blkdev_issue_flush - queue a flush
  * @bdev:	blockdev to issue flush for
@@ -2672,7 +2673,10 @@ EXPORT_SYMBOL(blk_execute_rq);
  */
 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 {
+	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q;
+	struct bio *bio;
+	int ret;
 
 	if (bdev->bd_disk == NULL)
 		return -ENXIO;
@@ -2680,10 +2684,32 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 	q = bdev_get_queue(bdev);
 	if (!q)
 		return -ENXIO;
-	if (!q->issue_flush_fn)
-		return -EOPNOTSUPP;
 
-	return q->issue_flush_fn(q, bdev->bd_disk, error_sector);
+	bio = bio_alloc(GFP_KERNEL, 0);
+	if (!bio)
+		return -ENOMEM;
+
+	bio->bi_end_io = bio_end_empty_barrier;
+	bio->bi_private = &wait;
+	bio->bi_bdev = bdev;
+	submit_bio(1 << BIO_RW_BARRIER, bio);
+
+	wait_for_completion(&wait);
+
+	/*
+	 * The driver must store the error location in ->bi_sector, if
+	 * it supports it. For non-stacked drivers, this should be copied
+	 * from rq->sector.
+	 */
+	if (error_sector)
+		*error_sector = bio->bi_sector;
+
+	ret = 0;
+	if (!bio_flagged(bio, BIO_UPTODATE))
+		ret = -EIO;
+
+	bio_put(bio);
+	return ret;
 }
 
 EXPORT_SYMBOL(blkdev_issue_flush);
@@ -3051,7 +3077,7 @@ static inline void blk_partition_remap(struct bio *bio)
 {
 	struct block_device *bdev = bio->bi_bdev;
 
-	if (bdev != bdev->bd_contains) {
+	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 		const int rw = bio_data_dir(bio);
 
@@ -3117,6 +3143,35 @@ static inline int should_fail_request(struct bio *bio)
 
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
+/*
+ * Check whether this bio extends beyond the end of the device.
+ */
+static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
+{
+	sector_t maxsector;
+
+	if (!nr_sectors)
+		return 0;
+
+	/* Test device or partition size, when known. */
+	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
+	if (maxsector) {
+		sector_t sector = bio->bi_sector;
+
+		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
+			/*
+			 * This may well happen - the kernel calls bread()
+			 * without checking the size of the device, e.g., when
+			 * mounting a device.
+			 */
+			handle_bad_sector(bio);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * generic_make_request: hand a buffer to its device driver for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -3144,27 +3199,14 @@ static inline int should_fail_request(struct bio *bio)
 static inline void __generic_make_request(struct bio *bio)
 {
 	struct request_queue *q;
-	sector_t maxsector;
 	sector_t old_sector;
 	int ret, nr_sectors = bio_sectors(bio);
 	dev_t old_dev;
 
 	might_sleep();
-	/* Test device or partition size, when known. */
-	maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-	if (maxsector) {
-		sector_t sector = bio->bi_sector;
 
-		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
-			/*
-			 * This may well happen - the kernel calls bread()
-			 * without checking the size of the device, e.g., when
-			 * mounting a device.
-			 */
-			handle_bad_sector(bio);
-			goto end_io;
-		}
-	}
+	if (bio_check_eod(bio, nr_sectors))
+		goto end_io;
 
 	/*
 	 * Resolve the mapping until finished. (drivers are
@@ -3191,7 +3233,7 @@ end_io:
 			break;
 		}
 
-		if (unlikely(bio_sectors(bio) > q->max_hw_sectors)) {
+		if (unlikely(nr_sectors > q->max_hw_sectors)) {
 			printk("bio too big device %s (%u > %u)\n", 
 				bdevname(bio->bi_bdev, b),
 				bio_sectors(bio),
@@ -3212,7 +3254,7 @@ end_io:
 		blk_partition_remap(bio);
 
 		if (old_sector != -1)
-			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 
+			blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
 					    old_sector);
 
 		blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
@@ -3220,21 +3262,8 @@ end_io:
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
 
-		maxsector = bio->bi_bdev->bd_inode->i_size >> 9;
-		if (maxsector) {
-			sector_t sector = bio->bi_sector;
-
-			if (maxsector < nr_sectors ||
-					maxsector - nr_sectors < sector) {
-				/*
-				 * This may well happen - partitions are not
-				 * checked to make sure they are within the size
-				 * of the whole device.
-				 */
-				handle_bad_sector(bio);
-				goto end_io;
-			}
-		}
+		if (bio_check_eod(bio, nr_sectors))
+			goto end_io;
 
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
@@ -3307,23 +3336,32 @@ void submit_bio(int rw, struct bio *bio)
 {
 	int count = bio_sectors(bio);
 
-	BIO_BUG_ON(!bio->bi_size);
-	BIO_BUG_ON(!bio->bi_io_vec);
 	bio->bi_rw |= rw;
-	if (rw & WRITE) {
-		count_vm_events(PGPGOUT, count);
-	} else {
-		task_io_account_read(bio->bi_size);
-		count_vm_events(PGPGIN, count);
-	}
 
-	if (unlikely(block_dump)) {
-		char b[BDEVNAME_SIZE];
-		printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
-			current->comm, current->pid,
-			(rw & WRITE) ? "WRITE" : "READ",
-			(unsigned long long)bio->bi_sector,
-			bdevname(bio->bi_bdev,b));
+	/*
+	 * If it's a regular read/write or a barrier with data attached,
+	 * go through the normal accounting stuff before submission.
+	 */
+	if (!bio_empty_barrier(bio)) {
+
+		BIO_BUG_ON(!bio->bi_size);
+		BIO_BUG_ON(!bio->bi_io_vec);
+
+		if (rw & WRITE) {
+			count_vm_events(PGPGOUT, count);
+		} else {
+			task_io_account_read(bio->bi_size);
+			count_vm_events(PGPGIN, count);
+		}
+
+		if (unlikely(block_dump)) {
+			char b[BDEVNAME_SIZE];
+			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s\n",
+				current->comm, current->pid,
+				(rw & WRITE) ? "WRITE" : "READ",
+				(unsigned long long)bio->bi_sector,
+				bdevname(bio->bi_bdev,b));
+		}
 	}
 
 	generic_make_request(bio);
@@ -3399,6 +3437,14 @@ static int __end_that_request_first(struct request *req, int uptodate,
 	while ((bio = req->bio) != NULL) {
 		int nbytes;
 
+		/*
+		 * For an empty barrier request, the low level driver must
+		 * store a potential error location in ->sector. We pass
+		 * that back up in ->bi_sector.
+		 */
+		if (blk_empty_barrier(req))
+			bio->bi_sector = req->sector;
+
 		if (nr_bytes >= bio->bi_size) {
 			req->bio = bio->bi_next;
 			nbytes = bio->bi_size;
@@ -3564,7 +3610,7 @@ static struct notifier_block blk_cpu_notifier __cpuinitdata = {
  * Description:
  *     Ends all I/O on a request. It does not handle partial completions,
  *     unless the driver actually implements this in its completion callback
- *     through requeueing. Theh actual completion happens out-of-order,
+ *     through requeueing. The actual completion happens out-of-order,
  *     through a softirq handler. The user must have registered a completion
  *     callback through blk_queue_softirq_done().
  **/
@@ -3627,15 +3673,83 @@ void end_that_request_last(struct request *req, int uptodate)
 
 EXPORT_SYMBOL(end_that_request_last);
 
-void end_request(struct request *req, int uptodate)
+static inline void __end_request(struct request *rq, int uptodate,
+				 unsigned int nr_bytes, int dequeue)
 {
-	if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
-		add_disk_randomness(req->rq_disk);
-		blkdev_dequeue_request(req);
-		end_that_request_last(req, uptodate);
+	if (!end_that_request_chunk(rq, uptodate, nr_bytes)) {
+		if (dequeue)
+			blkdev_dequeue_request(rq);
+		add_disk_randomness(rq->rq_disk);
+		end_that_request_last(rq, uptodate);
 	}
 }
 
+static unsigned int rq_byte_size(struct request *rq)
+{
+	if (blk_fs_request(rq))
+		return rq->hard_nr_sectors << 9;
+
+	return rq->data_len;
+}
+
+/**
+ * end_queued_request - end all I/O on a queued request
+ * @rq:		the request being processed
+ * @uptodate:	error value or 0/1 uptodate flag
+ *
+ * Description:
+ *     Ends all I/O on a request, and removes it from the block layer queues.
+ *     Not suitable for normal IO completion, unless the driver still has
+ *     the request attached to the block layer.
+ *
+ **/
+void end_queued_request(struct request *rq, int uptodate)
+{
+	__end_request(rq, uptodate, rq_byte_size(rq), 1);
+}
+EXPORT_SYMBOL(end_queued_request);
+
+/**
+ * end_dequeued_request - end all I/O on a dequeued request
+ * @rq:		the request being processed
+ * @uptodate:	error value or 0/1 uptodate flag
+ *
+ * Description:
+ *     Ends all I/O on a request. The request must already have been
+ *     dequeued using blkdev_dequeue_request(), as is normally the case
+ *     for most drivers.
+ *
+ **/
+void end_dequeued_request(struct request *rq, int uptodate)
+{
+	__end_request(rq, uptodate, rq_byte_size(rq), 0);
+}
+EXPORT_SYMBOL(end_dequeued_request);
+
+
+/**
+ * end_request - end I/O on the current segment of the request
+ * @rq:		the request being processed
+ * @uptodate:	error value or 0/1 uptodate flag
+ *
+ * Description:
+ *     Ends I/O on the current segment of a request. If that is the only
+ *     remaining segment, the request is also completed and freed.
+ *
+ *     This is a remnant of how older block drivers handled IO completions.
+ *     Modern drivers typically end IO on the full request in one go, unless
+ *     they have a residual value to account for. For that case this function
+ *     isn't really useful, unless the residual just happens to be the
+ *     full current segment. In other words, don't use this function in new
+ *     code. Either use end_queued_request()/end_dequeued_request(), or
+ *     end_that_request_chunk() (along with end_that_request_last()) for
+ *     partial completions.
+ *
+ **/
+void end_request(struct request *req, int uptodate)
+{
+	__end_request(req, uptodate, req->hard_cur_sectors << 9, 1);
+}
 EXPORT_SYMBOL(end_request);
 
 static void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
@@ -3928,7 +4042,6 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 			max_hw_sectors_kb = q->max_hw_sectors >> 1,
 			page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
 	ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
-	int ra_kb;
 
 	if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
 		return -EINVAL;
@@ -3937,14 +4050,6 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 	 * values synchronously:
 	 */
 	spin_lock_irq(q->queue_lock);
-	/*
-	 * Trim readahead window as well, if necessary:
-	 */
-	ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
-	if (ra_kb > max_sectors_kb)
-		q->backing_dev_info.ra_pages =
-				max_sectors_kb >> (PAGE_CACHE_SHIFT - 10);
-
 	q->max_sectors = max_sectors_kb << 1;
 	spin_unlock_irq(q->queue_lock);
 
@@ -3958,7 +4063,23 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
 
+static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(q->max_phys_segments, page);
+}
+
+static ssize_t queue_max_segments_store(struct request_queue *q,
+					const char *page, size_t count)
+{
+	unsigned long segments;
+	ssize_t ret = queue_var_store(&segments, page, count);
+
+	spin_lock_irq(q->queue_lock);
+	q->max_phys_segments = segments;
+	spin_unlock_irq(q->queue_lock);
 
+	return ret;
+}
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -3982,6 +4103,12 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
 	.show = queue_max_hw_sectors_show,
 };
 
+static struct queue_sysfs_entry queue_max_segments_entry = {
+	.attr = {.name = "max_segments", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_max_segments_show,
+	.store = queue_max_segments_store,
+};
+
 static struct queue_sysfs_entry queue_iosched_entry = {
 	.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
 	.show = elv_iosched_show,
@@ -3993,6 +4120,7 @@ static struct attribute *default_attrs[] = {
 	&queue_ra_entry.attr,
 	&queue_max_hw_sectors_entry.attr,
 	&queue_max_sectors_entry.attr,
+	&queue_max_segments_entry.attr,
 	&queue_iosched_entry.attr,
 	NULL,
 };
diff --git a/crypto/digest.c b/crypto/digest.c
index 1bf7414aeb9..e56de6748b1 100644
--- a/crypto/digest.c
+++ b/crypto/digest.c
@@ -77,7 +77,7 @@ static int update2(struct hash_desc *desc,
 
 		if (!nbytes)
 			break;
-		sg = sg_next(sg);
+		sg = scatterwalk_sg_next(sg);
 	}
 
 	return 0;
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 3052f6507f5..d6852c33cfb 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -62,7 +62,7 @@ static void scatterwalk_pagedone(struct scatter_walk *walk, int out,
 		walk->offset += PAGE_SIZE - 1;
 		walk->offset &= PAGE_MASK;
 		if (walk->offset >= walk->sg->offset + walk->sg->length)
-			scatterwalk_start(walk, sg_next(walk->sg));
+			scatterwalk_start(walk, scatterwalk_sg_next(walk->sg));
 	}
 }
 
diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h
index 500a220ad90..9c73e37a42c 100644
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -20,7 +20,7 @@
 
 #include "internal.h"
 
-static inline struct scatterlist *sg_next(struct scatterlist *sg)
+static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg)
 {
 	return (++sg)->length ? sg : (void *)sg->page;
 }
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 7bdae47d6b9..4fb134d50da 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -84,6 +84,8 @@ source "drivers/rtc/Kconfig"
 
 source "drivers/dma/Kconfig"
 
+source "drivers/dca/Kconfig"
+
 source "drivers/auxdisplay/Kconfig"
 
 source "drivers/kvm/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index a168eacdcd9..174c27eb443 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -85,6 +85,7 @@ obj-$(CONFIG_CRYPTO)		+= crypto/
 obj-$(CONFIG_SUPERH)		+= sh/
 obj-$(CONFIG_GENERIC_TIME)	+= clocksource/
 obj-$(CONFIG_DMA_ENGINE)	+= dma/
+obj-$(CONFIG_DCA)		+= dca/
 obj-$(CONFIG_HID)		+= hid/
 obj-$(CONFIG_PPC_PS3)		+= ps3/
 obj-$(CONFIG_OF)		+= of/
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 68699b3e799..bbaa545ea99 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1410,7 +1410,7 @@ static void ata_qc_complete_internal(struct ata_queued_cmd *qc)
  */
 unsigned ata_exec_internal_sg(struct ata_device *dev,
 			      struct ata_taskfile *tf, const u8 *cdb,
-			      int dma_dir, struct scatterlist *sg,
+			      int dma_dir, struct scatterlist *sgl,
 			      unsigned int n_elem, unsigned long timeout)
 {
 	struct ata_link *link = dev->link;
@@ -1472,11 +1472,12 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	qc->dma_dir = dma_dir;
 	if (dma_dir != DMA_NONE) {
 		unsigned int i, buflen = 0;
+		struct scatterlist *sg;
 
-		for (i = 0; i < n_elem; i++)
-			buflen += sg[i].length;
+		for_each_sg(sgl, sg, n_elem, i)
+			buflen += sg->length;
 
-		ata_sg_init(qc, sg, n_elem);
+		ata_sg_init(qc, sgl, n_elem);
 		qc->nbytes = buflen;
 	}
 
@@ -4292,7 +4293,7 @@ void ata_sg_clean(struct ata_queued_cmd *qc)
 		if (qc->n_elem)
 			dma_unmap_sg(ap->dev, sg, qc->n_elem, dir);
 		/* restore last sg */
-		sg[qc->orig_n_elem - 1].length += qc->pad_len;
+		sg_last(sg, qc->orig_n_elem)->length += qc->pad_len;
 		if (pad_buf) {
 			struct scatterlist *psg = &qc->pad_sgent;
 			void *addr = kmap_atomic(psg->page, KM_IRQ0);
@@ -4547,6 +4548,7 @@ void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen)
 	qc->orig_n_elem = 1;
 	qc->buf_virt = buf;
 	qc->nbytes = buflen;
+	qc->cursg = qc->__sg;
 
 	sg_init_one(&qc->sgent, buf, buflen);
 }
@@ -4572,6 +4574,7 @@ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
 	qc->__sg = sg;
 	qc->n_elem = n_elem;
 	qc->orig_n_elem = n_elem;
+	qc->cursg = qc->__sg;
 }
 
 /**
@@ -4661,7 +4664,7 @@ static int ata_sg_setup(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	struct scatterlist *sg = qc->__sg;
-	struct scatterlist *lsg = &sg[qc->n_elem - 1];
+	struct scatterlist *lsg = sg_last(qc->__sg, qc->n_elem);
 	int n_elem, pre_n_elem, dir, trim_sg = 0;
 
 	VPRINTK("ENTER, ata%u\n", ap->print_id);
@@ -4825,7 +4828,6 @@ void ata_data_xfer_noirq(struct ata_device *adev, unsigned char *buf,
 static void ata_pio_sector(struct ata_queued_cmd *qc)
 {
 	int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
-	struct scatterlist *sg = qc->__sg;
 	struct ata_port *ap = qc->ap;
 	struct page *page;
 	unsigned int offset;
@@ -4834,8 +4836,8 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 	if (qc->curbytes == qc->nbytes - qc->sect_size)
 		ap->hsm_task_state = HSM_ST_LAST;
 
-	page = sg[qc->cursg].page;
-	offset = sg[qc->cursg].offset + qc->cursg_ofs;
+	page = qc->cursg->page;
+	offset = qc->cursg->offset + qc->cursg_ofs;
 
 	/* get the current page and offset */
 	page = nth_page(page, (offset >> PAGE_SHIFT));
@@ -4863,8 +4865,8 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 	qc->curbytes += qc->sect_size;
 	qc->cursg_ofs += qc->sect_size;
 
-	if (qc->cursg_ofs == (&sg[qc->cursg])->length) {
-		qc->cursg++;
+	if (qc->cursg_ofs == qc->cursg->length) {
+		qc->cursg = sg_next(qc->cursg);
 		qc->cursg_ofs = 0;
 	}
 }
@@ -4950,16 +4952,18 @@ static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
 {
 	int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
 	struct scatterlist *sg = qc->__sg;
+	struct scatterlist *lsg = sg_last(qc->__sg, qc->n_elem);
 	struct ata_port *ap = qc->ap;
 	struct page *page;
 	unsigned char *buf;
 	unsigned int offset, count;
+	int no_more_sg = 0;
 
 	if (qc->curbytes + bytes >= qc->nbytes)
 		ap->hsm_task_state = HSM_ST_LAST;
 
 next_sg:
-	if (unlikely(qc->cursg >= qc->n_elem)) {
+	if (unlikely(no_more_sg)) {
 		/*
 		 * The end of qc->sg is reached and the device expects
 		 * more data to transfer. In order not to overrun qc->sg
@@ -4982,7 +4986,7 @@ next_sg:
 		return;
 	}
 
-	sg = &qc->__sg[qc->cursg];
+	sg = qc->cursg;
 
 	page = sg->page;
 	offset = sg->offset + qc->cursg_ofs;
@@ -5021,7 +5025,10 @@ next_sg:
 	qc->cursg_ofs += count;
 
 	if (qc->cursg_ofs == sg->length) {
-		qc->cursg++;
+		if (qc->cursg == lsg)
+			no_more_sg = 1;
+
+		qc->cursg = sg_next(qc->cursg);
 		qc->cursg_ofs = 0;
 	}
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index d63c81ed084..9fbb39cd0f5 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -801,8 +801,6 @@ int ata_scsi_slave_config(struct scsi_device *sdev)
 
 	ata_scsi_sdev_config(sdev);
 
-	blk_queue_max_phys_segments(sdev->request_queue, LIBATA_MAX_PRD);
-
 	sdev->manage_start_stop = 1;
 
 	if (dev)
@@ -3240,7 +3238,7 @@ static void ata_scsi_handle_link_detach(struct ata_link *link)
 
 /**
  *	ata_scsi_media_change_notify - send media change event
- *	@atadev: Pointer to the disk device with media change event
+ *	@dev: Pointer to the disk device with media change event
  *
  *	Tell the block layer to send a media change notification
  *	event.
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 7a1390cd6aa..c41d0728efe 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -238,7 +238,7 @@ store_mem_state(struct sys_device *dev, const char *buf, size_t count)
 	mem = container_of(dev, struct memory_block, sysdev);
 	phys_section_nr = mem->phys_index;
 
-	if (!valid_section_nr(phys_section_nr))
+	if (!present_section_nr(phys_section_nr))
 		goto out;
 
 	if (!strncmp(buf, "online", min((int)count, 6)))
@@ -418,7 +418,7 @@ int register_new_memory(struct mem_section *section)
 
 int unregister_memory_section(struct mem_section *section)
 {
-	if (!valid_section(section))
+	if (!present_section(section))
 		return -EINVAL;
 
 	return remove_memory_block(0, section, 0);
@@ -443,7 +443,7 @@ int __init memory_dev_init(void)
 	 * during boot and have been initialized
 	 */
 	for (i = 0; i < NR_MEM_SECTIONS; i++) {
-		if (!valid_section_nr(i))
+		if (!present_section_nr(i))
 			continue;
 		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0);
 		if (!ret)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index cae346ef1b2..88eeed72b5d 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -12,6 +12,7 @@
 #include <linux/topology.h>
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
+#include <linux/device.h>
 
 static struct sysdev_class node_class = {
 	set_kset_name("node"),
@@ -232,8 +233,96 @@ void unregister_one_node(int nid)
 	unregister_node(&node_devices[nid]);
 }
 
+/*
+ * node states attributes
+ */
+
+static ssize_t print_nodes_state(enum node_states state, char *buf)
+{
+	/* Render node_states[state] into buf; n is the returned length. */
+	int n = nodelist_scnprintf(buf, PAGE_SIZE, node_states[state]);
+
+	if (n > 0 && PAGE_SIZE > n + 1) {
+		buf[n++] = '\n';
+		buf[n] = '\0';	/* NUL is not part of the length */
+	}
+	return n;
+}
+
+static ssize_t print_nodes_possible(struct sysdev_class *class, char *buf)
+{
+	return print_nodes_state(N_POSSIBLE, buf);
+}
+
+static ssize_t print_nodes_online(struct sysdev_class *class, char *buf)
+{
+	return print_nodes_state(N_ONLINE, buf);
+}
+
+static ssize_t print_nodes_has_normal_memory(struct sysdev_class *class,
+						char *buf)
+{
+	return print_nodes_state(N_NORMAL_MEMORY, buf);
+}
+
+static ssize_t print_nodes_has_cpu(struct sysdev_class *class, char *buf)
+{
+	return print_nodes_state(N_CPU, buf);
+}
+
+static SYSDEV_CLASS_ATTR(possible, 0444, print_nodes_possible, NULL);
+static SYSDEV_CLASS_ATTR(online, 0444, print_nodes_online, NULL);
+static SYSDEV_CLASS_ATTR(has_normal_memory, 0444, print_nodes_has_normal_memory,
+									NULL);
+static SYSDEV_CLASS_ATTR(has_cpu, 0444, print_nodes_has_cpu, NULL);
+
+#ifdef CONFIG_HIGHMEM
+static ssize_t print_nodes_has_high_memory(struct sysdev_class *class,
+						 char *buf)
+{
+	return print_nodes_state(N_HIGH_MEMORY, buf);
+}
+
+static SYSDEV_CLASS_ATTR(has_high_memory, 0444, print_nodes_has_high_memory,
+									 NULL);
+#endif
+
+struct sysdev_class_attribute *node_state_attr[] = {
+	&attr_possible,
+	&attr_online,
+	&attr_has_normal_memory,
+#ifdef CONFIG_HIGHMEM
+	&attr_has_high_memory,
+#endif
+	&attr_has_cpu,
+};
+
+/* Create one sysfs file per node_state_attr[] entry; first error wins. */
+static int node_states_init(void)
+{
+	int i, ret, err = 0;
+
+	/* Bound the walk by the table itself so it can never be overrun. */
+	for (i = 0; i < ARRAY_SIZE(node_state_attr); i++) {
+		ret = sysdev_class_create_file(&node_class, node_state_attr[i]);
+		if (!err)
+			err = ret;
+	}
+	return err;
+}
+
 static int __init register_node_type(void)
 {
-	return sysdev_class_register(&node_class);
+	int ret;
+
+	ret = sysdev_class_register(&node_class);
+	if (!ret)
+		ret = node_states_init();
+
+	/*
+	 * Note:  we're not going to unregister the node class if we fail
+	 * to register the node state class attribute files.
+	 */
+	return ret;
 }
 postcore_initcall(register_node_type);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 55c3237fb1b..3fb7e8bc436 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1191,7 +1191,6 @@ static inline void complete_buffers(struct bio *bio, int status)
 {
 	while (bio) {
 		struct bio *xbh = bio->bi_next;
-		int nr_sectors = bio_sectors(bio);
 
 		bio->bi_next = NULL;
 		bio_endio(bio, status ? 0 : -EIO);
@@ -2570,6 +2569,7 @@ static void do_cciss_request(struct request_queue *q)
 	       (int)creq->nr_sectors);
 #endif				/* CCISS_DEBUG */
 
+	memset(tmp_sg, 0, sizeof(tmp_sg));
 	seg = blk_rq_map_sg(q, creq, tmp_sg);
 
 	/* get the DMA records for the setup */
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 3853c9a38d6..568603d3043 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -981,9 +981,8 @@ static void start_io(ctlr_info_t *h)
 static inline void complete_buffers(struct bio *bio, int ok)
 {
 	struct bio *xbh;
-	while(bio) {
-		int nr_sectors = bio_sectors(bio);
 
+	while (bio) {
 		xbh = bio->bi_next;
 		bio->bi_next = NULL;
 		
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b9233a06934..e5a051577a5 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -204,14 +204,13 @@ lo_do_transfer(struct loop_device *lo, int cmd,
  * do_lo_send_aops - helper for writing data to a loop device
  *
  * This is the fast version for backing filesystems which implement the address
- * space operations prepare_write and commit_write.
+ * space operations write_begin and write_end.
  */
 static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
-		int bsize, loff_t pos, struct page *page)
+		int bsize, loff_t pos, struct page *unused)
 {
 	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
 	struct address_space *mapping = file->f_mapping;
-	const struct address_space_operations *aops = mapping->a_ops;
 	pgoff_t index;
 	unsigned offset, bv_offs;
 	int len, ret;
@@ -223,63 +222,45 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 	len = bvec->bv_len;
 	while (len > 0) {
 		sector_t IV;
-		unsigned size;
+		unsigned size, copied;
 		int transfer_result;
+		struct page *page;
+		void *fsdata;
 
 		IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
 		size = PAGE_CACHE_SIZE - offset;
 		if (size > len)
 			size = len;
-		page = grab_cache_page(mapping, index);
-		if (unlikely(!page))
+
+		ret = pagecache_write_begin(file, mapping, pos, size, 0,
+							&page, &fsdata);
+		if (ret)
 			goto fail;
-		ret = aops->prepare_write(file, page, offset,
-					  offset + size);
-		if (unlikely(ret)) {
-			if (ret == AOP_TRUNCATED_PAGE) {
-				page_cache_release(page);
-				continue;
-			}
-			goto unlock;
-		}
+
 		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
 				bvec->bv_page, bv_offs, size, IV);
-		if (unlikely(transfer_result)) {
-			/*
-			 * The transfer failed, but we still write the data to
-			 * keep prepare/commit calls balanced.
-			 */
-			printk(KERN_ERR "loop: transfer error block %llu\n",
-			       (unsigned long long)index);
-			zero_user_page(page, offset, size, KM_USER0);
-		}
-		flush_dcache_page(page);
-		ret = aops->commit_write(file, page, offset,
-					 offset + size);
-		if (unlikely(ret)) {
-			if (ret == AOP_TRUNCATED_PAGE) {
-				page_cache_release(page);
-				continue;
-			}
-			goto unlock;
-		}
+		copied = size;
 		if (unlikely(transfer_result))
-			goto unlock;
-		bv_offs += size;
-		len -= size;
+			copied = 0;
+
+		ret = pagecache_write_end(file, mapping, pos, size, copied,
+							page, fsdata);
+		if (ret < 0 || ret != copied)
+			goto fail;
+
+		if (unlikely(transfer_result))
+			goto fail;
+
+		bv_offs += copied;
+		len -= copied;
 		offset = 0;
 		index++;
-		pos += size;
-		unlock_page(page);
-		page_cache_release(page);
+		pos += copied;
 	}
 	ret = 0;
 out:
 	mutex_unlock(&mapping->host->i_mutex);
 	return ret;
-unlock:
-	unlock_page(page);
-	page_cache_release(page);
 fail:
 	ret = -1;
 	goto out;
@@ -313,7 +294,7 @@ static int __do_lo_send_write(struct file *file,
  * do_lo_send_direct_write - helper for writing data to a loop device
  *
  * This is the fast, non-transforming version for backing filesystems which do
- * not implement the address space operations prepare_write and commit_write.
+ * not implement the address space operations write_begin and write_end.
  * It uses the write file operation which should be present on all writeable
  * filesystems.
  */
@@ -332,7 +313,7 @@ static int do_lo_send_direct_write(struct loop_device *lo,
  * do_lo_send_write - helper for writing data to a loop device
  *
  * This is the slow, transforming version for filesystems which do not
- * implement the address space operations prepare_write and commit_write.  It
+ * implement the address space operations write_begin and write_end.  It
  * uses the write file operation which should be present on all writeable
  * filesystems.
  *
@@ -780,7 +761,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
 		 */
 		if (!file->f_op->splice_read)
 			goto out_putf;
-		if (aops->prepare_write && aops->commit_write)
+		if (aops->prepare_write || aops->write_begin)
 			lo_flags |= LO_FLAGS_USE_AOPS;
 		if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
 			lo_flags |= LO_FLAGS_READ_ONLY;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 540bf367698..a8130a4ad6d 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1133,16 +1133,21 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
 	 * Schedule reads for missing parts of the packet.
 	 */
 	for (f = 0; f < pkt->frames; f++) {
+		struct bio_vec *vec;
+
 		int p, offset;
 		if (written[f])
 			continue;
 		bio = pkt->r_bios[f];
+		vec = bio->bi_io_vec;
 		bio_init(bio);
 		bio->bi_max_vecs = 1;
 		bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
 		bio->bi_bdev = pd->bdev;
 		bio->bi_end_io = pkt_end_io_read;
 		bio->bi_private = pkt;
+		bio->bi_io_vec = vec;
+		bio->bi_destructor = pkt_bio_destructor;
 
 		p = (f * CD_FRAMESIZE) / PAGE_SIZE;
 		offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
@@ -1439,6 +1444,8 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 	pkt->w_bio->bi_bdev = pd->bdev;
 	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
 	pkt->w_bio->bi_private = pkt;
+	pkt->w_bio->bi_io_vec = bvec;
+	pkt->w_bio->bi_destructor = pkt_bio_destructor;
 	for (f = 0; f < pkt->frames; f++)
 		if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
 			BUG();
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 06d0552cf49..e354bfc070e 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -414,26 +414,6 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req)
 	req->cmd_type = REQ_TYPE_FLUSH;
 }
 
-static int ps3disk_issue_flush(struct request_queue *q, struct gendisk *gendisk,
-			       sector_t *sector)
-{
-	struct ps3_storage_device *dev = q->queuedata;
-	struct request *req;
-	int res;
-
-	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
-
-	req = blk_get_request(q, WRITE, __GFP_WAIT);
-	ps3disk_prepare_flush(q, req);
-	res = blk_execute_rq(q, gendisk, req, 0);
-	if (res)
-		dev_err(&dev->sbd.core, "%s:%u: flush request failed %d\n",
-			__func__, __LINE__, res);
-	blk_put_request(req);
-	return res;
-}
-
-
 static unsigned long ps3disk_mask;
 
 static DEFINE_MUTEX(ps3disk_mask_mutex);
@@ -506,7 +486,6 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev)
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
 	blk_queue_hardsect_size(queue, dev->blk_size);
 
-	blk_queue_issue_flush_fn(queue, ps3disk_issue_flush);
 	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH,
 			  ps3disk_prepare_flush);
 
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index b391776e5bf..f6f8c03047f 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -896,10 +896,6 @@ config GPIO_TB0219
 	depends on TANBAC_TB022X
 	select GPIO_VR41XX
 
-source "drivers/char/agp/Kconfig"
-
-source "drivers/char/drm/Kconfig"
-
 source "drivers/char/pcmcia/Kconfig"
 
 config MWAVE
diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig
index 713533d8a86..f22c253bc09 100644
--- a/drivers/char/agp/Kconfig
+++ b/drivers/char/agp/Kconfig
@@ -1,4 +1,4 @@
-config AGP
+menuconfig AGP
 	tristate "/dev/agpgart (AGP Support)"
 	depends on ALPHA || IA64 || PARISC || PPC || X86
 	depends on PCI
diff --git a/drivers/char/drm/Kconfig b/drivers/char/drm/Kconfig
index 0b7ffa5191c..ba3058dd39a 100644
--- a/drivers/char/drm/Kconfig
+++ b/drivers/char/drm/Kconfig
@@ -4,7 +4,7 @@
 # This driver provides support for the
 # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
 #
-config DRM
+menuconfig DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
 	depends on (AGP || AGP=n) && PCI && !EMULATED_CMPXCHG
 	help
diff --git a/drivers/char/drm/radeon_irq.c b/drivers/char/drm/radeon_irq.c
index f89e57665b6..2b2407ee490 100644
--- a/drivers/char/drm/radeon_irq.c
+++ b/drivers/char/drm/radeon_irq.c
@@ -144,8 +144,8 @@ static int radeon_wait_irq(struct drm_device * dev, int swi_nr)
 	return ret;
 }
 
-int radeon_driver_vblank_do_wait(struct drm_device * dev, unsigned int *sequence,
-				 int crtc)
+static int radeon_driver_vblank_do_wait(struct drm_device * dev,
+					unsigned int *sequence, int crtc)
 {
 	drm_radeon_private_t *dev_priv =
 	    (drm_radeon_private_t *) dev->dev_private;
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index bbee97ff355..64551ab6be0 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -625,65 +625,10 @@ static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out,
 	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
 }
 
-#ifdef CONFIG_MMU
-/*
- * For fun, we are using the MMU for this.
- */
-static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
-{
-	struct mm_struct *mm;
-	struct vm_area_struct * vma;
-	unsigned long addr=(unsigned long)buf;
-
-	mm = current->mm;
-	/* Oops, this was forgotten before. -ben */
-	down_read(&mm->mmap_sem);
-
-	/* For private mappings, just map in zero pages. */
-	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
-		unsigned long count;
-
-		if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
-			goto out_up;
-		if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
-			break;
-		count = vma->vm_end - addr;
-		if (count > size)
-			count = size;
-
-		zap_page_range(vma, addr, count, NULL);
-        	if (zeromap_page_range(vma, addr, count, PAGE_COPY))
-			break;
-
-		size -= count;
-		buf += count;
-		addr += count;
-		if (size == 0)
-			goto out_up;
-	}
-
-	up_read(&mm->mmap_sem);
-	
-	/* The shared case is hard. Let's do the conventional zeroing. */ 
-	do {
-		unsigned long unwritten = clear_user(buf, PAGE_SIZE);
-		if (unwritten)
-			return size + unwritten - PAGE_SIZE;
-		cond_resched();
-		buf += PAGE_SIZE;
-		size -= PAGE_SIZE;
-	} while (size);
-
-	return size;
-out_up:
-	up_read(&mm->mmap_sem);
-	return size;
-}
-
 static ssize_t read_zero(struct file * file, char __user * buf, 
 			 size_t count, loff_t *ppos)
 {
-	unsigned long left, unwritten, written = 0;
+	size_t written;
 
 	if (!count)
 		return 0;
@@ -691,69 +636,33 @@ static ssize_t read_zero(struct file * file, char __user * buf,
 	if (!access_ok(VERIFY_WRITE, buf, count))
 		return -EFAULT;
 
-	left = count;
-
-	/* do we want to be clever? Arbitrary cut-off */
-	if (count >= PAGE_SIZE*4) {
-		unsigned long partial;
+	written = 0;
+	while (count) {
+		unsigned long unwritten;
+		size_t chunk = count;
 
-		/* How much left of the page? */
-		partial = (PAGE_SIZE-1) & -(unsigned long) buf;
-		unwritten = clear_user(buf, partial);
-		written = partial - unwritten;
-		if (unwritten)
-			goto out;
-		left -= partial;
-		buf += partial;
-		unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
-		written += (left & PAGE_MASK) - unwritten;
+		if (chunk > PAGE_SIZE)
+			chunk = PAGE_SIZE;	/* Just for latency reasons */
+		unwritten = clear_user(buf, chunk);
+		written += chunk - unwritten;
 		if (unwritten)
-			goto out;
-		buf += left & PAGE_MASK;
-		left &= ~PAGE_MASK;
-	}
-	unwritten = clear_user(buf, left);
-	written += left - unwritten;
-out:
-	return written ? written : -EFAULT;
-}
-
-static int mmap_zero(struct file * file, struct vm_area_struct * vma)
-{
-	int err;
-
-	if (vma->vm_flags & VM_SHARED)
-		return shmem_zero_setup(vma);
-	err = zeromap_page_range(vma, vma->vm_start,
-			vma->vm_end - vma->vm_start, vma->vm_page_prot);
-	BUG_ON(err == -EEXIST);
-	return err;
-}
-#else /* CONFIG_MMU */
-static ssize_t read_zero(struct file * file, char * buf, 
-			 size_t count, loff_t *ppos)
-{
-	size_t todo = count;
-
-	while (todo) {
-		size_t chunk = todo;
-
-		if (chunk > 4096)
-			chunk = 4096;	/* Just for latency reasons */
-		if (clear_user(buf, chunk))
-			return -EFAULT;
+			break;
 		buf += chunk;
-		todo -= chunk;
+		count -= chunk;
 		cond_resched();
 	}
-	return count;
+	return written ? written : -EFAULT;
 }
 
 static int mmap_zero(struct file * file, struct vm_area_struct * vma)
 {
+#ifndef CONFIG_MMU
 	return -ENOSYS;
+#endif
+	if (vma->vm_flags & VM_SHARED)
+		return shmem_zero_setup(vma);
+	return 0;
 }
-#endif /* CONFIG_MMU */
 
 static ssize_t write_full(struct file * file, const char __user * buf,
 			  size_t count, loff_t *ppos)
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index 04ac155d3a0..82f2e27dca7 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -362,7 +362,7 @@ mspec_init(void)
 		is_sn2 = 1;
 		if (is_shub2()) {
 			ret = -ENOMEM;
-			for_each_online_node(nid) {
+			for_each_node_state(nid, N_ONLINE) {
 				int actual_nid;
 				int nasid;
 				unsigned long phys;
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index edb7002a321..0d56f8fc105 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -750,13 +750,15 @@ int vc_allocate(unsigned int currcons)	/* return 0 on success */
 	return 0;
 }
 
-static inline int resize_screen(struct vc_data *vc, int width, int height)
+static inline int resize_screen(struct vc_data *vc, int width, int height,
+				int user)
 {
 	/* Resizes the resolution of the display adapater */
 	int err = 0;
 
 	if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize)
-		err = vc->vc_sw->con_resize(vc, width, height);
+		err = vc->vc_sw->con_resize(vc, width, height, user);
+
 	return err;
 }
 
@@ -772,7 +774,7 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines)
 	unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0;
 	unsigned int old_cols, old_rows, old_row_size, old_screen_size;
 	unsigned int new_cols, new_rows, new_row_size, new_screen_size;
-	unsigned int end;
+	unsigned int end, user;
 	unsigned short *newscreen;
 
 	WARN_CONSOLE_UNLOCKED();
@@ -780,6 +782,9 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines)
 	if (!vc)
 		return -ENXIO;
 
+	user = vc->vc_resize_user;
+	vc->vc_resize_user = 0;
+
 	if (cols > VC_RESIZE_MAXCOL || lines > VC_RESIZE_MAXROW)
 		return -EINVAL;
 
@@ -800,7 +805,7 @@ int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines)
 	old_row_size = vc->vc_size_row;
 	old_screen_size = vc->vc_screenbuf_size;
 
-	err = resize_screen(vc, new_cols, new_rows);
+	err = resize_screen(vc, new_cols, new_rows, user);
 	if (err) {
 		kfree(newscreen);
 		return err;
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index 7a61a2a9aaf..f69a8258095 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -847,14 +847,24 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
 	case VT_RESIZE:
 	{
 		struct vt_sizes __user *vtsizes = up;
+		struct vc_data *vc;
+
 		ushort ll,cc;
 		if (!perm)
 			return -EPERM;
 		if (get_user(ll, &vtsizes->v_rows) ||
 		    get_user(cc, &vtsizes->v_cols))
 			return -EFAULT;
-		for (i = 0; i < MAX_NR_CONSOLES; i++)
-			vc_lock_resize(vc_cons[i].d, cc, ll);
+
+		for (i = 0; i < MAX_NR_CONSOLES; i++) {
+			vc = vc_cons[i].d;
+
+			if (vc) {
+				vc->vc_resize_user = 1;
+				vc_lock_resize(vc_cons[i].d, cc, ll);
+			}
+		}
+
 		return 0;
 	}
 
@@ -900,6 +910,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
 				vc_cons[i].d->vc_scan_lines = vlin;
 			if (clin)
 				vc_cons[i].d->vc_font.height = clin;
+			vc_cons[i].d->vc_resize_user = 1;
 			vc_resize(vc_cons[i].d, cc, ll);
 			release_console_sem();
 		}
diff --git a/drivers/dca/Kconfig b/drivers/dca/Kconfig
new file mode 100644
index 00000000000..94f0364a0ef
--- /dev/null
+++ b/drivers/dca/Kconfig
@@ -0,0 +1,7 @@
+#
+# DCA server configuration
+#
+
+config DCA
+	tristate
+
diff --git a/drivers/dca/Makefile b/drivers/dca/Makefile
new file mode 100644
index 00000000000..b2db56bb9dd
--- /dev/null
+++ b/drivers/dca/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_DCA) += dca.o
+dca-objs := dca-core.o dca-sysfs.o
diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
new file mode 100644
index 00000000000..bf5b92f86df
--- /dev/null
+++ b/drivers/dca/dca-core.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright(c) 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports an interface for DCA clients and providers to meet.
+ */
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <linux/dca.h>
+
+MODULE_LICENSE("GPL");
+
+/* For now we're assuming a single, global, DCA provider for the system. */
+
+static DEFINE_SPINLOCK(dca_lock);
+
+static struct dca_provider *global_dca = NULL;
+
+/**
+ * dca_add_requester - add a dca client to the list
+ * @dev - the device that wants dca service
+ */
+int dca_add_requester(struct device *dev)
+{
+	int err, slot;
+
+	if (!global_dca)
+		return -ENODEV;
+
+	spin_lock(&dca_lock);
+	slot = global_dca->ops->add_requester(global_dca, dev);
+	spin_unlock(&dca_lock);
+	if (slot < 0)
+		return slot;
+
+	err = dca_sysfs_add_req(global_dca, dev, slot);
+	if (err) {
+		spin_lock(&dca_lock);
+		global_dca->ops->remove_requester(global_dca, dev);
+		spin_unlock(&dca_lock);
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dca_add_requester);
+
+/**
+ * dca_remove_requester - remove a dca client from the list
+ * @dev - the device that wants dca service
+ */
+int dca_remove_requester(struct device *dev)
+{
+	int slot;
+	if (!global_dca)
+		return -ENODEV;
+
+	spin_lock(&dca_lock);
+	slot = global_dca->ops->remove_requester(global_dca, dev);
+	spin_unlock(&dca_lock);
+	if (slot < 0)
+		return slot;
+
+	dca_sysfs_remove_req(global_dca, slot);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dca_remove_requester);
+
+/**
+ * dca_get_tag - return the dca tag for the given cpu
+ * @cpu - the cpuid as returned by get_cpu()
+ */
+u8 dca_get_tag(int cpu)
+{
+	if (!global_dca)
+		return -ENODEV;	/* NOTE(review): truncated to u8 */
+	return global_dca->ops->get_tag(global_dca, cpu);
+}
+EXPORT_SYMBOL_GPL(dca_get_tag);
+
+/**
+ * alloc_dca_provider - get data struct for describing a dca provider
+ * @ops - pointer to struct of dca operation function pointers
+ * @priv_size - size of extra mem to be added for provider's needs
+ */
+struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size)
+{
+	struct dca_provider *dca;
+	int alloc_size;
+
+	alloc_size = (sizeof(*dca) + priv_size);
+	dca = kzalloc(alloc_size, GFP_KERNEL);
+	if (!dca)
+		return NULL;
+	dca->ops = ops;
+
+	return dca;
+}
+EXPORT_SYMBOL_GPL(alloc_dca_provider);
+
+/**
+ * free_dca_provider - release the dca provider data struct
+ * @dca - struct created by alloc_dca_provider(); it is handed to
+ *        kfree() and must not be used afterwards
+ */
+void free_dca_provider(struct dca_provider *dca)
+{
+	kfree(dca);
+}
+EXPORT_SYMBOL_GPL(free_dca_provider);
+
+static BLOCKING_NOTIFIER_HEAD(dca_provider_chain);
+
+/**
+ * register_dca_provider - register a dca provider
+ * @dca - struct created by alloc_dca_provider()
+ * @dev - device providing dca services
+ */
+int register_dca_provider(struct dca_provider *dca, struct device *dev)
+{
+	int err;
+
+	if (global_dca)
+		return -EEXIST;
+	err = dca_sysfs_add_provider(dca, dev);
+	if (err)
+		return err;
+	global_dca = dca;
+	blocking_notifier_call_chain(&dca_provider_chain,
+				     DCA_PROVIDER_ADD, NULL);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(register_dca_provider);
+
+/**
+ * unregister_dca_provider - remove a dca provider
+ * @dca - struct created by alloc_dca_provider()
+ */
+void unregister_dca_provider(struct dca_provider *dca)
+{
+	if (!global_dca)
+		return;
+	blocking_notifier_call_chain(&dca_provider_chain,
+				     DCA_PROVIDER_REMOVE, NULL);
+	global_dca = NULL;
+	dca_sysfs_remove_provider(dca);
+}
+EXPORT_SYMBOL_GPL(unregister_dca_provider);
+
+/**
+ * dca_register_notify - register a client's notifier callback
+ */
+void dca_register_notify(struct notifier_block *nb)
+{
+	blocking_notifier_chain_register(&dca_provider_chain, nb);
+}
+EXPORT_SYMBOL_GPL(dca_register_notify);
+
+/**
+ * dca_unregister_notify - remove a client's notifier callback
+ */
+void dca_unregister_notify(struct notifier_block *nb)
+{
+	blocking_notifier_chain_unregister(&dca_provider_chain, nb);
+}
+EXPORT_SYMBOL_GPL(dca_unregister_notify);
+
+static int __init dca_init(void)
+{
+	return dca_sysfs_init();
+}
+
+static void __exit dca_exit(void)
+{
+	dca_sysfs_exit();
+}
+
+module_init(dca_init);
+module_exit(dca_exit);
+
diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c
new file mode 100644
index 00000000000..24a263b6844
--- /dev/null
+++ b/drivers/dca/dca-sysfs.c
@@ -0,0 +1,88 @@
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/kdev_t.h>
+#include <linux/err.h>
+#include <linux/dca.h>
+
+static struct class *dca_class;
+static struct idr dca_idr;
+static spinlock_t dca_idr_lock;
+
+int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot)
+{
+	struct class_device *cd;
+
+	cd = class_device_create(dca_class, dca->cd, MKDEV(0, slot + 1),
+				 dev, "requester%d", slot);
+	if (IS_ERR(cd))
+		return PTR_ERR(cd);
+	return 0;
+}
+
+void dca_sysfs_remove_req(struct dca_provider *dca, int slot)
+{
+	class_device_destroy(dca_class, MKDEV(0, slot + 1));
+}
+
+int dca_sysfs_add_provider(struct dca_provider *dca, struct device *dev)
+{
+	struct class_device *cd;
+	int err = 0;
+
+idr_try_again:
+	if (!idr_pre_get(&dca_idr, GFP_KERNEL))
+		return -ENOMEM;
+	spin_lock(&dca_idr_lock);
+	err = idr_get_new(&dca_idr, dca, &dca->id);
+	spin_unlock(&dca_idr_lock);
+	switch (err) {
+	case 0:
+		break;
+	case -EAGAIN:
+		goto idr_try_again;
+	default:
+		return err;
+	}
+
+	cd = class_device_create(dca_class, NULL, MKDEV(0, 0),
+				 dev, "dca%d", dca->id);
+	if (IS_ERR(cd)) {
+		spin_lock(&dca_idr_lock);
+		idr_remove(&dca_idr, dca->id);
+		spin_unlock(&dca_idr_lock);
+		return PTR_ERR(cd);
+	}
+	dca->cd = cd;
+	return 0;
+}
+
+void dca_sysfs_remove_provider(struct dca_provider *dca)
+{
+	class_device_unregister(dca->cd);
+	dca->cd = NULL;
+	spin_lock(&dca_idr_lock);
+	idr_remove(&dca_idr, dca->id);
+	spin_unlock(&dca_idr_lock);
+}
+
+int __init dca_sysfs_init(void)
+{
+	idr_init(&dca_idr);
+	spin_lock_init(&dca_idr_lock);
+
+	dca_class = class_create(THIS_MODULE, "dca");
+	if (IS_ERR(dca_class)) {
+		idr_destroy(&dca_idr);
+		return PTR_ERR(dca_class);
+	}
+	return 0;
+}
+
+void __exit dca_sysfs_exit(void)
+{
+	class_destroy(dca_class);
+	idr_destroy(&dca_idr);
+}
+
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 8f670dae53b..9c91b0fd134 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -2,42 +2,52 @@
 # DMA engine configuration
 #
 
-menu "DMA Engine support"
-	depends on HAS_DMA
+menuconfig DMADEVICES
+	bool "DMA Offload Engine support"
+	depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
+	help
+	  Intel(R) offload engines enable offloading memory copies in the
+	  network stack and RAID operations in the MD driver.
+
+if DMADEVICES
+
+comment "DMA Devices"
+
+config INTEL_IOATDMA
+	tristate "Intel I/OAT DMA support"
+	depends on PCI && X86
+	select DMA_ENGINE
+	select DCA
+	help
+	  Enable support for the Intel(R) I/OAT DMA engine present
+	  in recent Intel Xeon chipsets.
+
+	  Say Y here if you have such a chipset.
+
+	  If unsure, say N.
+
+config INTEL_IOP_ADMA
+	tristate "Intel IOP ADMA support"
+	depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
+	select ASYNC_CORE
+	select DMA_ENGINE
+	help
+	  Enable support for the Intel(R) IOP Series RAID engines.
 
 config DMA_ENGINE
-	bool "Support for DMA engines"
-	---help---
-          DMA engines offload bulk memory operations from the CPU to dedicated
-          hardware, allowing the operations to happen asynchronously.
+	bool
 
 comment "DMA Clients"
+	depends on DMA_ENGINE
 
 config NET_DMA
 	bool "Network: TCP receive copy offload"
 	depends on DMA_ENGINE && NET
 	default y
-	---help---
+	help
 	  This enables the use of DMA engines in the network stack to
 	  offload receive copy-to-user operations, freeing CPU cycles.
 	  Since this is the main user of the DMA engine, it should be enabled;
 	  say Y here.
 
-comment "DMA Devices"
-
-config INTEL_IOATDMA
-	tristate "Intel I/OAT DMA support"
-	depends on DMA_ENGINE && PCI
-	default m
-	---help---
-	  Enable support for the Intel(R) I/OAT DMA engine.
-
-config INTEL_IOP_ADMA
-        tristate "Intel IOP ADMA support"
-        depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX)
-	select ASYNC_CORE
-        default m
-        ---help---
-          Enable support for the Intel(R) IOP Series RAID engines.
-
-endmenu
+endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index b3839b687ae..b152cd84e12 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_NET_DMA) += iovlock.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
new file mode 100644
index 00000000000..f7276bf2fe7
--- /dev/null
+++ b/drivers/dma/ioat.c
@@ -0,0 +1,211 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include "ioatdma.h"
+#include "ioatdma_registers.h"
+#include "ioatdma_hw.h"
+
+MODULE_VERSION("1.24");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static struct pci_device_id ioat_pci_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+	{ 0, }
+};
+
+struct ioat_device {
+	struct pci_dev		*pdev;
+	void __iomem		*iobase;
+	struct ioatdma_device	*dma;
+	struct dca_provider	*dca;
+};
+
+static int __devinit ioat_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id);
+#ifdef IOAT_DMA_REMOVE
+static void __devexit ioat_remove(struct pci_dev *pdev);
+#endif
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+
+static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct ioat_device *device = pci_get_drvdata(pdev);
+	u8 version;
+	int err = 0;
+
+	version = readb(iobase + IOAT_VER_OFFSET);
+	switch (version) {
+	case IOAT_VER_1_2:
+		device->dma = ioat_dma_probe(pdev, iobase);
+		if (ioat_dca_enabled)
+			device->dca = ioat_dca_init(pdev, iobase);
+		break;
+	default:
+		err = -ENODEV;
+		break;
+	}
+	return err;
+}
+
+static void ioat_shutdown_functionality(struct pci_dev *pdev)
+{
+	struct ioat_device *device = pci_get_drvdata(pdev);
+
+	if (device->dma) {
+		ioat_dma_remove(device->dma);
+		device->dma = NULL;
+	}
+
+	if (device->dca) {
+		unregister_dca_provider(device->dca);
+		free_dca_provider(device->dca);
+		device->dca = NULL;
+	}
+
+}
+
+static struct pci_driver ioat_pci_drv = {
+	.name		= "ioatdma",
+	.id_table	= ioat_pci_tbl,
+	.probe		= ioat_probe,
+	.shutdown	= ioat_shutdown_functionality,
+#ifdef IOAT_DMA_REMOVE
+	.remove		= __devexit_p(ioat_remove),
+#endif
+};
+
+static int __devinit ioat_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	void __iomem *iobase;
+	struct ioat_device *device;
+	unsigned long mmio_start, mmio_len;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pci_request_regions(pdev, ioat_pci_drv.name);
+	if (err)
+		goto err_request_regions;
+
+	err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+	if (err)
+		err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+	if (err)
+		goto err_set_dma_mask;
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+	if (err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+	if (err)
+		goto err_set_dma_mask;
+
+	mmio_start = pci_resource_start(pdev, 0);
+	mmio_len = pci_resource_len(pdev, 0);
+	iobase = ioremap(mmio_start, mmio_len);
+	if (!iobase) {
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device) {
+		err = -ENOMEM;
+		goto err_kzalloc;
+	}
+	device->pdev = pdev;
+	pci_set_drvdata(pdev, device);
+	device->iobase = iobase;
+
+	pci_set_master(pdev);
+
+	err = ioat_setup_functionality(pdev, iobase);
+	if (err)
+		goto err_version;
+
+	return 0;
+
+err_version:
+	kfree(device);
+err_kzalloc:
+	iounmap(iobase);
+err_ioremap:
+err_set_dma_mask:
+	pci_release_regions(pdev);
+err_request_regions:
+	/* the device was already enabled when the region request failed */
+	pci_disable_device(pdev);
+	return err;
+}
+
+#ifdef IOAT_DMA_REMOVE
+/*
+ * It is unsafe to remove this module: if removed while a requested
+ * dma is outstanding, esp. from tcp, it is possible to hang while
+ * waiting for something that will never finish, thus hanging at
+ * least one cpu.  However, if you're feeling lucky and need to do
+ * some testing, this usually works just fine.
+ */
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+	struct ioat_device *device = pci_get_drvdata(pdev);
+
+	ioat_shutdown_functionality(pdev);
+
+	/* unmap first: device->iobase must be read before *device is freed */
+	iounmap(device->iobase);
+	kfree(device);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+#endif
+
+static int __init ioat_init_module(void)
+{
+	return pci_register_driver(&ioat_pci_drv);
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+	pci_unregister_driver(&ioat_pci_drv);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c
new file mode 100644
index 00000000000..2ae04c30ede
--- /dev/null
+++ b/drivers/dma/ioat_dca.c
@@ -0,0 +1,263 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "ioatdma.h"
+#include "ioatdma_registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device.  Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x)		(DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN	8
+
+static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
+static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+	return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(void)
+{
+	/*
+	 * CPUID level 9 returns the DCA configuration in EAX; bit 0 is
+	 * set when the BIOS has enabled DCA.
+	 */
+	unsigned long leaf9_eax = cpuid_eax(9);
+	int enabled = test_bit(0, &leaf9_eax);
+
+	if (!enabled)
+		printk(KERN_ERR "ioat dma: DCA is disabled in BIOS\n");
+
+	return enabled;
+}
+
+static int system_has_dca_enabled(void)
+{
+	if (boot_cpu_has(X86_FEATURE_DCA))
+		return dca_enabled_in_bios();
+
+	printk(KERN_ERR "ioat dma: boot cpu doesn't have X86_FEATURE_DCA\n");
+	return 0;
+}
+
+struct ioat_dca_slot {
+	struct pci_dev *pdev;	/* requester device */
+	u16 rid;		/* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+
+struct ioat_dca_priv {
+	void __iomem		*iobase;
+	void __iomem		*dca_base;	/* requester ID table (MMIO) */
+	int			 max_requesters;	/* entries in req_slots[] */
+	int			 requester_count;	/* slots currently in use */
+	u8			 tag_map[IOAT_TAG_MAP_LEN];
+	struct ioat_dca_slot	 req_slots[0];
+};
+
+/* 5000 series chipset DCA Port Requester ID Table Entry Format
+ * [15:8]	PCI-Express Bus Number
+ * [7:3]	PCI-Express Device Number
+ * [2:0]	PCI-Express Function Number
+ *
+ * 5000 series chipset DCA control register format
+ * [7:1]	Reserved (0)
+ * [0]		Ignore Function Number
+ */
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			writew(id, ioatdca->dca_base + (i * 4));
+			/* make sure the ignore function bit is off */
+			writeb(0, ioatdca->dca_base + (i * 4) + 2);
+			return i;
+		}
+	}
+	/* Error, ioatdma->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+				     struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			writew(0, ioatdca->dca_base + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	int apic_id = cpu_physical_id(cpu);
+	u8 tag = 0;
+	int pos;
+
+	/* assemble the tag one bit per tag_map entry, lowest bit first */
+	for (pos = 0; pos < IOAT_TAG_MAP_LEN; pos++) {
+		u8 entry = ioatdca->tag_map[pos];
+		int src_bit = entry & ~DCA_TAG_MAP_VALID;
+		int value;
+
+		if (entry & DCA_TAG_MAP_VALID)
+			value = !!(apic_id & (1 << src_bit)); /* APIC ID bit */
+		else
+			value = !!entry; /* literal 0 or 1 */
+		tag |= (value << pos);
+	}
+	return tag;
+}
+
+static struct dca_ops ioat_dca_ops = {
+	.add_requester		= ioat_dca_add_requester,
+	.remove_requester	= ioat_dca_remove_requester,
+	.get_tag		= ioat_dca_get_tag,
+};
+
+
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	u8 *tag_map = NULL;
+	int i;
+	int err;
+
+	if (!system_has_dca_enabled())
+		return NULL;
+
+	/* I/OAT v1 systems must have a known tag_map to support DCA */
+	switch (pdev->vendor) {
+	case PCI_VENDOR_ID_INTEL:
+		switch (pdev->device) {
+		case PCI_DEVICE_ID_INTEL_IOAT:
+			tag_map = ioat_tag_map_BNB;
+			break;
+		case PCI_DEVICE_ID_INTEL_IOAT_CNB:
+			tag_map = ioat_tag_map_CNB;
+			break;
+		case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
+			tag_map = ioat_tag_map_SCNB;
+			break;
+		}
+		break;
+	case PCI_VENDOR_ID_UNISYS:
+		switch (pdev->device) {
+		case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
+			tag_map = ioat_tag_map_UNISYS;
+			break;
+		}
+		break;
+	}
+	if (tag_map == NULL)
+		return NULL;
+
+	dca = alloc_dca_provider(&ioat_dca_ops,
+			sizeof(*ioatdca) +
+			(sizeof(struct ioat_dca_slot) * IOAT_DCA_MAX_REQ));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->max_requesters = IOAT_DCA_MAX_REQ;
+	ioatdca->iobase = iobase;
+	ioatdca->dca_base = iobase + 0x54;
+
+	/* copy over the APIC ID to DCA tag mapping */
+	for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
+		ioatdca->tag_map[i] = tag_map[i];
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}
+
diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioat_dma.c
index 41b18c5a314..66c5bb53211 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioat_dma.c
@@ -1,10 +1,10 @@
 /*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2007 Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
  *
  * This program is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
@@ -12,11 +12,12 @@
  * more details.
  *
  * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
  *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
  */
 
 /*
@@ -35,17 +36,77 @@
 #include "ioatdma_registers.h"
 #include "ioatdma_hw.h"
 
+#define INITIAL_IOAT_DESC_COUNT 128
+
 #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
-#define to_ioat_device(dev) container_of(dev, struct ioat_device, common)
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
 #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
 #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
 
 /* internal functions */
-static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
-static void ioat_shutdown(struct pci_dev *pdev);
-static void __devexit ioat_remove(struct pci_dev *pdev);
+static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
+static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
 
-static int enumerate_dma_channels(struct ioat_device *device)
+static struct ioat_dma_chan *ioat_lookup_chan_by_index(struct ioatdma_device *device,
+						       int index)
+{
+	return device->idx[index];
+}
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+	struct ioatdma_device *instance = data;
+	struct ioat_dma_chan *ioat_chan;
+	unsigned long attnstatus;
+	int bit;
+	u8 intrctrl;
+
+	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+		return IRQ_NONE;
+
+	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+		return IRQ_NONE;
+	}
+
+	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+	for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+		ioat_chan = ioat_lookup_chan_by_index(instance, bit);
+		tasklet_schedule(&ioat_chan->cleanup_task);
+	}
+
+	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+	return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+	struct ioat_dma_chan *ioat_chan = data;
+
+	tasklet_schedule(&ioat_chan->cleanup_task);
+
+	return IRQ_HANDLED;
+}
+
+static void ioat_dma_cleanup_tasklet(unsigned long data);
+
+/**
+ * ioat_dma_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
 {
 	u8 xfercap_scale;
 	u32 xfercap;
@@ -73,13 +134,19 @@ static int enumerate_dma_channels(struct ioat_device *device)
 		/* This should be made common somewhere in dmaengine.c */
 		ioat_chan->common.device = &device->common;
 		list_add_tail(&ioat_chan->common.device_node,
-		              &device->common.channels);
+			      &device->common.channels);
+		device->idx[i] = ioat_chan;
+		tasklet_init(&ioat_chan->cleanup_task,
+			     ioat_dma_cleanup_tasklet,
+			     (unsigned long) ioat_chan);
+		tasklet_disable(&ioat_chan->cleanup_task);
 	}
 	return device->common.chancnt;
 }
 
-static void
-ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
+static void ioat_set_src(dma_addr_t addr,
+			 struct dma_async_tx_descriptor *tx,
+			 int index)
 {
 	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
@@ -93,8 +160,9 @@ ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
 
 }
 
-static void
-ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
+static void ioat_set_dest(dma_addr_t addr,
+			  struct dma_async_tx_descriptor *tx,
+			  int index)
 {
 	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
@@ -107,8 +175,7 @@ ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
 	}
 }
 
-static dma_cookie_t
-ioat_tx_submit(struct dma_async_tx_descriptor *tx)
+static dma_cookie_t ioat_tx_submit(struct dma_async_tx_descriptor *tx)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
 	struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
@@ -141,27 +208,27 @@ ioat_tx_submit(struct dma_async_tx_descriptor *tx)
 	if (append)
 		writeb(IOAT_CHANCMD_APPEND,
 			ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
-	
+
 	return cookie;
 }
 
 static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
-	struct ioat_dma_chan *ioat_chan,
-	gfp_t flags)
+					struct ioat_dma_chan *ioat_chan,
+					gfp_t flags)
 {
 	struct ioat_dma_descriptor *desc;
 	struct ioat_desc_sw *desc_sw;
-	struct ioat_device *ioat_device;
+	struct ioatdma_device *ioatdma_device;
 	dma_addr_t phys;
 
-	ioat_device = to_ioat_device(ioat_chan->common.device);
-	desc = pci_pool_alloc(ioat_device->dma_pool, flags, &phys);
+	ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
+	desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
 	if (unlikely(!desc))
 		return NULL;
 
 	desc_sw = kzalloc(sizeof(*desc_sw), flags);
 	if (unlikely(!desc_sw)) {
-		pci_pool_free(ioat_device->dma_pool, desc, phys);
+		pci_pool_free(ioatdma_device->dma_pool, desc, phys);
 		return NULL;
 	}
 
@@ -177,10 +244,6 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
 	return desc_sw;
 }
 
-#define INITIAL_IOAT_DESC_COUNT 128
-
-static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan);
-
 /* returns the actual number of allocated descriptors */
 static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
 {
@@ -195,15 +258,16 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
 	if (!list_empty(&ioat_chan->free_desc))
 		return INITIAL_IOAT_DESC_COUNT;
 
-        /* Setup register to interrupt and write completion status on error */
+	/* Setup register to interrupt and write completion status on error */
 	chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
 		IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
 		IOAT_CHANCTRL_ERR_COMPLETION_EN;
-        writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
 
 	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 	if (chanerr) {
-		printk("IOAT: CHANERR = %x, clearing\n", chanerr);
+		dev_err(&ioat_chan->device->pdev->dev,
+			"ioatdma: CHANERR = %x, clearing\n", chanerr);
 		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 	}
 
@@ -211,7 +275,8 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
 	for (i = 0; i < INITIAL_IOAT_DESC_COUNT; i++) {
 		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
 		if (!desc) {
-			printk(KERN_ERR "IOAT: Only %d initial descriptors\n", i);
+			dev_err(&ioat_chan->device->pdev->dev,
+				"ioatdma: Only %d initial descriptors\n", i);
 			break;
 		}
 		list_add_tail(&desc->node, &tmp_list);
@@ -224,8 +289,8 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
 	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
 	ioat_chan->completion_virt =
 		pci_pool_alloc(ioat_chan->device->completion_pool,
-		               GFP_KERNEL,
-		               &ioat_chan->completion_addr);
+			       GFP_KERNEL,
+			       &ioat_chan->completion_addr);
 	memset(ioat_chan->completion_virt, 0,
 	       sizeof(*ioat_chan->completion_virt));
 	writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
@@ -233,54 +298,88 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
 	writel(((u64) ioat_chan->completion_addr) >> 32,
 	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
 
-	ioat_start_null_desc(ioat_chan);
+	tasklet_enable(&ioat_chan->cleanup_task);
+	ioat_dma_start_null_desc(ioat_chan);
 	return i;
 }
 
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
-
 static void ioat_dma_free_chan_resources(struct dma_chan *chan)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-	struct ioat_device *ioat_device = to_ioat_device(chan->device);
+	struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
 	struct ioat_desc_sw *desc, *_desc;
-	u16 chanctrl;
 	int in_use_descs = 0;
 
+	tasklet_disable(&ioat_chan->cleanup_task);
 	ioat_dma_memcpy_cleanup(ioat_chan);
 
+	/* Delay 100ms after reset to allow internal DMA logic to quiesce
+	 * before removing DMA descriptor resources.
+	 */
 	writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
+	mdelay(100);
 
 	spin_lock_bh(&ioat_chan->desc_lock);
 	list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
 		in_use_descs++;
 		list_del(&desc->node);
-		pci_pool_free(ioat_device->dma_pool, desc->hw,
+		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
 			      desc->async_tx.phys);
 		kfree(desc);
 	}
 	list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) {
 		list_del(&desc->node);
-		pci_pool_free(ioat_device->dma_pool, desc->hw,
+		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
 			      desc->async_tx.phys);
 		kfree(desc);
 	}
 	spin_unlock_bh(&ioat_chan->desc_lock);
 
-	pci_pool_free(ioat_device->completion_pool,
-	              ioat_chan->completion_virt,
-	              ioat_chan->completion_addr);
+	pci_pool_free(ioatdma_device->completion_pool,
+		      ioat_chan->completion_virt,
+		      ioat_chan->completion_addr);
 
 	/* one is ok since we left it on there on purpose */
 	if (in_use_descs > 1)
-		printk(KERN_ERR "IOAT: Freeing %d in use descriptors!\n",
+		dev_err(&ioat_chan->device->pdev->dev,
+			"ioatdma: Freeing %d in use descriptors!\n",
 			in_use_descs - 1);
 
 	ioat_chan->last_completion = ioat_chan->completion_addr = 0;
+	ioat_chan->pending = 0;
+}
+/**
+ * ioat_dma_get_next_descriptor - return the next available descriptor
+ * @ioat_chan: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the chain, and must be called with the
+ * channel's desc_lock held.  Allocates more descriptors if the channel
+ * has run out.
+ */
+static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
+						struct ioat_dma_chan *ioat_chan)
+{
+	struct ioat_desc_sw *new = NULL;
+
+	if (!list_empty(&ioat_chan->free_desc)) {
+		new = to_ioat_desc(ioat_chan->free_desc.next);
+		list_del(&new->node);
+	} else {
+		/* try to get another desc */
+		new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
+		/* will this ever happen? */
+		/* TODO add upper limit on these */
+		BUG_ON(!new);
+	}
+
+	prefetch(new->hw);
+	return new;
 }
 
-static struct dma_async_tx_descriptor *
-ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
+static struct dma_async_tx_descriptor *ioat_dma_prep_memcpy(
+						struct dma_chan *chan,
+						size_t len,
+						int int_en)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 	struct ioat_desc_sw *first, *prev, *new;
@@ -299,17 +398,7 @@ ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
 
 	spin_lock_bh(&ioat_chan->desc_lock);
 	while (len) {
-		if (!list_empty(&ioat_chan->free_desc)) {
-			new = to_ioat_desc(ioat_chan->free_desc.next);
-			list_del(&new->node);
-		} else {
-			/* try to get another desc */
-			new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
-			/* will this ever happen? */
-			/* TODO add upper limit on these */
-			BUG_ON(!new);
-		}
-
+		new = ioat_dma_get_next_descriptor(ioat_chan);
 		copy = min((u32) len, ioat_chan->xfercap);
 
 		new->hw->size = copy;
@@ -343,12 +432,11 @@ ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
 	return new ? &new->async_tx : NULL;
 }
 
-
 /**
- * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
+ *                                 descriptors to hw
  * @chan: DMA channel handle
  */
-
 static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
@@ -360,15 +448,23 @@ static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
 	}
 }
 
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
+static void ioat_dma_cleanup_tasklet(unsigned long data)
+{
+	struct ioat_dma_chan *chan = (void *)data;
+	ioat_dma_memcpy_cleanup(chan);
+	writew(IOAT_CHANCTRL_INT_DISABLE,
+	       chan->reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
 {
 	unsigned long phys_complete;
 	struct ioat_desc_sw *desc, *_desc;
 	dma_cookie_t cookie = 0;
 
-	prefetch(chan->completion_virt);
+	prefetch(ioat_chan->completion_virt);
 
-	if (!spin_trylock(&chan->cleanup_lock))
+	if (!spin_trylock(&ioat_chan->cleanup_lock))
 		return;
 
 	/* The completion writeback can happen at any time,
@@ -378,26 +474,28 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
 
 #if (BITS_PER_LONG == 64)
 	phys_complete =
-	chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+	ioat_chan->completion_virt->full & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
 #else
-	phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
+	phys_complete = ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
 #endif
 
-	if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
-		IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
-		printk("IOAT: Channel halted, chanerr = %x\n",
-			readl(chan->reg_base + IOAT_CHANERR_OFFSET));
+	if ((ioat_chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
+				IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
+		dev_err(&ioat_chan->device->pdev->dev,
+			"ioatdma: Channel halted, chanerr = %x\n",
+			readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
 
 		/* TODO do something to salvage the situation */
 	}
 
-	if (phys_complete == chan->last_completion) {
-		spin_unlock(&chan->cleanup_lock);
+	if (phys_complete == ioat_chan->last_completion) {
+		spin_unlock(&ioat_chan->cleanup_lock);
 		return;
 	}
 
-	spin_lock_bh(&chan->desc_lock);
-	list_for_each_entry_safe(desc, _desc, &chan->used_desc, node) {
+	cookie = 0;
+	spin_lock_bh(&ioat_chan->desc_lock);
+	list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
 
 		/*
 		 * Incoming DMA requests may use multiple descriptors, due to
@@ -407,31 +505,36 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
 		if (desc->async_tx.cookie) {
 			cookie = desc->async_tx.cookie;
 
-			/* yes we are unmapping both _page and _single alloc'd
-			   regions with unmap_page. Is this *really* that bad?
-			*/
-			pci_unmap_page(chan->device->pdev,
+			/*
+			 * yes we are unmapping both _page and _single alloc'd
+			 * regions with unmap_page. Is this *really* that bad?
+			 */
+			pci_unmap_page(ioat_chan->device->pdev,
 					pci_unmap_addr(desc, dst),
 					pci_unmap_len(desc, len),
 					PCI_DMA_FROMDEVICE);
-			pci_unmap_page(chan->device->pdev,
+			pci_unmap_page(ioat_chan->device->pdev,
 					pci_unmap_addr(desc, src),
 					pci_unmap_len(desc, len),
 					PCI_DMA_TODEVICE);
 		}
 
 		if (desc->async_tx.phys != phys_complete) {
-			/* a completed entry, but not the last, so cleanup
+			/*
+			 * a completed entry, but not the last, so cleanup
 			 * if the client is done with the descriptor
 			 */
 			if (desc->async_tx.ack) {
 				list_del(&desc->node);
-				list_add_tail(&desc->node, &chan->free_desc);
+				list_add_tail(&desc->node,
+					      &ioat_chan->free_desc);
 			} else
 				desc->async_tx.cookie = 0;
 		} else {
-			/* last used desc. Do not remove, so we can append from
-			   it, but don't look at it next time, either */
+			/*
+			 * last used desc. Do not remove, so we can append from
+			 * it, but don't look at it next time, either
+			 */
 			desc->async_tx.cookie = 0;
 
 			/* TODO check status bits? */
@@ -439,13 +542,13 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
 		}
 	}
 
-	spin_unlock_bh(&chan->desc_lock);
+	spin_unlock_bh(&ioat_chan->desc_lock);
 
-	chan->last_completion = phys_complete;
+	ioat_chan->last_completion = phys_complete;
 	if (cookie != 0)
-		chan->completed_cookie = cookie;
+		ioat_chan->completed_cookie = cookie;
 
-	spin_unlock(&chan->cleanup_lock);
+	spin_unlock(&ioat_chan->cleanup_lock);
 }
 
 static void ioat_dma_dependency_added(struct dma_chan *chan)
@@ -466,11 +569,10 @@ static void ioat_dma_dependency_added(struct dma_chan *chan)
  * @done: if not %NULL, updated with last completed transaction
  * @used: if not %NULL, updated with last used transaction
  */
-
 static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
-                                            dma_cookie_t cookie,
-                                            dma_cookie_t *done,
-                                            dma_cookie_t *used)
+					    dma_cookie_t cookie,
+					    dma_cookie_t *done,
+					    dma_cookie_t *used)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 	dma_cookie_t last_used;
@@ -481,7 +583,7 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
 	last_complete = ioat_chan->completed_cookie;
 
 	if (done)
-		*done= last_complete;
+		*done = last_complete;
 	if (used)
 		*used = last_used;
 
@@ -495,7 +597,7 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
 	last_complete = ioat_chan->completed_cookie;
 
 	if (done)
-		*done= last_complete;
+		*done = last_complete;
 	if (used)
 		*used = last_used;
 
@@ -504,63 +606,13 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
 
 /* PCI API */
 
-static struct pci_device_id ioat_pci_tbl[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_UNISYS,
-		     PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
-	{ 0, }
-};
-
-static struct pci_driver ioat_pci_driver = {
-	.name 	= "ioatdma",
-	.id_table = ioat_pci_tbl,
-	.probe	= ioat_probe,
-	.shutdown = ioat_shutdown,
-	.remove	= __devexit_p(ioat_remove),
-};
-
-static irqreturn_t ioat_do_interrupt(int irq, void *data)
-{
-	struct ioat_device *instance = data;
-	unsigned long attnstatus;
-	u8 intrctrl;
-
-	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
-
-	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
-		return IRQ_NONE;
-
-	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
-		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
-		return IRQ_NONE;
-	}
-
-	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
-
-	printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus);
-
-	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
-	return IRQ_HANDLED;
-}
-
-static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan)
+static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
 {
 	struct ioat_desc_sw *desc;
 
 	spin_lock_bh(&ioat_chan->desc_lock);
 
-	if (!list_empty(&ioat_chan->free_desc)) {
-		desc = to_ioat_desc(ioat_chan->free_desc.next);
-		list_del(&desc->node);
-	} else {
-		/* try to get another desc */
-		spin_unlock_bh(&ioat_chan->desc_lock);
-		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
-		spin_lock_bh(&ioat_chan->desc_lock);
-		/* will this ever happen? */
-		BUG_ON(!desc);
-	}
-
+	desc = ioat_dma_get_next_descriptor(ioat_chan);
 	desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
 	desc->hw->next = 0;
 	desc->async_tx.ack = 1;
@@ -581,7 +633,11 @@ static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan)
  */
 #define IOAT_TEST_SIZE 2000
 
-static int ioat_self_test(struct ioat_device *device)
+/**
+ * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+static int ioat_dma_self_test(struct ioatdma_device *device)
 {
 	int i;
 	u8 *src;
@@ -607,9 +663,11 @@ static int ioat_self_test(struct ioat_device *device)
 
 	/* Start copy, using first DMA channel */
 	dma_chan = container_of(device->common.channels.next,
-	                        struct dma_chan,
-	                        device_node);
+				struct dma_chan,
+				device_node);
 	if (ioat_dma_alloc_chan_resources(dma_chan) < 1) {
+		dev_err(&device->pdev->dev,
+			"selftest cannot allocate chan resource\n");
 		err = -ENODEV;
 		goto out;
 	}
@@ -627,12 +685,14 @@ static int ioat_self_test(struct ioat_device *device)
 	msleep(1);
 
 	if (ioat_dma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
-		printk(KERN_ERR "ioatdma: Self-test copy timed out, disabling\n");
+		dev_err(&device->pdev->dev,
+			"ioatdma: Self-test copy timed out, disabling\n");
 		err = -ENODEV;
 		goto free_resources;
 	}
 	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
-		printk(KERN_ERR "ioatdma: Self-test copy failed compare, disabling\n");
+		dev_err(&device->pdev->dev,
+			"ioatdma: Self-test copy failed compare, disabling\n");
 		err = -ENODEV;
 		goto free_resources;
 	}
@@ -645,147 +705,252 @@ out:
 	return err;
 }
 
-static int __devinit ioat_probe(struct pci_dev *pdev,
-                                const struct pci_device_id *ent)
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+		    sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+		 "set ioat interrupt style: msix (default), "
+		 "msix-single-vector, msi, intx)");
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
 {
-	int err;
-	unsigned long mmio_start, mmio_len;
-	void __iomem *reg_base;
-	struct ioat_device *device;
+	struct ioat_dma_chan *ioat_chan;
+	int err, i, j, msixcnt;
+	u8 intrctrl = 0;
+
+	if (!strcmp(ioat_interrupt_style, "msix"))
+		goto msix;
+	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+		goto msix_single_vector;
+	if (!strcmp(ioat_interrupt_style, "msi"))
+		goto msi;
+	if (!strcmp(ioat_interrupt_style, "intx"))
+		goto intx;
+
+msix:
+	/* The number of MSI-X vectors should equal the number of channels */
+	msixcnt = device->common.chancnt;
+	for (i = 0; i < msixcnt; i++)
+		device->msix_entries[i].entry = i;
+
+	err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
+	if (err < 0)
+		goto msi;
+	if (err > 0)
+		goto msix_single_vector;
+
+	for (i = 0; i < msixcnt; i++) {
+		ioat_chan = ioat_lookup_chan_by_index(device, i);
+		err = request_irq(device->msix_entries[i].vector,
+				  ioat_dma_do_interrupt_msix,
+				  0, "ioat-msix", ioat_chan);
+		if (err) {
+			for (j = 0; j < i; j++) {
+				ioat_chan =
+					ioat_lookup_chan_by_index(device, j);
+				free_irq(device->msix_entries[j].vector,
+					 ioat_chan);
+			}
+			goto msix_single_vector;
+		}
+	}
+	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+	device->irq_mode = msix_multi_vector;
+	goto done;
 
-	err = pci_enable_device(pdev);
+msix_single_vector:
+	device->msix_entries[0].entry = 0;
+	err = pci_enable_msix(device->pdev, device->msix_entries, 1);
 	if (err)
-		goto err_enable_device;
+		goto msi;
 
-	err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
-	if (err)
-		err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+	err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
+			  0, "ioat-msix", device);
+	if (err) {
+		pci_disable_msix(device->pdev);
+		goto msi;
+	}
+	device->irq_mode = msix_single_vector;
+	goto done;
+
+msi:
+	err = pci_enable_msi(device->pdev);
 	if (err)
-		goto err_set_dma_mask;
+		goto intx;
 
-	err = pci_request_regions(pdev, ioat_pci_driver.name);
+	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
+			  0, "ioat-msi", device);
+	if (err) {
+		pci_disable_msi(device->pdev);
+		goto intx;
+	}
+	/*
+	 * CB 1.2 devices need a bit set in configuration space to enable MSI
+	 */
+	if (device->version == IOAT_VER_1_2) {
+		u32 dmactrl;
+		pci_read_config_dword(device->pdev,
+				      IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
+		dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
+		pci_write_config_dword(device->pdev,
+				       IOAT_PCI_DMACTRL_OFFSET, dmactrl);
+	}
+	device->irq_mode = msi;
+	goto done;
+
+intx:
+	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
+			  IRQF_SHARED, "ioat-intx", device);
 	if (err)
-		goto err_request_regions;
+		goto err_no_irq;
+	device->irq_mode = intx;
 
-	mmio_start = pci_resource_start(pdev, 0);
-	mmio_len = pci_resource_len(pdev, 0);
+done:
+	intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+	writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	return 0;
 
-	reg_base = ioremap(mmio_start, mmio_len);
-	if (!reg_base) {
-		err = -ENOMEM;
-		goto err_ioremap;
+err_no_irq:
+	/* Disable all interrupt generation */
+	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	dev_err(&device->pdev->dev, "no usable interrupts\n");
+	device->irq_mode = none;
+	return -1;
+}
+
+/**
+ * ioat_dma_remove_interrupts - remove whatever interrupts were set
+ * @device: ioat device
+ */
+static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
+{
+	struct ioat_dma_chan *ioat_chan;
+	int i;
+
+	/* Disable all interrupt generation */
+	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+
+	switch (device->irq_mode) {
+	case msix_multi_vector:
+		for (i = 0; i < device->common.chancnt; i++) {
+			ioat_chan = ioat_lookup_chan_by_index(device, i);
+			free_irq(device->msix_entries[i].vector, ioat_chan);
+		}
+		pci_disable_msix(device->pdev);
+		break;
+	case msix_single_vector:
+		free_irq(device->msix_entries[0].vector, device);
+		pci_disable_msix(device->pdev);
+		break;
+	case msi:
+		free_irq(device->pdev->irq, device);
+		pci_disable_msi(device->pdev);
+		break;
+	case intx:
+		free_irq(device->pdev->irq, device);
+		break;
+	case none:
+		dev_warn(&device->pdev->dev,
+			 "call to %s without interrupts setup\n", __func__);
 	}
+	device->irq_mode = none;
+}
+
+struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
+				      void __iomem *iobase)
+{
+	int err;
+	struct ioatdma_device *device;
 
 	device = kzalloc(sizeof(*device), GFP_KERNEL);
 	if (!device) {
 		err = -ENOMEM;
 		goto err_kzalloc;
 	}
+	device->pdev = pdev;
+	device->reg_base = iobase;
+	device->version = readb(device->reg_base + IOAT_VER_OFFSET);
 
 	/* DMA coherent memory pool for DMA descriptor allocations */
 	device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
-		sizeof(struct ioat_dma_descriptor), 64, 0);
+					   sizeof(struct ioat_dma_descriptor),
+					   64, 0);
 	if (!device->dma_pool) {
 		err = -ENOMEM;
 		goto err_dma_pool;
 	}
 
-	device->completion_pool = pci_pool_create("completion_pool", pdev, sizeof(u64), SMP_CACHE_BYTES, SMP_CACHE_BYTES);
+	device->completion_pool = pci_pool_create("completion_pool", pdev,
+						  sizeof(u64), SMP_CACHE_BYTES,
+						  SMP_CACHE_BYTES);
 	if (!device->completion_pool) {
 		err = -ENOMEM;
 		goto err_completion_pool;
 	}
 
-	device->pdev = pdev;
-	pci_set_drvdata(pdev, device);
-#ifdef CONFIG_PCI_MSI
-	if (pci_enable_msi(pdev) == 0) {
-		device->msi = 1;
-	} else {
-		device->msi = 0;
-	}
-#endif
-	err = request_irq(pdev->irq, &ioat_do_interrupt, IRQF_SHARED, "ioat",
-		device);
-	if (err)
-		goto err_irq;
-
-	device->reg_base = reg_base;
-
-	writeb(IOAT_INTRCTRL_MASTER_INT_EN, device->reg_base + IOAT_INTRCTRL_OFFSET);
-	pci_set_master(pdev);
-
 	INIT_LIST_HEAD(&device->common.channels);
-	enumerate_dma_channels(device);
+	ioat_dma_enumerate_channels(device);
 
 	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
-	device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources;
-	device->common.device_free_chan_resources = ioat_dma_free_chan_resources;
+	device->common.device_alloc_chan_resources =
+						ioat_dma_alloc_chan_resources;
+	device->common.device_free_chan_resources =
+						ioat_dma_free_chan_resources;
 	device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
 	device->common.device_is_tx_complete = ioat_dma_is_complete;
 	device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
 	device->common.device_dependency_added = ioat_dma_dependency_added;
 	device->common.dev = &pdev->dev;
-	printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n",
-		device->common.chancnt);
+	dev_err(&device->pdev->dev,
+		"ioatdma: Intel(R) I/OAT DMA Engine found,"
+		" %d channels, device version 0x%02x\n",
+		device->common.chancnt, device->version);
 
-	err = ioat_self_test(device);
+	err = ioat_dma_setup_interrupts(device);
+	if (err)
+		goto err_setup_interrupts;
+
+	err = ioat_dma_self_test(device);
 	if (err)
 		goto err_self_test;
 
 	dma_async_device_register(&device->common);
 
-	return 0;
+	return device;
 
 err_self_test:
-err_irq:
+	ioat_dma_remove_interrupts(device);
+err_setup_interrupts:
 	pci_pool_destroy(device->completion_pool);
 err_completion_pool:
 	pci_pool_destroy(device->dma_pool);
 err_dma_pool:
 	kfree(device);
 err_kzalloc:
-	iounmap(reg_base);
-err_ioremap:
-	pci_release_regions(pdev);
-err_request_regions:
-err_set_dma_mask:
-	pci_disable_device(pdev);
-err_enable_device:
-
-	printk(KERN_ERR "Intel(R) I/OAT DMA Engine initialization failed\n");
-
-	return err;
+	iounmap(iobase);
+	dev_err(&device->pdev->dev,
+		"ioatdma: Intel(R) I/OAT DMA Engine initialization failed\n");
+	return NULL;
 }
 
-static void ioat_shutdown(struct pci_dev *pdev)
+void ioat_dma_remove(struct ioatdma_device *device)
 {
-	struct ioat_device *device;
-	device = pci_get_drvdata(pdev);
-
-	dma_async_device_unregister(&device->common);
-}
-
-static void __devexit ioat_remove(struct pci_dev *pdev)
-{
-	struct ioat_device *device;
 	struct dma_chan *chan, *_chan;
 	struct ioat_dma_chan *ioat_chan;
 
-	device = pci_get_drvdata(pdev);
 	dma_async_device_unregister(&device->common);
 
-	free_irq(device->pdev->irq, device);
-#ifdef CONFIG_PCI_MSI
-	if (device->msi)
-		pci_disable_msi(device->pdev);
-#endif
+	ioat_dma_remove_interrupts(device);
+
 	pci_pool_destroy(device->dma_pool);
 	pci_pool_destroy(device->completion_pool);
-	iounmap(device->reg_base);
-	pci_release_regions(pdev);
-	pci_disable_device(pdev);
-	list_for_each_entry_safe(chan, _chan, &device->common.channels, device_node) {
+
+	list_for_each_entry_safe(chan, _chan,
+				 &device->common.channels, device_node) {
 		ioat_chan = to_ioat_chan(chan);
 		list_del(&chan->device_node);
 		kfree(ioat_chan);
@@ -793,25 +958,3 @@ static void __devexit ioat_remove(struct pci_dev *pdev)
 	kfree(device);
 }
 
-/* MODULE API */
-MODULE_VERSION("1.9");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Intel Corporation");
-
-static int __init ioat_init_module(void)
-{
-	/* it's currently unsafe to unload this module */
-	/* if forced, worst case is that rmmod hangs */
-	__unsafe(THIS_MODULE);
-
-	return pci_register_driver(&ioat_pci_driver);
-}
-
-module_init(ioat_init_module);
-
-static void __exit ioat_exit_module(void)
-{
-	pci_unregister_driver(&ioat_pci_driver);
-}
-
-module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
index bf4dad70e0f..2a319e124ec 100644
--- a/drivers/dma/ioatdma.h
+++ b/drivers/dma/ioatdma.h
@@ -28,25 +28,35 @@
 #include <linux/cache.h>
 #include <linux/pci_ids.h>
 
+enum ioat_interrupt {
+	none = 0,
+	msix_multi_vector = 1,
+	msix_single_vector = 2,
+	msi = 3,
+	intx = 4,
+};
+
 #define IOAT_LOW_COMPLETION_MASK	0xffffffc0
 
 /**
- * struct ioat_device - internal representation of a IOAT device
+ * struct ioatdma_device - internal representation of an IOAT device
  * @pdev: PCI-Express device
  * @reg_base: MMIO register space base address
  * @dma_pool: for allocating DMA descriptors
  * @common: embedded struct dma_device
- * @msi: Message Signaled Interrupt number
+ * @version: version of ioatdma device
  */
 
-struct ioat_device {
+struct ioatdma_device {
 	struct pci_dev *pdev;
 	void __iomem *reg_base;
 	struct pci_pool *dma_pool;
 	struct pci_pool *completion_pool;
-
 	struct dma_device common;
-	u8 msi;
+	u8 version;
+	enum ioat_interrupt irq_mode;
+	struct msix_entry msix_entries[4];
+	struct ioat_dma_chan *idx[4];
 };
 
 /**
@@ -84,7 +94,7 @@ struct ioat_dma_chan {
 
 	int pending;
 
-	struct ioat_device *device;
+	struct ioatdma_device *device;
 	struct dma_chan common;
 
 	dma_addr_t completion_addr;
@@ -95,6 +105,7 @@ struct ioat_dma_chan {
 			u32 high;
 		};
 	} *completion_virt;
+	struct tasklet_struct cleanup_task;
 };
 
 /* wrapper around hardware descriptor format + additional software fields */
@@ -117,4 +128,16 @@ struct ioat_desc_sw {
 	struct dma_async_tx_descriptor async_tx;
 };
 
+#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
+struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
+				      void __iomem *iobase);
+void ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev,
+				   void __iomem *iobase);
+#else
+#define ioat_dma_probe(pdev, iobase)    NULL
+#define ioat_dma_remove(device)         do { } while (0)
+#define ioat_dca_init(pdev, iobase)	NULL
+#endif
+
 #endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
index 4d7a12880be..9e7434e1551 100644
--- a/drivers/dma/ioatdma_hw.h
+++ b/drivers/dma/ioatdma_hw.h
@@ -27,7 +27,7 @@
 #define IOAT_PCI_RID			0x00
 #define IOAT_PCI_SVID			0x8086
 #define IOAT_PCI_SID			0x8086
-#define IOAT_VER			0x12	/* Version 1.2 */
+#define IOAT_VER_1_2			0x12	/* Version 1.2 */
 
 struct ioat_dma_descriptor {
 	uint32_t	size;
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
index a30c7349075..baaab5ea146 100644
--- a/drivers/dma/ioatdma_registers.h
+++ b/drivers/dma/ioatdma_registers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -21,6 +21,9 @@
 #ifndef _IOAT_REGISTERS_H_
 #define _IOAT_REGISTERS_H_
 
+#define IOAT_PCI_DMACTRL_OFFSET			0x48
+#define IOAT_PCI_DMACTRL_DMA_EN			0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN			0x00000002
 
 /* MMIO Device Registers */
 #define IOAT_CHANCNT_OFFSET			0x00	/*  8-bit */
@@ -39,6 +42,7 @@
 #define IOAT_INTRCTRL_MASTER_INT_EN		0x01	/* Master Interrupt Enable */
 #define IOAT_INTRCTRL_INT_STATUS		0x02	/* ATTNSTATUS -or- Channel Int */
 #define IOAT_INTRCTRL_INT			0x04	/* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL	0x08    /* Enable all MSI-X vectors */
 
 #define IOAT_ATTNSTATUS_OFFSET			0x04	/* Each bit is a channel */
 
diff --git a/drivers/ide/cris/ide-cris.c b/drivers/ide/cris/ide-cris.c
index 2b4d2a0ae5c..c306c9f534a 100644
--- a/drivers/ide/cris/ide-cris.c
+++ b/drivers/ide/cris/ide-cris.c
@@ -939,7 +939,8 @@ static int cris_ide_build_dmatable (ide_drive_t *drive)
 		/* group sequential buffers into one large buffer */
 		addr = page_to_phys(sg->page) + sg->offset;
 		size = sg_dma_len(sg);
-		while (sg++, --i) {
+		while (--i) {
+			sg = sg_next(sg);
 			if ((addr + size) != page_to_phys(sg->page) + sg->offset)
 				break;
 			size += sg_dma_len(sg);
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 4754769eda9..92177ca48b4 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -716,32 +716,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
 	rq->buffer = rq->cmd;
 }
 
-static int idedisk_issue_flush(struct request_queue *q, struct gendisk *disk,
-			       sector_t *error_sector)
-{
-	ide_drive_t *drive = q->queuedata;
-	struct request *rq;
-	int ret;
-
-	if (!drive->wcache)
-		return 0;
-
-	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-
-	idedisk_prepare_flush(q, rq);
-
-	ret = blk_execute_rq(q, disk, rq, 0);
-
-	/*
-	 * if we failed and caller wants error offset, get it
-	 */
-	if (ret && error_sector)
-		*error_sector = ide_get_error_location(drive, rq->cmd);
-
-	blk_put_request(rq);
-	return ret;
-}
-
 /*
  * This is tightly woven into the driver->do_special can not touch.
  * DON'T do it again until a total personality rewrite is committed.
@@ -781,7 +755,6 @@ static void update_ordered(ide_drive_t *drive)
 	struct hd_driveid *id = drive->id;
 	unsigned ordered = QUEUE_ORDERED_NONE;
 	prepare_flush_fn *prep_fn = NULL;
-	issue_flush_fn *issue_fn = NULL;
 
 	if (drive->wcache) {
 		unsigned long long capacity;
@@ -805,13 +778,11 @@ static void update_ordered(ide_drive_t *drive)
 		if (barrier) {
 			ordered = QUEUE_ORDERED_DRAIN_FLUSH;
 			prep_fn = idedisk_prepare_flush;
-			issue_fn = idedisk_issue_flush;
 		}
 	} else
 		ordered = QUEUE_ORDERED_DRAIN;
 
 	blk_queue_ordered(drive->queue, ordered, prep_fn);
-	blk_queue_issue_flush_fn(drive->queue, issue_fn);
 }
 
 static int write_cache(ide_drive_t *drive, int arg)
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index b453211ee0f..a4cbbbaccde 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -280,7 +280,7 @@ int ide_build_dmatable (ide_drive_t *drive, struct request *rq)
 			}
 		}
 
-		sg++;
+		sg = sg_next(sg);
 		i--;
 	}
 
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 4cece930114..04273d3c147 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -322,41 +322,6 @@ static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq)
 	spin_unlock_irqrestore(&ide_lock, flags);
 }
 
-/*
- * FIXME: probably move this somewhere else, name is bad too :)
- */
-u64 ide_get_error_location(ide_drive_t *drive, char *args)
-{
-	u32 high, low;
-	u8 hcyl, lcyl, sect;
-	u64 sector;
-
-	high = 0;
-	hcyl = args[5];
-	lcyl = args[4];
-	sect = args[3];
-
-	if (ide_id_has_flush_cache_ext(drive->id)) {
-		low = (hcyl << 16) | (lcyl << 8) | sect;
-		HWIF(drive)->OUTB(drive->ctl|0x80, IDE_CONTROL_REG);
-		high = ide_read_24(drive);
-	} else {
-		u8 cur = HWIF(drive)->INB(IDE_SELECT_REG);
-		if (cur & 0x40) {
-			high = cur & 0xf;
-			low = (hcyl << 16) | (lcyl << 8) | sect;
-		} else {
-			low = hcyl * drive->head * drive->sect;
-			low += lcyl * drive->sect;
-			low += sect - 1;
-		}
-	}
-
-	sector = ((u64) high << 24) | low;
-	return sector;
-}
-EXPORT_SYMBOL(ide_get_error_location);
-
 /**
  *	ide_end_drive_cmd	-	end an explicit drive command
  *	@drive: command 
@@ -881,7 +846,8 @@ void ide_init_sg_cmd(ide_drive_t *drive, struct request *rq)
 	ide_hwif_t *hwif = drive->hwif;
 
 	hwif->nsect = hwif->nleft = rq->nr_sectors;
-	hwif->cursg = hwif->cursg_ofs = 0;
+	hwif->cursg_ofs = 0;
+	hwif->cursg = NULL;
 }
 
 EXPORT_SYMBOL_GPL(ide_init_sg_cmd);
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index d1011712601..34b1fb65bc7 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1349,7 +1349,7 @@ static int hwif_init(ide_hwif_t *hwif)
 	if (!hwif->sg_max_nents)
 		hwif->sg_max_nents = PRD_ENTRIES;
 
-	hwif->sg_table = kmalloc(sizeof(struct scatterlist)*hwif->sg_max_nents,
+	hwif->sg_table = kzalloc(sizeof(struct scatterlist)*hwif->sg_max_nents,
 				 GFP_KERNEL);
 	if (!hwif->sg_table) {
 		printk(KERN_ERR "%s: unable to allocate SG table.\n", hwif->name);
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index aa06dafb74a..2a3c8d49834 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -45,6 +45,7 @@
 #include <linux/hdreg.h>
 #include <linux/ide.h>
 #include <linux/bitops.h>
+#include <linux/scatterlist.h>
 
 #include <asm/byteorder.h>
 #include <asm/irq.h>
@@ -263,6 +264,7 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write)
 {
 	ide_hwif_t *hwif = drive->hwif;
 	struct scatterlist *sg = hwif->sg_table;
+	struct scatterlist *cursg = hwif->cursg;
 	struct page *page;
 #ifdef CONFIG_HIGHMEM
 	unsigned long flags;
@@ -270,8 +272,14 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write)
 	unsigned int offset;
 	u8 *buf;
 
-	page = sg[hwif->cursg].page;
-	offset = sg[hwif->cursg].offset + hwif->cursg_ofs * SECTOR_SIZE;
+	cursg = hwif->cursg;
+	if (!cursg) {
+		cursg = sg;
+		hwif->cursg = sg;
+	}
+
+	page = cursg->page;
+	offset = cursg->offset + hwif->cursg_ofs * SECTOR_SIZE;
 
 	/* get the current page and offset */
 	page = nth_page(page, (offset >> PAGE_SHIFT));
@@ -285,8 +293,8 @@ static void ide_pio_sector(ide_drive_t *drive, unsigned int write)
 	hwif->nleft--;
 	hwif->cursg_ofs++;
 
-	if ((hwif->cursg_ofs * SECTOR_SIZE) == sg[hwif->cursg].length) {
-		hwif->cursg++;
+	if ((hwif->cursg_ofs * SECTOR_SIZE) == cursg->length) {
+		hwif->cursg = sg_next(hwif->cursg);
 		hwif->cursg_ofs = 0;
 	}
 
@@ -367,6 +375,8 @@ static ide_startstop_t task_error(ide_drive_t *drive, struct request *rq,
 
 static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
 {
+	HWIF(drive)->cursg = NULL;
+
 	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *task = rq->special;
 
diff --git a/drivers/ide/mips/au1xxx-ide.c b/drivers/ide/mips/au1xxx-ide.c
index aebde49365d..892d08f61dc 100644
--- a/drivers/ide/mips/au1xxx-ide.c
+++ b/drivers/ide/mips/au1xxx-ide.c
@@ -296,7 +296,7 @@ static int auide_build_dmatable(ide_drive_t *drive)
 			cur_addr += tc;
 			cur_len -= tc;
 		}
-		sg++;
+		sg = sg_next(sg);
 		i--;
 	}
 
diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c
index 85ffaaa39b1..c74fef6bbc9 100644
--- a/drivers/ide/pci/sgiioc4.c
+++ b/drivers/ide/pci/sgiioc4.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/ioport.h>
 #include <linux/blkdev.h>
+#include <linux/scatterlist.h>
 #include <linux/ioc4.h>
 #include <asm/io.h>
 
@@ -537,7 +538,7 @@ sgiioc4_build_dma_table(ide_drive_t * drive, struct request *rq, int ddir)
 			}
 		}
 
-		sg++;
+		sg = sg_next(sg);
 		i--;
 	}
 
diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c
index 7d8873839e2..9e86406bf44 100644
--- a/drivers/ide/ppc/pmac.c
+++ b/drivers/ide/ppc/pmac.c
@@ -1539,7 +1539,7 @@ pmac_ide_build_dmatable(ide_drive_t *drive, struct request *rq)
 			cur_len -= tc;
 			++table;
 		}
-		sg++;
+		sg = sg_next(sg);
 		i--;
 	}
 
diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c
index f87f003e3ef..22709a4f8fc 100644
--- a/drivers/infiniband/hw/ipath/ipath_dma.c
+++ b/drivers/infiniband/hw/ipath/ipath_dma.c
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include <linux/scatterlist.h>
 #include <rdma/ib_verbs.h>
 
 #include "ipath_verbs.h"
@@ -96,17 +97,18 @@ static void ipath_dma_unmap_page(struct ib_device *dev,
 	BUG_ON(!valid_dma_direction(direction));
 }
 
-static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents,
-			enum dma_data_direction direction)
+static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+			int nents, enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
 	u64 addr;
 	int i;
 	int ret = nents;
 
 	BUG_ON(!valid_dma_direction(direction));
 
-	for (i = 0; i < nents; i++) {
-		addr = (u64) page_address(sg[i].page);
+	for_each_sg(sgl, sg, nents, i) {
+		addr = (u64) page_address(sg->page);
 		/* TODO: handle highmem pages */
 		if (!addr) {
 			ret = 0;
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index e05690e3592..f3529b6f0a3 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -124,17 +124,19 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
 
 	if (cmd_dir == ISER_DIR_OUT) {
 		/* copy the unaligned sg the buffer which is used for RDMA */
-		struct scatterlist *sg = (struct scatterlist *)data->buf;
+		struct scatterlist *sgl = (struct scatterlist *)data->buf;
+		struct scatterlist *sg;
 		int i;
 		char *p, *from;
 
-		for (p = mem, i = 0; i < data->size; i++) {
-			from = kmap_atomic(sg[i].page, KM_USER0);
+		p = mem;
+		for_each_sg(sgl, sg, data->size, i) {
+			from = kmap_atomic(sg->page, KM_USER0);
 			memcpy(p,
-			       from + sg[i].offset,
-			       sg[i].length);
+			       from + sg->offset,
+			       sg->length);
 			kunmap_atomic(from, KM_USER0);
-			p += sg[i].length;
+			p += sg->length;
 		}
 	}
 
@@ -176,7 +178,7 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
 
 	if (cmd_dir == ISER_DIR_IN) {
 		char *mem;
-		struct scatterlist *sg;
+		struct scatterlist *sgl, *sg;
 		unsigned char *p, *to;
 		unsigned int sg_size;
 		int i;
@@ -184,16 +186,17 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask,
 		/* copy back read RDMA to unaligned sg */
 		mem	= mem_copy->copy_buf;
 
-		sg	= (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf;
+		sgl	= (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf;
 		sg_size = iser_ctask->data[ISER_DIR_IN].size;
 
-		for (p = mem, i = 0; i < sg_size; i++){
-			to = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
-			memcpy(to + sg[i].offset,
+		p = mem;
+		for_each_sg(sgl, sg, sg_size, i) {
+			to = kmap_atomic(sg->page, KM_SOFTIRQ0);
+			memcpy(to + sg->offset,
 			       p,
-			       sg[i].length);
+			       sg->length);
 			kunmap_atomic(to, KM_SOFTIRQ0);
-			p += sg[i].length;
+			p += sg->length;
 		}
 	}
 
@@ -224,7 +227,8 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 			       struct iser_page_vec *page_vec,
 			       struct ib_device *ibdev)
 {
-	struct scatterlist *sg = (struct scatterlist *)data->buf;
+	struct scatterlist *sgl = (struct scatterlist *)data->buf;
+	struct scatterlist *sg;
 	u64 first_addr, last_addr, page;
 	int end_aligned;
 	unsigned int cur_page = 0;
@@ -232,24 +236,25 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 	int i;
 
 	/* compute the offset of first element */
-	page_vec->offset = (u64) sg[0].offset & ~MASK_4K;
+	page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
 
-	for (i = 0; i < data->dma_nents; i++) {
-		unsigned int dma_len = ib_sg_dma_len(ibdev, &sg[i]);
+	for_each_sg(sgl, sg, data->dma_nents, i) {
+		unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
 
 		total_sz += dma_len;
 
-		first_addr = ib_sg_dma_address(ibdev, &sg[i]);
+		first_addr = ib_sg_dma_address(ibdev, sg);
 		last_addr  = first_addr + dma_len;
 
 		end_aligned   = !(last_addr  & ~MASK_4K);
 
 		/* continue to collect page fragments till aligned or SG ends */
 		while (!end_aligned && (i + 1 < data->dma_nents)) {
+			sg = sg_next(sg);
 			i++;
-			dma_len = ib_sg_dma_len(ibdev, &sg[i]);
+			dma_len = ib_sg_dma_len(ibdev, sg);
 			total_sz += dma_len;
-			last_addr = ib_sg_dma_address(ibdev, &sg[i]) + dma_len;
+			last_addr = ib_sg_dma_address(ibdev, sg) + dma_len;
 			end_aligned = !(last_addr  & ~MASK_4K);
 		}
 
@@ -284,25 +289,26 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
 					      struct ib_device *ibdev)
 {
-	struct scatterlist *sg;
+	struct scatterlist *sgl, *sg;
 	u64 end_addr, next_addr;
 	int i, cnt;
 	unsigned int ret_len = 0;
 
-	sg = (struct scatterlist *)data->buf;
+	sgl = (struct scatterlist *)data->buf;
 
-	for (cnt = 0, i = 0; i < data->dma_nents; i++, cnt++) {
+	cnt = 0;
+	for_each_sg(sgl, sg, data->dma_nents, i) {
 		/* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX "
 		   "offset: %ld sz: %ld\n", i,
-		   (unsigned long)page_to_phys(sg[i].page),
-		   (unsigned long)sg[i].offset,
-		   (unsigned long)sg[i].length); */
-		end_addr = ib_sg_dma_address(ibdev, &sg[i]) +
-			   ib_sg_dma_len(ibdev, &sg[i]);
+		   (unsigned long)page_to_phys(sg->page),
+		   (unsigned long)sg->offset,
+		   (unsigned long)sg->length); */
+		end_addr = ib_sg_dma_address(ibdev, sg) +
+			   ib_sg_dma_len(ibdev, sg);
 		/* iser_dbg("Checking sg iobuf end address "
 		       "0x%08lX\n", end_addr); */
 		if (i + 1 < data->dma_nents) {
-			next_addr = ib_sg_dma_address(ibdev, &sg[i+1]);
+			next_addr = ib_sg_dma_address(ibdev, sg_next(sg));
 			/* are i, i+1 fragments of the same page? */
 			if (end_addr == next_addr)
 				continue;
@@ -322,15 +328,16 @@ static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data,
 static void iser_data_buf_dump(struct iser_data_buf *data,
 			       struct ib_device *ibdev)
 {
-	struct scatterlist *sg = (struct scatterlist *)data->buf;
+	struct scatterlist *sgl = (struct scatterlist *)data->buf;
+	struct scatterlist *sg;
 	int i;
 
-	for (i = 0; i < data->dma_nents; i++)
+	for_each_sg(sgl, sg, data->dma_nents, i)
 		iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
 			 "off:0x%x sz:0x%x dma_len:0x%x\n",
-			 i, (unsigned long)ib_sg_dma_address(ibdev, &sg[i]),
-			 sg[i].page, sg[i].offset,
-			 sg[i].length, ib_sg_dma_len(ibdev, &sg[i]));
+			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
+			 sg->page, sg->offset,
+			 sg->length, ib_sg_dma_len(ibdev, sg));
 }
 
 static void iser_dump_page_vec(struct iser_page_vec *page_vec)
diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index 23b6f7bc16b..476012b6dfa 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -506,9 +506,14 @@ static void send_message(capidrv_contr * card, _cmsg * cmsg)
 {
 	struct sk_buff *skb;
 	size_t len;
+
 	capi_cmsg2message(cmsg, cmsg->buf);
 	len = CAPIMSG_LEN(cmsg->buf);
 	skb = alloc_skb(len, GFP_ATOMIC);
+	if (!skb) {
+		printk(KERN_ERR "capidrv::send_message: can't allocate mem\n");
+		return;
+	}
 	memcpy(skb_put(skb, len), cmsg->buf, len);
 	if (capi20_put_message(&global.ap, skb) != CAPI_NOERROR)
 		kfree_skb(skb);
diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 9f73bc2727c..f5553186931 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -821,6 +821,8 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 				return -EFAULT;
 		}
 		card = get_capi_ctr_by_nr(ldef.contr);
+		if (!card)
+			return -EINVAL;
 		card = capi_ctr_get(card);
 		if (!card)
 			return -ESRCH;
diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c
index 00e31609a23..af7648274b3 100644
--- a/drivers/isdn/gigaset/bas-gigaset.c
+++ b/drivers/isdn/gigaset/bas-gigaset.c
@@ -1936,14 +1936,7 @@ static int gigaset_write_room(struct cardstate *cs)
  */
 static int gigaset_chars_in_buffer(struct cardstate *cs)
 {
-	unsigned long flags;
-	unsigned bytes;
-
-	spin_lock_irqsave(&cs->cmdlock, flags);
-	bytes = cs->cmdbytes;
-	spin_unlock_irqrestore(&cs->cmdlock, flags);
-
-	return bytes;
+	return cs->cmdbytes;
 }
 
 /* gigaset_brkchars
diff --git a/drivers/isdn/gigaset/i4l.c b/drivers/isdn/gigaset/i4l.c
index 1654fa41357..9e089f06a94 100644
--- a/drivers/isdn/gigaset/i4l.c
+++ b/drivers/isdn/gigaset/i4l.c
@@ -109,13 +109,9 @@ EXPORT_SYMBOL_GPL(gigaset_skb_sent);
 static int command_from_LL(isdn_ctrl *cntrl)
 {
 	struct cardstate *cs = gigaset_get_cs_by_id(cntrl->driver);
-	//isdn_ctrl response;
-	//unsigned long flags;
 	struct bc_state *bcs;
 	int retval = 0;
 	struct setup_parm *sp;
-	unsigned param;
-	unsigned long flags;
 
 	gigaset_debugdrivers();
 
@@ -162,12 +158,8 @@ static int command_from_LL(isdn_ctrl *cntrl)
 		}
 		*sp = cntrl->parm.setup;
 
-		spin_lock_irqsave(&cs->lock, flags);
-		param = bcs->at_state.seq_index;
-		spin_unlock_irqrestore(&cs->lock, flags);
-
-		if (!gigaset_add_event(cs, &bcs->at_state, EV_DIAL, sp, param,
-				       NULL)) {
+		if (!gigaset_add_event(cs, &bcs->at_state, EV_DIAL, sp,
+				       bcs->at_state.seq_index, NULL)) {
 			//FIXME what should we do?
 			kfree(sp);
 			gigaset_free_channel(bcs);
diff --git a/drivers/isdn/gigaset/proc.c b/drivers/isdn/gigaset/proc.c
index e767afa55ab..da6f3acf9fd 100644
--- a/drivers/isdn/gigaset/proc.c
+++ b/drivers/isdn/gigaset/proc.c
@@ -19,15 +19,9 @@
 static ssize_t show_cidmode(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
-	int ret;
-	unsigned long flags;
 	struct cardstate *cs = dev_get_drvdata(dev);
 
-	spin_lock_irqsave(&cs->lock, flags);
-	ret = sprintf(buf, "%u\n", cs->cidmode);
-	spin_unlock_irqrestore(&cs->lock, flags);
-
-	return ret;
+	return sprintf(buf, "%u\n", cs->cidmode);
 }
 
 static ssize_t set_cidmode(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c
index a1263019df5..ca4bee173cf 100644
--- a/drivers/isdn/gigaset/usb-gigaset.c
+++ b/drivers/isdn/gigaset/usb-gigaset.c
@@ -310,7 +310,6 @@ static void gigaset_modem_fill(unsigned long data)
 	struct cardstate *cs = (struct cardstate *) data;
 	struct bc_state *bcs = &cs->bcs[0]; /* only one channel */
 	struct cmdbuf_t *cb;
-	unsigned long flags;
 	int again;
 
 	gig_dbg(DEBUG_OUTPUT, "modem_fill");
@@ -323,9 +322,7 @@ static void gigaset_modem_fill(unsigned long data)
 	do {
 		again = 0;
 		if (!bcs->tx_skb) { /* no skb is being sent */
-			spin_lock_irqsave(&cs->cmdlock, flags);
 			cb = cs->cmdbuf;
-			spin_unlock_irqrestore(&cs->cmdlock, flags);
 			if (cb) { /* commands to send? */
 				gig_dbg(DEBUG_OUTPUT, "modem_fill: cb");
 				if (send_cb(cs, cb) < 0) {
@@ -546,13 +543,9 @@ static int gigaset_write_cmd(struct cardstate *cs, const unsigned char *buf,
 
 static int gigaset_write_room(struct cardstate *cs)
 {
-	unsigned long flags;
 	unsigned bytes;
 
-	spin_lock_irqsave(&cs->cmdlock, flags);
 	bytes = cs->cmdbytes;
-	spin_unlock_irqrestore(&cs->cmdlock, flags);
-
 	return bytes < IF_WRITEBUF ? IF_WRITEBUF - bytes : 0;
 }
 
diff --git a/drivers/isdn/i4l/isdn_common.c b/drivers/isdn/i4l/isdn_common.c
index 4910bca5264..c6df2925ebd 100644
--- a/drivers/isdn/i4l/isdn_common.c
+++ b/drivers/isdn/i4l/isdn_common.c
@@ -1365,7 +1365,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg)
 				} else {
 					s = NULL;
 				}
-				ret = down_interruptible(&dev->sem);
+				ret = mutex_lock_interruptible(&dev->mtx);
 				if( ret ) return ret;
 				if ((s = isdn_net_new(s, NULL))) {
 					if (copy_to_user(argp, s, strlen(s) + 1)){
@@ -1375,7 +1375,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg)
 					}
 				} else
 					ret = -ENODEV;
-				up(&dev->sem);
+				mutex_unlock(&dev->mtx);
 				return ret;
 			case IIOCNETASL:
 				/* Add a slave to a network-interface */
@@ -1384,7 +1384,7 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg)
 						return -EFAULT;
 				} else
 					return -EINVAL;
-				ret = down_interruptible(&dev->sem);
+				ret = mutex_lock_interruptible(&dev->mtx);
 				if( ret ) return ret;
 				if ((s = isdn_net_newslave(bname))) {
 					if (copy_to_user(argp, s, strlen(s) + 1)){
@@ -1394,17 +1394,17 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg)
 					}
 				} else
 					ret = -ENODEV;
-				up(&dev->sem);
+				mutex_unlock(&dev->mtx);
 				return ret;
 			case IIOCNETDIF:
 				/* Delete a network-interface */
 				if (arg) {
 					if (copy_from_user(name, argp, sizeof(name)))
 						return -EFAULT;
-					ret = down_interruptible(&dev->sem);
+					ret = mutex_lock_interruptible(&dev->mtx);
 					if( ret ) return ret;
 					ret = isdn_net_rm(name);
-					up(&dev->sem);
+					mutex_unlock(&dev->mtx);
 					return ret;
 				} else
 					return -EINVAL;
@@ -1433,10 +1433,10 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg)
 				if (arg) {
 					if (copy_from_user(&phone, argp, sizeof(phone)))
 						return -EFAULT;
-					ret = down_interruptible(&dev->sem);
+					ret = mutex_lock_interruptible(&dev->mtx);
 					if( ret ) return ret;
 					ret = isdn_net_addphone(&phone);
-					up(&dev->sem);
+					mutex_unlock(&dev->mtx);
 					return ret;
 				} else
 					return -EINVAL;
@@ -1445,10 +1445,10 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg)
 				if (arg) {
 					if (copy_from_user(&phone, argp, sizeof(phone)))
 						return -EFAULT;
-					ret = down_interruptible(&dev->sem);
+					ret = mutex_lock_interruptible(&dev->mtx);
 					if( ret ) return ret;
 					ret = isdn_net_getphones(&phone, argp);
-					up(&dev->sem);
+					mutex_unlock(&dev->mtx);
 					return ret;
 				} else
 					return -EINVAL;
@@ -1457,10 +1457,10 @@ isdn_ioctl(struct inode *inode, struct file *file, uint cmd, ulong arg)
 				if (arg) {
 					if (copy_from_user(&phone, argp, sizeof(phone)))
 						return -EFAULT;
-					ret = down_interruptible(&dev->sem);
+					ret = mutex_lock_interruptible(&dev->mtx);
 					if( ret ) return ret;
 					ret = isdn_net_delphone(&phone);
-					up(&dev->sem);
+					mutex_unlock(&dev->mtx);
 					return ret;
 				} else
 					return -EINVAL;
@@ -2304,7 +2304,7 @@ static int __init isdn_init(void)
 #ifdef MODULE
 	dev->owner = THIS_MODULE;
 #endif
-	init_MUTEX(&dev->sem);
+	mutex_init(&dev->mtx);
 	init_waitqueue_head(&dev->info_waitq);
 	for (i = 0; i < ISDN_MAX_CHANNELS; i++) {
 		dev->drvmap[i] = -1;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 8216a6f75be..64fee90bb68 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -441,33 +441,12 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 	return clone;
 }
 
-static void crypt_free_buffer_pages(struct crypt_config *cc,
-                                    struct bio *clone, unsigned int bytes)
+static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
 {
-	unsigned int i, start, end;
+	unsigned int i;
 	struct bio_vec *bv;
 
-	/*
-	 * This is ugly, but Jens Axboe thinks that using bi_idx in the
-	 * endio function is too dangerous at the moment, so I calculate the
-	 * correct position using bi_vcnt and bi_size.
-	 * The bv_offset and bv_len fields might already be modified but we
-	 * know that we always allocated whole pages.
-	 * A fix to the bi_idx issue in the kernel is in the works, so
-	 * we will hopefully be able to revert to the cleaner solution soon.
-	 */
-	i = clone->bi_vcnt - 1;
-	bv = bio_iovec_idx(clone, i);
-	end = (i << PAGE_SHIFT) + (bv->bv_offset + bv->bv_len) - clone->bi_size;
-	start = end - bytes;
-
-	start >>= PAGE_SHIFT;
-	if (!clone->bi_size)
-		end = clone->bi_vcnt;
-	else
-		end >>= PAGE_SHIFT;
-
-	for (i = start; i < end; i++) {
+	for (i = 0; i < clone->bi_vcnt; i++) {
 		bv = bio_iovec_idx(clone, i);
 		BUG_ON(!bv->bv_page);
 		mempool_free(bv->bv_page, cc->page_pool);
@@ -519,7 +498,7 @@ static void crypt_endio(struct bio *clone, int error)
 	 * free the processed pages
 	 */
 	if (!read_io) {
-		crypt_free_buffer_pages(cc, clone, clone->bi_size);
+		crypt_free_buffer_pages(cc, clone);
 		goto out;
 	}
 
@@ -608,7 +587,7 @@ static void process_write(struct dm_crypt_io *io)
 		ctx.idx_out = 0;
 
 		if (unlikely(crypt_convert(cc, &ctx) < 0)) {
-			crypt_free_buffer_pages(cc, clone, clone->bi_size);
+			crypt_free_buffer_pages(cc, clone);
 			bio_put(clone);
 			dec_pending(io, -EIO);
 			return;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2bcde5798b5..fbe477bb2c6 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -999,33 +999,6 @@ void dm_table_unplug_all(struct dm_table *t)
 	}
 }
 
-int dm_table_flush_all(struct dm_table *t)
-{
-	struct list_head *d, *devices = dm_table_get_devices(t);
-	int ret = 0;
-	unsigned i;
-
-	for (i = 0; i < t->num_targets; i++)
-		if (t->targets[i].type->flush)
-			t->targets[i].type->flush(&t->targets[i]);
-
-	for (d = devices->next; d != devices; d = d->next) {
-		struct dm_dev *dd = list_entry(d, struct dm_dev, list);
-		struct request_queue *q = bdev_get_queue(dd->bdev);
-		int err;
-
-		if (!q->issue_flush_fn)
-			err = -EOPNOTSUPP;
-		else
-			err = q->issue_flush_fn(q, dd->bdev->bd_disk, NULL);
-
-		if (!ret)
-			ret = err;
-	}
-
-	return ret;
-}
-
 struct mapped_device *dm_table_get_md(struct dm_table *t)
 {
 	dm_get(t->md);
@@ -1043,4 +1016,3 @@ EXPORT_SYMBOL(dm_table_get_md);
 EXPORT_SYMBOL(dm_table_put);
 EXPORT_SYMBOL(dm_table_get);
 EXPORT_SYMBOL(dm_table_unplug_all);
-EXPORT_SYMBOL(dm_table_flush_all);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 167765c4774..d837d37f620 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -840,21 +840,6 @@ static int dm_request(struct request_queue *q, struct bio *bio)
 	return 0;
 }
 
-static int dm_flush_all(struct request_queue *q, struct gendisk *disk,
-			sector_t *error_sector)
-{
-	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_table(md);
-	int ret = -ENXIO;
-
-	if (map) {
-		ret = dm_table_flush_all(map);
-		dm_table_put(map);
-	}
-
-	return ret;
-}
-
 static void dm_unplug_all(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
@@ -1003,7 +988,6 @@ static struct mapped_device *alloc_dev(int minor)
 	blk_queue_make_request(md->queue, dm_request);
 	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
 	md->queue->unplug_fn = dm_unplug_all;
-	md->queue->issue_flush_fn = dm_flush_all;
 
 	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
 	if (!md->io_pool)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 462ee652a89..4b3faa45277 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -111,7 +111,6 @@ void dm_table_postsuspend_targets(struct dm_table *t);
 int dm_table_resume_targets(struct dm_table *t);
 int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 void dm_table_unplug_all(struct dm_table *t);
-int dm_table_flush_all(struct dm_table *t);
 
 /*-----------------------------------------------------------------
  * A registry of target types.
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 550148770bb..56a11f6c127 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -92,25 +92,6 @@ static void linear_unplug(struct request_queue *q)
 	}
 }
 
-static int linear_issue_flush(struct request_queue *q, struct gendisk *disk,
-			      sector_t *error_sector)
-{
-	mddev_t *mddev = q->queuedata;
-	linear_conf_t *conf = mddev_to_conf(mddev);
-	int i, ret = 0;
-
-	for (i=0; i < mddev->raid_disks && ret == 0; i++) {
-		struct block_device *bdev = conf->disks[i].rdev->bdev;
-		struct request_queue *r_queue = bdev_get_queue(bdev);
-
-		if (!r_queue->issue_flush_fn)
-			ret = -EOPNOTSUPP;
-		else
-			ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-	}
-	return ret;
-}
-
 static int linear_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -279,7 +260,6 @@ static int linear_run (mddev_t *mddev)
 
 	blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
 	mddev->queue->unplug_fn = linear_unplug;
-	mddev->queue->issue_flush_fn = linear_issue_flush;
 	mddev->queue->backing_dev_info.congested_fn = linear_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 	return 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index acf1b81b47c..0dc563d76b3 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3463,7 +3463,6 @@ static int do_md_stop(mddev_t * mddev, int mode)
 			mddev->pers->stop(mddev);
 			mddev->queue->merge_bvec_fn = NULL;
 			mddev->queue->unplug_fn = NULL;
-			mddev->queue->issue_flush_fn = NULL;
 			mddev->queue->backing_dev_info.congested_fn = NULL;
 			if (mddev->pers->sync_request)
 				sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index f2a63f394ad..b35731cceac 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -194,35 +194,6 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
 	seq_printf (seq, "]");
 }
 
-static int multipath_issue_flush(struct request_queue *q, struct gendisk *disk,
-				 sector_t *error_sector)
-{
-	mddev_t *mddev = q->queuedata;
-	multipath_conf_t *conf = mddev_to_conf(mddev);
-	int i, ret = 0;
-
-	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			struct block_device *bdev = rdev->bdev;
-			struct request_queue *r_queue = bdev_get_queue(bdev);
-
-			if (!r_queue->issue_flush_fn)
-				ret = -EOPNOTSUPP;
-			else {
-				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
-				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
-							      error_sector);
-				rdev_dec_pending(rdev, mddev);
-				rcu_read_lock();
-			}
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
 static int multipath_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -527,7 +498,6 @@ static int multipath_run (mddev_t *mddev)
 	mddev->array_size = mddev->size;
 
 	mddev->queue->unplug_fn = multipath_unplug;
-	mddev->queue->issue_flush_fn = multipath_issue_flush;
 	mddev->queue->backing_dev_info.congested_fn = multipath_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ef0da2d8495..e79e1a538d4 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -40,26 +40,6 @@ static void raid0_unplug(struct request_queue *q)
 	}
 }
 
-static int raid0_issue_flush(struct request_queue *q, struct gendisk *disk,
-			     sector_t *error_sector)
-{
-	mddev_t *mddev = q->queuedata;
-	raid0_conf_t *conf = mddev_to_conf(mddev);
-	mdk_rdev_t **devlist = conf->strip_zone[0].dev;
-	int i, ret = 0;
-
-	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		struct block_device *bdev = devlist[i]->bdev;
-		struct request_queue *r_queue = bdev_get_queue(bdev);
-
-		if (!r_queue->issue_flush_fn)
-			ret = -EOPNOTSUPP;
-		else
-			ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-	}
-	return ret;
-}
-
 static int raid0_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -250,7 +230,6 @@ static int create_strip_zones (mddev_t *mddev)
 
 	mddev->queue->unplug_fn = raid0_unplug;
 
-	mddev->queue->issue_flush_fn = raid0_issue_flush;
 	mddev->queue->backing_dev_info.congested_fn = raid0_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6d03bea6fa5..0bcefad8241 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -567,36 +567,6 @@ static void raid1_unplug(struct request_queue *q)
 	md_wakeup_thread(mddev->thread);
 }
 
-static int raid1_issue_flush(struct request_queue *q, struct gendisk *disk,
-			     sector_t *error_sector)
-{
-	mddev_t *mddev = q->queuedata;
-	conf_t *conf = mddev_to_conf(mddev);
-	int i, ret = 0;
-
-	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			struct block_device *bdev = rdev->bdev;
-			struct request_queue *r_queue = bdev_get_queue(bdev);
-
-			if (!r_queue->issue_flush_fn)
-				ret = -EOPNOTSUPP;
-			else {
-				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
-				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
-							      error_sector);
-				rdev_dec_pending(rdev, mddev);
-				rcu_read_lock();
-			}
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
 static int raid1_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -1997,7 +1967,6 @@ static int run(mddev_t *mddev)
 	mddev->array_size = mddev->size;
 
 	mddev->queue->unplug_fn = raid1_unplug;
-	mddev->queue->issue_flush_fn = raid1_issue_flush;
 	mddev->queue->backing_dev_info.congested_fn = raid1_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 25a96c42bdb..fc6607acb6e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -611,36 +611,6 @@ static void raid10_unplug(struct request_queue *q)
 	md_wakeup_thread(mddev->thread);
 }
 
-static int raid10_issue_flush(struct request_queue *q, struct gendisk *disk,
-			     sector_t *error_sector)
-{
-	mddev_t *mddev = q->queuedata;
-	conf_t *conf = mddev_to_conf(mddev);
-	int i, ret = 0;
-
-	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			struct block_device *bdev = rdev->bdev;
-			struct request_queue *r_queue = bdev_get_queue(bdev);
-
-			if (!r_queue->issue_flush_fn)
-				ret = -EOPNOTSUPP;
-			else {
-				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
-				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
-							      error_sector);
-				rdev_dec_pending(rdev, mddev);
-				rcu_read_lock();
-			}
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
 static int raid10_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -2118,7 +2088,6 @@ static int run(mddev_t *mddev)
 	mddev->resync_max_sectors = size << conf->chunk_shift;
 
 	mddev->queue->unplug_fn = raid10_unplug;
-	mddev->queue->issue_flush_fn = raid10_issue_flush;
 	mddev->queue->backing_dev_info.congested_fn = raid10_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index caaca9e178b..8ee181a01f5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3204,36 +3204,6 @@ static void raid5_unplug_device(struct request_queue *q)
 	unplug_slaves(mddev);
 }
 
-static int raid5_issue_flush(struct request_queue *q, struct gendisk *disk,
-			     sector_t *error_sector)
-{
-	mddev_t *mddev = q->queuedata;
-	raid5_conf_t *conf = mddev_to_conf(mddev);
-	int i, ret = 0;
-
-	rcu_read_lock();
-	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			struct block_device *bdev = rdev->bdev;
-			struct request_queue *r_queue = bdev_get_queue(bdev);
-
-			if (!r_queue->issue_flush_fn)
-				ret = -EOPNOTSUPP;
-			else {
-				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
-				ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
-							      error_sector);
-				rdev_dec_pending(rdev, mddev);
-				rcu_read_lock();
-			}
-		}
-	}
-	rcu_read_unlock();
-	return ret;
-}
-
 static int raid5_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
@@ -4263,7 +4233,6 @@ static int run(mddev_t *mddev)
 		       mdname(mddev));
 
 	mddev->queue->unplug_fn = raid5_unplug_device;
-	mddev->queue->issue_flush_fn = raid5_issue_flush;
 	mddev->queue->backing_dev_info.congested_data = mddev;
 	mddev->queue->backing_dev_info.congested_fn = raid5_congested;
 
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index bdff950a54a..626bb3c9af2 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -293,7 +293,7 @@ nextSGEset:
 	for (ii=0; ii < (numSgeThisFrame-1); ii++) {
 		thisxfer = sg_dma_len(sg);
 		if (thisxfer == 0) {
-			sg ++; /* Get next SG element from the OS */
+			sg = sg_next(sg); /* Get next SG element from the OS */
 			sg_done++;
 			continue;
 		}
@@ -301,7 +301,7 @@ nextSGEset:
 		v2 = sg_dma_address(sg);
 		mptscsih_add_sge(psge, sgflags | thisxfer, v2);
 
-		sg++;		/* Get next SG element from the OS */
+		sg = sg_next(sg);	/* Get next SG element from the OS */
 		psge += (sizeof(u32) + sizeof(dma_addr_t));
 		sgeOffset += (sizeof(u32) + sizeof(dma_addr_t));
 		sg_done++;
@@ -322,7 +322,7 @@ nextSGEset:
 		v2 = sg_dma_address(sg);
 		mptscsih_add_sge(psge, sgflags | thisxfer, v2);
 		/*
-		sg++;
+		sg = sg_next(sg);
 		psge += (sizeof(u32) + sizeof(dma_addr_t));
 		*/
 		sgeOffset += (sizeof(u32) + sizeof(dma_addr_t));
@@ -2605,14 +2605,10 @@ mptscsih_set_scsi_lookup(MPT_ADAPTER *ioc, int i, struct scsi_cmnd *scmd)
 }
 
 /**
- * SCPNT_TO_LOOKUP_IDX
- *
- * search's for a given scmd in the ScsiLookup[] array list
- *
+ * SCPNT_TO_LOOKUP_IDX - searches for a given scmd in the ScsiLookup[] array list
  * @ioc: Pointer to MPT_ADAPTER structure
- * @scmd: scsi_cmnd pointer
- *
- **/
+ * @sc: scsi_cmnd pointer
+ */
 static int
 SCPNT_TO_LOOKUP_IDX(MPT_ADAPTER *ioc, struct scsi_cmnd *sc)
 {
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 50b2c733441..d602ba6d541 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -149,29 +149,6 @@ static int i2o_block_device_flush(struct i2o_device *dev)
 };
 
 /**
- *	i2o_block_issue_flush - device-flush interface for block-layer
- *	@queue: the request queue of the device which should be flushed
- *	@disk: gendisk
- *	@error_sector: error offset
- *
- *	Helper function to provide flush functionality to block-layer.
- *
- *	Returns 0 on success or negative error code on failure.
- */
-
-static int i2o_block_issue_flush(struct request_queue * queue, struct gendisk *disk,
-				 sector_t * error_sector)
-{
-	struct i2o_block_device *i2o_blk_dev = queue->queuedata;
-	int rc = -ENODEV;
-
-	if (likely(i2o_blk_dev))
-		rc = i2o_block_device_flush(i2o_blk_dev->i2o_dev);
-
-	return rc;
-}
-
-/**
  *	i2o_block_device_mount - Mount (load) the media of device dev
  *	@dev: I2O device which should receive the mount request
  *	@media_id: Media Identifier
@@ -1009,7 +986,6 @@ static struct i2o_block_device *i2o_block_device_alloc(void)
 	}
 
 	blk_queue_prep_rq(queue, i2o_block_prep_req_fn);
-	blk_queue_issue_flush_fn(queue, i2o_block_issue_flush);
 
 	gd->major = I2O_MAJOR;
 	gd->queue = queue;
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index b0abc7d9280..a5d0354bbbd 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -153,14 +153,14 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 			blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
 			blk_queue_max_segment_size(mq->queue, bouncesz);
 
-			mq->sg = kmalloc(sizeof(struct scatterlist),
+			mq->sg = kzalloc(sizeof(struct scatterlist),
 				GFP_KERNEL);
 			if (!mq->sg) {
 				ret = -ENOMEM;
 				goto cleanup_queue;
 			}
 
-			mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
+			mq->bounce_sg = kzalloc(sizeof(struct scatterlist) *
 				bouncesz / 512, GFP_KERNEL);
 			if (!mq->bounce_sg) {
 				ret = -ENOMEM;
@@ -177,7 +177,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 		blk_queue_max_hw_segments(mq->queue, host->max_hw_segs);
 		blk_queue_max_segment_size(mq->queue, host->max_seg_size);
 
-		mq->sg = kmalloc(sizeof(struct scatterlist) *
+		mq->sg = kzalloc(sizeof(struct scatterlist) *
 			host->max_phys_segs, GFP_KERNEL);
 		if (!mq->sg) {
 			ret = -ENOMEM;
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 254b194e762..71b986b38c5 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1280,8 +1280,8 @@ static int mmc_spi_probe(struct spi_device *spi)
 	if (!host->data)
 		goto fail_nobuf1;
 
-	if (spi->master->cdev.dev->dma_mask) {
-		struct device	*dev = spi->master->cdev.dev;
+	if (spi->master->dev.parent->dma_mask) {
+		struct device	*dev = spi->master->dev.parent;
 
 		host->dma_dev = dev;
 		host->ones_dma = dma_map_single(dev, ones,
diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig
index c0c77f82d05..f201bd67313 100644
--- a/drivers/pcmcia/Kconfig
+++ b/drivers/pcmcia/Kconfig
@@ -2,9 +2,7 @@
 # PCCARD (PCMCIA/CardBus) bus subsystem configuration
 #
 
-menu "PCCARD (PCMCIA/CardBus) support"
-
-config PCCARD
+menuconfig PCCARD
 	tristate "PCCard (PCMCIA/CardBus) support"
 	depends on HOTPLUG
 	---help---
@@ -278,5 +276,3 @@ config PCCARD_IODYN
 	bool
 
 endif	# PCCARD
-
-endmenu
diff --git a/drivers/pcmcia/au1000_xxs1500.c b/drivers/pcmcia/au1000_xxs1500.c
index 01874b0bb03..ce9d5c44a7b 100644
--- a/drivers/pcmcia/au1000_xxs1500.c
+++ b/drivers/pcmcia/au1000_xxs1500.c
@@ -50,7 +50,10 @@
 
 #include <asm/au1000.h>
 #include <asm/au1000_pcmcia.h>
-#include <asm/xxs1500.h>
+
+#define PCMCIA_MAX_SOCK		0
+#define PCMCIA_NUM_SOCKS	(PCMCIA_MAX_SOCK + 1)
+#define PCMCIA_IRQ		AU1000_GPIO_4
 
 #if 0
 #define DEBUG(x,args...)	printk(__FUNCTION__ ": " x,##args)
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index d154dee76e7..06a85d7d5aa 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -25,6 +25,7 @@
 #include <linux/ioport.h>
 #include <asm/io.h>
 #include <asm/byteorder.h>
+#include <asm/unaligned.h>
 
 #include <pcmcia/cs_types.h>
 #include <pcmcia/ss.h>
@@ -401,6 +402,15 @@ EXPORT_SYMBOL(pcmcia_replace_cis);
     
 ======================================================================*/
 
+static inline u16 cis_get_u16(void *ptr)
+{
+	return le16_to_cpu(get_unaligned((__le16 *) ptr));
+}
+static inline u32 cis_get_u32(void *ptr)
+{
+	return le32_to_cpu(get_unaligned((__le32 *) ptr));
+}
+
 typedef struct tuple_flags {
     u_int		link_space:4;
     u_int		has_link:1;
@@ -461,7 +471,7 @@ static int follow_link(struct pcmcia_socket *s, tuple_t *tuple)
 	/* Get indirect link from the MFC tuple */
 	read_cis_cache(s, LINK_SPACE(tuple->Flags),
 		       tuple->LinkOffset, 5, link);
-	ofs = le32_to_cpu(*(__le32 *)(link+1));
+	ofs = cis_get_u32(link + 1);
 	SPACE(tuple->Flags) = (link[0] == CISTPL_MFC_ATTR);
 	/* Move to the next indirect link */
 	tuple->LinkOffset += 5;
@@ -668,10 +678,10 @@ static int parse_checksum(tuple_t *tuple, cistpl_checksum_t *csum)
     u_char *p;
     if (tuple->TupleDataLen < 5)
 	return CS_BAD_TUPLE;
-    p = (u_char *)tuple->TupleData;
-    csum->addr = tuple->CISOffset+(short)le16_to_cpu(*(__le16 *)p)-2;
-    csum->len = le16_to_cpu(*(__le16 *)(p + 2));
-    csum->sum = *(p+4);
+    p = (u_char *) tuple->TupleData;
+    csum->addr = tuple->CISOffset + cis_get_u16(p) - 2;
+    csum->len = cis_get_u16(p + 2);
+    csum->sum = *(p + 4);
     return CS_SUCCESS;
 }
 
@@ -681,7 +691,7 @@ static int parse_longlink(tuple_t *tuple, cistpl_longlink_t *link)
 {
     if (tuple->TupleDataLen < 4)
 	return CS_BAD_TUPLE;
-    link->addr = le32_to_cpu(*(__le32 *)tuple->TupleData);
+    link->addr = cis_get_u32(tuple->TupleData);
     return CS_SUCCESS;
 }
 
@@ -700,7 +710,8 @@ static int parse_longlink_mfc(tuple_t *tuple,
 	return CS_BAD_TUPLE;
     for (i = 0; i < link->nfn; i++) {
 	link->fn[i].space = *p; p++;
-	link->fn[i].addr = le32_to_cpu(*(__le32 *)p); p += 4;
+	link->fn[i].addr = cis_get_u32(p);
+	p += 4;
     }
     return CS_SUCCESS;
 }
@@ -787,12 +798,10 @@ static int parse_jedec(tuple_t *tuple, cistpl_jedec_t *jedec)
 
 static int parse_manfid(tuple_t *tuple, cistpl_manfid_t *m)
 {
-    __le16 *p;
     if (tuple->TupleDataLen < 4)
 	return CS_BAD_TUPLE;
-    p = (__le16 *)tuple->TupleData;
-    m->manf = le16_to_cpu(p[0]);
-    m->card = le16_to_cpu(p[1]);
+    m->manf = cis_get_u16(tuple->TupleData);
+    m->card = cis_get_u16(tuple->TupleData + 2);
     return CS_SUCCESS;
 }
 
@@ -1091,7 +1100,7 @@ static int parse_cftable_entry(tuple_t *tuple,
 	break;
     case 0x20:
 	entry->mem.nwin = 1;
-	entry->mem.win[0].len = le16_to_cpu(*(__le16 *)p) << 8;
+	entry->mem.win[0].len = cis_get_u16(p) << 8;
 	entry->mem.win[0].card_addr = 0;
 	entry->mem.win[0].host_addr = 0;
 	p += 2;
@@ -1099,9 +1108,8 @@ static int parse_cftable_entry(tuple_t *tuple,
 	break;
     case 0x40:
 	entry->mem.nwin = 1;
-	entry->mem.win[0].len = le16_to_cpu(*(__le16 *)p) << 8;
-	entry->mem.win[0].card_addr =
-	    le16_to_cpu(*(__le16 *)(p+2)) << 8;
+	entry->mem.win[0].len = cis_get_u16(p) << 8;
+	entry->mem.win[0].card_addr = cis_get_u16(p + 2) << 8;
 	entry->mem.win[0].host_addr = 0;
 	p += 4;
 	if (p > q) return CS_BAD_TUPLE;
@@ -1138,7 +1146,7 @@ static int parse_bar(tuple_t *tuple, cistpl_bar_t *bar)
     p = (u_char *)tuple->TupleData;
     bar->attr = *p;
     p += 2;
-    bar->size = le32_to_cpu(*(__le32 *)p);
+    bar->size = cis_get_u32(p);
     return CS_SUCCESS;
 }
 
@@ -1151,7 +1159,7 @@ static int parse_config_cb(tuple_t *tuple, cistpl_config_t *config)
 	return CS_BAD_TUPLE;
     config->last_idx = *(++p);
     p++;
-    config->base = le32_to_cpu(*(__le32 *)p);
+    config->base = cis_get_u32(p);
     config->subtuples = tuple->TupleDataLen - 6;
     return CS_SUCCESS;
 }
@@ -1267,7 +1275,7 @@ static int parse_vers_2(tuple_t *tuple, cistpl_vers_2_t *v2)
 
     v2->vers = p[0];
     v2->comply = p[1];
-    v2->dindex = le16_to_cpu(*(__le16 *)(p+2));
+    v2->dindex = cis_get_u16(p +2 );
     v2->vspec8 = p[6];
     v2->vspec9 = p[7];
     v2->nhdr = p[8];
@@ -1308,8 +1316,8 @@ static int parse_format(tuple_t *tuple, cistpl_format_t *fmt)
 
     fmt->type = p[0];
     fmt->edc = p[1];
-    fmt->offset = le32_to_cpu(*(__le32 *)(p+2));
-    fmt->length = le32_to_cpu(*(__le32 *)(p+6));
+    fmt->offset = cis_get_u32(p + 2);
+    fmt->length = cis_get_u32(p + 6);
 
     return CS_SUCCESS;
 }
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 55baa1f0fcb..7bf78c12789 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -23,6 +23,7 @@
 #include <linux/crc32.h>
 #include <linux/firmware.h>
 #include <linux/kref.h>
+#include <linux/dma-mapping.h>
 
 #define IN_CARD_SERVICES
 #include <pcmcia/cs_types.h>
@@ -670,6 +671,9 @@ struct pcmcia_device * pcmcia_device_add(struct pcmcia_socket *s, unsigned int f
 	p_dev->dev.bus = &pcmcia_bus_type;
 	p_dev->dev.parent = s->dev.parent;
 	p_dev->dev.release = pcmcia_release_dev;
+	/* by default don't allow DMA */
+	p_dev->dma_mask = DMA_MASK_NONE;
+	p_dev->dev.dma_mask = &p_dev->dma_mask;
 	bus_id_len = sprintf (p_dev->dev.bus_id, "%d.%d", p_dev->socket->sock, p_dev->device_no);
 
 	p_dev->devname = kmalloc(6 + bus_id_len + 1, GFP_KERNEL);
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index dca9f8549b3..874923fcb2f 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -58,7 +58,7 @@ static inline u_int pxa2xx_mcxx_asst(u_int pcmcia_cycle_ns,
 				     u_int mem_clk_10khz)
 {
 	u_int code = pcmcia_cycle_ns * mem_clk_10khz;
-	return (code / 300000) + ((code % 300000) ? 1 : 0) - 1;
+	return (code / 300000) + ((code % 300000) ? 1 : 0) + 1;
 }
 
 static inline u_int pxa2xx_mcxx_setup(u_int pcmcia_cycle_ns,
diff --git a/drivers/ps3/ps3av.c b/drivers/ps3/ps3av.c
index 85e21614f86..397f4ce849d 100644
--- a/drivers/ps3/ps3av.c
+++ b/drivers/ps3/ps3av.c
@@ -23,6 +23,7 @@
 #include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/ioctl.h>
+#include <linux/fb.h>
 
 #include <asm/firmware.h>
 #include <asm/ps3av.h>
@@ -33,6 +34,8 @@
 #define BUFSIZE          4096	/* vuart buf size */
 #define PS3AV_BUF_SIZE   512	/* max packet size */
 
+static int safe_mode;
+
 static int timeout = 5000;	/* in msec ( 5 sec ) */
 module_param(timeout, int, 0644);
 
@@ -491,10 +494,10 @@ static int ps3av_set_videomode(void)
 	return 0;
 }
 
-static void ps3av_set_videomode_cont(u32 id, u32 old_id)
+static void ps3av_set_videomode_packet(u32 id)
 {
 	struct ps3av_pkt_avb_param avb_param;
-	int i;
+	unsigned int i;
 	u32 len = 0, av_video_cs;
 	const struct avset_video_mode *video_mode;
 	int res;
@@ -507,24 +510,6 @@ static void ps3av_set_videomode_cont(u32 id, u32 old_id)
 					ps3av->av_hw_conf.num_of_avmulti;
 	avb_param.num_of_av_audio_pkt = 0;
 
-	/* video signal off */
-	ps3av_set_video_disable_sig();
-
-	/* Retail PS3 product doesn't support this */
-	if (id & PS3AV_MODE_HDCP_OFF) {
-		res = ps3av_cmd_av_hdmi_mode(PS3AV_CMD_AV_HDMI_HDCP_OFF);
-		if (res == PS3AV_STATUS_UNSUPPORTED_HDMI_MODE)
-			dev_dbg(&ps3av->dev->core, "Not supported\n");
-		else if (res)
-			dev_dbg(&ps3av->dev->core,
-				"ps3av_cmd_av_hdmi_mode failed\n");
-	} else if (old_id & PS3AV_MODE_HDCP_OFF) {
-		res = ps3av_cmd_av_hdmi_mode(PS3AV_CMD_AV_HDMI_MODE_NORMAL);
-		if (res < 0 && res != PS3AV_STATUS_UNSUPPORTED_HDMI_MODE)
-			dev_dbg(&ps3av->dev->core,
-				"ps3av_cmd_av_hdmi_mode failed\n");
-	}
-
 	/* video_pkt */
 	for (i = 0; i < avb_param.num_of_video_pkt; i++)
 		len += ps3av_cmd_set_video_mode(&avb_param.buf[len],
@@ -555,6 +540,42 @@ static void ps3av_set_videomode_cont(u32 id, u32 old_id)
 		       __func__);
 	else if (res)
 		dev_dbg(&ps3av->dev->core, "ps3av_cmd_avb_param failed\n");
+}
+
+static void ps3av_set_videomode_cont(u32 id, u32 old_id)
+{
+	static int vesa = 0;
+	int res;
+
+	/* video signal off */
+	ps3av_set_video_disable_sig();
+
+	/*
+	 * AV backend needs non-VESA mode setting at least one time
+	 * when VESA mode is used.
+	 */
+	if (vesa == 0 && (id & PS3AV_MODE_MASK) >= 11) {
+		/* vesa mode */
+		ps3av_set_videomode_packet(2);	/* 480P */
+	}
+	vesa = 1;
+
+	/* Retail PS3 product doesn't support this */
+	if (id & PS3AV_MODE_HDCP_OFF) {
+		res = ps3av_cmd_av_hdmi_mode(PS3AV_CMD_AV_HDMI_HDCP_OFF);
+		if (res == PS3AV_STATUS_UNSUPPORTED_HDMI_MODE)
+			dev_dbg(&ps3av->dev->core, "Not supported\n");
+		else if (res)
+			dev_dbg(&ps3av->dev->core,
+				"ps3av_cmd_av_hdmi_mode failed\n");
+	} else if (old_id & PS3AV_MODE_HDCP_OFF) {
+		res = ps3av_cmd_av_hdmi_mode(PS3AV_CMD_AV_HDMI_MODE_NORMAL);
+		if (res < 0 && res != PS3AV_STATUS_UNSUPPORTED_HDMI_MODE)
+			dev_dbg(&ps3av->dev->core,
+				"ps3av_cmd_av_hdmi_mode failed\n");
+	}
+
+	ps3av_set_videomode_packet(id);
 
 	msleep(1500);
 	/* av video mute */
@@ -567,165 +588,251 @@ static void ps3avd(struct work_struct *work)
 	complete(&ps3av->done);
 }
 
-static int ps3av_vid2table_id(int vid)
-{
-	int i;
-
-	for (i = 1; i < ARRAY_SIZE(video_mode_table); i++)
-		if (video_mode_table[i].vid == vid)
-			return i;
-	return -1;
-}
+#define SHIFT_50	0
+#define SHIFT_60	4
+#define SHIFT_VESA	8
+
+static const struct {
+	unsigned mask : 19;
+	unsigned id :  4;
+} ps3av_preferred_modes[] = {
+	{ .mask = PS3AV_RESBIT_WUXGA		<< SHIFT_VESA,	.id = 13 },
+	{ .mask = PS3AV_RESBIT_1920x1080P	<< SHIFT_60,	.id = 5 },
+	{ .mask = PS3AV_RESBIT_1920x1080P	<< SHIFT_50,	.id = 10 },
+	{ .mask = PS3AV_RESBIT_1920x1080I	<< SHIFT_60,	.id = 4 },
+	{ .mask = PS3AV_RESBIT_1920x1080I	<< SHIFT_50,	.id = 9 },
+	{ .mask = PS3AV_RESBIT_SXGA		<< SHIFT_VESA,	.id = 12 },
+	{ .mask = PS3AV_RESBIT_WXGA		<< SHIFT_VESA,	.id = 11 },
+	{ .mask = PS3AV_RESBIT_1280x720P	<< SHIFT_60,	.id = 3 },
+	{ .mask = PS3AV_RESBIT_1280x720P	<< SHIFT_50,	.id = 8 },
+	{ .mask = PS3AV_RESBIT_720x480P		<< SHIFT_60,	.id = 2 },
+	{ .mask = PS3AV_RESBIT_720x576P		<< SHIFT_50,	.id = 7 },
+};
 
-static int ps3av_resbit2vid(u32 res_50, u32 res_60)
+static int ps3av_resbit2id(u32 res_50, u32 res_60, u32 res_vesa)
 {
-	int vid = -1;
+	unsigned int i;
+	u32 res_all;
+
+	/*
+	 * We mask off the resolution bits we care about and combine the
+	 * results in one bitfield, so make sure there's no overlap
+	 */
+	BUILD_BUG_ON(PS3AV_RES_MASK_50 << SHIFT_50 &
+		     PS3AV_RES_MASK_60 << SHIFT_60);
+	BUILD_BUG_ON(PS3AV_RES_MASK_50 << SHIFT_50 &
+		     PS3AV_RES_MASK_VESA << SHIFT_VESA);
+	BUILD_BUG_ON(PS3AV_RES_MASK_60 << SHIFT_60 &
+		     PS3AV_RES_MASK_VESA << SHIFT_VESA);
+	res_all = (res_50 & PS3AV_RES_MASK_50) << SHIFT_50 |
+		  (res_60 & PS3AV_RES_MASK_60) << SHIFT_60 |
+		  (res_vesa & PS3AV_RES_MASK_VESA) << SHIFT_VESA;
+
+	if (!res_all)
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(ps3av_preferred_modes); i++)
+		if (res_all & ps3av_preferred_modes[i].mask)
+			return ps3av_preferred_modes[i].id;
 
-	if (res_50 > res_60) {	/* if res_50 == res_60, res_60 will be used */
-		if (res_50 & PS3AV_RESBIT_1920x1080P)
-			vid = PS3AV_CMD_VIDEO_VID_1080P_50HZ;
-		else if (res_50 & PS3AV_RESBIT_1920x1080I)
-			vid = PS3AV_CMD_VIDEO_VID_1080I_50HZ;
-		else if (res_50 & PS3AV_RESBIT_1280x720P)
-			vid = PS3AV_CMD_VIDEO_VID_720P_50HZ;
-		else if (res_50 & PS3AV_RESBIT_720x576P)
-			vid = PS3AV_CMD_VIDEO_VID_576P;
-		else
-			vid = -1;
-	} else {
-		if (res_60 & PS3AV_RESBIT_1920x1080P)
-			vid = PS3AV_CMD_VIDEO_VID_1080P_60HZ;
-		else if (res_60 & PS3AV_RESBIT_1920x1080I)
-			vid = PS3AV_CMD_VIDEO_VID_1080I_60HZ;
-		else if (res_60 & PS3AV_RESBIT_1280x720P)
-			vid = PS3AV_CMD_VIDEO_VID_720P_60HZ;
-		else if (res_60 & PS3AV_RESBIT_720x480P)
-			vid = PS3AV_CMD_VIDEO_VID_480P;
-		else
-			vid = -1;
-	}
-	return vid;
+	return 0;
 }
 
-static int ps3av_hdmi_get_vid(struct ps3av_info_monitor *info)
+static int ps3av_hdmi_get_id(struct ps3av_info_monitor *info)
 {
-	u32 res_50, res_60;
-	int vid = -1;
+	int id;
 
-	if (info->monitor_type != PS3AV_MONITOR_TYPE_HDMI)
-		return -1;
+	if (safe_mode)
+		return PS3AV_DEFAULT_HDMI_MODE_ID_REG_60;
 
 	/* check native resolution */
-	res_50 = info->res_50.native & PS3AV_RES_MASK_50;
-	res_60 = info->res_60.native & PS3AV_RES_MASK_60;
-	if (res_50 || res_60) {
-		vid = ps3av_resbit2vid(res_50, res_60);
-		return vid;
+	id = ps3av_resbit2id(info->res_50.native, info->res_60.native,
+			     info->res_vesa.native);
+	if (id) {
+		pr_debug("%s: Using native mode %d\n", __func__, id);
+		return id;
 	}
 
-	/* check resolution */
-	res_50 = info->res_50.res_bits & PS3AV_RES_MASK_50;
-	res_60 = info->res_60.res_bits & PS3AV_RES_MASK_60;
-	if (res_50 || res_60) {
-		vid = ps3av_resbit2vid(res_50, res_60);
-		return vid;
+	/* check supported resolutions */
+	id = ps3av_resbit2id(info->res_50.res_bits, info->res_60.res_bits,
+			     info->res_vesa.res_bits);
+	if (id) {
+		pr_debug("%s: Using supported mode %d\n", __func__, id);
+		return id;
 	}
 
 	if (ps3av->region & PS3AV_REGION_60)
-		vid = PS3AV_DEFAULT_HDMI_VID_REG_60;
+		id = PS3AV_DEFAULT_HDMI_MODE_ID_REG_60;
 	else
-		vid = PS3AV_DEFAULT_HDMI_VID_REG_50;
-	return vid;
+		id = PS3AV_DEFAULT_HDMI_MODE_ID_REG_50;
+	pr_debug("%s: Using default mode %d\n", __func__, id);
+	return id;
 }
 
-static int ps3av_auto_videomode(struct ps3av_pkt_av_get_hw_conf *av_hw_conf,
-				int boot)
+static void ps3av_monitor_info_dump(const struct ps3av_pkt_av_get_monitor_info *monitor_info)
 {
-	int i, res, vid = -1, dvi = 0, rgb = 0;
+	const struct ps3av_info_monitor *info = &monitor_info->info;
+	const struct ps3av_info_audio *audio = info->audio;
+	char id[sizeof(info->monitor_id)*3+1];
+	int i;
+
+	pr_debug("Monitor Info: size %u\n", monitor_info->send_hdr.size);
+
+	pr_debug("avport: %02x\n", info->avport);
+	for (i = 0; i < sizeof(info->monitor_id); i++)
+		sprintf(&id[i*3], " %02x", info->monitor_id[i]);
+	pr_debug("monitor_id: %s\n", id);
+	pr_debug("monitor_type: %02x\n", info->monitor_type);
+	pr_debug("monitor_name: %.*s\n", (int)sizeof(info->monitor_name),
+		 info->monitor_name);
+
+	/* resolution */
+	pr_debug("resolution_60: bits: %08x native: %08x\n",
+		 info->res_60.res_bits, info->res_60.native);
+	pr_debug("resolution_50: bits: %08x native: %08x\n",
+		 info->res_50.res_bits, info->res_50.native);
+	pr_debug("resolution_other: bits: %08x native: %08x\n",
+		 info->res_other.res_bits, info->res_other.native);
+	pr_debug("resolution_vesa: bits: %08x native: %08x\n",
+		 info->res_vesa.res_bits, info->res_vesa.native);
+
+	/* color space */
+	pr_debug("color space    rgb: %02x\n", info->cs.rgb);
+	pr_debug("color space yuv444: %02x\n", info->cs.yuv444);
+	pr_debug("color space yuv422: %02x\n", info->cs.yuv422);
+
+	/* color info */
+	pr_debug("color info   red: X %04x Y %04x\n", info->color.red_x,
+		 info->color.red_y);
+	pr_debug("color info green: X %04x Y %04x\n", info->color.green_x,
+		 info->color.green_y);
+	pr_debug("color info  blue: X %04x Y %04x\n", info->color.blue_x,
+		 info->color.blue_y);
+	pr_debug("color info white: X %04x Y %04x\n", info->color.white_x,
+		 info->color.white_y);
+	pr_debug("color info gamma:  %08x\n", info->color.gamma);
+
+	/* other info */
+	pr_debug("supported_AI: %02x\n", info->supported_ai);
+	pr_debug("speaker_info: %02x\n", info->speaker_info);
+	pr_debug("num of audio: %02x\n", info->num_of_audio_block);
+
+	/* audio block */
+	for (i = 0; i < info->num_of_audio_block; i++) {
+		pr_debug("audio[%d] type: %02x max_ch: %02x fs: %02x sbit: "
+			 "%02x\n",
+			 i, audio->type, audio->max_num_of_ch, audio->fs,
+			 audio->sbit);
+		audio++;
+	}
+}
+
+static const struct ps3av_monitor_quirk {
+	const char *monitor_name;
+	u32 clear_60, clear_50, clear_vesa;
+} ps3av_monitor_quirks[] = {
+	{
+		.monitor_name	= "DELL 2007WFP",
+		.clear_60	= PS3AV_RESBIT_1920x1080I
+	}, {
+		.monitor_name	= "L226WTQ",
+		.clear_60	= PS3AV_RESBIT_1920x1080I |
+				  PS3AV_RESBIT_1920x1080P
+	}, {
+		.monitor_name	= "SyncMaster",
+		.clear_60	= PS3AV_RESBIT_1920x1080I
+	}
+};
+
+static void ps3av_fixup_monitor_info(struct ps3av_info_monitor *info)
+{
+	unsigned int i;
+	const struct ps3av_monitor_quirk *quirk;
+
+	for (i = 0; i < ARRAY_SIZE(ps3av_monitor_quirks); i++) {
+		quirk = &ps3av_monitor_quirks[i];
+		if (!strncmp(info->monitor_name, quirk->monitor_name,
+			     sizeof(info->monitor_name))) {
+			pr_info("%s: Applying quirk for %s\n", __func__,
+				quirk->monitor_name);
+			info->res_60.res_bits &= ~quirk->clear_60;
+			info->res_60.native &= ~quirk->clear_60;
+			info->res_50.res_bits &= ~quirk->clear_50;
+			info->res_50.native &= ~quirk->clear_50;
+			info->res_vesa.res_bits &= ~quirk->clear_vesa;
+			info->res_vesa.native &= ~quirk->clear_vesa;
+			break;
+		}
+	}
+}
+
+static int ps3av_auto_videomode(struct ps3av_pkt_av_get_hw_conf *av_hw_conf)
+{
+	int i, res, id = 0, dvi = 0, rgb = 0;
 	struct ps3av_pkt_av_get_monitor_info monitor_info;
 	struct ps3av_info_monitor *info;
 
-	/* get vid for hdmi */
-	for (i = 0; i < av_hw_conf->num_of_hdmi; i++) {
+	/* get mode id for hdmi */
+	for (i = 0; i < av_hw_conf->num_of_hdmi && !id; i++) {
 		res = ps3av_cmd_video_get_monitor_info(&monitor_info,
 						       PS3AV_CMD_AVPORT_HDMI_0 +
 						       i);
 		if (res < 0)
 			return -1;
 
-		ps3av_cmd_av_monitor_info_dump(&monitor_info);
+		ps3av_monitor_info_dump(&monitor_info);
+
 		info = &monitor_info.info;
-		/* check DVI */
-		if (info->monitor_type == PS3AV_MONITOR_TYPE_DVI) {
+		ps3av_fixup_monitor_info(info);
+
+		switch (info->monitor_type) {
+		case PS3AV_MONITOR_TYPE_DVI:
 			dvi = PS3AV_MODE_DVI;
-			break;
-		}
-		/* check HDMI */
-		vid = ps3av_hdmi_get_vid(info);
-		if (vid != -1) {
-			/* got valid vid */
+			/* fall through */
+		case PS3AV_MONITOR_TYPE_HDMI:
+			id = ps3av_hdmi_get_id(info);
 			break;
 		}
 	}
 
-	if (dvi) {
-		/* DVI mode */
-		vid = PS3AV_DEFAULT_DVI_VID;
-	} else if (vid == -1) {
+	if (!id) {
 		/* no HDMI interface or HDMI is off */
 		if (ps3av->region & PS3AV_REGION_60)
-			vid = PS3AV_DEFAULT_AVMULTI_VID_REG_60;
+			id = PS3AV_DEFAULT_AVMULTI_MODE_ID_REG_60;
 		else
-			vid = PS3AV_DEFAULT_AVMULTI_VID_REG_50;
+			id = PS3AV_DEFAULT_AVMULTI_MODE_ID_REG_50;
 		if (ps3av->region & PS3AV_REGION_RGB)
 			rgb = PS3AV_MODE_RGB;
-	} else if (boot) {
-		/* HDMI: using DEFAULT HDMI_VID while booting up */
-		info = &monitor_info.info;
-		if (ps3av->region & PS3AV_REGION_60) {
-			if (info->res_60.res_bits & PS3AV_RESBIT_720x480P)
-				vid = PS3AV_DEFAULT_HDMI_VID_REG_60;
-			else if (info->res_50.res_bits & PS3AV_RESBIT_720x576P)
-				vid = PS3AV_DEFAULT_HDMI_VID_REG_50;
-			else {
-				/* default */
-				vid = PS3AV_DEFAULT_HDMI_VID_REG_60;
-			}
-		} else {
-			if (info->res_50.res_bits & PS3AV_RESBIT_720x576P)
-				vid = PS3AV_DEFAULT_HDMI_VID_REG_50;
-			else if (info->res_60.res_bits & PS3AV_RESBIT_720x480P)
-				vid = PS3AV_DEFAULT_HDMI_VID_REG_60;
-			else {
-				/* default */
-				vid = PS3AV_DEFAULT_HDMI_VID_REG_50;
-			}
-		}
+		pr_debug("%s: Using avmulti mode %d\n", __func__, id);
 	}
 
-	return (ps3av_vid2table_id(vid) | dvi | rgb);
+	return id | dvi | rgb;
 }
 
 static int ps3av_get_hw_conf(struct ps3av *ps3av)
 {
 	int i, j, k, res;
+	const struct ps3av_pkt_av_get_hw_conf *hw_conf;
 
 	/* get av_hw_conf */
 	res = ps3av_cmd_av_get_hw_conf(&ps3av->av_hw_conf);
 	if (res < 0)
 		return -1;
 
-	ps3av_cmd_av_hw_conf_dump(&ps3av->av_hw_conf);
+	hw_conf = &ps3av->av_hw_conf;
+	pr_debug("av_h_conf: num of hdmi: %u\n", hw_conf->num_of_hdmi);
+	pr_debug("av_h_conf: num of avmulti: %u\n", hw_conf->num_of_avmulti);
+	pr_debug("av_h_conf: num of spdif: %u\n", hw_conf->num_of_spdif);
 
 	for (i = 0; i < PS3AV_HEAD_MAX; i++)
 		ps3av->head[i] = PS3AV_CMD_VIDEO_HEAD_A + i;
 	for (i = 0; i < PS3AV_OPT_PORT_MAX; i++)
 		ps3av->opt_port[i] = PS3AV_CMD_AVPORT_SPDIF_0 + i;
-	for (i = 0; i < ps3av->av_hw_conf.num_of_hdmi; i++)
+	for (i = 0; i < hw_conf->num_of_hdmi; i++)
 		ps3av->av_port[i] = PS3AV_CMD_AVPORT_HDMI_0 + i;
-	for (j = 0; j < ps3av->av_hw_conf.num_of_avmulti; j++)
+	for (j = 0; j < hw_conf->num_of_avmulti; j++)
 		ps3av->av_port[i + j] = PS3AV_CMD_AVPORT_AVMULTI_0 + j;
-	for (k = 0; k < ps3av->av_hw_conf.num_of_spdif; k++)
+	for (k = 0; k < hw_conf->num_of_spdif; k++)
 		ps3av->av_port[i + j + k] = PS3AV_CMD_AVPORT_SPDIF_0 + k;
 
 	/* set all audio port */
@@ -738,7 +845,7 @@ static int ps3av_get_hw_conf(struct ps3av *ps3av)
 }
 
 /* set mode using id */
-int ps3av_set_video_mode(u32 id, int boot)
+int ps3av_set_video_mode(u32 id)
 {
 	int size;
 	u32 option;
@@ -752,7 +859,7 @@ int ps3av_set_video_mode(u32 id, int boot)
 	/* auto mode */
 	option = id & ~PS3AV_MODE_MASK;
 	if ((id & PS3AV_MODE_MASK) == 0) {
-		id = ps3av_auto_videomode(&ps3av->av_hw_conf, boot);
+		id = ps3av_auto_videomode(&ps3av->av_hw_conf);
 		if (id < 1) {
 			printk(KERN_ERR "%s: invalid id :%d\n", __func__, id);
 			return -EINVAL;
@@ -772,34 +879,13 @@ int ps3av_set_video_mode(u32 id, int boot)
 
 EXPORT_SYMBOL_GPL(ps3av_set_video_mode);
 
-int ps3av_get_auto_mode(int boot)
+int ps3av_get_auto_mode(void)
 {
-	return ps3av_auto_videomode(&ps3av->av_hw_conf, boot);
+	return ps3av_auto_videomode(&ps3av->av_hw_conf);
 }
 
 EXPORT_SYMBOL_GPL(ps3av_get_auto_mode);
 
-int ps3av_set_mode(u32 id, int boot)
-{
-	int res;
-
-	res = ps3av_set_video_mode(id, boot);
-	if (res)
-		return res;
-
-	res = ps3av_set_audio_mode(PS3AV_CMD_AUDIO_NUM_OF_CH_2,
-				   PS3AV_CMD_AUDIO_FS_48K,
-				   PS3AV_CMD_AUDIO_WORD_BITS_16,
-				   PS3AV_CMD_AUDIO_FORMAT_PCM,
-				   PS3AV_CMD_AUDIO_SOURCE_SERIAL);
-	if (res)
-		return res;
-
-	return 0;
-}
-
-EXPORT_SYMBOL_GPL(ps3av_set_mode);
-
 int ps3av_get_mode(void)
 {
 	return ps3av ? ps3av->ps3av_mode : 0;
@@ -941,7 +1027,14 @@ static int ps3av_probe(struct ps3_system_bus_device *dev)
 		       res);
 
 	ps3av_get_hw_conf(ps3av);
-	id = ps3av_auto_videomode(&ps3av->av_hw_conf, 1);
+
+#ifdef CONFIG_FB
+	if (fb_mode_option && !strcmp(fb_mode_option, "safe"))
+		safe_mode = 1;
+#endif /* CONFIG_FB */
+	id = ps3av_auto_videomode(&ps3av->av_hw_conf);
+	safe_mode = 0;
+
 	mutex_lock(&ps3av->mutex);
 	ps3av->ps3av_mode = id;
 	mutex_unlock(&ps3av->mutex);
diff --git a/drivers/ps3/ps3av_cmd.c b/drivers/ps3/ps3av_cmd.c
index f72f5ddf18e..7f880c26122 100644
--- a/drivers/ps3/ps3av_cmd.c
+++ b/drivers/ps3/ps3av_cmd.c
@@ -512,7 +512,6 @@ static const u32 ps3av_ns_table[][5] = {
 static void ps3av_cnv_ns(u8 *ns, u32 fs, u32 video_vid)
 {
 	u32 av_vid, ns_val;
-	u8 *p = ns;
 	int d;
 
 	d = ns_val = 0;
@@ -551,24 +550,22 @@ static void ps3av_cnv_ns(u8 *ns, u32 fs, u32 video_vid)
 	else
 		ns_val = ps3av_ns_table[PS3AV_CMD_AUDIO_FS_44K-BASE][d];
 
-	*p++ = ns_val & 0x000000FF;
-	*p++ = (ns_val & 0x0000FF00) >> 8;
-	*p = (ns_val & 0x00FF0000) >> 16;
+	*ns++ = ns_val & 0x000000FF;
+	*ns++ = (ns_val & 0x0000FF00) >> 8;
+	*ns = (ns_val & 0x00FF0000) >> 16;
 }
 
 #undef BASE
 
 static u8 ps3av_cnv_enable(u32 source, const u8 *enable)
 {
-	const u8 *p;
 	u8 ret = 0;
 
 	if (source == PS3AV_CMD_AUDIO_SOURCE_SPDIF) {
 		ret = 0x03;
 	} else if (source == PS3AV_CMD_AUDIO_SOURCE_SERIAL) {
-		p = enable;
-		ret = ((p[0] << 4) + (p[1] << 5) + (p[2] << 6) + (p[3] << 7)) |
-		      0x01;
+		ret = ((enable[0] << 4) + (enable[1] << 5) + (enable[2] << 6) +
+		       (enable[3] << 7)) | 0x01;
 	} else
 		printk(KERN_ERR "%s failed, source:%x\n", __func__, source);
 	return ret;
@@ -576,11 +573,9 @@ static u8 ps3av_cnv_enable(u32 source, const u8 *enable)
 
 static u8 ps3av_cnv_fifomap(const u8 *map)
 {
-	const u8 *p;
 	u8 ret = 0;
 
-	p = map;
-	ret = p[0] + (p[1] << 2) + (p[2] << 4) + (p[3] << 6);
+	ret = map[0] + (map[1] << 2) + (map[2] << 4) + (map[3] << 6);
 	return ret;
 }
 
@@ -927,72 +922,6 @@ int ps3av_cmd_video_get_monitor_info(struct ps3av_pkt_av_get_monitor_info *info,
 	return res;
 }
 
-#ifdef PS3AV_DEBUG
-void ps3av_cmd_av_hw_conf_dump(const struct ps3av_pkt_av_get_hw_conf *hw_conf)
-{
-	printk("av_h_conf:num of hdmi:%d\n", hw_conf->num_of_hdmi);
-	printk("av_h_conf:num of avmulti:%d\n", hw_conf->num_of_avmulti);
-	printk("av_h_conf:num of spdif:%d\n", hw_conf->num_of_spdif);
-}
-
-void ps3av_cmd_av_monitor_info_dump(const struct ps3av_pkt_av_get_monitor_info *monitor_info)
-{
-	const struct ps3av_info_monitor *info = &monitor_info->info;
-	const struct ps3av_info_audio *audio = info->audio;
-	int i;
-
-	printk("Monitor Info: size%d\n", monitor_info->send_hdr.size);
-
-	printk("avport:%02x\n", info->avport);
-	printk("monitor_id:");
-	for (i = 0; i < 10; i++)
-		printk("%02x ", info->monitor_id[i]);
-	printk("\nmonitor_type:%02x\n", info->monitor_type);
-	printk("monitor_name:");
-	for (i = 0; i < 16; i++)
-		printk("%c", info->monitor_name[i]);
-
-	/* resolution */
-	printk("\nresolution_60: bits:%08x native:%08x\n",
-	       info->res_60.res_bits, info->res_60.native);
-	printk("resolution_50: bits:%08x native:%08x\n",
-	       info->res_50.res_bits, info->res_50.native);
-	printk("resolution_other: bits:%08x native:%08x\n",
-	       info->res_other.res_bits, info->res_other.native);
-	printk("resolution_vesa: bits:%08x native:%08x\n",
-	       info->res_vesa.res_bits, info->res_vesa.native);
-
-	/* color space */
-	printk("color space    rgb:%02x\n", info->cs.rgb);
-	printk("color space yuv444:%02x\n", info->cs.yuv444);
-	printk("color space yuv422:%02x\n", info->cs.yuv422);
-
-	/* color info */
-	printk("color info   red:X %04x Y %04x\n",
-	       info->color.red_x, info->color.red_y);
-	printk("color info green:X %04x Y %04x\n",
-	       info->color.green_x, info->color.green_y);
-	printk("color info  blue:X %04x Y %04x\n",
-	       info->color.blue_x, info->color.blue_y);
-	printk("color info white:X %04x Y %04x\n",
-	       info->color.white_x, info->color.white_y);
-	printk("color info gamma: %08x\n", info->color.gamma);
-
-	/* other info */
-	printk("supported_AI:%02x\n", info->supported_ai);
-	printk("speaker_info:%02x\n", info->speaker_info);
-	printk("num of audio:%02x\n", info->num_of_audio_block);
-
-	/* audio block */
-	for (i = 0; i < info->num_of_audio_block; i++) {
-		printk("audio[%d] type:%02x max_ch:%02x fs:%02x sbit:%02x\n",
-		       i, audio->type, audio->max_num_of_ch, audio->fs,
-		       audio->sbit);
-		audio++;
-	}
-}
-#endif /* PS3AV_DEBUG */
-
 #define PS3AV_AV_LAYOUT_0 (PS3AV_CMD_AV_LAYOUT_32 \
 		| PS3AV_CMD_AV_LAYOUT_44 \
 		| PS3AV_CMD_AV_LAYOUT_48)
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index ff9e35cb308..6420a90a4a9 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -139,6 +139,17 @@ config RTC_DRV_DS1307
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ds1307.
 
+config RTC_DRV_DS1374
+	tristate "Maxim/Dallas Semiconductor DS1374 Real Time Clock"
+	depends on RTC_CLASS && I2C
+	help
+	  If you say yes here you get support for Dallas Semiconductor
+	  DS1374 real-time clock chips.  If an interrupt is associated
+	  with the device, the alarm functionality is supported.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called rtc-ds1374.
+
 config RTC_DRV_DS1672
 	tristate "Dallas/Maxim DS1672"
 	help
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index d3a33aa2696..465db4dd50b 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_RTC_DRV_BFIN)	+= rtc-bfin.o
 obj-$(CONFIG_RTC_DRV_CMOS)	+= rtc-cmos.o
 obj-$(CONFIG_RTC_DRV_DS1216)	+= rtc-ds1216.o
 obj-$(CONFIG_RTC_DRV_DS1307)	+= rtc-ds1307.o
+obj-$(CONFIG_RTC_DRV_DS1374)	+= rtc-ds1374.o
 obj-$(CONFIG_RTC_DRV_DS1553)	+= rtc-ds1553.o
 obj-$(CONFIG_RTC_DRV_DS1672)	+= rtc-ds1672.o
 obj-$(CONFIG_RTC_DRV_DS1742)	+= rtc-ds1742.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 10ab3b71ffc..4dfdf019fcc 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -153,6 +153,7 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 	mutex_init(&rtc->ops_lock);
 	spin_lock_init(&rtc->irq_lock);
 	spin_lock_init(&rtc->irq_task_lock);
+	init_waitqueue_head(&rtc->irq_queue);
 
 	strlcpy(rtc->name, name, RTC_DEVICE_NAME_SIZE);
 	snprintf(rtc->dev.bus_id, BUS_ID_SIZE, "rtc%d", id);
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index ad66c6ecf36..de0da545c7a 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -12,6 +12,7 @@
 */
 
 #include <linux/rtc.h>
+#include <linux/log2.h>
 
 int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 {
@@ -99,7 +100,7 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
 }
 EXPORT_SYMBOL_GPL(rtc_set_mmss);
 
-int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 {
 	int err;
 
@@ -119,6 +120,87 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	mutex_unlock(&rtc->ops_lock);
 	return err;
 }
+
+int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
+{
+	int err;
+	struct rtc_time before, now;
+	int first_time = 1;
+
+	/* The lower level RTC driver may not be capable of filling
+	 * in all fields of the rtc_time struct (eg. rtc-cmos),
+	 * and so might instead return -1 in some fields.
+	 * We deal with that here by grabbing a current RTC timestamp
+	 * and using values from that for any missing (-1) values.
+	 *
+	 * But this can be racy, because some fields of the RTC timestamp
+	 * may have wrapped in the interval since we read the RTC alarm,
+	 * which would lead to us inserting inconsistent values in place
+	 * of the -1 fields.
+	 *
+	 * Reading the alarm and timestamp in the reverse sequence
+	 * would have the same race condition, and not solve the issue.
+	 *
+	 * So, we must first read the RTC timestamp,
+	 * then read the RTC alarm value,
+	 * and then read a second RTC timestamp.
+	 *
+	 * If any fields of the second timestamp have changed
+	 * when compared with the first timestamp, then we know
+	 * our timestamp may be inconsistent with that used by
+	 * the low-level rtc_read_alarm_internal() function.
+	 *
+	 * So, when the two timestamps disagree, we just loop and do
+	 * the process again to get a fully consistent set of values.
+	 *
+	 * This could all instead be done in the lower level driver,
+	 * but since more than one lower level RTC implementation needs it,
+	 * it's probably best to do it here instead of there.
+	 */
+
+	/* Get the "before" timestamp */
+	err = rtc_read_time(rtc, &before);
+	if (err < 0)
+		return err;
+	do {
+		if (!first_time)
+			memcpy(&before, &now, sizeof(struct rtc_time));
+		first_time = 0;
+
+		/* get the RTC alarm values, which may be incomplete */
+		err = rtc_read_alarm_internal(rtc, alarm);
+		if (err)
+			return err;
+		if (!alarm->enabled)
+			return 0;
+
+		/* get the "after" timestamp, to detect wrapped fields */
+		err = rtc_read_time(rtc, &now);
+		if (err < 0)
+			return err;
+
+		/* note that tm_sec is a "don't care" value here: */
+	} while (   before.tm_min   != now.tm_min
+		 || before.tm_hour  != now.tm_hour
+		 || before.tm_mon   != now.tm_mon
+		 || before.tm_year  != now.tm_year
+		 || before.tm_isdst != now.tm_isdst);
+
+	/* Fill in any missing alarm fields using the timestamp */
+	if (alarm->time.tm_sec == -1)
+		alarm->time.tm_sec = now.tm_sec;
+	if (alarm->time.tm_min == -1)
+		alarm->time.tm_min = now.tm_min;
+	if (alarm->time.tm_hour == -1)
+		alarm->time.tm_hour = now.tm_hour;
+	if (alarm->time.tm_mday == -1)
+		alarm->time.tm_mday = now.tm_mday;
+	if (alarm->time.tm_mon == -1)
+		alarm->time.tm_mon = now.tm_mon;
+	if (alarm->time.tm_year == -1)
+		alarm->time.tm_year = now.tm_year;
+	return 0;
+}
 EXPORT_SYMBOL_GPL(rtc_read_alarm);
 
 int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
@@ -210,6 +292,10 @@ int rtc_irq_register(struct rtc_device *rtc, struct rtc_task *task)
 	if (task == NULL || task->func == NULL)
 		return -EINVAL;
 
+	/* Cannot register while the char dev is in use */
+	if (!(mutex_trylock(&rtc->char_lock)))
+		return -EBUSY;
+
 	spin_lock_irq(&rtc->irq_task_lock);
 	if (rtc->irq_task == NULL) {
 		rtc->irq_task = task;
@@ -217,13 +303,14 @@ int rtc_irq_register(struct rtc_device *rtc, struct rtc_task *task)
 	}
 	spin_unlock_irq(&rtc->irq_task_lock);
 
+	mutex_unlock(&rtc->char_lock);
+
 	return retval;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_register);
 
 void rtc_irq_unregister(struct rtc_device *rtc, struct rtc_task *task)
 {
-
 	spin_lock_irq(&rtc->irq_task_lock);
 	if (rtc->irq_task == task)
 		rtc->irq_task = NULL;
@@ -231,6 +318,16 @@ void rtc_irq_unregister(struct rtc_device *rtc, struct rtc_task *task)
 }
 EXPORT_SYMBOL_GPL(rtc_irq_unregister);
 
+/**
+ * rtc_irq_set_state - enable/disable 2^N Hz periodic IRQs
+ * @rtc: the rtc device
+ * @task: currently registered with rtc_irq_register()
+ * @enabled: true to enable periodic IRQs
+ * Context: any
+ *
+ * Note that rtc_irq_set_freq() should previously have been used to
+ * specify the desired frequency of periodic IRQ task->func() callbacks.
+ */
 int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled)
 {
 	int err = 0;
@@ -240,8 +337,10 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled
 		return -ENXIO;
 
 	spin_lock_irqsave(&rtc->irq_task_lock, flags);
-	if (rtc->irq_task != task)
-		err = -ENXIO;
+	if (rtc->irq_task != NULL && task == NULL)
+		err = -EBUSY;	/* a registered task owns the IRQ */
+	else if (rtc->irq_task != task)
+		err = -EACCES;	/* caller is not the registered task */
 	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
 
 	if (err == 0)
@@ -251,6 +350,16 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_state);
 
+/**
+ * rtc_irq_set_freq - set 2^N Hz periodic IRQ frequency for IRQ
+ * @rtc: the rtc device
+ * @task: currently registered with rtc_irq_register()
+ * @freq: positive frequency with which task->func() will be called
+ * Context: any
+ *
+ * Note that rtc_irq_set_state() is used to enable or disable the
+ * periodic IRQs.
+ */
 int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
 {
 	int err = 0;
@@ -259,9 +368,14 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq)
 	if (rtc->ops->irq_set_freq == NULL)
 		return -ENXIO;
 
+	if (!is_power_of_2(freq))
+		return -EINVAL;
+
 	spin_lock_irqsave(&rtc->irq_task_lock, flags);
-	if (rtc->irq_task != task)
-		err = -ENXIO;
+	if (rtc->irq_task != NULL && task == NULL)
+		err = -EBUSY;	/* a registered task owns the IRQ */
+	else if (rtc->irq_task != task)
+		err = -EACCES;	/* caller is not the registered task */
 	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
 
 	if (err == 0) {
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 5d760bb6c2c..e3fe83a23cf 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -246,11 +246,9 @@ static int cmos_irq_set_freq(struct device *dev, int freq)
 
 	/* 0 = no irqs; 1 = 2^15 Hz ... 15 = 2^0 Hz */
 	f = ffs(freq);
-	if (f != 0) {
-		if (f-- > 16 || freq != (1 << f))
-			return -EINVAL;
-		f = 16 - f;
-	}
+	if (f-- > 16)
+		return -EINVAL;
+	f = 16 - f;
 
 	spin_lock_irqsave(&rtc_lock, flags);
 	CMOS_WRITE(RTC_REF_CLCK_32KHZ | f, RTC_FREQ_SELECT);
@@ -435,6 +433,19 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	if (!ports)
 		return -ENODEV;
 
+	/* Claim I/O ports ASAP, minimizing conflict with legacy driver.
+	 *
+	 * REVISIT non-x86 systems may instead use memory space resources
+	 * (needing ioremap etc), not i/o space resources like this ...
+	 */
+	ports = request_region(ports->start,
+			ports->end + 1 - ports->start,
+			driver_name);
+	if (!ports) {
+		dev_dbg(dev, "i/o registers already in use\n");
+		return -EBUSY;
+	}
+
 	cmos_rtc.irq = rtc_irq;
 	cmos_rtc.iomem = ports;
 
@@ -456,24 +467,13 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 
 	cmos_rtc.rtc = rtc_device_register(driver_name, dev,
 				&cmos_rtc_ops, THIS_MODULE);
-	if (IS_ERR(cmos_rtc.rtc))
-		return PTR_ERR(cmos_rtc.rtc);
+	if (IS_ERR(cmos_rtc.rtc)) {
+		retval = PTR_ERR(cmos_rtc.rtc);
+		goto cleanup0;
+	}
 
 	cmos_rtc.dev = dev;
 	dev_set_drvdata(dev, &cmos_rtc);
-
-	/* platform and pnp busses handle resources incompatibly.
-	 *
-	 * REVISIT for non-x86 systems we may need to handle io memory
-	 * resources: ioremap them, and request_mem_region().
-	 */
-	if (is_pnp()) {
-		retval = request_resource(&ioport_resource, ports);
-		if (retval < 0) {
-			dev_dbg(dev, "i/o registers already in use\n");
-			goto cleanup0;
-		}
-	}
 	rename_region(ports, cmos_rtc.rtc->dev.bus_id);
 
 	spin_lock_irq(&rtc_lock);
@@ -536,9 +536,10 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	return 0;
 
 cleanup1:
-	rename_region(ports, NULL);
-cleanup0:
+	cmos_rtc.dev = NULL;
 	rtc_device_unregister(cmos_rtc.rtc);
+cleanup0:
+	release_region(ports->start, ports->end + 1 - ports->start);
 	return retval;
 }
 
@@ -557,19 +558,21 @@ static void cmos_do_shutdown(void)
 static void __exit cmos_do_remove(struct device *dev)
 {
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
+	struct resource *ports;
 
 	cmos_do_shutdown();
 
-	if (is_pnp())
-		release_resource(cmos->iomem);
-	rename_region(cmos->iomem, NULL);
-
 	if (is_valid_irq(cmos->irq))
-		free_irq(cmos->irq, cmos_rtc.rtc);
+		free_irq(cmos->irq, cmos->rtc);
 
-	rtc_device_unregister(cmos_rtc.rtc);
+	rtc_device_unregister(cmos->rtc);
+	cmos->rtc = NULL;
 
-	cmos_rtc.dev = NULL;
+	ports = cmos->iomem;
+	release_region(ports->start, ports->end + 1 - ports->start);
+	cmos->iomem = NULL;
+
+	cmos->dev = NULL;
 	dev_set_drvdata(dev, NULL);
 }
 
@@ -656,7 +659,8 @@ static int cmos_resume(struct device *dev)
 /*----------------------------------------------------------------*/
 
 /* The "CMOS" RTC normally lives on the platform_bus.  On ACPI systems,
- * the device node will always be created as a PNPACPI device.
+ * the device node will always be created as a PNPACPI device.  Plus
+ * pre-ACPI PCs probably list it in the PNPBIOS tables.
  */
 
 #ifdef	CONFIG_PNP
diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c
index 005fff3a350..814583bd2fe 100644
--- a/drivers/rtc/rtc-dev.c
+++ b/drivers/rtc/rtc-dev.c
@@ -142,7 +142,7 @@ static int set_uie(struct rtc_device *rtc)
 static ssize_t
 rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
-	struct rtc_device *rtc = to_rtc_device(file->private_data);
+	struct rtc_device *rtc = file->private_data;
 
 	DECLARE_WAITQUEUE(wait, current);
 	unsigned long data;
@@ -196,7 +196,7 @@ rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 static unsigned int rtc_dev_poll(struct file *file, poll_table *wait)
 {
-	struct rtc_device *rtc = to_rtc_device(file->private_data);
+	struct rtc_device *rtc = file->private_data;
 	unsigned long data;
 
 	poll_wait(file, &rtc->irq_queue, wait);
@@ -233,22 +233,12 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 		break;
 
 	case RTC_PIE_ON:
-		if (!capable(CAP_SYS_RESOURCE))
+		if (rtc->irq_freq > rtc->max_user_freq &&
+				!capable(CAP_SYS_RESOURCE))
 			return -EACCES;
 		break;
 	}
 
-	/* avoid conflicting IRQ users */
-	if (cmd == RTC_PIE_ON || cmd == RTC_PIE_OFF || cmd == RTC_IRQP_SET) {
-		spin_lock_irq(&rtc->irq_task_lock);
-		if (rtc->irq_task)
-			err = -EBUSY;
-		spin_unlock_irq(&rtc->irq_task_lock);
-
-		if (err < 0)
-			return err;
-	}
-
 	/* try the driver's ioctl interface */
 	if (ops->ioctl) {
 		err = ops->ioctl(rtc->dev.parent, cmd, arg);
@@ -338,18 +328,20 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 		err = rtc_set_time(rtc, &tm);
 		break;
 
-	case RTC_IRQP_READ:
-		if (ops->irq_set_freq)
-			err = put_user(rtc->irq_freq, (unsigned long __user *)uarg);
-		else
-			err = -ENOTTY;
+	case RTC_PIE_ON:
+		err = rtc_irq_set_state(rtc, NULL, 1);
+		break;
+
+	case RTC_PIE_OFF:
+		err = rtc_irq_set_state(rtc, NULL, 0);
 		break;
 
 	case RTC_IRQP_SET:
-		if (ops->irq_set_freq)
-			err = rtc_irq_set_freq(rtc, rtc->irq_task, arg);
-		else
-			err = -ENOTTY;
+		err = rtc_irq_set_freq(rtc, NULL, arg);
+		break;
+
+	case RTC_IRQP_READ:
+		err = put_user(rtc->irq_freq, (unsigned long __user *)uarg);
 		break;
 
 #if 0
@@ -405,7 +397,7 @@ static int rtc_dev_ioctl(struct inode *inode, struct file *file,
 
 static int rtc_dev_release(struct inode *inode, struct file *file)
 {
-	struct rtc_device *rtc = to_rtc_device(file->private_data);
+	struct rtc_device *rtc = file->private_data;
 
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	clear_uie(rtc);
@@ -419,7 +411,7 @@ static int rtc_dev_release(struct inode *inode, struct file *file)
 
 static int rtc_dev_fasync(int fd, struct file *file, int on)
 {
-	struct rtc_device *rtc = to_rtc_device(file->private_data);
+	struct rtc_device *rtc = file->private_data;
 	return fasync_helper(fd, file, on, &rtc->async_queue);
 }
 
@@ -449,8 +441,6 @@ void rtc_dev_prepare(struct rtc_device *rtc)
 	rtc->dev.devt = MKDEV(MAJOR(rtc_devt), rtc->id);
 
 	mutex_init(&rtc->char_lock);
-	spin_lock_init(&rtc->irq_lock);
-	init_waitqueue_head(&rtc->irq_queue);
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	INIT_WORK(&rtc->uie_task, rtc_uie_task);
 	setup_timer(&rtc->uie_timer, rtc_uie_timer, (unsigned long)rtc);
diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c
new file mode 100644
index 00000000000..45bda186bef
--- /dev/null
+++ b/drivers/rtc/rtc-ds1374.c
@@ -0,0 +1,449 @@
+/*
+ * RTC client/driver for the Maxim/Dallas DS1374 Real-Time Clock over I2C
+ *
+ * Based on code by Randy Vinson <rvinson@mvista.com>,
+ * which was based on the m41t00.c by Mark Greer <mgreer@mvista.com>.
+ *
+ * Copyright (C) 2006-2007 Freescale Semiconductor
+ *
+ * 2005 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+/*
+ * It would be more efficient to use i2c msgs/i2c_transfer directly but, as
+ * recommended in .../Documentation/i2c/writing-clients section
+ * "Sending and receiving", using SMBus level communication is preferred.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/i2c.h>
+#include <linux/rtc.h>
+#include <linux/bcd.h>
+#include <linux/workqueue.h>
+
+#define DS1374_REG_TOD0		0x00 /* Time of Day */
+#define DS1374_REG_TOD1		0x01
+#define DS1374_REG_TOD2		0x02
+#define DS1374_REG_TOD3		0x03
+#define DS1374_REG_WDALM0	0x04 /* Watchdog/Alarm */
+#define DS1374_REG_WDALM1	0x05
+#define DS1374_REG_WDALM2	0x06
+#define DS1374_REG_CR		0x07 /* Control */
+#define DS1374_REG_CR_AIE	0x01 /* Alarm Int. Enable */
+#define DS1374_REG_CR_WDALM	0x20 /* 1=Watchdog, 0=Alarm */
+#define DS1374_REG_CR_WACE	0x40 /* WD/Alarm counter enable */
+#define DS1374_REG_SR		0x08 /* Status */
+#define DS1374_REG_SR_OSF	0x80 /* Oscillator Stop Flag */
+#define DS1374_REG_SR_AF	0x01 /* Alarm Flag */
+#define DS1374_REG_TCR		0x09 /* Trickle Charge */
+
+struct ds1374 {
+	struct i2c_client *client;	/* I2C client this state belongs to */
+	struct rtc_device *rtc;		/* set by rtc_device_register() in probe */
+	struct work_struct work;	/* runs ds1374_work(), queued by ds1374_irq() */
+
+	/* The mutex protects alarm operations, and prevents a race
+	 * between the enable_irq() in the workqueue and the free_irq()
+	 * in the remove function.
+	 */
+	struct mutex mutex;
+	int exiting;			/* set in remove; work then skips enable_irq() */
+};
+
+static struct i2c_driver ds1374_driver;
+
+/* Read an nbytes-wide (at most 4) counter at reg, least significant byte
+ * first, into *time.  Returns 0, or a negative errno (-EIO on short read).
+ */
+static int ds1374_read_rtc(struct i2c_client *client, u32 *time,
+                           int reg, int nbytes)
+{
+	u8 raw[4];
+	u32 value = 0;
+	int got, idx;
+
+	if (WARN_ON(nbytes > 4))
+		return -EINVAL;
+
+	got = i2c_smbus_read_i2c_block_data(client, reg, nbytes, raw);
+	if (got < 0)
+		return got;
+	if (got < nbytes)
+		return -EIO;
+
+	for (idx = nbytes; idx-- > 0; )
+		value = (value << 8) | raw[idx];
+	*time = value;
+	return 0;
+}
+
+/* Write the low nbytes (at most 4) bytes of time to consecutive registers
+ * starting at reg, least significant byte first.
+ */
+static int ds1374_write_rtc(struct i2c_client *client, u32 time,
+                            int reg, int nbytes)
+{
+	u8 raw[4];
+	int idx;
+
+	if (WARN_ON(nbytes > 4))
+		return -EINVAL;
+
+	/* Peel off one byte per register, lowest byte first. */
+	for (idx = 0; idx < nbytes; idx++)
+		raw[idx] = (time >> (8 * idx)) & 0xff;
+
+	return i2c_smbus_write_i2c_block_data(client, reg, nbytes, raw);
+}
+
+/* Probe-time cleanup of the status and control registers: warn if the
+ * oscillator-stop flag is set, clear it and the alarm flag, then disable
+ * the alarm counter and its interrupt.  Returns 0 or a negative errno.
+ */
+static int ds1374_check_rtc_status(struct i2c_client *client)
+{
+	int sr, cr, err;
+
+	sr = i2c_smbus_read_byte_data(client, DS1374_REG_SR);
+	if (sr < 0)
+		return sr;
+
+	if (sr & DS1374_REG_SR_OSF)
+		dev_warn(&client->dev,
+		         "oscillator discontinuity flagged, "
+		         "time unreliable\n");
+
+	sr &= ~(DS1374_REG_SR_OSF | DS1374_REG_SR_AF);
+	err = i2c_smbus_write_byte_data(client, DS1374_REG_SR, sr);
+	if (err < 0)
+		return err;
+
+	/* Turn the alarm off before the interrupt is requested, so no
+	 * event is reported before everything is initialized.
+	 */
+	cr = i2c_smbus_read_byte_data(client, DS1374_REG_CR);
+	if (cr < 0)
+		return cr;
+
+	cr &= ~(DS1374_REG_CR_WACE | DS1374_REG_CR_AIE);
+	return i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr);
+}
+
+/* rtc_class_ops.read_time: convert the 32-bit seconds counter to rtc_time. */
+static int ds1374_read_time(struct device *dev, struct rtc_time *time)
+{
+	u32 secs;
+	int err;
+
+	err = ds1374_read_rtc(to_i2c_client(dev), &secs, DS1374_REG_TOD0, 4);
+	if (err)
+		return err;
+	rtc_time_to_tm(secs, time);
+	return 0;
+}
+
+/* rtc_class_ops.set_time: store the time as a 32-bit seconds count. */
+static int ds1374_set_time(struct device *dev, struct rtc_time *time)
+{
+	unsigned long secs;
+
+	rtc_tm_to_time(time, &secs);
+	return ds1374_write_rtc(to_i2c_client(dev), secs, DS1374_REG_TOD0, 4);
+}
+
+/* The ds1374 has a decrementer for an alarm, rather than a comparator.
+ * If the time of day is changed, then the alarm will need to be
+ * reset.
+ */
+static int ds1374_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ds1374 *ds1374 = i2c_get_clientdata(client);
+	u32 now, cur_alarm;	/* time of day; value of the alarm counter */
+	int cr, sr;		/* control and status register contents */
+	int ret = 0;
+
+	if (client->irq < 0)	/* alarm calls are refused without an IRQ */
+		return -EINVAL;
+
+	mutex_lock(&ds1374->mutex);
+	/* Every failure below leaves through "out" to drop the mutex. */
+	cr = ret = i2c_smbus_read_byte_data(client, DS1374_REG_CR);
+	if (ret < 0)
+		goto out;
+
+	sr = ret = i2c_smbus_read_byte_data(client, DS1374_REG_SR);
+	if (ret < 0)
+		goto out;
+
+	ret = ds1374_read_rtc(client, &now, DS1374_REG_TOD0, 4);
+	if (ret)
+		goto out;
+
+	ret = ds1374_read_rtc(client, &cur_alarm, DS1374_REG_WDALM0, 3);
+	if (ret)
+		goto out;
+	/* The alarm is a down-counter, so it expires at now + cur_alarm. */
+	rtc_time_to_tm(now + cur_alarm, &alarm->time);
+	alarm->enabled = !!(cr & DS1374_REG_CR_WACE);
+	alarm->pending = !!(sr & DS1374_REG_SR_AF);
+
+out:
+	mutex_unlock(&ds1374->mutex);
+	return ret;
+}
+
+/* rtc_class_ops.set_alarm: load the alarm down-counter so it expires at
+ * alarm->time and start it if alarm->enabled.  Returns 0 or negative errno.
+ */
+static int ds1374_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ds1374 *ds1374 = i2c_get_clientdata(client);
+	struct rtc_time now;
+	unsigned long new_alarm, itime;
+	int cr, ret;
+
+	if (client->irq < 0)
+		return -EINVAL;
+
+	ret = ds1374_read_time(dev, &now);
+	if (ret < 0)
+		return ret;
+
+	rtc_tm_to_time(&alarm->time, &new_alarm);
+	rtc_tm_to_time(&now, &itime);
+
+	/* This can happen due to races, in addition to dates that are
+	 * truly in the past.  To avoid requiring the caller to check for
+	 * races, dates in the past are assumed to be in the recent past
+	 * (i.e. not something that we'd rather the caller know about via
+	 * an error), and the alarm is set to go off as soon as possible.
+	 * Both values are unsigned, so compare before subtracting.
+	 */
+	if (new_alarm <= itime)
+		new_alarm = 1;
+	else
+		new_alarm -= itime;
+
+	mutex_lock(&ds1374->mutex);
+	ret = cr = i2c_smbus_read_byte_data(client, DS1374_REG_CR);
+	if (ret < 0)
+		goto out;
+
+	/* Disable any existing alarm before setting the new one
+	 * (or lack thereof). */
+	cr &= ~DS1374_REG_CR_WACE;
+	ret = i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr);
+	if (ret < 0)
+		goto out;
+
+	ret = ds1374_write_rtc(client, new_alarm, DS1374_REG_WDALM0, 3);
+	if (ret)
+		goto out;
+
+	if (alarm->enabled) {
+		cr |= DS1374_REG_CR_WACE | DS1374_REG_CR_AIE;
+		cr &= ~DS1374_REG_CR_WDALM;
+		ret = i2c_smbus_write_byte_data(client, DS1374_REG_CR, cr);
+	}
+
+out:
+	mutex_unlock(&ds1374->mutex);
+	return ret;
+}
+
+/* IRQ handler: disable the IRQ and hand off to the work item (ds1374_work). */
+static irqreturn_t ds1374_irq(int irq, void *dev_id)
+{
+	struct ds1374 *ds1374 = i2c_get_clientdata(dev_id);
+
+	disable_irq_nosync(irq);
+	schedule_work(&ds1374->work);
+	return IRQ_HANDLED;
+}
+
+/* Workqueue half of the alarm IRQ: acknowledge and disable a fired alarm,
+ * report it to the RTC core, then re-enable the IRQ unless exiting. */
+static void ds1374_work(struct work_struct *work)
+{
+	struct ds1374 *ds1374 = container_of(work, struct ds1374, work);
+	struct i2c_client *client = ds1374->client;
+	int stat, control;
+
+	mutex_lock(&ds1374->mutex);
+	stat = i2c_smbus_read_byte_data(client, DS1374_REG_SR);
+	if (stat < 0)
+		goto unlock;	/* IRQ stays disabled, but drop the mutex */
+
+	if (stat & DS1374_REG_SR_AF) {
+		stat &= ~DS1374_REG_SR_AF;
+		i2c_smbus_write_byte_data(client, DS1374_REG_SR, stat);
+
+		control = i2c_smbus_read_byte_data(client, DS1374_REG_CR);
+		if (control < 0)
+			goto out;
+		control &= ~(DS1374_REG_CR_WACE | DS1374_REG_CR_AIE);
+		i2c_smbus_write_byte_data(client, DS1374_REG_CR, control);
+
+		/* rtc_update_irq() assumes that it is called
+		 * from IRQ-disabled context.
+		 */
+		local_irq_disable();
+		rtc_update_irq(ds1374->rtc, 1, RTC_AF | RTC_IRQF);
+		local_irq_enable();
+	}
+
+out:
+	if (!ds1374->exiting)
+		enable_irq(client->irq);
+unlock:
+	mutex_unlock(&ds1374->mutex);
+}
+
+/* rtc_class_ops.ioctl handler.
+ *
+ * RTC_AIE_ON and RTC_AIE_OFF start or stop the alarm counter via the
+ * control register; every other command gets -ENOIOCTLCMD.  Returns 0 on
+ * success or the negative value returned by a failed register access.
+ */
+static int ds1374_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct ds1374 *ds1374 = i2c_get_clientdata(client);
+	int ret = -ENOIOCTLCMD;
+
+	mutex_lock(&ds1374->mutex);
+
+	/* Only the two alarm-enable commands are handled here; the default
+	 * ret set above is returned for everything else. */
+	if (cmd != RTC_AIE_OFF && cmd != RTC_AIE_ON)
+		goto out;
+
+	/* Read-modify-write of the control register; ret doubles as the
+	 * register value until the write-back replaces it with the status.
+	 */
+	ret = i2c_smbus_read_byte_data(client, DS1374_REG_CR);
+	if (ret < 0)
+		goto out;
+
+	if (cmd == RTC_AIE_ON) {
+		/* Count in alarm (not watchdog) mode with the IRQ enabled. */
+		ret |= DS1374_REG_CR_WACE | DS1374_REG_CR_AIE;
+		ret &= ~DS1374_REG_CR_WDALM;
+	} else {
+		/* Just stop the counter; the other bits stay as they are. */
+		ret &= ~DS1374_REG_CR_WACE;
+	}
+
+	ret = i2c_smbus_write_byte_data(client, DS1374_REG_CR, ret);
+
+out:
+	mutex_unlock(&ds1374->mutex);
+	return ret;
+}
+
+static const struct rtc_class_ops ds1374_rtc_ops = {
+	.read_time = ds1374_read_time,
+	.set_time = ds1374_set_time,
+	.read_alarm = ds1374_read_alarm,	/* -EINVAL when client->irq < 0 */
+	.set_alarm = ds1374_set_alarm,		/* -EINVAL when client->irq < 0 */
+	.ioctl = ds1374_ioctl,			/* RTC_AIE_ON / RTC_AIE_OFF */
+};
+
+static int ds1374_probe(struct i2c_client *client)
+{
+	struct ds1374 *ds1374;
+	int ret;
+
+	ds1374 = kzalloc(sizeof(struct ds1374), GFP_KERNEL);
+	if (!ds1374)
+		return -ENOMEM;
+
+	ds1374->client = client;
+	i2c_set_clientdata(client, ds1374);
+
+	INIT_WORK(&ds1374->work, ds1374_work);
+	mutex_init(&ds1374->mutex);
+	/* Clears stale flags and disables the alarm before the IRQ is requested. */
+	ret = ds1374_check_rtc_status(client);
+	if (ret)
+		goto out_free;
+
+	if (client->irq >= 0) {	/* a negative irq means no interrupt is requested */
+		ret = request_irq(client->irq, ds1374_irq, 0,
+		                  "ds1374", client);
+		if (ret) {
+			dev_err(&client->dev, "unable to request IRQ\n");
+			goto out_free;
+		}
+	}
+
+	ds1374->rtc = rtc_device_register(client->name, &client->dev,
+	                                  &ds1374_rtc_ops, THIS_MODULE);
+	if (IS_ERR(ds1374->rtc)) {
+		ret = PTR_ERR(ds1374->rtc);
+		dev_err(&client->dev, "unable to register the class device\n");
+		goto out_irq;
+	}
+
+	return 0;
+	/* Error unwinding, in reverse order of setup. */
+out_irq:
+	if (client->irq >= 0)
+		free_irq(client->irq, client);
+
+out_free:
+	i2c_set_clientdata(client, NULL);
+	kfree(ds1374);
+	return ret;
+}
+
+static int __devexit ds1374_remove(struct i2c_client *client)
+{
+	struct ds1374 *ds1374 = i2c_get_clientdata(client);
+
+	if (client->irq >= 0) {
+		mutex_lock(&ds1374->mutex);
+		ds1374->exiting = 1;	/* ds1374_work() now skips enable_irq() */
+		mutex_unlock(&ds1374->mutex);
+
+		free_irq(client->irq, client);
+		flush_scheduled_work();	/* let any queued ds1374_work() finish */
+	}
+
+	rtc_device_unregister(ds1374->rtc);
+	i2c_set_clientdata(client, NULL);
+	kfree(ds1374);
+	return 0;
+}
+
+static struct i2c_driver ds1374_driver = {
+	.driver = {
+		.name = "rtc-ds1374",
+		.owner = THIS_MODULE,
+	},
+	.probe = ds1374_probe,	/* allocates state, requests IRQ, registers RTC */
+	.remove = __devexit_p(ds1374_remove),	/* undoes ds1374_probe() */
+};
+
+static int __init ds1374_init(void)
+{
+	return i2c_add_driver(&ds1374_driver);	/* module load: register driver */
+}
+
+static void __exit ds1374_exit(void)
+{
+	i2c_del_driver(&ds1374_driver);	/* module unload: unregister driver */
+}
+
+module_init(ds1374_init);
+module_exit(ds1374_exit);
+
+MODULE_AUTHOR("Scott Wood <scottwood@freescale.com>");
+MODULE_DESCRIPTION("Maxim/Dallas DS1374 RTC Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 5ab3492817d..bb53c09bad1 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -395,7 +395,7 @@ static struct platform_driver ds1553_rtc_driver = {
 	.probe		= ds1553_rtc_probe,
 	.remove		= __devexit_p(ds1553_rtc_remove),
 	.driver		= {
-		.name	= "ds1553",
+		.name	= "rtc-ds1553",
 		.owner	= THIS_MODULE,
 	},
 };
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 67291b0f828..c535b78698e 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -251,7 +251,7 @@ static struct platform_driver ds1742_rtc_driver = {
 	.probe		= ds1742_rtc_probe,
 	.remove		= __devexit_p(ds1742_rtc_remove),
 	.driver		= {
-		.name	= "ds1742",
+		.name	= "rtc-ds1742",
 		.owner	= THIS_MODULE,
 	},
 };
diff --git a/drivers/rtc/rtc-pcf8583.c b/drivers/rtc/rtc-pcf8583.c
index d48b0337458..556d0e7da35 100644
--- a/drivers/rtc/rtc-pcf8583.c
+++ b/drivers/rtc/rtc-pcf8583.c
@@ -332,6 +332,9 @@ static int pcf8583_probe(struct i2c_adapter *adap, int addr, int kind)
 		}
 	};
 
+	if (!i2c_check_functionality(adap, I2C_FUNC_I2C))
+		return 0;
+
 	pcf = kzalloc(sizeof(*pcf), GFP_KERNEL);
 	if (!pcf)
 		return -ENOMEM;
diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c
index 69df94b4484..6cad0841f3c 100644
--- a/drivers/rtc/rtc-sysfs.c
+++ b/drivers/rtc/rtc-sysfs.c
@@ -73,11 +73,35 @@ rtc_sysfs_show_since_epoch(struct device *dev, struct device_attribute *attr,
 	return retval;
 }
 
+/* sysfs show: print max_user_freq as a decimal number plus newline. */
+static ssize_t rtc_sysfs_show_max_user_freq(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", to_rtc_device(dev)->max_user_freq);
+}
+
+/* sysfs store: accept a max_user_freq in the range 1..4095. */
+static ssize_t
+rtc_sysfs_set_max_user_freq(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t n)
+{
+	unsigned long freq = simple_strtoul(buf, NULL, 0);
+
+	if (freq == 0 || freq >= 4096)
+		return -EINVAL;
+
+	to_rtc_device(dev)->max_user_freq = (int)freq;
+
+	return n;
+}
+
 static struct device_attribute rtc_attrs[] = {
 	__ATTR(name, S_IRUGO, rtc_sysfs_show_name, NULL),
 	__ATTR(date, S_IRUGO, rtc_sysfs_show_date, NULL),
 	__ATTR(time, S_IRUGO, rtc_sysfs_show_time, NULL),
 	__ATTR(since_epoch, S_IRUGO, rtc_sysfs_show_since_epoch, NULL),
+	__ATTR(max_user_freq, S_IRUGO | S_IWUSR, rtc_sysfs_show_max_user_freq,
+			rtc_sysfs_set_max_user_freq),
 	{ },
 };
 
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 16e5563e0c6..57cac7008e0 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/mempool.h>
 #include <linux/syscalls.h>
+#include <linux/scatterlist.h>
 #include <linux/ioctl.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_tcq.h>
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 3f105fdcf23..51d92b196ee 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -590,7 +590,7 @@ zfcp_qdio_sbals_from_segment(struct zfcp_fsf_req *fsf_req, unsigned long sbtype,
  */
 int
 zfcp_qdio_sbals_from_sg(struct zfcp_fsf_req *fsf_req, unsigned long sbtype,
-                        struct scatterlist *sg,	int sg_count, int max_sbals)
+                        struct scatterlist *sgl, int sg_count, int max_sbals)
 {
 	int sg_index;
 	struct scatterlist *sg_segment;
@@ -606,9 +606,7 @@ zfcp_qdio_sbals_from_sg(struct zfcp_fsf_req *fsf_req, unsigned long sbtype,
 	sbale->flags |= sbtype;
 
 	/* process all segements of scatter-gather list */
-	for (sg_index = 0, sg_segment = sg, bytes = 0;
-	     sg_index < sg_count;
-	     sg_index++, sg_segment++) {
+	for_each_sg(sgl, sg_segment, sg_count, sg_index) {
 		retval = zfcp_qdio_sbals_from_segment(
 				fsf_req,
 				sbtype,
diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index efd9d8d3a89..fb14014ee16 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -1990,6 +1990,7 @@ static struct scsi_host_template driver_template = {
 	.max_sectors		= TW_MAX_SECTORS,
 	.cmd_per_lun		= TW_MAX_CMDS_PER_LUN,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.shost_attrs		= twa_host_attrs,
 	.emulated		= 1
 };
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index c7995fc216e..a64153b9603 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -2261,6 +2261,7 @@ static struct scsi_host_template driver_template = {
 	.max_sectors		= TW_MAX_SECTORS,
 	.cmd_per_lun		= TW_MAX_CMDS_PER_LUN,	
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.shost_attrs		= tw_host_attrs,
 	.emulated		= 1
 };
diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 9b206176f71..49e1ffa4b2f 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -3575,6 +3575,7 @@ static struct scsi_host_template Bus_Logic_template = {
 	.unchecked_isa_dma = 1,
 	.max_sectors = 128,
 	.use_clustering = ENABLE_CLUSTERING,
+	.use_sg_chaining = ENABLE_SG_CHAINING,
 };
 
 /*
diff --git a/drivers/scsi/NCR53c406a.c b/drivers/scsi/NCR53c406a.c
index eda8c48f6be..3168a179484 100644
--- a/drivers/scsi/NCR53c406a.c
+++ b/drivers/scsi/NCR53c406a.c
@@ -1066,7 +1066,8 @@ static struct scsi_host_template driver_template =
      .sg_tablesize      	= 32			/*SG_ALL*/ /*SG_NONE*/, 
      .cmd_per_lun       	= 1			/* commands per lun */, 
      .unchecked_isa_dma 	= 1			/* unchecked_isa_dma */,
-     .use_clustering    	= ENABLE_CLUSTERING                               
+     .use_clustering    	= ENABLE_CLUSTERING,
+     .use_sg_chaining           = ENABLE_SG_CHAINING,
 };
 
 #include "scsi_module.c"
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index f608d4a1d6d..d3a6d15fb77 100644
--- a/drivers/scsi/a100u2w.c
+++ b/drivers/scsi/a100u2w.c
@@ -1071,6 +1071,7 @@ static struct scsi_host_template inia100_template = {
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun 		= 1,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 static int __devinit inia100_probe_one(struct pci_dev *pdev,
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index a7f42a17b5c..038980be763 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -944,6 +944,7 @@ static struct scsi_host_template aac_driver_template = {
 	.cmd_per_lun    		= AAC_NUM_IO_FIB, 
 #endif	
 	.use_clustering			= ENABLE_CLUSTERING,
+	.use_sg_chaining		= ENABLE_SG_CHAINING,
 	.emulated                       = 1,
 };
 
diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index cbbfbc9f3e0..961a1882cb7 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c
@@ -61,15 +61,15 @@ static void BAD_DMA(void *address, unsigned int length)
 }
 
 static void BAD_SG_DMA(Scsi_Cmnd * SCpnt,
-		       struct scatterlist *sgpnt,
+		       struct scatterlist *sgp,
 		       int nseg,
 		       int badseg)
 {
 	printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n",
 	       badseg, nseg,
-	       page_address(sgpnt[badseg].page) + sgpnt[badseg].offset,
-	       (unsigned long long)SCSI_SG_PA(&sgpnt[badseg]),
-	       sgpnt[badseg].length);
+	       page_address(sgp->page) + sgp->offset,
+	       (unsigned long long)SCSI_SG_PA(sgp),
+	       sgp->length);
 
 	/*
 	 * Not safe to continue.
@@ -691,7 +691,7 @@ static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
 	memcpy(ccb[mbo].cdb, cmd, ccb[mbo].cdblen);
 
 	if (SCpnt->use_sg) {
-		struct scatterlist *sgpnt;
+		struct scatterlist *sg;
 		struct chain *cptr;
 #ifdef DEBUG
 		unsigned char *ptr;
@@ -699,23 +699,21 @@ static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
 		int i;
 		ccb[mbo].op = 2;	/* SCSI Initiator Command  w/scatter-gather */
 		SCpnt->host_scribble = kmalloc(512, GFP_KERNEL | GFP_DMA);
-		sgpnt = (struct scatterlist *) SCpnt->request_buffer;
 		cptr = (struct chain *) SCpnt->host_scribble;
 		if (cptr == NULL) {
 			/* free the claimed mailbox slot */
 			HOSTDATA(SCpnt->device->host)->SCint[mbo] = NULL;
 			return SCSI_MLQUEUE_HOST_BUSY;
 		}
-		for (i = 0; i < SCpnt->use_sg; i++) {
-			if (sgpnt[i].length == 0 || SCpnt->use_sg > 16 ||
-			    (((int) sgpnt[i].offset) & 1) || (sgpnt[i].length & 1)) {
+		scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) {
+			if (sg->length == 0 || SCpnt->use_sg > 16 ||
+			    (((int) sg->offset) & 1) || (sg->length & 1)) {
 				unsigned char *ptr;
 				printk(KERN_CRIT "Bad segment list supplied to aha1542.c (%d, %d)\n", SCpnt->use_sg, i);
-				for (i = 0; i < SCpnt->use_sg; i++) {
+				scsi_for_each_sg(SCpnt, sg, SCpnt->use_sg, i) {
 					printk(KERN_CRIT "%d: %p %d\n", i,
-					       (page_address(sgpnt[i].page) +
-						sgpnt[i].offset),
-					       sgpnt[i].length);
+					       (page_address(sg->page) +
+						sg->offset), sg->length);
 				};
 				printk(KERN_CRIT "cptr %x: ", (unsigned int) cptr);
 				ptr = (unsigned char *) &cptr[i];
@@ -723,10 +721,10 @@ static int aha1542_queuecommand(Scsi_Cmnd * SCpnt, void (*done) (Scsi_Cmnd *))
 					printk("%02x ", ptr[i]);
 				panic("Foooooooood fight!");
 			};
-			any2scsi(cptr[i].dataptr, SCSI_SG_PA(&sgpnt[i]));
-			if (SCSI_SG_PA(&sgpnt[i]) + sgpnt[i].length - 1 > ISA_DMA_THRESHOLD)
-				BAD_SG_DMA(SCpnt, sgpnt, SCpnt->use_sg, i);
-			any2scsi(cptr[i].datalen, sgpnt[i].length);
+			any2scsi(cptr[i].dataptr, SCSI_SG_PA(sg));
+			if (SCSI_SG_PA(sg) + sg->length - 1 > ISA_DMA_THRESHOLD)
+				BAD_SG_DMA(SCpnt, sg, SCpnt->use_sg, i);
+			any2scsi(cptr[i].datalen, sg->length);
 		};
 		any2scsi(ccb[mbo].datalen, SCpnt->use_sg * sizeof(struct chain));
 		any2scsi(ccb[mbo].dataptr, SCSI_BUF_PA(cptr));
diff --git a/drivers/scsi/aha1740.c b/drivers/scsi/aha1740.c
index e4a4f3a965d..f6722fd4600 100644
--- a/drivers/scsi/aha1740.c
+++ b/drivers/scsi/aha1740.c
@@ -563,6 +563,7 @@ static struct scsi_host_template aha1740_template = {
 	.sg_tablesize     = AHA1740_SCATTER,
 	.cmd_per_lun      = AHA1740_CMDLUN,
 	.use_clustering   = ENABLE_CLUSTERING,
+	.use_sg_chaining  = ENABLE_SG_CHAINING,
 	.eh_abort_handler = aha1740_eh_abort_handler,
 };
 
diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c
index a055a96e3ad..42c0f14a262 100644
--- a/drivers/scsi/aic7xxx/aic79xx_osm.c
+++ b/drivers/scsi/aic7xxx/aic79xx_osm.c
@@ -766,6 +766,7 @@ struct scsi_host_template aic79xx_driver_template = {
 	.max_sectors		= 8192,
 	.cmd_per_lun		= 2,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.slave_alloc		= ahd_linux_slave_alloc,
 	.slave_configure	= ahd_linux_slave_configure,
 	.target_alloc		= ahd_linux_target_alloc,
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index 2e9c38f2e8a..7770befbf50 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -747,6 +747,7 @@ struct scsi_host_template aic7xxx_driver_template = {
 	.max_sectors		= 8192,
 	.cmd_per_lun		= 2,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.slave_alloc		= ahc_linux_slave_alloc,
 	.slave_configure	= ahc_linux_slave_configure,
 	.target_alloc		= ahc_linux_target_alloc,
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index 1a71b0236c9..4025608d696 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -11142,6 +11142,7 @@ static struct scsi_host_template driver_template = {
 	.max_sectors		= 2048,
 	.cmd_per_lun		= 3,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 #include "scsi_module.c"
diff --git a/drivers/scsi/aic94xx/aic94xx_task.c b/drivers/scsi/aic94xx/aic94xx_task.c
index f2b23e01401..ee0a98bffcd 100644
--- a/drivers/scsi/aic94xx/aic94xx_task.c
+++ b/drivers/scsi/aic94xx/aic94xx_task.c
@@ -94,7 +94,7 @@ static inline int asd_map_scatterlist(struct sas_task *task,
 			res = -ENOMEM;
 			goto err_unmap;
 		}
-		for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) {
+		for_each_sg(task->scatter, sc, num_sg, i) {
 			struct sg_el *sg =
 				&((struct sg_el *)ascb->sg_arr->vaddr)[i];
 			sg->bus_addr = cpu_to_le64((u64)sg_dma_address(sc));
@@ -103,7 +103,7 @@ static inline int asd_map_scatterlist(struct sas_task *task,
 				sg->flags |= ASD_SG_EL_LIST_EOL;
 		}
 
-		for (sc = task->scatter, i = 0; i < 2; i++, sc++) {
+		for_each_sg(task->scatter, sc, 2, i) {
 			sg_arr[i].bus_addr =
 				cpu_to_le64((u64)sg_dma_address(sc));
 			sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc));
@@ -115,7 +115,7 @@ static inline int asd_map_scatterlist(struct sas_task *task,
 		sg_arr[2].bus_addr=cpu_to_le64((u64)ascb->sg_arr->dma_handle);
 	} else {
 		int i;
-		for (sc = task->scatter, i = 0; i < num_sg; i++, sc++) {
+		for_each_sg(task->scatter, sc, num_sg, i) {
 			sg_arr[i].bus_addr =
 				cpu_to_le64((u64)sg_dma_address(sc));
 			sg_arr[i].size = cpu_to_le32((u32)sg_dma_len(sc));
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index cfcf40159ea..f81777586b8 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -122,6 +122,7 @@ static struct scsi_host_template arcmsr_scsi_host_template = {
 	.max_sectors    	= ARCMSR_MAX_XFER_SECTORS,
 	.cmd_per_lun		= ARCMSR_MAX_CMD_PERLUN,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.shost_attrs		= arcmsr_host_attrs,
 };
 #ifdef CONFIG_SCSI_ARCMSR_AER
diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c
index 1591824cf4b..fd42d478920 100644
--- a/drivers/scsi/dc395x.c
+++ b/drivers/scsi/dc395x.c
@@ -4765,6 +4765,7 @@ static struct scsi_host_template dc395x_driver_template = {
 	.eh_bus_reset_handler   = dc395x_eh_bus_reset,
 	.unchecked_isa_dma      = 0,
 	.use_clustering         = DISABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 
diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
index bea9d659af1..8258506ba7d 100644
--- a/drivers/scsi/dpt_i2o.c
+++ b/drivers/scsi/dpt_i2o.c
@@ -3295,6 +3295,7 @@ static struct scsi_host_template adpt_template = {
 	.this_id		= 7,
 	.cmd_per_lun		= 1,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 static s32 adpt_scsi_register(adpt_hba* pHba)
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index ec2233114bc..7ead5210de9 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -523,7 +523,8 @@ static struct scsi_host_template driver_template = {
 	.slave_configure = eata2x_slave_configure,
 	.this_id = 7,
 	.unchecked_isa_dma = 1,
-	.use_clustering = ENABLE_CLUSTERING
+	.use_clustering = ENABLE_CLUSTERING,
+	.use_sg_chaining = ENABLE_SG_CHAINING,
 };
 
 #if !defined(__BIG_ENDIAN_BITFIELD) && !defined(__LITTLE_ENDIAN_BITFIELD)
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index adc9559cb6f..112ab6abe62 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -343,6 +343,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 	shost->use_clustering = sht->use_clustering;
 	shost->ordered_tag = sht->ordered_tag;
 	shost->active_mode = sht->supported_mode;
+	shost->use_sg_chaining = sht->use_sg_chaining;
 
 	if (sht->max_host_blocked)
 		shost->max_host_blocked = sht->max_host_blocked;
diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c
index 8b384fa7f04..8515054cdf7 100644
--- a/drivers/scsi/hptiop.c
+++ b/drivers/scsi/hptiop.c
@@ -655,6 +655,7 @@ static struct scsi_host_template driver_template = {
 	.unchecked_isa_dma          = 0,
 	.emulated                   = 0,
 	.use_clustering             = ENABLE_CLUSTERING,
+	.use_sg_chaining            = ENABLE_SG_CHAINING,
 	.proc_name                  = driver_name,
 	.shost_attrs                = hptiop_attrs,
 	.this_id                    = -1,
diff --git a/drivers/scsi/ibmmca.c b/drivers/scsi/ibmmca.c
index 1a924e9b027..714e6273a70 100644
--- a/drivers/scsi/ibmmca.c
+++ b/drivers/scsi/ibmmca.c
@@ -1501,6 +1501,7 @@ static struct scsi_host_template ibmmca_driver_template = {
           .sg_tablesize   = 16,
           .cmd_per_lun    = 1,
           .use_clustering = ENABLE_CLUSTERING,
+          .use_sg_chaining = ENABLE_SG_CHAINING,
 };
 
 static int ibmmca_probe(struct device *dev)
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index cda0cc3d182..22d91ee173c 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -1548,6 +1548,7 @@ static struct scsi_host_template driver_template = {
 	.this_id = -1,
 	.sg_tablesize = SG_ALL,
 	.use_clustering = ENABLE_CLUSTERING,
+	.use_sg_chaining = ENABLE_SG_CHAINING,
 	.shost_attrs = ibmvscsi_attrs,
 };
 
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index d81bb076a15..d297f64cd43 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -70,6 +70,7 @@ typedef struct idescsi_pc_s {
 	u8 *buffer;				/* Data buffer */
 	u8 *current_position;			/* Pointer into the above buffer */
 	struct scatterlist *sg;			/* Scatter gather table */
+	struct scatterlist *last_sg;		/* Last sg element */
 	int b_count;				/* Bytes transferred from current entry */
 	struct scsi_cmnd *scsi_cmd;		/* SCSI command */
 	void (*done)(struct scsi_cmnd *);	/* Scsi completion routine */
@@ -173,12 +174,6 @@ static void idescsi_input_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigne
 	char *buf;
 
 	while (bcount) {
-		if (pc->sg - scsi_sglist(pc->scsi_cmd) >
-		                                 scsi_sg_count(pc->scsi_cmd)) {
-			printk (KERN_ERR "ide-scsi: scatter gather table too small, discarding data\n");
-			idescsi_discard_data (drive, bcount);
-			return;
-		}
 		count = min(pc->sg->length - pc->b_count, bcount);
 		if (PageHighMem(pc->sg->page)) {
 			unsigned long flags;
@@ -197,10 +192,17 @@ static void idescsi_input_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigne
 		}
 		bcount -= count; pc->b_count += count;
 		if (pc->b_count == pc->sg->length) {
-			pc->sg++;
+			if (pc->sg == pc->last_sg)
+				break;
+			pc->sg = sg_next(pc->sg);
 			pc->b_count = 0;
 		}
 	}
+
+	if (bcount) {
+		printk (KERN_ERR "ide-scsi: scatter gather table too small, discarding data\n");
+		idescsi_discard_data (drive, bcount);
+	}
 }
 
 static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsigned int bcount)
@@ -209,12 +211,6 @@ static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsign
 	char *buf;
 
 	while (bcount) {
-		if (pc->sg - scsi_sglist(pc->scsi_cmd) >
-		                                 scsi_sg_count(pc->scsi_cmd)) {
-			printk (KERN_ERR "ide-scsi: scatter gather table too small, padding with zeros\n");
-			idescsi_output_zeros (drive, bcount);
-			return;
-		}
 		count = min(pc->sg->length - pc->b_count, bcount);
 		if (PageHighMem(pc->sg->page)) {
 			unsigned long flags;
@@ -233,10 +229,17 @@ static void idescsi_output_buffers (ide_drive_t *drive, idescsi_pc_t *pc, unsign
 		}
 		bcount -= count; pc->b_count += count;
 		if (pc->b_count == pc->sg->length) {
-			pc->sg++;
+			if (pc->sg == pc->last_sg)
+				break;
+			pc->sg = sg_next(pc->sg);
 			pc->b_count = 0;
 		}
 	}
+
+	if (bcount) {
+		printk (KERN_ERR "ide-scsi: scatter gather table too small, padding with zeros\n");
+		idescsi_output_zeros (drive, bcount);
+	}
 }
 
 static void hexdump(u8 *x, int len)
@@ -804,6 +807,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
 	memcpy (pc->c, cmd->cmnd, cmd->cmd_len);
 	pc->buffer = NULL;
 	pc->sg = scsi_sglist(cmd);
+	pc->last_sg = sg_last(pc->sg, cmd->use_sg);
 	pc->b_count = 0;
 	pc->request_transfer = pc->buffer_size = scsi_bufflen(cmd);
 	pc->scsi_cmd = cmd;
diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c
index d9dfb69ae03..22d40fd5845 100644
--- a/drivers/scsi/initio.c
+++ b/drivers/scsi/initio.c
@@ -2831,6 +2831,7 @@ static struct scsi_host_template initio_template = {
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= 1,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 static int initio_probe_one(struct pci_dev *pdev,
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 2ed099e2c20..edaac2714c5 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -3252,7 +3252,7 @@ ips_done(ips_ha_t * ha, ips_scb_t * scb)
 		 */
 		if ((scb->breakup) || (scb->sg_break)) {
                         struct scatterlist *sg;
-                        int sg_dma_index, ips_sg_index = 0;
+                        int i, sg_dma_index, ips_sg_index = 0;
 
 			/* we had a data breakup */
 			scb->data_len = 0;
@@ -3261,20 +3261,22 @@ ips_done(ips_ha_t * ha, ips_scb_t * scb)
 
                         /* Spin forward to last dma chunk */
                         sg_dma_index = scb->breakup;
+                        for (i = 0; i < scb->breakup; i++)
+                                sg = sg_next(sg);
 
 			/* Take care of possible partial on last chunk */
                         ips_fill_scb_sg_single(ha,
-                                               sg_dma_address(&sg[sg_dma_index]),
+                                               sg_dma_address(sg),
                                                scb, ips_sg_index++,
-                                               sg_dma_len(&sg[sg_dma_index]));
+                                               sg_dma_len(sg));
 
                         for (; sg_dma_index < scsi_sg_count(scb->scsi_cmd);
-                             sg_dma_index++) {
+                             sg_dma_index++, sg = sg_next(sg)) {
                                 if (ips_fill_scb_sg_single
                                     (ha,
-                                     sg_dma_address(&sg[sg_dma_index]),
+                                     sg_dma_address(sg),
                                      scb, ips_sg_index++,
-                                     sg_dma_len(&sg[sg_dma_index])) < 0)
+                                     sg_dma_len(sg)) < 0)
                                         break;
                         }
 
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index cd674938ccd..c0755565fae 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -1438,6 +1438,7 @@ struct scsi_host_template lpfc_template = {
 	.scan_finished		= lpfc_scan_finished,
 	.this_id		= -1,
 	.sg_tablesize		= LPFC_SG_SEG_CNT,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.cmd_per_lun		= LPFC_CMD_PER_LUN,
 	.use_clustering		= ENABLE_CLUSTERING,
 	.shost_attrs		= lpfc_hba_attrs,
@@ -1460,6 +1461,7 @@ struct scsi_host_template lpfc_vport_template = {
 	.sg_tablesize		= LPFC_SG_SEG_CNT,
 	.cmd_per_lun		= LPFC_CMD_PER_LUN,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.shost_attrs		= lpfc_vport_attrs,
 	.max_sectors		= 0xFFFF,
 };
diff --git a/drivers/scsi/mac53c94.c b/drivers/scsi/mac53c94.c
index b12ad7c7c67..a035001f443 100644
--- a/drivers/scsi/mac53c94.c
+++ b/drivers/scsi/mac53c94.c
@@ -402,6 +402,7 @@ static struct scsi_host_template mac53c94_template = {
 	.sg_tablesize	= SG_ALL,
 	.cmd_per_lun	= 1,
 	.use_clustering	= DISABLE_CLUSTERING,
+	.use_sg_chaining = ENABLE_SG_CHAINING,
 };
 
 static int mac53c94_probe(struct macio_dev *mdev, const struct of_device_id *match)
diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c
index cdbcaa5ad6c..abe2bda6ac3 100644
--- a/drivers/scsi/mac_scsi.c
+++ b/drivers/scsi/mac_scsi.c
@@ -53,6 +53,11 @@
 #include "scsi.h"
 #include <scsi/scsi_host.h>
 #include "mac_scsi.h"
+
+/* These control the behaviour of the generic 5380 core */
+#define AUTOSENSE
+#define PSEUDO_DMA
+
 #include "NCR5380.h"
 
 #if 0
@@ -571,10 +576,6 @@ static int macscsi_pwrite (struct Scsi_Host *instance,
 }
 
 
-/* These control the behaviour of the generic 5380 core */
-#define AUTOSENSE
-#define PSEUDO_DMA
-
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index e7e11f282c8..10d1aff9938 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -4492,6 +4492,7 @@ static struct scsi_host_template megaraid_template = {
 	.sg_tablesize			= MAX_SGLIST,
 	.cmd_per_lun			= DEF_CMD_PER_LUN,
 	.use_clustering			= ENABLE_CLUSTERING,
+	.use_sg_chaining		= ENABLE_SG_CHAINING,
 	.eh_abort_handler		= megaraid_abort,
 	.eh_device_reset_handler	= megaraid_reset,
 	.eh_bus_reset_handler		= megaraid_reset,
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index c6a53dccc16..e4e4c6a39ed 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -361,6 +361,7 @@ static struct scsi_host_template megaraid_template_g = {
 	.eh_host_reset_handler		= megaraid_reset_handler,
 	.change_queue_depth		= megaraid_change_queue_depth,
 	.use_clustering			= ENABLE_CLUSTERING,
+	.use_sg_chaining		= ENABLE_SG_CHAINING,
 	.sdev_attrs			= megaraid_sdev_attrs,
 	.shost_attrs			= megaraid_shost_attrs,
 };
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index ebb948c016b..e3c5c528220 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -1110,6 +1110,7 @@ static struct scsi_host_template megasas_template = {
 	.eh_timed_out = megasas_reset_timer,
 	.bios_param = megasas_bios_param,
 	.use_clustering = ENABLE_CLUSTERING,
+	.use_sg_chaining = ENABLE_SG_CHAINING,
 };
 
 /**
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index 651d09b08f2..7470ff39ab2 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1843,6 +1843,7 @@ static struct scsi_host_template mesh_template = {
 	.sg_tablesize			= SG_ALL,
 	.cmd_per_lun			= 2,
 	.use_clustering			= DISABLE_CLUSTERING,
+	.use_sg_chaining		= ENABLE_SG_CHAINING,
 };
 
 static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
diff --git a/drivers/scsi/nsp32.c b/drivers/scsi/nsp32.c
index 7fed3537215..28161dc95e0 100644
--- a/drivers/scsi/nsp32.c
+++ b/drivers/scsi/nsp32.c
@@ -281,6 +281,7 @@ static struct scsi_host_template nsp32_template = {
 	.cmd_per_lun			= 1,
 	.this_id			= NSP32_HOST_SCSIID,
 	.use_clustering			= DISABLE_CLUSTERING,
+	.use_sg_chaining		= ENABLE_SG_CHAINING,
 	.eh_abort_handler       	= nsp32_eh_abort,
 	.eh_bus_reset_handler		= nsp32_eh_bus_reset,
 	.eh_host_reset_handler		= nsp32_eh_host_reset,
diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c
index 961839ecfe8..190e2a7d706 100644
--- a/drivers/scsi/pcmcia/sym53c500_cs.c
+++ b/drivers/scsi/pcmcia/sym53c500_cs.c
@@ -694,6 +694,7 @@ static struct scsi_host_template sym53c500_driver_template = {
      .sg_tablesize		= 32,
      .cmd_per_lun		= 1,
      .use_clustering		= ENABLE_CLUSTERING,
+     .use_sg_chaining		= ENABLE_SG_CHAINING,
      .shost_attrs		= SYM53C500_shost_attrs
 };
 
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index fba8aa8a81b..76089cf55f4 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -2775,7 +2775,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 	struct device_reg __iomem *reg = ha->iobase;
 	struct scsi_cmnd *cmd = sp->cmd;
 	cmd_a64_entry_t *pkt;
-	struct scatterlist *sg = NULL;
+	struct scatterlist *sg = NULL, *s;
 	__le32 *dword_ptr;
 	dma_addr_t dma_handle;
 	int status = 0;
@@ -2889,13 +2889,16 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 	 * Load data segments.
 	 */
 	if (seg_cnt) {	/* If data transfer. */
+		int remseg = seg_cnt;
 		/* Setup packet address segment pointer. */
 		dword_ptr = (u32 *)&pkt->dseg_0_address;
 
 		if (cmd->use_sg) {	/* If scatter gather */
 			/* Load command entry data segments. */
-			for (cnt = 0; cnt < 2 && seg_cnt; cnt++, seg_cnt--) {
-				dma_handle = sg_dma_address(sg);
+			for_each_sg(sg, s, seg_cnt, cnt) {
+				if (cnt == 2)
+					break;
+				dma_handle = sg_dma_address(s);
 #if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_SGI_SN2)
 				if (ha->flags.use_pci_vchannel)
 					sn_pci_set_vchan(ha->pdev,
@@ -2906,12 +2909,12 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 					cpu_to_le32(pci_dma_lo32(dma_handle));
 				*dword_ptr++ =
 					cpu_to_le32(pci_dma_hi32(dma_handle));
-				*dword_ptr++ = cpu_to_le32(sg_dma_len(sg));
-				sg++;
+				*dword_ptr++ = cpu_to_le32(sg_dma_len(s));
 				dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n",
 					cpu_to_le32(pci_dma_hi32(dma_handle)),
 					cpu_to_le32(pci_dma_lo32(dma_handle)),
-					cpu_to_le32(sg_dma_len(sg)));
+					cpu_to_le32(sg_dma_len(s)));
+				remseg--;
 			}
 			dprintk(5, "qla1280_64bit_start_scsi: Scatter/gather "
 				"command packet data - b %i, t %i, l %i \n",
@@ -2926,7 +2929,9 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 			dprintk(3, "S/G Building Continuation...seg_cnt=0x%x "
-				"remains\n", seg_cnt);
+				"remains\n", remseg);
 
-			while (seg_cnt > 0) {
+			while (remseg > 0) {
+				/* Update sg start */
+				sg = s;
 				/* Adjust ring index. */
 				ha->req_ring_index++;
 				if (ha->req_ring_index == REQUEST_ENTRY_CNT) {
@@ -2952,9 +2957,10 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 					(u32 *)&((struct cont_a64_entry *) pkt)->dseg_0_address;
 
 				/* Load continuation entry data segments. */
-				for (cnt = 0; cnt < 5 && seg_cnt;
-				     cnt++, seg_cnt--) {
-					dma_handle = sg_dma_address(sg);
+				for_each_sg(sg, s, remseg, cnt) {
+					if (cnt == 5)
+						break;
+					dma_handle = sg_dma_address(s);
 #if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_SGI_SN2)
 				if (ha->flags.use_pci_vchannel)
 					sn_pci_set_vchan(ha->pdev, 
@@ -2966,13 +2972,13 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 					*dword_ptr++ =
 						cpu_to_le32(pci_dma_hi32(dma_handle));
 					*dword_ptr++ =
-						cpu_to_le32(sg_dma_len(sg));
+						cpu_to_le32(sg_dma_len(s));
 					dprintk(3, "S/G Segment Cont. phys_addr=%x %x, len=0x%x\n",
 						cpu_to_le32(pci_dma_hi32(dma_handle)),
 						cpu_to_le32(pci_dma_lo32(dma_handle)),
-						cpu_to_le32(sg_dma_len(sg)));
-					sg++;
+						cpu_to_le32(sg_dma_len(s)));
 				}
+				remseg -= cnt;
 				dprintk(5, "qla1280_64bit_start_scsi: "
 					"continuation packet data - b %i, t "
 					"%i, l %i \n", SCSI_BUS_32(cmd),
@@ -3062,7 +3068,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 	struct device_reg __iomem *reg = ha->iobase;
 	struct scsi_cmnd *cmd = sp->cmd;
 	struct cmd_entry *pkt;
-	struct scatterlist *sg = NULL;
+	struct scatterlist *sg = NULL, *s;
 	__le32 *dword_ptr;
 	int status = 0;
 	int cnt;
@@ -3188,6 +3194,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 	 * Load data segments.
 	 */
 	if (seg_cnt) {
+		int remseg = seg_cnt;
 		/* Setup packet address segment pointer. */
 		dword_ptr = &pkt->dseg_0_address;
 
@@ -3196,22 +3203,25 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 			qla1280_dump_buffer(1, (char *)sg, 4 * 16);
 
 			/* Load command entry data segments. */
-			for (cnt = 0; cnt < 4 && seg_cnt; cnt++, seg_cnt--) {
+			for_each_sg(sg, s, seg_cnt, cnt) {
+				if (cnt == 4)
+					break;
 				*dword_ptr++ =
-					cpu_to_le32(pci_dma_lo32(sg_dma_address(sg)));
-				*dword_ptr++ =
-					cpu_to_le32(sg_dma_len(sg));
+					cpu_to_le32(pci_dma_lo32(sg_dma_address(s)));
+				*dword_ptr++ = cpu_to_le32(sg_dma_len(s));
 				dprintk(3, "S/G Segment phys_addr=0x%lx, len=0x%x\n",
-					(pci_dma_lo32(sg_dma_address(sg))),
-					(sg_dma_len(sg)));
-				sg++;
+					(pci_dma_lo32(sg_dma_address(s))),
+					(sg_dma_len(s)));
+				remseg--;
 			}
 			/*
 			 * Build continuation packets.
 			 */
 			dprintk(3, "S/G Building Continuation"
-				"...seg_cnt=0x%x remains\n", seg_cnt);
-			while (seg_cnt > 0) {
+				"...seg_cnt=0x%x remains\n", remseg);
+			while (remseg > 0) {
+				/* Continue from end point */
+				sg = s;
 				/* Adjust ring index. */
 				ha->req_ring_index++;
 				if (ha->req_ring_index == REQUEST_ENTRY_CNT) {
@@ -3239,19 +3249,20 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
 					&((struct cont_entry *) pkt)->dseg_0_address;
 
 				/* Load continuation entry data segments. */
-				for (cnt = 0; cnt < 7 && seg_cnt;
-				     cnt++, seg_cnt--) {
+				for_each_sg(sg, s, remseg, cnt) {
+					if (cnt == 7)
+						break;
 					*dword_ptr++ =
-						cpu_to_le32(pci_dma_lo32(sg_dma_address(sg)));
+						cpu_to_le32(pci_dma_lo32(sg_dma_address(s)));
 					*dword_ptr++ =
-						cpu_to_le32(sg_dma_len(sg));
+						cpu_to_le32(sg_dma_len(s));
 					dprintk(1,
 						"S/G Segment Cont. phys_addr=0x%x, "
 						"len=0x%x\n",
-						cpu_to_le32(pci_dma_lo32(sg_dma_address(sg))),
-						cpu_to_le32(sg_dma_len(sg)));
-					sg++;
+						cpu_to_le32(pci_dma_lo32(sg_dma_address(s))),
+						cpu_to_le32(sg_dma_len(s)));
 				}
+				remseg -= cnt;
 				dprintk(5, "qla1280_32bit_start_scsi: "
 					"continuation packet data - "
 					"scsi(%i:%i:%i)\n", SCSI_BUS_32(cmd),
@@ -4248,6 +4259,7 @@ static struct scsi_host_template qla1280_driver_template = {
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= 1,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index a6bb8d0ecf1..0351d380c2d 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -132,6 +132,7 @@ struct scsi_host_template qla2x00_driver_template = {
 	.this_id		= -1,
 	.cmd_per_lun		= 3,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.sg_tablesize		= SG_ALL,
 
 	/*
@@ -163,6 +164,7 @@ struct scsi_host_template qla24xx_driver_template = {
 	.this_id		= -1,
 	.cmd_per_lun		= 3,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.sg_tablesize		= SG_ALL,
 
 	.max_sectors		= 0xFFFF,
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index b1d565c12c5..03b68d4f3bd 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -94,6 +94,7 @@ static struct scsi_host_template qla4xxx_driver_template = {
 	.this_id		= -1,
 	.cmd_per_lun		= 3,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.sg_tablesize		= SG_ALL,
 
 	.max_sectors		= 0xFFFF,
diff --git a/drivers/scsi/qlogicfas.c b/drivers/scsi/qlogicfas.c
index 1e874f1fb5c..1769f965eed 100644
--- a/drivers/scsi/qlogicfas.c
+++ b/drivers/scsi/qlogicfas.c
@@ -197,6 +197,7 @@ static struct scsi_host_template qlogicfas_driver_template = {
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= 1,
 	.use_clustering		= DISABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 static __init int qlogicfas_init(void)
diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index e93f80316a1..7a2e7986b03 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -868,7 +868,7 @@ static inline int load_cmd(struct scsi_cmnd *Cmnd, struct Command_Entry *cmd,
 			   struct qlogicpti *qpti, u_int in_ptr, u_int out_ptr)
 {
 	struct dataseg *ds;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *s;
 	int i, n;
 
 	if (Cmnd->use_sg) {
@@ -884,11 +884,12 @@ static inline int load_cmd(struct scsi_cmnd *Cmnd, struct Command_Entry *cmd,
 		n = sg_count;
 		if (n > 4)
 			n = 4;
-		for (i = 0; i < n; i++, sg++) {
-			ds[i].d_base = sg_dma_address(sg);
-			ds[i].d_count = sg_dma_len(sg);
+		for_each_sg(sg, s, n, i) {
+			ds[i].d_base = sg_dma_address(s);
+			ds[i].d_count = sg_dma_len(s);
 		}
 		sg_count -= 4;
+		sg = s;
 		while (sg_count > 0) {
 			struct Continuation_Entry *cont;
 
@@ -907,9 +908,10 @@ static inline int load_cmd(struct scsi_cmnd *Cmnd, struct Command_Entry *cmd,
 			n = sg_count;
 			if (n > 7)
 				n = 7;
-			for (i = 0; i < n; i++, sg++) {
-				ds[i].d_base = sg_dma_address(sg);
-				ds[i].d_count = sg_dma_len(sg);
+			for_each_sg(sg, s, n, i) {
+				ds[i].d_base = sg_dma_address(s);
+				ds[i].d_count = sg_dma_len(s);
 			}
 			sg_count -= n;
+			sg = s;	/* next continuation entry resumes here */
 		}
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 4947dfe625a..72ee4c9cfb1 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -38,6 +38,7 @@
 #include <linux/proc_fs.h>
 #include <linux/vmalloc.h>
 #include <linux/moduleparam.h>
+#include <linux/scatterlist.h>
 
 #include <linux/blkdev.h>
 #include "scsi.h"
@@ -600,7 +601,7 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
 	int k, req_len, act_len, len, active;
 	void * kaddr;
 	void * kaddr_off;
-	struct scatterlist * sgpnt;
+	struct scatterlist * sg;
 
 	if (0 == scp->request_bufflen)
 		return 0;
@@ -619,16 +620,16 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
 			scp->resid = req_len - act_len;
 		return 0;
 	}
-	sgpnt = (struct scatterlist *)scp->request_buffer;
 	active = 1;
-	for (k = 0, req_len = 0, act_len = 0; k < scp->use_sg; ++k, ++sgpnt) {
+	req_len = act_len = 0;
+	scsi_for_each_sg(scp, sg, scp->use_sg, k) {
 		if (active) {
 			kaddr = (unsigned char *)
-				kmap_atomic(sgpnt->page, KM_USER0);
+				kmap_atomic(sg->page, KM_USER0);
 			if (NULL == kaddr)
 				return (DID_ERROR << 16);
-			kaddr_off = (unsigned char *)kaddr + sgpnt->offset;
-			len = sgpnt->length;
+			kaddr_off = (unsigned char *)kaddr + sg->offset;
+			len = sg->length;
 			if ((req_len + len) > arr_len) {
 				active = 0;
 				len = arr_len - req_len;
@@ -637,7 +638,7 @@ static int fill_from_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
 			kunmap_atomic(kaddr, KM_USER0);
 			act_len += len;
 		}
-		req_len += sgpnt->length;
+		req_len += sg->length;
 	}
 	if (scp->resid)
 		scp->resid -= act_len;
@@ -653,7 +654,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
 	int k, req_len, len, fin;
 	void * kaddr;
 	void * kaddr_off;
-	struct scatterlist * sgpnt;
+	struct scatterlist * sg;
 
 	if (0 == scp->request_bufflen)
 		return 0;
@@ -668,13 +669,14 @@ static int fetch_to_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
 		memcpy(arr, scp->request_buffer, len);
 		return len;
 	}
-	sgpnt = (struct scatterlist *)scp->request_buffer;
-	for (k = 0, req_len = 0, fin = 0; k < scp->use_sg; ++k, ++sgpnt) {
-		kaddr = (unsigned char *)kmap_atomic(sgpnt->page, KM_USER0);
+	sg = scsi_sglist(scp);
+	req_len = fin = 0;
+	for (k = 0; k < scp->use_sg; ++k, sg = sg_next(sg)) {
+		kaddr = (unsigned char *)kmap_atomic(sg->page, KM_USER0);
 		if (NULL == kaddr)
 			return -1;
-		kaddr_off = (unsigned char *)kaddr + sgpnt->offset;
-		len = sgpnt->length;
+		kaddr_off = (unsigned char *)kaddr + sg->offset;
+		len = sg->length;
 		if ((req_len + len) > max_arr_len) {
 			len = max_arr_len - req_len;
 			fin = 1;
@@ -683,7 +685,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd * scp, unsigned char * arr,
 		kunmap_atomic(kaddr, KM_USER0);
 		if (fin)
 			return req_len + len;
-		req_len += sgpnt->length;
+		req_len += sg->length;
 	}
 	return req_len;
 }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 207f1aa0886..aac8a02cbe8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -17,6 +17,7 @@
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/hardirq.h>
+#include <linux/scatterlist.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -33,35 +34,34 @@
 #define SG_MEMPOOL_NR		ARRAY_SIZE(scsi_sg_pools)
 #define SG_MEMPOOL_SIZE		2
 
+/*
+ * The maximum number of SG segments that we will put inside a scatterlist
+ * (unless chaining is used). Should ideally fit inside a single page, to
+ * avoid a higher order allocation.
+ */
+#define SCSI_MAX_SG_SEGMENTS	128
+
 struct scsi_host_sg_pool {
 	size_t		size;
-	char		*name; 
+	char		*name;
 	struct kmem_cache	*slab;
 	mempool_t	*pool;
 };
 
-#if (SCSI_MAX_PHYS_SEGMENTS < 32)
-#error SCSI_MAX_PHYS_SEGMENTS is too small
-#endif
-
-#define SP(x) { x, "sgpool-" #x } 
+#define SP(x) { x, "sgpool-" #x }
 static struct scsi_host_sg_pool scsi_sg_pools[] = {
 	SP(8),
 	SP(16),
+#if (SCSI_MAX_SG_SEGMENTS > 16)
 	SP(32),
-#if (SCSI_MAX_PHYS_SEGMENTS > 32)
+#if (SCSI_MAX_SG_SEGMENTS > 32)
 	SP(64),
-#if (SCSI_MAX_PHYS_SEGMENTS > 64)
+#if (SCSI_MAX_SG_SEGMENTS > 64)
 	SP(128),
-#if (SCSI_MAX_PHYS_SEGMENTS > 128)
-	SP(256),
-#if (SCSI_MAX_PHYS_SEGMENTS > 256)
-#error SCSI_MAX_PHYS_SEGMENTS is too large
-#endif
 #endif
 #endif
 #endif
-}; 	
+};
 #undef SP
 
 static void scsi_run_queue(struct request_queue *q);
@@ -289,14 +289,16 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
 	struct request_queue *q = rq->q;
 	int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	unsigned int data_len = bufflen, len, bytes, off;
+	struct scatterlist *sg;
 	struct page *page;
 	struct bio *bio = NULL;
 	int i, err, nr_vecs = 0;
 
-	for (i = 0; i < nsegs; i++) {
-		page = sgl[i].page;
-		off = sgl[i].offset;
-		len = sgl[i].length;
+	for_each_sg(sgl, sg, nsegs, i) {
+		page = sg->page;
+		off = sg->offset;
+		len = sg->length;
+		/* data_len was set from bufflen; it bounds the loop below */
 
 		while (len > 0 && data_len > 0) {
 			/*
@@ -695,56 +697,170 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
 	return NULL;
 }
 
-struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
-{
-	struct scsi_host_sg_pool *sgp;
-	struct scatterlist *sgl;
+/*
+ * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
+ * is totally arbitrary, a setting of 2048 will get you at least 8mb ios.
+ */
+#define SCSI_MAX_SG_CHAIN_SEGMENTS	2048
 
-	BUG_ON(!cmd->use_sg);
+static inline unsigned int scsi_sgtable_index(unsigned short nents)
+{
+	unsigned int index;
 
-	switch (cmd->use_sg) {
+	switch (nents) {
 	case 1 ... 8:
-		cmd->sglist_len = 0;
+		index = 0;
 		break;
 	case 9 ... 16:
-		cmd->sglist_len = 1;
+		index = 1;
 		break;
+#if (SCSI_MAX_SG_SEGMENTS > 16)
 	case 17 ... 32:
-		cmd->sglist_len = 2;
+		index = 2;
 		break;
-#if (SCSI_MAX_PHYS_SEGMENTS > 32)
+#if (SCSI_MAX_SG_SEGMENTS > 32)
 	case 33 ... 64:
-		cmd->sglist_len = 3;
+		index = 3;
 		break;
-#if (SCSI_MAX_PHYS_SEGMENTS > 64)
+#if (SCSI_MAX_SG_SEGMENTS > 64)
 	case 65 ... 128:
-		cmd->sglist_len = 4;
-		break;
-#if (SCSI_MAX_PHYS_SEGMENTS  > 128)
-	case 129 ... 256:
-		cmd->sglist_len = 5;
+		index = 4;
 		break;
 #endif
 #endif
 #endif
 	default:
-		return NULL;
+		printk(KERN_ERR "scsi: bad segment count=%d\n", nents);
+		BUG();
 	}
 
-	sgp = scsi_sg_pools + cmd->sglist_len;
-	sgl = mempool_alloc(sgp->pool, gfp_mask);
-	return sgl;
+	return index;
+}
+
+struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
+{
+	struct scsi_host_sg_pool *sgp;
+	struct scatterlist *sgl, *prev, *ret;
+	unsigned int index;
+	int this, left;
+
+	BUG_ON(!cmd->use_sg);
+
+	left = cmd->use_sg;
+	ret = prev = NULL;
+	do {
+		this = left;
+		if (this > SCSI_MAX_SG_SEGMENTS) {
+			this = SCSI_MAX_SG_SEGMENTS - 1;
+			index = SG_MEMPOOL_NR - 1;
+		} else
+			index = scsi_sgtable_index(this);
+
+		left -= this;
+
+		sgp = scsi_sg_pools + index;
+
+		sgl = mempool_alloc(sgp->pool, gfp_mask);
+		if (unlikely(!sgl))
+			goto enomem;
+
+		memset(sgl, 0, sizeof(*sgl) * sgp->size);
+
+		/*
+		 * first loop through, set initial index and return value
+		 */
+		if (!ret)
+			ret = sgl;
+
+		/*
+		 * chain previous sglist, if any. we know the previous
+		 * sglist must be the biggest one, or we would not have
+		 * ended up doing another loop.
+		 */
+		if (prev)
+			sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);
+
+		/*
+		 * don't allow subsequent mempool allocs to sleep, it would
+		 * violate the mempool principle.
+		 */
+		gfp_mask &= ~__GFP_WAIT;
+		gfp_mask |= __GFP_HIGH;
+		prev = sgl;
+	} while (left);
+
+	/*
+	 * ->use_sg may get modified after dma mapping has potentially
+	 * shrunk the number of segments, so keep a copy of it for free.
+	 */
+	cmd->__use_sg = cmd->use_sg;
+	return ret;
+enomem:
+	if (ret) {
+		/*
+		 * Free entries chained off ret. Since we were trying to
+		 * allocate another sglist, we know that all entries are of
+		 * the max size.
+		 */
+		sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
+		prev = ret;
+		ret = &ret[SCSI_MAX_SG_SEGMENTS - 1];
+
+		while ((sgl = sg_chain_ptr(ret)) != NULL) {
+			ret = &sgl[SCSI_MAX_SG_SEGMENTS - 1];
+			mempool_free(sgl, sgp->pool);
+		}
+
+		mempool_free(prev, sgp->pool);
+	}
+	return NULL;
 }
 
 EXPORT_SYMBOL(scsi_alloc_sgtable);
 
-void scsi_free_sgtable(struct scatterlist *sgl, int index)
+void scsi_free_sgtable(struct scsi_cmnd *cmd)
 {
+	struct scatterlist *sgl = cmd->request_buffer;
 	struct scsi_host_sg_pool *sgp;
 
-	BUG_ON(index >= SG_MEMPOOL_NR);
+	/*
+	 * if this is the biggest size sglist, check if we have
+	 * chained parts we need to free
+	 */
+	if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) {
+		unsigned short this, left;
+		struct scatterlist *next;
+		unsigned int index;
+
+		left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1);
+		next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
+		while (left && next) {
+			sgl = next;
+			this = left;
+			if (this > SCSI_MAX_SG_SEGMENTS) {
+				this = SCSI_MAX_SG_SEGMENTS - 1;
+				index = SG_MEMPOOL_NR - 1;
+			} else
+				index = scsi_sgtable_index(this);
+
+			left -= this;
+
+			sgp = scsi_sg_pools + index;
+
+			if (left)
+				next = sg_chain_ptr(&sgl[sgp->size - 1]);
+
+			mempool_free(sgl, sgp->pool);
+		}
+
+		/*
+		 * Restore original, will be freed below
+		 */
+		sgl = cmd->request_buffer;
+		sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
+	} else
+		sgp = scsi_sg_pools + scsi_sgtable_index(cmd->__use_sg);
 
-	sgp = scsi_sg_pools + index;
 	mempool_free(sgl, sgp->pool);
 }
 
@@ -770,7 +886,7 @@ EXPORT_SYMBOL(scsi_free_sgtable);
 static void scsi_release_buffers(struct scsi_cmnd *cmd)
 {
 	if (cmd->use_sg)
-		scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
+		scsi_free_sgtable(cmd);
 
 	/*
 	 * Zero these out.  They now point to freed memory, and it is
@@ -984,7 +1100,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 static int scsi_init_io(struct scsi_cmnd *cmd)
 {
 	struct request     *req = cmd->request;
-	struct scatterlist *sgpnt;
 	int		   count;
 
 	/*
@@ -997,14 +1112,13 @@ static int scsi_init_io(struct scsi_cmnd *cmd)
 	/*
 	 * If sg table allocation fails, requeue request later.
 	 */
-	sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
-	if (unlikely(!sgpnt)) {
+	cmd->request_buffer = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
+	if (unlikely(!cmd->request_buffer)) {
 		scsi_unprep_request(req);
 		return BLKPREP_DEFER;
 	}
 
 	req->buffer = NULL;
-	cmd->request_buffer = (char *) sgpnt;
 	if (blk_pc_request(req))
 		cmd->request_bufflen = req->data_len;
 	else
@@ -1529,8 +1643,25 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 	if (!q)
 		return NULL;
 
+	/*
+	 * this limit is imposed by hardware restrictions
+	 */
 	blk_queue_max_hw_segments(q, shost->sg_tablesize);
-	blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
+
+	/*
+	 * In the future, sg chaining support will be mandatory and this
+	 * ifdef can then go away. Right now we don't have all archs
+	 * converted, so better keep it safe.
+	 */
+#ifdef ARCH_HAS_SG_CHAIN
+	if (shost->use_sg_chaining)
+		blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS);
+	else
+		blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
+#else
+	blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS);
+#endif
+
 	blk_queue_max_sectors(q, shost->max_sectors);
 	blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
 	blk_queue_segment_boundary(q, shost->dma_boundary);
@@ -2193,18 +2324,19 @@ EXPORT_SYMBOL_GPL(scsi_target_unblock);
  *
  * Returns virtual address of the start of the mapped page
  */
-void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
+void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
 			  size_t *offset, size_t *len)
 {
 	int i;
 	size_t sg_len = 0, len_complete = 0;
+	struct scatterlist *sg;
 	struct page *page;
 
 	WARN_ON(!irqs_disabled());
 
-	for (i = 0; i < sg_count; i++) {
+	for_each_sg(sgl, sg, sg_count, i) {
 		len_complete = sg_len; /* Complete sg-entries */
-		sg_len += sg[i].length;
+		sg_len += sg->length;
 		if (sg_len > *offset)
 			break;
 	}
@@ -2218,10 +2350,10 @@ void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 	}
 
 	/* Offset starting from the beginning of first page in this sg-entry */
-	*offset = *offset - len_complete + sg[i].offset;
+	*offset = *offset - len_complete + sg->offset;
 
 	/* Assumption: contiguous pages can be accessed as "page + i" */
-	page = nth_page(sg[i].page, (*offset >> PAGE_SHIFT));
+	page = nth_page(sg->page, (*offset >> PAGE_SHIFT));
 	*offset &= ~PAGE_MASK;
 
 	/* Bytes in this sg-entry from *offset to the end of the page */
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index 66c692ffa30..a91761c3645 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -332,7 +332,7 @@ static void scsi_tgt_cmd_done(struct scsi_cmnd *cmd)
 	scsi_tgt_uspace_send_status(cmd, tcmd->itn_id, tcmd->tag);
 
 	if (cmd->request_buffer)
-		scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
+		scsi_free_sgtable(cmd);
 
 	queue_work(scsi_tgtd, &tcmd->work);
 }
@@ -373,7 +373,7 @@ static int scsi_tgt_init_cmd(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 	}
 
 	eprintk("cmd %p cnt %d\n", cmd, cmd->use_sg);
-	scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
+	scsi_free_sgtable(cmd);
 	return -EINVAL;
 }
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 0a3a528212c..69f542c4923 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -826,27 +826,6 @@ static int sd_sync_cache(struct scsi_disk *sdkp)
 	return 0;
 }
 
-static int sd_issue_flush(struct request_queue *q, struct gendisk *disk,
-			  sector_t *error_sector)
-{
-	int ret = 0;
-	struct scsi_device *sdp = q->queuedata;
-	struct scsi_disk *sdkp;
-
-	if (sdp->sdev_state != SDEV_RUNNING)
-		return -ENXIO;
-
-	sdkp = scsi_disk_get_from_dev(&sdp->sdev_gendev);
-
-	if (!sdkp)
-               return -ENODEV;
-
-	if (sdkp->WCE)
-		ret = sd_sync_cache(sdkp);
-	scsi_disk_put(sdkp);
-	return ret;
-}
-
 static void sd_prepare_flush(struct request_queue *q, struct request *rq)
 {
 	memset(rq->cmd, 0, sizeof(rq->cmd));
@@ -1697,7 +1676,6 @@ static int sd_probe(struct device *dev)
 	sd_revalidate_disk(gd);
 
 	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
-	blk_queue_issue_flush_fn(sdp->request_queue, sd_issue_flush);
 
 	gd->driverfs_dev = &sdp->sdev_gendev;
 	gd->flags = GENHD_FL_DRIVERFS;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index f6f5fc7d0ce..7238b2dfc49 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1165,7 +1165,7 @@ sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type)
 	sg = rsv_schp->buffer;
 	sa = vma->vm_start;
 	for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
-	     ++k, ++sg) {
+	     ++k, sg = sg_next(sg)) {
 		len = vma->vm_end - sa;
 		len = (len < sg->length) ? len : sg->length;
 		if (offset < len) {
@@ -1209,7 +1209,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
 	sa = vma->vm_start;
 	sg = rsv_schp->buffer;
 	for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end);
-	     ++k, ++sg) {
+	     ++k, sg = sg_next(sg)) {
 		len = vma->vm_end - sa;
 		len = (len < sg->length) ? len : sg->length;
 		sa += len;
@@ -1840,7 +1840,7 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
 	}
 	for (k = 0, sg = schp->buffer, rem_sz = blk_size;
 	     (rem_sz > 0) && (k < mx_sc_elems);
-	     ++k, rem_sz -= ret_sz, ++sg) {
+	     ++k, rem_sz -= ret_sz, sg = sg_next(sg)) {
 		
 		num = (rem_sz > scatter_elem_sz_prev) ?
 		      scatter_elem_sz_prev : rem_sz;
@@ -1913,7 +1913,7 @@ sg_write_xfer(Sg_request * srp)
 		if (res)
 			return res;
 
-		for (; p; ++sg, ksglen = sg->length,
+		for (; p; sg = sg_next(sg), ksglen = sg->length,
 		     p = page_address(sg->page)) {
 			if (usglen <= 0)
 				break;
@@ -1992,7 +1992,7 @@ sg_remove_scat(Sg_scatter_hold * schp)
 			int k;
 
 			for (k = 0; (k < schp->k_use_sg) && sg->page;
-			     ++k, ++sg) {
+			     ++k, sg = sg_next(sg)) {
 				SCSI_LOG_TIMEOUT(5, printk(
 				    "sg_remove_scat: k=%d, pg=0x%p, len=%d\n",
 				    k, sg->page, sg->length));
@@ -2045,7 +2045,7 @@ sg_read_xfer(Sg_request * srp)
 		if (res)
 			return res;
 
-		for (; p; ++sg, ksglen = sg->length,
+		for (; p; sg = sg_next(sg), ksglen = sg->length,
 		     p = page_address(sg->page)) {
 			if (usglen <= 0)
 				break;
@@ -2092,7 +2092,7 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
 	if ((!outp) || (num_read_xfer <= 0))
 		return 0;
 
-	for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, ++sg) {
+	for (k = 0; (k < schp->k_use_sg) && sg->page; ++k, sg = sg_next(sg)) {
 		num = sg->length;
 		if (num > num_read_xfer) {
 			if (__copy_to_user(outp, page_address(sg->page),
@@ -2142,7 +2142,7 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size)
 	SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size));
 	rem = size;
 
-	for (k = 0; k < rsv_schp->k_use_sg; ++k, ++sg) {
+	for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) {
 		num = sg->length;
 		if (rem <= num) {
 			sfp->save_scat_len = num;
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index 72f6d801535..e3fab3a6aed 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -1123,6 +1123,7 @@ static struct scsi_host_template driver_template = {
 	.this_id			= -1,
 	.sg_tablesize			= ST_MAX_SG,
 	.cmd_per_lun			= ST_CMD_PER_LUN,
+	.use_sg_chaining		= ENABLE_SG_CHAINING,
 };
 
 static int stex_set_dma_mask(struct pci_dev * pdev)
diff --git a/drivers/scsi/sym53c416.c b/drivers/scsi/sym53c416.c
index 92bfaeafe30..8befab7e983 100644
--- a/drivers/scsi/sym53c416.c
+++ b/drivers/scsi/sym53c416.c
@@ -854,5 +854,6 @@ static struct scsi_host_template driver_template = {
 	.cmd_per_lun =		1,
 	.unchecked_isa_dma =	1,
 	.use_clustering =	ENABLE_CLUSTERING,
+	.use_sg_chaining =	ENABLE_SG_CHAINING,
 };
 #include "scsi_module.c"
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 3db22325ea2..db03c4c8ec1 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -1808,6 +1808,7 @@ static struct scsi_host_template sym2_template = {
 	.eh_host_reset_handler	= sym53c8xx_eh_host_reset_handler,
 	.this_id		= 7,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 	.max_sectors		= 0xFFFF,
 #ifdef SYM_LINUX_PROC_INFO_SUPPORT
 	.proc_info		= sym53c8xx_proc_info,
diff --git a/drivers/scsi/u14-34f.c b/drivers/scsi/u14-34f.c
index fc9f51818e8..7edd6ceb13b 100644
--- a/drivers/scsi/u14-34f.c
+++ b/drivers/scsi/u14-34f.c
@@ -450,7 +450,8 @@ static struct scsi_host_template driver_template = {
                 .slave_configure         = u14_34f_slave_configure,
                 .this_id                 = 7,
                 .unchecked_isa_dma       = 1,
-                .use_clustering          = ENABLE_CLUSTERING
+                .use_clustering          = ENABLE_CLUSTERING,
+                .use_sg_chaining         = ENABLE_SG_CHAINING,
                 };
 
 #if !defined(__BIG_ENDIAN_BITFIELD) && !defined(__LITTLE_ENDIAN_BITFIELD)
diff --git a/drivers/scsi/ultrastor.c b/drivers/scsi/ultrastor.c
index c08235d5afc..ea72bbeb8f9 100644
--- a/drivers/scsi/ultrastor.c
+++ b/drivers/scsi/ultrastor.c
@@ -1197,5 +1197,6 @@ static struct scsi_host_template driver_template = {
 	.cmd_per_lun       = ULTRASTOR_MAX_CMDS_PER_LUN,
 	.unchecked_isa_dma = 1,
 	.use_clustering    = ENABLE_CLUSTERING,
+	.use_sg_chaining   = ENABLE_SG_CHAINING,
 };
 #include "scsi_module.c"
diff --git a/drivers/scsi/wd7000.c b/drivers/scsi/wd7000.c
index d6fd4259c56..255c611e78b 100644
--- a/drivers/scsi/wd7000.c
+++ b/drivers/scsi/wd7000.c
@@ -1671,6 +1671,7 @@ static struct scsi_host_template driver_template = {
 	.cmd_per_lun		= 1,
 	.unchecked_isa_dma	= 1,
 	.use_clustering		= ENABLE_CLUSTERING,
+	.use_sg_chaining	= ENABLE_SG_CHAINING,
 };
 
 #include "scsi_module.c"
diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c
index 1ea1ed82c35..0e357562ce9 100644
--- a/drivers/serial/8250_pci.c
+++ b/drivers/serial/8250_pci.c
@@ -1036,6 +1036,7 @@ enum pci_board_num_t {
 	pbn_b0_2_115200,
 	pbn_b0_4_115200,
 	pbn_b0_5_115200,
+	pbn_b0_8_115200,
 
 	pbn_b0_1_921600,
 	pbn_b0_2_921600,
@@ -1172,6 +1173,12 @@ static struct pciserial_board pci_boards[] __devinitdata = {
 		.base_baud	= 115200,
 		.uart_offset	= 8,
 	},
+	[pbn_b0_8_115200] = {
+		.flags		= FL_BASE0,
+		.num_ports	= 8,
+		.base_baud	= 115200,
+		.uart_offset	= 8,
+	},
 
 	[pbn_b0_1_921600] = {
 		.flags		= FL_BASE0,
@@ -2566,6 +2573,119 @@ static struct pci_device_id serial_pci_tbl[] = {
 	{       PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9030,
 		PCI_SUBVENDOR_ID_PERLE, PCI_SUBDEVICE_ID_PCI_RAS8,
 		0, 0, pbn_b2_8_921600 },
+
+	/*
+	 * Mainpine series cards: Fairly standard layout but fools
+	 * parts of the autodetect in some cases and uses otherwise
+	 * unmatched communications subclasses in the PCI Express case
+	 */
+
+	{	/* RockForceDUO */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0200,
+		0, 0, pbn_b0_2_115200 },
+	{	/* RockForceQUATRO */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0300,
+		0, 0, pbn_b0_4_115200 },
+	{	/* RockForceDUO+ */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0400,
+		0, 0, pbn_b0_2_115200 },
+	{	/* RockForceQUATRO+ */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0500,
+		0, 0, pbn_b0_4_115200 },
+	{	/* RockForce+ */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0600,
+		0, 0, pbn_b0_2_115200 },
+	{	/* RockForce+ */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0700,
+		0, 0, pbn_b0_4_115200 },
+	{	/* RockForceOCTO+ */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0800,
+		0, 0, pbn_b0_8_115200 },
+	{	/* RockForceDUO+ */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0C00,
+		0, 0, pbn_b0_2_115200 },
+	{	/* RockForceQUATRO+ */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x0D00,
+		0, 0, pbn_b0_4_115200 },
+	{	/* RockForceOCTO+ */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x1D00,
+		0, 0, pbn_b0_8_115200 },
+	{	/* RockForceD1 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x2000,
+		0, 0, pbn_b0_1_115200 },
+	{	/* RockForceF1 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x2100,
+		0, 0, pbn_b0_1_115200 },
+	{	/* RockForceD2 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x2200,
+		0, 0, pbn_b0_2_115200 },
+	{	/* RockForceF2 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x2300,
+		0, 0, pbn_b0_2_115200 },
+	{	/* RockForceD4 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x2400,
+		0, 0, pbn_b0_4_115200 },
+	{	/* RockForceF4 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x2500,
+		0, 0, pbn_b0_4_115200 },
+	{	/* RockForceD8 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x2600,
+		0, 0, pbn_b0_8_115200 },
+	{	/* RockForceF8 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x2700,
+		0, 0, pbn_b0_8_115200 },
+	{	/* IQ Express D1 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x3000,
+		0, 0, pbn_b0_1_115200 },
+	{	/* IQ Express F1 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x3100,
+		0, 0, pbn_b0_1_115200 },
+	{	/* IQ Express D2 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x3200,
+		0, 0, pbn_b0_2_115200 },
+	{	/* IQ Express F2 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x3300,
+		0, 0, pbn_b0_2_115200 },
+	{	/* IQ Express D4 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x3400,
+		0, 0, pbn_b0_4_115200 },
+	{	/* IQ Express F4 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x3500,
+		0, 0, pbn_b0_4_115200 },
+	{	/* IQ Express D8 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x3C00,
+		0, 0, pbn_b0_8_115200 },
+	{	/* IQ Express F8 */
+		PCI_VENDOR_ID_MAINPINE, PCI_DEVICE_ID_MAINPINE_PBRIDGE,
+		PCI_VENDOR_ID_MAINPINE, 0x3D00,
+		0, 0, pbn_b0_8_115200 },
+
+
 	/*
 	 * PA Semi PA6T-1682M on-chip UART
 	 */
diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c
index 301c8c0be9d..926f58a674a 100644
--- a/drivers/serial/8250_pnp.c
+++ b/drivers/serial/8250_pnp.c
@@ -327,6 +327,8 @@ static const struct pnp_device_id pnp_dev_table[] = {
 	{	"WACF004",		0	},
 	{	"WACF005",		0	},
 	{       "WACF006",              0       },
+	{       "WACF007",              0       },
+	{       "WACF008",              0       },
 	/* Compaq touchscreen */
 	{       "FPI2002",              0 },
 	/* Fujitsu Stylistic touchscreens */
diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c
index 312bef6bd58..7e8724d3571 100644
--- a/drivers/serial/crisv10.c
+++ b/drivers/serial/crisv10.c
@@ -514,6 +514,8 @@ struct tty_driver *serial_driver;
  * TTY_THRESHOLD_THROTTLE/UNTHROTTLE=128
  * BUF_SIZE can't be > 128
  */
+#define CRIS_BUF_SIZE	512
+
 /* Currently 16 descriptors x 128 bytes = 2048 bytes */
 #define SERIAL_DESCR_BUF_SIZE 256
 
@@ -2497,55 +2499,18 @@ static void flush_to_flip_buffer(struct e100_serial *info)
 		return;
 	}
 
-	length = tty->flip.count;
-	/* Don't flip more than the ldisc has room for.
-	 * The return value from ldisc.receive_room(tty) - might not be up to
-	 * date, the previous flip of up to TTY_FLIPBUF_SIZE might be on the
-	 * processed and not accounted for yet.
-	 * Since we use DMA, 1 SERIAL_DESCR_BUF_SIZE could be on the way.
-	 * Lets buffer data here and let flow control take care of it.
-	 * Since we normally flip large chunks, the ldisc don't react
-	 * with throttle until too late if we flip to much.
-	 */
-	max_flip_size = tty->ldisc.receive_room(tty);
-	if (max_flip_size < 0)
-		max_flip_size = 0;
-	if (max_flip_size <= (TTY_FLIPBUF_SIZE +         /* Maybe not accounted for */
-			      length + info->recv_cnt +  /* We have this queued */
-			      2*SERIAL_DESCR_BUF_SIZE +    /* This could be on the way */
-			      TTY_THRESHOLD_THROTTLE)) { /* Some slack */
-		/* check TTY_THROTTLED first so it indicates our state */
-		if (!test_and_set_bit(TTY_THROTTLED, &tty->flags)) {
-			DFLOW(DEBUG_LOG(info->line,"flush_to_flip throttles room %lu\n", max_flip_size));
-			rs_throttle(tty);
-		}
-#if 0
-		else if (max_flip_size <= (TTY_FLIPBUF_SIZE +         /* Maybe not accounted for */
-					   length + info->recv_cnt +  /* We have this queued */
-					   SERIAL_DESCR_BUF_SIZE +    /* This could be on the way */
-					   TTY_THRESHOLD_THROTTLE)) { /* Some slack */
-			DFLOW(DEBUG_LOG(info->line,"flush_to_flip throttles again! %lu\n", max_flip_size));
-			rs_throttle(tty);
-		}
-#endif
-	}
-
-	if (max_flip_size > TTY_FLIPBUF_SIZE)
-		max_flip_size = TTY_FLIPBUF_SIZE;
-
-	while ((buffer = info->first_recv_buffer) && length < max_flip_size) {
+	while ((buffer = info->first_recv_buffer) != NULL) {
 		unsigned int count = buffer->length;
 
-		if (length + count > max_flip_size)
-			count = max_flip_size - length;
+		count = tty_buffer_request_room(tty, count);
+		if (count == 0) /* Throttle ?? */
+			break;
 
-		memcpy(tty->flip.char_buf_ptr + length, buffer->buffer, count);
-		memset(tty->flip.flag_buf_ptr + length, TTY_NORMAL, count);
-		tty->flip.flag_buf_ptr[length] = buffer->error;
+		if (count > 1)
+			tty_insert_flip_strings(tty, buffer->buffer, count - 1);
+		tty_insert_flip_char(tty, buffer->buffer[count-1], buffer->error);
 
-		length += count;
 		info->recv_cnt -= count;
-		DFLIP(DEBUG_LOG(info->line,"flip: %i\n", length));
 
 		if (count == buffer->length) {
 			info->first_recv_buffer = buffer->next;
@@ -2560,14 +2525,6 @@ static void flush_to_flip_buffer(struct e100_serial *info)
 	if (!info->first_recv_buffer)
 		info->last_recv_buffer = NULL;
 
-	tty->flip.count = length;
-	DFLIP(if (tty->ldisc.chars_in_buffer(tty) > 3500) {
-		DEBUG_LOG(info->line, "ldisc %lu\n",
-			  tty->ldisc.chars_in_buffer(tty));
-		DEBUG_LOG(info->line, "flip.count %lu\n",
-			  tty->flip.count);
-	      }
-	      );
 	restore_flags(flags);
 
 	DFLIP(
@@ -2722,17 +2679,17 @@ struct e100_serial * handle_ser_rx_interrupt_no_dma(struct e100_serial *info)
 		printk("!NO TTY!\n");
 		return info;
 	}
-	if (tty->flip.count >= TTY_FLIPBUF_SIZE - TTY_THRESHOLD_THROTTLE) {
+	if (tty->flip.count >= CRIS_BUF_SIZE - TTY_THRESHOLD_THROTTLE) {
 		/* check TTY_THROTTLED first so it indicates our state */
 		if (!test_and_set_bit(TTY_THROTTLED, &tty->flags)) {
 			DFLOW(DEBUG_LOG(info->line, "rs_throttle flip.count: %i\n", tty->flip.count));
 			rs_throttle(tty);
 		}
 	}
-	if (tty->flip.count >= TTY_FLIPBUF_SIZE) {
+	if (tty->flip.count >= CRIS_BUF_SIZE) {
 		DEBUG_LOG(info->line, "force FLIP! %i\n", tty->flip.count);
 		tty->flip.work.func((void *) tty);
-		if (tty->flip.count >= TTY_FLIPBUF_SIZE) {
+		if (tty->flip.count >= CRIS_BUF_SIZE) {
 			DEBUG_LOG(info->line, "FLIP FULL! %i\n", tty->flip.count);
 			return info;		/* if TTY_DONT_FLIP is set */
 		}
diff --git a/drivers/serial/m32r_sio.c b/drivers/serial/m32r_sio.c
index 6e09c8b395e..348ee2c19b5 100644
--- a/drivers/serial/m32r_sio.c
+++ b/drivers/serial/m32r_sio.c
@@ -539,7 +539,7 @@ static void serial_do_unlink(struct irq_info *i, struct uart_sio_port *up)
 static int serial_link_irq_chain(struct uart_sio_port *up)
 {
 	struct irq_info *i = irq_lists + up->port.irq;
-	int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0;
+	int ret, irq_flags = 0;
 
 	spin_lock_irq(&i->lock);
 
diff --git a/drivers/serial/m32r_sio.h b/drivers/serial/m32r_sio.h
index 849f1b2c253..e9b7e11793b 100644
--- a/drivers/serial/m32r_sio.h
+++ b/drivers/serial/m32r_sio.h
@@ -46,9 +46,3 @@ struct old_serial_port {
 #define PROBE_ANY	(~0)
 
 #define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8)
-
-#ifdef CONFIG_SERIAL_SIO_SHARE_IRQ
-#define M32R_SIO_SHARE_IRQS 1
-#else
-#define M32R_SIO_SHARE_IRQS 0
-#endif
diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
index a3bd3a3f41f..68aa4da0186 100644
--- a/drivers/serial/serial_core.c
+++ b/drivers/serial/serial_core.c
@@ -1938,9 +1938,24 @@ static void uart_change_pm(struct uart_state *state, int pm_state)
 	}
 }
 
+struct uart_match {
+	struct uart_port *port;
+	struct uart_driver *driver;
+};
+
+static int serial_match_port(struct device *dev, void *data)
+{
+	struct uart_match *match = data;
+	dev_t devt = MKDEV(match->driver->major, match->driver->minor) + match->port->line;
+
+	return dev->devt == devt; /* Actually, only one tty per port */
+}
+
 int uart_suspend_port(struct uart_driver *drv, struct uart_port *port)
 {
 	struct uart_state *state = drv->state + port->line;
+	struct device *tty_dev;
+	struct uart_match match = {port, drv};
 
 	mutex_lock(&state->mutex);
 
@@ -1951,6 +1966,15 @@ int uart_suspend_port(struct uart_driver *drv, struct uart_port *port)
 	}
 #endif
 
+	tty_dev = device_find_child(port->dev, &match, serial_match_port);
+	if (device_may_wakeup(tty_dev)) {
+		enable_irq_wake(port->irq);
+		put_device(tty_dev);
+		mutex_unlock(&state->mutex);
+		return 0;
+	}
+	port->suspended = 1;
+
 	if (state->info && state->info->flags & UIF_INITIALIZED) {
 		const struct uart_ops *ops = port->ops;
 
@@ -1999,6 +2023,13 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
 	}
 #endif
 
+	if (!port->suspended) {
+		disable_irq_wake(port->irq);
+		mutex_unlock(&state->mutex);
+		return 0;
+	}
+	port->suspended = 0;
+
 	uart_change_pm(state, 0);
 
 	/*
@@ -2278,6 +2309,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port)
 {
 	struct uart_state *state;
 	int ret = 0;
+	struct device *tty_dev;
 
 	BUG_ON(in_interrupt());
 
@@ -2314,7 +2346,13 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *port)
 	 * Register the port whether it's detected or not.  This allows
 	 * setserial to be used to alter this ports parameters.
 	 */
-	tty_register_device(drv->tty_driver, port->line, port->dev);
+	tty_dev = tty_register_device(drv->tty_driver, port->line, port->dev);
+	if (likely(!IS_ERR(tty_dev))) {
+		device_can_wakeup(tty_dev) = 1;
+		device_set_wakeup_enable(tty_dev, 0);
+	} else
+		printk(KERN_ERR "Cannot register tty device on line %d\n",
+		       port->line);
 
 	/*
 	 * Ensure UPF_DEAD is not set.
diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c
index 7c8d78fbbbf..5afcb2fa7cd 100644
--- a/drivers/serial/serial_cs.c
+++ b/drivers/serial/serial_cs.c
@@ -911,6 +911,7 @@ static struct pcmcia_device_id serial_ids[] = {
 	PCMCIA_MFC_DEVICE_CIS_MANF_CARD(1, 0x0175, 0x0000, "DP83903.cis"),
 	PCMCIA_MFC_DEVICE_CIS_MANF_CARD(1, 0x0101, 0x0035, "3CXEM556.cis"),
 	PCMCIA_MFC_DEVICE_CIS_MANF_CARD(1, 0x0101, 0x003d, "3CXEM556.cis"),
+	PCMCIA_DEVICE_CIS_PROD_ID12("Sierra Wireless", "AC850", 0xd85f6206, 0x42a2c018, "SW_8xx_SER.cis"),  /* Sierra Wireless AC850 3G Network Adapter R1 */
 	PCMCIA_DEVICE_CIS_MANF_CARD(0x0192, 0x0710, "SW_7xx_SER.cis"),	/* Sierra Wireless AC710/AC750 GPRS Network Adapter R1 */
 	PCMCIA_DEVICE_CIS_MANF_CARD(0x0192, 0xa555, "SW_555_SER.cis"),  /* Sierra Aircard 555 CDMA 1xrtt Modem -- pre update */
 	PCMCIA_DEVICE_CIS_MANF_CARD(0x013f, 0xa555, "SW_555_SER.cis"),  /* Sierra Aircard 555 CDMA 1xrtt Modem -- post update */
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c
index 0930e2a8551..6846a6c38b6 100644
--- a/drivers/serial/serial_txx9.c
+++ b/drivers/serial/serial_txx9.c
@@ -25,19 +25,15 @@
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/console.h>
-#include <linux/sysrq.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 #include <linux/pci.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
 #include <linux/serial_core.h>
 #include <linux/serial.h>
-#include <linux/mutex.h>
 
 #include <asm/io.h>
 
-static char *serial_version = "1.10";
+static char *serial_version = "1.11";
 static char *serial_name = "TX39/49 Serial driver";
 
 #define PASS_LIMIT	256
@@ -68,8 +64,6 @@ static char *serial_name = "TX39/49 Serial driver";
  */
 #define UART_NR  CONFIG_SERIAL_TXX9_NR_UARTS
 
-#define HIGH_BITS_OFFSET	((sizeof(long)-sizeof(int))*8)
-
 struct uart_txx9_port {
 	struct uart_port	port;
 	/* No additional info for now */
@@ -756,21 +750,6 @@ static void serial_txx9_config_port(struct uart_port *port, int uflags)
 	serial_txx9_initialize(port);
 }
 
-static int
-serial_txx9_verify_port(struct uart_port *port, struct serial_struct *ser)
-{
-	unsigned long new_port = ser->port;
-	if (HIGH_BITS_OFFSET)
-		new_port += (unsigned long)ser->port_high << HIGH_BITS_OFFSET;
-	if (ser->type != port->type ||
-	    ser->irq != port->irq ||
-	    ser->io_type != port->iotype ||
-	    new_port != port->iobase ||
-	    (unsigned long)ser->iomem_base != port->mapbase)
-		return -EINVAL;
-	return 0;
-}
-
 static const char *
 serial_txx9_type(struct uart_port *port)
 {
@@ -794,7 +773,6 @@ static struct uart_ops serial_txx9_pops = {
 	.release_port	= serial_txx9_release_port,
 	.request_port	= serial_txx9_request_port,
 	.config_port	= serial_txx9_config_port,
-	.verify_port	= serial_txx9_verify_port,
 };
 
 static struct uart_txx9_port serial_txx9_ports[UART_NR];
@@ -950,7 +928,8 @@ int __init early_serial_txx9_setup(struct uart_port *port)
 
 	serial_txx9_ports[port->line].port = *port;
 	serial_txx9_ports[port->line].port.ops = &serial_txx9_pops;
-	serial_txx9_ports[port->line].port.flags |= UPF_BOOT_AUTOCONF;
+	serial_txx9_ports[port->line].port.flags |=
+		UPF_BOOT_AUTOCONF | UPF_FIXED_PORT;
 	return 0;
 }
 
@@ -995,7 +974,8 @@ static int __devinit serial_txx9_register_port(struct uart_port *port)
 		uart->port.irq      = port->irq;
 		uart->port.uartclk  = port->uartclk;
 		uart->port.iotype   = port->iotype;
-		uart->port.flags    = port->flags | UPF_BOOT_AUTOCONF;
+		uart->port.flags    = port->flags
+			| UPF_BOOT_AUTOCONF | UPF_FIXED_PORT;
 		uart->port.mapbase  = port->mapbase;
 		if (port->dev)
 			uart->port.dev = port->dev;
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b91571122da..a77ede598d3 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -124,16 +124,17 @@ config SPI_MPC52xx_PSC
 	  Controller in master SPI mode.
 
 config SPI_MPC83xx
-	tristate "Freescale MPC83xx SPI controller"
-	depends on SPI_MASTER && PPC_83xx && EXPERIMENTAL
+	tristate "Freescale MPC83xx/QUICC Engine SPI controller"
+	depends on SPI_MASTER && (PPC_83xx || QUICC_ENGINE) && EXPERIMENTAL
 	select SPI_BITBANG
 	help
-	  This enables using the Freescale MPC83xx SPI controller in master
-	  mode.
+	  This enables using the Freescale MPC83xx and QUICC Engine SPI
+	  controllers in master mode.
 
 	  Note, this driver uniquely supports the SPI controller on the MPC83xx
-	  family of PowerPC processors.  The MPC83xx uses a simple set of shift
-	  registers for data (opposed to the CPM based descriptor model).
+	  family of PowerPC processors, plus processors with QUICC Engine
+	  technology. This driver uses a simple set of shift registers for data
+	  (as opposed to the CPM-based descriptor model).
 
 config SPI_OMAP_UWIRE
 	tristate "OMAP1 MicroWire"
diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c
index b0469749310..0d342dcdd30 100644
--- a/drivers/spi/atmel_spi.c
+++ b/drivers/spi/atmel_spi.c
@@ -211,7 +211,7 @@ static void atmel_spi_next_message(struct spi_master *master)
 	msg = list_entry(as->queue.next, struct spi_message, queue);
 	spi = msg->spi;
 
-	dev_dbg(master->cdev.dev, "start message %p for %s\n",
+	dev_dbg(master->dev.parent, "start message %p for %s\n",
 			msg, spi->dev.bus_id);
 
 	/* select chip if it's not still active */
@@ -266,10 +266,10 @@ static void atmel_spi_dma_unmap_xfer(struct spi_master *master,
 				     struct spi_transfer *xfer)
 {
 	if (xfer->tx_dma != INVALID_DMA_ADDRESS)
-		dma_unmap_single(master->cdev.dev, xfer->tx_dma,
+		dma_unmap_single(master->dev.parent, xfer->tx_dma,
 				 xfer->len, DMA_TO_DEVICE);
 	if (xfer->rx_dma != INVALID_DMA_ADDRESS)
-		dma_unmap_single(master->cdev.dev, xfer->rx_dma,
+		dma_unmap_single(master->dev.parent, xfer->rx_dma,
 				 xfer->len, DMA_FROM_DEVICE);
 }
 
@@ -285,7 +285,7 @@ atmel_spi_msg_done(struct spi_master *master, struct atmel_spi *as,
 	list_del(&msg->queue);
 	msg->status = status;
 
-	dev_dbg(master->cdev.dev,
+	dev_dbg(master->dev.parent,
 		"xfer complete: %u bytes transferred\n",
 		msg->actual_length);
 
@@ -348,7 +348,7 @@ atmel_spi_interrupt(int irq, void *dev_id)
 		if (xfer->delay_usecs)
 			udelay(xfer->delay_usecs);
 
-		dev_warn(master->cdev.dev, "fifo overrun (%u/%u remaining)\n",
+		dev_warn(master->dev.parent, "fifo overrun (%u/%u remaining)\n",
 			 spi_readl(as, TCR), spi_readl(as, RCR));
 
 		/*
@@ -363,7 +363,7 @@ atmel_spi_interrupt(int irq, void *dev_id)
 			if (spi_readl(as, SR) & SPI_BIT(TXEMPTY))
 				break;
 		if (!timeout)
-			dev_warn(master->cdev.dev,
+			dev_warn(master->dev.parent,
 				 "timeout waiting for TXEMPTY");
 		while (spi_readl(as, SR) & SPI_BIT(RDRF))
 			spi_readl(as, RDR);
@@ -526,7 +526,7 @@ static int atmel_spi_transfer(struct spi_device *spi, struct spi_message *msg)
 	struct atmel_spi	*as;
 	struct spi_transfer	*xfer;
 	unsigned long		flags;
-	struct device		*controller = spi->master->cdev.dev;
+	struct device		*controller = spi->master->dev.parent;
 
 	as = spi_master_get_devdata(spi->master);
 
diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c
index d2a4b2bdb07..e9aba932f21 100644
--- a/drivers/spi/mpc52xx_psc_spi.c
+++ b/drivers/spi/mpc52xx_psc_spi.c
@@ -503,7 +503,7 @@ static int __init mpc52xx_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	INIT_LIST_HEAD(&mps->queue);
 
 	mps->workqueue = create_singlethread_workqueue(
-		master->cdev.dev->bus_id);
+		master->dev.parent->bus_id);
 	if (mps->workqueue == NULL) {
 		ret = -EBUSY;
 		goto free_irq;
diff --git a/drivers/spi/omap2_mcspi.c b/drivers/spi/omap2_mcspi.c
index 6b357cdb9ea..3cdab131c4a 100644
--- a/drivers/spi/omap2_mcspi.c
+++ b/drivers/spi/omap2_mcspi.c
@@ -645,7 +645,7 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 
 	clk_enable(mcspi->ick);
 	clk_enable(mcspi->fck);
-	ret =  omap2_mcspi_setup_transfer(spi, NULL);
+	ret = omap2_mcspi_setup_transfer(spi, NULL);
 	clk_disable(mcspi->fck);
 	clk_disable(mcspi->ick);
 
@@ -693,7 +693,6 @@ static void omap2_mcspi_work(struct work_struct *work)
 		struct spi_device		*spi;
 		struct spi_transfer		*t = NULL;
 		int				cs_active = 0;
-		struct omap2_mcspi_device_config *conf;
 		struct omap2_mcspi_cs		*cs;
 		int				par_override = 0;
 		int				status = 0;
@@ -706,7 +705,6 @@ static void omap2_mcspi_work(struct work_struct *work)
 		spin_unlock_irq(&mcspi->lock);
 
 		spi = m->spi;
-		conf = spi->controller_data;
 		cs = spi->controller_state;
 
 		omap2_mcspi_set_enable(spi, 1);
diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c
index d275c615a73..8245b5153f3 100644
--- a/drivers/spi/omap_uwire.c
+++ b/drivers/spi/omap_uwire.c
@@ -481,7 +481,7 @@ static void uwire_off(struct uwire_spi *uwire)
 	spi_master_put(uwire->bitbang.master);
 }
 
-static int uwire_probe(struct platform_device *pdev)
+static int __init uwire_probe(struct platform_device *pdev)
 {
 	struct spi_master	*master;
 	struct uwire_spi	*uwire;
@@ -525,7 +525,7 @@ static int uwire_probe(struct platform_device *pdev)
 	return status;
 }
 
-static int uwire_remove(struct platform_device *pdev)
+static int __exit uwire_remove(struct platform_device *pdev)
 {
 	struct uwire_spi	*uwire = dev_get_drvdata(&pdev->dev);
 	int			status;
@@ -543,8 +543,7 @@ static struct platform_driver uwire_driver = {
 		.bus		= &platform_bus_type,
 		.owner		= THIS_MODULE,
 	},
-	.probe		= uwire_probe,
-	.remove		= uwire_remove,
+	.remove		= __exit_p(uwire_remove),
 	// suspend ... unuse ck
 	// resume ... use ck
 };
@@ -566,7 +565,7 @@ static int __init omap_uwire_init(void)
 		omap_writel(val | 0x00AAA000, OMAP730_IO_CONF_9);
 	}
 
-	return platform_driver_register(&uwire_driver);
+	return platform_driver_probe(&uwire_driver, uwire_probe);
 }
 
 static void __exit omap_uwire_exit(void)
diff --git a/drivers/spi/pxa2xx_spi.c b/drivers/spi/pxa2xx_spi.c
index e51311b2da0..5f3d808cbc2 100644
--- a/drivers/spi/pxa2xx_spi.c
+++ b/drivers/spi/pxa2xx_spi.c
@@ -26,7 +26,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/spi/spi.h>
 #include <linux/workqueue.h>
-#include <linux/errno.h>
 #include <linux/delay.h>
 
 #include <asm/io.h>
@@ -1230,7 +1229,7 @@ static void cleanup(struct spi_device *spi)
 	kfree(chip);
 }
 
-static int init_queue(struct driver_data *drv_data)
+static int __init init_queue(struct driver_data *drv_data)
 {
 	INIT_LIST_HEAD(&drv_data->queue);
 	spin_lock_init(&drv_data->lock);
@@ -1243,7 +1242,7 @@ static int init_queue(struct driver_data *drv_data)
 
 	INIT_WORK(&drv_data->pump_messages, pump_messages);
 	drv_data->workqueue = create_singlethread_workqueue(
-					drv_data->master->cdev.dev->bus_id);
+					drv_data->master->dev.parent->bus_id);
 	if (drv_data->workqueue == NULL)
 		return -EBUSY;
 
@@ -1318,7 +1317,7 @@ static int destroy_queue(struct driver_data *drv_data)
 	return 0;
 }
 
-static int pxa2xx_spi_probe(struct platform_device *pdev)
+static int __init pxa2xx_spi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct pxa2xx_spi_master *platform_info;
@@ -1622,8 +1621,7 @@ static struct platform_driver driver = {
 		.bus = &platform_bus_type,
 		.owner = THIS_MODULE,
 	},
-	.probe = pxa2xx_spi_probe,
-	.remove = __devexit_p(pxa2xx_spi_remove),
+	.remove = pxa2xx_spi_remove,
 	.shutdown = pxa2xx_spi_shutdown,
 	.suspend = pxa2xx_spi_suspend,
 	.resume = pxa2xx_spi_resume,
@@ -1631,9 +1629,7 @@ static struct platform_driver driver = {
 
 static int __init pxa2xx_spi_init(void)
 {
-	platform_driver_register(&driver);
-
-	return 0;
+	return platform_driver_probe(&driver, pxa2xx_spi_probe);
 }
 module_init(pxa2xx_spi_init);
 
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index bcb8dd5fb0b..89769ce16f8 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -204,7 +204,7 @@ struct spi_device *spi_new_device(struct spi_master *master,
 				  struct spi_board_info *chip)
 {
 	struct spi_device	*proxy;
-	struct device		*dev = master->cdev.dev;
+	struct device		*dev = master->dev.parent;
 	int			status;
 
 	/* NOTE:  caller did any chip->bus_num checks necessary.
@@ -239,7 +239,7 @@ struct spi_device *spi_new_device(struct spi_master *master,
 	proxy->modalias = chip->modalias;
 
 	snprintf(proxy->dev.bus_id, sizeof proxy->dev.bus_id,
-			"%s.%u", master->cdev.class_id,
+			"%s.%u", master->dev.bus_id,
 			chip->chip_select);
 	proxy->dev.parent = dev;
 	proxy->dev.bus = &spi_bus_type;
@@ -338,18 +338,18 @@ static void scan_boardinfo(struct spi_master *master)
 
 /*-------------------------------------------------------------------------*/
 
-static void spi_master_release(struct class_device *cdev)
+static void spi_master_release(struct device *dev)
 {
 	struct spi_master *master;
 
-	master = container_of(cdev, struct spi_master, cdev);
+	master = container_of(dev, struct spi_master, dev);
 	kfree(master);
 }
 
 static struct class spi_master_class = {
 	.name		= "spi_master",
 	.owner		= THIS_MODULE,
-	.release	= spi_master_release,
+	.dev_release	= spi_master_release,
 };
 
 
@@ -357,7 +357,7 @@ static struct class spi_master_class = {
  * spi_alloc_master - allocate SPI master controller
  * @dev: the controller, possibly using the platform_bus
  * @size: how much zeroed driver-private data to allocate; the pointer to this
- *	memory is in the class_data field of the returned class_device,
+ *	memory is in the driver_data field of the returned device,
  *	accessible with spi_master_get_devdata().
  * Context: can sleep
  *
@@ -383,9 +383,9 @@ struct spi_master *spi_alloc_master(struct device *dev, unsigned size)
 	if (!master)
 		return NULL;
 
-	class_device_initialize(&master->cdev);
-	master->cdev.class = &spi_master_class;
-	master->cdev.dev = get_device(dev);
+	device_initialize(&master->dev);
+	master->dev.class = &spi_master_class;
+	master->dev.parent = get_device(dev);
 	spi_master_set_devdata(master, &master[1]);
 
 	return master;
@@ -415,7 +415,7 @@ EXPORT_SYMBOL_GPL(spi_alloc_master);
 int spi_register_master(struct spi_master *master)
 {
 	static atomic_t		dyn_bus_id = ATOMIC_INIT((1<<15) - 1);
-	struct device		*dev = master->cdev.dev;
+	struct device		*dev = master->dev.parent;
 	int			status = -ENODEV;
 	int			dynamic = 0;
 
@@ -440,12 +440,12 @@ int spi_register_master(struct spi_master *master)
 	/* register the device, then userspace will see it.
 	 * registration fails if the bus ID is in use.
 	 */
-	snprintf(master->cdev.class_id, sizeof master->cdev.class_id,
+	snprintf(master->dev.bus_id, sizeof master->dev.bus_id,
 		"spi%u", master->bus_num);
-	status = class_device_add(&master->cdev);
+	status = device_add(&master->dev);
 	if (status < 0)
 		goto done;
-	dev_dbg(dev, "registered master %s%s\n", master->cdev.class_id,
+	dev_dbg(dev, "registered master %s%s\n", master->dev.bus_id,
 			dynamic ? " (dynamic)" : "");
 
 	/* populate children from any spi device tables */
@@ -478,8 +478,8 @@ void spi_unregister_master(struct spi_master *master)
 {
 	int dummy;
 
-	dummy = device_for_each_child(master->cdev.dev, NULL, __unregister);
-	class_device_unregister(&master->cdev);
+	dummy = device_for_each_child(master->dev.parent, NULL, __unregister);
+	device_unregister(&master->dev);
 }
 EXPORT_SYMBOL_GPL(spi_unregister_master);
 
@@ -495,13 +495,13 @@ EXPORT_SYMBOL_GPL(spi_unregister_master);
  */
 struct spi_master *spi_busnum_to_master(u16 bus_num)
 {
-	struct class_device	*cdev;
+	struct device		*dev;
 	struct spi_master	*master = NULL;
 	struct spi_master	*m;
 
 	down(&spi_master_class.sem);
-	list_for_each_entry(cdev, &spi_master_class.children, node) {
-		m = container_of(cdev, struct spi_master, cdev);
+	list_for_each_entry(dev, &spi_master_class.children, node) {
+		m = container_of(dev, struct spi_master, dev);
 		if (m->bus_num == bus_num) {
 			master = spi_master_get(m);
 			break;
diff --git a/drivers/spi/spi_bfin5xx.c b/drivers/spi/spi_bfin5xx.c
index f540ed77a10..6cb71d74738 100644
--- a/drivers/spi/spi_bfin5xx.c
+++ b/drivers/spi/spi_bfin5xx.c
@@ -39,7 +39,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/spi/spi.h>
 #include <linux/workqueue.h>
-#include <linux/errno.h>
 #include <linux/delay.h>
 
 #include <asm/io.h>
@@ -1107,7 +1106,7 @@ static inline int init_queue(struct driver_data *drv_data)
 	/* init messages workqueue */
 	INIT_WORK(&drv_data->pump_messages, pump_messages);
 	drv_data->workqueue =
-	    create_singlethread_workqueue(drv_data->master->cdev.dev->bus_id);
+	    create_singlethread_workqueue(drv_data->master->dev.parent->bus_id);
 	if (drv_data->workqueue == NULL)
 		return -EBUSY;
 
diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c
index 0c85c984ccb..81639c6be1c 100644
--- a/drivers/spi/spi_bitbang.c
+++ b/drivers/spi/spi_bitbang.c
@@ -472,7 +472,7 @@ int spi_bitbang_start(struct spi_bitbang *bitbang)
 	/* this task is the only thing to touch the SPI bits */
 	bitbang->busy = 0;
 	bitbang->workqueue = create_singlethread_workqueue(
-			bitbang->master->cdev.dev->bus_id);
+			bitbang->master->dev.parent->bus_id);
 	if (bitbang->workqueue == NULL) {
 		status = -EBUSY;
 		goto err1;
diff --git a/drivers/spi/spi_imx.c b/drivers/spi/spi_imx.c
index bd9177f51de..3b4650ae6f1 100644
--- a/drivers/spi/spi_imx.c
+++ b/drivers/spi/spi_imx.c
@@ -1361,7 +1361,7 @@ static void cleanup(struct spi_device *spi)
 	kfree(spi_get_ctldata(spi));
 }
 
-static int init_queue(struct driver_data *drv_data)
+static int __init init_queue(struct driver_data *drv_data)
 {
 	INIT_LIST_HEAD(&drv_data->queue);
 	spin_lock_init(&drv_data->lock);
@@ -1374,7 +1374,7 @@ static int init_queue(struct driver_data *drv_data)
 
 	INIT_WORK(&drv_data->work, pump_messages);
 	drv_data->workqueue = create_singlethread_workqueue(
-					drv_data->master->cdev.dev->bus_id);
+					drv_data->master->dev.parent->bus_id);
 	if (drv_data->workqueue == NULL)
 		return -EBUSY;
 
@@ -1444,7 +1444,7 @@ static int destroy_queue(struct driver_data *drv_data)
 	return 0;
 }
 
-static int spi_imx_probe(struct platform_device *pdev)
+static int __init spi_imx_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct spi_imx_master *platform_info;
@@ -1622,7 +1622,7 @@ err_no_mem:
 	return status;
 }
 
-static int __devexit spi_imx_remove(struct platform_device *pdev)
+static int __exit spi_imx_remove(struct platform_device *pdev)
 {
 	struct driver_data *drv_data = platform_get_drvdata(pdev);
 	int irq;
@@ -1739,8 +1739,7 @@ static struct platform_driver driver = {
 		.bus = &platform_bus_type,
 		.owner = THIS_MODULE,
 	},
-	.probe = spi_imx_probe,
-	.remove = __devexit_p(spi_imx_remove),
+	.remove = __exit_p(spi_imx_remove),
 	.shutdown = spi_imx_shutdown,
 	.suspend = spi_imx_suspend,
 	.resume = spi_imx_resume,
@@ -1748,7 +1747,7 @@ static struct platform_driver driver = {
 
 static int __init spi_imx_init(void)
 {
-	return platform_driver_register(&driver);
+	return platform_driver_probe(&driver, spi_imx_probe);
 }
 module_init(spi_imx_init);
 
diff --git a/drivers/spi/spi_lm70llp.c b/drivers/spi/spi_lm70llp.c
index 4ea68ac1611..39d8d8ad65c 100644
--- a/drivers/spi/spi_lm70llp.c
+++ b/drivers/spi/spi_lm70llp.c
@@ -82,7 +82,7 @@ struct spi_lm70llp {
 	struct pardevice	*pd;
 	struct spi_device	*spidev_lm70;
 	struct spi_board_info	info;
-	struct class_device	*cdev;
+	//struct device		*dev;
 };
 
 /* REVISIT : ugly global ; provides "exclusive open" facility */
diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c
index 32cda77b31c..4580b9cf625 100644
--- a/drivers/spi/spi_mpc83xx.c
+++ b/drivers/spi/spi_mpc83xx.c
@@ -511,7 +511,7 @@ err:
 	return ret;
 }
 
-static int __devexit mpc83xx_spi_remove(struct platform_device *dev)
+static int __exit mpc83xx_spi_remove(struct platform_device *dev)
 {
 	struct mpc83xx_spi *mpc83xx_spi;
 	struct spi_master *master;
@@ -529,8 +529,7 @@ static int __devexit mpc83xx_spi_remove(struct platform_device *dev)
 
 MODULE_ALIAS("mpc83xx_spi");			/* for platform bus hotplug */
 static struct platform_driver mpc83xx_spi_driver = {
-	.probe = mpc83xx_spi_probe,
-	.remove = __devexit_p(mpc83xx_spi_remove),
+	.remove = __exit_p(mpc83xx_spi_remove),
 	.driver = {
 		   .name = "mpc83xx_spi",
 	},
@@ -538,7 +537,7 @@ static struct platform_driver mpc83xx_spi_driver = {
 
 static int __init mpc83xx_spi_init(void)
 {
-	return platform_driver_register(&mpc83xx_spi_driver);
+	return platform_driver_probe(&mpc83xx_spi_driver, mpc83xx_spi_probe);
 }
 
 static void __exit mpc83xx_spi_exit(void)
diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c
index e9b683f7d7b..89d6685a5ca 100644
--- a/drivers/spi/spi_s3c24xx.c
+++ b/drivers/spi/spi_s3c24xx.c
@@ -233,7 +233,7 @@ static irqreturn_t s3c24xx_spi_irq(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
-static int s3c24xx_spi_probe(struct platform_device *pdev)
+static int __init s3c24xx_spi_probe(struct platform_device *pdev)
 {
 	struct s3c24xx_spi *hw;
 	struct spi_master *master;
@@ -382,7 +382,7 @@ static int s3c24xx_spi_probe(struct platform_device *pdev)
 	return err;
 }
 
-static int s3c24xx_spi_remove(struct platform_device *dev)
+static int __exit s3c24xx_spi_remove(struct platform_device *dev)
 {
 	struct s3c24xx_spi *hw = platform_get_drvdata(dev);
 
@@ -429,8 +429,7 @@ static int s3c24xx_spi_resume(struct platform_device *pdev)
 
 MODULE_ALIAS("s3c2410_spi");			/* for platform bus hotplug */
 static struct platform_driver s3c24xx_spidrv = {
-	.probe		= s3c24xx_spi_probe,
-	.remove		= s3c24xx_spi_remove,
+	.remove		= __exit_p(s3c24xx_spi_remove),
 	.suspend	= s3c24xx_spi_suspend,
 	.resume		= s3c24xx_spi_resume,
 	.driver		= {
@@ -441,7 +440,7 @@ static struct platform_driver s3c24xx_spidrv = {
 
 static int __init s3c24xx_spi_init(void)
 {
-        return platform_driver_register(&s3c24xx_spidrv);
+        return platform_driver_probe(&s3c24xx_spidrv, s3c24xx_spi_probe);
 }
 
 static void __exit s3c24xx_spi_exit(void)
diff --git a/drivers/spi/spi_txx9.c b/drivers/spi/spi_txx9.c
index b7f4bb239ea..cc5094f37dd 100644
--- a/drivers/spi/spi_txx9.c
+++ b/drivers/spi/spi_txx9.c
@@ -400,7 +400,7 @@ static int __init txx9spi_probe(struct platform_device *dev)
 		goto exit;
 	}
 
-	c->workqueue = create_singlethread_workqueue(master->cdev.dev->bus_id);
+	c->workqueue = create_singlethread_workqueue(master->dev.parent->bus_id);
 	if (!c->workqueue)
 		goto exit;
 	c->last_chipselect = -1;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d20cb545a6e..60a8f55a0cc 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1407,7 +1407,11 @@ fail:
 
 
 /**
- * Similar to usb_disconnect()
+ * usb_deauthorize_device - deauthorize a device (usbcore-internal)
+ * @usb_dev: USB device
+ *
+ * Move the USB device to a very basic state where interfaces are disabled
+ * and the device is in fact unconfigured and unusable.
  *
  * We share a lock (that we have) with device_del(), so we need to
  * defer its call.
diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c
index 43722e5a49d..b624320df90 100644
--- a/drivers/usb/misc/sisusbvga/sisusb_con.c
+++ b/drivers/usb/misc/sisusbvga/sisusb_con.c
@@ -1042,7 +1042,8 @@ sisusbcon_set_origin(struct vc_data *c)
 
 /* Interface routine */
 static int
-sisusbcon_resize(struct vc_data *c, unsigned int newcols, unsigned int newrows)
+sisusbcon_resize(struct vc_data *c, unsigned int newcols, unsigned int newrows,
+		 unsigned int user)
 {
 	struct sisusb_usb_data *sisusb;
 	int fh;
diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c
index 4d3cbb12b71..8d3711a7ff0 100644
--- a/drivers/usb/storage/alauda.c
+++ b/drivers/usb/storage/alauda.c
@@ -798,12 +798,13 @@ static int alauda_read_data(struct us_data *us, unsigned long address,
 {
 	unsigned char *buffer;
 	u16 lba, max_lba;
-	unsigned int page, len, index, offset;
+	unsigned int page, len, offset;
 	unsigned int blockshift = MEDIA_INFO(us).blockshift;
 	unsigned int pageshift = MEDIA_INFO(us).pageshift;
 	unsigned int blocksize = MEDIA_INFO(us).blocksize;
 	unsigned int pagesize = MEDIA_INFO(us).pagesize;
 	unsigned int uzonesize = MEDIA_INFO(us).uzonesize;
+	struct scatterlist *sg;
 	int result;
 
 	/*
@@ -827,7 +828,8 @@ static int alauda_read_data(struct us_data *us, unsigned long address,
 	max_lba = MEDIA_INFO(us).capacity >> (blockshift + pageshift);
 
 	result = USB_STOR_TRANSPORT_GOOD;
-	index = offset = 0;
+	offset = 0;
+	sg = NULL;
 
 	while (sectors > 0) {
 		unsigned int zone = lba / uzonesize; /* integer division */
@@ -873,7 +875,7 @@ static int alauda_read_data(struct us_data *us, unsigned long address,
 
 		/* Store the data in the transfer buffer */
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				&index, &offset, TO_XFER_BUF);
+				&sg, &offset, TO_XFER_BUF);
 
 		page = 0;
 		lba++;
@@ -891,11 +893,12 @@ static int alauda_write_data(struct us_data *us, unsigned long address,
 		unsigned int sectors)
 {
 	unsigned char *buffer, *blockbuffer;
-	unsigned int page, len, index, offset;
+	unsigned int page, len, offset;
 	unsigned int blockshift = MEDIA_INFO(us).blockshift;
 	unsigned int pageshift = MEDIA_INFO(us).pageshift;
 	unsigned int blocksize = MEDIA_INFO(us).blocksize;
 	unsigned int pagesize = MEDIA_INFO(us).pagesize;
+	struct scatterlist *sg;
 	u16 lba, max_lba;
 	int result;
 
@@ -929,7 +932,8 @@ static int alauda_write_data(struct us_data *us, unsigned long address,
 	max_lba = MEDIA_INFO(us).capacity >> (pageshift + blockshift);
 
 	result = USB_STOR_TRANSPORT_GOOD;
-	index = offset = 0;
+	offset = 0;
+	sg = NULL;
 
 	while (sectors > 0) {
 		/* Write as many sectors as possible in this block */
@@ -946,7 +950,7 @@ static int alauda_write_data(struct us_data *us, unsigned long address,
 
 		/* Get the data from the transfer buffer */
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				&index, &offset, FROM_XFER_BUF);
+				&sg, &offset, FROM_XFER_BUF);
 
 		result = alauda_write_lba(us, lba, page, pages, buffer,
 			blockbuffer);
diff --git a/drivers/usb/storage/datafab.c b/drivers/usb/storage/datafab.c
index c87ad1bae1d..579e9f52053 100644
--- a/drivers/usb/storage/datafab.c
+++ b/drivers/usb/storage/datafab.c
@@ -98,7 +98,8 @@ static int datafab_read_data(struct us_data *us,
 	unsigned char  thistime;
 	unsigned int totallen, alloclen;
 	int len, result;
-	unsigned int sg_idx = 0, sg_offset = 0;
+	unsigned int sg_offset = 0;
+	struct scatterlist *sg = NULL;
 
 	// we're working in LBA mode.  according to the ATA spec, 
 	// we can support up to 28-bit addressing.  I don't know if Datafab
@@ -155,7 +156,7 @@ static int datafab_read_data(struct us_data *us,
 
 		// Store the data in the transfer buffer
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				 &sg_idx, &sg_offset, TO_XFER_BUF);
+				 &sg, &sg_offset, TO_XFER_BUF);
 
 		sector += thistime;
 		totallen -= len;
@@ -181,7 +182,8 @@ static int datafab_write_data(struct us_data *us,
 	unsigned char thistime;
 	unsigned int totallen, alloclen;
 	int len, result;
-	unsigned int sg_idx = 0, sg_offset = 0;
+	unsigned int sg_offset = 0;
+	struct scatterlist *sg = NULL;
 
 	// we're working in LBA mode.  according to the ATA spec, 
 	// we can support up to 28-bit addressing.  I don't know if Datafab
@@ -217,7 +219,7 @@ static int datafab_write_data(struct us_data *us,
 
 		// Get the data from the transfer buffer
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				&sg_idx, &sg_offset, FROM_XFER_BUF);
+				&sg, &sg_offset, FROM_XFER_BUF);
 
 		command[0] = 0;
 		command[1] = thistime;
diff --git a/drivers/usb/storage/jumpshot.c b/drivers/usb/storage/jumpshot.c
index 003fcf54588..61097cbb158 100644
--- a/drivers/usb/storage/jumpshot.c
+++ b/drivers/usb/storage/jumpshot.c
@@ -119,7 +119,8 @@ static int jumpshot_read_data(struct us_data *us,
 	unsigned char  thistime;
 	unsigned int totallen, alloclen;
 	int len, result;
-	unsigned int sg_idx = 0, sg_offset = 0;
+	unsigned int sg_offset = 0;
+	struct scatterlist *sg = NULL;
 
 	// we're working in LBA mode.  according to the ATA spec, 
 	// we can support up to 28-bit addressing.  I don't know if Jumpshot
@@ -170,7 +171,7 @@ static int jumpshot_read_data(struct us_data *us,
 
 		// Store the data in the transfer buffer
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				 &sg_idx, &sg_offset, TO_XFER_BUF);
+				 &sg, &sg_offset, TO_XFER_BUF);
 
 		sector += thistime;
 		totallen -= len;
@@ -195,7 +196,8 @@ static int jumpshot_write_data(struct us_data *us,
 	unsigned char  thistime;
 	unsigned int totallen, alloclen;
 	int len, result, waitcount;
-	unsigned int sg_idx = 0, sg_offset = 0;
+	unsigned int sg_offset = 0;
+	struct scatterlist *sg = NULL;
 
 	// we're working in LBA mode.  according to the ATA spec, 
 	// we can support up to 28-bit addressing.  I don't know if Jumpshot
@@ -225,7 +227,7 @@ static int jumpshot_write_data(struct us_data *us,
 
 		// Get the data from the transfer buffer
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				&sg_idx, &sg_offset, FROM_XFER_BUF);
+				&sg, &sg_offset, FROM_XFER_BUF);
 
 		command[0] = 0;
 		command[1] = thistime;
diff --git a/drivers/usb/storage/protocol.c b/drivers/usb/storage/protocol.c
index 9ad30428d2d..cc8f7c52c72 100644
--- a/drivers/usb/storage/protocol.c
+++ b/drivers/usb/storage/protocol.c
@@ -157,7 +157,7 @@ void usb_stor_transparent_scsi_command(struct scsi_cmnd *srb,
  * pick up from where this one left off. */
 
 unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
-	unsigned int buflen, struct scsi_cmnd *srb, unsigned int *index,
+	unsigned int buflen, struct scsi_cmnd *srb, struct scatterlist **sgptr,
 	unsigned int *offset, enum xfer_buf_dir dir)
 {
 	unsigned int cnt;
@@ -184,16 +184,17 @@ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
 	 * located in high memory -- then kmap() will map it to a temporary
 	 * position in the kernel's virtual address space. */
 	} else {
-		struct scatterlist *sg =
-				(struct scatterlist *) srb->request_buffer
-				+ *index;
+		struct scatterlist *sg = *sgptr;
+
+		if (!sg)
+			sg = (struct scatterlist *) srb->request_buffer;
 
 		/* This loop handles a single s-g list entry, which may
 		 * include multiple pages.  Find the initial page structure
 		 * and the starting offset within the page, and update
 		 * the *offset and *index values for the next loop. */
 		cnt = 0;
-		while (cnt < buflen && *index < srb->use_sg) {
+		while (cnt < buflen) {
 			struct page *page = sg->page +
 					((sg->offset + *offset) >> PAGE_SHIFT);
 			unsigned int poff =
@@ -209,8 +210,7 @@ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
 
 				/* Transfer continues to next s-g entry */
 				*offset = 0;
-				++*index;
-				++sg;
+				sg = sg_next(sg);
 			}
 
 			/* Transfer the data for all the pages in this
@@ -234,6 +234,7 @@ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
 				sglen -= plen;
 			}
 		}
+		*sgptr = sg;
 	}
 
 	/* Return the amount actually transferred */
@@ -245,9 +246,10 @@ unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
 void usb_stor_set_xfer_buf(unsigned char *buffer,
 	unsigned int buflen, struct scsi_cmnd *srb)
 {
-	unsigned int index = 0, offset = 0;
+	unsigned int offset = 0;
+	struct scatterlist *sg = NULL;
 
-	usb_stor_access_xfer_buf(buffer, buflen, srb, &index, &offset,
+	usb_stor_access_xfer_buf(buffer, buflen, srb, &sg, &offset,
 			TO_XFER_BUF);
 	if (buflen < srb->request_bufflen)
 		srb->resid = srb->request_bufflen - buflen;
diff --git a/drivers/usb/storage/protocol.h b/drivers/usb/storage/protocol.h
index 845bed4b803..8737a36891c 100644
--- a/drivers/usb/storage/protocol.h
+++ b/drivers/usb/storage/protocol.h
@@ -52,7 +52,7 @@ extern void usb_stor_transparent_scsi_command(struct scsi_cmnd*,
 enum xfer_buf_dir	{TO_XFER_BUF, FROM_XFER_BUF};
 
 extern unsigned int usb_stor_access_xfer_buf(unsigned char *buffer,
-	unsigned int buflen, struct scsi_cmnd *srb, unsigned int *index,
+	unsigned int buflen, struct scsi_cmnd *srb, struct scatterlist **,
 	unsigned int *offset, enum xfer_buf_dir dir);
 
 extern void usb_stor_set_xfer_buf(unsigned char *buffer,
diff --git a/drivers/usb/storage/sddr09.c b/drivers/usb/storage/sddr09.c
index b2ed2a3e6fc..b12202c5da2 100644
--- a/drivers/usb/storage/sddr09.c
+++ b/drivers/usb/storage/sddr09.c
@@ -705,7 +705,8 @@ sddr09_read_data(struct us_data *us,
 	unsigned char *buffer;
 	unsigned int lba, maxlba, pba;
 	unsigned int page, pages;
-	unsigned int len, index, offset;
+	unsigned int len, offset;
+	struct scatterlist *sg;
 	int result;
 
 	// Figure out the initial LBA and page
@@ -730,7 +731,8 @@ sddr09_read_data(struct us_data *us,
 	// contiguous LBA's. Another exercise left to the student.
 
 	result = 0;
-	index = offset = 0;
+	offset = 0;
+	sg = NULL;
 
 	while (sectors > 0) {
 
@@ -777,7 +779,7 @@ sddr09_read_data(struct us_data *us,
 
 		// Store the data in the transfer buffer
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				&index, &offset, TO_XFER_BUF);
+				&sg, &offset, TO_XFER_BUF);
 
 		page = 0;
 		lba++;
@@ -931,7 +933,8 @@ sddr09_write_data(struct us_data *us,
 	unsigned int pagelen, blocklen;
 	unsigned char *blockbuffer;
 	unsigned char *buffer;
-	unsigned int len, index, offset;
+	unsigned int len, offset;
+	struct scatterlist *sg;
 	int result;
 
 	// Figure out the initial LBA and page
@@ -968,7 +971,8 @@ sddr09_write_data(struct us_data *us,
 	}
 
 	result = 0;
-	index = offset = 0;
+	offset = 0;
+	sg = NULL;
 
 	while (sectors > 0) {
 
@@ -987,7 +991,7 @@ sddr09_write_data(struct us_data *us,
 
 		// Get the data from the transfer buffer
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				&index, &offset, FROM_XFER_BUF);
+				&sg, &offset, FROM_XFER_BUF);
 
 		result = sddr09_write_lba(us, lba, page, pages,
 				buffer, blockbuffer);
diff --git a/drivers/usb/storage/sddr55.c b/drivers/usb/storage/sddr55.c
index 0b1b5b59ca7..d43a3415e12 100644
--- a/drivers/usb/storage/sddr55.c
+++ b/drivers/usb/storage/sddr55.c
@@ -167,7 +167,8 @@ static int sddr55_read_data(struct us_data *us,
 	unsigned long address;
 
 	unsigned short pages;
-	unsigned int len, index, offset;
+	unsigned int len, offset;
+	struct scatterlist *sg;
 
 	// Since we only read in one block at a time, we have to create
 	// a bounce buffer and move the data a piece at a time between the
@@ -178,7 +179,8 @@ static int sddr55_read_data(struct us_data *us,
 	buffer = kmalloc(len, GFP_NOIO);
 	if (buffer == NULL)
 		return USB_STOR_TRANSPORT_ERROR; /* out of memory */
-	index = offset = 0;
+	offset = 0;
+	sg = NULL;
 
 	while (sectors>0) {
 
@@ -255,7 +257,7 @@ static int sddr55_read_data(struct us_data *us,
 
 		// Store the data in the transfer buffer
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				&index, &offset, TO_XFER_BUF);
+				&sg, &offset, TO_XFER_BUF);
 
 		page = 0;
 		lba++;
@@ -287,7 +289,8 @@ static int sddr55_write_data(struct us_data *us,
 
 	unsigned short pages;
 	int i;
-	unsigned int len, index, offset;
+	unsigned int len, offset;
+	struct scatterlist *sg;
 
 	/* check if we are allowed to write */
 	if (info->read_only || info->force_read_only) {
@@ -304,7 +307,8 @@ static int sddr55_write_data(struct us_data *us,
 	buffer = kmalloc(len, GFP_NOIO);
 	if (buffer == NULL)
 		return USB_STOR_TRANSPORT_ERROR;
-	index = offset = 0;
+	offset = 0;
+	sg = NULL;
 
 	while (sectors > 0) {
 
@@ -322,7 +326,7 @@ static int sddr55_write_data(struct us_data *us,
 
 		// Get the data from the transfer buffer
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-				&index, &offset, FROM_XFER_BUF);
+				&sg, &offset, FROM_XFER_BUF);
 
 		US_DEBUGP("Write %02X pages, to PBA %04X"
 			" (LBA %04X) page %02X\n",
diff --git a/drivers/usb/storage/shuttle_usbat.c b/drivers/usb/storage/shuttle_usbat.c
index 17ca4d73577..cb22a9ad169 100644
--- a/drivers/usb/storage/shuttle_usbat.c
+++ b/drivers/usb/storage/shuttle_usbat.c
@@ -993,7 +993,8 @@ static int usbat_flash_read_data(struct us_data *us,
 	unsigned char  thistime;
 	unsigned int totallen, alloclen;
 	int len, result;
-	unsigned int sg_idx = 0, sg_offset = 0;
+	unsigned int sg_offset = 0;
+	struct scatterlist *sg = NULL;
 
 	result = usbat_flash_check_media(us, info);
 	if (result != USB_STOR_TRANSPORT_GOOD)
@@ -1047,7 +1048,7 @@ static int usbat_flash_read_data(struct us_data *us,
 	
 		/* Store the data in the transfer buffer */
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-					 &sg_idx, &sg_offset, TO_XFER_BUF);
+					 &sg, &sg_offset, TO_XFER_BUF);
 
 		sector += thistime;
 		totallen -= len;
@@ -1083,7 +1084,8 @@ static int usbat_flash_write_data(struct us_data *us,
 	unsigned char  thistime;
 	unsigned int totallen, alloclen;
 	int len, result;
-	unsigned int sg_idx = 0, sg_offset = 0;
+	unsigned int sg_offset = 0;
+	struct scatterlist *sg = NULL;
 
 	result = usbat_flash_check_media(us, info);
 	if (result != USB_STOR_TRANSPORT_GOOD)
@@ -1122,7 +1124,7 @@ static int usbat_flash_write_data(struct us_data *us,
 
 		/* Get the data from the transfer buffer */
 		usb_stor_access_xfer_buf(buffer, len, us->srb,
-					 &sg_idx, &sg_offset, FROM_XFER_BUF);
+					 &sg, &sg_offset, FROM_XFER_BUF);
 
 		/* ATA command 0x30 (WRITE SECTORS) */
 		usbat_pack_ata_sector_cmd(command, thistime, sector, 0x30);
@@ -1162,8 +1164,8 @@ static int usbat_hp8200e_handle_read10(struct us_data *us,
 	unsigned char *buffer;
 	unsigned int len;
 	unsigned int sector;
-	unsigned int sg_segment = 0;
 	unsigned int sg_offset = 0;
+	struct scatterlist *sg = NULL;
 
 	US_DEBUGP("handle_read10: transfersize %d\n",
 		srb->transfersize);
@@ -1220,9 +1222,6 @@ static int usbat_hp8200e_handle_read10(struct us_data *us,
 	sector |= short_pack(data[7+5], data[7+4]);
 	transferred = 0;
 
-	sg_segment = 0; /* for keeping track of where we are in */
-	sg_offset = 0;  /* the scatter/gather list */
-
 	while (transferred != srb->request_bufflen) {
 
 		if (len > srb->request_bufflen - transferred)
@@ -1255,7 +1254,7 @@ static int usbat_hp8200e_handle_read10(struct us_data *us,
 
 		/* Store the data in the transfer buffer */
 		usb_stor_access_xfer_buf(buffer, len, srb,
-				 &sg_segment, &sg_offset, TO_XFER_BUF);
+				 &sg, &sg_offset, TO_XFER_BUF);
 
 		/* Update the amount transferred and the sector number */
 
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 5216c11d4de..efe474e2cc3 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -5,8 +5,9 @@
 menu "Graphics support"
 	depends on HAS_IOMEM
 
-source "drivers/video/backlight/Kconfig"
-source "drivers/video/display/Kconfig"
+source "drivers/char/agp/Kconfig"
+
+source "drivers/char/drm/Kconfig"
 
 config VGASTATE
        tristate
@@ -19,7 +20,7 @@ config VIDEO_OUTPUT_CONTROL
 	  This framework adds support for low-level control of the video 
 	  output switch.
 
-config FB
+menuconfig FB
 	tristate "Support for frame buffer devices"
 	---help---
 	  The frame buffer device provides an abstraction for the graphics
@@ -103,6 +104,15 @@ config FB_CFB_IMAGEBLIT
 	  blitting. This is used by drivers that don't provide their own
 	  (accelerated) version.
 
+config FB_CFB_REV_PIXELS_IN_BYTE
+	bool
+	depends on FB
+	default n
+	---help---
+	  Allow generic frame-buffer functions to work on displays with 1, 2
+	  and 4 bits per pixel depths whose order of pixels within a byte is
+	  the opposite of the order of bytes within a long.
+
 config FB_SYS_FILLRECT
 	tristate
 	depends on FB
@@ -535,6 +545,15 @@ config FB_VGA16
 	  To compile this driver as a module, choose M here: the
 	  module will be called vga16fb.
 
+config FB_BF54X_LQ043
+	tristate "SHARP LQ043 TFT LCD (BF548 EZKIT)"
+	depends on FB && (BF54x)
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	help
+	 This is the framebuffer device driver for a SHARP LQ043T1DG01 TFT LCD.
+
 config FB_STI
 	tristate "HP STI frame buffer device support"
 	depends on FB && PARISC
@@ -592,6 +611,24 @@ config FB_TGA
 
 	  Say Y if you have one of those.
 
+config FB_UVESA
+	tristate "Userspace VESA VGA graphics support"
+	depends on FB && CONNECTOR
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	select FB_MODE_HELPERS
+	help
+	  This is the frame buffer driver for generic VBE 2.0 compliant
+	  graphic cards. It can also take advantage of VBE 3.0 features,
+	  such as refresh rate adjustment.
+
+	  This driver generally provides more features than vesafb but
+	  requires a userspace helper application called 'v86d'. See
+	  <file:Documentation/fb/uvesafb.txt> for more information.
+
+	  If unsure, say N.
+
 config FB_VESA
 	bool "VESA VGA graphics support"
 	depends on (FB = y) && X86
@@ -1625,7 +1662,7 @@ config FB_PMAG_BA
 
 config FB_PMAGB_B
 	tristate "PMAGB-B TURBOchannel framebuffer support"
-	depends on TC
+	depends on FB && TC
  	select FB_CFB_FILLRECT
  	select FB_CFB_COPYAREA
  	select FB_CFB_IMAGEBLIT
@@ -1793,7 +1830,7 @@ config FB_PNX4008_DUM_RGB
 
 config FB_IBM_GXT4500
 	tristate "Framebuffer support for IBM GXT4500P adaptor"
-	depends on PPC
+	depends on FB && PPC
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
@@ -1833,10 +1870,6 @@ config FB_XILINX
 	  framebuffer. ML300 carries a 640*480 LCD display on the board,
 	  ML403 uses a standard DB15 VGA connector.
 
-if ARCH_OMAP
-	source "drivers/video/omap/Kconfig"
-endif
-
 config FB_VIRTUAL
 	tristate "Virtual Frame Buffer support (ONLY FOR TESTING!)"
 	depends on FB
@@ -1860,6 +1893,13 @@ config FB_VIRTUAL
 
 	  If unsure, say N.
 
+if ARCH_OMAP
+	source "drivers/video/omap/Kconfig"
+endif
+
+source "drivers/video/backlight/Kconfig"
+source "drivers/video/display/Kconfig"
+
 if VT
 	source "drivers/video/console/Kconfig"
 endif
@@ -1869,4 +1909,3 @@ if FB || SGI_NEWPORT_CONSOLE
 endif
 
 endmenu
-
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 06eec7b182b..59d6c45a910 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -115,10 +115,12 @@ obj-$(CONFIG_FB_XILINX)           += xilinxfb.o
 obj-$(CONFIG_FB_OMAP)             += omap/
 
 # Platform or fallback drivers go here
+obj-$(CONFIG_FB_UVESA)            += uvesafb.o
 obj-$(CONFIG_FB_VESA)             += vesafb.o
 obj-$(CONFIG_FB_IMAC)             += imacfb.o
 obj-$(CONFIG_FB_VGA16)            += vga16fb.o
 obj-$(CONFIG_FB_OF)               += offb.o
+obj-$(CONFIG_FB_BF54X_LQ043)	  += bf54x-lq043fb.o
 
 # the test framebuffer is last
 obj-$(CONFIG_FB_VIRTUAL)          += vfb.o
diff --git a/drivers/video/amifb.c b/drivers/video/amifb.c
index 1a849b870bc..f2e243c353f 100644
--- a/drivers/video/amifb.c
+++ b/drivers/video/amifb.c
@@ -52,7 +52,7 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/system.h>
 #include <asm/irq.h>
 #include <asm/amigahw.h>
diff --git a/drivers/video/arcfb.c b/drivers/video/arcfb.c
index db15baca3f7..c3431691c9f 100644
--- a/drivers/video/arcfb.c
+++ b/drivers/video/arcfb.c
@@ -48,7 +48,7 @@
 #include <linux/arcfb.h>
 #include <linux/platform_device.h>
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #define floor8(a) (a&(~0x07))
 #define floorXres(a,xres) (a&(~(xres - 1)))
diff --git a/drivers/video/atafb.c b/drivers/video/atafb.c
index 0038a0541c7..5d4fbaa53a6 100644
--- a/drivers/video/atafb.c
+++ b/drivers/video/atafb.c
@@ -58,7 +58,7 @@
 #include <linux/interrupt.h>
 
 #include <asm/setup.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/io.h>
diff --git a/drivers/video/aty/ati_ids.h b/drivers/video/aty/ati_ids.h
index dca2eb8f2dd..3e9d28bcd9f 100644
--- a/drivers/video/aty/ati_ids.h
+++ b/drivers/video/aty/ati_ids.h
@@ -188,6 +188,7 @@
 #define PCI_CHIP_MACH64VT		0x5654
 #define PCI_CHIP_MACH64VU		0x5655
 #define PCI_CHIP_MACH64VV		0x5656
+#define PCI_CHIP_RC410_5A62             0x5A62
 #define PCI_CHIP_RS300_5834		0x5834
 #define PCI_CHIP_RS300_5835		0x5835
 #define PCI_CHIP_RS300_5836		0x5836
diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c
index cfcbe37d2d7..cbd3308b669 100644
--- a/drivers/video/aty/aty128fb.c
+++ b/drivers/video/aty/aty128fb.c
@@ -56,7 +56,7 @@
 #include <linux/vmalloc.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/pci.h>
diff --git a/drivers/video/aty/atyfb.h b/drivers/video/aty/atyfb.h
index dc62f8e282b..7691e73823d 100644
--- a/drivers/video/aty/atyfb.h
+++ b/drivers/video/aty/atyfb.h
@@ -126,6 +126,7 @@ union aty_pll {
      */
 
 struct atyfb_par {
+	u32 pseudo_palette[16];
 	struct { u8 red, green, blue; } palette[256];
 	const struct aty_dac_ops *dac_ops;
 	const struct aty_pll_ops *pll_ops;
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index bc6f0096aa0..abe0c435a66 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -68,7 +68,7 @@
 #include <linux/backlight.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <video/mach64.h>
 #include "atyfb.h"
@@ -541,8 +541,6 @@ static char ram_off[] __devinitdata = "OFF";
 #endif /* CONFIG_FB_ATY_CT */
 
 
-static u32 pseudo_palette[16];
-
 #ifdef CONFIG_FB_ATY_GX
 static char *aty_gx_ram[8] __devinitdata = {
 	ram_dram, ram_vram, ram_vram, ram_dram,
@@ -2577,7 +2575,7 @@ static int __devinit aty_init(struct fb_info *info)
 #endif
 
 	info->fbops = &atyfb_ops;
-	info->pseudo_palette = pseudo_palette;
+	info->pseudo_palette = par->pseudo_palette;
 	info->flags = FBINFO_DEFAULT           |
 	              FBINFO_HWACCEL_IMAGEBLIT |
 	              FBINFO_HWACCEL_FILLRECT  |
diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c
index fe2c6ad01a8..faf95da8fcb 100644
--- a/drivers/video/aty/mach64_cursor.c
+++ b/drivers/video/aty/mach64_cursor.c
@@ -8,7 +8,6 @@
 #include <linux/string.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
 
 #ifdef __sparc__
 #include <asm/fbio.h>
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 4b747bdaeea..1e32b3d13f2 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -69,7 +69,7 @@
 #include <linux/device.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_PPC_OF
 
@@ -145,6 +145,8 @@ static struct pci_device_id radeonfb_pci_table[] = {
 	/* 9000/Pro */
 	CHIP_DEF(PCI_CHIP_RV250_If,	RV250,	CHIP_HAS_CRTC2),
 	CHIP_DEF(PCI_CHIP_RV250_Ig,	RV250,	CHIP_HAS_CRTC2),
+
+	CHIP_DEF(PCI_CHIP_RC410_5A62,   RC410,  CHIP_HAS_CRTC2 | CHIP_IS_IGP | CHIP_IS_MOBILITY),
 	/* Mobility 9100 IGP (U3) */
 	CHIP_DEF(PCI_CHIP_RS300_5835,	RS300,	CHIP_HAS_CRTC2 | CHIP_IS_IGP | CHIP_IS_MOBILITY),
 	CHIP_DEF(PCI_CHIP_RS350_7835,	RS300,	CHIP_HAS_CRTC2 | CHIP_IS_IGP | CHIP_IS_MOBILITY),
@@ -1999,6 +2001,7 @@ static void radeon_identify_vram(struct radeonfb_info *rinfo)
         if ((rinfo->family == CHIP_FAMILY_RS100) ||
             (rinfo->family == CHIP_FAMILY_RS200) ||
             (rinfo->family == CHIP_FAMILY_RS300) ||
+            (rinfo->family == CHIP_FAMILY_RC410) ||
 	    (rinfo->family == CHIP_FAMILY_RS480) ) {
           u32 tom = INREG(NB_TOM);
           tmp = ((((tom >> 16) - (tom & 0xffff) + 1) << 6) * 1024);
diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h
index 7c922c7b460..5eac1ce52e7 100644
--- a/drivers/video/aty/radeonfb.h
+++ b/drivers/video/aty/radeonfb.h
@@ -48,6 +48,7 @@ enum radeon_family {
 	CHIP_FAMILY_RV350,
 	CHIP_FAMILY_RV380,    /* RV370/RV380/M22/M24 */
 	CHIP_FAMILY_R420,     /* R420/R423/M18 */
+	CHIP_FAMILY_RC410,
 	CHIP_FAMILY_RS480,
 	CHIP_FAMILY_LAST,
 };
@@ -66,7 +67,8 @@ enum radeon_family {
 				((rinfo)->family == CHIP_FAMILY_R350)  || \
 				((rinfo)->family == CHIP_FAMILY_RV380) || \
 				((rinfo)->family == CHIP_FAMILY_R420)  || \
-		                ((rinfo)->family == CHIP_FAMILY_RS480) )
+                               ((rinfo)->family == CHIP_FAMILY_RC410) || \
+                               ((rinfo)->family == CHIP_FAMILY_RS480))
 
 /*
  * Chip flags
diff --git a/drivers/video/backlight/cr_bllcd.c b/drivers/video/backlight/cr_bllcd.c
index 92e201e81fb..26add889860 100644
--- a/drivers/video/backlight/cr_bllcd.c
+++ b/drivers/video/backlight/cr_bllcd.c
@@ -36,7 +36,6 @@
 #include <linux/backlight.h>
 #include <linux/lcd.h>
 #include <linux/pci.h>
-#include <asm/uaccess.h>
 
 /* The LVDS- and panel power controls sits on the
  * GPIO port of the ISA bridge.
diff --git a/drivers/video/backlight/progear_bl.c b/drivers/video/backlight/progear_bl.c
index 836ab4df0ef..15fb4d58b5b 100644
--- a/drivers/video/backlight/progear_bl.c
+++ b/drivers/video/backlight/progear_bl.c
@@ -23,7 +23,6 @@
 #include <linux/fb.h>
 #include <linux/backlight.h>
 #include <linux/pci.h>
-#include <asm/uaccess.h>
 
 #define PMU_LPCR               0xB0
 #define SB_MPS1                0x61
diff --git a/drivers/video/bf54x-lq043fb.c b/drivers/video/bf54x-lq043fb.c
new file mode 100644
index 00000000000..74d11c31898
--- /dev/null
+++ b/drivers/video/bf54x-lq043fb.c
@@ -0,0 +1,786 @@
+/*
+ * File:         drivers/video/bf54x-lq043fb.c
+ * Based on:
+ * Author:       Michael Hennerich <hennerich@blackfin.uclinux.org>
+ *
+ * Created:
+ * Description:  ADSP-BF54x Framebuffer driver
+ *
+ *
+ * Modified:
+ *               Copyright 2004-2007 Analog Devices Inc.
+ *
+ * Bugs:         Enter bugs at http://blackfin.uclinux.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/backlight.h>
+#include <linux/lcd.h>
+#include <linux/spinlock.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+
+#include <asm/blackfin.h>
+#include <asm/irq.h>
+#include <asm/dpmc.h>
+#include <asm/dma-mapping.h>
+#include <asm/dma.h>
+#include <asm/gpio.h>
+#include <asm/portmux.h>
+
+#include <asm/mach/bf54x-lq043.h>
+
+#define NO_BL_SUPPORT
+
+#define DRIVER_NAME "bf54x-lq043"
+static char driver_name[] = DRIVER_NAME;
+
+#define BFIN_LCD_NBR_PALETTE_ENTRIES	256
+
+#define EPPI0_18 {P_PPI0_CLK, P_PPI0_FS1, P_PPI0_FS2, P_PPI0_D0, P_PPI0_D1, P_PPI0_D2, P_PPI0_D3, \
+ P_PPI0_D4, P_PPI0_D5, P_PPI0_D6, P_PPI0_D7, P_PPI0_D8, P_PPI0_D9, P_PPI0_D10, \
+ P_PPI0_D11, P_PPI0_D12, P_PPI0_D13, P_PPI0_D14, P_PPI0_D15, P_PPI0_D16, P_PPI0_D17, 0}
+
+#define EPPI0_24 {P_PPI0_D18, P_PPI0_D19, P_PPI0_D20, P_PPI0_D21, P_PPI0_D22, P_PPI0_D23, 0}
+
+struct bfin_bf54xfb_info {
+	struct fb_info *fb;
+	struct device *dev;
+
+	struct bfin_bf54xfb_mach_info *mach_info;
+
+	unsigned char *fb_buffer;	/* RGB Buffer */
+
+	dma_addr_t dma_handle;
+	int lq043_mmap;
+	int lq043_open_cnt;
+	int irq;
+	spinlock_t lock;	/* lock */
+};
+
+static int nocursor;
+module_param(nocursor, int, 0644);
+MODULE_PARM_DESC(nocursor, "cursor enable/disable");
+
+static int outp_rgb666;
+module_param(outp_rgb666, int, 0);
+MODULE_PARM_DESC(outp_rgb666, "Output 18-bit RGB666");
+
+#define LCD_X_RES		480	/*Horizontal Resolution */
+#define LCD_Y_RES		272	/* Vertical Resolution */
+
+#define LCD_BPP			24	/* Bit Per Pixel */
+#define	DMA_BUS_SIZE		32
+
+/* 	-- Horizontal synchronizing --
+ *
+ * Timing characteristics taken from the SHARP LQ043T1DG01 datasheet
+ * (LCY-W-06602A Page 9 of 22)
+ *
+ * Clock Frequency 	1/Tc Min 7.83 Typ 9.00 Max 9.26 MHz
+ *
+ * Period 		TH - 525 - Clock
+ * Pulse width 		THp - 41 - Clock
+ * Horizontal period 	THd - 480 - Clock
+ * Back porch 		THb - 2 - Clock
+ * Front porch 		THf - 2 - Clock
+ *
+ * -- Vertical synchronizing --
+ * Period 		TV - 286 - Line
+ * Pulse width 		TVp - 10 - Line
+ * Vertical period 	TVd - 272 - Line
+ * Back porch 		TVb - 2 - Line
+ * Front porch 		TVf - 2 - Line
+ */
+
+#define	LCD_CLK         	(8*1000*1000)	/* 8MHz */
+
+/* # active data to transfer after Horizontal Delay clock */
+#define EPPI_HCOUNT		LCD_X_RES
+
+/* # active lines to transfer after Vertical Delay clock */
+#define EPPI_VCOUNT		LCD_Y_RES
+
+/* Samples per Line = 480 (active data) + 45 (padding) */
+#define EPPI_LINE		525
+
+/* Lines per Frame = 272 (active data) + 14 (padding) */
+#define EPPI_FRAME		286
+
+/* FS1 (Hsync) Width (Typical)*/
+#define EPPI_FS1W_HBL		41
+
+/* FS1 (Hsync) Period (Typical) */
+#define EPPI_FS1P_AVPL		EPPI_LINE
+
+/* Horizontal Delay clock after assertion of Hsync (Typical) */
+#define EPPI_HDELAY		43
+
+/* FS2 (Vsync) Width    = FS1 (Hsync) Period * 10 */
+#define EPPI_FS2W_LVB		(EPPI_LINE * 10)
+
+ /* FS2 (Vsync) Period   = FS1 (Hsync) Period * Lines per Frame */
+#define EPPI_FS2P_LAVF		(EPPI_LINE * EPPI_FRAME)
+
+/* Vertical Delay after assertion of Vsync (2 Lines) */
+#define EPPI_VDELAY		12
+
+#define EPPI_CLIP		0xFF00FF00
+
+/* EPPI Control register configuration value for RGB out
+ * - EPPI as Output
+ * GP 2 frame sync mode,
+ * Internal Clock generation disabled, Internal FS generation enabled,
+ * Receives samples on EPPI_CLK rising edge, Transmits samples on EPPI_CLK falling edge,
+ * FS1 & FS2 are active high,
+ * DLEN = 6 (24 bits for RGB888 out) or 5 (18 bits for RGB666 out)
+ * DMA Unpacking disabled when RGB Formatting is enabled, otherwise DMA unpacking enabled
+ * Swapping Enabled,
+ * One (DMA) Channel Mode,
+ * RGB Formatting Enabled for RGB666 output, disabled for RGB888 output
+ * Regular watermark - when FIFO is 100% full,
+ * Urgent watermark - when FIFO is 75% full
+ */
+
+#define EPPI_CONTROL		(0x20136E2E | SWAPEN)
+
+static inline u16 get_eppi_clkdiv(u32 target_ppi_clk)
+{
+	u32 sclk = get_sclk();
+
+	/* EPPI_CLK = (SCLK) / (2 * (EPPI_CLKDIV[15:0] + 1)) */
+
+	return (((sclk / target_ppi_clk) / 2) - 1);
+}
+
+static void config_ppi(struct bfin_bf54xfb_info *fbi)
+{
+
+	u16 eppi_clkdiv = get_eppi_clkdiv(LCD_CLK);
+
+	bfin_write_EPPI0_FS1W_HBL(EPPI_FS1W_HBL);
+	bfin_write_EPPI0_FS1P_AVPL(EPPI_FS1P_AVPL);
+	bfin_write_EPPI0_FS2W_LVB(EPPI_FS2W_LVB);
+	bfin_write_EPPI0_FS2P_LAVF(EPPI_FS2P_LAVF);
+	bfin_write_EPPI0_CLIP(EPPI_CLIP);
+
+	bfin_write_EPPI0_FRAME(EPPI_FRAME);
+	bfin_write_EPPI0_LINE(EPPI_LINE);
+
+	bfin_write_EPPI0_HCOUNT(EPPI_HCOUNT);
+	bfin_write_EPPI0_HDELAY(EPPI_HDELAY);
+	bfin_write_EPPI0_VCOUNT(EPPI_VCOUNT);
+	bfin_write_EPPI0_VDELAY(EPPI_VDELAY);
+
+	bfin_write_EPPI0_CLKDIV(eppi_clkdiv);
+
+/*
+ * DLEN = 6 (24 bits for RGB888 out) or 5 (18 bits for RGB666 out)
+ * RGB Formatting Enabled for RGB666 output, disabled for RGB888 output
+ */
+	if (outp_rgb666)
+		bfin_write_EPPI0_CONTROL((EPPI_CONTROL & ~DLENGTH) | DLEN_18 |
+					 RGB_FMT_EN);
+	else
+		bfin_write_EPPI0_CONTROL(((EPPI_CONTROL & ~DLENGTH) | DLEN_24) &
+					 ~RGB_FMT_EN);
+
+
+}
+
+static int config_dma(struct bfin_bf54xfb_info *fbi)
+{
+
+	set_dma_config(CH_EPPI0,
+		       set_bfin_dma_config(DIR_READ, DMA_FLOW_AUTO,
+					   INTR_DISABLE, DIMENSION_2D,
+					   DATA_SIZE_32));
+	set_dma_x_count(CH_EPPI0, (LCD_X_RES * LCD_BPP) / DMA_BUS_SIZE);
+	set_dma_x_modify(CH_EPPI0, DMA_BUS_SIZE / 8);
+	set_dma_y_count(CH_EPPI0, LCD_Y_RES);
+	set_dma_y_modify(CH_EPPI0, DMA_BUS_SIZE / 8);
+	set_dma_start_addr(CH_EPPI0, (unsigned long)fbi->fb_buffer);
+
+	return 0;
+}
+
+static int request_ports(struct bfin_bf54xfb_info *fbi)
+{
+
+	u16 eppi_req_18[] = EPPI0_18;
+	u16 disp = fbi->mach_info->disp;
+
+	if (gpio_request(disp, NULL)) {
+		printk(KERN_ERR "Requesting GPIO %d faild\n", disp);
+		return -EFAULT;
+	}
+
+	if (peripheral_request_list(eppi_req_18, DRIVER_NAME)) {
+		printk(KERN_ERR "Requesting Peripherals faild\n");
+		gpio_free(disp);
+		return -EFAULT;
+	}
+
+	if (!outp_rgb666) {
+
+		u16 eppi_req_24[] = EPPI0_24;
+
+		if (peripheral_request_list(eppi_req_24, DRIVER_NAME)) {
+			printk(KERN_ERR "Requesting Peripherals faild\n");
+			peripheral_free_list(eppi_req_18);
+			gpio_free(disp);
+			return -EFAULT;
+		}
+	}
+
+	gpio_direction_output(disp);
+	gpio_set_value(disp, 1);
+
+	return 0;
+}
+
+static void free_ports(struct bfin_bf54xfb_info *fbi)
+{
+
+	u16 eppi_req_18[] = EPPI0_18;
+
+	gpio_free(fbi->mach_info->disp);
+
+	peripheral_free_list(eppi_req_18);
+
+	if (!outp_rgb666) {
+		u16 eppi_req_24[] = EPPI0_24;
+		peripheral_free_list(eppi_req_24);
+	}
+}
+
+static int bfin_bf54x_fb_open(struct fb_info *info, int user)
+{
+	struct bfin_bf54xfb_info *fbi = info->par;
+
+	spin_lock(&fbi->lock);
+	fbi->lq043_open_cnt++;
+
+	if (fbi->lq043_open_cnt <= 1) {
+
+		bfin_write_EPPI0_CONTROL(0);
+		SSYNC();
+
+		config_dma(fbi);
+		config_ppi(fbi);
+
+		/* start dma */
+		enable_dma(CH_EPPI0);
+		bfin_write_EPPI0_CONTROL(bfin_read_EPPI0_CONTROL() | EPPI_EN);
+	}
+
+	spin_unlock(&fbi->lock);
+
+	return 0;
+}
+
+static int bfin_bf54x_fb_release(struct fb_info *info, int user)
+{
+	struct bfin_bf54xfb_info *fbi = info->par;
+
+	spin_lock(&fbi->lock);
+
+	fbi->lq043_open_cnt--;
+	fbi->lq043_mmap = 0;
+
+	if (fbi->lq043_open_cnt <= 0) {
+
+		bfin_write_EPPI0_CONTROL(0);
+		SSYNC();
+		disable_dma(CH_EPPI0);
+		memset(fbi->fb_buffer, 0, info->fix.smem_len);
+	}
+
+	spin_unlock(&fbi->lock);
+
+	return 0;
+}
+
+static int bfin_bf54x_fb_check_var(struct fb_var_screeninfo *var,
+				   struct fb_info *info)
+{
+
+	if (var->bits_per_pixel != LCD_BPP) {
+		pr_debug("%s: depth not supported: %u BPP\n", __FUNCTION__,
+			 var->bits_per_pixel);
+		return -EINVAL;
+	}
+
+	if (info->var.xres != var->xres || info->var.yres != var->yres ||
+	    info->var.xres_virtual != var->xres_virtual ||
+	    info->var.yres_virtual != var->yres_virtual) {
+		pr_debug("%s: Resolution not supported: X%u x Y%u \n",
+			 __FUNCTION__, var->xres, var->yres);
+		return -EINVAL;
+	}
+
+	/*
+	 *  Memory limit
+	 */
+
+	if ((info->fix.line_length * var->yres_virtual) > info->fix.smem_len) {
+		pr_debug("%s: Memory Limit requested yres_virtual = %u\n",
+			 __FUNCTION__, var->yres_virtual);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int bfin_bf54x_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+	struct bfin_bf54xfb_info *fbi = info->par;
+
+	/* Test and set the "already mapped" flag atomically under the lock. */
+	spin_lock(&fbi->lock);
+	if (fbi->lq043_mmap) {
+		spin_unlock(&fbi->lock);
+		return -1;
+	}
+	fbi->lq043_mmap = 1;
+	spin_unlock(&fbi->lock);
+
+	vma->vm_start = (unsigned long)(fbi->fb_buffer);
+	vma->vm_end = vma->vm_start + info->fix.smem_len;
+	/* For those who don't understand how mmap works, go read
+	 *   Documentation/nommu-mmap.txt.
+	 * For those that do, you will know that the VM_MAYSHARE flag
+	 * must be set in the vma->vm_flags structure on noMMU
+	 *   Other flags can be set, and are documented in
+	 *   include/linux/mm.h
+	 */
+	vma->vm_flags |= VM_MAYSHARE;
+
+	return 0;
+}
+
+int bfin_bf54x_fb_cursor(struct fb_info *info, struct fb_cursor *cursor)
+{
+	if (nocursor)
+		return 0;
+	else
+		return -EINVAL;	/* just to force soft_cursor() call */
+}
+
+static int bfin_bf54x_fb_setcolreg(u_int regno, u_int red, u_int green,
+				   u_int blue, u_int transp,
+				   struct fb_info *info)
+{
+	if (regno >= BFIN_LCD_NBR_PALETTE_ENTRIES)
+		return -EINVAL;
+
+	if (info->var.grayscale) {
+		/* grayscale = 0.30*R + 0.59*G + 0.11*B */
+		red = green = blue = (red * 77 + green * 151 + blue * 28) >> 8;
+	}
+
+	if (info->fix.visual == FB_VISUAL_TRUECOLOR) {
+
+		u32 value;
+		/* pseudo_palette holds 16 entries: only regno 0..15 fit */
+		if (regno >= 16)
+			return -EINVAL;
+
+		red >>= (16 - info->var.red.length);
+		green >>= (16 - info->var.green.length);
+		blue >>= (16 - info->var.blue.length);
+
+		value = (red << info->var.red.offset) |
+		    (green << info->var.green.offset) |
+		    (blue << info->var.blue.offset);
+		value &= 0xFFFFFF;
+
+		((u32 *) (info->pseudo_palette))[regno] = value;
+
+	}
+
+	return 0;
+}
+
+static struct fb_ops bfin_bf54x_fb_ops = {
+	.owner = THIS_MODULE,
+	.fb_open = bfin_bf54x_fb_open,
+	.fb_release = bfin_bf54x_fb_release,
+	.fb_check_var = bfin_bf54x_fb_check_var,
+	.fb_fillrect = cfb_fillrect,
+	.fb_copyarea = cfb_copyarea,
+	.fb_imageblit = cfb_imageblit,
+	.fb_mmap = bfin_bf54x_fb_mmap,
+	.fb_cursor = bfin_bf54x_fb_cursor,
+	.fb_setcolreg = bfin_bf54x_fb_setcolreg,
+};
+
+#ifndef NO_BL_SUPPORT
+static int bl_get_brightness(struct backlight_device *bd)
+{
+	return 0;
+}
+
+static struct backlight_ops bfin_lq043fb_bl_ops = {
+	.get_brightness = bl_get_brightness,
+};
+
+static struct backlight_device *bl_dev;
+
+static int bfin_lcd_get_power(struct lcd_device *dev)
+{
+	return 0;
+}
+
+static int bfin_lcd_set_power(struct lcd_device *dev, int power)
+{
+	return 0;
+}
+
+static int bfin_lcd_get_contrast(struct lcd_device *dev)
+{
+	return 0;
+}
+
+static int bfin_lcd_set_contrast(struct lcd_device *dev, int contrast)
+{
+
+	return 0;
+}
+
+static int bfin_lcd_check_fb(struct fb_info *fi)
+{
+	if (!fi || (fi == &bfin_bf54x_fb))
+		return 1;
+	return 0;
+}
+
+static struct lcd_ops bfin_lcd_ops = {
+	.get_power = bfin_lcd_get_power,
+	.set_power = bfin_lcd_set_power,
+	.get_contrast = bfin_lcd_get_contrast,
+	.set_contrast = bfin_lcd_set_contrast,
+	.check_fb = bfin_lcd_check_fb,
+};
+
+static struct lcd_device *lcd_dev;
+#endif
+
+static irqreturn_t bfin_bf54x_irq_error(int irq, void *dev_id)
+{
+
+	/*struct bfin_bf54xfb_info *info = (struct bfin_bf54xfb_info *)dev_id;*/
+
+	u16 status = bfin_read_EPPI0_STATUS();
+
+	bfin_write_EPPI0_STATUS(0xFFFF);
+
+	if (status) {
+		bfin_write_EPPI0_CONTROL(bfin_read_EPPI0_CONTROL() & ~EPPI_EN);
+		disable_dma(CH_EPPI0);
+
+		/* start dma */
+		enable_dma(CH_EPPI0);
+		bfin_write_EPPI0_CONTROL(bfin_read_EPPI0_CONTROL() | EPPI_EN);
+		bfin_write_EPPI0_STATUS(0xFFFF);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Claim DMA/ports/IRQ, allocate the frame buffer and register with fbdev.
+ * Not __init: probe may be invoked after init memory has been freed.
+ */
+static int __devinit bfin_bf54x_probe(struct platform_device *pdev)
+{
+	struct bfin_bf54xfb_info *info;
+	struct fb_info *fbinfo;
+	int ret;
+
+	printk(KERN_INFO DRIVER_NAME ": FrameBuffer initializing...\n");
+
+	if (request_dma(CH_EPPI0, "CH_EPPI0") < 0) {
+		printk(KERN_ERR DRIVER_NAME
+		       ": couldn't request CH_EPPI0 DMA\n");
+		ret = -EFAULT;
+		goto out1;
+	}
+
+	fbinfo =
+	    framebuffer_alloc(sizeof(struct bfin_bf54xfb_info), &pdev->dev);
+	if (!fbinfo) {
+		ret = -ENOMEM;
+		goto out2;
+	}
+
+	info = fbinfo->par;
+	info->fb = fbinfo;
+	info->dev = &pdev->dev;
+
+	platform_set_drvdata(pdev, fbinfo);
+
+	strcpy(fbinfo->fix.id, driver_name);
+
+	info->mach_info = pdev->dev.platform_data;
+
+	if (info->mach_info == NULL) {
+		dev_err(&pdev->dev,
+			"no platform data for lcd, cannot attach\n");
+		ret = -EINVAL;
+		goto out3;
+	}
+
+	fbinfo->fix.type = FB_TYPE_PACKED_PIXELS;
+	fbinfo->fix.type_aux = 0;
+	fbinfo->fix.xpanstep = 0;
+	fbinfo->fix.ypanstep = 0;
+	fbinfo->fix.ywrapstep = 0;
+	fbinfo->fix.accel = FB_ACCEL_NONE;
+	fbinfo->fix.visual = FB_VISUAL_TRUECOLOR;
+
+	fbinfo->var.nonstd = 0;
+	fbinfo->var.activate = FB_ACTIVATE_NOW;
+	fbinfo->var.height = info->mach_info->height;
+	fbinfo->var.width = info->mach_info->width;
+	fbinfo->var.accel_flags = 0;
+	fbinfo->var.vmode = FB_VMODE_NONINTERLACED;
+
+	fbinfo->fbops = &bfin_bf54x_fb_ops;
+	fbinfo->flags = FBINFO_FLAG_DEFAULT;
+
+	fbinfo->var.xres = info->mach_info->xres.defval;
+	fbinfo->var.xres_virtual = info->mach_info->xres.defval;
+	fbinfo->var.yres = info->mach_info->yres.defval;
+	fbinfo->var.yres_virtual = info->mach_info->yres.defval;
+	fbinfo->var.bits_per_pixel = info->mach_info->bpp.defval;
+
+	fbinfo->var.upper_margin = 0;
+	fbinfo->var.lower_margin = 0;
+	fbinfo->var.vsync_len = 0;
+
+	fbinfo->var.left_margin = 0;
+	fbinfo->var.right_margin = 0;
+	fbinfo->var.hsync_len = 0;
+
+	fbinfo->var.red.offset = 16;
+	fbinfo->var.green.offset = 8;
+	fbinfo->var.blue.offset = 0;
+	fbinfo->var.transp.offset = 0;
+	fbinfo->var.red.length = 8;
+	fbinfo->var.green.length = 8;
+	fbinfo->var.blue.length = 8;
+	fbinfo->var.transp.length = 0;
+	fbinfo->fix.smem_len = info->mach_info->xres.max *
+	    info->mach_info->yres.max * info->mach_info->bpp.max / 8;
+
+	fbinfo->fix.line_length = fbinfo->var.xres_virtual *
+	    fbinfo->var.bits_per_pixel / 8;
+
+	info->fb_buffer =
+	    dma_alloc_coherent(NULL, fbinfo->fix.smem_len, &info->dma_handle,
+			       GFP_KERNEL);
+	if (NULL == info->fb_buffer) {
+		printk(KERN_ERR DRIVER_NAME
+		       ": couldn't allocate dma buffer.\n");
+		ret = -ENOMEM;
+		goto out3;
+	}
+
+	memset(info->fb_buffer, 0, fbinfo->fix.smem_len);
+
+	fbinfo->screen_base = (void *)info->fb_buffer;
+	fbinfo->fix.smem_start = (int)info->fb_buffer;
+
+	fbinfo->pseudo_palette = kmalloc(sizeof(u32) * 16, GFP_KERNEL);
+	if (!fbinfo->pseudo_palette) {
+		printk(KERN_ERR DRIVER_NAME
+		       ": Fail to allocate pseudo_palette\n");
+		ret = -ENOMEM;
+		goto out4;
+	}
+
+	memset(fbinfo->pseudo_palette, 0, sizeof(u32) * 16);
+
+	if (fb_alloc_cmap(&fbinfo->cmap, BFIN_LCD_NBR_PALETTE_ENTRIES, 0)
+	    < 0) {
+		printk(KERN_ERR DRIVER_NAME
+		       ": Fail to allocate colormap (%d entries)\n",
+		       BFIN_LCD_NBR_PALETTE_ENTRIES);
+		ret = -EFAULT;
+		goto out5;
+	}
+
+	if (request_ports(info)) {
+		printk(KERN_ERR DRIVER_NAME ": couldn't request gpio port.\n");
+		ret = -EFAULT;
+		goto out6;
+	}
+
+	info->irq = platform_get_irq(pdev, 0);
+	if (info->irq < 0) {
+		ret = -EINVAL;
+		goto out7;
+	}
+
+	if (request_irq(info->irq, (void *)bfin_bf54x_irq_error, IRQF_DISABLED,
+			"PPI ERROR", info) < 0) {
+		printk(KERN_ERR DRIVER_NAME
+		       ": unable to request PPI ERROR IRQ\n");
+		ret = -EFAULT;
+		goto out7;
+	}
+
+	if (register_framebuffer(fbinfo) < 0) {
+		printk(KERN_ERR DRIVER_NAME
+		       ": unable to register framebuffer.\n");
+		ret = -EINVAL;
+		goto out8;
+	}
+#ifndef NO_BL_SUPPORT
+	bl_dev =
+	    backlight_device_register("bf54x-bl", NULL, NULL,
+				      &bfin_lq043fb_bl_ops);
+	bl_dev->props.max_brightness = 255;
+
+	lcd_dev = lcd_device_register(DRIVER_NAME, NULL, &bfin_lcd_ops);
+	lcd_dev->props.max_contrast = 255, printk(KERN_INFO "Done.\n");
+#endif
+
+	return 0;
+
+out8:
+	free_irq(info->irq, info);
+out7:
+	free_ports(info);
+out6:
+	fb_dealloc_cmap(&fbinfo->cmap);
+out5:
+	kfree(fbinfo->pseudo_palette);
+out4:
+	dma_free_coherent(NULL, fbinfo->fix.smem_len, info->fb_buffer,
+			  info->dma_handle);
+out3:
+	framebuffer_release(fbinfo);
+out2:
+	free_dma(CH_EPPI0);
+out1:
+	platform_set_drvdata(pdev, NULL);
+
+	return ret;
+}
+
+static int bfin_bf54x_remove(struct platform_device *pdev)
+{
+	struct fb_info *fbinfo = platform_get_drvdata(pdev);
+	struct bfin_bf54xfb_info *info = fbinfo->par;
+
+#ifndef NO_BL_SUPPORT
+	lcd_device_unregister(lcd_dev);
+	backlight_device_unregister(bl_dev);
+#endif
+
+	/* Detach from fbdev first so nobody uses resources freed below. */
+	unregister_framebuffer(fbinfo);
+
+	free_irq(info->irq, info);
+	free_dma(CH_EPPI0);
+
+	if (info->fb_buffer != NULL)
+		dma_free_coherent(NULL, fbinfo->fix.smem_len, info->fb_buffer,
+				  info->dma_handle);
+
+	fb_dealloc_cmap(&fbinfo->cmap);
+	kfree(fbinfo->pseudo_palette);
+	free_ports(info);
+
+	/* info is fbinfo->par: release fbinfo only after the last use of info. */
+	framebuffer_release(fbinfo);
+	printk(KERN_INFO DRIVER_NAME ": Unregister LCD driver.\n");
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int bfin_bf54x_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct fb_info *fbinfo = platform_get_drvdata(pdev);
+	struct bfin_bf54xfb_info *info = fbinfo->par;
+
+	bfin_write_EPPI0_CONTROL(bfin_read_EPPI0_CONTROL() & ~EPPI_EN);
+	disable_dma(CH_EPPI0);
+	bfin_write_EPPI0_STATUS(0xFFFF);
+
+	return 0;
+}
+
+static int bfin_bf54x_resume(struct platform_device *pdev)
+{
+	struct fb_info *fbinfo = platform_get_drvdata(pdev);
+	struct bfin_bf54xfb_info *info = fbinfo->par;
+
+	enable_dma(CH_EPPI0);
+	bfin_write_EPPI0_CONTROL(bfin_read_EPPI0_CONTROL() | EPPI_EN);
+
+	return 0;
+}
+#else
+#define bfin_bf54x_suspend	NULL
+#define bfin_bf54x_resume	NULL
+#endif
+
+static struct platform_driver bfin_bf54x_driver = {
+	.probe = bfin_bf54x_probe,
+	.remove = bfin_bf54x_remove,
+	.suspend = bfin_bf54x_suspend,
+	.resume = bfin_bf54x_resume,
+	.driver = {
+		   .name = DRIVER_NAME,
+		   .owner = THIS_MODULE,
+		   },
+};
+
+static int __init bfin_bf54x_driver_init(void)
+{
+	return platform_driver_register(&bfin_bf54x_driver);
+}
+
+static void __exit bfin_bf54x_driver_cleanup(void)
+{
+	platform_driver_unregister(&bfin_bf54x_driver);
+}
+
+MODULE_DESCRIPTION("Blackfin BF54x TFT LCD Driver");
+MODULE_LICENSE("GPL");
+
+module_init(bfin_bf54x_driver_init);
+module_exit(bfin_bf54x_driver_cleanup);
diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c
index 032210f45be..b07e419b12d 100644
--- a/drivers/video/cfbcopyarea.c
+++ b/drivers/video/cfbcopyarea.c
@@ -45,14 +45,14 @@
 
 static void
 bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src,
-	int src_idx, int bits, unsigned n)
+	int src_idx, int bits, unsigned n, u32 bswapmask)
 {
 	unsigned long first, last;
 	int const shift = dst_idx-src_idx;
 	int left, right;
 
-	first = FB_SHIFT_HIGH(~0UL, dst_idx);
-	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+	first = fb_shifted_pixels_mask_long(dst_idx, bswapmask);
+	last = ~fb_shifted_pixels_mask_long((dst_idx+n) % bits, bswapmask);
 
 	if (!shift) {
 		// Same alignment for source and dest
@@ -94,29 +94,34 @@ bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src
 				FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
 		}
 	} else {
+		/* Different alignment for source and dest */
 		unsigned long d0, d1;
 		int m;
-		// Different alignment for source and dest
 
 		right = shift & (bits - 1);
 		left = -shift & (bits - 1);
+		bswapmask &= shift;
 
 		if (dst_idx+n <= bits) {
 			// Single destination word
 			if (last)
 				first &= last;
+			d0 = FB_READL(src);
+			d0 = fb_rev_pixels_in_long(d0, bswapmask);
 			if (shift > 0) {
 				// Single source word
-				FB_WRITEL( comp( FB_READL(src) >> right, FB_READL(dst), first), dst);
+				d0 >>= right;
 			} else if (src_idx+n <= bits) {
 				// Single source word
-				FB_WRITEL( comp(FB_READL(src) << left, FB_READL(dst), first), dst);
+				d0 <<= left;;
 			} else {
 				// 2 source words
-				d0 = FB_READL(src++);
-				d1 = FB_READL(src);
-				FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), first), dst);
+				d1 = FB_READL(src + 1);
+				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+				d0 = d0<<left | d1>>right;
 			}
+			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
 		} else {
 			// Multiple destination words
 			/** We must always remember the last value read, because in case
@@ -125,25 +130,31 @@ bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src
 			overlap with the current long from SRC. We store this value in
 			'd0'. */
 			d0 = FB_READL(src++);
+			d0 = fb_rev_pixels_in_long(d0, bswapmask);
 			// Leading bits
 			if (shift > 0) {
 				// Single source word
-				FB_WRITEL( comp(d0 >> right, FB_READL(dst), first), dst);
+				d1 = d0;
+				d0 >>= right;
 				dst++;
 				n -= bits - dst_idx;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src++);
-				FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), first), dst);
-				d0 = d1;
+				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+
+				d0 = d0<<left | d1>>right;
 				dst++;
 				n -= bits - dst_idx;
 			}
+			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+			d0 = d1;
 
 			// Main chunk
 			m = n % bits;
 			n /= bits;
-			while (n >= 4) {
+			while ((n >= 4) && !bswapmask) {
 				d1 = FB_READL(src++);
 				FB_WRITEL(d0 << left | d1 >> right, dst++);
 				d0 = d1;
@@ -160,7 +171,10 @@ bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src
 			}
 			while (n--) {
 				d1 = FB_READL(src++);
-				FB_WRITEL(d0 << left | d1 >> right, dst++);
+				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+				d0 = d0 << left | d1 >> right;
+				d0 = fb_rev_pixels_in_long(d0, bswapmask);
+				FB_WRITEL(d0, dst++);
 				d0 = d1;
 			}
 
@@ -168,12 +182,16 @@ bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src
 			if (last) {
 				if (m <= right) {
 					// Single source word
-					FB_WRITEL( comp(d0 << left, FB_READL(dst), last), dst);
+					d0 <<= left;
 				} else {
 					// 2 source words
 					d1 = FB_READL(src);
-					FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), last), dst);
+					d1 = fb_rev_pixels_in_long(d1,
+								bswapmask);
+					d0 = d0<<left | d1>>right;
 				}
+				d0 = fb_rev_pixels_in_long(d0, bswapmask);
+				FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
 			}
 		}
 	}
@@ -185,7 +203,7 @@ bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src
 
 static void
 bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src,
-		int src_idx, int bits, unsigned n)
+		int src_idx, int bits, unsigned n, u32 bswapmask)
 {
 	unsigned long first, last;
 	int shift;
@@ -203,8 +221,8 @@ bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem
 
 	shift = dst_idx-src_idx;
 
-	first = FB_SHIFT_LOW(~0UL, bits - 1 - dst_idx);
-	last = ~(FB_SHIFT_LOW(~0UL, bits - 1 - ((dst_idx-n) % bits)));
+	first = fb_shifted_pixels_mask_long(bits - 1 - dst_idx, bswapmask);
+	last = ~fb_shifted_pixels_mask_long(bits - 1 - ((dst_idx-n) % bits), bswapmask);
 
 	if (!shift) {
 		// Same alignment for source and dest
@@ -247,24 +265,32 @@ bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem
 		}
 	} else {
 		// Different alignment for source and dest
+		unsigned long d0, d1;
+		int m;
 
 		int const left = -shift & (bits-1);
 		int const right = shift & (bits-1);
+		bswapmask &= shift;
 
 		if ((unsigned long)dst_idx+1 >= n) {
 			// Single destination word
 			if (last)
 				first &= last;
+			d0 = FB_READL(src);
 			if (shift < 0) {
 				// Single source word
-				FB_WRITEL( comp( FB_READL(src)<<left, FB_READL(dst), first), dst);
+				d0 <<= left;
 			} else if (1+(unsigned long)src_idx >= n) {
 				// Single source word
-				FB_WRITEL( comp( FB_READL(src)>>right, FB_READL(dst), first), dst);
+				d0 >>= right;
 			} else {
 				// 2 source words
-				FB_WRITEL( comp( (FB_READL(src)>>right | FB_READL(src-1)<<left), FB_READL(dst), first), dst);
+				d1 = FB_READL(src - 1);
+				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+				d0 = d0>>right | d1<<left;
 			}
+			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
 		} else {
 			// Multiple destination words
 			/** We must always remember the last value read, because in case
@@ -272,27 +298,30 @@ bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem
 			1bpp), we always collect one full long for DST and that might
 			overlap with the current long from SRC. We store this value in
 			'd0'. */
-			unsigned long d0, d1;
-			int m;
 
 			d0 = FB_READL(src--);
+			d0 = fb_rev_pixels_in_long(d0, bswapmask);
 			// Leading bits
 			if (shift < 0) {
 				// Single source word
-				FB_WRITEL( comp( (d0 << left), FB_READL(dst), first), dst);
+				d1 = d0;
+				d0 <<= left;
 			} else {
 				// 2 source words
 				d1 = FB_READL(src--);
-				FB_WRITEL( comp( (d0>>right | d1<<left), FB_READL(dst), first), dst);
-				d0 = d1;
+				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+				d0 = d0>>right | d1<<left;
 			}
+			d0 = fb_rev_pixels_in_long(d0, bswapmask);
+			FB_WRITEL(comp(d0, FB_READL(dst), first), dst);
+			d0 = d1;
 			dst--;
 			n -= dst_idx+1;
 
 			// Main chunk
 			m = n % bits;
 			n /= bits;
-			while (n >= 4) {
+			while ((n >= 4) && !bswapmask) {
 				d1 = FB_READL(src--);
 				FB_WRITEL(d0 >> right | d1 << left, dst--);
 				d0 = d1;
@@ -309,7 +338,10 @@ bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem
 			}
 			while (n--) {
 				d1 = FB_READL(src--);
-				FB_WRITEL(d0 >> right | d1 << left, dst--);
+				d1 = fb_rev_pixels_in_long(d1, bswapmask);
+				d0 = d0 >> right | d1 << left;
+				d0 = fb_rev_pixels_in_long(d0, bswapmask);
+				FB_WRITEL(d0, dst--);
 				d0 = d1;
 			}
 
@@ -317,12 +349,16 @@ bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem
 			if (last) {
 				if (m <= left) {
 					// Single source word
-					FB_WRITEL( comp(d0 >> right, FB_READL(dst), last), dst);
+					d0 >>= right;
 				} else {
 					// 2 source words
 					d1 = FB_READL(src);
-					FB_WRITEL( comp(d0>>right | d1<<left, FB_READL(dst), last), dst);
+					d1 = fb_rev_pixels_in_long(d1,
+								bswapmask);
+					d0 = d0>>right | d1<<left;
 				}
+				d0 = fb_rev_pixels_in_long(d0, bswapmask);
+				FB_WRITEL(comp(d0, FB_READL(dst), last), dst);
 			}
 		}
 	}
@@ -336,6 +372,7 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 	unsigned long __iomem *dst = NULL, *src = NULL;
 	int bits = BITS_PER_LONG, bytes = bits >> 3;
 	int dst_idx = 0, src_idx = 0, rev_copy = 0;
+	u32 bswapmask = fb_compute_bswapmask(p);
 
 	if (p->state != FBINFO_STATE_RUNNING)
 		return;
@@ -368,7 +405,7 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 			src += src_idx >> (ffs(bits) - 1);
 			src_idx &= (bytes - 1);
 			bitcpy_rev(dst, dst_idx, src, src_idx, bits,
-				width*p->var.bits_per_pixel);
+				width*p->var.bits_per_pixel, bswapmask);
 		}
 	} else {
 		while (height--) {
@@ -377,7 +414,7 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 			src += src_idx >> (ffs(bits) - 1);
 			src_idx &= (bytes - 1);
 			bitcpy(dst, dst_idx, src, src_idx, bits,
-				width*p->var.bits_per_pixel);
+				width*p->var.bits_per_pixel, bswapmask);
 			dst_idx += bits_per_line;
 			src_idx += bits_per_line;
 		}
diff --git a/drivers/video/cfbfillrect.c b/drivers/video/cfbfillrect.c
index 71623b4f8ca..23d70a12e4d 100644
--- a/drivers/video/cfbfillrect.c
+++ b/drivers/video/cfbfillrect.c
@@ -36,15 +36,16 @@
      */
 
 static void
-bitfill_aligned(unsigned long __iomem *dst, int dst_idx, unsigned long pat, unsigned n, int bits)
+bitfill_aligned(unsigned long __iomem *dst, int dst_idx, unsigned long pat,
+		unsigned n, int bits, u32 bswapmask)
 {
 	unsigned long first, last;
 
 	if (!n)
 		return;
 
-	first = FB_SHIFT_HIGH(~0UL, dst_idx);
-	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+	first = fb_shifted_pixels_mask_long(dst_idx, bswapmask);
+	last = ~fb_shifted_pixels_mask_long((dst_idx+n) % bits, bswapmask);
 
 	if (dst_idx+n <= bits) {
 		// Single word
@@ -146,7 +147,8 @@ bitfill_unaligned(unsigned long __iomem *dst, int dst_idx, unsigned long pat,
      *  Aligned pattern invert using 32/64-bit memory accesses
      */
 static void
-bitfill_aligned_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat, unsigned n, int bits)
+bitfill_aligned_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat,
+		unsigned n, int bits, u32 bswapmask)
 {
 	unsigned long val = pat, dat;
 	unsigned long first, last;
@@ -154,8 +156,8 @@ bitfill_aligned_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat,
 	if (!n)
 		return;
 
-	first = FB_SHIFT_HIGH(~0UL, dst_idx);
-	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
+	first = fb_shifted_pixels_mask_long(dst_idx, bswapmask);
+	last = ~fb_shifted_pixels_mask_long((dst_idx+n) % bits, bswapmask);
 
 	if (dst_idx+n <= bits) {
 		// Single word
@@ -303,8 +305,10 @@ void cfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
 	if (p->fbops->fb_sync)
 		p->fbops->fb_sync(p);
 	if (!left) {
+		u32 bswapmask = fb_compute_bswapmask(p);
 		void (*fill_op32)(unsigned long __iomem *dst, int dst_idx,
-		                  unsigned long pat, unsigned n, int bits) = NULL;
+		                  unsigned long pat, unsigned n, int bits,
+				  u32 bswapmask) = NULL;
 
 		switch (rect->rop) {
 		case ROP_XOR:
@@ -321,7 +325,7 @@ void cfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
 		while (height--) {
 			dst += dst_idx >> (ffs(bits) - 1);
 			dst_idx &= (bits - 1);
-			fill_op32(dst, dst_idx, pat, width*bpp, bits);
+			fill_op32(dst, dst_idx, pat, width*bpp, bits, bswapmask);
 			dst_idx += p->fix.line_length*8;
 		}
 	} else {
diff --git a/drivers/video/cfbimgblt.c b/drivers/video/cfbimgblt.c
index 261004473c8..f598907b42a 100644
--- a/drivers/video/cfbimgblt.c
+++ b/drivers/video/cfbimgblt.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <linux/fb.h>
 #include <asm/types.h>
+#include "fb_draw.h"
 
 #define DEBUG
 
@@ -87,6 +88,7 @@ static inline void color_imageblit(const struct fb_image *image,
 	u32 null_bits = 32 - bpp;
 	u32 *palette = (u32 *) p->pseudo_palette;
 	const u8 *src = image->data;
+	u32 bswapmask = fb_compute_bswapmask(p);
 
 	dst2 = (u32 __iomem *) dst1;
 	for (i = image->height; i--; ) {
@@ -96,7 +98,7 @@ static inline void color_imageblit(const struct fb_image *image,
 		val = 0;
 		
 		if (start_index) {
-			u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0, start_index));
+			u32 start_mask = ~fb_shifted_pixels_mask_u32(start_index, bswapmask);
 			val = FB_READL(dst) & start_mask;
 			shift = start_index;
 		}
@@ -107,7 +109,7 @@ static inline void color_imageblit(const struct fb_image *image,
 			else
 				color = *src;
 			color <<= FB_LEFT_POS(bpp);
-			val |= FB_SHIFT_HIGH(color, shift);
+			val |= FB_SHIFT_HIGH(color, shift ^ bswapmask);
 			if (shift >= null_bits) {
 				FB_WRITEL(val, dst++);
 	
@@ -119,7 +121,7 @@ static inline void color_imageblit(const struct fb_image *image,
 			src++;
 		}
 		if (shift) {
-			u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
+			u32 end_mask = fb_shifted_pixels_mask_u32(shift, bswapmask);
 
 			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
 		}
@@ -147,7 +149,8 @@ static inline void slow_imageblit(const struct fb_image *image, struct fb_info *
 	u32 spitch = (image->width+7)/8;
 	const u8 *src = image->data, *s;
 	u32 i, j, l;
-	
+	u32 bswapmask = fb_compute_bswapmask(p);
+
 	dst2 = (u32 __iomem *) dst1;
 	fgcolor <<= FB_LEFT_POS(bpp);
 	bgcolor <<= FB_LEFT_POS(bpp);
@@ -161,7 +164,7 @@ static inline void slow_imageblit(const struct fb_image *image, struct fb_info *
 
 		/* write leading bits */
 		if (start_index) {
-			u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0,start_index));
+			u32 start_mask = ~fb_shifted_pixels_mask_u32(start_index, bswapmask);
 			val = FB_READL(dst) & start_mask;
 			shift = start_index;
 		}
@@ -169,7 +172,7 @@ static inline void slow_imageblit(const struct fb_image *image, struct fb_info *
 		while (j--) {
 			l--;
 			color = (*s & (1 << l)) ? fgcolor : bgcolor;
-			val |= FB_SHIFT_HIGH(color, shift);
+			val |= FB_SHIFT_HIGH(color, shift ^ bswapmask);
 			
 			/* Did the bitshift spill bits to the next long? */
 			if (shift >= null_bits) {
@@ -184,7 +187,7 @@ static inline void slow_imageblit(const struct fb_image *image, struct fb_info *
 
 		/* write trailing bits */
  		if (shift) {
-			u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
+			u32 end_mask = fb_shifted_pixels_mask_u32(shift, bswapmask);
 
 			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
 		}
diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c
index 8269d704ab2..ce22bf5de35 100644
--- a/drivers/video/cirrusfb.c
+++ b/drivers/video/cirrusfb.c
@@ -45,7 +45,6 @@
 #include <linux/delay.h>
 #include <linux/fb.h>
 #include <linux/init.h>
-#include <linux/selection.h>
 #include <asm/pgtable.h>
 
 #ifdef CONFIG_ZORRO
@@ -59,14 +58,13 @@
 #endif
 #ifdef CONFIG_PPC_PREP
 #include <asm/machdep.h>
-#define isPReP (machine_is(prep))
+#define isPReP machine_is(prep)
 #else
 #define isPReP 0
 #endif
 
-#include "video/vga.h"
-#include "video/cirrus.h"
-
+#include <video/vga.h>
+#include <video/cirrus.h>
 
 /*****************************************************************
  *
@@ -82,7 +80,8 @@
 
 /* debug output */
 #ifdef CIRRUSFB_DEBUG
-#define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args)
+#define DPRINTK(fmt, args...) \
+	printk(KERN_DEBUG "%s: " fmt, __FUNCTION__ , ## args)
 #else
 #define DPRINTK(fmt, args...)
 #endif
@@ -90,19 +89,15 @@
 /* debugging assertions */
 #ifndef CIRRUSFB_NDEBUG
 #define assert(expr) \
-        if(!(expr)) { \
-        printk( "Assertion failed! %s,%s,%s,line=%d\n",\
-        #expr,__FILE__,__FUNCTION__,__LINE__); \
-        }
+	if (!(expr)) { \
+		printk("Assertion failed! %s,%s,%s,line=%d\n", \
+		#expr, __FILE__, __FUNCTION__, __LINE__); \
+	}
 #else
 #define assert(expr)
 #endif
 
-#define MB_ (1024*1024)
-#define KB_ (1024)
-
-#define MAX_NUM_BOARDS 7
-
+#define MB_ (1024 * 1024)
 
 /*****************************************************************
  *
@@ -111,7 +106,7 @@
  */
 
 /* board types */
-typedef enum {
+enum cirrus_board {
 	BT_NONE = 0,
 	BT_SD64,
 	BT_PICCOLO,
@@ -121,13 +116,12 @@ typedef enum {
 	BT_ALPINE,	/* GD543x/4x */
 	BT_GD5480,
 	BT_LAGUNA,	/* GD546x */
-} cirrusfb_board_t;
-
+};
 
 /*
  * per-board-type information, used for enumerating and abstracting
  * chip-specific information
- * NOTE: MUST be in the same order as cirrusfb_board_t in order to
+ * NOTE: MUST be in the same order as enum cirrus_board in order to
  * use direct indexing on this array
  * NOTE: '__initdata' cannot be used as some of this info
  * is required at runtime.  Maybe separate into an init-only and
@@ -139,7 +133,8 @@ static const struct cirrusfb_board_info_rec {
 	/* for  1/4bpp, 8bpp 15/16bpp, 24bpp, 32bpp - numbers from xorg code */
 	bool init_sr07 : 1; /* init SR07 during init_vgachip() */
 	bool init_sr1f : 1; /* write SR1F during init_vgachip() */
-	bool scrn_start_bit19 : 1; /* construct bit 19 of screen start address */
+	/* construct bit 19 of screen start address */
+	bool scrn_start_bit19 : 1;
 
 	/* initial SR07 value, then for each mode */
 	unsigned char sr07;
@@ -261,30 +256,28 @@ static const struct cirrusfb_board_info_rec {
 	}
 };
 
-
 #ifdef CONFIG_PCI
 #define CHIP(id, btype) \
 	{ PCI_VENDOR_ID_CIRRUS, id, PCI_ANY_ID, PCI_ANY_ID, 0, 0, (btype) }
 
 static struct pci_device_id cirrusfb_pci_table[] = {
-	CHIP( PCI_DEVICE_ID_CIRRUS_5436, BT_ALPINE ),
-	CHIP( PCI_DEVICE_ID_CIRRUS_5434_8, BT_ALPINE ),
-	CHIP( PCI_DEVICE_ID_CIRRUS_5434_4, BT_ALPINE ),
-	CHIP( PCI_DEVICE_ID_CIRRUS_5430, BT_ALPINE ), /* GD-5440 is same id */
-	CHIP( PCI_DEVICE_ID_CIRRUS_7543, BT_ALPINE ),
-	CHIP( PCI_DEVICE_ID_CIRRUS_7548, BT_ALPINE ),
-	CHIP( PCI_DEVICE_ID_CIRRUS_5480, BT_GD5480 ), /* MacPicasso likely */
-	CHIP( PCI_DEVICE_ID_CIRRUS_5446, BT_PICASSO4 ), /* Picasso 4 is 5446 */
-	CHIP( PCI_DEVICE_ID_CIRRUS_5462, BT_LAGUNA ), /* CL Laguna */
-	CHIP( PCI_DEVICE_ID_CIRRUS_5464, BT_LAGUNA ), /* CL Laguna 3D */
-	CHIP( PCI_DEVICE_ID_CIRRUS_5465, BT_LAGUNA ), /* CL Laguna 3DA*/
+	CHIP(PCI_DEVICE_ID_CIRRUS_5436, BT_ALPINE),
+	CHIP(PCI_DEVICE_ID_CIRRUS_5434_8, BT_ALPINE),
+	CHIP(PCI_DEVICE_ID_CIRRUS_5434_4, BT_ALPINE),
+	CHIP(PCI_DEVICE_ID_CIRRUS_5430, BT_ALPINE), /* GD-5440 is same id */
+	CHIP(PCI_DEVICE_ID_CIRRUS_7543, BT_ALPINE),
+	CHIP(PCI_DEVICE_ID_CIRRUS_7548, BT_ALPINE),
+	CHIP(PCI_DEVICE_ID_CIRRUS_5480, BT_GD5480), /* MacPicasso likely */
+	CHIP(PCI_DEVICE_ID_CIRRUS_5446, BT_PICASSO4), /* Picasso 4 is 5446 */
+	CHIP(PCI_DEVICE_ID_CIRRUS_5462, BT_LAGUNA), /* CL Laguna */
+	CHIP(PCI_DEVICE_ID_CIRRUS_5464, BT_LAGUNA), /* CL Laguna 3D */
+	CHIP(PCI_DEVICE_ID_CIRRUS_5465, BT_LAGUNA), /* CL Laguna 3DA*/
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, cirrusfb_pci_table);
 #undef CHIP
 #endif /* CONFIG_PCI */
 
-
 #ifdef CONFIG_ZORRO
 static const struct zorro_device_id cirrusfb_zorro_table[] = {
 	{
@@ -294,7 +287,7 @@ static const struct zorro_device_id cirrusfb_zorro_table[] = {
 		.id		= ZORRO_PROD_HELFRICH_PICCOLO_RAM,
 		.driver_data	= BT_PICCOLO,
 	}, {
-		.id		= ZORRO_PROD_VILLAGE_TRONIC_PICASSO_II_II_PLUS_RAM,
+		.id	= ZORRO_PROD_VILLAGE_TRONIC_PICASSO_II_II_PLUS_RAM,
 		.driver_data	= BT_PICASSO,
 	}, {
 		.id		= ZORRO_PROD_GVP_EGS_28_24_SPECTRUM_RAM,
@@ -333,12 +326,7 @@ static const struct {
 };
 #endif /* CONFIG_ZORRO */
 
-
 struct cirrusfb_regs {
-	__u32 line_length;	/* in BYTES! */
-	__u32 visual;
-	__u32 type;
-
 	long freq;
 	long nom;
 	long den;
@@ -364,37 +352,23 @@ struct cirrusfb_regs {
 	long VertBlankEnd;
 };
 
-
-
 #ifdef CIRRUSFB_DEBUG
-typedef enum {
-        CRT,
-        SEQ
-} cirrusfb_dbg_reg_class_t;
-#endif                          /* CIRRUSFB_DEBUG */
-
-
-
+enum cirrusfb_dbg_reg_class {
+	CRT,
+	SEQ
+};
+#endif		/* CIRRUSFB_DEBUG */
 
 /* info about board */
 struct cirrusfb_info {
-	struct fb_info *info;
-
-	u8 __iomem *fbmem;
 	u8 __iomem *regbase;
-	u8 __iomem *mem;
-	unsigned long size;
-	cirrusfb_board_t btype;
+	enum cirrus_board btype;
 	unsigned char SFR;	/* Shadow of special function register */
 
-	unsigned long fbmem_phys;
-	unsigned long fbregs_phys;
-
 	struct cirrusfb_regs currentmode;
 	int blank_mode;
 
 	u32	pseudo_palette[16];
-	struct { u8 red, green, blue, pad; } palette[256];
 
 #ifdef CONFIG_ZORRO
 	struct zorro_dev *zdev;
@@ -402,12 +376,11 @@ struct cirrusfb_info {
 #ifdef CONFIG_PCI
 	struct pci_dev *pdev;
 #endif
-	void (*unmap)(struct cirrusfb_info *cinfo);
+	void (*unmap)(struct fb_info *info);
 };
 
-
 static unsigned cirrusfb_def_mode = 1;
-static int noaccel = 0;
+static int noaccel;
 
 /*
  *    Predefined Video Modes
@@ -441,7 +414,7 @@ static const struct {
 			.lower_margin	= 8,
 			.hsync_len	= 96,
 			.vsync_len	= 4,
-			.sync		= FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+			.sync	= FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
 			.vmode		= FB_VMODE_NONINTERLACED
 		 }
 	}, {
@@ -502,27 +475,29 @@ static const struct {
 /****************************************************************************/
 /**** BEGIN PROTOTYPES ******************************************************/
 
-
 /*--- Interface used by the world ------------------------------------------*/
-static int cirrusfb_init (void);
+static int cirrusfb_init(void);
 #ifndef MODULE
-static int cirrusfb_setup (char *options);
+static int cirrusfb_setup(char *options);
 #endif
 
-static int cirrusfb_open (struct fb_info *info, int user);
-static int cirrusfb_release (struct fb_info *info, int user);
-static int cirrusfb_setcolreg (unsigned regno, unsigned red, unsigned green,
-			       unsigned blue, unsigned transp,
-			       struct fb_info *info);
-static int cirrusfb_check_var (struct fb_var_screeninfo *var,
-			       struct fb_info *info);
-static int cirrusfb_set_par (struct fb_info *info);
-static int cirrusfb_pan_display (struct fb_var_screeninfo *var,
-				 struct fb_info *info);
-static int cirrusfb_blank (int blank_mode, struct fb_info *info);
-static void cirrusfb_fillrect (struct fb_info *info, const struct fb_fillrect *region);
-static void cirrusfb_copyarea(struct fb_info *info, const struct fb_copyarea *area);
-static void cirrusfb_imageblit(struct fb_info *info, const struct fb_image *image);
+static int cirrusfb_open(struct fb_info *info, int user);
+static int cirrusfb_release(struct fb_info *info, int user);
+static int cirrusfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			      unsigned blue, unsigned transp,
+			      struct fb_info *info);
+static int cirrusfb_check_var(struct fb_var_screeninfo *var,
+			      struct fb_info *info);
+static int cirrusfb_set_par(struct fb_info *info);
+static int cirrusfb_pan_display(struct fb_var_screeninfo *var,
+				struct fb_info *info);
+static int cirrusfb_blank(int blank_mode, struct fb_info *info);
+static void cirrusfb_fillrect(struct fb_info *info,
+			      const struct fb_fillrect *region);
+static void cirrusfb_copyarea(struct fb_info *info,
+			      const struct fb_copyarea *area);
+static void cirrusfb_imageblit(struct fb_info *info,
+			       const struct fb_image *image);
 
 /* function table of the above functions */
 static struct fb_ops cirrusfb_ops = {
@@ -540,68 +515,68 @@ static struct fb_ops cirrusfb_ops = {
 };
 
 /*--- Hardware Specific Routines -------------------------------------------*/
-static int cirrusfb_decode_var (const struct fb_var_screeninfo *var,
+static int cirrusfb_decode_var(const struct fb_var_screeninfo *var,
 				struct cirrusfb_regs *regs,
-				const struct fb_info *info);
+				struct fb_info *info);
 /*--- Internal routines ----------------------------------------------------*/
-static void init_vgachip (struct cirrusfb_info *cinfo);
-static void switch_monitor (struct cirrusfb_info *cinfo, int on);
-static void WGen (const struct cirrusfb_info *cinfo,
-		  int regnum, unsigned char val);
-static unsigned char RGen (const struct cirrusfb_info *cinfo, int regnum);
-static void AttrOn (const struct cirrusfb_info *cinfo);
-static void WHDR (const struct cirrusfb_info *cinfo, unsigned char val);
-static void WSFR (struct cirrusfb_info *cinfo, unsigned char val);
-static void WSFR2 (struct cirrusfb_info *cinfo, unsigned char val);
-static void WClut (struct cirrusfb_info *cinfo, unsigned char regnum, unsigned char red,
-		   unsigned char green,
-		   unsigned char blue);
+static void init_vgachip(struct fb_info *info);
+static void switch_monitor(struct cirrusfb_info *cinfo, int on);
+static void WGen(const struct cirrusfb_info *cinfo,
+		 int regnum, unsigned char val);
+static unsigned char RGen(const struct cirrusfb_info *cinfo, int regnum);
+static void AttrOn(const struct cirrusfb_info *cinfo);
+static void WHDR(const struct cirrusfb_info *cinfo, unsigned char val);
+static void WSFR(struct cirrusfb_info *cinfo, unsigned char val);
+static void WSFR2(struct cirrusfb_info *cinfo, unsigned char val);
+static void WClut(struct cirrusfb_info *cinfo, unsigned char regnum,
+		  unsigned char red, unsigned char green, unsigned char blue);
 #if 0
-static void RClut (struct cirrusfb_info *cinfo, unsigned char regnum, unsigned char *red,
-		   unsigned char *green,
-		   unsigned char *blue);
+static void RClut(struct cirrusfb_info *cinfo, unsigned char regnum,
+		  unsigned char *red, unsigned char *green,
+		  unsigned char *blue);
 #endif
-static void cirrusfb_WaitBLT (u8 __iomem *regbase);
-static void cirrusfb_BitBLT (u8 __iomem *regbase, int bits_per_pixel,
-			     u_short curx, u_short cury,
-			     u_short destx, u_short desty,
-			     u_short width, u_short height,
-			     u_short line_length);
-static void cirrusfb_RectFill (u8 __iomem *regbase, int bits_per_pixel,
-			       u_short x, u_short y,
-			       u_short width, u_short height,
-			       u_char color, u_short line_length);
-
-static void bestclock (long freq, long *best,
-		       long *nom, long *den,
-		       long *div, long maxfreq);
+static void cirrusfb_WaitBLT(u8 __iomem *regbase);
+static void cirrusfb_BitBLT(u8 __iomem *regbase, int bits_per_pixel,
+			    u_short curx, u_short cury,
+			    u_short destx, u_short desty,
+			    u_short width, u_short height,
+			    u_short line_length);
+static void cirrusfb_RectFill(u8 __iomem *regbase, int bits_per_pixel,
+			      u_short x, u_short y,
+			      u_short width, u_short height,
+			      u_char color, u_short line_length);
+
+static void bestclock(long freq, long *best,
+		      long *nom, long *den,
+		      long *div, long maxfreq);
 
 #ifdef CIRRUSFB_DEBUG
-static void cirrusfb_dump (void);
-static void cirrusfb_dbg_reg_dump (caddr_t regbase);
-static void cirrusfb_dbg_print_regs (caddr_t regbase, cirrusfb_dbg_reg_class_t reg_class,...);
-static void cirrusfb_dbg_print_byte (const char *name, unsigned char val);
+static void cirrusfb_dump(void);
+static void cirrusfb_dbg_reg_dump(caddr_t regbase);
+static void cirrusfb_dbg_print_regs(caddr_t regbase,
+				    enum cirrusfb_dbg_reg_class reg_class, ...);
+static void cirrusfb_dbg_print_byte(const char *name, unsigned char val);
 #endif /* CIRRUSFB_DEBUG */
 
 /*** END   PROTOTYPES ********************************************************/
 /*****************************************************************************/
 /*** BEGIN Interface Used by the World ***************************************/
 
-static int opencount = 0;
+static int opencount;
 
 /*--- Open /dev/fbx ---------------------------------------------------------*/
-static int cirrusfb_open (struct fb_info *info, int user)
+static int cirrusfb_open(struct fb_info *info, int user)
 {
 	if (opencount++ == 0)
-		switch_monitor (info->par, 1);
+		switch_monitor(info->par, 1);
 	return 0;
 }
 
 /*--- Close /dev/fbx --------------------------------------------------------*/
-static int cirrusfb_release (struct fb_info *info, int user)
+static int cirrusfb_release(struct fb_info *info, int user)
 {
 	if (--opencount == 0)
-		switch_monitor (info->par, 0);
+		switch_monitor(info->par, 0);
 	return 0;
 }
 
@@ -610,11 +585,11 @@ static int cirrusfb_release (struct fb_info *info, int user)
 /**** BEGIN Hardware specific Routines **************************************/
 
 /* Get a good MCLK value */
-static long cirrusfb_get_mclk (long freq, int bpp, long *div)
+static long cirrusfb_get_mclk(long freq, int bpp, long *div)
 {
 	long mclk;
 
-	assert (div != NULL);
+	assert(div != NULL);
 
 	/* Calculate MCLK, in case VCLK is high enough to require > 50MHz.
 	 * Assume a 64-bit data path for now.  The formula is:
@@ -624,23 +599,23 @@ static long cirrusfb_get_mclk (long freq, int bpp, long *div)
 	mclk = (mclk * 12) / 10;
 	if (mclk < 50000)
 		mclk = 50000;
-	DPRINTK ("Use MCLK of %ld kHz\n", mclk);
+	DPRINTK("Use MCLK of %ld kHz\n", mclk);
 
 	/* Calculate value for SR1F.  Multiply by 2 so we can round up. */
 	mclk = ((mclk * 16) / 14318);
 	mclk = (mclk + 1) / 2;
-	DPRINTK ("Set SR1F[5:0] to 0x%lx\n", mclk);
+	DPRINTK("Set SR1F[5:0] to 0x%lx\n", mclk);
 
 	/* Determine if we should use MCLK instead of VCLK, and if so, what we
 	   * should divide it by to get VCLK */
 	switch (freq) {
 	case 24751 ... 25249:
 		*div = 2;
-		DPRINTK ("Using VCLK = MCLK/2\n");
+		DPRINTK("Using VCLK = MCLK/2\n");
 		break;
 	case 49501 ... 50499:
 		*div = 1;
-		DPRINTK ("Using VCLK = MCLK\n");
+		DPRINTK("Using VCLK = MCLK\n");
 		break;
 	default:
 		*div = 0;
@@ -653,7 +628,6 @@ static long cirrusfb_get_mclk (long freq, int bpp, long *div)
 static int cirrusfb_check_var(struct fb_var_screeninfo *var,
 			      struct fb_info *info)
 {
-	struct cirrusfb_info *cinfo = info->par;
 	int nom, den;		/* translyting from pixels->bytes */
 	int yres, i;
 	static struct { int xres, yres; } modes[] =
@@ -665,63 +639,55 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var,
 	  { -1, -1 } };
 
 	switch (var->bits_per_pixel) {
-	case 0 ... 1:
-		var->bits_per_pixel = 1;
+	case 1:
 		nom = 4;
 		den = 8;
 		break;		/* 8 pixel per byte, only 1/4th of mem usable */
-	case 2 ... 8:
-		var->bits_per_pixel = 8;
-		nom = 1;
+	case 8:
+	case 16:
+	case 24:
+	case 32:
+		nom = var->bits_per_pixel / 8;
 		den = 1;
 		break;		/* 1 pixel == 1 byte */
-	case 9 ... 16:
-		var->bits_per_pixel = 16;
-		nom = 2;
-		den = 1;
-		break;		/* 2 bytes per pixel */
-	case 17 ... 24:
-		var->bits_per_pixel = 24;
-		nom = 3;
-		den = 1;
-		break;		/* 3 bytes per pixel */
-	case 25 ... 32:
-		var->bits_per_pixel = 32;
-		nom = 4;
-		den = 1;
-		break;		/* 4 bytes per pixel */
 	default:
-		printk ("cirrusfb: mode %dx%dx%d rejected...color depth not supported.\n",
+		printk(KERN_ERR "cirrusfb: mode %dx%dx%d rejected..."
+			"color depth not supported.\n",
 			var->xres, var->yres, var->bits_per_pixel);
-		DPRINTK ("EXIT - EINVAL error\n");
+		DPRINTK("EXIT - EINVAL error\n");
 		return -EINVAL;
 	}
 
-	if (var->xres * nom / den * var->yres > cinfo->size) {
-		printk ("cirrusfb: mode %dx%dx%d rejected...resolution too high to fit into video memory!\n",
+	if (var->xres * nom / den * var->yres > info->screen_size) {
+		printk(KERN_ERR "cirrusfb: mode %dx%dx%d rejected..."
+			"resolution too high to fit into video memory!\n",
 			var->xres, var->yres, var->bits_per_pixel);
-		DPRINTK ("EXIT - EINVAL error\n");
+		DPRINTK("EXIT - EINVAL error\n");
 		return -EINVAL;
 	}
 
 	/* use highest possible virtual resolution */
 	if (var->xres_virtual == -1 &&
 	    var->yres_virtual == -1) {
-		printk ("cirrusfb: using maximum available virtual resolution\n");
+		printk(KERN_INFO
+		     "cirrusfb: using maximum available virtual resolution\n");
 		for (i = 0; modes[i].xres != -1; i++) {
-			if (modes[i].xres * nom / den * modes[i].yres < cinfo->size / 2)
+			int size = modes[i].xres * nom / den * modes[i].yres;
+			if (size < info->screen_size / 2)
 				break;
 		}
 		if (modes[i].xres == -1) {
-			printk ("cirrusfb: could not find a virtual resolution that fits into video memory!!\n");
-			DPRINTK ("EXIT - EINVAL error\n");
+			printk(KERN_ERR "cirrusfb: could not find a virtual "
+				"resolution that fits into video memory!!\n");
+			DPRINTK("EXIT - EINVAL error\n");
 			return -EINVAL;
 		}
 		var->xres_virtual = modes[i].xres;
 		var->yres_virtual = modes[i].yres;
 
-		printk ("cirrusfb: virtual resolution set to maximum of %dx%d\n",
-			var->xres_virtual, var->yres_virtual);
+		printk(KERN_INFO "cirrusfb: virtual resolution set to "
+			"maximum of %dx%d\n", var->xres_virtual,
+			var->yres_virtual);
 	}
 
 	if (var->xres_virtual < var->xres)
@@ -744,23 +710,19 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var,
 	case 1:
 		var->red.offset = 0;
 		var->red.length = 1;
-		var->green.offset = 0;
-		var->green.length = 1;
-		var->blue.offset = 0;
-		var->blue.length = 1;
+		var->green = var->red;
+		var->blue = var->red;
 		break;
 
 	case 8:
 		var->red.offset = 0;
 		var->red.length = 6;
-		var->green.offset = 0;
-		var->green.length = 6;
-		var->blue.offset = 0;
-		var->blue.length = 6;
+		var->green = var->red;
+		var->blue = var->red;
 		break;
 
 	case 16:
-		if(isPReP) {
+		if (isPReP) {
 			var->red.offset = 2;
 			var->green.offset = -3;
 			var->blue.offset = 8;
@@ -775,22 +737,8 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var,
 		break;
 
 	case 24:
-		if(isPReP) {
-			var->red.offset = 8;
-			var->green.offset = 16;
-			var->blue.offset = 24;
-		} else {
-			var->red.offset = 16;
-			var->green.offset = 8;
-			var->blue.offset = 0;
-		}
-		var->red.length = 8;
-		var->green.length = 8;
-		var->blue.length = 8;
-		break;
-
 	case 32:
-		if(isPReP) {
+		if (isPReP) {
 			var->red.offset = 8;
 			var->green.offset = 16;
 			var->blue.offset = 24;
@@ -825,54 +773,42 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var,
 		yres = (yres + 1) / 2;
 
 	if (yres >= 1280) {
-		printk (KERN_WARNING "cirrusfb: ERROR: VerticalTotal >= 1280; special treatment required! (TODO)\n");
-		DPRINTK ("EXIT - EINVAL error\n");
+		printk(KERN_ERR "cirrusfb: ERROR: VerticalTotal >= 1280; "
+			"special treatment required! (TODO)\n");
+		DPRINTK("EXIT - EINVAL error\n");
 		return -EINVAL;
 	}
 
 	return 0;
 }
 
-static int cirrusfb_decode_var (const struct fb_var_screeninfo *var,
+static int cirrusfb_decode_var(const struct fb_var_screeninfo *var,
 				struct cirrusfb_regs *regs,
-				const struct fb_info *info)
+				struct fb_info *info)
 {
 	long freq;
 	long maxclock;
-	int maxclockidx = 0;
+	int maxclockidx = var->bits_per_pixel >> 3;
 	struct cirrusfb_info *cinfo = info->par;
 	int xres, hfront, hsync, hback;
 	int yres, vfront, vsync, vback;
 
-	switch(var->bits_per_pixel) {
+	switch (var->bits_per_pixel) {
 	case 1:
-		regs->line_length = var->xres_virtual / 8;
-		regs->visual = FB_VISUAL_MONO10;
-		maxclockidx = 0;
+		info->fix.line_length = var->xres_virtual / 8;
+		info->fix.visual = FB_VISUAL_MONO10;
 		break;
 
 	case 8:
-		regs->line_length = var->xres_virtual;
-		regs->visual = FB_VISUAL_PSEUDOCOLOR;
-		maxclockidx = 1;
+		info->fix.line_length = var->xres_virtual;
+		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
 		break;
 
 	case 16:
-		regs->line_length = var->xres_virtual * 2;
-		regs->visual = FB_VISUAL_DIRECTCOLOR;
-		maxclockidx = 2;
-		break;
-
 	case 24:
-		regs->line_length = var->xres_virtual * 3;
-		regs->visual = FB_VISUAL_DIRECTCOLOR;
-		maxclockidx = 3;
-		break;
-
 	case 32:
-		regs->line_length = var->xres_virtual * 4;
-		regs->visual = FB_VISUAL_DIRECTCOLOR;
-		maxclockidx = 4;
+		info->fix.line_length = var->xres_virtual * maxclockidx;
+		info->fix.visual = FB_VISUAL_DIRECTCOLOR;
 		break;
 
 	default:
@@ -882,12 +818,12 @@ static int cirrusfb_decode_var (const struct fb_var_screeninfo *var,
 		break;
 	}
 
-	regs->type = FB_TYPE_PACKED_PIXELS;
+	info->fix.type = FB_TYPE_PACKED_PIXELS;
 
 	/* convert from ps to kHz */
-	freq = 1000000000 / var->pixclock;
+	freq = PICOS2KHZ(var->pixclock);
 
-	DPRINTK ("desired pixclock: %ld kHz\n", freq);
+	DPRINTK("desired pixclock: %ld kHz\n", freq);
 
 	maxclock = cirrusfb_board_info[cinfo->btype].maxclock[maxclockidx];
 	regs->multiplexing = 0;
@@ -902,8 +838,9 @@ static int cirrusfb_decode_var (const struct fb_var_screeninfo *var,
 			break;
 
 		default:
-			printk (KERN_WARNING "cirrusfb: ERROR: Frequency greater than maxclock (%ld kHz)\n", maxclock);
-			DPRINTK ("EXIT - return -EINVAL\n");
+			printk(KERN_ERR "cirrusfb: Frequency greater "
+				"than maxclock (%ld kHz)\n", maxclock);
+			DPRINTK("EXIT - return -EINVAL\n");
 			return -EINVAL;
 		}
 	}
@@ -914,14 +851,16 @@ static int cirrusfb_decode_var (const struct fb_var_screeninfo *var,
 	case 16:
 	case 32:
 		if (regs->HorizRes <= 800)
-			freq /= 2;	/* Xbh has this type of clock for 32-bit */
+			/* Xbh has this type of clock for 32-bit */
+			freq /= 2;
 		break;
 	}
 #endif
 
-	bestclock (freq, &regs->freq, &regs->nom, &regs->den, &regs->div,
-		   maxclock);
-	regs->mclk = cirrusfb_get_mclk (freq, var->bits_per_pixel, &regs->divMCLK);
+	bestclock(freq, &regs->freq, &regs->nom, &regs->den, &regs->div,
+		  maxclock);
+	regs->mclk = cirrusfb_get_mclk(freq, var->bits_per_pixel,
+					&regs->divMCLK);
 
 	xres = var->xres;
 	hfront = var->right_margin;
@@ -948,7 +887,8 @@ static int cirrusfb_decode_var (const struct fb_var_screeninfo *var,
 	regs->HorizTotal = (xres + hfront + hsync + hback) / 8 - 5;
 	regs->HorizDispEnd = xres / 8 - 1;
 	regs->HorizBlankStart = xres / 8;
-	regs->HorizBlankEnd = regs->HorizTotal + 5;	/* does not count with "-5" */
+	/* does not count with "-5" */
+	regs->HorizBlankEnd = regs->HorizTotal + 5;
 	regs->HorizSyncStart = (xres + hfront) / 8 + 1;
 	regs->HorizSyncEnd = (xres + hfront + hsync) / 8 + 1;
 
@@ -976,23 +916,23 @@ static int cirrusfb_decode_var (const struct fb_var_screeninfo *var,
 	return 0;
 }
 
-
-static void cirrusfb_set_mclk (const struct cirrusfb_info *cinfo, int val, int div)
+static void cirrusfb_set_mclk(const struct cirrusfb_info *cinfo, int val,
+				int div)
 {
-	assert (cinfo != NULL);
+	assert(cinfo != NULL);
 
 	if (div == 2) {
 		/* VCLK = MCLK/2 */
-		unsigned char old = vga_rseq (cinfo->regbase, CL_SEQR1E);
-		vga_wseq (cinfo->regbase, CL_SEQR1E, old | 0x1);
-		vga_wseq (cinfo->regbase, CL_SEQR1F, 0x40 | (val & 0x3f));
+		unsigned char old = vga_rseq(cinfo->regbase, CL_SEQR1E);
+		vga_wseq(cinfo->regbase, CL_SEQR1E, old | 0x1);
+		vga_wseq(cinfo->regbase, CL_SEQR1F, 0x40 | (val & 0x3f));
 	} else if (div == 1) {
 		/* VCLK = MCLK */
-		unsigned char old = vga_rseq (cinfo->regbase, CL_SEQR1E);
-		vga_wseq (cinfo->regbase, CL_SEQR1E, old & ~0x1);
-		vga_wseq (cinfo->regbase, CL_SEQR1F, 0x40 | (val & 0x3f));
+		unsigned char old = vga_rseq(cinfo->regbase, CL_SEQR1E);
+		vga_wseq(cinfo->regbase, CL_SEQR1E, old & ~0x1);
+		vga_wseq(cinfo->regbase, CL_SEQR1F, 0x40 | (val & 0x3f));
 	} else {
-		vga_wseq (cinfo->regbase, CL_SEQR1F, val & 0x3f);
+		vga_wseq(cinfo->regbase, CL_SEQR1F, val & 0x3f);
 	}
 }
 
@@ -1001,7 +941,7 @@ static void cirrusfb_set_mclk (const struct cirrusfb_info *cinfo, int val, int d
 
 	actually writes the values for a new video mode into the hardware,
 **************************************************************************/
-static int cirrusfb_set_par_foo (struct fb_info *info)
+static int cirrusfb_set_par_foo(struct fb_info *info)
 {
 	struct cirrusfb_info *cinfo = info->par;
 	struct fb_var_screeninfo *var = &info->var;
@@ -1011,15 +951,15 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 	int offset = 0, err;
 	const struct cirrusfb_board_info_rec *bi;
 
-	DPRINTK ("ENTER\n");
-	DPRINTK ("Requested mode: %dx%dx%d\n",
+	DPRINTK("ENTER\n");
+	DPRINTK("Requested mode: %dx%dx%d\n",
 	       var->xres, var->yres, var->bits_per_pixel);
-	DPRINTK ("pixclock: %d\n", var->pixclock);
+	DPRINTK("pixclock: %d\n", var->pixclock);
 
-	init_vgachip (cinfo);
+	init_vgachip(info);
 
 	err = cirrusfb_decode_var(var, &regs, info);
-	if(err) {
+	if (err) {
 		/* should never happen */
 		DPRINTK("mode change aborted.  invalid var.\n");
 		return -EINVAL;
@@ -1027,34 +967,35 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 
 	bi = &cirrusfb_board_info[cinfo->btype];
 
-
 	/* unlock register VGA_CRTC_H_TOTAL..CRT7 */
-	vga_wcrt (regbase, VGA_CRTC_V_SYNC_END, 0x20);	/* previously: 0x00) */
+	vga_wcrt(regbase, VGA_CRTC_V_SYNC_END, 0x20);	/* previously: 0x00) */
 
 	/* if debugging is enabled, all parameters get output before writing */
-	DPRINTK ("CRT0: %ld\n", regs.HorizTotal);
-	vga_wcrt (regbase, VGA_CRTC_H_TOTAL, regs.HorizTotal);
+	DPRINTK("CRT0: %ld\n", regs.HorizTotal);
+	vga_wcrt(regbase, VGA_CRTC_H_TOTAL, regs.HorizTotal);
 
-	DPRINTK ("CRT1: %ld\n", regs.HorizDispEnd);
-	vga_wcrt (regbase, VGA_CRTC_H_DISP, regs.HorizDispEnd);
+	DPRINTK("CRT1: %ld\n", regs.HorizDispEnd);
+	vga_wcrt(regbase, VGA_CRTC_H_DISP, regs.HorizDispEnd);
 
-	DPRINTK ("CRT2: %ld\n", regs.HorizBlankStart);
-	vga_wcrt (regbase, VGA_CRTC_H_BLANK_START, regs.HorizBlankStart);
+	DPRINTK("CRT2: %ld\n", regs.HorizBlankStart);
+	vga_wcrt(regbase, VGA_CRTC_H_BLANK_START, regs.HorizBlankStart);
 
-	DPRINTK ("CRT3: 128+%ld\n", regs.HorizBlankEnd % 32);	/*  + 128: Compatible read */
-	vga_wcrt (regbase, VGA_CRTC_H_BLANK_END, 128 + (regs.HorizBlankEnd % 32));
+	/*  + 128: Compatible read */
+	DPRINTK("CRT3: 128+%ld\n", regs.HorizBlankEnd % 32);
+	vga_wcrt(regbase, VGA_CRTC_H_BLANK_END,
+		 128 + (regs.HorizBlankEnd % 32));
 
-	DPRINTK ("CRT4: %ld\n", regs.HorizSyncStart);
-	vga_wcrt (regbase, VGA_CRTC_H_SYNC_START, regs.HorizSyncStart);
+	DPRINTK("CRT4: %ld\n", regs.HorizSyncStart);
+	vga_wcrt(regbase, VGA_CRTC_H_SYNC_START, regs.HorizSyncStart);
 
 	tmp = regs.HorizSyncEnd % 32;
 	if (regs.HorizBlankEnd & 32)
 		tmp += 128;
-	DPRINTK ("CRT5: %d\n", tmp);
-	vga_wcrt (regbase, VGA_CRTC_H_SYNC_END, tmp);
+	DPRINTK("CRT5: %d\n", tmp);
+	vga_wcrt(regbase, VGA_CRTC_H_SYNC_END, tmp);
 
-	DPRINTK ("CRT6: %ld\n", regs.VertTotal & 0xff);
-	vga_wcrt (regbase, VGA_CRTC_V_TOTAL, (regs.VertTotal & 0xff));
+	DPRINTK("CRT6: %ld\n", regs.VertTotal & 0xff);
+	vga_wcrt(regbase, VGA_CRTC_V_TOTAL, (regs.VertTotal & 0xff));
 
 	tmp = 16;		/* LineCompare bit #9 */
 	if (regs.VertTotal & 256)
@@ -1071,34 +1012,34 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 		tmp |= 64;
 	if (regs.VertSyncStart & 512)
 		tmp |= 128;
-	DPRINTK ("CRT7: %d\n", tmp);
-	vga_wcrt (regbase, VGA_CRTC_OVERFLOW, tmp);
+	DPRINTK("CRT7: %d\n", tmp);
+	vga_wcrt(regbase, VGA_CRTC_OVERFLOW, tmp);
 
 	tmp = 0x40;		/* LineCompare bit #8 */
 	if (regs.VertBlankStart & 512)
 		tmp |= 0x20;
 	if (var->vmode & FB_VMODE_DOUBLE)
 		tmp |= 0x80;
-	DPRINTK ("CRT9: %d\n", tmp);
-	vga_wcrt (regbase, VGA_CRTC_MAX_SCAN, tmp);
+	DPRINTK("CRT9: %d\n", tmp);
+	vga_wcrt(regbase, VGA_CRTC_MAX_SCAN, tmp);
 
-	DPRINTK ("CRT10: %ld\n", regs.VertSyncStart & 0xff);
-	vga_wcrt (regbase, VGA_CRTC_V_SYNC_START, (regs.VertSyncStart & 0xff));
+	DPRINTK("CRT10: %ld\n", regs.VertSyncStart & 0xff);
+	vga_wcrt(regbase, VGA_CRTC_V_SYNC_START, regs.VertSyncStart & 0xff);
 
-	DPRINTK ("CRT11: 64+32+%ld\n", regs.VertSyncEnd % 16);
-	vga_wcrt (regbase, VGA_CRTC_V_SYNC_END, (regs.VertSyncEnd % 16 + 64 + 32));
+	DPRINTK("CRT11: 64+32+%ld\n", regs.VertSyncEnd % 16);
+	vga_wcrt(regbase, VGA_CRTC_V_SYNC_END, regs.VertSyncEnd % 16 + 64 + 32);
 
-	DPRINTK ("CRT12: %ld\n", regs.VertDispEnd & 0xff);
-	vga_wcrt (regbase, VGA_CRTC_V_DISP_END, (regs.VertDispEnd & 0xff));
+	DPRINTK("CRT12: %ld\n", regs.VertDispEnd & 0xff);
+	vga_wcrt(regbase, VGA_CRTC_V_DISP_END, regs.VertDispEnd & 0xff);
 
-	DPRINTK ("CRT15: %ld\n", regs.VertBlankStart & 0xff);
-	vga_wcrt (regbase, VGA_CRTC_V_BLANK_START, (regs.VertBlankStart & 0xff));
+	DPRINTK("CRT15: %ld\n", regs.VertBlankStart & 0xff);
+	vga_wcrt(regbase, VGA_CRTC_V_BLANK_START, regs.VertBlankStart & 0xff);
 
-	DPRINTK ("CRT16: %ld\n", regs.VertBlankEnd & 0xff);
-	vga_wcrt (regbase, VGA_CRTC_V_BLANK_END, (regs.VertBlankEnd & 0xff));
+	DPRINTK("CRT16: %ld\n", regs.VertBlankEnd & 0xff);
+	vga_wcrt(regbase, VGA_CRTC_V_BLANK_END, regs.VertBlankEnd & 0xff);
 
-	DPRINTK ("CRT18: 0xff\n");
-	vga_wcrt (regbase, VGA_CRTC_LINE_COMPARE, 0xff);
+	DPRINTK("CRT18: 0xff\n");
+	vga_wcrt(regbase, VGA_CRTC_LINE_COMPARE, 0xff);
 
 	tmp = 0;
 	if (var->vmode & FB_VMODE_INTERLACED)
@@ -1112,57 +1053,63 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 	if (regs.VertBlankEnd & 512)
 		tmp |= 128;
 
-	DPRINTK ("CRT1a: %d\n", tmp);
-	vga_wcrt (regbase, CL_CRT1A, tmp);
+	DPRINTK("CRT1a: %d\n", tmp);
+	vga_wcrt(regbase, CL_CRT1A, tmp);
 
 	/* set VCLK0 */
 	/* hardware RefClock: 14.31818 MHz */
 	/* formula: VClk = (OSC * N) / (D * (1+P)) */
 	/* Example: VClk = (14.31818 * 91) / (23 * (1+1)) = 28.325 MHz */
 
-	vga_wseq (regbase, CL_SEQRB, regs.nom);
+	vga_wseq(regbase, CL_SEQRB, regs.nom);
 	tmp = regs.den << 1;
 	if (regs.div != 0)
 		tmp |= 1;
 
+	/* 6 bit denom; ONLY 5434!!! (bugged me 10 days) */
 	if ((cinfo->btype == BT_SD64) ||
 	    (cinfo->btype == BT_ALPINE) ||
 	    (cinfo->btype == BT_GD5480))
-		tmp |= 0x80;	/* 6 bit denom; ONLY 5434!!! (bugged me 10 days) */
+		tmp |= 0x80;
 
-	DPRINTK ("CL_SEQR1B: %ld\n", (long) tmp);
-	vga_wseq (regbase, CL_SEQR1B, tmp);
+	DPRINTK("CL_SEQR1B: %ld\n", (long) tmp);
+	vga_wseq(regbase, CL_SEQR1B, tmp);
 
 	if (regs.VertRes >= 1024)
 		/* 1280x1024 */
-		vga_wcrt (regbase, VGA_CRTC_MODE, 0xc7);
+		vga_wcrt(regbase, VGA_CRTC_MODE, 0xc7);
 	else
 		/* mode control: VGA_CRTC_START_HI enable, ROTATE(?), 16bit
 		 * address wrap, no compat. */
-		vga_wcrt (regbase, VGA_CRTC_MODE, 0xc3);
+		vga_wcrt(regbase, VGA_CRTC_MODE, 0xc3);
 
-/* HAEH?        vga_wcrt (regbase, VGA_CRTC_V_SYNC_END, 0x20);  * previously: 0x00  unlock VGA_CRTC_H_TOTAL..CRT7 */
+/* HAEH?	vga_wcrt(regbase, VGA_CRTC_V_SYNC_END, 0x20);
+ * previously: 0x00  unlock VGA_CRTC_H_TOTAL..CRT7 */
 
 	/* don't know if it would hurt to also program this if no interlaced */
 	/* mode is used, but I feel better this way.. :-) */
 	if (var->vmode & FB_VMODE_INTERLACED)
-		vga_wcrt (regbase, VGA_CRTC_REGS, regs.HorizTotal / 2);
+		vga_wcrt(regbase, VGA_CRTC_REGS, regs.HorizTotal / 2);
 	else
-		vga_wcrt (regbase, VGA_CRTC_REGS, 0x00);	/* interlace control */
+		vga_wcrt(regbase, VGA_CRTC_REGS, 0x00);	/* interlace control */
 
-	vga_wseq (regbase, VGA_SEQ_CHARACTER_MAP, 0);
+	vga_wseq(regbase, VGA_SEQ_CHARACTER_MAP, 0);
 
 	/* adjust horizontal/vertical sync type (low/high) */
-	tmp = 0x03;		/* enable display memory & CRTC I/O address for color mode */
+	/* enable display memory & CRTC I/O address for color mode */
+	tmp = 0x03;
 	if (var->sync & FB_SYNC_HOR_HIGH_ACT)
 		tmp |= 0x40;
 	if (var->sync & FB_SYNC_VERT_HIGH_ACT)
 		tmp |= 0x80;
-	WGen (cinfo, VGA_MIS_W, tmp);
+	WGen(cinfo, VGA_MIS_W, tmp);
 
-	vga_wcrt (regbase, VGA_CRTC_PRESET_ROW, 0);	/* Screen A Preset Row-Scan register */
-	vga_wcrt (regbase, VGA_CRTC_CURSOR_START, 0);	/* text cursor on and start line */
-	vga_wcrt (regbase, VGA_CRTC_CURSOR_END, 31);	/* text cursor end line */
+	/* Screen A Preset Row-Scan register */
+	vga_wcrt(regbase, VGA_CRTC_PRESET_ROW, 0);
+	/* text cursor on and start line */
+	vga_wcrt(regbase, VGA_CRTC_CURSOR_START, 0);
+	/* text cursor end line */
+	vga_wcrt(regbase, VGA_CRTC_CURSOR_END, 31);
 
 	/******************************************************
 	 *
@@ -1172,8 +1119,8 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 
 	/* programming for different color depths */
 	if (var->bits_per_pixel == 1) {
-		DPRINTK ("cirrusfb: preparing for 1 bit deep display\n");
-		vga_wgfx (regbase, VGA_GFX_MODE, 0);	/* mode register */
+		DPRINTK("cirrusfb: preparing for 1 bit deep display\n");
+		vga_wgfx(regbase, VGA_GFX_MODE, 0);	/* mode register */
 
 		/* SR07 */
 		switch (cinfo->btype) {
@@ -1184,71 +1131,77 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 		case BT_PICASSO4:
 		case BT_ALPINE:
 		case BT_GD5480:
-			DPRINTK (" (for GD54xx)\n");
-			vga_wseq (regbase, CL_SEQR7,
+			DPRINTK(" (for GD54xx)\n");
+			vga_wseq(regbase, CL_SEQR7,
 				  regs.multiplexing ?
 					bi->sr07_1bpp_mux : bi->sr07_1bpp);
 			break;
 
 		case BT_LAGUNA:
-			DPRINTK (" (for GD546x)\n");
-			vga_wseq (regbase, CL_SEQR7,
-				vga_rseq (regbase, CL_SEQR7) & ~0x01);
+			DPRINTK(" (for GD546x)\n");
+			vga_wseq(regbase, CL_SEQR7,
+				vga_rseq(regbase, CL_SEQR7) & ~0x01);
 			break;
 
 		default:
-			printk (KERN_WARNING "cirrusfb: unknown Board\n");
+			printk(KERN_WARNING "cirrusfb: unknown Board\n");
 			break;
 		}
 
 		/* Extended Sequencer Mode */
 		switch (cinfo->btype) {
 		case BT_SD64:
-			/* setting the SEQRF on SD64 is not necessary (only during init) */
-			DPRINTK ("(for SD64)\n");
-			vga_wseq (regbase, CL_SEQR1F, 0x1a);		/*  MCLK select */
+			/* setting the SEQRF on SD64 is not necessary
+			 * (only during init)
+			 */
+			DPRINTK("(for SD64)\n");
+			/*  MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x1a);
 			break;
 
 		case BT_PICCOLO:
-			DPRINTK ("(for Piccolo)\n");
-/* ### ueberall 0x22? */
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* ##vorher 1c MCLK select */
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* evtl d0 bei 1 bit? avoid FIFO underruns..? */
+		case BT_SPECTRUM:
+			DPRINTK("(for Piccolo/Spectrum)\n");
+			/* ### 0x22 everywhere? */
+			/* ## previously 1c; MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x22);
+			/* possibly d0 for 1 bit? avoid FIFO underruns..? */
+			vga_wseq(regbase, CL_SEQRF, 0xb0);
 			break;
 
 		case BT_PICASSO:
-			DPRINTK ("(for Picasso)\n");
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* ##vorher 22 MCLK select */
-			vga_wseq (regbase, CL_SEQRF, 0xd0);	/* ## vorher d0 avoid FIFO underruns..? */
-			break;
-
-		case BT_SPECTRUM:
-			DPRINTK ("(for Spectrum)\n");
-/* ### ueberall 0x22? */
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* ##vorher 1c MCLK select */
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* evtl d0? avoid FIFO underruns..? */
+			DPRINTK("(for Picasso)\n");
+			/* ## previously 22; MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x22);
+			/* ## previously d0; avoid FIFO underruns..? */
+			vga_wseq(regbase, CL_SEQRF, 0xd0);
 			break;
 
 		case BT_PICASSO4:
 		case BT_ALPINE:
 		case BT_GD5480:
 		case BT_LAGUNA:
-			DPRINTK (" (for GD54xx)\n");
+			DPRINTK(" (for GD54xx)\n");
 			/* do nothing */
 			break;
 
 		default:
-			printk (KERN_WARNING "cirrusfb: unknown Board\n");
+			printk(KERN_WARNING "cirrusfb: unknown Board\n");
 			break;
 		}
 
-		WGen (cinfo, VGA_PEL_MSK, 0x01);	/* pixel mask: pass-through for first plane */
+		/* pixel mask: pass-through for first plane */
+		WGen(cinfo, VGA_PEL_MSK, 0x01);
 		if (regs.multiplexing)
-			WHDR (cinfo, 0x4a);	/* hidden dac reg: 1280x1024 */
+			/* hidden dac reg: 1280x1024 */
+			WHDR(cinfo, 0x4a);
 		else
-			WHDR (cinfo, 0);	/* hidden dac: nothing */
-		vga_wseq (regbase, VGA_SEQ_MEMORY_MODE, 0x06);	/* memory mode: odd/even, ext. memory */
-		vga_wseq (regbase, VGA_SEQ_PLANE_WRITE, 0x01);	/* plane mask: only write to first plane */
+			/* hidden dac: nothing */
+			WHDR(cinfo, 0);
+		/* memory mode: odd/even, ext. memory */
+		vga_wseq(regbase, VGA_SEQ_MEMORY_MODE, 0x06);
+		/* plane mask: only write to first plane */
+		vga_wseq(regbase, VGA_SEQ_PLANE_WRITE, 0x01);
 		offset = var->xres_virtual / 16;
 	}
 
@@ -1259,7 +1212,7 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 	 */
 
 	else if (var->bits_per_pixel == 8) {
-		DPRINTK ("cirrusfb: preparing for 8 bit deep display\n");
+		DPRINTK("cirrusfb: preparing for 8 bit deep display\n");
 		switch (cinfo->btype) {
 		case BT_SD64:
 		case BT_PICCOLO:
@@ -1268,75 +1221,77 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 		case BT_PICASSO4:
 		case BT_ALPINE:
 		case BT_GD5480:
-			DPRINTK (" (for GD54xx)\n");
-			vga_wseq (regbase, CL_SEQR7,
+			DPRINTK(" (for GD54xx)\n");
+			vga_wseq(regbase, CL_SEQR7,
 				  regs.multiplexing ?
 					bi->sr07_8bpp_mux : bi->sr07_8bpp);
 			break;
 
 		case BT_LAGUNA:
-			DPRINTK (" (for GD546x)\n");
-			vga_wseq (regbase, CL_SEQR7,
-				vga_rseq (regbase, CL_SEQR7) | 0x01);
+			DPRINTK(" (for GD546x)\n");
+			vga_wseq(regbase, CL_SEQR7,
+				vga_rseq(regbase, CL_SEQR7) | 0x01);
 			break;
 
 		default:
-			printk (KERN_WARNING "cirrusfb: unknown Board\n");
+			printk(KERN_WARNING "cirrusfb: unknown Board\n");
 			break;
 		}
 
 		switch (cinfo->btype) {
 		case BT_SD64:
-			vga_wseq (regbase, CL_SEQR1F, 0x1d);		/* MCLK select */
+			/* MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x1d);
 			break;
 
 		case BT_PICCOLO:
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* ### vorher 1c MCLK select */
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
-			break;
-
 		case BT_PICASSO:
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* ### vorher 1c MCLK select */
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
-			break;
-
 		case BT_SPECTRUM:
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* ### vorher 1c MCLK select */
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
+			/* ### previously 1c; MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x22);
+			/* Fast Page-Mode writes */
+			vga_wseq(regbase, CL_SEQRF, 0xb0);
 			break;
 
 		case BT_PICASSO4:
 #ifdef CONFIG_ZORRO
-			vga_wseq (regbase, CL_SEQRF, 0xb8);	/* ### INCOMPLETE!! */
+			/* ### INCOMPLETE!! */
+			vga_wseq(regbase, CL_SEQRF, 0xb8);
 #endif
-/*          vga_wseq (regbase, CL_SEQR1F, 0x1c); */
+/*			vga_wseq(regbase, CL_SEQR1F, 0x1c); */
 			break;
 
 		case BT_ALPINE:
-			DPRINTK (" (for GD543x)\n");
-			cirrusfb_set_mclk (cinfo, regs.mclk, regs.divMCLK);
+			DPRINTK(" (for GD543x)\n");
+			cirrusfb_set_mclk(cinfo, regs.mclk, regs.divMCLK);
 			/* We already set SRF and SR1F */
 			break;
 
 		case BT_GD5480:
 		case BT_LAGUNA:
-			DPRINTK (" (for GD54xx)\n");
+			DPRINTK(" (for GD54xx)\n");
 			/* do nothing */
 			break;
 
 		default:
-			printk (KERN_WARNING "cirrusfb: unknown Board\n");
+			printk(KERN_WARNING "cirrusfb: unknown Board\n");
 			break;
 		}
 
-		vga_wgfx (regbase, VGA_GFX_MODE, 64);	/* mode register: 256 color mode */
-		WGen (cinfo, VGA_PEL_MSK, 0xff);	/* pixel mask: pass-through all planes */
+		/* mode register: 256 color mode */
+		vga_wgfx(regbase, VGA_GFX_MODE, 64);
+		/* pixel mask: pass-through all planes */
+		WGen(cinfo, VGA_PEL_MSK, 0xff);
 		if (regs.multiplexing)
-			WHDR (cinfo, 0x4a);	/* hidden dac reg: 1280x1024 */
+			/* hidden dac reg: 1280x1024 */
+			WHDR(cinfo, 0x4a);
 		else
-			WHDR (cinfo, 0);	/* hidden dac: nothing */
-		vga_wseq (regbase, VGA_SEQ_MEMORY_MODE, 0x0a);	/* memory mode: chain4, ext. memory */
-		vga_wseq (regbase, VGA_SEQ_PLANE_WRITE, 0xff);	/* plane mask: enable writing to all 4 planes */
+			/* hidden dac: nothing */
+			WHDR(cinfo, 0);
+		/* memory mode: chain4, ext. memory */
+		vga_wseq(regbase, VGA_SEQ_MEMORY_MODE, 0x0a);
+		/* plane mask: enable writing to all 4 planes */
+		vga_wseq(regbase, VGA_SEQ_PLANE_WRITE, 0xff);
 		offset = var->xres_virtual / 8;
 	}
 
@@ -1347,72 +1302,77 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 	 */
 
 	else if (var->bits_per_pixel == 16) {
-		DPRINTK ("cirrusfb: preparing for 16 bit deep display\n");
+		DPRINTK("cirrusfb: preparing for 16 bit deep display\n");
 		switch (cinfo->btype) {
 		case BT_SD64:
-			vga_wseq (regbase, CL_SEQR7, 0xf7);	/* Extended Sequencer Mode: 256c col. mode */
-			vga_wseq (regbase, CL_SEQR1F, 0x1e);		/* MCLK select */
+			/* Extended Sequencer Mode: 256c col. mode */
+			vga_wseq(regbase, CL_SEQR7, 0xf7);
+			/* MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x1e);
 			break;
 
 		case BT_PICCOLO:
-			vga_wseq (regbase, CL_SEQR7, 0x87);
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* MCLK select */
+		case BT_SPECTRUM:
+			vga_wseq(regbase, CL_SEQR7, 0x87);
+			/* Fast Page-Mode writes */
+			vga_wseq(regbase, CL_SEQRF, 0xb0);
+			/* MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x22);
 			break;
 
 		case BT_PICASSO:
-			vga_wseq (regbase, CL_SEQR7, 0x27);
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* MCLK select */
-			break;
-
-		case BT_SPECTRUM:
-			vga_wseq (regbase, CL_SEQR7, 0x87);
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* MCLK select */
+			vga_wseq(regbase, CL_SEQR7, 0x27);
+			/* Fast Page-Mode writes */
+			vga_wseq(regbase, CL_SEQRF, 0xb0);
+			/* MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x22);
 			break;
 
 		case BT_PICASSO4:
-			vga_wseq (regbase, CL_SEQR7, 0x27);
-/*          vga_wseq (regbase, CL_SEQR1F, 0x1c);  */
+			vga_wseq(regbase, CL_SEQR7, 0x27);
+/*			vga_wseq(regbase, CL_SEQR1F, 0x1c);  */
 			break;
 
 		case BT_ALPINE:
-			DPRINTK (" (for GD543x)\n");
+			DPRINTK(" (for GD543x)\n");
 			if (regs.HorizRes >= 1024)
-				vga_wseq (regbase, CL_SEQR7, 0xa7);
+				vga_wseq(regbase, CL_SEQR7, 0xa7);
 			else
-				vga_wseq (regbase, CL_SEQR7, 0xa3);
-			cirrusfb_set_mclk (cinfo, regs.mclk, regs.divMCLK);
+				vga_wseq(regbase, CL_SEQR7, 0xa3);
+			cirrusfb_set_mclk(cinfo, regs.mclk, regs.divMCLK);
 			break;
 
 		case BT_GD5480:
-			DPRINTK (" (for GD5480)\n");
-			vga_wseq (regbase, CL_SEQR7, 0x17);
+			DPRINTK(" (for GD5480)\n");
+			vga_wseq(regbase, CL_SEQR7, 0x17);
 			/* We already set SRF and SR1F */
 			break;
 
 		case BT_LAGUNA:
-			DPRINTK (" (for GD546x)\n");
-			vga_wseq (regbase, CL_SEQR7,
-				vga_rseq (regbase, CL_SEQR7) & ~0x01);
+			DPRINTK(" (for GD546x)\n");
+			vga_wseq(regbase, CL_SEQR7,
+				vga_rseq(regbase, CL_SEQR7) & ~0x01);
 			break;
 
 		default:
-			printk (KERN_WARNING "CIRRUSFB: unknown Board\n");
+			printk(KERN_WARNING "CIRRUSFB: unknown Board\n");
 			break;
 		}
 
-		vga_wgfx (regbase, VGA_GFX_MODE, 64);	/* mode register: 256 color mode */
-		WGen (cinfo, VGA_PEL_MSK, 0xff);	/* pixel mask: pass-through all planes */
+		/* mode register: 256 color mode */
+		vga_wgfx(regbase, VGA_GFX_MODE, 64);
+		/* pixel mask: pass-through all planes */
+		WGen(cinfo, VGA_PEL_MSK, 0xff);
 #ifdef CONFIG_PCI
-		WHDR (cinfo, 0xc0);	/* Copy Xbh */
+		WHDR(cinfo, 0xc0);	/* Copy Xbh */
 #elif defined(CONFIG_ZORRO)
 		/* FIXME: CONFIG_PCI and CONFIG_ZORRO may be defined both */
-		WHDR (cinfo, 0xa0);	/* hidden dac reg: nothing special */
+		WHDR(cinfo, 0xa0);	/* hidden dac reg: nothing special */
 #endif
-		vga_wseq (regbase, VGA_SEQ_MEMORY_MODE, 0x0a);	/* memory mode: chain4, ext. memory */
-		vga_wseq (regbase, VGA_SEQ_PLANE_WRITE, 0xff);	/* plane mask: enable writing to all 4 planes */
+		/* memory mode: chain4, ext. memory */
+		vga_wseq(regbase, VGA_SEQ_MEMORY_MODE, 0x0a);
+		/* plane mask: enable writing to all 4 planes */
+		vga_wseq(regbase, VGA_SEQ_PLANE_WRITE, 0xff);
 		offset = var->xres_virtual / 4;
 	}
 
@@ -1423,64 +1383,70 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 	 */
 
 	else if (var->bits_per_pixel == 32) {
-		DPRINTK ("cirrusfb: preparing for 24/32 bit deep display\n");
+		DPRINTK("cirrusfb: preparing for 24/32 bit deep display\n");
 		switch (cinfo->btype) {
 		case BT_SD64:
-			vga_wseq (regbase, CL_SEQR7, 0xf9);	/* Extended Sequencer Mode: 256c col. mode */
-			vga_wseq (regbase, CL_SEQR1F, 0x1e);		/* MCLK select */
+			/* Extended Sequencer Mode: 256c col. mode */
+			vga_wseq(regbase, CL_SEQR7, 0xf9);
+			/* MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x1e);
 			break;
 
 		case BT_PICCOLO:
-			vga_wseq (regbase, CL_SEQR7, 0x85);
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* MCLK select */
+		case BT_SPECTRUM:
+			vga_wseq(regbase, CL_SEQR7, 0x85);
+			/* Fast Page-Mode writes */
+			vga_wseq(regbase, CL_SEQRF, 0xb0);
+			/* MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x22);
 			break;
 
 		case BT_PICASSO:
-			vga_wseq (regbase, CL_SEQR7, 0x25);
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* MCLK select */
-			break;
-
-		case BT_SPECTRUM:
-			vga_wseq (regbase, CL_SEQR7, 0x85);
-			vga_wseq (regbase, CL_SEQRF, 0xb0);	/* Fast Page-Mode writes */
-			vga_wseq (regbase, CL_SEQR1F, 0x22);		/* MCLK select */
+			vga_wseq(regbase, CL_SEQR7, 0x25);
+			/* Fast Page-Mode writes */
+			vga_wseq(regbase, CL_SEQRF, 0xb0);
+			/* MCLK select */
+			vga_wseq(regbase, CL_SEQR1F, 0x22);
 			break;
 
 		case BT_PICASSO4:
-			vga_wseq (regbase, CL_SEQR7, 0x25);
-/*          vga_wseq (regbase, CL_SEQR1F, 0x1c);  */
+			vga_wseq(regbase, CL_SEQR7, 0x25);
+/*			vga_wseq(regbase, CL_SEQR1F, 0x1c);  */
 			break;
 
 		case BT_ALPINE:
-			DPRINTK (" (for GD543x)\n");
-			vga_wseq (regbase, CL_SEQR7, 0xa9);
-			cirrusfb_set_mclk (cinfo, regs.mclk, regs.divMCLK);
+			DPRINTK(" (for GD543x)\n");
+			vga_wseq(regbase, CL_SEQR7, 0xa9);
+			cirrusfb_set_mclk(cinfo, regs.mclk, regs.divMCLK);
 			break;
 
 		case BT_GD5480:
-			DPRINTK (" (for GD5480)\n");
-			vga_wseq (regbase, CL_SEQR7, 0x19);
+			DPRINTK(" (for GD5480)\n");
+			vga_wseq(regbase, CL_SEQR7, 0x19);
 			/* We already set SRF and SR1F */
 			break;
 
 		case BT_LAGUNA:
-			DPRINTK (" (for GD546x)\n");
-			vga_wseq (regbase, CL_SEQR7,
-				vga_rseq (regbase, CL_SEQR7) & ~0x01);
+			DPRINTK(" (for GD546x)\n");
+			vga_wseq(regbase, CL_SEQR7,
+				vga_rseq(regbase, CL_SEQR7) & ~0x01);
 			break;
 
 		default:
-			printk (KERN_WARNING "cirrusfb: unknown Board\n");
+			printk(KERN_WARNING "cirrusfb: unknown Board\n");
 			break;
 		}
 
-		vga_wgfx (regbase, VGA_GFX_MODE, 64);	/* mode register: 256 color mode */
-		WGen (cinfo, VGA_PEL_MSK, 0xff);	/* pixel mask: pass-through all planes */
-		WHDR (cinfo, 0xc5);	/* hidden dac reg: 8-8-8 mode (24 or 32) */
-		vga_wseq (regbase, VGA_SEQ_MEMORY_MODE, 0x0a);	/* memory mode: chain4, ext. memory */
-		vga_wseq (regbase, VGA_SEQ_PLANE_WRITE, 0xff);	/* plane mask: enable writing to all 4 planes */
+		/* mode register: 256 color mode */
+		vga_wgfx(regbase, VGA_GFX_MODE, 64);
+		/* pixel mask: pass-through all planes */
+		WGen(cinfo, VGA_PEL_MSK, 0xff);
+		/* hidden dac reg: 8-8-8 mode (24 or 32) */
+		WHDR(cinfo, 0xc5);
+		/* memory mode: chain4, ext. memory */
+		vga_wseq(regbase, VGA_SEQ_MEMORY_MODE, 0x0a);
+		/* plane mask: enable writing to all 4 planes */
+		vga_wseq(regbase, VGA_SEQ_PLANE_WRITE, 0xff);
 		offset = var->xres_virtual / 4;
 	}
 
@@ -1490,48 +1456,67 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 	 *
 	 */
 
-	else {
-		printk (KERN_ERR "cirrusfb: What's this?? requested color depth == %d.\n",
+	else
+		printk(KERN_ERR "cirrusfb: What's this?? "
+			"requested color depth == %d.\n",
 			var->bits_per_pixel);
-	}
 
-	vga_wcrt (regbase, VGA_CRTC_OFFSET, offset & 0xff);
+	vga_wcrt(regbase, VGA_CRTC_OFFSET, offset & 0xff);
 	tmp = 0x22;
 	if (offset & 0x100)
 		tmp |= 0x10;	/* offset overflow bit */
 
-	vga_wcrt (regbase, CL_CRT1B, tmp);	/* screen start addr #16-18, fastpagemode cycles */
+	/* screen start addr #16-18, fastpagemode cycles */
+	vga_wcrt(regbase, CL_CRT1B, tmp);
 
 	if (cinfo->btype == BT_SD64 ||
 	    cinfo->btype == BT_PICASSO4 ||
 	    cinfo->btype == BT_ALPINE ||
 	    cinfo->btype == BT_GD5480)
-		vga_wcrt (regbase, CL_CRT1D, 0x00);	/* screen start address bit 19 */
-
-	vga_wcrt (regbase, VGA_CRTC_CURSOR_HI, 0);	/* text cursor location high */
-	vga_wcrt (regbase, VGA_CRTC_CURSOR_LO, 0);	/* text cursor location low */
-	vga_wcrt (regbase, VGA_CRTC_UNDERLINE, 0);	/* underline row scanline = at very bottom */
-
-	vga_wattr (regbase, VGA_ATC_MODE, 1);	/* controller mode */
-	vga_wattr (regbase, VGA_ATC_OVERSCAN, 0);		/* overscan (border) color */
-	vga_wattr (regbase, VGA_ATC_PLANE_ENABLE, 15);	/* color plane enable */
-	vga_wattr (regbase, CL_AR33, 0);	/* pixel panning */
-	vga_wattr (regbase, VGA_ATC_COLOR_PAGE, 0);	/* color select */
+		/* screen start address bit 19 */
+		vga_wcrt(regbase, CL_CRT1D, 0x00);
+
+	/* text cursor location high */
+	vga_wcrt(regbase, VGA_CRTC_CURSOR_HI, 0);
+	/* text cursor location low */
+	vga_wcrt(regbase, VGA_CRTC_CURSOR_LO, 0);
+	/* underline row scanline = at very bottom */
+	vga_wcrt(regbase, VGA_CRTC_UNDERLINE, 0);
+
+	/* controller mode */
+	vga_wattr(regbase, VGA_ATC_MODE, 1);
+	/* overscan (border) color */
+	vga_wattr(regbase, VGA_ATC_OVERSCAN, 0);
+	/* color plane enable */
+	vga_wattr(regbase, VGA_ATC_PLANE_ENABLE, 15);
+	/* pixel panning */
+	vga_wattr(regbase, CL_AR33, 0);
+	/* color select */
+	vga_wattr(regbase, VGA_ATC_COLOR_PAGE, 0);
 
 	/* [ EGS: SetOffset(); ] */
 	/* From SetOffset(): Turn on VideoEnable bit in Attribute controller */
-	AttrOn (cinfo);
-
-	vga_wgfx (regbase, VGA_GFX_SR_VALUE, 0);	/* set/reset register */
-	vga_wgfx (regbase, VGA_GFX_SR_ENABLE, 0);		/* set/reset enable */
-	vga_wgfx (regbase, VGA_GFX_COMPARE_VALUE, 0);	/* color compare */
-	vga_wgfx (regbase, VGA_GFX_DATA_ROTATE, 0);	/* data rotate */
-	vga_wgfx (regbase, VGA_GFX_PLANE_READ, 0);	/* read map select */
-	vga_wgfx (regbase, VGA_GFX_MISC, 1);	/* miscellaneous register */
-	vga_wgfx (regbase, VGA_GFX_COMPARE_MASK, 15);	/* color don't care */
-	vga_wgfx (regbase, VGA_GFX_BIT_MASK, 255);	/* bit mask */
-
-	vga_wseq (regbase, CL_SEQR12, 0x0);	/* graphics cursor attributes: nothing special */
+	AttrOn(cinfo);
+
+	/* set/reset register */
+	vga_wgfx(regbase, VGA_GFX_SR_VALUE, 0);
+	/* set/reset enable */
+	vga_wgfx(regbase, VGA_GFX_SR_ENABLE, 0);
+	/* color compare */
+	vga_wgfx(regbase, VGA_GFX_COMPARE_VALUE, 0);
+	/* data rotate */
+	vga_wgfx(regbase, VGA_GFX_DATA_ROTATE, 0);
+	/* read map select */
+	vga_wgfx(regbase, VGA_GFX_PLANE_READ, 0);
+	/* miscellaneous register */
+	vga_wgfx(regbase, VGA_GFX_MISC, 1);
+	/* color don't care */
+	vga_wgfx(regbase, VGA_GFX_COMPARE_MASK, 15);
+	/* bit mask */
+	vga_wgfx(regbase, VGA_GFX_BIT_MASK, 255);
+
+	/* graphics cursor attributes: nothing special */
+	vga_wseq(regbase, CL_SEQR12, 0x0);
 
 	/* finally, turn on everything - turn off "FullBandwidth" bit */
 	/* also, set "DotClock%2" bit where requested */
@@ -1542,36 +1527,33 @@ static int cirrusfb_set_par_foo (struct fb_info *info)
 	tmp |= 0x08;
 */
 
-	vga_wseq (regbase, VGA_SEQ_CLOCK_MODE, tmp);
-	DPRINTK ("CL_SEQR1: %d\n", tmp);
+	vga_wseq(regbase, VGA_SEQ_CLOCK_MODE, tmp);
+	DPRINTK("CL_SEQR1: %d\n", tmp);
 
 	cinfo->currentmode = regs;
-	info->fix.type = regs.type;
-	info->fix.visual = regs.visual;
-	info->fix.line_length = regs.line_length;
 
 	/* pan to requested offset */
-	cirrusfb_pan_display (var, info);
+	cirrusfb_pan_display(var, info);
 
 #ifdef CIRRUSFB_DEBUG
-	cirrusfb_dump ();
+	cirrusfb_dump();
 #endif
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 	return 0;
 }
 
 /* for some reason incomprehensible to me, cirrusfb requires that you write
  * the registers twice for the settings to take..grr. -dte */
-static int cirrusfb_set_par (struct fb_info *info)
+static int cirrusfb_set_par(struct fb_info *info)
 {
-	cirrusfb_set_par_foo (info);
-	return cirrusfb_set_par_foo (info);
+	cirrusfb_set_par_foo(info);
+	return cirrusfb_set_par_foo(info);
 }
 
-static int cirrusfb_setcolreg (unsigned regno, unsigned red, unsigned green,
-			       unsigned blue, unsigned transp,
-			       struct fb_info *info)
+static int cirrusfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			      unsigned blue, unsigned transp,
+			      struct fb_info *info)
 {
 	struct cirrusfb_info *cinfo = info->par;
 
@@ -1584,34 +1566,18 @@ static int cirrusfb_setcolreg (unsigned regno, unsigned red, unsigned green,
 		green >>= (16 - info->var.green.length);
 		blue >>= (16 - info->var.blue.length);
 
-		if (regno>=16)
+		if (regno >= 16)
 			return 1;
 		v = (red << info->var.red.offset) |
 		    (green << info->var.green.offset) |
 		    (blue << info->var.blue.offset);
 
-		switch (info->var.bits_per_pixel) {
-			case 8:
-				cinfo->pseudo_palette[regno] = v;
-				break;
-			case 16:
-				cinfo->pseudo_palette[regno] = v;
-				break;
-			case 24:
-			case 32:
-				cinfo->pseudo_palette[regno] = v;
-				break;
-		}
+		cinfo->pseudo_palette[regno] = v;
 		return 0;
 	}
 
-	cinfo->palette[regno].red = red;
-	cinfo->palette[regno].green = green;
-	cinfo->palette[regno].blue = blue;
-
-	if (info->var.bits_per_pixel == 8) {
-			WClut (cinfo, regno, red >> 10, green >> 10, blue >> 10);
-	}
+	if (info->var.bits_per_pixel == 8)
+		WClut(cinfo, regno, red >> 10, green >> 10, blue >> 10);
 
 	return 0;
 
@@ -1622,8 +1588,8 @@ static int cirrusfb_setcolreg (unsigned regno, unsigned red, unsigned green,
 
 	performs display panning - provided hardware permits this
 **************************************************************************/
-static int cirrusfb_pan_display (struct fb_var_screeninfo *var,
-				 struct fb_info *info)
+static int cirrusfb_pan_display(struct fb_var_screeninfo *var,
+				struct fb_info *info)
 {
 	int xoffset = 0;
 	int yoffset = 0;
@@ -1631,8 +1597,8 @@ static int cirrusfb_pan_display (struct fb_var_screeninfo *var,
 	unsigned char tmp = 0, tmp2 = 0, xpix;
 	struct cirrusfb_info *cinfo = info->par;
 
-	DPRINTK ("ENTER\n");
-	DPRINTK ("virtual offset: (%d,%d)\n", var->xoffset, var->yoffset);
+	DPRINTK("ENTER\n");
+	DPRINTK("virtual offset: (%d,%d)\n", var->xoffset, var->yoffset);
 
 	/* no range checks for xoffset and yoffset,   */
 	/* as fb_pan_display has already done this */
@@ -1645,7 +1611,7 @@ static int cirrusfb_pan_display (struct fb_var_screeninfo *var,
 	xoffset = var->xoffset * info->var.bits_per_pixel / 8;
 	yoffset = var->yoffset;
 
-	base = yoffset * cinfo->currentmode.line_length + xoffset;
+	base = yoffset * info->fix.line_length + xoffset;
 
 	if (info->var.bits_per_pixel == 1) {
 		/* base is already correct */
@@ -1655,11 +1621,13 @@ static int cirrusfb_pan_display (struct fb_var_screeninfo *var,
 		xpix = (unsigned char) ((xoffset % 4) * 2);
 	}
 
-        cirrusfb_WaitBLT(cinfo->regbase); /* make sure all the BLT's are done */
+	cirrusfb_WaitBLT(cinfo->regbase); /* make sure all the BLT's are done */
 
 	/* lower 8 + 8 bits of screen start address */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_START_LO, (unsigned char) (base & 0xff));
-	vga_wcrt (cinfo->regbase, VGA_CRTC_START_HI, (unsigned char) (base >> 8));
+	vga_wcrt(cinfo->regbase, VGA_CRTC_START_LO,
+		 (unsigned char) (base & 0xff));
+	vga_wcrt(cinfo->regbase, VGA_CRTC_START_HI,
+		 (unsigned char) (base >> 8));
 
 	/* construct bits 16, 17 and 18 of screen start address */
 	if (base & 0x10000)
@@ -1669,50 +1637,49 @@ static int cirrusfb_pan_display (struct fb_var_screeninfo *var,
 	if (base & 0x40000)
 		tmp |= 0x08;
 
-	tmp2 = (vga_rcrt (cinfo->regbase, CL_CRT1B) & 0xf2) | tmp;	/* 0xf2 is %11110010, exclude tmp bits */
-	vga_wcrt (cinfo->regbase, CL_CRT1B, tmp2);
+	/* 0xf2 is %11110010, exclude tmp bits */
+	tmp2 = (vga_rcrt(cinfo->regbase, CL_CRT1B) & 0xf2) | tmp;
+	vga_wcrt(cinfo->regbase, CL_CRT1B, tmp2);
 
 	/* construct bit 19 of screen start address */
-	if (cirrusfb_board_info[cinfo->btype].scrn_start_bit19) {
-		tmp2 = 0;
-		if (base & 0x80000)
-			tmp2 = 0x80;
-		vga_wcrt (cinfo->regbase, CL_CRT1D, tmp2);
-	}
+	if (cirrusfb_board_info[cinfo->btype].scrn_start_bit19)
+		vga_wcrt(cinfo->regbase, CL_CRT1D, (base >> 12) & 0x80);
 
-	/* write pixel panning value to AR33; this does not quite work in 8bpp */
-	/* ### Piccolo..? Will this work? */
+	/* write pixel panning value to AR33; this does not quite work in 8bpp
+	 *
+	 * ### Piccolo..? Will this work?
+	 */
 	if (info->var.bits_per_pixel == 1)
-		vga_wattr (cinfo->regbase, CL_AR33, xpix);
+		vga_wattr(cinfo->regbase, CL_AR33, xpix);
 
-	cirrusfb_WaitBLT (cinfo->regbase);
+	cirrusfb_WaitBLT(cinfo->regbase);
 
-	DPRINTK ("EXIT\n");
-	return (0);
+	DPRINTK("EXIT\n");
+	return 0;
 }
 
-
-static int cirrusfb_blank (int blank_mode, struct fb_info *info)
+static int cirrusfb_blank(int blank_mode, struct fb_info *info)
 {
 	/*
-	 *  Blank the screen if blank_mode != 0, else unblank. If blank == NULL
-	 *  then the caller blanks by setting the CLUT (Color Look Up Table) to all
-	 *  black. Return 0 if blanking succeeded, != 0 if un-/blanking failed due
-	 *  to e.g. a video mode which doesn't support it. Implements VESA suspend
-	 *  and powerdown modes on hardware that supports disabling hsync/vsync:
-	 *    blank_mode == 2: suspend vsync
-	 *    blank_mode == 3: suspend hsync
-	 *    blank_mode == 4: powerdown
+	 * Blank the screen if blank_mode != 0, else unblank. If blank == NULL
+	 * then the caller blanks by setting the CLUT (Color Look Up Table)
+	 * to all black. Return 0 if blanking succeeded, != 0 if un-/blanking
+	 * failed due to e.g. a video mode which doesn't support it.
+	 * Implements VESA suspend and powerdown modes on hardware that
+	 * supports disabling hsync/vsync:
+	 *   blank_mode == 2: suspend vsync
+	 *   blank_mode == 3: suspend hsync
+	 *   blank_mode == 4: powerdown
 	 */
 	unsigned char val;
 	struct cirrusfb_info *cinfo = info->par;
 	int current_mode = cinfo->blank_mode;
 
-	DPRINTK ("ENTER, blank mode = %d\n", blank_mode);
+	DPRINTK("ENTER, blank mode = %d\n", blank_mode);
 
 	if (info->state != FBINFO_STATE_RUNNING ||
 	    current_mode == blank_mode) {
-		DPRINTK ("EXIT, returning 0\n");
+		DPRINTK("EXIT, returning 0\n");
 		return 0;
 	}
 
@@ -1720,17 +1687,19 @@ static int cirrusfb_blank (int blank_mode, struct fb_info *info)
 	if (current_mode == FB_BLANK_NORMAL ||
 	    current_mode == FB_BLANK_UNBLANK) {
 		/* unblank the screen */
-		val = vga_rseq (cinfo->regbase, VGA_SEQ_CLOCK_MODE);
-		vga_wseq (cinfo->regbase, VGA_SEQ_CLOCK_MODE, val & 0xdf);	/* clear "FullBandwidth" bit */
+		val = vga_rseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE);
+		/* clear "FullBandwidth" bit */
+		vga_wseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE, val & 0xdf);
 		/* and undo VESA suspend trickery */
-		vga_wgfx (cinfo->regbase, CL_GRE, 0x00);
+		vga_wgfx(cinfo->regbase, CL_GRE, 0x00);
 	}
 
 	/* set new */
-	if(blank_mode > FB_BLANK_NORMAL) {
+	if (blank_mode > FB_BLANK_NORMAL) {
 		/* blank the screen */
-		val = vga_rseq (cinfo->regbase, VGA_SEQ_CLOCK_MODE);
-		vga_wseq (cinfo->regbase, VGA_SEQ_CLOCK_MODE, val | 0x20);	/* set "FullBandwidth" bit */
+		val = vga_rseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE);
+		/* set "FullBandwidth" bit */
+		vga_wseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE, val | 0x20);
 	}
 
 	switch (blank_mode) {
@@ -1738,21 +1707,21 @@ static int cirrusfb_blank (int blank_mode, struct fb_info *info)
 	case FB_BLANK_NORMAL:
 		break;
 	case FB_BLANK_VSYNC_SUSPEND:
-		vga_wgfx (cinfo->regbase, CL_GRE, 0x04);
+		vga_wgfx(cinfo->regbase, CL_GRE, 0x04);
 		break;
 	case FB_BLANK_HSYNC_SUSPEND:
-		vga_wgfx (cinfo->regbase, CL_GRE, 0x02);
+		vga_wgfx(cinfo->regbase, CL_GRE, 0x02);
 		break;
 	case FB_BLANK_POWERDOWN:
-		vga_wgfx (cinfo->regbase, CL_GRE, 0x06);
+		vga_wgfx(cinfo->regbase, CL_GRE, 0x06);
 		break;
 	default:
-		DPRINTK ("EXIT, returning 1\n");
+		DPRINTK("EXIT, returning 1\n");
 		return 1;
 	}
 
 	cinfo->blank_mode = blank_mode;
-	DPRINTK ("EXIT, returning 0\n");
+	DPRINTK("EXIT, returning 0\n");
 
 	/* Let fbcon do a soft blank for us */
 	return (blank_mode == FB_BLANK_NORMAL) ? 1 : 0;
@@ -1761,45 +1730,51 @@ static int cirrusfb_blank (int blank_mode, struct fb_info *info)
 /****************************************************************************/
 /**** BEGIN Internal Routines ***********************************************/
 
-static void init_vgachip (struct cirrusfb_info *cinfo)
+static void init_vgachip(struct fb_info *info)
 {
+	struct cirrusfb_info *cinfo = info->par;
 	const struct cirrusfb_board_info_rec *bi;
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
-	assert (cinfo != NULL);
+	assert(cinfo != NULL);
 
 	bi = &cirrusfb_board_info[cinfo->btype];
 
 	/* reset board globally */
 	switch (cinfo->btype) {
 	case BT_PICCOLO:
-		WSFR (cinfo, 0x01);
-		udelay (500);
-		WSFR (cinfo, 0x51);
-		udelay (500);
+		WSFR(cinfo, 0x01);
+		udelay(500);
+		WSFR(cinfo, 0x51);
+		udelay(500);
 		break;
 	case BT_PICASSO:
-		WSFR2 (cinfo, 0xff);
-		udelay (500);
+		WSFR2(cinfo, 0xff);
+		udelay(500);
 		break;
 	case BT_SD64:
 	case BT_SPECTRUM:
-		WSFR (cinfo, 0x1f);
-		udelay (500);
-		WSFR (cinfo, 0x4f);
-		udelay (500);
+		WSFR(cinfo, 0x1f);
+		udelay(500);
+		WSFR(cinfo, 0x4f);
+		udelay(500);
 		break;
 	case BT_PICASSO4:
-		vga_wcrt (cinfo->regbase, CL_CRT51, 0x00);	/* disable flickerfixer */
-		mdelay (100);
-		vga_wgfx (cinfo->regbase, CL_GR2F, 0x00);	/* from Klaus' NetBSD driver: */
-		vga_wgfx (cinfo->regbase, CL_GR33, 0x00);	/* put blitter into 542x compat */
-		vga_wgfx (cinfo->regbase, CL_GR31, 0x00);	/* mode */
+		/* disable flickerfixer */
+		vga_wcrt(cinfo->regbase, CL_CRT51, 0x00);
+		mdelay(100);
+		/* from Klaus' NetBSD driver: */
+		vga_wgfx(cinfo->regbase, CL_GR2F, 0x00);
+		/* put blitter into 542x compat */
+		vga_wgfx(cinfo->regbase, CL_GR33, 0x00);
+		/* mode */
+		vga_wgfx(cinfo->regbase, CL_GR31, 0x00);
 		break;
 
 	case BT_GD5480:
-		vga_wgfx (cinfo->regbase, CL_GR2F, 0x00);	/* from Klaus' NetBSD driver: */
+		/* from Klaus' NetBSD driver: */
+		vga_wgfx(cinfo->regbase, CL_GR2F, 0x00);
 		break;
 
 	case BT_ALPINE:
@@ -1807,153 +1782,208 @@ static void init_vgachip (struct cirrusfb_info *cinfo)
 		break;
 
 	default:
-		printk (KERN_ERR "cirrusfb: Warning: Unknown board type\n");
+		printk(KERN_ERR "cirrusfb: Warning: Unknown board type\n");
 		break;
 	}
 
-	assert (cinfo->size > 0); /* make sure RAM size set by this point */
+	/* make sure RAM size set by this point */
+	assert(info->screen_size > 0);
 
 	/* the P4 is not fully initialized here; I rely on it having been */
 	/* inited under AmigaOS already, which seems to work just fine    */
-	/* (Klaus advised to do it this way)                              */
+	/* (Klaus advised to do it this way)			      */
 
 	if (cinfo->btype != BT_PICASSO4) {
-		WGen (cinfo, CL_VSSM, 0x10);	/* EGS: 0x16 */
-		WGen (cinfo, CL_POS102, 0x01);
-		WGen (cinfo, CL_VSSM, 0x08);	/* EGS: 0x0e */
+		WGen(cinfo, CL_VSSM, 0x10);	/* EGS: 0x16 */
+		WGen(cinfo, CL_POS102, 0x01);
+		WGen(cinfo, CL_VSSM, 0x08);	/* EGS: 0x0e */
 
 		if (cinfo->btype != BT_SD64)
-			WGen (cinfo, CL_VSSM2, 0x01);
+			WGen(cinfo, CL_VSSM2, 0x01);
 
-		vga_wseq (cinfo->regbase, CL_SEQR0, 0x03);	/* reset sequencer logic */
+		/* reset sequencer logic */
+		vga_wseq(cinfo->regbase, CL_SEQR0, 0x03);
 
-		vga_wseq (cinfo->regbase, VGA_SEQ_CLOCK_MODE, 0x21);	/* FullBandwidth (video off) and 8/9 dot clock */
-		WGen (cinfo, VGA_MIS_W, 0xc1);	/* polarity (-/-), disable access to display memory, VGA_CRTC_START_HI base address: color */
+		/* FullBandwidth (video off) and 8/9 dot clock */
+		vga_wseq(cinfo->regbase, VGA_SEQ_CLOCK_MODE, 0x21);
+		/* polarity (-/-), disable access to display memory,
+		 * VGA_CRTC_START_HI base address: color
+		 */
+		WGen(cinfo, VGA_MIS_W, 0xc1);
 
-/*      vga_wgfx (cinfo->regbase, CL_GRA, 0xce);    "magic cookie" - doesn't make any sense to me.. */
-		vga_wseq (cinfo->regbase, CL_SEQR6, 0x12);	/* unlock all extension registers */
+		/* "magic cookie" - doesn't make any sense to me.. */
+/*      vga_wgfx(cinfo->regbase, CL_GRA, 0xce);   */
+		/* unlock all extension registers */
+		vga_wseq(cinfo->regbase, CL_SEQR6, 0x12);
 
-		vga_wgfx (cinfo->regbase, CL_GR31, 0x04);	/* reset blitter */
+		/* reset blitter */
+		vga_wgfx(cinfo->regbase, CL_GR31, 0x04);
 
 		switch (cinfo->btype) {
 		case BT_GD5480:
-			vga_wseq (cinfo->regbase, CL_SEQRF, 0x98);
+			vga_wseq(cinfo->regbase, CL_SEQRF, 0x98);
 			break;
 		case BT_ALPINE:
 			break;
 		case BT_SD64:
-			vga_wseq (cinfo->regbase, CL_SEQRF, 0xb8);
+			vga_wseq(cinfo->regbase, CL_SEQRF, 0xb8);
 			break;
 		default:
-			vga_wseq (cinfo->regbase, CL_SEQR16, 0x0f);
-			vga_wseq (cinfo->regbase, CL_SEQRF, 0xb0);
+			vga_wseq(cinfo->regbase, CL_SEQR16, 0x0f);
+			vga_wseq(cinfo->regbase, CL_SEQRF, 0xb0);
 			break;
 		}
 	}
-	vga_wseq (cinfo->regbase, VGA_SEQ_PLANE_WRITE, 0xff);	/* plane mask: nothing */
-	vga_wseq (cinfo->regbase, VGA_SEQ_CHARACTER_MAP, 0x00);	/* character map select: doesn't even matter in gx mode */
-	vga_wseq (cinfo->regbase, VGA_SEQ_MEMORY_MODE, 0x0e);	/* memory mode: chain-4, no odd/even, ext. memory */
+	/* plane mask: nothing */
+	vga_wseq(cinfo->regbase, VGA_SEQ_PLANE_WRITE, 0xff);
+	/* character map select: doesn't even matter in gx mode */
+	vga_wseq(cinfo->regbase, VGA_SEQ_CHARACTER_MAP, 0x00);
+	/* memory mode: chain-4, no odd/even, ext. memory */
+	vga_wseq(cinfo->regbase, VGA_SEQ_MEMORY_MODE, 0x0e);
 
 	/* controller-internal base address of video memory */
 	if (bi->init_sr07)
-		vga_wseq (cinfo->regbase, CL_SEQR7, bi->sr07);
+		vga_wseq(cinfo->regbase, CL_SEQR7, bi->sr07);
 
-	/*  vga_wseq (cinfo->regbase, CL_SEQR8, 0x00); *//* EEPROM control: shouldn't be necessary to write to this at all.. */
+	/*  vga_wseq(cinfo->regbase, CL_SEQR8, 0x00); */
+	/* EEPROM control: shouldn't be necessary to write to this at all.. */
 
-	vga_wseq (cinfo->regbase, CL_SEQR10, 0x00);		/* graphics cursor X position (incomplete; position gives rem. 3 bits */
-	vga_wseq (cinfo->regbase, CL_SEQR11, 0x00);		/* graphics cursor Y position (..."... ) */
-	vga_wseq (cinfo->regbase, CL_SEQR12, 0x00);		/* graphics cursor attributes */
-	vga_wseq (cinfo->regbase, CL_SEQR13, 0x00);		/* graphics cursor pattern address */
+	/* graphics cursor X position (incomplete; position gives rem. 3 bits */
+	vga_wseq(cinfo->regbase, CL_SEQR10, 0x00);
+	/* graphics cursor Y position (..."... ) */
+	vga_wseq(cinfo->regbase, CL_SEQR11, 0x00);
+	/* graphics cursor attributes */
+	vga_wseq(cinfo->regbase, CL_SEQR12, 0x00);
+	/* graphics cursor pattern address */
+	vga_wseq(cinfo->regbase, CL_SEQR13, 0x00);
 
 	/* writing these on a P4 might give problems..  */
 	if (cinfo->btype != BT_PICASSO4) {
-		vga_wseq (cinfo->regbase, CL_SEQR17, 0x00);		/* configuration readback and ext. color */
-		vga_wseq (cinfo->regbase, CL_SEQR18, 0x02);		/* signature generator */
+		/* configuration readback and ext. color */
+		vga_wseq(cinfo->regbase, CL_SEQR17, 0x00);
+		/* signature generator */
+		vga_wseq(cinfo->regbase, CL_SEQR18, 0x02);
 	}
 
 	/* MCLK select etc. */
 	if (bi->init_sr1f)
-		vga_wseq (cinfo->regbase, CL_SEQR1F, bi->sr1f);
-
-	vga_wcrt (cinfo->regbase, VGA_CRTC_PRESET_ROW, 0x00);	/* Screen A preset row scan: none */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_CURSOR_START, 0x20);	/* Text cursor start: disable text cursor */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_CURSOR_END, 0x00);	/* Text cursor end: - */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_START_HI, 0x00);	/* Screen start address high: 0 */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_START_LO, 0x00);	/* Screen start address low: 0 */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_CURSOR_HI, 0x00);	/* text cursor location high: 0 */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_CURSOR_LO, 0x00);	/* text cursor location low: 0 */
-
-	vga_wcrt (cinfo->regbase, VGA_CRTC_UNDERLINE, 0x00);	/* Underline Row scanline: - */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_MODE, 0xc3);	/* mode control: timing enable, byte mode, no compat modes */
-	vga_wcrt (cinfo->regbase, VGA_CRTC_LINE_COMPARE, 0x00);	/* Line Compare: not needed */
+		vga_wseq(cinfo->regbase, CL_SEQR1F, bi->sr1f);
+
+	/* Screen A preset row scan: none */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_PRESET_ROW, 0x00);
+	/* Text cursor start: disable text cursor */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_CURSOR_START, 0x20);
+	/* Text cursor end: - */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_CURSOR_END, 0x00);
+	/* Screen start address high: 0 */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_START_HI, 0x00);
+	/* Screen start address low: 0 */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_START_LO, 0x00);
+	/* text cursor location high: 0 */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_CURSOR_HI, 0x00);
+	/* text cursor location low: 0 */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_CURSOR_LO, 0x00);
+
+	/* Underline Row scanline: - */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_UNDERLINE, 0x00);
+	/* mode control: timing enable, byte mode, no compat modes */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_MODE, 0xc3);
+	/* Line Compare: not needed */
+	vga_wcrt(cinfo->regbase, VGA_CRTC_LINE_COMPARE, 0x00);
 	/* ### add 0x40 for text modes with > 30 MHz pixclock */
-	vga_wcrt (cinfo->regbase, CL_CRT1B, 0x02);	/* ext. display controls: ext.adr. wrap */
-
-	vga_wgfx (cinfo->regbase, VGA_GFX_SR_VALUE, 0x00);	/* Set/Reset registes: - */
-	vga_wgfx (cinfo->regbase, VGA_GFX_SR_ENABLE, 0x00);	/* Set/Reset enable: - */
-	vga_wgfx (cinfo->regbase, VGA_GFX_COMPARE_VALUE, 0x00);	/* Color Compare: - */
-	vga_wgfx (cinfo->regbase, VGA_GFX_DATA_ROTATE, 0x00);	/* Data Rotate: - */
-	vga_wgfx (cinfo->regbase, VGA_GFX_PLANE_READ, 0x00);	/* Read Map Select: - */
-	vga_wgfx (cinfo->regbase, VGA_GFX_MODE, 0x00);	/* Mode: conf. for 16/4/2 color mode, no odd/even, read/write mode 0 */
-	vga_wgfx (cinfo->regbase, VGA_GFX_MISC, 0x01);	/* Miscellaneous: memory map base address, graphics mode */
-	vga_wgfx (cinfo->regbase, VGA_GFX_COMPARE_MASK, 0x0f);	/* Color Don't care: involve all planes */
-	vga_wgfx (cinfo->regbase, VGA_GFX_BIT_MASK, 0xff);	/* Bit Mask: no mask at all */
+	/* ext. display controls: ext.adr. wrap */
+	vga_wcrt(cinfo->regbase, CL_CRT1B, 0x02);
+
+	/* Set/Reset registers: - */
+	vga_wgfx(cinfo->regbase, VGA_GFX_SR_VALUE, 0x00);
+	/* Set/Reset enable: - */
+	vga_wgfx(cinfo->regbase, VGA_GFX_SR_ENABLE, 0x00);
+	/* Color Compare: - */
+	vga_wgfx(cinfo->regbase, VGA_GFX_COMPARE_VALUE, 0x00);
+	/* Data Rotate: - */
+	vga_wgfx(cinfo->regbase, VGA_GFX_DATA_ROTATE, 0x00);
+	/* Read Map Select: - */
+	vga_wgfx(cinfo->regbase, VGA_GFX_PLANE_READ, 0x00);
+	/* Mode: conf. for 16/4/2 color mode, no odd/even, read/write mode 0 */
+	vga_wgfx(cinfo->regbase, VGA_GFX_MODE, 0x00);
+	/* Miscellaneous: memory map base address, graphics mode */
+	vga_wgfx(cinfo->regbase, VGA_GFX_MISC, 0x01);
+	/* Color Don't care: involve all planes */
+	vga_wgfx(cinfo->regbase, VGA_GFX_COMPARE_MASK, 0x0f);
+	/* Bit Mask: no mask at all */
+	vga_wgfx(cinfo->regbase, VGA_GFX_BIT_MASK, 0xff);
 	if (cinfo->btype == BT_ALPINE)
-		vga_wgfx (cinfo->regbase, CL_GRB, 0x20);	/* (5434 can't have bit 3 set for bitblt) */
+		/* (5434 can't have bit 3 set for bitblt) */
+		vga_wgfx(cinfo->regbase, CL_GRB, 0x20);
 	else
-		vga_wgfx (cinfo->regbase, CL_GRB, 0x28);	/* Graphics controller mode extensions: finer granularity, 8byte data latches */
-
-	vga_wgfx (cinfo->regbase, CL_GRC, 0xff);	/* Color Key compare: - */
-	vga_wgfx (cinfo->regbase, CL_GRD, 0x00);	/* Color Key compare mask: - */
-	vga_wgfx (cinfo->regbase, CL_GRE, 0x00);	/* Miscellaneous control: - */
-	/*  vga_wgfx (cinfo->regbase, CL_GR10, 0x00); *//* Background color byte 1: - */
-/*  vga_wgfx (cinfo->regbase, CL_GR11, 0x00); */
-
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE0, 0x00);	/* Attribute Controller palette registers: "identity mapping" */
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE1, 0x01);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE2, 0x02);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE3, 0x03);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE4, 0x04);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE5, 0x05);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE6, 0x06);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE7, 0x07);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE8, 0x08);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTE9, 0x09);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTEA, 0x0a);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTEB, 0x0b);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTEC, 0x0c);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTED, 0x0d);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTEE, 0x0e);
-	vga_wattr (cinfo->regbase, VGA_ATC_PALETTEF, 0x0f);
-
-	vga_wattr (cinfo->regbase, VGA_ATC_MODE, 0x01);	/* Attribute Controller mode: graphics mode */
-	vga_wattr (cinfo->regbase, VGA_ATC_OVERSCAN, 0x00);	/* Overscan color reg.: reg. 0 */
-	vga_wattr (cinfo->regbase, VGA_ATC_PLANE_ENABLE, 0x0f);	/* Color Plane enable: Enable all 4 planes */
-/* ###  vga_wattr (cinfo->regbase, CL_AR33, 0x00); * Pixel Panning: - */
-	vga_wattr (cinfo->regbase, VGA_ATC_COLOR_PAGE, 0x00);	/* Color Select: - */
-
-	WGen (cinfo, VGA_PEL_MSK, 0xff);	/* Pixel mask: no mask */
+	/* Graphics controller mode extensions: finer granularity,
+	 * 8byte data latches
+	 */
+		vga_wgfx(cinfo->regbase, CL_GRB, 0x28);
+
+	vga_wgfx(cinfo->regbase, CL_GRC, 0xff);	/* Color Key compare: - */
+	vga_wgfx(cinfo->regbase, CL_GRD, 0x00);	/* Color Key compare mask: - */
+	vga_wgfx(cinfo->regbase, CL_GRE, 0x00);	/* Miscellaneous control: - */
+	/* Background color byte 1: - */
+	/*  vga_wgfx (cinfo->regbase, CL_GR10, 0x00); */
+	/*  vga_wgfx (cinfo->regbase, CL_GR11, 0x00); */
+
+	/* Attribute Controller palette registers: "identity mapping" */
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE0, 0x00);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE1, 0x01);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE2, 0x02);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE3, 0x03);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE4, 0x04);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE5, 0x05);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE6, 0x06);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE7, 0x07);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE8, 0x08);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTE9, 0x09);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTEA, 0x0a);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTEB, 0x0b);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTEC, 0x0c);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTED, 0x0d);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTEE, 0x0e);
+	vga_wattr(cinfo->regbase, VGA_ATC_PALETTEF, 0x0f);
+
+	/* Attribute Controller mode: graphics mode */
+	vga_wattr(cinfo->regbase, VGA_ATC_MODE, 0x01);
+	/* Overscan color reg.: reg. 0 */
+	vga_wattr(cinfo->regbase, VGA_ATC_OVERSCAN, 0x00);
+	/* Color Plane enable: Enable all 4 planes */
+	vga_wattr(cinfo->regbase, VGA_ATC_PLANE_ENABLE, 0x0f);
+/* ###  vga_wattr(cinfo->regbase, CL_AR33, 0x00); * Pixel Panning: - */
+	/* Color Select: - */
+	vga_wattr(cinfo->regbase, VGA_ATC_COLOR_PAGE, 0x00);
+
+	WGen(cinfo, VGA_PEL_MSK, 0xff);	/* Pixel mask: no mask */
 
 	if (cinfo->btype != BT_ALPINE && cinfo->btype != BT_GD5480)
-		WGen (cinfo, VGA_MIS_W, 0xc3);	/* polarity (-/-), enable display mem, VGA_CRTC_START_HI i/o base = color */
+	/* polarity (-/-), enable display mem,
+	 * VGA_CRTC_START_HI i/o base = color
+	 */
+		WGen(cinfo, VGA_MIS_W, 0xc3);
 
-	vga_wgfx (cinfo->regbase, CL_GR31, 0x04);	/* BLT Start/status: Blitter reset */
-	vga_wgfx (cinfo->regbase, CL_GR31, 0x00);	/* - " -           : "end-of-reset" */
+	/* BLT Start/status: Blitter reset */
+	vga_wgfx(cinfo->regbase, CL_GR31, 0x04);
+	/* - " -	   : "end-of-reset" */
+	vga_wgfx(cinfo->regbase, CL_GR31, 0x00);
 
 	/* misc... */
-	WHDR (cinfo, 0);	/* Hidden DAC register: - */
+	WHDR(cinfo, 0);	/* Hidden DAC register: - */
 
-	printk (KERN_DEBUG "cirrusfb: This board has %ld bytes of DRAM memory\n", cinfo->size);
-	DPRINTK ("EXIT\n");
+	printk(KERN_DEBUG "cirrusfb: This board has %ld bytes of DRAM memory\n",
+		info->screen_size);
+	DPRINTK("EXIT\n");
 	return;
 }
 
-static void switch_monitor (struct cirrusfb_info *cinfo, int on)
+static void switch_monitor(struct cirrusfb_info *cinfo, int on)
 {
 #ifdef CONFIG_ZORRO /* only works on Zorro boards */
 	static int IsOn = 0;	/* XXX not ok for multiple boards */
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
 	if (cinfo->btype == BT_PICASSO4)
 		return;		/* nothing to switch */
@@ -1963,77 +1993,56 @@ static void switch_monitor (struct cirrusfb_info *cinfo, int on)
 		return;		/* nothing to switch */
 	if (cinfo->btype == BT_PICASSO) {
 		if ((on && !IsOn) || (!on && IsOn))
-			WSFR (cinfo, 0xff);
+			WSFR(cinfo, 0xff);
 
-		DPRINTK ("EXIT\n");
+		DPRINTK("EXIT\n");
 		return;
 	}
 	if (on) {
 		switch (cinfo->btype) {
 		case BT_SD64:
-			WSFR (cinfo, cinfo->SFR | 0x21);
+			WSFR(cinfo, cinfo->SFR | 0x21);
 			break;
 		case BT_PICCOLO:
-			WSFR (cinfo, cinfo->SFR | 0x28);
+			WSFR(cinfo, cinfo->SFR | 0x28);
 			break;
 		case BT_SPECTRUM:
-			WSFR (cinfo, 0x6f);
+			WSFR(cinfo, 0x6f);
 			break;
 		default: /* do nothing */ break;
 		}
 	} else {
 		switch (cinfo->btype) {
 		case BT_SD64:
-			WSFR (cinfo, cinfo->SFR & 0xde);
+			WSFR(cinfo, cinfo->SFR & 0xde);
 			break;
 		case BT_PICCOLO:
-			WSFR (cinfo, cinfo->SFR & 0xd7);
+			WSFR(cinfo, cinfo->SFR & 0xd7);
 			break;
 		case BT_SPECTRUM:
-			WSFR (cinfo, 0x4f);
+			WSFR(cinfo, 0x4f);
 			break;
 		default: /* do nothing */ break;
 		}
 	}
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 #endif /* CONFIG_ZORRO */
 }
 
-
 /******************************************/
 /* Linux 2.6-style  accelerated functions */
 /******************************************/
 
-static void cirrusfb_prim_fillrect(struct cirrusfb_info *cinfo,
-				   const struct fb_fillrect *region)
-{
-	int m; /* bytes per pixel */
-	u32 color = (cinfo->info->fix.visual == FB_VISUAL_TRUECOLOR) ?
-		cinfo->pseudo_palette[region->color] : region->color;
-
-	if(cinfo->info->var.bits_per_pixel == 1) {
-		cirrusfb_RectFill(cinfo->regbase, cinfo->info->var.bits_per_pixel,
-				  region->dx / 8, region->dy,
-				  region->width / 8, region->height,
-				  color,
-				  cinfo->currentmode.line_length);
-	} else {
-		m = ( cinfo->info->var.bits_per_pixel + 7 ) / 8;
-		cirrusfb_RectFill(cinfo->regbase, cinfo->info->var.bits_per_pixel,
-				  region->dx * m, region->dy,
-				  region->width * m, region->height,
-				  color,
-				  cinfo->currentmode.line_length);
-	}
-	return;
-}
-
-static void cirrusfb_fillrect (struct fb_info *info, const struct fb_fillrect *region)
+static void cirrusfb_fillrect(struct fb_info *info,
+			      const struct fb_fillrect *region)
 {
-	struct cirrusfb_info *cinfo = info->par;
 	struct fb_fillrect modded;
 	int vxres, vyres;
+	struct cirrusfb_info *cinfo = info->par;
+	int m = info->var.bits_per_pixel;
+	u32 color = (info->fix.visual == FB_VISUAL_TRUECOLOR) ?
+		cinfo->pseudo_palette[region->color] : region->color;
 
 	if (info->state != FBINFO_STATE_RUNNING)
 		return;
@@ -2047,49 +2056,30 @@ static void cirrusfb_fillrect (struct fb_info *info, const struct fb_fillrect *r
 
 	memcpy(&modded, region, sizeof(struct fb_fillrect));
 
-	if(!modded.width || !modded.height ||
+	if (!modded.width || !modded.height ||
 	   modded.dx >= vxres || modded.dy >= vyres)
 		return;
 
-	if(modded.dx + modded.width  > vxres) modded.width  = vxres - modded.dx;
-	if(modded.dy + modded.height > vyres) modded.height = vyres - modded.dy;
-
-	cirrusfb_prim_fillrect(cinfo, &modded);
-}
-
-static void cirrusfb_prim_copyarea(struct cirrusfb_info *cinfo,
-				   const struct fb_copyarea *area)
-{
-	int m; /* bytes per pixel */
-	if(cinfo->info->var.bits_per_pixel == 1) {
-		cirrusfb_BitBLT(cinfo->regbase, cinfo->info->var.bits_per_pixel,
-				area->sx / 8, area->sy,
-				area->dx / 8, area->dy,
-				area->width / 8, area->height,
-				cinfo->currentmode.line_length);
-	} else {
-		m = ( cinfo->info->var.bits_per_pixel + 7 ) / 8;
-		cirrusfb_BitBLT(cinfo->regbase, cinfo->info->var.bits_per_pixel,
-				area->sx * m, area->sy,
-				area->dx * m, area->dy,
-				area->width * m, area->height,
-				cinfo->currentmode.line_length);
-	}
-	return;
+	if (modded.dx + modded.width  > vxres)
+		modded.width  = vxres - modded.dx;
+	if (modded.dy + modded.height > vyres)
+		modded.height = vyres - modded.dy;
+
+	/* blit the clipped rectangle (modded), not the caller's region */
+	cirrusfb_RectFill(cinfo->regbase, info->var.bits_per_pixel,
+			  (modded.dx * m) / 8, modded.dy,
+			  (modded.width * m) / 8, modded.height,
+			  color,
+			  info->fix.line_length);
 }
 
-
-static void cirrusfb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+static void cirrusfb_copyarea(struct fb_info *info,
+			      const struct fb_copyarea *area)
 {
-	struct cirrusfb_info *cinfo = info->par;
 	struct fb_copyarea modded;
 	u32 vxres, vyres;
-	modded.sx = area->sx;
-	modded.sy = area->sy;
-	modded.dx = area->dx;
-	modded.dy = area->dy;
-	modded.width  = area->width;
-	modded.height = area->height;
+	struct cirrusfb_info *cinfo = info->par;
+	int m = info->var.bits_per_pixel;
 
 	if (info->state != FBINFO_STATE_RUNNING)
 		return;
@@ -2100,90 +2090,106 @@ static void cirrusfb_copyarea(struct fb_info *info, const struct fb_copyarea *ar
 
 	vxres = info->var.xres_virtual;
 	vyres = info->var.yres_virtual;
+	memcpy(&modded, area, sizeof(struct fb_copyarea));
 
-	if(!modded.width || !modded.height ||
+	if (!modded.width || !modded.height ||
 	   modded.sx >= vxres || modded.sy >= vyres ||
 	   modded.dx >= vxres || modded.dy >= vyres)
 		return;
 
-	if(modded.sx + modded.width > vxres)  modded.width = vxres - modded.sx;
-	if(modded.dx + modded.width > vxres)  modded.width = vxres - modded.dx;
-	if(modded.sy + modded.height > vyres) modded.height = vyres - modded.sy;
-	if(modded.dy + modded.height > vyres) modded.height = vyres - modded.dy;
+	if (modded.sx + modded.width > vxres)
+		modded.width = vxres - modded.sx;
+	if (modded.dx + modded.width > vxres)
+		modded.width = vxres - modded.dx;
+	if (modded.sy + modded.height > vyres)
+		modded.height = vyres - modded.sy;
+	if (modded.dy + modded.height > vyres)
+		modded.height = vyres - modded.dy;
+
+	/* use the clipped copy (modded); area is the unclipped request */
+	cirrusfb_BitBLT(cinfo->regbase, m, (modded.sx * m) / 8, modded.sy,
+			(modded.dx * m) / 8, modded.dy,
+			(modded.width * m) / 8, modded.height,
+			info->fix.line_length);
 
-	cirrusfb_prim_copyarea(cinfo, &modded);
 }
 
-static void cirrusfb_imageblit(struct fb_info *info, const struct fb_image *image)
+static void cirrusfb_imageblit(struct fb_info *info,
+			       const struct fb_image *image)
 {
 	struct cirrusfb_info *cinfo = info->par;
 
-        cirrusfb_WaitBLT(cinfo->regbase);
+	cirrusfb_WaitBLT(cinfo->regbase);
 	cfb_imageblit(info, image);
 }
 
-
 #ifdef CONFIG_PPC_PREP
 #define PREP_VIDEO_BASE ((volatile unsigned long) 0xC0000000)
 #define PREP_IO_BASE    ((volatile unsigned char *) 0x80000000)
-static void get_prep_addrs (unsigned long *display, unsigned long *registers)
+static void get_prep_addrs(unsigned long *display, unsigned long *registers)
 {
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
 	*display = PREP_VIDEO_BASE;
 	*registers = (unsigned long) PREP_IO_BASE;
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
 #endif				/* CONFIG_PPC_PREP */
 
-
 #ifdef CONFIG_PCI
-static int release_io_ports = 0;
+static int release_io_ports;
 
 /* Pulled the logic from XFree86 Cirrus driver to get the memory size,
  * based on the DRAM bandwidth bit and DRAM bank switching bit.  This
  * works with 1MB, 2MB and 4MB configurations (which the Motorola boards
  * seem to have. */
-static unsigned int cirrusfb_get_memsize (u8 __iomem *regbase)
+static unsigned int cirrusfb_get_memsize(u8 __iomem *regbase)
 {
 	unsigned long mem;
 	unsigned char SRF;
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
-	SRF = vga_rseq (regbase, CL_SEQRF);
+	SRF = vga_rseq(regbase, CL_SEQRF);
 	switch ((SRF & 0x18)) {
-	    case 0x08: mem = 512 * 1024; break;
-	    case 0x10: mem = 1024 * 1024; break;
-		/* 64-bit DRAM data bus width; assume 2MB. Also indicates 2MB memory
-		   * on the 5430. */
-	    case 0x18: mem = 2048 * 1024; break;
-	    default: printk ("CLgenfb: Unknown memory size!\n");
+	case 0x08:
+		mem = 512 * 1024;
+		break;
+	case 0x10:
+		mem = 1024 * 1024;
+		break;
+	/* 64-bit DRAM data bus width; assume 2MB. Also indicates 2MB memory
+	 * on the 5430.
+	 */
+	case 0x18:
+		mem = 2048 * 1024;
+		break;
+	default:
+		printk(KERN_WARNING "CLgenfb: Unknown memory size!\n");
 		mem = 1024 * 1024;
 	}
-	if (SRF & 0x80) {
-		/* If DRAM bank switching is enabled, there must be twice as much
-		   * memory installed. (4MB on the 5434) */
+	if (SRF & 0x80)
+	/* If DRAM bank switching is enabled, there must be twice as much
+	 * memory installed. (4MB on the 5434)
+	 */
 		mem *= 2;
-	}
+
 	/* TODO: Handling of GD5446/5480 (see XF86 sources ...) */
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 	return mem;
 }
 
-
-
-static void get_pci_addrs (const struct pci_dev *pdev,
-			   unsigned long *display, unsigned long *registers)
+static void get_pci_addrs(const struct pci_dev *pdev,
+			  unsigned long *display, unsigned long *registers)
 {
-	assert (pdev != NULL);
-	assert (display != NULL);
-	assert (registers != NULL);
+	assert(pdev != NULL);
+	assert(display != NULL);
+	assert(registers != NULL);
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
 	*display = 0;
 	*registers = 0;
@@ -2198,51 +2204,47 @@ static void get_pci_addrs (const struct pci_dev *pdev,
 		*registers = pci_resource_start(pdev, 1);
 	}
 
-	assert (*display != 0);
+	assert(*display != 0);
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
-
-static void cirrusfb_pci_unmap (struct cirrusfb_info *cinfo)
+static void cirrusfb_pci_unmap(struct fb_info *info)
 {
+	struct cirrusfb_info *cinfo = info->par;
 	struct pci_dev *pdev = cinfo->pdev;
 
-	iounmap(cinfo->fbmem);
+	iounmap(info->screen_base);
 #if 0 /* if system didn't claim this region, we would... */
 	release_mem_region(0xA0000, 65535);
 #endif
 	if (release_io_ports)
 		release_region(0x3C0, 32);
 	pci_release_regions(pdev);
-	framebuffer_release(cinfo->info);
 }
 #endif /* CONFIG_PCI */
 
-
 #ifdef CONFIG_ZORRO
-static void __devexit cirrusfb_zorro_unmap (struct cirrusfb_info *cinfo)
+static void __devexit cirrusfb_zorro_unmap(struct cirrusfb_info *cinfo)
 {
 	zorro_release_device(cinfo->zdev);
 
 	if (cinfo->btype == BT_PICASSO4) {
 		cinfo->regbase -= 0x600000;
-		iounmap ((void *)cinfo->regbase);
-		iounmap ((void *)cinfo->fbmem);
+		iounmap((void *)cinfo->regbase);
+		iounmap(info->screen_base);
 	} else {
 		if (zorro_resource_start(cinfo->zdev) > 0x01000000)
-			iounmap ((void *)cinfo->fbmem);
+			iounmap(info->screen_base);
 	}
-	framebuffer_release(cinfo->info);
 }
 #endif /* CONFIG_ZORRO */
 
-static int cirrusfb_set_fbinfo(struct cirrusfb_info *cinfo)
+static int cirrusfb_set_fbinfo(struct fb_info *info)
 {
-	struct fb_info *info = cinfo->info;
+	struct cirrusfb_info *cinfo = info->par;
 	struct fb_var_screeninfo *var = &info->var;
 
-	info->par = cinfo;
 	info->pseudo_palette = cinfo->pseudo_palette;
 	info->flags = FBINFO_DEFAULT
 		    | FBINFO_HWACCEL_XPAN
@@ -2252,7 +2254,6 @@ static int cirrusfb_set_fbinfo(struct cirrusfb_info *cinfo)
 	if (noaccel)
 		info->flags |= FBINFO_HWACCEL_DISABLED;
 	info->fbops = &cirrusfb_ops;
-	info->screen_base = cinfo->fbmem;
 	if (cinfo->btype == BT_GD5480) {
 		if (var->bits_per_pixel == 16)
 			info->screen_base += 1 * MB_;
@@ -2266,18 +2267,15 @@ static int cirrusfb_set_fbinfo(struct cirrusfb_info *cinfo)
 
 	/* monochrome: only 1 memory plane */
 	/* 8 bit and above: Use whole memory area */
-	info->fix.smem_start = cinfo->fbmem_phys;
-	info->fix.smem_len   = (var->bits_per_pixel == 1) ? cinfo->size / 4 : cinfo->size;
-	info->fix.type       = cinfo->currentmode.type;
+	info->fix.smem_len   = info->screen_size;
+	if (var->bits_per_pixel == 1)
+		info->fix.smem_len /= 4;
 	info->fix.type_aux   = 0;
-	info->fix.visual     = cinfo->currentmode.visual;
 	info->fix.xpanstep   = 1;
 	info->fix.ypanstep   = 1;
 	info->fix.ywrapstep  = 0;
-	info->fix.line_length = cinfo->currentmode.line_length;
 
 	/* FIXME: map region at 0xB8000 if available, fill in here */
-	info->fix.mmio_start = cinfo->fbregs_phys;
 	info->fix.mmio_len   = 0;
 	info->fix.accel = FB_ACCEL_NONE;
 
@@ -2286,23 +2284,23 @@ static int cirrusfb_set_fbinfo(struct cirrusfb_info *cinfo)
 	return 0;
 }
 
-static int cirrusfb_register(struct cirrusfb_info *cinfo)
+static int cirrusfb_register(struct fb_info *info)
 {
-	struct fb_info *info;
+	struct cirrusfb_info *cinfo = info->par;
 	int err;
-	cirrusfb_board_t btype;
+	enum cirrus_board btype;
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
-	printk (KERN_INFO "cirrusfb: Driver for Cirrus Logic based graphic boards, v" CIRRUSFB_VERSION "\n");
+	printk(KERN_INFO "cirrusfb: Driver for Cirrus Logic based "
+		"graphic boards, v" CIRRUSFB_VERSION "\n");
 
-	info = cinfo->info;
 	btype = cinfo->btype;
 
 	/* sanity checks */
-	assert (btype != BT_NONE);
+	assert(btype != BT_NONE);
 
-	DPRINTK ("cirrusfb: (RAM start set to: 0x%p)\n", cinfo->fbmem);
+	DPRINTK("cirrusfb: (RAM start set to: 0x%p)\n", info->screen_base);
 
 	/* Make pretend we've set the var so our structures are in a "good" */
 	/* state, even though we haven't written the mode to the hw yet...  */
@@ -2317,47 +2315,49 @@ static int cirrusfb_register(struct cirrusfb_info *cinfo)
 	}
 
 	/* set all the vital stuff */
-	cirrusfb_set_fbinfo(cinfo);
+	cirrusfb_set_fbinfo(info);
 
 	err = register_framebuffer(info);
 	if (err < 0) {
-		printk (KERN_ERR "cirrusfb: could not register fb device; err = %d!\n", err);
+		printk(KERN_ERR "cirrusfb: could not register "
+			"fb device; err = %d!\n", err);
 		goto err_dealloc_cmap;
 	}
 
-	DPRINTK ("EXIT, returning 0\n");
+	DPRINTK("EXIT, returning 0\n");
 	return 0;
 
 err_dealloc_cmap:
 	fb_dealloc_cmap(&info->cmap);
 err_unmap_cirrusfb:
-	cinfo->unmap(cinfo);
+	cinfo->unmap(info);
+	framebuffer_release(info);
 	return err;
 }
 
-static void __devexit cirrusfb_cleanup (struct fb_info *info)
+static void __devexit cirrusfb_cleanup(struct fb_info *info)
 {
 	struct cirrusfb_info *cinfo = info->par;
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
-	switch_monitor (cinfo, 0);
+	switch_monitor(cinfo, 0);
 
-	unregister_framebuffer (info);
-	fb_dealloc_cmap (&info->cmap);
-	printk ("Framebuffer unregistered\n");
-	cinfo->unmap(cinfo);
+	unregister_framebuffer(info);
+	fb_dealloc_cmap(&info->cmap);
+	printk("Framebuffer unregistered\n");
+	cinfo->unmap(info);
+	framebuffer_release(info);
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
-
 #ifdef CONFIG_PCI
-static int cirrusfb_pci_register (struct pci_dev *pdev,
+static int cirrusfb_pci_register(struct pci_dev *pdev,
 				  const struct pci_device_id *ent)
 {
 	struct cirrusfb_info *cinfo;
 	struct fb_info *info;
-	cirrusfb_board_t btype;
+	enum cirrus_board btype;
 	unsigned long board_addr, board_size;
 	int ret;
 
@@ -2375,35 +2375,37 @@ static int cirrusfb_pci_register (struct pci_dev *pdev,
 	}
 
 	cinfo = info->par;
-	cinfo->info = info;
 	cinfo->pdev = pdev;
-	cinfo->btype = btype = (cirrusfb_board_t) ent->driver_data;
+	cinfo->btype = btype = (enum cirrus_board) ent->driver_data;
 
-	DPRINTK (" Found PCI device, base address 0 is 0x%lx, btype set to %d\n",
+	DPRINTK(" Found PCI device, base address 0 is 0x%x, btype set to %d\n",
 		pdev->resource[0].start, btype);
-	DPRINTK (" base address 1 is 0x%lx\n", pdev->resource[1].start);
+	DPRINTK(" base address 1 is 0x%x\n", pdev->resource[1].start);
 
-	if(isPReP) {
-		pci_write_config_dword (pdev, PCI_BASE_ADDRESS_0, 0x00000000);
+	if (isPReP) {
+		pci_write_config_dword(pdev, PCI_BASE_ADDRESS_0, 0x00000000);
 #ifdef CONFIG_PPC_PREP
-		get_prep_addrs (&board_addr, &cinfo->fbregs_phys);
+		get_prep_addrs(&board_addr, &info->fix.mmio_start);
 #endif
-		/* PReP dies if we ioremap the IO registers, but it works w/out... */
-		cinfo->regbase = (char __iomem *) cinfo->fbregs_phys;
+	/* PReP dies if we ioremap the IO registers, but it works w/out... */
+		cinfo->regbase = (char __iomem *) info->fix.mmio_start;
 	} else {
-		DPRINTK ("Attempt to get PCI info for Cirrus Graphics Card\n");
-		get_pci_addrs (pdev, &board_addr, &cinfo->fbregs_phys);
-		cinfo->regbase = NULL;		/* FIXME: this forces VGA.  alternatives? */
+		DPRINTK("Attempt to get PCI info for Cirrus Graphics Card\n");
+		get_pci_addrs(pdev, &board_addr, &info->fix.mmio_start);
+		/* FIXME: this forces VGA.  alternatives? */
+		cinfo->regbase = NULL;
 	}
 
-	DPRINTK ("Board address: 0x%lx, register address: 0x%lx\n", board_addr, cinfo->fbregs_phys);
+	DPRINTK("Board address: 0x%lx, register address: 0x%lx\n",
+		board_addr, info->fix.mmio_start);
 
 	board_size = (btype == BT_GD5480) ?
-		32 * MB_ : cirrusfb_get_memsize (cinfo->regbase);
+		32 * MB_ : cirrusfb_get_memsize(cinfo->regbase);
 
 	ret = pci_request_regions(pdev, "cirrusfb");
-	if (ret <0) {
-		printk(KERN_ERR "cirrusfb: cannot reserve region 0x%lx, abort\n",
+	if (ret < 0) {
+		printk(KERN_ERR "cirrusfb: cannot reserve region 0x%lx, "
+		       "abort\n",
 		       board_addr);
 		goto err_release_fb;
 	}
@@ -2419,23 +2421,24 @@ static int cirrusfb_pci_register (struct pci_dev *pdev,
 	if (request_region(0x3C0, 32, "cirrusfb"))
 		release_io_ports = 1;
 
-	cinfo->fbmem = ioremap(board_addr, board_size);
-	if (!cinfo->fbmem) {
+	info->screen_base = ioremap(board_addr, board_size);
+	if (!info->screen_base) {
 		ret = -EIO;
 		goto err_release_legacy;
 	}
 
-	cinfo->fbmem_phys = board_addr;
-	cinfo->size = board_size;
+	info->fix.smem_start = board_addr;
+	info->screen_size = board_size;
 	cinfo->unmap = cirrusfb_pci_unmap;
 
-	printk (" RAM (%lu kB) at 0xx%lx, ", cinfo->size / KB_, board_addr);
-	printk ("Cirrus Logic chipset on PCI bus\n");
+	printk(KERN_INFO " RAM (%lu kB) at 0xx%lx, ",
+		info->screen_size >> 10, board_addr);
+	printk(KERN_INFO "Cirrus Logic chipset on PCI bus\n");
 	pci_set_drvdata(pdev, info);
 
-	ret = cirrusfb_register(cinfo);
+	ret = cirrusfb_register(info);
 	if (ret)
-		iounmap(cinfo->fbmem);
+		iounmap(info->screen_base);
 	return ret;
 
 err_release_legacy:
@@ -2453,14 +2456,14 @@ err_out:
 	return ret;
 }
 
-static void __devexit cirrusfb_pci_unregister (struct pci_dev *pdev)
+static void __devexit cirrusfb_pci_unregister(struct pci_dev *pdev)
 {
 	struct fb_info *info = pci_get_drvdata(pdev);
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
-	cirrusfb_cleanup (info);
+	cirrusfb_cleanup(info);
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
 static struct pci_driver cirrusfb_pci_driver = {
@@ -2477,14 +2480,13 @@ static struct pci_driver cirrusfb_pci_driver = {
 };
 #endif /* CONFIG_PCI */
 
-
 #ifdef CONFIG_ZORRO
 static int cirrusfb_zorro_register(struct zorro_dev *z,
 				   const struct zorro_device_id *ent)
 {
 	struct cirrusfb_info *cinfo;
 	struct fb_info *info;
-	cirrusfb_board_t btype;
+	enum cirrus_board btype;
 	struct zorro_dev *z2 = NULL;
 	unsigned long board_addr, board_size, size;
 	int ret;
@@ -2498,83 +2500,86 @@ static int cirrusfb_zorro_register(struct zorro_dev *z,
 
 	info = framebuffer_alloc(sizeof(struct cirrusfb_info), &z->dev);
 	if (!info) {
-		printk (KERN_ERR "cirrusfb: could not allocate memory\n");
+		printk(KERN_ERR "cirrusfb: could not allocate memory\n");
 		ret = -ENOMEM;
 		goto err_out;
 	}
 
 	cinfo = info->par;
-	cinfo->info = info;
 	cinfo->btype = btype;
 
-	assert (z > 0);
-	assert (z2 >= 0);
-	assert (btype != BT_NONE);
+	assert(z > 0);
+	assert(z2 >= 0);
+	assert(btype != BT_NONE);
 
 	cinfo->zdev = z;
 	board_addr = zorro_resource_start(z);
 	board_size = zorro_resource_len(z);
-	cinfo->size = size;
+	info->screen_size = size;
 
 	if (!zorro_request_device(z, "cirrusfb")) {
-		printk(KERN_ERR "cirrusfb: cannot reserve region 0x%lx, abort\n",
+		printk(KERN_ERR "cirrusfb: cannot reserve region 0x%lx, "
+		       "abort\n",
 		       board_addr);
 		ret = -EBUSY;
 		goto err_release_fb;
 	}
 
-	printk (" RAM (%lu MB) at $%lx, ", board_size / MB_, board_addr);
+	printk(" RAM (%lu MB) at $%lx, ", board_size / MB_, board_addr);
 
 	ret = -EIO;
 
 	if (btype == BT_PICASSO4) {
-		printk (" REG at $%lx\n", board_addr + 0x600000);
+		printk(KERN_INFO " REG at $%lx\n", board_addr + 0x600000);
 
 		/* To be precise, for the P4 this is not the */
 		/* begin of the board, but the begin of RAM. */
 		/* for P4, map in its address space in 2 chunks (### TEST! ) */
 		/* (note the ugly hardcoded 16M number) */
-		cinfo->regbase = ioremap (board_addr, 16777216);
+		cinfo->regbase = ioremap(board_addr, 16777216);
 		if (!cinfo->regbase)
 			goto err_release_region;
 
-		DPRINTK ("cirrusfb: Virtual address for board set to: $%p\n", cinfo->regbase);
+		DPRINTK("cirrusfb: Virtual address for board set to: $%p\n",
+			cinfo->regbase);
 		cinfo->regbase += 0x600000;
-		cinfo->fbregs_phys = board_addr + 0x600000;
+		info->fix.mmio_start = board_addr + 0x600000;
 
-		cinfo->fbmem_phys = board_addr + 16777216;
-		cinfo->fbmem = ioremap (cinfo->fbmem_phys, 16777216);
-		if (!cinfo->fbmem)
+		info->fix.smem_start = board_addr + 16777216;
+		info->screen_base = ioremap(info->fix.smem_start, 16777216);
+		if (!info->screen_base)
 			goto err_unmap_regbase;
 	} else {
-		printk (" REG at $%lx\n", (unsigned long) z2->resource.start);
+		printk(KERN_INFO " REG at $%lx\n",
+			(unsigned long) z2->resource.start);
 
-		cinfo->fbmem_phys = board_addr;
+		info->fix.smem_start = board_addr;
 		if (board_addr > 0x01000000)
-			cinfo->fbmem = ioremap (board_addr, board_size);
+			info->screen_base = ioremap(board_addr, board_size);
 		else
-			cinfo->fbmem = (caddr_t) ZTWO_VADDR (board_addr);
-		if (!cinfo->fbmem)
+			info->screen_base = (caddr_t) ZTWO_VADDR(board_addr);
+		if (!info->screen_base)
 			goto err_release_region;
 
 		/* set address for REG area of board */
-		cinfo->regbase = (caddr_t) ZTWO_VADDR (z2->resource.start);
-		cinfo->fbregs_phys = z2->resource.start;
+		cinfo->regbase = (caddr_t) ZTWO_VADDR(z2->resource.start);
+		info->fix.mmio_start = z2->resource.start;
 
-		DPRINTK ("cirrusfb: Virtual address for board set to: $%p\n", cinfo->regbase);
+		DPRINTK("cirrusfb: Virtual address for board set to: $%p\n",
+			cinfo->regbase);
 	}
 	cinfo->unmap = cirrusfb_zorro_unmap;
 
-	printk (KERN_INFO "Cirrus Logic chipset on Zorro bus\n");
+	printk(KERN_INFO "Cirrus Logic chipset on Zorro bus\n");
 	zorro_set_drvdata(z, info);
 
 	ret = cirrusfb_register(cinfo);
 	if (ret) {
 		if (btype == BT_PICASSO4) {
-			iounmap(cinfo->fbmem);
+			iounmap(info->screen_base);
 			iounmap(cinfo->regbase - 0x600000);
 		} else if (board_addr > 0x01000000)
-			iounmap(cinfo->fbmem);
+			iounmap(info->screen_base);
 	}
 	return ret;
 
@@ -2592,11 +2597,11 @@ err_out:
 void __devexit cirrusfb_zorro_unregister(struct zorro_dev *z)
 {
 	struct fb_info *info = zorro_get_drvdata(z);
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
-	cirrusfb_cleanup (info);
+	cirrusfb_cleanup(info);
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
 static struct zorro_driver cirrusfb_zorro_driver = {
@@ -2628,26 +2633,24 @@ static int __init cirrusfb_init(void)
 	return error;
 }
 
-
-
 #ifndef MODULE
 static int __init cirrusfb_setup(char *options) {
 	char *this_opt, s[32];
 	int i;
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
 	if (!options || !*options)
 		return 0;
 
-	while ((this_opt = strsep (&options, ",")) != NULL) {	
+	while ((this_opt = strsep(&options, ",")) != NULL) {
 		if (!*this_opt) continue;
 
 		DPRINTK("cirrusfb_setup: option '%s'\n", this_opt);
 
 		for (i = 0; i < NUM_TOTAL_MODES; i++) {
-			sprintf (s, "mode:%s", cirrusfb_predefined[i].name);
-			if (strcmp (this_opt, s) == 0)
+			sprintf(s, "mode:%s", cirrusfb_predefined[i].name);
+			if (strcmp(this_opt, s) == 0)
 				cirrusfb_def_mode = i;
 		}
 		if (!strcmp(this_opt, "noaccel"))
@@ -2657,7 +2660,6 @@ static int __init cirrusfb_setup(char *options) {
 }
 #endif
 
-
     /*
      *  Modularization
      */
@@ -2666,7 +2668,7 @@ MODULE_AUTHOR("Copyright 1999,2000 Jeff Garzik <jgarzik@pobox.com>");
 MODULE_DESCRIPTION("Accelerated FBDev driver for Cirrus Logic chips");
 MODULE_LICENSE("GPL");
 
-static void __exit cirrusfb_exit (void)
+static void __exit cirrusfb_exit(void)
 {
 #ifdef CONFIG_PCI
 	pci_unregister_driver(&cirrusfb_pci_driver);
@@ -2682,66 +2684,67 @@ module_init(cirrusfb_init);
 module_exit(cirrusfb_exit);
 #endif
 
-
 /**********************************************************************/
 /* about the following functions - I have used the same names for the */
 /* functions as Markus Wild did in his Retina driver for NetBSD as    */
 /* they just made sense for this purpose. Apart from that, I wrote    */
-/* these functions myself.                                            */
+/* these functions myself.					    */
 /**********************************************************************/
 
 /*** WGen() - write into one of the external/general registers ***/
-static void WGen (const struct cirrusfb_info *cinfo,
+static void WGen(const struct cirrusfb_info *cinfo,
 		  int regnum, unsigned char val)
 {
 	unsigned long regofs = 0;
 
 	if (cinfo->btype == BT_PICASSO) {
 		/* Picasso II specific hack */
-/*              if (regnum == VGA_PEL_IR || regnum == VGA_PEL_D || regnum == CL_VSSM2) */
+/*	      if (regnum == VGA_PEL_IR || regnum == VGA_PEL_D ||
+		  regnum == CL_VSSM2) */
 		if (regnum == VGA_PEL_IR || regnum == VGA_PEL_D)
 			regofs = 0xfff;
 	}
 
-	vga_w (cinfo->regbase, regofs + regnum, val);
+	vga_w(cinfo->regbase, regofs + regnum, val);
 }
 
 /*** RGen() - read out one of the external/general registers ***/
-static unsigned char RGen (const struct cirrusfb_info *cinfo, int regnum)
+static unsigned char RGen(const struct cirrusfb_info *cinfo, int regnum)
 {
 	unsigned long regofs = 0;
 
 	if (cinfo->btype == BT_PICASSO) {
 		/* Picasso II specific hack */
-/*              if (regnum == VGA_PEL_IR || regnum == VGA_PEL_D || regnum == CL_VSSM2) */
+/*	      if (regnum == VGA_PEL_IR || regnum == VGA_PEL_D ||
+		  regnum == CL_VSSM2) */
 		if (regnum == VGA_PEL_IR || regnum == VGA_PEL_D)
 			regofs = 0xfff;
 	}
 
-	return vga_r (cinfo->regbase, regofs + regnum);
+	return vga_r(cinfo->regbase, regofs + regnum);
 }
 
 /*** AttrOn() - turn on VideoEnable for Attribute controller ***/
-static void AttrOn (const struct cirrusfb_info *cinfo)
+static void AttrOn(const struct cirrusfb_info *cinfo)
 {
-	assert (cinfo != NULL);
+	assert(cinfo != NULL);
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
-	if (vga_rcrt (cinfo->regbase, CL_CRT24) & 0x80) {
+	if (vga_rcrt(cinfo->regbase, CL_CRT24) & 0x80) {
 		/* if we're just in "write value" mode, write back the */
 		/* same value as before to not modify anything */
-		vga_w (cinfo->regbase, VGA_ATT_IW,
-		       vga_r (cinfo->regbase, VGA_ATT_R));
+		vga_w(cinfo->regbase, VGA_ATT_IW,
+		      vga_r(cinfo->regbase, VGA_ATT_R));
 	}
 	/* turn on video bit */
-/*      vga_w (cinfo->regbase, VGA_ATT_IW, 0x20); */
-	vga_w (cinfo->regbase, VGA_ATT_IW, 0x33);
+/*      vga_w(cinfo->regbase, VGA_ATT_IW, 0x20); */
+	vga_w(cinfo->regbase, VGA_ATT_IW, 0x33);
 
 	/* dummy write on Reg0 to be on "write index" mode next time */
-	vga_w (cinfo->regbase, VGA_ATT_IW, 0x00);
+	vga_w(cinfo->regbase, VGA_ATT_IW, 0x00);
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
 /*** WHDR() - write into the Hidden DAC register ***/
@@ -2750,119 +2753,115 @@ static void AttrOn (const struct cirrusfb_info *cinfo)
  * registers of their functional group) here is a specialized routine for
  * accessing the HDR
  */
-static void WHDR (const struct cirrusfb_info *cinfo, unsigned char val)
+static void WHDR(const struct cirrusfb_info *cinfo, unsigned char val)
 {
 	unsigned char dummy;
 
 	if (cinfo->btype == BT_PICASSO) {
 		/* Klaus' hint for correct access to HDR on some boards */
 		/* first write 0 to pixel mask (3c6) */
-		WGen (cinfo, VGA_PEL_MSK, 0x00);
-		udelay (200);
+		WGen(cinfo, VGA_PEL_MSK, 0x00);
+		udelay(200);
 		/* next read dummy from pixel address (3c8) */
-		dummy = RGen (cinfo, VGA_PEL_IW);
-		udelay (200);
+		dummy = RGen(cinfo, VGA_PEL_IW);
+		udelay(200);
 	}
 	/* now do the usual stuff to access the HDR */
 
-	dummy = RGen (cinfo, VGA_PEL_MSK);
-	udelay (200);
-	dummy = RGen (cinfo, VGA_PEL_MSK);
-	udelay (200);
-	dummy = RGen (cinfo, VGA_PEL_MSK);
-	udelay (200);
-	dummy = RGen (cinfo, VGA_PEL_MSK);
-	udelay (200);
+	dummy = RGen(cinfo, VGA_PEL_MSK);
+	udelay(200);
+	dummy = RGen(cinfo, VGA_PEL_MSK);
+	udelay(200);
+	dummy = RGen(cinfo, VGA_PEL_MSK);
+	udelay(200);
+	dummy = RGen(cinfo, VGA_PEL_MSK);
+	udelay(200);
 
-	WGen (cinfo, VGA_PEL_MSK, val);
-	udelay (200);
+	WGen(cinfo, VGA_PEL_MSK, val);
+	udelay(200);
 
 	if (cinfo->btype == BT_PICASSO) {
 		/* now first reset HDR access counter */
-		dummy = RGen (cinfo, VGA_PEL_IW);
-		udelay (200);
+		dummy = RGen(cinfo, VGA_PEL_IW);
+		udelay(200);
 
 		/* and at the end, restore the mask value */
 		/* ## is this mask always 0xff? */
-		WGen (cinfo, VGA_PEL_MSK, 0xff);
-		udelay (200);
+		WGen(cinfo, VGA_PEL_MSK, 0xff);
+		udelay(200);
 	}
 }
 
-
 /*** WSFR() - write to the "special function register" (SFR) ***/
-static void WSFR (struct cirrusfb_info *cinfo, unsigned char val)
+static void WSFR(struct cirrusfb_info *cinfo, unsigned char val)
 {
 #ifdef CONFIG_ZORRO
-	assert (cinfo->regbase != NULL);
+	assert(cinfo->regbase != NULL);
 	cinfo->SFR = val;
-	z_writeb (val, cinfo->regbase + 0x8000);
+	z_writeb(val, cinfo->regbase + 0x8000);
 #endif
 }
 
 /* The Picasso has a second register for switching the monitor bit */
-static void WSFR2 (struct cirrusfb_info *cinfo, unsigned char val)
+static void WSFR2(struct cirrusfb_info *cinfo, unsigned char val)
 {
 #ifdef CONFIG_ZORRO
 	/* writing an arbitrary value to this one causes the monitor switcher */
 	/* to flip to Amiga display */
-	assert (cinfo->regbase != NULL);
+	assert(cinfo->regbase != NULL);
 	cinfo->SFR = val;
-	z_writeb (val, cinfo->regbase + 0x9000);
+	z_writeb(val, cinfo->regbase + 0x9000);
 #endif
 }
 
-
 /*** WClut - set CLUT entry (range: 0..63) ***/
-static void WClut (struct cirrusfb_info *cinfo, unsigned char regnum, unsigned char red,
+static void WClut(struct cirrusfb_info *cinfo, unsigned char regnum, unsigned char red,
 	    unsigned char green, unsigned char blue)
 {
 	unsigned int data = VGA_PEL_D;
 
 	/* address write mode register is not translated.. */
-	vga_w (cinfo->regbase, VGA_PEL_IW, regnum);
+	vga_w(cinfo->regbase, VGA_PEL_IW, regnum);
 
 	if (cinfo->btype == BT_PICASSO || cinfo->btype == BT_PICASSO4 ||
 	    cinfo->btype == BT_ALPINE || cinfo->btype == BT_GD5480) {
 		/* but DAC data register IS, at least for Picasso II */
 		if (cinfo->btype == BT_PICASSO)
 			data += 0xfff;
-		vga_w (cinfo->regbase, data, red);
-		vga_w (cinfo->regbase, data, green);
-		vga_w (cinfo->regbase, data, blue);
+		vga_w(cinfo->regbase, data, red);
+		vga_w(cinfo->regbase, data, green);
+		vga_w(cinfo->regbase, data, blue);
 	} else {
-		vga_w (cinfo->regbase, data, blue);
-		vga_w (cinfo->regbase, data, green);
-		vga_w (cinfo->regbase, data, red);
+		vga_w(cinfo->regbase, data, blue);
+		vga_w(cinfo->regbase, data, green);
+		vga_w(cinfo->regbase, data, red);
 	}
 }
 
-
 #if 0
 /*** RClut - read CLUT entry (range 0..63) ***/
-static void RClut (struct cirrusfb_info *cinfo, unsigned char regnum, unsigned char *red,
+static void RClut(struct cirrusfb_info *cinfo, unsigned char regnum, unsigned char *red,
 	    unsigned char *green, unsigned char *blue)
 {
 	unsigned int data = VGA_PEL_D;
 
-	vga_w (cinfo->regbase, VGA_PEL_IR, regnum);
+	vga_w(cinfo->regbase, VGA_PEL_IR, regnum);
 
 	if (cinfo->btype == BT_PICASSO || cinfo->btype == BT_PICASSO4 ||
 	    cinfo->btype == BT_ALPINE || cinfo->btype == BT_GD5480) {
 		if (cinfo->btype == BT_PICASSO)
 			data += 0xfff;
-		*red = vga_r (cinfo->regbase, data);
-		*green = vga_r (cinfo->regbase, data);
-		*blue = vga_r (cinfo->regbase, data);
+		*red = vga_r(cinfo->regbase, data);
+		*green = vga_r(cinfo->regbase, data);
+		*blue = vga_r(cinfo->regbase, data);
 	} else {
-		*blue = vga_r (cinfo->regbase, data);
-		*green = vga_r (cinfo->regbase, data);
-		*red = vga_r (cinfo->regbase, data);
+		*blue = vga_r(cinfo->regbase, data);
+		*green = vga_r(cinfo->regbase, data);
+		*red = vga_r(cinfo->regbase, data);
 	}
 }
 #endif
 
-
 /*******************************************************************
 	cirrusfb_WaitBLT()
 
@@ -2870,10 +2869,10 @@ static void RClut (struct cirrusfb_info *cinfo, unsigned char regnum, unsigned c
 *********************************************************************/
 
 /* FIXME: use interrupts instead */
-static void cirrusfb_WaitBLT (u8 __iomem *regbase)
+static void cirrusfb_WaitBLT(u8 __iomem *regbase)
 {
 	/* now busy-wait until we're done */
-	while (vga_rgfx (regbase, CL_GR31) & 0x08)
+	while (vga_rgfx(regbase, CL_GR31) & 0x08)
 		/* do nothing */ ;
 }
 
@@ -2883,15 +2882,17 @@ static void cirrusfb_WaitBLT (u8 __iomem *regbase)
 	perform accelerated "scrolling"
 ********************************************************************/
 
-static void cirrusfb_BitBLT (u8 __iomem *regbase, int bits_per_pixel,
-			     u_short curx, u_short cury, u_short destx, u_short desty,
-			     u_short width, u_short height, u_short line_length)
+static void cirrusfb_BitBLT(u8 __iomem *regbase, int bits_per_pixel,
+			    u_short curx, u_short cury,
+			    u_short destx, u_short desty,
+			    u_short width, u_short height,
+			    u_short line_length)
 {
 	u_short nwidth, nheight;
 	u_long nsrc, ndest;
 	u_char bltmode;
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
 	nwidth = width - 1;
 	nheight = height - 1;
@@ -2911,9 +2912,13 @@ static void cirrusfb_BitBLT (u8 __iomem *regbase, int bits_per_pixel,
 		nsrc = (cury * line_length) + curx;
 		ndest = (desty * line_length) + destx;
 	} else {
-		/* this means start addresses are at the end, counting backwards */
-		nsrc = cury * line_length + curx + nheight * line_length + nwidth;
-		ndest = desty * line_length + destx + nheight * line_length + nwidth;
+		/* this means start addresses are at the end,
+		 * counting backwards
+		 */
+		nsrc = cury * line_length + curx +
+			nheight * line_length + nwidth;
+		ndest = desty * line_length + destx +
+			nheight * line_length + nwidth;
 	}
 
 	/*
@@ -2929,52 +2934,65 @@ static void cirrusfb_BitBLT (u8 __iomem *regbase, int bits_per_pixel,
 	   start/stop
 	 */
 
-        cirrusfb_WaitBLT(regbase);
+	cirrusfb_WaitBLT(regbase);
 
 	/* pitch: set to line_length */
-	vga_wgfx (regbase, CL_GR24, line_length & 0xff);	/* dest pitch low */
-	vga_wgfx (regbase, CL_GR25, (line_length >> 8));	/* dest pitch hi */
-	vga_wgfx (regbase, CL_GR26, line_length & 0xff);	/* source pitch low */
-	vga_wgfx (regbase, CL_GR27, (line_length >> 8));	/* source pitch hi */
+	/* dest pitch low */
+	vga_wgfx(regbase, CL_GR24, line_length & 0xff);
+	/* dest pitch hi */
+	vga_wgfx(regbase, CL_GR25, line_length >> 8);
+	/* source pitch low */
+	vga_wgfx(regbase, CL_GR26, line_length & 0xff);
+	/* source pitch hi */
+	vga_wgfx(regbase, CL_GR27, line_length >> 8);
 
 	/* BLT width: actual number of pixels - 1 */
-	vga_wgfx (regbase, CL_GR20, nwidth & 0xff);	/* BLT width low */
-	vga_wgfx (regbase, CL_GR21, (nwidth >> 8));	/* BLT width hi */
+	/* BLT width low */
+	vga_wgfx(regbase, CL_GR20, nwidth & 0xff);
+	/* BLT width hi */
+	vga_wgfx(regbase, CL_GR21, nwidth >> 8);
 
 	/* BLT height: actual number of lines -1 */
-	vga_wgfx (regbase, CL_GR22, nheight & 0xff);	/* BLT height low */
-	vga_wgfx (regbase, CL_GR23, (nheight >> 8));	/* BLT width hi */
+	/* BLT height low */
+	vga_wgfx(regbase, CL_GR22, nheight & 0xff);
+	/* BLT width hi */
+	vga_wgfx(regbase, CL_GR23, nheight >> 8);
 
 	/* BLT destination */
-	vga_wgfx (regbase, CL_GR28, (u_char) (ndest & 0xff));	/* BLT dest low */
-	vga_wgfx (regbase, CL_GR29, (u_char) (ndest >> 8));	/* BLT dest mid */
-	vga_wgfx (regbase, CL_GR2A, (u_char) (ndest >> 16));	/* BLT dest hi */
+	/* BLT dest low */
+	vga_wgfx(regbase, CL_GR28, (u_char) (ndest & 0xff));
+	/* BLT dest mid */
+	vga_wgfx(regbase, CL_GR29, (u_char) (ndest >> 8));
+	/* BLT dest hi */
+	vga_wgfx(regbase, CL_GR2A, (u_char) (ndest >> 16));
 
 	/* BLT source */
-	vga_wgfx (regbase, CL_GR2C, (u_char) (nsrc & 0xff));	/* BLT src low */
-	vga_wgfx (regbase, CL_GR2D, (u_char) (nsrc >> 8));		/* BLT src mid */
-	vga_wgfx (regbase, CL_GR2E, (u_char) (nsrc >> 16));	/* BLT src hi */
+	/* BLT src low */
+	vga_wgfx(regbase, CL_GR2C, (u_char) (nsrc & 0xff));
+	/* BLT src mid */
+	vga_wgfx(regbase, CL_GR2D, (u_char) (nsrc >> 8));
+	/* BLT src hi */
+	vga_wgfx(regbase, CL_GR2E, (u_char) (nsrc >> 16));
 
 	/* BLT mode */
-	vga_wgfx (regbase, CL_GR30, bltmode);	/* BLT mode */
+	vga_wgfx(regbase, CL_GR30, bltmode);	/* BLT mode */
 
 	/* BLT ROP: SrcCopy */
-	vga_wgfx (regbase, CL_GR32, 0x0d);		/* BLT ROP */
+	vga_wgfx(regbase, CL_GR32, 0x0d);	/* BLT ROP */
 
 	/* and finally: GO! */
-	vga_wgfx (regbase, CL_GR31, 0x02);		/* BLT Start/status */
+	vga_wgfx(regbase, CL_GR31, 0x02);	/* BLT Start/status */
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
-
 /*******************************************************************
 	cirrusfb_RectFill()
 
 	perform accelerated rectangle fill
 ********************************************************************/
 
-static void cirrusfb_RectFill (u8 __iomem *regbase, int bits_per_pixel,
+static void cirrusfb_RectFill(u8 __iomem *regbase, int bits_per_pixel,
 		     u_short x, u_short y, u_short width, u_short height,
 		     u_char color, u_short line_length)
 {
@@ -2982,93 +3000,95 @@ static void cirrusfb_RectFill (u8 __iomem *regbase, int bits_per_pixel,
 	u_long ndest;
 	u_char op;
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
 	nwidth = width - 1;
 	nheight = height - 1;
 
 	ndest = (y * line_length) + x;
 
-        cirrusfb_WaitBLT(regbase);
+	cirrusfb_WaitBLT(regbase);
 
 	/* pitch: set to line_length */
-	vga_wgfx (regbase, CL_GR24, line_length & 0xff);	/* dest pitch low */
-	vga_wgfx (regbase, CL_GR25, (line_length >> 8));	/* dest pitch hi */
-	vga_wgfx (regbase, CL_GR26, line_length & 0xff);	/* source pitch low */
-	vga_wgfx (regbase, CL_GR27, (line_length >> 8));	/* source pitch hi */
+	vga_wgfx(regbase, CL_GR24, line_length & 0xff);	/* dest pitch low */
+	vga_wgfx(regbase, CL_GR25, line_length >> 8);	/* dest pitch hi */
+	vga_wgfx(regbase, CL_GR26, line_length & 0xff);	/* source pitch low */
+	vga_wgfx(regbase, CL_GR27, line_length >> 8);	/* source pitch hi */
 
 	/* BLT width: actual number of pixels - 1 */
-	vga_wgfx (regbase, CL_GR20, nwidth & 0xff);	/* BLT width low */
-	vga_wgfx (regbase, CL_GR21, (nwidth >> 8));	/* BLT width hi */
+	vga_wgfx(regbase, CL_GR20, nwidth & 0xff);	/* BLT width low */
+	vga_wgfx(regbase, CL_GR21, nwidth >> 8);	/* BLT width hi */
 
 	/* BLT height: actual number of lines -1 */
-	vga_wgfx (regbase, CL_GR22, nheight & 0xff);		/* BLT height low */
-	vga_wgfx (regbase, CL_GR23, (nheight >> 8));		/* BLT width hi */
+	vga_wgfx(regbase, CL_GR22, nheight & 0xff);	/* BLT height low */
+	vga_wgfx(regbase, CL_GR23, nheight >> 8);	/* BLT width hi */
 
 	/* BLT destination */
-	vga_wgfx (regbase, CL_GR28, (u_char) (ndest & 0xff));	/* BLT dest low */
-	vga_wgfx (regbase, CL_GR29, (u_char) (ndest >> 8));	/* BLT dest mid */
-	vga_wgfx (regbase, CL_GR2A, (u_char) (ndest >> 16));		/* BLT dest hi */
+	/* BLT dest low */
+	vga_wgfx(regbase, CL_GR28, (u_char) (ndest & 0xff));
+	/* BLT dest mid */
+	vga_wgfx(regbase, CL_GR29, (u_char) (ndest >> 8));
+	/* BLT dest hi */
+	vga_wgfx(regbase, CL_GR2A, (u_char) (ndest >> 16));
 
 	/* BLT source: set to 0 (is a dummy here anyway) */
-	vga_wgfx (regbase, CL_GR2C, 0x00);	/* BLT src low */
-	vga_wgfx (regbase, CL_GR2D, 0x00);	/* BLT src mid */
-	vga_wgfx (regbase, CL_GR2E, 0x00);	/* BLT src hi */
+	vga_wgfx(regbase, CL_GR2C, 0x00);	/* BLT src low */
+	vga_wgfx(regbase, CL_GR2D, 0x00);	/* BLT src mid */
+	vga_wgfx(regbase, CL_GR2E, 0x00);	/* BLT src hi */
 
 	/* This is a ColorExpand Blt, using the */
 	/* same color for foreground and background */
-	vga_wgfx (regbase, VGA_GFX_SR_VALUE, color);	/* foreground color */
-	vga_wgfx (regbase, VGA_GFX_SR_ENABLE, color);	/* background color */
+	vga_wgfx(regbase, VGA_GFX_SR_VALUE, color);	/* foreground color */
+	vga_wgfx(regbase, VGA_GFX_SR_ENABLE, color);	/* background color */
 
 	op = 0xc0;
 	if (bits_per_pixel == 16) {
-		vga_wgfx (regbase, CL_GR10, color);	/* foreground color */
-		vga_wgfx (regbase, CL_GR11, color);	/* background color */
+		vga_wgfx(regbase, CL_GR10, color);	/* foreground color */
+		vga_wgfx(regbase, CL_GR11, color);	/* background color */
 		op = 0x50;
 		op = 0xd0;
 	} else if (bits_per_pixel == 32) {
-		vga_wgfx (regbase, CL_GR10, color);	/* foreground color */
-		vga_wgfx (regbase, CL_GR11, color);	/* background color */
-		vga_wgfx (regbase, CL_GR12, color);	/* foreground color */
-		vga_wgfx (regbase, CL_GR13, color);	/* background color */
-		vga_wgfx (regbase, CL_GR14, 0);	/* foreground color */
-		vga_wgfx (regbase, CL_GR15, 0);	/* background color */
+		vga_wgfx(regbase, CL_GR10, color);	/* foreground color */
+		vga_wgfx(regbase, CL_GR11, color);	/* background color */
+		vga_wgfx(regbase, CL_GR12, color);	/* foreground color */
+		vga_wgfx(regbase, CL_GR13, color);	/* background color */
+		vga_wgfx(regbase, CL_GR14, 0);	/* foreground color */
+		vga_wgfx(regbase, CL_GR15, 0);	/* background color */
 		op = 0x50;
 		op = 0xf0;
 	}
 	/* BLT mode: color expand, Enable 8x8 copy (faster?) */
-	vga_wgfx (regbase, CL_GR30, op);	/* BLT mode */
+	vga_wgfx(regbase, CL_GR30, op);	/* BLT mode */
 
 	/* BLT ROP: SrcCopy */
-	vga_wgfx (regbase, CL_GR32, 0x0d);	/* BLT ROP */
+	vga_wgfx(regbase, CL_GR32, 0x0d);	/* BLT ROP */
 
 	/* and finally: GO! */
-	vga_wgfx (regbase, CL_GR31, 0x02);	/* BLT Start/status */
+	vga_wgfx(regbase, CL_GR31, 0x02);	/* BLT Start/status */
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
-
 /**************************************************************************
  * bestclock() - determine closest possible clock lower(?) than the
  * desired pixel clock
  **************************************************************************/
-static void bestclock (long freq, long *best, long *nom,
+static void bestclock(long freq, long *best, long *nom,
 		       long *den, long *div, long maxfreq)
 {
 	long n, h, d, f;
 
-	assert (best != NULL);
-	assert (nom != NULL);
-	assert (den != NULL);
-	assert (div != NULL);
-	assert (maxfreq > 0);
+	assert(best != NULL);
+	assert(nom != NULL);
+	assert(den != NULL);
+	assert(div != NULL);
+	assert(maxfreq > 0);
 
 	*nom = 0;
 	*den = 0;
 	*div = 0;
 
-	DPRINTK ("ENTER\n");
+	DPRINTK("ENTER\n");
 
 	if (freq < 8000)
 		freq = 8000;
@@ -3085,7 +3105,7 @@ static void bestclock (long freq, long *best, long *nom,
 			if (d > 31)
 				d = (d / 2) * 2;
 			h = (14318 * n) / d;
-			if (abs (h - freq) < abs (*best - freq)) {
+			if (abs(h - freq) < abs(*best - freq)) {
 				*best = h;
 				*nom = n;
 				if (d < 32) {
@@ -3102,7 +3122,7 @@ static void bestclock (long freq, long *best, long *nom,
 			if (d > 31)
 				d = (d / 2) * 2;
 			h = (14318 * n) / d;
-			if (abs (h - freq) < abs (*best - freq)) {
+			if (abs(h - freq) < abs(*best - freq)) {
 				*best = h;
 				*nom = n;
 				if (d < 32) {
@@ -3116,14 +3136,13 @@ static void bestclock (long freq, long *best, long *nom,
 		}
 	}
 
-	DPRINTK ("Best possible values for given frequency:\n");
-	DPRINTK ("        best: %ld kHz  nom: %ld  den: %ld  div: %ld\n",
-		 freq, *nom, *den, *div);
+	DPRINTK("Best possible values for given frequency:\n");
+	DPRINTK("	best: %ld kHz  nom: %ld  den: %ld  div: %ld\n",
+		freq, *nom, *den, *div);
 
-	DPRINTK ("EXIT\n");
+	DPRINTK("EXIT\n");
 }
 
-
 /* -------------------------------------------------------------------------
  *
  * debugging functions
@@ -3145,21 +3164,20 @@ static void bestclock (long freq, long *best, long *nom,
  */
 
 static
-void cirrusfb_dbg_print_byte (const char *name, unsigned char val)
+void cirrusfb_dbg_print_byte(const char *name, unsigned char val)
 {
-	DPRINTK ("%8s = 0x%02X (bits 7-0: %c%c%c%c%c%c%c%c)\n",
-		 name, val,
-		 val & 0x80 ? '1' : '0',
-		 val & 0x40 ? '1' : '0',
-		 val & 0x20 ? '1' : '0',
-		 val & 0x10 ? '1' : '0',
-		 val & 0x08 ? '1' : '0',
-		 val & 0x04 ? '1' : '0',
-		 val & 0x02 ? '1' : '0',
-		 val & 0x01 ? '1' : '0');
+	DPRINTK("%8s = 0x%02X (bits 7-0: %c%c%c%c%c%c%c%c)\n",
+		name, val,
+		val & 0x80 ? '1' : '0',
+		val & 0x40 ? '1' : '0',
+		val & 0x20 ? '1' : '0',
+		val & 0x10 ? '1' : '0',
+		val & 0x08 ? '1' : '0',
+		val & 0x04 ? '1' : '0',
+		val & 0x02 ? '1' : '0',
+		val & 0x01 ? '1' : '0');
 }
 
-
 /**
  * cirrusfb_dbg_print_regs
  * @base: If using newmmio, the newmmio base address, otherwise %NULL
@@ -3172,25 +3190,26 @@ void cirrusfb_dbg_print_byte (const char *name, unsigned char val)
  */
 
 static
-void cirrusfb_dbg_print_regs (caddr_t regbase, cirrusfb_dbg_reg_class_t reg_class,...)
+void cirrusfb_dbg_print_regs(caddr_t regbase,
+			     enum cirrusfb_dbg_reg_class reg_class, ...)
 {
 	va_list list;
 	unsigned char val = 0;
 	unsigned reg;
 	char *name;
 
-	va_start (list, reg_class);
+	va_start(list, reg_class);
 
-	name = va_arg (list, char *);
+	name = va_arg(list, char *);
 	while (name != NULL) {
-		reg = va_arg (list, int);
+		reg = va_arg(list, int);
 
 		switch (reg_class) {
 		case CRT:
-			val = vga_rcrt (regbase, (unsigned char) reg);
+			val = vga_rcrt(regbase, (unsigned char) reg);
 			break;
 		case SEQ:
-			val = vga_rseq (regbase, (unsigned char) reg);
+			val = vga_rseq(regbase, (unsigned char) reg);
 			break;
 		default:
 			/* should never occur */
@@ -3198,15 +3217,14 @@ void cirrusfb_dbg_print_regs (caddr_t regbase, cirrusfb_dbg_reg_class_t reg_clas
 			break;
 		}
 
-		cirrusfb_dbg_print_byte (name, val);
+		cirrusfb_dbg_print_byte(name, val);
 
-		name = va_arg (list, char *);
+		name = va_arg(list, char *);
 	}
 
-	va_end (list);
+	va_end(list);
 }
 
-
 /**
  * cirrusfb_dump
  * @cirrusfbinfo:
@@ -3214,13 +3232,11 @@ void cirrusfb_dbg_print_regs (caddr_t regbase, cirrusfb_dbg_reg_class_t reg_clas
  * DESCRIPTION:
  */
 
-static
-void cirrusfb_dump (void)
+static void cirrusfb_dump(void)
 {
-	cirrusfb_dbg_reg_dump (NULL);
+	cirrusfb_dbg_reg_dump(NULL);
 }
 
-
 /**
  * cirrusfb_dbg_reg_dump
  * @base: If using newmmio, the newmmio base address, otherwise %NULL
@@ -3232,11 +3248,11 @@ void cirrusfb_dump (void)
  */
 
 static
-void cirrusfb_dbg_reg_dump (caddr_t regbase)
+void cirrusfb_dbg_reg_dump(caddr_t regbase)
 {
-	DPRINTK ("CIRRUSFB VGA CRTC register dump:\n");
+	DPRINTK("CIRRUSFB VGA CRTC register dump:\n");
 
-	cirrusfb_dbg_print_regs (regbase, CRT,
+	cirrusfb_dbg_print_regs(regbase, CRT,
 			   "CR00", 0x00,
 			   "CR01", 0x01,
 			   "CR02", 0x02,
@@ -3286,11 +3302,11 @@ void cirrusfb_dbg_reg_dump (caddr_t regbase)
 			   "CR3F", 0x3F,
 			   NULL);
 
-	DPRINTK ("\n");
+	DPRINTK("\n");
 
-	DPRINTK ("CIRRUSFB VGA SEQ register dump:\n");
+	DPRINTK("CIRRUSFB VGA SEQ register dump:\n");
 
-	cirrusfb_dbg_print_regs (regbase, SEQ,
+	cirrusfb_dbg_print_regs(regbase, SEQ,
 			   "SR00", 0x00,
 			   "SR01", 0x01,
 			   "SR02", 0x02,
@@ -3319,7 +3335,7 @@ void cirrusfb_dbg_reg_dump (caddr_t regbase)
 			   "SR1F", 0x1F,
 			   NULL);
 
-	DPRINTK ("\n");
+	DPRINTK("\n");
 }
 
 #endif				/* CIRRUSFB_DEBUG */
diff --git a/drivers/video/clps711xfb.c b/drivers/video/clps711xfb.c
index dea6579941b..17b5267f44d 100644
--- a/drivers/video/clps711xfb.c
+++ b/drivers/video/clps711xfb.c
@@ -29,7 +29,7 @@
 
 #include <asm/hardware.h>
 #include <asm/mach-types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <asm/hardware/clps7111.h>
 #include <asm/arch/syspld.h>
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index e58c87b3e3a..0f32f4a00b2 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -78,7 +78,6 @@
 #include <asm/fb.h>
 #include <asm/irq.h>
 #include <asm/system.h>
-#include <asm/uaccess.h>
 #ifdef CONFIG_ATARI
 #include <asm/atariints.h>
 #endif
@@ -2169,7 +2168,7 @@ static __inline__ void updatescrollmode(struct display *p,
 }
 
 static int fbcon_resize(struct vc_data *vc, unsigned int width, 
-			unsigned int height)
+			unsigned int height, unsigned int user)
 {
 	struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
 	struct fbcon_ops *ops = info->fbcon_par;
@@ -2406,7 +2405,7 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
 			update_screen(vc);
 	}
 
-	if (fbcon_is_inactive(vc, info) ||
+	if (mode_switch || fbcon_is_inactive(vc, info) ||
 	    ops->blank_state != FB_BLANK_UNBLANK)
 		fbcon_del_cursor_timer(info);
 	else
diff --git a/drivers/video/console/font_10x18.c b/drivers/video/console/font_10x18.c
index e6aa0eab5bb..6be72bb218e 100644
--- a/drivers/video/console/font_10x18.c
+++ b/drivers/video/console/font_10x18.c
@@ -5133,14 +5133,14 @@ static const unsigned char fontdata_10x18[FONTDATAMAX] = {
 
 
 const struct font_desc font_10x18 = {
-	FONT10x18_IDX,
-	"10x18",
-	10,
-	18,
-	fontdata_10x18,
+	.idx	= FONT10x18_IDX,
+	.name	= "10x18",
+	.width	= 10,
+	.height	= 18,
+	.data	= fontdata_10x18,
 #ifdef __sparc__
-	5
+	.pref	= 5,
 #else
-	-1
+	.pref	= -1,
 #endif
 };
diff --git a/drivers/video/console/font_6x11.c b/drivers/video/console/font_6x11.c
index 89976cd9749..46e86e67aa6 100644
--- a/drivers/video/console/font_6x11.c
+++ b/drivers/video/console/font_6x11.c
@@ -3342,10 +3342,11 @@ static const unsigned char fontdata_6x11[FONTDATAMAX] = {
 
 
 const struct font_desc font_vga_6x11 = {
-	VGA6x11_IDX,
-	"ProFont6x11",
-	6,
-	11,
-	fontdata_6x11,
-	-2000	/* Try avoiding this font if possible unless on MAC */
+	.idx	= VGA6x11_IDX,
+	.name	= "ProFont6x11",
+	.width	= 6,
+	.height	= 11,
+	.data	= fontdata_6x11,
+	/* Try avoiding this font if possible unless on MAC */
+	.pref	= -2000,
 };
diff --git a/drivers/video/console/font_7x14.c b/drivers/video/console/font_7x14.c
index bbf11664739..3b7dbf9c060 100644
--- a/drivers/video/console/font_7x14.c
+++ b/drivers/video/console/font_7x14.c
@@ -4109,10 +4109,10 @@ static const unsigned char fontdata_7x14[FONTDATAMAX] = {
 
 
 const struct font_desc font_7x14 = {
-	FONT7x14_IDX,
-	"7x14",
-	7,
-	14,
-	fontdata_7x14,
-	0
+	.idx	= FONT7x14_IDX,
+	.name	= "7x14",
+	.width	= 7,
+	.height	= 14,
+	.data	= fontdata_7x14,
+	.pref	= 0,
 };
diff --git a/drivers/video/console/font_8x16.c b/drivers/video/console/font_8x16.c
index 74fe86f28ff..00a0c67a5c7 100644
--- a/drivers/video/console/font_8x16.c
+++ b/drivers/video/console/font_8x16.c
@@ -5,6 +5,7 @@
 /**********************************************/
 
 #include <linux/font.h>
+#include <linux/module.h>
 
 #define FONTDATAMAX 4096
 
@@ -4622,10 +4623,11 @@ static const unsigned char fontdata_8x16[FONTDATAMAX] = {
 
 
 const struct font_desc font_vga_8x16 = {
-	VGA8x16_IDX,
-	"VGA8x16",
-	8,
-	16,
-	fontdata_8x16,
-	0
+	.idx	= VGA8x16_IDX,
+	.name	= "VGA8x16",
+	.width	= 8,
+	.height	= 16,
+	.data	= fontdata_8x16,
+	.pref	= 0,
 };
+EXPORT_SYMBOL(font_vga_8x16);
diff --git a/drivers/video/console/font_8x8.c b/drivers/video/console/font_8x8.c
index 26199f8ee90..9f56efe2cee 100644
--- a/drivers/video/console/font_8x8.c
+++ b/drivers/video/console/font_8x8.c
@@ -2574,10 +2574,10 @@ static const unsigned char fontdata_8x8[FONTDATAMAX] = {
 
 
 const struct font_desc font_vga_8x8 = {
-	VGA8x8_IDX,
-	"VGA8x8",
-	8,
-	8,
-	fontdata_8x8,
-	0
+	.idx	= VGA8x8_IDX,
+	.name	= "VGA8x8",
+	.width	= 8,
+	.height	= 8,
+	.data	= fontdata_8x8,
+	.pref	= 0,
 };
diff --git a/drivers/video/console/font_acorn_8x8.c b/drivers/video/console/font_acorn_8x8.c
index 40f3d4eeb19..639e31ae110 100644
--- a/drivers/video/console/font_acorn_8x8.c
+++ b/drivers/video/console/font_acorn_8x8.c
@@ -262,14 +262,14 @@ static const unsigned char acorndata_8x8[] = {
 };
 
 const struct font_desc font_acorn_8x8 = {
-	ACORN8x8_IDX,
-	"Acorn8x8",
-	8,
-	8,
-	acorndata_8x8,
+	.idx	= ACORN8x8_IDX,
+	.name	= "Acorn8x8",
+	.width	= 8,
+	.height	= 8,
+	.data	= acorndata_8x8,
 #ifdef CONFIG_ARCH_ACORN
-	20
+	.pref	= 20,
 #else
-	0
+	.pref	= 0,
 #endif
 };
diff --git a/drivers/video/console/font_mini_4x6.c b/drivers/video/console/font_mini_4x6.c
index d818234fdf1..a19a7f33133 100644
--- a/drivers/video/console/font_mini_4x6.c
+++ b/drivers/video/console/font_mini_4x6.c
@@ -2148,11 +2148,11 @@ static const unsigned char fontdata_mini_4x6[FONTDATAMAX] = {
 };
 
 const struct font_desc font_mini_4x6 = {
-	MINI4x6_IDX,
-	"MINI4x6",
-	4,
-	6,
-	fontdata_mini_4x6,
-	3
+	.idx	= MINI4x6_IDX,
+	.name	= "MINI4x6",
+	.width	= 4,
+	.height	= 6,
+	.data	= fontdata_mini_4x6,
+	.pref	= 3,
 };
 
diff --git a/drivers/video/console/font_pearl_8x8.c b/drivers/video/console/font_pearl_8x8.c
index e646c88f55c..dc6ad539ca4 100644
--- a/drivers/video/console/font_pearl_8x8.c
+++ b/drivers/video/console/font_pearl_8x8.c
@@ -2578,10 +2578,10 @@ static const unsigned char fontdata_pearl8x8[FONTDATAMAX] = {
 };
 
 const struct font_desc font_pearl_8x8 = {
-	PEARL8x8_IDX,
-	"PEARL8x8",
-	8,
-	8,
-	fontdata_pearl8x8,
-	2
+	.idx	= PEARL8x8_IDX,
+	.name	= "PEARL8x8",
+	.width	= 8,
+	.height	= 8,
+	.data	= fontdata_pearl8x8,
+	.pref	= 2,
 };
diff --git a/drivers/video/console/font_sun12x22.c b/drivers/video/console/font_sun12x22.c
index ab5eb93407b..d3643853c33 100644
--- a/drivers/video/console/font_sun12x22.c
+++ b/drivers/video/console/font_sun12x22.c
@@ -6152,14 +6152,14 @@ static const unsigned char fontdata_sun12x22[FONTDATAMAX] = {
 
 
 const struct font_desc font_sun_12x22 = {
-	SUN12x22_IDX,
-	"SUN12x22",
-	12,
-	22,
-	fontdata_sun12x22,
+	.idx	= SUN12x22_IDX,
+	.name	= "SUN12x22",
+	.width	= 12,
+	.height	= 22,
+	.data	= fontdata_sun12x22,
 #ifdef __sparc__
-	5
+	.pref	= 5,
 #else
-	-1
+	.pref	= -1,
 #endif
 };
diff --git a/drivers/video/console/font_sun8x16.c b/drivers/video/console/font_sun8x16.c
index 41f910f5529..5abf290c6eb 100644
--- a/drivers/video/console/font_sun8x16.c
+++ b/drivers/video/console/font_sun8x16.c
@@ -262,14 +262,14 @@ static const unsigned char fontdata_sun8x16[FONTDATAMAX] = {
 };
 
 const struct font_desc font_sun_8x16 = {
-	SUN8x16_IDX,
-	"SUN8x16",
-	8,
-	16,
-	fontdata_sun8x16,
+	.idx	= SUN8x16_IDX,
+	.name	= "SUN8x16",
+	.width	= 8,
+	.height	= 16,
+	.data	= fontdata_sun8x16,
 #ifdef __sparc__
-	10
+	.pref	= 10,
 #else
-	-1
+	.pref	= -1,
 #endif
 };
diff --git a/drivers/video/console/newport_con.c b/drivers/video/console/newport_con.c
index dda0586ab3f..f57d7b2758b 100644
--- a/drivers/video/console/newport_con.c
+++ b/drivers/video/console/newport_con.c
@@ -98,14 +98,19 @@ static inline void newport_init_cmap(void)
 	}
 }
 
-static void newport_show_logo(void)
+static const struct linux_logo *newport_show_logo(void)
 {
 #ifdef CONFIG_LOGO_SGI_CLUT224
 	const struct linux_logo *logo = fb_find_logo(8);
-	const unsigned char *clut = logo->clut;
-	const unsigned char *data = logo->data;
+	const unsigned char *clut;
+	const unsigned char *data;
 	unsigned long i;
 
+	if (!logo)
+		return NULL;
+	clut = logo->clut;
+	data = logo->data;
+
 	for (i = 0; i < logo->clutsize; i++) {
 		newport_bfwait(npregs);
 		newport_cmap_setaddr(npregs, i + 0x20);
@@ -123,6 +128,8 @@ static void newport_show_logo(void)
 
 	for (i = 0; i < logo->width*logo->height; i++)
 		npregs->go.hostrw0 = *data++ << 24;
+
+	return logo;
 #endif /* CONFIG_LOGO_SGI_CLUT224 */
 }
 
@@ -465,9 +472,10 @@ static int newport_switch(struct vc_data *vc)
 	npregs->cset.topscan = 0x3ff;
 
 	if (!logo_drawn) {
-		newport_show_logo();
-		logo_drawn = 1;
-		logo_active = 1;
+		if (newport_show_logo()) {
+			logo_drawn = 1;
+			logo_active = 1;
+		}
 	}
 
 	return 1;
diff --git a/drivers/video/console/softcursor.c b/drivers/video/console/softcursor.c
index 03cfb7ac573..25f835bf3d7 100644
--- a/drivers/video/console/softcursor.c
+++ b/drivers/video/console/softcursor.c
@@ -15,7 +15,6 @@
 #include <linux/fb.h>
 #include <linux/slab.h>
 
-#include <asm/uaccess.h>
 #include <asm/io.h>
 
 #include "fbcon.h"
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index d18b73aafa0..e9afb7ebd56 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -1278,13 +1278,14 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font)
 #endif
 
 static int vgacon_resize(struct vc_data *c, unsigned int width,
-				unsigned int height)
+			 unsigned int height, unsigned int user)
 {
 	if (width % 2 || width > ORIG_VIDEO_COLS ||
 	    height > (ORIG_VIDEO_LINES * vga_default_font_height)/
 	    c->vc_font.height)
-		/* let svgatextmode tinker with video timings */
-		return 0;
+		/* let svgatextmode tinker with video timings and
+		   return success */
+		return (user) ? 0 : -EINVAL;
 
 	if (CON_IS_VISIBLE(c) && !vga_is_gfx) /* who knows */
 		vgacon_doresize(c, width, height);
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 30ede6e8830..9bb2cbfe4a3 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -50,7 +50,6 @@
 #include <asm/io.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
-#include <asm/uaccess.h>
 
 #ifdef __arm__
 #include <asm/mach-types.h>
diff --git a/drivers/video/epson1355fb.c b/drivers/video/epson1355fb.c
index 33be46ccb54..cc2810ef5de 100644
--- a/drivers/video/epson1355fb.c
+++ b/drivers/video/epson1355fb.c
@@ -57,7 +57,7 @@
 
 #include <asm/types.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #include <video/epson1355.h>
 
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 1a8643f053d..a0c5d9d90d7 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -19,7 +19,6 @@
 #include <linux/interrupt.h>
 #include <linux/fb.h>
 #include <linux/list.h>
-#include <asm/uaccess.h>
 
 /* to support deferred IO */
 #include <linux/rmap.h>
diff --git a/drivers/video/fb_draw.h b/drivers/video/fb_draw.h
index c5c45203833..cdafbe14ef1 100644
--- a/drivers/video/fb_draw.h
+++ b/drivers/video/fb_draw.h
@@ -2,6 +2,7 @@
 #define _FB_DRAW_H
 
 #include <asm/types.h>
+#include <linux/fb.h>
 
     /*
      *  Compose two values, using a bitmask as decision value
@@ -69,4 +70,107 @@ pixel_to_pat( u32 bpp, u32 pixel)
     }
 }
 #endif
+
+#ifdef CONFIG_FB_CFB_REV_PIXELS_IN_BYTE
+#if BITS_PER_LONG == 64
+#define REV_PIXELS_MASK1 0x5555555555555555ul
+#define REV_PIXELS_MASK2 0x3333333333333333ul
+#define REV_PIXELS_MASK4 0x0f0f0f0f0f0f0f0ful
+#else
+#define REV_PIXELS_MASK1 0x55555555ul
+#define REV_PIXELS_MASK2 0x33333333ul
+#define REV_PIXELS_MASK4 0x0f0f0f0ful
+#endif
+
+/*
+ * For each step selected by @bswapmask, merge @val shifted right and left
+ * by 1, 2 or 4 bits through comp() with the matching REV_PIXELS_MASK*
+ * constant, then return the result.
+ *
+ * NOTE(review): the 4-bit step tests (bswapmask & 3) rather than
+ * (bswapmask & 4), so a bswapmask of exactly 4 skips it -- confirm that
+ * this is intended.
+ */
+static inline unsigned long fb_rev_pixels_in_long(unsigned long val,
+						  u32 bswapmask)
+{
+	if (bswapmask & 1)
+		val = comp(val >> 1, val << 1, REV_PIXELS_MASK1);
+	if (bswapmask & 2)
+		val = comp(val >> 2, val << 2, REV_PIXELS_MASK2);
+	if (bswapmask & 3)
+		val = comp(val >> 4, val << 4, REV_PIXELS_MASK4);
+	return val;
+}
+
+static inline u32 fb_shifted_pixels_mask_u32(u32 index, u32 bswapmask)
+{
+	u32 mask;
+
+	if (!bswapmask) {
+		mask = FB_SHIFT_HIGH(~(u32)0, index);
+	} else {
+		mask = 0xff << FB_LEFT_POS(8);
+		mask = FB_SHIFT_LOW(mask, index & (bswapmask)) & mask;
+		mask = FB_SHIFT_HIGH(mask, index & ~(bswapmask));
+#if defined(__i386__) || defined(__x86_64__)
+		/* Shift argument is limited to 0 - 31 on x86 based CPU's */
+		if(index + bswapmask < 32)
+#endif
+			mask |= FB_SHIFT_HIGH(~(u32)0,
+					(index + bswapmask) & ~(bswapmask));
+	}
+	return mask;
+}
+
+static inline unsigned long fb_shifted_pixels_mask_long(u32 index, u32 bswapmask)
+{
+	unsigned long mask;
+
+	if (!bswapmask) {
+		mask = FB_SHIFT_HIGH(~0UL, index);
+	} else {
+		mask = 0xff << FB_LEFT_POS(8);
+		mask = FB_SHIFT_LOW(mask, index & (bswapmask)) & mask;
+		mask = FB_SHIFT_HIGH(mask, index & ~(bswapmask));
+#if defined(__i386__) || defined(__x86_64__)
+		/* Shift argument is limited to 0 - 31 on x86 based CPU's */
+		if(index + bswapmask < BITS_PER_LONG)
+#endif
+			mask |= FB_SHIFT_HIGH(~0UL,
+					(index + bswapmask) & ~(bswapmask));
+	}
+	return mask;
+}
+
+
+static inline u32 fb_compute_bswapmask(struct fb_info *info)
+{
+	u32 bswapmask = 0;
+	unsigned bpp = info->var.bits_per_pixel;
+
+	if ((bpp < 8) && (info->var.nonstd & FB_NONSTD_REV_PIX_IN_B)) {
+		/*
+		 * Reversed order of pixel layout in bytes
+		 * works only for 1, 2 and 4 bpp
+		 */
+		bswapmask = 7 - bpp + 1;
+	}
+	return bswapmask;
+}
+
+#else /* CONFIG_FB_CFB_REV_PIXELS_IN_BYTE */
+
+static inline unsigned long fb_rev_pixels_in_long(unsigned long val,
+						  u32 bswapmask)
+{
+	return val;
+}
+
+#define fb_shifted_pixels_mask_u32(i, b) FB_SHIFT_HIGH(~(u32)0, (i))
+#define fb_shifted_pixels_mask_long(i, b) FB_SHIFT_HIGH(~0UL, (i))
+#define fb_compute_bswapmask(...) 0
+
+#endif  /* CONFIG_FB_CFB_REV_PIXELS_IN_BYTE */
+
 #endif /* FB_DRAW_H */
diff --git a/drivers/video/fb_sys_fops.c b/drivers/video/fb_sys_fops.c
index cf2538d669c..ff275d7f3ea 100644
--- a/drivers/video/fb_sys_fops.c
+++ b/drivers/video/fb_sys_fops.c
@@ -11,7 +11,7 @@
  */
 #include <linux/fb.h>
 #include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 ssize_t fb_sys_read(struct fb_info *info, char __user *buf, size_t count,
 		    loff_t *ppos)
diff --git a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c
index 148108afdd5..91b78e69150 100644
--- a/drivers/video/fbcmap.c
+++ b/drivers/video/fbcmap.c
@@ -15,8 +15,7 @@
 #include <linux/module.h>
 #include <linux/fb.h>
 #include <linux/slab.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 static u16 red2[] __read_mostly = {
     0x0000, 0xaaaa
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 07402720470..1194f5e060e 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1567,8 +1567,6 @@ int fb_new_modelist(struct fb_info *info)
 static char *video_options[FB_MAX] __read_mostly;
 static int ofonly __read_mostly;
 
-extern const char *global_mode_option;
-
 /**
  * fb_get_options - get kernel boot parameters
  * @name:   framebuffer name as it would appear in
@@ -1636,7 +1634,7 @@ static int __init video_setup(char *options)
  	}
 
  	if (!global && !strstr(options, "fb:")) {
- 		global_mode_option = options;
+ 		fb_mode_option = options;
  		global = 1;
  	}
 
@@ -1663,7 +1661,6 @@ EXPORT_SYMBOL(register_framebuffer);
 EXPORT_SYMBOL(unregister_framebuffer);
 EXPORT_SYMBOL(num_registered_fb);
 EXPORT_SYMBOL(registered_fb);
-EXPORT_SYMBOL(fb_prepare_logo);
 EXPORT_SYMBOL(fb_show_logo);
 EXPORT_SYMBOL(fb_set_var);
 EXPORT_SYMBOL(fb_blank);
diff --git a/drivers/video/fbmon.c b/drivers/video/fbmon.c
index 438b9411905..4ba9c089441 100644
--- a/drivers/video/fbmon.c
+++ b/drivers/video/fbmon.c
@@ -591,7 +591,7 @@ static struct fb_videomode *fb_create_modedb(unsigned char *edid, int *dbsize)
 {
 	struct fb_videomode *mode, *m;
 	unsigned char *block;
-	int num = 0, i;
+	int num = 0, i, first = 1;
 
 	mode = kzalloc(50 * sizeof(struct fb_videomode), GFP_KERNEL);
 	if (mode == NULL)
@@ -608,8 +608,6 @@ static struct fb_videomode *fb_create_modedb(unsigned char *edid, int *dbsize)
 	DPRINTK("   Detailed Timings\n");
 	block = edid + DETAILED_TIMING_DESCRIPTIONS_START;
 	for (i = 0; i < 4; i++, block+= DETAILED_TIMING_DESCRIPTION_SIZE) {
-	        int first = 1;
-
 		if (!(block[0] == 0x00 && block[1] == 0x00)) {
 			get_detailed_timing(block, &mode[num]);
 			if (first) {
diff --git a/drivers/video/geode/lxfb_core.c b/drivers/video/geode/lxfb_core.c
index 5e30b40c8c0..583185fd7c9 100644
--- a/drivers/video/geode/lxfb_core.c
+++ b/drivers/video/geode/lxfb_core.c
@@ -566,12 +566,7 @@ static int __init lxfb_setup(char *options)
 	if (!options || !*options)
 		return 0;
 
-	while (1) {
-		char *opt = strsep(&options, ",");
-
-		if (opt == NULL)
-			break;
-
+	while ((opt = strsep(&options, ",")) != NULL) {
 		if (!*opt)
 			continue;
 
diff --git a/drivers/video/hecubafb.c b/drivers/video/hecubafb.c
index abfcb50364c..94e0df8a6f6 100644
--- a/drivers/video/hecubafb.c
+++ b/drivers/video/hecubafb.c
@@ -45,7 +45,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/list.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* Apollo controller specific defines */
 #define APOLLO_START_NEW_IMG	0xA0
diff --git a/drivers/video/imsttfb.c b/drivers/video/imsttfb.c
index 94f4511023d..3ab91bf2157 100644
--- a/drivers/video/imsttfb.c
+++ b/drivers/video/imsttfb.c
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #if defined(CONFIG_PPC)
 #include <linux/nvram.h>
diff --git a/drivers/video/imxfb.c b/drivers/video/imxfb.c
index a1258989859..11609552a38 100644
--- a/drivers/video/imxfb.c
+++ b/drivers/video/imxfb.c
@@ -34,7 +34,6 @@
 
 #include <asm/hardware.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
 #include <asm/arch/imxfb.h>
 
 /*
@@ -467,7 +466,7 @@ static int __init imxfb_init_fbinfo(struct device *dev)
 	info->var.vmode	= FB_VMODE_NONINTERLACED;
 
 	info->fbops			= &imxfb_ops;
-	info->flags			= FBINFO_FLAG_DEFAULT;
+	info->flags			= FBINFO_FLAG_DEFAULT | FBINFO_READS_FAST;
 
 	fbi->rgb[RGB_16]		= &def_rgb_16;
 	fbi->rgb[RGB_8]			= &def_rgb_8;
@@ -480,6 +479,7 @@ static int __init imxfb_init_fbinfo(struct device *dev)
 	info->var.yres_virtual		= inf->yres;
 	fbi->max_bpp			= inf->bpp;
 	info->var.bits_per_pixel	= inf->bpp;
+	info->var.nonstd		= inf->nonstd;
 	info->var.pixclock		= inf->pixclock;
 	info->var.hsync_len		= inf->hsync_len;
 	info->var.left_margin		= inf->left_margin;
diff --git a/drivers/video/intelfb/intelfb.h b/drivers/video/intelfb/intelfb.h
index 6148300fadd..2fe3f7def53 100644
--- a/drivers/video/intelfb/intelfb.h
+++ b/drivers/video/intelfb/intelfb.h
@@ -231,8 +231,8 @@ struct intelfb_hwstate {
 struct intelfb_heap_data {
 	u32 physical;
 	u8 __iomem *virtual;
-	u32 offset;  // in GATT pages
-	u32 size;    // in bytes
+	u32 offset;		/* in GATT pages */
+	u32 size;		/* in bytes */
 };
 
 #ifdef CONFIG_FB_INTEL_I2C
@@ -270,9 +270,9 @@ struct intelfb_info {
 	struct intelfb_hwstate save_state;
 
 	/* agpgart structs */
-	struct agp_memory *gtt_fb_mem;     // use all stolen memory or vram
-	struct agp_memory *gtt_ring_mem;   // ring buffer
-	struct agp_memory *gtt_cursor_mem; // hw cursor
+	struct agp_memory *gtt_fb_mem;     /* use all stolen memory or vram */
+	struct agp_memory *gtt_ring_mem;   /* ring buffer */
+	struct agp_memory *gtt_cursor_mem; /* hw cursor */
 
 	/* use a gart reserved fb mem */
 	u8 fbmem_gart;
@@ -346,7 +346,7 @@ struct intelfb_info {
 
 	/* driver registered */
 	int registered;
-	
+
 	/* index into plls */
 	int pll_index;
 
@@ -355,7 +355,10 @@ struct intelfb_info {
 	struct intelfb_output_rec output[MAX_OUTPUTS];
 };
 
-#define IS_I9XX(dinfo) (((dinfo)->chipset == INTEL_915G)||(dinfo->chipset == INTEL_915GM)||((dinfo)->chipset == INTEL_945G)||(dinfo->chipset==INTEL_945GM))
+#define IS_I9XX(dinfo) (((dinfo)->chipset == INTEL_915G) ||	\
+			((dinfo)->chipset == INTEL_915GM) ||	\
+			((dinfo)->chipset == INTEL_945G) ||	\
+			((dinfo)->chipset==INTEL_945GM))
 
 #ifndef FBIO_WAITFORVSYNC
 #define FBIO_WAITFORVSYNC	_IOW('F', 0x20, __u32)
diff --git a/drivers/video/intelfb/intelfb_i2c.c b/drivers/video/intelfb/intelfb_i2c.c
index 61e4c8759b2..94c08bb5acf 100644
--- a/drivers/video/intelfb/intelfb_i2c.c
+++ b/drivers/video/intelfb/intelfb_i2c.c
@@ -58,7 +58,8 @@ static void intelfb_gpio_setscl(void *data, int state)
 	struct intelfb_info *dinfo = chan->dinfo;
 	u32 val;
 
-	OUTREG(chan->reg, (state ? SCL_VAL_OUT : 0) | SCL_DIR | SCL_DIR_MASK | SCL_VAL_MASK);
+	OUTREG(chan->reg, (state ? SCL_VAL_OUT : 0) |
+	       SCL_DIR | SCL_DIR_MASK | SCL_VAL_MASK);
 	val = INREG(chan->reg);
 }
 
@@ -68,7 +69,8 @@ static void intelfb_gpio_setsda(void *data, int state)
 	struct intelfb_info *dinfo = chan->dinfo;
 	u32 val;
 
-	OUTREG(chan->reg, (state ? SDA_VAL_OUT : 0) | SDA_DIR | SDA_DIR_MASK | SDA_VAL_MASK);
+	OUTREG(chan->reg, (state ? SDA_VAL_OUT : 0) |
+	       SDA_DIR | SDA_DIR_MASK | SDA_VAL_MASK);
 	val = INREG(chan->reg);
 }
 
@@ -97,26 +99,26 @@ static int intelfb_gpio_getsda(void *data)
 }
 
 static int intelfb_setup_i2c_bus(struct intelfb_info *dinfo,
-								 struct intelfb_i2c_chan *chan,
-								 const u32 reg, const char *name)
+				 struct intelfb_i2c_chan *chan,
+				 const u32 reg, const char *name)
 {
 	int rc;
 
-	chan->dinfo					= dinfo;
-	chan->reg					= reg;
+	chan->dinfo			= dinfo;
+	chan->reg			= reg;
 	snprintf(chan->adapter.name, sizeof(chan->adapter.name),
 		 "intelfb %s", name);
-	chan->adapter.owner			= THIS_MODULE;
-	chan->adapter.id			= I2C_HW_B_INTELFB;
+	chan->adapter.owner		= THIS_MODULE;
+	chan->adapter.id		= I2C_HW_B_INTELFB;
 	chan->adapter.algo_data		= &chan->algo;
 	chan->adapter.dev.parent	= &chan->dinfo->pdev->dev;
-	chan->algo.setsda			= intelfb_gpio_setsda;
-	chan->algo.setscl			= intelfb_gpio_setscl;
-	chan->algo.getsda			= intelfb_gpio_getsda;
-	chan->algo.getscl			= intelfb_gpio_getscl;
-	chan->algo.udelay			= 40;
-	chan->algo.timeout			= 20;
-	chan->algo.data				= chan;
+	chan->algo.setsda		= intelfb_gpio_setsda;
+	chan->algo.setscl		= intelfb_gpio_setscl;
+	chan->algo.getsda		= intelfb_gpio_getsda;
+	chan->algo.getscl		= intelfb_gpio_getscl;
+	chan->algo.udelay		= 40;
+	chan->algo.timeout		= 20;
+	chan->algo.data			= chan;
 
 	i2c_set_adapdata(&chan->adapter, chan);
 
@@ -142,40 +144,44 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo)
 	dinfo->output[i].type = INTELFB_OUTPUT_ANALOG;
 
 	/* setup the DDC bus for analog output */
-	intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus, GPIOA, "CRTDDC_A");
+	intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus, GPIOA,
+			      "CRTDDC_A");
 	i++;
 
-    /* need to add the output busses for each device
-       - this function is very incomplete
-       - i915GM has LVDS and TVOUT for example
-    */
-    switch(dinfo->chipset) {
+	/* need to add the output busses for each device
+	   - this function is very incomplete
+	   - i915GM has LVDS and TVOUT for example
+	*/
+	switch(dinfo->chipset) {
 	case INTEL_830M:
 	case INTEL_845G:
 	case INTEL_855GM:
 	case INTEL_865G:
 		dinfo->output[i].type = INTELFB_OUTPUT_DVO;
-		intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus, GPIOD, "DVODDC_D");
-		intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus, GPIOE, "DVOI2C_E");
+		intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].ddc_bus,
+				      GPIOD, "DVODDC_D");
+		intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus,
+				      GPIOE, "DVOI2C_E");
 		i++;
 		break;
 	case INTEL_915G:
 	case INTEL_915GM:
-		/* has  some LVDS + tv-out */
+		/* has some LVDS + tv-out */
 	case INTEL_945G:
 	case INTEL_945GM:
 		/* SDVO ports have a single control bus - 2 devices */
 		dinfo->output[i].type = INTELFB_OUTPUT_SDVO;
-		intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus, GPIOE, "SDVOCTRL_E");
+		intelfb_setup_i2c_bus(dinfo, &dinfo->output[i].i2c_bus,
+				      GPIOE, "SDVOCTRL_E");
 		/* TODO: initialize the SDVO */
-//		I830SDVOInit(pScrn, i, DVOB);
+		/* I830SDVOInit(pScrn, i, DVOB); */
 		i++;
 
 		/* set up SDVOC */
 		dinfo->output[i].type = INTELFB_OUTPUT_SDVO;
 		dinfo->output[i].i2c_bus = dinfo->output[i - 1].i2c_bus;
 		/* TODO: initialize the SDVO */
-//		I830SDVOInit(pScrn, i, DVOC);
+		/* I830SDVOInit(pScrn, i, DVOC); */
 		i++;
 		break;
 	}
diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c
index b75eda84858..0428f211f19 100644
--- a/drivers/video/intelfb/intelfbdrv.c
+++ b/drivers/video/intelfb/intelfbdrv.c
@@ -99,13 +99,6 @@
  *              Add vram option to reserve more memory than stolen by BIOS
  *              Fix intelfbhw_pan_display typo
  *              Add __initdata annotations
- *
- * TODO:
- *
- *
- * Wish List:
- *
- *
  */
 
 #include <linux/module.h>
@@ -222,8 +215,8 @@ static struct pci_driver intelfb_driver = {
 /* Module description/parameters */
 MODULE_AUTHOR("David Dawes <dawes@tungstengraphics.com>, "
 	      "Sylvain Meyer <sylvain.meyer@worldonline.fr>");
-MODULE_DESCRIPTION(
-	"Framebuffer driver for Intel(R) " SUPPORTED_CHIPSETS " chipsets");
+MODULE_DESCRIPTION("Framebuffer driver for Intel(R) " SUPPORTED_CHIPSETS
+		   " chipsets");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DEVICE_TABLE(pci, intelfb_pci_table);
 
@@ -271,8 +264,7 @@ MODULE_PARM_DESC(mode,
 #define OPT_INTVAL(opt, name) simple_strtoul(opt + strlen(name) + 1, NULL, 0)
 #define OPT_STRVAL(opt, name) (opt + strlen(name))
 
-static __inline__ char *
-get_opt_string(const char *this_opt, const char *name)
+static __inline__ char * get_opt_string(const char *this_opt, const char *name)
 {
 	const char *p;
 	int i;
@@ -290,8 +282,8 @@ get_opt_string(const char *this_opt, const char *name)
 	return ret;
 }
 
-static __inline__ int
-get_opt_int(const char *this_opt, const char *name, int *ret)
+static __inline__ int get_opt_int(const char *this_opt, const char *name,
+				  int *ret)
 {
 	if (!ret)
 		return 0;
@@ -303,8 +295,8 @@ get_opt_int(const char *this_opt, const char *name, int *ret)
 	return 1;
 }
 
-static __inline__ int
-get_opt_bool(const char *this_opt, const char *name, int *ret)
+static __inline__ int get_opt_bool(const char *this_opt, const char *name,
+				   int *ret)
 {
 	if (!ret)
 		return 0;
@@ -324,8 +316,7 @@ get_opt_bool(const char *this_opt, const char *name, int *ret)
 	return 1;
 }
 
-static int __init
-intelfb_setup(char *options)
+static int __init intelfb_setup(char *options)
 {
 	char *this_opt;
 
@@ -355,7 +346,7 @@ intelfb_setup(char *options)
 			continue;
 		if (get_opt_bool(this_opt, "accel", &accel))
 			;
- 		else if (get_opt_int(this_opt, "vram", &vram))
+		else if (get_opt_int(this_opt, "vram", &vram))
 			;
 		else if (get_opt_bool(this_opt, "hwcursor", &hwcursor))
 			;
@@ -376,8 +367,7 @@ intelfb_setup(char *options)
 
 #endif
 
-static int __init
-intelfb_init(void)
+static int __init intelfb_init(void)
 {
 #ifndef MODULE
 	char *option = NULL;
@@ -401,8 +391,7 @@ intelfb_init(void)
 	return pci_register_driver(&intelfb_driver);
 }
 
-static void __exit
-intelfb_exit(void)
+static void __exit intelfb_exit(void)
 {
 	DBG_MSG("intelfb_exit\n");
 	pci_unregister_driver(&intelfb_driver);
@@ -428,8 +417,8 @@ static inline void __devinit set_mtrr(struct intelfb_info *dinfo)
 }
 static inline void unset_mtrr(struct intelfb_info *dinfo)
 {
-  	if (dinfo->has_mtrr)
-  		mtrr_del(dinfo->mtrr_reg, dinfo->aperture.physical,
+	if (dinfo->has_mtrr)
+		mtrr_del(dinfo->mtrr_reg, dinfo->aperture.physical,
 			 dinfo->aperture.size);
 }
 #else
@@ -442,8 +431,7 @@ static inline void unset_mtrr(struct intelfb_info *dinfo)
  *                        driver init / cleanup                *
  ***************************************************************/
 
-static void
-cleanup(struct intelfb_info *dinfo)
+static void cleanup(struct intelfb_info *dinfo)
 {
 	DBG_MSG("cleanup\n");
 
@@ -499,8 +487,8 @@ cleanup(struct intelfb_info *dinfo)
 } while (0)
 
 
-static int __devinit
-intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int __devinit intelfb_pci_register(struct pci_dev *pdev,
+					  const struct pci_device_id *ent)
 {
 	struct fb_info *info;
 	struct intelfb_info *dinfo;
@@ -510,8 +498,8 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
 	int agp_memtype;
 	const char *s;
 	struct agp_bridge_data *bridge;
- 	int aperture_bar = 0;
- 	int mmio_bar = 1;
+	int aperture_bar = 0;
+	int mmio_bar = 1;
 	int offset;
 
 	DBG_MSG("intelfb_pci_register\n");
@@ -637,9 +625,8 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dinfo->ring.size = RINGBUFFER_SIZE;
 		dinfo->ring_tail_mask = dinfo->ring.size - 1;
 	}
-	if (dinfo->hwcursor) {
+	if (dinfo->hwcursor)
 		dinfo->cursor.size = HW_CURSOR_SIZE;
-	}
 
 	/* Use agpgart to manage the GATT */
 	if (!(bridge = agp_backend_acquire(pdev))) {
@@ -662,18 +649,15 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
 		offset = ROUND_UP_TO_PAGE(MB(voffset))/GTT_PAGE_SIZE;
 
 	/* set the mem offsets - set them after the already used pages */
-	if (dinfo->accel) {
+	if (dinfo->accel)
 		dinfo->ring.offset = offset + gtt_info.current_memory;
-	}
-	if (dinfo->hwcursor) {
+	if (dinfo->hwcursor)
 		dinfo->cursor.offset = offset +
 			+ gtt_info.current_memory + (dinfo->ring.size >> 12);
-	}
-	if (dinfo->fbmem_gart) {
+	if (dinfo->fbmem_gart)
 		dinfo->fb.offset = offset +
 			+ gtt_info.current_memory + (dinfo->ring.size >> 12)
 			+ (dinfo->cursor.size >> 12);
-	}
 
 	/* Allocate memories (which aren't stolen) */
 	/* Map the fb and MMIO regions */
@@ -689,7 +673,7 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dinfo->mmio_base =
 		(u8 __iomem *)ioremap_nocache(dinfo->mmio_base_phys,
-					       INTEL_REG_SIZE);
+					      INTEL_REG_SIZE);
 	if (!dinfo->mmio_base) {
 		ERR_MSG("Cannot remap MMIO region.\n");
 		cleanup(dinfo);
@@ -837,10 +821,8 @@ intelfb_pci_register(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (bailearly == 3)
 		bailout(dinfo);
 
-	if (FIXED_MODE(dinfo)) {
-		/* remap fb address */
+	if (FIXED_MODE(dinfo))	/* remap fb address */
 		update_dinfo(dinfo, &dinfo->initial_var);
-	}
 
 	if (bailearly == 4)
 		bailout(dinfo);
@@ -939,8 +921,7 @@ intelfb_pci_unregister(struct pci_dev *pdev)
  *                       helper functions                      *
  ***************************************************************/
 
-int __inline__
-intelfb_var_to_depth(const struct fb_var_screeninfo *var)
+int __inline__ intelfb_var_to_depth(const struct fb_var_screeninfo *var)
 {
 	DBG_MSG("intelfb_var_to_depth: bpp: %d, green.length is %d\n",
 		var->bits_per_pixel, var->green.length);
@@ -956,8 +937,7 @@ intelfb_var_to_depth(const struct fb_var_screeninfo *var)
 }
 
 
-static __inline__ int
-var_to_refresh(const struct fb_var_screeninfo *var)
+static __inline__ int var_to_refresh(const struct fb_var_screeninfo *var)
 {
 	int xtot = var->xres + var->left_margin + var->right_margin +
 		   var->hsync_len;
@@ -971,8 +951,7 @@ var_to_refresh(const struct fb_var_screeninfo *var)
  *                Various intialisation functions              *
  ***************************************************************/
 
-static void __devinit
-get_initial_mode(struct intelfb_info *dinfo)
+static void __devinit get_initial_mode(struct intelfb_info *dinfo)
 {
 	struct fb_var_screeninfo *var;
 	int xtot, ytot;
@@ -1039,8 +1018,7 @@ get_initial_mode(struct intelfb_info *dinfo)
 	}
 }
 
-static int __devinit
-intelfb_init_var(struct intelfb_info *dinfo)
+static int __devinit intelfb_init_var(struct intelfb_info *dinfo)
 {
 	struct fb_var_screeninfo *var;
 	int msrc = 0;
@@ -1087,10 +1065,9 @@ intelfb_init_var(struct intelfb_info *dinfo)
 
 		}
 
-		if (!msrc) {
+		if (!msrc)
 			msrc = fb_find_mode(var, dinfo->info, PREFERRED_MODE,
 					    NULL, 0, NULL, 0);
-		}
 	}
 
 	if (!msrc) {
@@ -1122,8 +1099,7 @@ intelfb_init_var(struct intelfb_info *dinfo)
 	return 0;
 }
 
-static int __devinit
-intelfb_set_fbinfo(struct intelfb_info *dinfo)
+static int __devinit intelfb_set_fbinfo(struct intelfb_info *dinfo)
 {
 	struct fb_info *info = dinfo->info;
 
@@ -1159,8 +1135,8 @@ intelfb_set_fbinfo(struct intelfb_info *dinfo)
 }
 
 /* Update dinfo to match the active video mode. */
-static void
-update_dinfo(struct intelfb_info *dinfo, struct fb_var_screeninfo *var)
+static void update_dinfo(struct intelfb_info *dinfo,
+			 struct fb_var_screeninfo *var)
 {
 	DBG_MSG("update_dinfo\n");
 
@@ -1208,36 +1184,32 @@ update_dinfo(struct intelfb_info *dinfo, struct fb_var_screeninfo *var)
  *                       fbdev interface                       *
  ***************************************************************/
 
-static int
-intelfb_open(struct fb_info *info, int user)
+static int intelfb_open(struct fb_info *info, int user)
 {
 	struct intelfb_info *dinfo = GET_DINFO(info);
 
-	if (user) {
+	if (user)
 		dinfo->open++;
-	}
 
 	return 0;
 }
 
-static int
-intelfb_release(struct fb_info *info, int user)
+static int intelfb_release(struct fb_info *info, int user)
 {
 	struct intelfb_info *dinfo = GET_DINFO(info);
 
 	if (user) {
 		dinfo->open--;
 		msleep(1);
-		if (!dinfo->open) {
+		if (!dinfo->open)
 			intelfbhw_disable_irq(dinfo);
-		}
 	}
 
 	return 0;
 }
 
-static int
-intelfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+static int intelfb_check_var(struct fb_var_screeninfo *var,
+			     struct fb_info *info)
 {
 	int change_var = 0;
 	struct fb_var_screeninfo v;
@@ -1271,15 +1243,15 @@ intelfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	}
 
 	/* Check for a supported bpp. */
-	if (v.bits_per_pixel <= 8) {
+	if (v.bits_per_pixel <= 8)
 		v.bits_per_pixel = 8;
-	} else if (v.bits_per_pixel <= 16) {
+	else if (v.bits_per_pixel <= 16) {
 		if (v.bits_per_pixel == 16)
 			v.green.length = 6;
 		v.bits_per_pixel = 16;
-	} else if (v.bits_per_pixel <= 32) {
+	} else if (v.bits_per_pixel <= 32)
 		v.bits_per_pixel = 32;
-	} else
+	else
 		return -EINVAL;
 
 	change_var = ((info->var.xres != var->xres) ||
@@ -1361,10 +1333,9 @@ intelfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	return 0;
 }
 
-static int
-intelfb_set_par(struct fb_info *info)
+static int intelfb_set_par(struct fb_info *info)
 {
- 	struct intelfb_hwstate *hw;
+	struct intelfb_hwstate *hw;
         struct intelfb_info *dinfo = GET_DINFO(info);
 
 	if (FIXED_MODE(dinfo)) {
@@ -1372,9 +1343,9 @@ intelfb_set_par(struct fb_info *info)
 		return -EINVAL;
 	}
 
- 	hw = kmalloc(sizeof(*hw), GFP_ATOMIC);
- 	if (!hw)
- 		return -ENOMEM;
+	hw = kmalloc(sizeof(*hw), GFP_ATOMIC);
+	if (!hw)
+		return -ENOMEM;
 
 	DBG_MSG("intelfb_set_par (%dx%d-%d)\n", info->var.xres,
 		info->var.yres, info->var.bits_per_pixel);
@@ -1384,15 +1355,15 @@ intelfb_set_par(struct fb_info *info)
 	if (ACCEL(dinfo, info))
 		intelfbhw_2d_stop(dinfo);
 
- 	memcpy(hw, &dinfo->save_state, sizeof(*hw));
- 	if (intelfbhw_mode_to_hw(dinfo, hw, &info->var))
- 		goto invalid_mode;
- 	if (intelfbhw_program_mode(dinfo, hw, 0))
- 		goto invalid_mode;
+	memcpy(hw, &dinfo->save_state, sizeof(*hw));
+	if (intelfbhw_mode_to_hw(dinfo, hw, &info->var))
+		goto invalid_mode;
+	if (intelfbhw_program_mode(dinfo, hw, 0))
+		goto invalid_mode;
 
 #if REGDUMP > 0
- 	intelfbhw_read_hw_state(dinfo, hw, 0);
- 	intelfbhw_print_hw_state(dinfo, hw);
+	intelfbhw_read_hw_state(dinfo, hw, 0);
+	intelfbhw_print_hw_state(dinfo, hw);
 #endif
 
 	update_dinfo(dinfo, &info->var);
@@ -1408,9 +1379,9 @@ intelfb_set_par(struct fb_info *info)
 		info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN |
 		FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT |
 		FBINFO_HWACCEL_IMAGEBLIT;
-	} else {
+	} else
 		info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
-	}
+
 	kfree(hw);
 	return 0;
 invalid_mode:
@@ -1418,9 +1389,9 @@ invalid_mode:
 	return -EINVAL;
 }
 
-static int
-intelfb_setcolreg(unsigned regno, unsigned red, unsigned green,
-		  unsigned blue, unsigned transp, struct fb_info *info)
+static int intelfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			     unsigned blue, unsigned transp,
+			     struct fb_info *info)
 {
 	struct intelfb_info *dinfo = GET_DINFO(info);
 
@@ -1463,23 +1434,22 @@ intelfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	return 0;
 }
 
-static int
-intelfb_blank(int blank, struct fb_info *info)
+static int intelfb_blank(int blank, struct fb_info *info)
 {
 	intelfbhw_do_blank(blank, info);
 	return 0;
 }
 
-static int
-intelfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+static int intelfb_pan_display(struct fb_var_screeninfo *var,
+			       struct fb_info *info)
 {
 	intelfbhw_pan_display(var, info);
 	return 0;
 }
 
 /* When/if we have our own ioctls. */
-static int
-intelfb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
+static int intelfb_ioctl(struct fb_info *info, unsigned int cmd,
+			 unsigned long arg)
 {
 	int retval = 0;
 	struct intelfb_info *dinfo = GET_DINFO(info);
@@ -1499,8 +1469,8 @@ intelfb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg)
 	return retval;
 }
 
-static void
-intelfb_fillrect (struct fb_info *info, const struct fb_fillrect *rect)
+static void intelfb_fillrect (struct fb_info *info,
+			      const struct fb_fillrect *rect)
 {
         struct intelfb_info *dinfo = GET_DINFO(info);
 	u32 rop, color;
@@ -1514,7 +1484,7 @@ intelfb_fillrect (struct fb_info *info, const struct fb_fillrect *rect)
 
 	if (rect->rop == ROP_COPY)
 		rop = PAT_ROP_GXCOPY;
-	else // ROP_XOR
+	else /* ROP_XOR */
 		rop = PAT_ROP_GXXOR;
 
 	if (dinfo->depth != 8)
@@ -1528,8 +1498,8 @@ intelfb_fillrect (struct fb_info *info, const struct fb_fillrect *rect)
 			      rop);
 }
 
-static void
-intelfb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
+static void intelfb_copyarea(struct fb_info *info,
+			     const struct fb_copyarea *region)
 {
         struct intelfb_info *dinfo = GET_DINFO(info);
 
@@ -1545,8 +1515,8 @@ intelfb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 			    dinfo->pitch, info->var.bits_per_pixel);
 }
 
-static void
-intelfb_imageblit(struct fb_info *info, const struct fb_image *image)
+static void intelfb_imageblit(struct fb_info *info,
+			      const struct fb_image *image)
 {
         struct intelfb_info *dinfo = GET_DINFO(info);
 	u32 fgcolor, bgcolor;
@@ -1574,8 +1544,7 @@ intelfb_imageblit(struct fb_info *info, const struct fb_image *image)
 		return cfb_imageblit(info, image);
 }
 
-static int
-intelfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
+static int intelfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 {
         struct intelfb_info *dinfo = GET_DINFO(info);
 	u32 physical;
@@ -1689,8 +1658,7 @@ intelfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 	return 0;
 }
 
-static int
-intelfb_sync(struct fb_info *info)
+static int intelfb_sync(struct fb_info *info)
 {
         struct intelfb_info *dinfo = GET_DINFO(info);
 
diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c
index 6a47682d861..2a0e32074f7 100644
--- a/drivers/video/intelfb/intelfbhw.c
+++ b/drivers/video/intelfb/intelfbhw.c
@@ -56,17 +56,16 @@ static struct pll_min_max plls[PLLS_MAX] = {
 	  6, 16, 3, 16,
 	  4, 128, 0, 31,
 	  930000, 1400000, 165000, 48000,
-	  4, 2 }, //I8xx
+	  4, 2 },		/* I8xx */
 
 	{ 75, 120, 10, 20,
 	  5, 9, 4, 7,
 	  5, 80, 1, 8,
 	  1400000, 2800000, 200000, 96000,
-	  10, 5 }  //I9xx
+	  10, 5 }		/* I9xx */
 };
 
-int
-intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo)
+int intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo)
 {
 	u32 tmp;
 	if (!pdev || !dinfo)
@@ -149,9 +148,8 @@ intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo)
 	}
 }
 
-int
-intelfbhw_get_memory(struct pci_dev *pdev, int *aperture_size,
-		     int *stolen_size)
+int intelfbhw_get_memory(struct pci_dev *pdev, int *aperture_size,
+			 int *stolen_size)
 {
 	struct pci_dev *bridge_dev;
 	u16 tmp;
@@ -254,8 +252,7 @@ intelfbhw_get_memory(struct pci_dev *pdev, int *aperture_size,
 	}
 }
 
-int
-intelfbhw_check_non_crt(struct intelfb_info *dinfo)
+int intelfbhw_check_non_crt(struct intelfb_info *dinfo)
 {
 	int dvo = 0;
 
@@ -271,8 +268,7 @@ intelfbhw_check_non_crt(struct intelfb_info *dinfo)
 	return dvo;
 }
 
-const char *
-intelfbhw_dvo_to_string(int dvo)
+const char * intelfbhw_dvo_to_string(int dvo)
 {
 	if (dvo & DVOA_PORT)
 		return "DVO port A";
@@ -287,9 +283,8 @@ intelfbhw_dvo_to_string(int dvo)
 }
 
 
-int
-intelfbhw_validate_mode(struct intelfb_info *dinfo,
-			struct fb_var_screeninfo *var)
+int intelfbhw_validate_mode(struct intelfb_info *dinfo,
+			    struct fb_var_screeninfo *var)
 {
 	int bytes_per_pixel;
 	int tmp;
@@ -322,17 +317,26 @@ intelfbhw_validate_mode(struct intelfb_info *dinfo,
 			var->yres, VACTIVE_MASK + 1);
 		return 1;
 	}
-
-	/* Check for interlaced/doublescan modes. */
-	if (var->vmode & FB_VMODE_INTERLACED) {
-		WRN_MSG("Mode is interlaced.\n");
+	if (var->xres < 4) {
+		WRN_MSG("X resolution too small (%d vs 4).\n", var->xres);
+		return 1;
+	}
+	if (var->yres < 4) {
+		WRN_MSG("Y resolution too small (%d vs 4).\n", var->yres);
 		return 1;
 	}
+
+	/* Check for doublescan modes. */
 	if (var->vmode & FB_VMODE_DOUBLE) {
 		WRN_MSG("Mode is double-scan.\n");
 		return 1;
 	}
 
+	if ((var->vmode & FB_VMODE_INTERLACED) && (var->yres & 1)) {
+		WRN_MSG("Odd number of lines in interlaced mode\n");
+		return 1;
+	}
+
 	/* Check if clock is OK. */
 	tmp = 1000000000 / var->pixclock;
 	if (tmp < MIN_CLOCK) {
@@ -349,8 +353,7 @@ intelfbhw_validate_mode(struct intelfb_info *dinfo,
 	return 0;
 }
 
-int
-intelfbhw_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+int intelfbhw_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
 {
 	struct intelfb_info *dinfo = GET_DINFO(info);
 	u32 offset, xoffset, yoffset;
@@ -372,9 +375,10 @@ intelfbhw_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
 	offset += dinfo->fb.offset << 12;
 
 	dinfo->vsync.pan_offset = offset;
-	if ((var->activate & FB_ACTIVATE_VBL) && !intelfbhw_enable_irq(dinfo, 0)) {
+	if ((var->activate & FB_ACTIVATE_VBL) &&
+	    !intelfbhw_enable_irq(dinfo))
 		dinfo->vsync.pan_display = 1;
-	} else {
+	else {
 		dinfo->vsync.pan_display = 0;
 		OUTREG(DSPABASE, offset);
 	}
@@ -383,8 +387,7 @@ intelfbhw_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
 }
 
 /* Blank the screen. */
-void
-intelfbhw_do_blank(int blank, struct fb_info *info)
+void intelfbhw_do_blank(int blank, struct fb_info *info)
 {
 	struct intelfb_info *dinfo = GET_DINFO(info);
 	u32 tmp;
@@ -409,11 +412,10 @@ intelfbhw_do_blank(int blank, struct fb_info *info)
 	DBG_MSG("cursor_on is %d\n", dinfo->cursor_on);
 #endif
 	if (dinfo->cursor_on) {
-		if (blank) {
+		if (blank)
 			intelfbhw_cursor_hide(dinfo);
-		} else {
+		else
 			intelfbhw_cursor_show(dinfo);
-		}
 		dinfo->cursor_on = 1;
 	}
 	dinfo->cursor_blanked = blank;
@@ -441,19 +443,18 @@ intelfbhw_do_blank(int blank, struct fb_info *info)
 }
 
 
-void
-intelfbhw_setcolreg(struct intelfb_info *dinfo, unsigned regno,
-		    unsigned red, unsigned green, unsigned blue,
-		    unsigned transp)
+void intelfbhw_setcolreg(struct intelfb_info *dinfo, unsigned regno,
+			 unsigned red, unsigned green, unsigned blue,
+			 unsigned transp)
 {
+	u32 palette_reg = (dinfo->pipe == PIPE_A) ?
+			  PALETTE_A : PALETTE_B;
+
 #if VERBOSE > 0
 	DBG_MSG("intelfbhw_setcolreg: %d: (%d, %d, %d)\n",
 		regno, red, green, blue);
 #endif
 
-	u32 palette_reg = (dinfo->pipe == PIPE_A) ?
-			  PALETTE_A : PALETTE_B;
-
 	OUTREG(palette_reg + (regno << 2),
 	       (red << PALETTE_8_RED_SHIFT) |
 	       (green << PALETTE_8_GREEN_SHIFT) |
@@ -461,9 +462,8 @@ intelfbhw_setcolreg(struct intelfb_info *dinfo, unsigned regno,
 }
 
 
-int
-intelfbhw_read_hw_state(struct intelfb_info *dinfo, struct intelfb_hwstate *hw,
-			int flag)
+int intelfbhw_read_hw_state(struct intelfb_info *dinfo,
+			    struct intelfb_hwstate *hw, int flag)
 {
 	int i;
 
@@ -610,7 +610,8 @@ static int calc_vclock3(int index, int m, int n, int p)
 	return plls[index].ref_clk * m / n / p;
 }
 
-static int calc_vclock(int index, int m1, int m2, int n, int p1, int p2, int lvds)
+static int calc_vclock(int index, int m1, int m2, int n, int p1, int p2,
+		       int lvds)
 {
 	struct pll_min_max *pll = &plls[index];
 	u32 m, vco, p;
@@ -619,17 +620,16 @@ static int calc_vclock(int index, int m1, int m2, int n, int p1, int p2, int lvd
 	n += 2;
 	vco = pll->ref_clk * m / n;
 
-	if (index == PLLS_I8xx) {
+	if (index == PLLS_I8xx)
 		p = ((p1 + 2) * (1 << (p2 + 1)));
-	} else {
+	else
 		p = ((p1) * (p2 ? 5 : 10));
-	}
 	return vco / p;
 }
 
 #if REGDUMP
-static void
-intelfbhw_get_p1p2(struct intelfb_info *dinfo, int dpll, int *o_p1, int *o_p2)
+static void intelfbhw_get_p1p2(struct intelfb_info *dinfo, int dpll,
+			       int *o_p1, int *o_p2)
 {
 	int p1, p2;
 
@@ -638,7 +638,7 @@ intelfbhw_get_p1p2(struct intelfb_info *dinfo, int dpll, int *o_p1, int *o_p2)
 			p1 = 1;
 		else
 			p1 = (dpll >> DPLL_P1_SHIFT) & 0xff;
-		
+
 		p1 = ffs(p1);
 
 		p2 = (dpll >> DPLL_I9XX_P2_SHIFT) & DPLL_P2_MASK;
@@ -656,8 +656,8 @@ intelfbhw_get_p1p2(struct intelfb_info *dinfo, int dpll, int *o_p1, int *o_p2)
 #endif
 
 
-void
-intelfbhw_print_hw_state(struct intelfb_info *dinfo, struct intelfb_hwstate *hw)
+void intelfbhw_print_hw_state(struct intelfb_info *dinfo,
+			      struct intelfb_hwstate *hw)
 {
 #if REGDUMP
 	int i, m1, m2, n, p1, p2;
@@ -670,7 +670,7 @@ intelfbhw_print_hw_state(struct intelfb_info *dinfo, struct intelfb_hwstate *hw)
 	printk("hw state dump start\n");
 	printk("	VGA0_DIVISOR:		0x%08x\n", hw->vga0_divisor);
 	printk("	VGA1_DIVISOR:		0x%08x\n", hw->vga1_divisor);
-	printk("	VGAPD: 			0x%08x\n", hw->vga_pd);
+	printk("	VGAPD:			0x%08x\n", hw->vga_pd);
 	n = (hw->vga0_divisor >> FP_N_DIVISOR_SHIFT) & FP_DIVISOR_MASK;
 	m1 = (hw->vga0_divisor >> FP_M1_DIVISOR_SHIFT) & FP_DIVISOR_MASK;
 	m2 = (hw->vga0_divisor >> FP_M2_DIVISOR_SHIFT) & FP_DIVISOR_MASK;
@@ -689,7 +689,8 @@ intelfbhw_print_hw_state(struct intelfb_info *dinfo, struct intelfb_hwstate *hw)
 	intelfbhw_get_p1p2(dinfo, hw->vga_pd, &p1, &p2);
 	printk("	VGA1: (m1, m2, n, p1, p2) = (%d, %d, %d, %d, %d)\n",
 	       m1, m2, n, p1, p2);
-	printk("	VGA1: clock is %d\n", calc_vclock(index, m1, m2, n, p1, p2, 0));
+	printk("	VGA1: clock is %d\n",
+	       calc_vclock(index, m1, m2, n, p1, p2, 0));
 
 	printk("	DPLL_A:			0x%08x\n", hw->dpll_a);
 	printk("	DPLL_B:			0x%08x\n", hw->dpll_b);
@@ -706,7 +707,8 @@ intelfbhw_print_hw_state(struct intelfb_info *dinfo, struct intelfb_hwstate *hw)
 
 	printk("	PLLA0: (m1, m2, n, p1, p2) = (%d, %d, %d, %d, %d)\n",
 	       m1, m2, n, p1, p2);
-	printk("	PLLA0: clock is %d\n", calc_vclock(index, m1, m2, n, p1, p2, 0));
+	printk("	PLLA0: clock is %d\n",
+	       calc_vclock(index, m1, m2, n, p1, p2, 0));
 
 	n = (hw->fpa1 >> FP_N_DIVISOR_SHIFT) & FP_DIVISOR_MASK;
 	m1 = (hw->fpa1 >> FP_M1_DIVISOR_SHIFT) & FP_DIVISOR_MASK;
@@ -716,7 +718,8 @@ intelfbhw_print_hw_state(struct intelfb_info *dinfo, struct intelfb_hwstate *hw)
 
 	printk("	PLLA1: (m1, m2, n, p1, p2) = (%d, %d, %d, %d, %d)\n",
 	       m1, m2, n, p1, p2);
-	printk("	PLLA1: clock is %d\n", calc_vclock(index, m1, m2, n, p1, p2, 0));
+	printk("	PLLA1: clock is %d\n",
+	       calc_vclock(index, m1, m2, n, p1, p2, 0));
 
 #if 0
 	printk("	PALETTE_A:\n");
@@ -821,8 +824,8 @@ intelfbhw_print_hw_state(struct intelfb_info *dinfo, struct intelfb_hwstate *hw)
 
 
 /* Split the M parameter into M1 and M2. */
-static int
-splitm(int index, unsigned int m, unsigned int *retm1, unsigned int *retm2)
+static int splitm(int index, unsigned int m, unsigned int *retm1,
+		  unsigned int *retm2)
 {
 	int m1, m2;
 	int testm;
@@ -843,8 +846,8 @@ splitm(int index, unsigned int m, unsigned int *retm1, unsigned int *retm2)
 }
 
 /* Split the P parameter into P1 and P2. */
-static int
-splitp(int index, unsigned int p, unsigned int *retp1, unsigned int *retp2)
+static int splitp(int index, unsigned int p, unsigned int *retp1,
+		  unsigned int *retp2)
 {
 	int p1, p2;
 	struct pll_min_max *pll = &plls[index];
@@ -878,9 +881,8 @@ splitp(int index, unsigned int p, unsigned int *retp1, unsigned int *retp2)
 	}
 }
 
-static int
-calc_pll_params(int index, int clock, u32 *retm1, u32 *retm2, u32 *retn, u32 *retp1,
-		u32 *retp2, u32 *retclock)
+static int calc_pll_params(int index, int clock, u32 *retm1, u32 *retm2,
+			   u32 *retn, u32 *retp1, u32 *retp2, u32 *retclock)
 {
 	u32 m1, m2, n, p1, p2, n1, testm;
 	u32 f_vco, p, p_best = 0, m, f_out = 0;
@@ -975,8 +977,8 @@ calc_pll_params(int index, int clock, u32 *retm1, u32 *retm2, u32 *retn, u32 *re
 	return 0;
 }
 
-static __inline__ int
-check_overflow(u32 value, u32 limit, const char *description)
+static __inline__ int check_overflow(u32 value, u32 limit,
+				     const char *description)
 {
 	if (value > limit) {
 		WRN_MSG("%s value %d exceeds limit %d\n",
@@ -987,9 +989,9 @@ check_overflow(u32 value, u32 limit, const char *description)
 }
 
 /* It is assumed that hw is filled in with the initial state information. */
-int
-intelfbhw_mode_to_hw(struct intelfb_info *dinfo, struct intelfb_hwstate *hw,
-		     struct fb_var_screeninfo *var)
+int intelfbhw_mode_to_hw(struct intelfb_info *dinfo,
+			 struct intelfb_hwstate *hw,
+			 struct fb_var_screeninfo *var)
 {
 	int pipe = PIPE_A;
 	u32 *dpll, *fp0, *fp1;
@@ -1093,9 +1095,8 @@ intelfbhw_mode_to_hw(struct intelfb_info *dinfo, struct intelfb_hwstate *hw,
 	if (IS_I9XX(dinfo)) {
 		*dpll |= (p2 << DPLL_I9XX_P2_SHIFT);
 		*dpll |= (1 << (p1 - 1)) << DPLL_P1_SHIFT;
-	} else {
+	} else
 		*dpll |= (p2 << DPLL_P2_SHIFT) | (p1 << DPLL_P1_SHIFT);
-	}
 
 	*fp0 = (n << FP_N_DIVISOR_SHIFT) |
 	       (m1 << FP_M1_DIVISOR_SHIFT) |
@@ -1139,6 +1140,8 @@ intelfbhw_mode_to_hw(struct intelfb_info *dinfo, struct intelfb_hwstate *hw,
 		hblank_end);
 
 	vactive = var->yres;
+	if (var->vmode & FB_VMODE_INTERLACED)
+		vactive--; /* the chip adds 2 halflines automatically */
 	vsync_start = vactive + var->lower_margin;
 	vsync_end = vsync_start + var->vsync_len;
 	vtotal = vsync_end + var->upper_margin;
@@ -1220,19 +1223,24 @@ intelfbhw_mode_to_hw(struct intelfb_info *dinfo, struct intelfb_hwstate *hw,
 
 	/* Set the palette to 8-bit mode. */
 	*pipe_conf &= ~PIPECONF_GAMMA;
+
+	if (var->vmode & FB_VMODE_INTERLACED)
+		*pipe_conf |= PIPECONF_INTERLACE_W_FIELD_INDICATION;
+	else
+		*pipe_conf &= ~PIPECONF_INTERLACE_MASK;
+
 	return 0;
 }
 
 /* Program a (non-VGA) video mode. */
-int
-intelfbhw_program_mode(struct intelfb_info *dinfo,
-		     const struct intelfb_hwstate *hw, int blank)
+int intelfbhw_program_mode(struct intelfb_info *dinfo,
+			   const struct intelfb_hwstate *hw, int blank)
 {
 	int pipe = PIPE_A;
 	u32 tmp;
 	const u32 *dpll, *fp0, *fp1, *pipe_conf;
 	const u32 *hs, *ht, *hb, *vs, *vt, *vb, *ss;
-	u32 dpll_reg, fp0_reg, fp1_reg, pipe_conf_reg;
+	u32 dpll_reg, fp0_reg, fp1_reg, pipe_conf_reg, pipe_stat_reg;
 	u32 hsync_reg, htotal_reg, hblank_reg;
 	u32 vsync_reg, vtotal_reg, vblank_reg;
 	u32 src_size_reg;
@@ -1273,6 +1281,7 @@ intelfbhw_program_mode(struct intelfb_info *dinfo,
 		fp0_reg = FPB0;
 		fp1_reg = FPB1;
 		pipe_conf_reg = PIPEBCONF;
+		pipe_stat_reg = PIPEBSTAT;
 		hsync_reg = HSYNC_B;
 		htotal_reg = HTOTAL_B;
 		hblank_reg = HBLANK_B;
@@ -1296,6 +1305,7 @@ intelfbhw_program_mode(struct intelfb_info *dinfo,
 		fp0_reg = FPA0;
 		fp1_reg = FPA1;
 		pipe_conf_reg = PIPEACONF;
+		pipe_stat_reg = PIPEASTAT;
 		hsync_reg = HSYNC_A;
 		htotal_reg = HTOTAL_A;
 		hblank_reg = HBLANK_A;
@@ -1312,8 +1322,8 @@ intelfbhw_program_mode(struct intelfb_info *dinfo,
 
 	count = 0;
 	do {
-		tmp_val[count%3] = INREG(0x70000);
-		if ((tmp_val[0] == tmp_val[1]) && (tmp_val[1]==tmp_val[2]))
+		tmp_val[count % 3] = INREG(PIPEA_DSL);
+		if ((tmp_val[0] == tmp_val[1]) && (tmp_val[1] == tmp_val[2]))
 			break;
 		count++;
 		udelay(1);
@@ -1322,7 +1332,7 @@ intelfbhw_program_mode(struct intelfb_info *dinfo,
 			tmp &= ~PIPECONF_ENABLE;
 			OUTREG(pipe_conf_reg, tmp);
 		}
-	} while(count < 2000);
+	} while (count < 2000);
 
 	OUTREG(ADPA, INREG(ADPA) & ~ADPA_DAC_ENABLE);
 
@@ -1382,6 +1392,17 @@ intelfbhw_program_mode(struct intelfb_info *dinfo,
 	OUTREG(vtotal_reg, *vt);
 	OUTREG(src_size_reg, *ss);
 
+	switch (dinfo->info->var.vmode & (FB_VMODE_INTERLACED |
+					  FB_VMODE_ODD_FLD_FIRST)) {
+	case FB_VMODE_INTERLACED | FB_VMODE_ODD_FLD_FIRST:
+		OUTREG(pipe_stat_reg, 0xFFFF | PIPESTAT_FLD_EVT_ODD_EN);
+		break;
+	case FB_VMODE_INTERLACED: /* even lines first */
+		OUTREG(pipe_stat_reg, 0xFFFF | PIPESTAT_FLD_EVT_EVEN_EN);
+		break;
+	default:		/* non-interlaced */
+		OUTREG(pipe_stat_reg, 0xFFFF); /* clear all status bits only */
+	}
 	/* Enable pipe */
 	OUTREG(pipe_conf_reg, *pipe_conf | PIPECONF_ENABLE);
 
@@ -1446,8 +1467,7 @@ static  u32 get_ring_space(struct intelfb_info *dinfo)
 	return ring_space;
 }
 
-static int
-wait_ring(struct intelfb_info *dinfo, int n)
+static int wait_ring(struct intelfb_info *dinfo, int n)
 {
 	int i = 0;
 	unsigned long end;
@@ -1489,16 +1509,15 @@ wait_ring(struct intelfb_info *dinfo, int n)
 	return i;
 }
 
-static void
-do_flush(struct intelfb_info *dinfo) {
+static void do_flush(struct intelfb_info *dinfo)
+{
 	START_RING(2);
 	OUT_RING(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
 	OUT_RING(MI_NOOP);
 	ADVANCE_RING();
 }
 
-void
-intelfbhw_do_sync(struct intelfb_info *dinfo)
+void intelfbhw_do_sync(struct intelfb_info *dinfo)
 {
 #if VERBOSE > 0
 	DBG_MSG("intelfbhw_do_sync\n");
@@ -1517,8 +1536,7 @@ intelfbhw_do_sync(struct intelfb_info *dinfo)
 	dinfo->ring_space = dinfo->ring.size - RING_MIN_FREE;
 }
 
-static void
-refresh_ring(struct intelfb_info *dinfo)
+static void refresh_ring(struct intelfb_info *dinfo)
 {
 #if VERBOSE > 0
 	DBG_MSG("refresh_ring\n");
@@ -1529,8 +1547,7 @@ refresh_ring(struct intelfb_info *dinfo)
 	dinfo->ring_space = get_ring_space(dinfo);
 }
 
-static void
-reset_state(struct intelfb_info *dinfo)
+static void reset_state(struct intelfb_info *dinfo)
 {
 	int i;
 	u32 tmp;
@@ -1560,12 +1577,11 @@ reset_state(struct intelfb_info *dinfo)
 }
 
 /* Stop the 2D engine, and turn off the ring buffer. */
-void
-intelfbhw_2d_stop(struct intelfb_info *dinfo)
+void intelfbhw_2d_stop(struct intelfb_info *dinfo)
 {
 #if VERBOSE > 0
-	DBG_MSG("intelfbhw_2d_stop: accel: %d, ring_active: %d\n", dinfo->accel,
-		dinfo->ring_active);
+	DBG_MSG("intelfbhw_2d_stop: accel: %d, ring_active: %d\n",
+		dinfo->accel, dinfo->ring_active);
 #endif
 
 	if (!dinfo->accel)
@@ -1580,8 +1596,7 @@ intelfbhw_2d_stop(struct intelfb_info *dinfo)
  * It is assumed that the graphics engine has been stopped by previously
  * calling intelfb_2d_stop().
  */
-void
-intelfbhw_2d_start(struct intelfb_info *dinfo)
+void intelfbhw_2d_start(struct intelfb_info *dinfo)
 {
 #if VERBOSE > 0
 	DBG_MSG("intelfbhw_2d_start: accel: %d, ring_active: %d\n",
@@ -1605,9 +1620,8 @@ intelfbhw_2d_start(struct intelfb_info *dinfo)
 }
 
 /* 2D fillrect (solid fill or invert) */
-void
-intelfbhw_do_fillrect(struct intelfb_info *dinfo, u32 x, u32 y, u32 w, u32 h,
-		      u32 color, u32 pitch, u32 bpp, u32 rop)
+void intelfbhw_do_fillrect(struct intelfb_info *dinfo, u32 x, u32 y, u32 w,
+			   u32 h, u32 color, u32 pitch, u32 bpp, u32 rop)
 {
 	u32 br00, br09, br13, br14, br16;
 
@@ -1696,9 +1710,9 @@ intelfbhw_do_bitblt(struct intelfb_info *dinfo, u32 curx, u32 cury,
 	ADVANCE_RING();
 }
 
-int
-intelfbhw_do_drawglyph(struct intelfb_info *dinfo, u32 fg, u32 bg, u32 w,
-		       u32 h, const u8* cdat, u32 x, u32 y, u32 pitch, u32 bpp)
+int intelfbhw_do_drawglyph(struct intelfb_info *dinfo, u32 fg, u32 bg, u32 w,
+			   u32 h, const u8* cdat, u32 x, u32 y, u32 pitch,
+			   u32 bpp)
 {
 	int nbytes, ndwords, pad, tmp;
 	u32 br00, br09, br13, br18, br19, br22, br23;
@@ -1785,8 +1799,7 @@ intelfbhw_do_drawglyph(struct intelfb_info *dinfo, u32 fg, u32 bg, u32 w,
 }
 
 /* HW cursor functions. */
-void
-intelfbhw_cursor_init(struct intelfb_info *dinfo)
+void intelfbhw_cursor_init(struct intelfb_info *dinfo)
 {
 	u32 tmp;
 
@@ -1817,8 +1830,7 @@ intelfbhw_cursor_init(struct intelfb_info *dinfo)
 	}
 }
 
-void
-intelfbhw_cursor_hide(struct intelfb_info *dinfo)
+void intelfbhw_cursor_hide(struct intelfb_info *dinfo)
 {
 	u32 tmp;
 
@@ -1843,8 +1855,7 @@ intelfbhw_cursor_hide(struct intelfb_info *dinfo)
 	}
 }
 
-void
-intelfbhw_cursor_show(struct intelfb_info *dinfo)
+void intelfbhw_cursor_show(struct intelfb_info *dinfo)
 {
 	u32 tmp;
 
@@ -1873,8 +1884,7 @@ intelfbhw_cursor_show(struct intelfb_info *dinfo)
 	}
 }
 
-void
-intelfbhw_cursor_setpos(struct intelfb_info *dinfo, int x, int y)
+void intelfbhw_cursor_setpos(struct intelfb_info *dinfo, int x, int y)
 {
 	u32 tmp;
 
@@ -1892,13 +1902,11 @@ intelfbhw_cursor_setpos(struct intelfb_info *dinfo, int x, int y)
 	      ((y & CURSOR_POS_MASK) << CURSOR_Y_SHIFT);
 	OUTREG(CURSOR_A_POSITION, tmp);
 
-	if (IS_I9XX(dinfo)) {
+	if (IS_I9XX(dinfo))
 		OUTREG(CURSOR_A_BASEADDR, dinfo->cursor.physical);
-	}
 }
 
-void
-intelfbhw_cursor_setcolor(struct intelfb_info *dinfo, u32 bg, u32 fg)
+void intelfbhw_cursor_setcolor(struct intelfb_info *dinfo, u32 bg, u32 fg)
 {
 #if VERBOSE > 0
 	DBG_MSG("intelfbhw_cursor_setcolor\n");
@@ -1910,9 +1918,8 @@ intelfbhw_cursor_setcolor(struct intelfb_info *dinfo, u32 bg, u32 fg)
 	OUTREG(CURSOR_A_PALETTE3, bg & CURSOR_PALETTE_MASK);
 }
 
-void
-intelfbhw_cursor_load(struct intelfb_info *dinfo, int width, int height,
-		      u8 *data)
+void intelfbhw_cursor_load(struct intelfb_info *dinfo, int width, int height,
+			   u8 *data)
 {
 	u8 __iomem *addr = (u8 __iomem *)dinfo->cursor.virtual;
 	int i, j, w = width / 8;
@@ -1940,8 +1947,8 @@ intelfbhw_cursor_load(struct intelfb_info *dinfo, int width, int height,
 	}
 }
 
-void
-intelfbhw_cursor_reset(struct intelfb_info *dinfo) {
+void intelfbhw_cursor_reset(struct intelfb_info *dinfo)
+{
 	u8 __iomem *addr = (u8 __iomem *)dinfo->cursor.virtual;
 	int i, j;
 
@@ -1961,72 +1968,72 @@ intelfbhw_cursor_reset(struct intelfb_info *dinfo) {
 	}
 }
 
-static irqreturn_t
-intelfbhw_irq(int irq, void *dev_id) {
-	int handled = 0;
+static irqreturn_t intelfbhw_irq(int irq, void *dev_id)
+{
 	u16 tmp;
 	struct intelfb_info *dinfo = (struct intelfb_info *)dev_id;
 
 	spin_lock(&dinfo->int_lock);
 
 	tmp = INREG16(IIR);
-	tmp &= VSYNC_PIPE_A_INTERRUPT;
+	if (dinfo->info->var.vmode & FB_VMODE_INTERLACED)
+		tmp &= PIPE_A_EVENT_INTERRUPT;
+	else
+		tmp &= VSYNC_PIPE_A_INTERRUPT; /* non-interlaced */
 
 	if (tmp == 0) {
 		spin_unlock(&dinfo->int_lock);
-		return IRQ_RETVAL(handled);
+		return IRQ_RETVAL(0); /* not us */
 	}
 
-	OUTREG16(IIR, tmp);
+	/* clear status bits 0-15 ASAP and don't touch bits 16-31 */
+	OUTREG(PIPEASTAT, INREG(PIPEASTAT));
 
-	if (tmp & VSYNC_PIPE_A_INTERRUPT) {
-		dinfo->vsync.count++;
-		if (dinfo->vsync.pan_display) {
-			dinfo->vsync.pan_display = 0;
-			OUTREG(DSPABASE, dinfo->vsync.pan_offset);
-		}
-		wake_up_interruptible(&dinfo->vsync.wait);
-		handled = 1;
+	OUTREG16(IIR, tmp);
+	if (dinfo->vsync.pan_display) {
+		dinfo->vsync.pan_display = 0;
+		OUTREG(DSPABASE, dinfo->vsync.pan_offset);
 	}
 
+	dinfo->vsync.count++;
+	wake_up_interruptible(&dinfo->vsync.wait);
+
 	spin_unlock(&dinfo->int_lock);
 
-	return IRQ_RETVAL(handled);
+	return IRQ_RETVAL(1);
 }
 
-int
-intelfbhw_enable_irq(struct intelfb_info *dinfo, int reenable) {
-
+int intelfbhw_enable_irq(struct intelfb_info *dinfo)
+{
+	u16 tmp;
 	if (!test_and_set_bit(0, &dinfo->irq_flags)) {
 		if (request_irq(dinfo->pdev->irq, intelfbhw_irq, IRQF_SHARED,
-		     "intelfb", dinfo)) {
+				"intelfb", dinfo)) {
 			clear_bit(0, &dinfo->irq_flags);
 			return -EINVAL;
 		}
 
 		spin_lock_irq(&dinfo->int_lock);
-		OUTREG16(HWSTAM, 0xfffe);
-		OUTREG16(IMR, 0x0);
-		OUTREG16(IER, VSYNC_PIPE_A_INTERRUPT);
-		spin_unlock_irq(&dinfo->int_lock);
-	} else if (reenable) {
-		u16 ier;
-
+		OUTREG16(HWSTAM, 0xfffe); /* i830 DRM uses ffff */
+		OUTREG16(IMR, 0);
+	} else
 		spin_lock_irq(&dinfo->int_lock);
-		ier = INREG16(IER);
-		if ((ier & VSYNC_PIPE_A_INTERRUPT)) {
-			DBG_MSG("someone disabled the IRQ [%08X]\n", ier);
-			OUTREG(IER, VSYNC_PIPE_A_INTERRUPT);
-		}
-		spin_unlock_irq(&dinfo->int_lock);
+
+	if (dinfo->info->var.vmode & FB_VMODE_INTERLACED)
+		tmp = PIPE_A_EVENT_INTERRUPT;
+	else
+		tmp = VSYNC_PIPE_A_INTERRUPT; /* non-interlaced */
+	if (tmp != INREG16(IER)) {
+		DBG_MSG("changing IER to 0x%X\n", tmp);
+		OUTREG16(IER, tmp);
 	}
+
+	spin_unlock_irq(&dinfo->int_lock);
 	return 0;
 }
 
-void
-intelfbhw_disable_irq(struct intelfb_info *dinfo) {
-	u16 tmp;
-
+void intelfbhw_disable_irq(struct intelfb_info *dinfo)
+{
 	if (test_and_clear_bit(0, &dinfo->irq_flags)) {
 		if (dinfo->vsync.pan_display) {
 			dinfo->vsync.pan_display = 0;
@@ -2037,16 +2044,15 @@ intelfbhw_disable_irq(struct intelfb_info *dinfo) {
 		OUTREG16(IMR, 0xffff);
 		OUTREG16(IER, 0x0);
 
-		tmp = INREG16(IIR);
-		OUTREG16(IIR, tmp);
+		OUTREG16(IIR, INREG16(IIR)); /* clear IRQ requests */
 		spin_unlock_irq(&dinfo->int_lock);
 
 		free_irq(dinfo->pdev->irq, dinfo);
 	}
 }
 
-int
-intelfbhw_wait_for_vsync(struct intelfb_info *dinfo, u32 pipe) {
+int intelfbhw_wait_for_vsync(struct intelfb_info *dinfo, u32 pipe)
+{
 	struct intelfb_vsync *vsync;
 	unsigned int count;
 	int ret;
@@ -2059,18 +2065,16 @@ intelfbhw_wait_for_vsync(struct intelfb_info *dinfo, u32 pipe) {
 			return -ENODEV;
 	}
 
-	ret = intelfbhw_enable_irq(dinfo, 0);
-	if (ret) {
+	ret = intelfbhw_enable_irq(dinfo);
+	if (ret)
 		return ret;
-	}
 
 	count = vsync->count;
-	ret = wait_event_interruptible_timeout(vsync->wait, count != vsync->count, HZ/10);
-	if (ret < 0) {
+	ret = wait_event_interruptible_timeout(vsync->wait,
+					       count != vsync->count, HZ / 10);
+	if (ret < 0)
 		return ret;
-	}
 	if (ret == 0) {
-		intelfbhw_enable_irq(dinfo, 1);
 		DBG_MSG("wait_for_vsync timed out!\n");
 		return -ETIMEDOUT;
 	}
diff --git a/drivers/video/intelfb/intelfbhw.h b/drivers/video/intelfb/intelfbhw.h
index 8c54ba8fbdd..0b076bac321 100644
--- a/drivers/video/intelfb/intelfbhw.h
+++ b/drivers/video/intelfb/intelfbhw.h
@@ -83,7 +83,7 @@
  */
 #define RING_MIN_FREE			64
 
-#define IPEHR     		0x2088
+#define IPEHR			0x2088
 
 #define INSTDONE		0x2090
 #define PRI_RING_EMPTY			1
@@ -93,7 +93,7 @@
 #define IIR			0x20A4
 #define IMR			0x20A8
 #define VSYNC_PIPE_A_INTERRUPT		(1 << 7)
-#define PIPE_A_EVENT_INTERRUPT		(1 << 4)
+#define PIPE_A_EVENT_INTERRUPT		(1 << 6)
 #define VSYNC_PIPE_B_INTERRUPT		(1 << 5)
 #define PIPE_B_EVENT_INTERRUPT		(1 << 4)
 #define HOST_PORT_EVENT_INTERRUPT	(1 << 3)
@@ -128,9 +128,9 @@
 
 #define GPIOA             0x5010
 #define GPIOB             0x5014
-#define GPIOC             0x5018 // this may be external DDC on i830
-#define GPIOD             0x501C // this is DVO DDC
-#define GPIOE             0x5020 // this is DVO i2C
+#define GPIOC             0x5018 /* this may be external DDC on i830 */
+#define GPIOD             0x501C /* this is DVO DDC */
+#define GPIOE             0x5020 /* this is DVO i2C */
 #define GPIOF             0x5024
 
 /* PLL registers */
@@ -269,15 +269,20 @@
 #define PORT_ENABLE		        (1 << 31)
 #define PORT_PIPE_SELECT_SHIFT	        30
 #define PORT_TV_FLAGS_MASK              0xFF
-#define PORT_TV_FLAGS                   0xC4  // ripped from my BIOS
-                                              // to understand and correct
+#define PORT_TV_FLAGS                   0xC4	/* ripped from my BIOS
+						   to understand and correct */
 
 #define DVOA_SRCDIM		0x61124
 #define DVOB_SRCDIM		0x61144
 #define DVOC_SRCDIM		0x61164
 
+#define PIPEA_DSL		0x70000
+#define PIPEB_DSL		0x71000
 #define PIPEACONF		0x70008
 #define PIPEBCONF		0x71008
+#define PIPEASTAT		0x70024 /* bits 0-15 are "write 1 to clear" */
+#define PIPEBSTAT		0x71024
+
 #define PIPECONF_ENABLE			(1 << 31)
 #define PIPECONF_DISABLE		0
 #define PIPECONF_DOUBLE_WIDE		(1 << 30)
@@ -286,6 +291,35 @@
 #define PIPECONF_UNLOCKED		0
 #define PIPECONF_GAMMA			(1 << 24)
 #define PIPECONF_PALETTE		0
+#define PIPECONF_PROGRESSIVE			(0 << 21)
+#define PIPECONF_INTERLACE_W_FIELD_INDICATION	(6 << 21)
+#define PIPECONF_INTERLACE_FIELD_0_ONLY		(7 << 21)
+#define PIPECONF_INTERLACE_MASK			(7 << 21)
+
+/* enable bits, write 1 to enable */
+#define PIPESTAT_FIFO_UNDERRUN		(1 << 31)
+#define PIPESTAT_CRC_ERROR_EN		(1 << 29)
+#define PIPESTAT_CRC_DONE_EN		(1 << 28)
+#define PIPESTAT_HOTPLUG_EN		(1 << 26)
+#define PIPESTAT_VERTICAL_SYNC_EN	(1 << 25)
+#define PIPESTAT_DISPLINE_COMP_EN	(1 << 24)
+#define PIPESTAT_FLD_EVT_ODD_EN		(1 << 21)
+#define PIPESTAT_FLD_EVT_EVEN_EN	(1 << 20)
+#define PIPESTAT_TV_HOTPLUG_EN		(1 << 18)
+#define PIPESTAT_VBLANK_EN		(1 << 17)
+#define PIPESTAT_OVL_UPDATE_EN		(1 << 16)
+/* status bits, write 1 to clear */
+#define PIPESTAT_HOTPLUG_STATE		(1 << 15)
+#define PIPESTAT_CRC_ERROR		(1 << 13)
+#define PIPESTAT_CRC_DONE		(1 << 12)
+#define PIPESTAT_HOTPLUG		(1 << 10)
+#define PIPESTAT_VSYNC			(1 << 9)
+#define PIPESTAT_DISPLINE_COMP		(1 << 8)
+#define PIPESTAT_FLD_EVT_ODD		(1 << 5)
+#define PIPESTAT_FLD_EVT_EVEN		(1 << 4)
+#define PIPESTAT_TV_HOTPLUG		(1 << 2)
+#define PIPESTAT_VBLANK			(1 << 1)
+#define PIPESTAT_OVL_UPDATE		(1 << 0)
 
 #define DISPARB			0x70030
 #define DISPARB_AEND_MASK		0x1ff
@@ -365,7 +399,7 @@
 #define DISPPLANE_8BPP			(0x2<<26)
 #define DISPPLANE_15_16BPP		(0x4<<26)
 #define DISPPLANE_16BPP			(0x5<<26)
-#define DISPPLANE_32BPP_NO_ALPHA 	(0x6<<26)
+#define DISPPLANE_32BPP_NO_ALPHA	(0x6<<26)
 #define DISPPLANE_32BPP			(0x7<<26)
 #define DISPPLANE_STEREO_ENABLE		(1<<25)
 #define DISPPLANE_STEREO_DISABLE	0
@@ -567,7 +601,7 @@ extern void intelfbhw_cursor_setcolor(struct intelfb_info *dinfo, u32 bg,
 extern void intelfbhw_cursor_load(struct intelfb_info *dinfo, int width,
 				  int height, u8 *data);
 extern void intelfbhw_cursor_reset(struct intelfb_info *dinfo);
-extern int intelfbhw_enable_irq(struct intelfb_info *dinfo, int reenable);
+extern int intelfbhw_enable_irq(struct intelfb_info *dinfo);
 extern void intelfbhw_disable_irq(struct intelfb_info *dinfo);
 extern int intelfbhw_wait_for_vsync(struct intelfb_info *dinfo, u32 pipe);
 
diff --git a/drivers/video/kyro/fbdev.c b/drivers/video/kyro/fbdev.c
index 1c557990739..acb9370fdb1 100644
--- a/drivers/video/kyro/fbdev.c
+++ b/drivers/video/kyro/fbdev.c
@@ -21,7 +21,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #ifdef CONFIG_MTRR
 #include <asm/mtrr.h>
 #endif
diff --git a/drivers/video/logo/logo.c b/drivers/video/logo/logo.c
index 2b0f799aa8d..a9283bae779 100644
--- a/drivers/video/logo/logo.c
+++ b/drivers/video/logo/logo.c
@@ -34,6 +34,10 @@ extern const struct linux_logo logo_superh_vga16;
 extern const struct linux_logo logo_superh_clut224;
 extern const struct linux_logo logo_m32r_clut224;
 
+static int nologo;
+module_param(nologo, bool, 0);
+MODULE_PARM_DESC(nologo, "Disables startup logo");
+
 /* logo's are marked __initdata. Use __init_refok to tell
  * modpost that it is intended that this function uses data
  * marked __initdata.
@@ -42,6 +46,9 @@ const struct linux_logo * __init_refok fb_find_logo(int depth)
 {
 	const struct linux_logo *logo = NULL;
 
+	if (nologo)
+		return NULL;
+
 	if (depth >= 1) {
 #ifdef CONFIG_LOGO_LINUX_MONO
 		/* Generic Linux logo */
diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c
index 86ca7b17900..b25972ac6ee 100644
--- a/drivers/video/matrox/matroxfb_base.c
+++ b/drivers/video/matrox/matroxfb_base.c
@@ -113,7 +113,7 @@
 #include "matroxfb_g450.h"
 #include <linux/matroxfb.h>
 #include <linux/interrupt.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_PPC_PMAC
 #include <asm/machdep.h>
diff --git a/drivers/video/matrox/matroxfb_crtc2.c b/drivers/video/matrox/matroxfb_crtc2.c
index 4b3344e0369..a6ab5b6a58d 100644
--- a/drivers/video/matrox/matroxfb_crtc2.c
+++ b/drivers/video/matrox/matroxfb_crtc2.c
@@ -15,7 +15,7 @@
 #include "matroxfb_misc.h"
 #include "matroxfb_DAC1064.h"
 #include <linux/matroxfb.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /* **************************************************** */
 
diff --git a/drivers/video/matrox/matroxfb_g450.c b/drivers/video/matrox/matroxfb_g450.c
index 4d610b405d4..6209a761f67 100644
--- a/drivers/video/matrox/matroxfb_g450.c
+++ b/drivers/video/matrox/matroxfb_g450.c
@@ -17,7 +17,6 @@
 #include "matroxfb_DAC1064.h"
 #include "g450_pll.h"
 #include <linux/matroxfb.h>
-#include <asm/uaccess.h>
 #include <asm/div64.h>
 
 #include "matroxfb_g450.h"
diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c
index de0d755f901..49cd53e46c0 100644
--- a/drivers/video/matrox/matroxfb_maven.c
+++ b/drivers/video/matrox/matroxfb_maven.c
@@ -18,7 +18,6 @@
 #include <linux/i2c.h>
 #include <linux/matroxfb.h>
 #include <asm/div64.h>
-#include <asm/uaccess.h>
 
 #define MAVEN_I2CID	(0x1B)
 
diff --git a/drivers/video/mbx/mbxfb.c b/drivers/video/mbx/mbxfb.c
index 980d5f62390..80cd117ca65 100644
--- a/drivers/video/mbx/mbxfb.c
+++ b/drivers/video/mbx/mbxfb.c
@@ -1,7 +1,7 @@
 /*
  *  linux/drivers/video/mbx/mbxfb.c
  *
- *  Copyright (C) 2006 8D Technologies inc
+ *  Copyright (C) 2006-2007 8D Technologies inc
  *  Raphael Assenat <raph@8d.com>
  *  	- Added video overlay support
  *  	- Various improvements
@@ -334,8 +334,8 @@ static int mbxfb_blank(int blank, struct fb_info *info)
 
 static int mbxfb_setupOverlay(struct mbxfb_overlaySetup *set)
 {
-	u32 vsctrl, vbbase, vscadr, vsadr;
-	u32 sssize, spoctrl, svctrl, shctrl;
+	u32 vsctrl, vscadr, vsadr;
+	u32 sssize, spoctrl, shctrl;
 	u32 vubase, vvbase;
 	u32 vovrclk;
 
@@ -349,13 +349,11 @@ static int mbxfb_setupOverlay(struct mbxfb_overlaySetup *set)
 	vscadr = readl(VSCADR);
 	vubase = readl(VUBASE);
 	vvbase = readl(VVBASE);
+	shctrl = readl(SHCTRL);
 
 	spoctrl = readl(SPOCTRL);
 	sssize = readl(SSSIZE);
 
-
-	vbbase = Vbbase_Glalpha(set->alpha);
-
 	vsctrl &= ~(	FMsk(VSCTRL_VSWIDTH) |
 					FMsk(VSCTRL_VSHEIGHT) |
 					FMsk(VSCTRL_VPIXFMT) |
@@ -364,38 +362,41 @@ static int mbxfb_setupOverlay(struct mbxfb_overlaySetup *set)
 	vsctrl |= Vsctrl_Width(set->width) | Vsctrl_Height(set->height) |
 				VSCTRL_CSC_EN;
 
-	vscadr &= ~(VSCADR_STR_EN | VSCADR_COLKEY_EN | VSCADR_COLKEYSRC |
-				FMsk(VSCADR_BLEND_M) | FMsk(VSCADR_BLEND_POS) |
-				FMsk(VSCADR_VBASE_ADR) );
+	vscadr &= ~(VSCADR_STR_EN | FMsk(VSCADR_VBASE_ADR) );
 	vubase &= ~(VUBASE_UVHALFSTR | FMsk(VUBASE_UBASE_ADR));
 	vvbase &= ~(FMsk(VVBASE_VBASE_ADR));
 
-	switch (set->fmt)
-	{
-		case MBXFB_FMT_YUV12:
-			vsctrl |= VSCTRL_VPIXFMT_YUV12;
+	switch (set->fmt) {
+	case MBXFB_FMT_YUV16:
+		vsctrl |= VSCTRL_VPIXFMT_YUV12;
 
-			set->Y_stride = ((set->width) + 0xf ) & ~0xf;
+		set->Y_stride = ((set->width) + 0xf ) & ~0xf;
+		break;
+	case MBXFB_FMT_YUV12:
+		vsctrl |= VSCTRL_VPIXFMT_YUV12;
 
+		set->Y_stride = ((set->width) + 0xf ) & ~0xf;
+		vubase |= VUBASE_UVHALFSTR;
+
+		break;
+	case MBXFB_FMT_UY0VY1:
+		vsctrl |= VSCTRL_VPIXFMT_UY0VY1;
+		set->Y_stride = (set->width*2 + 0xf ) & ~0xf;
+		break;
+	case MBXFB_FMT_VY0UY1:
+		vsctrl |= VSCTRL_VPIXFMT_VY0UY1;
+		set->Y_stride = (set->width*2 + 0xf ) & ~0xf;
+		break;
+	case MBXFB_FMT_Y0UY1V:
+		vsctrl |= VSCTRL_VPIXFMT_Y0UY1V;
+		set->Y_stride = (set->width*2 + 0xf ) & ~0xf;
+		break;
+	case MBXFB_FMT_Y0VY1U:
+		vsctrl |= VSCTRL_VPIXFMT_Y0VY1U;
+		set->Y_stride = (set->width*2 + 0xf ) & ~0xf;
 			break;
-		case MBXFB_FMT_UY0VY1:
-			vsctrl |= VSCTRL_VPIXFMT_UY0VY1;
-			set->Y_stride = (set->width*2 + 0xf ) & ~0xf;
-			break;
-		case MBXFB_FMT_VY0UY1:
-			vsctrl |= VSCTRL_VPIXFMT_VY0UY1;
-			set->Y_stride = (set->width*2 + 0xf ) & ~0xf;
-			break;
-		case MBXFB_FMT_Y0UY1V:
-			vsctrl |= VSCTRL_VPIXFMT_Y0UY1V;
-			set->Y_stride = (set->width*2 + 0xf ) & ~0xf;
-			break;
-		case MBXFB_FMT_Y0VY1U:
-			vsctrl |= VSCTRL_VPIXFMT_Y0VY1U;
-			set->Y_stride = (set->width*2 + 0xf ) & ~0xf;
-			break;
-		default:
-			return -EINVAL;
+	default:
+		return -EINVAL;
 	}
 
 	/* VSCTRL has the bits which sets the Video Pixel Format.
@@ -417,8 +418,7 @@ static int mbxfb_setupOverlay(struct mbxfb_overlaySetup *set)
 			(0x60000 + set->mem_offset + set->V_offset)>>3);
 
 
-	vscadr |= VSCADR_BLEND_VID | VSCADR_BLEND_GLOB |
-		Vscadr_Vbase_Adr((0x60000 + set->mem_offset)>>4);
+	vscadr |= Vscadr_Vbase_Adr((0x60000 + set->mem_offset)>>4);
 
 	if (set->enable)
 		vscadr |= VSCADR_STR_EN;
@@ -433,9 +433,8 @@ static int mbxfb_setupOverlay(struct mbxfb_overlaySetup *set)
 
 	spoctrl &= ~(SPOCTRL_H_SC_BP | SPOCTRL_V_SC_BP |
 			SPOCTRL_HV_SC_OR | SPOCTRL_VS_UR_C |
-			FMsk(SPOCTRL_VORDER) | FMsk(SPOCTRL_VPITCH));
-	spoctrl = Spoctrl_Vpitch((set->height<<11)/set->scaled_height)
-							| SPOCTRL_VORDER_2TAP;
+			FMsk(SPOCTRL_VPITCH));
+	spoctrl |= Spoctrl_Vpitch((set->height<<11)/set->scaled_height);
 
 	/* Bypass horiz/vert scaler when same size */
 	if (set->scaled_width == set->width)
@@ -443,14 +442,11 @@ static int mbxfb_setupOverlay(struct mbxfb_overlaySetup *set)
 	if (set->scaled_height == set->height)
 		spoctrl |= SPOCTRL_V_SC_BP;
 
-	svctrl = Svctrl_Initial1(1<<10) | Svctrl_Initial2(1<<10);
-
-	shctrl = Shctrl_Hinitial(4<<11)
-			| Shctrl_Hpitch((set->width<<11)/set->scaled_width);
+	shctrl &= ~(FMsk(SHCTRL_HPITCH) | SHCTRL_HDECIM);
+	shctrl |= Shctrl_Hpitch((set->width<<11)/set->scaled_width);
 
 	/* Video plane registers */
 	write_reg(vsctrl, VSCTRL);
-	write_reg(vbbase, VBBASE);
 	write_reg(vscadr, VSCADR);
 	write_reg(vubase, VUBASE);
 	write_reg(vvbase, VVBASE);
@@ -459,28 +455,8 @@ static int mbxfb_setupOverlay(struct mbxfb_overlaySetup *set)
 	/* Video scaler registers */
 	write_reg(sssize, SSSIZE);
 	write_reg(spoctrl, SPOCTRL);
-	write_reg(svctrl, SVCTRL);
 	write_reg(shctrl, SHCTRL);
 
-	/* RAPH: Using those coefficients, the scaled
-	 * image is quite blurry. I dont know how
-	 * to improve them ; The chip documentation
-	 * was not helpful.. */
-	write_reg(0x21212121, VSCOEFF0);
-	write_reg(0x21212121, VSCOEFF1);
-	write_reg(0x21212121, VSCOEFF2);
-	write_reg(0x21212121, VSCOEFF3);
-	write_reg(0x21212121, VSCOEFF4);
-	write_reg(0x00000000, HSCOEFF0);
-	write_reg(0x00000000, HSCOEFF1);
-	write_reg(0x00000000, HSCOEFF2);
-	write_reg(0x03020201, HSCOEFF3);
-	write_reg(0x09070604, HSCOEFF4);
-	write_reg(0x0f0e0c0a, HSCOEFF5);
-	write_reg(0x15141211, HSCOEFF6);
-	write_reg(0x19181716, HSCOEFF7);
-	write_reg(0x00000019, HSCOEFF8);
-
 	/* Clock */
 	if (set->enable)
 		vovrclk |= 1;
@@ -492,27 +468,206 @@ static int mbxfb_setupOverlay(struct mbxfb_overlaySetup *set)
 	return 0;
 }
 
+/* Set plane order via the BLEND_POS fields; -EINVAL on equal/unknown planes. */
+static int mbxfb_ioctl_planeorder(struct mbxfb_planeorder *porder)
+{
+	unsigned long gscadr, vscadr;
+
+	if (porder->bottom == porder->top)
+		return -EINVAL;
+
+	gscadr = readl(GSCADR);
+	vscadr = readl(VSCADR);
+
+	gscadr &= ~(FMsk(GSCADR_BLEND_POS));
+	vscadr &= ~(FMsk(VSCADR_BLEND_POS));
+
+	switch (porder->bottom) {	/* bottom plane gets the BLEND_GFX position */
+	case MBXFB_PLANE_GRAPHICS:
+		gscadr |= GSCADR_BLEND_GFX;
+		break;
+	case MBXFB_PLANE_VIDEO:
+		vscadr |= VSCADR_BLEND_GFX;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (porder->top) {	/* top plane gets the BLEND_VID position */
+	case MBXFB_PLANE_GRAPHICS:
+		gscadr |= GSCADR_BLEND_VID;
+		break;
+	case MBXFB_PLANE_VIDEO:
+		vscadr |= VSCADR_BLEND_VID;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	write_reg_dly(vscadr, VSCADR);	/* nothing is written on the error paths */
+	write_reg_dly(gscadr, GSCADR);
+
+	return 0;
+}
+
+static int mbxfb_ioctl_alphactl(struct mbxfb_alphaCtl *alpha)
+{
+	unsigned long vscadr, vbbase, vcmsk;	/* values for VSCADR/VBBASE/VCMSK */
+	unsigned long gscadr, gbbase, gdrctrl;	/* values for GSCADR/GBBASE/GDRCTRL */
+
+	vbbase = Vbbase_Glalpha(alpha->overlay_global_alpha) |
+				Vbbase_Colkey(alpha->overlay_colorkey);
+
+	gbbase = Gbbase_Glalpha(alpha->graphics_global_alpha) |
+				Gbbase_Colkey(alpha->graphics_colorkey);
+
+	vcmsk = readl(VCMSK);	/* read-modify-write: only COLKEY_M is replaced */
+	vcmsk &= ~(FMsk(VCMSK_COLKEY_M));
+	vcmsk |= Vcmsk_colkey_m(alpha->overlay_colorkey_mask);
+
+	gdrctrl = readl(GDRCTRL);	/* read-modify-write: only COLKEYM is replaced */
+	gdrctrl &= ~(FMsk(GDRCTRL_COLKEYM));
+	gdrctrl |= Gdrctrl_Colkeym(alpha->graphics_colorkey_mask);
+
+	vscadr = readl(VSCADR);	/* clear blend mode and colour-key bits first */
+	vscadr &= ~(FMsk(VSCADR_BLEND_M) | VSCADR_COLKEYSRC | VSCADR_COLKEY_EN);
+
+	gscadr = readl(GSCADR);	/* clear blend mode and colour-key bits first */
+	gscadr &= ~(FMsk(GSCADR_BLEND_M) | GSCADR_COLKEY_EN | GSCADR_COLKEYSRC);
+
+	switch (alpha->overlay_colorkey_mode) {	/* -> VSCADR colour-key bits */
+	case MBXFB_COLORKEY_DISABLED:
+		break;
+	case MBXFB_COLORKEY_PREVIOUS:
+		vscadr |= VSCADR_COLKEY_EN;
+		break;
+	case MBXFB_COLORKEY_CURRENT:
+		vscadr |= VSCADR_COLKEY_EN | VSCADR_COLKEYSRC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (alpha->overlay_blend_mode) {	/* -> VSCADR BLEND_M field */
+	case MBXFB_ALPHABLEND_NONE:
+		vscadr |= VSCADR_BLEND_NONE;
+		break;
+	case MBXFB_ALPHABLEND_GLOBAL:
+		vscadr |= VSCADR_BLEND_GLOB;
+		break;
+	case MBXFB_ALPHABLEND_PIXEL:
+		vscadr |= VSCADR_BLEND_PIX;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (alpha->graphics_colorkey_mode) {	/* -> GSCADR colour-key bits */
+	case MBXFB_COLORKEY_DISABLED:
+		break;
+	case MBXFB_COLORKEY_PREVIOUS:
+		gscadr |= GSCADR_COLKEY_EN;
+		break;
+	case MBXFB_COLORKEY_CURRENT:
+		gscadr |= GSCADR_COLKEY_EN | GSCADR_COLKEYSRC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (alpha->graphics_blend_mode) {	/* -> GSCADR BLEND_M field */
+	case MBXFB_ALPHABLEND_NONE:
+		gscadr |= GSCADR_BLEND_NONE;
+		break;
+	case MBXFB_ALPHABLEND_GLOBAL:
+		gscadr |= GSCADR_BLEND_GLOB;
+		break;
+	case MBXFB_ALPHABLEND_PIXEL:
+		gscadr |= GSCADR_BLEND_PIX;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	write_reg_dly(vbbase, VBBASE);	/* reached only if all four modes were valid */
+	write_reg_dly(gbbase, GBBASE);
+	write_reg_dly(vcmsk, VCMSK);
+	write_reg_dly(gdrctrl, GDRCTRL);
+	write_reg_dly(gscadr, GSCADR);
+	write_reg_dly(vscadr, VSCADR);
+
+	return 0;
+}
+
 static int mbxfb_ioctl(struct fb_info *info, unsigned int cmd,
 				unsigned long arg)
 {
-	struct mbxfb_overlaySetup setup;
+	struct mbxfb_overlaySetup	setup;
+	struct mbxfb_planeorder 	porder;
+	struct mbxfb_alphaCtl 		alpha;
+	struct mbxfb_reg			reg;
 	int res;
+	__u32 tmp;
 
-	if (cmd == MBXFB_IOCX_OVERLAY)
+	switch (cmd)
 	{
-		if (copy_from_user(&setup, (void __user*)arg,
-					sizeof(struct mbxfb_overlaySetup)))
+		case MBXFB_IOCX_OVERLAY:
+			if (copy_from_user(&setup, (void __user*)arg,
+						sizeof(struct mbxfb_overlaySetup)))
+				return -EFAULT;
+
+			res = mbxfb_setupOverlay(&setup);
+			if (res)
+				return res;
+
+			if (copy_to_user((void __user*)arg, &setup,
+						sizeof(struct mbxfb_overlaySetup)))
+				return -EFAULT;
+
+			return 0;
+
+		case MBXFB_IOCS_PLANEORDER:
+			if (copy_from_user(&porder, (void __user*)arg,
+					sizeof(struct mbxfb_planeorder)))
 			return -EFAULT;
 
-		res = mbxfb_setupOverlay(&setup);
-		if (res)
-			return res;
+			return mbxfb_ioctl_planeorder(&porder);
 
-		if (copy_to_user((void __user*)arg, &setup,
-					sizeof(struct mbxfb_overlaySetup)))
+		case MBXFB_IOCS_ALPHA:
+			if (copy_from_user(&alpha, (void __user*)arg,
+					sizeof(struct mbxfb_alphaCtl)))
 			return -EFAULT;
 
-		return 0;
+			return mbxfb_ioctl_alphactl(&alpha);
+
+		case MBXFB_IOCS_REG:
+			if (copy_from_user(&reg, (void __user*)arg,
+						sizeof(struct mbxfb_reg)))
+				return -EFAULT;
+
+			if (reg.addr >= 0x10000) /* regs are from 0x3fe0000 to 0x3feffff */
+				return -EINVAL;
+
+			tmp = readl(virt_base_2700 + reg.addr);
+			tmp &= ~reg.mask;
+			tmp |= reg.val & reg.mask;
+			writel(tmp, virt_base_2700 + reg.addr);
+
+			return 0;
+		case MBXFB_IOCX_REG:
+			if (copy_from_user(&reg, (void __user*)arg,
+						sizeof(struct mbxfb_reg)))
+				return -EFAULT;
+
+			if (reg.addr >= 0x10000)	/* regs are from 0x3fe0000 to 0x3feffff */
+				return -EINVAL;
+			reg.val = readl(virt_base_2700 + reg.addr);
+
+			if (copy_to_user((void __user*)arg, &reg,
+						sizeof(struct mbxfb_reg)))
+				return -EFAULT;
+
+			return 0;
 	}
 	return -EINVAL;
 }
@@ -558,7 +713,6 @@ static void __devinit setup_memc(struct fb_info *fbi)
 	       LMTYPE);
 	/* enable memory controller */
 	write_reg_dly(LMPWR_MC_PWR_ACT, LMPWR);
-
 	/* perform dummy reads */
 	for ( i = 0; i < 16; i++ ) {
 		tmp = readl(fbi->screen_base);
@@ -588,8 +742,8 @@ static void enable_clocks(struct fb_info *fbi)
 	write_reg_dly(0x00000000, VOVRCLK);
 	write_reg_dly(PIXCLK_EN, PIXCLK);
 	write_reg_dly(MEMCLK_EN, MEMCLK);
-	write_reg_dly(0x00000006, M24CLK);
-	write_reg_dly(0x00000006, MBXCLK);
+	write_reg_dly(0x00000001, M24CLK);
+	write_reg_dly(0x00000001, MBXCLK);
 	write_reg_dly(SDCLK_EN, SDCLK);
 	write_reg_dly(0x00000001, PIXCLKDIV);
 }
@@ -597,6 +751,7 @@ static void enable_clocks(struct fb_info *fbi)
 static void __devinit setup_graphics(struct fb_info *fbi)
 {
 	unsigned long gsctrl;
+	unsigned long vscadr;
 
 	gsctrl = GSCTRL_GAMMA_EN | Gsctrl_Width(fbi->var.xres) |
 		Gsctrl_Height(fbi->var.yres);
@@ -620,6 +775,11 @@ static void __devinit setup_graphics(struct fb_info *fbi)
 	write_reg_dly(0x00ffffff, GDRCTRL);
 	write_reg_dly((GSCADR_STR_EN | Gscadr_Gbase_Adr(0x6000)), GSCADR);
 	write_reg_dly(0x00000000, GPLUT);
+
+	vscadr = readl(VSCADR);
+	vscadr &= ~(FMsk(VSCADR_BLEND_POS) | FMsk(VSCADR_BLEND_M));
+	vscadr |= VSCADR_BLEND_VID | VSCADR_BLEND_NONE;
+	write_reg_dly(vscadr, VSCADR);
 }
 
 static void __devinit setup_display(struct fb_info *fbi)
@@ -638,13 +798,47 @@ static void __devinit setup_display(struct fb_info *fbi)
 
 static void __devinit enable_controller(struct fb_info *fbi)
 {
+	u32 svctrl, shctrl;
+
 	write_reg_dly(SYSRST_RST, SYSRST);
 
+	/* setup a timeout, raise drive strength */
+	write_reg_dly(0xffffff0c, SYSCFG);
 
 	enable_clocks(fbi);
 	setup_memc(fbi);
 	setup_graphics(fbi);
 	setup_display(fbi);
+
+	shctrl = readl(SHCTRL);
+	shctrl &= ~(FMsk(SHCTRL_HINITIAL));
+	shctrl |= Shctrl_Hinitial(4<<11);
+	writel(shctrl, SHCTRL);
+
+	svctrl = Svctrl_Initial1(1<<10) | Svctrl_Initial2(1<<10);
+	writel(svctrl, SVCTRL);
+
+	writel(SPOCTRL_H_SC_BP | SPOCTRL_V_SC_BP | SPOCTRL_VORDER_4TAP
+			, SPOCTRL);
+
+	/* Those coefficients are good for scaling up. For scaling
+	 * down, the application has to calculate them. */
+	write_reg(0xff000100, VSCOEFF0);
+	write_reg(0xfdfcfdfe, VSCOEFF1);
+	write_reg(0x170d0500, VSCOEFF2);
+	write_reg(0x3d372d22, VSCOEFF3);
+	write_reg(0x00000040, VSCOEFF4);
+
+	write_reg(0xff010100, HSCOEFF0);
+	write_reg(0x00000000, HSCOEFF1);
+	write_reg(0x02010000, HSCOEFF2);
+	write_reg(0x01020302, HSCOEFF3);
+	write_reg(0xf9fbfe00, HSCOEFF4);
+	write_reg(0xfbf7f6f7, HSCOEFF5);
+	write_reg(0x1c110700, HSCOEFF6);
+	write_reg(0x3e393127, HSCOEFF7);
+	write_reg(0x00000040, HSCOEFF8);
+
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/video/mbx/reg_bits.h b/drivers/video/mbx/reg_bits.h
index 9a24fb0c7d4..5f14b4befd7 100644
--- a/drivers/video/mbx/reg_bits.h
+++ b/drivers/video/mbx/reg_bits.h
@@ -215,7 +215,7 @@
 /* GSCADR graphics stream control address register fields */
 #define GSCADR_STR_EN	(1 << 31)
 #define GSCADR_COLKEY_EN	(1 << 30)
-#define GSCADR_COLKEYSCR	(1 << 29)
+#define GSCADR_COLKEYSRC	(1 << 29)
 #define GSCADR_BLEND_M	Fld(2,27)
 #define GSCADR_BLEND_NONE	((0x0) << FShft(GSCADR_BLEND_M))
 #define GSCADR_BLEND_INV	((0x1) << FShft(GSCADR_BLEND_M))
@@ -303,6 +303,67 @@
 #define VSADR_YSTART		Fld(11,0)
 #define Vsadr_Ystart(x)		((x) << FShft(VSADR_YSTART))
 
+/* VSCTRL - Video Surface Control Register */
+#define VSCTRL_VPIXFMT		Fld(4,27)
+#define VSCTRL_VPIXFMT_YUV12	((0x9) << FShft(VSCTRL_VPIXFMT))
+#define VSCTRL_VPIXFMT_UY0VY1	((0xc) << FShft(VSCTRL_VPIXFMT))
+#define VSCTRL_VPIXFMT_VY0UY1	((0xd) << FShft(VSCTRL_VPIXFMT))
+#define VSCTRL_VPIXFMT_Y0UY1V	((0xe) << FShft(VSCTRL_VPIXFMT))
+#define VSCTRL_VPIXFMT_Y0VY1U	((0xf) << FShft(VSCTRL_VPIXFMT))
+#define VSCTRL_GAMMA_EN		(1 << 26)
+#define VSCTRL_CSC_EN		(1 << 25)
+#define VSCTRL_COSITED		(1 << 22)
+#define VSCTRL_VSWIDTH		Fld(11,11)
+#define Vsctrl_Width(Pixels) /* Video Width [1-2048] */ \
+			(((Pixels) - 1) << FShft(VSCTRL_VSWIDTH))
+#define VSCTRL_VSHEIGHT		Fld(11,0)
+#define Vsctrl_Height(Pixels) /* Video Height [1-2048] */ \
+			(((Pixels) - 1) << FShft(VSCTRL_VSHEIGHT))
+
+/* VBBASE - Video Blending Base Register */
+#define VBBASE_GLALPHA		Fld(8,24)
+#define Vbbase_Glalpha(x)	((x) << FShft(VBBASE_GLALPHA))
+
+#define VBBASE_COLKEY		Fld(24,0)
+#define Vbbase_Colkey(x)	((x) << FShft(VBBASE_COLKEY))
+
+/* VCMSK - Video Color Key Mask Register */
+#define VCMSK_COLKEY_M		Fld(24,0)
+#define Vcmsk_colkey_m(x)	((x) << FShft(VCMSK_COLKEY_M))
+
+/* VSCADR - Video Stream Control Address Register */
+#define VSCADR_STR_EN		(1 << 31)
+#define VSCADR_COLKEY_EN	(1 << 30)
+#define VSCADR_COLKEYSRC	(1 << 29)
+#define VSCADR_BLEND_M		Fld(2,27)
+#define VSCADR_BLEND_NONE	((0x0) << FShft(VSCADR_BLEND_M))
+#define VSCADR_BLEND_INV	((0x1) << FShft(VSCADR_BLEND_M))
+#define VSCADR_BLEND_GLOB	((0x2) << FShft(VSCADR_BLEND_M))
+#define VSCADR_BLEND_PIX	((0x3) << FShft(VSCADR_BLEND_M))
+#define VSCADR_BLEND_POS	Fld(2,24)
+#define VSCADR_BLEND_GFX	((0x0) << FShft(VSCADR_BLEND_POS))
+#define VSCADR_BLEND_VID	((0x1) << FShft(VSCADR_BLEND_POS))
+#define VSCADR_BLEND_CUR	((0x2) << FShft(VSCADR_BLEND_POS))
+#define VSCADR_VBASE_ADR	Fld(23,0)
+#define Vscadr_Vbase_Adr(x)	((x) << FShft(VSCADR_VBASE_ADR))
+
+/* VUBASE - Video U Base Register */
+#define VUBASE_UVHALFSTR	(1 << 31)
+#define VUBASE_UBASE_ADR	Fld(24,0)
+#define Vubase_Ubase_Adr(x)	((x) << FShft(VUBASE_UBASE_ADR))
+
+/* VVBASE - Video V Base Register */
+#define VVBASE_VBASE_ADR	Fld(24,0)
+#define Vvbase_Vbase_Adr(x)	((x) << FShft(VVBASE_VBASE_ADR))
+
+/* VSADR - Video Stride Address Register */
+#define VSADR_SRCSTRIDE		Fld(10,22)
+#define Vsadr_Srcstride(x)	((x) << FShft(VSADR_SRCSTRIDE))
+#define VSADR_XSTART		Fld(11,11)
+#define Vsadr_Xstart(x)		((x) << FShft(VSADR_XSTART))
+#define VSADR_YSTART		Fld(11,0)
+#define Vsadr_Ystart(x)		((x) << FShft(VSADR_YSTART))
+
 /* HCCTRL - Hardware Cursor Register fields */
 #define HCCTRL_CUR_EN	(1 << 31)
 #define HCCTRL_COLKEY_EN	(1 << 29)
@@ -479,6 +540,30 @@
 #define DINTRE_HBLNK1_EN	(1 << 1)
 #define DINTRE_HBLNK0_EN	(1 << 0)
 
+/* DINTRS - Display Interrupt Status Register */
+#define DINTRS_CUR_OR_S		(1 << 18)
+#define DINTRS_STR2_OR_S	(1 << 17)
+#define DINTRS_STR1_OR_S	(1 << 16)
+#define DINTRS_CUR_UR_S		(1 << 6)
+#define DINTRS_STR2_UR_S	(1 << 5)
+#define DINTRS_STR1_UR_S	(1 << 4)
+#define DINTRS_VEVENT1_S	(1 << 3)
+#define DINTRS_VEVENT0_S	(1 << 2)
+#define DINTRS_HBLNK1_S		(1 << 1)
+#define DINTRS_HBLNK0_S		(1 << 0)
+
+/* DINTRE - Display Interrupt Enable Register */
+#define DINTRE_CUR_OR_EN	(1 << 18)
+#define DINTRE_STR2_OR_EN	(1 << 17)
+#define DINTRE_STR1_OR_EN	(1 << 16)
+#define DINTRE_CUR_UR_EN	(1 << 6)
+#define DINTRE_STR2_UR_EN	(1 << 5)
+#define DINTRE_STR1_UR_EN	(1 << 4)
+#define DINTRE_VEVENT1_EN	(1 << 3)
+#define DINTRE_VEVENT0_EN	(1 << 2)
+#define DINTRE_HBLNK1_EN	(1 << 1)
+#define DINTRE_HBLNK0_EN	(1 << 0)
+
 
 /* DLSTS - display load status register */
 #define DLSTS_RLD_ADONE	(1 << 23)
diff --git a/drivers/video/mbx/regs.h b/drivers/video/mbx/regs.h
index a7c63d865aa..063099d4883 100644
--- a/drivers/video/mbx/regs.h
+++ b/drivers/video/mbx/regs.h
@@ -30,7 +30,7 @@
 #define VOVRCLK		__REG_2700G(0x00000044)
 #define PIXCLK		__REG_2700G(0x00000048)
 #define MEMCLK		__REG_2700G(0x0000004c)
-#define M24CLK		__REG_2700G(0x00000054)
+#define M24CLK		__REG_2700G(0x00000050)
 #define MBXCLK		__REG_2700G(0x00000054)
 #define SDCLK		__REG_2700G(0x00000058)
 #define PIXCLKDIV	__REG_2700G(0x0000005c)
diff --git a/drivers/video/modedb.c b/drivers/video/modedb.c
index 3741ad72940..42f5d76a877 100644
--- a/drivers/video/modedb.c
+++ b/drivers/video/modedb.c
@@ -27,7 +27,7 @@
 #define DPRINTK(fmt, args...)
 #endif
 
-const char *global_mode_option;
+const char *fb_mode_option;
 
     /*
      *  Standard video mode definitions (taken from XFree86)
@@ -72,7 +72,7 @@ static const struct fb_videomode modedb[] = {
 	0, FB_VMODE_NONINTERLACED
     }, {
 	/* 1152x864 @ 89 Hz interlaced, 44 kHz hsync */
-	NULL, 69, 1152, 864, 15384, 96, 16, 110, 1, 216, 10,
+	NULL, 89, 1152, 864, 15384, 96, 16, 110, 1, 216, 10,
 	0, FB_VMODE_INTERLACED
     }, {
 	/* 800x600 @ 72 Hz, 48.0 kHz hsync */
@@ -120,11 +120,11 @@ static const struct fb_videomode modedb[] = {
 	0, FB_VMODE_NONINTERLACED
     }, {
 	/* 1400x1050 @ 60Hz, 63.9 kHz hsync */
-	NULL, 68, 1400, 1050, 9259, 136, 40, 13, 1, 112, 3,
+	NULL, 60, 1400, 1050, 9259, 136, 40, 13, 1, 112, 3,
 	0, FB_VMODE_NONINTERLACED   	
     }, {
 	/* 1400x1050 @ 75,107 Hz, 82,392 kHz +hsync +vsync*/
-	NULL, 75, 1400, 1050, 9271, 120, 56, 13, 0, 112, 3,
+	NULL, 75, 1400, 1050, 7190, 120, 56, 23, 10, 112, 13,
 	FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED
     }, {
 	/* 1400x1050 @ 60 Hz, ? kHz +hsync +vsync*/
@@ -253,7 +253,7 @@ static const struct fb_videomode modedb[] = {
 	FB_VMODE_NONINTERLACED
     }, {
 	/* 1152x768, 60 Hz, PowerBook G4 Titanium I and II */
-	NULL, 60, 1152, 768, 15386, 158, 26, 29, 3, 136, 6,
+	NULL, 60, 1152, 768, 14047, 158, 26, 29, 3, 136, 6,
 	FB_SYNC_HOR_HIGH_ACT|FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED
     }, {
 	/* 1366x768, 60 Hz, 47.403 kHz hsync, WXGA 16:9 aspect ratio */
@@ -306,7 +306,7 @@ const struct fb_videomode vesa_modes[] = {
 	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
 	  FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
         /* 12 1024x768i-43 VESA */
-	{ NULL, 53, 1024, 768, 22271, 56, 8, 41, 0, 176, 8,
+	{ NULL, 43, 1024, 768, 22271, 56, 8, 41, 0, 176, 8,
 	  FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
 	  FB_VMODE_INTERLACED, FB_MODE_IS_VESA },
 	/* 13 1024x768-60 VESA */
@@ -383,7 +383,7 @@ const struct fb_videomode vesa_modes[] = {
 	{ NULL, 60, 1920, 1440, 4273, 344, 128, 56, 1, 200, 3,
 	  FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
 	/* 33 1920x1440-75 VESA */
-	{ NULL, 60, 1920, 1440, 3367, 352, 144, 56, 1, 224, 3,
+	{ NULL, 75, 1920, 1440, 3367, 352, 144, 56, 1, 224, 3,
 	  FB_SYNC_VERT_HIGH_ACT, FB_VMODE_NONINTERLACED, FB_MODE_IS_VESA },
 };
 EXPORT_SYMBOL(vesa_modes);
@@ -510,7 +510,7 @@ int fb_find_mode(struct fb_var_screeninfo *var,
 	default_bpp = 8;
 
     /* Did the user specify a video mode? */
-    if (mode_option || (mode_option = global_mode_option)) {
+    if (mode_option || (mode_option = fb_mode_option)) {
 	const char *name = mode_option;
 	unsigned int namelen = strlen(name);
 	int res_specified = 0, bpp_specified = 0, refresh_specified = 0;
@@ -606,26 +606,43 @@ done:
 	DPRINTK("Trying specified video mode%s %ix%i\n",
 	    refresh_specified ? "" : " (ignoring refresh rate)", xres, yres);
 
-	diff = refresh;
+	if (!refresh_specified) {
+		/*
+		 * If the caller has provided a custom mode database and a
+		 * valid monspecs structure, we look for the mode with the
+		 * highest refresh rate.  Otherwise we play it safe and
+		 * try to find a mode with a refresh rate closest to the
+		 * standard 60 Hz.
+		 */
+		if (db != modedb &&
+		    info->monspecs.vfmin && info->monspecs.vfmax &&
+		    info->monspecs.hfmin && info->monspecs.hfmax &&
+		    info->monspecs.dclkmax) {
+			refresh = 1000;
+		} else {
+			refresh = 60;
+		}
+	}
+
+	diff = -1;
 	best = -1;
 	for (i = 0; i < dbsize; i++) {
-		if (name_matches(db[i], name, namelen) ||
-		    (res_specified && res_matches(db[i], xres, yres))) {
-			if(!fb_try_mode(var, info, &db[i], bpp)) {
-				if(!refresh_specified || db[i].refresh == refresh)
-					return 1;
-				else {
-					if(diff > abs(db[i].refresh - refresh)) {
-						diff = abs(db[i].refresh - refresh);
-						best = i;
-					}
+		if ((name_matches(db[i], name, namelen) ||
+		    (res_specified && res_matches(db[i], xres, yres))) &&
+		    !fb_try_mode(var, info, &db[i], bpp)) {
+			if (refresh_specified && db[i].refresh == refresh) {
+				return 1;
+			} else {
+				if (abs(db[i].refresh - refresh) < diff) {
+					diff = abs(db[i].refresh - refresh);
+					best = i;
 				}
 			}
 		}
 	}
 	if (best != -1) {
 		fb_try_mode(var, info, &db[best], bpp);
-		return 2;
+		return (refresh_specified) ? 2 : 1;
 	}
 
 	diff = xres + yres;
@@ -938,6 +955,7 @@ void fb_destroy_modelist(struct list_head *head)
 		kfree(pos);
 	}
 }
+EXPORT_SYMBOL_GPL(fb_destroy_modelist);
 
 /**
  * fb_videomode_to_modelist: convert mode array to mode list
diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c
index 731d7a5c5aa..4b6a99b5be0 100644
--- a/drivers/video/neofb.c
+++ b/drivers/video/neofb.c
@@ -72,7 +72,6 @@
 #include <asm/irq.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
-#include <asm/uaccess.h>
 
 #ifdef CONFIG_MTRR
 #include <asm/mtrr.h>
diff --git a/drivers/video/nvidia/nv_i2c.c b/drivers/video/nvidia/nv_i2c.c
index afe4567e1ff..6fd7cb8f9b8 100644
--- a/drivers/video/nvidia/nv_i2c.c
+++ b/drivers/video/nvidia/nv_i2c.c
@@ -125,11 +125,13 @@ void nvidia_create_i2c_busses(struct nvidia_par *par)
 	par->chan[1].par = par;
 	par->chan[2].par = par;
 
-	par->chan[0].ddc_base = 0x36;
- 	nvidia_setup_i2c_bus(&par->chan[0], "nvidia #0", I2C_CLASS_HWMON);
+	par->chan[0].ddc_base = (par->reverse_i2c) ? 0x36 : 0x3e;
+ 	nvidia_setup_i2c_bus(&par->chan[0], "nvidia #0",
+			     (par->reverse_i2c) ? I2C_CLASS_HWMON : 0);
 
-	par->chan[1].ddc_base = 0x3e;
- 	nvidia_setup_i2c_bus(&par->chan[1], "nvidia #1", 0);
+	par->chan[1].ddc_base = (par->reverse_i2c) ? 0x3e : 0x36;
+ 	nvidia_setup_i2c_bus(&par->chan[1], "nvidia #1",
+			     (par->reverse_i2c) ? 0 : I2C_CLASS_HWMON);
 
 	par->chan[2].ddc_base = 0x50;
  	nvidia_setup_i2c_bus(&par->chan[2], "nvidia #2", 0);
diff --git a/drivers/video/nvidia/nv_type.h b/drivers/video/nvidia/nv_type.h
index 2fdf77ec39f..f132aab8c5d 100644
--- a/drivers/video/nvidia/nv_type.h
+++ b/drivers/video/nvidia/nv_type.h
@@ -135,6 +135,7 @@ struct nvidia_par {
 	int paneltweak;
 	int LVDS;
 	int pm_state;
+	int reverse_i2c;
 	u32 crtcSync_read;
 	u32 fpSyncs;
 	u32 dmaPut;
diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index a7fe214f0f7..30e14eb1f51 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -79,6 +79,7 @@ static int noscale __devinitdata = 0;
 static int paneltweak __devinitdata = 0;
 static int vram __devinitdata = 0;
 static int bpp __devinitdata = 8;
+static int reverse_i2c __devinitdata;
 #ifdef CONFIG_MTRR
 static int nomtrr __devinitdata = 0;
 #endif
@@ -1305,6 +1306,7 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd,
 	par->CRTCnumber = forceCRTC;
 	par->FpScale = (!noscale);
 	par->paneltweak = paneltweak;
+	par->reverse_i2c = reverse_i2c;
 
 	/* enable IO and mem if not already done */
 	pci_read_config_word(pd, PCI_COMMAND, &cmd);
@@ -1486,6 +1488,8 @@ static int __devinit nvidiafb_setup(char *options)
 			noaccel = 1;
 		} else if (!strncmp(this_opt, "noscale", 7)) {
 			noscale = 1;
+		} else if (!strncmp(this_opt, "reverse_i2c", 11)) {
+			reverse_i2c = 1;
 		} else if (!strncmp(this_opt, "paneltweak:", 11)) {
 			paneltweak = simple_strtoul(this_opt+11, NULL, 0);
 		} else if (!strncmp(this_opt, "vram:", 5)) {
@@ -1582,6 +1586,8 @@ MODULE_PARM_DESC(mode_option, "Specify initial video mode");
 module_param(bpp, int, 0);
 MODULE_PARM_DESC(bpp, "pixel width in bits"
 		 "(default=8)");
+module_param(reverse_i2c, int, 0);
+MODULE_PARM_DESC(reverse_i2c, "reverse port assignment of the i2c bus");
 #ifdef CONFIG_MTRR
 module_param(nomtrr, bool, 0);
 MODULE_PARM_DESC(nomtrr, "Disables MTRR support (0 or 1=disabled) "
diff --git a/drivers/video/pm2fb.c b/drivers/video/pm2fb.c
index 10c0cc6e93f..5591dfb22b1 100644
--- a/drivers/video/pm2fb.c
+++ b/drivers/video/pm2fb.c
@@ -11,7 +11,7 @@
  * and additional input from James Simmon's port of Hannu Mallat's tdfx
  * driver.
  *
- * I have a Creative Graphics Blaster Exxtreme card - pm2fb on x86.  I
+ * I have a Creative Graphics Blaster Exxtreme card - pm2fb on x86. I
  * have no access to other pm2fb implementations. Sparc (and thus
  * hopefully other big-endian) devices now work, thanks to a lot of
  * testing work by Ron Murray. I have no access to CVision hardware,
@@ -38,6 +38,9 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#ifdef CONFIG_MTRR
+#include <asm/mtrr.h>
+#endif
 
 #include <video/permedia2.h>
 #include <video/cvisionppc.h>
@@ -52,15 +55,19 @@
 
 #undef PM2FB_MASTER_DEBUG
 #ifdef PM2FB_MASTER_DEBUG
-#define DPRINTK(a,b...)	printk(KERN_DEBUG "pm2fb: %s: " a, __FUNCTION__ , ## b)
+#define DPRINTK(a, b...)	\
+	printk(KERN_DEBUG "pm2fb: %s: " a, __FUNCTION__ , ## b)
 #else
-#define DPRINTK(a,b...)
+#define DPRINTK(a, b...)
 #endif
 
+#define PM2_PIXMAP_SIZE	(1600 * 4)
+
 /*
  * Driver data
  */
-static char *mode __devinitdata = NULL;
+static int hwcursor = 1;
+static char *mode __devinitdata;
 
 /*
  * The XFree GLINT driver will (I think to implement hardware cursor
@@ -73,6 +80,11 @@ static char *mode __devinitdata = NULL;
  */
 static int lowhsync;
 static int lowvsync;
+static int noaccel __devinitdata;
+/* mtrr option */
+#ifdef CONFIG_MTRR
+static int nomtrr __devinitdata;
+#endif
 
 /*
  * The hardware state of the graphics card that isn't part of the
@@ -88,6 +100,7 @@ struct pm2fb_par
 	u32		mem_control;	/* MemControl reg at probe */
 	u32		boot_address;	/* BootAddress reg at probe */
 	u32		palette[16];
+	int		mtrr_handle;
 };
 
 /*
@@ -135,60 +148,39 @@ static struct fb_var_screeninfo pm2fb_var __devinitdata = {
  * Utility functions
  */
 
-static inline u32 RD32(unsigned char __iomem *base, s32 off)
-{
-	return fb_readl(base + off);
-}
-
-static inline void WR32(unsigned char __iomem *base, s32 off, u32 v)
+static inline u32 pm2_RD(struct pm2fb_par *p, s32 off)
 {
-	fb_writel(v, base + off);
+	return fb_readl(p->v_regs + off);
 }
 
-static inline u32 pm2_RD(struct pm2fb_par* p, s32 off)
+static inline void pm2_WR(struct pm2fb_par *p, s32 off, u32 v)
 {
-	return RD32(p->v_regs, off);
+	fb_writel(v, p->v_regs + off);
 }
 
-static inline void pm2_WR(struct pm2fb_par* p, s32 off, u32 v)
+static inline u32 pm2_RDAC_RD(struct pm2fb_par *p, s32 idx)
 {
-	WR32(p->v_regs, off, v);
+	pm2_WR(p, PM2R_RD_PALETTE_WRITE_ADDRESS, idx);
+	mb();
+	return pm2_RD(p, PM2R_RD_INDEXED_DATA);
 }
 
-static inline u32 pm2_RDAC_RD(struct pm2fb_par* p, s32 idx)
+static inline u32 pm2v_RDAC_RD(struct pm2fb_par *p, s32 idx)
 {
-	int index = PM2R_RD_INDEXED_DATA;
-	switch (p->type) {
-	case PM2_TYPE_PERMEDIA2:
-		pm2_WR(p, PM2R_RD_PALETTE_WRITE_ADDRESS, idx);
-		break;
-	case PM2_TYPE_PERMEDIA2V:
-		pm2_WR(p, PM2VR_RD_INDEX_LOW, idx & 0xff);
-		index = PM2VR_RD_INDEXED_DATA;
-		break;
-	}
+	pm2_WR(p, PM2VR_RD_INDEX_LOW, idx & 0xff);
 	mb();
-	return pm2_RD(p, index);
+	return pm2_RD(p,  PM2VR_RD_INDEXED_DATA);
 }
 
-static inline void pm2_RDAC_WR(struct pm2fb_par* p, s32 idx, u32 v)
+static inline void pm2_RDAC_WR(struct pm2fb_par *p, s32 idx, u32 v)
 {
-	int index = PM2R_RD_INDEXED_DATA;
-	switch (p->type) {
-	case PM2_TYPE_PERMEDIA2:
-		pm2_WR(p, PM2R_RD_PALETTE_WRITE_ADDRESS, idx);
-		break;
-	case PM2_TYPE_PERMEDIA2V:
-		pm2_WR(p, PM2VR_RD_INDEX_LOW, idx & 0xff);
-		index = PM2VR_RD_INDEXED_DATA;
-		break;
-	}
+	pm2_WR(p, PM2R_RD_PALETTE_WRITE_ADDRESS, idx);
 	wmb();
-	pm2_WR(p, index, v);
+	pm2_WR(p, PM2R_RD_INDEXED_DATA, v);
 	wmb();
 }
 
-static inline void pm2v_RDAC_WR(struct pm2fb_par* p, s32 idx, u32 v)
+static inline void pm2v_RDAC_WR(struct pm2fb_par *p, s32 idx, u32 v)
 {
 	pm2_WR(p, PM2VR_RD_INDEX_LOW, idx & 0xff);
 	wmb();
@@ -199,10 +191,10 @@ static inline void pm2v_RDAC_WR(struct pm2fb_par* p, s32 idx, u32 v)
 #ifdef CONFIG_FB_PM2_FIFO_DISCONNECT
 #define WAIT_FIFO(p, a)
 #else
-static inline void WAIT_FIFO(struct pm2fb_par* p, u32 a)
+static inline void WAIT_FIFO(struct pm2fb_par *p, u32 a)
 {
-	while( pm2_RD(p, PM2R_IN_FIFO_SPACE) < a );
-	mb();
+	while (pm2_RD(p, PM2R_IN_FIFO_SPACE) < a)
+		cpu_relax();
 }
 #endif
 
@@ -238,7 +230,7 @@ static u32 partprod(u32 xres)
 
 	for (i = 0; pp_table[i].width && pp_table[i].width != xres; i++)
 		;
-	if ( pp_table[i].width == 0 )
+	if (pp_table[i].width == 0)
 		DPRINTK("invalid width %u\n", xres);
 	return pp_table[i].pp;
 }
@@ -246,25 +238,22 @@ static u32 partprod(u32 xres)
 static u32 to3264(u32 timing, int bpp, int is64)
 {
 	switch (bpp) {
+	case 24:
+		timing *= 3;
 	case 8:
-		timing >>= 2 + is64;
-		break;
+		timing >>= 1;
 	case 16:
-		timing >>= 1 + is64;
-		break;
-	case 24:
-		timing = (timing * 3) >> (2 + is64);
-		break;
+		timing >>= 1;
 	case 32:
-		if (is64)
-			timing >>= 1;
 		break;
 	}
+	if (is64)
+		timing >>= 1;
 	return timing;
 }
 
-static void pm2_mnp(u32 clk, unsigned char* mm, unsigned char* nn,
-		    unsigned char* pp)
+static void pm2_mnp(u32 clk, unsigned char *mm, unsigned char *nn,
+		    unsigned char *pp)
 {
 	unsigned char m;
 	unsigned char n;
@@ -278,13 +267,13 @@ static void pm2_mnp(u32 clk, unsigned char* mm, unsigned char* nn,
 		for (m = 2; m; m++) {
 			f = PM2_REFERENCE_CLOCK * m / n;
 			if (f >= 150000 && f <= 300000) {
-				for ( p = 0; p < 5; p++, f >>= 1) {
-					curr = ( clk > f ) ? clk - f : f - clk;
-					if ( curr < delta ) {
-						delta=curr;
-						*mm=m;
-						*nn=n;
-						*pp=p;
+				for (p = 0; p < 5; p++, f >>= 1) {
+					curr = (clk > f) ? clk - f : f - clk;
+					if (curr < delta) {
+						delta = curr;
+						*mm = m;
+						*nn = n;
+						*pp = p;
 					}
 				}
 			}
@@ -292,8 +281,8 @@ static void pm2_mnp(u32 clk, unsigned char* mm, unsigned char* nn,
 	}
 }
 
-static void pm2v_mnp(u32 clk, unsigned char* mm, unsigned char* nn,
-		     unsigned char* pp)
+static void pm2v_mnp(u32 clk, unsigned char *mm, unsigned char *nn,
+		     unsigned char *pp)
 {
 	unsigned char m;
 	unsigned char n;
@@ -302,23 +291,24 @@ static void pm2v_mnp(u32 clk, unsigned char* mm, unsigned char* nn,
 	s32 delta = 1000;
 
 	*mm = *nn = *pp = 0;
-	for ( m = 1; m < 128; m++) {
+	for (m = 1; m < 128; m++) {
 		for (n = 2 * m + 1; n; n++) {
-			for ( p = 0; p < 2; p++) {
-				f = ( PM2_REFERENCE_CLOCK >> ( p + 1 )) * n / m;
-				if ( clk > f - delta && clk < f + delta ) {
-					delta = ( clk > f ) ? clk - f : f - clk;
-					*mm=m;
-					*nn=n;
-					*pp=p;
+			for (p = 0; p < 2; p++) {
+				f = (PM2_REFERENCE_CLOCK >> (p + 1)) * n / m;
+				if (clk > f - delta && clk < f + delta) {
+					delta = (clk > f) ? clk - f : f - clk;
+					*mm = m;
+					*nn = n;
+					*pp = p;
 				}
 			}
 		}
 	}
 }
 
-static void clear_palette(struct pm2fb_par* p) {
-	int i=256;
+static void clear_palette(struct pm2fb_par *p)
+{
+	int i = 256;
 
 	WAIT_FIFO(p, 1);
 	pm2_WR(p, PM2R_RD_PALETTE_WRITE_ADDRESS, 0);
@@ -331,14 +321,14 @@ static void clear_palette(struct pm2fb_par* p) {
 	}
 }
 
-static void reset_card(struct pm2fb_par* p)
+static void reset_card(struct pm2fb_par *p)
 {
 	if (p->type == PM2_TYPE_PERMEDIA2V)
 		pm2_WR(p, PM2VR_RD_INDEX_HIGH, 0);
 	pm2_WR(p, PM2R_RESET_STATUS, 0);
 	mb();
 	while (pm2_RD(p, PM2R_RESET_STATUS) & PM2F_BEING_RESET)
-		;
+		cpu_relax();
 	mb();
 #ifdef CONFIG_FB_PM2_FIFO_DISCONNECT
 	DPRINTK("FIFO disconnect enabled\n");
@@ -354,11 +344,11 @@ static void reset_card(struct pm2fb_par* p)
 	pm2_WR(p, PM2R_MEM_CONFIG, p->mem_config);
 }
 
-static void reset_config(struct pm2fb_par* p)
+static void reset_config(struct pm2fb_par *p)
 {
-	WAIT_FIFO(p, 52);
+	WAIT_FIFO(p, 53);
 	pm2_WR(p, PM2R_CHIP_CONFIG, pm2_RD(p, PM2R_CHIP_CONFIG) &
-	       ~(PM2F_VGA_ENABLE|PM2F_VGA_FIXED));
+			~(PM2F_VGA_ENABLE | PM2F_VGA_FIXED));
 	pm2_WR(p, PM2R_BYPASS_WRITE_MASK, ~(0L));
 	pm2_WR(p, PM2R_FRAMEBUFFER_WRITE_MASK, ~(0L));
 	pm2_WR(p, PM2R_FIFO_CONTROL, 0);
@@ -393,31 +383,32 @@ static void reset_config(struct pm2fb_par* p)
 	pm2_WR(p, PM2R_STATISTICS_MODE, 0);
 	pm2_WR(p, PM2R_SCISSOR_MODE, 0);
 	pm2_WR(p, PM2R_FILTER_MODE, PM2F_SYNCHRONIZATION);
+	pm2_WR(p, PM2R_RD_PIXEL_MASK, 0xff);
 	switch (p->type) {
 	case PM2_TYPE_PERMEDIA2:
 		pm2_RDAC_WR(p, PM2I_RD_MODE_CONTROL, 0); /* no overlay */
 		pm2_RDAC_WR(p, PM2I_RD_CURSOR_CONTROL, 0);
 		pm2_RDAC_WR(p, PM2I_RD_MISC_CONTROL, PM2F_RD_PALETTE_WIDTH_8);
+		pm2_RDAC_WR(p, PM2I_RD_COLOR_KEY_CONTROL, 0);
+		pm2_RDAC_WR(p, PM2I_RD_OVERLAY_KEY, 0);
+		pm2_RDAC_WR(p, PM2I_RD_RED_KEY, 0);
+		pm2_RDAC_WR(p, PM2I_RD_GREEN_KEY, 0);
+		pm2_RDAC_WR(p, PM2I_RD_BLUE_KEY, 0);
 		break;
 	case PM2_TYPE_PERMEDIA2V:
 		pm2v_RDAC_WR(p, PM2VI_RD_MISC_CONTROL, 1); /* 8bit */
 		break;
 	}
-	pm2_RDAC_WR(p, PM2I_RD_COLOR_KEY_CONTROL, 0);
-	pm2_RDAC_WR(p, PM2I_RD_OVERLAY_KEY, 0);
-	pm2_RDAC_WR(p, PM2I_RD_RED_KEY, 0);
-	pm2_RDAC_WR(p, PM2I_RD_GREEN_KEY, 0);
-	pm2_RDAC_WR(p, PM2I_RD_BLUE_KEY, 0);
 }
 
-static void set_aperture(struct pm2fb_par* p, u32 depth)
+static void set_aperture(struct pm2fb_par *p, u32 depth)
 {
 	/*
 	 * The hardware is little-endian. When used in big-endian
 	 * hosts, the on-chip aperture settings are used where
 	 * possible to translate from host to card byte order.
 	 */
-	WAIT_FIFO(p, 4);
+	WAIT_FIFO(p, 2);
 #ifdef __LITTLE_ENDIAN
 	pm2_WR(p, PM2R_APERTURE_ONE, PM2F_APERTURE_STANDARD);
 #else
@@ -440,11 +431,11 @@ static void set_aperture(struct pm2fb_par* p, u32 depth)
 	}
 #endif
 
-	// We don't use aperture two, so this may be superflous
+	/* We don't use aperture two, so this may be superfluous */
 	pm2_WR(p, PM2R_APERTURE_TWO, PM2F_APERTURE_STANDARD);
 }
 
-static void set_color(struct pm2fb_par* p, unsigned char regno,
+static void set_color(struct pm2fb_par *p, unsigned char regno,
 		      unsigned char r, unsigned char g, unsigned char b)
 {
 	WAIT_FIFO(p, 4);
@@ -457,7 +448,7 @@ static void set_color(struct pm2fb_par* p, unsigned char regno,
 	pm2_WR(p, PM2R_RD_PALETTE_DATA, b);
 }
 
-static void set_memclock(struct pm2fb_par* par, u32 clk)
+static void set_memclock(struct pm2fb_par *par, u32 clk)
 {
 	int i;
 	unsigned char m, n, p;
@@ -465,7 +456,7 @@ static void set_memclock(struct pm2fb_par* par, u32 clk)
 	switch (par->type) {
 	case PM2_TYPE_PERMEDIA2V:
 		pm2v_mnp(clk/2, &m, &n, &p);
-		WAIT_FIFO(par, 8);
+		WAIT_FIFO(par, 12);
 		pm2_WR(par, PM2VR_RD_INDEX_HIGH, PM2VI_RD_MCLK_CONTROL >> 8);
 		pm2v_RDAC_WR(par, PM2VI_RD_MCLK_CONTROL, 0);
 		pm2v_RDAC_WR(par, PM2VI_RD_MCLK_PRESCALE, m);
@@ -473,10 +464,9 @@ static void set_memclock(struct pm2fb_par* par, u32 clk)
 		pm2v_RDAC_WR(par, PM2VI_RD_MCLK_POSTSCALE, p);
 		pm2v_RDAC_WR(par, PM2VI_RD_MCLK_CONTROL, 1);
 		rmb();
-		for (i = 256;
-		     i && !(pm2_RDAC_RD(par, PM2VI_RD_MCLK_CONTROL) & 2);
-		     i--)
-			;
+		for (i = 256; i; i--)
+			if (pm2v_RDAC_RD(par, PM2VI_RD_MCLK_CONTROL) & 2)
+				break;
 		pm2_WR(par, PM2VR_RD_INDEX_HIGH, 0);
 		break;
 	case PM2_TYPE_PERMEDIA2:
@@ -488,15 +478,14 @@ static void set_memclock(struct pm2fb_par* par, u32 clk)
 		pm2_RDAC_WR(par, PM2I_RD_MEMORY_CLOCK_3, 8|p);
 		pm2_RDAC_RD(par, PM2I_RD_MEMORY_CLOCK_STATUS);
 		rmb();
-		for (i = 256;
-		     i && !(pm2_RD(par, PM2R_RD_INDEXED_DATA) & PM2F_PLL_LOCKED);
-		     i--)
-			;
+		for (i = 256; i; i--)
+			if (pm2_RD(par, PM2R_RD_INDEXED_DATA) & PM2F_PLL_LOCKED)
+				break;
 		break;
 	}
 }
 
-static void set_pixclock(struct pm2fb_par* par, u32 clk)
+static void set_pixclock(struct pm2fb_par *par, u32 clk)
 {
 	int i;
 	unsigned char m, n, p;
@@ -504,17 +493,16 @@ static void set_pixclock(struct pm2fb_par* par, u32 clk)
 	switch (par->type) {
 	case PM2_TYPE_PERMEDIA2:
 		pm2_mnp(clk, &m, &n, &p);
-		WAIT_FIFO(par, 8);
+		WAIT_FIFO(par, 10);
 		pm2_RDAC_WR(par, PM2I_RD_PIXEL_CLOCK_A3, 0);
 		pm2_RDAC_WR(par, PM2I_RD_PIXEL_CLOCK_A1, m);
 		pm2_RDAC_WR(par, PM2I_RD_PIXEL_CLOCK_A2, n);
 		pm2_RDAC_WR(par, PM2I_RD_PIXEL_CLOCK_A3, 8|p);
 		pm2_RDAC_RD(par, PM2I_RD_PIXEL_CLOCK_STATUS);
 		rmb();
-		for (i = 256;
-		     i && !(pm2_RD(par, PM2R_RD_INDEXED_DATA) & PM2F_PLL_LOCKED);
-		     i--)
-			;
+		for (i = 256; i; i--)
+			if (pm2_RD(par, PM2R_RD_INDEXED_DATA) & PM2F_PLL_LOCKED)
+				break;
 		break;
 	case PM2_TYPE_PERMEDIA2V:
 		pm2v_mnp(clk/2, &m, &n, &p);
@@ -528,11 +516,10 @@ static void set_pixclock(struct pm2fb_par* par, u32 clk)
 	}
 }
 
-static void set_video(struct pm2fb_par* p, u32 video) {
+static void set_video(struct pm2fb_par *p, u32 video)
+{
 	u32 tmp;
-	u32 vsync;
-
-	vsync = video;
+	u32 vsync = video;
 
 	DPRINTK("video = 0x%x\n", video);
 
@@ -542,10 +529,10 @@ static void set_video(struct pm2fb_par* p, u32 video) {
 	 * driver may well. So always set +hsync/+vsync and then set
 	 * the RAMDAC to invert the sync if necessary.
 	 */
-	vsync &= ~(PM2F_HSYNC_MASK|PM2F_VSYNC_MASK);
-	vsync |= PM2F_HSYNC_ACT_HIGH|PM2F_VSYNC_ACT_HIGH;
+	vsync &= ~(PM2F_HSYNC_MASK | PM2F_VSYNC_MASK);
+	vsync |= PM2F_HSYNC_ACT_HIGH | PM2F_VSYNC_ACT_HIGH;
 
-	WAIT_FIFO(p, 5);
+	WAIT_FIFO(p, 3);
 	pm2_WR(p, PM2R_VIDEO_CONTROL, vsync);
 
 	switch (p->type) {
@@ -564,16 +551,11 @@ static void set_video(struct pm2fb_par* p, u32 video) {
 		if ((video & PM2F_VSYNC_MASK) == PM2F_VSYNC_ACT_LOW)
 			tmp |= 4; /* invert vsync */
 		pm2v_RDAC_WR(p, PM2VI_RD_SYNC_CONTROL, tmp);
-		pm2v_RDAC_WR(p, PM2VI_RD_MISC_CONTROL, 1);
 		break;
 	}
 }
 
 /*
- *
- */
-
-/**
  *	pm2fb_check_var - Optional function. Validates a var passed in.
  *	@var: frame buffer variable screen structure
  *	@info: frame buffer structure that represents a single frame buffer
@@ -594,15 +576,22 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	}
 
 	if (var->xres != var->xres_virtual) {
-		DPRINTK("virtual x resolution != physical x resolution not supported\n");
+		DPRINTK("virtual x resolution != "
+			"physical x resolution not supported\n");
 		return -EINVAL;
 	}
 
 	if (var->yres > var->yres_virtual) {
-		DPRINTK("virtual y resolution < physical y resolution not possible\n");
+		DPRINTK("virtual y resolution < "
+			"physical y resolution not possible\n");
 		return -EINVAL;
 	}
 
+	/* permedia cannot blit over 2048 */
+	if (var->yres_virtual > 2047) {
+		var->yres_virtual = 2047;
+	}
+
 	if (var->xoffset) {
 		DPRINTK("xoffset not supported\n");
 		return -EINVAL;
@@ -614,7 +603,7 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	}
 
 	var->xres = (var->xres + 15) & ~15; /* could sometimes be 8 */
-	lpitch = var->xres * ((var->bits_per_pixel + 7)>>3);
+	lpitch = var->xres * ((var->bits_per_pixel + 7) >> 3);
 
 	if (var->xres < 320 || var->xres > 1600) {
 		DPRINTK("width not supported: %u\n", var->xres);
@@ -633,15 +622,18 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	}
 
 	if (PICOS2KHZ(var->pixclock) > PM2_MAX_PIXCLOCK) {
-		DPRINTK("pixclock too high (%ldKHz)\n", PICOS2KHZ(var->pixclock));
+		DPRINTK("pixclock too high (%ldKHz)\n",
+			PICOS2KHZ(var->pixclock));
 		return -EINVAL;
 	}
 
 	var->transp.offset = 0;
 	var->transp.length = 0;
-	switch(var->bits_per_pixel) {
+	switch (var->bits_per_pixel) {
 	case 8:
-		var->red.length = var->green.length = var->blue.length = 8;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
 		break;
 	case 16:
 		var->red.offset   = 11;
@@ -657,7 +649,9 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 		var->red.offset	  = 16;
 		var->green.offset = 8;
 		var->blue.offset  = 0;
-		var->red.length = var->green.length = var->blue.length = 8;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
 		break;
 	case 24:
 #ifdef __BIG_ENDIAN
@@ -668,10 +662,13 @@ static int pm2fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 		var->blue.offset  = 0;
 #endif
 		var->green.offset = 8;
-		var->red.length = var->green.length = var->blue.length = 8;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
 		break;
 	}
-	var->height = var->width = -1;
+	var->height = -1;
+	var->width = -1;
 
 	var->accel_flags = 0;	/* Can't mmap if this is on */
 
@@ -691,7 +688,9 @@ static int pm2fb_set_par(struct fb_info *info)
 {
 	struct pm2fb_par *par = info->par;
 	u32 pixclock;
-	u32 width, height, depth;
+	u32 width = (info->var.xres_virtual + 7) & ~7;
+	u32 height = info->var.yres_virtual;
+	u32 depth = (info->var.bits_per_pixel + 7) & ~7;
 	u32 hsstart, hsend, hbend, htotal;
 	u32 vsstart, vsend, vbend, vtotal;
 	u32 stride;
@@ -701,22 +700,19 @@ static int pm2fb_set_par(struct fb_info *info)
 	u32 txtmap = 0;
 	u32 pixsize = 0;
 	u32 clrformat = 0;
-	u32 xres;
+	u32 misc = 1; /* 8-bit DAC */
+	u32 xres = (info->var.xres + 31) & ~31;
 	int data64;
 
 	reset_card(par);
 	reset_config(par);
 	clear_palette(par);
-	if ( par->memclock )
+	if (par->memclock)
 		set_memclock(par, par->memclock);
 
-	width = (info->var.xres_virtual + 7) & ~7;
-	height = info->var.yres_virtual;
-	depth = (info->var.bits_per_pixel + 7) & ~7;
 	depth = (depth > 32) ? 32 : depth;
 	data64 = depth > 8 || par->type == PM2_TYPE_PERMEDIA2V;
 
-	xres = (info->var.xres + 31) & ~31;
 	pixclock = PICOS2KHZ(info->var.pixclock);
 	if (pixclock > PM2_MAX_PIXCLOCK) {
 		DPRINTK("pixclock too high (%uKHz)\n", pixclock);
@@ -731,7 +727,8 @@ static int pm2fb_set_par(struct fb_info *info)
 		? info->var.lower_margin - 1
 		: 0;	/* FIXME! */
 	vsend = info->var.lower_margin + info->var.vsync_len - 1;
-	vbend = info->var.lower_margin + info->var.vsync_len + info->var.upper_margin;
+	vbend = info->var.lower_margin + info->var.vsync_len +
+		info->var.upper_margin;
 	vtotal = info->var.yres + vbend - 1;
 	stride = to3264(width, depth, 1);
 	base = to3264(info->var.yoffset * xres + info->var.xoffset, depth, 1);
@@ -744,25 +741,25 @@ static int pm2fb_set_par(struct fb_info *info)
 			video |= PM2F_HSYNC_ACT_LOW;
 		} else
 			video |= PM2F_HSYNC_ACT_HIGH;
-	}
-	else
+	} else
 		video |= PM2F_HSYNC_ACT_LOW;
+
 	if (info->var.sync & FB_SYNC_VERT_HIGH_ACT) {
 		if (lowvsync) {
 			DPRINTK("ignoring +vsync, using -vsync.\n");
 			video |= PM2F_VSYNC_ACT_LOW;
 		} else
 			video |= PM2F_VSYNC_ACT_HIGH;
-	}
-	else
+	} else
 		video |= PM2F_VSYNC_ACT_LOW;
-	if ((info->var.vmode & FB_VMODE_MASK)==FB_VMODE_INTERLACED) {
+
+	if ((info->var.vmode & FB_VMODE_MASK) == FB_VMODE_INTERLACED) {
 		DPRINTK("interlaced not supported\n");
 		return -EINVAL;
 	}
-	if ((info->var.vmode & FB_VMODE_MASK)==FB_VMODE_DOUBLE)
+	if ((info->var.vmode & FB_VMODE_MASK) == FB_VMODE_DOUBLE)
 		video |= PM2F_LINE_DOUBLE;
-	if ((info->var.activate & FB_ACTIVATE_MASK)==FB_ACTIVATE_NOW)
+	if ((info->var.activate & FB_ACTIVATE_MASK) == FB_ACTIVATE_NOW)
 		video |= PM2F_VIDEO_ENABLE;
 	par->video = video;
 
@@ -783,12 +780,10 @@ static int pm2fb_set_par(struct fb_info *info)
 
 	mb();
 	WAIT_FIFO(par, 19);
-	pm2_RDAC_WR(par, PM2I_RD_COLOR_KEY_CONTROL,
-		    ( depth == 8 ) ? 0 : PM2F_COLOR_KEY_TEST_OFF);
 	switch (depth) {
 	case 8:
 		pm2_WR(par, PM2R_FB_READ_PIXEL, 0);
-		clrformat = 0x0e;
+		clrformat = 0x2e;
 		break;
 	case 16:
 		pm2_WR(par, PM2R_FB_READ_PIXEL, 1);
@@ -796,6 +791,7 @@ static int pm2fb_set_par(struct fb_info *info)
 		txtmap = PM2F_TEXTEL_SIZE_16;
 		pixsize = 1;
 		clrformat = 0x70;
+		misc |= 8;
 		break;
 	case 32:
 		pm2_WR(par, PM2R_FB_READ_PIXEL, 2);
@@ -803,6 +799,7 @@ static int pm2fb_set_par(struct fb_info *info)
 		txtmap = PM2F_TEXTEL_SIZE_32;
 		pixsize = 2;
 		clrformat = 0x20;
+		misc |= 8;
 		break;
 	case 24:
 		pm2_WR(par, PM2R_FB_READ_PIXEL, 4);
@@ -810,6 +807,7 @@ static int pm2fb_set_par(struct fb_info *info)
 		txtmap = PM2F_TEXTEL_SIZE_24;
 		pixsize = 4;
 		clrformat = 0x20;
+		misc |= 8;
 		break;
 	}
 	pm2_WR(par, PM2R_FB_WRITE_MODE, PM2F_FB_WRITE_ENABLE);
@@ -834,14 +832,19 @@ static int pm2fb_set_par(struct fb_info *info)
 	pm2_WR(par, PM2R_SCREEN_BASE, base);
 	wmb();
 	set_video(par, video);
-	WAIT_FIFO(par, 4);
+	WAIT_FIFO(par, 10);
 	switch (par->type) {
 	case PM2_TYPE_PERMEDIA2:
 		pm2_RDAC_WR(par, PM2I_RD_COLOR_MODE, clrmode);
+		pm2_RDAC_WR(par, PM2I_RD_COLOR_KEY_CONTROL,
+				(depth == 8) ? 0 : PM2F_COLOR_KEY_TEST_OFF);
 		break;
 	case PM2_TYPE_PERMEDIA2V:
+		pm2v_RDAC_WR(par, PM2VI_RD_DAC_CONTROL, 0);
 		pm2v_RDAC_WR(par, PM2VI_RD_PIXEL_SIZE, pixsize);
 		pm2v_RDAC_WR(par, PM2VI_RD_COLOR_FORMAT, clrformat);
+		pm2v_RDAC_WR(par, PM2VI_RD_MISC_CONTROL, misc);
+		pm2v_RDAC_WR(par, PM2VI_RD_OVERLAY_KEY, 0);
 		break;
 	}
 	set_pixclock(par, pixclock);
@@ -872,16 +875,15 @@ static int pm2fb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	struct pm2fb_par *par = info->par;
 
 	if (regno >= info->cmap.len)  /* no. of hw registers */
-		return 1;
+		return -EINVAL;
 	/*
 	 * Program hardware... do anything you want with transp
 	 */
 
 	/* grayscale works only partially under directcolor */
-	if (info->var.grayscale) {
-		/* grayscale = 0.30*R + 0.59*G + 0.11*B */
+	/* grayscale = 0.30*R + 0.59*G + 0.11*B */
+	if (info->var.grayscale)
 		red = green = blue = (red * 77 + green * 151 + blue * 28) >> 8;
-	}
 
 	/* Directcolor:
 	 *   var->{color}.offset contains start of bitfield
@@ -931,7 +933,7 @@ static int pm2fb_setcolreg(unsigned regno, unsigned red, unsigned green,
 		u32 v;
 
 		if (regno >= 16)
-			return 1;
+			return -EINVAL;
 
 		v = (red << info->var.red.offset) |
 			(green << info->var.green.offset) |
@@ -948,8 +950,7 @@ static int pm2fb_setcolreg(unsigned regno, unsigned red, unsigned green,
 			break;
 		}
 		return 0;
-	}
-	else if (info->fix.visual == FB_VISUAL_PSEUDOCOLOR)
+	} else if (info->fix.visual == FB_VISUAL_PSEUDOCOLOR)
 		set_color(par, regno, red, green, blue);
 
 	return 0;
@@ -972,11 +973,9 @@ static int pm2fb_pan_display(struct fb_var_screeninfo *var,
 {
 	struct pm2fb_par *p = info->par;
 	u32 base;
-	u32 depth;
-	u32 xres;
+	u32 depth = (var->bits_per_pixel + 7) & ~7;
+	u32 xres = (var->xres + 31) & ~31;
 
-	xres = (var->xres + 31) & ~31;
-	depth = (var->bits_per_pixel + 7) & ~7;
 	depth = (depth > 32) ? 32 : depth;
 	base = to3264(var->yoffset * xres + var->xoffset, depth, 1);
 	WAIT_FIFO(p, 1);
@@ -1018,15 +1017,15 @@ static int pm2fb_blank(int blank_mode, struct fb_info *info)
 		break;
 	case FB_BLANK_VSYNC_SUSPEND:
 		/* VSync: Off */
-		video &= ~(PM2F_VSYNC_MASK | PM2F_BLANK_LOW );
+		video &= ~(PM2F_VSYNC_MASK | PM2F_BLANK_LOW);
 		break;
 	case FB_BLANK_HSYNC_SUSPEND:
 		/* HSync: Off */
-		video &= ~(PM2F_HSYNC_MASK | PM2F_BLANK_LOW );
+		video &= ~(PM2F_HSYNC_MASK | PM2F_BLANK_LOW);
 		break;
 	case FB_BLANK_POWERDOWN:
 		/* HSync: Off, VSync: Off */
-		video &= ~(PM2F_VSYNC_MASK | PM2F_HSYNC_MASK| PM2F_BLANK_LOW);
+		video &= ~(PM2F_VSYNC_MASK | PM2F_HSYNC_MASK | PM2F_BLANK_LOW);
 		break;
 	}
 	set_video(par, video);
@@ -1042,48 +1041,20 @@ static int pm2fb_sync(struct fb_info *info)
 	mb();
 	do {
 		while (pm2_RD(par, PM2R_OUT_FIFO_WORDS) == 0)
-			udelay(10);
-		rmb();
+			cpu_relax();
 	} while (pm2_RD(par, PM2R_OUT_FIFO) != PM2TAG(PM2R_SYNC));
 
 	return 0;
 }
 
-/*
- * block operation. copy=0: rectangle fill, copy=1: rectangle copy.
- */
-static void pm2fb_block_op(struct fb_info* info, int copy,
-				s32 xsrc, s32 ysrc,
-				s32 x, s32 y, s32 w, s32 h,
-				u32 color) {
-	struct pm2fb_par *par = info->par;
-
-	if (!w || !h)
-		return;
-	WAIT_FIFO(par, 5);
-	pm2_WR(par, PM2R_CONFIG, PM2F_CONFIG_FB_WRITE_ENABLE |
-		PM2F_CONFIG_FB_READ_SOURCE_ENABLE);
-	if (copy)
-		pm2_WR(par, PM2R_FB_SOURCE_DELTA,
-			((ysrc-y) & 0xfff) << 16 | ((xsrc-x) & 0xfff));
-	else
-		pm2_WR(par, PM2R_FB_BLOCK_COLOR, color);
-	pm2_WR(par, PM2R_RECTANGLE_ORIGIN, (y << 16) | x);
-	pm2_WR(par, PM2R_RECTANGLE_SIZE, (h << 16) | w);
-	wmb();
-	pm2_WR(par, PM2R_RENDER, PM2F_RENDER_RECTANGLE |
-				(x<xsrc ? PM2F_INCREASE_X : 0) |
-				(y<ysrc ? PM2F_INCREASE_Y : 0) |
-				(copy ? 0 : PM2F_RENDER_FASTFILL));
-}
-
-static void pm2fb_fillrect (struct fb_info *info,
+static void pm2fb_fillrect(struct fb_info *info,
 				const struct fb_fillrect *region)
 {
+	struct pm2fb_par *par = info->par;
 	struct fb_fillrect modded;
 	int vxres, vyres;
 	u32 color = (info->fix.visual == FB_VISUAL_TRUECOLOR) ?
-		((u32*)info->pseudo_palette)[region->color] : region->color;
+		((u32 *)info->pseudo_palette)[region->color] : region->color;
 
 	if (info->state != FBINFO_STATE_RUNNING)
 		return;
@@ -1098,31 +1069,46 @@ static void pm2fb_fillrect (struct fb_info *info,
 
 	memcpy(&modded, region, sizeof(struct fb_fillrect));
 
-	if(!modded.width || !modded.height ||
-	   modded.dx >= vxres || modded.dy >= vyres)
+	if (!modded.width || !modded.height ||
+	    modded.dx >= vxres || modded.dy >= vyres)
 		return;
 
-	if(modded.dx + modded.width  > vxres)
+	if (modded.dx + modded.width  > vxres)
 		modded.width  = vxres - modded.dx;
-	if(modded.dy + modded.height > vyres)
+	if (modded.dy + modded.height > vyres)
 		modded.height = vyres - modded.dy;
 
-	if(info->var.bits_per_pixel == 8)
+	if (info->var.bits_per_pixel == 8)
 		color |= color << 8;
-	if(info->var.bits_per_pixel <= 16)
+	if (info->var.bits_per_pixel <= 16)
 		color |= color << 16;
 
-	if(info->var.bits_per_pixel != 24)
-		pm2fb_block_op(info, 0, 0, 0,
-				modded.dx, modded.dy,
-				modded.width, modded.height, color);
-	else
-		cfb_fillrect(info, region);
+	WAIT_FIFO(par, 3);
+	pm2_WR(par, PM2R_CONFIG, PM2F_CONFIG_FB_WRITE_ENABLE);
+	pm2_WR(par, PM2R_RECTANGLE_ORIGIN, (modded.dy << 16) | modded.dx);
+	pm2_WR(par, PM2R_RECTANGLE_SIZE, (modded.height << 16) | modded.width);
+	if (info->var.bits_per_pixel != 24) {
+		WAIT_FIFO(par, 2);
+		pm2_WR(par, PM2R_FB_BLOCK_COLOR, color);
+		wmb();
+		pm2_WR(par, PM2R_RENDER,
+				PM2F_RENDER_RECTANGLE | PM2F_RENDER_FASTFILL);
+	} else {
+		WAIT_FIFO(par, 4);
+		pm2_WR(par, PM2R_COLOR_DDA_MODE, 1);
+		pm2_WR(par, PM2R_CONSTANT_COLOR, color);
+		wmb();
+		pm2_WR(par, PM2R_RENDER,
+				PM2F_RENDER_RECTANGLE |
+				PM2F_INCREASE_X | PM2F_INCREASE_Y );
+		pm2_WR(par, PM2R_COLOR_DDA_MODE, 0);
+	}
 }
 
 static void pm2fb_copyarea(struct fb_info *info,
 				const struct fb_copyarea *area)
 {
+	struct pm2fb_par *par = info->par;
 	struct fb_copyarea modded;
 	u32 vxres, vyres;
 
@@ -1138,23 +1124,359 @@ static void pm2fb_copyarea(struct fb_info *info,
 	vxres = info->var.xres_virtual;
 	vyres = info->var.yres_virtual;
 
-	if(!modded.width || !modded.height ||
-	   modded.sx >= vxres || modded.sy >= vyres ||
-	   modded.dx >= vxres || modded.dy >= vyres)
+	if (!modded.width || !modded.height ||
+	    modded.sx >= vxres || modded.sy >= vyres ||
+	    modded.dx >= vxres || modded.dy >= vyres)
 		return;
 
-	if(modded.sx + modded.width > vxres)
+	if (modded.sx + modded.width > vxres)
 		modded.width = vxres - modded.sx;
-	if(modded.dx + modded.width > vxres)
+	if (modded.dx + modded.width > vxres)
 		modded.width = vxres - modded.dx;
-	if(modded.sy + modded.height > vyres)
+	if (modded.sy + modded.height > vyres)
 		modded.height = vyres - modded.sy;
-	if(modded.dy + modded.height > vyres)
+	if (modded.dy + modded.height > vyres)
 		modded.height = vyres - modded.dy;
 
-	pm2fb_block_op(info, 1, modded.sx, modded.sy,
-			modded.dx, modded.dy,
-			modded.width, modded.height, 0);
+	WAIT_FIFO(par, 5);
+	pm2_WR(par, PM2R_CONFIG, PM2F_CONFIG_FB_WRITE_ENABLE |
+		PM2F_CONFIG_FB_READ_SOURCE_ENABLE);
+	pm2_WR(par, PM2R_FB_SOURCE_DELTA,
+			((modded.sy - modded.dy) & 0xfff) << 16 |
+			((modded.sx - modded.dx) & 0xfff));
+	pm2_WR(par, PM2R_RECTANGLE_ORIGIN, (modded.dy << 16) | modded.dx);
+	pm2_WR(par, PM2R_RECTANGLE_SIZE, (modded.height << 16) | modded.width);
+	wmb();
+	pm2_WR(par, PM2R_RENDER, PM2F_RENDER_RECTANGLE |
+				(modded.dx < modded.sx ? PM2F_INCREASE_X : 0) |
+				(modded.dy < modded.sy ? PM2F_INCREASE_Y : 0));
+}
+
+static void pm2fb_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+	struct pm2fb_par *par = info->par;
+	u32 height = image->height;
+	u32 fgx, bgx;
+	const u32 *src = (const u32 *)image->data;
+	u32 xres = (info->var.xres + 31) & ~31;
+
+	if (info->state != FBINFO_STATE_RUNNING)
+		return;
+	if (info->flags & FBINFO_HWACCEL_DISABLED || image->depth != 1) {
+		cfb_imageblit(info, image);
+		return;
+	}
+	switch (info->fix.visual) {
+	case FB_VISUAL_PSEUDOCOLOR:
+		fgx = image->fg_color;
+		bgx = image->bg_color;
+		break;
+	case FB_VISUAL_TRUECOLOR:
+	default:
+		fgx = par->palette[image->fg_color];
+		bgx = par->palette[image->bg_color];
+		break;
+	}
+	if (info->var.bits_per_pixel == 8) {
+		fgx |= fgx << 8;
+		bgx |= bgx << 8;
+	}
+	if (info->var.bits_per_pixel <= 16) {
+		fgx |= fgx << 16;
+		bgx |= bgx << 16;
+	}
+
+	WAIT_FIFO(par, 13);
+	pm2_WR(par, PM2R_FB_READ_MODE, partprod(xres));
+	pm2_WR(par, PM2R_SCISSOR_MIN_XY,
+			((image->dy & 0xfff) << 16) | (image->dx & 0x0fff));
+	pm2_WR(par, PM2R_SCISSOR_MAX_XY,
+			(((image->dy + image->height) & 0x0fff) << 16) |
+			((image->dx + image->width) & 0x0fff));
+	pm2_WR(par, PM2R_SCISSOR_MODE, 1);
+	/* GXcopy & UNIT_ENABLE */
+	pm2_WR(par, PM2R_LOGICAL_OP_MODE, (0x3 << 1) | 1);
+	pm2_WR(par, PM2R_RECTANGLE_ORIGIN,
+			((image->dy & 0xfff) << 16) | (image->dx & 0x0fff));
+	pm2_WR(par, PM2R_RECTANGLE_SIZE,
+			((image->height & 0x0fff) << 16) |
+			((image->width) & 0x0fff));
+	if (info->var.bits_per_pixel == 24) {
+		pm2_WR(par, PM2R_COLOR_DDA_MODE, 1);
+		/* clear area */
+		pm2_WR(par, PM2R_CONSTANT_COLOR, bgx);
+		pm2_WR(par, PM2R_RENDER,
+			PM2F_RENDER_RECTANGLE |
+			PM2F_INCREASE_X | PM2F_INCREASE_Y);
+		/* BitMapPackEachScanline & invert bits and byte order*/
+		/* force background */
+		pm2_WR(par, PM2R_RASTERIZER_MODE,  (1 << 9) | 1 | (3 << 7));
+		pm2_WR(par, PM2R_CONSTANT_COLOR, fgx);
+		pm2_WR(par, PM2R_RENDER,
+			PM2F_RENDER_RECTANGLE |
+			PM2F_INCREASE_X | PM2F_INCREASE_Y |
+			PM2F_RENDER_SYNC_ON_BIT_MASK);
+	} else {
+		pm2_WR(par, PM2R_COLOR_DDA_MODE, 0);
+		/* clear area */
+		pm2_WR(par, PM2R_FB_BLOCK_COLOR, bgx);
+		pm2_WR(par, PM2R_RENDER,
+			PM2F_RENDER_RECTANGLE |
+			PM2F_RENDER_FASTFILL |
+			PM2F_INCREASE_X | PM2F_INCREASE_Y);
+		/* invert bits and byte order*/
+		pm2_WR(par, PM2R_RASTERIZER_MODE,  1 | (3 << 7));
+		pm2_WR(par, PM2R_FB_BLOCK_COLOR, fgx);
+		pm2_WR(par, PM2R_RENDER,
+			PM2F_RENDER_RECTANGLE |
+			PM2F_INCREASE_X | PM2F_INCREASE_Y |
+			PM2F_RENDER_FASTFILL |
+			PM2F_RENDER_SYNC_ON_BIT_MASK);
+	}
+
+	while (height--) {
+		int width = ((image->width + 7) >> 3)
+				+ info->pixmap.scan_align - 1;
+		width >>= 2;
+		WAIT_FIFO(par, width);
+		while (width--) {
+			pm2_WR(par, PM2R_BIT_MASK_PATTERN, *src);
+			src++;
+		}
+	}
+	WAIT_FIFO(par, 3);
+	pm2_WR(par, PM2R_RASTERIZER_MODE, 0);
+	pm2_WR(par, PM2R_COLOR_DDA_MODE, 0);
+	pm2_WR(par, PM2R_SCISSOR_MODE, 0);
+}
+
+/*
+ *	Hardware cursor support.
+ */
+static const u8 cursor_bits_lookup[16] = {
+	0x00, 0x40, 0x10, 0x50, 0x04, 0x44, 0x14, 0x54,
+	0x01, 0x41, 0x11, 0x51, 0x05, 0x45, 0x15, 0x55
+};
+
+static int pm2vfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
+{
+	struct pm2fb_par *par = info->par;
+	u8 mode = PM2F_CURSORMODE_TYPE_X;
+	int x = cursor->image.dx - info->var.xoffset;
+	int y = cursor->image.dy - info->var.yoffset;
+
+	if (cursor->enable)
+		mode |= PM2F_CURSORMODE_CURSOR_ENABLE;
+
+	pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_MODE, mode);
+
+	if (!cursor->enable)
+		x = 2047;	/* push it outside display */
+	pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_X_LOW, x & 0xff);
+	pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_X_HIGH, (x >> 8) & 0xf);
+	pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_Y_LOW, y & 0xff);
+	pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_Y_HIGH, (y >> 8) & 0xf);
+
+	/*
+	 * If the cursor is not being changed this means either we want the
+	 * current cursor state (if enable is set) or we want to query what
+	 * we can do with the cursor (if enable is not set)
+	 */
+	if (!cursor->set)
+		return 0;
+
+	if (cursor->set & FB_CUR_SETHOT) {
+		pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_X_HOT,
+			     cursor->hot.x & 0x3f);
+		pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_Y_HOT,
+			     cursor->hot.y & 0x3f);
+	}
+
+	if (cursor->set & FB_CUR_SETCMAP) {
+		u32 fg_idx = cursor->image.fg_color;
+		u32 bg_idx = cursor->image.bg_color;
+		struct fb_cmap cmap = info->cmap;
+
+		/* the X11 driver says one should use these color registers */
+		pm2_WR(par, PM2VR_RD_INDEX_HIGH, PM2VI_RD_CURSOR_PALETTE >> 8);
+		pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_PALETTE + 0,
+			     cmap.red[bg_idx] >> 8 );
+		pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_PALETTE + 1,
+			     cmap.green[bg_idx] >> 8 );
+		pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_PALETTE + 2,
+			     cmap.blue[bg_idx] >> 8 );
+
+		pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_PALETTE + 3,
+			     cmap.red[fg_idx] >> 8 );
+		pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_PALETTE + 4,
+			     cmap.green[fg_idx] >> 8 );
+		pm2v_RDAC_WR(par, PM2VI_RD_CURSOR_PALETTE + 5,
+			     cmap.blue[fg_idx] >> 8 );
+		pm2_WR(par, PM2VR_RD_INDEX_HIGH, 0);
+	}
+
+	if (cursor->set & (FB_CUR_SETSHAPE | FB_CUR_SETIMAGE)) {
+		u8 *bitmap = (u8 *)cursor->image.data;
+		u8 *mask = (u8 *)cursor->mask;
+		int i;
+		int pos = PM2VI_RD_CURSOR_PATTERN;
+
+		for (i = 0; i < cursor->image.height; i++) {
+			int j = (cursor->image.width + 7) >> 3;
+			int k = 8 - j;
+
+			pm2_WR(par, PM2VR_RD_INDEX_HIGH, pos >> 8);
+
+			for (; j > 0; j--) {
+				u8 data = *bitmap ^ *mask;
+
+				if (cursor->rop == ROP_COPY)
+					data = *mask & *bitmap;
+				/* Upper 4 bits of bitmap data */
+				pm2v_RDAC_WR(par, pos++,
+					cursor_bits_lookup[data >> 4] |
+					(cursor_bits_lookup[*mask >> 4] << 1));
+				/* Lower 4 bits of bitmap */
+				pm2v_RDAC_WR(par, pos++,
+					cursor_bits_lookup[data & 0xf] |
+					(cursor_bits_lookup[*mask & 0xf] << 1));
+				bitmap++;
+				mask++;
+			}
+			for (; k > 0; k--) {
+				pm2v_RDAC_WR(par, pos++, 0);
+				pm2v_RDAC_WR(par, pos++, 0);
+			}
+		}
+
+		while (pos < (1024 + PM2VI_RD_CURSOR_PATTERN)) {
+			pm2_WR(par, PM2VR_RD_INDEX_HIGH, pos >> 8);
+			pm2v_RDAC_WR(par, pos++, 0);
+		}
+
+		pm2_WR(par, PM2VR_RD_INDEX_HIGH, 0);
+	}
+	return 0;
+}
+
+static int pm2fb_cursor(struct fb_info *info, struct fb_cursor *cursor)
+{
+	struct pm2fb_par *par = info->par;
+	u8 mode;
+
+	if (!hwcursor)
+		return -EINVAL;	/* just to force soft_cursor() call */
+
+	/* Too large of a cursor or wrong bpp :-( */
+	if (cursor->image.width > 64 ||
+	    cursor->image.height > 64 ||
+	    cursor->image.depth > 1)
+		return -EINVAL;
+
+	if (par->type == PM2_TYPE_PERMEDIA2V)
+		return pm2vfb_cursor(info, cursor);
+
+	mode = 0x40;
+	if (cursor->enable)
+		 mode = 0x43;
+
+	pm2_RDAC_WR(par, PM2I_RD_CURSOR_CONTROL, mode);
+
+	/*
+	 * If the cursor is not being changed this means either we want the
+	 * current cursor state (if enable is set) or we want to query what
+	 * we can do with the cursor (if enable is not set)
+	 */
+	if (!cursor->set)
+		return 0;
+
+	if (cursor->set & FB_CUR_SETPOS) {
+		int x = cursor->image.dx - info->var.xoffset + 63;
+		int y = cursor->image.dy - info->var.yoffset + 63;
+
+		WAIT_FIFO(par, 4);
+		pm2_WR(par, PM2R_RD_CURSOR_X_LSB, x & 0xff);
+		pm2_WR(par, PM2R_RD_CURSOR_X_MSB, (x >> 8) & 0x7);
+		pm2_WR(par, PM2R_RD_CURSOR_Y_LSB, y & 0xff);
+		pm2_WR(par, PM2R_RD_CURSOR_Y_MSB, (y >> 8) & 0x7);
+	}
+
+	if (cursor->set & FB_CUR_SETCMAP) {
+		u32 fg_idx = cursor->image.fg_color;
+		u32 bg_idx = cursor->image.bg_color;
+
+		WAIT_FIFO(par, 7);
+		pm2_WR(par, PM2R_RD_CURSOR_COLOR_ADDRESS, 1);
+		pm2_WR(par, PM2R_RD_CURSOR_COLOR_DATA,
+			info->cmap.red[bg_idx] >> 8);
+		pm2_WR(par, PM2R_RD_CURSOR_COLOR_DATA,
+			info->cmap.green[bg_idx] >> 8);
+		pm2_WR(par, PM2R_RD_CURSOR_COLOR_DATA,
+			info->cmap.blue[bg_idx] >> 8);
+
+		pm2_WR(par, PM2R_RD_CURSOR_COLOR_DATA,
+			info->cmap.red[fg_idx] >> 8);
+		pm2_WR(par, PM2R_RD_CURSOR_COLOR_DATA,
+			info->cmap.green[fg_idx] >> 8);
+		pm2_WR(par, PM2R_RD_CURSOR_COLOR_DATA,
+			info->cmap.blue[fg_idx] >> 8);
+	}
+
+	if (cursor->set & (FB_CUR_SETSHAPE | FB_CUR_SETIMAGE)) {
+		u8 *bitmap = (u8 *)cursor->image.data;
+		u8 *mask = (u8 *)cursor->mask;
+		int i;
+
+		WAIT_FIFO(par, 1);
+		pm2_WR(par, PM2R_RD_PALETTE_WRITE_ADDRESS, 0);
+
+		for (i = 0; i < cursor->image.height; i++) {
+			int j = (cursor->image.width + 7) >> 3;
+			int k = 8 - j;
+
+			WAIT_FIFO(par, 8);
+			for (; j > 0; j--) {
+				u8 data = *bitmap ^ *mask;
+
+				if (cursor->rop == ROP_COPY)
+					data = *mask & *bitmap;
+				/* bitmap data */
+				pm2_WR(par, PM2R_RD_CURSOR_DATA, data);
+				bitmap++;
+				mask++;
+			}
+			for (; k > 0; k--)
+				pm2_WR(par, PM2R_RD_CURSOR_DATA, 0);
+		}
+		for (; i < 64; i++) {
+			int j = 8;
+			WAIT_FIFO(par, 8);
+			while (j-- > 0)
+				pm2_WR(par, PM2R_RD_CURSOR_DATA, 0);
+		}
+
+		mask = (u8 *)cursor->mask;
+		for (i = 0; i < cursor->image.height; i++) {
+			int j = (cursor->image.width + 7) >> 3;
+			int k = 8 - j;
+
+			WAIT_FIFO(par, 8);
+			for (; j > 0; j--) {
+				/* mask */
+				pm2_WR(par, PM2R_RD_CURSOR_DATA, *mask);
+				mask++;
+			}
+			for (; k > 0; k--)
+				pm2_WR(par, PM2R_RD_CURSOR_DATA, 0);
+		}
+		for (; i < 64; i++) {
+			int j = 8;
+			WAIT_FIFO(par, 8);
+			while (j-- > 0)
+				pm2_WR(par, PM2R_RD_CURSOR_DATA, 0);
+		}
+	}
+	return 0;
 }
 
 /* ------------ Hardware Independent Functions ------------ */
@@ -1172,8 +1494,9 @@ static struct fb_ops pm2fb_ops = {
 	.fb_pan_display	= pm2fb_pan_display,
 	.fb_fillrect	= pm2fb_fillrect,
 	.fb_copyarea	= pm2fb_copyarea,
-	.fb_imageblit	= cfb_imageblit,
+	.fb_imageblit	= pm2fb_imageblit,
 	.fb_sync	= pm2fb_sync,
+	.fb_cursor	= pm2fb_cursor,
 };
 
 /*
@@ -1194,16 +1517,17 @@ static int __devinit pm2fb_probe(struct pci_dev *pdev,
 {
 	struct pm2fb_par *default_par;
 	struct fb_info *info;
-	int err, err_retval = -ENXIO;
+	int err;
+	int retval = -ENXIO;
 
 	err = pci_enable_device(pdev);
-	if ( err ) {
+	if (err) {
 		printk(KERN_WARNING "pm2fb: Can't enable pdev: %d\n", err);
 		return err;
 	}
 
 	info = framebuffer_alloc(sizeof(struct pm2fb_par), &pdev->dev);
-	if ( !info )
+	if (!info)
 		return -ENOMEM;
 	default_par = info->par;
 
@@ -1236,14 +1560,14 @@ static int __devinit pm2fb_probe(struct pci_dev *pdev,
 	DPRINTK("Register base at 0x%lx\n", pm2fb_fix.mmio_start);
 
 	/* Registers - request region and map it. */
-	if ( !request_mem_region(pm2fb_fix.mmio_start, pm2fb_fix.mmio_len,
-				 "pm2fb regbase") ) {
+	if (!request_mem_region(pm2fb_fix.mmio_start, pm2fb_fix.mmio_len,
+				"pm2fb regbase")) {
 		printk(KERN_WARNING "pm2fb: Can't reserve regbase.\n");
 		goto err_exit_neither;
 	}
 	default_par->v_regs =
 		ioremap_nocache(pm2fb_fix.mmio_start, pm2fb_fix.mmio_len);
-	if ( !default_par->v_regs ) {
+	if (!default_par->v_regs) {
 		printk(KERN_WARNING "pm2fb: Can't remap %s register area.\n",
 		       pm2fb_fix.id);
 		release_mem_region(pm2fb_fix.mmio_start, pm2fb_fix.mmio_len);
@@ -1258,72 +1582,101 @@ static int __devinit pm2fb_probe(struct pci_dev *pdev,
 		default_par->mem_control, default_par->boot_address,
 		default_par->mem_config);
 
-	if(default_par->mem_control == 0 &&
+	if (default_par->mem_control == 0 &&
 		default_par->boot_address == 0x31 &&
 		default_par->mem_config == 0x259fffff) {
 		default_par->memclock = CVPPC_MEMCLOCK;
-		default_par->mem_control=0;
-		default_par->boot_address=0x20;
-		default_par->mem_config=0xe6002021;
+		default_par->mem_control = 0;
+		default_par->boot_address = 0x20;
+		default_par->mem_config = 0xe6002021;
 		if (pdev->subsystem_vendor == 0x1048 &&
 			pdev->subsystem_device == 0x0a31) {
-			DPRINTK("subsystem_vendor: %04x, subsystem_device: %04x\n",
+			DPRINTK("subsystem_vendor: %04x, "
+				"subsystem_device: %04x\n",
 				pdev->subsystem_vendor, pdev->subsystem_device);
-			DPRINTK("We have not been initialized by VGA BIOS "
-				"and are running on an Elsa Winner 2000 Office\n");
+			DPRINTK("We have not been initialized by VGA BIOS and "
+				"are running on an Elsa Winner 2000 Office\n");
 			DPRINTK("Initializing card timings manually...\n");
-			default_par->memclock=70000;
+			default_par->memclock = 100000;
 		}
 		if (pdev->subsystem_vendor == 0x3d3d &&
 			pdev->subsystem_device == 0x0100) {
-			DPRINTK("subsystem_vendor: %04x, subsystem_device: %04x\n",
+			DPRINTK("subsystem_vendor: %04x, "
+				"subsystem_device: %04x\n",
 				pdev->subsystem_vendor, pdev->subsystem_device);
-			DPRINTK("We have not been initialized by VGA BIOS "
-				"and are running on an 3dlabs reference board\n");
+			DPRINTK("We have not been initialized by VGA BIOS and "
+				"are running on an 3dlabs reference board\n");
 			DPRINTK("Initializing card timings manually...\n");
-			default_par->memclock=74894;
+			default_par->memclock = 74894;
 		}
 	}
 
 	/* Now work out how big lfb is going to be. */
-	switch(default_par->mem_config & PM2F_MEM_CONFIG_RAM_MASK) {
+	switch (default_par->mem_config & PM2F_MEM_CONFIG_RAM_MASK) {
 	case PM2F_MEM_BANKS_1:
-		pm2fb_fix.smem_len=0x200000;
+		pm2fb_fix.smem_len = 0x200000;
 		break;
 	case PM2F_MEM_BANKS_2:
-		pm2fb_fix.smem_len=0x400000;
+		pm2fb_fix.smem_len = 0x400000;
 		break;
 	case PM2F_MEM_BANKS_3:
-		pm2fb_fix.smem_len=0x600000;
+		pm2fb_fix.smem_len = 0x600000;
 		break;
 	case PM2F_MEM_BANKS_4:
-		pm2fb_fix.smem_len=0x800000;
+		pm2fb_fix.smem_len = 0x800000;
 		break;
 	}
 	pm2fb_fix.smem_start = pci_resource_start(pdev, 1);
 
 	/* Linear frame buffer - request region and map it. */
-	if ( !request_mem_region(pm2fb_fix.smem_start, pm2fb_fix.smem_len,
-				 "pm2fb smem") ) {
+	if (!request_mem_region(pm2fb_fix.smem_start, pm2fb_fix.smem_len,
+				"pm2fb smem")) {
 		printk(KERN_WARNING "pm2fb: Can't reserve smem.\n");
 		goto err_exit_mmio;
 	}
 	info->screen_base =
 		ioremap_nocache(pm2fb_fix.smem_start, pm2fb_fix.smem_len);
-	if ( !info->screen_base ) {
+	if (!info->screen_base) {
 		printk(KERN_WARNING "pm2fb: Can't ioremap smem area.\n");
 		release_mem_region(pm2fb_fix.smem_start, pm2fb_fix.smem_len);
 		goto err_exit_mmio;
 	}
 
+#ifdef CONFIG_MTRR
+	default_par->mtrr_handle = -1;
+	if (!nomtrr)
+		default_par->mtrr_handle =
+			mtrr_add(pm2fb_fix.smem_start,
+				 pm2fb_fix.smem_len,
+				 MTRR_TYPE_WRCOMB, 1);
+#endif
+
 	info->fbops		= &pm2fb_ops;
 	info->fix		= pm2fb_fix;
 	info->pseudo_palette	= default_par->palette;
 	info->flags		= FBINFO_DEFAULT |
 				  FBINFO_HWACCEL_YPAN |
 				  FBINFO_HWACCEL_COPYAREA |
+				  FBINFO_HWACCEL_IMAGEBLIT |
 				  FBINFO_HWACCEL_FILLRECT;
 
+	info->pixmap.addr = kmalloc(PM2_PIXMAP_SIZE, GFP_KERNEL);
+	if (!info->pixmap.addr) {
+		retval = -ENOMEM;
+		goto err_exit_pixmap;
+	}
+	info->pixmap.size = PM2_PIXMAP_SIZE;
+	info->pixmap.buf_align = 4;
+	info->pixmap.scan_align = 4;
+	info->pixmap.access_align = 32;
+	info->pixmap.flags = FB_PIXMAP_SYSTEM;
+
+	if (noaccel) {
+		printk(KERN_DEBUG "disabling acceleration\n");
+		info->flags |= FBINFO_HWACCEL_DISABLED;
+		info->pixmap.scan_align = 1;
+	}
+
 	if (!mode)
 		mode = "640x480@60";
 
@@ -1350,6 +1703,8 @@ static int __devinit pm2fb_probe(struct pci_dev *pdev,
  err_exit_all:
 	fb_dealloc_cmap(&info->cmap);
  err_exit_both:
+	kfree(info->pixmap.addr);
+ err_exit_pixmap:
 	iounmap(info->screen_base);
 	release_mem_region(pm2fb_fix.smem_start, pm2fb_fix.smem_len);
  err_exit_mmio:
@@ -1357,7 +1712,7 @@ static int __devinit pm2fb_probe(struct pci_dev *pdev,
 	release_mem_region(pm2fb_fix.mmio_start, pm2fb_fix.mmio_len);
  err_exit_neither:
 	framebuffer_release(info);
-	return err_retval;
+	return retval;
 }
 
 /**
@@ -1369,34 +1724,34 @@ static int __devinit pm2fb_probe(struct pci_dev *pdev,
  */
 static void __devexit pm2fb_remove(struct pci_dev *pdev)
 {
-	struct fb_info* info = pci_get_drvdata(pdev);
-	struct fb_fix_screeninfo* fix = &info->fix;
+	struct fb_info *info = pci_get_drvdata(pdev);
+	struct fb_fix_screeninfo *fix = &info->fix;
 	struct pm2fb_par *par = info->par;
 
 	unregister_framebuffer(info);
 
+#ifdef CONFIG_MTRR
+	if (par->mtrr_handle >= 0)
+		mtrr_del(par->mtrr_handle, info->fix.smem_start,
+			 info->fix.smem_len);
+#endif /* CONFIG_MTRR */
 	iounmap(info->screen_base);
 	release_mem_region(fix->smem_start, fix->smem_len);
 	iounmap(par->v_regs);
 	release_mem_region(fix->mmio_start, fix->mmio_len);
 
 	pci_set_drvdata(pdev, NULL);
+	kfree(info->pixmap.addr);
 	kfree(info);
 }
 
 static struct pci_device_id pm2fb_id_table[] = {
 	{ PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_TVP4020,
-	  PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY << 16,
-	  0xff0000, 0 },
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
 	{ PCI_VENDOR_ID_3DLABS, PCI_DEVICE_ID_3DLABS_PERMEDIA2,
-	  PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY << 16,
-	  0xff0000, 0 },
-	{ PCI_VENDOR_ID_3DLABS, PCI_DEVICE_ID_3DLABS_PERMEDIA2V,
-	  PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY << 16,
-	  0xff0000, 0 },
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
 	{ PCI_VENDOR_ID_3DLABS, PCI_DEVICE_ID_3DLABS_PERMEDIA2V,
-	  PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NOT_DEFINED_VGA << 8,
-	  0xff00, 0 },
+	  PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
 	{ 0, }
 };
 
@@ -1418,7 +1773,7 @@ MODULE_DEVICE_TABLE(pci, pm2fb_id_table);
  */
 static int __init pm2fb_setup(char *options)
 {
-	char* this_opt;
+	char *this_opt;
 
 	if (!options || !*options)
 		return 0;
@@ -1426,13 +1781,20 @@ static int __init pm2fb_setup(char *options)
 	while ((this_opt = strsep(&options, ",")) != NULL) {
 		if (!*this_opt)
 			continue;
-		if(!strcmp(this_opt, "lowhsync")) {
+		if (!strcmp(this_opt, "lowhsync"))
 			lowhsync = 1;
-		} else if(!strcmp(this_opt, "lowvsync")) {
+		else if (!strcmp(this_opt, "lowvsync"))
 			lowvsync = 1;
-		} else {
+		else if (!strncmp(this_opt, "hwcursor=", 9))
+			hwcursor = simple_strtoul(this_opt + 9, NULL, 0);
+#ifdef CONFIG_MTRR
+		else if (!strncmp(this_opt, "nomtrr", 6))
+			nomtrr = 1;
+#endif
+		else if (!strncmp(this_opt, "noaccel", 7))
+			noaccel = 1;
+		else
 			mode = this_opt;
-		}
 	}
 	return 0;
 }
@@ -1474,6 +1836,15 @@ module_param(lowhsync, bool, 0);
 MODULE_PARM_DESC(lowhsync, "Force horizontal sync low regardless of mode");
 module_param(lowvsync, bool, 0);
 MODULE_PARM_DESC(lowvsync, "Force vertical sync low regardless of mode");
+module_param(noaccel, bool, 0);
+MODULE_PARM_DESC(noaccel, "Disable acceleration");
+module_param(hwcursor, int, 0644);
+MODULE_PARM_DESC(hwcursor, "Enable hardware cursor "
+			"(1=enable, 0=disable, default=1)");
+#ifdef CONFIG_MTRR
+module_param(nomtrr, bool, 0);
+MODULE_PARM_DESC(nomtrr, "Disable MTRR support (0 or 1=disabled) (default=0)");
+#endif
 
 MODULE_AUTHOR("Jim Hague <jim.hague@acm.org>");
 MODULE_DESCRIPTION("Permedia2 framebuffer device driver");
diff --git a/drivers/video/pm3fb.c b/drivers/video/pm3fb.c
index 5b3f54c0918..070659992c1 100644
--- a/drivers/video/pm3fb.c
+++ b/drivers/video/pm3fb.c
@@ -32,6 +32,9 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#ifdef CONFIG_MTRR
+#include <asm/mtrr.h>
+#endif
 
 #include <video/pm3fb.h>
 
@@ -41,15 +44,25 @@
 
 #undef PM3FB_MASTER_DEBUG
 #ifdef PM3FB_MASTER_DEBUG
-#define DPRINTK(a,b...)	printk(KERN_DEBUG "pm3fb: %s: " a, __FUNCTION__ , ## b)
+#define DPRINTK(a, b...)	\
+	printk(KERN_DEBUG "pm3fb: %s: " a, __FUNCTION__ , ## b)
 #else
-#define DPRINTK(a,b...)
+#define DPRINTK(a, b...)
 #endif
 
+#define PM3_PIXMAP_SIZE	(2048 * 4)
+
 /*
  * Driver data
  */
+static int hwcursor = 1;
 static char *mode_option __devinitdata;
+static int noaccel __devinitdata;
+
+/* mtrr option */
+#ifdef CONFIG_MTRR
+static int nomtrr __devinitdata;
+#endif
 
 /*
  * This structure defines the hardware state of the graphics card. Normally
@@ -61,8 +74,9 @@ static char *mode_option __devinitdata;
 struct pm3_par {
 	unsigned char	__iomem *v_regs;/* virtual address of p_regs */
 	u32		video;		/* video flags before blanking */
-	u32		base;		/* screen base (xoffset+yoffset) in 128 bits unit */
+	u32		base;		/* screen base in 128 bits unit */
 	u32		palette[16];
+	int		mtrr_handle;
 };
 
 /*
@@ -96,7 +110,8 @@ static inline void PM3_WRITE_REG(struct pm3_par *par, s32 off, u32 v)
 
 static inline void PM3_WAIT(struct pm3_par *par, u32 n)
 {
-	while (PM3_READ_REG(par, PM3InFIFOSpace) < n);
+	while (PM3_READ_REG(par, PM3InFIFOSpace) < n)
+		cpu_relax();
 }
 
 static inline void PM3_WRITE_DAC_REG(struct pm3_par *par, unsigned r, u8 v)
@@ -133,7 +148,7 @@ static void pm3fb_clear_colormap(struct pm3_par *par,
 
 }
 
-/* Calculating various clock parameter */
+/* Calculating various clock parameters */
 static void pm3fb_calculate_clock(unsigned long reqclock,
 				unsigned char *prescale,
 				unsigned char *feedback,
@@ -164,7 +179,7 @@ static void pm3fb_calculate_clock(unsigned long reqclock,
 
 static inline int pm3fb_depth(const struct fb_var_screeninfo *var)
 {
-	if ( var->bits_per_pixel == 16 )
+	if (var->bits_per_pixel == 16)
 		return var->red.length + var->green.length
 			+ var->blue.length;
 
@@ -195,8 +210,8 @@ static int pm3fb_sync(struct fb_info *info)
 	PM3_WRITE_REG(par, PM3Sync, 0);
 	mb();
 	do {
-		while ((PM3_READ_REG(par, PM3OutFIFOWords)) == 0);
-		rmb();
+		while ((PM3_READ_REG(par, PM3OutFIFOWords)) == 0)
+			cpu_relax();
 	} while ((PM3_READ_REG(par, PM3OutputFifo)) != PM3Sync_Tag);
 
 	return 0;
@@ -276,15 +291,22 @@ static void pm3fb_init_engine(struct fb_info *info)
 
 	PM3_WAIT(par, 2);
 	{
-		unsigned long rm = 1;
+		/* invert bits in bitmask */
+		unsigned long rm = 1 | (3 << 7);
 		switch (info->var.bits_per_pixel) {
 		case 8:
 			PM3_WRITE_REG(par, PM3PixelSize,
 					   PM3PixelSize_GLOBAL_8BIT);
+#ifdef __BIG_ENDIAN
+			rm |= 3 << 15;
+#endif
 			break;
 		case 16:
 			PM3_WRITE_REG(par, PM3PixelSize,
 					   PM3PixelSize_GLOBAL_16BIT);
+#ifdef __BIG_ENDIAN
+			rm |= 2 << 15;
+#endif
 			break;
 		case 32:
 			PM3_WRITE_REG(par, PM3PixelSize,
@@ -342,7 +364,7 @@ static void pm3fb_init_engine(struct fb_info *info)
 
 	PM3_WRITE_REG(par, PM3dXDom, 0x0);
 	PM3_WRITE_REG(par, PM3dXSub, 0x0);
-	PM3_WRITE_REG(par, PM3dY, (1 << 16));
+	PM3_WRITE_REG(par, PM3dY, 1 << 16);
 	PM3_WRITE_REG(par, PM3StartXDom, 0x0);
 	PM3_WRITE_REG(par, PM3StartXSub, 0x0);
 	PM3_WRITE_REG(par, PM3StartY, 0x0);
@@ -357,71 +379,350 @@ static void pm3fb_init_engine(struct fb_info *info)
 	pm3fb_sync(info);
 }
 
-static void pm3fb_fillrect (struct fb_info *info,
+static void pm3fb_fillrect(struct fb_info *info,
 				const struct fb_fillrect *region)
 {
 	struct pm3_par *par = info->par;
 	struct fb_fillrect modded;
 	int vxres, vyres;
+	int rop;
 	u32 color = (info->fix.visual == FB_VISUAL_TRUECOLOR) ?
-		((u32*)info->pseudo_palette)[region->color] : region->color;
+		((u32 *)info->pseudo_palette)[region->color] : region->color;
 
 	if (info->state != FBINFO_STATE_RUNNING)
 		return;
-	if ((info->flags & FBINFO_HWACCEL_DISABLED) ||
-		region->rop != ROP_COPY ) {
+	if (info->flags & FBINFO_HWACCEL_DISABLED) {
 		cfb_fillrect(info, region);
 		return;
 	}
+	if (region->rop == ROP_COPY )
+		rop = PM3Config2D_ForegroundROP(0x3); /* GXcopy */
+	else
+		rop = PM3Config2D_ForegroundROP(0x6) | /* GXxor */
+			PM3Config2D_FBDestReadEnable;
 
 	vxres = info->var.xres_virtual;
 	vyres = info->var.yres_virtual;
 
 	memcpy(&modded, region, sizeof(struct fb_fillrect));
 
-	if(!modded.width || !modded.height ||
-	   modded.dx >= vxres || modded.dy >= vyres)
+	if (!modded.width || !modded.height ||
+	    modded.dx >= vxres || modded.dy >= vyres)
 		return;
 
-	if(modded.dx + modded.width  > vxres)
+	if (modded.dx + modded.width  > vxres)
 		modded.width  = vxres - modded.dx;
-	if(modded.dy + modded.height > vyres)
+	if (modded.dy + modded.height > vyres)
 		modded.height = vyres - modded.dy;
 
-	if(info->var.bits_per_pixel == 8)
+	if (info->var.bits_per_pixel == 8)
 		color |= color << 8;
-	if(info->var.bits_per_pixel <= 16)
+	if (info->var.bits_per_pixel <= 16)
 		color |= color << 16;
 
 	PM3_WAIT(par, 4);
-
+	/* rop is GXcopy (0x3) or GXxor (0x6), chosen above from region->rop */
 	PM3_WRITE_REG(par, PM3Config2D,
-				  PM3Config2D_UseConstantSource |
-				  PM3Config2D_ForegroundROPEnable |
-				  (PM3Config2D_ForegroundROP(0x3)) |	/* Ox3 is GXcopy */
-				  PM3Config2D_FBWriteEnable);
+			PM3Config2D_UseConstantSource |
+			PM3Config2D_ForegroundROPEnable |
+			rop |
+			PM3Config2D_FBWriteEnable);
 
 	PM3_WRITE_REG(par, PM3ForegroundColor, color);
 
 	PM3_WRITE_REG(par, PM3RectanglePosition,
-		      (PM3RectanglePosition_XOffset(modded.dx)) |
-		      (PM3RectanglePosition_YOffset(modded.dy)));
+			PM3RectanglePosition_XOffset(modded.dx) |
+			PM3RectanglePosition_YOffset(modded.dy));
 
 	PM3_WRITE_REG(par, PM3Render2D,
 		      PM3Render2D_XPositive |
 		      PM3Render2D_YPositive |
 		      PM3Render2D_Operation_Normal |
 		      PM3Render2D_SpanOperation |
-		      (PM3Render2D_Width(modded.width)) |
-		      (PM3Render2D_Height(modded.height)));
+		      PM3Render2D_Width(modded.width) |
+		      PM3Render2D_Height(modded.height));
+}
+
+static void pm3fb_copyarea(struct fb_info *info,
+				const struct fb_copyarea *area)
+{
+	struct pm3_par *par = info->par;
+	struct fb_copyarea modded;
+	u32 vxres, vyres;
+	int x_align, o_x, o_y;
+
+	if (info->state != FBINFO_STATE_RUNNING)
+		return;
+	if (info->flags & FBINFO_HWACCEL_DISABLED) {
+		cfb_copyarea(info, area);
+		return;
+	}
+
+	memcpy(&modded, area, sizeof(struct fb_copyarea));
+
+	vxres = info->var.xres_virtual;
+	vyres = info->var.yres_virtual;
+
+	if (!modded.width || !modded.height ||
+	    modded.sx >= vxres || modded.sy >= vyres ||
+	    modded.dx >= vxres || modded.dy >= vyres)
+		return;
+
+	if (modded.sx + modded.width > vxres)
+		modded.width = vxres - modded.sx;
+	if (modded.dx + modded.width > vxres)
+		modded.width = vxres - modded.dx;
+	if (modded.sy + modded.height > vyres)
+		modded.height = vyres - modded.sy;
+	if (modded.dy + modded.height > vyres)
+		modded.height = vyres - modded.dy;
+
+	o_x = modded.sx - modded.dx;	/*(sx > dx ) ? (sx - dx) : (dx - sx); */
+	o_y = modded.sy - modded.dy;	/*(sy > dy ) ? (sy - dy) : (dy - sy); */
+
+	x_align = (modded.sx & 0x1f);
+
+	PM3_WAIT(par, 6);
+
+	PM3_WRITE_REG(par, PM3Config2D,
+			PM3Config2D_UserScissorEnable |
+			PM3Config2D_ForegroundROPEnable |
+			PM3Config2D_Blocking |
+			PM3Config2D_ForegroundROP(0x3) | /* 0x3 is GXcopy */
+			PM3Config2D_FBWriteEnable);
+
+	PM3_WRITE_REG(par, PM3ScissorMinXY,
+			((modded.dy & 0x0fff) << 16) | (modded.dx & 0x0fff));
+	PM3_WRITE_REG(par, PM3ScissorMaxXY,
+			(((modded.dy + modded.height) & 0x0fff) << 16) |
+			((modded.dx + modded.width) & 0x0fff));
+
+	PM3_WRITE_REG(par, PM3FBSourceReadBufferOffset,
+			PM3FBSourceReadBufferOffset_XOffset(o_x) |
+			PM3FBSourceReadBufferOffset_YOffset(o_y));
+
+	PM3_WRITE_REG(par, PM3RectanglePosition,
+			PM3RectanglePosition_XOffset(modded.dx - x_align) |
+			PM3RectanglePosition_YOffset(modded.dy));
+
+	PM3_WRITE_REG(par, PM3Render2D,
+			((modded.sx > modded.dx) ? PM3Render2D_XPositive : 0) |
+			((modded.sy > modded.dy) ? PM3Render2D_YPositive : 0) |
+			PM3Render2D_Operation_Normal |
+			PM3Render2D_SpanOperation |
+			PM3Render2D_FBSourceReadEnable |
+			PM3Render2D_Width(modded.width + x_align) |
+			PM3Render2D_Height(modded.height));
+}
+
+static void pm3fb_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+	struct pm3_par *par = info->par;
+	u32 height = image->height;
+	u32 fgx, bgx;
+	const u32 *src = (const u32 *)image->data;
+
+	if (info->state != FBINFO_STATE_RUNNING)
+		return;
+	if (info->flags & FBINFO_HWACCEL_DISABLED) {
+		cfb_imageblit(info, image);
+		return;
+	}
+	switch (info->fix.visual) {
+	case FB_VISUAL_PSEUDOCOLOR:
+		fgx = image->fg_color;
+		bgx = image->bg_color;
+		break;
+	case FB_VISUAL_TRUECOLOR:
+	default:
+		fgx = par->palette[image->fg_color];
+		bgx = par->palette[image->bg_color];
+		break;
+	}
+	if (image->depth != 1)
+		return cfb_imageblit(info, image);
+
+	if (info->var.bits_per_pixel == 8) {
+		fgx |= fgx << 8;
+		bgx |= bgx << 8;
+	}
+	if (info->var.bits_per_pixel <= 16) {
+		fgx |= fgx << 16;
+		bgx |= bgx << 16;
+	}
+
+	PM3_WAIT(par, 7);
+
+	PM3_WRITE_REG(par, PM3ForegroundColor, fgx);
+	PM3_WRITE_REG(par, PM3BackgroundColor, bgx);
+
+	/* ROP 0x3 is GXcopy */
+	PM3_WRITE_REG(par, PM3Config2D,
+			PM3Config2D_UserScissorEnable |
+			PM3Config2D_UseConstantSource |
+			PM3Config2D_ForegroundROPEnable |
+			PM3Config2D_ForegroundROP(0x3) |
+			PM3Config2D_OpaqueSpan |
+			PM3Config2D_FBWriteEnable);
+	PM3_WRITE_REG(par, PM3ScissorMinXY,
+			((image->dy & 0x0fff) << 16) | (image->dx & 0x0fff));
+	PM3_WRITE_REG(par, PM3ScissorMaxXY,
+			(((image->dy + image->height) & 0x0fff) << 16) |
+			((image->dx + image->width) & 0x0fff));
+	PM3_WRITE_REG(par, PM3RectanglePosition,
+			PM3RectanglePosition_XOffset(image->dx) |
+			PM3RectanglePosition_YOffset(image->dy));
+	PM3_WRITE_REG(par, PM3Render2D,
+			PM3Render2D_XPositive |
+			PM3Render2D_YPositive |
+			PM3Render2D_Operation_SyncOnBitMask |
+			PM3Render2D_SpanOperation |
+			PM3Render2D_Width(image->width) |
+			PM3Render2D_Height(image->height));
+
+
+	while (height--) {
+		int width = ((image->width + 7) >> 3)
+				+ info->pixmap.scan_align - 1;
+		width >>= 2;
+
+		while (width >= PM3_FIFO_SIZE) {
+			int i = PM3_FIFO_SIZE - 1;
+
+			PM3_WAIT(par, PM3_FIFO_SIZE);
+			while (i--) {
+				PM3_WRITE_REG(par, PM3BitMaskPattern, *src);
+				src++;
+			}
+			width -= PM3_FIFO_SIZE - 1;
+		}
+
+		PM3_WAIT(par, width + 1);
+		while (width--) {
+			PM3_WRITE_REG(par, PM3BitMaskPattern, *src);
+			src++;
+		}
+	}
 }
 /* end of acceleration functions */
 
+/*
+ *	Hardware Cursor support.
+ */
+static const u8 cursor_bits_lookup[16] = {
+	0x00, 0x40, 0x10, 0x50, 0x04, 0x44, 0x14, 0x54,
+	0x01, 0x41, 0x11, 0x51, 0x05, 0x45, 0x15, 0x55
+};
+
+static int pm3fb_cursor(struct fb_info *info, struct fb_cursor *cursor)
+{
+	struct pm3_par *par = info->par;
+	u8 mode;
+
+	if (!hwcursor)
+		return -EINVAL;	/* just to force soft_cursor() call */
+
+	/* Too large of a cursor or wrong bpp :-( */
+	if (cursor->image.width > 64 ||
+	    cursor->image.height > 64 ||
+	    cursor->image.depth > 1)
+		return -EINVAL;
+
+	mode = PM3RD_CursorMode_TYPE_X;
+	if (cursor->enable)
+		 mode |= PM3RD_CursorMode_CURSOR_ENABLE;
+
+	PM3_WRITE_DAC_REG(par, PM3RD_CursorMode, mode);
+
+	/*
+	 * If the cursor is not being changed this means either we want the
+	 * current cursor state (if enable is set) or we want to query what
+	 * we can do with the cursor (if enable is not set)
+	 */
+	if (!cursor->set)
+		return 0;
+
+	if (cursor->set & FB_CUR_SETPOS) {
+		int x = cursor->image.dx - info->var.xoffset;
+		int y = cursor->image.dy - info->var.yoffset;
+
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorXLow, x & 0xff);
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorXHigh, (x >> 8) & 0xf);
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorYLow, y & 0xff);
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorYHigh, (y >> 8) & 0xf);
+	}
+
+	if (cursor->set & FB_CUR_SETHOT) {
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorHotSpotX,
+				  cursor->hot.x & 0x3f);
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorHotSpotY,
+				  cursor->hot.y & 0x3f);
+	}
+
+	if (cursor->set & FB_CUR_SETCMAP) {
+		u32 fg_idx = cursor->image.fg_color;
+		u32 bg_idx = cursor->image.bg_color;
+		struct fb_cmap cmap = info->cmap;
+
+		/* the X11 driver says one should use these color registers */
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorPalette(39),
+				  cmap.red[fg_idx] >> 8 );
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorPalette(40),
+				  cmap.green[fg_idx] >> 8 );
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorPalette(41),
+				  cmap.blue[fg_idx] >> 8 );
+
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorPalette(42),
+				  cmap.red[bg_idx] >> 8 );
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorPalette(43),
+				  cmap.green[bg_idx] >> 8 );
+		PM3_WRITE_DAC_REG(par, PM3RD_CursorPalette(44),
+				  cmap.blue[bg_idx] >> 8 );
+	}
+
+	if (cursor->set & (FB_CUR_SETSHAPE | FB_CUR_SETIMAGE)) {
+		u8 *bitmap = (u8 *)cursor->image.data;
+		u8 *mask = (u8 *)cursor->mask;
+		int i;
+		int pos = PM3RD_CursorPattern(0);
+
+		for (i = 0; i < cursor->image.height; i++) {
+			int j = (cursor->image.width + 7) >> 3;
+			int k = 8 - j;
+
+			for (; j > 0; j--) {
+				u8 data = *bitmap ^ *mask;
+
+				if (cursor->rop == ROP_COPY)
+					data = *mask & *bitmap;
+				/* Upper 4 bits of bitmap data */
+				PM3_WRITE_DAC_REG(par, pos++,
+					cursor_bits_lookup[data >> 4] |
+					(cursor_bits_lookup[*mask >> 4] << 1));
+				/* Lower 4 bits of bitmap */
+				PM3_WRITE_DAC_REG(par, pos++,
+					cursor_bits_lookup[data & 0xf] |
+					(cursor_bits_lookup[*mask & 0xf] << 1));
+				bitmap++;
+				mask++;
+			}
+			for (; k > 0; k--) {
+				PM3_WRITE_DAC_REG(par, pos++, 0);
+				PM3_WRITE_DAC_REG(par, pos++, 0);
+			}
+		}
+		while (pos < PM3RD_CursorPattern(1024))
+			PM3_WRITE_DAC_REG(par, pos++, 0);
+	}
+	return 0;
+}
+
 /* write the mode to registers */
 static void pm3fb_write_mode(struct fb_info *info)
 {
 	struct pm3_par *par = info->par;
-	char tempsync = 0x00, tempmisc = 0x00;
+	char tempsync = 0x00;
+	char tempmisc = 0x00;
 	const u32 hsstart = info->var.right_margin;
 	const u32 hsend = hsstart + info->var.hsync_len;
 	const u32 hbend = hsend + info->var.left_margin;
@@ -618,47 +919,57 @@ static int pm3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	unsigned bpp = var->red.length + var->green.length
 			+ var->blue.length + var->transp.length;
 
-	if ( bpp != var->bits_per_pixel ) {
+	if (bpp != var->bits_per_pixel) {
 		/* set predefined mode for bits_per_pixel settings */
 
-		switch(var->bits_per_pixel) {
+		switch (var->bits_per_pixel) {
 		case 8:
-			var->red.length = var->green.length = var->blue.length = 8;
-			var->red.offset = var->green.offset = var->blue.offset = 0;
+			var->red.length = 8;
+			var->green.length = 8;
+			var->blue.length = 8;
+			var->red.offset = 0;
+			var->green.offset = 0;
+			var->blue.offset = 0;
 			var->transp.offset = 0;
 			var->transp.length = 0;
 			break;
 		case 16:
-			var->red.length = var->blue.length = 5;
+			var->red.length = 5;
+			var->blue.length = 5;
 			var->green.length = 6;
 			var->transp.length = 0;
 			break;
 		case 32:
-			var->red.length = var->green.length = var->blue.length = 8;
+			var->red.length = 8;
+			var->green.length = 8;
+			var->blue.length = 8;
 			var->transp.length = 8;
 			break;
 		default:
-			DPRINTK("depth not supported: %u\n", var->bits_per_pixel);
+			DPRINTK("depth not supported: %u\n",
+				var->bits_per_pixel);
 			return -EINVAL;
 		}
 	}
 	/* it is assumed BGRA order */
-	if (var->bits_per_pixel > 8 )
-	{
+	if (var->bits_per_pixel > 8 ) {
 		var->blue.offset = 0;
 		var->green.offset = var->blue.length;
 		var->red.offset = var->green.offset + var->green.length;
 		var->transp.offset = var->red.offset + var->red.length;
 	}
-	var->height = var->width = -1;
+	var->height = -1;
+	var->width = -1;
 
 	if (var->xres != var->xres_virtual) {
-		DPRINTK("virtual x resolution != physical x resolution not supported\n");
+		DPRINTK("virtual x resolution != "
+			"physical x resolution not supported\n");
 		return -EINVAL;
 	}
 
 	if (var->yres > var->yres_virtual) {
-		DPRINTK("virtual y resolution < physical y resolution not possible\n");
+		DPRINTK("virtual y resolution < "
+			"physical y resolution not possible\n");
 		return -EINVAL;
 	}
 
@@ -673,7 +984,7 @@ static int pm3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	}
 
 	var->xres = (var->xres + 31) & ~31; /* could sometimes be 8 */
-	lpitch = var->xres * ((var->bits_per_pixel + 7)>>3);
+	lpitch = var->xres * ((var->bits_per_pixel + 7) >> 3);
 
 	if (var->xres < 200 || var->xres > 2048) {
 		DPRINTK("width not supported: %u\n", var->xres);
@@ -692,7 +1003,8 @@ static int pm3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	}
 
 	if (PICOS2KHZ(var->pixclock) > PM3_MAX_PIXCLOCK) {
-		DPRINTK("pixclock too high (%ldKHz)\n", PICOS2KHZ(var->pixclock));
+		DPRINTK("pixclock too high (%ldKHz)\n",
+			PICOS2KHZ(var->pixclock));
 		return -EINVAL;
 	}
 
@@ -709,7 +1021,7 @@ static int pm3fb_set_par(struct fb_info *info)
 	const u32 xres = (info->var.xres + 31) & ~31;
 	const unsigned bpp = info->var.bits_per_pixel;
 
-	par->base = pm3fb_shift_bpp(bpp,(info->var.yoffset * xres)
+	par->base = pm3fb_shift_bpp(bpp, (info->var.yoffset * xres)
 					+ info->var.xoffset);
 	par->video = 0;
 
@@ -725,15 +1037,12 @@ static int pm3fb_set_par(struct fb_info *info)
 
 	if ((info->var.vmode & FB_VMODE_MASK) == FB_VMODE_DOUBLE)
 		par->video |= PM3VideoControl_LINE_DOUBLE_ON;
-	else
-		par->video |= PM3VideoControl_LINE_DOUBLE_OFF;
 
 	if ((info->var.activate & FB_ACTIVATE_MASK) == FB_ACTIVATE_NOW)
 		par->video |= PM3VideoControl_ENABLE;
-	else {
-		par->video |= PM3VideoControl_DISABLE;
+	else
 		DPRINTK("PM3Video disabled\n");
-	}
+
 	switch (bpp) {
 	case 8:
 		par->video |= PM3VideoControl_PIXELSIZE_8BIT;
@@ -751,13 +1060,11 @@ static int pm3fb_set_par(struct fb_info *info)
 
 	info->fix.visual =
 		(bpp == 8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
-	info->fix.line_length = ((info->var.xres_virtual + 7)  & ~7)
-					* bpp / 8;
+	info->fix.line_length = ((info->var.xres_virtual + 7)  >> 3) * bpp;
 
 /*	pm3fb_clear_memory(info, 0);*/
 	pm3fb_clear_colormap(par, 0, 0, 0);
-	PM3_WRITE_DAC_REG(par, PM3RD_CursorMode,
-			  PM3RD_CursorMode_CURSOR_DISABLE);
+	PM3_WRITE_DAC_REG(par, PM3RD_CursorMode, 0);
 	pm3fb_init_engine(info);
 	pm3fb_write_mode(info);
 	return 0;
@@ -773,10 +1080,9 @@ static int pm3fb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	   return -EINVAL;
 
 	/* grayscale works only partially under directcolor */
-	if (info->var.grayscale) {
-	   /* grayscale = 0.30*R + 0.59*G + 0.11*B */
+	/* grayscale = 0.30*R + 0.59*G + 0.11*B */
+	if (info->var.grayscale)
 	   red = green = blue = (red * 77 + green * 151 + blue * 28) >> 8;
-	}
 
 	/* Directcolor:
 	 *   var->{color}.offset contains start of bitfield
@@ -790,8 +1096,8 @@ static int pm3fb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	 *
 	 * Pseudocolor:
 	 *	var->{color}.offset is 0
-	 *	var->{color}.length contains width of DAC or the number of unique
-	 *			colors available (color depth)
+	 *	var->{color}.length contains width of DAC or the number
+	 *			of unique colors available (color depth)
 	 *	pseudo_palette is not used
 	 *	RAMDAC[X] is programmed to (red, green, blue)
 	 *	color depth = var->{color}.length
@@ -801,7 +1107,7 @@ static int pm3fb_setcolreg(unsigned regno, unsigned red, unsigned green,
 	 * This is the point where the color is converted to something that
 	 * is acceptable by the hardware.
 	 */
-#define CNVT_TOHW(val,width) ((((val)<<(width))+0x7FFF-(val))>>16)
+#define CNVT_TOHW(val, width) ((((val) << (width)) + 0x7FFF - (val)) >> 16)
 	red = CNVT_TOHW(red, info->var.red.length);
 	green = CNVT_TOHW(green, info->var.green.length);
 	blue = CNVT_TOHW(blue, info->var.blue.length);
@@ -825,12 +1131,11 @@ static int pm3fb_setcolreg(unsigned regno, unsigned red, unsigned green,
 			break;
 		case 16:
 		case 32:
-			((u32*)(info->pseudo_palette))[regno] = v;
+			((u32 *)(info->pseudo_palette))[regno] = v;
 			break;
 		}
 		return 0;
-	}
-	else if (info->fix.visual == FB_VISUAL_PSEUDOCOLOR)
+	} else if (info->fix.visual == FB_VISUAL_PSEUDOCOLOR)
 		pm3fb_set_color(par, regno, red, green, blue);
 
 	return 0;
@@ -871,7 +1176,7 @@ static int pm3fb_blank(int blank_mode, struct fb_info *info)
 		video |= PM3VideoControl_ENABLE;
 		break;
 	case FB_BLANK_NORMAL:
-		video &= ~(PM3VideoControl_ENABLE);
+		video &= ~PM3VideoControl_ENABLE;
 		break;
 	case FB_BLANK_HSYNC_SUSPEND:
 		video &= ~(PM3VideoControl_HSYNC_MASK |
@@ -892,7 +1197,7 @@ static int pm3fb_blank(int blank_mode, struct fb_info *info)
 	}
 
 	PM3_WAIT(par, 1);
-	PM3_WRITE_REG(par,PM3VideoControl, video);
+	PM3_WRITE_REG(par, PM3VideoControl, video);
 	return 0;
 }
 
@@ -907,10 +1212,11 @@ static struct fb_ops pm3fb_ops = {
 	.fb_setcolreg	= pm3fb_setcolreg,
 	.fb_pan_display	= pm3fb_pan_display,
 	.fb_fillrect	= pm3fb_fillrect,
-	.fb_copyarea	= cfb_copyarea,
-	.fb_imageblit	= cfb_imageblit,
+	.fb_copyarea	= pm3fb_copyarea,
+	.fb_imageblit	= pm3fb_imageblit,
 	.fb_blank	= pm3fb_blank,
 	.fb_sync	= pm3fb_sync,
+	.fb_cursor	= pm3fb_cursor,
 };
 
 /* ------------------------------------------------------------------------- */
@@ -923,7 +1229,8 @@ static struct fb_ops pm3fb_ops = {
 /* the pm3fb_fix.smem_start is also set */
 static unsigned long pm3fb_size_memory(struct pm3_par *par)
 {
-	unsigned long	memsize = 0, tempBypass, i, temp1, temp2;
+	unsigned long	memsize = 0;
+	unsigned long	tempBypass, i, temp1, temp2;
 	unsigned char	__iomem *screen_mem;
 
 	pm3fb_fix.smem_len = 64 * 1024l * 1024; /* request full aperture size */
@@ -951,7 +1258,9 @@ static unsigned long pm3fb_size_memory(struct pm3_par *par)
 	PM3_WAIT(par, 1);
 	PM3_WRITE_REG(par, PM3MemBypassWriteMask, 0xFFFFFFFF);
 
-	/* pm3 split up memory, replicates, and do a lot of nasty stuff IMHO ;-) */
+	/* pm3 split up memory, replicates, and do a lot of
+	 * nasty stuff IMHO ;-)
+	 */
 	for (i = 0; i < 32; i++) {
 		fb_writel(i * 0x00345678,
 			  (screen_mem + (i * 1048576)));
@@ -1008,8 +1317,9 @@ static int __devinit pm3fb_probe(struct pci_dev *dev,
 {
 	struct fb_info *info;
 	struct pm3_par *par;
-	struct device* device = &dev->dev; /* for pci drivers */
-	int err, retval = -ENXIO;
+	struct device *device = &dev->dev; /* for pci drivers */
+	int err;
+	int retval = -ENXIO;
 
 	err = pci_enable_device(dev);
 	if (err) {
@@ -1031,6 +1341,10 @@ static int __devinit pm3fb_probe(struct pci_dev *dev,
 	 */
 	pm3fb_fix.mmio_start = pci_resource_start(dev, 0);
 	pm3fb_fix.mmio_len = PM3_REGS_SIZE;
+#if defined(__BIG_ENDIAN)
+	pm3fb_fix.mmio_start += PM3_REGS_SIZE;
+	DPRINTK("Adjusting register base for big-endian.\n");
+#endif
 
 	/* Registers - request region and map it. */
 	if (!request_mem_region(pm3fb_fix.mmio_start, pm3fb_fix.mmio_len,
@@ -1047,15 +1361,10 @@ static int __devinit pm3fb_probe(struct pci_dev *dev,
 		goto err_exit_neither;
 	}
 
-#if defined(__BIG_ENDIAN)
-	pm3fb_fix.mmio_start += PM3_REGS_SIZE;
-	DPRINTK("Adjusting register base for big-endian.\n");
-#endif
 	/* Linear frame buffer - request region and map it. */
 	pm3fb_fix.smem_start = pci_resource_start(dev, 1);
 	pm3fb_fix.smem_len = pm3fb_size_memory(par);
-	if (!pm3fb_fix.smem_len)
-	{
+	if (!pm3fb_fix.smem_len) {
 		printk(KERN_WARNING "pm3fb: Can't find memory on board.\n");
 		goto err_exit_mmio;
 	}
@@ -1073,6 +1382,12 @@ static int __devinit pm3fb_probe(struct pci_dev *dev,
 	}
 	info->screen_size = pm3fb_fix.smem_len;
 
+#ifdef CONFIG_MTRR
+	if (!nomtrr)
+		par->mtrr_handle = mtrr_add(pm3fb_fix.smem_start,
+						pm3fb_fix.smem_len,
+						MTRR_TYPE_WRCOMB, 1);
+#endif
 	info->fbops = &pm3fb_ops;
 
 	par->video = PM3_READ_REG(par, PM3VideoControl);
@@ -1080,7 +1395,26 @@ static int __devinit pm3fb_probe(struct pci_dev *dev,
 	info->fix = pm3fb_fix;
 	info->pseudo_palette = par->palette;
 	info->flags = FBINFO_DEFAULT |
-			FBINFO_HWACCEL_FILLRECT;/* | FBINFO_HWACCEL_YPAN;*/
+			FBINFO_HWACCEL_XPAN |
+			FBINFO_HWACCEL_YPAN |
+			FBINFO_HWACCEL_COPYAREA |
+			FBINFO_HWACCEL_IMAGEBLIT |
+			FBINFO_HWACCEL_FILLRECT;
+
+	if (noaccel) {
+		printk(KERN_DEBUG "disabling acceleration\n");
+		info->flags |= FBINFO_HWACCEL_DISABLED;
+	}
+	info->pixmap.addr = kmalloc(PM3_PIXMAP_SIZE, GFP_KERNEL);
+	if (!info->pixmap.addr) {
+		retval = -ENOMEM;
+		goto err_exit_pixmap;
+	}
+	info->pixmap.size = PM3_PIXMAP_SIZE;
+	info->pixmap.buf_align = 4;
+	info->pixmap.scan_align = 4;
+	info->pixmap.access_align = 32;
+	info->pixmap.flags = FB_PIXMAP_SYSTEM;
 
 	/*
 	 * This should give a reasonable default video mode. The following is
@@ -1118,6 +1452,8 @@ static int __devinit pm3fb_probe(struct pci_dev *dev,
  err_exit_all:
 	fb_dealloc_cmap(&info->cmap);
  err_exit_both:
+	kfree(info->pixmap.addr);
+ err_exit_pixmap:
 	iounmap(info->screen_base);
 	release_mem_region(pm3fb_fix.smem_start, pm3fb_fix.smem_len);
  err_exit_mmio:
@@ -1142,12 +1478,18 @@ static void __devexit pm3fb_remove(struct pci_dev *dev)
 		unregister_framebuffer(info);
 		fb_dealloc_cmap(&info->cmap);
 
+#ifdef CONFIG_MTRR
+	if (par->mtrr_handle >= 0)
+		mtrr_del(par->mtrr_handle, info->fix.smem_start,
+			 info->fix.smem_len);
+#endif /* CONFIG_MTRR */
 		iounmap(info->screen_base);
 		release_mem_region(fix->smem_start, fix->smem_len);
 		iounmap(par->v_regs);
 		release_mem_region(fix->mmio_start, fix->mmio_len);
 
 		pci_set_drvdata(dev, NULL);
+		kfree(info->pixmap.addr);
 		framebuffer_release(info);
 	}
 }
@@ -1168,21 +1510,76 @@ static struct pci_driver pm3fb_driver = {
 
 MODULE_DEVICE_TABLE(pci, pm3fb_id_table);
 
+#ifndef MODULE
+	/*
+	 *  Setup
+	 */
+
+/*
+ * Parse comma-separated `video=pm3fb:' options: "noaccel", "hwcursor=<n>",
+ * "nomtrr" (CONFIG_MTRR only); any other token becomes mode_option.
+ */
+static int __init pm3fb_setup(char *options)
+{
+	char *this_opt;
+
+	/* Parse user specified options (`video=pm3fb:') */
+	if (!options || !*options)
+		return 0;
+
+	while ((this_opt = strsep(&options, ",")) != NULL) {
+		if (!*this_opt)
+			continue;	/* skip empty tokens, e.g. ",," */
+		else if (!strncmp(this_opt, "noaccel", 7))
+			noaccel = 1;
+		else if (!strncmp(this_opt, "hwcursor=", 9))
+			hwcursor = simple_strtoul(this_opt + 9, NULL, 0);
+#ifdef CONFIG_MTRR
+		else if (!strncmp(this_opt, "nomtrr", 6))
+			nomtrr = 1;
+#endif
+		else
+			mode_option = this_opt;	/* points into options */
+	}
+	return 0;
+}
+#endif /* MODULE */
+
 static int __init pm3fb_init(void)
 {
+	/*
+	 *  For kernel boot options (in 'video=pm3fb:<options>' format)
+	 */
 #ifndef MODULE
-	if (fb_get_options("pm3fb", NULL))
+	char *option = NULL;
+
+	if (fb_get_options("pm3fb", &option))
 		return -ENODEV;
+	pm3fb_setup(option);
 #endif
+
 	return pci_register_driver(&pm3fb_driver);
 }
 
+#ifdef MODULE
 static void __exit pm3fb_exit(void)
 {
 	pci_unregister_driver(&pm3fb_driver);
 }
 
-module_init(pm3fb_init);
 module_exit(pm3fb_exit);
+#endif
+module_init(pm3fb_init);
+
+module_param(noaccel, bool, 0);
+MODULE_PARM_DESC(noaccel, "Disable acceleration");
+module_param(hwcursor, int, 0644);
+MODULE_PARM_DESC(hwcursor, "Enable hardware cursor "
+			"(1=enable, 0=disable, default=1)");
+#ifdef CONFIG_MTRR
+module_param(nomtrr, bool, 0);
+MODULE_PARM_DESC(nomtrr, "Disable MTRR support (0 or 1=disabled) (default=0)");
+#endif
 
+MODULE_DESCRIPTION("Permedia3 framebuffer device driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/video/pmag-ba-fb.c b/drivers/video/pmag-ba-fb.c
index 264d37243fa..3a3f80f6521 100644
--- a/drivers/video/pmag-ba-fb.c
+++ b/drivers/video/pmag-ba-fb.c
@@ -147,16 +147,23 @@ static int __init pmagbafb_probe(struct device *dev)
 	resource_size_t start, len;
 	struct fb_info *info;
 	struct pmagbafb_par *par;
+	int err;
 
 	info = framebuffer_alloc(sizeof(struct pmagbafb_par), dev);
-	if (!info)
+	if (!info) {
+		printk(KERN_ERR "%s: Cannot allocate memory\n", dev->bus_id);
 		return -ENOMEM;
+	}
 
 	par = info->par;
 	dev_set_drvdata(dev, info);
 
-	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0)
+	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
+		printk(KERN_ERR "%s: Cannot allocate color map\n",
+		       dev->bus_id);
+		err = -ENOMEM;
 		goto err_alloc;
+	}
 
 	info->fbops = &pmagbafb_ops;
 	info->fix = pmagbafb_fix;
@@ -166,28 +173,41 @@ static int __init pmagbafb_probe(struct device *dev)
 	/* Request the I/O MEM resource.  */
 	start = tdev->resource.start;
 	len = tdev->resource.end - start + 1;
-	if (!request_mem_region(start, len, dev->bus_id))
+	if (!request_mem_region(start, len, dev->bus_id)) {
+		printk(KERN_ERR "%s: Cannot reserve FB region\n", dev->bus_id);
+		err = -EBUSY;
 		goto err_cmap;
+	}
 
 	/* MMIO mapping setup.  */
 	info->fix.mmio_start = start;
 	par->mmio = ioremap_nocache(info->fix.mmio_start, info->fix.mmio_len);
-	if (!par->mmio)
+	if (!par->mmio) {
+		printk(KERN_ERR "%s: Cannot map MMIO\n", dev->bus_id);
+		err = -ENOMEM;
 		goto err_resource;
+	}
 	par->dac = par->mmio + PMAG_BA_BT459;
 
 	/* Frame buffer mapping setup.  */
 	info->fix.smem_start = start + PMAG_BA_FBMEM;
 	info->screen_base = ioremap_nocache(info->fix.smem_start,
 					    info->fix.smem_len);
-	if (!info->screen_base)
+	if (!info->screen_base) {
+		printk(KERN_ERR "%s: Cannot map FB\n", dev->bus_id);
+		err = -ENOMEM;
 		goto err_mmio_map;
+	}
 	info->screen_size = info->fix.smem_len;
 
 	pmagbafb_erase_cursor(info);
 
-	if (register_framebuffer(info) < 0)
+	err = register_framebuffer(info);
+	if (err < 0) {
+		printk(KERN_ERR "%s: Cannot register framebuffer\n",
+		       dev->bus_id);
 		goto err_smem_map;
+	}
 
 	get_device(dev);
 
@@ -211,7 +231,7 @@ err_cmap:
 
 err_alloc:
 	framebuffer_release(info);
-	return -ENXIO;
+	return err;
 }
 
 static int __exit pmagbafb_remove(struct device *dev)
diff --git a/drivers/video/pmagb-b-fb.c b/drivers/video/pmagb-b-fb.c
index 7a0ce7d5af6..9b80597241b 100644
--- a/drivers/video/pmagb-b-fb.c
+++ b/drivers/video/pmagb-b-fb.c
@@ -254,16 +254,23 @@ static int __init pmagbbfb_probe(struct device *dev)
 	struct pmagbbfb_par *par;
 	char freq0[12], freq1[12];
 	u32 vid_base;
+	int err;
 
 	info = framebuffer_alloc(sizeof(struct pmagbbfb_par), dev);
-	if (!info)
+	if (!info) {
+		printk(KERN_ERR "%s: Cannot allocate memory\n", dev->bus_id);
 		return -ENOMEM;
+	}
 
 	par = info->par;
 	dev_set_drvdata(dev, info);
 
-	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0)
+	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
+		printk(KERN_ERR "%s: Cannot allocate color map\n",
+		       dev->bus_id);
+		err = -ENOMEM;
 		goto err_alloc;
+	}
 
 	info->fbops = &pmagbbfb_ops;
 	info->fix = pmagbbfb_fix;
@@ -273,22 +280,31 @@ static int __init pmagbbfb_probe(struct device *dev)
 	/* Request the I/O MEM resource.  */
 	start = tdev->resource.start;
 	len = tdev->resource.end - start + 1;
-	if (!request_mem_region(start, len, dev->bus_id))
+	if (!request_mem_region(start, len, dev->bus_id)) {
+		printk(KERN_ERR "%s: Cannot reserve FB region\n", dev->bus_id);
+		err = -EBUSY;
 		goto err_cmap;
+	}
 
 	/* MMIO mapping setup.  */
 	info->fix.mmio_start = start;
 	par->mmio = ioremap_nocache(info->fix.mmio_start, info->fix.mmio_len);
-	if (!par->mmio)
+	if (!par->mmio) {
+		printk(KERN_ERR "%s: Cannot map MMIO\n", dev->bus_id);
+		err = -ENOMEM;
 		goto err_resource;
+	}
 	par->sfb = par->mmio + PMAGB_B_SFB;
 	par->dac = par->mmio + PMAGB_B_BT459;
 
 	/* Frame buffer mapping setup.  */
 	info->fix.smem_start = start + PMAGB_B_FBMEM;
 	par->smem = ioremap_nocache(info->fix.smem_start, info->fix.smem_len);
-	if (!par->smem)
+	if (!par->smem) {
+		printk(KERN_ERR "%s: Cannot map FB\n", dev->bus_id);
+		err = -ENOMEM;
 		goto err_mmio_map;
+	}
 	vid_base = sfb_read(par, SFB_REG_VID_BASE);
 	info->screen_base = (void __iomem *)par->smem + vid_base * 0x1000;
 	info->screen_size = info->fix.smem_len - 2 * vid_base * 0x1000;
@@ -297,8 +313,12 @@ static int __init pmagbbfb_probe(struct device *dev)
 	pmagbbfb_screen_setup(info);
 	pmagbbfb_osc_setup(info);
 
-	if (register_framebuffer(info) < 0)
+	err = register_framebuffer(info);
+	if (err < 0) {
+		printk(KERN_ERR "%s: Cannot register framebuffer\n",
+		       dev->bus_id);
 		goto err_smem_map;
+	}
 
 	get_device(dev);
 
@@ -330,7 +350,7 @@ err_cmap:
 
 err_alloc:
 	framebuffer_release(info);
-	return -ENXIO;
+	return err;
 }
 
 static int __exit pmagbbfb_remove(struct device *dev)
diff --git a/drivers/video/pnx4008/pnxrgbfb.c b/drivers/video/pnx4008/pnxrgbfb.c
index f29e66e2d77..685761a0732 100644
--- a/drivers/video/pnx4008/pnxrgbfb.c
+++ b/drivers/video/pnx4008/pnxrgbfb.c
@@ -26,7 +26,6 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 
-#include <asm/uaccess.h>
 #include "sdum.h"
 #include "fbcommon.h"
 
diff --git a/drivers/video/ps3fb.c b/drivers/video/ps3fb.c
index 646ec823c16..b3463ddcfd6 100644
--- a/drivers/video/ps3fb.c
+++ b/drivers/video/ps3fb.c
@@ -22,22 +22,14 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/mm.h>
-#include <linux/tty.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/console.h>
 #include <linux/ioctl.h>
-#include <linux/notifier.h>
-#include <linux/reboot.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/fb.h>
 #include <linux/init.h>
-#include <asm/time.h>
 
 #include <asm/abs_addr.h>
 #include <asm/lv1call.h>
@@ -48,12 +40,6 @@
 
 #define DEVICE_NAME		"ps3fb"
 
-#ifdef PS3FB_DEBUG
-#define DPRINTK(fmt, args...) printk("%s: " fmt, __func__ , ##args)
-#else
-#define DPRINTK(fmt, args...)
-#endif
-
 #define L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_SYNC	0x101
 #define L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_FLIP	0x102
 #define L1GPU_CONTEXT_ATTRIBUTE_FB_SETUP	0x600
@@ -66,8 +52,10 @@
 #define L1GPU_DISPLAY_SYNC_VSYNC		2
 
 #define DDR_SIZE				(0)	/* used no ddr */
-#define GPU_OFFSET				(64 * 1024)
+#define GPU_CMD_BUF_SIZE			(64 * 1024)
 #define GPU_IOIF				(0x0d000000UL)
+#define GPU_ALIGN_UP(x)				_ALIGN_UP((x), 64)
+#define GPU_MAX_LINE_LENGTH			(65536 - 64)
 
 #define PS3FB_FULL_MODE_BIT			0x80
 
@@ -131,13 +119,12 @@ struct ps3fb_priv {
 
 	u64 context_handle, memory_handle;
 	void *xdr_ea;
+	size_t xdr_size;
 	struct gpu_driver_info *dinfo;
-	u32 res_index;
 
 	u64 vblank_count;	/* frame count */
 	wait_queue_head_t wait_vsync;
 
-	u32 num_frames;		/* num of frame buffers */
 	atomic_t ext_flip;	/* on/off flip with vsync */
 	atomic_t f_count;	/* fb_open count */
 	int is_blanked;
@@ -146,6 +133,18 @@ struct ps3fb_priv {
 };
 static struct ps3fb_priv ps3fb;
 
+struct ps3fb_par {		/* driver state behind info->par */
+	u32 pseudo_palette[16];
+	int mode_id, new_mode_id;	/* new_mode_id: set in ps3fb_set_par() */
+	int res_index;			/* index into ps3fb_res[] */
+	unsigned int num_frames;	/* num of frame buffers */
+	unsigned int width;		/* blit width, from var.xres */
+	unsigned int height;		/* blit height, from var.yres */
+	unsigned long full_offset;	/* start of fullscreen DDR fb */
+	unsigned long fb_offset;	/* start of actual DDR fb */
+	unsigned long pan_offset;	/* added to XDR source offset on blit */
+};
+
 struct ps3fb_res_table {
 	u32 xres;
 	u32 yres;
@@ -294,29 +293,31 @@ static const struct fb_videomode ps3fb_modedb[] = {
 #define Y_OFF(i)	(ps3fb_res[i].yoff)	/* top/bottom margin (pixel) */
 #define WIDTH(i)	(ps3fb_res[i].xres)	/* width of FB */
 #define HEIGHT(i)	(ps3fb_res[i].yres)	/* height of FB */
-#define BPP	4		/* number of bytes per pixel */
-#define VP_OFF(i)	(WIDTH(i) * Y_OFF(i) * BPP + X_OFF(i) * BPP)
-#define FB_OFF(i)	(GPU_OFFSET - VP_OFF(i) % GPU_OFFSET)
+#define BPP		4			/* number of bytes per pixel */
+
+/* Start of the virtual frame buffer (relative to fullscreen) */
+#define VP_OFF(i)	((WIDTH(i) * Y_OFF(i) + X_OFF(i)) * BPP)
+
 
 static int ps3fb_mode;
 module_param(ps3fb_mode, int, 0);
 
 static char *mode_option __devinitdata;
 
-static int ps3fb_get_res_table(u32 xres, u32 yres)
+static int ps3fb_get_res_table(u32 xres, u32 yres, int mode)
 {
 	int full_mode;
 	unsigned int i;
 	u32 x, y, f;
 
-	full_mode = (ps3fb_mode & PS3FB_FULL_MODE_BIT) ? PS3FB_RES_FULL : 0;
+	full_mode = (mode & PS3FB_FULL_MODE_BIT) ? PS3FB_RES_FULL : 0;
 	for (i = 0;; i++) {
 		x = ps3fb_res[i].xres;
 		y = ps3fb_res[i].yres;
 		f = ps3fb_res[i].type;
 
 		if (!x) {
-			DPRINTK("ERROR: ps3fb_get_res_table()\n");
+			pr_debug("ERROR: ps3fb_get_res_table()\n");
 			return -1;
 		}
 
@@ -335,7 +336,7 @@ static int ps3fb_get_res_table(u32 xres, u32 yres)
 }
 
 static unsigned int ps3fb_find_mode(const struct fb_var_screeninfo *var,
-				    u32 *line_length)
+				    u32 *ddr_line_length, u32 *xdr_line_length)
 {
 	unsigned int i, mode;
 
@@ -350,31 +351,41 @@ static unsigned int ps3fb_find_mode(const struct fb_var_screeninfo *var,
 		    var->upper_margin == ps3fb_modedb[i].upper_margin &&
 		    var->lower_margin == ps3fb_modedb[i].lower_margin &&
 		    var->sync == ps3fb_modedb[i].sync &&
-		    (var->vmode & FB_VMODE_MASK) == ps3fb_modedb[i].vmode) {
-			/* Cropped broadcast modes use the full line_length */
-			*line_length =
-			    ps3fb_modedb[i < 10 ? i + 13 : i].xres * 4;
-			/* Full broadcast modes have the full mode bit set */
-			mode = i > 12 ? (i - 12) | PS3FB_FULL_MODE_BIT : i + 1;
-
-			DPRINTK("ps3fb_find_mode: mode %u\n", mode);
-			return mode;
-		}
+		    (var->vmode & FB_VMODE_MASK) == ps3fb_modedb[i].vmode)
+			goto found;
 
-	DPRINTK("ps3fb_find_mode: mode not found\n");
+	pr_debug("ps3fb_find_mode: mode not found\n");
 	return 0;
 
+found:
+	/* Cropped broadcast modes use the full line length */
+	*ddr_line_length = ps3fb_modedb[i < 10 ? i + 13 : i].xres * BPP;
+
+	if (ps3_compare_firmware_version(1, 9, 0) >= 0) {
+		*xdr_line_length = GPU_ALIGN_UP(max(var->xres,
+						    var->xres_virtual) * BPP);
+		if (*xdr_line_length > GPU_MAX_LINE_LENGTH)
+			*xdr_line_length = GPU_MAX_LINE_LENGTH;
+	} else
+		*xdr_line_length = *ddr_line_length;
+
+	/* Full broadcast modes have the full mode bit set */
+	mode = i > 12 ? (i - 12) | PS3FB_FULL_MODE_BIT : i + 1;
+
+	pr_debug("ps3fb_find_mode: mode %u\n", mode);
+
+	return mode;
 }
 
-static const struct fb_videomode *ps3fb_default_mode(void)
+static const struct fb_videomode *ps3fb_default_mode(int id)
 {
-	u32 mode = ps3fb_mode & PS3AV_MODE_MASK;
+	u32 mode = id & PS3AV_MODE_MASK;
 	u32 flags;
 
 	if (mode < 1 || mode > 13)
 		return NULL;
 
-	flags = ps3fb_mode & ~PS3AV_MODE_MASK;
+	flags = id & ~PS3AV_MODE_MASK;
 
 	if (mode <= 10 && flags & PS3FB_FULL_MODE_BIT) {
 		/* Full broadcast mode */
@@ -384,55 +395,77 @@ static const struct fb_videomode *ps3fb_default_mode(void)
 	return &ps3fb_modedb[mode - 1];
 }
 
-static int ps3fb_sync(u32 frame)
+static void ps3fb_sync_image(struct device *dev, u64 frame_offset,
+			     u64 dst_offset, u64 src_offset, u32 width,
+			     u32 height, u32 dst_line_length,
+			     u32 src_line_length)
 {
-	int i, status;
-	u32 xres, yres;
-	u64 fb_ioif, offset;
-
-	i = ps3fb.res_index;
-	xres = ps3fb_res[i].xres;
-	yres = ps3fb_res[i].yres;
+	int status;
+	u64 line_length;
 
-	if (frame > ps3fb.num_frames - 1) {
-		printk(KERN_WARNING "%s: invalid frame number (%u)\n",
-		       __func__, frame);
-		return -EINVAL;
-	}
-	offset = xres * yres * BPP * frame;
+	line_length = dst_line_length;
+	if (src_line_length != dst_line_length)
+		line_length |= (u64)src_line_length << 32;
 
-	fb_ioif = GPU_IOIF + FB_OFF(i) + offset;
 	status = lv1_gpu_context_attribute(ps3fb.context_handle,
 					   L1GPU_CONTEXT_ATTRIBUTE_FB_BLIT,
-					   offset, fb_ioif,
+					   dst_offset, GPU_IOIF + src_offset,
 					   L1GPU_FB_BLIT_WAIT_FOR_COMPLETION |
-					   (xres << 16) | yres,
-					   xres * BPP);	/* line_length */
+					   (width << 16) | height,
+					   line_length);
 	if (status)
-		printk(KERN_ERR
-		       "%s: lv1_gpu_context_attribute FB_BLIT failed: %d\n",
-		       __func__, status);
+		dev_err(dev,
+			"%s: lv1_gpu_context_attribute FB_BLIT failed: %d\n",
+			__func__, status);
 #ifdef HEAD_A
 	status = lv1_gpu_context_attribute(ps3fb.context_handle,
 					   L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_FLIP,
-					   0, offset, 0, 0);
+					   0, frame_offset, 0, 0);
 	if (status)
-		printk(KERN_ERR
-		       "%s: lv1_gpu_context_attribute FLIP failed: %d\n",
-		       __func__, status);
+		dev_err(dev, "%s: lv1_gpu_context_attribute FLIP failed: %d\n",
+			__func__, status);
 #endif
 #ifdef HEAD_B
 	status = lv1_gpu_context_attribute(ps3fb.context_handle,
 					   L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_FLIP,
-					   1, offset, 0, 0);
+					   1, frame_offset, 0, 0);
 	if (status)
-		printk(KERN_ERR
-		       "%s: lv1_gpu_context_attribute FLIP failed: %d\n",
-		       __func__, status);
+		dev_err(dev, "%s: lv1_gpu_context_attribute FLIP failed: %d\n",
+			__func__, status);
 #endif
-	return 0;
 }
 
+static int ps3fb_sync(struct fb_info *info, u32 frame)
+{
+	struct ps3fb_par *par = info->par;
+	int i, error = 0;
+	u32 ddr_line_length, xdr_line_length;
+	u64 ddr_base, xdr_base;
+
+	acquire_console_sem();
+	/* Check frame, then blit it; returns 0, or -EINVAL for a bad frame. */
+	if (frame > par->num_frames - 1) {
+		dev_dbg(info->device, "%s: invalid frame number (%u)\n",
+			__func__, frame);
+		error = -EINVAL;
+		goto out;
+	}
+
+	i = par->res_index;			/* row of ps3fb_res[] in use */
+	xdr_line_length = info->fix.line_length;
+	ddr_line_length = ps3fb_res[i].xres * BPP;
+	xdr_base = frame * info->var.yres_virtual * xdr_line_length;
+	ddr_base = frame * ps3fb_res[i].yres * ddr_line_length;
+
+	ps3fb_sync_image(info->device, ddr_base + par->full_offset,
+			 ddr_base + par->fb_offset, xdr_base + par->pan_offset,
+			 par->width, par->height, ddr_line_length,
+			 xdr_line_length);
+
+out:
+	release_console_sem();
+	return error;
+}
 
 static int ps3fb_open(struct fb_info *info, int user)
 {
@@ -445,7 +478,7 @@ static int ps3fb_release(struct fb_info *info, int user)
 	if (atomic_dec_and_test(&ps3fb.f_count)) {
 		if (atomic_read(&ps3fb.ext_flip)) {
 			atomic_set(&ps3fb.ext_flip, 0);
-			ps3fb_sync(0);	/* single buffer */
+			ps3fb_sync(info, 0);	/* single buffer */
 		}
 	}
 	return 0;
@@ -461,39 +494,37 @@ static int ps3fb_release(struct fb_info *info, int user)
 
 static int ps3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
-	u32 line_length;
+	u32 xdr_line_length, ddr_line_length;
 	int mode;
-	int i;
 
-	DPRINTK("var->xres:%u info->var.xres:%u\n", var->xres, info->var.xres);
-	DPRINTK("var->yres:%u info->var.yres:%u\n", var->yres, info->var.yres);
+	dev_dbg(info->device, "var->xres:%u info->var.xres:%u\n", var->xres,
+		info->var.xres);
+	dev_dbg(info->device, "var->yres:%u info->var.yres:%u\n", var->yres,
+		info->var.yres);
 
 	/* FIXME For now we do exact matches only */
-	mode = ps3fb_find_mode(var, &line_length);
+	mode = ps3fb_find_mode(var, &ddr_line_length, &xdr_line_length);
 	if (!mode)
 		return -EINVAL;
 
-	/*
-	 *  FB_VMODE_CONUPDATE and FB_VMODE_SMOOTH_XPAN are equal!
-	 *  as FB_VMODE_SMOOTH_XPAN is only used internally
-	 */
+	/* Virtual screen */
+	if (var->xres_virtual < var->xres)
+		var->xres_virtual = var->xres;
+	if (var->yres_virtual < var->yres)
+		var->yres_virtual = var->yres;
 
-	if (var->vmode & FB_VMODE_CONUPDATE) {
-		var->vmode |= FB_VMODE_YWRAP;
-		var->xoffset = info->var.xoffset;
-		var->yoffset = info->var.yoffset;
+	if (var->xres_virtual > xdr_line_length / BPP) {
+		dev_dbg(info->device,
+			"Horizontal virtual screen size too large\n");
+		return -EINVAL;
 	}
 
-	/* Virtual screen and panning are not supported */
-	if (var->xres_virtual > var->xres || var->yres_virtual > var->yres ||
-	    var->xoffset || var->yoffset) {
-		DPRINTK("Virtual screen and panning are not supported\n");
+	if (var->xoffset + var->xres > var->xres_virtual ||
+	    var->yoffset + var->yres > var->yres_virtual) {
+		dev_dbg(info->device, "panning out-of-range\n");
 		return -EINVAL;
 	}
 
-	var->xres_virtual = var->xres;
-	var->yres_virtual = var->yres;
-
 	/* We support ARGB8888 only */
 	if (var->bits_per_pixel > 32 || var->grayscale ||
 	    var->red.offset > 16 || var->green.offset > 8 ||
@@ -502,7 +533,7 @@ static int ps3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 	    var->blue.length > 8 || var->transp.length > 8 ||
 	    var->red.msb_right || var->green.msb_right ||
 	    var->blue.msb_right || var->transp.msb_right || var->nonstd) {
-		DPRINTK("We support ARGB8888 only\n");
+		dev_dbg(info->device, "We support ARGB8888 only\n");
 		return -EINVAL;
 	}
 
@@ -522,14 +553,13 @@ static int ps3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 
 	/* Rotation is not supported */
 	if (var->rotate) {
-		DPRINTK("Rotation is not supported\n");
+		dev_dbg(info->device, "Rotation is not supported\n");
 		return -EINVAL;
 	}
 
 	/* Memory limit */
-	i = ps3fb_get_res_table(var->xres, var->yres);
-	if (ps3fb_res[i].xres*ps3fb_res[i].yres*BPP > ps3fb_videomemory.size) {
-		DPRINTK("Not enough memory\n");
+	if (var->yres_virtual * xdr_line_length > ps3fb.xdr_size) {
+		dev_dbg(info->device, "Not enough memory\n");
 		return -ENOMEM;
 	}
 
@@ -545,36 +575,69 @@ static int ps3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 
 static int ps3fb_set_par(struct fb_info *info)
 {
-	unsigned int mode;
+	struct ps3fb_par *par = info->par;
+	unsigned int mode, ddr_line_length, xdr_line_length, lines, maxlines;
 	int i;
 	unsigned long offset;
-	static int first = 1;
+	u64 dst;
 
-	DPRINTK("xres:%d xv:%d yres:%d yv:%d clock:%d\n",
+	dev_dbg(info->device, "xres:%d xv:%d yres:%d yv:%d clock:%d\n",
 		info->var.xres, info->var.xres_virtual,
 		info->var.yres, info->var.yres_virtual, info->var.pixclock);
-	i = ps3fb_get_res_table(info->var.xres, info->var.yres);
-	ps3fb.res_index = i;
 
-	mode = ps3fb_find_mode(&info->var, &info->fix.line_length);
+	mode = ps3fb_find_mode(&info->var, &ddr_line_length, &xdr_line_length);
 	if (!mode)
 		return -EINVAL;
 
-	offset = FB_OFF(i) + VP_OFF(i);
-	info->fix.smem_len = ps3fb_videomemory.size - offset;
-	info->screen_base = (char __iomem *)ps3fb.xdr_ea + offset;
-	memset(ps3fb.xdr_ea, 0, ps3fb_videomemory.size);
+	i = ps3fb_get_res_table(info->var.xres, info->var.yres, mode);
+	par->res_index = i;
+
+	info->fix.smem_start = virt_to_abs(ps3fb.xdr_ea);
+	info->fix.smem_len = ps3fb.xdr_size;
+	info->fix.xpanstep = info->var.xres_virtual > info->var.xres ? 1 : 0;
+	info->fix.ypanstep = info->var.yres_virtual > info->var.yres ? 1 : 0;
+	info->fix.line_length = xdr_line_length;
+
+	info->screen_base = (char __iomem *)ps3fb.xdr_ea;
 
-	ps3fb.num_frames = ps3fb_videomemory.size/
-			   (ps3fb_res[i].xres*ps3fb_res[i].yres*BPP);
+	par->num_frames = ps3fb.xdr_size /
+			  max(ps3fb_res[i].yres * ddr_line_length,
+			      info->var.yres_virtual * xdr_line_length);
 
 	/* Keep the special bits we cannot set using fb_var_screeninfo */
-	ps3fb_mode = (ps3fb_mode & ~PS3AV_MODE_MASK) | mode;
+	par->new_mode_id = (par->new_mode_id & ~PS3AV_MODE_MASK) | mode;
+
+	par->width = info->var.xres;
+	par->height = info->var.yres;
+	offset = VP_OFF(i);
+	par->fb_offset = GPU_ALIGN_UP(offset);
+	par->full_offset = par->fb_offset - offset;
+	par->pan_offset = info->var.yoffset * xdr_line_length +
+			  info->var.xoffset * BPP;
+
+	if (par->new_mode_id != par->mode_id) {
+		if (ps3av_set_video_mode(par->new_mode_id)) {
+			par->new_mode_id = par->mode_id;
+			return -EINVAL;
+		}
+		par->mode_id = par->new_mode_id;
+	}
 
-	if (ps3av_set_video_mode(ps3fb_mode, first))
-		return -EINVAL;
+	/* Clear XDR frame buffer memory */
+	memset(ps3fb.xdr_ea, 0, ps3fb.xdr_size);
+
+	/* Clear DDR frame buffer memory */
+	lines = ps3fb_res[i].yres * par->num_frames;
+	if (par->full_offset)
+		lines++;
+	maxlines = ps3fb.xdr_size / ddr_line_length;
+	for (dst = 0; lines; dst += maxlines * ddr_line_length) {
+		unsigned int l = min(lines, maxlines);
+		ps3fb_sync_image(info->device, 0, dst, 0, ps3fb_res[i].xres, l,
+				 ddr_line_length, ddr_line_length);
+		lines -= l;
+	}
 
-	first = 0;
 	return 0;
 }
 
@@ -601,6 +664,16 @@ static int ps3fb_setcolreg(unsigned int regno, unsigned int red,
 	return 0;
 }
 
+static int ps3fb_pan_display(struct fb_var_screeninfo *var,
+			     struct fb_info *info)
+{
+	struct ps3fb_par *par = info->par;
+
+	par->pan_offset = var->yoffset * info->fix.line_length +
+			  var->xoffset * BPP;
+	return 0;
+}
+
     /*
      *  As we have a virtual frame buffer, we need our own mmap function
      */
@@ -608,24 +681,19 @@ static int ps3fb_setcolreg(unsigned int regno, unsigned int red,
 static int ps3fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	unsigned long size, offset;
-	int i;
-
-	i = ps3fb_get_res_table(info->var.xres, info->var.yres);
-	if (i == -1)
-		return -EINVAL;
 
 	size = vma->vm_end - vma->vm_start;
 	offset = vma->vm_pgoff << PAGE_SHIFT;
 	if (offset + size > info->fix.smem_len)
 		return -EINVAL;
 
-	offset += info->fix.smem_start + FB_OFF(i) + VP_OFF(i);
+	offset += info->fix.smem_start;
 	if (remap_pfn_range(vma, vma->vm_start, offset >> PAGE_SHIFT,
 			    size, vma->vm_page_prot))
 		return -EAGAIN;
 
-	printk(KERN_DEBUG "ps3fb: mmap framebuffer P(%lx)->V(%lx)\n", offset,
-	       vma->vm_start);
+	dev_dbg(info->device, "ps3fb: mmap framebuffer P(%lx)->V(%lx)\n",
+		offset, vma->vm_start);
 	return 0;
 }
 
@@ -637,7 +705,7 @@ static int ps3fb_blank(int blank, struct fb_info *info)
 {
 	int retval;
 
-	DPRINTK("%s: blank:%d\n", __func__, blank);
+	dev_dbg(info->device, "%s: blank:%d\n", __func__, blank);
 	switch (blank) {
 	case FB_BLANK_POWERDOWN:
 	case FB_BLANK_HSYNC_SUSPEND:
@@ -664,7 +732,7 @@ static int ps3fb_get_vblank(struct fb_vblank *vblank)
 	return 0;
 }
 
-int ps3fb_wait_for_vsync(u32 crtc)
+static int ps3fb_wait_for_vsync(u32 crtc)
 {
 	int ret;
 	u64 count;
@@ -679,9 +747,7 @@ int ps3fb_wait_for_vsync(u32 crtc)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(ps3fb_wait_for_vsync);
-
-void ps3fb_flip_ctl(int on, void *data)
+static void ps3fb_flip_ctl(int on, void *data)
 {
 	struct ps3fb_priv *priv = data;
 	if (on)
@@ -699,14 +765,14 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd,
 		       unsigned long arg)
 {
 	void __user *argp = (void __user *)arg;
-	u32 val, old_mode;
+	u32 val;
 	int retval = -EFAULT;
 
 	switch (cmd) {
 	case FBIOGET_VBLANK:
 		{
 			struct fb_vblank vblank;
-			DPRINTK("FBIOGET_VBLANK:\n");
+			dev_dbg(info->device, "FBIOGET_VBLANK:\n");
 			retval = ps3fb_get_vblank(&vblank);
 			if (retval)
 				break;
@@ -719,7 +785,7 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd,
 	case FBIO_WAITFORVSYNC:
 		{
 			u32 crt;
-			DPRINTK("FBIO_WAITFORVSYNC:\n");
+			dev_dbg(info->device, "FBIO_WAITFORVSYNC:\n");
 			if (get_user(crt, (u32 __user *) arg))
 				break;
 
@@ -729,6 +795,7 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd,
 
 	case PS3FB_IOCTL_SETMODE:
 		{
+			struct ps3fb_par *par = info->par;
 			const struct fb_videomode *mode;
 			struct fb_var_screeninfo var;
 
@@ -736,15 +803,13 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd,
 				break;
 
 			if (!(val & PS3AV_MODE_MASK)) {
-				u32 id = ps3av_get_auto_mode(0);
+				u32 id = ps3av_get_auto_mode();
 				if (id > 0)
 					val = (val & ~PS3AV_MODE_MASK) | id;
 			}
-			DPRINTK("PS3FB_IOCTL_SETMODE:%x\n", val);
+			dev_dbg(info->device, "PS3FB_IOCTL_SETMODE:%x\n", val);
 			retval = -EINVAL;
-			old_mode = ps3fb_mode;
-			ps3fb_mode = val;
-			mode = ps3fb_default_mode();
+			mode = ps3fb_default_mode(val);
 			if (mode) {
 				var = info->var;
 				fb_videomode_to_var(&var, mode);
@@ -752,45 +817,44 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd,
 				info->flags |= FBINFO_MISC_USEREVENT;
 				/* Force, in case only special bits changed */
 				var.activate |= FB_ACTIVATE_FORCE;
+				par->new_mode_id = val;
 				retval = fb_set_var(info, &var);
 				info->flags &= ~FBINFO_MISC_USEREVENT;
 				release_console_sem();
 			}
-			if (retval)
-				ps3fb_mode = old_mode;
 			break;
 		}
 
 	case PS3FB_IOCTL_GETMODE:
 		val = ps3av_get_mode();
-		DPRINTK("PS3FB_IOCTL_GETMODE:%x\n", val);
+		dev_dbg(info->device, "PS3FB_IOCTL_GETMODE:%x\n", val);
 		if (!copy_to_user(argp, &val, sizeof(val)))
 			retval = 0;
 		break;
 
 	case PS3FB_IOCTL_SCREENINFO:
 		{
+			struct ps3fb_par *par = info->par;
 			struct ps3fb_ioctl_res res;
-			int i = ps3fb.res_index;
-			DPRINTK("PS3FB_IOCTL_SCREENINFO:\n");
-			res.xres = ps3fb_res[i].xres;
-			res.yres = ps3fb_res[i].yres;
-			res.xoff = ps3fb_res[i].xoff;
-			res.yoff = ps3fb_res[i].yoff;
-			res.num_frames = ps3fb.num_frames;
+			dev_dbg(info->device, "PS3FB_IOCTL_SCREENINFO:\n");
+			res.xres = info->fix.line_length / BPP;
+			res.yres = info->var.yres_virtual;
+			res.xoff = (res.xres - info->var.xres) / 2;
+			res.yoff = (res.yres - info->var.yres) / 2;
+			res.num_frames = par->num_frames;
 			if (!copy_to_user(argp, &res, sizeof(res)))
 				retval = 0;
 			break;
 		}
 
 	case PS3FB_IOCTL_ON:
-		DPRINTK("PS3FB_IOCTL_ON:\n");
+		dev_dbg(info->device, "PS3FB_IOCTL_ON:\n");
 		atomic_inc(&ps3fb.ext_flip);
 		retval = 0;
 		break;
 
 	case PS3FB_IOCTL_OFF:
-		DPRINTK("PS3FB_IOCTL_OFF:\n");
+		dev_dbg(info->device, "PS3FB_IOCTL_OFF:\n");
 		atomic_dec_if_positive(&ps3fb.ext_flip);
 		retval = 0;
 		break;
@@ -799,8 +863,8 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd,
 		if (copy_from_user(&val, argp, sizeof(val)))
 			break;
 
-		DPRINTK("PS3FB_IOCTL_FSEL:%d\n", val);
-		retval = ps3fb_sync(val);
+		dev_dbg(info->device, "PS3FB_IOCTL_FSEL:%d\n", val);
+		retval = ps3fb_sync(info, val);
 		break;
 
 	default:
@@ -812,13 +876,15 @@ static int ps3fb_ioctl(struct fb_info *info, unsigned int cmd,
 
 static int ps3fbd(void *arg)
 {
+	struct fb_info *info = arg;
+
 	set_freezable();
 	while (!kthread_should_stop()) {
 		try_to_freeze();
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (ps3fb.is_kicked) {
 			ps3fb.is_kicked = 0;
-			ps3fb_sync(0);	/* single buffer */
+			ps3fb_sync(info, 0);	/* single buffer */
 		}
 		schedule();
 	}
@@ -827,14 +893,15 @@ static int ps3fbd(void *arg)
 
 static irqreturn_t ps3fb_vsync_interrupt(int irq, void *ptr)
 {
+	struct device *dev = ptr;
 	u64 v1;
 	int status;
 	struct display_head *head = &ps3fb.dinfo->display_head[1];
 
 	status = lv1_gpu_context_intr(ps3fb.context_handle, &v1);
 	if (status) {
-		printk(KERN_ERR "%s: lv1_gpu_context_intr failed: %d\n",
-		       __func__, status);
+		dev_err(dev, "%s: lv1_gpu_context_intr failed: %d\n", __func__,
+			status);
 		return IRQ_NONE;
 	}
 
@@ -854,35 +921,35 @@ static irqreturn_t ps3fb_vsync_interrupt(int irq, void *ptr)
 
 
 static int ps3fb_vsync_settings(struct gpu_driver_info *dinfo,
-				struct ps3_system_bus_device *dev)
+				struct device *dev)
 {
 	int error;
 
-	DPRINTK("version_driver:%x\n", dinfo->version_driver);
-	DPRINTK("irq outlet:%x\n", dinfo->irq.irq_outlet);
-	DPRINTK("version_gpu:%x memory_size:%x ch:%x core_freq:%d mem_freq:%d\n",
+	dev_dbg(dev, "version_driver:%x\n", dinfo->version_driver);
+	dev_dbg(dev, "irq outlet:%x\n", dinfo->irq.irq_outlet);
+	dev_dbg(dev,
+		"version_gpu: %x memory_size: %x ch: %x core_freq: %d "
+		"mem_freq:%d\n",
 		dinfo->version_gpu, dinfo->memory_size, dinfo->hardware_channel,
 		dinfo->nvcore_frequency/1000000, dinfo->memory_frequency/1000000);
 
 	if (dinfo->version_driver != GPU_DRIVER_INFO_VERSION) {
-		printk(KERN_ERR "%s: version_driver err:%x\n", __func__,
-		       dinfo->version_driver);
+		dev_err(dev, "%s: version_driver err:%x\n", __func__,
+			dinfo->version_driver);
 		return -EINVAL;
 	}
 
 	error = ps3_irq_plug_setup(PS3_BINDING_CPU_ANY, dinfo->irq.irq_outlet,
 				   &ps3fb.irq_no);
 	if (error) {
-		printk(KERN_ERR "%s: ps3_alloc_irq failed %d\n", __func__,
-		       error);
+		dev_err(dev, "%s: ps3_alloc_irq failed %d\n", __func__, error);
 		return error;
 	}
 
 	error = request_irq(ps3fb.irq_no, ps3fb_vsync_interrupt, IRQF_DISABLED,
 			    DEVICE_NAME, dev);
 	if (error) {
-		printk(KERN_ERR "%s: request_irq failed %d\n", __func__,
-		       error);
+		dev_err(dev, "%s: request_irq failed %d\n", __func__, error);
 		ps3_irq_plug_destroy(ps3fb.irq_no);
 		return error;
 	}
@@ -892,29 +959,31 @@ static int ps3fb_vsync_settings(struct gpu_driver_info *dinfo,
 	return 0;
 }
 
-static int ps3fb_xdr_settings(u64 xdr_lpar)
+static int ps3fb_xdr_settings(u64 xdr_lpar, struct device *dev)
 {
 	int status;
 
 	status = lv1_gpu_context_iomap(ps3fb.context_handle, GPU_IOIF,
 				       xdr_lpar, ps3fb_videomemory.size, 0);
 	if (status) {
-		printk(KERN_ERR "%s: lv1_gpu_context_iomap failed: %d\n",
-		       __func__, status);
+		dev_err(dev, "%s: lv1_gpu_context_iomap failed: %d\n",
+			__func__, status);
 		return -ENXIO;
 	}
-	DPRINTK("video:%p xdr_ea:%p ioif:%lx lpar:%lx phys:%lx size:%lx\n",
+	dev_dbg(dev,
+		"video:%p xdr_ea:%p ioif:%lx lpar:%lx phys:%lx size:%lx\n",
 		ps3fb_videomemory.address, ps3fb.xdr_ea, GPU_IOIF, xdr_lpar,
 		virt_to_abs(ps3fb.xdr_ea), ps3fb_videomemory.size);
 
 	status = lv1_gpu_context_attribute(ps3fb.context_handle,
 					   L1GPU_CONTEXT_ATTRIBUTE_FB_SETUP,
-					   xdr_lpar, ps3fb_videomemory.size,
-					   GPU_IOIF, 0);
+					   xdr_lpar + ps3fb.xdr_size,
+					   GPU_CMD_BUF_SIZE,
+					   GPU_IOIF + ps3fb.xdr_size, 0);
 	if (status) {
-		printk(KERN_ERR
-		       "%s: lv1_gpu_context_attribute FB_SETUP failed: %d\n",
-		       __func__, status);
+		dev_err(dev,
+			"%s: lv1_gpu_context_attribute FB_SETUP failed: %d\n",
+			__func__, status);
 		return -ENXIO;
 	}
 	return 0;
@@ -928,6 +997,7 @@ static struct fb_ops ps3fb_ops = {
 	.fb_check_var	= ps3fb_check_var,
 	.fb_set_par	= ps3fb_set_par,
 	.fb_setcolreg	= ps3fb_setcolreg,
+	.fb_pan_display	= ps3fb_pan_display,
 	.fb_fillrect	= sys_fillrect,
 	.fb_copyarea	= sys_copyarea,
 	.fb_imageblit	= sys_imageblit,
@@ -944,7 +1014,7 @@ static struct fb_fix_screeninfo ps3fb_fix __initdata = {
 	.accel =	FB_ACCEL_NONE,
 };
 
-static int ps3fb_set_sync(void)
+static int ps3fb_set_sync(struct device *dev)
 {
 	int status;
 
@@ -953,8 +1023,10 @@ static int ps3fb_set_sync(void)
 					   L1GPU_CONTEXT_ATTRIBUTE_DISPLAY_SYNC,
 					   0, L1GPU_DISPLAY_SYNC_VSYNC, 0, 0);
 	if (status) {
-		printk(KERN_ERR "%s: lv1_gpu_context_attribute DISPLAY_SYNC "
-		       "failed: %d\n", __func__, status);
+		dev_err(dev,
+			"%s: lv1_gpu_context_attribute DISPLAY_SYNC failed: "
+			"%d\n",
+			__func__, status);
 		return -1;
 	}
 #endif
@@ -964,8 +1036,10 @@ static int ps3fb_set_sync(void)
 					   1, L1GPU_DISPLAY_SYNC_VSYNC, 0, 0);
 
 	if (status) {
-		printk(KERN_ERR "%s: lv1_gpu_context_attribute DISPLAY_MODE "
-		       "failed: %d\n", __func__, status);
+		dev_err(dev,
+			"%s: lv1_gpu_context_attribute DISPLAY_SYNC failed: "
+			"%d\n",
+			__func__, status);
 		return -1;
 	}
 #endif
@@ -975,6 +1049,7 @@ static int ps3fb_set_sync(void)
 static int __devinit ps3fb_probe(struct ps3_system_bus_device *dev)
 {
 	struct fb_info *info;
+	struct ps3fb_par *par;
 	int retval = -ENOMEM;
 	u32 xres, yres;
 	u64 ddr_lpar = 0;
@@ -983,98 +1058,106 @@ static int __devinit ps3fb_probe(struct ps3_system_bus_device *dev)
 	u64 lpar_reports = 0;
 	u64 lpar_reports_size = 0;
 	u64 xdr_lpar;
-	int status;
-	unsigned long offset;
+	int status, res_index;
 	struct task_struct *task;
 
 	status = ps3_open_hv_device(dev);
 	if (status) {
-		printk(KERN_ERR "%s: ps3_open_hv_device failed\n", __func__);
+		dev_err(&dev->core, "%s: ps3_open_hv_device failed\n",
+			__func__);
 		goto err;
 	}
 
 	if (!ps3fb_mode)
 		ps3fb_mode = ps3av_get_mode();
-	DPRINTK("ps3av_mode:%d\n", ps3fb_mode);
+	dev_dbg(&dev->core, "ps3av_mode:%d\n", ps3fb_mode);
 
 	if (ps3fb_mode > 0 &&
 	    !ps3av_video_mode2res(ps3fb_mode, &xres, &yres)) {
-		ps3fb.res_index = ps3fb_get_res_table(xres, yres);
-		DPRINTK("res_index:%d\n", ps3fb.res_index);
+		res_index = ps3fb_get_res_table(xres, yres, ps3fb_mode);
+		dev_dbg(&dev->core, "res_index:%d\n", res_index);
 	} else
-		ps3fb.res_index = GPU_RES_INDEX;
+		res_index = GPU_RES_INDEX;
 
 	atomic_set(&ps3fb.f_count, -1);	/* fbcon opens ps3fb */
 	atomic_set(&ps3fb.ext_flip, 0);	/* for flip with vsync */
 	init_waitqueue_head(&ps3fb.wait_vsync);
-	ps3fb.num_frames = 1;
 
-	ps3fb_set_sync();
+	ps3fb_set_sync(&dev->core);
 
 	/* get gpu context handle */
 	status = lv1_gpu_memory_allocate(DDR_SIZE, 0, 0, 0, 0,
 					 &ps3fb.memory_handle, &ddr_lpar);
 	if (status) {
-		printk(KERN_ERR "%s: lv1_gpu_memory_allocate failed: %d\n",
-		       __func__, status);
+		dev_err(&dev->core, "%s: lv1_gpu_memory_allocate failed: %d\n",
+			__func__, status);
 		goto err;
 	}
-	DPRINTK("ddr:lpar:0x%lx\n", ddr_lpar);
+	dev_dbg(&dev->core, "ddr:lpar:0x%lx\n", ddr_lpar);
 
 	status = lv1_gpu_context_allocate(ps3fb.memory_handle, 0,
 					  &ps3fb.context_handle,
 					  &lpar_dma_control, &lpar_driver_info,
 					  &lpar_reports, &lpar_reports_size);
 	if (status) {
-		printk(KERN_ERR "%s: lv1_gpu_context_attribute failed: %d\n",
-		       __func__, status);
+		dev_err(&dev->core,
+			"%s: lv1_gpu_context_attribute failed: %d\n", __func__,
+			status);
 		goto err_gpu_memory_free;
 	}
 
 	/* vsync interrupt */
 	ps3fb.dinfo = ioremap(lpar_driver_info, 128 * 1024);
 	if (!ps3fb.dinfo) {
-		printk(KERN_ERR "%s: ioremap failed\n", __func__);
+		dev_err(&dev->core, "%s: ioremap failed\n", __func__);
 		goto err_gpu_context_free;
 	}
 
-	retval = ps3fb_vsync_settings(ps3fb.dinfo, dev);
+	retval = ps3fb_vsync_settings(ps3fb.dinfo, &dev->core);
 	if (retval)
 		goto err_iounmap_dinfo;
 
-	/* xdr frame buffer */
+	/* XDR frame buffer */
 	ps3fb.xdr_ea = ps3fb_videomemory.address;
 	xdr_lpar = ps3_mm_phys_to_lpar(__pa(ps3fb.xdr_ea));
-	retval = ps3fb_xdr_settings(xdr_lpar);
+
+	/* Clear memory to prevent kernel info leakage into userspace */
+	memset(ps3fb.xdr_ea, 0, ps3fb_videomemory.size);
+
+	/* The GPU command buffer is at the end of video memory */
+	ps3fb.xdr_size = ps3fb_videomemory.size - GPU_CMD_BUF_SIZE;
+
+	retval = ps3fb_xdr_settings(xdr_lpar, &dev->core);
 	if (retval)
 		goto err_free_irq;
 
-	/*
-	 * ps3fb must clear memory to prevent kernel info
-	 * leakage into userspace
-	 */
-	memset(ps3fb.xdr_ea, 0, ps3fb_videomemory.size);
-	info = framebuffer_alloc(sizeof(u32) * 16, &dev->core);
+	info = framebuffer_alloc(sizeof(struct ps3fb_par), &dev->core);
 	if (!info)
 		goto err_free_irq;
 
-	offset = FB_OFF(ps3fb.res_index) + VP_OFF(ps3fb.res_index);
-	info->screen_base = (char __iomem *)ps3fb.xdr_ea + offset;
+	par = info->par;
+	par->mode_id = ~ps3fb_mode;	/* != ps3fb_mode, to trigger change */
+	par->new_mode_id = ps3fb_mode;
+	par->res_index = res_index;
+	par->num_frames = 1;
+
+	info->screen_base = (char __iomem *)ps3fb.xdr_ea;
 	info->fbops = &ps3fb_ops;
 
 	info->fix = ps3fb_fix;
 	info->fix.smem_start = virt_to_abs(ps3fb.xdr_ea);
-	info->fix.smem_len = ps3fb_videomemory.size - offset;
-	info->pseudo_palette = info->par;
-	info->par = NULL;
-	info->flags = FBINFO_DEFAULT | FBINFO_READS_FAST;
+	info->fix.smem_len = ps3fb.xdr_size;
+	info->pseudo_palette = par->pseudo_palette;
+	info->flags = FBINFO_DEFAULT | FBINFO_READS_FAST |
+		      FBINFO_HWACCEL_XPAN | FBINFO_HWACCEL_YPAN;
 
 	retval = fb_alloc_cmap(&info->cmap, 256, 0);
 	if (retval < 0)
 		goto err_framebuffer_release;
 
 	if (!fb_find_mode(&info->var, info, mode_option, ps3fb_modedb,
-			  ARRAY_SIZE(ps3fb_modedb), ps3fb_default_mode(), 32)) {
+			  ARRAY_SIZE(ps3fb_modedb),
+			  ps3fb_default_mode(par->new_mode_id), 32)) {
 		retval = -EINVAL;
 		goto err_fb_dealloc;
 	}
@@ -1088,9 +1171,9 @@ static int __devinit ps3fb_probe(struct ps3_system_bus_device *dev)
 
 	dev->core.driver_data = info;
 
-	printk(KERN_INFO
-	       "fb%d: PS3 frame buffer device, using %ld KiB of video memory\n",
-	       info->node, ps3fb_videomemory.size >> 10);
+	dev_info(info->device, "%s %s, using %lu KiB of video memory\n",
+		 dev_driver_string(info->dev), info->dev->bus_id,
+		 ps3fb.xdr_size >> 10);
 
 	task = kthread_run(ps3fbd, info, DEVICE_NAME);
 	if (IS_ERR(task)) {
@@ -1127,7 +1210,7 @@ static int ps3fb_shutdown(struct ps3_system_bus_device *dev)
 	int status;
 	struct fb_info *info = dev->core.driver_data;
 
-	DPRINTK(" -> %s:%d\n", __func__, __LINE__);
+	dev_dbg(&dev->core, " -> %s:%d\n", __func__, __LINE__);
 
 	ps3fb_flip_ctl(0, &ps3fb);	/* flip off */
 	ps3fb.dinfo->irq.mask = 0;
@@ -1152,14 +1235,16 @@ static int ps3fb_shutdown(struct ps3_system_bus_device *dev)
 
 	status = lv1_gpu_context_free(ps3fb.context_handle);
 	if (status)
-		DPRINTK("lv1_gpu_context_free failed: %d\n", status);
+		dev_dbg(&dev->core, "lv1_gpu_context_free failed: %d\n",
+			status);
 
 	status = lv1_gpu_memory_free(ps3fb.memory_handle);
 	if (status)
-		DPRINTK("lv1_gpu_memory_free failed: %d\n", status);
+		dev_dbg(&dev->core, "lv1_gpu_memory_free failed: %d\n",
+			status);
 
 	ps3_close_hv_device(dev);
-	DPRINTK(" <- %s:%d\n", __func__, __LINE__);
+	dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__);
 
 	return 0;
 }
@@ -1212,9 +1297,9 @@ static int __init ps3fb_init(void)
 
 static void __exit ps3fb_exit(void)
 {
-	DPRINTK(" -> %s:%d\n", __func__, __LINE__);
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
 	ps3_system_bus_driver_unregister(&ps3fb_driver);
-	DPRINTK(" <- %s:%d\n", __func__, __LINE__);
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
 }
 
 module_init(ps3fb_init);
diff --git a/drivers/video/pvr2fb.c b/drivers/video/pvr2fb.c
index 06805c9b237..6a3d0b57489 100644
--- a/drivers/video/pvr2fb.c
+++ b/drivers/video/pvr2fb.c
@@ -72,7 +72,7 @@
 #endif
 
 #ifdef CONFIG_SH_STORE_QUEUES
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/cpu/sq.h>
 #endif
 
diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c
index f9b12ab5964..10f912df2da 100644
--- a/drivers/video/pxafb.c
+++ b/drivers/video/pxafb.c
@@ -43,7 +43,6 @@
 #include <asm/hardware.h>
 #include <asm/io.h>
 #include <asm/irq.h>
-#include <asm/uaccess.h>
 #include <asm/div64.h>
 #include <asm/arch/pxa-regs.h>
 #include <asm/arch/bitfield.h>
@@ -108,20 +107,38 @@ pxafb_setpalettereg(u_int regno, u_int red, u_int green, u_int blue,
 		       u_int trans, struct fb_info *info)
 {
 	struct pxafb_info *fbi = (struct pxafb_info *)info;
-	u_int val, ret = 1;
+	u_int val;
 
-	if (regno < fbi->palette_size) {
-		if (fbi->fb.var.grayscale) {
-			val = ((blue >> 8) & 0x00ff);
-		} else {
-			val  = ((red   >>  0) & 0xf800);
-			val |= ((green >>  5) & 0x07e0);
-			val |= ((blue  >> 11) & 0x001f);
-		}
+	if (regno >= fbi->palette_size)
+		return 1;
+
+	if (fbi->fb.var.grayscale) {
+		fbi->palette_cpu[regno] = ((blue >> 8) & 0x00ff);
+		return 0;
+	}
+
+	switch (fbi->lccr4 & LCCR4_PAL_FOR_MASK) {
+	case LCCR4_PAL_FOR_0:
+		val  = ((red   >>  0) & 0xf800);
+		val |= ((green >>  5) & 0x07e0);
+		val |= ((blue  >> 11) & 0x001f);
 		fbi->palette_cpu[regno] = val;
-		ret = 0;
+		break;
+	case LCCR4_PAL_FOR_1:
+		val  = ((red   << 8) & 0x00f80000);
+		val |= ((green >> 0) & 0x0000fc00);
+		val |= ((blue  >> 8) & 0x000000f8);
+		((u32*)(fbi->palette_cpu))[regno] = val;
+		break;
+	case LCCR4_PAL_FOR_2:
+		val  = ((red   << 8) & 0x00fc0000);
+		val |= ((green >> 0) & 0x0000fc00);
+		val |= ((blue  >> 8) & 0x000000fc);
+		((u32*)(fbi->palette_cpu))[regno] = val;
+		break;
 	}
-	return ret;
+
+	return 0;
 }
 
 static int
@@ -363,7 +380,10 @@ static int pxafb_set_par(struct fb_info *info)
 	else
 		fbi->palette_size = var->bits_per_pixel == 1 ? 4 : 1 << var->bits_per_pixel;
 
-	palette_mem_size = fbi->palette_size * sizeof(u16);
+	if ((fbi->lccr4 & LCCR4_PAL_FOR_MASK) == LCCR4_PAL_FOR_0)
+		palette_mem_size = fbi->palette_size * sizeof(u16);
+	else
+		palette_mem_size = fbi->palette_size * sizeof(u32);
 
 	pr_debug("pxafb: palette_mem_size = 0x%08lx\n", palette_mem_size);
 
@@ -680,7 +700,13 @@ static int pxafb_activate_var(struct fb_var_screeninfo *var, struct pxafb_info *
 
 	fbi->dmadesc_palette_cpu->fsadr = fbi->palette_dma;
 	fbi->dmadesc_palette_cpu->fidr  = 0;
-	fbi->dmadesc_palette_cpu->ldcmd = (fbi->palette_size * 2) | LDCMD_PAL;
+	if ((fbi->lccr4 & LCCR4_PAL_FOR_MASK) == LCCR4_PAL_FOR_0)
+		fbi->dmadesc_palette_cpu->ldcmd = fbi->palette_size *
+							sizeof(u16);
+	else
+		fbi->dmadesc_palette_cpu->ldcmd = fbi->palette_size *
+							sizeof(u32);
+	fbi->dmadesc_palette_cpu->ldcmd |= LDCMD_PAL;
 
 	if (var->bits_per_pixel == 16) {
 		/* palette shouldn't be loaded in true-color mode */
@@ -719,6 +745,8 @@ static int pxafb_activate_var(struct fb_var_screeninfo *var, struct pxafb_info *
 	fbi->reg_lccr1 = new_regs.lccr1;
 	fbi->reg_lccr2 = new_regs.lccr2;
 	fbi->reg_lccr3 = new_regs.lccr3;
+	fbi->reg_lccr4 = LCCR4 & (~LCCR4_PAL_FOR_MASK);
+	fbi->reg_lccr4 |= (fbi->lccr4 & LCCR4_PAL_FOR_MASK);
 	set_hsync_time(fbi, pcd);
 	local_irq_restore(flags);
 
@@ -825,6 +853,7 @@ static void pxafb_enable_controller(struct pxafb_info *fbi)
 	pr_debug("LCCR1 0x%08x\n", (unsigned int) LCCR1);
 	pr_debug("LCCR2 0x%08x\n", (unsigned int) LCCR2);
 	pr_debug("LCCR3 0x%08x\n", (unsigned int) LCCR3);
+	pr_debug("LCCR4 0x%08x\n", (unsigned int) LCCR4);
 }
 
 static void pxafb_disable_controller(struct pxafb_info *fbi)
@@ -1094,10 +1123,13 @@ static int __init pxafb_map_video_memory(struct pxafb_info *fbi)
 		 * dma_writecombine_mmap)
 		 */
 		fbi->fb.fix.smem_start = fbi->screen_dma;
-
 		fbi->palette_size = fbi->fb.var.bits_per_pixel == 8 ? 256 : 16;
 
-		palette_mem_size = fbi->palette_size * sizeof(u16);
+		if ((fbi->lccr4 & LCCR4_PAL_FOR_MASK) == LCCR4_PAL_FOR_0)
+			palette_mem_size = fbi->palette_size * sizeof(u16);
+		else
+			palette_mem_size = fbi->palette_size * sizeof(u32);
+
 		pr_debug("pxafb: palette_mem_size = 0x%08lx\n", palette_mem_size);
 
 		fbi->palette_cpu = (u16 *)(fbi->map_cpu + PAGE_SIZE - palette_mem_size);
@@ -1160,6 +1192,7 @@ static struct pxafb_info * __init pxafb_init_fbinfo(struct device *dev)
 
 	fbi->lccr0			= inf->lccr0;
 	fbi->lccr3			= inf->lccr3;
+	fbi->lccr4			= inf->lccr4;
 	fbi->state			= C_STARTUP;
 	fbi->task_state			= (u_char)-1;
 
diff --git a/drivers/video/pxafb.h b/drivers/video/pxafb.h
index f8605b807b0..d920b8a14c3 100644
--- a/drivers/video/pxafb.h
+++ b/drivers/video/pxafb.h
@@ -71,6 +71,7 @@ struct pxafb_info {
 
 	u_int			lccr0;
 	u_int			lccr3;
+	u_int			lccr4;
 	u_int			cmap_inverse:1,
 				cmap_static:1,
 				unused:30;
@@ -79,6 +80,7 @@ struct pxafb_info {
 	u_int			reg_lccr1;
 	u_int			reg_lccr2;
 	u_int			reg_lccr3;
+	u_int			reg_lccr4;
 
 	unsigned long	hsync_time;
 
diff --git a/drivers/video/s3c2410fb.c b/drivers/video/s3c2410fb.c
index 8a4c6470d79..ae08d458709 100644
--- a/drivers/video/s3c2410fb.c
+++ b/drivers/video/s3c2410fb.c
@@ -20,7 +20,7 @@
  *
  * 2004-12-04: Arnaud Patard <arnaud.patard@rtp-net.org>
  *      - Added the possibility to set on or off the
- *      debugging mesaages
+ *      debugging messages
  *      - Replaced 0 and 1 by on or off when reading the
  *      /sys files
  *
@@ -31,8 +31,8 @@
  *	- add pixel clock divisor control
  *
  * 2004-11-11: Arnaud Patard <arnaud.patard@rtp-net.org>
- * 	- Removed the use of currcon as it no more exist
- * 	- Added LCD power sysfs interface
+ *	- Removed the use of currcon as it no longer exists
+ *	- Added LCD power sysfs interface
  *
  * 2004-11-03: Ben Dooks <ben-linux@fluff.org>
  *	- minor cleanups
@@ -49,12 +49,12 @@
  *      - Suppress command line options
  *
  * 2004-09-15: Arnaud Patard <arnaud.patard@rtp-net.org>
- * 	- code cleanup
+ *	- code cleanup
  *
  * 2004-09-07: Arnaud Patard <arnaud.patard@rtp-net.org>
- * 	- Renamed from h1940fb.c to s3c2410fb.c
- * 	- Add support for different devices
- * 	- Backlight support
+ *	- Renamed from h1940fb.c to s3c2410fb.c
+ *	- Add support for different devices
+ *	- Backlight support
  *
  * 2004-09-05: Herbert P�tzl <herbert@13thfloor.at>
  *	- added clock (de-)allocation code
@@ -82,13 +82,10 @@
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
-#include <linux/workqueue.h>
-#include <linux/wait.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 
 #include <asm/io.h>
-#include <asm/uaccess.h>
 #include <asm/div64.h>
 
 #include <asm/mach/map.h>
@@ -102,14 +99,11 @@
 
 #include "s3c2410fb.h"
 
-
-static struct s3c2410fb_mach_info *mach_info;
-
 /* Debugging stuff */
 #ifdef CONFIG_FB_S3C2410_DEBUG
-static int debug	   = 1;
+static int debug	= 1;
 #else
-static int debug	   = 0;
+static int debug	= 0;
 #endif
 
 #define dprintk(msg...)	if (debug) { printk(KERN_DEBUG "s3c2410fb: " msg); }
@@ -119,48 +113,48 @@ static int debug	   = 0;
 /* s3c2410fb_set_lcdaddr
  *
  * initialise lcd controller address pointers
-*/
-
-static void s3c2410fb_set_lcdaddr(struct s3c2410fb_info *fbi)
+ */
+static void s3c2410fb_set_lcdaddr(struct fb_info *info)
 {
-	struct fb_var_screeninfo *var = &fbi->fb->var;
 	unsigned long saddr1, saddr2, saddr3;
+	struct s3c2410fb_info *fbi = info->par;
+	void __iomem *regs = fbi->io;
 
-	saddr1  = fbi->fb->fix.smem_start >> 1;
-	saddr2  = fbi->fb->fix.smem_start;
-	saddr2 += (var->xres * var->yres * var->bits_per_pixel)/8;
-	saddr2>>= 1;
+	saddr1  = info->fix.smem_start >> 1;
+	saddr2  = info->fix.smem_start;
+	saddr2 += info->fix.line_length * info->var.yres;
+	saddr2 >>= 1;
 
-	saddr3 =  S3C2410_OFFSIZE(0) | S3C2410_PAGEWIDTH((var->xres * var->bits_per_pixel / 16) & 0x3ff);
+	saddr3 = S3C2410_OFFSIZE(0) |
+		 S3C2410_PAGEWIDTH((info->fix.line_length / 2) & 0x3ff);
 
 	dprintk("LCDSADDR1 = 0x%08lx\n", saddr1);
 	dprintk("LCDSADDR2 = 0x%08lx\n", saddr2);
 	dprintk("LCDSADDR3 = 0x%08lx\n", saddr3);
 
-	writel(saddr1, S3C2410_LCDSADDR1);
-	writel(saddr2, S3C2410_LCDSADDR2);
-	writel(saddr3, S3C2410_LCDSADDR3);
+	writel(saddr1, regs + S3C2410_LCDSADDR1);
+	writel(saddr2, regs + S3C2410_LCDSADDR2);
+	writel(saddr3, regs + S3C2410_LCDSADDR3);
 }
 
 /* s3c2410fb_calc_pixclk()
  *
  * calculate divisor for clk->pixclk
-*/
-
+ */
 static unsigned int s3c2410fb_calc_pixclk(struct s3c2410fb_info *fbi,
 					  unsigned long pixclk)
 {
 	unsigned long clk = clk_get_rate(fbi->clk);
 	unsigned long long div;
 
-	/* pixclk is in picoseoncds, our clock is in Hz
+	/* pixclk is in picoseconds, our clock is in Hz
 	 *
 	 * Hz -> picoseconds is / 10^-12
 	 */
 
 	div = (unsigned long long)clk * pixclk;
-	do_div(div,1000000UL);
-	do_div(div,1000000UL);
+	div >>= 12;			/* div / 2^12 */
+	do_div(div, 625 * 625UL * 625); /* div / 5^12 */
 
 	dprintk("pixclk %ld, divisor is %ld\n", pixclk, (long)div);
 	return div;
@@ -176,246 +170,278 @@ static int s3c2410fb_check_var(struct fb_var_screeninfo *var,
 			       struct fb_info *info)
 {
 	struct s3c2410fb_info *fbi = info->par;
+	struct s3c2410fb_mach_info *mach_info = fbi->dev->platform_data;
+	struct s3c2410fb_display *display = NULL;
+	struct s3c2410fb_display *default_display = mach_info->displays +
+						    mach_info->default_display;
+	int type = default_display->type;
+	unsigned i;
 
 	dprintk("check_var(var=%p, info=%p)\n", var, info);
 
 	/* validate x/y resolution */
+	/* choose default mode if possible */
+	if (var->yres == default_display->yres &&
+	    var->xres == default_display->xres &&
+	    var->bits_per_pixel == default_display->bpp)
+		display = default_display;
+	else
+		for (i = 0; i < mach_info->num_displays; i++)
+			if (type == mach_info->displays[i].type &&
+			    var->yres == mach_info->displays[i].yres &&
+			    var->xres == mach_info->displays[i].xres &&
+			    var->bits_per_pixel == mach_info->displays[i].bpp) {
+				display = mach_info->displays + i;
+				break;
+			}
 
-	if (var->yres > fbi->mach_info->yres.max)
-		var->yres = fbi->mach_info->yres.max;
-	else if (var->yres < fbi->mach_info->yres.min)
-		var->yres = fbi->mach_info->yres.min;
-
-	if (var->xres > fbi->mach_info->xres.max)
-		var->yres = fbi->mach_info->xres.max;
-	else if (var->xres < fbi->mach_info->xres.min)
-		var->xres = fbi->mach_info->xres.min;
-
-	/* validate bpp */
-
-	if (var->bits_per_pixel > fbi->mach_info->bpp.max)
-		var->bits_per_pixel = fbi->mach_info->bpp.max;
-	else if (var->bits_per_pixel < fbi->mach_info->bpp.min)
-		var->bits_per_pixel = fbi->mach_info->bpp.min;
+	if (!display) {
+		dprintk("wrong resolution or depth %dx%d at %d bpp\n",
+			var->xres, var->yres, var->bits_per_pixel);
+		return -EINVAL;
+	}
 
+	/* it is always the same size as the display */
+	var->xres_virtual = display->xres;
+	var->yres_virtual = display->yres;
+	var->height = display->height;
+	var->width = display->width;
+
+	/* copy lcd settings */
+	var->pixclock = display->pixclock;
+	var->left_margin = display->left_margin;
+	var->right_margin = display->right_margin;
+	var->upper_margin = display->upper_margin;
+	var->lower_margin = display->lower_margin;
+	var->vsync_len = display->vsync_len;
+	var->hsync_len = display->hsync_len;
+
+	fbi->regs.lcdcon5 = display->lcdcon5;
+	/* set display type */
+	fbi->regs.lcdcon1 = display->type;
+
+	var->transp.offset = 0;
+	var->transp.length = 0;
 	/* set r/g/b positions */
 	switch (var->bits_per_pixel) {
-		case 1:
-		case 2:
-		case 4:
-			var->red.offset    	= 0;
-			var->red.length    	= var->bits_per_pixel;
-			var->green         	= var->red;
-			var->blue          	= var->red;
-			var->transp.offset 	= 0;
-			var->transp.length 	= 0;
-			break;
-		case 8:
-			if ( fbi->mach_info->type != S3C2410_LCDCON1_TFT ) {
-				/* 8 bpp 332 */
-				var->red.length		= 3;
-				var->red.offset		= 5;
-				var->green.length	= 3;
-				var->green.offset	= 2;
-				var->blue.length	= 2;
-				var->blue.offset	= 0;
-				var->transp.length	= 0;
-			} else {
-				var->red.offset    	= 0;
-				var->red.length    	= var->bits_per_pixel;
-				var->green         	= var->red;
-				var->blue          	= var->red;
-				var->transp.offset 	= 0;
-				var->transp.length 	= 0;
-			}
-			break;
-		case 12:
-			/* 12 bpp 444 */
-			var->red.length		= 4;
-			var->red.offset		= 8;
-			var->green.length	= 4;
-			var->green.offset	= 4;
-			var->blue.length	= 4;
+	case 1:
+	case 2:
+	case 4:
+		var->red.offset	= 0;
+		var->red.length	= var->bits_per_pixel;
+		var->green	= var->red;
+		var->blue	= var->red;
+		break;
+	case 8:
+		if (display->type != S3C2410_LCDCON1_TFT) {
+			/* 8 bpp 332 */
+			var->red.length		= 3;
+			var->red.offset		= 5;
+			var->green.length	= 3;
+			var->green.offset	= 2;
+			var->blue.length	= 2;
 			var->blue.offset	= 0;
-			var->transp.length	= 0;
-			break;
-
-		default:
-		case 16:
-			if (fbi->regs.lcdcon5 & S3C2410_LCDCON5_FRM565 ) {
-				/* 16 bpp, 565 format */
-				var->red.offset		= 11;
-				var->green.offset	= 5;
-				var->blue.offset	= 0;
-				var->red.length		= 5;
-				var->green.length	= 6;
-				var->blue.length	= 5;
-				var->transp.length	= 0;
-			} else {
-				/* 16 bpp, 5551 format */
-				var->red.offset		= 11;
-				var->green.offset	= 6;
-				var->blue.offset	= 1;
-				var->red.length		= 5;
-				var->green.length	= 5;
-				var->blue.length	= 5;
-				var->transp.length	= 0;
-			}
-			break;
-		case 24:
-			/* 24 bpp 888 */
+		} else {
+			var->red.offset		= 0;
 			var->red.length		= 8;
-			var->red.offset		= 16;
-			var->green.length	= 8;
-			var->green.offset	= 8;
-			var->blue.length	= 8;
-			var->blue.offset	= 0;
-			var->transp.length	= 0;
-			break;
-
+			var->green		= var->red;
+			var->blue		= var->red;
+		}
+		break;
+	case 12:
+		/* 12 bpp 444 */
+		var->red.length		= 4;
+		var->red.offset		= 8;
+		var->green.length	= 4;
+		var->green.offset	= 4;
+		var->blue.length	= 4;
+		var->blue.offset	= 0;
+		break;
 
+	default:
+	case 16:
+		if (display->lcdcon5 & S3C2410_LCDCON5_FRM565) {
+			/* 16 bpp, 565 format */
+			var->red.offset		= 11;
+			var->green.offset	= 5;
+			var->blue.offset	= 0;
+			var->red.length		= 5;
+			var->green.length	= 6;
+			var->blue.length	= 5;
+		} else {
+			/* 16 bpp, 5551 format */
+			var->red.offset		= 11;
+			var->green.offset	= 6;
+			var->blue.offset	= 1;
+			var->red.length		= 5;
+			var->green.length	= 5;
+			var->blue.length	= 5;
+		}
+		break;
+	case 32:
+		/* 24 bpp 888 and 8 dummy */
+		var->red.length		= 8;
+		var->red.offset		= 16;
+		var->green.length	= 8;
+		var->green.offset	= 8;
+		var->blue.length	= 8;
+		var->blue.offset	= 0;
+		break;
 	}
 	return 0;
 }
 
-
-/* s3c2410fb_activate_var
+/* s3c2410fb_calculate_stn_lcd_regs
  *
- * activate (set) the controller from the given framebuffer
- * information
-*/
-
-static void s3c2410fb_activate_var(struct s3c2410fb_info *fbi,
-				   struct fb_var_screeninfo *var)
+ * calculate register values from var settings
+ */
+static void s3c2410fb_calculate_stn_lcd_regs(const struct fb_info *info,
+					     struct s3c2410fb_hw *regs)
 {
-	int hs;
+	const struct s3c2410fb_info *fbi = info->par;
+	const struct fb_var_screeninfo *var = &info->var;
+	int type = regs->lcdcon1 & ~S3C2410_LCDCON1_TFT;
+	int hs = var->xres >> 2;
+	unsigned wdly = (var->left_margin >> 4) - 1;
+	unsigned wlh = (var->hsync_len >> 4) - 1;
 
-	fbi->regs.lcdcon1 &= ~S3C2410_LCDCON1_MODEMASK;
-	fbi->regs.lcdcon1 &= ~S3C2410_LCDCON1_TFT;
+	if (type != S3C2410_LCDCON1_STN4)
+		hs >>= 1;
 
-	dprintk("%s: var->xres  = %d\n", __FUNCTION__, var->xres);
-	dprintk("%s: var->yres  = %d\n", __FUNCTION__, var->yres);
-	dprintk("%s: var->bpp   = %d\n", __FUNCTION__, var->bits_per_pixel);
+	switch (var->bits_per_pixel) {
+	case 1:
+		regs->lcdcon1 |= S3C2410_LCDCON1_STN1BPP;
+		break;
+	case 2:
+		regs->lcdcon1 |= S3C2410_LCDCON1_STN2GREY;
+		break;
+	case 4:
+		regs->lcdcon1 |= S3C2410_LCDCON1_STN4GREY;
+		break;
+	case 8:
+		regs->lcdcon1 |= S3C2410_LCDCON1_STN8BPP;
+		hs *= 3;
+		break;
+	case 12:
+		regs->lcdcon1 |= S3C2410_LCDCON1_STN12BPP;
+		hs *= 3;
+		break;
 
-	fbi->regs.lcdcon1 |= fbi->mach_info->type;
-
-	if (fbi->mach_info->type == S3C2410_LCDCON1_TFT)
-		switch (var->bits_per_pixel) {
-		case 1:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_TFT1BPP;
-			break;
-		case 2:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_TFT2BPP;
-			break;
-		case 4:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_TFT4BPP;
-			break;
-		case 8:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_TFT8BPP;
-			break;
-		case 16:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_TFT16BPP;
-			break;
-
-		default:
-			/* invalid pixel depth */
-			dev_err(fbi->dev, "invalid bpp %d\n", var->bits_per_pixel);
-		}
-	else
-		switch (var->bits_per_pixel) {
-		case 1:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_STN1BPP;
-			break;
-		case 2:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_STN2GREY;
-			break;
-		case 4:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_STN4GREY;
-			break;
-		case 8:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_STN8BPP;
-			break;
-		case 12:
-			fbi->regs.lcdcon1 |= S3C2410_LCDCON1_STN12BPP;
-			break;
-
-		default:
-			/* invalid pixel depth */
-			dev_err(fbi->dev, "invalid bpp %d\n", var->bits_per_pixel);
-		}
+	default:
+		/* invalid pixel depth */
+		dev_err(fbi->dev, "invalid bpp %d\n",
+			var->bits_per_pixel);
+	}
+	/* update X/Y info */
+	dprintk("setting horz: lft=%d, rt=%d, sync=%d\n",
+		var->left_margin, var->right_margin, var->hsync_len);
 
-	/* check to see if we need to update sync/borders */
+	regs->lcdcon2 = S3C2410_LCDCON2_LINEVAL(var->yres - 1);
 
-	if (!fbi->mach_info->fixed_syncs) {
-		dprintk("setting vert: up=%d, low=%d, sync=%d\n",
-			var->upper_margin, var->lower_margin,
-			var->vsync_len);
+	if (wdly > 3)
+		wdly = 3;
 
-		dprintk("setting horz: lft=%d, rt=%d, sync=%d\n",
-			var->left_margin, var->right_margin,
-			var->hsync_len);
+	if (wlh > 3)
+		wlh = 3;
 
-		fbi->regs.lcdcon2 =
-			S3C2410_LCDCON2_VBPD(var->upper_margin - 1) |
-			S3C2410_LCDCON2_VFPD(var->lower_margin - 1) |
-			S3C2410_LCDCON2_VSPW(var->vsync_len - 1);
+	regs->lcdcon3 =	S3C2410_LCDCON3_WDLY(wdly) |
+			S3C2410_LCDCON3_LINEBLANK(var->right_margin / 8) |
+			S3C2410_LCDCON3_HOZVAL(hs - 1);
 
-		fbi->regs.lcdcon3 =
-			S3C2410_LCDCON3_HBPD(var->right_margin - 1) |
-			S3C2410_LCDCON3_HFPD(var->left_margin - 1);
+	regs->lcdcon4 = S3C2410_LCDCON4_WLH(wlh);
+}
 
-		fbi->regs.lcdcon4 &= ~S3C2410_LCDCON4_HSPW(0xff);
-		fbi->regs.lcdcon4 |=  S3C2410_LCDCON4_HSPW(var->hsync_len - 1);
-	}
+/* s3c2410fb_calculate_tft_lcd_regs
+ *
+ * calculate register values from var settings
+ */
+static void s3c2410fb_calculate_tft_lcd_regs(const struct fb_info *info,
+					     struct s3c2410fb_hw *regs)
+{
+	const struct s3c2410fb_info *fbi = info->par;
+	const struct fb_var_screeninfo *var = &info->var;
 
+	switch (var->bits_per_pixel) {
+	case 1:
+		regs->lcdcon1 |= S3C2410_LCDCON1_TFT1BPP;
+		break;
+	case 2:
+		regs->lcdcon1 |= S3C2410_LCDCON1_TFT2BPP;
+		break;
+	case 4:
+		regs->lcdcon1 |= S3C2410_LCDCON1_TFT4BPP;
+		break;
+	case 8:
+		regs->lcdcon1 |= S3C2410_LCDCON1_TFT8BPP;
+		regs->lcdcon5 |= S3C2410_LCDCON5_BSWP |
+				 S3C2410_LCDCON5_FRM565;
+		regs->lcdcon5 &= ~S3C2410_LCDCON5_HWSWP;
+		break;
+	case 16:
+		regs->lcdcon1 |= S3C2410_LCDCON1_TFT16BPP;
+		regs->lcdcon5 &= ~S3C2410_LCDCON5_BSWP;
+		regs->lcdcon5 |= S3C2410_LCDCON5_HWSWP;
+		break;
+	case 32:
+		regs->lcdcon1 |= S3C2410_LCDCON1_TFT24BPP;
+		regs->lcdcon5 &= ~(S3C2410_LCDCON5_BSWP |
+				   S3C2410_LCDCON5_HWSWP |
+				   S3C2410_LCDCON5_BPP24BL);
+		break;
+	default:
+		/* invalid pixel depth */
+		dev_err(fbi->dev, "invalid bpp %d\n",
+			var->bits_per_pixel);
+	}
 	/* update X/Y info */
+	dprintk("setting vert: up=%d, low=%d, sync=%d\n",
+		var->upper_margin, var->lower_margin, var->vsync_len);
 
-	fbi->regs.lcdcon2 &= ~S3C2410_LCDCON2_LINEVAL(0x3ff);
-	fbi->regs.lcdcon2 |=  S3C2410_LCDCON2_LINEVAL(var->yres - 1);
-
-	switch(fbi->mach_info->type) {
-		case S3C2410_LCDCON1_DSCAN4:
-		case S3C2410_LCDCON1_STN8:
-			hs = var->xres / 8;
-			break;
-		case S3C2410_LCDCON1_STN4:
-			hs = var->xres / 4;
-			break;
-		default:
-		case S3C2410_LCDCON1_TFT:
-			hs = var->xres;
-			break;
-
-	}
+	dprintk("setting horz: lft=%d, rt=%d, sync=%d\n",
+		var->left_margin, var->right_margin, var->hsync_len);
 
-	/* Special cases : STN color displays */
-	if ( ((fbi->regs.lcdcon1 & S3C2410_LCDCON1_MODEMASK) == S3C2410_LCDCON1_STN8BPP) \
-	  || ((fbi->regs.lcdcon1 & S3C2410_LCDCON1_MODEMASK) == S3C2410_LCDCON1_STN12BPP) ) {
-		hs = hs * 3;
-	}
+	regs->lcdcon2 = S3C2410_LCDCON2_LINEVAL(var->yres - 1) |
+			S3C2410_LCDCON2_VBPD(var->upper_margin - 1) |
+			S3C2410_LCDCON2_VFPD(var->lower_margin - 1) |
+			S3C2410_LCDCON2_VSPW(var->vsync_len - 1);
 
+	regs->lcdcon3 = S3C2410_LCDCON3_HBPD(var->right_margin - 1) |
+			S3C2410_LCDCON3_HFPD(var->left_margin - 1) |
+			S3C2410_LCDCON3_HOZVAL(var->xres - 1);
 
-	fbi->regs.lcdcon3 &= ~S3C2410_LCDCON3_HOZVAL(0x7ff);
-	fbi->regs.lcdcon3 |=  S3C2410_LCDCON3_HOZVAL(hs - 1);
+	regs->lcdcon4 = S3C2410_LCDCON4_HSPW(var->hsync_len - 1);
+}
 
-	if (var->pixclock > 0) {
-		int clkdiv = s3c2410fb_calc_pixclk(fbi, var->pixclock);
+/* s3c2410fb_activate_var
+ *
+ * activate (set) the controller from the given framebuffer
+ * information
+ */
+static void s3c2410fb_activate_var(struct fb_info *info)
+{
+	struct s3c2410fb_info *fbi = info->par;
+	void __iomem *regs = fbi->io;
+	int type = fbi->regs.lcdcon1 & S3C2410_LCDCON1_TFT;
+	struct fb_var_screeninfo *var = &info->var;
+	int clkdiv = s3c2410fb_calc_pixclk(fbi, var->pixclock) / 2;
 
-		if (fbi->mach_info->type == S3C2410_LCDCON1_TFT) {
-			clkdiv = (clkdiv / 2) -1;
-			if (clkdiv < 0)
-				clkdiv = 0;
-		}
-		else {
-			clkdiv = (clkdiv / 2);
-			if (clkdiv < 2)
-				clkdiv = 2;
-		}
+	dprintk("%s: var->xres  = %d\n", __FUNCTION__, var->xres);
+	dprintk("%s: var->yres  = %d\n", __FUNCTION__, var->yres);
+	dprintk("%s: var->bpp   = %d\n", __FUNCTION__, var->bits_per_pixel);
 
-		fbi->regs.lcdcon1 &= ~S3C2410_LCDCON1_CLKVAL(0x3ff);
-		fbi->regs.lcdcon1 |=  S3C2410_LCDCON1_CLKVAL(clkdiv);
+	if (type == S3C2410_LCDCON1_TFT) {
+		s3c2410fb_calculate_tft_lcd_regs(info, &fbi->regs);
+		--clkdiv;
+		if (clkdiv < 0)
+			clkdiv = 0;
+	} else {
+		s3c2410fb_calculate_stn_lcd_regs(info, &fbi->regs);
+		if (clkdiv < 2)
+			clkdiv = 2;
 	}
 
+	fbi->regs.lcdcon1 |=  S3C2410_LCDCON1_CLKVAL(clkdiv);
+
 	/* write new registers */
 
 	dprintk("new register set:\n");
@@ -425,47 +451,48 @@ static void s3c2410fb_activate_var(struct s3c2410fb_info *fbi,
 	dprintk("lcdcon[4] = 0x%08lx\n", fbi->regs.lcdcon4);
 	dprintk("lcdcon[5] = 0x%08lx\n", fbi->regs.lcdcon5);
 
-	writel(fbi->regs.lcdcon1 & ~S3C2410_LCDCON1_ENVID, S3C2410_LCDCON1);
-	writel(fbi->regs.lcdcon2, S3C2410_LCDCON2);
-	writel(fbi->regs.lcdcon3, S3C2410_LCDCON3);
-	writel(fbi->regs.lcdcon4, S3C2410_LCDCON4);
-	writel(fbi->regs.lcdcon5, S3C2410_LCDCON5);
+	writel(fbi->regs.lcdcon1 & ~S3C2410_LCDCON1_ENVID,
+		regs + S3C2410_LCDCON1);
+	writel(fbi->regs.lcdcon2, regs + S3C2410_LCDCON2);
+	writel(fbi->regs.lcdcon3, regs + S3C2410_LCDCON3);
+	writel(fbi->regs.lcdcon4, regs + S3C2410_LCDCON4);
+	writel(fbi->regs.lcdcon5, regs + S3C2410_LCDCON5);
 
 	/* set lcd address pointers */
-	s3c2410fb_set_lcdaddr(fbi);
+	s3c2410fb_set_lcdaddr(info);
 
-	writel(fbi->regs.lcdcon1, S3C2410_LCDCON1);
+	fbi->regs.lcdcon1 |= S3C2410_LCDCON1_ENVID;	/* enable video */
+	writel(fbi->regs.lcdcon1, regs + S3C2410_LCDCON1);
 }
 
-
 /*
- *      s3c2410fb_set_par - Optional function. Alters the hardware state.
+ *      s3c2410fb_set_par - Sets visual and line length, programs the LCD.
  *      @info: frame buffer structure that represents a single frame buffer
  *
  */
 static int s3c2410fb_set_par(struct fb_info *info)
 {
-	struct s3c2410fb_info *fbi = info->par;
 	struct fb_var_screeninfo *var = &info->var;
 
-	switch (var->bits_per_pixel)
-	{
-		case 16:
-			fbi->fb->fix.visual = FB_VISUAL_TRUECOLOR;
-			break;
-		case 1:
-			 fbi->fb->fix.visual = FB_VISUAL_MONO01;
-			 break;
-		default:
-			 fbi->fb->fix.visual = FB_VISUAL_PSEUDOCOLOR;
-			 break;
+	switch (var->bits_per_pixel) {
+	case 32:
+	case 16:
+	case 12:
+		info->fix.visual = FB_VISUAL_TRUECOLOR;
+		break;
+	case 1:
+		info->fix.visual = FB_VISUAL_MONO01;
+		break;
+	default:
+		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+		break;
 	}
 
-	fbi->fb->fix.line_length     = (var->width*var->bits_per_pixel)/8;
+	info->fix.line_length = (var->xres_virtual * var->bits_per_pixel) / 8;
 
 	/* activate this new configuration */
 
-	s3c2410fb_activate_var(fbi, var);
+	s3c2410fb_activate_var(info);
 	return 0;
 }
 
@@ -493,7 +520,8 @@ static void schedule_palette_update(struct s3c2410fb_info *fbi,
 }
 
 /* from pxafb.c */
-static inline unsigned int chan_to_field(unsigned int chan, struct fb_bitfield *bf)
+static inline unsigned int chan_to_field(unsigned int chan,
+					 struct fb_bitfield *bf)
 {
 	chan &= 0xffff;
 	chan >>= 16 - bf->length;
@@ -505,20 +533,22 @@ static int s3c2410fb_setcolreg(unsigned regno,
 			       unsigned transp, struct fb_info *info)
 {
 	struct s3c2410fb_info *fbi = info->par;
+	void __iomem *regs = fbi->io;
 	unsigned int val;
 
-	/* dprintk("setcol: regno=%d, rgb=%d,%d,%d\n", regno, red, green, blue); */
+	/* dprintk("setcol: regno=%d, rgb=%d,%d,%d\n",
+		   regno, red, green, blue); */
 
-	switch (fbi->fb->fix.visual) {
+	switch (info->fix.visual) {
 	case FB_VISUAL_TRUECOLOR:
-		/* true-colour, use pseuo-palette */
+		/* true-colour, use pseudo-palette */
 
 		if (regno < 16) {
-			u32 *pal = fbi->fb->pseudo_palette;
+			u32 *pal = info->pseudo_palette;
 
-			val  = chan_to_field(red,   &fbi->fb->var.red);
-			val |= chan_to_field(green, &fbi->fb->var.green);
-			val |= chan_to_field(blue,  &fbi->fb->var.blue);
+			val  = chan_to_field(red,   &info->var.red);
+			val |= chan_to_field(green, &info->var.green);
+			val |= chan_to_field(blue,  &info->var.blue);
 
 			pal[regno] = val;
 		}
@@ -528,25 +558,24 @@ static int s3c2410fb_setcolreg(unsigned regno,
 		if (regno < 256) {
 			/* currently assume RGB 5-6-5 mode */
 
-			val  = ((red   >>  0) & 0xf800);
-			val |= ((green >>  5) & 0x07e0);
-			val |= ((blue  >> 11) & 0x001f);
+			val  = (red   >>  0) & 0xf800;
+			val |= (green >>  5) & 0x07e0;
+			val |= (blue  >> 11) & 0x001f;
 
-			writel(val, S3C2410_TFTPAL(regno));
+			writel(val, regs + S3C2410_TFTPAL(regno));
 			schedule_palette_update(fbi, regno, val);
 		}
 
 		break;
 
 	default:
-		return 1;   /* unknown type */
+		return 1;	/* unknown type */
 	}
 
 	return 0;
 }
 
-
-/**
+/*
  *      s3c2410fb_blank
  *	@blank_mode: the blank mode we want.
  *	@info: frame buffer structure that represents a single frame buffer
@@ -564,31 +593,31 @@ static int s3c2410fb_setcolreg(unsigned regno,
  */
 static int s3c2410fb_blank(int blank_mode, struct fb_info *info)
 {
-	dprintk("blank(mode=%d, info=%p)\n", blank_mode, info);
+	struct s3c2410fb_info *fbi = info->par;
+	void __iomem *regs = fbi->io;
 
-	if (mach_info == NULL)
-		return -EINVAL;
+	dprintk("blank(mode=%d, info=%p)\n", blank_mode, info);
 
 	if (blank_mode == FB_BLANK_UNBLANK)
-		writel(0x0, S3C2410_TPAL);
+		writel(0x0, regs + S3C2410_TPAL);
 	else {
 		dprintk("setting TPAL to output 0x000000\n");
-		writel(S3C2410_TPAL_EN, S3C2410_TPAL);
+		writel(S3C2410_TPAL_EN, regs + S3C2410_TPAL);
 	}
 
 	return 0;
 }
 
-static int s3c2410fb_debug_show(struct device *dev, struct device_attribute *attr, char *buf)
+static int s3c2410fb_debug_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	return snprintf(buf, PAGE_SIZE, "%s\n", debug ? "on" : "off");
 }
-static int s3c2410fb_debug_store(struct device *dev, struct device_attribute *attr,
-					   const char *buf, size_t len)
-{
-	if (mach_info == NULL)
-		return -EINVAL;
 
+static int s3c2410fb_debug_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t len)
+{
 	if (len < 1)
 		return -EINVAL;
 
@@ -607,10 +636,7 @@ static int s3c2410fb_debug_store(struct device *dev, struct device_attribute *at
 	return len;
 }
 
-
-static DEVICE_ATTR(debug, 0666,
-		   s3c2410fb_debug_show,
-		   s3c2410fb_debug_store);
+static DEVICE_ATTR(debug, 0664, s3c2410fb_debug_show, s3c2410fb_debug_store);
 
 static struct fb_ops s3c2410fb_ops = {
 	.owner		= THIS_MODULE,
@@ -623,7 +649,6 @@ static struct fb_ops s3c2410fb_ops = {
 	.fb_imageblit	= cfb_imageblit,
 };
 
-
 /*
  * s3c2410fb_map_video_memory():
  *	Allocates the DRAM memory for the frame buffer.  This buffer is
@@ -632,36 +657,38 @@ static struct fb_ops s3c2410fb_ops = {
  *	cache.  Once this area is remapped, all virtual memory
  *	access to the video memory should occur at the new region.
  */
-static int __init s3c2410fb_map_video_memory(struct s3c2410fb_info *fbi)
+static int __init s3c2410fb_map_video_memory(struct fb_info *info)
 {
-	dprintk("map_video_memory(fbi=%p)\n", fbi);
+	struct s3c2410fb_info *fbi = info->par;
+	dma_addr_t map_dma;
+	unsigned map_size = PAGE_ALIGN(info->fix.smem_len);
 
-	fbi->map_size = PAGE_ALIGN(fbi->fb->fix.smem_len + PAGE_SIZE);
-	fbi->map_cpu  = dma_alloc_writecombine(fbi->dev, fbi->map_size,
-					       &fbi->map_dma, GFP_KERNEL);
+	dprintk("map_video_memory(fbi=%p)\n", fbi);
 
-	fbi->map_size = fbi->fb->fix.smem_len;
+	info->screen_base = dma_alloc_writecombine(fbi->dev, map_size,
+						   &map_dma, GFP_KERNEL);
 
-	if (fbi->map_cpu) {
+	if (info->screen_base) {
 		/* prevent initial garbage on screen */
 		dprintk("map_video_memory: clear %p:%08x\n",
-			fbi->map_cpu, fbi->map_size);
-		memset(fbi->map_cpu, 0xf0, fbi->map_size);
+			info->screen_base, map_size);
+		memset(info->screen_base, 0xf0, map_size);
 
-		fbi->screen_dma		= fbi->map_dma;
-		fbi->fb->screen_base	= fbi->map_cpu;
-		fbi->fb->fix.smem_start  = fbi->screen_dma;
+		info->fix.smem_start = map_dma;
 
-		dprintk("map_video_memory: dma=%08x cpu=%p size=%08x\n",
-			fbi->map_dma, fbi->map_cpu, fbi->fb->fix.smem_len);
+		dprintk("map_video_memory: dma=%08lx cpu=%p size=%08x\n",
+			info->fix.smem_start, info->screen_base, map_size);
 	}
 
-	return fbi->map_cpu ? 0 : -ENOMEM;
+	return info->screen_base ? 0 : -ENOMEM;
 }
 
-static inline void s3c2410fb_unmap_video_memory(struct s3c2410fb_info *fbi)
+static inline void s3c2410fb_unmap_video_memory(struct fb_info *info)
 {
-	dma_free_writecombine(fbi->dev,fbi->map_size,fbi->map_cpu, fbi->map_dma);
+	struct s3c2410fb_info *fbi = info->par;
+
+	dma_free_writecombine(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
+			      info->screen_base, info->fix.smem_start);
 }
 
 static inline void modify_gpio(void __iomem *reg,
@@ -673,13 +700,13 @@ static inline void modify_gpio(void __iomem *reg,
 	writel(tmp | set, reg);
 }
 
-
 /*
  * s3c2410fb_init_registers - Initialise all LCD-related registers
  */
-
-static int s3c2410fb_init_registers(struct s3c2410fb_info *fbi)
+static int s3c2410fb_init_registers(struct fb_info *info)
 {
+	struct s3c2410fb_info *fbi = info->par;
+	struct s3c2410fb_mach_info *mach_info = fbi->dev->platform_data;
 	unsigned long flags;
 	void __iomem *regs = fbi->io;
 
@@ -696,14 +723,6 @@ static int s3c2410fb_init_registers(struct s3c2410fb_info *fbi)
 
 	local_irq_restore(flags);
 
-	writel(fbi->regs.lcdcon1, regs + S3C2410_LCDCON1);
-	writel(fbi->regs.lcdcon2, regs + S3C2410_LCDCON2);
-	writel(fbi->regs.lcdcon3, regs + S3C2410_LCDCON3);
-	writel(fbi->regs.lcdcon4, regs + S3C2410_LCDCON4);
-	writel(fbi->regs.lcdcon5, regs + S3C2410_LCDCON5);
-
- 	s3c2410fb_set_lcdaddr(fbi);
-
 	dprintk("LPCSEL    = 0x%08lx\n", mach_info->lpcsel);
 	writel(mach_info->lpcsel, regs + S3C2410_LPCSEL);
 
@@ -712,22 +731,19 @@ static int s3c2410fb_init_registers(struct s3c2410fb_info *fbi)
 	/* ensure temporary palette disabled */
 	writel(0x00, regs + S3C2410_TPAL);
 
-	/* Enable video by setting the ENVID bit to 1 */
-	fbi->regs.lcdcon1 |= S3C2410_LCDCON1_ENVID;
-	writel(fbi->regs.lcdcon1, regs + S3C2410_LCDCON1);
 	return 0;
 }
 
 static void s3c2410fb_write_palette(struct s3c2410fb_info *fbi)
 {
 	unsigned int i;
-	unsigned long ent;
 	void __iomem *regs = fbi->io;
 
 	fbi->palette_ready = 0;
 
 	for (i = 0; i < 256; i++) {
-		if ((ent = fbi->palette_buffer[i]) == PALETTE_BUFF_CLEAR)
+		unsigned long ent = fbi->palette_buffer[i];
+		if (ent == PALETTE_BUFF_CLEAR)
 			continue;
 
 		writel(ent, regs + S3C2410_TFTPAL(i));
@@ -761,13 +777,14 @@ static irqreturn_t s3c2410fb_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static char driver_name[]="s3c2410fb";
+static char driver_name[] = "s3c2410fb";
 
 static int __init s3c2410fb_probe(struct platform_device *pdev)
 {
 	struct s3c2410fb_info *info;
-	struct fb_info	   *fbinfo;
-	struct s3c2410fb_hw *mregs;
+	struct s3c2410fb_display *display;
+	struct fb_info *fbinfo;
+	struct s3c2410fb_mach_info *mach_info;
 	struct resource *res;
 	int ret;
 	int irq;
@@ -777,11 +794,12 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
 
 	mach_info = pdev->dev.platform_data;
 	if (mach_info == NULL) {
-		dev_err(&pdev->dev,"no platform data for lcd, cannot attach\n");
+		dev_err(&pdev->dev,
+			"no platform data for lcd, cannot attach\n");
 		return -EINVAL;
 	}
 
-	mregs = &mach_info->regs;
+	display = mach_info->displays + mach_info->default_display;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
@@ -790,22 +808,22 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
 	}
 
 	fbinfo = framebuffer_alloc(sizeof(struct s3c2410fb_info), &pdev->dev);
-	if (!fbinfo) {
+	if (!fbinfo)
 		return -ENOMEM;
-	}
+
+	platform_set_drvdata(pdev, fbinfo);
 
 	info = fbinfo->par;
-	info->fb = fbinfo;
 	info->dev = &pdev->dev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res == NULL) {
-		dev_err(&pdev->dev, "failed to get memory registersn");
+		dev_err(&pdev->dev, "failed to get memory registers\n");
 		ret = -ENXIO;
 		goto dealloc_fb;
 	}
 
-	size = (res->end - res->start)+1;
+	size = (res->end - res->start) + 1;
 	info->mem = request_mem_region(res->start, size, pdev->name);
 	if (info->mem == NULL) {
 		dev_err(&pdev->dev, "failed to get memory region\n");
@@ -820,21 +838,14 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
 		goto release_mem;
 	}
 
-	platform_set_drvdata(pdev, fbinfo);
-
 	dprintk("devinit\n");
 
 	strcpy(fbinfo->fix.id, driver_name);
 
-	memcpy(&info->regs, &mach_info->regs, sizeof(info->regs));
-
-	/* Stop the video and unset ENVID if set */
-	info->regs.lcdcon1 &= ~S3C2410_LCDCON1_ENVID;
+	/* Stop the video */
 	lcdcon1 = readl(info->io + S3C2410_LCDCON1);
 	writel(lcdcon1 & ~S3C2410_LCDCON1_ENVID, info->io + S3C2410_LCDCON1);
 
-	info->mach_info		    = pdev->dev.platform_data;
-
 	fbinfo->fix.type	    = FB_TYPE_PACKED_PIXELS;
 	fbinfo->fix.type_aux	    = 0;
 	fbinfo->fix.xpanstep	    = 0;
@@ -844,8 +855,6 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
 
 	fbinfo->var.nonstd	    = 0;
 	fbinfo->var.activate	    = FB_ACTIVATE_NOW;
-	fbinfo->var.height	    = mach_info->height;
-	fbinfo->var.width	    = mach_info->width;
 	fbinfo->var.accel_flags     = 0;
 	fbinfo->var.vmode	    = FB_VMODE_NONINTERLACED;
 
@@ -853,32 +862,6 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
 	fbinfo->flags		    = FBINFO_FLAG_DEFAULT;
 	fbinfo->pseudo_palette      = &info->pseudo_pal;
 
-	fbinfo->var.xres	    = mach_info->xres.defval;
-	fbinfo->var.xres_virtual    = mach_info->xres.defval;
-	fbinfo->var.yres	    = mach_info->yres.defval;
-	fbinfo->var.yres_virtual    = mach_info->yres.defval;
-	fbinfo->var.bits_per_pixel  = mach_info->bpp.defval;
-
-	fbinfo->var.upper_margin    = S3C2410_LCDCON2_GET_VBPD(mregs->lcdcon2) + 1;
-	fbinfo->var.lower_margin    = S3C2410_LCDCON2_GET_VFPD(mregs->lcdcon2) + 1;
-	fbinfo->var.vsync_len	    = S3C2410_LCDCON2_GET_VSPW(mregs->lcdcon2) + 1;
-
-	fbinfo->var.left_margin	    = S3C2410_LCDCON3_GET_HFPD(mregs->lcdcon3) + 1;
-	fbinfo->var.right_margin    = S3C2410_LCDCON3_GET_HBPD(mregs->lcdcon3) + 1;
-	fbinfo->var.hsync_len	    = S3C2410_LCDCON4_GET_HSPW(mregs->lcdcon4) + 1;
-
-	fbinfo->var.red.offset      = 11;
-	fbinfo->var.green.offset    = 5;
-	fbinfo->var.blue.offset     = 0;
-	fbinfo->var.transp.offset   = 0;
-	fbinfo->var.red.length      = 5;
-	fbinfo->var.green.length    = 6;
-	fbinfo->var.blue.length     = 5;
-	fbinfo->var.transp.length   = 0;
-	fbinfo->fix.smem_len        =	mach_info->xres.max *
-					mach_info->yres.max *
-					mach_info->bpp.max / 8;
-
 	for (i = 0; i < 256; i++)
 		info->palette_buffer[i] = PALETTE_BUFF_CLEAR;
 
@@ -901,23 +884,39 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
 
 	msleep(1);
 
+	/* find maximum required memory size for display */
+	for (i = 0; i < mach_info->num_displays; i++) {
+		unsigned long smem_len = mach_info->displays[i].xres;
+
+		smem_len *= mach_info->displays[i].yres;
+		smem_len *= mach_info->displays[i].bpp;
+		smem_len >>= 3;
+		if (fbinfo->fix.smem_len < smem_len)
+			fbinfo->fix.smem_len = smem_len;
+	}
+
 	/* Initialize video memory */
-	ret = s3c2410fb_map_video_memory(info);
+	ret = s3c2410fb_map_video_memory(fbinfo);
 	if (ret) {
-		printk( KERN_ERR "Failed to allocate video RAM: %d\n", ret);
+		printk(KERN_ERR "Failed to allocate video RAM: %d\n", ret);
 		ret = -ENOMEM;
 		goto release_clock;
 	}
 
 	dprintk("got video memory\n");
 
-	ret = s3c2410fb_init_registers(info);
+	fbinfo->var.xres = display->xres;
+	fbinfo->var.yres = display->yres;
+	fbinfo->var.bits_per_pixel = display->bpp;
+
+	s3c2410fb_init_registers(fbinfo);
 
-	ret = s3c2410fb_check_var(&fbinfo->var, fbinfo);
+	s3c2410fb_check_var(&fbinfo->var, fbinfo);
 
 	ret = register_framebuffer(fbinfo);
 	if (ret < 0) {
-		printk(KERN_ERR "Failed to register framebuffer device: %d\n", ret);
+		printk(KERN_ERR "Failed to register framebuffer device: %d\n",
+			ret);
 		goto free_video_memory;
 	}
 
@@ -930,18 +929,19 @@ static int __init s3c2410fb_probe(struct platform_device *pdev)
 	return 0;
 
 free_video_memory:
-	s3c2410fb_unmap_video_memory(info);
+	s3c2410fb_unmap_video_memory(fbinfo);
 release_clock:
 	clk_disable(info->clk);
 	clk_put(info->clk);
 release_irq:
-	free_irq(irq,info);
+	free_irq(irq, info);
 release_regs:
 	iounmap(info->io);
 release_mem:
 	release_resource(info->mem);
 	kfree(info->mem);
 dealloc_fb:
+	platform_set_drvdata(pdev, NULL);
 	framebuffer_release(fbinfo);
 	return ret;
 }
@@ -949,8 +949,7 @@ dealloc_fb:
 /* s3c2410fb_stop_lcd
  *
  * shutdown the lcd controller
-*/
-
+ */
 static void s3c2410fb_stop_lcd(struct s3c2410fb_info *fbi)
 {
 	unsigned long flags;
@@ -968,28 +967,33 @@ static void s3c2410fb_stop_lcd(struct s3c2410fb_info *fbi)
  */
 static int s3c2410fb_remove(struct platform_device *pdev)
 {
-	struct fb_info	   *fbinfo = platform_get_drvdata(pdev);
+	struct fb_info *fbinfo = platform_get_drvdata(pdev);
 	struct s3c2410fb_info *info = fbinfo->par;
 	int irq;
 
+	unregister_framebuffer(fbinfo);
+
 	s3c2410fb_stop_lcd(info);
 	msleep(1);
 
-	s3c2410fb_unmap_video_memory(info);
+	s3c2410fb_unmap_video_memory(fbinfo);
 
- 	if (info->clk) {
- 		clk_disable(info->clk);
- 		clk_put(info->clk);
- 		info->clk = NULL;
+	if (info->clk) {
+		clk_disable(info->clk);
+		clk_put(info->clk);
+		info->clk = NULL;
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	free_irq(irq,info);
+	free_irq(irq, info);
+
+	iounmap(info->io);
 
 	release_resource(info->mem);
 	kfree(info->mem);
-	iounmap(info->io);
-	unregister_framebuffer(fbinfo);
+
+	platform_set_drvdata(pdev, NULL);
+	framebuffer_release(fbinfo);
 
 	return 0;
 }
@@ -997,7 +1001,6 @@ static int s3c2410fb_remove(struct platform_device *pdev)
 #ifdef CONFIG_PM
 
 /* suspend and resume support for the lcd controller */
-
 static int s3c2410fb_suspend(struct platform_device *dev, pm_message_t state)
 {
 	struct fb_info	   *fbinfo = platform_get_drvdata(dev);
@@ -1044,7 +1047,7 @@ static struct platform_driver s3c2410fb_driver = {
 	},
 };
 
-int __devinit s3c2410fb_init(void)
+int __init s3c2410fb_init(void)
 {
 	return platform_driver_register(&s3c2410fb_driver);
 }
@@ -1054,10 +1057,10 @@ static void __exit s3c2410fb_cleanup(void)
 	platform_driver_unregister(&s3c2410fb_driver);
 }
 
-
 module_init(s3c2410fb_init);
 module_exit(s3c2410fb_cleanup);
 
-MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>, Ben Dooks <ben-linux@fluff.org>");
+MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>, "
+	      "Ben Dooks <ben-linux@fluff.org>");
 MODULE_DESCRIPTION("Framebuffer driver for the s3c2410");
 MODULE_LICENSE("GPL");
diff --git a/drivers/video/s3c2410fb.h b/drivers/video/s3c2410fb.h
index 17c7915b7ac..6ce5dc26c5f 100644
--- a/drivers/video/s3c2410fb.h
+++ b/drivers/video/s3c2410fb.h
@@ -16,7 +16,7 @@
  *
  * 2004-09-07: Arnaud Patard <arnaud.patard@rtp-net.org>
  * 	- Renamed from h1940fb.h to s3c2410fb.h
- * 	- Chenged h1940 to s3c2410
+ * 	- Changed h1940 to s3c2410
  *
  * 2004-07-15: Arnaud Patard <arnaud.patard@rtp-net.org>
  *	- First version
@@ -26,25 +26,14 @@
 #define __S3C2410FB_H
 
 struct s3c2410fb_info {
-	struct fb_info		*fb;
 	struct device		*dev;
 	struct clk		*clk;
 
 	struct resource		*mem;
 	void __iomem		*io;
 
-	struct s3c2410fb_mach_info *mach_info;
-
-	/* raw memory addresses */
-	dma_addr_t		map_dma;	/* physical */
-	u_char *		map_cpu;	/* virtual */
-	u_int			map_size;
-
 	struct s3c2410fb_hw	regs;
 
-	/* addresses of pieces placed in raw buffer */
-	u_char *		screen_cpu;	/* virtual address of buffer */
-	dma_addr_t		screen_dma;	/* physical address of buffer */
 	unsigned int		palette_ready;
 
 	/* keep these registers in case we need to re-write palette */
diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c
index d11735895a0..7d53bc23b9c 100644
--- a/drivers/video/s3fb.c
+++ b/drivers/video/s3fb.c
@@ -400,11 +400,17 @@ static int s3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
 	struct s3fb_info *par = info->par;
 	int rv, mem, step;
+	u16 m, n, r;
 
 	/* Find appropriate format */
 	rv = svga_match_format (s3fb_formats, var, NULL);
-	if ((rv < 0) || ((par->chip == CHIP_988_VIRGE_VX) ? (rv == 7) : (rv == 6)))
-	{		/* 24bpp on VIRGE VX, 32bpp on others */
+
+	/* 32bpp mode is not supported on VIRGE VX,
+	   24bpp is not supported on others */
+	if ((par->chip == CHIP_988_VIRGE_VX) ? (rv == 7) : (rv == 6))
+		rv = -EINVAL;
+
+	if (rv < 0) {
 		printk(KERN_ERR "fb%d: unsupported mode requested\n", info->node);
 		return rv;
 	}
@@ -422,20 +428,26 @@ static int s3fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 
 	/* Check whether have enough memory */
 	mem = ((var->bits_per_pixel * var->xres_virtual) >> 3) * var->yres_virtual;
-	if (mem > info->screen_size)
-	{
+	if (mem > info->screen_size) {
 		printk(KERN_ERR "fb%d: not enough framebuffer memory (%d kB requested , %d kB available)\n",
 			info->node, mem >> 10, (unsigned int) (info->screen_size >> 10));
 		return -EINVAL;
 	}
 
 	rv = svga_check_timings (&s3_timing_regs, var, info->node);
-	if (rv < 0)
-	{
+	if (rv < 0) {
 		printk(KERN_ERR "fb%d: invalid timings requested\n", info->node);
 		return rv;
 	}
 
+	rv = svga_compute_pll(&s3_pll, PICOS2KHZ(var->pixclock), &m, &n, &r,
+				info->node);
+	if (rv < 0) {
+		printk(KERN_ERR "fb%d: invalid pixclock value requested\n",
+			info->node);
+		return rv;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c
index 5d2a4a4b731..ab2b2110478 100644
--- a/drivers/video/sa1100fb.c
+++ b/drivers/video/sa1100fb.c
@@ -178,7 +178,6 @@
 #include <asm/hardware.h>
 #include <asm/io.h>
 #include <asm/mach-types.h>
-#include <asm/uaccess.h>
 #include <asm/arch/assabet.h>
 #include <asm/arch/shannon.h>
 
diff --git a/drivers/video/savage/savagefb_driver.c b/drivers/video/savage/savagefb_driver.c
index b855f4a34af..37b135d5d12 100644
--- a/drivers/video/savage/savagefb_driver.c
+++ b/drivers/video/savage/savagefb_driver.c
@@ -57,7 +57,6 @@
 #include <asm/irq.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
-#include <asm/uaccess.h>
 
 #ifdef CONFIG_MTRR
 #include <asm/mtrr.h>
diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c
index e8ccace0125..bc7d2368373 100644
--- a/drivers/video/sis/sis_main.c
+++ b/drivers/video/sis/sis_main.c
@@ -58,7 +58,7 @@
 #include <linux/capability.h>
 #include <linux/fs.h>
 #include <linux/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/io.h>
 #ifdef CONFIG_MTRR
 #include <asm/mtrr.h>
diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c
index 64779e70408..62321458f71 100644
--- a/drivers/video/skeletonfb.c
+++ b/drivers/video/skeletonfb.c
@@ -780,7 +780,7 @@ static int __devinit xxxfb_probe(struct pci_dev *dev,
      *
      * NOTE: This field is currently unused.
      */
-    info->pixmap.scan_align = 32;
+    info->pixmap.access_align = 32;
 /***************************** End optional stage ***************************/
 
     /*
diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c
index c86df126f93..1be95a68d69 100644
--- a/drivers/video/sm501fb.c
+++ b/drivers/video/sm501fb.c
@@ -28,6 +28,7 @@
 #include <linux/wait.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
+#include <linux/console.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -62,6 +63,8 @@ struct sm501fb_info {
 	struct resource		*regs_res;	/* registers resource */
 	struct sm501_platdata_fb *pdata;	/* our platform data */
 
+	unsigned long		 pm_crt_ctrl;	/* pm: crt ctrl save */
+
 	int			 irq;
 	int			 swap_endian;	/* set to swap rgb=>bgr */
 	void __iomem		*regs;		/* remapped registers */
@@ -774,6 +777,11 @@ static int sm501fb_set_par_pnl(struct fb_info *info)
 	writel(control, fbi->regs + SM501_DC_PANEL_CONTROL);
 	sm501fb_sync_regs(fbi);
 
+	/* ensure the panel interface is not tristated at this point */
+
+	sm501_modify_reg(fbi->dev->parent, SM501_SYSTEM_CONTROL,
+			 0, SM501_SYSCTRL_PANEL_TRISTATE);
+
 	/* power the panel up */
 	sm501fb_panel_power(fbi, 1);
 	return 0;
@@ -1687,19 +1695,25 @@ static int sm501fb_suspend_fb(struct sm501fb_info *info,
 		goto err_nocursor;
 	}
 
+	dev_dbg(info->dev, "suspending screen to %p\n", par->store_fb);
+	dev_dbg(info->dev, "suspending cursor to %p\n", par->store_cursor);
+
 	memcpy_fromio(par->store_fb, par->screen.k_addr, par->screen.size);
 	memcpy_fromio(par->store_cursor, par->cursor.k_addr, par->cursor.size);
-
 	/* blank the relevant interface to ensure unit power minimised */
 	(par->ops.fb_blank)(FB_BLANK_POWERDOWN, fbi);
 
+	acquire_console_sem();
+	fb_set_suspend(fbi, 1);
+	release_console_sem();
+
 	return 0;
 
  err_nocursor:
 	vfree(par->store_fb);
+	par->store_fb = NULL;
 
 	return -ENOMEM;
-
 }
 
 static void sm501fb_resume_fb(struct sm501fb_info *info,
@@ -1717,8 +1731,20 @@ static void sm501fb_resume_fb(struct sm501fb_info *info,
 
 	/* restore the data */
 
-	memcpy_toio(par->screen.k_addr, par->store_fb, par->screen.size);
-	memcpy_toio(par->cursor.k_addr, par->store_cursor, par->cursor.size);
+	dev_dbg(info->dev, "restoring screen from %p\n", par->store_fb);
+	dev_dbg(info->dev, "restoring cursor from %p\n", par->store_cursor);
+
+	if (par->store_fb)
+		memcpy_toio(par->screen.k_addr, par->store_fb,
+			    par->screen.size);
+
+	if (par->store_cursor)
+		memcpy_toio(par->cursor.k_addr, par->store_cursor,
+			    par->cursor.size);
+
+	acquire_console_sem();
+	fb_set_suspend(fbi, 0);
+	release_console_sem();
 
 	vfree(par->store_fb);
 	vfree(par->store_cursor);
@@ -1731,6 +1757,9 @@ static int sm501fb_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct sm501fb_info *info = platform_get_drvdata(pdev);
 
+	/* store crt control to resume with */
+	info->pm_crt_ctrl = readl(info->regs + SM501_DC_CRT_CONTROL);
+
 	sm501fb_suspend_fb(info, HEAD_CRT);
 	sm501fb_suspend_fb(info, HEAD_PANEL);
 
@@ -1740,12 +1769,24 @@ static int sm501fb_suspend(struct platform_device *pdev, pm_message_t state)
 	return 0;
 }
 
+#define SM501_CRT_CTRL_SAVE (SM501_DC_CRT_CONTROL_TVP |        \
+			     SM501_DC_CRT_CONTROL_SEL)
+
+
 static int sm501fb_resume(struct platform_device *pdev)
 {
 	struct sm501fb_info *info = platform_get_drvdata(pdev);
+	unsigned long crt_ctrl;
 
 	sm501_unit_power(info->dev->parent, SM501_GATE_DISPLAY, 1);
 
+	/* restore the items we want to be saved for crt control */
+
+	crt_ctrl = readl(info->regs + SM501_DC_CRT_CONTROL);
+	crt_ctrl &= ~SM501_CRT_CTRL_SAVE;
+	crt_ctrl |= info->pm_crt_ctrl & SM501_CRT_CTRL_SAVE;
+	writel(crt_ctrl, info->regs + SM501_DC_CRT_CONTROL);
+
 	sm501fb_resume_fb(info, HEAD_CRT);
 	sm501fb_resume_fb(info, HEAD_PANEL);
 
diff --git a/drivers/video/sstfb.c b/drivers/video/sstfb.c
index 5eff28ce4f4..97784f9c184 100644
--- a/drivers/video/sstfb.c
+++ b/drivers/video/sstfb.c
@@ -88,7 +88,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <asm/io.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <video/sstfb.h>
 
 
diff --git a/drivers/video/svgalib.c b/drivers/video/svgalib.c
index 25df928d37d..9c710670157 100644
--- a/drivers/video/svgalib.c
+++ b/drivers/video/svgalib.c
@@ -598,9 +598,11 @@ void svga_set_timings(const struct svga_timing_regs *tm, struct fb_var_screeninf
 /* ------------------------------------------------------------------------- */
 
 
-int svga_match_format(const struct svga_fb_format *frm, struct fb_var_screeninfo *var, struct fb_fix_screeninfo *fix)
+static inline int match_format(const struct svga_fb_format *frm,
+			       struct fb_var_screeninfo *var)
 {
 	int i = 0;
+	int stored = -EINVAL;
 
 	while (frm->bits_per_pixel != SVGA_FORMAT_END_VAL)
 	{
@@ -609,25 +611,38 @@ int svga_match_format(const struct svga_fb_format *frm, struct fb_var_screeninfo
 		    (var->green.length   <= frm->green.length)   &&
 		    (var->blue.length    <= frm->blue.length)    &&
 		    (var->transp.length  <= frm->transp.length)  &&
-		    (var->nonstd	 == frm->nonstd)) {
-		    	var->bits_per_pixel = frm->bits_per_pixel;
-			var->red            = frm->red;
-			var->green          = frm->green;
-			var->blue           = frm->blue;
-			var->transp         = frm->transp;
-			var->nonstd         = frm->nonstd;
-			if (fix != NULL) {
-				fix->type      = frm->type;
-				fix->type_aux  = frm->type_aux;
-				fix->visual    = frm->visual;
-				fix->xpanstep  = frm->xpanstep;
-			}
+		    (var->nonstd	 == frm->nonstd))
 			return i;
-		}
+		if (var->bits_per_pixel == frm->bits_per_pixel)
+			stored = i;
 		i++;
 		frm++;
 	}
-	return -EINVAL;
+	return stored;
+}
+
+int svga_match_format(const struct svga_fb_format *frm,
+		      struct fb_var_screeninfo *var,
+		      struct fb_fix_screeninfo *fix)
+{
+	int i = match_format(frm, var);
+
+	if (i >= 0) {
+		var->bits_per_pixel = frm[i].bits_per_pixel;
+		var->red            = frm[i].red;
+		var->green          = frm[i].green;
+		var->blue           = frm[i].blue;
+		var->transp         = frm[i].transp;
+		var->nonstd         = frm[i].nonstd;
+		if (fix != NULL) {
+			fix->type      = frm[i].type;
+			fix->type_aux  = frm[i].type_aux;
+			fix->visual    = frm[i].visual;
+			fix->xpanstep  = frm[i].xpanstep;
+		}
+	}
+
+	return i;
 }
 
 
diff --git a/drivers/video/tdfxfb.c b/drivers/video/tdfxfb.c
index 689ce0270b8..057bdd59380 100644
--- a/drivers/video/tdfxfb.c
+++ b/drivers/video/tdfxfb.c
@@ -4,13 +4,13 @@
  *
  * Author: Hannu Mallat <hmallat@cc.hut.fi>
  *
- * Copyright � 1999 Hannu Mallat
+ * Copyright © 1999 Hannu Mallat
  * All rights reserved
  *
  * Created      : Thu Sep 23 18:17:43 1999, hmallat
  * Last modified: Tue Nov  2 21:19:47 1999, hmallat
  *
- * Lots of the information here comes from the Daryll Strauss' Banshee 
+ * Lots of the information here comes from the Daryll Strauss' Banshee
  * patches to the XF86 server, and the rest comes from the 3dfx
  * Banshee specification. I'm very much indebted to Daryll for his
  * work on the X server.
@@ -23,7 +23,7 @@
  * behave very differently from the Voodoo3/4/5. For anyone wanting to
  * use frame buffer on the Voodoo1/2, see the sstfb driver (which is
  * located at http://www.sourceforge.net/projects/sstfb).
- * 
+ *
  * While I _am_ grateful to 3Dfx for releasing the specs for Banshee,
  * I do wish the next version is a bit more complete. Without the XF86
  * patches I couldn't have gotten even this far... for instance, the
@@ -33,9 +33,8 @@
  *
  * The structure of this driver comes pretty much from the Permedia
  * driver by Ilario Nardinocchi, which in turn is based on skeletonfb.
- * 
+ *
  * TODO:
- * - support for 16/32 bpp needs fixing (funky bootup penguin)
  * - multihead support (basically need to support an array of fb_infos)
  * - support other architectures (PPC, Alpha); does the fact that the VGA
  *   core can be accessed only thru I/O (not memory mapped) complicate
@@ -43,18 +42,18 @@
  *
  * Version history:
  *
- * 0.1.4 (released 2002-05-28) ported over to new fbdev api by James Simmons
+ * 0.1.4 (released 2002-05-28)	ported over to new fbdev api by James Simmons
  *
- * 0.1.3 (released 1999-11-02) added Attila's panning support, code
- *			       reorg, hwcursor address page size alignment
- *                             (for mmaping both frame buffer and regs),
- *                             and my changes to get rid of hardcoded
- *                             VGA i/o register locations (uses PCI
- *                             configuration info now)
- * 0.1.2 (released 1999-10-19) added Attila Kesmarki's bug fixes and
- *                             improvements
- * 0.1.1 (released 1999-10-07) added Voodoo3 support by Harold Oga.
- * 0.1.0 (released 1999-10-06) initial version
+ * 0.1.3 (released 1999-11-02)	added Attila's panning support, code
+ *				reorg, hwcursor address page size alignment
+ *				(for mmaping both frame buffer and regs),
+ *				and my changes to get rid of hardcoded
+ *				VGA i/o register locations (uses PCI
+ *				configuration info now)
+ * 0.1.2 (released 1999-10-19)	added Attila Kesmarki's bug fixes and
+ *				improvements
+ * 0.1.1 (released 1999-10-07)	added Voodoo3 support by Harold Oga.
+ * 0.1.0 (released 1999-10-06)	initial version
  *
  */
 
@@ -64,24 +63,32 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/nvram.h>
 #include <asm/io.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
 
 #include <video/tdfx.h>
 
-#undef TDFXFB_DEBUG 
-#ifdef TDFXFB_DEBUG
-#define DPRINTK(a,b...) printk(KERN_DEBUG "fb: %s: " a, __FUNCTION__ , ## b)
+#define DPRINTK(a, b...) pr_debug("fb: %s: " a, __FUNCTION__ , ## b)
+
+#ifdef CONFIG_MTRR
+#include <asm/mtrr.h>
 #else
-#define DPRINTK(a,b...)
-#endif 
+/* duplicate asm/mtrr.h defines to work on archs without mtrr */
+#define MTRR_TYPE_WRCOMB     1
+
+static inline int mtrr_add(unsigned long base, unsigned long size,
+				unsigned int type, char increment)
+{
+    return -ENODEV;
+}
+static inline int mtrr_del(int reg, unsigned long base,
+				unsigned long size)
+{
+    return -ENODEV;
+}
+#endif
 
 #define BANSHEE_MAX_PIXCLOCK 270000
 #define VOODOO3_MAX_PIXCLOCK 300000
@@ -90,9 +97,9 @@
 static struct fb_fix_screeninfo tdfx_fix __devinitdata = {
 	.id =		"3Dfx",
 	.type =		FB_TYPE_PACKED_PIXELS,
-	.visual =	FB_VISUAL_PSEUDOCOLOR, 
+	.visual =	FB_VISUAL_PSEUDOCOLOR,
 	.ypanstep =	1,
-	.ywrapstep =	1, 
+	.ywrapstep =	1,
 	.accel =	FB_ACCEL_3DFX_BANSHEE
 };
 
@@ -102,7 +109,7 @@ static struct fb_var_screeninfo tdfx_var __devinitdata = {
 	.yres =		480,
 	.xres_virtual =	640,
 	.yres_virtual =	1024,
-	.bits_per_pixel =8,
+	.bits_per_pixel = 8,
 	.red =		{0, 8, 0},
 	.blue =		{0, 8, 0},
 	.green =	{0, 8, 0},
@@ -142,103 +149,79 @@ static struct pci_device_id tdfxfb_id_table[] = {
 
 static struct pci_driver tdfxfb_driver = {
 	.name		= "tdfxfb",
-	.id_table 	= tdfxfb_id_table,
-	.probe 		= tdfxfb_probe,
-	.remove 	= __devexit_p(tdfxfb_remove),
+	.id_table	= tdfxfb_id_table,
+	.probe		= tdfxfb_probe,
+	.remove		= __devexit_p(tdfxfb_remove),
 };
 
 MODULE_DEVICE_TABLE(pci, tdfxfb_id_table);
 
 /*
- *  Frame buffer device API
+ * Driver data
  */
-static int tdfxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *fb); 
-static int tdfxfb_set_par(struct fb_info *info); 
-static int tdfxfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, 
-			    u_int transp, struct fb_info *info); 
-static int tdfxfb_blank(int blank, struct fb_info *info); 
-static int tdfxfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info);
-static int banshee_wait_idle(struct fb_info *info);
-#ifdef CONFIG_FB_3DFX_ACCEL
-static void tdfxfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
-static void tdfxfb_copyarea(struct fb_info *info, const struct fb_copyarea *area);  
-static void tdfxfb_imageblit(struct fb_info *info, const struct fb_image *image); 
-#endif /* CONFIG_FB_3DFX_ACCEL */
-
-static struct fb_ops tdfxfb_ops = {
-	.owner		= THIS_MODULE,
-	.fb_check_var	= tdfxfb_check_var,
-	.fb_set_par	= tdfxfb_set_par,
-	.fb_setcolreg	= tdfxfb_setcolreg,
-	.fb_blank	= tdfxfb_blank,
-	.fb_pan_display	= tdfxfb_pan_display,
-	.fb_sync	= banshee_wait_idle,
-#ifdef CONFIG_FB_3DFX_ACCEL
-	.fb_fillrect	= tdfxfb_fillrect,
-	.fb_copyarea	= tdfxfb_copyarea,
-	.fb_imageblit	= tdfxfb_imageblit,
-#else
-	.fb_fillrect	= cfb_fillrect,
-	.fb_copyarea	= cfb_copyarea,
-	.fb_imageblit	= cfb_imageblit,
-#endif
-};
-
-/*
- * do_xxx: Hardware-specific functions
- */
-static u32 do_calc_pll(int freq, int *freq_out);
-static void  do_write_regs(struct fb_info *info, struct banshee_reg *reg);
-static unsigned long do_lfb_size(struct tdfx_par *par, unsigned short);
-
-/*
- * Driver data 
- */
-static int  nopan   = 0;
-static int  nowrap  = 1;      // not implemented (yet)
-static char *mode_option __devinitdata = NULL;
-
-/* ------------------------------------------------------------------------- 
- *                      Hardware-specific funcions
+static int nopan;
+static int nowrap = 1;      /* not implemented (yet) */
+static int hwcursor = 1;
+static char *mode_option __devinitdata;
+/* mtrr option */
+static int nomtrr __devinitdata;
+
+/* -------------------------------------------------------------------------
+ *			Hardware-specific functions
  * ------------------------------------------------------------------------- */
 
-#ifdef VGA_REG_IO 
-static inline  u8 vga_inb(struct tdfx_par *par, u32 reg) { return inb(reg); }
-
-static inline void vga_outb(struct tdfx_par *par, u32 reg,  u8 val) { outb(val, reg); }
-#else
-static inline  u8 vga_inb(struct tdfx_par *par, u32 reg) { 
-	return inb(par->iobase + reg - 0x300); 
+static inline u8 vga_inb(struct tdfx_par *par, u32 reg)
+{
+	return inb(par->iobase + reg - 0x300);
 }
-static inline void vga_outb(struct tdfx_par *par, u32 reg,  u8 val) { 
-	outb(val, par->iobase + reg - 0x300); 
+
+static inline void vga_outb(struct tdfx_par *par, u32 reg, u8 val)
+{
+	outb(val, par->iobase + reg - 0x300);
 }
-#endif
 
-static inline void gra_outb(struct tdfx_par *par, u32 idx, u8 val) {
-	vga_outb(par, GRA_I, idx); vga_outb(par, GRA_D, val);
+static inline void gra_outb(struct tdfx_par *par, u32 idx, u8 val)
+{
+	vga_outb(par, GRA_I, idx);
+	wmb();
+	vga_outb(par, GRA_D, val);
+	wmb();
 }
 
-static inline void seq_outb(struct tdfx_par *par, u32 idx, u8 val) {
-	vga_outb(par, SEQ_I, idx); vga_outb(par, SEQ_D, val);
+static inline void seq_outb(struct tdfx_par *par, u32 idx, u8 val)
+{
+	vga_outb(par, SEQ_I, idx);
+	wmb();
+	vga_outb(par, SEQ_D, val);
+	wmb();
 }
 
-static inline u8 seq_inb(struct tdfx_par *par, u32 idx) {
-	vga_outb(par, SEQ_I, idx); return vga_inb(par, SEQ_D);
+static inline u8 seq_inb(struct tdfx_par *par, u32 idx)
+{
+	vga_outb(par, SEQ_I, idx);
+	mb();
+	return vga_inb(par, SEQ_D);
 }
 
-static inline void crt_outb(struct tdfx_par *par, u32 idx, u8 val) {
-	vga_outb(par, CRT_I, idx); vga_outb(par, CRT_D, val);
+static inline void crt_outb(struct tdfx_par *par, u32 idx, u8 val)
+{
+	vga_outb(par, CRT_I, idx);
+	wmb();
+	vga_outb(par, CRT_D, val);
+	wmb();
 }
 
-static inline u8 crt_inb(struct tdfx_par *par, u32 idx) {
-	vga_outb(par, CRT_I, idx); return vga_inb(par, CRT_D);
+static inline u8 crt_inb(struct tdfx_par *par, u32 idx)
+{
+	vga_outb(par, CRT_I, idx);
+	mb();
+	return vga_inb(par, CRT_D);
 }
 
-static inline void att_outb(struct tdfx_par *par, u32 idx, u8 val) 
+static inline void att_outb(struct tdfx_par *par, u32 idx, u8 val)
 {
 	unsigned char tmp;
-	
+
 	tmp = vga_inb(par, IS1_R);
 	vga_outb(par, ATT_IW, idx);
 	vga_outb(par, ATT_IW, val);
@@ -267,10 +250,11 @@ static inline void vga_enable_video(struct tdfx_par *par)
 static inline void vga_enable_palette(struct tdfx_par *par)
 {
 	vga_inb(par, IS1_R);
+	mb();
 	vga_outb(par, ATT_IW, 0x20);
 }
 
-static inline u32 tdfx_inl(struct tdfx_par *par, unsigned int reg) 
+static inline u32 tdfx_inl(struct tdfx_par *par, unsigned int reg)
 {
 	return readl(par->regbase_virt + reg);
 }
@@ -284,9 +268,10 @@ static inline void banshee_make_room(struct tdfx_par *par, int size)
 {
 	/* Note: The Voodoo3's onboard FIFO has 32 slots. This loop
 	 * won't quit if you ask for more. */
-	while((tdfx_inl(par, STATUS) & 0x1f) < size-1);
+	while ((tdfx_inl(par, STATUS) & 0x1f) < size - 1)
+		cpu_relax();
 }
- 
+
 static int banshee_wait_idle(struct fb_info *info)
 {
 	struct tdfx_par *par = info->par;
@@ -295,28 +280,31 @@ static int banshee_wait_idle(struct fb_info *info)
 	banshee_make_room(par, 1);
 	tdfx_outl(par, COMMAND_3D, COMMAND_3D_NOP);
 
-	while(1) {
-		i = (tdfx_inl(par, STATUS) & STATUS_BUSY) ? 0 : i + 1;
-		if(i == 3) break;
-	}
+	do {
+		if ((tdfx_inl(par, STATUS) & STATUS_BUSY) == 0)
+			i++;
+	} while (i < 3);
+
 	return 0;
 }
 
 /*
- * Set the color of a palette entry in 8bpp mode 
+ * Set the color of a palette entry in 8bpp mode
  */
 static inline void do_setpalentry(struct tdfx_par *par, unsigned regno, u32 c)
-{  
+{
 	banshee_make_room(par, 2);
 	tdfx_outl(par, DACADDR, regno);
+	/* reading the register back after the write makes it work */
+	tdfx_inl(par, DACADDR);
 	tdfx_outl(par, DACDATA, c);
 }
 
-static u32 do_calc_pll(int freq, int* freq_out) 
+static u32 do_calc_pll(int freq, int *freq_out)
 {
 	int m, n, k, best_m, best_n, best_k, best_error;
 	int fref = 14318;
-  
+
 	best_error = freq;
 	best_n = best_m = best_k = 0;
 
@@ -326,27 +314,28 @@ static u32 do_calc_pll(int freq, int* freq_out)
 			 * Estimate value of n that produces target frequency
 			 * with current m and k
 			 */
-			int n_estimated = (freq * (m + 2) * (1 << k) / fref) - 2;
+			int n_estimated = ((freq * (m + 2) << k) / fref) - 2;
 
 			/* Search neighborhood of estimated n */
-			for (n = max(0, n_estimated - 1);
-					n <= min(255, n_estimated + 1); n++) {
+			for (n = max(0, n_estimated);
+				n <= min(255, n_estimated + 1);
+				n++) {
 				/*
 				 * Calculate PLL freqency with current m, k and
 				 * estimated n
 				 */
-				int f = fref * (n + 2) / (m + 2) / (1 << k);
-				int error = abs (f - freq);
+				int f = (fref * (n + 2) / (m + 2)) >> k;
+				int error = abs(f - freq);
 
 				/*
-				 *  If this is the closest we've come to the
+				 * If this is the closest we've come to the
 				 * target frequency then remember n, m and k
 				 */
-				if (error  < best_error) {
+				if (error < best_error) {
 					best_error = error;
-					best_n     = n;
-					best_m     = m;
-					best_k     = k;
+					best_n = n;
+					best_m = m;
+					best_k = k;
 				}
 			}
 		}
@@ -355,12 +344,12 @@ static u32 do_calc_pll(int freq, int* freq_out)
 	n = best_n;
 	m = best_m;
 	k = best_k;
-	*freq_out = fref*(n + 2)/(m + 2)/(1 << k);
+	*freq_out = (fref * (n + 2) / (m + 2)) >> k;
 
 	return (n << 8) | (m << 2) | k;
 }
 
-static void do_write_regs(struct fb_info *info, struct banshee_reg* reg) 
+static void do_write_regs(struct fb_info *info, struct banshee_reg *reg)
 {
 	struct tdfx_par *par = info->par;
 	int i;
@@ -372,13 +361,13 @@ static void do_write_regs(struct fb_info *info, struct banshee_reg* reg)
 	crt_outb(par, 0x11, crt_inb(par, 0x11) & 0x7f); /* CRT unprotect */
 
 	banshee_make_room(par, 3);
-	tdfx_outl(par, VGAINIT1,	reg->vgainit1 &  0x001FFFFF);
-	tdfx_outl(par, VIDPROCCFG,	reg->vidcfg   & ~0x00000001);
+	tdfx_outl(par, VGAINIT1, reg->vgainit1 & 0x001FFFFF);
+	tdfx_outl(par, VIDPROCCFG, reg->vidcfg & ~0x00000001);
 #if 0
 	tdfx_outl(par, PLLCTRL1, reg->mempll);
 	tdfx_outl(par, PLLCTRL2, reg->gfxpll);
 #endif
-	tdfx_outl(par, PLLCTRL0,	reg->vidpll);
+	tdfx_outl(par, PLLCTRL0, reg->vidpll);
 
 	vga_outb(par, MISC_W, reg->misc[0x00] | 0x01);
 
@@ -400,72 +389,65 @@ static void do_write_regs(struct fb_info *info, struct banshee_reg* reg)
 	vga_enable_palette(par);
 	vga_enable_video(par);
 
-	banshee_make_room(par, 11);
-	tdfx_outl(par, 	VGAINIT0,      reg->vgainit0);
-	tdfx_outl(par,	DACMODE,       reg->dacmode);
-	tdfx_outl(par,	VIDDESKSTRIDE, reg->stride);
-	tdfx_outl(par,	HWCURPATADDR,  0);
-   
-	tdfx_outl(par,	VIDSCREENSIZE,reg->screensize);
-	tdfx_outl(par,	VIDDESKSTART,	reg->startaddr);
-	tdfx_outl(par,	VIDPROCCFG,	reg->vidcfg);
-	tdfx_outl(par,	VGAINIT1,	reg->vgainit1);  
-	tdfx_outl(par,	MISCINIT0,	reg->miscinit0);	
-
-	banshee_make_room(par,	8);
-	tdfx_outl(par,	SRCBASE,         reg->srcbase);
-	tdfx_outl(par,	DSTBASE,         reg->dstbase);
-	tdfx_outl(par,	COMMANDEXTRA_2D, 0);
-	tdfx_outl(par,	CLIP0MIN,        0);
-	tdfx_outl(par,	CLIP0MAX,        0x0fff0fff);
-	tdfx_outl(par,	CLIP1MIN,        0);
-	tdfx_outl(par,	CLIP1MAX,        0x0fff0fff);
-	tdfx_outl(par,	SRCXY,	   0);
+	banshee_make_room(par, 9);
+	tdfx_outl(par, VGAINIT0, reg->vgainit0);
+	tdfx_outl(par, DACMODE, reg->dacmode);
+	tdfx_outl(par, VIDDESKSTRIDE, reg->stride);
+	tdfx_outl(par, HWCURPATADDR, reg->curspataddr);
+
+	tdfx_outl(par, VIDSCREENSIZE, reg->screensize);
+	tdfx_outl(par, VIDDESKSTART, reg->startaddr);
+	tdfx_outl(par, VIDPROCCFG, reg->vidcfg);
+	tdfx_outl(par, VGAINIT1, reg->vgainit1);
+	tdfx_outl(par, MISCINIT0, reg->miscinit0);
+
+	banshee_make_room(par, 8);
+	tdfx_outl(par, SRCBASE, reg->startaddr);
+	tdfx_outl(par, DSTBASE, reg->startaddr);
+	tdfx_outl(par, COMMANDEXTRA_2D, 0);
+	tdfx_outl(par, CLIP0MIN, 0);
+	tdfx_outl(par, CLIP0MAX, 0x0fff0fff);
+	tdfx_outl(par, CLIP1MIN, 0);
+	tdfx_outl(par, CLIP1MAX, 0x0fff0fff);
+	tdfx_outl(par, SRCXY, 0);
 
 	banshee_wait_idle(info);
 }
 
-static unsigned long do_lfb_size(struct tdfx_par *par, unsigned short dev_id) 
+static unsigned long do_lfb_size(struct tdfx_par *par, unsigned short dev_id)
 {
-	u32 draminit0;
-	u32 draminit1;
+	u32 draminit0 = tdfx_inl(par, DRAMINIT0);
+	u32 draminit1 = tdfx_inl(par, DRAMINIT1);
 	u32 miscinit1;
-
-	int num_chips;
+	int num_chips = (draminit0 & DRAMINIT0_SGRAM_NUM) ? 8 : 4;
 	int chip_size; /* in MB */
-	u32 lfbsize;
-	int has_sgram;
+	int has_sgram = draminit1 & DRAMINIT1_MEM_SDRAM;
 
-	draminit0 = tdfx_inl(par, DRAMINIT0);  
-	draminit1 = tdfx_inl(par, DRAMINIT1);
-
-	num_chips = (draminit0 & DRAMINIT0_SGRAM_NUM) ? 8 : 4;
- 
 	if (dev_id < PCI_DEVICE_ID_3DFX_VOODOO5) {
 		/* Banshee/Voodoo3 */
-		has_sgram = draminit1 & DRAMINIT1_MEM_SDRAM;
-		chip_size = has_sgram ? ((draminit0 & DRAMINIT0_SGRAM_TYPE) ? 2 : 1)
-				      : 2;
+		chip_size = 2;	/* 1 only if has_sgram and type bit is clear */
+		if (has_sgram && !(draminit0 & DRAMINIT0_SGRAM_TYPE))
+			chip_size = 1;
 	} else {
 		/* Voodoo4/5 */
 		has_sgram = 0;
-		chip_size = 1 << ((draminit0 & DRAMINIT0_SGRAM_TYPE_MASK) >> DRAMINIT0_SGRAM_TYPE_SHIFT);
+		chip_size = draminit0 & DRAMINIT0_SGRAM_TYPE_MASK;
+		chip_size = 1 << (chip_size >> DRAMINIT0_SGRAM_TYPE_SHIFT);
 	}
-	lfbsize = num_chips * chip_size * 1024 * 1024;
 
 	/* disable block writes for SDRAM */
 	miscinit1 = tdfx_inl(par, MISCINIT1);
 	miscinit1 |= has_sgram ? 0 : MISCINIT1_2DBLOCK_DIS;
 	miscinit1 |= MISCINIT1_CLUT_INV;
 
-	banshee_make_room(par, 1); 
+	banshee_make_room(par, 1);
 	tdfx_outl(par, MISCINIT1, miscinit1);
-	return lfbsize;
+	return num_chips * chip_size * 1024l * 1024;
 }
 
 /* ------------------------------------------------------------------------- */
 
-static int tdfxfb_check_var(struct fb_var_screeninfo *var,struct fb_info *info) 
+static int tdfxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
 	struct tdfx_par *par = info->par;
 	u32 lpitch;
@@ -486,103 +468,113 @@ static int tdfxfb_check_var(struct fb_var_screeninfo *var,struct fb_info *info)
 		DPRINTK("xoffset not supported\n");
 		return -EINVAL;
 	}
+	var->yoffset = 0;
 
-	/* Banshee doesn't support interlace, but Voodoo4/5 and probably Voodoo3 do. */
-	/* no direct information about device id now? use max_pixclock for this... */
+	/*
+	 * Banshee doesn't support interlace, but Voodoo4/5 and probably
+	 * Voodoo3 do.
+	 * There is no direct information about the device id here,
+	 * so max_pixclock is used to tell the chips apart.
+	 */
 	if (((var->vmode & FB_VMODE_MASK) == FB_VMODE_INTERLACED) &&
-			(par->max_pixclock < VOODOO3_MAX_PIXCLOCK)) {
+	    (par->max_pixclock < VOODOO3_MAX_PIXCLOCK)) {
 		DPRINTK("interlace not supported\n");
 		return -EINVAL;
 	}
 
 	var->xres = (var->xres + 15) & ~15; /* could sometimes be 8 */
-	lpitch = var->xres * ((var->bits_per_pixel + 7)>>3);
-  
+	lpitch = var->xres * ((var->bits_per_pixel + 7) >> 3);
+
 	if (var->xres < 320 || var->xres > 2048) {
 		DPRINTK("width not supported: %u\n", var->xres);
 		return -EINVAL;
 	}
-  
+
 	if (var->yres < 200 || var->yres > 2048) {
 		DPRINTK("height not supported: %u\n", var->yres);
 		return -EINVAL;
 	}
-  
+
 	if (lpitch * var->yres_virtual > info->fix.smem_len) {
-		var->yres_virtual = info->fix.smem_len/lpitch;
+		var->yres_virtual = info->fix.smem_len / lpitch;
 		if (var->yres_virtual < var->yres) {
 			DPRINTK("no memory for screen (%ux%ux%u)\n",
-			var->xres, var->yres_virtual, var->bits_per_pixel);
+				var->xres, var->yres_virtual,
+				var->bits_per_pixel);
 			return -EINVAL;
 		}
 	}
-  
+
 	if (PICOS2KHZ(var->pixclock) > par->max_pixclock) {
-		DPRINTK("pixclock too high (%ldKHz)\n",PICOS2KHZ(var->pixclock));
+		DPRINTK("pixclock too high (%ldKHz)\n",
+			PICOS2KHZ(var->pixclock));
 		return -EINVAL;
 	}
 
-	switch(var->bits_per_pixel) {
-		case 8:
-			var->red.length = var->green.length = var->blue.length = 8;
-			break;
-		case 16:
-			var->red.offset   = 11;
-			var->red.length   = 5;
-			var->green.offset = 5;
-			var->green.length = 6;
-			var->blue.offset  = 0;
-			var->blue.length  = 5;
-			break;
-		case 24:
-			var->red.offset=16;
-			var->green.offset=8;
-			var->blue.offset=0;
-			var->red.length = var->green.length = var->blue.length = 8;
-		case 32:
-			var->red.offset   = 16;
-			var->green.offset = 8;
-			var->blue.offset  = 0;
-			var->red.length = var->green.length = var->blue.length = 8;
-			break;
+	var->transp.offset = 0;
+	var->transp.length = 0;
+	switch (var->bits_per_pixel) {
+	case 8:
+		var->red.length = 8;
+		var->red.offset = 0;
+		var->green = var->red;
+		var->blue = var->red;
+		break;
+	case 16:
+		var->red.offset   = 11;
+		var->red.length   = 5;
+		var->green.offset = 5;
+		var->green.length = 6;
+		var->blue.offset  = 0;
+		var->blue.length  = 5;
+		break;
+	case 32:
+		var->transp.offset = 24;
+		var->transp.length = 8;
+	case 24:
+		var->red.offset = 16;
+		var->green.offset = 8;
+		var->blue.offset = 0;
+		var->red.length = var->green.length = var->blue.length = 8;
+		break;
 	}
-	var->height = var->width = -1;
-  
+	var->width = -1;
+	var->height = -1;
+
 	var->accel_flags = FB_ACCELF_TEXT;
-	
-	DPRINTK("Checking graphics mode at %dx%d depth %d\n",  var->xres, var->yres, var->bits_per_pixel);
+
+	DPRINTK("Checking graphics mode at %dx%d depth %d\n",
+		var->xres, var->yres, var->bits_per_pixel);
 	return 0;
 }
 
 static int tdfxfb_set_par(struct fb_info *info)
 {
 	struct tdfx_par *par = info->par;
-	u32 hdispend, hsyncsta, hsyncend, htotal;
+	u32 hdispend = info->var.xres;
+	u32 hsyncsta = hdispend + info->var.right_margin;
+	u32 hsyncend = hsyncsta + info->var.hsync_len;
+	u32 htotal   = hsyncend + info->var.left_margin;
 	u32 hd, hs, he, ht, hbs, hbe;
 	u32 vd, vs, ve, vt, vbs, vbe;
 	struct banshee_reg reg;
 	int fout, freq;
-	u32 wd, cpp;
-  
-	par->baseline  = 0;
- 
+	u32 wd;
+	u32 cpp = (info->var.bits_per_pixel + 7) >> 3;
+
 	memset(&reg, 0, sizeof(reg));
-	cpp = (info->var.bits_per_pixel + 7)/8;
- 
-	reg.vidcfg = VIDCFG_VIDPROC_ENABLE | VIDCFG_DESK_ENABLE | VIDCFG_CURS_X11 | ((cpp - 1) << VIDCFG_PIXFMT_SHIFT) | (cpp != 1 ? VIDCFG_CLUT_BYPASS : 0);
+
+	reg.vidcfg = VIDCFG_VIDPROC_ENABLE | VIDCFG_DESK_ENABLE |
+		     VIDCFG_CURS_X11 |
+		     ((cpp - 1) << VIDCFG_PIXFMT_SHIFT) |
+		     (cpp != 1 ? VIDCFG_CLUT_BYPASS : 0);
 
 	/* PLL settings */
 	freq = PICOS2KHZ(info->var.pixclock);
 
-	reg.dacmode = 0;
-	reg.vidcfg  &= ~VIDCFG_2X;
-
-	hdispend = info->var.xres;
-	hsyncsta = hdispend + info->var.right_margin;
-	hsyncend = hsyncsta + info->var.hsync_len;
-	htotal   = hsyncend + info->var.left_margin;	
+	reg.vidcfg &= ~VIDCFG_2X;
 
-	if (freq > par->max_pixclock/2) {
+	if (freq > par->max_pixclock / 2) {
 		freq = freq > par->max_pixclock ? par->max_pixclock : freq;
 		reg.dacmode |= DACMODE_2X;
 		reg.vidcfg  |= VIDCFG_2X;
@@ -591,8 +583,9 @@ static int tdfxfb_set_par(struct fb_info *info)
 		hsyncend >>= 1;
 		htotal   >>= 1;
 	}
-  
-	hd  = wd = (hdispend >> 3) - 1;
+
+	wd = (hdispend >> 3) - 1;
+	hd  = wd;
 	hs  = (hsyncsta >> 3) - 1;
 	he  = (hsyncend >> 3) - 1;
 	ht  = (htotal >> 3) - 1;
@@ -600,28 +593,30 @@ static int tdfxfb_set_par(struct fb_info *info)
 	hbe = ht;
 
 	if ((info->var.vmode & FB_VMODE_MASK) == FB_VMODE_DOUBLE) {
-		vbs = vd = (info->var.yres << 1) - 1;
+		vd = (info->var.yres << 1) - 1;
 		vs  = vd + (info->var.lower_margin << 1);
 		ve  = vs + (info->var.vsync_len << 1);
-		vbe = vt = ve + (info->var.upper_margin << 1) - 1;
+		vt = ve + (info->var.upper_margin << 1) - 1;
+		reg.screensize = info->var.xres | (info->var.yres << 13);
+		reg.vidcfg |= VIDCFG_HALF_MODE;
+		reg.crt[0x09] = 0x80;
 	} else {
-		vbs = vd = info->var.yres - 1;
+		vd = info->var.yres - 1;
 		vs  = vd + info->var.lower_margin;
 		ve  = vs + info->var.vsync_len;
-		vbe = vt = ve + info->var.upper_margin - 1;
+		vt = ve + info->var.upper_margin - 1;
+		reg.screensize = info->var.xres | (info->var.yres << 12);
+		reg.vidcfg &= ~VIDCFG_HALF_MODE;
 	}
-  
+	vbs = vd;
+	vbe = vt;
+
 	/* this is all pretty standard VGA register stuffing */
-	reg.misc[0x00] = 0x0f | 
+	reg.misc[0x00] = 0x0f |
 			(info->var.xres < 400 ? 0xa0 :
 			 info->var.xres < 480 ? 0x60 :
 			 info->var.xres < 768 ? 0xe0 : 0x20);
-     
-	reg.gra[0x00] = 0x00;
-	reg.gra[0x01] = 0x00;
-	reg.gra[0x02] = 0x00;
-	reg.gra[0x03] = 0x00;
-	reg.gra[0x04] = 0x00;
+
 	reg.gra[0x05] = 0x40;
 	reg.gra[0x06] = 0x05;
 	reg.gra[0x07] = 0x0f;
@@ -644,10 +639,7 @@ static int tdfxfb_set_par(struct fb_info *info)
 	reg.att[0x0e] = 0x0e;
 	reg.att[0x0f] = 0x0f;
 	reg.att[0x10] = 0x41;
-	reg.att[0x11] = 0x00;
 	reg.att[0x12] = 0x0f;
-	reg.att[0x13] = 0x00;
-	reg.att[0x14] = 0x00;
 
 	reg.seq[0x00] = 0x03;
 	reg.seq[0x01] = 0x01; /* fixme: clkdiv2? */
@@ -660,146 +652,133 @@ static int tdfxfb_set_par(struct fb_info *info)
 	reg.crt[0x02] = hbs;
 	reg.crt[0x03] = 0x80 | (hbe & 0x1f);
 	reg.crt[0x04] = hs;
-	reg.crt[0x05] = ((hbe & 0x20) << 2) | (he & 0x1f); 
+	reg.crt[0x05] = ((hbe & 0x20) << 2) | (he & 0x1f);
 	reg.crt[0x06] = vt;
 	reg.crt[0x07] = ((vs & 0x200) >> 2) |
 			((vd & 0x200) >> 3) |
 			((vt & 0x200) >> 4) | 0x10 |
 			((vbs & 0x100) >> 5) |
-			((vs  & 0x100) >> 6) |
-			((vd  & 0x100) >> 7) |
-			((vt  & 0x100) >> 8);
-	reg.crt[0x08] = 0x00;
-	reg.crt[0x09] = 0x40 | ((vbs & 0x200) >> 4); 
-	reg.crt[0x0a] = 0x00;
-	reg.crt[0x0b] = 0x00;
-	reg.crt[0x0c] = 0x00;
-	reg.crt[0x0d] = 0x00;
-	reg.crt[0x0e] = 0x00;
-	reg.crt[0x0f] = 0x00;
+			((vs & 0x100) >> 6) |
+			((vd & 0x100) >> 7) |
+			((vt & 0x100) >> 8);
+	reg.crt[0x09] |= 0x40 | ((vbs & 0x200) >> 4);
 	reg.crt[0x10] = vs;
-	reg.crt[0x11] = (ve & 0x0f) | 0x20; 
+	reg.crt[0x11] = (ve & 0x0f) | 0x20;
 	reg.crt[0x12] = vd;
 	reg.crt[0x13] = wd;
-	reg.crt[0x14] = 0x00;
 	reg.crt[0x15] = vbs;
-	reg.crt[0x16] = vbe + 1; 
+	reg.crt[0x16] = vbe + 1;
 	reg.crt[0x17] = 0xc3;
 	reg.crt[0x18] = 0xff;
- 
+
 	/* Banshee's nonvga stuff */
-	reg.ext[0x00] = (((ht  & 0x100) >> 8) | 
-			((hd  & 0x100) >> 6) |
+	reg.ext[0x00] = (((ht & 0x100) >> 8) |
+			((hd & 0x100) >> 6) |
 			((hbs & 0x100) >> 4) |
-			((hbe &  0x40) >> 1) |
-			((hs  & 0x100) >> 2) |
-			((he  &  0x20) << 2)); 
-	reg.ext[0x01] = (((vt  & 0x400) >> 10) |
-			((vd  & 0x400) >>  8) | 
-			((vbs & 0x400) >>  6) |
-			((vbe & 0x400) >>  4));
-
-	reg.vgainit0 = 	VGAINIT0_8BIT_DAC     |
+			((hbe & 0x40) >> 1) |
+			((hs & 0x100) >> 2) |
+			((he & 0x20) << 2));
+	reg.ext[0x01] = (((vt & 0x400) >> 10) |
+			((vd & 0x400) >> 8) |
+			((vbs & 0x400) >> 6) |
+			((vbe & 0x400) >> 4));
+
+	reg.vgainit0 =	VGAINIT0_8BIT_DAC     |
 			VGAINIT0_EXT_ENABLE   |
 			VGAINIT0_WAKEUP_3C3   |
 			VGAINIT0_ALT_READBACK |
 			VGAINIT0_EXTSHIFTOUT;
 	reg.vgainit1 = tdfx_inl(par, VGAINIT1) & 0x1fffff;
 
+	if (hwcursor)
+		reg.curspataddr = info->fix.smem_len;
+
 	reg.cursloc   = 0;
-   
-	reg.cursc0    = 0; 
+
+	reg.cursc0    = 0;
 	reg.cursc1    = 0xffffff;
-   
-	reg.stride    = info->var.xres * cpp;
-	reg.startaddr = par->baseline * reg.stride;
-	reg.srcbase   = reg.startaddr;
-	reg.dstbase   = reg.startaddr;
 
-	/* PLL settings */
-	freq = PICOS2KHZ(info->var.pixclock);
+	reg.stride    = info->var.xres * cpp;
+	reg.startaddr = info->var.yoffset * reg.stride
+			+ info->var.xoffset * cpp;
 
-	reg.dacmode &= ~DACMODE_2X;
-	reg.vidcfg  &= ~VIDCFG_2X;
-	if (freq > par->max_pixclock/2) {
-		freq = freq > par->max_pixclock ? par->max_pixclock : freq;
-		reg.dacmode |= DACMODE_2X;
-		reg.vidcfg  |= VIDCFG_2X;
-	}
 	reg.vidpll = do_calc_pll(freq, &fout);
 #if 0
 	reg.mempll = do_calc_pll(..., &fout);
 	reg.gfxpll = do_calc_pll(..., &fout);
 #endif
 
-	if ((info->var.vmode & FB_VMODE_MASK) == FB_VMODE_DOUBLE) {
-		reg.screensize = info->var.xres | (info->var.yres << 13);
-		reg.vidcfg |= VIDCFG_HALF_MODE;
-		reg.crt[0x09] |= 0x80;
-	} else {
-		reg.screensize = info->var.xres | (info->var.yres << 12);
-		reg.vidcfg &= ~VIDCFG_HALF_MODE;
-	}
 	if ((info->var.vmode & FB_VMODE_MASK) == FB_VMODE_INTERLACED)
 		reg.vidcfg |= VIDCFG_INTERLACE;
 	reg.miscinit0 = tdfx_inl(par, MISCINIT0);
 
 #if defined(__BIG_ENDIAN)
 	switch (info->var.bits_per_pixel) {
-		case 8:
-		case 24:
-			reg.miscinit0 &= ~(1 << 30);
-			reg.miscinit0 &= ~(1 << 31);
-			break;
-		case 16:
-			reg.miscinit0 |= (1 << 30);
-			reg.miscinit0 |= (1 << 31);
-			break;
-		case 32:
-			reg.miscinit0 |= (1 << 30);
-			reg.miscinit0 &= ~(1 << 31);
-			break;
+	case 8:
+	case 24:
+		reg.miscinit0 &= ~(1 << 30);
+		reg.miscinit0 &= ~(1 << 31);
+		break;
+	case 16:
+		reg.miscinit0 |= (1 << 30);
+		reg.miscinit0 |= (1 << 31);
+		break;
+	case 32:
+		reg.miscinit0 |= (1 << 30);
+		reg.miscinit0 &= ~(1 << 31);
+		break;
 	}
-#endif 
+#endif
 	do_write_regs(info, &reg);
 
 	/* Now change fb_fix_screeninfo according to changes in par */
-	info->fix.line_length = info->var.xres * ((info->var.bits_per_pixel + 7)>>3);
-	info->fix.visual = (info->var.bits_per_pixel == 8) 
+	info->fix.line_length = reg.stride;
+	info->fix.visual = (info->var.bits_per_pixel == 8)
 				? FB_VISUAL_PSEUDOCOLOR
 				: FB_VISUAL_TRUECOLOR;
-	DPRINTK("Graphics mode is now set at %dx%d depth %d\n", info->var.xres, info->var.yres, info->var.bits_per_pixel);
-	return 0;	
+	DPRINTK("Graphics mode is now set at %dx%d depth %d\n",
+		info->var.xres, info->var.yres, info->var.bits_per_pixel);
+	return 0;
 }
 
 /* A handy macro shamelessly pinched from matroxfb */
-#define CNVT_TOHW(val,width) ((((val)<<(width))+0x7FFF-(val))>>16)
+#define CNVT_TOHW(val, width) ((((val) << (width)) + 0x7FFF - (val)) >> 16)
 
-static int tdfxfb_setcolreg(unsigned regno, unsigned red, unsigned green,  
-			    unsigned blue,unsigned transp,struct fb_info *info) 
+static int tdfxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+			    unsigned blue, unsigned transp,
+			    struct fb_info *info)
 {
 	struct tdfx_par *par = info->par;
 	u32 rgbcol;
-   
-	if (regno >= info->cmap.len || regno > 255) return 1;
-   
+
+	if (regno >= info->cmap.len || regno > 255)
+		return 1;
+
+	/* grayscale works only partially under directcolor */
+	if (info->var.grayscale) {
+		/* grayscale = 0.30*R + 0.59*G + 0.11*B */
+		blue = (red * 77 + green * 151 + blue * 28) >> 8;
+		green = blue;
+		red = blue;
+	}
+
 	switch (info->fix.visual) {
 	case FB_VISUAL_PSEUDOCOLOR:
-		rgbcol =(((u32)red   & 0xff00) << 8) |
-			(((u32)green & 0xff00) << 0) |
-			(((u32)blue  & 0xff00) >> 8);
+		rgbcol = (((u32)red   & 0xff00) << 8) |
+			 (((u32)green & 0xff00) << 0) |
+			 (((u32)blue  & 0xff00) >> 8);
 		do_setpalentry(par, regno, rgbcol);
 		break;
 	/* Truecolor has no hardware color palettes. */
 	case FB_VISUAL_TRUECOLOR:
 		if (regno < 16) {
-			rgbcol = (CNVT_TOHW( red, info->var.red.length) <<
+			rgbcol = (CNVT_TOHW(red, info->var.red.length) <<
 				  info->var.red.offset) |
-				(CNVT_TOHW( green, info->var.green.length) <<
+				(CNVT_TOHW(green, info->var.green.length) <<
 				 info->var.green.offset) |
-				(CNVT_TOHW( blue, info->var.blue.length) <<
+				(CNVT_TOHW(blue, info->var.blue.length) <<
 				 info->var.blue.offset) |
-				(CNVT_TOHW( transp, info->var.transp.length) <<
+				(CNVT_TOHW(transp, info->var.transp.length) <<
 				 info->var.transp.offset);
 			par->palette[regno] = rgbcol;
 		}
@@ -815,287 +794,325 @@ static int tdfxfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 
 /* 0 unblank, 1 blank, 2 no vsync, 3 no hsync, 4 off */
 static int tdfxfb_blank(int blank, struct fb_info *info)
-{ 
+{
 	struct tdfx_par *par = info->par;
-	u32 dacmode, state = 0, vgablank = 0;
+	int vgablank = 1;
+	u32 dacmode = tdfx_inl(par, DACMODE);
 
-	dacmode = tdfx_inl(par, DACMODE);
+	dacmode &= ~(BIT(1) | BIT(3));
 
 	switch (blank) {
-		case FB_BLANK_UNBLANK: /* Screen: On; HSync: On, VSync: On */
-			state    = 0;
-			vgablank = 0;
-			break;
-		case FB_BLANK_NORMAL: /* Screen: Off; HSync: On, VSync: On */
-			state    = 0;
-			vgablank = 1;
-			break;
-		case FB_BLANK_VSYNC_SUSPEND: /* Screen: Off; HSync: On, VSync: Off */
-			state    = BIT(3);
-			vgablank = 1;
-			break;
-		case FB_BLANK_HSYNC_SUSPEND: /* Screen: Off; HSync: Off, VSync: On */
-			state    = BIT(1);
-			vgablank = 1;
-			break;
-		case FB_BLANK_POWERDOWN: /* Screen: Off; HSync: Off, VSync: Off */
-			state    = BIT(1) | BIT(3);
-			vgablank = 1;
-			break;
+	case FB_BLANK_UNBLANK: /* Screen: On; HSync: On, VSync: On */
+		vgablank = 0;
+		break;
+	case FB_BLANK_NORMAL: /* Screen: Off; HSync: On, VSync: On */
+		break;
+	case FB_BLANK_VSYNC_SUSPEND: /* Screen: Off; HSync: On, VSync: Off */
+		dacmode |= BIT(3);
+		break;
+	case FB_BLANK_HSYNC_SUSPEND: /* Screen: Off; HSync: Off, VSync: On */
+		dacmode |= BIT(1);
+		break;
+	case FB_BLANK_POWERDOWN: /* Screen: Off; HSync: Off, VSync: Off */
+		dacmode |= BIT(1) | BIT(3);
+		break;
 	}
 
-	dacmode &= ~(BIT(1) | BIT(3));
-	dacmode |= state;
-	banshee_make_room(par, 1); 
+	banshee_make_room(par, 1);
 	tdfx_outl(par, DACMODE, dacmode);
-	if (vgablank) 
+	if (vgablank)
 		vga_disable_video(par);
 	else
 		vga_enable_video(par);
 	return 0;
 }
 
-/*   
+/*
  * Set the starting position of the visible screen to var->yoffset
- */   
+ */
 static int tdfxfb_pan_display(struct fb_var_screeninfo *var,
-			      struct fb_info *info) 
+			      struct fb_info *info)
 {
 	struct tdfx_par *par = info->par;
-	u32 addr;  	
+	u32 addr = var->yoffset * info->fix.line_length;
 
 	if (nopan || var->xoffset || (var->yoffset > var->yres_virtual))
 		return -EINVAL;
 	if ((var->yoffset + var->yres > var->yres_virtual && nowrap))
 		return -EINVAL;
 
-	addr = var->yoffset * info->fix.line_length;
 	banshee_make_room(par, 1);
 	tdfx_outl(par, VIDDESKSTART, addr);
-   
+
 	info->var.xoffset = var->xoffset;
-	info->var.yoffset = var->yoffset; 
+	info->var.yoffset = var->yoffset;
 	return 0;
 }
 
 #ifdef CONFIG_FB_3DFX_ACCEL
 /*
- * FillRect 2D command (solidfill or invert (via ROP_XOR))   
+ * FillRect 2D command (solidfill or invert (via ROP_XOR))
  */
-static void tdfxfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect) 
+static void tdfxfb_fillrect(struct fb_info *info,
+			    const struct fb_fillrect *rect)
 {
 	struct tdfx_par *par = info->par;
 	u32 bpp = info->var.bits_per_pixel;
 	u32 stride = info->fix.line_length;
-	u32 fmt= stride | ((bpp+((bpp==8) ? 0 : 8)) << 13); 
+	u32 fmt = stride | ((bpp + ((bpp == 8) ? 0 : 8)) << 13);
 	int tdfx_rop;
-   	
-	if (rect->rop == ROP_COPY) 
+	u32 dx = rect->dx;
+	u32 dy = rect->dy;
+	u32 dstbase = 0;
+
+	if (rect->rop == ROP_COPY)
 		tdfx_rop = TDFX_ROP_COPY;
-	else 			 
+	else
 		tdfx_rop = TDFX_ROP_XOR;
 
-	banshee_make_room(par, 5);
-	tdfx_outl(par,	DSTFORMAT, fmt);
+	/* assume always rect->height < 4096 */
+	if (dy + rect->height > 4095) {
+		dstbase = stride * dy;
+		dy = 0;
+	}
+	/* assume always rect->width < 4096 */
+	if (dx + rect->width > 4095) {
+		dstbase += dx * bpp >> 3;
+		dx = 0;
+	}
+	banshee_make_room(par, 6);
+	tdfx_outl(par, DSTFORMAT, fmt);
 	if (info->fix.visual == FB_VISUAL_PSEUDOCOLOR) {
-		tdfx_outl(par,	COLORFORE, rect->color);
+		tdfx_outl(par, COLORFORE, rect->color);
 	} else { /* FB_VISUAL_TRUECOLOR */
 		tdfx_outl(par, COLORFORE, par->palette[rect->color]);
 	}
-	tdfx_outl(par,	COMMAND_2D, COMMAND_2D_FILLRECT | (tdfx_rop << 24));
-	tdfx_outl(par,	DSTSIZE,    rect->width | (rect->height << 16));
-	tdfx_outl(par,	LAUNCH_2D,  rect->dx | (rect->dy << 16));
+	tdfx_outl(par, COMMAND_2D, COMMAND_2D_FILLRECT | (tdfx_rop << 24));
+	tdfx_outl(par, DSTBASE, dstbase);
+	tdfx_outl(par, DSTSIZE, rect->width | (rect->height << 16));
+	tdfx_outl(par, LAUNCH_2D, dx | (dy << 16));
 }
 
 /*
- * Screen-to-Screen BitBlt 2D command (for the bmove fb op.) 
+ * Screen-to-Screen BitBlt 2D command (for the bmove fb op.)
  */
-static void tdfxfb_copyarea(struct fb_info *info, const struct fb_copyarea *area)  
+static void tdfxfb_copyarea(struct fb_info *info,
+			    const struct fb_copyarea *area)
 {
 	struct tdfx_par *par = info->par;
-   	u32 sx = area->sx, sy = area->sy, dx = area->dx, dy = area->dy;
+	u32 sx = area->sx, sy = area->sy, dx = area->dx, dy = area->dy;
 	u32 bpp = info->var.bits_per_pixel;
 	u32 stride = info->fix.line_length;
 	u32 blitcmd = COMMAND_2D_S2S_BITBLT | (TDFX_ROP_COPY << 24);
-	u32 fmt = stride | ((bpp+((bpp==8) ? 0 : 8)) << 13); 
-	
+	u32 fmt = stride | ((bpp + ((bpp == 8) ? 0 : 8)) << 13);
+	u32 dstbase = 0;
+	u32 srcbase = 0;
+
+	/* assume always area->height < 4096 */
+	if (sy + area->height > 4095) {
+		srcbase = stride * sy;
+		sy = 0;
+	}
+	/* assume always area->width < 4096 */
+	if (sx + area->width > 4095) {
+		srcbase += sx * bpp >> 3;
+		sx = 0;
+	}
+	/* assume always area->height < 4096 */
+	if (dy + area->height > 4095) {
+		dstbase = stride * dy;
+		dy = 0;
+	}
+	/* assume always area->width < 4096 */
+	if (dx + area->width > 4095) {
+		dstbase += dx * bpp >> 3;
+		dx = 0;
+	}
+
 	if (area->sx <= area->dx) {
-		//-X 
+		/* -X */
 		blitcmd |= BIT(14);
 		sx += area->width - 1;
 		dx += area->width - 1;
 	}
 	if (area->sy <= area->dy) {
-		//-Y  
+		/* -Y */
 		blitcmd |= BIT(15);
 		sy += area->height - 1;
 		dy += area->height - 1;
 	}
-   
-	banshee_make_room(par, 6);
 
-	tdfx_outl(par,	SRCFORMAT, fmt);
-	tdfx_outl(par,	DSTFORMAT, fmt);
-	tdfx_outl(par,	COMMAND_2D, blitcmd); 
-	tdfx_outl(par,	DSTSIZE,   area->width | (area->height << 16));
-	tdfx_outl(par,	DSTXY,     dx | (dy << 16));
-	tdfx_outl(par,	LAUNCH_2D, sx | (sy << 16)); 
+	banshee_make_room(par, 8);
+
+	tdfx_outl(par, SRCFORMAT, fmt);
+	tdfx_outl(par, DSTFORMAT, fmt);
+	tdfx_outl(par, COMMAND_2D, blitcmd);
+	tdfx_outl(par, DSTSIZE, area->width | (area->height << 16));
+	tdfx_outl(par, DSTXY, dx | (dy << 16));
+	tdfx_outl(par, SRCBASE, srcbase);
+	tdfx_outl(par, DSTBASE, dstbase);
+	tdfx_outl(par, LAUNCH_2D, sx | (sy << 16));
 }
 
-static void tdfxfb_imageblit(struct fb_info *info, const struct fb_image *image) 
+static void tdfxfb_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct tdfx_par *par = info->par;
-	int size = image->height * ((image->width * image->depth + 7)>>3);
+	int size = image->height * ((image->width * image->depth + 7) >> 3);
 	int fifo_free;
 	int i, stride = info->fix.line_length;
 	u32 bpp = info->var.bits_per_pixel;
-	u32 dstfmt = stride | ((bpp+((bpp==8) ? 0 : 8)) << 13); 
+	u32 dstfmt = stride | ((bpp + ((bpp == 8) ? 0 : 8)) << 13);
 	u8 *chardata = (u8 *) image->data;
 	u32 srcfmt;
+	u32 dx = image->dx;
+	u32 dy = image->dy;
+	u32 dstbase = 0;
 
 	if (image->depth != 1) {
-		//banshee_make_room(par, 6 + ((size + 3) >> 2));
-		//srcfmt = stride | ((bpp+((bpp==8) ? 0 : 8)) << 13) | 0x400000;
+#ifdef BROKEN_CODE
+		banshee_make_room(par, 6 + ((size + 3) >> 2));
+		srcfmt = stride | ((bpp + ((bpp == 8) ? 0 : 8)) << 13) |
+			0x400000;
+#else
 		cfb_imageblit(info, image);
+#endif
 		return;
-	} else {
-		banshee_make_room(par, 8);
-		switch (info->fix.visual) {
-			case FB_VISUAL_PSEUDOCOLOR:
+	}
+	banshee_make_room(par, 9);
+	switch (info->fix.visual) {
+	case FB_VISUAL_PSEUDOCOLOR:
 		tdfx_outl(par, COLORFORE, image->fg_color);
 		tdfx_outl(par, COLORBACK, image->bg_color);
-				break;
-			case FB_VISUAL_TRUECOLOR:
-			default:
-				tdfx_outl(par, COLORFORE,
-					  par->palette[image->fg_color]);
-				tdfx_outl(par, COLORBACK,
-					  par->palette[image->bg_color]);
-		}
+		break;
+	case FB_VISUAL_TRUECOLOR:
+	default:
+		tdfx_outl(par, COLORFORE,
+			  par->palette[image->fg_color]);
+		tdfx_outl(par, COLORBACK,
+			  par->palette[image->bg_color]);
+	}
 #ifdef __BIG_ENDIAN
-		srcfmt = 0x400000 | BIT(20);
+	srcfmt = 0x400000 | BIT(20);
 #else
-		srcfmt = 0x400000;
+	srcfmt = 0x400000;
 #endif
-	}	
+	/* assume always image->height < 4096 */
+	if (dy + image->height > 4095) {
+		dstbase = stride * dy;
+		dy = 0;
+	}
+	/* assume always image->width < 4096 */
+	if (dx + image->width > 4095) {
+		dstbase += dx * bpp >> 3;
+		dx = 0;
+	}
 
-	tdfx_outl(par,	SRCXY,     0);
-	tdfx_outl(par,	DSTXY,     image->dx | (image->dy << 16));
-	tdfx_outl(par,	COMMAND_2D, COMMAND_2D_H2S_BITBLT | (TDFX_ROP_COPY << 24));
-	tdfx_outl(par,	SRCFORMAT, srcfmt);
-	tdfx_outl(par,	DSTFORMAT, dstfmt);
-	tdfx_outl(par,	DSTSIZE,   image->width | (image->height << 16));
+	tdfx_outl(par, DSTBASE, dstbase);
+	tdfx_outl(par, SRCXY, 0);
+	tdfx_outl(par, DSTXY, dx | (dy << 16));
+	tdfx_outl(par, COMMAND_2D,
+		  COMMAND_2D_H2S_BITBLT | (TDFX_ROP_COPY << 24));
+	tdfx_outl(par, SRCFORMAT, srcfmt);
+	tdfx_outl(par, DSTFORMAT, dstfmt);
+	tdfx_outl(par, DSTSIZE, image->width | (image->height << 16));
 
 	/* A count of how many free FIFO entries we've requested.
 	 * When this goes negative, we need to request more. */
 	fifo_free = 0;
 
-	/* Send four bytes at a time of data */	
-	for (i = (size >> 2) ; i > 0; i--) { 
-		if(--fifo_free < 0) {
-			fifo_free=31;
-			banshee_make_room(par,fifo_free);
+	/* Send four bytes at a time of data */
+	for (i = (size >> 2); i > 0; i--) {
+		if (--fifo_free < 0) {
+			fifo_free = 31;
+			banshee_make_room(par, fifo_free);
 		}
-		tdfx_outl(par,	LAUNCH_2D,*(u32*)chardata);
-		chardata += 4; 
-	}	
-
-	/* Send the leftovers now */	
-	banshee_make_room(par,3);
-	i = size%4;	
-	switch (i) {
-		case 0: break;
-		case 1:  tdfx_outl(par,	LAUNCH_2D,*chardata); break;
-		case 2:  tdfx_outl(par,	LAUNCH_2D,*(u16*)chardata); break;
-		case 3:  tdfx_outl(par,	LAUNCH_2D,*(u16*)chardata | ((chardata[3]) << 24)); break;
+		tdfx_outl(par, LAUNCH_2D, *(u32 *)chardata);
+		chardata += 4;
+	}
+
+	/* Send the leftovers now */
+	banshee_make_room(par, 3);
+	switch (size % 4) {
+	case 0:
+		break;
+	case 1:
+		tdfx_outl(par, LAUNCH_2D, *chardata);
+		break;
+	case 2:
+		tdfx_outl(par, LAUNCH_2D, *(u16 *)chardata);
+		break;
+	case 3:
+		tdfx_outl(par, LAUNCH_2D,
+			*(u16 *)chardata | (chardata[3] << 24));
+		break;
 	}
 }
 #endif /* CONFIG_FB_3DFX_ACCEL */
 
-#ifdef TDFX_HARDWARE_CURSOR
 static int tdfxfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 {
 	struct tdfx_par *par = info->par;
-	unsigned long flags;
+	u32 vidcfg;
 
-	/*
-	 * If the cursor is not be changed this means either we want the 
-	 * current cursor state (if enable is set) or we want to query what
-	 * we can do with the cursor (if enable is not set) 
- 	 */
-	if (!cursor->set) return 0;
+	if (!hwcursor)
+		return -EINVAL;	/* just to force soft_cursor() call */
 
-	/* Too large of a cursor :-( */
-	if (cursor->image.width > 64 || cursor->image.height > 64)
-		return -ENXIO;
+	/* Too large of a cursor or wrong bpp :-( */
+	if (cursor->image.width > 64 ||
+	    cursor->image.height > 64 ||
+	    cursor->image.depth > 1)
+		return -EINVAL;
 
-	/* 
-	 * If we are going to be changing things we should disable
-	 * the cursor first 
-	 */
-	if (info->cursor.enable) {
-		spin_lock_irqsave(&par->DAClock, flags);
-		info->cursor.enable = 0;
-		del_timer(&(par->hwcursor.timer));
-		tdfx_outl(par, VIDPROCCFG, par->hwcursor.disable);
-		spin_unlock_irqrestore(&par->DAClock, flags);
-	}
+	vidcfg = tdfx_inl(par, VIDPROCCFG);
+	if (cursor->enable)
+		tdfx_outl(par, VIDPROCCFG, vidcfg | VIDCFG_HWCURSOR_ENABLE);
+	else
+		tdfx_outl(par, VIDPROCCFG, vidcfg & ~VIDCFG_HWCURSOR_ENABLE);
 
-	/* Disable the Cursor */
-	if ((cursor->set && FB_CUR_SETCUR) && !cursor->enable)
+	/*
+	 * If the cursor is not to be changed this means either we want the
+	 * current cursor state (if enable is set) or we want to query what
+	 * we can do with the cursor (if enable is not set)
+	 */
+	if (!cursor->set)
 		return 0;
 
 	/* fix cursor color - XFree86 forgets to restore it properly */
-	if (cursor->set && FB_CUR_SETCMAP) {
-		struct fb_cmap cmap = cursor->image.cmap;
+	if (cursor->set & FB_CUR_SETCMAP) {
+		struct fb_cmap cmap = info->cmap;
+		u32 bg_idx = cursor->image.bg_color;
+		u32 fg_idx = cursor->image.fg_color;
 		unsigned long bg_color, fg_color;
 
-		cmap.len = 2; /* Voodoo 3+ only support 2 color cursors */
-		fg_color = ((cmap.red[cmap.start] << 16) |
-			    (cmap.green[cmap.start] << 8)  |
-			    (cmap.blue[cmap.start]));
-		bg_color = ((cmap.red[cmap.start+1] << 16) |
-			    (cmap.green[cmap.start+1] << 8) |
-			    (cmap.blue[cmap.start+1]));
-		fb_copy_cmap(&cmap, &info->cursor.image.cmap);
-		spin_lock_irqsave(&par->DAClock, flags);
+		fg_color = (((u32)cmap.red[fg_idx]   & 0xff00) << 8) |
+			   (((u32)cmap.green[fg_idx] & 0xff00) << 0) |
+			   (((u32)cmap.blue[fg_idx]  & 0xff00) >> 8);
+		bg_color = (((u32)cmap.red[bg_idx]   & 0xff00) << 8) |
+			   (((u32)cmap.green[bg_idx] & 0xff00) << 0) |
+			   (((u32)cmap.blue[bg_idx]  & 0xff00) >> 8);
 		banshee_make_room(par, 2);
 		tdfx_outl(par, HWCURC0, bg_color);
 		tdfx_outl(par, HWCURC1, fg_color);
-		spin_unlock_irqrestore(&par->DAClock, flags);
 	}
 
-	if (cursor->set && FB_CUR_SETPOS) {
-		int x, y;
+	if (cursor->set & FB_CUR_SETPOS) {
+		int x = cursor->image.dx;
+		int y = cursor->image.dy - info->var.yoffset;
 
-		x = cursor->image.dx;
-		y = cursor->image.dy;
-		y -= info->var.yoffset;
-		info->cursor.image.dx = x;
-		info->cursor.image.dy = y;
 		x += 63;
 		y += 63;
-		spin_lock_irqsave(&par->DAClock, flags);
 		banshee_make_room(par, 1);
 		tdfx_outl(par, HWCURLOC, (y << 16) + x);
-		spin_unlock_irqrestore(&par->DAClock, flags);
 	}
-
-	/* Not supported so we fake it */
-	if (cursor->set && FB_CUR_SETHOT) {
-		info->cursor.hot.x = cursor->hot.x;
-		info->cursor.hot.y = cursor->hot.y;
-	}
-
-	if (cursor->set && FB_CUR_SETSHAPE) {
+	if (cursor->set & (FB_CUR_SETIMAGE | FB_CUR_SETSHAPE)) {
 		/*
-	 	 * Voodoo 3 and above cards use 2 monochrome cursor patterns.
+		 * Voodoo 3 and above cards use 2 monochrome cursor patterns.
 		 *    The reason is so the card can fetch 8 words at a time
 		 * and are stored on chip for use for the next 8 scanlines.
 		 * This reduces the number of times for access to draw the
 		 * cursor for each screen refresh.
 		 *    Each pattern is a bitmap of 64 bit wide and 64 bit high
-		 * (total of 8192 bits or 1024 Kbytes). The two patterns are
+		 * (total of 8192 bits or 1024 bytes). The two patterns are
 		 * stored in such a way that pattern 0 always resides in the
 		 * lower half (least significant 64 bits) of a 128 bit word
 		 * and pattern 1 the upper half. If you examine the data of
@@ -1106,50 +1123,54 @@ static int tdfxfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 		 * (128 bits) which is the maximum cursor width times two for
 		 * the two monochrome patterns.
 		 */
-		u8 *cursorbase = (u8 *) info->cursor.image.data;
-		char *bitmap = (char *)cursor->image.data;
-		char *mask = (char *) cursor->mask;
-		int i, j, k, h = 0;
-
-		for (i = 0; i < 64; i++) {
-			if (i < cursor->image.height) {
-				j = (cursor->image.width + 7) >> 3;
-				k = 8 - j;
-
-				for (;j > 0; j--) {
-				/* Pattern 0. Copy the cursor bitmap to it */
-					fb_writeb(*bitmap, cursorbase + h);
-					bitmap++;
-				/* Pattern 1. Copy the cursor mask to it */
-					fb_writeb(*mask, cursorbase + h + 8);
-					mask++;
-					h++;
-				}
-				for (;k > 0; k--) {
-					fb_writeb(0, cursorbase + h);
-					fb_writeb(~0, cursorbase + h + 8);
-					h++;
-				}
-			} else {
-				fb_writel(0, cursorbase + h);
-				fb_writel(0, cursorbase + h + 4);
-				fb_writel(~0, cursorbase + h + 8);
-				fb_writel(~0, cursorbase + h + 12);
-				h += 16;
+		u8 __iomem *cursorbase = info->screen_base + info->fix.smem_len;
+		u8 *bitmap = (u8 *)cursor->image.data;
+		u8 *mask = (u8 *)cursor->mask;
+		int i;
+
+		fb_memset(cursorbase, 0, 1024);
+
+		for (i = 0; i < cursor->image.height; i++) {
+			int h = 0;
+			int j = (cursor->image.width + 7) >> 3;
+
+			for (; j > 0; j--) {
+				u8 data = *mask ^ *bitmap;
+				if (cursor->rop == ROP_COPY)
+					data = *mask & *bitmap;
+				/* Pattern 0. Copy the cursor mask to it */
+				fb_writeb(*mask, cursorbase + h);
+				mask++;
+				/* Pattern 1. Copy the cursor bitmap to it */
+				fb_writeb(data, cursorbase + h + 8);
+				bitmap++;
+				h++;
 			}
+			cursorbase += 16;
 		}
 	}
-	/* Turn the cursor on */
-	cursor->enable = 1;
-	info->cursor = *cursor;
-	mod_timer(&par->hwcursor.timer, jiffies+HZ/2);
-	spin_lock_irqsave(&par->DAClock, flags);
-	banshee_make_room(par, 1);
-	tdfx_outl(par, VIDPROCCFG, par->hwcursor.enable);
-	spin_unlock_irqrestore(&par->DAClock, flags);
 	return 0;
 }
+
+static struct fb_ops tdfxfb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_check_var	= tdfxfb_check_var,
+	.fb_set_par	= tdfxfb_set_par,
+	.fb_setcolreg	= tdfxfb_setcolreg,
+	.fb_blank	= tdfxfb_blank,
+	.fb_pan_display	= tdfxfb_pan_display,
+	.fb_sync	= banshee_wait_idle,
+	.fb_cursor	= tdfxfb_cursor,
+#ifdef CONFIG_FB_3DFX_ACCEL
+	.fb_fillrect	= tdfxfb_fillrect,
+	.fb_copyarea	= tdfxfb_copyarea,
+	.fb_imageblit	= tdfxfb_imageblit,
+#else
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
 #endif
+};
 
 /**
  *      tdfxfb_probe - Device Initializiation
@@ -1161,14 +1182,15 @@ static int tdfxfb_cursor(struct fb_info *info, struct fb_cursor *cursor)
  *
  */
 static int __devinit tdfxfb_probe(struct pci_dev *pdev,
-                                  const struct pci_device_id *id)
+				  const struct pci_device_id *id)
 {
 	struct tdfx_par *default_par;
 	struct fb_info *info;
 	int err, lpitch;
 
-	if ((err = pci_enable_device(pdev))) {
-		printk(KERN_WARNING "tdfxfb: Can't enable pdev: %d\n", err);
+	err = pci_enable_device(pdev);
+	if (err) {
+		printk(KERN_ERR "tdfxfb: Can't enable pdev: %d\n", err);
 		return err;
 	}
 
@@ -1176,139 +1198,145 @@ static int __devinit tdfxfb_probe(struct pci_dev *pdev,
 
 	if (!info)
 		return -ENOMEM;
-		
+
 	default_par = info->par;
- 
+
 	/* Configure the default fb_fix_screeninfo first */
 	switch (pdev->device) {
-		case PCI_DEVICE_ID_3DFX_BANSHEE:	
-			strcat(tdfx_fix.id, " Banshee");
-			default_par->max_pixclock = BANSHEE_MAX_PIXCLOCK;
-			break;
-		case PCI_DEVICE_ID_3DFX_VOODOO3:
-			strcat(tdfx_fix.id, " Voodoo3");
-			default_par->max_pixclock = VOODOO3_MAX_PIXCLOCK;
-			break;
-		case PCI_DEVICE_ID_3DFX_VOODOO5:
-			strcat(tdfx_fix.id, " Voodoo5");
-			default_par->max_pixclock = VOODOO5_MAX_PIXCLOCK;
-			break;
+	case PCI_DEVICE_ID_3DFX_BANSHEE:
+		strcat(tdfx_fix.id, " Banshee");
+		default_par->max_pixclock = BANSHEE_MAX_PIXCLOCK;
+		break;
+	case PCI_DEVICE_ID_3DFX_VOODOO3:
+		strcat(tdfx_fix.id, " Voodoo3");
+		default_par->max_pixclock = VOODOO3_MAX_PIXCLOCK;
+		break;
+	case PCI_DEVICE_ID_3DFX_VOODOO5:
+		strcat(tdfx_fix.id, " Voodoo5");
+		default_par->max_pixclock = VOODOO5_MAX_PIXCLOCK;
+		break;
 	}
 
 	tdfx_fix.mmio_start = pci_resource_start(pdev, 0);
 	tdfx_fix.mmio_len = pci_resource_len(pdev, 0);
-	default_par->regbase_virt = ioremap_nocache(tdfx_fix.mmio_start, tdfx_fix.mmio_len);
-	if (!default_par->regbase_virt) {
-		printk("fb: Can't remap %s register area.\n", tdfx_fix.id);
+	if (!request_mem_region(tdfx_fix.mmio_start, tdfx_fix.mmio_len,
+				"tdfx regbase")) {
+		printk(KERN_ERR "tdfxfb: Can't reserve regbase\n");
 		goto out_err;
 	}
-    
-	if (!request_mem_region(pci_resource_start(pdev, 0),
-	    pci_resource_len(pdev, 0), "tdfx regbase")) {
-		printk(KERN_WARNING "tdfxfb: Can't reserve regbase\n");
-		goto out_err;
-	} 
+
+	default_par->regbase_virt =
+		ioremap_nocache(tdfx_fix.mmio_start, tdfx_fix.mmio_len);
+	if (!default_par->regbase_virt) {
+		printk(KERN_ERR "fb: Can't remap %s register area.\n",
+				tdfx_fix.id);
+		goto out_err_regbase;
+	}
 
 	tdfx_fix.smem_start = pci_resource_start(pdev, 1);
-	if (!(tdfx_fix.smem_len = do_lfb_size(default_par, pdev->device))) {
-		printk("fb: Can't count %s memory.\n", tdfx_fix.id);
-		release_mem_region(pci_resource_start(pdev, 0),
-				   pci_resource_len(pdev, 0));
-		goto out_err;	
+	tdfx_fix.smem_len = do_lfb_size(default_par, pdev->device);
+	if (!tdfx_fix.smem_len) {
+		printk(KERN_ERR "fb: Can't count %s memory.\n", tdfx_fix.id);
+		goto out_err_regbase;
 	}
 
-	if (!request_mem_region(pci_resource_start(pdev, 1),
-	     pci_resource_len(pdev, 1), "tdfx smem")) {
-		printk(KERN_WARNING "tdfxfb: Can't reserve smem\n");
-		release_mem_region(pci_resource_start(pdev, 0),
-				   pci_resource_len(pdev, 0));
-		goto out_err;
+	if (!request_mem_region(tdfx_fix.smem_start,
+				pci_resource_len(pdev, 1), "tdfx smem")) {
+		printk(KERN_ERR "tdfxfb: Can't reserve smem\n");
+		goto out_err_regbase;
 	}
 
-	info->screen_base = ioremap_nocache(tdfx_fix.smem_start, 
+	info->screen_base = ioremap_nocache(tdfx_fix.smem_start,
 					    tdfx_fix.smem_len);
 	if (!info->screen_base) {
-		printk("fb: Can't remap %s framebuffer.\n", tdfx_fix.id);
-		release_mem_region(pci_resource_start(pdev, 1),
-				   pci_resource_len(pdev, 1));
-		release_mem_region(pci_resource_start(pdev, 0),
-				   pci_resource_len(pdev, 0));
-		goto out_err;
+		printk(KERN_ERR "fb: Can't remap %s framebuffer.\n",
+				tdfx_fix.id);
+		goto out_err_screenbase;
 	}
 
 	default_par->iobase = pci_resource_start(pdev, 2);
-    
+
 	if (!request_region(pci_resource_start(pdev, 2),
-	    pci_resource_len(pdev, 2), "tdfx iobase")) {
-		printk(KERN_WARNING "tdfxfb: Can't reserve iobase\n");
-		release_mem_region(pci_resource_start(pdev, 1),
-				   pci_resource_len(pdev, 1));
-		release_mem_region(pci_resource_start(pdev, 0),
-				   pci_resource_len(pdev, 0));
-		goto out_err;
+			    pci_resource_len(pdev, 2), "tdfx iobase")) {
+		printk(KERN_ERR "tdfxfb: Can't reserve iobase\n");
+		goto out_err_screenbase;
 	}
 
-	printk("fb: %s memory = %dK\n", tdfx_fix.id, tdfx_fix.smem_len >> 10);
+	printk(KERN_INFO "fb: %s memory = %dK\n", tdfx_fix.id,
+			tdfx_fix.smem_len >> 10);
+
+	default_par->mtrr_handle = -1;
+	if (!nomtrr)
+		default_par->mtrr_handle =
+			mtrr_add(tdfx_fix.smem_start, tdfx_fix.smem_len,
+				 MTRR_TYPE_WRCOMB, 1);
 
 	tdfx_fix.ypanstep	= nopan ? 0 : 1;
 	tdfx_fix.ywrapstep	= nowrap ? 0 : 1;
-   
+
 	info->fbops		= &tdfxfb_ops;
-	info->fix		= tdfx_fix; 	
+	info->fix		= tdfx_fix;
 	info->pseudo_palette	= default_par->palette;
 	info->flags		= FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
 #ifdef CONFIG_FB_3DFX_ACCEL
-	info->flags             |= FBINFO_HWACCEL_FILLRECT |
-		FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_IMAGEBLIT;
+	info->flags		|= FBINFO_HWACCEL_FILLRECT |
+				   FBINFO_HWACCEL_COPYAREA |
+				   FBINFO_HWACCEL_IMAGEBLIT |
+				   FBINFO_READS_FAST;
 #endif
+	/* reserve 8192 bits for cursor */
+	/* the 2.4 driver says PAGE_MASK boundary is not enough for Voodoo4 */
+	if (hwcursor)
+		info->fix.smem_len = (info->fix.smem_len - 1024) &
+					(PAGE_MASK << 1);
 
 	if (!mode_option)
 		mode_option = "640x480@60";
-	 
-	err = fb_find_mode(&info->var, info, mode_option, NULL, 0, NULL, 8); 
+
+	err = fb_find_mode(&info->var, info, mode_option, NULL, 0, NULL, 8);
 	if (!err || err == 4)
 		info->var = tdfx_var;
 
 	/* maximize virtual vertical length */
 	lpitch = info->var.xres_virtual * ((info->var.bits_per_pixel + 7) >> 3);
-	info->var.yres_virtual = info->fix.smem_len/lpitch;
+	info->var.yres_virtual = info->fix.smem_len / lpitch;
 	if (info->var.yres_virtual < info->var.yres)
-		goto out_err;
-
-#ifdef CONFIG_FB_3DFX_ACCEL
-	/*
-	 * FIXME: Limit var->yres_virtual to 4096 because of screen artifacts
-	 * during scrolling. This is only present if 2D acceleration is
-	 * enabled.
-	 */
-	if (info->var.yres_virtual > 4096)
-		info->var.yres_virtual = 4096;
-#endif /* CONFIG_FB_3DFX_ACCEL */
+		goto out_err_iobase;
 
 	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
-		printk(KERN_WARNING "tdfxfb: Can't allocate color map\n");
-		goto out_err;
+		printk(KERN_ERR "tdfxfb: Can't allocate color map\n");
+		goto out_err_iobase;
 	}
 
 	if (register_framebuffer(info) < 0) {
-		printk("tdfxfb: can't register framebuffer\n");
+		printk(KERN_ERR "tdfxfb: can't register framebuffer\n");
 		fb_dealloc_cmap(&info->cmap);
-		goto out_err;
+		goto out_err_iobase;
 	}
 	/*
 	 * Our driver data
 	 */
 	pci_set_drvdata(pdev, info);
-	return 0; 
+	return 0;
 
-out_err:
+out_err_iobase:
+	if (default_par->mtrr_handle >= 0)
+		mtrr_del(default_par->mtrr_handle, info->fix.smem_start,
+			 info->fix.smem_len);
+	release_mem_region(pci_resource_start(pdev, 2),
+			   pci_resource_len(pdev, 2));
+out_err_screenbase:
+	if (info->screen_base)
+		iounmap(info->screen_base);
+	release_mem_region(tdfx_fix.smem_start, pci_resource_len(pdev, 1));
+out_err_regbase:
 	/*
 	 * Cleanup after anything that was remapped/allocated.
 	 */
 	if (default_par->regbase_virt)
 		iounmap(default_par->regbase_virt);
-	if (info->screen_base)
-		iounmap(info->screen_base);
+	release_mem_region(tdfx_fix.mmio_start, tdfx_fix.mmio_len);
+out_err:
 	framebuffer_release(info);
 	return -ENXIO;
 }
@@ -1316,7 +1344,7 @@ out_err:
 #ifndef MODULE
 static void tdfxfb_setup(char *options)
 {
-	char* this_opt;
+	char *this_opt;
 
 	if (!options || !*options)
 		return;
@@ -1324,10 +1352,16 @@ static void tdfxfb_setup(char *options)
 	while ((this_opt = strsep(&options, ",")) != NULL) {
 		if (!*this_opt)
 			continue;
-		if(!strcmp(this_opt, "nopan")) {
+		if (!strcmp(this_opt, "nopan")) {
 			nopan = 1;
-		} else if(!strcmp(this_opt, "nowrap")) {
+		} else if (!strcmp(this_opt, "nowrap")) {
 			nowrap = 1;
+		} else if (!strncmp(this_opt, "hwcursor=", 9)) {
+			hwcursor = simple_strtoul(this_opt + 9, NULL, 0);
+#ifdef CONFIG_MTRR
+		} else if (!strncmp(this_opt, "nomtrr", 6)) {
+			nomtrr = 1;
+#endif
 		} else {
 			mode_option = this_opt;
 		}
@@ -1350,6 +1384,9 @@ static void __devexit tdfxfb_remove(struct pci_dev *pdev)
 	struct tdfx_par *par = info->par;
 
 	unregister_framebuffer(info);
+	if (par->mtrr_handle >= 0)
+		mtrr_del(par->mtrr_handle, info->fix.smem_start,
+			 info->fix.smem_len);
 	iounmap(par->regbase_virt);
 	iounmap(info->screen_base);
 
@@ -1374,17 +1411,25 @@ static int __init tdfxfb_init(void)
 
 	tdfxfb_setup(option);
 #endif
-        return pci_register_driver(&tdfxfb_driver);
+	return pci_register_driver(&tdfxfb_driver);
 }
 
 static void __exit tdfxfb_exit(void)
 {
-        pci_unregister_driver(&tdfxfb_driver);
+	pci_unregister_driver(&tdfxfb_driver);
 }
 
 MODULE_AUTHOR("Hannu Mallat <hmallat@cc.hut.fi>");
 MODULE_DESCRIPTION("3Dfx framebuffer device driver");
 MODULE_LICENSE("GPL");
- 
+
+module_param(hwcursor, int, 0644);
+MODULE_PARM_DESC(hwcursor, "Enable hardware cursor "
+			"(1=enable, 0=disable, default=1)");
+#ifdef CONFIG_MTRR
+module_param(nomtrr, bool, 0);
+MODULE_PARM_DESC(nomtrr, "Disable MTRR support (default: enabled)");
+#endif
+
 module_init(tdfxfb_init);
 module_exit(tdfxfb_exit);
diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c
index d292a37ec7d..680642c089c 100644
--- a/drivers/video/tgafb.c
+++ b/drivers/video/tgafb.c
@@ -5,7 +5,7 @@
  *	Copyright (C) 1997 Geert Uytterhoeven
  *	Copyright (C) 1999,2000 Martin Lucina, Tom Zerucha
  *	Copyright (C) 2002 Richard Henderson
- *	Copyright (C) 2006 Maciej W. Rozycki
+ *	Copyright (C) 2006, 2007  Maciej W. Rozycki
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License. See the file COPYING in the main directory of this archive for
@@ -13,6 +13,7 @@
  */
 
 #include <linux/bitrev.h>
+#include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/errno.h>
@@ -636,15 +637,6 @@ tgafb_mono_imageblit(struct fb_info *info, const struct fb_image *image)
 
 	is8bpp = info->var.bits_per_pixel == 8;
 
-	/* For copies that aren't pixel expansion, there's little we
-	   can do better than the generic code.  */
-	/* ??? There is a DMA write mode; I wonder if that could be
-	   made to pull the data from the image buffer...  */
-	if (image->depth > 1) {
-		cfb_imageblit(info, image);
-		return;
-	}
-
 	dx = image->dx;
 	dy = image->dy;
 	width = image->width;
@@ -654,6 +646,9 @@ tgafb_mono_imageblit(struct fb_info *info, const struct fb_image *image)
 	line_length = info->fix.line_length;
 	rincr = (width + 7) / 8;
 
+	/* A shift below cannot cope with a zero width.  */
+	if (unlikely(width == 0))
+		return;
 	/* Crop the image to the screen.  */
 	if (dx > vxres || dy > vyres)
 		return;
@@ -709,9 +704,10 @@ tgafb_mono_imageblit(struct fb_info *info, const struct fb_image *image)
 		unsigned long bwidth;
 
 		/* Handle common case of imaging a single character, in
-		   a font less than 32 pixels wide.  */
+		   a font no more than 32 pixels wide.  */
 
-		pixelmask = (1 << width) - 1;
+		/* Avoid a shift by 32; width > 0 implied.  */
+		pixelmask = (2ul << (width - 1)) - 1;
 		pixelmask <<= shift;
 		__raw_writel(pixelmask, regs_base + TGA_PIXELMASK_REG);
 		wmb();
diff --git a/drivers/video/tridentfb.c b/drivers/video/tridentfb.c
index c699864b6f4..70fb4ee2b42 100644
--- a/drivers/video/tridentfb.c
+++ b/drivers/video/tridentfb.c
@@ -1,18 +1,19 @@
 /*
  * Frame buffer driver for Trident Blade and Image series
  *
- * Copyright 2001,2002 - Jani Monoses   <jani@iv.ro>
+ * Copyright 2001, 2002 - Jani Monoses   <jani@iv.ro>
  *
  *
  * CREDITS:(in order of appearance)
- * 	skeletonfb.c by Geert Uytterhoeven and other fb code in drivers/video
- * 	Special thanks ;) to Mattia Crivellini <tia@mclink.it>
- * 	much inspired by the XFree86 4.x Trident driver sources by Alan Hourihane
- * 	the FreeVGA project
- *	Francesco Salvestrini <salvestrini@users.sf.net> XP support,code,suggestions
+ *	skeletonfb.c by Geert Uytterhoeven and other fb code in drivers/video
+ *	Special thanks ;) to Mattia Crivellini <tia@mclink.it>
+ *	much inspired by the XFree86 4.x Trident driver sources
+ *	by Alan Hourihane; the FreeVGA project
+ *	Francesco Salvestrini <salvestrini@users.sf.net> XP support,
+ *	code, suggestions
  * TODO:
- * 	timing value tweaking so it looks good on every monitor in every mode
- *	TGUI acceleration	
+ *	timing value tweaking so it looks good on every monitor in every mode
+ *	TGUI acceleration
  */
 
 #include <linux/module.h>
@@ -26,11 +27,11 @@
 #define VERSION		"0.7.8-NEWAPI"
 
 struct tridentfb_par {
-	int vclk;		//in MHz
-	void __iomem * io_virt;	//iospace virtual memory address
+	int vclk;		/* in MHz */
+	void __iomem *io_virt;	/* iospace virtual memory address */
 };
 
-static unsigned char eng_oper;		//engine operation...
+static unsigned char eng_oper;	/* engine operation... */
 static struct fb_ops tridentfb_ops;
 
 static struct tridentfb_par default_par;
@@ -39,11 +40,10 @@ static struct tridentfb_par default_par;
 static struct fb_info fb_info;
 static u32 pseudo_pal[16];
 
-
 static struct fb_var_screeninfo default_var;
 
 static struct fb_fix_screeninfo tridentfb_fix = {
-	.id = "Trident",	
+	.id = "Trident",
 	.type = FB_TYPE_PACKED_PIXELS,
 	.ypanstep = 1,
 	.visual = FB_VISUAL_PSEUDOCOLOR,
@@ -55,11 +55,10 @@ static int chip_id;
 static int defaultaccel;
 static int displaytype;
 
-
 /* defaults which are normally overriden by user values */
 
 /* video mode */
-static char * mode = "640x480";
+static char *mode = "640x480";
 static int bpp = 8;
 
 static int noaccel;
@@ -74,7 +73,6 @@ static int memsize;
 static int memdiff;
 static int nativex;
 
-
 module_param(mode, charp, 0);
 module_param(bpp, int, 0);
 module_param(center, int, 0);
@@ -86,88 +84,85 @@ module_param(nativex, int, 0);
 module_param(fp, int, 0);
 module_param(crt, int, 0);
 
-
 static int chip3D;
 static int chipcyber;
 
 static int is3Dchip(int id)
 {
-	return 	((id == BLADE3D) || (id == CYBERBLADEE4) ||
-		 (id == CYBERBLADEi7) || (id == CYBERBLADEi7D) ||
-		 (id == CYBER9397) || (id == CYBER9397DVD) ||
-		 (id == CYBER9520) || (id == CYBER9525DVD) ||
-		 (id == IMAGE975) || (id == IMAGE985) ||
-		 (id == CYBERBLADEi1) || (id == CYBERBLADEi1D) ||
-		 (id ==	CYBERBLADEAi1) || (id == CYBERBLADEAi1D) ||
-		 (id ==	CYBERBLADEXPm8) || (id == CYBERBLADEXPm16) ||
-		 (id ==	CYBERBLADEXPAi1));
+	return ((id == BLADE3D) || (id == CYBERBLADEE4) ||
+		(id == CYBERBLADEi7) || (id == CYBERBLADEi7D) ||
+		(id == CYBER9397) || (id == CYBER9397DVD) ||
+		(id == CYBER9520) || (id == CYBER9525DVD) ||
+		(id == IMAGE975) || (id == IMAGE985) ||
+		(id == CYBERBLADEi1) || (id == CYBERBLADEi1D) ||
+		(id == CYBERBLADEAi1) || (id == CYBERBLADEAi1D) ||
+		(id == CYBERBLADEXPm8) || (id == CYBERBLADEXPm16) ||
+		(id == CYBERBLADEXPAi1));
 }
 
 static int iscyber(int id)
 {
 	switch (id) {
-		case CYBER9388:		
-		case CYBER9382:
-		case CYBER9385:
-		case CYBER9397:
-		case CYBER9397DVD:
-		case CYBER9520:
-		case CYBER9525DVD:
-		case CYBERBLADEE4:
-		case CYBERBLADEi7D:
-		case CYBERBLADEi1:
-		case CYBERBLADEi1D: 
-		case CYBERBLADEAi1: 
-		case CYBERBLADEAi1D:
-		case CYBERBLADEXPAi1:
-			return 1;
-		
-		case CYBER9320:
-		case TGUI9660:     
-		case IMAGE975:
-		case IMAGE985:
-		case BLADE3D:
-		case CYBERBLADEi7: /* VIA MPV4 integrated version */
+	case CYBER9388:
+	case CYBER9382:
+	case CYBER9385:
+	case CYBER9397:
+	case CYBER9397DVD:
+	case CYBER9520:
+	case CYBER9525DVD:
+	case CYBERBLADEE4:
+	case CYBERBLADEi7D:
+	case CYBERBLADEi1:
+	case CYBERBLADEi1D:
+	case CYBERBLADEAi1:
+	case CYBERBLADEAi1D:
+	case CYBERBLADEXPAi1:
+		return 1;
 
-		default:
-			/* case CYBERBLDAEXPm8:	 Strange */
-			/* case CYBERBLDAEXPm16: Strange */
-			return 0;
+	case CYBER9320:
+	case TGUI9660:
+	case IMAGE975:
+	case IMAGE985:
+	case BLADE3D:
+	case CYBERBLADEi7:	/* VIA MPV4 integrated version */
+
+	default:
+		/* case CYBERBLADEXPm8:  Strange */
+		/* case CYBERBLADEXPm16: Strange */
+		return 0;
 	}
 }
 
-#define CRT 0x3D0		//CRTC registers offset for color display
+#define CRT 0x3D0		/* CRTC registers offset for color display */
 
 #ifndef TRIDENT_MMIO
 	#define TRIDENT_MMIO 1
 #endif
 
 #if TRIDENT_MMIO
-	#define t_outb(val,reg)	writeb(val,((struct tridentfb_par *)(fb_info.par))->io_virt + reg)
+	#define t_outb(val, reg)	writeb(val,((struct tridentfb_par *)(fb_info.par))->io_virt + reg)
 	#define t_inb(reg)	readb(((struct tridentfb_par*)(fb_info.par))->io_virt + reg)
 #else
-	#define t_outb(val,reg) outb(val,reg)
+	#define t_outb(val, reg) outb(val, reg)
 	#define t_inb(reg) inb(reg)
 #endif
 
 
 static struct accel_switch {
-	void (*init_accel)(int,int);
-	void (*wait_engine)(void);
-	void (*fill_rect)(__u32,__u32,__u32,__u32,__u32,__u32);
-	void (*copy_rect)(__u32,__u32,__u32,__u32,__u32,__u32);
+	void (*init_accel) (int, int);
+	void (*wait_engine) (void);
+	void (*fill_rect) (u32, u32, u32, u32, u32, u32);
+	void (*copy_rect) (u32, u32, u32, u32, u32, u32);
 } *acc;
 
-#define writemmr(r,v)	writel(v, ((struct tridentfb_par *)fb_info.par)->io_virt + r)
+#define writemmr(r, v)	writel(v, ((struct tridentfb_par *)fb_info.par)->io_virt + r)
 #define readmmr(r)	readl(((struct tridentfb_par *)fb_info.par)->io_virt + r)
 
-
-
 /*
  * Blade specific acceleration.
  */
 
-#define point(x,y) ((y)<<16|(x))
+#define point(x, y) ((y) << 16 | (x))
 #define STA	0x2120
 #define CMD	0x2144
 #define ROP	0x2148
@@ -179,64 +174,71 @@ static struct accel_switch {
 
 #define ROP_S	0xCC
 
-static void blade_init_accel(int pitch,int bpp)
+static void blade_init_accel(int pitch, int bpp)
 {
-	int v1 = (pitch>>3)<<20;
-	int tmp = 0,v2;
+	int v1 = (pitch >> 3) << 20;
+	int tmp = 0, v2;
 	switch (bpp) {
-		case 8:tmp = 0;break;
-		case 15:tmp = 5;break;
-		case 16:tmp = 1;break;
-		case 24:
-		case 32:tmp = 2;break;
+	case 8:
+		tmp = 0;
+		break;
+	case 15:
+		tmp = 5;
+		break;
+	case 16:
+		tmp = 1;
+		break;
+	case 24:
+	case 32:
+		tmp = 2;
+		break;
 	}
-	v2 = v1 | (tmp<<29);
-	writemmr(0x21C0,v2);
-	writemmr(0x21C4,v2);
-	writemmr(0x21B8,v2);
-	writemmr(0x21BC,v2);
-	writemmr(0x21D0,v1);
-	writemmr(0x21D4,v1);
-	writemmr(0x21C8,v1);
-	writemmr(0x21CC,v1);
-	writemmr(0x216C,0);
+	v2 = v1 | (tmp << 29);
+	writemmr(0x21C0, v2);
+	writemmr(0x21C4, v2);
+	writemmr(0x21B8, v2);
+	writemmr(0x21BC, v2);
+	writemmr(0x21D0, v1);
+	writemmr(0x21D4, v1);
+	writemmr(0x21C8, v1);
+	writemmr(0x21CC, v1);
+	writemmr(0x216C, 0);
 }
 
 static void blade_wait_engine(void)
 {
-	while(readmmr(STA) & 0xFA800000);
+	while (readmmr(STA) & 0xFA800000) ;
 }
 
-static void blade_fill_rect(__u32 x,__u32 y,__u32 w,__u32 h,__u32 c,__u32 rop)
+static void blade_fill_rect(u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
 {
-	writemmr(CLR,c);
-	writemmr(ROP,rop ? 0x66:ROP_S);
-	writemmr(CMD,0x20000000|1<<19|1<<4|2<<2);
+	writemmr(CLR, c);
+	writemmr(ROP, rop ? 0x66 : ROP_S);
+	writemmr(CMD, 0x20000000 | 1 << 19 | 1 << 4 | 2 << 2);
 
-	writemmr(DR1,point(x,y));
-	writemmr(DR2,point(x+w-1,y+h-1));
+	writemmr(DR1, point(x, y));
+	writemmr(DR2, point(x + w - 1, y + h - 1));
 }
 
-static void blade_copy_rect(__u32 x1,__u32 y1,__u32 x2,__u32 y2,__u32 w,__u32 h)
+static void blade_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
-	__u32 s1,s2,d1,d2;
+	u32 s1, s2, d1, d2;
 	int direction = 2;
-	s1 = point(x1,y1);
-	s2 = point(x1+w-1,y1+h-1);
-	d1 = point(x2,y2);
-	d2 = point(x2+w-1,y2+h-1);
+	s1 = point(x1, y1);
+	s2 = point(x1 + w - 1, y1 + h - 1);
+	d1 = point(x2, y2);
+	d2 = point(x2 + w - 1, y2 + h - 1);
 
 	if ((y1 > y2) || ((y1 == y2) && (x1 > x2)))
-			direction = 0;
-
+		direction = 0;
 
-	writemmr(ROP,ROP_S);
-	writemmr(CMD,0xE0000000|1<<19|1<<4|1<<2|direction);
+	writemmr(ROP, ROP_S);
+	writemmr(CMD, 0xE0000000 | 1 << 19 | 1 << 4 | 1 << 2 | direction);
 
-	writemmr(SR1,direction?s2:s1);
-	writemmr(SR2,direction?s1:s2);
-	writemmr(DR1,direction?d2:d1);
-	writemmr(DR2,direction?d1:d2);
+	writemmr(SR1, direction ? s2 : s1);
+	writemmr(SR2, direction ? s1 : s2);
+	writemmr(DR1, direction ? d2 : d1);
+	writemmr(DR2, direction ? d1 : d2);
 }
 
 static struct accel_switch accel_blade = {
@@ -246,51 +248,72 @@ static struct accel_switch accel_blade = {
 	blade_copy_rect,
 };
 
-
 /*
  * BladeXP specific acceleration functions
  */
 
 #define ROP_P 0xF0
-#define masked_point(x,y) ((y & 0xffff)<<16|(x & 0xffff))
+#define masked_point(x, y) ((y & 0xffff)<<16|(x & 0xffff))
 
-static void xp_init_accel(int pitch,int bpp)
+static void xp_init_accel(int pitch, int bpp)
 {
-	int tmp = 0,v1;
+	int tmp = 0, v1;
 	unsigned char x = 0;
 
 	switch (bpp) {
-		case 8:  x = 0; break;
-		case 16: x = 1; break;
-		case 24: x = 3; break;
-		case 32: x = 2; break;
+	case 8:
+		x = 0;
+		break;
+	case 16:
+		x = 1;
+		break;
+	case 24:
+		x = 3;
+		break;
+	case 32:
+		x = 2;
+		break;
 	}
 
 	switch (pitch << (bpp >> 3)) {
-		case 8192:
-		case 512:  x |= 0x00; break;
-		case 1024: x |= 0x04; break;
-		case 2048: x |= 0x08; break;
-		case 4096: x |= 0x0C; break;
+	case 8192:
+	case 512:
+		x |= 0x00;
+		break;
+	case 1024:
+		x |= 0x04;
+		break;
+	case 2048:
+		x |= 0x08;
+		break;
+	case 4096:
+		x |= 0x0C;
+		break;
 	}
 
-	t_outb(x,0x2125);
+	t_outb(x, 0x2125);
 
 	eng_oper = x | 0x40;
 
 	switch (bpp) {
-		case 8:  tmp = 18; break;
-		case 15:
-		case 16: tmp = 19; break;
-		case 24:
-		case 32: tmp = 20; break;
+	case 8:
+		tmp = 18;
+		break;
+	case 15:
+	case 16:
+		tmp = 19;
+		break;
+	case 24:
+	case 32:
+		tmp = 20;
+		break;
 	}
 
 	v1 = pitch << tmp;
 
-	writemmr(0x2154,v1);
-	writemmr(0x2150,v1);
-	t_outb(3,0x2126);
+	writemmr(0x2154, v1);
+	writemmr(0x2150, v1);
+	t_outb(3, 0x2126);
 }
 
 static void xp_wait_engine(void)
@@ -318,24 +341,24 @@ static void xp_wait_engine(void)
 	}
 }
 
-static void xp_fill_rect(__u32 x,__u32 y,__u32 w,__u32 h,__u32 c,__u32 rop)
+static void xp_fill_rect(u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
 {
-	writemmr(0x2127,ROP_P);
-	writemmr(0x2158,c);
-	writemmr(0x2128,0x4000);
-	writemmr(0x2140,masked_point(h,w));
-	writemmr(0x2138,masked_point(y,x));
-	t_outb(0x01,0x2124);
-        t_outb(eng_oper,0x2125);
+	writemmr(0x2127, ROP_P);
+	writemmr(0x2158, c);
+	writemmr(0x2128, 0x4000);
+	writemmr(0x2140, masked_point(h, w));
+	writemmr(0x2138, masked_point(y, x));
+	t_outb(0x01, 0x2124);
+	t_outb(eng_oper, 0x2125);
 }
 
-static void xp_copy_rect(__u32 x1,__u32 y1,__u32 x2,__u32 y2,__u32 w,__u32 h)
+static void xp_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
 	int direction;
-	__u32 x1_tmp, x2_tmp, y1_tmp, y2_tmp;
+	u32 x1_tmp, x2_tmp, y1_tmp, y2_tmp;
 
 	direction = 0x0004;
-	
+
 	if ((x1 < x2) && (y1 == y2)) {
 		direction |= 0x0200;
 		x1_tmp = x1 + w - 1;
@@ -344,53 +367,60 @@ static void xp_copy_rect(__u32 x1,__u32 y1,__u32 x2,__u32 y2,__u32 w,__u32 h)
 		x1_tmp = x1;
 		x2_tmp = x2;
 	}
-  
+
 	if (y1 < y2) {
 		direction |= 0x0100;
 		y1_tmp = y1 + h - 1;
 		y2_tmp = y2 + h - 1;
-  	} else {
+	} else {
 		y1_tmp = y1;
 		y2_tmp = y2;
 	}
 
-	writemmr(0x2128,direction);	
-	t_outb(ROP_S,0x2127);
-	writemmr(0x213C,masked_point(y1_tmp,x1_tmp));
-	writemmr(0x2138,masked_point(y2_tmp,x2_tmp));
-	writemmr(0x2140,masked_point(h,w));
-	t_outb(0x01,0x2124);
+	writemmr(0x2128, direction);
+	t_outb(ROP_S, 0x2127);
+	writemmr(0x213C, masked_point(y1_tmp, x1_tmp));
+	writemmr(0x2138, masked_point(y2_tmp, x2_tmp));
+	writemmr(0x2140, masked_point(h, w));
+	t_outb(0x01, 0x2124);
 }
 
 static struct accel_switch accel_xp = {
-  	xp_init_accel,
+	xp_init_accel,
 	xp_wait_engine,
 	xp_fill_rect,
 	xp_copy_rect,
 };
 
-
 /*
  * Image specific acceleration functions
  */
-static void image_init_accel(int pitch,int bpp)
+static void image_init_accel(int pitch, int bpp)
 {
 	int tmp = 0;
-   	switch (bpp) {
-		case 8:tmp = 0;break;
-		case 15:tmp = 5;break;
-		case 16:tmp = 1;break;
-		case 24:
-		case 32:tmp = 2;break;
+	switch (bpp) {
+	case 8:
+		tmp = 0;
+		break;
+	case 15:
+		tmp = 5;
+		break;
+	case 16:
+		tmp = 1;
+		break;
+	case 24:
+	case 32:
+		tmp = 2;
+		break;
 	}
 	writemmr(0x2120, 0xF0000000);
-	writemmr(0x2120, 0x40000000|tmp);
+	writemmr(0x2120, 0x40000000 | tmp);
 	writemmr(0x2120, 0x80000000);
 	writemmr(0x2144, 0x00000000);
 	writemmr(0x2148, 0x00000000);
 	writemmr(0x2150, 0x00000000);
 	writemmr(0x2154, 0x00000000);
-	writemmr(0x2120, 0x60000000|(pitch<<16) |pitch);
+	writemmr(0x2120, 0x60000000 | (pitch << 16) | pitch);
 	writemmr(0x216C, 0x00000000);
 	writemmr(0x2170, 0x00000000);
 	writemmr(0x217C, 0x00000000);
@@ -400,44 +430,43 @@ static void image_init_accel(int pitch,int bpp)
 
 static void image_wait_engine(void)
 {
-	while(readmmr(0x2164) & 0xF0000000);
+	while (readmmr(0x2164) & 0xF0000000) ;
 }
 
-static void image_fill_rect(__u32 x, __u32 y, __u32 w, __u32 h, __u32 c, __u32 rop)
+static void image_fill_rect(u32 x, u32 y, u32 w, u32 h, u32 c, u32 rop)
 {
-	writemmr(0x2120,0x80000000);
-	writemmr(0x2120,0x90000000|ROP_S);
+	writemmr(0x2120, 0x80000000);
+	writemmr(0x2120, 0x90000000 | ROP_S);
 
-	writemmr(0x2144,c);
+	writemmr(0x2144, c);
 
-	writemmr(DR1,point(x,y));
-	writemmr(DR2,point(x+w-1,y+h-1));
+	writemmr(DR1, point(x, y));
+	writemmr(DR2, point(x + w - 1, y + h - 1));
 
-	writemmr(0x2124,0x80000000|3<<22|1<<10|1<<9);
+	writemmr(0x2124, 0x80000000 | 3 << 22 | 1 << 10 | 1 << 9);
 }
 
-static void image_copy_rect(__u32 x1,__u32 y1,__u32 x2,__u32 y2,__u32 w,__u32 h)
+static void image_copy_rect(u32 x1, u32 y1, u32 x2, u32 y2, u32 w, u32 h)
 {
-	__u32 s1,s2,d1,d2;
+	u32 s1, s2, d1, d2;
 	int direction = 2;
-	s1 = point(x1,y1);
-	s2 = point(x1+w-1,y1+h-1);
-	d1 = point(x2,y2);
-	d2 = point(x2+w-1,y2+h-1);
-
-	if ((y1 > y2) || ((y1 == y2) && (x1 >x2)))
-			direction = 0;
-
-	writemmr(0x2120,0x80000000);
-	writemmr(0x2120,0x90000000|ROP_S);
-
-	writemmr(SR1,direction?s2:s1);
-	writemmr(SR2,direction?s1:s2);
-	writemmr(DR1,direction?d2:d1);
-	writemmr(DR2,direction?d1:d2);
-	writemmr(0x2124,0x80000000|1<<22|1<<10|1<<7|direction);
-}
+	s1 = point(x1, y1);
+	s2 = point(x1 + w - 1, y1 + h - 1);
+	d1 = point(x2, y2);
+	d2 = point(x2 + w - 1, y2 + h - 1);
 
+	if ((y1 > y2) || ((y1 == y2) && (x1 > x2)))
+		direction = 0;
+
+	writemmr(0x2120, 0x80000000);
+	writemmr(0x2120, 0x90000000 | ROP_S);
+
+	writemmr(SR1, direction ? s2 : s1);
+	writemmr(SR2, direction ? s1 : s2);
+	writemmr(DR1, direction ? d2 : d1);
+	writemmr(DR2, direction ? d1 : d2);
+	writemmr(0x2124, 0x80000000 | 1 << 22 | 1 << 10 | 1 << 7 | direction);
+}
 
 static struct accel_switch accel_image = {
 	image_init_accel,
@@ -450,30 +479,34 @@ static struct accel_switch accel_image = {
  * Accel functions called by the upper layers
  */
 #ifdef CONFIG_FB_TRIDENT_ACCEL
-static void tridentfb_fillrect(struct fb_info * info, const struct fb_fillrect *fr)
+static void tridentfb_fillrect(struct fb_info *info,
+			       const struct fb_fillrect *fr)
 {
 	int bpp = info->var.bits_per_pixel;
 	int col = 0;
-	
+
 	switch (bpp) {
-		default:
-		case 8: col |= fr->color;
-			col |= col << 8;
-			col |= col << 16;
-			break;
-		case 16: col = ((u32 *)(info->pseudo_palette))[fr->color];
-			
-			 break;
-		case 32: col = ((u32 *)(info->pseudo_palette))[fr->color];
-			 break;
-	}		 
-			
+	default:
+	case 8:
+		col |= fr->color;
+		col |= col << 8;
+		col |= col << 16;
+		break;
+	case 16:
+		col = ((u32 *)(info->pseudo_palette))[fr->color];
+		break;
+	case 32:
+		col = ((u32 *)(info->pseudo_palette))[fr->color];
+		break;
+	}
+
 	acc->fill_rect(fr->dx, fr->dy, fr->width, fr->height, col, fr->rop);
 	acc->wait_engine();
 }
-static void tridentfb_copyarea(struct fb_info *info, const struct fb_copyarea *ca)
+static void tridentfb_copyarea(struct fb_info *info,
+			       const struct fb_copyarea *ca)
 {
-	acc->copy_rect(ca->sx,ca->sy,ca->dx,ca->dy,ca->width,ca->height);
+	acc->copy_rect(ca->sx, ca->sy, ca->dx, ca->dy, ca->width, ca->height);
 	acc->wait_engine();
 }
 #else /* !CONFIG_FB_TRIDENT_ACCEL */
@@ -488,14 +521,14 @@ static void tridentfb_copyarea(struct fb_info *info, const struct fb_copyarea *c
 
 static inline unsigned char read3X4(int reg)
 {
-	struct tridentfb_par * par = (struct tridentfb_par *)fb_info.par;
+	struct tridentfb_par *par = (struct tridentfb_par *)fb_info.par;
 	writeb(reg, par->io_virt + CRT + 4);
-	return readb( par->io_virt + CRT + 5);
+	return readb(par->io_virt + CRT + 5);
 }
 
 static inline void write3X4(int reg, unsigned char val)
 {
-	struct tridentfb_par * par = (struct tridentfb_par *)fb_info.par;
+	struct tridentfb_par *par = (struct tridentfb_par *)fb_info.par;
 	writeb(reg, par->io_virt + CRT + 4);
 	writeb(val, par->io_virt + CRT + 5);
 }
@@ -520,7 +553,7 @@ static inline unsigned char read3CE(int reg)
 
 static inline void writeAttr(int reg, unsigned char val)
 {
-	readb(((struct tridentfb_par *)fb_info.par)->io_virt + CRT + 0x0A);	//flip-flop to index
+	readb(((struct tridentfb_par *)fb_info.par)->io_virt + CRT + 0x0A);	/* flip-flop to index */
 	t_outb(reg, 0x3C0);
 	t_outb(val, 0x3C0);
 }
@@ -540,32 +573,41 @@ static inline void enable_mmio(void)
 	/* Unprotect registers */
 	outb(NewMode1, 0x3C4);
 	outb(0x80, 0x3C5);
-  
+
 	/* Enable MMIO */
-	outb(PCIReg, 0x3D4); 
+	outb(PCIReg, 0x3D4);
 	outb(inb(0x3D5) | 0x01, 0x3D5);
 }
 
-
 #define crtc_unlock()	write3X4(CRTVSyncEnd, read3X4(CRTVSyncEnd) & 0x7F)
 
 /*  Return flat panel's maximum x resolution */
 static int __devinit get_nativex(void)
 {
-	int x,y,tmp;
+	int x, y, tmp;
 
 	if (nativex)
 		return nativex;
 
-       	tmp = (read3CE(VertStretch) >> 4) & 3;
+	tmp = (read3CE(VertStretch) >> 4) & 3;
 
 	switch (tmp) {
-		case 0: x = 1280; y = 1024; break;
-		case 2: x = 1024; y = 768;  break;
-		case 3: x = 800;  y = 600;  break; 
-		case 4: x = 1400; y = 1050; break;
-		case 1: 
-		default:x = 640;  y = 480;  break;
+	case 0:
+		x = 1280; y = 1024;
+		break;
+	case 2:
+		x = 1024; y = 768;
+		break;
+	case 3:
+		x = 800; y = 600;
+		break;
+	case 4:
+		x = 1400; y = 1050;
+		break;
+	case 1:
+	default:
+		x = 640;  y = 480;
+		break;
 	}
 
 	output("%dx%d flat panel found\n", x, y);
@@ -576,25 +618,26 @@ static int __devinit get_nativex(void)
 static void set_lwidth(int width)
 {
 	write3X4(Offset, width & 0xFF);
-	write3X4(AddColReg, (read3X4(AddColReg) & 0xCF) | ((width & 0x300) >>4));
+	write3X4(AddColReg,
+		 (read3X4(AddColReg) & 0xCF) | ((width & 0x300) >> 4));
 }
 
 /* For resolutions smaller than FP resolution stretch */
 static void screen_stretch(void)
 {
-  	if (chip_id != CYBERBLADEXPAi1)
-  		write3CE(BiosReg,0);
-  	else
-  		write3CE(BiosReg,8);
-	write3CE(VertStretch,(read3CE(VertStretch) & 0x7C) | 1);
-	write3CE(HorStretch,(read3CE(HorStretch) & 0x7C) | 1);
+	if (chip_id != CYBERBLADEXPAi1)
+		write3CE(BiosReg, 0);
+	else
+		write3CE(BiosReg, 8);
+	write3CE(VertStretch, (read3CE(VertStretch) & 0x7C) | 1);
+	write3CE(HorStretch, (read3CE(HorStretch) & 0x7C) | 1);
 }
 
 /* For resolutions smaller than FP resolution center */
 static void screen_center(void)
 {
-	write3CE(VertStretch,(read3CE(VertStretch) & 0x7C) | 0x80);
-	write3CE(HorStretch,(read3CE(HorStretch) & 0x7C) | 0x80);
+	write3CE(VertStretch, (read3CE(VertStretch) & 0x7C) | 0x80);
+	write3CE(HorStretch, (read3CE(HorStretch) & 0x7C) | 0x80);
 }
 
 /* Address of first shown pixel in display memory */
@@ -602,40 +645,42 @@ static void set_screen_start(int base)
 {
 	write3X4(StartAddrLow, base & 0xFF);
 	write3X4(StartAddrHigh, (base & 0xFF00) >> 8);
-	write3X4(CRTCModuleTest, (read3X4(CRTCModuleTest) & 0xDF) | ((base & 0x10000) >> 11));
-	write3X4(CRTHiOrd, (read3X4(CRTHiOrd) & 0xF8) | ((base & 0xE0000) >> 17));
+	write3X4(CRTCModuleTest,
+		 (read3X4(CRTCModuleTest) & 0xDF) | ((base & 0x10000) >> 11));
+	write3X4(CRTHiOrd,
+		 (read3X4(CRTHiOrd) & 0xF8) | ((base & 0xE0000) >> 17));
 }
 
 /* Use 20.12 fixed-point for NTSC value and frequency calculation */
-#define calc_freq(n,m,k)  ( ((unsigned long)0xE517 * (n+8) / ((m+2)*(1<<k))) >> 12 )
+#define calc_freq(n, m, k)  ( ((unsigned long)0xE517 * (n + 8) / ((m + 2) * (1 << k))) >> 12 )
 
 /* Set dotclock frequency */
 static void set_vclk(int freq)
 {
-	int m,n,k;
-	int f,fi,d,di;
-	unsigned char lo=0,hi=0;
+	int m, n, k;
+	int f, fi, d, di;
+	unsigned char lo = 0, hi = 0;
 
 	d = 20;
-	for(k = 2;k>=0;k--)
-	for(m = 0;m<63;m++)
-	for(n = 0;n<128;n++) {
-		fi = calc_freq(n,m,k);
-		if ((di = abs(fi - freq)) < d) {
-			d = di;
-			f = fi;
-			lo = n;
-			hi = (k<<6) | m;
-		}
-	}
+	for (k = 2; k >= 0; k--)
+		for (m = 0; m < 63; m++)
+			for (n = 0; n < 128; n++) {
+				fi = calc_freq(n, m, k);
+				if ((di = abs(fi - freq)) < d) {
+					d = di;
+					f = fi;
+					lo = n;
+					hi = (k << 6) | m;
+				}
+			}
 	if (chip3D) {
-		write3C4(ClockHigh,hi);
-		write3C4(ClockLow,lo);
+		write3C4(ClockHigh, hi);
+		write3C4(ClockLow, lo);
 	} else {
-		outb(lo,0x43C8);
-		outb(hi,0x43C9);
+		outb(lo, 0x43C8);
+		outb(hi, 0x43C9);
 	}
-	debug("VCLK = %X %X\n",hi,lo);
+	debug("VCLK = %X %X\n", hi, lo);
 }
 
 /* Set number of lines for flat panels*/
@@ -663,7 +708,7 @@ static unsigned int __devinit get_displaytype(void)
 		return DISPLAY_FP;
 	if (crt || !chipcyber)
 		return DISPLAY_CRT;
-	return (read3CE(FPConfig) & 0x10)?DISPLAY_FP:DISPLAY_CRT;
+	return (read3CE(FPConfig) & 0x10) ? DISPLAY_FP : DISPLAY_CRT;
 }
 
 /* Try detecting the video memory size */
@@ -676,100 +721,136 @@ static unsigned int __devinit get_memsize(void)
 	if (memsize)
 		k = memsize * Kb;
 	else
-	switch (chip_id) {
-		case CYBER9525DVD:    k = 2560 * Kb; break;
+		switch (chip_id) {
+		case CYBER9525DVD:
+			k = 2560 * Kb;
+			break;
 		default:
 			tmp = read3X4(SPR) & 0x0F;
 			switch (tmp) {
 
-				case 0x01: k = 512;     break;
-				case 0x02: k = 6 * Mb;  break; /* XP */
-				case 0x03: k = 1 * Mb;  break;
-				case 0x04: k = 8 * Mb;  break;
-				case 0x06: k = 10 * Mb; break; /* XP */
-				case 0x07: k = 2 * Mb;  break;
-				case 0x08: k = 12 * Mb; break; /* XP */
-				case 0x0A: k = 14 * Mb; break; /* XP */
-				case 0x0C: k = 16 * Mb; break; /* XP */
-				case 0x0E:                     /* XP */
-  
-					tmp2 = read3C4(0xC1);
-					switch (tmp2) {
-						case 0x00: k = 20 * Mb; break;
-						case 0x01: k = 24 * Mb; break;
-						case 0x10: k = 28 * Mb; break;
-						case 0x11: k = 32 * Mb; break;
-						default:   k = 1 * Mb;  break;
-					}
+			case 0x01:
+				k = 512;
+				break;
+			case 0x02:
+				k = 6 * Mb;	/* XP */
+				break;
+			case 0x03:
+				k = 1 * Mb;
+				break;
+			case 0x04:
+				k = 8 * Mb;
+				break;
+			case 0x06:
+				k = 10 * Mb;	/* XP */
+				break;
+			case 0x07:
+				k = 2 * Mb;
+				break;
+			case 0x08:
+				k = 12 * Mb;	/* XP */
+				break;
+			case 0x0A:
+				k = 14 * Mb;	/* XP */
+				break;
+			case 0x0C:
+				k = 16 * Mb;	/* XP */
+				break;
+			case 0x0E:		/* XP */
+
+				tmp2 = read3C4(0xC1);
+				switch (tmp2) {
+				case 0x00:
+					k = 20 * Mb;
+					break;
+				case 0x01:
+					k = 24 * Mb;
+					break;
+				case 0x10:
+					k = 28 * Mb;
+					break;
+				case 0x11:
+					k = 32 * Mb;
+					break;
+				default:
+					k = 1 * Mb;
+					break;
+				}
+				break;
+
+			case 0x0F:
+				k = 4 * Mb;
+				break;
+			default:
+				k = 1 * Mb;
 				break;
-	
-				case 0x0F: k = 4 * Mb; break;
-				default:   k = 1 * Mb;
 			}
-	}
+		}
 
 	k -= memdiff * Kb;
-	output("framebuffer size = %d Kb\n", k/Kb);
+	output("framebuffer size = %d Kb\n", k / Kb);
 	return k;
 }
 
 /* See if we can handle the video mode described in var */
-static int tridentfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+static int tridentfb_check_var(struct fb_var_screeninfo *var,
+			       struct fb_info *info)
 {
 	int bpp = var->bits_per_pixel;
 	debug("enter\n");
 
 	/* check color depth */
-	if (bpp == 24 )
+	if (bpp == 24)
 		bpp = var->bits_per_pixel = 32;
-	/* check whether resolution fits on panel and in memory*/
+	/* check whether resolution fits on panel and in memory */
 	if (flatpanel && nativex && var->xres > nativex)
 		return -EINVAL;
-	if (var->xres * var->yres_virtual * bpp/8 > info->fix.smem_len)
+	if (var->xres * var->yres_virtual * bpp / 8 > info->fix.smem_len)
 		return -EINVAL;
 
 	switch (bpp) {
-		case 8:
-			var->red.offset = 0;
-			var->green.offset = 0;
-			var->blue.offset = 0;
-			var->red.length = 6;
-			var->green.length = 6;
-			var->blue.length = 6;
-			break;
-		case 16:
-			var->red.offset = 11;
-			var->green.offset = 5;
-			var->blue.offset = 0;
-			var->red.length = 5;
-			var->green.length = 6;
-			var->blue.length = 5;
-			break;
-		case 32:
-			var->red.offset = 16;
-			var->green.offset = 8;
-			var->blue.offset = 0;
-			var->red.length = 8;
-			var->green.length = 8;
-			var->blue.length = 8;
-			break;
-		default:
-			return -EINVAL;	
+	case 8:
+		var->red.offset = 0;
+		var->green.offset = 0;
+		var->blue.offset = 0;
+		var->red.length = 6;
+		var->green.length = 6;
+		var->blue.length = 6;
+		break;
+	case 16:
+		var->red.offset = 11;
+		var->green.offset = 5;
+		var->blue.offset = 0;
+		var->red.length = 5;
+		var->green.length = 6;
+		var->blue.length = 5;
+		break;
+	case 32:
+		var->red.offset = 16;
+		var->green.offset = 8;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		break;
+	default:
+		return -EINVAL;
 	}
 	debug("exit\n");
 
 	return 0;
 
 }
+
 /* Pan the display */
 static int tridentfb_pan_display(struct fb_var_screeninfo *var,
-				   struct fb_info *info)
+				 struct fb_info *info)
 {
 	unsigned int offset;
 
 	debug("enter\n");
 	offset = (var->xoffset + (var->yoffset * var->xres))
-			* var->bits_per_pixel/32;
+		* var->bits_per_pixel / 32;
 	info->var.xoffset = var->xoffset;
 	info->var.yoffset = var->yoffset;
 	set_screen_start(offset);
@@ -777,36 +858,38 @@ static int tridentfb_pan_display(struct fb_var_screeninfo *var,
 	return 0;
 }
 
-#define shadowmode_on()  write3CE(CyberControl,read3CE(CyberControl) | 0x81)
-#define shadowmode_off() write3CE(CyberControl,read3CE(CyberControl) & 0x7E)
+#define shadowmode_on()  write3CE(CyberControl, read3CE(CyberControl) | 0x81)
+#define shadowmode_off() write3CE(CyberControl, read3CE(CyberControl) & 0x7E)
 
 /* Set the hardware to the requested video mode */
 static int tridentfb_set_par(struct fb_info *info)
 {
-	struct tridentfb_par * par = (struct tridentfb_par *)(info->par);
-	u32	htotal,hdispend,hsyncstart,hsyncend,hblankstart,hblankend,
-		vtotal,vdispend,vsyncstart,vsyncend,vblankstart,vblankend;
-	struct fb_var_screeninfo *var = &info->var;	
+	struct tridentfb_par *par = (struct tridentfb_par *)(info->par);
+	u32 htotal, hdispend, hsyncstart, hsyncend, hblankstart, hblankend;
+	u32 vtotal, vdispend, vsyncstart, vsyncend, vblankstart, vblankend;
+	struct fb_var_screeninfo *var = &info->var;
 	int bpp = var->bits_per_pixel;
 	unsigned char tmp;
 	debug("enter\n");
-	htotal = (var->xres + var->left_margin + var->right_margin + var->hsync_len)/8 - 10;
-	hdispend = var->xres/8 - 1;
-	hsyncstart = (var->xres + var->right_margin)/8;
-	hsyncend = var->hsync_len/8;
+	hdispend = var->xres / 8 - 1;
+	hsyncstart = (var->xres + var->right_margin) / 8;
+	hsyncend = var->hsync_len / 8;
+	htotal =
+		(var->xres + var->left_margin + var->right_margin +
+		 var->hsync_len) / 8 - 10;
 	hblankstart = hdispend + 1;
 	hblankend = htotal + 5;
 
-	vtotal = var->yres + var->upper_margin + var->lower_margin + var->vsync_len - 2;
 	vdispend = var->yres - 1;
 	vsyncstart = var->yres + var->lower_margin;
 	vsyncend = var->vsync_len;
+	vtotal = var->upper_margin + vsyncstart + vsyncend - 2;
 	vblankstart = var->yres;
 	vblankend = vtotal + 2;
 
 	enable_mmio();
 	crtc_unlock();
-	write3CE(CyberControl,8);
+	write3CE(CyberControl, 8);
 
 	if (flatpanel && var->xres < nativex) {
 		/*
@@ -814,18 +897,18 @@ static int tridentfb_set_par(struct fb_info *info)
 		 * than requested resolution decide whether
 		 * we stretch or center
 		 */
-		t_outb(0xEB,0x3C2);
+		t_outb(0xEB, 0x3C2);
 
 		shadowmode_on();
 
-		if (center) 
+		if (center)
 			screen_center();
 		else if (stretch)
 			screen_stretch();
 
 	} else {
-		t_outb(0x2B,0x3C2);
-		write3CE(CyberControl,8);
+		t_outb(0x2B, 0x3C2);
+		write3CE(CyberControl, 8);
 	}
 
 	/* vertical timing values */
@@ -834,15 +917,15 @@ static int tridentfb_set_par(struct fb_info *info)
 	write3X4(CRTVSyncStart, vsyncstart & 0xFF);
 	write3X4(CRTVSyncEnd, (vsyncend & 0x0F));
 	write3X4(CRTVBlankStart, vblankstart & 0xFF);
-	write3X4(CRTVBlankEnd, 0/*p->vblankend & 0xFF*/);
+	write3X4(CRTVBlankEnd, 0 /* p->vblankend & 0xFF */ );
 
 	/* horizontal timing values */
 	write3X4(CRTHTotal, htotal & 0xFF);
 	write3X4(CRTHDispEnd, hdispend & 0xFF);
 	write3X4(CRTHSyncStart, hsyncstart & 0xFF);
-	write3X4(CRTHSyncEnd, (hsyncend & 0x1F) | ((hblankend & 0x20)<<2));
+	write3X4(CRTHSyncEnd, (hsyncend & 0x1F) | ((hblankend & 0x20) << 2));
 	write3X4(CRTHBlankStart, hblankstart & 0xFF);
-	write3X4(CRTHBlankEnd, 0/*(p->hblankend & 0x1F)*/);
+	write3X4(CRTHBlankEnd, 0 /* (p->hblankend & 0x1F) */ );
 
 	/* higher bits of vertical timing values */
 	tmp = 0x10;
@@ -856,7 +939,7 @@ static int tridentfb_set_par(struct fb_info *info)
 	if (vsyncstart & 0x200) tmp |= 0x80;
 	write3X4(CRTOverflow, tmp);
 
-	tmp = read3X4(CRTHiOrd) | 0x08;	//line compare bit 10
+	tmp = read3X4(CRTHiOrd) | 0x08;	/* line compare bit 10 */
 	if (vtotal & 0x400) tmp |= 0x80;
 	if (vblankstart & 0x400) tmp |= 0x40;
 	if (vsyncstart & 0x400) tmp |= 0x20;
@@ -867,84 +950,100 @@ static int tridentfb_set_par(struct fb_info *info)
 	if (htotal & 0x800) tmp |= 0x800 >> 11;
 	if (hblankstart & 0x800) tmp |= 0x800 >> 7;
 	write3X4(HorizOverflow, tmp);
-	
+
 	tmp = 0x40;
 	if (vblankstart & 0x200) tmp |= 0x20;
-//FIXME	if (info->var.vmode & FB_VMODE_DOUBLE) tmp |= 0x80;  //double scan for 200 line modes
+//FIXME	if (info->var.vmode & FB_VMODE_DOUBLE) tmp |= 0x80;  /* double scan for 200 line modes */
 	write3X4(CRTMaxScanLine, tmp);
 
-	write3X4(CRTLineCompare,0xFF);
-	write3X4(CRTPRowScan,0);
-	write3X4(CRTModeControl,0xC3);
+	write3X4(CRTLineCompare, 0xFF);
+	write3X4(CRTPRowScan, 0);
+	write3X4(CRTModeControl, 0xC3);
 
-	write3X4(LinearAddReg,0x20);	//enable linear addressing
+	write3X4(LinearAddReg, 0x20);	/* enable linear addressing */
 
-	tmp = (info->var.vmode & FB_VMODE_INTERLACED) ? 0x84:0x80;
-	write3X4(CRTCModuleTest,tmp);	//enable access extended memory
+	tmp = (info->var.vmode & FB_VMODE_INTERLACED) ? 0x84 : 0x80;
+	write3X4(CRTCModuleTest, tmp);	/* enable access extended memory */
 
-	write3X4(GraphEngReg, 0x80);	//enable GE for text acceleration
+	write3X4(GraphEngReg, 0x80);	/* enable GE for text acceleration */
 
-#ifdef CONFIG_FB_TRIDENT_ACCEL	
-	acc->init_accel(info->var.xres,bpp);
+#ifdef CONFIG_FB_TRIDENT_ACCEL
+	acc->init_accel(info->var.xres, bpp);
 #endif
-	
+
 	switch (bpp) {
-		case 8:  tmp = 0x00; break;
-		case 16: tmp = 0x05; break;
-		case 24: tmp = 0x29; break;
-		case 32: tmp = 0x09; 
+	case 8:
+		tmp = 0x00;
+		break;
+	case 16:
+		tmp = 0x05;
+		break;
+	case 24:
+		tmp = 0x29;
+		break;
+	case 32:
+		tmp = 0x09;
+		break;
 	}
 
 	write3X4(PixelBusReg, tmp);
 
 	tmp = 0x10;
 	if (chipcyber)
-	    tmp |= 0x20;
-	write3X4(DRAMControl, tmp);	//both IO,linear enable
+		tmp |= 0x20;
+	write3X4(DRAMControl, tmp);	/* both IO, linear enable */
 
 	write3X4(InterfaceSel, read3X4(InterfaceSel) | 0x40);
-	write3X4(Performance,0x92);
-	write3X4(PCIReg,0x07);		//MMIO & PCI read and write burst enable
+	write3X4(Performance, 0x92);
+	write3X4(PCIReg, 0x07);		/* MMIO & PCI read and write burst enable */
 
 	/* convert from picoseconds to MHz */
-	par->vclk = 1000000/info->var.pixclock;
+	par->vclk = 1000000 / info->var.pixclock;
 	if (bpp == 32)
-		par->vclk *=2;
+		par->vclk *= 2;
 	set_vclk(par->vclk);
 
-	write3C4(0,3);
-	write3C4(1,1);		//set char clock 8 dots wide
-	write3C4(2,0x0F);	//enable 4 maps because needed in chain4 mode
-	write3C4(3,0);
-	write3C4(4,0x0E);	//memory mode enable bitmaps ??
+	write3C4(0, 3);
+	write3C4(1, 1);		/* set char clock 8 dots wide */
+	write3C4(2, 0x0F);	/* enable 4 maps because needed in chain4 mode */
+	write3C4(3, 0);
+	write3C4(4, 0x0E);	/* memory mode enable bitmaps ?? */
 
-	write3CE(MiscExtFunc,(bpp==32)?0x1A:0x12);	//divide clock by 2 if 32bpp
-							//chain4 mode display and CPU path
-	write3CE(0x5,0x40);	//no CGA compat,allow 256 col
-	write3CE(0x6,0x05);	//graphics mode
-	write3CE(0x7,0x0F);	//planes?
+	write3CE(MiscExtFunc, (bpp == 32) ? 0x1A : 0x12);	/* divide clock by 2 if 32bpp */
+							/* chain4 mode display and CPU path */
+	write3CE(0x5, 0x40);	/* no CGA compat, allow 256 col */
+	write3CE(0x6, 0x05);	/* graphics mode */
+	write3CE(0x7, 0x0F);	/* planes? */
 
 	if (chip_id == CYBERBLADEXPAi1) {
 		/* This fixes snow-effect in 32 bpp */
-		write3X4(CRTHSyncStart,0x84);
+		write3X4(CRTHSyncStart, 0x84);
 	}
 
-	writeAttr(0x10,0x41);	//graphics mode and support 256 color modes
-	writeAttr(0x12,0x0F);	//planes
-	writeAttr(0x13,0);	//horizontal pel panning
+	writeAttr(0x10, 0x41);	/* graphics mode and support 256 color modes */
+	writeAttr(0x12, 0x0F);	/* planes */
+	writeAttr(0x13, 0);	/* horizontal pel panning */
 
-	//colors
-	for(tmp = 0;tmp < 0x10;tmp++)
-		writeAttr(tmp,tmp);
-	readb(par->io_virt + CRT + 0x0A);	//flip-flop to index
-	t_outb(0x20, 0x3C0);			//enable attr
+	/* colors */
+	for (tmp = 0; tmp < 0x10; tmp++)
+		writeAttr(tmp, tmp);
+	readb(par->io_virt + CRT + 0x0A);	/* flip-flop to index */
+	t_outb(0x20, 0x3C0);			/* enable attr */
 
 	switch (bpp) {
-		case 8:	tmp = 0;break;		//256 colors
-		case 15: tmp = 0x10;break;
-		case 16: tmp = 0x30;break;	//hicolor
-		case 24: 			//truecolor
-		case 32: tmp = 0xD0;break;
+	case 8:
+		tmp = 0;
+		break;
+	case 15:
+		tmp = 0x10;
+		break;
+	case 16:
+		tmp = 0x30;
+		break;
+	case 24:
+	case 32:
+		tmp = 0xD0;
+		break;
 	}
 
 	t_inb(0x3C8);
@@ -952,37 +1051,36 @@ static int tridentfb_set_par(struct fb_info *info)
 	t_inb(0x3C6);
 	t_inb(0x3C6);
 	t_inb(0x3C6);
-	t_outb(tmp,0x3C6);
+	t_outb(tmp, 0x3C6);
 	t_inb(0x3C8);
 
 	if (flatpanel)
 		set_number_of_lines(info->var.yres);
-	set_lwidth(info->var.xres * bpp/(4*16));
+	set_lwidth(info->var.xres * bpp / (4 * 16));
 	info->fix.visual = (bpp == 8) ? FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
-	info->fix.line_length = info->var.xres * (bpp >> 3);  
-	info->cmap.len = (bpp == 8) ? 256: 16;
+	info->fix.line_length = info->var.xres * (bpp >> 3);
+	info->cmap.len = (bpp == 8) ? 256 : 16;
 	debug("exit\n");
 	return 0;
 }
 
 /* Set one color register */
 static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
-				 unsigned blue, unsigned transp,
-				 struct fb_info *info)
+			       unsigned blue, unsigned transp,
+			       struct fb_info *info)
 {
 	int bpp = info->var.bits_per_pixel;
 
 	if (regno >= info->cmap.len)
 		return 1;
 
-
 	if (bpp == 8) {
-		t_outb(0xFF,0x3C6);
-		t_outb(regno,0x3C8);
+		t_outb(0xFF, 0x3C6);
+		t_outb(regno, 0x3C8);
 
-		t_outb(red>>10,0x3C9);
-		t_outb(green>>10,0x3C9);
-		t_outb(blue>>10,0x3C9);
+		t_outb(red >> 10, 0x3C9);
+		t_outb(green >> 10, 0x3C9);
+		t_outb(blue >> 10, 0x3C9);
 
 	} else if (regno < 16) {
 		if (bpp == 16) {	/* RGB 565 */
@@ -994,29 +1092,28 @@ static int tridentfb_setcolreg(unsigned regno, unsigned red, unsigned green,
 			((u32 *)(info->pseudo_palette))[regno] = col;
 		} else if (bpp == 32)		/* ARGB 8888 */
 			((u32*)info->pseudo_palette)[regno] =
-				((transp & 0xFF00) <<16) 	|
-				((red & 0xFF00) << 8) 		|
+				((transp & 0xFF00) << 16)	|
+				((red & 0xFF00) << 8)		|
 				((green & 0xFF00))		|
-				((blue & 0xFF00)>>8);
+				((blue & 0xFF00) >> 8);
 	}
 
-//	debug("exit\n");
+/* 	debug("exit\n"); */
 	return 0;
 }
 
 /* Try blanking the screen.For flat panels it does nothing */
 static int tridentfb_blank(int blank_mode, struct fb_info *info)
 {
-	unsigned char PMCont,DPMSCont;
+	unsigned char PMCont, DPMSCont;
 
 	debug("enter\n");
 	if (flatpanel)
 		return 0;
-	t_outb(0x04,0x83C8); /* Read DPMS Control */
+	t_outb(0x04, 0x83C8); /* Read DPMS Control */
 	PMCont = t_inb(0x83C6) & 0xFC;
 	DPMSCont = read3CE(PowerStatus) & 0xFC;
-	switch (blank_mode)
-	{
+	switch (blank_mode) {
 	case FB_BLANK_UNBLANK:
 		/* Screen: On, HSync: On, VSync: On */
 	case FB_BLANK_NORMAL:
@@ -1039,11 +1136,11 @@ static int tridentfb_blank(int blank_mode, struct fb_info *info)
 		PMCont |= 0x00;
 		DPMSCont |= 0x03;
 		break;
-    	}
+	}
 
-	write3CE(PowerStatus,DPMSCont);
-	t_outb(4,0x83C8);
-	t_outb(PMCont,0x83C6);
+	write3CE(PowerStatus, DPMSCont);
+	t_outb(4, 0x83C8);
+	t_outb(PMCont, 0x83C6);
 
 	debug("exit\n");
 
@@ -1051,7 +1148,20 @@ static int tridentfb_blank(int blank_mode, struct fb_info *info)
 	return (blank_mode == FB_BLANK_NORMAL) ? 1 : 0;
 }
 
-static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_device_id * id)
+static struct fb_ops tridentfb_ops = {
+	.owner = THIS_MODULE,
+	.fb_setcolreg = tridentfb_setcolreg,
+	.fb_pan_display = tridentfb_pan_display,
+	.fb_blank = tridentfb_blank,
+	.fb_check_var = tridentfb_check_var,
+	.fb_set_par = tridentfb_set_par,
+	.fb_fillrect = tridentfb_fillrect,
+	.fb_copyarea = tridentfb_copyarea,
+	.fb_imageblit = cfb_imageblit,
+};
+
+static int __devinit trident_pci_probe(struct pci_dev * dev,
+				       const struct pci_device_id * id)
 {
 	int err;
 	unsigned char revision;
@@ -1062,31 +1172,42 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_de
 
 	chip_id = id->device;
 
-	if(chip_id == CYBERBLADEi1)
+	if (chip_id == CYBERBLADEi1)
 		output("*** Please do use cyblafb, Cyberblade/i1 support "
 		       "will soon be removed from tridentfb!\n");
 
 
 	/* If PCI id is 0x9660 then further detect chip type */
-	
+
 	if (chip_id == TGUI9660) {
-		outb(RevisionID,0x3C4);
-		revision = inb(0x3C5);	
-	
+		outb(RevisionID, 0x3C4);
+		revision = inb(0x3C5);
+
 		switch (revision) {
-			case 0x22:
-			case 0x23: chip_id = CYBER9397;break;
-			case 0x2A: chip_id = CYBER9397DVD;break;
-			case 0x30:
-			case 0x33:
-			case 0x34:
-			case 0x35:
-			case 0x38:
-			case 0x3A:
-			case 0xB3: chip_id = CYBER9385;break;
-			case 0x40 ... 0x43: chip_id = CYBER9382;break;
-			case 0x4A: chip_id = CYBER9388;break;
-			default:break;	
+		case 0x22:
+		case 0x23:
+			chip_id = CYBER9397;
+			break;
+		case 0x2A:
+			chip_id = CYBER9397DVD;
+			break;
+		case 0x30:
+		case 0x33:
+		case 0x34:
+		case 0x35:
+		case 0x38:
+		case 0x3A:
+		case 0xB3:
+			chip_id = CYBER9385;
+			break;
+		case 0x40 ... 0x43:
+			chip_id = CYBER9382;
+			break;
+		case 0x4A:
+			chip_id = CYBER9388;
+			break;
+		default:
+			break;
 		}
 	}
 
@@ -1095,8 +1216,7 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_de
 
 	if (is_xp(chip_id)) {
 		acc = &accel_xp;
-	} else 
-	if (is_blade(chip_id)) {
+	} else if (is_blade(chip_id)) {
 		acc = &accel_blade;
 	} else {
 		acc = &accel_image;
@@ -1108,8 +1228,8 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_de
 	fb_info.par = &default_par;
 
 	/* setup MMIO region */
-	tridentfb_fix.mmio_start = pci_resource_start(dev,1);
-	tridentfb_fix.mmio_len = chip3D ? 0x20000:0x10000;
+	tridentfb_fix.mmio_start = pci_resource_start(dev, 1);
+	tridentfb_fix.mmio_len = chip3D ? 0x20000 : 0x10000;
 
 	if (!request_mem_region(tridentfb_fix.mmio_start, tridentfb_fix.mmio_len, "tridentfb")) {
 		debug("request_region failed!\n");
@@ -1125,11 +1245,11 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_de
 	}
 
 	enable_mmio();
-	
+
 	/* setup framebuffer memory */
-	tridentfb_fix.smem_start = pci_resource_start(dev,0);
+	tridentfb_fix.smem_start = pci_resource_start(dev, 0);
 	tridentfb_fix.smem_len = get_memsize();
-	
+
 	if (!request_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len, "tridentfb")) {
 		debug("request_mem_region failed!\n");
 		err = -1;
@@ -1137,7 +1257,7 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_de
 	}
 
 	fb_info.screen_base = ioremap_nocache(tridentfb_fix.smem_start,
-		       			tridentfb_fix.smem_len);
+					      tridentfb_fix.smem_len);
 
 	if (!fb_info.screen_base) {
 		release_mem_region(tridentfb_fix.smem_start, tridentfb_fix.smem_len);
@@ -1147,13 +1267,13 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_de
 	}
 
 	output("%s board found\n", pci_name(dev));
-#if 0	
-	output("Trident board found : mem = %X,io = %X, mem_v = %X, io_v = %X\n",
+#if 0
+	output("Trident board found : mem = %X, io = %X, mem_v = %X, io_v = %X\n",
 		tridentfb_fix.smem_start, tridentfb_fix.mmio_start, fb_info.screen_base, default_par.io_virt);
 #endif
 	displaytype = get_displaytype();
 
-	if(flatpanel)
+	if (flatpanel)
 		nativex = get_nativex();
 
 	fb_info.fix = tridentfb_fix;
@@ -1166,11 +1286,11 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_de
 #endif
 	fb_info.pseudo_palette = pseudo_pal;
 
-	if (!fb_find_mode(&default_var,&fb_info,mode,NULL,0,NULL,bpp)) {
+	if (!fb_find_mode(&default_var, &fb_info, mode, NULL, 0, NULL, bpp)) {
 		err = -EINVAL;
 		goto out_unmap;
 	}
-	fb_alloc_cmap(&fb_info.cmap,256,0);
+	fb_alloc_cmap(&fb_info.cmap, 256, 0);
 	if (defaultaccel && acc)
 		default_var.accel_flags |= FB_ACCELF_TEXT;
 	else
@@ -1184,8 +1304,8 @@ static int __devinit trident_pci_probe(struct pci_dev * dev, const struct pci_de
 		goto out_unmap;
 	}
 	output("fb%d: %s frame buffer device %dx%d-%dbpp\n",
-	   fb_info.node, fb_info.fix.id,default_var.xres,
-	   default_var.yres,default_var.bits_per_pixel);
+	   fb_info.node, fb_info.fix.id, default_var.xres,
+	   default_var.yres, default_var.bits_per_pixel);
 	return 0;
 
 out_unmap:
@@ -1196,7 +1316,7 @@ out_unmap:
 	return err;
 }
 
-static void __devexit trident_pci_remove(struct pci_dev * dev)
+static void __devexit trident_pci_remove(struct pci_dev *dev)
 {
 	struct tridentfb_par *par = (struct tridentfb_par*)fb_info.par;
 	unregister_framebuffer(&fb_info);
@@ -1208,69 +1328,70 @@ static void __devexit trident_pci_remove(struct pci_dev * dev)
 
 /* List of boards that we are trying to support */
 static struct pci_device_id trident_devices[] = {
-	{PCI_VENDOR_ID_TRIDENT,	BLADE3D, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEi7, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEi7D, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEi1, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEi1D, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEAi1, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEAi1D, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEE4, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	TGUI9660, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	IMAGE975, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	IMAGE985, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBER9320, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBER9388, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBER9520, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBER9525DVD, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBER9397, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBER9397DVD, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEXPAi1, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEXPm8, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
-	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEXPm16, PCI_ANY_ID,PCI_ANY_ID,0,0,0},
+	{PCI_VENDOR_ID_TRIDENT,	BLADE3D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEi7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEi7D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEi1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEi1D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEAi1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEAi1D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEE4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	TGUI9660, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	IMAGE975, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	IMAGE985, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBER9320, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBER9388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBER9520, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBER9525DVD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBER9397, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBER9397DVD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEXPAi1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEXPm8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+	{PCI_VENDOR_ID_TRIDENT,	CYBERBLADEXPm16, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
 	{0,}
-};	
-	
-MODULE_DEVICE_TABLE(pci,trident_devices); 
+};
+
+MODULE_DEVICE_TABLE(pci, trident_devices);
 
 static struct pci_driver tridentfb_pci_driver = {
-	.name		= "tridentfb",
-	.id_table	= trident_devices,
-	.probe		= trident_pci_probe,
-	.remove		= __devexit_p(trident_pci_remove)
+	.name = "tridentfb",
+	.id_table = trident_devices,
+	.probe = trident_pci_probe,
+	.remove = __devexit_p(trident_pci_remove)
 };
 
 /*
  * Parse user specified options (`video=trident:')
  * example:
- * 	video=trident:800x600,bpp=16,noaccel
+ *	video=trident:800x600,bpp=16,noaccel
  */
 #ifndef MODULE
 static int tridentfb_setup(char *options)
 {
-	char * opt;
+	char *opt;
 	if (!options || !*options)
 		return 0;
-	while((opt = strsep(&options,",")) != NULL ) {
-		if (!*opt) continue;
-		if (!strncmp(opt,"noaccel",7))
+	while ((opt = strsep(&options, ",")) != NULL) {
+		if (!*opt)
+			continue;
+		if (!strncmp(opt, "noaccel", 7))
 			noaccel = 1;
-		else if (!strncmp(opt,"fp",2))
+		else if (!strncmp(opt, "fp", 2))
 			displaytype = DISPLAY_FP;
-		else if (!strncmp(opt,"crt",3))
+		else if (!strncmp(opt, "crt", 3))
 			displaytype = DISPLAY_CRT;
-		else if (!strncmp(opt,"bpp=",4))
-			bpp = simple_strtoul(opt+4,NULL,0);
-		else if (!strncmp(opt,"center",6))
+		else if (!strncmp(opt, "bpp=", 4))
+			bpp = simple_strtoul(opt + 4, NULL, 0);
+		else if (!strncmp(opt, "center", 6))
 			center = 1;
-		else if (!strncmp(opt,"stretch",7))
+		else if (!strncmp(opt, "stretch", 7))
 			stretch = 1;
-		else if (!strncmp(opt,"memsize=",8))
-			memsize = simple_strtoul(opt+8,NULL,0);
-		else if (!strncmp(opt,"memdiff=",8))
-			memdiff = simple_strtoul(opt+8,NULL,0);
-		else if (!strncmp(opt,"nativex=",8))
-			nativex = simple_strtoul(opt+8,NULL,0);
+		else if (!strncmp(opt, "memsize=", 8))
+			memsize = simple_strtoul(opt + 8, NULL, 0);
+		else if (!strncmp(opt, "memdiff=", 8))
+			memdiff = simple_strtoul(opt + 8, NULL, 0);
+		else if (!strncmp(opt, "nativex=", 8))
+			nativex = simple_strtoul(opt + 8, NULL, 0);
 		else
 			mode = opt;
 	}
@@ -1296,18 +1417,6 @@ static void __exit tridentfb_exit(void)
 	pci_unregister_driver(&tridentfb_pci_driver);
 }
 
-static struct fb_ops tridentfb_ops = {
-	.owner	= THIS_MODULE,
-	.fb_setcolreg = tridentfb_setcolreg,
-	.fb_pan_display = tridentfb_pan_display,
-	.fb_blank = tridentfb_blank,
-	.fb_check_var = tridentfb_check_var,
-	.fb_set_par = tridentfb_set_par,
-	.fb_fillrect = tridentfb_fillrect,
-	.fb_copyarea= tridentfb_copyarea,
-	.fb_imageblit = cfb_imageblit,
-};
-
 module_init(tridentfb_init);
 module_exit(tridentfb_exit);
 
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
new file mode 100644
index 00000000000..b983d262ab7
--- /dev/null
+++ b/drivers/video/uvesafb.c
@@ -0,0 +1,2066 @@
+/*
+ * A framebuffer driver for VBE 2.0+ compliant video cards
+ *
+ * (c) 2007 Michal Januszewski <spock@gentoo.org>
+ *     Loosely based upon the vesafb driver.
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+#include <linux/completion.h>
+#include <linux/connector.h>
+#include <linux/random.h>
+#include <linux/platform_device.h>
+#include <linux/limits.h>
+#include <linux/fb.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <video/edid.h>
+#include <video/uvesafb.h>
+#ifdef CONFIG_X86
+#include <video/vga.h>
+#endif
+#ifdef CONFIG_MTRR
+#include <asm/mtrr.h>
+#endif
+#include "edid.h"
+
+static struct cb_id uvesafb_cn_id = {
+	.idx = CN_IDX_V86D,
+	.val = CN_VAL_V86D_UVESAFB
+};
+static char v86d_path[PATH_MAX] = "/sbin/v86d";
+static char v86d_started;	/* has v86d been started by uvesafb? */
+
+static struct fb_fix_screeninfo uvesafb_fix __devinitdata = {
+	.id	= "VESA VGA",
+	.type	= FB_TYPE_PACKED_PIXELS,
+	.accel	= FB_ACCEL_NONE,
+	.visual = FB_VISUAL_TRUECOLOR,
+};
+
+static int mtrr		__devinitdata = 3; /* enable mtrr by default */
+static int blank	__devinitdata = 1; /* enable blanking by default */
+static int ypan		__devinitdata = 1; /* 0: scroll, 1: ypan, 2: ywrap */
+static int pmi_setpal	__devinitdata = 1; /* use PMI for palette changes */
+static int nocrtc	__devinitdata; /* ignore CRTC settings */
+static int noedid	__devinitdata; /* don't try DDC transfers */
+static int vram_remap	__devinitdata; /* set amt. of memory to be used */
+static int vram_total	__devinitdata; /* set total amount of memory */
+static u16 maxclk	__devinitdata; /* maximum pixel clock */
+static u16 maxvf	__devinitdata; /* maximum vertical frequency */
+static u16 maxhf	__devinitdata; /* maximum horizontal frequency */
+static u16 vbemode	__devinitdata; /* force use of a specific VBE mode */
+static char *mode_option __devinitdata;
+
+static struct uvesafb_ktask *uvfb_tasks[UVESAFB_TASKS_MAX];
+static DEFINE_MUTEX(uvfb_lock);
+
+/*
+ * A handler for replies from userspace.
+ *
+ * Check the slot number, ack and payload/buffer lengths of each message;
+ * if they are consistent, find the kernel part of the task struct, copy
+ * the registers and the buffer contents and then complete the task.
+ */
+static void uvesafb_cn_callback(void *data)
+{
+	struct cn_msg *msg = data;
+	struct uvesafb_task *utask;
+	struct uvesafb_ktask *task;
+
+	if (msg->seq >= UVESAFB_TASKS_MAX || msg->len < sizeof(*utask))
+		return;
+
+	mutex_lock(&uvfb_lock);
+	task = uvfb_tasks[msg->seq];
+
+	if (!task || msg->ack != task->ack) {
+		mutex_unlock(&uvfb_lock);
+		return;
+	}
+
+	utask = (struct uvesafb_task *)msg->data;
+
+	/* msg->len >= sizeof(*utask) here, so the subtraction can't wrap. */
+	if (task->t.buf_len < utask->buf_len ||
+	    utask->buf_len > msg->len - sizeof(*utask)) {
+		mutex_unlock(&uvfb_lock);
+		return;
+	}
+
+	uvfb_tasks[msg->seq] = NULL;
+	mutex_unlock(&uvfb_lock);
+
+	memcpy(&task->t, utask, sizeof(*utask));
+
+	if (task->t.buf_len && task->buf)
+		memcpy(task->buf, utask + 1, task->t.buf_len);
+
+	complete(task->done);
+	return;
+}
+
+static int uvesafb_helper_start(void)
+{
+	char *envp[] = {
+		"HOME=/",
+		"PATH=/sbin:/bin",
+		NULL,
+	};
+
+	char *argv[] = {
+		v86d_path,
+		NULL,
+	};
+
+	return call_usermodehelper(v86d_path, argv, envp, 1);
+}
+
+/*
+ * Execute a uvesafb task.
+ *
+ * Returns 0 if the task is executed successfully.
+ *
+ * A message sent to the userspace consists of the uvesafb_task
+ * struct and (optionally) a buffer. The uvesafb_task struct is
+ * a simplified version of uvesafb_ktask (its kernel counterpart)
+ * containing only the register values, flags and the length of
+ * the buffer.
+ *
+ * Each message is assigned a sequence number (increased linearly)
+ * and a random ack number. The sequence number is used as a key
+ * for the uvfb_tasks array which holds pointers to uvesafb_ktask
+ * structs for all requests.
+ */
+static int uvesafb_exec(struct uvesafb_ktask *task)
+{
+	static int seq;
+	struct cn_msg *m;
+	int err;
+	int len = sizeof(task->t) + task->t.buf_len;
+
+	/*
+	 * Check whether the message isn't longer than the maximum
+	 * allowed by connector.
+	 */
+	if (sizeof(*m) + len > CONNECTOR_MAX_MSG_SIZE) {
+		printk(KERN_WARNING "uvesafb: message too long (%d), "
+			"can't execute task\n", (int)(sizeof(*m) + len));
+		return -E2BIG;
+	}
+
+	m = kzalloc(sizeof(*m) + len, GFP_KERNEL);
+	if (!m)
+		return -ENOMEM;
+
+	init_completion(task->done);
+
+	memcpy(&m->id, &uvesafb_cn_id, sizeof(m->id));
+	m->seq = seq;
+	m->len = len;
+	m->ack = random32();
+
+	/* uvesafb_task structure */
+	memcpy(m + 1, &task->t, sizeof(task->t));
+
+	/* Buffer */
+	memcpy((u8 *)(m + 1) + sizeof(task->t), task->buf, task->t.buf_len);
+
+	/*
+	 * Save the message ack number so that we can find the kernel
+	 * part of this task when a reply is received from userspace.
+	 */
+	task->ack = m->ack;
+
+	mutex_lock(&uvfb_lock);
+	/* If all slots are taken -- free the message and bail out. */
+	if (uvfb_tasks[seq]) {
+		mutex_unlock(&uvfb_lock);
+		kfree(m);
+		return -EBUSY;
+	}
+
+	/* Save a pointer to the kernel part of the task struct. */
+	uvfb_tasks[seq] = task;
+	mutex_unlock(&uvfb_lock);
+
+	err = cn_netlink_send(m, 0, gfp_any());
+	if (err == -ESRCH) {
+		/*
+		 * Try to start the userspace helper if sending
+		 * the request failed the first time.
+		 */
+		err = uvesafb_helper_start();
+		if (err) {
+			printk(KERN_ERR "uvesafb: failed to execute %s\n",
+					v86d_path);
+			printk(KERN_ERR "uvesafb: make sure that the v86d "
+					"helper is installed and executable\n");
+		} else {
+			v86d_started = 1;
+			err = cn_netlink_send(m, 0, gfp_any());
+		}
+	}
+	kfree(m);
+
+	if (!err && !(task->t.flags & TF_EXIT))
+		err = !wait_for_completion_timeout(task->done,
+				msecs_to_jiffies(UVESAFB_TIMEOUT));
+
+	mutex_lock(&uvfb_lock);
+	uvfb_tasks[seq] = NULL;
+	mutex_unlock(&uvfb_lock);
+
+	seq++;
+	if (seq >= UVESAFB_TASKS_MAX)
+		seq = 0;
+
+	return err;
+}
+
+/*
+ * Free a uvesafb_ktask struct.
+ */
+static void uvesafb_free(struct uvesafb_ktask *task)
+{
+	if (!task)
+		return;
+	/* kfree(NULL) is a no-op, so ->done needs no check of its own. */
+	kfree(task->done);
+	kfree(task);
+}
+
+/*
+ * Prepare a uvesafb_ktask struct to be used again.
+ */
+static void uvesafb_reset(struct uvesafb_ktask *task)
+{
+	struct completion *cpl = task->done;
+
+	memset(task, 0, sizeof(*task));
+	task->done = cpl;
+}
+
+/*
+ * Allocate and prepare a uvesafb_ktask struct.
+ */
+static struct uvesafb_ktask *uvesafb_prep(void)
+{
+	struct uvesafb_ktask *ktask = kzalloc(sizeof(*ktask), GFP_KERNEL);
+
+	if (!ktask)
+		return NULL;
+
+	/* Allocate the completion; on failure the task is released too. */
+	ktask->done = kzalloc(sizeof(*ktask->done), GFP_KERNEL);
+	if (ktask->done)
+		return ktask;
+	kfree(ktask);
+	return NULL;
+}
+
+static void uvesafb_setup_var(struct fb_var_screeninfo *var,
+		struct fb_info *info, struct vbe_mode_ib *mode)
+{
+	struct uvesafb_par *par = info->par;
+
+	var->vmode = FB_VMODE_NONINTERLACED;
+	var->sync = FB_SYNC_VERT_HIGH_ACT;
+
+	var->xres = mode->x_res;
+	var->yres = mode->y_res;
+	var->xres_virtual = mode->x_res;
+	var->yres_virtual = (par->ypan) ?
+			info->fix.smem_len / mode->bytes_per_scan_line :
+			mode->y_res;
+	var->xoffset = 0;
+	var->yoffset = 0;
+	var->bits_per_pixel = mode->bits_per_pixel;
+
+	if (var->bits_per_pixel == 15)
+		var->bits_per_pixel = 16;
+
+	if (var->bits_per_pixel > 8) {
+		var->red.offset    = mode->red_off;
+		var->red.length    = mode->red_len;
+		var->green.offset  = mode->green_off;
+		var->green.length  = mode->green_len;
+		var->blue.offset   = mode->blue_off;
+		var->blue.length   = mode->blue_len;
+		var->transp.offset = mode->rsvd_off;
+		var->transp.length = mode->rsvd_len;
+	} else {
+		var->red.offset    = 0;
+		var->green.offset  = 0;
+		var->blue.offset   = 0;
+		var->transp.offset = 0;
+
+		/*
+		 * We're assuming that we can switch the DAC to 8 bits. If
+		 * this proves to be incorrect, we'll update the fields
+		 * later in set_par().
+		 */
+		if (par->vbe_ib.capabilities & VBE_CAP_CAN_SWITCH_DAC) {
+			var->red.length    = 8;
+			var->green.length  = 8;
+			var->blue.length   = 8;
+			var->transp.length = 0;
+		} else {
+			var->red.length    = 6;
+			var->green.length  = 6;
+			var->blue.length   = 6;
+			var->transp.length = 0;
+		}
+	}
+}
+
+static int uvesafb_vbe_find_mode(struct uvesafb_par *par,
+		int xres, int yres, int depth, unsigned char flags)
+{
+	int i, match = -1, h = 0, d = 0x7fffffff;
+
+	for (i = 0; i < par->vbe_modes_cnt; i++) {
+		h = abs(par->vbe_modes[i].x_res - xres) +
+		    abs(par->vbe_modes[i].y_res - yres) +
+		    abs(depth - par->vbe_modes[i].depth);
+
+		/*
+		 * We have an exact match in terms of resolution
+		 * and depth.
+		 */
+		if (h == 0)
+			return i;
+
+		if (h < d || (h == d && par->vbe_modes[i].depth > depth)) {
+			d = h;
+			match = i;
+		}
+	}
+	/* Empty mode list: don't index vbe_modes[] with -1 below. */
+	if (match < 0)
+		return -1;
+	i = 1;
+
+	if (flags & UVESAFB_EXACT_DEPTH &&
+			par->vbe_modes[match].depth != depth)
+		i = 0;
+
+	if (flags & UVESAFB_EXACT_RES && d > 24)
+		i = 0;
+
+	return i ? match : -1;
+}
+
+static u8 *uvesafb_vbe_state_save(struct uvesafb_par *par)
+{
+	struct uvesafb_ktask *task;
+	u8 *state;
+	int err;
+
+	if (!par->vbe_state_size)
+		return NULL;
+
+	state = kmalloc(par->vbe_state_size, GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	task = uvesafb_prep();
+	if (!task) {
+		kfree(state);
+		return NULL;
+	}
+
+	task->t.regs.eax = 0x4f04;
+	task->t.regs.ecx = 0x000f;
+	task->t.regs.edx = 0x0001;
+	task->t.flags = TF_BUF_RET | TF_BUF_ESBX;
+	task->t.buf_len = par->vbe_state_size;
+	task->buf = state;
+	err = uvesafb_exec(task);
+
+	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
+		printk(KERN_WARNING "uvesafb: VBE get state call "
+				"failed (eax=0x%x, err=%d)\n",
+				task->t.regs.eax, err);
+		kfree(state);
+		state = NULL;
+	}
+
+	uvesafb_free(task);
+	return state;
+}
+
+static void uvesafb_vbe_state_restore(struct uvesafb_par *par, u8 *state_buf)
+{
+	struct uvesafb_ktask *task;
+	int err;
+
+	if (!state_buf)
+		return;
+
+	task = uvesafb_prep();
+	if (!task)
+		return;
+
+	task->t.regs.eax = 0x4f04;
+	task->t.regs.ecx = 0x000f;
+	task->t.regs.edx = 0x0002;
+	task->t.buf_len = par->vbe_state_size;
+	task->t.flags = TF_BUF_ESBX;
+	task->buf = state_buf;
+
+	err = uvesafb_exec(task);
+	if (err || (task->t.regs.eax & 0xffff) != 0x004f)
+		printk(KERN_WARNING "uvesafb: VBE state restore call "
+				"failed (eax=0x%x, err=%d)\n",
+				task->t.regs.eax, err);
+
+	uvesafb_free(task);
+}
+
+/*
+ * Fetch the VBE controller information block (EAX = 0x4f00) into
+ * par->vbe_ib and print a one-line summary of the adapter.
+ *
+ * Returns 0 on success, or -EINVAL if the call fails, the reported VBE
+ * version is below 2.0, or the mode list pointer is zero.
+ */
+static int __devinit uvesafb_vbe_getinfo(struct uvesafb_ktask *task,
+		struct uvesafb_par *par)
+{
+	int err;
+
+	task->t.regs.eax = 0x4f00;
+	task->t.flags = TF_VBEIB;
+	task->t.buf_len = sizeof(struct vbe_ib);
+	task->buf = &par->vbe_ib;
+	/* Exactly four bytes are copied; no terminating NUL is written. */
+	strncpy(par->vbe_ib.vbe_signature, "VBE2", 4);
+
+	err = uvesafb_exec(task);
+	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
+		printk(KERN_ERR "uvesafb: Getting VBE info block failed "
+				"(eax=0x%x, err=%d)\n", (u32)task->t.regs.eax,
+				err);
+		return -EINVAL;
+	}
+
+	if (par->vbe_ib.vbe_version < 0x0200) {
+		printk(KERN_ERR "uvesafb: Sorry, pre-VBE 2.0 cards are "
+				"not supported.\n");
+		return -EINVAL;
+	}
+
+	if (!par->vbe_ib.mode_list_ptr) {
+		printk(KERN_ERR "uvesafb: Missing mode list!\n");
+		return -EINVAL;
+	}
+
+	/* Start of a log line that the printk() calls below continue. */
+	printk(KERN_INFO "uvesafb: ");
+
+	/*
+	 * Convert string pointers and the mode list pointer into
+	 * usable addresses. Print informational messages about the
+	 * video adapter and its vendor.
+	 *
+	 * The *_ptr fields are used here as byte offsets from the start
+	 * of the info block buffer; a zero offset means "not present".
+	 * NOTE(review): the offsets are not range-checked against the
+	 * buffer size before being dereferenced.
+	 */
+	if (par->vbe_ib.oem_vendor_name_ptr)
+		printk("%s, ",
+			((char *)task->buf) + par->vbe_ib.oem_vendor_name_ptr);
+
+	if (par->vbe_ib.oem_product_name_ptr)
+		printk("%s, ",
+			((char *)task->buf) + par->vbe_ib.oem_product_name_ptr);
+
+	if (par->vbe_ib.oem_product_rev_ptr)
+		printk("%s, ",
+			((char *)task->buf) + par->vbe_ib.oem_product_rev_ptr);
+
+	if (par->vbe_ib.oem_string_ptr)
+		printk("OEM: %s, ",
+			((char *)task->buf) + par->vbe_ib.oem_string_ptr);
+
+	/* The version is printed as major (high byte) . minor (low byte). */
+	printk("VBE v%d.%d\n", ((par->vbe_ib.vbe_version & 0xff00) >> 8),
+			par->vbe_ib.vbe_version & 0xff);
+
+	return 0;
+}
+
+/*
+ * Build par->vbe_modes: query the mode information block (EAX = 0x4f01)
+ * for every entry in the mode list of par->vbe_ib and keep the modes
+ * that pass the attribute/bpp filter below.
+ *
+ * On return par->vbe_modes_cnt is the number of modes kept; they occupy
+ * the first vbe_modes_cnt slots of par->vbe_modes.
+ *
+ * Returns 0 on success, -ENOMEM if the array cannot be allocated, or
+ * -EINVAL if a mode info call fails. par->vbe_modes is not freed here on
+ * the error path.
+ *
+ * NOTE(review): both scans rely on the 0xffff terminator being present
+ * in the mode list; there is no upper bound on the walk.
+ */
+static int __devinit uvesafb_vbe_getmodes(struct uvesafb_ktask *task,
+		struct uvesafb_par *par)
+{
+	int off = 0, err;
+	u16 *mode;
+
+	par->vbe_modes_cnt = 0;
+
+	/* Count available modes. */
+	mode = (u16 *) (((u8 *)&par->vbe_ib) + par->vbe_ib.mode_list_ptr);
+	while (*mode != 0xffff) {
+		par->vbe_modes_cnt++;
+		mode++;
+	}
+
+	/* kcalloc() zeroes the array and checks count * size for overflow. */
+	par->vbe_modes = kcalloc(par->vbe_modes_cnt,
+				sizeof(struct vbe_mode_ib), GFP_KERNEL);
+	if (!par->vbe_modes)
+		return -ENOMEM;
+
+	/* Get info about all available modes. */
+	mode = (u16 *) (((u8 *)&par->vbe_ib) + par->vbe_ib.mode_list_ptr);
+	while (*mode != 0xffff) {
+		struct vbe_mode_ib *mib;
+
+		uvesafb_reset(task);
+		task->t.regs.eax = 0x4f01;
+		task->t.regs.ecx = (u32) *mode;
+		task->t.flags = TF_BUF_RET | TF_BUF_ESDI;
+		task->t.buf_len = sizeof(struct vbe_mode_ib);
+		/* Write straight into the next free slot of the array. */
+		task->buf = par->vbe_modes + off;
+
+		err = uvesafb_exec(task);
+		if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
+			printk(KERN_ERR "uvesafb: Getting mode info block "
+				"for mode 0x%x failed (eax=0x%x, err=%d)\n",
+				*mode, (u32)task->t.regs.eax, err);
+			return -EINVAL;
+		}
+
+		mib = task->buf;
+		mib->mode_id = *mode;
+
+		/*
+		 * We only want modes that are supported with the current
+		 * hardware configuration, color, graphics and that have
+		 * support for the LFB.
+		 *
+		 * A rejected mode does not advance 'off', so its slot is
+		 * reused (overwritten) by the next query.
+		 */
+		if ((mib->mode_attr & VBE_MODE_MASK) == VBE_MODE_MASK &&
+				 mib->bits_per_pixel >= 8)
+			off++;
+		else
+			par->vbe_modes_cnt--;
+
+		mode++;
+		mib->depth = mib->red_len + mib->green_len + mib->blue_len;
+
+		/*
+		 * Handle 8bpp modes and modes with broken color component
+		 * lengths.
+		 */
+		if (mib->depth == 0 || (mib->depth == 24 &&
+					mib->bits_per_pixel == 32))
+			mib->depth = mib->bits_per_pixel;
+	}
+
+	return 0;
+}
+
+/*
+ * The Protected Mode Interface is 32-bit x86 code, so we only run it on
+ * x86 and not x86_64.
+ *
+ * uvesafb_vbe_getpmi() queries the protected mode interface table
+ * (EAX = 0x4f0a, EBX = 0) and, when the call succeeds and the returned
+ * segment is at or above 0xc000, records the table base and the entry
+ * points for "set display start" and "set palette" in par. Otherwise
+ * par->pmi_setpal and par->ypan are cleared. Always returns 0.
+ */
+#ifdef CONFIG_X86_32
+static int __devinit uvesafb_vbe_getpmi(struct uvesafb_ktask *task,
+		struct uvesafb_par *par)
+{
+	int i, err;
+
+	uvesafb_reset(task);
+	task->t.regs.eax = 0x4f0a;
+	task->t.regs.ebx = 0x0;
+	/* 'err' is stored but not examined; only EAX and ES are checked. */
+	err = uvesafb_exec(task);
+
+	if ((task->t.regs.eax & 0xffff) != 0x4f || task->t.regs.es < 0xc000) {
+		par->pmi_setpal = par->ypan = 0;
+	} else {
+		/* Real-mode ES:DI -> linear address -> kernel virtual. */
+		par->pmi_base = (u16 *)phys_to_virt(((u32)task->t.regs.es << 4)
+						+ task->t.regs.edi);
+		/* Table words 1 and 2 are byte offsets from the table base. */
+		par->pmi_start = (u8 *)par->pmi_base + par->pmi_base[1];
+		par->pmi_pal = (u8 *)par->pmi_base + par->pmi_base[2];
+		printk(KERN_INFO "uvesafb: protected mode interface info at "
+				 "%04x:%04x\n",
+				 (u16)task->t.regs.es, (u16)task->t.regs.edi);
+		printk(KERN_INFO "uvesafb: pmi: set display start = %p, "
+				 "set palette = %p\n", par->pmi_start,
+				 par->pmi_pal);
+
+		/*
+		 * Table word 3, when non-zero, is the byte offset of a list
+		 * of 16-bit port numbers terminated by 0xffff.
+		 */
+		if (par->pmi_base[3]) {
+			printk(KERN_INFO "uvesafb: pmi: ports = ");
+			for (i = par->pmi_base[3]/2;
+					par->pmi_base[i] != 0xffff; i++)
+				printk("%x ", par->pmi_base[i]);
+			printk("\n");
+
+			/*
+			 * NOTE(review): the loop above only exits when
+			 * pmi_base[i] == 0xffff, so this condition can never
+			 * be true as written. Presumably the entry after
+			 * the terminator was meant to be tested -- confirm
+			 * against the VBE specification before changing.
+			 */
+			if (par->pmi_base[i] != 0xffff) {
+				printk(KERN_INFO "uvesafb: can't handle memory"
+						 " requests, pmi disabled\n");
+				par->ypan = par->pmi_setpal = 0;
+			}
+		}
+	}
+	return 0;
+}
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Tell whether a video mode can be used: it must pass the monitor limit
+ * check (only performed when info->monspecs.gtf is set) and the Video
+ * BIOS must offer a mode with exactly this resolution.
+ *
+ * Returns 1 when the mode is usable and 0 otherwise. Note that
+ * info->var is overwritten with the mode when the limit check runs.
+ */
+static int __devinit uvesafb_is_valid_mode(struct fb_videomode *mode,
+		struct fb_info *info)
+{
+	int idx;
+
+	if (info->monspecs.gtf) {
+		fb_videomode_to_var(&info->var, mode);
+		if (fb_validate_mode(&info->var, info) != 0)
+			return 0;
+	}
+
+	idx = uvesafb_vbe_find_mode(info->par, mode->xres, mode->yres, 8,
+				UVESAFB_EXACT_RES);
+
+	return (idx == -1) ? 0 : 1;
+}
+
+/*
+ * Read the monitor's EDID block through the Video BIOS (EAX = 0x4f15)
+ * and convert it into info->monspecs.
+ *
+ * The first call (EBX = 0) reports which DDC transfer levels are
+ * supported; the second (EBX = 1) reads EDID_LENGTH bytes into a
+ * temporary buffer that is freed before returning.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary buffer cannot be
+ * allocated, and -EINVAL when EDID is disabled via 'noedid', the VBE
+ * version is below 3.0, DDC is unsupported, or a BIOS call fails.
+ */
+static int __devinit uvesafb_vbe_getedid(struct uvesafb_ktask *task,
+		struct fb_info *info)
+{
+	struct uvesafb_par *par = info->par;
+	int err = 0;
+
+	if (noedid || par->vbe_ib.vbe_version < 0x0300)
+		return -EINVAL;
+
+	task->t.regs.eax = 0x4f15;
+	task->t.regs.ebx = 0;
+	task->t.regs.ecx = 0;
+	task->t.buf_len = 0;
+	task->t.flags = 0;
+
+	err = uvesafb_exec(task);
+
+	if ((task->t.regs.eax & 0xffff) != 0x004f || err)
+		return -EINVAL;
+
+	/* The two low bits of EBX report the supported DDC levels. */
+	if ((task->t.regs.ebx & 0x3) == 3) {
+		printk(KERN_INFO "uvesafb: VBIOS/hardware supports both "
+				 "DDC1 and DDC2 transfers\n");
+	} else if ((task->t.regs.ebx & 0x3) == 2) {
+		printk(KERN_INFO "uvesafb: VBIOS/hardware supports DDC2 "
+				 "transfers\n");
+	} else if ((task->t.regs.ebx & 0x3) == 1) {
+		printk(KERN_INFO "uvesafb: VBIOS/hardware supports DDC1 "
+				 "transfers\n");
+	} else {
+		printk(KERN_INFO "uvesafb: VBIOS/hardware doesn't support "
+				 "DDC transfers\n");
+		return -EINVAL;
+	}
+
+	task->t.regs.eax = 0x4f15;
+	task->t.regs.ebx = 1;
+	task->t.regs.ecx = task->t.regs.edx = 0;
+	task->t.flags = TF_BUF_RET | TF_BUF_ESDI;
+	task->t.buf_len = EDID_LENGTH;
+	task->buf = kzalloc(EDID_LENGTH, GFP_KERNEL);
+	/* Without a buffer there is nothing for the BIOS call to fill. */
+	if (!task->buf)
+		return -ENOMEM;
+
+	err = uvesafb_exec(task);
+
+	if ((task->t.regs.eax & 0xffff) == 0x004f && !err) {
+		fb_edid_to_monspecs(task->buf, &info->monspecs);
+
+		if (info->monspecs.vfmax && info->monspecs.hfmax) {
+			/*
+			 * If the maximum pixel clock wasn't specified in
+			 * the EDID block, set it to 300 MHz.
+			 */
+			if (info->monspecs.dclkmax == 0)
+				info->monspecs.dclkmax = 300 * 1000000;
+			info->monspecs.gtf = 1;
+		}
+	} else {
+		err = -EINVAL;
+	}
+
+	kfree(task->buf);
+	return err;
+}
+
+/*
+ * Establish the monitor limits in info->monspecs (from EDID and/or the
+ * maxclk/maxvf/maxhf overrides) and populate info->modelist with the
+ * modes the driver is willing to offer.
+ *
+ * The caller is expected to have initialised info->modelist before
+ * calling this function; modes are only appended here.
+ */
+static void __devinit uvesafb_vbe_getmonspecs(struct uvesafb_ktask *task,
+		struct fb_info *info)
+{
+	struct uvesafb_par *par = info->par;
+	int i;
+
+	memset(&info->monspecs, 0, sizeof(info->monspecs));
+
+	/*
+	 * If we don't get all necessary data from the EDID block,
+	 * mark it as incompatible with the GTF and set nocrtc so
+	 * that we always use the default BIOS refresh rate.
+	 */
+	if (uvesafb_vbe_getedid(task, info)) {
+		info->monspecs.gtf = 0;
+		par->nocrtc = 1;
+	}
+
+	/* Kernel command line overrides. */
+	/* maxclk is scaled by 10^6 and maxhf by 10^3 before being stored. */
+	if (maxclk)
+		info->monspecs.dclkmax = maxclk * 1000000;
+	if (maxvf)
+		info->monspecs.vfmax = maxvf;
+	if (maxhf)
+		info->monspecs.hfmax = maxhf * 1000;
+
+	/*
+	 * In case DDC transfers are not supported, the user can provide
+	 * monitor limits manually. Lower limits are set to "safe" values.
+	 */
+	if (info->monspecs.gtf == 0 && maxclk && maxvf && maxhf) {
+		info->monspecs.dclkmin = 0;
+		info->monspecs.vfmin = 60;
+		info->monspecs.hfmin = 29000;
+		info->monspecs.gtf = 1;
+		par->nocrtc = 0;
+	}
+
+	if (info->monspecs.gtf)
+		printk(KERN_INFO
+			"uvesafb: monitor limits: vf = %d Hz, hf = %d kHz, "
+			"clk = %d MHz\n", info->monspecs.vfmax,
+			(int)(info->monspecs.hfmax / 1000),
+			(int)(info->monspecs.dclkmax / 1000000));
+	else
+		printk(KERN_INFO "uvesafb: no monitor limits have been set, "
+				 "default refresh rate will be used\n");
+
+	/* Add VBE modes to the modelist. */
+	/* Timings are generated for 60 Hz, ignoring the monitor limits. */
+	for (i = 0; i < par->vbe_modes_cnt; i++) {
+		struct fb_var_screeninfo var;
+		struct vbe_mode_ib *mode;
+		struct fb_videomode vmode;
+
+		mode = &par->vbe_modes[i];
+		memset(&var, 0, sizeof(var));
+
+		var.xres = mode->x_res;
+		var.yres = mode->y_res;
+
+		fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60, &var, info);
+		fb_var_to_videomode(&vmode, &var);
+		fb_add_videomode(&vmode, &info->modelist);
+	}
+
+	/* Add valid VESA modes to our modelist. */
+	for (i = 0; i < VESA_MODEDB_SIZE; i++) {
+		if (uvesafb_is_valid_mode((struct fb_videomode *)
+						&vesa_modes[i], info))
+			fb_add_videomode(&vesa_modes[i], &info->modelist);
+	}
+
+	/* Add valid modes from the monitor's own mode database. */
+	for (i = 0; i < info->monspecs.modedb_len; i++) {
+		if (uvesafb_is_valid_mode(&info->monspecs.modedb[i], info))
+			fb_add_videomode(&info->monspecs.modedb[i],
+					&info->modelist);
+	}
+
+	return;
+}
+
+/*
+ * Ask the Video BIOS how large the save/restore state buffer must be
+ * and store the answer, in bytes, in par->vbe_state_size. The size is
+ * set to 0 when the query fails.
+ */
+static void __devinit uvesafb_vbe_getstatesize(struct uvesafb_ktask *task,
+		struct uvesafb_par *par)
+{
+	int ret;
+
+	uvesafb_reset(task);
+
+	/*
+	 * EAX = 0x4f04 with EDX = 0 requests the buffer size; CL = 0x0f
+	 * selects all available hardware state data.
+	 */
+	task->t.flags = 0;
+	task->t.regs.edx = 0x0000;
+	task->t.regs.ecx = 0x000f;
+	task->t.regs.eax = 0x4f04;
+
+	ret = uvesafb_exec(task);
+
+	if (!ret && (task->t.regs.eax & 0xffff) == 0x004f) {
+		/* BX holds the size in units of 64 bytes. */
+		par->vbe_state_size = 64 * (task->t.regs.ebx & 0xffff);
+		return;
+	}
+
+	printk(KERN_WARNING "uvesafb: VBE state buffer size "
+		"cannot be determined (eax=0x%x, err=%d)\n",
+		task->t.regs.eax, ret);
+	par->vbe_state_size = 0;
+}
+
+/*
+ * Gather everything the driver needs from the Video BIOS: controller
+ * info, the mode list, the optional protected mode interface, monitor
+ * limits and the hardware state buffer size.
+ *
+ * Returns 0 on success, -ENOMEM if no task can be prepared, or the error
+ * from uvesafb_vbe_getinfo() / uvesafb_vbe_getmodes(). Failures in the
+ * later steps do not affect the return value.
+ */
+static int __devinit uvesafb_vbe_init(struct fb_info *info)
+{
+	struct uvesafb_ktask *task = NULL;
+	struct uvesafb_par *par = info->par;
+	int err;
+
+	task = uvesafb_prep();
+	if (!task)
+		return -ENOMEM;
+
+	err = uvesafb_vbe_getinfo(task, par);
+	if (err)
+		goto out;
+
+	err = uvesafb_vbe_getmodes(task, par);
+	if (err)
+		goto out;
+
+	/* Seed the per-device settings from the driver-wide options. */
+	par->nocrtc = nocrtc;
+#ifdef CONFIG_X86_32
+	par->pmi_setpal = pmi_setpal;
+	par->ypan = ypan;
+
+	/* The PMI is only queried if one of its users is enabled. */
+	if (par->pmi_setpal || par->ypan)
+		uvesafb_vbe_getpmi(task, par);
+#else
+	/* The protected mode interface is not available on non-x86. */
+	par->pmi_setpal = par->ypan = 0;
+#endif
+
+	INIT_LIST_HEAD(&info->modelist);
+	uvesafb_vbe_getmonspecs(task, info);
+	uvesafb_vbe_getstatesize(task, par);
+
+	/* On the success path 'err' is still 0 from uvesafb_vbe_getmodes(). */
+out:	uvesafb_free(task);
+	return err;
+}
+
+/*
+ * Choose the initial video mode and set up info->var for it.
+ *
+ * Selection order:
+ *   1. the VBE mode id requested via 'vbemode', if it is available;
+ *   2. the result of fb_find_mode() on 'mode_option' (or the driver
+ *      default) against the modelist;
+ *   3. the best modelist match for 640x480;
+ *   4. the first entry of par->vbe_modes.
+ *
+ * Returns the index into par->vbe_modes of the chosen mode, or -EINVAL
+ * if no matching VBE mode exists.
+ */
+static int __devinit uvesafb_vbe_init_mode(struct fb_info *info)
+{
+	struct list_head *pos;
+	struct fb_modelist *modelist;
+	struct fb_videomode *mode;
+	struct uvesafb_par *par = info->par;
+	int i, modeid;
+	int found = 0;	/* fb_find_mode() result; 0 means "nothing found" */
+
+	/* Has the user requested a specific VESA mode? */
+	if (vbemode) {
+		for (i = 0; i < par->vbe_modes_cnt; i++) {
+			if (par->vbe_modes[i].mode_id == vbemode) {
+				fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60,
+							&info->var, info);
+				/*
+				 * With pixclock set to 0, the default BIOS
+				 * timings will be used in set_par().
+				 */
+				info->var.pixclock = 0;
+				modeid = i;
+				goto gotmode;
+			}
+		}
+		printk(KERN_INFO "uvesafb: requested VBE mode 0x%x is "
+				 "unavailable\n", vbemode);
+		vbemode = 0;
+	}
+
+	/* Count the modes in the modelist */
+	i = 0;
+	list_for_each(pos, &info->modelist)
+		i++;
+
+	/*
+	 * Convert the modelist into a modedb so that we can use it with
+	 * fb_find_mode().
+	 */
+	mode = kzalloc(i * sizeof(*mode), GFP_KERNEL);
+	if (mode) {
+		i = 0;
+		list_for_each(pos, &info->modelist) {
+			modelist = list_entry(pos, struct fb_modelist, list);
+			mode[i] = modelist->mode;
+			i++;
+		}
+
+		if (!mode_option)
+			mode_option = UVESAFB_DEFAULT_MODE;
+
+		found = fb_find_mode(&info->var, info, mode_option, mode, i,
+			NULL, 8);
+
+		kfree(mode);
+	}
+
+	/*
+	 * fb_find_mode() failed, or could not be run because the modedb
+	 * allocation failed ('found' is still 0 in that case, so the mode
+	 * count is never mistaken for a search result).
+	 */
+	if (found == 0 || found >= 3) {
+		info->var.xres = 640;
+		info->var.yres = 480;
+		mode = (struct fb_videomode *)
+				fb_find_best_mode(&info->var, &info->modelist);
+
+		if (mode) {
+			fb_videomode_to_var(&info->var, mode);
+		} else {
+			/* Last resort: the first VBE mode, if there is one. */
+			if (!par->vbe_modes_cnt)
+				return -EINVAL;
+
+			/*
+			 * modeid is an index into par->vbe_modes (it is
+			 * used as one below and by the caller), not a VBE
+			 * mode id.
+			 */
+			modeid = 0;
+			fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60,
+				    &info->var, info);
+			goto gotmode;
+		}
+	}
+
+	/* Look for a matching VBE mode. */
+	modeid = uvesafb_vbe_find_mode(par, info->var.xres, info->var.yres,
+			info->var.bits_per_pixel, UVESAFB_EXACT_RES);
+
+	if (modeid == -1)
+		return -EINVAL;
+
+gotmode:
+	uvesafb_setup_var(&info->var, info, &par->vbe_modes[modeid]);
+
+	/*
+	 * If we are not VBE3.0+ compliant, we're done -- the BIOS will
+	 * ignore our timings anyway.
+	 */
+	if (par->vbe_ib.vbe_version < 0x0300 || par->nocrtc)
+		fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60,
+					&info->var, info);
+
+	return modeid;
+}
+
+/*
+ * Load 'count' palette entries, beginning at palette index 'start'.
+ *
+ * Three back ends are tried in order: direct VGA DAC register writes
+ * when the current mode is flagged VGA compatible, the protected mode
+ * interface (32-bit x86 only, when par->pmi_setpal is set), and finally
+ * the VBE call EAX = 0x4f09.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range request, -ENOMEM if
+ * no task can be prepared, the uvesafb_exec() error, or 1 when the BIOS
+ * reports failure.
+ */
+static int uvesafb_setpalette(struct uvesafb_pal_entry *entries, int count,
+		int start, struct fb_info *info)
+{
+	struct uvesafb_ktask *task;
+	struct uvesafb_par *par = info->par;
+	int i = par->mode_idx;
+	int err = 0;
+
+	/*
+	 * We support palette modifications for 8 bpp modes only, so
+	 * there can never be more than 256 entries. Negative values are
+	 * rejected as well: callers may pass in converted unsigned
+	 * quantities, and a negative start would slip past the upper
+	 * bound check alone.
+	 */
+	if (start < 0 || count < 0 || start + count > 256)
+		return -EINVAL;
+
+#ifdef CONFIG_X86
+	/* Use VGA registers if mode is VGA-compatible. */
+	if (i >= 0 && i < par->vbe_modes_cnt &&
+	    par->vbe_modes[i].mode_attr & VBE_MODE_VGACOMPAT) {
+		/* 'i' is reused as the loop counter from here on. */
+		for (i = 0; i < count; i++) {
+			outb_p(start + i,        dac_reg);
+			outb_p(entries[i].red,   dac_val);
+			outb_p(entries[i].green, dac_val);
+			outb_p(entries[i].blue,  dac_val);
+		}
+	}
+#ifdef CONFIG_X86_32
+	else if (par->pmi_setpal) {
+		/* Indirect call through the pointer stored in par->pmi_pal. */
+		__asm__ __volatile__(
+		"call *(%%esi)"
+		: /* no return value */
+		: "a" (0x4f09),         /* EAX */
+		  "b" (0),              /* EBX */
+		  "c" (count),          /* ECX */
+		  "d" (start),          /* EDX */
+		  "D" (entries),        /* EDI */
+		  "S" (&par->pmi_pal)); /* ESI */
+	}
+#endif /* CONFIG_X86_32 */
+	else
+#endif /* CONFIG_X86 */
+	{
+		task = uvesafb_prep();
+		if (!task)
+			return -ENOMEM;
+
+		task->t.regs.eax = 0x4f09;
+		task->t.regs.ebx = 0x0;
+		task->t.regs.ecx = count;
+		task->t.regs.edx = start;
+		task->t.flags = TF_BUF_ESDI;
+		task->t.buf_len = sizeof(struct uvesafb_pal_entry) * count;
+		task->buf = entries;
+
+		err = uvesafb_exec(task);
+		if ((task->t.regs.eax & 0xffff) != 0x004f)
+			err = 1;
+
+		uvesafb_free(task);
+	}
+	return err;
+}
+
+/*
+ * Set a single color register.
+ *
+ * In 8 bpp modes the color goes to the hardware palette via
+ * uvesafb_setpalette(). In 16, 24 and 32 bpp modes only the first 16
+ * registers are handled, and the packed pixel value is stored in
+ * info->pseudo_palette. 'transp' is not used.
+ *
+ * Returns -EINVAL if regno is outside info->cmap, the result of
+ * uvesafb_setpalette() for 8 bpp, and 0 otherwise.
+ */
+static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green,
+		unsigned blue, unsigned transp,
+		struct fb_info *info)
+{
+	struct uvesafb_pal_entry entry;
+	u32 *pal = info->pseudo_palette;
+	int shift = 16 - info->var.green.length;
+
+	if (regno >= info->cmap.len)
+		return -EINVAL;
+
+	if (info->var.bits_per_pixel == 8) {
+		/* Scale the 16-bit components down to the DAC width. */
+		entry.red   = red   >> shift;
+		entry.green = green >> shift;
+		entry.blue  = blue  >> shift;
+		entry.pad   = 0;
+
+		return uvesafb_setpalette(&entry, 1, regno, info);
+	}
+
+	/* The pseudo palette is only maintained for the first 16 entries. */
+	if (regno >= 16)
+		return 0;
+
+	switch (info->var.bits_per_pixel) {
+	case 16:
+		if (info->var.red.offset == 10)
+			/* 1:5:5:5 */
+			pal[regno] = ((red   & 0xf800) >>  1) |
+				     ((green & 0xf800) >>  6) |
+				     ((blue  & 0xf800) >> 11);
+		else
+			/* 0:5:6:5 */
+			pal[regno] = ((red   & 0xf800)      ) |
+				     ((green & 0xfc00) >>  5) |
+				     ((blue  & 0xf800) >> 11);
+		break;
+
+	case 24:
+	case 32:
+		/* Keep the top 8 bits of each component. */
+		pal[regno] = ((red   >> 8) << info->var.red.offset)   |
+			     ((green >> 8) << info->var.green.offset) |
+			     ((blue  >> 8) << info->var.blue.offset);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Apply a color map.
+ *
+ * In 8 bpp modes the whole map is converted to hardware palette entries
+ * and loaded with a single uvesafb_setpalette() call. In all other
+ * modes each entry is passed to uvesafb_setcolreg().
+ *
+ * Returns 0 on success, -EINVAL if the requested range does not lie
+ * within info->cmap, -ENOMEM if the temporary entry array cannot be
+ * allocated, or the (OR-ed) error of the underlying calls.
+ */
+static int uvesafb_setcmap(struct fb_cmap *cmap, struct fb_info *info)
+{
+	struct uvesafb_pal_entry *entries;
+	int shift = 16 - info->var.green.length;
+	int i, err = 0;
+
+	if (info->var.bits_per_pixel == 8) {
+		/*
+		 * Range check written so that no addition can wrap: a huge
+		 * cmap->start or cmap->len must not be able to overflow
+		 * past the bound and reach the allocation and copy loop
+		 * below.
+		 */
+		if (cmap->start < info->cmap.start ||
+		    cmap->len > info->cmap.len ||
+		    cmap->start - info->cmap.start >
+				info->cmap.len - cmap->len)
+			return -EINVAL;
+
+		/* cmap->len is now bounded by info->cmap.len. */
+		entries = kmalloc(sizeof(*entries) * cmap->len, GFP_KERNEL);
+		if (!entries)
+			return -ENOMEM;
+
+		for (i = 0; i < cmap->len; i++) {
+			entries[i].red   = cmap->red[i]   >> shift;
+			entries[i].green = cmap->green[i] >> shift;
+			entries[i].blue  = cmap->blue[i]  >> shift;
+			entries[i].pad   = 0;
+		}
+		err = uvesafb_setpalette(entries, cmap->len, cmap->start, info);
+		kfree(entries);
+	} else {
+		/*
+		 * For modes with bpp > 8, we only set the pseudo palette in
+		 * the fb_info struct. We rely on uvesafb_setcolreg to do all
+		 * sanity checking.
+		 */
+		for (i = 0; i < cmap->len; i++) {
+			err |= uvesafb_setcolreg(cmap->start + i, cmap->red[i],
+						cmap->green[i], cmap->blue[i],
+						0, info);
+		}
+	}
+	return err;
+}
+
+/*
+ * Pan the display to var->xoffset / var->yoffset using the protected
+ * mode interface. Without CONFIG_X86_32, or when par->pmi_start is not
+ * set, nothing is done. Always returns 0.
+ */
+static int uvesafb_pan_display(struct fb_var_screeninfo *var,
+		struct fb_info *info)
+{
+#ifdef CONFIG_X86_32
+	int offset;
+	struct uvesafb_par *par = info->par;
+
+	/* Byte offset of the new display start, divided by four. */
+	offset = (var->yoffset * info->fix.line_length + var->xoffset) / 4;
+
+	/*
+	 * It turns out it's not the best idea to do panning via vm86,
+	 * so we only allow it if we have a PMI.
+	 */
+	if (par->pmi_start) {
+		/*
+		 * Indirect call through the pointer stored in
+		 * par->pmi_start; the offset is split across CX (low
+		 * part) and DX (bits 16 and up).
+		 */
+		__asm__ __volatile__(
+			"call *(%%edi)"
+			: /* no return value */
+			: "a" (0x4f07),         /* EAX */
+			  "b" (0),              /* EBX */
+			  "c" (offset),         /* ECX */
+			  "d" (offset >> 16),   /* EDX */
+			  "D" (&par->pmi_start));    /* EDI */
+	}
+#endif
+	return 0;
+}
+
+/*
+ * Blank or unblank the display.
+ *
+ * On x86, adapters that report VGA compatibility are driven through the
+ * VGA sequencer and CRTC registers; in that path only
+ * FB_BLANK_POWERDOWN and FB_BLANK_UNBLANK return 0, every other level
+ * returns -EINVAL (the registers are still written with the "unblank"
+ * values). Otherwise the VBE call EAX = 0x4f10 is used.
+ *
+ * In the VBE path the function returns 0 on success, -ENOMEM if no task
+ * can be prepared, and 1 for an unsupported blank level or a failed
+ * call.
+ */
+static int uvesafb_blank(int blank, struct fb_info *info)
+{
+	struct uvesafb_par *par = info->par;
+	struct uvesafb_ktask *task;
+	int err = 1;
+
+#ifdef CONFIG_X86
+	if (par->vbe_ib.capabilities & VBE_CAP_VGACOMPAT) {
+		int loop = 10000;
+		u8 seq = 0, crtc17 = 0;
+
+		if (blank == FB_BLANK_POWERDOWN) {
+			seq = 0x20;
+			crtc17 = 0x00;
+			err = 0;
+		} else {
+			seq = 0x00;
+			crtc17 = 0x80;
+			err = (blank == FB_BLANK_UNBLANK) ? 0 : -EINVAL;
+		}
+
+		/*
+		 * Read-modify-write: bit 0x20 of sequencer register 0x01
+		 * and bit 0x80 of CRTC register 0x17 are replaced by the
+		 * values chosen above.
+		 * NOTE(review): the merged 'seq' value is written to
+		 * sequencer index 0x00 although it was read from index
+		 * 0x01 -- confirm this is intended.
+		 */
+		vga_wseq(NULL, 0x00, 0x01);
+		seq |= vga_rseq(NULL, 0x01) & ~0x20;
+		vga_wseq(NULL, 0x00, seq);
+
+		crtc17 |= vga_rcrt(NULL, 0x17) & ~0x80;
+		/* Empty counting loop, presumably meant as a short delay. */
+		while (loop--);
+		vga_wcrt(NULL, 0x17, crtc17);
+		vga_wseq(NULL, 0x00, 0x03);
+	} else
+#endif /* CONFIG_X86 */
+	{
+		task = uvesafb_prep();
+		if (!task)
+			return -ENOMEM;
+
+		task->t.regs.eax = 0x4f10;
+		switch (blank) {
+		case FB_BLANK_UNBLANK:
+			task->t.regs.ebx = 0x0001;
+			break;
+		case FB_BLANK_NORMAL:
+			task->t.regs.ebx = 0x0101;	/* standby */
+			break;
+		case FB_BLANK_POWERDOWN:
+			task->t.regs.ebx = 0x0401;	/* powerdown */
+			break;
+		default:
+			/* Unsupported level: 'err' is still 1 here. */
+			goto out;
+		}
+
+		err = uvesafb_exec(task);
+		if (err || (task->t.regs.eax & 0xffff) != 0x004f)
+			err = 1;
+out:		uvesafb_free(task);
+	}
+	return err;
+}
+
+/*
+ * fb_open handler: count the opener and, on the first open, take a
+ * snapshot of the hardware state (when a state buffer size is known) so
+ * that it can be restored when the last user goes away.
+ *
+ * Always returns 0.
+ */
+static int uvesafb_open(struct fb_info *info, int user)
+{
+	struct uvesafb_par *par = info->par;
+	int cnt = atomic_read(&par->ref_count);
+
+	if (!cnt && par->vbe_state_size) {
+		/*
+		 * The device may have been opened and fully released
+		 * before; drop the snapshot from that earlier cycle so
+		 * that it is not leaked when the pointer is replaced.
+		 * kfree(NULL) is a no-op on the very first open.
+		 */
+		kfree(par->vbe_state_orig);
+		par->vbe_state_orig = uvesafb_vbe_state_save(par);
+	}
+
+	atomic_inc(&par->ref_count);
+	return 0;
+}
+
+/*
+ * fb_release handler: drop one reference. When the last user goes away
+ * the adapter is switched back to text mode and the hardware state
+ * saved at first open is restored.
+ *
+ * Returns -EINVAL if the reference count is already zero, 0 otherwise.
+ * If no task can be prepared the hardware is left untouched but the
+ * reference is still dropped.
+ */
+static int uvesafb_release(struct fb_info *info, int user)
+{
+	struct uvesafb_ktask *task = NULL;
+	struct uvesafb_par *par = info->par;
+	int cnt = atomic_read(&par->ref_count);
+
+	if (!cnt)
+		return -EINVAL;
+
+	/* Not the last user: only the count changes. */
+	if (cnt != 1)
+		goto out;
+
+	task = uvesafb_prep();
+	if (!task)
+		goto out;
+
+	/* First, try to set the standard 80x25 text mode. */
+	task->t.regs.eax = 0x0003;
+	/* The result of this call is deliberately not checked. */
+	uvesafb_exec(task);
+
+	/*
+	 * Now try to restore whatever hardware state we might have
+	 * saved when the fb device was first opened.
+	 */
+	uvesafb_vbe_state_restore(par, par->vbe_state_orig);
+out:
+	atomic_dec(&par->ref_count);
+	if (task)
+		uvesafb_free(task);
+	return 0;
+}
+
+/*
+ * fb_set_par handler: switch the hardware to the mode described by
+ * info->var.
+ *
+ * The VBE mode is looked up by exact resolution and depth and set with
+ * EAX = 0x4f02 using the linear framebuffer. When the adapter is VBE
+ * 3.0+, CRTC overrides are allowed and info->var.pixclock is non-zero,
+ * custom timings are passed along; if that attempt fails the mode
+ * switch is retried once with the default BIOS timings.
+ *
+ * Returns 0 on success, -EINVAL if no matching mode exists or the mode
+ * switch fails, -ENOMEM on allocation failure.
+ */
+static int uvesafb_set_par(struct fb_info *info)
+{
+	struct uvesafb_par *par = info->par;
+	struct uvesafb_ktask *task = NULL;
+	struct vbe_crtc_ib *crtc = NULL;
+	struct vbe_mode_ib *mode = NULL;
+	int i, err = 0, depth = info->var.bits_per_pixel;
+
+	/*
+	 * For modes above 8 bpp other than 32 bpp, the depth used for the
+	 * lookup is the sum of the color component lengths.
+	 */
+	if (depth > 8 && depth != 32)
+		depth = info->var.red.length + info->var.green.length +
+			info->var.blue.length;
+
+	i = uvesafb_vbe_find_mode(par, info->var.xres, info->var.yres, depth,
+				 UVESAFB_EXACT_RES | UVESAFB_EXACT_DEPTH);
+	if (i >= 0)
+		mode = &par->vbe_modes[i];
+	else
+		return -EINVAL;
+
+	task = uvesafb_prep();
+	if (!task)
+		return -ENOMEM;
+	/* Re-entered once (with pixclock == 0) if custom timings fail. */
+setmode:
+	task->t.regs.eax = 0x4f02;
+	task->t.regs.ebx = mode->mode_id | 0x4000;	/* use LFB */
+
+	if (par->vbe_ib.vbe_version >= 0x0300 && !par->nocrtc &&
+	    info->var.pixclock != 0) {
+		task->t.regs.ebx |= 0x0800;		/* use CRTC data */
+		task->t.flags = TF_BUF_ESDI;
+		crtc = kzalloc(sizeof(struct vbe_crtc_ib), GFP_KERNEL);
+		if (!crtc) {
+			err = -ENOMEM;
+			goto out;
+		}
+		/* Sync start/end and totals accumulated from var margins. */
+		crtc->horiz_start = info->var.xres + info->var.right_margin;
+		crtc->horiz_end	  = crtc->horiz_start + info->var.hsync_len;
+		crtc->horiz_total = crtc->horiz_end + info->var.left_margin;
+
+		crtc->vert_start  = info->var.yres + info->var.lower_margin;
+		crtc->vert_end    = crtc->vert_start + info->var.vsync_len;
+		crtc->vert_total  = crtc->vert_end + info->var.upper_margin;
+
+		crtc->pixel_clock = PICOS2KHZ(info->var.pixclock) * 1000;
+		/*
+		 * Note that the integer division is performed before the
+		 * multiplication by 100, so the result is always a
+		 * multiple of 100.
+		 */
+		crtc->refresh_rate = (u16)(100 * (crtc->pixel_clock /
+				(crtc->vert_total * crtc->horiz_total)));
+
+		if (info->var.vmode & FB_VMODE_DOUBLE)
+			crtc->flags |= 0x1;
+		if (info->var.vmode & FB_VMODE_INTERLACED)
+			crtc->flags |= 0x2;
+		if (!(info->var.sync & FB_SYNC_HOR_HIGH_ACT))
+			crtc->flags |= 0x4;
+		if (!(info->var.sync & FB_SYNC_VERT_HIGH_ACT))
+			crtc->flags |= 0x8;
+		memcpy(&par->crtc, crtc, sizeof(*crtc));
+	} else {
+		memset(&par->crtc, 0, sizeof(*crtc));
+	}
+
+	/* The buffer handed to the call is par->crtc, not 'crtc' itself. */
+	task->t.buf_len = sizeof(struct vbe_crtc_ib);
+	task->buf = &par->crtc;
+
+	err = uvesafb_exec(task);
+	if (err || (task->t.regs.eax & 0xffff) != 0x004f) {
+		/*
+		 * The mode switch might have failed because we tried to
+		 * use our own timings.  Try again with the default timings.
+		 */
+		if (crtc != NULL) {
+			printk(KERN_WARNING "uvesafb: mode switch failed "
+				"(eax=0x%x, err=%d). Trying again with "
+				"default timings.\n", task->t.regs.eax, err);
+			uvesafb_reset(task);
+			kfree(crtc);
+			crtc = NULL;
+			/* pixclock == 0 selects the non-CRTC branch above. */
+			info->var.pixclock = 0;
+			goto setmode;
+		} else {
+			printk(KERN_ERR "uvesafb: mode switch failed (eax="
+				"0x%x, err=%d)\n", task->t.regs.eax, err);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+	par->mode_idx = i;
+
+	/* For 8bpp modes, always try to set the DAC to 8 bits. */
+	if (par->vbe_ib.capabilities & VBE_CAP_CAN_SWITCH_DAC &&
+	    mode->bits_per_pixel <= 8) {
+		uvesafb_reset(task);
+		task->t.regs.eax = 0x4f08;
+		task->t.regs.ebx = 0x0800;
+
+		/*
+		 * NOTE(review): a non-zero 'err' from this call is what
+		 * the function ends up returning, even though the mode
+		 * switch itself succeeded -- confirm this is intended.
+		 */
+		err = uvesafb_exec(task);
+		if (err || (task->t.regs.eax & 0xffff) != 0x004f ||
+		    ((task->t.regs.ebx & 0xff00) >> 8) != 8) {
+			/*
+			 * We've failed to set the DAC palette format -
+			 * time to correct var.
+			 */
+			info->var.red.length    = 6;
+			info->var.green.length  = 6;
+			info->var.blue.length   = 6;
+		}
+	}
+
+	info->fix.visual = (info->var.bits_per_pixel == 8) ?
+				FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR;
+	info->fix.line_length = mode->bytes_per_scan_line;
+
+out:	if (crtc != NULL)
+		kfree(crtc);
+	uvesafb_free(task);
+
+	return err;
+}
+
+/*
+ * Make the timings in 'var' acceptable for the monitor.
+ *
+ * The steps are tried in order and the first one that applies wins:
+ * keep the timings if they validate, take them from an exact-resolution
+ * progressive modelist entry, compute maximum timings when monitor
+ * limits are known, and as a last resort clear pixclock so that the
+ * default BIOS refresh rate is used.
+ */
+static void uvesafb_check_limits(struct fb_var_screeninfo *var,
+		struct fb_info *info)
+{
+	struct uvesafb_par *par = info->par;
+	const struct fb_videomode *best;
+
+	/*
+	 * A zero pixclock means the default BIOS timings are in use, so
+	 * there is nothing to verify.
+	 */
+	if (var->pixclock == 0)
+		return;
+
+	if (par->vbe_ib.vbe_version < 0x0300) {
+		fb_get_mode(FB_VSYNCTIMINGS | FB_IGNOREMON, 60, var, info);
+		return;
+	}
+
+	if (fb_validate_mode(var, info) == 0)
+		return;
+
+	best = fb_find_best_mode(var, &info->modelist);
+	if (best && best->xres == var->xres && best->yres == var->yres &&
+	    !(best->vmode & (FB_VMODE_INTERLACED | FB_VMODE_DOUBLE))) {
+		fb_videomode_to_var(var, best);
+		return;
+	}
+
+	/* Use default refresh rate unless maximum timings can be had. */
+	if (!info->monspecs.gtf || fb_get_mode(FB_MAXTIMINGS, 0, var, info))
+		var->pixclock = 0;
+}
+
+/*
+ * fb_check_var handler: adjust 'var' to the closest configuration the
+ * hardware supports.
+ *
+ * Returns 0 on success, or -EINVAL if there is no VBE mode with the
+ * requested resolution or the mode does not fit into the remapped
+ * video memory.
+ */
+static int uvesafb_check_var(struct fb_var_screeninfo *var,
+		struct fb_info *info)
+{
+	struct uvesafb_par *par = info->par;
+	struct vbe_mode_ib *mode = NULL;
+	int match = -1;
+	int depth = var->red.length + var->green.length + var->blue.length;
+
+	/*
+	 * Various apps will use bits_per_pixel to set the color depth,
+	 * which is theoretically incorrect, but which we'll try to handle
+	 * here.
+	 */
+	if (depth == 0 || abs(depth - var->bits_per_pixel) >= 8)
+		depth = var->bits_per_pixel;
+
+	match = uvesafb_vbe_find_mode(par, var->xres, var->yres, depth,
+						UVESAFB_EXACT_RES);
+	if (match == -1)
+		return -EINVAL;
+
+	mode = &par->vbe_modes[match];
+	uvesafb_setup_var(var, info, mode);
+
+	/*
+	 * Check whether we have remapped enough memory for this mode.
+	 * We might be called at an early stage, when we haven't remapped
+	 * any memory yet, in which case we simply skip the check.
+	 */
+	if (var->yres * mode->bytes_per_scan_line > info->fix.smem_len
+						&& info->fix.smem_len)
+		return -EINVAL;
+
+	/*
+	 * Drop double-scan / interlace requests when the corresponding
+	 * mode attribute bit (0x100 / 0x200) is not set for this mode.
+	 */
+	if ((var->vmode & FB_VMODE_DOUBLE) &&
+				!(par->vbe_modes[match].mode_attr & 0x100))
+		var->vmode &= ~FB_VMODE_DOUBLE;
+
+	if ((var->vmode & FB_VMODE_INTERLACED) &&
+				!(par->vbe_modes[match].mode_attr & 0x200))
+		var->vmode &= ~FB_VMODE_INTERLACED;
+
+	uvesafb_check_limits(var, info);
+
+	/*
+	 * With panning enabled the virtual height covers all remapped
+	 * memory; otherwise it equals the visible height.
+	 */
+	var->xres_virtual = var->xres;
+	var->yres_virtual = (par->ypan) ?
+				info->fix.smem_len / mode->bytes_per_scan_line :
+				var->yres;
+	return 0;
+}
+
+/*
+ * fb_save_state handler: replace any previously saved hardware state
+ * with a fresh snapshot stored in par->vbe_state_saved.
+ */
+static void uvesafb_save_state(struct fb_info *info)
+{
+	struct uvesafb_par *par = info->par;
+
+	/* kfree() accepts NULL, so no explicit guard is required. */
+	kfree(par->vbe_state_saved);
+	par->vbe_state_saved = uvesafb_vbe_state_save(par);
+}
+
+/*
+ * fb_restore_state handler: restore the hardware state that was last
+ * stored in par->vbe_state_saved by uvesafb_save_state().
+ */
+static void uvesafb_restore_state(struct fb_info *info)
+{
+	struct uvesafb_par *state_owner = info->par;
+
+	uvesafb_vbe_state_restore(state_owner, state_owner->vbe_state_saved);
+}
+
+/*
+ * Framebuffer operations for uvesafb. Drawing uses the generic cfb_*
+ * helpers. The table is not const because uvesafb_init_info() clears
+ * fb_save_state/fb_restore_state, fb_blank and fb_pan_display at
+ * runtime when the corresponding feature is unavailable or disabled.
+ */
+static struct fb_ops uvesafb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_open	= uvesafb_open,
+	.fb_release	= uvesafb_release,
+	.fb_setcolreg	= uvesafb_setcolreg,
+	.fb_setcmap	= uvesafb_setcmap,
+	.fb_pan_display	= uvesafb_pan_display,
+	.fb_blank	= uvesafb_blank,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+	.fb_check_var	= uvesafb_check_var,
+	.fb_set_par	= uvesafb_set_par,
+	.fb_save_state	= uvesafb_save_state,
+	.fb_restore_state = uvesafb_restore_state,
+};
+
+/*
+ * Fill in the parts of 'info' that depend on the chosen video mode:
+ * the fixed screen info, the amount of video memory to remap, the
+ * virtual resolution, the scrolling method and the info flags. Unused
+ * entries of info->fbops are cleared as a side effect.
+ *
+ * 'mode' is the VBE mode info block of the initial mode.
+ * NOTE(review): although the size_vmode computation tolerates a NULL
+ * 'mode', the code further down dereferences it unconditionally, so
+ * callers must pass a valid pointer.
+ */
+static void __devinit uvesafb_init_info(struct fb_info *info,
+		struct vbe_mode_ib *mode)
+{
+	unsigned int size_vmode;
+	unsigned int size_remap;
+	unsigned int size_total;
+	struct uvesafb_par *par = info->par;
+	int i, h;
+
+	/* The pseudo palette lives directly behind the par structure. */
+	info->pseudo_palette = ((u8 *)info->par + sizeof(struct uvesafb_par));
+	info->fix = uvesafb_fix;
+	info->fix.ypanstep = par->ypan ? 1 : 0;
+	info->fix.ywrapstep = (par->ypan > 1) ? 1 : 0;
+
+	/*
+	 * If we were unable to get the state buffer size, disable
+	 * functions for saving and restoring the hardware state.
+	 */
+	if (par->vbe_state_size == 0) {
+		info->fbops->fb_save_state = NULL;
+		info->fbops->fb_restore_state = NULL;
+	}
+
+	/* Disable blanking if the user requested so. */
+	if (!blank)
+		info->fbops->fb_blank = NULL;
+
+	/*
+	 * Find out how much IO memory is required for the mode with
+	 * the highest resolution.
+	 */
+	size_remap = 0;
+	for (i = 0; i < par->vbe_modes_cnt; i++) {
+		h = par->vbe_modes[i].bytes_per_scan_line *
+					par->vbe_modes[i].y_res;
+		if (h > size_remap)
+			size_remap = h;
+	}
+	/* Reserve twice the size of the largest mode. */
+	size_remap *= 2;
+
+	/*
+	 *   size_vmode -- that is the amount of memory needed for the
+	 *                 used video mode, i.e. the minimum amount of
+	 *                 memory we need.
+	 */
+	if (mode != NULL) {
+		size_vmode = info->var.yres * mode->bytes_per_scan_line;
+	} else {
+		size_vmode = info->var.yres * info->var.xres *
+			     ((info->var.bits_per_pixel + 7) >> 3);
+	}
+
+	/*
+	 *   size_total -- all video memory we have. Used for mtrr
+	 *                 entries, resource allocation and bounds
+	 *                 checking.
+	 */
+	size_total = par->vbe_ib.total_memory * 65536;
+	if (vram_total)
+		size_total = vram_total * 1024 * 1024;
+	if (size_total < size_vmode)
+		size_total = size_vmode;
+
+	/*
+	 *   size_remap -- the amount of video memory we are going to
+	 *                 use for vesafb.  With modern cards it is no
+	 *                 option to simply use size_total as that
+	 *                 wastes plenty of kernel address space.
+	 */
+	if (vram_remap)
+		size_remap = vram_remap * 1024 * 1024;
+	if (size_remap < size_vmode)
+		size_remap = size_vmode;
+	if (size_remap > size_total)
+		size_remap = size_total;
+
+	info->fix.smem_len = size_remap;
+	info->fix.smem_start = mode->phys_base_ptr;
+
+	/*
+	 * We have to set yres_virtual here because when setup_var() was
+	 * called, smem_len wasn't defined yet.
+	 */
+	info->var.yres_virtual = info->fix.smem_len /
+				 mode->bytes_per_scan_line;
+
+	if (par->ypan && info->var.yres_virtual > info->var.yres) {
+		printk(KERN_INFO "uvesafb: scrolling: %s "
+			"using protected mode interface, "
+			"yres_virtual=%d\n",
+			(par->ypan > 1) ? "ywrap" : "ypan",
+			info->var.yres_virtual);
+	} else {
+		printk(KERN_INFO "uvesafb: scrolling: redraw\n");
+		info->var.yres_virtual = info->var.yres;
+		par->ypan = 0;
+	}
+
+	/*
+	 * The conditional must be parenthesised as a whole: '|' binds
+	 * tighter than '?:', so without the parentheses the default flags
+	 * would become part of the condition instead of the result.
+	 */
+	info->flags = FBINFO_FLAG_DEFAULT |
+			(par->ypan ? FBINFO_HWACCEL_YPAN : 0);
+
+	if (!par->ypan)
+		info->fbops->fb_pan_display = NULL;
+}
+
+/*
+ * Set up an MTRR for the framebuffer according to the 'mtrr' option.
+ * Nothing is done without CONFIG_MTRR, when 'mtrr' is 0 or not one of
+ * the known values (1-4), or when the framebuffer start address is not
+ * page aligned.
+ */
+static void uvesafb_init_mtrr(struct fb_info *info)
+{
+#ifdef CONFIG_MTRR
+	if (mtrr && !(info->fix.smem_start & (PAGE_SIZE - 1))) {
+		int temp_size = info->fix.smem_len;
+		unsigned int type = 0;
+
+		/* Map the option value to a memory type. */
+		switch (mtrr) {
+		case 1:
+			type = MTRR_TYPE_UNCACHABLE;
+			break;
+		case 2:
+			type = MTRR_TYPE_WRBACK;
+			break;
+		case 3:
+			type = MTRR_TYPE_WRCOMB;
+			break;
+		case 4:
+			type = MTRR_TYPE_WRTHROUGH;
+			break;
+		default:
+			type = 0;
+			break;
+		}
+
+		if (type) {
+			int rc;
+
+			/* Find the largest power-of-two */
+			/* (clears low set bits until only one remains) */
+			while (temp_size & (temp_size - 1))
+				temp_size &= (temp_size - 1);
+
+			/* Try and find a power of two to add */
+			/*
+			 * Halve the size after each -EINVAL until
+			 * mtrr_add() returns something else or the size
+			 * drops below one page.
+			 */
+			do {
+				rc = mtrr_add(info->fix.smem_start,
+					      temp_size, type, 1);
+				temp_size >>= 1;
+			} while (temp_size >= PAGE_SIZE && rc == -EINVAL);
+		}
+	}
+#endif /* CONFIG_MTRR */
+}
+
+
+/*
+ * sysfs 'vbe_version' (read-only): the VBE version reported by the
+ * adapter, printed as four hexadecimal digits.
+ */
+static ssize_t uvesafb_show_vbe_ver(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fb_info *info = platform_get_drvdata(pdev);
+	struct uvesafb_par *par = info->par;
+
+	return snprintf(buf, PAGE_SIZE, "%.4x\n", par->vbe_ib.vbe_version);
+}
+
+static DEVICE_ATTR(vbe_version, S_IRUGO, uvesafb_show_vbe_ver, NULL);
+
+/*
+ * sysfs 'vbe_modes' (read-only): one line per usable VBE mode in the
+ * form "<xres>x<yres>-<depth>, 0x<mode id>". Output that does not fit
+ * into one page is truncated.
+ */
+static ssize_t uvesafb_show_vbe_modes(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *info = platform_get_drvdata(to_platform_device(dev));
+	struct uvesafb_par *par = info->par;
+	int ret = 0, i;
+
+	for (i = 0; i < par->vbe_modes_cnt && ret < PAGE_SIZE; i++) {
+		/*
+		 * scnprintf() returns the number of characters actually
+		 * stored, so 'ret' can never run past the end of the page
+		 * (snprintf() would return the untruncated length).
+		 */
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			"%dx%d-%d, 0x%.4x\n",
+			par->vbe_modes[i].x_res, par->vbe_modes[i].y_res,
+			par->vbe_modes[i].depth, par->vbe_modes[i].mode_id);
+	}
+
+	return ret;
+}
+
+static DEVICE_ATTR(vbe_modes, S_IRUGO, uvesafb_show_vbe_modes, NULL);
+
+/*
+ * sysfs 'oem_vendor' (read-only): the OEM vendor name from the VBE info
+ * block, or empty output when the adapter supplies none.
+ */
+static ssize_t uvesafb_show_vendor(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *info = platform_get_drvdata(to_platform_device(dev));
+	struct uvesafb_par *par = info->par;
+	char *ib_base = (char *)(&par->vbe_ib);
+
+	if (!par->vbe_ib.oem_vendor_name_ptr)
+		return 0;
+
+	/* The pointer field is a byte offset into the info block. */
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			ib_base + par->vbe_ib.oem_vendor_name_ptr);
+}
+
+static DEVICE_ATTR(oem_vendor, S_IRUGO, uvesafb_show_vendor, NULL);
+
+/*
+ * sysfs 'oem_product_name' (read-only): the OEM product name from the
+ * VBE info block, or empty output when the adapter supplies none.
+ */
+static ssize_t uvesafb_show_product_name(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *info = platform_get_drvdata(to_platform_device(dev));
+	struct uvesafb_par *par = info->par;
+	char *ib_base = (char *)(&par->vbe_ib);
+
+	if (!par->vbe_ib.oem_product_name_ptr)
+		return 0;
+
+	/* The pointer field is a byte offset into the info block. */
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			ib_base + par->vbe_ib.oem_product_name_ptr);
+}
+
+static DEVICE_ATTR(oem_product_name, S_IRUGO, uvesafb_show_product_name, NULL);
+
+/*
+ * sysfs 'oem_product_rev' (read-only): the OEM product revision from
+ * the VBE info block, or empty output when the adapter supplies none.
+ */
+static ssize_t uvesafb_show_product_rev(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *info = platform_get_drvdata(to_platform_device(dev));
+	struct uvesafb_par *par = info->par;
+	char *ib_base = (char *)(&par->vbe_ib);
+
+	if (!par->vbe_ib.oem_product_rev_ptr)
+		return 0;
+
+	/* The pointer field is a byte offset into the info block. */
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			ib_base + par->vbe_ib.oem_product_rev_ptr);
+}
+
+static DEVICE_ATTR(oem_product_rev, S_IRUGO, uvesafb_show_product_rev, NULL);
+
+/*
+ * sysfs 'oem_string' (read-only): the OEM string from the VBE info
+ * block, or empty output when the adapter supplies none.
+ */
+static ssize_t uvesafb_show_oem_string(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct fb_info *info = platform_get_drvdata(to_platform_device(dev));
+	struct uvesafb_par *par = info->par;
+	char *ib_base = (char *)(&par->vbe_ib);
+
+	if (!par->vbe_ib.oem_string_ptr)
+		return 0;
+
+	/* The pointer field is a byte offset into the info block. */
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+			ib_base + par->vbe_ib.oem_string_ptr);
+}
+
+static DEVICE_ATTR(oem_string, S_IRUGO, uvesafb_show_oem_string, NULL);
+
+/*
+ * sysfs 'nocrtc' read side: print the current value of par->nocrtc.
+ */
+static ssize_t uvesafb_show_nocrtc(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fb_info *info = platform_get_drvdata(pdev);
+	struct uvesafb_par *par = info->par;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", par->nocrtc);
+}
+
+/* sysfs store: a leading '0' clears par->nocrtc, anything else sets it. */
+static ssize_t uvesafb_store_nocrtc(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct fb_info *info = platform_get_drvdata(to_platform_device(dev));
+	struct uvesafb_par *par = info->par;
+
+	/* An empty write leaves the current setting untouched. */
+	if (count == 0)
+		return count;
+
+	par->nocrtc = (buf[0] != '0');
+	return count;
+}
+
+static DEVICE_ATTR(nocrtc, S_IRUGO | S_IWUSR, uvesafb_show_nocrtc,
+			uvesafb_store_nocrtc);
+
+static struct attribute *uvesafb_dev_attrs[] = {
+	&dev_attr_vbe_version.attr,
+	&dev_attr_vbe_modes.attr,
+	&dev_attr_oem_vendor.attr,
+	&dev_attr_oem_product_name.attr,
+	&dev_attr_oem_product_rev.attr,
+	&dev_attr_oem_string.attr,
+	&dev_attr_nocrtc.attr,
+	NULL,
+};
+
+static struct attribute_group uvesafb_dev_attgrp = {
+	.name = NULL,
+	.attrs = uvesafb_dev_attrs,
+};
+
+/*
+ * Platform probe: set up the fb_info, reserve/map video memory, register it.
+ */
+static int __devinit uvesafb_probe(struct platform_device *dev)
+{
+	struct fb_info *info;
+	struct vbe_mode_ib *mode;
+	struct uvesafb_par *par;
+	int err, mode_idx;
+
+	info = framebuffer_alloc(sizeof(*par) + sizeof(u32) * 256, &dev->dev);
+	if (!info)
+		return -ENOMEM;
+
+	par = info->par;
+
+	err = uvesafb_vbe_init(info);
+	if (err) {
+		printk(KERN_ERR "uvesafb: vbe_init() failed with %d\n", err);
+		goto out;
+	}
+
+	info->fbops = &uvesafb_ops;
+
+	mode_idx = uvesafb_vbe_init_mode(info);
+	if (mode_idx < 0) {
+		err = -EINVAL;
+		goto out;
+	}
+	mode = &par->vbe_modes[mode_idx];
+
+	if (fb_alloc_cmap(&info->cmap, 256, 0) < 0) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	uvesafb_init_info(info, mode);
+
+	if (!request_mem_region(info->fix.smem_start, info->fix.smem_len,
+				"uvesafb")) {
+		printk(KERN_ERR "uvesafb: cannot reserve video memory at "
+				"0x%lx\n", info->fix.smem_start);
+		err = -EIO;
+		goto out_mode;
+	}
+
+	info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len);
+	if (!info->screen_base) {
+		printk(KERN_ERR
+			"uvesafb: abort, cannot ioremap 0x%x bytes of video "
+			"memory at 0x%lx\n",
+			info->fix.smem_len, info->fix.smem_start);
+		err = -EIO;
+		goto out_mem;
+	}
+
+	if (!request_region(0x3c0, 32, "uvesafb")) {
+		printk(KERN_ERR "uvesafb: request region 0x3c0-0x3e0 failed\n");
+		err = -EIO;
+		goto out_unmap;
+	}
+
+	uvesafb_init_mtrr(info);
+	platform_set_drvdata(dev, info);
+
+	if (register_framebuffer(info) < 0) {
+		printk(KERN_ERR
+			"uvesafb: failed to register framebuffer device\n");
+		err = -EINVAL;
+		goto out_reg;
+	}
+
+	printk(KERN_INFO "uvesafb: framebuffer at 0x%lx, mapped to 0x%p, "
+			"using %dk, total %dk\n", info->fix.smem_start,
+			info->screen_base, info->fix.smem_len/1024,
+			par->vbe_ib.total_memory * 64);
+	printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node,
+			info->fix.id);
+
+	/* Missing sysfs attributes are only worth a warning, not a failure. */
+	if (sysfs_create_group(&dev->dev.kobj, &uvesafb_dev_attgrp))
+		printk(KERN_WARNING "fb%d: failed to register attributes\n",
+			info->node);
+
+	return 0;
+
+out_reg:
+	release_region(0x3c0, 32);
+out_unmap:
+	iounmap(info->screen_base);
+out_mem:
+	release_mem_region(info->fix.smem_start, info->fix.smem_len);
+out_mode:
+	if (!list_empty(&info->modelist))
+		fb_destroy_modelist(&info->modelist);
+	fb_destroy_modedb(info->monspecs.modedb);
+	fb_dealloc_cmap(&info->cmap);
+out:
+	kfree(par->vbe_modes);	/* kfree(NULL) is a no-op */
+
+	framebuffer_release(info);
+	return err;
+}
+
+/* Platform remove callback: unregister the framebuffer, free its resources. */
+static int uvesafb_remove(struct platform_device *dev)
+{
+	struct fb_info *info = platform_get_drvdata(dev);
+	struct uvesafb_par *par;
+
+	if (!info)
+		return 0;
+
+	par = info->par;
+	sysfs_remove_group(&dev->dev.kobj, &uvesafb_dev_attgrp);
+	unregister_framebuffer(info);
+	release_region(0x3c0, 32);
+	iounmap(info->screen_base);
+	release_mem_region(info->fix.smem_start, info->fix.smem_len);
+	fb_destroy_modedb(info->monspecs.modedb);
+	fb_dealloc_cmap(&info->cmap);
+
+	/* kfree() ignores NULL, so the pointers need no individual checks. */
+	if (par) {
+		kfree(par->vbe_modes);
+		kfree(par->vbe_state_orig);
+		kfree(par->vbe_state_saved);
+	}
+
+	framebuffer_release(info);
+	return 0;
+}
+
+static struct platform_driver uvesafb_driver = {
+	.probe  = uvesafb_probe,
+	.remove = uvesafb_remove,
+	.driver = {
+		.name = "uvesafb",
+	},
+};
+
+static struct platform_device *uvesafb_device;
+
+#ifndef MODULE
+/* Parse a comma-separated option string into the driver's settings. */
+static int __devinit uvesafb_setup(char *options)
+{
+	char *opt;
+
+	if (!options || !*options)
+		return 0;
+
+	while ((opt = strsep(&options, ",")) != NULL) {
+		if (!*opt)
+			continue;	/* skip empty fields such as ",," */
+		if (!strcmp(opt, "redraw"))
+			ypan = 0;
+		else if (!strcmp(opt, "ypan"))
+			ypan = 1;
+		else if (!strcmp(opt, "ywrap"))
+			ypan = 2;
+		else if (!strcmp(opt, "vgapal"))
+			pmi_setpal = 0;
+		else if (!strcmp(opt, "pmipal"))
+			pmi_setpal = 1;
+		else if (!strncmp(opt, "mtrr:", 5))
+			mtrr = simple_strtoul(opt + 5, NULL, 0);
+		else if (!strcmp(opt, "nomtrr"))
+			mtrr = 0;
+		else if (!strcmp(opt, "nocrtc"))
+			nocrtc = 1;
+		else if (!strcmp(opt, "noedid"))
+			noedid = 1;
+		else if (!strcmp(opt, "noblank"))
+			blank = 0;
+		else if (!strncmp(opt, "vtotal:", 7))
+			vram_total = simple_strtoul(opt + 7, NULL, 0);
+		else if (!strncmp(opt, "vremap:", 7))
+			vram_remap = simple_strtoul(opt + 7, NULL, 0);
+		else if (!strncmp(opt, "maxhf:", 6))
+			maxhf = simple_strtoul(opt + 6, NULL, 0);
+		else if (!strncmp(opt, "maxvf:", 6))
+			maxvf = simple_strtoul(opt + 6, NULL, 0);
+		else if (!strncmp(opt, "maxclk:", 7))
+			maxclk = simple_strtoul(opt + 7, NULL, 0);
+		else if (!strncmp(opt, "vbemode:", 8))
+			vbemode = simple_strtoul(opt + 8, NULL, 0);
+		else if (opt[0] >= '0' && opt[0] <= '9')
+			mode_option = opt;
+		else
+			printk(KERN_WARNING
+				"uvesafb: unrecognized option %s\n", opt);
+	}
+
+	return 0;
+}
+#endif /* !MODULE */
+
+static ssize_t show_v86d(struct device_driver *dev, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%s\n", v86d_path);
+}
+
+static ssize_t store_v86d(struct device_driver *dev, const char *buf,
+		size_t count)
+{
+	strlcpy(v86d_path, buf, PATH_MAX);	/* bounded, always NUL-terminated */
+	return count;
+}
+
+static DRIVER_ATTR(v86d, S_IRUGO | S_IWUSR, show_v86d, store_v86d);
+
+static int __devinit uvesafb_init(void)
+{
+	int err;
+
+#ifndef MODULE
+	char *option = NULL;
+
+	if (fb_get_options("uvesafb", &option))
+		return -ENODEV;
+	uvesafb_setup(option);
+#endif
+	err = cn_add_callback(&uvesafb_cn_id, "uvesafb", uvesafb_cn_callback);
+	if (err)
+		return err;
+
+	err = platform_driver_register(&uvesafb_driver);
+	if (err)
+		goto err_cn;	/* don't leave the connector callback registered */
+	uvesafb_device = platform_device_alloc("uvesafb", 0);
+	if (!uvesafb_device) {
+		err = -ENOMEM;
+		goto err_driver;
+	}
+	err = platform_device_add(uvesafb_device);
+	if (err)
+		goto err_device;
+
+	/* The v86d attribute is optional: only warn if it can't be created. */
+	if (driver_create_file(&uvesafb_driver.driver, &driver_attr_v86d))
+		printk(KERN_WARNING "uvesafb: failed to register "
+				"attributes\n");
+	return 0;
+
+err_device:
+	platform_device_put(uvesafb_device);
+err_driver:
+	platform_driver_unregister(&uvesafb_driver);
+err_cn:
+	cn_del_callback(&uvesafb_cn_id);
+	return err;
+}
+
+module_init(uvesafb_init);
+
+/* Run a TF_EXIT task if v86d_started is set, then undo the registrations. */
+static void __devexit uvesafb_exit(void)
+{
+	struct uvesafb_ktask *task = NULL;
+
+	if (v86d_started)
+		task = uvesafb_prep();
+	if (task) {
+		task->t.flags = TF_EXIT;
+		uvesafb_exec(task);
+		uvesafb_free(task);
+	}
+
+	cn_del_callback(&uvesafb_cn_id);
+	driver_remove_file(&uvesafb_driver.driver, &driver_attr_v86d);
+	platform_device_unregister(uvesafb_device);
+	platform_driver_unregister(&uvesafb_driver);
+}
+
+module_exit(uvesafb_exit);
+
+static inline int param_get_scroll(char *buffer, struct kernel_param *kp)
+{
+	return 0;
+}
+
+static inline int param_set_scroll(const char *val, struct kernel_param *kp)
+{
+	/* Map the scrolling-mode keyword onto ypan; reject anything else. */
+	if (!strcmp(val, "redraw"))
+		ypan = 0;
+	else if (!strcmp(val, "ypan"))
+		ypan = 1;
+	else if (!strcmp(val, "ywrap"))
+		ypan = 2;
+	else
+		return -EINVAL;
+	return 0;
+}
+
+#define param_check_scroll(name, p) __param_check(name, p, void);
+
+module_param_named(scroll, ypan, scroll, 0);
+MODULE_PARM_DESC(scroll,
+	"Scrolling mode, set to 'redraw', 'ypan' or 'ywrap'");
+module_param_named(vgapal, pmi_setpal, invbool, 0);
+MODULE_PARM_DESC(vgapal, "Set palette using VGA registers");
+module_param_named(pmipal, pmi_setpal, bool, 0);
+MODULE_PARM_DESC(pmipal, "Set palette using PMI calls");
+module_param(mtrr, uint, 0);
+MODULE_PARM_DESC(mtrr,
+	"Memory Type Range Registers setting. Use 0 to disable.");
+module_param(blank, bool, 0);
+MODULE_PARM_DESC(blank, "Enable hardware blanking");
+module_param(nocrtc, bool, 0);
+MODULE_PARM_DESC(nocrtc, "Ignore CRTC timings when setting modes");
+module_param(noedid, bool, 0);
+MODULE_PARM_DESC(noedid,
+	"Ignore EDID-provided monitor limits when setting modes");
+module_param(vram_remap, uint, 0);
+MODULE_PARM_DESC(vram_remap, "Set amount of video memory to be used [MiB]");
+module_param(vram_total, uint, 0);
+MODULE_PARM_DESC(vram_total, "Set total amount of video memory [MiB]");
+module_param(maxclk, ushort, 0);
+MODULE_PARM_DESC(maxclk, "Maximum pixelclock [MHz], overrides EDID data");
+module_param(maxhf, ushort, 0);
+MODULE_PARM_DESC(maxhf,
+	"Maximum horizontal frequency [kHz], overrides EDID data");
+module_param(maxvf, ushort, 0);
+MODULE_PARM_DESC(maxvf,
+	"Maximum vertical frequency [Hz], overrides EDID data");
+module_param_named(mode, mode_option, charp, 0);
+MODULE_PARM_DESC(mode,
+	"Specify initial video mode as \"<xres>x<yres>[-<bpp>][@<refresh>]\"");
+module_param(vbemode, ushort, 0);
+MODULE_PARM_DESC(vbemode,
+	"VBE mode number to set, overrides the 'mode' option");
+module_param_string(v86d, v86d_path, PATH_MAX, 0660);
+MODULE_PARM_DESC(v86d, "Path to the v86d userspace helper.");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Januszewski <spock@gentoo.org>");
+MODULE_DESCRIPTION("Framebuffer driver for VBE2.0+ compliant graphics boards");
+
diff --git a/drivers/video/vermilion/vermilion.c b/drivers/video/vermilion/vermilion.c
index de531c90771..ff9e805c43b 100644
--- a/drivers/video/vermilion/vermilion.c
+++ b/drivers/video/vermilion/vermilion.c
@@ -39,7 +39,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <linux/mmzone.h>
-#include <asm/uaccess.h>
 
 /* #define VERMILION_DEBUG */
 
diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index 64ee78c3c12..072638a9528 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -21,7 +21,6 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 
-#include <asm/uaccess.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 
@@ -38,6 +37,48 @@ static void *videomemory;
 static u_long videomemorysize = VIDEOMEMSIZE;
 module_param(videomemorysize, ulong, 0);
 
+/**********************************************************************
+ *
+ * Memory management
+ *
+ **********************************************************************/
+/*
+ * Allocate a zeroed vmalloc_32() buffer of @size bytes rounded up to whole
+ * pages and mark every page reserved.  Returns NULL on failure.
+ */
+static void *rvmalloc(unsigned long size)
+{
+	unsigned long bytes = PAGE_ALIGN(size);
+	unsigned long off;
+	char *buf;
+
+	buf = vmalloc_32(bytes);
+	if (!buf)
+		return NULL;
+
+	memset(buf, 0, bytes);	/* never hand stale memory to the user */
+	for (off = 0; off < bytes; off += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(buf + off));
+
+	return buf;
+}
+
+/* Clear the reserved bit on each page covering @size bytes, then vfree(). */
+static void rvfree(void *mem, unsigned long size)
+{
+	char *page_addr = mem;
+	long remaining;
+
+	if (!mem)
+		return;
+
+	for (remaining = size; remaining > 0; remaining -= PAGE_SIZE) {
+		ClearPageReserved(vmalloc_to_page(page_addr));
+		page_addr += PAGE_SIZE;
+	}
+	vfree(mem);
+}
+
 static struct fb_var_screeninfo vfb_default __initdata = {
 	.xres =		640,
 	.yres =		480,
@@ -372,7 +413,33 @@ static int vfb_pan_display(struct fb_var_screeninfo *var,
 static int vfb_mmap(struct fb_info *info,
 		    struct vm_area_struct *vma)
 {
-	return -EINVAL;
+	unsigned long start = vma->vm_start;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	unsigned long offset, page, pos;
+
+	/* Reject offsets/lengths that would wrap before the bounds check. */
+	if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT))
+		return -EINVAL;
+	offset = vma->vm_pgoff << PAGE_SHIFT;
+	if (size > info->fix.smem_len || offset > info->fix.smem_len - size)
+		return -EINVAL;
+
+	pos = (unsigned long)info->fix.smem_start + offset;
+	/* Map the buffer into the VMA one page at a time. */
+	while (size > 0) {
+		page = vmalloc_to_pfn((void *)pos);
+		if (remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED))
+			return -EAGAIN;
+		start += PAGE_SIZE;
+		pos += PAGE_SIZE;
+		if (size > PAGE_SIZE)
+			size -= PAGE_SIZE;
+		else
+			size = 0;
+	}
+
+	vma->vm_flags |= VM_RESERVED;	/* keep this VMA from being swapped out */
+	return 0;
 }
 
 #ifndef MODULE
@@ -407,7 +474,7 @@ static int __init vfb_probe(struct platform_device *dev)
 	/*
 	 * For real video cards we use ioremap.
 	 */
-	if (!(videomemory = vmalloc(videomemorysize)))
+	if (!(videomemory = rvmalloc(videomemorysize)))
 		return retval;
 
 	/*
@@ -430,6 +497,8 @@ static int __init vfb_probe(struct platform_device *dev)
 
 	if (!retval || (retval == 4))
 		info->var = vfb_default;
+	vfb_fix.smem_start = (unsigned long) videomemory;
+	vfb_fix.smem_len = videomemorysize;
 	info->fix = vfb_fix;
 	info->pseudo_palette = info->par;
 	info->par = NULL;
@@ -453,7 +522,7 @@ err2:
 err1:
 	framebuffer_release(info);
 err:
-	vfree(videomemory);
+	rvfree(videomemory, videomemorysize);
 	return retval;
 }
 
@@ -463,7 +532,7 @@ static int vfb_remove(struct platform_device *dev)
 
 	if (info) {
 		unregister_framebuffer(info);
-		vfree(videomemory);
+		rvfree(videomemory, videomemorysize);
 		framebuffer_release(info);
 	}
 	return 0;
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 7e7a04be127..e647200262a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -61,10 +61,14 @@ static int adfs_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page, adfs_get_block);
 }
 
-static int adfs_prepare_write(struct file *file, struct page *page, unsigned int from, unsigned int to)
+static int adfs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return cont_prepare_write(page, from, to, adfs_get_block,
-		&ADFS_I(page->mapping->host)->mmu_private);
+	*pagep = NULL;
+	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				adfs_get_block,
+				&ADFS_I(mapping->host)->mmu_private);
 }
 
 static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
@@ -76,8 +80,8 @@ static const struct address_space_operations adfs_aops = {
 	.readpage	= adfs_readpage,
 	.writepage	= adfs_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= adfs_prepare_write,
-	.commit_write	= generic_commit_write,
+	.write_begin	= adfs_write_begin,
+	.write_end	= generic_write_end,
 	.bmap		= _adfs_bmap
 };
 
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c314a35f091..6e0c9399200 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -395,25 +395,33 @@ static int affs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, affs_get_block, wbc);
 }
+
 static int affs_readpage(struct file *file, struct page *page)
 {
 	return block_read_full_page(page, affs_get_block);
 }
-static int affs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+
+static int affs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return cont_prepare_write(page, from, to, affs_get_block,
-		&AFFS_I(page->mapping->host)->mmu_private);
+	*pagep = NULL;
+	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				affs_get_block,
+				&AFFS_I(mapping->host)->mmu_private);
 }
+
 static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,affs_get_block);
 }
+
 const struct address_space_operations affs_aops = {
 	.readpage = affs_readpage,
 	.writepage = affs_writepage,
 	.sync_page = block_sync_page,
-	.prepare_write = affs_prepare_write,
-	.commit_write = generic_commit_write,
+	.write_begin = affs_write_begin,
+	.write_end = generic_write_end,
 	.bmap = _affs_bmap
 };
 
@@ -603,54 +611,65 @@ affs_readpage_ofs(struct file *file, struct page *page)
 	return err;
 }
 
-static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned from, unsigned to)
+static int affs_write_begin_ofs(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata)
 {
-	struct inode *inode = page->mapping->host;
-	u32 size, offset;
-	u32 tmp;
+	struct inode *inode = mapping->host;
+	struct page *page;
+	pgoff_t index;
 	int err = 0;
 
-	pr_debug("AFFS: prepare_write(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
-	offset = page->index << PAGE_CACHE_SHIFT;
-	if (offset + from > AFFS_I(inode)->mmu_private) {
-		err = affs_extent_file_ofs(inode, offset + from);
+	pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len);
+	if (pos > AFFS_I(inode)->mmu_private) {
+		/* XXX: this probably leaves a too-big i_size in case of
+		 * failure. Should really be updating i_size at write_end time
+		 */
+		err = affs_extent_file_ofs(inode, pos);
 		if (err)
 			return err;
 	}
-	size = inode->i_size;
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
 
 	if (PageUptodate(page))
 		return 0;
 
-	if (from) {
-		err = affs_do_readpage_ofs(file, page, 0, from);
-		if (err)
-			return err;
-	}
-	if (to < PAGE_CACHE_SIZE) {
-		zero_user_page(page, to, PAGE_CACHE_SIZE - to, KM_USER0);
-		if (size > offset + to) {
-			if (size < offset + PAGE_CACHE_SIZE)
-				tmp = size & ~PAGE_CACHE_MASK;
-			else
-				tmp = PAGE_CACHE_SIZE;
-			err = affs_do_readpage_ofs(file, page, to, tmp);
-		}
+	/* XXX: inefficient but safe in the face of short writes */
+	err = affs_do_readpage_ofs(file, page, 0, PAGE_CACHE_SIZE);
+	if (err) {
+		unlock_page(page);
+		page_cache_release(page);
 	}
 	return err;
 }
 
-static int affs_commit_write_ofs(struct file *file, struct page *page, unsigned from, unsigned to)
+static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *bh, *prev_bh;
 	char *data;
 	u32 bidx, boff, bsize;
+	unsigned from, to;
 	u32 tmp;
 	int written;
 
-	pr_debug("AFFS: commit_write(%u, %ld, %d, %d)\n", (u32)inode->i_ino, page->index, from, to);
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = pos + len;
+	/*
+	 * XXX: not sure if this can handle short copies (copied < len), but
+	 * we don't have to, because the page should always be uptodate here,
+	 * due to write_begin.
+	 */
+
+	pr_debug("AFFS: write_begin(%u, %llu, %llu)\n", (u32)inode->i_ino, (unsigned long long)pos, (unsigned long long)pos + len);
 	bsize = AFFS_SB(sb)->s_data_blksize;
 	data = page_address(page);
 
@@ -748,6 +767,9 @@ done:
 	if (tmp > inode->i_size)
 		inode->i_size = AFFS_I(inode)->mmu_private = tmp;
 
+	unlock_page(page);
+	page_cache_release(page);
+
 	return written;
 
 out:
@@ -761,8 +783,8 @@ const struct address_space_operations affs_aops_ofs = {
 	.readpage = affs_readpage_ofs,
 	//.writepage = affs_writepage_ofs,
 	//.sync_page = affs_sync_page_ofs,
-	.prepare_write = affs_prepare_write_ofs,
-	.commit_write = affs_commit_write_ofs
+	.write_begin = affs_write_begin_ofs,
+	.write_end = affs_write_end_ofs
 };
 
 /* Free any preallocated blocks. */
@@ -805,18 +827,13 @@ affs_truncate(struct inode *inode)
 	if (inode->i_size > AFFS_I(inode)->mmu_private) {
 		struct address_space *mapping = inode->i_mapping;
 		struct page *page;
-		u32 size = inode->i_size - 1;
+		void *fsdata;
+		u32 size = inode->i_size;
 		int res;
 
-		page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT);
-		if (!page)
-			return;
-		size = (size & (PAGE_CACHE_SIZE - 1)) + 1;
-		res = mapping->a_ops->prepare_write(NULL, page, size, size);
+		res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
 		if (!res)
-			res = mapping->a_ops->commit_write(NULL, page, size, size);
-		unlock_page(page);
-		page_cache_release(page);
+			res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
 		mark_inode_dirty(inode);
 		return;
 	} else if (inode->i_size == AFFS_I(inode)->mmu_private)
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 24310e9ee05..911b4ccf470 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -145,9 +145,13 @@ static int bfs_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page, bfs_get_block);
 }
 
-static int bfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+static int bfs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return block_prepare_write(page, from, to, bfs_get_block);
+	*pagep = NULL;
+	return block_write_begin(file, mapping, pos, len, flags,
+					pagep, fsdata, bfs_get_block);
 }
 
 static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
@@ -159,8 +163,8 @@ const struct address_space_operations bfs_aops = {
 	.readpage	= bfs_readpage,
 	.writepage	= bfs_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= bfs_prepare_write,
-	.commit_write	= generic_commit_write,
+	.write_begin	= bfs_write_begin,
+	.write_end	= generic_write_end,
 	.bmap		= bfs_bmap,
 };
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b1013f34085..f3037c645ca 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1725,7 +1725,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 						&page, &vma) <= 0) {
 				DUMP_SEEK(PAGE_SIZE);
 			} else {
-				if (page == ZERO_PAGE(addr)) {
+				if (page == ZERO_PAGE(0)) {
 					if (!dump_seek(file, PAGE_SIZE)) {
 						page_cache_release(page);
 						goto end_coredump;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2f5d8dbe676..c5ca2f0aca7 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1488,7 +1488,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
 					   &page, &vma) <= 0) {
 				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 			}
-			else if (page == ZERO_PAGE(addr)) {
+			else if (page == ZERO_PAGE(0)) {
 				page_cache_release(page);
 				DUMP_SEEK(file->f_pos + PAGE_SIZE);
 			}
diff --git a/fs/bio.c b/fs/bio.c
index 5f604f269df..d59ddbf7962 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -109,11 +109,14 @@ static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned lon
 
 void bio_free(struct bio *bio, struct bio_set *bio_set)
 {
-	const int pool_idx = BIO_POOL_IDX(bio);
+	if (bio->bi_io_vec) {
+		const int pool_idx = BIO_POOL_IDX(bio);
 
-	BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
+		BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS);
+
+		mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
+	}
 
-	mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
 	mempool_free(bio, bio_set->bio_pool);
 }
 
@@ -127,21 +130,9 @@ static void bio_fs_destructor(struct bio *bio)
 
 void bio_init(struct bio *bio)
 {
-	bio->bi_next = NULL;
-	bio->bi_bdev = NULL;
+	memset(bio, 0, sizeof(*bio));
 	bio->bi_flags = 1 << BIO_UPTODATE;
-	bio->bi_rw = 0;
-	bio->bi_vcnt = 0;
-	bio->bi_idx = 0;
-	bio->bi_phys_segments = 0;
-	bio->bi_hw_segments = 0;
-	bio->bi_hw_front_size = 0;
-	bio->bi_hw_back_size = 0;
-	bio->bi_size = 0;
-	bio->bi_max_vecs = 0;
-	bio->bi_end_io = NULL;
 	atomic_set(&bio->bi_cnt, 1);
-	bio->bi_private = NULL;
 }
 
 /**
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6339a30879b..379a446e243 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -378,14 +378,26 @@ static int blkdev_readpage(struct file * file, struct page * page)
 	return block_read_full_page(page, blkdev_get_block);
 }
 
-static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+static int blkdev_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return block_prepare_write(page, from, to, blkdev_get_block);
+	*pagep = NULL;
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				blkdev_get_block);
 }
 
-static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
+static int blkdev_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
-	return block_commit_write(page, from, to);
+	int ret;
+	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	unlock_page(page);
+	page_cache_release(page);
+
+	return ret;
 }
 
 /*
@@ -1327,8 +1339,8 @@ const struct address_space_operations def_blk_aops = {
 	.readpage	= blkdev_readpage,
 	.writepage	= blkdev_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= blkdev_prepare_write,
-	.commit_write	= blkdev_commit_write,
+	.write_begin	= blkdev_write_begin,
+	.write_end	= blkdev_write_end,
 	.writepages	= generic_writepages,
 	.direct_IO	= blkdev_direct_IO,
 };
diff --git a/fs/buffer.c b/fs/buffer.c
index 75b51dfa5e0..faceb5eecca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -110,10 +110,14 @@ static void buffer_io_error(struct buffer_head *bh)
 }
 
 /*
- * Default synchronous end-of-IO handler..  Just mark it up-to-date and
- * unlock the buffer. This is what ll_rw_block uses too.
+ * End-of-IO handler helper function which does not touch the bh after
+ * unlocking it.
+ * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
+ * a race there is benign: unlock_buffer() only uses the bh's address for
+ * hashing after unlocking the buffer, so it doesn't actually touch the bh
+ * itself.
  */
-void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
 {
 	if (uptodate) {
 		set_buffer_uptodate(bh);
@@ -122,6 +126,15 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
 		clear_buffer_uptodate(bh);
 	}
 	unlock_buffer(bh);
+}
+
+/*
+ * Default synchronous end-of-IO handler..  Just mark it up-to-date and
+ * unlock the buffer. This is what ll_rw_block uses too.
+ */
+void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
+{
+	__end_buffer_read_notouch(bh, uptodate);
 	put_bh(bh);
 }
 
@@ -1757,6 +1770,48 @@ recover:
 	goto done;
 }
 
+/*
+ * Walk the buffers of a locked page and, for every BH_New buffer that
+ * overlaps [from, to), clear the new bit and dirty the buffer.  If the page
+ * is not uptodate, the overlapping bytes are zeroed first and the buffer is
+ * marked uptodate, so uninitialised block data cannot leak out.
+ */
+void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+{
+	struct buffer_head *first, *cur;
+	unsigned int blk_lo, blk_hi;
+
+	BUG_ON(!PageLocked(page));
+	if (!page_has_buffers(page))
+		return;
+
+	first = page_buffers(page);
+	cur = first;
+	blk_lo = 0;
+	do {
+		blk_hi = blk_lo + cur->b_size;
+
+		if (buffer_new(cur) && blk_hi > from && blk_lo < to) {
+			if (!PageUptodate(page)) {
+				unsigned zero_from = max(from, blk_lo);
+				unsigned zero_len = min(to, blk_hi) - zero_from;
+
+				zero_user_page(page, zero_from, zero_len,
+						KM_USER0);
+				set_buffer_uptodate(cur);
+			}
+
+			clear_buffer_new(cur);
+			mark_buffer_dirty(cur);
+		}
+
+		/* Step to the next buffer; the walk ends back at the first. */
+		blk_lo = blk_hi;
+		cur = cur->b_this_page;
+	} while (cur != first);
+}
+EXPORT_SYMBOL(page_zero_new_buffers);
+
 static int __block_prepare_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to, get_block_t *get_block)
 {
@@ -1800,7 +1855,9 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 				unmap_underlying_metadata(bh->b_bdev,
 							bh->b_blocknr);
 				if (PageUptodate(page)) {
+					clear_buffer_new(bh);
 					set_buffer_uptodate(bh);
+					mark_buffer_dirty(bh);
 					continue;
 				}
 				if (block_end > to || block_start < from) {
@@ -1839,38 +1896,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (!err) {
-		bh = head;
-		do {
-			if (buffer_new(bh))
-				clear_buffer_new(bh);
-		} while ((bh = bh->b_this_page) != head);
-		return 0;
-	}
-	/* Error case: */
-	/*
-	 * Zero out any newly allocated blocks to avoid exposing stale
-	 * data.  If BH_New is set, we know that the block was newly
-	 * allocated in the above loop.
-	 */
-	bh = head;
-	block_start = 0;
-	do {
-		block_end = block_start+blocksize;
-		if (block_end <= from)
-			goto next_bh;
-		if (block_start >= to)
-			break;
-		if (buffer_new(bh)) {
-			clear_buffer_new(bh);
-			zero_user_page(page, block_start, bh->b_size, KM_USER0);
-			set_buffer_uptodate(bh);
-			mark_buffer_dirty(bh);
-		}
-next_bh:
-		block_start = block_end;
-		bh = bh->b_this_page;
-	} while (bh != head);
+	if (unlikely(err))
+		page_zero_new_buffers(page, from, to);
 	return err;
 }
 
@@ -1895,6 +1922,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 			set_buffer_uptodate(bh);
 			mark_buffer_dirty(bh);
 		}
+		clear_buffer_new(bh);
 	}
 
 	/*
@@ -1909,6 +1937,130 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 }
 
 /*
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
+ * If *pagep is not NULL, then block_write_begin uses the locked page
+ * at *pagep rather than allocating its own. In this case, the page will
+ * not be unlocked or deallocated on failure.
+ */
+int block_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata,
+			get_block_t *get_block)
+{
+	struct inode *inode = mapping->host;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned end = start + len;
+	struct page *page = *pagep;
+	int ownpage = (page == NULL);
+	int status;
+
+	if (ownpage) {
+		/* No page supplied: grab one from the pagecache ourselves. */
+		page = __grab_cache_page(mapping, index);
+		if (!page)
+			return -ENOMEM;
+		*pagep = page;
+	} else {
+		/* A caller-supplied page must already be locked. */
+		BUG_ON(!PageLocked(page));
+	}
+
+	status = __block_prepare_write(inode, page, start, end, get_block);
+	if (likely(!status))
+		return 0;
+
+	ClearPageUptodate(page);
+
+	/*
+	 * Pages handed in by the caller are left locked and referenced;
+	 * only a page we grabbed ourselves is released on failure.
+	 */
+	if (!ownpage)
+		return status;
+
+	unlock_page(page);
+	page_cache_release(page);
+	*pagep = NULL;
+
+	/*
+	 * prepare_write() may have instantiated a few blocks
+	 * outside i_size.  Trim these off again. Don't need
+	 * i_size_read because we hold i_mutex.
+	 */
+	if (pos + len > inode->i_size)
+		vmtruncate(inode, inode->i_size);
+
+	return status;
+}
+EXPORT_SYMBOL(block_write_begin);
+
+int block_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	struct inode *host = mapping->host;
+	/* Byte offset of this write within the page. */
+	const unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+
+	if (unlikely(copied < len)) {
+		/*
+		 * Short copy.  Buffers that were written completely are
+		 * uptodate, so a later readpage cannot overwrite them.  A
+		 * buffer that was only partly written is not uptodate, and a
+		 * readpage could destroy the partial data.
+		 *
+		 * Keep it simple: on a page that is not uptodate, treat the
+		 * short write as zero-length so the caller redoes the whole
+		 * thing.
+		 */
+		if (!PageUptodate(page))
+			copied = 0;
+
+		/*
+		 * Clean up new buffers in the range that was not copied.
+		 */
+		page_zero_new_buffers(page, offset + copied, offset + len);
+	}
+	flush_dcache_page(page);
+
+	/* This could be a short (even 0-length) commit */
+	__block_commit_write(host, page, offset, offset + copied);
+
+	return copied;
+}
+EXPORT_SYMBOL(block_write_end);
+
+int generic_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	struct inode *host = mapping->host;
+	loff_t write_end;
+
+	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+	write_end = pos + copied;
+
+	/*
+	 * i_size may be read directly: i_mutex is held, so it cannot change
+	 * under us.  The update must happen before the page is unlocked,
+	 * otherwise page writeout could come in and zero beyond i_size.
+	 */
+	if (write_end > host->i_size) {
+		i_size_write(host, write_end);
+		mark_inode_dirty(host);
+	}
+
+	unlock_page(page);
+	page_cache_release(page);
+
+	return copied;
+}
+EXPORT_SYMBOL(generic_write_end);
+
+/*
  * Generic "read page" function for block devices that have the normal
  * get_block functionality. This is most of the block device filesystems.
  * Reads the page asynchronously --- the unlock_buffer() and
@@ -2004,14 +2156,14 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 }
 
 /* utility function for filesystems that need to do work on expanding
- * truncates.  Uses prepare/commit_write to allow the filesystem to
+ * truncates.  Uses filesystem pagecache writes to allow the filesystem to
  * deal with the hole.  
  */
-static int __generic_cont_expand(struct inode *inode, loff_t size,
-				 pgoff_t index, unsigned int offset)
+int generic_cont_expand_simple(struct inode *inode, loff_t size)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
+	void *fsdata;
 	unsigned long limit;
 	int err;
 
@@ -2024,140 +2176,115 @@ static int __generic_cont_expand(struct inode *inode, loff_t size,
 	if (size > inode->i_sb->s_maxbytes)
 		goto out;
 
-	err = -ENOMEM;
-	page = grab_cache_page(mapping, index);
-	if (!page)
-		goto out;
-	err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
-	if (err) {
-		/*
-		 * ->prepare_write() may have instantiated a few blocks
-		 * outside i_size.  Trim these off again.
-		 */
-		unlock_page(page);
-		page_cache_release(page);
-		vmtruncate(inode, inode->i_size);
+	err = pagecache_write_begin(NULL, mapping, size, 0,
+				AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
+				&page, &fsdata);
+	if (err)
 		goto out;
-	}
 
-	err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
+	BUG_ON(err > 0);
 
-	unlock_page(page);
-	page_cache_release(page);
-	if (err > 0)
-		err = 0;
 out:
 	return err;
 }
 
-int generic_cont_expand(struct inode *inode, loff_t size)
+int cont_expand_zero(struct file *file, struct address_space *mapping,
+			loff_t pos, loff_t *bytes)
 {
-	pgoff_t index;
-	unsigned int offset;
+	struct inode *inode = mapping->host;
+	unsigned blocksize = 1 << inode->i_blkbits;
+	struct page *page;
+	void *fsdata;
+	pgoff_t index, curidx;
+	loff_t curpos;
+	unsigned zerofrom, offset, len;
+	int err = 0;
 
-	offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
+	index = pos >> PAGE_CACHE_SHIFT;
+	offset = pos & ~PAGE_CACHE_MASK;
 
-	/* ugh.  in prepare/commit_write, if from==to==start of block, we
-	** skip the prepare.  make sure we never send an offset for the start
-	** of a block
-	*/
-	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-		/* caller must handle this extra byte. */
-		offset++;
-	}
-	index = size >> PAGE_CACHE_SHIFT;
+	while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
+		zerofrom = curpos & ~PAGE_CACHE_MASK;
+		if (zerofrom & (blocksize-1)) {
+			*bytes |= (blocksize-1);
+			(*bytes)++;
+		}
+		len = PAGE_CACHE_SIZE - zerofrom;
 
-	return __generic_cont_expand(inode, size, index, offset);
-}
+		err = pagecache_write_begin(file, mapping, curpos, len,
+						AOP_FLAG_UNINTERRUPTIBLE,
+						&page, &fsdata);
+		if (err)
+			goto out;
+		zero_user_page(page, zerofrom, len, KM_USER0);
+		err = pagecache_write_end(file, mapping, curpos, len, len,
+						page, fsdata);
+		if (err < 0)
+			goto out;
+		BUG_ON(err != len);
+		err = 0;
+	}
 
-int generic_cont_expand_simple(struct inode *inode, loff_t size)
-{
-	loff_t pos = size - 1;
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
+	/* pos lies in the page holding *bytes: zero only up to pos */
+	if (index == curidx) {
+		zerofrom = curpos & ~PAGE_CACHE_MASK;
+		/* nothing to zero when pos is at or below the boundary */
+		if (offset <= zerofrom) {
+			goto out;
+		}
+		if (zerofrom & (blocksize-1)) {
+			*bytes |= (blocksize-1);
+			(*bytes)++;
+		}
+		len = offset - zerofrom;
 
-	/* prepare/commit_write can handle even if from==to==start of block. */
-	return __generic_cont_expand(inode, size, index, offset);
+		err = pagecache_write_begin(file, mapping, curpos, len,
+						AOP_FLAG_UNINTERRUPTIBLE,
+						&page, &fsdata);
+		if (err)
+			goto out;
+		zero_user_page(page, zerofrom, len, KM_USER0);
+		err = pagecache_write_end(file, mapping, curpos, len, len,
+						page, fsdata);
+		if (err < 0)
+			goto out;
+		BUG_ON(err != len);
+		err = 0;
+	}
+out:
+	return err;
 }
 
 /*
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
  */
-
-int cont_prepare_write(struct page *page, unsigned offset,
-		unsigned to, get_block_t *get_block, loff_t *bytes)
+int cont_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata,
+			get_block_t *get_block, loff_t *bytes)
 {
-	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
-	struct page *new_page;
-	pgoff_t pgpos;
-	long status;
-	unsigned zerofrom;
 	unsigned blocksize = 1 << inode->i_blkbits;
+	unsigned zerofrom;
+	int err;
 
-	while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
-		status = -ENOMEM;
-		new_page = grab_cache_page(mapping, pgpos);
-		if (!new_page)
-			goto out;
-		/* we might sleep */
-		if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
-			unlock_page(new_page);
-			page_cache_release(new_page);
-			continue;
-		}
-		zerofrom = *bytes & ~PAGE_CACHE_MASK;
-		if (zerofrom & (blocksize-1)) {
-			*bytes |= (blocksize-1);
-			(*bytes)++;
-		}
-		status = __block_prepare_write(inode, new_page, zerofrom,
-						PAGE_CACHE_SIZE, get_block);
-		if (status)
-			goto out_unmap;
-		zero_user_page(new_page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
-				KM_USER0);
-		generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
-		unlock_page(new_page);
-		page_cache_release(new_page);
-	}
-
-	if (page->index < pgpos) {
-		/* completely inside the area */
-		zerofrom = offset;
-	} else {
-		/* page covers the boundary, find the boundary offset */
-		zerofrom = *bytes & ~PAGE_CACHE_MASK;
-
-		/* if we will expand the thing last block will be filled */
-		if (to > zerofrom && (zerofrom & (blocksize-1))) {
-			*bytes |= (blocksize-1);
-			(*bytes)++;
-		}
+	err = cont_expand_zero(file, mapping, pos, bytes);
+	if (err)
+		goto out;
 
-		/* starting below the boundary? Nothing to zero out */
-		if (offset <= zerofrom)
-			zerofrom = offset;
-	}
-	status = __block_prepare_write(inode, page, zerofrom, to, get_block);
-	if (status)
-		goto out1;
-	if (zerofrom < offset) {
-		zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0);
-		__block_commit_write(inode, page, zerofrom, offset);
+	zerofrom = *bytes & ~PAGE_CACHE_MASK;
+	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
+		*bytes |= (blocksize-1);
+		(*bytes)++;
 	}
-	return 0;
-out1:
-	ClearPageUptodate(page);
-	return status;
 
-out_unmap:
-	ClearPageUptodate(new_page);
-	unlock_page(new_page);
-	page_cache_release(new_page);
+	*pagep = NULL;
+	err = block_write_begin(file, mapping, pos, len,
+				flags, pagep, fsdata, get_block);
 out:
-	return status;
+	return err;
 }
 
 int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -2242,81 +2369,129 @@ out_unlock:
 }
 
 /*
- * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
+ * nobh_write_begin()'s prereads are special: the buffer_heads are freed
  * immediately, while under the page lock.  So it needs a special end_io
  * handler which does not touch the bh after unlocking it.
- *
- * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
- * a race there is benign: unlock_buffer() only use the bh's address for
- * hashing after unlocking the buffer, so it doesn't actually touch the bh
- * itself.
  */
 static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
 {
-	if (uptodate) {
-		set_buffer_uptodate(bh);
-	} else {
-		/* This happens, due to failed READA attempts. */
-		clear_buffer_uptodate(bh);
-	}
-	unlock_buffer(bh);
+	__end_buffer_read_notouch(bh, uptodate);
+}
+
+/*
+ * Attach the singly-linked list of buffers created by nobh_write_begin, to
+ * the page (converting it to circular linked list and taking care of page
+ * dirty races).
+ */
+static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
+{
+	struct buffer_head *cur = head;
+
+	BUG_ON(!PageLocked(page));
+	spin_lock(&page->mapping->private_lock);
+	for (;;) {
+		if (PageDirty(page))
+			set_buffer_dirty(cur);
+		if (!cur->b_this_page)	/* NULL tail: close the ring */
+			cur->b_this_page = head;
+		cur = cur->b_this_page;
+		if (cur == head)
+			break;
+	}
+	attach_page_buffers(page, head);
+	spin_unlock(&page->mapping->private_lock);
+}
 
 /*
  * On entry, the page is fully not uptodate.
  * On exit the page is fully uptodate in the areas outside (from,to)
  */
-int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
+int nobh_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	const unsigned blocksize = 1 << blkbits;
-	struct buffer_head map_bh;
-	struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
+	struct buffer_head *head, *bh;
+	struct page *page;
+	pgoff_t index;
+	unsigned from, to;
 	unsigned block_in_page;
-	unsigned block_start;
+	unsigned block_start, block_end;
 	sector_t block_in_file;
 	char *kaddr;
 	int nr_reads = 0;
-	int i;
 	int ret = 0;
 	int is_mapped_to_disk = 1;
 
+	index = pos >> PAGE_CACHE_SHIFT;
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
+
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+	*fsdata = NULL;
+
+	if (page_has_buffers(page)) {
+		unlock_page(page);
+		page_cache_release(page);
+		*pagep = NULL;
+		return block_write_begin(file, mapping, pos, len, flags, pagep,
+					fsdata, get_block);
+	}
+
 	if (PageMappedToDisk(page))
 		return 0;
 
+	/*
+	 * Allocate buffers so that we can keep track of state, and potentially
+	 * attach them to the page if an error occurs. In the common case of
+	 * no error, they will just be freed again without ever being attached
+	 * to the page (which is all OK, because we're under the page lock).
+	 *
+	 * Be careful: the buffer linked list is a NULL terminated one, rather
+	 * than the circular one we're used to.
+	 */
+	head = alloc_page_buffers(page, blocksize, 0);
+	if (!head) {
+		ret = -ENOMEM;
+		goto out_release;
+	}
+
 	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
-	map_bh.b_page = page;
 
 	/*
 	 * We loop across all blocks in the page, whether or not they are
 	 * part of the affected region.  This is so we can discover if the
 	 * page is fully mapped-to-disk.
 	 */
-	for (block_start = 0, block_in_page = 0;
+	for (block_start = 0, block_in_page = 0, bh = head;
 		  block_start < PAGE_CACHE_SIZE;
-		  block_in_page++, block_start += blocksize) {
-		unsigned block_end = block_start + blocksize;
+		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
 		int create;
 
-		map_bh.b_state = 0;
+		block_end = block_start + blocksize;
+		bh->b_state = 0;
 		create = 1;
 		if (block_start >= to)
 			create = 0;
-		map_bh.b_size = blocksize;
 		ret = get_block(inode, block_in_file + block_in_page,
-					&map_bh, create);
+					bh, create);
 		if (ret)
 			goto failed;
-		if (!buffer_mapped(&map_bh))
+		if (!buffer_mapped(bh))
 			is_mapped_to_disk = 0;
-		if (buffer_new(&map_bh))
-			unmap_underlying_metadata(map_bh.b_bdev,
-							map_bh.b_blocknr);
-		if (PageUptodate(page))
+		if (buffer_new(bh))
+			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+		if (PageUptodate(page)) {
+			set_buffer_uptodate(bh);
 			continue;
-		if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
+		}
+		if (buffer_new(bh) || !buffer_mapped(bh)) {
 			kaddr = kmap_atomic(page, KM_USER0);
 			if (block_start < from)
 				memset(kaddr+block_start, 0, from-block_start);
@@ -2326,49 +2501,26 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 			kunmap_atomic(kaddr, KM_USER0);
 			continue;
 		}
-		if (buffer_uptodate(&map_bh))
+		if (buffer_uptodate(bh))
 			continue;	/* reiserfs does this */
 		if (block_start < from || block_end > to) {
-			struct buffer_head *bh = alloc_buffer_head(GFP_NOFS);
-
-			if (!bh) {
-				ret = -ENOMEM;
-				goto failed;
-			}
-			bh->b_state = map_bh.b_state;
-			atomic_set(&bh->b_count, 0);
-			bh->b_this_page = NULL;
-			bh->b_page = page;
-			bh->b_blocknr = map_bh.b_blocknr;
-			bh->b_size = blocksize;
-			bh->b_data = (char *)(long)block_start;
-			bh->b_bdev = map_bh.b_bdev;
-			bh->b_private = NULL;
-			read_bh[nr_reads++] = bh;
+			lock_buffer(bh);
+			bh->b_end_io = end_buffer_read_nobh;
+			submit_bh(READ, bh);
+			nr_reads++;
 		}
 	}
 
 	if (nr_reads) {
-		struct buffer_head *bh;
-
 		/*
 		 * The page is locked, so these buffers are protected from
 		 * any VM or truncate activity.  Hence we don't need to care
 		 * for the buffer_head refcounts.
 		 */
-		for (i = 0; i < nr_reads; i++) {
-			bh = read_bh[i];
-			lock_buffer(bh);
-			bh->b_end_io = end_buffer_read_nobh;
-			submit_bh(READ, bh);
-		}
-		for (i = 0; i < nr_reads; i++) {
-			bh = read_bh[i];
+		for (bh = head; bh; bh = bh->b_this_page) {
 			wait_on_buffer(bh);
 			if (!buffer_uptodate(bh))
 				ret = -EIO;
-			free_buffer_head(bh);
-			read_bh[i] = NULL;
 		}
 		if (ret)
 			goto failed;
@@ -2377,44 +2529,70 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 	if (is_mapped_to_disk)
 		SetPageMappedToDisk(page);
 
+	*fsdata = head; /* to be released by nobh_write_end */
+
 	return 0;
 
 failed:
-	for (i = 0; i < nr_reads; i++) {
-		if (read_bh[i])
-			free_buffer_head(read_bh[i]);
-	}
-
+	BUG_ON(!ret);
 	/*
-	 * Error recovery is pretty slack.  Clear the page and mark it dirty
-	 * so we'll later zero out any blocks which _were_ allocated.
+	 * Error recovery is a bit difficult. We need to zero out blocks that
+	 * were newly allocated, and dirty them to ensure they get written out.
+	 * Buffers need to be attached to the page at this point, otherwise
+	 * the handling of potential IO errors during writeout would be hard
+	 * (could try doing synchronous writeout, but what if that fails too?)
 	 */
-	zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-	SetPageUptodate(page);
-	set_page_dirty(page);
+	attach_nobh_buffers(page, head);
+	page_zero_new_buffers(page, from, to);
+
+out_release:
+	unlock_page(page);
+	page_cache_release(page);
+	*pagep = NULL;
+
+	if (pos + len > inode->i_size)
+		vmtruncate(inode, inode->i_size);
+
 	return ret;
 }
-EXPORT_SYMBOL(nobh_prepare_write);
+EXPORT_SYMBOL(nobh_write_begin);
 
-/*
- * Make sure any changes to nobh_commit_write() are reflected in
- * nobh_truncate_page(), since it doesn't call commit_write().
- */
-int nobh_commit_write(struct file *file, struct page *page,
-		unsigned from, unsigned to)
+int nobh_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
 	struct inode *inode = page->mapping->host;
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	struct buffer_head *head = fsdata;	/* from nobh_write_begin */
+	struct buffer_head *bh;
+
+	if (!PageMappedToDisk(page)) {
+		/* Short copy: hand the private buffers over to the page. */
+		if (unlikely(copied < len) && head && !page_has_buffers(page))
+			attach_nobh_buffers(page, head);
+		if (page_has_buffers(page))
+			return generic_write_end(file, mapping, pos, len,
+						copied, page, fsdata);
+	}
 
 	SetPageUptodate(page);
 	set_page_dirty(page);
-	if (pos > inode->i_size) {
-		i_size_write(inode, pos);
+	if (pos+copied > inode->i_size) {
+		i_size_write(inode, pos+copied);
 		mark_inode_dirty(inode);
 	}
-	return 0;
+
+	unlock_page(page);
+	page_cache_release(page);
+
+	while (head) {	/* list was never attached: free it */
+		bh = head;
+		head = head->b_this_page;
+		free_buffer_head(bh);
+	}
+
+	return copied;
 }
-EXPORT_SYMBOL(nobh_commit_write);
+EXPORT_SYMBOL(nobh_write_end);
 
 /*
  * nobh_writepage() - based on block_full_write_page() except
@@ -2467,44 +2645,79 @@ out:
 }
 EXPORT_SYMBOL(nobh_writepage);
 
-/*
- * This function assumes that ->prepare_write() uses nobh_prepare_write().
- */
-int nobh_truncate_page(struct address_space *mapping, loff_t from)
+int nobh_truncate_page(struct address_space *mapping,
+			loff_t from, get_block_t *get_block)
 {
-	struct inode *inode = mapping->host;
-	unsigned blocksize = 1 << inode->i_blkbits;
 	pgoff_t index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
-	unsigned to;
+	unsigned blocksize;
+	sector_t iblock;
+	unsigned length;
+	struct inode *inode = mapping->host;
 	struct page *page;
-	const struct address_space_operations *a_ops = mapping->a_ops;
-	int ret = 0;
+	struct buffer_head map_bh;
+	int err;
 
-	if ((offset & (blocksize - 1)) == 0)
-		goto out;
+	blocksize = 1 << inode->i_blkbits;
+	length = offset & (blocksize - 1);
+
+	/* Block boundary? Nothing to do */
+	if (!length)
+		return 0;
+
+	length = blocksize - length;
+	iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 
-	ret = -ENOMEM;
 	page = grab_cache_page(mapping, index);
+	err = -ENOMEM;
 	if (!page)
 		goto out;
 
-	to = (offset + blocksize) & ~(blocksize - 1);
-	ret = a_ops->prepare_write(NULL, page, offset, to);
-	if (ret == 0) {
-		zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
-				KM_USER0);
-		/*
-		 * It would be more correct to call aops->commit_write()
-		 * here, but this is more efficient.
-		 */
-		SetPageUptodate(page);
-		set_page_dirty(page);
+	if (page_has_buffers(page)) {
+has_buffers:
+		unlock_page(page);
+		page_cache_release(page);
+		return block_truncate_page(mapping, from, get_block);
 	}
+
+	/* Find the block that contains "offset" */
+	iblock += offset >> inode->i_blkbits;
+
+	/* map_bh lives on the stack: give get_block() a clean state/size */
+	map_bh.b_state = 0;
+	map_bh.b_size = blocksize;
+
+	err = get_block(inode, iblock, &map_bh, 0);
+	if (err)
+		goto unlock;
+	/* unmapped? It's a hole - nothing to do */
+	if (!buffer_mapped(&map_bh))
+		goto unlock;
+
+	/* Ok, it's mapped. Make sure it's up-to-date */
+	if (!PageUptodate(page)) {
+		err = mapping->a_ops->readpage(NULL, page);
+		if (err) {
+			page_cache_release(page);
+			goto out;
+		}
+		lock_page(page);
+		if (!PageUptodate(page)) {
+			err = -EIO;
+			goto unlock;
+		}
+		if (page_has_buffers(page))
+			goto has_buffers;
+	}
+	zero_user_page(page, offset, length, KM_USER0);
+	set_page_dirty(page);
+	err = 0;
+
+unlock:
 	unlock_page(page);
 	page_cache_release(page);
 out:
-	return ret;
+	return err;
 }
 EXPORT_SYMBOL(nobh_truncate_page);
 
@@ -2956,7 +3169,8 @@ static void recalc_bh_state(void)
 	
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
 {
-	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
+	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep,
+				set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
 	if (ret) {
 		INIT_LIST_HEAD(&ret->b_assoc_buffers);
 		get_cpu_var(bh_accounting).nr++;
@@ -3024,14 +3238,13 @@ EXPORT_SYMBOL(block_read_full_page);
 EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
 EXPORT_SYMBOL(block_write_full_page);
-EXPORT_SYMBOL(cont_prepare_write);
+EXPORT_SYMBOL(cont_write_begin);
 EXPORT_SYMBOL(end_buffer_read_sync);
 EXPORT_SYMBOL(end_buffer_write_sync);
 EXPORT_SYMBOL(file_fsync);
 EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_commit_write);
-EXPORT_SYMBOL(generic_cont_expand);
 EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(invalidate_bdev);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index ddc003a9d21..dbd257d956c 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -41,8 +41,8 @@ extern struct super_block * configfs_sb;
 
 static const struct address_space_operations configfs_aops = {
 	.readpage	= simple_readpage,
-	.prepare_write	= simple_prepare_write,
-	.commit_write	= simple_commit_write
+	.write_begin	= simple_write_begin,
+	.write_end	= simple_write_end,
 };
 
 static struct backing_dev_info configfs_backing_dev_info = {
diff --git a/fs/dcache.c b/fs/dcache.c
index 678d39deb60..7da0cf50873 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -903,7 +903,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	struct dentry *dentry;
 	char *dname;
 
-	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL); 
+	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
 	if (!dentry)
 		return NULL;
 
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index a9b99c0dc2e..fa6b7f7ff91 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -227,15 +227,24 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set, "0x%04llx\n"
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n");
 
-/**
- * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value
- * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value
- * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value
+/*
+ * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value
  *
- * These functions are exactly the same as the above functions, (but use a hex
- * output for the decimal challenged) for details look at the above unsigned
+ * These functions are exactly the same as the above functions (but use a hex
+ * output for the decimal challenged). For details look at the above unsigned
  * decimal functions.
  */
+
+/**
+ * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is %NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read from and write
+ *         to.
+ */
 struct dentry *debugfs_create_x8(const char *name, mode_t mode,
 				 struct dentry *parent, u8 *value)
 {
@@ -243,6 +252,16 @@ struct dentry *debugfs_create_x8(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_x8);
 
+/**
+ * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is %NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read from and write
+ *         to.
+ */
 struct dentry *debugfs_create_x16(const char *name, mode_t mode,
 				 struct dentry *parent, u16 *value)
 {
@@ -250,6 +269,16 @@ struct dentry *debugfs_create_x16(const char *name, mode_t mode,
 }
 EXPORT_SYMBOL_GPL(debugfs_create_x16);
 
+/**
+ * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file.  This should be a
+ *          directory dentry if set.  If this parameter is %NULL, then the
+ *          file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read from and write
+ *         to.
+ */
 struct dentry *debugfs_create_x32(const char *name, mode_t mode,
 				 struct dentry *parent, u32 *value)
 {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b5928a7b6a5..acf0da1bd25 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -163,7 +163,7 @@ static int dio_refill_pages(struct dio *dio)
 	up_read(&current->mm->mmap_sem);
 
 	if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
-		struct page *page = ZERO_PAGE(dio->curr_user_address);
+		struct page *page = ZERO_PAGE(0);
 		/*
 		 * A memory fault, but the filesystem has some outstanding
 		 * mapped blocks.  We need to use those blocks up to avoid
@@ -763,7 +763,7 @@ static void dio_zero_block(struct dio *dio, int end)
 
 	this_chunk_bytes = this_chunk_blocks << dio->blkbits;
 
-	page = ZERO_PAGE(dio->curr_user_address);
+	page = ZERO_PAGE(0);
 	if (submit_page_section(dio, page, 0, this_chunk_bytes, 
 				dio->next_block_for_io))
 		return;
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1f1107237ea..76885701551 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
 
-ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o messaging.o netlink.o debug.o
+ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 6ac630625b7..1ae90ef2c74 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -123,9 +123,9 @@ out:
 	return rc;
 }
 
-int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
-					   char *cipher_name,
-					   char *chaining_modifier)
+static int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
+						  char *cipher_name,
+						  char *chaining_modifier)
 {
 	int cipher_name_len = strlen(cipher_name);
 	int chaining_modifier_len = strlen(chaining_modifier);
@@ -149,7 +149,7 @@ out:
  * ecryptfs_derive_iv
  * @iv: destination for the derived iv vale
  * @crypt_stat: Pointer to crypt_stat struct for the current inode
- * @offset: Offset of the page whose's iv we are to derive
+ * @offset: Offset of the extent whose IV we are to derive
  *
  * Generate the initialization vector from the given root IV and page
  * offset.
@@ -157,7 +157,7 @@ out:
  * Returns zero on success; non-zero on error.
  */
 static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
-			      pgoff_t offset)
+			      loff_t offset)
 {
 	int rc = 0;
 	char dst[MD5_DIGEST_SIZE];
@@ -173,7 +173,7 @@ static int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
 	 * hashing business. -Halcrow */
 	memcpy(src, crypt_stat->root_iv, crypt_stat->iv_bytes);
 	memset((src + crypt_stat->iv_bytes), 0, 16);
-	snprintf((src + crypt_stat->iv_bytes), 16, "%ld", offset);
+	snprintf((src + crypt_stat->iv_bytes), 16, "%lld", offset);
 	if (unlikely(ecryptfs_verbosity > 0)) {
 		ecryptfs_printk(KERN_DEBUG, "source:\n");
 		ecryptfs_dump_hex(src, (crypt_stat->iv_bytes + 16));
@@ -204,6 +204,8 @@ void
 ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 {
 	memset((void *)crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
+	INIT_LIST_HEAD(&crypt_stat->keysig_list);
+	mutex_init(&crypt_stat->keysig_list_mutex);
 	mutex_init(&crypt_stat->cs_mutex);
 	mutex_init(&crypt_stat->cs_tfm_mutex);
 	mutex_init(&crypt_stat->cs_hash_tfm_mutex);
@@ -211,27 +213,48 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 }
 
 /**
- * ecryptfs_destruct_crypt_stat
+ * ecryptfs_destroy_crypt_stat
  * @crypt_stat: Pointer to the crypt_stat struct to initialize.
  *
  * Releases all memory associated with a crypt_stat struct.
  */
-void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
+void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
 {
+	struct ecryptfs_key_sig *key_sig, *key_sig_tmp;
+
 	if (crypt_stat->tfm)
 		crypto_free_blkcipher(crypt_stat->tfm);
 	if (crypt_stat->hash_tfm)
 		crypto_free_hash(crypt_stat->hash_tfm);
+	mutex_lock(&crypt_stat->keysig_list_mutex);
+	list_for_each_entry_safe(key_sig, key_sig_tmp,
+				 &crypt_stat->keysig_list, crypt_stat_list) {
+		list_del(&key_sig->crypt_stat_list);
+		kmem_cache_free(ecryptfs_key_sig_cache, key_sig);
+	}
+	mutex_unlock(&crypt_stat->keysig_list_mutex);
 	memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
 }
 
-void ecryptfs_destruct_mount_crypt_stat(
+void ecryptfs_destroy_mount_crypt_stat(
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
 {
-	if (mount_crypt_stat->global_auth_tok_key)
-		key_put(mount_crypt_stat->global_auth_tok_key);
-	if (mount_crypt_stat->global_key_tfm)
-		crypto_free_blkcipher(mount_crypt_stat->global_key_tfm);
+	struct ecryptfs_global_auth_tok *auth_tok, *auth_tok_tmp;
+
+	if (!(mount_crypt_stat->flags & ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED))
+		return;
+	mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
+	list_for_each_entry_safe(auth_tok, auth_tok_tmp,
+				 &mount_crypt_stat->global_auth_tok_list,
+				 mount_crypt_stat_list) {
+		list_del(&auth_tok->mount_crypt_stat_list);
+		mount_crypt_stat->num_global_auth_toks--;
+		if (auth_tok->global_auth_tok_key
+		    && !(auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID))
+			key_put(auth_tok->global_auth_tok_key);
+		kmem_cache_free(ecryptfs_global_auth_tok_cache, auth_tok);
+	}
+	mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
 	memset(mount_crypt_stat, 0, sizeof(struct ecryptfs_mount_crypt_stat));
 }
 
@@ -330,114 +353,82 @@ out:
 	return rc;
 }
 
-static void
-ecryptfs_extent_to_lwr_pg_idx_and_offset(unsigned long *lower_page_idx,
-					 int *byte_offset,
-					 struct ecryptfs_crypt_stat *crypt_stat,
-					 unsigned long extent_num)
+/**
+ * ecryptfs_lower_offset_for_extent
+ *
+ * Convert an eCryptfs extent number into a lower byte offset
+ */
+void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num,
+				      struct ecryptfs_crypt_stat *crypt_stat)
 {
-	unsigned long lower_extent_num;
-	int extents_occupied_by_headers_at_front;
-	int bytes_occupied_by_headers_at_front;
-	int extent_offset;
-	int extents_per_page;
-
-	bytes_occupied_by_headers_at_front =
-		( crypt_stat->header_extent_size
-		  * crypt_stat->num_header_extents_at_front );
-	extents_occupied_by_headers_at_front =
-		( bytes_occupied_by_headers_at_front
-		  / crypt_stat->extent_size );
-	lower_extent_num = extents_occupied_by_headers_at_front + extent_num;
-	extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
-	(*lower_page_idx) = lower_extent_num / extents_per_page;
-	extent_offset = lower_extent_num % extents_per_page;
-	(*byte_offset) = extent_offset * crypt_stat->extent_size;
-	ecryptfs_printk(KERN_DEBUG, " * crypt_stat->header_extent_size = "
-			"[%d]\n", crypt_stat->header_extent_size);
-	ecryptfs_printk(KERN_DEBUG, " * crypt_stat->"
-			"num_header_extents_at_front = [%d]\n",
-			crypt_stat->num_header_extents_at_front);
-	ecryptfs_printk(KERN_DEBUG, " * extents_occupied_by_headers_at_"
-			"front = [%d]\n", extents_occupied_by_headers_at_front);
-	ecryptfs_printk(KERN_DEBUG, " * lower_extent_num = [0x%.16x]\n",
-			lower_extent_num);
-	ecryptfs_printk(KERN_DEBUG, " * extents_per_page = [%d]\n",
-			extents_per_page);
-	ecryptfs_printk(KERN_DEBUG, " * (*lower_page_idx) = [0x%.16x]\n",
-			(*lower_page_idx));
-	ecryptfs_printk(KERN_DEBUG, " * extent_offset = [%d]\n",
-			extent_offset);
-	ecryptfs_printk(KERN_DEBUG, " * (*byte_offset) = [%d]\n",
-			(*byte_offset));
+	(*offset) = ((crypt_stat->extent_size
+		      * crypt_stat->num_header_extents_at_front)
+		     + (crypt_stat->extent_size * extent_num));
 }
 
-static int ecryptfs_write_out_page(struct ecryptfs_page_crypt_context *ctx,
-				   struct page *lower_page,
-				   struct inode *lower_inode,
-				   int byte_offset_in_page, int bytes_to_write)
+/**
+ * ecryptfs_encrypt_extent
+ * @enc_extent_page: Allocated page into which to encrypt the data in
+ *                   @page
+ * @crypt_stat: crypt_stat containing cryptographic context for the
+ *              encryption operation
+ * @page: Page containing plaintext data extent to encrypt
+ * @extent_offset: Page extent offset for use in generating IV
+ *
+ * Encrypts one extent of data.
+ *
+ * Return zero on success; non-zero otherwise
+ */
+static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
+				   struct ecryptfs_crypt_stat *crypt_stat,
+				   struct page *page,
+				   unsigned long extent_offset)
 {
-	int rc = 0;
+	loff_t extent_base;
+	char extent_iv[ECRYPTFS_MAX_IV_BYTES];
+	int rc;
 
-	if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) {
-		rc = ecryptfs_commit_lower_page(lower_page, lower_inode,
-						ctx->param.lower_file,
-						byte_offset_in_page,
-						bytes_to_write);
-		if (rc) {
-			ecryptfs_printk(KERN_ERR, "Error calling lower "
-					"commit; rc = [%d]\n", rc);
-			goto out;
-		}
-	} else {
-		rc = ecryptfs_writepage_and_release_lower_page(lower_page,
-							       lower_inode,
-							       ctx->param.wbc);
-		if (rc) {
-			ecryptfs_printk(KERN_ERR, "Error calling lower "
-					"writepage(); rc = [%d]\n", rc);
-			goto out;
-		}
+	extent_base = (((loff_t)page->index)
+		       * (PAGE_CACHE_SIZE / crypt_stat->extent_size));
+	rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
+				(extent_base + extent_offset));
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error attempting to "
+				"derive IV for extent [0x%.16llx]; "
+				"rc = [%d]\n", (unsigned long long)
+				(extent_base + extent_offset), rc);
+		goto out;
 	}
-out:
-	return rc;
-}
-
-static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx,
-				 struct page **lower_page,
-				 struct inode *lower_inode,
-				 unsigned long lower_page_idx,
-				 int byte_offset_in_page)
-{
-	int rc = 0;
-
-	if (ctx->mode == ECRYPTFS_PREPARE_COMMIT_MODE) {
-		/* TODO: Limit this to only the data extents that are
-		 * needed */
-		rc = ecryptfs_get_lower_page(lower_page, lower_inode,
-					     ctx->param.lower_file,
-					     lower_page_idx,
-					     byte_offset_in_page,
-					     (PAGE_CACHE_SIZE
-					      - byte_offset_in_page));
-		if (rc) {
-			ecryptfs_printk(
-				KERN_ERR, "Error attempting to grab, map, "
-				"and prepare_write lower page with index "
-				"[0x%.16x]; rc = [%d]\n", lower_page_idx, rc);
-			goto out;
-		}
-	} else {
-		*lower_page = grab_cache_page(lower_inode->i_mapping,
-					      lower_page_idx);
-		if (!(*lower_page)) {
-			rc = -EINVAL;
-			ecryptfs_printk(
-				KERN_ERR, "Error attempting to grab and map "
-				"lower page with index [0x%.16x]; rc = [%d]\n",
-				lower_page_idx, rc);
-			goto out;
-		}
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(KERN_DEBUG, "Encrypting extent "
+				"with iv:\n");
+		ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
+		ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
+				"encryption:\n");
+		ecryptfs_dump_hex((char *)
+				  (page_address(page)
+				   + (extent_offset * crypt_stat->extent_size)),
+				  8);
+	}
+	rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0,
+					  page, (extent_offset
+						 * crypt_stat->extent_size),
+					  crypt_stat->extent_size, extent_iv);
+	if (rc < 0) {
+		printk(KERN_ERR "%s: Error attempting to encrypt page with "
+		       "page->index = [%ld], extent_offset = [%ld]; "
+		       "rc = [%d]\n", __FUNCTION__, page->index, extent_offset,
+		       rc);
+		goto out;
+	}
+	rc = 0;
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; "
+				"rc = [%d]\n", (unsigned long long)
+				(extent_base + extent_offset), rc);
+		ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
+				"encryption:\n");
+		ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8);
 	}
 out:
 	return rc;
@@ -445,7 +436,9 @@ out:
 
 /**
  * ecryptfs_encrypt_page
- * @ctx: The context of the page
+ * @page: Page mapped from the eCryptfs inode for the file; contains
+ *        decrypted content that needs to be encrypted (to a temporary
+ *        page; not in place) and written out to the lower file
  *
  * Encrypt an eCryptfs page. This is done on a per-extent basis. Note
  * that eCryptfs pages may straddle the lower pages -- for instance,
@@ -455,128 +448,122 @@ out:
  * file, 24K of page 0 of the lower file will be read and decrypted,
  * and then 8K of page 1 of the lower file will be read and decrypted.
  *
- * The actual operations performed on each page depends on the
- * contents of the ecryptfs_page_crypt_context struct.
- *
  * Returns zero on success; negative on error
  */
-int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx)
+int ecryptfs_encrypt_page(struct page *page)
 {
-	char extent_iv[ECRYPTFS_MAX_IV_BYTES];
-	unsigned long base_extent;
-	unsigned long extent_offset = 0;
-	unsigned long lower_page_idx = 0;
-	unsigned long prior_lower_page_idx = 0;
-	struct page *lower_page;
-	struct inode *lower_inode;
-	struct ecryptfs_inode_info *inode_info;
+	struct inode *ecryptfs_inode;
 	struct ecryptfs_crypt_stat *crypt_stat;
+	char *enc_extent_virt = NULL;
+	struct page *enc_extent_page;
+	loff_t extent_offset;
 	int rc = 0;
-	int lower_byte_offset = 0;
-	int orig_byte_offset = 0;
-	int num_extents_per_page;
-#define ECRYPTFS_PAGE_STATE_UNREAD    0
-#define ECRYPTFS_PAGE_STATE_READ      1
-#define ECRYPTFS_PAGE_STATE_MODIFIED  2
-#define ECRYPTFS_PAGE_STATE_WRITTEN   3
-	int page_state;
-
-	lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host);
-	inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host);
-	crypt_stat = &inode_info->crypt_stat;
+
+	ecryptfs_inode = page->mapping->host;
+	crypt_stat =
+		&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
 	if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
-		rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode,
-						 ctx->param.lower_file);
+		rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page,
+						       0, PAGE_CACHE_SIZE);
 		if (rc)
-			ecryptfs_printk(KERN_ERR, "Error attempting to copy "
-					"page at index [0x%.16x]\n",
-					ctx->page->index);
+			printk(KERN_ERR "%s: Error attempting to copy "
+			       "page at index [%ld]\n", __FUNCTION__,
+			       page->index);
 		goto out;
 	}
-	num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
-	base_extent = (ctx->page->index * num_extents_per_page);
-	page_state = ECRYPTFS_PAGE_STATE_UNREAD;
-	while (extent_offset < num_extents_per_page) {
-		ecryptfs_extent_to_lwr_pg_idx_and_offset(
-			&lower_page_idx, &lower_byte_offset, crypt_stat,
-			(base_extent + extent_offset));
-		if (prior_lower_page_idx != lower_page_idx
-		    && page_state == ECRYPTFS_PAGE_STATE_MODIFIED) {
-			rc = ecryptfs_write_out_page(ctx, lower_page,
-						     lower_inode,
-						     orig_byte_offset,
-						     (PAGE_CACHE_SIZE
-						      - orig_byte_offset));
-			if (rc) {
-				ecryptfs_printk(KERN_ERR, "Error attempting "
-						"to write out page; rc = [%d]"
-						"\n", rc);
-				goto out;
-			}
-			page_state = ECRYPTFS_PAGE_STATE_WRITTEN;
-		}
-		if (page_state == ECRYPTFS_PAGE_STATE_UNREAD
-		    || page_state == ECRYPTFS_PAGE_STATE_WRITTEN) {
-			rc = ecryptfs_read_in_page(ctx, &lower_page,
-						   lower_inode, lower_page_idx,
-						   lower_byte_offset);
-			if (rc) {
-				ecryptfs_printk(KERN_ERR, "Error attempting "
-						"to read in lower page with "
-						"index [0x%.16x]; rc = [%d]\n",
-						lower_page_idx, rc);
-				goto out;
-			}
-			orig_byte_offset = lower_byte_offset;
-			prior_lower_page_idx = lower_page_idx;
-			page_state = ECRYPTFS_PAGE_STATE_READ;
-		}
-		BUG_ON(!(page_state == ECRYPTFS_PAGE_STATE_MODIFIED
-			 || page_state == ECRYPTFS_PAGE_STATE_READ));
-		rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
-					(base_extent + extent_offset));
+	enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER);
+	if (!enc_extent_virt) {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Error allocating memory for "
+				"encrypted extent\n");
+		goto out;
+	}
+	enc_extent_page = virt_to_page(enc_extent_virt);
+	extent_offset = 0;
+	/* extent_offset is advanced once per pass, at the end of the body */
+	while (extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size)) {
+		loff_t offset;
+
+		rc = ecryptfs_encrypt_extent(enc_extent_page, crypt_stat, page,
+					     extent_offset);
 		if (rc) {
-			ecryptfs_printk(KERN_ERR, "Error attempting to "
-					"derive IV for extent [0x%.16x]; "
-					"rc = [%d]\n",
-					(base_extent + extent_offset), rc);
+			printk(KERN_ERR "%s: Error encrypting extent; "
+			       "rc = [%d]\n", __FUNCTION__, rc);
 			goto out;
 		}
-		if (unlikely(ecryptfs_verbosity > 0)) {
-			ecryptfs_printk(KERN_DEBUG, "Encrypting extent "
-					"with iv:\n");
-			ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
-			ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
-					"encryption:\n");
-			ecryptfs_dump_hex((char *)
-					  (page_address(ctx->page)
-					   + (extent_offset
-					      * crypt_stat->extent_size)), 8);
-		}
-		rc = ecryptfs_encrypt_page_offset(
-			crypt_stat, lower_page, lower_byte_offset, ctx->page,
-			(extent_offset * crypt_stat->extent_size),
-			crypt_stat->extent_size, extent_iv);
-		ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16x]; "
-				"rc = [%d]\n",
-				(base_extent + extent_offset), rc);
-		if (unlikely(ecryptfs_verbosity > 0)) {
-			ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
-					"encryption:\n");
-			ecryptfs_dump_hex((char *)(page_address(lower_page)
-						   + lower_byte_offset), 8);
+		ecryptfs_lower_offset_for_extent(
+			&offset, ((((loff_t)page->index)
+				   * (PAGE_CACHE_SIZE
+				      / crypt_stat->extent_size))
+				  + extent_offset), crypt_stat);
+		rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt,
+					  offset, crypt_stat->extent_size);
+		if (rc) {
+			ecryptfs_printk(KERN_ERR, "Error attempting "
+					"to write lower page; rc = [%d]"
+					"\n", rc);
+			goto out;
 		}
-		page_state = ECRYPTFS_PAGE_STATE_MODIFIED;
 		extent_offset++;
 	}
-	BUG_ON(orig_byte_offset != 0);
-	rc = ecryptfs_write_out_page(ctx, lower_page, lower_inode, 0,
-				     (lower_byte_offset
-				      + crypt_stat->extent_size));
+out:
+	kfree(enc_extent_virt);
+	return rc;
+}
+
+static int ecryptfs_decrypt_extent(struct page *page,
+				   struct ecryptfs_crypt_stat *crypt_stat,
+				   struct page *enc_extent_page,
+				   unsigned long extent_offset)
+{
+	loff_t extent_base;
+	char extent_iv[ECRYPTFS_MAX_IV_BYTES];
+	int rc;
+
+	extent_base = (((loff_t)page->index)
+		       * (PAGE_CACHE_SIZE / crypt_stat->extent_size));
+	rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
+				(extent_base + extent_offset));
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error attempting to write out "
-				"page; rc = [%d]\n", rc);
-				goto out;
+		ecryptfs_printk(KERN_ERR, "Error attempting to "
+				"derive IV for extent [0x%.16llx]; "
+				"rc = [%d]\n", (unsigned long long)
+				(extent_base + extent_offset), rc);
+		goto out;
+	}
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
+				"with iv:\n");
+		ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
+		ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
+				"decryption:\n");
+		ecryptfs_dump_hex((char *)
+				  (page_address(enc_extent_page)
+				   + (extent_offset * crypt_stat->extent_size)),
+				  8);
+	}
+	rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
+					  (extent_offset
+					   * crypt_stat->extent_size),
+					  enc_extent_page, 0,
+					  crypt_stat->extent_size, extent_iv);
+	if (rc < 0) {
+		printk(KERN_ERR "%s: Error attempting to decrypt to page with "
+		       "page->index = [%ld], extent_offset = [%ld]; "
+		       "rc = [%d]\n", __FUNCTION__, page->index, extent_offset,
+		       rc);
+		goto out;
+	}
+	rc = 0;
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; "
+				"rc = [%d]\n", (unsigned long long)
+				(extent_base + extent_offset), rc);
+		ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
+				"decryption:\n");
+		ecryptfs_dump_hex((char *)(page_address(page)
+					   + (extent_offset
+					      * crypt_stat->extent_size)), 8);
 	}
 out:
 	return rc;
@@ -584,8 +571,9 @@ out:
 
 /**
  * ecryptfs_decrypt_page
- * @file: The ecryptfs file
- * @page: The page in ecryptfs to decrypt
+ * @page: Page mapped from the eCryptfs inode for the file; data read
+ *        and decrypted from the lower file will be written into this
+ *        page
  *
  * Decrypt an eCryptfs page. This is done on a per-extent basis. Note
  * that eCryptfs pages may straddle the lower pages -- for instance,
@@ -597,108 +585,75 @@ out:
  *
  * Returns zero on success; negative on error
  */
-int ecryptfs_decrypt_page(struct file *file, struct page *page)
+int ecryptfs_decrypt_page(struct page *page)
 {
-	char extent_iv[ECRYPTFS_MAX_IV_BYTES];
-	unsigned long base_extent;
-	unsigned long extent_offset = 0;
-	unsigned long lower_page_idx = 0;
-	unsigned long prior_lower_page_idx = 0;
-	struct page *lower_page;
-	char *lower_page_virt = NULL;
-	struct inode *lower_inode;
+	struct inode *ecryptfs_inode;
 	struct ecryptfs_crypt_stat *crypt_stat;
+	char *enc_extent_virt = NULL;
+	struct page *enc_extent_page;
+	unsigned long extent_offset;
 	int rc = 0;
-	int byte_offset;
-	int num_extents_per_page;
-	int page_state;
 
-	crypt_stat = &(ecryptfs_inode_to_private(
-			       page->mapping->host)->crypt_stat);
-	lower_inode = ecryptfs_inode_to_lower(page->mapping->host);
+	ecryptfs_inode = page->mapping->host;
+	crypt_stat =
+		&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
 	if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
-		rc = ecryptfs_do_readpage(file, page, page->index);
+		rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
+						      PAGE_CACHE_SIZE,
+						      ecryptfs_inode);
 		if (rc)
-			ecryptfs_printk(KERN_ERR, "Error attempting to copy "
-					"page at index [0x%.16x]\n",
-					page->index);
+			printk(KERN_ERR "%s: Error attempting to copy "
+			       "page at index [%ld]\n", __FUNCTION__,
+			       page->index);
 		goto out;
 	}
-	num_extents_per_page = PAGE_CACHE_SIZE / crypt_stat->extent_size;
-	base_extent = (page->index * num_extents_per_page);
-	lower_page_virt = kmem_cache_alloc(ecryptfs_lower_page_cache,
-					   GFP_KERNEL);
-	if (!lower_page_virt) {
+	enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER);
+	if (!enc_extent_virt) {
 		rc = -ENOMEM;
-		ecryptfs_printk(KERN_ERR, "Error getting page for encrypted "
-				"lower page(s)\n");
+		ecryptfs_printk(KERN_ERR, "Error allocating memory for "
+				"encrypted extent\n");
 		goto out;
 	}
-	lower_page = virt_to_page(lower_page_virt);
-	page_state = ECRYPTFS_PAGE_STATE_UNREAD;
-	while (extent_offset < num_extents_per_page) {
-		ecryptfs_extent_to_lwr_pg_idx_and_offset(
-			&lower_page_idx, &byte_offset, crypt_stat,
-			(base_extent + extent_offset));
-		if (prior_lower_page_idx != lower_page_idx
-		    || page_state == ECRYPTFS_PAGE_STATE_UNREAD) {
-			rc = ecryptfs_do_readpage(file, lower_page,
-						  lower_page_idx);
-			if (rc) {
-				ecryptfs_printk(KERN_ERR, "Error reading "
-						"lower encrypted page; rc = "
-						"[%d]\n", rc);
-				goto out;
-			}
-			prior_lower_page_idx = lower_page_idx;
-			page_state = ECRYPTFS_PAGE_STATE_READ;
-		}
-		rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
-					(base_extent + extent_offset));
+	enc_extent_page = virt_to_page(enc_extent_virt);
+	extent_offset = 0;
+	/* extent_offset is advanced once per pass, at the end of the body */
+	while (extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size)) {
+		loff_t offset;
+
+		ecryptfs_lower_offset_for_extent(
+			&offset, ((((loff_t)page->index)
+				   * (PAGE_CACHE_SIZE / crypt_stat->extent_size))
+				  + extent_offset), crypt_stat);
+		rc = ecryptfs_read_lower(enc_extent_virt, offset,
+					 crypt_stat->extent_size,
+					 ecryptfs_inode);
 		if (rc) {
-			ecryptfs_printk(KERN_ERR, "Error attempting to "
-					"derive IV for extent [0x%.16x]; rc = "
-					"[%d]\n",
-					(base_extent + extent_offset), rc);
+			ecryptfs_printk(KERN_ERR, "Error attempting "
+					"to read lower page; rc = [%d]"
+					"\n", rc);
 			goto out;
 		}
-		if (unlikely(ecryptfs_verbosity > 0)) {
-			ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
-					"with iv:\n");
-			ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
-			ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
-					"decryption:\n");
-			ecryptfs_dump_hex((lower_page_virt + byte_offset), 8);
-		}
-		rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
-						  (extent_offset
-						   * crypt_stat->extent_size),
-						  lower_page, byte_offset,
-						  crypt_stat->extent_size,
-						  extent_iv);
-		if (rc != crypt_stat->extent_size) {
-			ecryptfs_printk(KERN_ERR, "Error attempting to "
-					"decrypt extent [0x%.16x]\n",
-					(base_extent + extent_offset));
+		rc = ecryptfs_decrypt_extent(page, crypt_stat, enc_extent_page,
+					     extent_offset);
+		if (rc) {
+			printk(KERN_ERR "%s: Error decrypting extent; "
+			       "rc = [%d]\n", __FUNCTION__, rc);
 			goto out;
 		}
-		rc = 0;
-		if (unlikely(ecryptfs_verbosity > 0)) {
-			ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
-					"decryption:\n");
-			ecryptfs_dump_hex((char *)(page_address(page)
-						   + byte_offset), 8);
-		}
 		extent_offset++;
 	}
 out:
-	if (lower_page_virt)
-		kmem_cache_free(ecryptfs_lower_page_cache, lower_page_virt);
+	kfree(enc_extent_virt);
 	return rc;
 }
 
 /**
  * decrypt_scatterlist
+ * @crypt_stat: Cryptographic context
+ * @dest_sg: The destination scatterlist to decrypt into
+ * @src_sg: The source scatterlist to decrypt from
+ * @size: The number of bytes to decrypt
+ * @iv: The initialization vector to use for the decryption
  *
  * Returns the number of bytes decrypted; negative value on error
  */
@@ -740,6 +695,13 @@ out:
 
 /**
  * ecryptfs_encrypt_page_offset
+ * @crypt_stat: The cryptographic context
+ * @dst_page: The page to encrypt into
+ * @dst_offset: The offset in the page to encrypt into
+ * @src_page: The page to encrypt from
+ * @src_offset: The offset in the page to encrypt from
+ * @size: The number of bytes to encrypt
+ * @iv: The initialization vector to use for the encryption
  *
  * Returns the number of bytes encrypted
  */
@@ -762,6 +724,13 @@ ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat,
 
 /**
  * ecryptfs_decrypt_page_offset
+ * @crypt_stat: The cryptographic context
+ * @dst_page: The page to decrypt into
+ * @dst_offset: The offset in the page to decrypt into
+ * @src_page: The page to decrypt from
+ * @src_offset: The offset in the page to decrypt from
+ * @size: The number of bytes to decrypt
+ * @iv: The initialization vector to use for the decryption
  *
  * Returns the number of bytes decrypted
  */
@@ -857,15 +826,17 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
 	crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
 	set_extent_mask_and_shift(crypt_stat);
 	crypt_stat->iv_bytes = ECRYPTFS_DEFAULT_IV_BYTES;
-	if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
-		crypt_stat->header_extent_size =
-			ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
-	} else
-		crypt_stat->header_extent_size = PAGE_CACHE_SIZE;
 	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
 		crypt_stat->num_header_extents_at_front = 0;
-	else
-		crypt_stat->num_header_extents_at_front = 1;
+	else {
+		if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
+			crypt_stat->num_header_extents_at_front =
+				(ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE
+				 / crypt_stat->extent_size);
+		else
+			crypt_stat->num_header_extents_at_front =
+				(PAGE_CACHE_SIZE / crypt_stat->extent_size);
+	}
 }
 
 /**
@@ -917,6 +888,8 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
 
 /**
  * ecryptfs_copy_mount_wide_flags_to_inode_flags
+ * @crypt_stat: The inode's cryptographic context
+ * @mount_crypt_stat: The mount point's cryptographic context
  *
  * This function propagates the mount-wide flags to individual inode
  * flags.
@@ -931,9 +904,34 @@ static void ecryptfs_copy_mount_wide_flags_to_inode_flags(
 		crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED;
 }
 
+static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
+	struct ecryptfs_crypt_stat *crypt_stat,
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+	struct ecryptfs_global_auth_tok *global_auth_tok;
+	int rc = 0;
+
+	mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
+	list_for_each_entry(global_auth_tok,
+			    &mount_crypt_stat->global_auth_tok_list,
+			    mount_crypt_stat_list) {
+		rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig);
+		if (rc) {
+			printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc);
+			mutex_unlock(
+				&mount_crypt_stat->global_auth_tok_list_mutex);
+			goto out;
+		}
+	}
+	mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
+out:
+	return rc;
+}
+
 /**
  * ecryptfs_set_default_crypt_stat_vals
- * @crypt_stat
+ * @crypt_stat: The inode's cryptographic context
+ * @mount_crypt_stat: The mount point's cryptographic context
  *
  * Default values in the event that policy does not override them.
  */
@@ -953,7 +951,7 @@ static void ecryptfs_set_default_crypt_stat_vals(
 
 /**
  * ecryptfs_new_file_context
- * @ecryptfs_dentry
+ * @ecryptfs_dentry: The eCryptfs dentry
  *
  * If the crypto context for the file has not yet been established,
  * this is where we do that.  Establishing a new crypto context
@@ -970,49 +968,42 @@ static void ecryptfs_set_default_crypt_stat_vals(
  *
  * Returns zero on success; non-zero otherwise
  */
-/* Associate an authentication token(s) with the file */
 int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
 {
-	int rc = 0;
 	struct ecryptfs_crypt_stat *crypt_stat =
 	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
 	    &ecryptfs_superblock_to_private(
 		    ecryptfs_dentry->d_sb)->mount_crypt_stat;
 	int cipher_name_len;
+	int rc = 0;
 
 	ecryptfs_set_default_crypt_stat_vals(crypt_stat, mount_crypt_stat);
-	/* See if there are mount crypt options */
-	if (mount_crypt_stat->global_auth_tok) {
-		ecryptfs_printk(KERN_DEBUG, "Initializing context for new "
-				"file using mount_crypt_stat\n");
-		crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
-		crypt_stat->flags |= ECRYPTFS_KEY_VALID;
-		ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
-							      mount_crypt_stat);
-		memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++],
-		       mount_crypt_stat->global_auth_tok_sig,
-		       ECRYPTFS_SIG_SIZE_HEX);
-		cipher_name_len =
-		    strlen(mount_crypt_stat->global_default_cipher_name);
-		memcpy(crypt_stat->cipher,
-		       mount_crypt_stat->global_default_cipher_name,
-		       cipher_name_len);
-		crypt_stat->cipher[cipher_name_len] = '\0';
-		crypt_stat->key_size =
-			mount_crypt_stat->global_default_cipher_key_size;
-		ecryptfs_generate_new_key(crypt_stat);
-	} else
-		/* We should not encounter this scenario since we
-		 * should detect lack of global_auth_tok at mount time
-		 * TODO: Applies to 0.1 release only; remove in future
-		 * release */
-		BUG();
+	crypt_stat->flags |= (ECRYPTFS_ENCRYPTED | ECRYPTFS_KEY_VALID);
+	ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
+						      mount_crypt_stat);
+	rc = ecryptfs_copy_mount_wide_sigs_to_inode_sigs(crypt_stat,
+							 mount_crypt_stat);
+	if (rc) {
+		printk(KERN_ERR "Error attempting to copy mount-wide key sigs "
+		       "to the inode key sigs; rc = [%d]\n", rc);
+		goto out;
+	}
+	cipher_name_len =
+		strlen(mount_crypt_stat->global_default_cipher_name);
+	memcpy(crypt_stat->cipher,
+	       mount_crypt_stat->global_default_cipher_name,
+	       cipher_name_len);
+	crypt_stat->cipher[cipher_name_len] = '\0';
+	crypt_stat->key_size =
+		mount_crypt_stat->global_default_cipher_key_size;
+	ecryptfs_generate_new_key(crypt_stat);
 	rc = ecryptfs_init_crypt_ctx(crypt_stat);
 	if (rc)
 		ecryptfs_printk(KERN_ERR, "Error initializing cryptographic "
 				"context for cipher [%s]: rc = [%d]\n",
 				crypt_stat->cipher, rc);
+out:
 	return rc;
 }
 
@@ -1054,7 +1045,7 @@ static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
 
 /**
  * ecryptfs_process_flags
- * @crypt_stat
+ * @crypt_stat: The cryptographic context
  * @page_virt: Source data to be parsed
  * @bytes_read: Updated with the number of bytes read
  *
@@ -1142,7 +1133,7 @@ ecryptfs_cipher_code_str_map[] = {
 
 /**
  * ecryptfs_code_for_cipher_string
- * @str: The string representing the cipher name
+ * @crypt_stat: The cryptographic context
  *
  * Returns zero on no match, or the cipher code on match
  */
@@ -1198,59 +1189,28 @@ int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
 	return rc;
 }
 
-/**
- * ecryptfs_read_header_region
- * @data
- * @dentry
- * @nd
- *
- * Returns zero on success; non-zero otherwise
- */
-static int ecryptfs_read_header_region(char *data, struct dentry *dentry,
-				       struct vfsmount *mnt)
+int ecryptfs_read_and_validate_header_region(char *data,
+					     struct inode *ecryptfs_inode)
 {
-	struct file *lower_file;
-	mm_segment_t oldfs;
+	struct ecryptfs_crypt_stat *crypt_stat =
+		&(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
 	int rc;
 
-	if ((rc = ecryptfs_open_lower_file(&lower_file, dentry, mnt,
-					   O_RDONLY))) {
-		printk(KERN_ERR
-		       "Error opening lower_file to read header region\n");
-		goto out;
-	}
-	lower_file->f_pos = 0;
-	oldfs = get_fs();
-	set_fs(get_ds());
-	/* For releases 0.1 and 0.2, all of the header information
-	 * fits in the first data extent-sized region. */
-	rc = lower_file->f_op->read(lower_file, (char __user *)data,
-			      ECRYPTFS_DEFAULT_EXTENT_SIZE, &lower_file->f_pos);
-	set_fs(oldfs);
-	if ((rc = ecryptfs_close_lower_file(lower_file))) {
-		printk(KERN_ERR "Error closing lower_file\n");
+	rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size,
+				 ecryptfs_inode);
+	if (rc) {
+		printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
+		       __FUNCTION__, rc);
 		goto out;
 	}
-	rc = 0;
-out:
-	return rc;
-}
-
-int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry,
-					     struct vfsmount *mnt)
-{
-	int rc;
-
-	rc = ecryptfs_read_header_region(data, dentry, mnt);
-	if (rc)
-		goto out;
-	if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES))
+	if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
 		rc = -EINVAL;
+		ecryptfs_printk(KERN_DEBUG, "Valid marker not found\n");
+	}
 out:
 	return rc;
 }
 
-
 void
 ecryptfs_write_header_metadata(char *virt,
 			       struct ecryptfs_crypt_stat *crypt_stat,
@@ -1259,7 +1219,7 @@ ecryptfs_write_header_metadata(char *virt,
 	u32 header_extent_size;
 	u16 num_header_extents_at_front;
 
-	header_extent_size = (u32)crypt_stat->header_extent_size;
+	header_extent_size = (u32)crypt_stat->extent_size;
 	num_header_extents_at_front =
 		(u16)crypt_stat->num_header_extents_at_front;
 	header_extent_size = cpu_to_be32(header_extent_size);
@@ -1276,9 +1236,10 @@ struct kmem_cache *ecryptfs_header_cache_2;
 
 /**
  * ecryptfs_write_headers_virt
- * @page_virt
- * @crypt_stat
- * @ecryptfs_dentry
+ * @page_virt: The virtual address to write the headers to
+ * @size: Set to the number of bytes written by this function
+ * @crypt_stat: The cryptographic context
+ * @ecryptfs_dentry: The eCryptfs dentry
  *
  * Format version: 1
  *
@@ -1332,53 +1293,50 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size,
 	return rc;
 }
 
-static int ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
-					       struct file *lower_file,
-					       char *page_virt)
+static int
+ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
+				    struct dentry *ecryptfs_dentry,
+				    char *page_virt)
 {
-	mm_segment_t oldfs;
 	int current_header_page;
 	int header_pages;
-	ssize_t size;
-	int rc = 0;
+	int rc;
 
-	lower_file->f_pos = 0;
-	oldfs = get_fs();
-	set_fs(get_ds());
-	size = vfs_write(lower_file, (char __user *)page_virt, PAGE_CACHE_SIZE,
-			 &lower_file->f_pos);
-	if (size < 0) {
-		rc = (int)size;
-		printk(KERN_ERR "Error attempting to write lower page; "
-		       "rc = [%d]\n", rc);
-		set_fs(oldfs);
+	rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, page_virt,
+				  0, PAGE_CACHE_SIZE);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to write header "
+		       "information to lower file; rc = [%d]\n", __FUNCTION__,
+		       rc);
 		goto out;
 	}
-	header_pages = ((crypt_stat->header_extent_size
+	header_pages = ((crypt_stat->extent_size
 			 * crypt_stat->num_header_extents_at_front)
 			/ PAGE_CACHE_SIZE);
 	memset(page_virt, 0, PAGE_CACHE_SIZE);
 	current_header_page = 1;
 	while (current_header_page < header_pages) {
-		size = vfs_write(lower_file, (char __user *)page_virt,
-				 PAGE_CACHE_SIZE, &lower_file->f_pos);
-		if (size < 0) {
-			rc = (int)size;
-			printk(KERN_ERR "Error attempting to write lower page; "
-			       "rc = [%d]\n", rc);
-			set_fs(oldfs);
+		loff_t offset;
+
+		offset = (((loff_t)current_header_page) << PAGE_CACHE_SHIFT);
+		if ((rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode,
+					       page_virt, offset,
+					       PAGE_CACHE_SIZE))) {
+			printk(KERN_ERR "%s: Error attempting to write header "
+			       "information to lower file; rc = [%d]\n",
+			       __FUNCTION__, rc);
 			goto out;
 		}
 		current_header_page++;
 	}
-	set_fs(oldfs);
 out:
 	return rc;
 }
 
-static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
-					    struct ecryptfs_crypt_stat *crypt_stat,
-					    char *page_virt, size_t size)
+static int
+ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
+				 struct ecryptfs_crypt_stat *crypt_stat,
+				 char *page_virt, size_t size)
 {
 	int rc;
 
@@ -1389,7 +1347,7 @@ static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
 
 /**
  * ecryptfs_write_metadata
- * @lower_file: The lower file struct, which was returned from dentry_open
+ * @ecryptfs_dentry: The eCryptfs dentry
  *
  * Write the file headers out.  This will likely involve a userspace
  * callout, in which the session key is encrypted with one or more
@@ -1397,22 +1355,21 @@ static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
  * retrieved via a prompt.  Exactly what happens at this point should
  * be policy-dependent.
  *
+ * TODO: Support header information spanning multiple pages
+ *
  * Returns zero on success; non-zero on error
  */
-int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
-			    struct file *lower_file)
+int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry)
 {
-	struct ecryptfs_crypt_stat *crypt_stat;
+	struct ecryptfs_crypt_stat *crypt_stat =
+		&ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
 	char *page_virt;
-	size_t size;
+	size_t size = 0;
 	int rc = 0;
 
-	crypt_stat = &ecryptfs_inode_to_private(
-		ecryptfs_dentry->d_inode)->crypt_stat;
 	if (likely(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
 		if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
-			ecryptfs_printk(KERN_DEBUG, "Key is "
-					"invalid; bailing out\n");
+			printk(KERN_ERR "Key is invalid; bailing out\n");
 			rc = -EINVAL;
 			goto out;
 		}
@@ -1441,7 +1398,8 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
 						      crypt_stat, page_virt,
 						      size);
 	else
-		rc = ecryptfs_write_metadata_to_contents(crypt_stat, lower_file,
+		rc = ecryptfs_write_metadata_to_contents(crypt_stat,
+							 ecryptfs_dentry,
 							 page_virt);
 	if (rc) {
 		printk(KERN_ERR "Error writing metadata out to lower file; "
@@ -1464,28 +1422,28 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
 	u32 header_extent_size;
 	u16 num_header_extents_at_front;
 
-	memcpy(&header_extent_size, virt, 4);
+	memcpy(&header_extent_size, virt, sizeof(u32));
 	header_extent_size = be32_to_cpu(header_extent_size);
-	virt += 4;
-	memcpy(&num_header_extents_at_front, virt, 2);
+	virt += sizeof(u32);
+	memcpy(&num_header_extents_at_front, virt, sizeof(u16));
 	num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
-	crypt_stat->header_extent_size = (int)header_extent_size;
 	crypt_stat->num_header_extents_at_front =
 		(int)num_header_extents_at_front;
-	(*bytes_read) = 6;
+	(*bytes_read) = (sizeof(u32) + sizeof(u16));
 	if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
-	    && ((crypt_stat->header_extent_size
+	    && ((crypt_stat->extent_size
 		 * crypt_stat->num_header_extents_at_front)
 		< ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
 		rc = -EINVAL;
-		ecryptfs_printk(KERN_WARNING, "Invalid header extent size: "
-				"[%d]\n", crypt_stat->header_extent_size);
+		printk(KERN_WARNING "Invalid number of header extents: [%zd]\n",
+		       crypt_stat->num_header_extents_at_front);
 	}
 	return rc;
 }
 
 /**
  * set_default_header_data
+ * @crypt_stat: The cryptographic context
  *
  * For version 0 file format; this function is only for backwards
  * compatibility for files created with the prior versions of
@@ -1493,12 +1451,15 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
  */
 static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
 {
-	crypt_stat->header_extent_size = 4096;
-	crypt_stat->num_header_extents_at_front = 1;
+	crypt_stat->num_header_extents_at_front = 2;
 }
 
 /**
  * ecryptfs_read_headers_virt
+ * @page_virt: The virtual address into which to read the headers
+ * @crypt_stat: The cryptographic context
+ * @ecryptfs_dentry: The eCryptfs dentry
+ * @validate_header_size: Whether to validate the header size while reading
  *
  * Read/parse the header data. The header format is detailed in the
  * comment block for the ecryptfs_write_headers_virt() function.
@@ -1558,19 +1519,25 @@ out:
 
 /**
  * ecryptfs_read_xattr_region
+ * @page_virt: The virtual address into which to read the xattr data
+ * @ecryptfs_inode: The eCryptfs inode
  *
  * Attempts to read the crypto metadata from the extended attribute
  * region of the lower file.
+ *
+ * Returns zero on success; non-zero on error
  */
-int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry)
+int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode)
 {
+	struct dentry *lower_dentry =
+		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
 	ssize_t size;
 	int rc = 0;
 
-	size = ecryptfs_getxattr(ecryptfs_dentry, ECRYPTFS_XATTR_NAME,
-				 page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE);
+	size = ecryptfs_getxattr_lower(lower_dentry, ECRYPTFS_XATTR_NAME,
+				       page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE);
 	if (size < 0) {
-		printk(KERN_DEBUG "Error attempting to read the [%s] "
+		printk(KERN_ERR "Error attempting to read the [%s] "
 		       "xattr from the lower file; return value = [%zd]\n",
 		       ECRYPTFS_XATTR_NAME, size);
 		rc = -EINVAL;
@@ -1585,7 +1552,7 @@ int ecryptfs_read_and_validate_xattr_region(char *page_virt,
 {
 	int rc;
 
-	rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry);
+	rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry->d_inode);
 	if (rc)
 		goto out;
 	if (!contains_ecryptfs_marker(page_virt	+ ECRYPTFS_FILE_SIZE_BYTES)) {
@@ -1609,15 +1576,13 @@ out:
  *
  * Returns zero if valid headers found and parsed; non-zero otherwise
  */
-int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
-			   struct file *lower_file)
+int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 {
 	int rc = 0;
 	char *page_virt = NULL;
-	mm_segment_t oldfs;
-	ssize_t bytes_read;
+	struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
 	struct ecryptfs_crypt_stat *crypt_stat =
-	    &ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+	    &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
 		&ecryptfs_superblock_to_private(
 			ecryptfs_dentry->d_sb)->mount_crypt_stat;
@@ -1628,27 +1593,18 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
 	page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER);
 	if (!page_virt) {
 		rc = -ENOMEM;
-		ecryptfs_printk(KERN_ERR, "Unable to allocate page_virt\n");
+		printk(KERN_ERR "%s: Unable to allocate page_virt\n",
+		       __FUNCTION__);
 		goto out;
 	}
-	lower_file->f_pos = 0;
-	oldfs = get_fs();
-	set_fs(get_ds());
-	bytes_read = lower_file->f_op->read(lower_file,
-					    (char __user *)page_virt,
-					    ECRYPTFS_DEFAULT_EXTENT_SIZE,
-					    &lower_file->f_pos);
-	set_fs(oldfs);
-	if (bytes_read != ECRYPTFS_DEFAULT_EXTENT_SIZE) {
-		rc = -EINVAL;
-		goto out;
-	}
-	rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
-					ecryptfs_dentry,
-					ECRYPTFS_VALIDATE_HEADER_SIZE);
+	rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
+				 ecryptfs_inode);
+	if (!rc)
+		rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
+						ecryptfs_dentry,
+						ECRYPTFS_VALIDATE_HEADER_SIZE);
 	if (rc) {
-		rc = ecryptfs_read_xattr_region(page_virt,
-						ecryptfs_dentry);
+		rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
 		if (rc) {
 			printk(KERN_DEBUG "Valid eCryptfs headers not found in "
 			       "file header region or xattr region\n");
@@ -1776,7 +1732,7 @@ out:
 }
 
 /**
- * ecryptfs_process_cipher - Perform cipher initialization.
+ * ecryptfs_process_key_cipher - Perform key cipher initialization.
  * @key_tfm: Crypto context for key material, set by this function
  * @cipher_name: Name of the cipher
  * @key_size: Size of the key in bytes
@@ -1785,9 +1741,9 @@ out:
  * should be released by other functions, such as on a superblock put
  * event, regardless of whether this function succeeds for fails.
  */
-int
-ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
-			size_t *key_size)
+static int
+ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
+			    char *cipher_name, size_t *key_size)
 {
 	char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
 	char *full_alg_name;
@@ -1829,3 +1785,145 @@ ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
 out:
 	return rc;
 }
+
+struct kmem_cache *ecryptfs_key_tfm_cache;
+struct list_head key_tfm_list;
+struct mutex key_tfm_list_mutex;
+
+/**
+ * ecryptfs_init_crypto
+ *
+ * Initializes the module-wide key TFM list and the mutex guarding it.
+ *
+ * Returns zero
+ */
+int ecryptfs_init_crypto(void)
+{
+	mutex_init(&key_tfm_list_mutex);
+	INIT_LIST_HEAD(&key_tfm_list);
+	return 0;
+}
+
+/**
+ * ecryptfs_destroy_crypto
+ *
+ * Removes every entry from the key TFM list, freeing its crypto API
+ * handle (when set) and returning the entry to ecryptfs_key_tfm_cache.
+ *
+ * Returns zero
+ */
+int ecryptfs_destroy_crypto(void)
+{
+	struct ecryptfs_key_tfm *key_tfm, *key_tfm_tmp;
+
+	mutex_lock(&key_tfm_list_mutex);
+	list_for_each_entry_safe(key_tfm, key_tfm_tmp, &key_tfm_list,
+				 key_tfm_list) {
+		list_del(&key_tfm->key_tfm_list);
+		if (key_tfm->key_tfm)
+			crypto_free_blkcipher(key_tfm->key_tfm);
+		kmem_cache_free(ecryptfs_key_tfm_cache, key_tfm);
+	}
+	mutex_unlock(&key_tfm_list_mutex);
+	return 0;
+}
+
+/**
+ * ecryptfs_add_new_key_tfm
+ * @key_tfm: If non-NULL, set to the new list entry on success and to
+ *           NULL on error
+ * @cipher_name: Name of the cipher for the new key TFM
+ * @key_size: Key size in bytes to request for the cipher
+ *
+ * Allocates a key TFM entry, initializes its cipher, and adds the
+ * entry to the module-wide key TFM list.
+ *
+ * Returns zero on success; non-zero on error
+ */
+int
+ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
+			 size_t key_size)
+{
+	struct ecryptfs_key_tfm *tmp_tfm;
+	int rc = 0;
+
+	tmp_tfm = kmem_cache_alloc(ecryptfs_key_tfm_cache, GFP_KERNEL);
+	if (key_tfm != NULL)
+		(*key_tfm) = tmp_tfm;
+	if (!tmp_tfm) {
+		rc = -ENOMEM;
+		printk(KERN_ERR "Error attempting to allocate from "
+		       "ecryptfs_key_tfm_cache\n");
+		goto out;
+	}
+	mutex_init(&tmp_tfm->key_tfm_mutex);
+	/* strncpy() leaves the copy unterminated when cipher_name is
+	 * ECRYPTFS_MAX_CIPHER_NAME_SIZE characters or longer; the buffer
+	 * has one extra byte, so always terminate it explicitly */
+	strncpy(tmp_tfm->cipher_name, cipher_name,
+		ECRYPTFS_MAX_CIPHER_NAME_SIZE);
+	tmp_tfm->cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE] = '\0';
+	tmp_tfm->key_size = key_size;
+	rc = ecryptfs_process_key_cipher(&tmp_tfm->key_tfm,
+					 tmp_tfm->cipher_name,
+					 &tmp_tfm->key_size);
+	if (rc) {
+		printk(KERN_ERR "Error attempting to initialize key TFM "
+		       "cipher with name = [%s]; rc = [%d]\n",
+		       tmp_tfm->cipher_name, rc);
+		kmem_cache_free(ecryptfs_key_tfm_cache, tmp_tfm);
+		if (key_tfm != NULL)
+			(*key_tfm) = NULL;
+		goto out;
+	}
+	mutex_lock(&key_tfm_list_mutex);
+	list_add(&tmp_tfm->key_tfm_list, &key_tfm_list);
+	mutex_unlock(&key_tfm_list_mutex);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_get_tfm_and_mutex_for_cipher_name
+ * @tfm: Set to the crypto API handle for the cipher; NULL on error
+ * @tfm_mutex: Set to the mutex of the matching key TFM; NULL on error
+ * @cipher_name: Name of the cipher to look up
+ *
+ * Searches the key TFM list for an entry with a matching cipher name.
+ * If there is none, a new entry is created with a key size of 0 and
+ * added to the list. The list mutex is not held between the lookup
+ * and the add, so concurrent callers may each add an entry for the
+ * same cipher.
+ *
+ * Returns zero on success; non-zero on error
+ */
+int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
+					       struct mutex **tfm_mutex,
+					       char *cipher_name)
+{
+	struct ecryptfs_key_tfm *key_tfm;
+	int rc = 0;
+
+	(*tfm) = NULL;
+	(*tfm_mutex) = NULL;
+	mutex_lock(&key_tfm_list_mutex);
+	list_for_each_entry(key_tfm, &key_tfm_list, key_tfm_list) {
+		if (strcmp(key_tfm->cipher_name, cipher_name) == 0) {
+			(*tfm) = key_tfm->key_tfm;
+			(*tfm_mutex) = &key_tfm->key_tfm_mutex;
+			mutex_unlock(&key_tfm_list_mutex);
+			goto out;
+		}
+	}
+	mutex_unlock(&key_tfm_list_mutex);
+	rc = ecryptfs_add_new_key_tfm(&key_tfm, cipher_name, 0);
+	if (rc) {
+		printk(KERN_ERR "Error adding new key_tfm to list; rc = [%d]\n",
+		       rc);
+		goto out;
+	}
+	(*tfm) = key_tfm->key_tfm;
+	(*tfm_mutex) = &key_tfm->key_tfm_mutex;
+out:
+	return rc;
+}
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index 434c7efd80f..3d2bdf546ec 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -38,8 +38,6 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
 			auth_tok);
 	if (auth_tok->flags & ECRYPTFS_PRIVATE_KEY) {
 		ecryptfs_printk(KERN_DEBUG, " * private key type\n");
-		ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT "
-				"IN ECRYPTFS VERSION 0.1)\n");
 	} else {
 		ecryptfs_printk(KERN_DEBUG, " * passphrase type\n");
 		ecryptfs_to_hex(salt, auth_tok->token.password.salt,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 1b9dd9a96f1..ce7a5d4aec3 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -38,7 +38,7 @@
 /* Version verification for shared data structures w/ userspace */
 #define ECRYPTFS_VERSION_MAJOR 0x00
 #define ECRYPTFS_VERSION_MINOR 0x04
-#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x02
+#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03
 /* These flags indicate which features are supported by the kernel
  * module; userspace tools such as the mount helper read
  * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
@@ -48,10 +48,12 @@
 #define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
 #define ECRYPTFS_VERSIONING_POLICY                0x00000008
 #define ECRYPTFS_VERSIONING_XATTR                 0x00000010
+#define ECRYPTFS_VERSIONING_MULTKEY               0x00000020
 #define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
 				  | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
 				  | ECRYPTFS_VERSIONING_PUBKEY \
-				  | ECRYPTFS_VERSIONING_XATTR)
+				  | ECRYPTFS_VERSIONING_XATTR \
+				  | ECRYPTFS_VERSIONING_MULTKEY)
 #define ECRYPTFS_MAX_PASSWORD_LENGTH 64
 #define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
 #define ECRYPTFS_SALT_SIZE 8
@@ -65,8 +67,7 @@
 #define ECRYPTFS_MAX_KEY_BYTES 64
 #define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
 #define ECRYPTFS_DEFAULT_IV_BYTES 16
-#define ECRYPTFS_FILE_VERSION 0x02
-#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
+#define ECRYPTFS_FILE_VERSION 0x03
 #define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
 #define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
 #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
@@ -144,6 +145,7 @@ struct ecryptfs_private_key {
 struct ecryptfs_auth_tok {
 	u16 version; /* 8-bit major and 8-bit minor */
 	u16 token_type;
+#define ECRYPTFS_ENCRYPT_ONLY 0x00000001
 	u32 flags;
 	struct ecryptfs_session_key session_key;
 	u8 reserved[32];
@@ -194,12 +196,11 @@ ecryptfs_get_key_payload_data(struct key *key)
 #define ECRYPTFS_MAX_KEYSET_SIZE 1024
 #define ECRYPTFS_MAX_CIPHER_NAME_SIZE 32
 #define ECRYPTFS_MAX_NUM_ENC_KEYS 64
-#define ECRYPTFS_MAX_NUM_KEYSIGS 2 /* TODO: Make this a linked list */
 #define ECRYPTFS_MAX_IV_BYTES 16	/* 128 bits */
 #define ECRYPTFS_SALT_BYTES 2
 #define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5
 #define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8	/* 4*2 */
-#define ECRYPTFS_FILE_SIZE_BYTES 8
+#define ECRYPTFS_FILE_SIZE_BYTES (sizeof(u64))
 #define ECRYPTFS_DEFAULT_CIPHER "aes"
 #define ECRYPTFS_DEFAULT_KEY_BYTES 16
 #define ECRYPTFS_DEFAULT_HASH "md5"
@@ -212,6 +213,11 @@ ecryptfs_get_key_payload_data(struct key *key)
 #define ECRYPTFS_TAG_67_PACKET_TYPE 0x43
 #define MD5_DIGEST_SIZE 16
 
+struct ecryptfs_key_sig {
+	struct list_head crypt_stat_list;
+	char keysig[ECRYPTFS_SIG_SIZE_HEX];
+};
+
 /**
  * This is the primary struct associated with each encrypted file.
  *
@@ -231,8 +237,6 @@ struct ecryptfs_crypt_stat {
 	u32 flags;
 	unsigned int file_version;
 	size_t iv_bytes;
-	size_t num_keysigs;
-	size_t header_extent_size;
 	size_t num_header_extents_at_front;
 	size_t extent_size; /* Data extent size; default is 4096 */
 	size_t key_size;
@@ -245,7 +249,8 @@ struct ecryptfs_crypt_stat {
 	unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE];
 	unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
 	unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
-	unsigned char keysigs[ECRYPTFS_MAX_NUM_KEYSIGS][ECRYPTFS_SIG_SIZE_HEX];
+	struct list_head keysig_list;
+	struct mutex keysig_list_mutex;
 	struct mutex cs_tfm_mutex;
 	struct mutex cs_hash_tfm_mutex;
 	struct mutex cs_mutex;
@@ -255,6 +260,8 @@ struct ecryptfs_crypt_stat {
 struct ecryptfs_inode_info {
 	struct inode vfs_inode;
 	struct inode *wii_inode;
+	struct file *lower_file;
+	struct mutex lower_file_mutex;
 	struct ecryptfs_crypt_stat crypt_stat;
 };
 
@@ -266,6 +273,59 @@ struct ecryptfs_dentry_info {
 };
 
 /**
+ * ecryptfs_global_auth_tok - A key used to encrypt all new files under the mountpoint
+ * @flags: Status flags
+ * @mount_crypt_stat_list: These auth_toks hang off the mount-wide
+ *                         cryptographic context. Every time a new
+ *                         inode comes into existence, eCryptfs copies
+ *                         the auth_toks on that list to the set of
+ *                         auth_toks on the inode's crypt_stat
+ * @global_auth_tok_key: The key from the user's keyring for the sig
+ * @global_auth_tok: The key contents
+ * @sig: The key identifier
+ *
+ * ecryptfs_global_auth_tok structs refer to authentication token keys
+ * in the user keyring that apply to newly created files. A list of
+ * these objects hangs off of the mount_crypt_stat struct for any
+ * given eCryptfs mount. This struct maintains a reference to both the
+ * key contents and the key itself so that the key can be put on
+ * unmount.
+ */
+struct ecryptfs_global_auth_tok {
+#define ECRYPTFS_AUTH_TOK_INVALID 0x00000001
+	u32 flags;
+	struct list_head mount_crypt_stat_list;
+	struct key *global_auth_tok_key;
+	struct ecryptfs_auth_tok *global_auth_tok;
+	unsigned char sig[ECRYPTFS_SIG_SIZE_HEX + 1];
+};
+
+/**
+ * ecryptfs_key_tfm - Persistent key tfm
+ * @key_tfm: crypto API handle to the key
+ * @key_size: Key size in bytes
+ * @key_tfm_mutex: Mutex to ensure only one operation in eCryptfs is
+ *                 using the persistent TFM at any point in time
+ * @key_tfm_list: Handle to hang this off the module-wide TFM list
+ * @cipher_name: String name for the cipher for this TFM
+ *
+ * Typically, eCryptfs will use the same ciphers repeatedly throughout
+ * the course of its operations. In order to avoid unnecessarily
+ * destroying and initializing the same cipher repeatedly, eCryptfs
+ * keeps a list of crypto API contexts around to use when needed.
+ */
+struct ecryptfs_key_tfm {
+	struct crypto_blkcipher *key_tfm;
+	size_t key_size;
+	struct mutex key_tfm_mutex;
+	struct list_head key_tfm_list;
+	unsigned char cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
+};
+
+extern struct list_head key_tfm_list;
+extern struct mutex key_tfm_list_mutex;
+
+/**
  * This struct is to enable a mount-wide passphrase/salt combo. This
  * is more or less a stopgap to provide similar functionality to other
  * crypto filesystems like EncFS or CFS until full policy support is
@@ -276,15 +336,14 @@ struct ecryptfs_mount_crypt_stat {
 #define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001
 #define ECRYPTFS_XATTR_METADATA_ENABLED        0x00000002
 #define ECRYPTFS_ENCRYPTED_VIEW_ENABLED        0x00000004
+#define ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED  0x00000008
 	u32 flags;
-	struct ecryptfs_auth_tok *global_auth_tok;
-	struct key *global_auth_tok_key;
+	struct list_head global_auth_tok_list;
+	struct mutex global_auth_tok_list_mutex;
+	size_t num_global_auth_toks;
 	size_t global_default_cipher_key_size;
-	struct crypto_blkcipher *global_key_tfm;
-	struct mutex global_key_tfm_mutex;
 	unsigned char global_default_cipher_name[ECRYPTFS_MAX_CIPHER_NAME_SIZE
 						 + 1];
-	unsigned char global_auth_tok_sig[ECRYPTFS_SIG_SIZE_HEX + 1];
 };
 
 /* superblock private data. */
@@ -468,6 +527,9 @@ extern struct kmem_cache *ecryptfs_header_cache_2;
 extern struct kmem_cache *ecryptfs_xattr_cache;
 extern struct kmem_cache *ecryptfs_lower_page_cache;
 extern struct kmem_cache *ecryptfs_key_record_cache;
+extern struct kmem_cache *ecryptfs_key_sig_cache;
+extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
+extern struct kmem_cache *ecryptfs_key_tfm_cache;
 
 int ecryptfs_interpose(struct dentry *hidden_dentry,
 		       struct dentry *this_dentry, struct super_block *sb,
@@ -486,44 +548,18 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
 int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat);
 void ecryptfs_rotate_iv(unsigned char *iv);
 void ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
-void ecryptfs_destruct_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
-void ecryptfs_destruct_mount_crypt_stat(
+void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat);
+void ecryptfs_destroy_mount_crypt_stat(
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
 int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
-int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
-					   char *cipher_name,
-					   char *chaining_modifier);
-#define ECRYPTFS_LOWER_I_MUTEX_NOT_HELD 0
-#define ECRYPTFS_LOWER_I_MUTEX_HELD 1
-int ecryptfs_write_inode_size_to_metadata(struct file *lower_file,
-					  struct inode *lower_inode,
-					  struct inode *inode,
-					  struct dentry *ecryptfs_dentry,
-					  int lower_i_mutex_held);
-int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
-			    struct file *lower_file,
-			    unsigned long lower_page_index, int byte_offset,
-			    int region_bytes);
-int
-ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
-			   struct file *lower_file, int byte_offset,
-			   int region_size);
-int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
-				struct file *lower_file);
-int ecryptfs_do_readpage(struct file *file, struct page *page,
-			 pgoff_t lower_page_index);
-int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
-					      struct inode *lower_inode,
-					      struct writeback_control *wbc);
-int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx);
-int ecryptfs_decrypt_page(struct file *file, struct page *page);
-int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
-			    struct file *lower_file);
-int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
-			   struct file *lower_file);
+int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode);
+int ecryptfs_encrypt_page(struct page *page);
+int ecryptfs_decrypt_page(struct page *page);
+int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry);
+int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry);
 int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
-int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry,
-					     struct vfsmount *mnt);
+int ecryptfs_read_and_validate_header_region(char *data,
+					     struct inode *ecryptfs_inode);
 int ecryptfs_read_and_validate_xattr_region(char *page_virt,
 					    struct dentry *ecryptfs_dentry);
 u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
@@ -533,27 +569,22 @@ int ecryptfs_generate_key_packet_set(char *dest_base,
 				     struct ecryptfs_crypt_stat *crypt_stat,
 				     struct dentry *ecryptfs_dentry,
 				     size_t *len, size_t max);
-int process_request_key_err(long err_code);
 int
 ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			  unsigned char *src, struct dentry *ecryptfs_dentry);
 int ecryptfs_truncate(struct dentry *dentry, loff_t new_length);
-int
-ecryptfs_process_cipher(struct crypto_blkcipher **key_tfm, char *cipher_name,
-			size_t *key_size);
 int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode);
 int ecryptfs_inode_set(struct inode *inode, void *lower_inode);
 void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode);
-int ecryptfs_open_lower_file(struct file **lower_file,
-			     struct dentry *lower_dentry,
-			     struct vfsmount *lower_mnt, int flags);
-int ecryptfs_close_lower_file(struct file *lower_file);
 ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
 			  size_t size);
+ssize_t
+ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
+			void *value, size_t size);
 int
 ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		  size_t size, int flags);
-int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry);
+int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
 int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid);
 int ecryptfs_process_quit(uid_t uid, pid_t pid);
 int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid,
@@ -580,7 +611,43 @@ void
 ecryptfs_write_header_metadata(char *virt,
 			       struct ecryptfs_crypt_stat *crypt_stat,
 			       size_t *written);
+int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig);
+int
+ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+			   char *sig);
+int ecryptfs_get_global_auth_tok_for_sig(
+	struct ecryptfs_global_auth_tok **global_auth_tok,
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig);
+int
+ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
+			 size_t key_size);
+int ecryptfs_init_crypto(void);
+int ecryptfs_destroy_crypto(void);
+int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
+					       struct mutex **tfm_mutex,
+					       char *cipher_name);
+int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
+				      struct ecryptfs_auth_tok **auth_tok,
+				      char *sig);
 int ecryptfs_write_zeros(struct file *file, pgoff_t index, int start,
 			 int num_zeros);
+void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num,
+				      struct ecryptfs_crypt_stat *crypt_stat);
+int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
+			 loff_t offset, size_t size);
+int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
+				      struct page *page_for_lower,
+				      size_t offset_in_page, size_t size);
+int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
+		   size_t size);
+int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
+			struct inode *ecryptfs_inode);
+int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
+				     pgoff_t page_index,
+				     size_t offset_in_page, size_t size,
+				     struct inode *ecryptfs_inode);
+int ecryptfs_read(char *data, loff_t offset, size_t size,
+		  struct file *ecryptfs_file);
+struct page *ecryptfs_get_locked_page(struct file *file, loff_t index);
 
 #endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 94f456fe4d9..c98c4690a77 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -141,34 +141,6 @@ retry:
 
 struct kmem_cache *ecryptfs_file_info_cache;
 
-int ecryptfs_open_lower_file(struct file **lower_file,
-			     struct dentry *lower_dentry,
-			     struct vfsmount *lower_mnt, int flags)
-{
-	int rc = 0;
-
-	flags |= O_LARGEFILE;
-	dget(lower_dentry);
-	mntget(lower_mnt);
-	*lower_file = dentry_open(lower_dentry, lower_mnt, flags);
-	if (IS_ERR(*lower_file)) {
-		printk(KERN_ERR "Error opening lower file for lower_dentry "
-		       "[0x%p], lower_mnt [0x%p], and flags [0x%x]\n",
-		       lower_dentry, lower_mnt, flags);
-		rc = PTR_ERR(*lower_file);
-		*lower_file = NULL;
-		goto out;
-	}
-out:
-	return rc;
-}
-
-int ecryptfs_close_lower_file(struct file *lower_file)
-{
-	fput(lower_file);
-	return 0;
-}
-
 /**
  * ecryptfs_open
  * @inode: inode speciying file to open
@@ -187,11 +159,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	/* Private value of ecryptfs_dentry allocated in
 	 * ecryptfs_lookup() */
 	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
-	struct inode *lower_inode = NULL;
-	struct file *lower_file = NULL;
-	struct vfsmount *lower_mnt;
 	struct ecryptfs_file_info *file_info;
-	int lower_flags;
 
 	mount_crypt_stat = &ecryptfs_superblock_to_private(
 		ecryptfs_dentry->d_sb)->mount_crypt_stat;
@@ -219,25 +187,12 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) {
 		ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n");
 		/* Policy code enabled in future release */
-		crypt_stat->flags |= ECRYPTFS_POLICY_APPLIED;
-		crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
+		crypt_stat->flags |= (ECRYPTFS_POLICY_APPLIED
+				      | ECRYPTFS_ENCRYPTED);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
-	lower_flags = file->f_flags;
-	if ((lower_flags & O_ACCMODE) == O_WRONLY)
-		lower_flags = (lower_flags & O_ACCMODE) | O_RDWR;
-	if (file->f_flags & O_APPEND)
-		lower_flags &= ~O_APPEND;
-	lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
-	/* Corresponding fput() in ecryptfs_release() */
-	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
-					   lower_flags))) {
-		ecryptfs_printk(KERN_ERR, "Error opening lower file\n");
-		goto out_puts;
-	}
-	ecryptfs_set_file_lower(file, lower_file);
-	/* Isn't this check the same as the one in lookup? */
-	lower_inode = lower_dentry->d_inode;
+	ecryptfs_set_file_lower(
+		file, ecryptfs_inode_to_private(inode)->lower_file);
 	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
 		ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
 		crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
@@ -247,7 +202,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	mutex_lock(&crypt_stat->cs_mutex);
 	if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
 	    || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
-		rc = ecryptfs_read_metadata(ecryptfs_dentry, lower_file);
+		rc = ecryptfs_read_metadata(ecryptfs_dentry);
 		if (rc) {
 			ecryptfs_printk(KERN_DEBUG,
 					"Valid headers not found\n");
@@ -259,7 +214,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 				       "and plaintext passthrough mode is not "
 				       "enabled; returning -EIO\n");
 				mutex_unlock(&crypt_stat->cs_mutex);
-				goto out_puts;
+				goto out_free;
 			}
 			rc = 0;
 			crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
@@ -271,11 +226,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 	ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = [0x%.16x] "
 			"size: [0x%.16x]\n", inode, inode->i_ino,
 			i_size_read(inode));
-	ecryptfs_set_file_lower(file, lower_file);
 	goto out;
-out_puts:
-	mntput(lower_mnt);
-	dput(lower_dentry);
+out_free:
 	kmem_cache_free(ecryptfs_file_info_cache,
 			ecryptfs_file_to_private(file));
 out:
@@ -295,19 +247,9 @@ static int ecryptfs_flush(struct file *file, fl_owner_t td)
 
 static int ecryptfs_release(struct inode *inode, struct file *file)
 {
-	struct file *lower_file = ecryptfs_file_to_lower(file);
-	struct ecryptfs_file_info *file_info = ecryptfs_file_to_private(file);
-	struct inode *lower_inode = ecryptfs_inode_to_lower(inode);
-	int rc;
-
-	if ((rc = ecryptfs_close_lower_file(lower_file))) {
-		printk(KERN_ERR "Error closing lower_file\n");
-		goto out;
-	}
-	inode->i_blocks = lower_inode->i_blocks;
-	kmem_cache_free(ecryptfs_file_info_cache, file_info);
-out:
-	return rc;
+	kmem_cache_free(ecryptfs_file_info_cache,
+			ecryptfs_file_to_private(file));
+	return 0;
 }
 
 static int
@@ -338,21 +280,6 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
 	return rc;
 }
 
-static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
-				    struct pipe_inode_info *pipe, size_t count,
-				    unsigned int flags)
-{
-	struct file *lower_file = NULL;
-	int rc = -EINVAL;
-
-	lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file->f_op && lower_file->f_op->splice_read)
-		rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
-						count, flags);
-
-	return rc;
-}
-
 static int ecryptfs_ioctl(struct inode *inode, struct file *file,
 			  unsigned int cmd, unsigned long arg);
 
@@ -365,7 +292,7 @@ const struct file_operations ecryptfs_dir_fops = {
 	.release = ecryptfs_release,
 	.fsync = ecryptfs_fsync,
 	.fasync = ecryptfs_fasync,
-	.splice_read = ecryptfs_splice_read,
+	.splice_read = generic_file_splice_read,
 };
 
 const struct file_operations ecryptfs_main_fops = {
@@ -382,7 +309,7 @@ const struct file_operations ecryptfs_main_fops = {
 	.release = ecryptfs_release,
 	.fsync = ecryptfs_fsync,
 	.fasync = ecryptfs_fasync,
-	.splice_read = ecryptfs_splice_read,
+	.splice_read = generic_file_splice_read,
 };
 
 static int
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 131954b3fb9..5701f816faf 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -119,10 +119,23 @@ ecryptfs_do_create(struct inode *directory_inode,
 	}
 	rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
 					     ecryptfs_dentry, mode, nd);
-	if (unlikely(rc)) {
-		ecryptfs_printk(KERN_ERR,
-				"Failure to create underlying file\n");
-		goto out_lock;
+	if (rc) {
+		struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
+		struct ecryptfs_inode_info *inode_info =
+			ecryptfs_inode_to_private(ecryptfs_inode);
+
+		printk(KERN_WARNING "%s: Error creating underlying file; "
+		       "rc = [%d]; checking for existing\n", __FUNCTION__, rc);
+		if (inode_info) {
+			mutex_lock(&inode_info->lower_file_mutex);
+			if (!inode_info->lower_file) {
+				mutex_unlock(&inode_info->lower_file_mutex);
+				printk(KERN_ERR "%s: Failure to set underlying "
+				       "file; rc = [%d]\n", __FUNCTION__, rc);
+				goto out_lock;
+			}
+			mutex_unlock(&inode_info->lower_file_mutex);
+		}
 	}
 	rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
 				directory_inode->i_sb, 0);
@@ -140,39 +153,30 @@ out:
 
 /**
  * grow_file
- * @ecryptfs_dentry: the ecryptfs dentry
- * @lower_file: The lower file
- * @inode: The ecryptfs inode
- * @lower_inode: The lower inode
+ * @ecryptfs_dentry: the eCryptfs dentry
  *
  * This is the code which will grow the file to its correct size.
  */
-static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file,
-		     struct inode *inode, struct inode *lower_inode)
+static int grow_file(struct dentry *ecryptfs_dentry)
 {
-	int rc = 0;
+	struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
 	struct file fake_file;
 	struct ecryptfs_file_info tmp_file_info;
+	char zero_virt[] = { 0x00 };
+	int rc = 0;
 
 	memset(&fake_file, 0, sizeof(fake_file));
 	fake_file.f_path.dentry = ecryptfs_dentry;
 	memset(&tmp_file_info, 0, sizeof(tmp_file_info));
 	ecryptfs_set_file_private(&fake_file, &tmp_file_info);
-	ecryptfs_set_file_lower(&fake_file, lower_file);
-	rc = ecryptfs_fill_zeros(&fake_file, 1);
-	if (rc) {
-		ecryptfs_inode_to_private(inode)->crypt_stat.flags |=
-			ECRYPTFS_SECURITY_WARNING;
-		ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros "
-				"in file; rc = [%d]\n", rc);
-		goto out;
-	}
-	i_size_write(inode, 0);
-	rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode,
-			inode, ecryptfs_dentry,
-			ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
-	ecryptfs_inode_to_private(inode)->crypt_stat.flags |= ECRYPTFS_NEW_FILE;
-out:
+	ecryptfs_set_file_lower(
+		&fake_file,
+		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file);
+	rc = ecryptfs_write(&fake_file, zero_virt, 0, 1);
+	i_size_write(ecryptfs_inode, 0);
+	rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
+	ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat.flags |=
+		ECRYPTFS_NEW_FILE;
 	return rc;
 }
 
@@ -186,51 +190,31 @@ out:
  */
 static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
 {
+	struct ecryptfs_crypt_stat *crypt_stat =
+		&ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
 	int rc = 0;
-	int lower_flags;
-	struct ecryptfs_crypt_stat *crypt_stat;
-	struct dentry *lower_dentry;
-	struct file *lower_file;
-	struct inode *inode, *lower_inode;
-	struct vfsmount *lower_mnt;
 
-	lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
-	ecryptfs_printk(KERN_DEBUG, "lower_dentry->d_name.name = [%s]\n",
-			lower_dentry->d_name.name);
-	inode = ecryptfs_dentry->d_inode;
-	crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
-	lower_flags = ((O_CREAT | O_TRUNC) & O_ACCMODE) | O_RDWR;
-	lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
-	/* Corresponding fput() at end of this function */
-	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
-					   lower_flags))) {
-		ecryptfs_printk(KERN_ERR,
-				"Error opening dentry; rc = [%i]\n", rc);
-		goto out;
-	}
-	lower_inode = lower_dentry->d_inode;
 	if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
 		ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
 		crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
-		goto out_fput;
+		goto out;
 	}
 	crypt_stat->flags |= ECRYPTFS_NEW_FILE;
 	ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
 	rc = ecryptfs_new_file_context(ecryptfs_dentry);
 	if (rc) {
-		ecryptfs_printk(KERN_DEBUG, "Error creating new file "
-				"context\n");
-		goto out_fput;
+		ecryptfs_printk(KERN_ERR, "Error creating new file "
+				"context; rc = [%d]\n", rc);
+		goto out;
 	}
-	rc = ecryptfs_write_metadata(ecryptfs_dentry, lower_file);
+	rc = ecryptfs_write_metadata(ecryptfs_dentry);
 	if (rc) {
-		ecryptfs_printk(KERN_DEBUG, "Error writing headers\n");
-		goto out_fput;
+		printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
+		goto out;
 	}
-	rc = grow_file(ecryptfs_dentry, lower_file, inode, lower_inode);
-out_fput:
-	if ((rc = ecryptfs_close_lower_file(lower_file)))
-		printk(KERN_ERR "Error closing lower_file\n");
+	rc = grow_file(ecryptfs_dentry);
+	if (rc)
+		printk(KERN_ERR "Error growing file; rc = [%d]\n", rc);
 out:
 	return rc;
 }
@@ -252,6 +236,8 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
 {
 	int rc;
 
+	/* ecryptfs_do_create() calls ecryptfs_interpose(), which opens
+	 * the persistent lower file (inode_info->lower_file) */
 	rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd);
 	if (unlikely(rc)) {
 		ecryptfs_printk(KERN_WARNING, "Failed to create file in"
@@ -374,8 +360,8 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 	crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
 	if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
 		ecryptfs_set_default_sizes(crypt_stat);
-	rc = ecryptfs_read_and_validate_header_region(page_virt, lower_dentry,
-						      nd->mnt);
+	rc = ecryptfs_read_and_validate_header_region(page_virt,
+						      dentry->d_inode);
 	if (rc) {
 		rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry);
 		if (rc) {
@@ -392,7 +378,8 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		dentry->d_sb)->mount_crypt_stat;
 	if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
 		if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-			file_size = (crypt_stat->header_extent_size
+			file_size = ((crypt_stat->extent_size
+				      * crypt_stat->num_header_extents_at_front)
 				     + i_size_read(lower_dentry->d_inode));
 		else
 			file_size = i_size_read(lower_dentry->d_inode);
@@ -722,8 +709,8 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
 {
 	loff_t lower_size;
 
-	lower_size = ( crypt_stat->header_extent_size
-		       * crypt_stat->num_header_extents_at_front );
+	lower_size = (crypt_stat->extent_size
+		      * crypt_stat->num_header_extents_at_front);
 	if (upper_size != 0) {
 		loff_t num_extents;
 
@@ -752,8 +739,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 	int rc = 0;
 	struct inode *inode = dentry->d_inode;
 	struct dentry *lower_dentry;
-	struct vfsmount *lower_mnt;
-	struct file fake_ecryptfs_file, *lower_file = NULL;
+	struct file fake_ecryptfs_file;
 	struct ecryptfs_crypt_stat *crypt_stat;
 	loff_t i_size = i_size_read(inode);
 	loff_t lower_size_before_truncate;
@@ -776,62 +762,52 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 		goto out;
 	}
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	/* This dget & mntget is released through fput at out_fput: */
-	lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
-	if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry, lower_mnt,
-					   O_RDWR))) {
-		ecryptfs_printk(KERN_ERR,
-				"Error opening dentry; rc = [%i]\n", rc);
-		goto out_free;
-	}
-	ecryptfs_set_file_lower(&fake_ecryptfs_file, lower_file);
+	ecryptfs_set_file_lower(
+		&fake_ecryptfs_file,
+		ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
 	/* Switch on growing or shrinking file */
 	if (new_length > i_size) {
-		rc = ecryptfs_fill_zeros(&fake_ecryptfs_file, new_length);
-		if (rc) {
-			ecryptfs_printk(KERN_ERR,
-					"Problem with fill_zeros\n");
-			goto out_fput;
-		}
-		i_size_write(inode, new_length);
-		rc = ecryptfs_write_inode_size_to_metadata(
-			lower_file, lower_dentry->d_inode, inode, dentry,
-			ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
-		if (rc) {
-			printk(KERN_ERR	"Problem with "
-			       "ecryptfs_write_inode_size_to_metadata; "
-			       "rc = [%d]\n", rc);
-			goto out_fput;
-		}
+		char zero[] = { 0x00 };
+
+		/* Write a single 0 at the last position of the file;
+		 * this triggers code that will fill in 0's throughout
+		 * the region between the previous end of the file and
+		 * the new end of the file */
+		rc = ecryptfs_write(&fake_ecryptfs_file, zero,
+				    (new_length - 1), 1);
 	} else { /* new_length < i_size_read(inode) */
-		pgoff_t index = 0;
-		int end_pos_in_page = -1;
+		/* We're chopping off all the pages down to the page
+		 * in which new_length is located. Fill in the end of
+		 * that page from (new_length & ~PAGE_CACHE_MASK) to
+		 * PAGE_CACHE_SIZE with zeros. */
+		size_t num_zeros = (PAGE_CACHE_SIZE
+				    - (new_length & ~PAGE_CACHE_MASK));
 
-		if (new_length != 0) {
-			index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
-			end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
-		}
-		if (end_pos_in_page != (PAGE_CACHE_SIZE - 1)) {
-			if ((rc = ecryptfs_write_zeros(&fake_ecryptfs_file,
-						       index,
-						       (end_pos_in_page + 1),
-						       ((PAGE_CACHE_SIZE - 1)
-							- end_pos_in_page)))) {
+		if (num_zeros) {
+			char *zeros_virt;
+
+			zeros_virt = kzalloc(num_zeros, GFP_KERNEL);
+			if (!zeros_virt) {
+				rc = -ENOMEM;
+				goto out_free;
+			}
+			rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt,
+					    new_length, num_zeros);
+			kfree(zeros_virt);
+			if (rc) {
 				printk(KERN_ERR "Error attempting to zero out "
 				       "the remainder of the end page on "
 				       "reducing truncate; rc = [%d]\n", rc);
-				goto out_fput;
+				goto out_free;
 			}
 		}
 		vmtruncate(inode, new_length);
-		rc = ecryptfs_write_inode_size_to_metadata(
-			lower_file, lower_dentry->d_inode, inode, dentry,
-			ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
+		rc = ecryptfs_write_inode_size_to_metadata(inode);
 		if (rc) {
 			printk(KERN_ERR	"Problem with "
 			       "ecryptfs_write_inode_size_to_metadata; "
 			       "rc = [%d]\n", rc);
-			goto out_fput;
+			goto out_free;
 		}
 		/* We are reducing the size of the ecryptfs file, and need to
 		 * know if we need to reduce the size of the lower file. */
@@ -843,13 +819,6 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
 			vmtruncate(lower_dentry->d_inode,
 				   lower_size_after_truncate);
 	}
-	/* Update the access times */
-	lower_dentry->d_inode->i_mtime = lower_dentry->d_inode->i_ctime
-		= CURRENT_TIME;
-	mark_inode_dirty_sync(inode);
-out_fput:
-	if ((rc = ecryptfs_close_lower_file(lower_file)))
-		printk(KERN_ERR "Error closing lower_file\n");
 out_free:
 	if (ecryptfs_file_to_private(&fake_ecryptfs_file))
 		kmem_cache_free(ecryptfs_file_info_cache,
@@ -909,23 +878,12 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 	else if (S_ISREG(dentry->d_inode->i_mode)
 		 && (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
 		     || !(crypt_stat->flags & ECRYPTFS_KEY_VALID))) {
-		struct vfsmount *lower_mnt;
-		struct file *lower_file = NULL;
 		struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
-		int lower_flags;
-
-		lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
-		lower_flags = O_RDONLY;
-		if ((rc = ecryptfs_open_lower_file(&lower_file, lower_dentry,
-						   lower_mnt, lower_flags))) {
-			printk(KERN_ERR
-			       "Error opening lower file; rc = [%d]\n", rc);
-			mutex_unlock(&crypt_stat->cs_mutex);
-			goto out;
-		}
+
 		mount_crypt_stat = &ecryptfs_superblock_to_private(
 			dentry->d_sb)->mount_crypt_stat;
-		if ((rc = ecryptfs_read_metadata(dentry, lower_file))) {
+		rc = ecryptfs_read_metadata(dentry);
+		if (rc) {
 			if (!(mount_crypt_stat->flags
 			      & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
 				rc = -EIO;
@@ -935,16 +893,13 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
 				       "enabled; returning -EIO\n");
 
 				mutex_unlock(&crypt_stat->cs_mutex);
-				fput(lower_file);
 				goto out;
 			}
 			rc = 0;
 			crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
 			mutex_unlock(&crypt_stat->cs_mutex);
-			fput(lower_file);
 			goto out;
 		}
-		fput(lower_file);
 	}
 	mutex_unlock(&crypt_stat->cs_mutex);
 	if (ia->ia_valid & ATTR_SIZE) {
@@ -986,13 +941,11 @@ out:
 }
 
 ssize_t
-ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
-		  size_t size)
+ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
+			void *value, size_t size)
 {
 	int rc = 0;
-	struct dentry *lower_dentry;
 
-	lower_dentry = ecryptfs_dentry_to_lower(dentry);
 	if (!lower_dentry->d_inode->i_op->getxattr) {
 		rc = -ENOSYS;
 		goto out;
@@ -1005,6 +958,14 @@ out:
 	return rc;
 }
 
+ssize_t
+ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
+		  size_t size)
+{
+	return ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), name,
+				       value, size);
+}
+
 static ssize_t
 ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
 {
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index b550dea8eee..89d9710dd63 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -39,7 +39,7 @@
  * determine the type of error, make appropriate log entries, and
  * return an error code.
  */
-int process_request_key_err(long err_code)
+static int process_request_key_err(long err_code)
 {
 	int rc = 0;
 
@@ -71,7 +71,7 @@ int process_request_key_err(long err_code)
  *        address; zero on error
  * @length_size: The number of bytes occupied by the encoded length
  *
- * Returns Zero on success
+ * Returns zero on success; non-zero on error
  */
 static int parse_packet_length(unsigned char *data, size_t *size,
 			       size_t *length_size)
@@ -106,11 +106,11 @@ out:
 
 /**
  * write_packet_length
- * @dest: The byte array target into which to write the
- *       length. Must have at least 5 bytes allocated.
+ * @dest: The byte array target into which to write the length. Must
+ *        have at least 5 bytes allocated.
  * @size: The length to write.
- * @packet_size_length: The number of bytes used to encode the
- *                      packet length is written to this address.
+ * @packet_size_length: The number of bytes used to encode the packet
+ *                      length is written to this address.
  *
  * Returns zero on success; non-zero on error.
  */
@@ -396,26 +396,53 @@ out:
 	return rc;
 }
 
+static int
+ecryptfs_get_auth_tok_sig(char **sig, struct ecryptfs_auth_tok *auth_tok)
+{
+	int rc = 0;
+
+	(*sig) = NULL;
+	switch (auth_tok->token_type) {
+	case ECRYPTFS_PASSWORD:
+		(*sig) = auth_tok->token.password.signature;
+		break;
+	case ECRYPTFS_PRIVATE_KEY:
+		(*sig) = auth_tok->token.private_key.signature;
+		break;
+	default:
+		printk(KERN_ERR "Cannot get sig for auth_tok of type [%d]\n",
+		       auth_tok->token_type);
+		rc = -EINVAL;
+	}
+	return rc;
+}
+
 /**
- * decrypt_pki_encrypted_session_key - Decrypt the session key with
- * the given auth_tok.
+ * decrypt_pki_encrypted_session_key - Decrypt the session key with the given auth_tok.
+ * @auth_tok: The key authentication token used to decrypt the session key
+ * @crypt_stat: The cryptographic context
  *
- * Returns Zero on success; non-zero error otherwise.
+ * Returns zero on success; non-zero error otherwise.
  */
-static int decrypt_pki_encrypted_session_key(
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
-	struct ecryptfs_auth_tok *auth_tok,
-	struct ecryptfs_crypt_stat *crypt_stat)
+static int
+decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
+				  struct ecryptfs_crypt_stat *crypt_stat)
 {
 	u16 cipher_code = 0;
 	struct ecryptfs_msg_ctx *msg_ctx;
 	struct ecryptfs_message *msg = NULL;
+	char *auth_tok_sig;
 	char *netlink_message;
 	size_t netlink_message_length;
 	int rc;
 
-	rc = write_tag_64_packet(mount_crypt_stat->global_auth_tok_sig,
-				 &(auth_tok->session_key),
+	rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok);
+	if (rc) {
+		printk(KERN_ERR "Unrecognized auth tok type: [%d]\n",
+		       auth_tok->token_type);
+		goto out;
+	}
+	rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key),
 				 &netlink_message, &netlink_message_length);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet");
@@ -465,40 +492,33 @@ out:
 
 static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
 {
-	struct list_head *walker;
 	struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+	struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp;
 
-	walker = auth_tok_list_head->next;
-	while (walker != auth_tok_list_head) {
-		auth_tok_list_item =
-		    list_entry(walker, struct ecryptfs_auth_tok_list_item,
-			       list);
-		walker = auth_tok_list_item->list.next;
-		memset(auth_tok_list_item, 0,
-		       sizeof(struct ecryptfs_auth_tok_list_item));
+	list_for_each_entry_safe(auth_tok_list_item, auth_tok_list_item_tmp,
+				 auth_tok_list_head, list) {
+		list_del(&auth_tok_list_item->list);
 		kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
 				auth_tok_list_item);
 	}
-	auth_tok_list_head->next = NULL;
 }
 
 struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
 
-
 /**
  * parse_tag_1_packet
- * @crypt_stat: The cryptographic context to modify based on packet
- *              contents.
+ * @crypt_stat: The cryptographic context to modify based on packet contents
  * @data: The raw bytes of the packet.
  * @auth_tok_list: eCryptfs parses packets into authentication tokens;
- *                 a new authentication token will be placed at the end
- *                 of this list for this packet.
+ *                 a new authentication token will be placed at the
+ *                 end of this list for this packet.
  * @new_auth_tok: Pointer to a pointer to memory that this function
  *                allocates; sets the memory address of the pointer to
  *                NULL on error. This object is added to the
  *                auth_tok_list.
  * @packet_size: This function writes the size of the parsed packet
  *               into this memory location; zero on error.
+ * @max_packet_size: The maximum allowable packet size
  *
  * Returns zero on success; non-zero on error.
  */
@@ -515,72 +535,65 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
 
 	(*packet_size) = 0;
 	(*new_auth_tok) = NULL;
-
-	/* we check that:
-	 *   one byte for the Tag 1 ID flag
-	 *   two bytes for the body size
-	 * do not exceed the maximum_packet_size
+	/*
+	 * This format is inspired by OpenPGP; see RFC 2440
+	 * packet tag 1
+	 *
+	 * Tag 1 identifier (1 byte)
+	 * Max Tag 1 packet size (max 3 bytes)
+	 * Version (1 byte)
+	 * Key identifier (8 bytes; ECRYPTFS_SIG_SIZE)
+	 * Cipher identifier (1 byte)
+	 * Encrypted key (arbitrary)
+	 *
+	 * 12 bytes minimum packet size
 	 */
-	if (unlikely((*packet_size) + 3 > max_packet_size)) {
-		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+	if (unlikely(max_packet_size < 12)) {
+		printk(KERN_ERR "Invalid max packet size; must be >=12\n");
 		rc = -EINVAL;
 		goto out;
 	}
-	/* check for Tag 1 identifier - one byte */
 	if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) {
-		ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n",
-				ECRYPTFS_TAG_1_PACKET_TYPE);
+		printk(KERN_ERR "Enter w/ first byte != 0x%.2x\n",
+		       ECRYPTFS_TAG_1_PACKET_TYPE);
 		rc = -EINVAL;
 		goto out;
 	}
 	/* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
 	 * at end of function upon failure */
 	auth_tok_list_item =
-		kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache,
-				 GFP_KERNEL);
+		kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache,
+				  GFP_KERNEL);
 	if (!auth_tok_list_item) {
-		ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
+		printk(KERN_ERR "Unable to allocate memory\n");
 		rc = -ENOMEM;
 		goto out;
 	}
-	memset(auth_tok_list_item, 0,
-	       sizeof(struct ecryptfs_auth_tok_list_item));
 	(*new_auth_tok) = &auth_tok_list_item->auth_tok;
-	/* check for body size - one to two bytes
-	 *
-	 *              ***** TAG 1 Packet Format *****
-	 *    | version number                     | 1 byte       |
-	 *    | key ID                             | 8 bytes      |
-	 *    | public key algorithm               | 1 byte       |
-	 *    | encrypted session key              | arbitrary    |
-	 */
 	rc = parse_packet_length(&data[(*packet_size)], &body_size,
 				 &length_size);
 	if (rc) {
-		ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
-				"rc = [%d]\n", rc);
+		printk(KERN_WARNING "Error parsing packet length; "
+		       "rc = [%d]\n", rc);
 		goto out_free;
 	}
-	if (unlikely(body_size < (0x02 + ECRYPTFS_SIG_SIZE))) {
-		ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
-				body_size);
+	if (unlikely(body_size < (ECRYPTFS_SIG_SIZE + 2))) {
+		printk(KERN_WARNING "Invalid body size ([%td])\n", body_size);
 		rc = -EINVAL;
 		goto out_free;
 	}
 	(*packet_size) += length_size;
 	if (unlikely((*packet_size) + body_size > max_packet_size)) {
-		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		printk(KERN_WARNING "Packet size exceeds max\n");
 		rc = -EINVAL;
 		goto out_free;
 	}
-	/* Version 3 (from RFC2440) - one byte */
 	if (unlikely(data[(*packet_size)++] != 0x03)) {
-		ecryptfs_printk(KERN_DEBUG, "Unknown version number "
-				"[%d]\n", data[(*packet_size) - 1]);
+		printk(KERN_WARNING "Unknown version number [%d]\n",
+		       data[(*packet_size) - 1]);
 		rc = -EINVAL;
 		goto out_free;
 	}
-	/* Read Signature */
 	ecryptfs_to_hex((*new_auth_tok)->token.private_key.signature,
 			&data[(*packet_size)], ECRYPTFS_SIG_SIZE);
 	*packet_size += ECRYPTFS_SIG_SIZE;
@@ -588,27 +601,23 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
 	 * know which public key encryption algorithm was used */
 	(*packet_size)++;
 	(*new_auth_tok)->session_key.encrypted_key_size =
-		body_size - (0x02 + ECRYPTFS_SIG_SIZE);
+		body_size - (ECRYPTFS_SIG_SIZE + 2);
 	if ((*new_auth_tok)->session_key.encrypted_key_size
 	    > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
-		ecryptfs_printk(KERN_ERR, "Tag 1 packet contains key larger "
-				"than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES");
+		printk(KERN_WARNING "Tag 1 packet contains key larger "
+		       "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES");
 		rc = -EINVAL;
 		goto out;
 	}
-	ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
-			(*new_auth_tok)->session_key.encrypted_key_size);
 	memcpy((*new_auth_tok)->session_key.encrypted_key,
-	       &data[(*packet_size)], (body_size - 0x02 - ECRYPTFS_SIG_SIZE));
+	       &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2)));
 	(*packet_size) += (*new_auth_tok)->session_key.encrypted_key_size;
 	(*new_auth_tok)->session_key.flags &=
 		~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
 	(*new_auth_tok)->session_key.flags |=
 		ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
 	(*new_auth_tok)->token_type = ECRYPTFS_PRIVATE_KEY;
-	(*new_auth_tok)->flags |= ECRYPTFS_PRIVATE_KEY;
-	/* TODO: Why are we setting this flag here? Don't we want the
-	 * userspace to decrypt the session key? */
+	(*new_auth_tok)->flags = 0;
 	(*new_auth_tok)->session_key.flags &=
 		~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
 	(*new_auth_tok)->session_key.flags &=
@@ -658,22 +667,30 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
 
 	(*packet_size) = 0;
 	(*new_auth_tok) = NULL;
-
-	/* we check that:
-	 *   one byte for the Tag 3 ID flag
-	 *   two bytes for the body size
-	 * do not exceed the maximum_packet_size
+	/*
+	 * This format is inspired by OpenPGP; see RFC 2440
+	 * packet tag 3
+	 *
+	 * Tag 3 identifier (1 byte)
+	 * Max Tag 3 packet size (max 3 bytes)
+	 * Version (1 byte)
+	 * Cipher code (1 byte)
+	 * S2K specifier (1 byte)
+	 * Hash identifier (1 byte)
+	 * Salt (ECRYPTFS_SALT_SIZE)
+	 * Hash iterations (1 byte)
+	 * Encrypted key (arbitrary)
+	 *
+	 * (ECRYPTFS_SALT_SIZE + 7) minimum packet size
 	 */
-	if (unlikely((*packet_size) + 3 > max_packet_size)) {
-		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+	if (max_packet_size < (ECRYPTFS_SALT_SIZE + 7)) {
+		printk(KERN_ERR "Max packet size too large\n");
 		rc = -EINVAL;
 		goto out;
 	}
-
-	/* check for Tag 3 identifyer - one byte */
 	if (data[(*packet_size)++] != ECRYPTFS_TAG_3_PACKET_TYPE) {
-		ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n",
-				ECRYPTFS_TAG_3_PACKET_TYPE);
+		printk(KERN_ERR "First byte != 0x%.2x; invalid packet\n",
+		       ECRYPTFS_TAG_3_PACKET_TYPE);
 		rc = -EINVAL;
 		goto out;
 	}
@@ -682,56 +699,37 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
 	auth_tok_list_item =
 	    kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
 	if (!auth_tok_list_item) {
-		ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
+		printk(KERN_ERR "Unable to allocate memory\n");
 		rc = -ENOMEM;
 		goto out;
 	}
 	(*new_auth_tok) = &auth_tok_list_item->auth_tok;
-
-	/* check for body size - one to two bytes */
 	rc = parse_packet_length(&data[(*packet_size)], &body_size,
 				 &length_size);
 	if (rc) {
-		ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
-				"rc = [%d]\n", rc);
+		printk(KERN_WARNING "Error parsing packet length; rc = [%d]\n",
+		       rc);
 		goto out_free;
 	}
-	if (unlikely(body_size < (0x05 + ECRYPTFS_SALT_SIZE))) {
-		ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
-				body_size);
+	if (unlikely(body_size < (ECRYPTFS_SALT_SIZE + 5))) {
+		printk(KERN_WARNING "Invalid body size ([%td])\n", body_size);
 		rc = -EINVAL;
 		goto out_free;
 	}
 	(*packet_size) += length_size;
-
-	/* now we know the length of the remainting Tag 3 packet size:
-	 *   5 fix bytes for: version string, cipher, S2K ID, hash algo,
-	 *                    number of hash iterations
-	 *   ECRYPTFS_SALT_SIZE bytes for salt
-	 *   body_size bytes minus the stuff above is the encrypted key size
-	 */
 	if (unlikely((*packet_size) + body_size > max_packet_size)) {
-		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		printk(KERN_ERR "Packet size exceeds max\n");
 		rc = -EINVAL;
 		goto out_free;
 	}
-
-	/* There are 5 characters of additional information in the
-	 * packet */
 	(*new_auth_tok)->session_key.encrypted_key_size =
-		body_size - (0x05 + ECRYPTFS_SALT_SIZE);
-	ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
-			(*new_auth_tok)->session_key.encrypted_key_size);
-
-	/* Version 4 (from RFC2440) - one byte */
+		(body_size - (ECRYPTFS_SALT_SIZE + 5));
 	if (unlikely(data[(*packet_size)++] != 0x04)) {
-		ecryptfs_printk(KERN_DEBUG, "Unknown version number "
-				"[%d]\n", data[(*packet_size) - 1]);
+		printk(KERN_WARNING "Unknown version number [%d]\n",
+		       data[(*packet_size) - 1]);
 		rc = -EINVAL;
 		goto out_free;
 	}
-
-	/* cipher - one byte */
 	ecryptfs_cipher_code_to_string(crypt_stat->cipher,
 				       (u16)data[(*packet_size)]);
 	/* A little extra work to differentiate among the AES key
@@ -745,33 +743,26 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
 			(*new_auth_tok)->session_key.encrypted_key_size;
 	}
 	ecryptfs_init_crypt_ctx(crypt_stat);
-	/* S2K identifier 3 (from RFC2440) */
 	if (unlikely(data[(*packet_size)++] != 0x03)) {
-		ecryptfs_printk(KERN_ERR, "Only S2K ID 3 is currently "
-				"supported\n");
+		printk(KERN_WARNING "Only S2K ID 3 is currently supported\n");
 		rc = -ENOSYS;
 		goto out_free;
 	}
-
 	/* TODO: finish the hash mapping */
-	/* hash algorithm - one byte */
 	switch (data[(*packet_size)++]) {
 	case 0x01: /* See RFC2440 for these numbers and their mappings */
 		/* Choose MD5 */
-		/* salt - ECRYPTFS_SALT_SIZE bytes */
 		memcpy((*new_auth_tok)->token.password.salt,
 		       &data[(*packet_size)], ECRYPTFS_SALT_SIZE);
 		(*packet_size) += ECRYPTFS_SALT_SIZE;
-
 		/* This conversion was taken straight from RFC2440 */
-		/* number of hash iterations - one byte */
 		(*new_auth_tok)->token.password.hash_iterations =
 			((u32) 16 + (data[(*packet_size)] & 15))
 				<< ((data[(*packet_size)] >> 4) + 6);
 		(*packet_size)++;
-
-		/* encrypted session key -
-		 *   (body_size-5-ECRYPTFS_SALT_SIZE) bytes */
+		/* Friendly reminder:
+		 * (*new_auth_tok)->session_key.encrypted_key_size =
+		 *         (body_size - (ECRYPTFS_SALT_SIZE + 5)); */
 		memcpy((*new_auth_tok)->session_key.encrypted_key,
 		       &data[(*packet_size)],
 		       (*new_auth_tok)->session_key.encrypted_key_size);
@@ -781,7 +772,7 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
 			~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
 		(*new_auth_tok)->session_key.flags |=
 			ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
-		(*new_auth_tok)->token.password.hash_algo = 0x01;
+		(*new_auth_tok)->token.password.hash_algo = 0x01; /* MD5 */
 		break;
 	default:
 		ecryptfs_printk(KERN_ERR, "Unsupported hash algorithm: "
@@ -837,82 +828,61 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
 
 	(*packet_size) = 0;
 	(*tag_11_contents_size) = 0;
-
-	/* check that:
-	 *   one byte for the Tag 11 ID flag
-	 *   two bytes for the Tag 11 length
-	 * do not exceed the maximum_packet_size
+	/* This format is inspired by OpenPGP; see RFC 2440
+	 * packet tag 11
+	 *
+	 * Tag 11 identifier (1 byte)
+	 * Max Tag 11 packet size (max 3 bytes)
+	 * Binary format specifier (1 byte)
+	 * Filename length (1 byte)
+	 * Filename ("_CONSOLE") (8 bytes)
+	 * Modification date (4 bytes)
+	 * Literal data (arbitrary)
+	 *
+	 * We need at least 16 bytes of data for the packet to even be
+	 * valid.
 	 */
-	if (unlikely((*packet_size) + 3 > max_packet_size)) {
-		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+	if (max_packet_size < 16) {
+		printk(KERN_ERR "Maximum packet size too small\n");
 		rc = -EINVAL;
 		goto out;
 	}
-
-	/* check for Tag 11 identifyer - one byte */
 	if (data[(*packet_size)++] != ECRYPTFS_TAG_11_PACKET_TYPE) {
-		ecryptfs_printk(KERN_WARNING,
-				"Invalid tag 11 packet format\n");
+		printk(KERN_WARNING "Invalid tag 11 packet format\n");
 		rc = -EINVAL;
 		goto out;
 	}
-
-	/* get Tag 11 content length - one or two bytes */
 	rc = parse_packet_length(&data[(*packet_size)], &body_size,
 				 &length_size);
 	if (rc) {
-		ecryptfs_printk(KERN_WARNING,
-				"Invalid tag 11 packet format\n");
+		printk(KERN_WARNING "Invalid tag 11 packet format\n");
 		goto out;
 	}
-	(*packet_size) += length_size;
-
-	if (body_size < 13) {
-		ecryptfs_printk(KERN_WARNING, "Invalid body size ([%d])\n",
-				body_size);
+	if (body_size < 14) {
+		printk(KERN_WARNING "Invalid body size ([%td])\n", body_size);
 		rc = -EINVAL;
 		goto out;
 	}
-	/* We have 13 bytes of surrounding packet values */
-	(*tag_11_contents_size) = (body_size - 13);
-
-	/* now we know the length of the remainting Tag 11 packet size:
-	 *   14 fix bytes for: special flag one, special flag two,
-	 *   		       12 skipped bytes
-	 *   body_size bytes minus the stuff above is the Tag 11 content
-	 */
-	/* FIXME why is the body size one byte smaller than the actual
-	 * size of the body?
-	 * this seems to be an error here as well as in
-	 * write_tag_11_packet() */
+	(*packet_size) += length_size;
+	(*tag_11_contents_size) = (body_size - 14);
 	if (unlikely((*packet_size) + body_size + 1 > max_packet_size)) {
-		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		printk(KERN_ERR "Packet size exceeds max\n");
 		rc = -EINVAL;
 		goto out;
 	}
-
-	/* special flag one - one byte */
 	if (data[(*packet_size)++] != 0x62) {
-		ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
+		printk(KERN_WARNING "Unrecognizable packet\n");
 		rc = -EINVAL;
 		goto out;
 	}
-
-	/* special flag two - one byte */
 	if (data[(*packet_size)++] != 0x08) {
-		ecryptfs_printk(KERN_WARNING, "Unrecognizable packet\n");
+		printk(KERN_WARNING "Unrecognizable packet\n");
 		rc = -EINVAL;
 		goto out;
 	}
-
-	/* skip the next 12 bytes */
-	(*packet_size) += 12; /* We don't care about the filename or
-			       * the timestamp */
-
-	/* get the Tag 11 contents - tag_11_contents_size bytes */
+	(*packet_size) += 12; /* Ignore filename and modification date */
 	memcpy(contents, &data[(*packet_size)], (*tag_11_contents_size));
 	(*packet_size) += (*tag_11_contents_size);
-
 out:
 	if (rc) {
 		(*packet_size) = 0;
@@ -921,130 +891,229 @@ out:
 	return rc;
 }
 
+static int
+ecryptfs_find_global_auth_tok_for_sig(
+	struct ecryptfs_global_auth_tok **global_auth_tok,
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *sig)
+{
+	struct ecryptfs_global_auth_tok *walker;
+	int rc = 0;
+
+	(*global_auth_tok) = NULL;
+	mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
+	list_for_each_entry(walker,
+			    &mount_crypt_stat->global_auth_tok_list,
+			    mount_crypt_stat_list) {
+		if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) {
+			(*global_auth_tok) = walker;
+			goto out;
+		}
+	}
+	rc = -EINVAL;
+out:
+	mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
+	return rc;
+}
+
 /**
- * decrypt_session_key - Decrypt the session key with the given auth_tok.
+ * ecryptfs_verify_version
+ * @version: The version number to confirm
  *
- * Returns Zero on success; non-zero error otherwise.
+ * Returns zero on good version; non-zero otherwise
  */
-static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
-			       struct ecryptfs_crypt_stat *crypt_stat)
+static int ecryptfs_verify_version(u16 version)
 {
-	struct ecryptfs_password *password_s_ptr;
-	struct scatterlist src_sg[2], dst_sg[2];
-	struct mutex *tfm_mutex = NULL;
-	char *encrypted_session_key;
-	char *session_key;
+	int rc = 0;
+	unsigned char major;
+	unsigned char minor;
+
+	major = ((version >> 8) & 0xFF);
+	minor = (version & 0xFF);
+	if (major != ECRYPTFS_VERSION_MAJOR) {
+		ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
+				"Expected [%d]; got [%d]\n",
+				ECRYPTFS_VERSION_MAJOR, major);
+		rc = -EINVAL;
+		goto out;
+	}
+	if (minor != ECRYPTFS_VERSION_MINOR) {
+		ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
+				"Expected [%d]; got [%d]\n",
+				ECRYPTFS_VERSION_MINOR, minor);
+		rc = -EINVAL;
+		goto out;
+	}
+out:
+	return rc;
+}
+
+int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
+				      struct ecryptfs_auth_tok **auth_tok,
+				      char *sig)
+{
+	int rc = 0;
+
+	(*auth_tok_key) = request_key(&key_type_user, sig, NULL);
+	if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
+		printk(KERN_ERR "Could not find key with description: [%s]\n",
+		       sig);
+		process_request_key_err(PTR_ERR(*auth_tok_key));
+		rc = -EINVAL;
+		goto out;
+	}
+	(*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
+	if (ecryptfs_verify_version((*auth_tok)->version)) {
+		printk(KERN_ERR
+		       "Data structure version mismatch. "
+		       "Userspace tools must match eCryptfs "
+		       "kernel module with major version [%d] "
+		       "and minor version [%d]\n",
+		       ECRYPTFS_VERSION_MAJOR,
+		       ECRYPTFS_VERSION_MINOR);
+		rc = -EINVAL;
+		goto out;
+	}
+	if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
+	    && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
+		printk(KERN_ERR "Invalid auth_tok structure "
+		       "returned from key query\n");
+		rc = -EINVAL;
+		goto out;
+	}
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_find_auth_tok_for_sig
+ * @auth_tok: Set to the matching auth_tok; NULL if not found
+ * @crypt_stat: inode crypt_stat crypto context
+ * @sig: Sig of auth_tok to find
+ *
+ * For now, this function simply looks at the registered auth_tok's
+ * linked off the mount_crypt_stat, so all the auth_toks that can be
+ * used must be registered at mount time. This function could
+ * potentially try a lot harder to find auth_tok's (e.g., by calling
+ * out to ecryptfsd to dynamically retrieve an auth_tok object) so
+ * that static registration of auth_tok's will no longer be necessary.
+ *
+ * Returns zero on no error; non-zero on error
+ */
+static int
+ecryptfs_find_auth_tok_for_sig(
+	struct ecryptfs_auth_tok **auth_tok,
+	struct ecryptfs_crypt_stat *crypt_stat, char *sig)
+{
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+		crypt_stat->mount_crypt_stat;
+	struct ecryptfs_global_auth_tok *global_auth_tok;
+	int rc = 0;
+
+	(*auth_tok) = NULL;
+	if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
+						  mount_crypt_stat, sig)) {
+		struct key *auth_tok_key;
+
+		rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
+						       sig);
+	} else
+		(*auth_tok) = global_auth_tok->global_auth_tok;
+	return rc;
+}
+
+/**
+ * decrypt_passphrase_encrypted_session_key - Decrypt the session key with the given auth_tok.
+ * @auth_tok: The passphrase authentication token to use to decrypt the FEK
+ * @crypt_stat: The cryptographic context
+ *
+ * Returns zero on success; non-zero error otherwise
+ */
+static int
+decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
+					 struct ecryptfs_crypt_stat *crypt_stat)
+{
+	struct scatterlist dst_sg;
+	struct scatterlist src_sg;
+	struct mutex *tfm_mutex;
 	struct blkcipher_desc desc = {
 		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
 	};
 	int rc = 0;
 
-	password_s_ptr = &auth_tok->token.password;
-	if (password_s_ptr->flags & ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)
-		ecryptfs_printk(KERN_DEBUG, "Session key encryption key "
-				"set; skipping key generation\n");
-	ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])"
-			":\n",
-			password_s_ptr->session_key_encryption_key_bytes);
-	if (ecryptfs_verbosity > 0)
-		ecryptfs_dump_hex(password_s_ptr->session_key_encryption_key,
-				  password_s_ptr->
-				  session_key_encryption_key_bytes);
-	if (!strcmp(crypt_stat->cipher,
-		    crypt_stat->mount_crypt_stat->global_default_cipher_name)
-	    && crypt_stat->mount_crypt_stat->global_key_tfm) {
-		desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
-		tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
-	} else {
-		char *full_alg_name;
-
-		rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
-							    crypt_stat->cipher,
-							    "ecb");
-		if (rc)
-			goto out;
-		desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0,
-						  CRYPTO_ALG_ASYNC);
-		kfree(full_alg_name);
-		if (IS_ERR(desc.tfm)) {
-			rc = PTR_ERR(desc.tfm);
-			printk(KERN_ERR "Error allocating crypto context; "
-			       "rc = [%d]\n", rc);
-			goto out;
-		}
-		crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(
+			KERN_DEBUG, "Session key encryption key (size [%d]):\n",
+			auth_tok->token.password.session_key_encryption_key_bytes);
+		ecryptfs_dump_hex(
+			auth_tok->token.password.session_key_encryption_key,
+			auth_tok->token.password.session_key_encryption_key_bytes);
+	}
+	rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+							crypt_stat->cipher);
+	if (unlikely(rc)) {
+		printk(KERN_ERR "Internal error whilst attempting to get "
+		       "tfm and mutex for cipher name [%s]; rc = [%d]\n",
+		       crypt_stat->cipher, rc);
+		goto out;
 	}
-	if (tfm_mutex)
-		mutex_lock(tfm_mutex);
-	rc = crypto_blkcipher_setkey(desc.tfm,
-				     password_s_ptr->session_key_encryption_key,
-				     crypt_stat->key_size);
-	if (rc < 0) {
+	rc = virt_to_scatterlist(auth_tok->session_key.encrypted_key,
+				 auth_tok->session_key.encrypted_key_size,
+				 &src_sg, 1);
+	if (rc != 1) {
+		printk(KERN_ERR "Internal error whilst attempting to convert "
+			"auth_tok->session_key.encrypted_key to scatterlist; "
+			"expected rc = 1; got rc = [%d]. "
+		       "auth_tok->session_key.encrypted_key_size = [%d]\n", rc,
+			auth_tok->session_key.encrypted_key_size);
+		goto out;
+	}
+	auth_tok->session_key.decrypted_key_size =
+		auth_tok->session_key.encrypted_key_size;
+	rc = virt_to_scatterlist(auth_tok->session_key.decrypted_key,
+				 auth_tok->session_key.decrypted_key_size,
+				 &dst_sg, 1);
+	if (rc != 1) {
+		printk(KERN_ERR "Internal error whilst attempting to convert "
+			"auth_tok->session_key.decrypted_key to scatterlist; "
+			"expected rc = 1; got rc = [%d]\n", rc);
+		goto out;
+	}
+	mutex_lock(tfm_mutex);
+	rc = crypto_blkcipher_setkey(
+		desc.tfm, auth_tok->token.password.session_key_encryption_key,
+		crypt_stat->key_size);
+	if (unlikely(rc < 0)) {
+		mutex_unlock(tfm_mutex);
 		printk(KERN_ERR "Error setting key for crypto context\n");
 		rc = -EINVAL;
-		goto out_free_tfm;
-	}
-	/* TODO: virt_to_scatterlist */
-	encrypted_session_key = (char *)__get_free_page(GFP_KERNEL);
-	if (!encrypted_session_key) {
-		ecryptfs_printk(KERN_ERR, "Out of memory\n");
-		rc = -ENOMEM;
-		goto out_free_tfm;
+		goto out;
 	}
-	session_key = (char *)__get_free_page(GFP_KERNEL);
-	if (!session_key) {
-		kfree(encrypted_session_key);
-		ecryptfs_printk(KERN_ERR, "Out of memory\n");
-		rc = -ENOMEM;
-		goto out_free_tfm;
-	}
-	memcpy(encrypted_session_key, auth_tok->session_key.encrypted_key,
-	       auth_tok->session_key.encrypted_key_size);
-	src_sg[0].page = virt_to_page(encrypted_session_key);
-	src_sg[0].offset = 0;
-	BUG_ON(auth_tok->session_key.encrypted_key_size > PAGE_CACHE_SIZE);
-	src_sg[0].length = auth_tok->session_key.encrypted_key_size;
-	dst_sg[0].page = virt_to_page(session_key);
-	dst_sg[0].offset = 0;
-	auth_tok->session_key.decrypted_key_size =
-	    auth_tok->session_key.encrypted_key_size;
-	dst_sg[0].length = auth_tok->session_key.encrypted_key_size;
-	rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg,
+	rc = crypto_blkcipher_decrypt(&desc, &dst_sg, &src_sg,
 				      auth_tok->session_key.encrypted_key_size);
-	if (rc) {
+	mutex_unlock(tfm_mutex);
+	if (unlikely(rc)) {
 		printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc);
-		goto out_free_memory;
+		goto out;
 	}
-	auth_tok->session_key.decrypted_key_size =
-	    auth_tok->session_key.encrypted_key_size;
-	memcpy(auth_tok->session_key.decrypted_key, session_key,
-	       auth_tok->session_key.decrypted_key_size);
 	auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
 	memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
 	       auth_tok->session_key.decrypted_key_size);
 	crypt_stat->flags |= ECRYPTFS_KEY_VALID;
-	ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
-	if (ecryptfs_verbosity > 0)
+	if (unlikely(ecryptfs_verbosity > 0)) {
+		ecryptfs_printk(KERN_DEBUG, "FEK of size [%d]:\n",
+				crypt_stat->key_size);
 		ecryptfs_dump_hex(crypt_stat->key,
 				  crypt_stat->key_size);
-out_free_memory:
-	memset(encrypted_session_key, 0, PAGE_CACHE_SIZE);
-	free_page((unsigned long)encrypted_session_key);
-	memset(session_key, 0, PAGE_CACHE_SIZE);
-	free_page((unsigned long)session_key);
-out_free_tfm:
-	if (tfm_mutex)
-		mutex_unlock(tfm_mutex);
-	else
-		crypto_free_blkcipher(desc.tfm);
+	}
 out:
 	return rc;
 }
 
 /**
  * ecryptfs_parse_packet_set
- * @dest: The header page in memory
- * @version: Version of file format, to guide parsing behavior
+ * @crypt_stat: The cryptographic context
+ * @src: Virtual address of region of memory containing the packets
+ * @ecryptfs_dentry: The eCryptfs dentry associated with the packet set
  *
  * Get crypt_stat to have the file's session key if the requisite key
  * is available to decrypt the session key.
@@ -1058,25 +1127,22 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 			      struct dentry *ecryptfs_dentry)
 {
 	size_t i = 0;
-	size_t found_auth_tok = 0;
+	size_t found_auth_tok;
 	size_t next_packet_is_auth_tok_packet;
-	char sig[ECRYPTFS_SIG_SIZE_HEX];
 	struct list_head auth_tok_list;
-	struct list_head *walker;
-	struct ecryptfs_auth_tok *chosen_auth_tok = NULL;
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
-		&ecryptfs_superblock_to_private(
-			ecryptfs_dentry->d_sb)->mount_crypt_stat;
-	struct ecryptfs_auth_tok *candidate_auth_tok = NULL;
+	struct ecryptfs_auth_tok *matching_auth_tok;
+	struct ecryptfs_auth_tok *candidate_auth_tok;
+	char *candidate_auth_tok_sig;
 	size_t packet_size;
 	struct ecryptfs_auth_tok *new_auth_tok;
 	unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE];
+	struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
 	size_t tag_11_contents_size;
 	size_t tag_11_packet_size;
 	int rc = 0;
 
 	INIT_LIST_HEAD(&auth_tok_list);
-	/* Parse the header to find as many packets as we can, these will be
+	/* Parse the header to find as many packets as we can; these will be
 	 * added the our &auth_tok_list */
 	next_packet_is_auth_tok_packet = 1;
 	while (next_packet_is_auth_tok_packet) {
@@ -1155,73 +1221,85 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 		}
 	}
 	if (list_empty(&auth_tok_list)) {
-		rc = -EINVAL; /* Do not support non-encrypted files in
-			       * the 0.1 release */
+		printk(KERN_ERR "The lower file appears to be a non-encrypted "
+		       "eCryptfs file; this is not supported in this version "
+		       "of the eCryptfs kernel module\n");
+		rc = -EINVAL;
 		goto out;
 	}
-	/* If we have a global auth tok, then we should try to use
-	 * it */
-	if (mount_crypt_stat->global_auth_tok) {
-		memcpy(sig, mount_crypt_stat->global_auth_tok_sig,
-		       ECRYPTFS_SIG_SIZE_HEX);
-		chosen_auth_tok = mount_crypt_stat->global_auth_tok;
-	} else
-		BUG(); /* We should always have a global auth tok in
-			* the 0.1 release */
-	/* Scan list to see if our chosen_auth_tok works */
-	list_for_each(walker, &auth_tok_list) {
-		struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
-		auth_tok_list_item =
-		    list_entry(walker, struct ecryptfs_auth_tok_list_item,
-			       list);
+	/* auth_tok_list contains the set of authentication tokens
+	 * parsed from the metadata. We need to find a matching
+	 * authentication token that has the secret component(s)
+	 * necessary to decrypt the EFEK in the auth_tok parsed from
+	 * the metadata. There may be several potential matches, but
+	 * just one will be sufficient to decrypt to get the FEK. */
+find_next_matching_auth_tok:
+	found_auth_tok = 0;
+	list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
 		candidate_auth_tok = &auth_tok_list_item->auth_tok;
 		if (unlikely(ecryptfs_verbosity > 0)) {
 			ecryptfs_printk(KERN_DEBUG,
 					"Considering cadidate auth tok:\n");
 			ecryptfs_dump_auth_tok(candidate_auth_tok);
 		}
-		/* TODO: Replace ECRYPTFS_SIG_SIZE_HEX w/ dynamic value */
-		if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD
-		    && !strncmp(candidate_auth_tok->token.password.signature,
-				sig, ECRYPTFS_SIG_SIZE_HEX)) {
-			found_auth_tok = 1;
-			goto leave_list;
-			/* TODO: Transfer the common salt into the
-			 * crypt_stat salt */
-		} else if ((candidate_auth_tok->token_type
-			    == ECRYPTFS_PRIVATE_KEY)
-			   && !strncmp(candidate_auth_tok->token.private_key.signature,
-				     sig, ECRYPTFS_SIG_SIZE_HEX)) {
+		rc = ecryptfs_get_auth_tok_sig(&candidate_auth_tok_sig,
+					       candidate_auth_tok);
+		if (rc) {
+			printk(KERN_ERR
+			       "Unrecognized candidate auth tok type: [%d]\n",
+			       candidate_auth_tok->token_type);
+			rc = -EINVAL;
+			goto out_wipe_list;
+		}
+		ecryptfs_find_auth_tok_for_sig(&matching_auth_tok, crypt_stat,
+					       candidate_auth_tok_sig);
+		if (matching_auth_tok) {
 			found_auth_tok = 1;
-			goto leave_list;
+			goto found_matching_auth_tok;
 		}
 	}
 	if (!found_auth_tok) {
-		ecryptfs_printk(KERN_ERR, "Could not find authentication "
-				"token on temporary list for sig [%.*s]\n",
-				ECRYPTFS_SIG_SIZE_HEX, sig);
+		ecryptfs_printk(KERN_ERR, "Could not find a usable "
+				"authentication token\n");
 		rc = -EIO;
 		goto out_wipe_list;
 	}
-leave_list:
-	rc = -ENOTSUPP;
+found_matching_auth_tok:
 	if (candidate_auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
 		memcpy(&(candidate_auth_tok->token.private_key),
-		       &(chosen_auth_tok->token.private_key),
+		       &(matching_auth_tok->token.private_key),
 		       sizeof(struct ecryptfs_private_key));
-		rc = decrypt_pki_encrypted_session_key(mount_crypt_stat,
-						       candidate_auth_tok,
+		rc = decrypt_pki_encrypted_session_key(candidate_auth_tok,
 						       crypt_stat);
 	} else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) {
 		memcpy(&(candidate_auth_tok->token.password),
-		       &(chosen_auth_tok->token.password),
+		       &(matching_auth_tok->token.password),
 		       sizeof(struct ecryptfs_password));
-		rc = decrypt_session_key(candidate_auth_tok, crypt_stat);
+		rc = decrypt_passphrase_encrypted_session_key(
+			candidate_auth_tok, crypt_stat);
 	}
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error decrypting the "
-				"session key; rc = [%d]\n", rc);
-		goto out_wipe_list;
+		struct ecryptfs_auth_tok_list_item *auth_tok_list_item_tmp;
+
+		ecryptfs_printk(KERN_WARNING, "Error decrypting the "
+				"session key for authentication token with sig "
+				"[%.*s]; rc = [%d]. Removing auth tok "
+				"candidate from the list and searching for "
+				"the next match.\n", ECRYPTFS_SIG_SIZE_HEX,
+				candidate_auth_tok_sig, rc);
+		list_for_each_entry_safe(auth_tok_list_item,
+					 auth_tok_list_item_tmp,
+					 &auth_tok_list, list) {
+			if (candidate_auth_tok
+			    == &auth_tok_list_item->auth_tok) {
+				list_del(&auth_tok_list_item->list);
+				kmem_cache_free(
+					ecryptfs_auth_tok_list_item_cache,
+					auth_tok_list_item);
+				goto find_next_matching_auth_tok;
+			}
+		}
+		BUG();
 	}
 	rc = ecryptfs_compute_root_iv(crypt_stat);
 	if (rc) {
@@ -1240,6 +1318,7 @@ out_wipe_list:
 out:
 	return rc;
 }
+
 static int
 pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
 			struct ecryptfs_crypt_stat *crypt_stat,
@@ -1284,22 +1363,25 @@ out:
 /**
  * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet
  * @dest: Buffer into which to write the packet
- * @max: Maximum number of bytes that can be writtn
+ * @remaining_bytes: Maximum number of bytes that can be written
+ * @auth_tok: The authentication token used for generating the tag 1 packet
+ * @crypt_stat: The cryptographic context
+ * @key_rec: The key record struct for the tag 1 packet
  * @packet_size: This function will write the number of bytes that end
  *               up constituting the packet; set to zero on error
  *
  * Returns zero on success; non-zero on error.
  */
 static int
-write_tag_1_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
+write_tag_1_packet(char *dest, size_t *remaining_bytes,
+		   struct ecryptfs_auth_tok *auth_tok,
 		   struct ecryptfs_crypt_stat *crypt_stat,
-		   struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
 		   struct ecryptfs_key_record *key_rec, size_t *packet_size)
 {
 	size_t i;
 	size_t encrypted_session_key_valid = 0;
-	size_t key_rec_size;
 	size_t packet_size_length;
+	size_t max_packet_size;
 	int rc = 0;
 
 	(*packet_size) = 0;
@@ -1329,37 +1411,23 @@ write_tag_1_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 		ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size);
 	}
 encrypted_session_key_set:
-	/* Now we have a valid key_rec.  Append it to the
-	 * key_rec set. */
-	key_rec_size = (sizeof(struct ecryptfs_key_record)
-			- ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
-			+ (key_rec->enc_key_size));
-	/* TODO: Include a packet size limit as a parameter to this
-	 * function once we have multi-packet headers (for versions
-	 * later than 0.1 */
-	if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
-		ecryptfs_printk(KERN_ERR, "Keyset too large\n");
-		rc = -EINVAL;
-		goto out;
-	}
-	/*              ***** TAG 1 Packet Format *****
-	 *    | version number                     | 1 byte       |
-	 *    | key ID                             | 8 bytes      |
-	 *    | public key algorithm               | 1 byte       |
-	 *    | encrypted session key              | arbitrary    |
-	 */
-	if ((0x02 + ECRYPTFS_SIG_SIZE + key_rec->enc_key_size) >= max) {
-		ecryptfs_printk(KERN_ERR,
-				"Authentication token is too large\n");
+	/* This format is inspired by OpenPGP; see RFC 2440
+	 * packet tag 1 */
+	max_packet_size = (1                         /* Tag 1 identifier */
+			   + 3                       /* Max Tag 1 packet size */
+			   + 1                       /* Version */
+			   + ECRYPTFS_SIG_SIZE       /* Key identifier */
+			   + 1                       /* Cipher identifier */
+			   + key_rec->enc_key_size); /* Encrypted key size */
+	if (max_packet_size > (*remaining_bytes)) {
+		printk(KERN_ERR "Packet length larger than maximum allowable; "
+		       "need up to [%zu] bytes, but there are only [%zu] "
+		       "available\n", max_packet_size, (*remaining_bytes));
 		rc = -EINVAL;
 		goto out;
 	}
 	dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
-	/* This format is inspired by OpenPGP; see RFC 2440
-	 * packet tag 1 */
-	rc = write_packet_length(&dest[(*packet_size)],
-				 (0x02 + ECRYPTFS_SIG_SIZE +
-				 key_rec->enc_key_size),
+	rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4),
 				 &packet_size_length);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
@@ -1377,13 +1445,15 @@ encrypted_session_key_set:
 out:
 	if (rc)
 		(*packet_size) = 0;
+	else
+		(*remaining_bytes) -= (*packet_size);
 	return rc;
 }
 
 /**
  * write_tag_11_packet
  * @dest: Target into which Tag 11 packet is to be written
- * @max: Maximum packet length
+ * @remaining_bytes: Maximum packet length
  * @contents: Byte array of contents to copy in
  * @contents_length: Number of bytes in contents
  * @packet_length: Length of the Tag 11 packet written; zero on error
@@ -1391,54 +1461,59 @@ out:
  * Returns zero on success; non-zero on error.
  */
 static int
-write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
-		    size_t *packet_length)
+write_tag_11_packet(char *dest, size_t *remaining_bytes, char *contents,
+		    size_t contents_length, size_t *packet_length)
 {
 	size_t packet_size_length;
+	size_t max_packet_size;
 	int rc = 0;
 
 	(*packet_length) = 0;
-	if ((13 + contents_length) > max) {
+	/* This format is inspired by OpenPGP; see RFC 2440
+	 * packet tag 11 */
+	max_packet_size = (1                   /* Tag 11 identifier */
+			   + 3                 /* Max Tag 11 packet size */
+			   + 1                 /* Binary format specifier */
+			   + 1                 /* Filename length */
+			   + 8                 /* Filename ("_CONSOLE") */
+			   + 4                 /* Modification date */
+			   + contents_length); /* Literal data */
+	if (max_packet_size > (*remaining_bytes)) {
+		printk(KERN_ERR "Packet length larger than maximum allowable; "
+		       "need up to [%zu] bytes, but there are only [%zu] "
+		       "available\n", max_packet_size, (*remaining_bytes));
 		rc = -EINVAL;
-		ecryptfs_printk(KERN_ERR, "Packet length larger than "
-				"maximum allowable\n");
 		goto out;
 	}
-	/* General packet header */
-	/* Packet tag */
 	dest[(*packet_length)++] = ECRYPTFS_TAG_11_PACKET_TYPE;
-	/* Packet length */
 	rc = write_packet_length(&dest[(*packet_length)],
-				 (13 + contents_length), &packet_size_length);
+				 (max_packet_size - 4), &packet_size_length);
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error generating tag 11 packet "
-				"header; cannot generate packet length\n");
+		printk(KERN_ERR "Error generating tag 11 packet header; cannot "
+		       "generate packet length. rc = [%d]\n", rc);
 		goto out;
 	}
 	(*packet_length) += packet_size_length;
-	/* Tag 11 specific */
-	/* One-octet field that describes how the data is formatted */
-	dest[(*packet_length)++] = 0x62; /* binary data */
-	/* One-octet filename length followed by filename */
+	dest[(*packet_length)++] = 0x62; /* binary data format specifier */
 	dest[(*packet_length)++] = 8;
 	memcpy(&dest[(*packet_length)], "_CONSOLE", 8);
 	(*packet_length) += 8;
-	/* Four-octet number indicating modification date */
 	memset(&dest[(*packet_length)], 0x00, 4);
 	(*packet_length) += 4;
-	/* Remainder is literal data */
 	memcpy(&dest[(*packet_length)], contents, contents_length);
 	(*packet_length) += contents_length;
  out:
 	if (rc)
 		(*packet_length) = 0;
+	else
+		(*remaining_bytes) -= (*packet_length);
 	return rc;
 }
 
 /**
  * write_tag_3_packet
  * @dest: Buffer into which to write the packet
- * @max: Maximum number of bytes that can be written
+ * @remaining_bytes: Maximum number of bytes that can be written
  * @auth_tok: Authentication token
  * @crypt_stat: The cryptographic context
  * @key_rec: encrypted key
@@ -1448,19 +1523,22 @@ write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
  * Returns zero on success; non-zero on error.
  */
 static int
-write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
+write_tag_3_packet(char *dest, size_t *remaining_bytes,
+		   struct ecryptfs_auth_tok *auth_tok,
 		   struct ecryptfs_crypt_stat *crypt_stat,
 		   struct ecryptfs_key_record *key_rec, size_t *packet_size)
 {
 	size_t i;
 	size_t encrypted_session_key_valid = 0;
 	char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
-	struct scatterlist dest_sg[2];
-	struct scatterlist src_sg[2];
+	struct scatterlist dst_sg;
+	struct scatterlist src_sg;
 	struct mutex *tfm_mutex = NULL;
-	size_t key_rec_size;
-	size_t packet_size_length;
 	size_t cipher_code;
+	size_t packet_size_length;
+	size_t max_packet_size;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+		crypt_stat->mount_crypt_stat;
 	struct blkcipher_desc desc = {
 		.tfm = NULL,
 		.flags = CRYPTO_TFM_REQ_MAY_SLEEP
@@ -1470,16 +1548,25 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 	(*packet_size) = 0;
 	ecryptfs_from_hex(key_rec->sig, auth_tok->token.password.signature,
 			  ECRYPTFS_SIG_SIZE);
-	encrypted_session_key_valid = 0;
-	for (i = 0; i < crypt_stat->key_size; i++)
-		encrypted_session_key_valid |=
-			auth_tok->session_key.encrypted_key[i];
-	if (encrypted_session_key_valid) {
-		memcpy(key_rec->enc_key,
-		       auth_tok->session_key.encrypted_key,
-		       auth_tok->session_key.encrypted_key_size);
-		goto encrypted_session_key_set;
+	rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+							crypt_stat->cipher);
+	if (unlikely(rc)) {
+		printk(KERN_ERR "Internal error whilst attempting to get "
+		       "tfm and mutex for cipher name [%s]; rc = [%d]\n",
+		       crypt_stat->cipher, rc);
+		goto out;
+	}
+	if (mount_crypt_stat->global_default_cipher_key_size == 0) {
+		struct blkcipher_alg *alg = crypto_blkcipher_alg(desc.tfm);
+
+		printk(KERN_WARNING "No key size specified at mount; "
+		       "defaulting to [%d]\n", alg->max_keysize);
+		mount_crypt_stat->global_default_cipher_key_size =
+			alg->max_keysize;
 	}
+	if (crypt_stat->key_size == 0)
+		crypt_stat->key_size =
+			mount_crypt_stat->global_default_cipher_key_size;
 	if (auth_tok->session_key.encrypted_key_size == 0)
 		auth_tok->session_key.encrypted_key_size =
 			crypt_stat->key_size;
@@ -1487,9 +1574,24 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 	    && strcmp("aes", crypt_stat->cipher) == 0) {
 		memset((crypt_stat->key + 24), 0, 8);
 		auth_tok->session_key.encrypted_key_size = 32;
-	}
+	} else
+		auth_tok->session_key.encrypted_key_size = crypt_stat->key_size;
 	key_rec->enc_key_size =
 		auth_tok->session_key.encrypted_key_size;
+	encrypted_session_key_valid = 0;
+	for (i = 0; i < auth_tok->session_key.encrypted_key_size; i++)
+		encrypted_session_key_valid |=
+			auth_tok->session_key.encrypted_key[i];
+	if (encrypted_session_key_valid) {
+		ecryptfs_printk(KERN_DEBUG, "encrypted_session_key_valid != 0; "
+				"using auth_tok->session_key.encrypted_key, "
+				"where key_rec->enc_key_size = [%d]\n",
+				key_rec->enc_key_size);
+		memcpy(key_rec->enc_key,
+		       auth_tok->session_key.encrypted_key,
+		       key_rec->enc_key_size);
+		goto encrypted_session_key_set;
+	}
 	if (auth_tok->token.password.flags &
 	    ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET) {
 		ecryptfs_printk(KERN_DEBUG, "Using previously generated "
@@ -1508,54 +1610,32 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 		ecryptfs_printk(KERN_DEBUG, "Session key encryption key:\n");
 		ecryptfs_dump_hex(session_key_encryption_key, 16);
 	}
-	rc = virt_to_scatterlist(crypt_stat->key,
-				 key_rec->enc_key_size, src_sg, 2);
-	if (!rc) {
+	rc = virt_to_scatterlist(crypt_stat->key, key_rec->enc_key_size,
+				 &src_sg, 1);
+	if (rc != 1) {
 		ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
-				"for crypt_stat session key\n");
+				"for crypt_stat session key; expected rc = 1; "
+				"got rc = [%d]. key_rec->enc_key_size = [%d]\n",
+				rc, key_rec->enc_key_size);
 		rc = -ENOMEM;
 		goto out;
 	}
-	rc = virt_to_scatterlist(key_rec->enc_key,
-				 key_rec->enc_key_size, dest_sg, 2);
-	if (!rc) {
+	rc = virt_to_scatterlist(key_rec->enc_key, key_rec->enc_key_size,
+				 &dst_sg, 1);
+	if (rc != 1) {
 		ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
-				"for crypt_stat encrypted session key\n");
+				"for crypt_stat encrypted session key; "
+				"expected rc = 1; got rc = [%d]. "
+				"key_rec->enc_key_size = [%d]\n", rc,
+				key_rec->enc_key_size);
 		rc = -ENOMEM;
 		goto out;
 	}
-	if (!strcmp(crypt_stat->cipher,
-		    crypt_stat->mount_crypt_stat->global_default_cipher_name)
-	    && crypt_stat->mount_crypt_stat->global_key_tfm) {
-		desc.tfm = crypt_stat->mount_crypt_stat->global_key_tfm;
-		tfm_mutex = &crypt_stat->mount_crypt_stat->global_key_tfm_mutex;
-	} else {
-		char *full_alg_name;
-
-		rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
-							    crypt_stat->cipher,
-							    "ecb");
-		if (rc)
-			goto out;
-		desc.tfm = crypto_alloc_blkcipher(full_alg_name, 0,
-						  CRYPTO_ALG_ASYNC);
-		kfree(full_alg_name);
-		if (IS_ERR(desc.tfm)) {
-			rc = PTR_ERR(desc.tfm);
-			ecryptfs_printk(KERN_ERR, "Could not initialize crypto "
-					"context for cipher [%s]; rc = [%d]\n",
-					crypt_stat->cipher, rc);
-			goto out;
-		}
-		crypto_blkcipher_set_flags(desc.tfm, CRYPTO_TFM_REQ_WEAK_KEY);
-	}
-	if (tfm_mutex)
-		mutex_lock(tfm_mutex);
+	mutex_lock(tfm_mutex);
 	rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key,
 				     crypt_stat->key_size);
 	if (rc < 0) {
-		if (tfm_mutex)
-			mutex_unlock(tfm_mutex);
+		mutex_unlock(tfm_mutex);
 		ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
 				"context; rc = [%d]\n", rc);
 		goto out;
@@ -1563,56 +1643,53 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
 	rc = 0;
 	ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
 			crypt_stat->key_size);
-	rc = crypto_blkcipher_encrypt(&desc, dest_sg, src_sg,
+	rc = crypto_blkcipher_encrypt(&desc, &dst_sg, &src_sg,
 				      (*key_rec).enc_key_size);
+	mutex_unlock(tfm_mutex);
 	if (rc) {
 		printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc);
 		goto out;
 	}
-	if (tfm_mutex)
-		mutex_unlock(tfm_mutex);
 	ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
-	if (ecryptfs_verbosity > 0)
+	if (ecryptfs_verbosity > 0) {
+		ecryptfs_printk(KERN_DEBUG, "EFEK of size [%d]:\n",
+				key_rec->enc_key_size);
 		ecryptfs_dump_hex(key_rec->enc_key,
 				  key_rec->enc_key_size);
-encrypted_session_key_set:
-	/* Now we have a valid key_rec.  Append it to the
-	 * key_rec set. */
-	key_rec_size = (sizeof(struct ecryptfs_key_record)
-			- ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
-			+ (key_rec->enc_key_size));
-	/* TODO: Include a packet size limit as a parameter to this
-	 * function once we have multi-packet headers (for versions
-	 * later than 0.1 */
-	if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
-		ecryptfs_printk(KERN_ERR, "Keyset too large\n");
-		rc = -EINVAL;
-		goto out;
 	}
-	/* TODO: Packet size limit */
-	/* We have 5 bytes of surrounding packet data */
-	if ((0x05 + ECRYPTFS_SALT_SIZE
-	     + key_rec->enc_key_size) >= max) {
-		ecryptfs_printk(KERN_ERR, "Authentication token is too "
-				"large\n");
+encrypted_session_key_set:
+	/* This format is inspired by OpenPGP; see RFC 2440
+	 * packet tag 3 */
+	max_packet_size = (1                         /* Tag 3 identifier */
+			   + 3                       /* Max Tag 3 packet size */
+			   + 1                       /* Version */
+			   + 1                       /* Cipher code */
+			   + 1                       /* S2K specifier */
+			   + 1                       /* Hash identifier */
+			   + ECRYPTFS_SALT_SIZE      /* Salt */
+			   + 1                       /* Hash iterations */
+			   + key_rec->enc_key_size); /* Encrypted key size */
+	if (max_packet_size > (*remaining_bytes)) {
+		printk(KERN_ERR "Packet too large; need up to [%zu] bytes, but "
+		       "there are only [%zu] available\n", max_packet_size,
+		       (*remaining_bytes));
 		rc = -EINVAL;
 		goto out;
 	}
-	/* This format is inspired by OpenPGP; see RFC 2440
-	 * packet tag 3 */
 	dest[(*packet_size)++] = ECRYPTFS_TAG_3_PACKET_TYPE;
-	/* ver+cipher+s2k+hash+salt+iter+enc_key */
-	rc = write_packet_length(&dest[(*packet_size)],
-				 (0x05 + ECRYPTFS_SALT_SIZE
-				  + key_rec->enc_key_size),
+	/* Chop off the Tag 3 identifier(1) and Tag 3 packet size(3)
+	 * to get the number of octets in the actual Tag 3 packet */
+	rc = write_packet_length(&dest[(*packet_size)], (max_packet_size - 4),
 				 &packet_size_length);
 	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet "
-				"header; cannot generate packet length\n");
+		printk(KERN_ERR "Error generating tag 3 packet header; cannot "
+		       "generate packet length. rc = [%d]\n", rc);
 		goto out;
 	}
 	(*packet_size) += packet_size_length;
 	dest[(*packet_size)++] = 0x04; /* version 4 */
+	/* TODO: Break from RFC2440 so that arbitrary ciphers can be
+	 * specified with strings */
 	cipher_code = ecryptfs_code_for_cipher_string(crypt_stat);
 	if (cipher_code == 0) {
 		ecryptfs_printk(KERN_WARNING, "Unable to generate code for "
@@ -1631,10 +1708,10 @@ encrypted_session_key_set:
 	       key_rec->enc_key_size);
 	(*packet_size) += key_rec->enc_key_size;
 out:
-	if (desc.tfm && !tfm_mutex)
-		crypto_free_blkcipher(desc.tfm);
 	if (rc)
 		(*packet_size) = 0;
+	else
+		(*remaining_bytes) -= (*packet_size);
 	return rc;
 }
 
@@ -1642,7 +1719,7 @@ struct kmem_cache *ecryptfs_key_record_cache;
 
 /**
  * ecryptfs_generate_key_packet_set
- * @dest: Virtual address from which to write the key record set
+ * @dest_base: Virtual address from which to write the key record set
  * @crypt_stat: The cryptographic context from which the
  *              authentication tokens will be retrieved
  * @ecryptfs_dentry: The dentry, used to retrieve the mount crypt stat
@@ -1662,24 +1739,43 @@ ecryptfs_generate_key_packet_set(char *dest_base,
 				 size_t max)
 {
 	struct ecryptfs_auth_tok *auth_tok;
+	struct ecryptfs_global_auth_tok *global_auth_tok;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
 		&ecryptfs_superblock_to_private(
 			ecryptfs_dentry->d_sb)->mount_crypt_stat;
 	size_t written;
 	struct ecryptfs_key_record *key_rec;
+	struct ecryptfs_key_sig *key_sig;
 	int rc = 0;
 
 	(*len) = 0;
+	mutex_lock(&crypt_stat->keysig_list_mutex);
 	key_rec = kmem_cache_alloc(ecryptfs_key_record_cache, GFP_KERNEL);
 	if (!key_rec) {
 		rc = -ENOMEM;
 		goto out;
 	}
-	if (mount_crypt_stat->global_auth_tok) {
-		auth_tok = mount_crypt_stat->global_auth_tok;
+	list_for_each_entry(key_sig, &crypt_stat->keysig_list,
+			    crypt_stat_list) {
+		memset(key_rec, 0, sizeof(*key_rec));
+		rc = ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
+							   mount_crypt_stat,
+							   key_sig->keysig);
+		if (rc) {
+			printk(KERN_ERR "Error attempting to get the global "
+			       "auth_tok; rc = [%d]\n", rc);
+			goto out_free;
+		}
+		if (global_auth_tok->flags & ECRYPTFS_AUTH_TOK_INVALID) {
+			printk(KERN_WARNING
+			       "Skipping invalid auth tok with sig = [%s]\n",
+			       global_auth_tok->sig);
+			continue;
+		}
+		auth_tok = global_auth_tok->global_auth_tok;
 		if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
 			rc = write_tag_3_packet((dest_base + (*len)),
-						max, auth_tok,
+						&max, auth_tok,
 						crypt_stat, key_rec,
 						&written);
 			if (rc) {
@@ -1689,10 +1785,9 @@ ecryptfs_generate_key_packet_set(char *dest_base,
 			}
 			(*len) += written;
 			/* Write auth tok signature packet */
-			rc = write_tag_11_packet(
-				(dest_base + (*len)),
-				(max - (*len)),
-				key_rec->sig, ECRYPTFS_SIG_SIZE, &written);
+			rc = write_tag_11_packet((dest_base + (*len)), &max,
+						 key_rec->sig,
+						 ECRYPTFS_SIG_SIZE, &written);
 			if (rc) {
 				ecryptfs_printk(KERN_ERR, "Error writing "
 						"auth tok signature packet\n");
@@ -1701,9 +1796,8 @@ ecryptfs_generate_key_packet_set(char *dest_base,
 			(*len) += written;
 		} else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
 			rc = write_tag_1_packet(dest_base + (*len),
-						max, auth_tok,
-						crypt_stat,mount_crypt_stat,
-						key_rec, &written);
+						&max, auth_tok,
+						crypt_stat, key_rec, &written);
 			if (rc) {
 				ecryptfs_printk(KERN_WARNING, "Error "
 						"writing tag 1 packet\n");
@@ -1716,19 +1810,69 @@ ecryptfs_generate_key_packet_set(char *dest_base,
 			rc = -EINVAL;
 			goto out_free;
 		}
-	} else
-		BUG();
-	if (likely((max - (*len)) > 0)) {
+	}
+	if (likely(max > 0)) {
 		dest_base[(*len)] = 0x00;
 	} else {
 		ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n");
 		rc = -EIO;
 	}
-
 out_free:
 	kmem_cache_free(ecryptfs_key_record_cache, key_rec);
 out:
 	if (rc)
 		(*len) = 0;
+	mutex_unlock(&crypt_stat->keysig_list_mutex);
 	return rc;
 }
+
+struct kmem_cache *ecryptfs_key_sig_cache;
+
+int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig)
+{
+	struct ecryptfs_key_sig *new_key_sig;
+
+	/* Zeroed allocation: keysig bytes past the copied signature are
+	 * '\0', so a keysig buffer larger than the signature ends up
+	 * NUL-terminated. */
+	new_key_sig = kmem_cache_zalloc(ecryptfs_key_sig_cache, GFP_KERNEL);
+	if (!new_key_sig) {
+		printk(KERN_ERR
+		       "Error allocating from ecryptfs_key_sig_cache\n");
+		return -ENOMEM;
+	}
+	memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX);
+	mutex_lock(&crypt_stat->keysig_list_mutex);
+	list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list);
+	mutex_unlock(&crypt_stat->keysig_list_mutex);
+	return 0;
+}
+
+struct kmem_cache *ecryptfs_global_auth_tok_cache;
+
+int
+ecryptfs_add_global_auth_tok(struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+			     char *sig)
+{
+	struct ecryptfs_global_auth_tok *new_auth_tok;
+
+	/* Zeroed allocation: flags and the key/auth_tok pointers must not
+	 * hold slab garbage, because ecryptfs_init_global_auth_toks()
+	 * read-modify-writes flags before anything else sets them. */
+	new_auth_tok = kmem_cache_zalloc(ecryptfs_global_auth_tok_cache,
+					 GFP_KERNEL);
+	if (!new_auth_tok) {
+		printk(KERN_ERR "Error allocating from "
+		       "ecryptfs_global_auth_tok_cache\n");
+		return -ENOMEM;
+	}
+	memcpy(new_auth_tok->sig, sig, ECRYPTFS_SIG_SIZE_HEX);
+	new_auth_tok->sig[ECRYPTFS_SIG_SIZE_HEX] = '\0';
+	mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
+	list_add(&new_auth_tok->mount_crypt_stat_list,
+		 &mount_crypt_stat->global_auth_tok_list);
+	mount_crypt_stat->num_global_auth_toks++;
+	mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
+	return 0;
+}
+
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a98497264fe..97e6801f722 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -99,6 +99,64 @@ void __ecryptfs_printk(const char *fmt, ...)
 }
 
 /**
+ * ecryptfs_init_persistent_file
+ * @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with
+ *                   the lower dentry and the lower mount set
+ *
+ * eCryptfs only ever keeps a single open file for every lower
+ * inode. All I/O operations to the lower inode occur through that
+ * file. When the first eCryptfs dentry that interposes with the first
+ * lower dentry for that inode is created, this function creates the
+ * persistent file struct and associates it with the eCryptfs
+ * inode. When the eCryptfs inode is destroyed, the file is closed.
+ *
+ * The persistent file will be opened with read/write permissions, if
+ * possible. Otherwise, it is opened read-only.
+ *
+ * This function does nothing if a lower persistent file is already
+ * associated with the eCryptfs inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
+{
+	struct ecryptfs_inode_info *inode_info =
+		ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
+	struct dentry *lower_dentry;
+	struct vfsmount *lower_mnt;
+	int rc = 0;
+
+	mutex_lock(&inode_info->lower_file_mutex);
+	if (inode_info->lower_file)
+		goto out;
+	lower_mnt = ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
+	lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+	/* dentry_open() drops the dentry and mount references itself on
+	 * failure, so take a fresh pair before each attempt; on success
+	 * they are released by the fput() at eCryptfs inode destruction. */
+	dget(lower_dentry);
+	mntget(lower_mnt);
+	inode_info->lower_file = dentry_open(lower_dentry, lower_mnt,
+					     (O_RDWR | O_LARGEFILE));
+	if (IS_ERR(inode_info->lower_file)) {
+		dget(lower_dentry);
+		mntget(lower_mnt);
+		inode_info->lower_file = dentry_open(lower_dentry, lower_mnt,
+						     (O_RDONLY | O_LARGEFILE));
+	}
+	if (IS_ERR(inode_info->lower_file)) {
+		printk(KERN_ERR "Error opening lower persistent file "
+		       "for lower_dentry [0x%p] and lower_mnt [0x%p]\n",
+		       lower_dentry, lower_mnt);
+		rc = PTR_ERR(inode_info->lower_file);
+		inode_info->lower_file = NULL;
+	}
+out:
+	mutex_unlock(&inode_info->lower_file_mutex);
+	return rc;
+}
+
+/**
  * ecryptfs_interpose
  * @lower_dentry: Existing dentry in the lower filesystem
  * @dentry: ecryptfs' dentry
@@ -155,6 +213,13 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
 	/* This size will be overwritten for real files w/ headers and
 	 * other metadata */
 	fsstack_copy_inode_size(inode, lower_inode);
+	rc = ecryptfs_init_persistent_file(dentry);
+	if (rc) {
+		printk(KERN_ERR "%s: Error attempting to initialize the "
+		       "persistent file for the dentry with name [%s]; "
+		       "rc = [%d]\n", __FUNCTION__, dentry->d_name.name, rc);
+		goto out;
+	}
 out:
 	return rc;
 }
@@ -179,38 +244,41 @@ static match_table_t tokens = {
 	{ecryptfs_opt_err, NULL}
 };
 
-/**
- * ecryptfs_verify_version
- * @version: The version number to confirm
- *
- * Returns zero on good version; non-zero otherwise
- */
-static int ecryptfs_verify_version(u16 version)
+static int ecryptfs_init_global_auth_toks(
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
 {
+	struct ecryptfs_global_auth_tok *global_auth_tok;
 	int rc = 0;
-	unsigned char major;
-	unsigned char minor;
-
-	major = ((version >> 8) & 0xFF);
-	minor = (version & 0xFF);
-	if (major != ECRYPTFS_VERSION_MAJOR) {
-		ecryptfs_printk(KERN_ERR, "Major version number mismatch. "
-				"Expected [%d]; got [%d]\n",
-				ECRYPTFS_VERSION_MAJOR, major);
-		rc = -EINVAL;
-		goto out;
-	}
-	if (minor != ECRYPTFS_VERSION_MINOR) {
-		ecryptfs_printk(KERN_ERR, "Minor version number mismatch. "
-				"Expected [%d]; got [%d]\n",
-				ECRYPTFS_VERSION_MINOR, minor);
-		rc = -EINVAL;
-		goto out;
+
+	list_for_each_entry(global_auth_tok,
+			    &mount_crypt_stat->global_auth_tok_list,
+			    mount_crypt_stat_list) {
+		rc = ecryptfs_keyring_auth_tok_for_sig(
+			&global_auth_tok->global_auth_tok_key,
+			&global_auth_tok->global_auth_tok,
+			global_auth_tok->sig);
+		if (rc) {
+			printk(KERN_ERR "Could not find valid key in user "
+			       "session keyring for sig specified in mount "
+			       "option: [%s]\n", global_auth_tok->sig);
+			global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
+			rc = 0;
+		} else
+			global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
 	}
-out:
 	return rc;
 }
 
+static void ecryptfs_init_mount_crypt_stat(
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+	memset((void *)mount_crypt_stat, 0,
+	       sizeof(struct ecryptfs_mount_crypt_stat));
+	INIT_LIST_HEAD(&mount_crypt_stat->global_auth_tok_list);
+	mutex_init(&mount_crypt_stat->global_auth_tok_list_mutex);
+	mount_crypt_stat->flags |= ECRYPTFS_MOUNT_CRYPT_STAT_INITIALIZED;
+}
+
 /**
  * ecryptfs_parse_options
  * @sb: The ecryptfs super block
@@ -238,14 +306,11 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 	int cipher_name_set = 0;
 	int cipher_key_bytes;
 	int cipher_key_bytes_set = 0;
-	struct key *auth_tok_key = NULL;
-	struct ecryptfs_auth_tok *auth_tok = NULL;
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
 		&ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
 	substring_t args[MAX_OPT_ARGS];
 	int token;
 	char *sig_src;
-	char *sig_dst;
 	char *debug_src;
 	char *cipher_name_dst;
 	char *cipher_name_src;
@@ -256,6 +321,7 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 		rc = -EINVAL;
 		goto out;
 	}
+	ecryptfs_init_mount_crypt_stat(mount_crypt_stat);
 	while ((p = strsep(&options, ",")) != NULL) {
 		if (!*p)
 			continue;
@@ -264,14 +330,13 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 		case ecryptfs_opt_sig:
 		case ecryptfs_opt_ecryptfs_sig:
 			sig_src = args[0].from;
-			sig_dst =
-				mount_crypt_stat->global_auth_tok_sig;
-			memcpy(sig_dst, sig_src, ECRYPTFS_SIG_SIZE_HEX);
-			sig_dst[ECRYPTFS_SIG_SIZE_HEX] = '\0';
-			ecryptfs_printk(KERN_DEBUG,
-					"The mount_crypt_stat "
-					"global_auth_tok_sig set to: "
-					"[%s]\n", sig_dst);
+			rc = ecryptfs_add_global_auth_tok(mount_crypt_stat,
+							  sig_src);
+			if (rc) {
+				printk(KERN_ERR "Error attempting to register "
+				       "global sig; rc = [%d]\n", rc);
+				goto out;
+			}
 			sig_set = 1;
 			break;
 		case ecryptfs_opt_debug:
@@ -333,12 +398,10 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 					p);
 		}
 	}
-	/* Do not support lack of mount-wide signature in 0.1
-	 * release */
 	if (!sig_set) {
 		rc = -EINVAL;
-		ecryptfs_printk(KERN_ERR, "You must supply a valid "
-				"passphrase auth tok signature as a mount "
+		ecryptfs_printk(KERN_ERR, "You must supply at least one valid "
+				"auth tok signature as a mount "
 				"parameter; see the eCryptfs README\n");
 		goto out;
 	}
@@ -358,55 +421,23 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
 	if (!cipher_key_bytes_set) {
 		mount_crypt_stat->global_default_cipher_key_size = 0;
 	}
-	rc = ecryptfs_process_cipher(
-		&mount_crypt_stat->global_key_tfm,
-		mount_crypt_stat->global_default_cipher_name,
-		&mount_crypt_stat->global_default_cipher_key_size);
+	rc = ecryptfs_add_new_key_tfm(
+		NULL, mount_crypt_stat->global_default_cipher_name,
+		mount_crypt_stat->global_default_cipher_key_size);
 	if (rc) {
-		printk(KERN_ERR "Error attempting to initialize cipher [%s] "
-		       "with key size [%Zd] bytes; rc = [%d]\n",
+		printk(KERN_ERR "Error attempting to initialize cipher with "
+		       "name = [%s] and key size = [%td]; rc = [%d]\n",
 		       mount_crypt_stat->global_default_cipher_name,
 		       mount_crypt_stat->global_default_cipher_key_size, rc);
-		mount_crypt_stat->global_key_tfm = NULL;
-		mount_crypt_stat->global_auth_tok_key = NULL;
 		rc = -EINVAL;
 		goto out;
 	}
-	mutex_init(&mount_crypt_stat->global_key_tfm_mutex);
-	ecryptfs_printk(KERN_DEBUG, "Requesting the key with description: "
-			"[%s]\n", mount_crypt_stat->global_auth_tok_sig);
-	/* The reference to this key is held until umount is done The
-	 * call to key_put is done in ecryptfs_put_super() */
-	auth_tok_key = request_key(&key_type_user,
-				   mount_crypt_stat->global_auth_tok_sig,
-				   NULL);
-	if (!auth_tok_key || IS_ERR(auth_tok_key)) {
-		ecryptfs_printk(KERN_ERR, "Could not find key with "
-				"description: [%s]\n",
-				mount_crypt_stat->global_auth_tok_sig);
-		process_request_key_err(PTR_ERR(auth_tok_key));
-		rc = -EINVAL;
-		goto out;
-	}
-	auth_tok = ecryptfs_get_key_payload_data(auth_tok_key);
-	if (ecryptfs_verify_version(auth_tok->version)) {
-		ecryptfs_printk(KERN_ERR, "Data structure version mismatch. "
-				"Userspace tools must match eCryptfs kernel "
-				"module with major version [%d] and minor "
-				"version [%d]\n", ECRYPTFS_VERSION_MAJOR,
-				ECRYPTFS_VERSION_MINOR);
-		rc = -EINVAL;
-		goto out;
-	}
-	if (auth_tok->token_type != ECRYPTFS_PASSWORD
-	    && auth_tok->token_type != ECRYPTFS_PRIVATE_KEY) {
-		ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure "
-				"returned from key query\n");
-		rc = -EINVAL;
-		goto out;
+	rc = ecryptfs_init_global_auth_toks(mount_crypt_stat);
+	if (rc) {
+		printk(KERN_WARNING "One or more global auth toks could not "
+		       "properly register; rc = [%d]\n", rc);
 	}
-	mount_crypt_stat->global_auth_tok_key = auth_tok_key;
-	mount_crypt_stat->global_auth_tok = auth_tok;
+	rc = 0;
 out:
 	return rc;
 }
@@ -495,7 +526,8 @@ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
 	sb->s_maxbytes = lower_root->d_sb->s_maxbytes;
 	ecryptfs_set_dentry_lower(sb->s_root, lower_root);
 	ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt);
-	if ((rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0)))
+	rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0);
+	if (rc)
 		goto out_free;
 	rc = 0;
 	goto out;
@@ -639,15 +671,25 @@ static struct ecryptfs_cache_info {
 		.size = PAGE_CACHE_SIZE,
 	},
 	{
-		.cache = &ecryptfs_lower_page_cache,
-		.name = "ecryptfs_lower_page_cache",
-		.size = PAGE_CACHE_SIZE,
-	},
-	{
 		.cache = &ecryptfs_key_record_cache,
 		.name = "ecryptfs_key_record_cache",
 		.size = sizeof(struct ecryptfs_key_record),
 	},
+	{
+		.cache = &ecryptfs_key_sig_cache,
+		.name = "ecryptfs_key_sig_cache",
+		.size = sizeof(struct ecryptfs_key_sig),
+	},
+	{
+		.cache = &ecryptfs_global_auth_tok_cache,
+		.name = "ecryptfs_global_auth_tok_cache",
+		.size = sizeof(struct ecryptfs_global_auth_tok),
+	},
+	{
+		.cache = &ecryptfs_key_tfm_cache,
+		.name = "ecryptfs_key_tfm_cache",
+		.size = sizeof(struct ecryptfs_key_tfm),
+	},
 };
 
 static void ecryptfs_free_kmem_caches(void)
@@ -750,7 +792,8 @@ static struct ecryptfs_version_str_map_elem {
 	{ECRYPTFS_VERSIONING_PUBKEY, "pubkey"},
 	{ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"},
 	{ECRYPTFS_VERSIONING_POLICY, "policy"},
-	{ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"}
+	{ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"},
+	{ECRYPTFS_VERSIONING_MULTKEY, "multiple keys per file"}
 };
 
 static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff)
@@ -786,7 +829,8 @@ static int do_sysfs_registration(void)
 {
 	int rc;
 
-	if ((rc = subsystem_register(&ecryptfs_subsys))) {
+	rc = subsystem_register(&ecryptfs_subsys);
+	if (rc) {
 		printk(KERN_ERR
 		       "Unable to register ecryptfs sysfs subsystem\n");
 		goto out;
@@ -845,33 +889,49 @@ static int __init ecryptfs_init(void)
 	rc = register_filesystem(&ecryptfs_fs_type);
 	if (rc) {
 		printk(KERN_ERR "Failed to register filesystem\n");
-		ecryptfs_free_kmem_caches();
-		goto out;
+		goto out_free_kmem_caches;
 	}
 	kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
 	rc = do_sysfs_registration();
 	if (rc) {
 		printk(KERN_ERR "sysfs registration failed\n");
-		unregister_filesystem(&ecryptfs_fs_type);
-		ecryptfs_free_kmem_caches();
-		goto out;
+		goto out_unregister_filesystem;
 	}
 	rc = ecryptfs_init_messaging(ecryptfs_transport);
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
 				"initialize the eCryptfs netlink socket\n");
-		do_sysfs_unregistration();
-		unregister_filesystem(&ecryptfs_fs_type);
-		ecryptfs_free_kmem_caches();
+		goto out_do_sysfs_unregistration;
+	}
+	rc = ecryptfs_init_crypto();
+	if (rc) {
+		printk(KERN_ERR "Failure whilst attempting to init crypto; "
+		       "rc = [%d]\n", rc);
+		goto out_release_messaging;
 	}
+	goto out;
+out_release_messaging:
+	ecryptfs_release_messaging(ecryptfs_transport);
+out_do_sysfs_unregistration:
+	do_sysfs_unregistration();
+out_unregister_filesystem:
+	unregister_filesystem(&ecryptfs_fs_type);
+out_free_kmem_caches:
+	ecryptfs_free_kmem_caches();
 out:
 	return rc;
 }
 
 static void __exit ecryptfs_exit(void)
 {
-	do_sysfs_unregistration();
+	int rc;
+
+	rc = ecryptfs_destroy_crypto();
+	if (rc)
+		printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
+		       "rc = [%d]\n", rc);
 	ecryptfs_release_messaging(ecryptfs_transport);
+	do_sysfs_unregistration();
 	unregister_filesystem(&ecryptfs_fs_type);
 	ecryptfs_free_kmem_caches();
 }
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index a9d87c47f72..a96d341d154 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -419,8 +419,9 @@ int ecryptfs_init_messaging(unsigned int transport)
 	}
 	mutex_init(&ecryptfs_daemon_id_hash_mux);
 	mutex_lock(&ecryptfs_daemon_id_hash_mux);
-	ecryptfs_hash_buckets = 0;
-	while (ecryptfs_number_of_users >> ++ecryptfs_hash_buckets);
+	ecryptfs_hash_buckets = 1;
+	while (ecryptfs_number_of_users >> ecryptfs_hash_buckets)
+		ecryptfs_hash_buckets++;
 	ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head)
 					  * ecryptfs_hash_buckets, GFP_KERNEL);
 	if (!ecryptfs_daemon_id_hash) {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index fd3f94d4a66..16a7a555f39 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -37,130 +37,27 @@
 struct kmem_cache *ecryptfs_lower_page_cache;
 
 /**
- * ecryptfs_get1page
+ * ecryptfs_get_locked_page
  *
  * Get one page from cache or lower f/s, return error otherwise.
  *
- * Returns unlocked and up-to-date page (if ok), with increased
+ * Returns locked and up-to-date page (if ok), with increased
  * refcnt.
  */
-static struct page *ecryptfs_get1page(struct file *file, int index)
+struct page *ecryptfs_get_locked_page(struct file *file, loff_t index)
 {
 	struct dentry *dentry;
 	struct inode *inode;
 	struct address_space *mapping;
+	struct page *page;
 
 	dentry = file->f_path.dentry;
 	inode = dentry->d_inode;
 	mapping = inode->i_mapping;
-	return read_mapping_page(mapping, index, (void *)file);
-}
-
-/**
- * ecryptfs_fill_zeros
- * @file: The ecryptfs file
- * @new_length: The new length of the data in the underlying file;
- *              everything between the prior end of the file and the
- *              new end of the file will be filled with zero's.
- *              new_length must be greater than  current length
- *
- * Function for handling lseek-ing past the end of the file.
- *
- * This function does not support shrinking, only growing a file.
- *
- * Returns zero on success; non-zero otherwise.
- */
-int ecryptfs_fill_zeros(struct file *file, loff_t new_length)
-{
-	int rc = 0;
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
-	pgoff_t old_end_page_index = 0;
-	pgoff_t index = old_end_page_index;
-	int old_end_pos_in_page = -1;
-	pgoff_t new_end_page_index;
-	int new_end_pos_in_page;
-	loff_t cur_length = i_size_read(inode);
-
-	if (cur_length != 0) {
-		index = old_end_page_index =
-		    ((cur_length - 1) >> PAGE_CACHE_SHIFT);
-		old_end_pos_in_page = ((cur_length - 1) & ~PAGE_CACHE_MASK);
-	}
-	new_end_page_index = ((new_length - 1) >> PAGE_CACHE_SHIFT);
-	new_end_pos_in_page = ((new_length - 1) & ~PAGE_CACHE_MASK);
-	ecryptfs_printk(KERN_DEBUG, "old_end_page_index = [0x%.16x]; "
-			"old_end_pos_in_page = [%d]; "
-			"new_end_page_index = [0x%.16x]; "
-			"new_end_pos_in_page = [%d]\n",
-			old_end_page_index, old_end_pos_in_page,
-			new_end_page_index, new_end_pos_in_page);
-	if (old_end_page_index == new_end_page_index) {
-		/* Start and end are in the same page; we just need to
-		 * set a portion of the existing page to zero's */
-		rc = ecryptfs_write_zeros(file, index,
-					  (old_end_pos_in_page + 1),
-					  (new_end_pos_in_page
-					   - old_end_pos_in_page));
-		if (rc)
-			ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
-					"file=[%p], "
-					"index=[0x%.16x], "
-					"old_end_pos_in_page=[d], "
-					"(PAGE_CACHE_SIZE - new_end_pos_in_page"
-					"=[%d]"
-					")=[d]) returned [%d]\n", file, index,
-					old_end_pos_in_page,
-					new_end_pos_in_page,
-					(PAGE_CACHE_SIZE - new_end_pos_in_page),
-					rc);
-		goto out;
-	}
-	/* Fill the remainder of the previous last page with zeros */
-	rc = ecryptfs_write_zeros(file, index, (old_end_pos_in_page + 1),
-			 ((PAGE_CACHE_SIZE - 1) - old_end_pos_in_page));
-	if (rc) {
-		ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file=[%p], "
-				"index=[0x%.16x], old_end_pos_in_page=[d], "
-				"(PAGE_CACHE_SIZE - old_end_pos_in_page)=[d]) "
-				"returned [%d]\n", file, index,
-				old_end_pos_in_page,
-				(PAGE_CACHE_SIZE - old_end_pos_in_page), rc);
-		goto out;
-	}
-	index++;
-	while (index < new_end_page_index) {
-		/* Fill all intermediate pages with zeros */
-		rc = ecryptfs_write_zeros(file, index, 0, PAGE_CACHE_SIZE);
-		if (rc) {
-			ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros("
-					"file=[%p], "
-					"index=[0x%.16x], "
-					"old_end_pos_in_page=[d], "
-					"(PAGE_CACHE_SIZE - new_end_pos_in_page"
-					"=[%d]"
-					")=[d]) returned [%d]\n", file, index,
-					old_end_pos_in_page,
-					new_end_pos_in_page,
-					(PAGE_CACHE_SIZE - new_end_pos_in_page),
-					rc);
-			goto out;
-		}
-		index++;
-	}
-	/* Fill the portion at the beginning of the last new page with
-	 * zero's */
-	rc = ecryptfs_write_zeros(file, index, 0, (new_end_pos_in_page + 1));
-	if (rc) {
-		ecryptfs_printk(KERN_ERR, "ecryptfs_write_zeros(file="
-				"[%p], index=[0x%.16x], 0, "
-				"new_end_pos_in_page=[%d]"
-				"returned [%d]\n", file, index,
-				new_end_pos_in_page, rc);
-		goto out;
-	}
-out:
-	return rc;
+	page = read_mapping_page(mapping, index, (void *)file);
+	if (!IS_ERR(page))
+		lock_page(page);
+	return page;
 }
 
 /**
@@ -171,13 +68,9 @@ out:
  */
 static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct ecryptfs_page_crypt_context ctx;
 	int rc;
 
-	ctx.page = page;
-	ctx.mode = ECRYPTFS_WRITEPAGE_MODE;
-	ctx.param.wbc = wbc;
-	rc = ecryptfs_encrypt_page(&ctx);
+	rc = ecryptfs_encrypt_page(page);
 	if (rc) {
 		ecryptfs_printk(KERN_WARNING, "Error encrypting "
 				"page (upper index [0x%.16x])\n", page->index);
@@ -191,58 +84,6 @@ out:
 }
 
 /**
- * Reads the data from the lower file file at index lower_page_index
- * and copies that data into page.
- *
- * @param page	Page to fill
- * @param lower_page_index Index of the page in the lower file to get
- */
-int ecryptfs_do_readpage(struct file *file, struct page *page,
-			 pgoff_t lower_page_index)
-{
-	int rc;
-	struct dentry *dentry;
-	struct file *lower_file;
-	struct dentry *lower_dentry;
-	struct inode *inode;
-	struct inode *lower_inode;
-	char *page_data;
-	struct page *lower_page = NULL;
-	char *lower_page_data;
-	const struct address_space_operations *lower_a_ops;
-
-	dentry = file->f_path.dentry;
-	lower_file = ecryptfs_file_to_lower(file);
-	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	inode = dentry->d_inode;
-	lower_inode = ecryptfs_inode_to_lower(inode);
-	lower_a_ops = lower_inode->i_mapping->a_ops;
-	lower_page = read_cache_page(lower_inode->i_mapping, lower_page_index,
-				     (filler_t *)lower_a_ops->readpage,
-				     (void *)lower_file);
-	if (IS_ERR(lower_page)) {
-		rc = PTR_ERR(lower_page);
-		lower_page = NULL;
-		ecryptfs_printk(KERN_ERR, "Error reading from page cache\n");
-		goto out;
-	}
-	page_data = kmap_atomic(page, KM_USER0);
-	lower_page_data = kmap_atomic(lower_page, KM_USER1);
-	memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
-	kunmap_atomic(lower_page_data, KM_USER1);
-	kunmap_atomic(page_data, KM_USER0);
-	flush_dcache_page(page);
-	rc = 0;
-out:
-	if (likely(lower_page))
-		page_cache_release(lower_page);
-	if (rc == 0)
-		SetPageUptodate(page);
-	else
-		ClearPageUptodate(page);
-	return rc;
-}
-/**
  *   Header Extent:
  *     Octets 0-7:        Unencrypted file size (big-endian)
  *     Octets 8-15:       eCryptfs special marker
@@ -271,9 +112,77 @@ static void set_header_info(char *page_virt,
 }
 
 /**
+ * ecryptfs_copy_up_encrypted_with_header
+ * @page: Sort of a ``virtual'' representation of the encrypted lower
+ *        file. The actual lower file does not have the metadata in
+ *        the header. This is locked.
+ * @crypt_stat: The eCryptfs inode's cryptographic context
+ *
+ * The ``view'' is the version of the file that userspace winds up
+ * seeing, with the header information inserted.
+ */
+static int
+ecryptfs_copy_up_encrypted_with_header(struct page *page,
+				       struct ecryptfs_crypt_stat *crypt_stat)
+{
+	loff_t extent_num_in_page = 0;
+	loff_t num_extents_per_page = (PAGE_CACHE_SIZE
+				       / crypt_stat->extent_size);
+	int rc = 0;
+
+	while (extent_num_in_page < num_extents_per_page) {
+		loff_t view_extent_num = ((((loff_t)page->index)
+					   * num_extents_per_page)
+					  + extent_num_in_page);
+
+		if (view_extent_num < crypt_stat->num_header_extents_at_front) {
+			/* This is a header extent */
+			char *page_virt;
+
+			page_virt = kmap_atomic(page, KM_USER0);
+			memset(page_virt, 0, PAGE_CACHE_SIZE);
+			/* TODO: Support more than one header extent */
+			if (view_extent_num == 0) {
+				rc = ecryptfs_read_xattr_region(
+					page_virt, page->mapping->host);
+				set_header_info(page_virt, crypt_stat);
+			}
+			kunmap_atomic(page_virt, KM_USER0);
+			flush_dcache_page(page);
+			if (rc) {
+				printk(KERN_ERR "%s: Error reading xattr "
+				       "region; rc = [%d]\n", __FUNCTION__, rc);
+				goto out;
+			}
+		} else {
+			/* This is an encrypted data extent */
+			loff_t lower_offset =
+				((view_extent_num -
+				  crypt_stat->num_header_extents_at_front)
+				 * crypt_stat->extent_size);
+
+			rc = ecryptfs_read_lower_page_segment(
+				page, (lower_offset >> PAGE_CACHE_SHIFT),
+				(lower_offset & ~PAGE_CACHE_MASK),
+				crypt_stat->extent_size, page->mapping->host);
+			if (rc) {
+				printk(KERN_ERR "%s: Error attempting to read "
+				       "extent at offset [%lld] in the lower "
+				       "file; rc = [%d]\n", __FUNCTION__,
+				       lower_offset, rc);
+				goto out;
+			}
+		}
+		extent_num_in_page++;
+	}
+out:
+	return rc;
+}
+
+/**
  * ecryptfs_readpage
- * @file: This is an ecryptfs file
- * @page: ecryptfs associated page to stick the read data into
+ * @file: An eCryptfs file
+ * @page: Page from eCryptfs inode mapping into which to stick the read data
  *
  * Read in a page, decrypting if necessary.
  *
@@ -281,59 +190,35 @@ static void set_header_info(char *page_virt,
  */
 static int ecryptfs_readpage(struct file *file, struct page *page)
 {
+	struct ecryptfs_crypt_stat *crypt_stat =
+		&ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat;
 	int rc = 0;
-	struct ecryptfs_crypt_stat *crypt_stat;
 
-	BUG_ON(!(file && file->f_path.dentry && file->f_path.dentry->d_inode));
-	crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
-			->crypt_stat;
 	if (!crypt_stat
 	    || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
 	    || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
 		ecryptfs_printk(KERN_DEBUG,
 				"Passing through unencrypted page\n");
-		rc = ecryptfs_do_readpage(file, page, page->index);
-		if (rc) {
-			ecryptfs_printk(KERN_ERR, "Error reading page; rc = "
-					"[%d]\n", rc);
-			goto out;
-		}
+		rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
+						      PAGE_CACHE_SIZE,
+						      page->mapping->host);
 	} else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
 		if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
-			int num_pages_in_header_region =
-				(crypt_stat->header_extent_size
-				 / PAGE_CACHE_SIZE);
-
-			if (page->index < num_pages_in_header_region) {
-				char *page_virt;
-
-				page_virt = kmap_atomic(page, KM_USER0);
-				memset(page_virt, 0, PAGE_CACHE_SIZE);
-				if (page->index == 0) {
-					rc = ecryptfs_read_xattr_region(
-						page_virt, file->f_path.dentry);
-					set_header_info(page_virt, crypt_stat);
-				}
-				kunmap_atomic(page_virt, KM_USER0);
-				flush_dcache_page(page);
-				if (rc) {
-					printk(KERN_ERR "Error reading xattr "
-					       "region\n");
-					goto out;
-				}
-			} else {
-				rc = ecryptfs_do_readpage(
-					file, page,
-					(page->index
-					 - num_pages_in_header_region));
-				if (rc) {
-					printk(KERN_ERR "Error reading page; "
-					       "rc = [%d]\n", rc);
-					goto out;
-				}
+			rc = ecryptfs_copy_up_encrypted_with_header(page,
+								    crypt_stat);
+			if (rc) {
+				printk(KERN_ERR "%s: Error attempting to copy "
+				       "the encrypted content from the lower "
+				       "file whilst inserting the metadata "
+				       "from the xattr into the header; rc = "
+				       "[%d]\n", __FUNCTION__, rc);
+				goto out;
 			}
+
 		} else {
-			rc = ecryptfs_do_readpage(file, page, page->index);
+			rc = ecryptfs_read_lower_page_segment(
+				page, page->index, 0, PAGE_CACHE_SIZE,
+				page->mapping->host);
 			if (rc) {
 				printk(KERN_ERR "Error reading page; rc = "
 				       "[%d]\n", rc);
@@ -341,17 +226,18 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
 			}
 		}
 	} else {
-		rc = ecryptfs_decrypt_page(file, page);
+		rc = ecryptfs_decrypt_page(page);
 		if (rc) {
 			ecryptfs_printk(KERN_ERR, "Error decrypting page; "
 					"rc = [%d]\n", rc);
 			goto out;
 		}
 	}
-	SetPageUptodate(page);
 out:
 	if (rc)
 		ClearPageUptodate(page);
+	else
+		SetPageUptodate(page);
 	ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
 			page->index);
 	unlock_page(page);
@@ -377,27 +263,6 @@ out:
 	return 0;
 }
 
-/**
- * eCryptfs does not currently support holes. When writing after a
- * seek past the end of the file, eCryptfs fills in 0's through to the
- * current location. The code to fill in the 0's to all the
- * intermediate pages calls ecryptfs_prepare_write_no_truncate().
- */
-static int
-ecryptfs_prepare_write_no_truncate(struct file *file, struct page *page,
-				   unsigned from, unsigned to)
-{
-	int rc = 0;
-
-	if (from == 0 && to == PAGE_CACHE_SIZE)
-		goto out;	/* If we are writing a full page, it will be
-				   up to date. */
-	if (!PageUptodate(page))
-		rc = ecryptfs_do_readpage(file, page, page->index);
-out:
-	return rc;
-}
-
 static int ecryptfs_prepare_write(struct file *file, struct page *page,
 				  unsigned from, unsigned to)
 {
@@ -406,10 +271,21 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
 	if (from == 0 && to == PAGE_CACHE_SIZE)
 		goto out;	/* If we are writing a full page, it will be
 				   up to date. */
-	if (!PageUptodate(page))
-		rc = ecryptfs_do_readpage(file, page, page->index);
+	if (!PageUptodate(page)) {
+		rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
+						      PAGE_CACHE_SIZE,
+						      page->mapping->host);
+		if (rc) {
+			printk(KERN_ERR "%s: Error attemping to read lower "
+			       "page segment; rc = [%d]\n", __FUNCTION__, rc);
+			ClearPageUptodate(page);
+			goto out;
+		} else
+			SetPageUptodate(page);
+	}
 	if (page->index != 0) {
-		loff_t end_of_prev_pg_pos = page_offset(page) - 1;
+		loff_t end_of_prev_pg_pos =
+			(((loff_t)page->index << PAGE_CACHE_SHIFT) - 1);
 
 		if (end_of_prev_pg_pos > i_size_read(page->mapping->host)) {
 			rc = ecryptfs_truncate(file->f_path.dentry,
@@ -428,32 +304,6 @@ out:
 	return rc;
 }
 
-int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
-					      struct inode *lower_inode,
-					      struct writeback_control *wbc)
-{
-	int rc = 0;
-
-	rc = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
-	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error calling lower writepage(); "
-				"rc = [%d]\n", rc);
-		goto out;
-	}
-	lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
-	page_cache_release(lower_page);
-out:
-	return rc;
-}
-
-static
-void ecryptfs_release_lower_page(struct page *lower_page, int page_locked)
-{
-	if (page_locked)
-		unlock_page(lower_page);
-	page_cache_release(lower_page);
-}
-
 /**
  * ecryptfs_write_inode_size_to_header
  *
@@ -461,67 +311,48 @@ void ecryptfs_release_lower_page(struct page *lower_page, int page_locked)
  *
  * Returns zero on success; non-zero on error.
  */
-static int ecryptfs_write_inode_size_to_header(struct file *lower_file,
-					       struct inode *lower_inode,
-					       struct inode *inode)
+static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
 {
-	int rc = 0;
-	struct page *header_page;
-	char *header_virt;
-	const struct address_space_operations *lower_a_ops;
 	u64 file_size;
+	char *file_size_virt;
+	int rc;
 
-retry:
-	header_page = grab_cache_page(lower_inode->i_mapping, 0);
-	if (!header_page) {
-		ecryptfs_printk(KERN_ERR, "grab_cache_page for "
-				"lower_page_index 0 failed\n");
-		rc = -EINVAL;
-		goto out;
-	}
-	lower_a_ops = lower_inode->i_mapping->a_ops;
-	rc = lower_a_ops->prepare_write(lower_file, header_page, 0, 8);
-	if (rc) {
-		if (rc == AOP_TRUNCATED_PAGE) {
-			ecryptfs_release_lower_page(header_page, 0);
-			goto retry;
-		} else
-			ecryptfs_release_lower_page(header_page, 1);
+	file_size_virt = kmalloc(sizeof(u64), GFP_KERNEL);
+	if (!file_size_virt) {
+		rc = -ENOMEM;
 		goto out;
 	}
-	file_size = (u64)i_size_read(inode);
-	ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size);
+	file_size = (u64)i_size_read(ecryptfs_inode);
 	file_size = cpu_to_be64(file_size);
-	header_virt = kmap_atomic(header_page, KM_USER0);
-	memcpy(header_virt, &file_size, sizeof(u64));
-	kunmap_atomic(header_virt, KM_USER0);
-	flush_dcache_page(header_page);
-	rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8);
-	if (rc < 0)
-		ecryptfs_printk(KERN_ERR, "Error commiting header page "
-				"write\n");
-	if (rc == AOP_TRUNCATED_PAGE) {
-		ecryptfs_release_lower_page(header_page, 0);
-		goto retry;
-	} else
-		ecryptfs_release_lower_page(header_page, 1);
-	lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
-	mark_inode_dirty_sync(inode);
+	memcpy(file_size_virt, &file_size, sizeof(u64));
+	rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
+				  sizeof(u64));
+	kfree(file_size_virt);
+	if (rc)
+		printk(KERN_ERR "%s: Error writing file size to header; "
+		       "rc = [%d]\n", __FUNCTION__, rc);
 out:
 	return rc;
 }
 
-static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode,
-					      struct inode *inode,
-					      struct dentry *ecryptfs_dentry,
-					      int lower_i_mutex_held)
+struct kmem_cache *ecryptfs_xattr_cache;
+
+static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
 {
 	ssize_t size;
 	void *xattr_virt;
-	struct dentry *lower_dentry;
+	struct dentry *lower_dentry =
+		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
+	struct inode *lower_inode = lower_dentry->d_inode;
 	u64 file_size;
 	int rc;
 
+	if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
+		printk(KERN_WARNING
+		       "No support for setting xattr in lower filesystem\n");
+		rc = -ENOSYS;
+		goto out;
+	}
 	xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL);
 	if (!xattr_virt) {
 		printk(KERN_ERR "Out of memory whilst attempting to write "
@@ -529,35 +360,17 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode,
 		rc = -ENOMEM;
 		goto out;
 	}
-	lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
-	if (!lower_dentry->d_inode->i_op->getxattr ||
-			!lower_dentry->d_inode->i_op->setxattr) {
-		printk(KERN_WARNING
-		       "No support for setting xattr in lower filesystem\n");
-		rc = -ENOSYS;
-		kmem_cache_free(ecryptfs_xattr_cache, xattr_virt);
-		goto out;
-	}
-	if (!lower_i_mutex_held)
-		mutex_lock(&lower_dentry->d_inode->i_mutex);
-	size = lower_dentry->d_inode->i_op->getxattr(lower_dentry,
-						     ECRYPTFS_XATTR_NAME,
-						     xattr_virt,
-						     PAGE_CACHE_SIZE);
-	if (!lower_i_mutex_held)
-		mutex_unlock(&lower_dentry->d_inode->i_mutex);
+	mutex_lock(&lower_inode->i_mutex);
+	size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
+					   xattr_virt, PAGE_CACHE_SIZE);
 	if (size < 0)
 		size = 8;
-	file_size = (u64)i_size_read(inode);
+	file_size = (u64)i_size_read(ecryptfs_inode);
 	file_size = cpu_to_be64(file_size);
 	memcpy(xattr_virt, &file_size, sizeof(u64));
-	if (!lower_i_mutex_held)
-		mutex_lock(&lower_dentry->d_inode->i_mutex);
-	rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry,
-						   ECRYPTFS_XATTR_NAME,
-						   xattr_virt, size, 0);
-	if (!lower_i_mutex_held)
-		mutex_unlock(&lower_dentry->d_inode->i_mutex);
+	rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
+					 xattr_virt, size, 0);
+	mutex_unlock(&lower_inode->i_mutex);
 	if (rc)
 		printk(KERN_ERR "Error whilst attempting to write inode size "
 		       "to lower file xattr; rc = [%d]\n", rc);
@@ -566,122 +379,18 @@ out:
 	return rc;
 }
 
-int
-ecryptfs_write_inode_size_to_metadata(struct file *lower_file,
-				      struct inode *lower_inode,
-				      struct inode *inode,
-				      struct dentry *ecryptfs_dentry,
-				      int lower_i_mutex_held)
+int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
 {
 	struct ecryptfs_crypt_stat *crypt_stat;
 
-	crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
+	crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
 	if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
-		return ecryptfs_write_inode_size_to_xattr(lower_inode, inode,
-							  ecryptfs_dentry,
-							  lower_i_mutex_held);
+		return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode);
 	else
-		return ecryptfs_write_inode_size_to_header(lower_file,
-							   lower_inode,
-							   inode);
-}
-
-int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
-			    struct file *lower_file,
-			    unsigned long lower_page_index, int byte_offset,
-			    int region_bytes)
-{
-	int rc = 0;
-
-retry:
-	*lower_page = grab_cache_page(lower_inode->i_mapping, lower_page_index);
-	if (!(*lower_page)) {
-		rc = -EINVAL;
-		ecryptfs_printk(KERN_ERR, "Error attempting to grab "
-				"lower page with index [0x%.16x]\n",
-				lower_page_index);
-		goto out;
-	}
-	rc = lower_inode->i_mapping->a_ops->prepare_write(lower_file,
-							  (*lower_page),
-							  byte_offset,
-							  region_bytes);
-	if (rc) {
-		if (rc == AOP_TRUNCATED_PAGE) {
-			ecryptfs_release_lower_page(*lower_page, 0);
-			goto retry;
-		} else {
-			ecryptfs_printk(KERN_ERR, "prepare_write for "
-				"lower_page_index = [0x%.16x] failed; rc = "
-				"[%d]\n", lower_page_index, rc);
-			ecryptfs_release_lower_page(*lower_page, 1);
-			(*lower_page) = NULL;
-		}
-	}
-out:
-	return rc;
-}
-
-/**
- * ecryptfs_commit_lower_page
- *
- * Returns zero on success; non-zero on error
- */
-int
-ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
-			   struct file *lower_file, int byte_offset,
-			   int region_size)
-{
-	int page_locked = 1;
-	int rc = 0;
-
-	rc = lower_inode->i_mapping->a_ops->commit_write(
-		lower_file, lower_page, byte_offset, region_size);
-	if (rc == AOP_TRUNCATED_PAGE)
-		page_locked = 0;
-	if (rc < 0) {
-		ecryptfs_printk(KERN_ERR,
-				"Error committing write; rc = [%d]\n", rc);
-	} else
-		rc = 0;
-	ecryptfs_release_lower_page(lower_page, page_locked);
-	return rc;
+		return ecryptfs_write_inode_size_to_header(ecryptfs_inode);
 }
 
 /**
- * ecryptfs_copy_page_to_lower
- *
- * Used for plaintext pass-through; no page index interpolation
- * required.
- */
-int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
-				struct file *lower_file)
-{
-	int rc = 0;
-	struct page *lower_page;
-
-	rc = ecryptfs_get_lower_page(&lower_page, lower_inode, lower_file,
-				     page->index, 0, PAGE_CACHE_SIZE);
-	if (rc) {
-		ecryptfs_printk(KERN_ERR, "Error attempting to get page "
-				"at index [0x%.16x]\n", page->index);
-		goto out;
-	}
-	/* TODO: aops */
-	memcpy((char *)page_address(lower_page), page_address(page),
-	       PAGE_CACHE_SIZE);
-	rc = ecryptfs_commit_lower_page(lower_page, lower_inode, lower_file,
-					0, PAGE_CACHE_SIZE);
-	if (rc)
-		ecryptfs_printk(KERN_ERR, "Error attempting to commit page "
-				"at index [0x%.16x]\n", page->index);
-out:
-	return rc;
-}
-
-struct kmem_cache *ecryptfs_xattr_cache;
-
-/**
  * ecryptfs_commit_write
  * @file: The eCryptfs file object
  * @page: The eCryptfs page
@@ -695,20 +404,12 @@ struct kmem_cache *ecryptfs_xattr_cache;
 static int ecryptfs_commit_write(struct file *file, struct page *page,
 				 unsigned from, unsigned to)
 {
-	struct ecryptfs_page_crypt_context ctx;
 	loff_t pos;
-	struct inode *inode;
-	struct inode *lower_inode;
-	struct file *lower_file;
-	struct ecryptfs_crypt_stat *crypt_stat;
+	struct inode *ecryptfs_inode = page->mapping->host;
+	struct ecryptfs_crypt_stat *crypt_stat =
+		&ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat;
 	int rc;
 
-	inode = page->mapping->host;
-	lower_inode = ecryptfs_inode_to_lower(inode);
-	lower_file = ecryptfs_file_to_lower(file);
-	mutex_lock(&lower_inode->i_mutex);
-	crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
-				->crypt_stat;
 	if (crypt_stat->flags & ECRYPTFS_NEW_FILE) {
 		ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
 			"crypt_stat at memory location [%p]\n", crypt_stat);
@@ -718,6 +419,7 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
 	ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
 			"(page w/ index = [0x%.16x], to = [%d])\n", page->index,
 			to);
+	/* Fills in zeros if 'to' goes beyond inode size */
 	rc = fill_zeros_to_end_of_page(page, to);
 	if (rc) {
 		ecryptfs_printk(KERN_WARNING, "Error attempting to fill "
@@ -725,82 +427,22 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
 				page->index);
 		goto out;
 	}
-	ctx.page = page;
-	ctx.mode = ECRYPTFS_PREPARE_COMMIT_MODE;
-	ctx.param.lower_file = lower_file;
-	rc = ecryptfs_encrypt_page(&ctx);
+	rc = ecryptfs_encrypt_page(page);
 	if (rc) {
 		ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
 				"index [0x%.16x])\n", page->index);
 		goto out;
 	}
-	inode->i_blocks = lower_inode->i_blocks;
-	pos = page_offset(page) + to;
-	if (pos > i_size_read(inode)) {
-		i_size_write(inode, pos);
+	pos = (((loff_t)page->index) << PAGE_CACHE_SHIFT) + to;
+	if (pos > i_size_read(ecryptfs_inode)) {
+		i_size_write(ecryptfs_inode, pos);
 		ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
-				"[0x%.16x]\n", i_size_read(inode));
+				"[0x%.16x]\n", i_size_read(ecryptfs_inode));
 	}
-	rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode,
-						   inode, file->f_dentry,
-						   ECRYPTFS_LOWER_I_MUTEX_HELD);
+	rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
 	if (rc)
 		printk(KERN_ERR "Error writing inode size to metadata; "
 		       "rc = [%d]\n", rc);
-	lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
-	mark_inode_dirty_sync(inode);
-out:
-	if (rc < 0)
-		ClearPageUptodate(page);
-	else
-		SetPageUptodate(page);
-	mutex_unlock(&lower_inode->i_mutex);
-	return rc;
-}
-
-/**
- * ecryptfs_write_zeros
- * @file: The ecryptfs file
- * @index: The index in which we are writing
- * @start: The position after the last block of data
- * @num_zeros: The number of zeros to write
- *
- * Write a specified number of zero's to a page.
- *
- * (start + num_zeros) must be less than or equal to PAGE_CACHE_SIZE
- */
-int
-ecryptfs_write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
-{
-	int rc = 0;
-	struct page *tmp_page;
-
-	tmp_page = ecryptfs_get1page(file, index);
-	if (IS_ERR(tmp_page)) {
-		ecryptfs_printk(KERN_ERR, "Error getting page at index "
-				"[0x%.16x]\n", index);
-		rc = PTR_ERR(tmp_page);
-		goto out;
-	}
-	if ((rc = ecryptfs_prepare_write_no_truncate(file, tmp_page, start,
-						     (start + num_zeros)))) {
-		ecryptfs_printk(KERN_ERR, "Error preparing to write zero's "
-				"to page at index [0x%.16x]\n",
-				index);
-		page_cache_release(tmp_page);
-		goto out;
-	}
-	zero_user_page(tmp_page, start, num_zeros, KM_USER0);
-	rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
-	if (rc < 0) {
-		ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
-				"to remainder of page at index [0x%.16x]\n",
-				index);
-		page_cache_release(tmp_page);
-		goto out;
-	}
-	rc = 0;
-	page_cache_release(tmp_page);
 out:
 	return rc;
 }
@@ -819,34 +461,10 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
 	return rc;
 }
 
-static void ecryptfs_sync_page(struct page *page)
-{
-	struct inode *inode;
-	struct inode *lower_inode;
-	struct page *lower_page;
-
-	inode = page->mapping->host;
-	lower_inode = ecryptfs_inode_to_lower(inode);
-	/* NOTE: Recently swapped with grab_cache_page(), since
-	 * sync_page() just makes sure that pending I/O gets done. */
-	lower_page = find_lock_page(lower_inode->i_mapping, page->index);
-	if (!lower_page) {
-		ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n");
-		return;
-	}
-	if (lower_page->mapping->a_ops->sync_page)
-		lower_page->mapping->a_ops->sync_page(lower_page);
-	ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n",
-			lower_page->index);
-	unlock_page(lower_page);
-	page_cache_release(lower_page);
-}
-
 struct address_space_operations ecryptfs_aops = {
 	.writepage = ecryptfs_writepage,
 	.readpage = ecryptfs_readpage,
 	.prepare_write = ecryptfs_prepare_write,
 	.commit_write = ecryptfs_commit_write,
 	.bmap = ecryptfs_bmap,
-	.sync_page = ecryptfs_sync_page,
 };
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
new file mode 100644
index 00000000000..2150edf9a58
--- /dev/null
+++ b/fs/ecryptfs/read_write.c
@@ -0,0 +1,358 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 2007 International Business Machines Corp.
+ *   Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "ecryptfs_kernel.h"
+
+/**
+ * ecryptfs_write_lower
+ * @ecryptfs_inode: The eCryptfs inode
+ * @data: Data to write
+ * @offset: Byte offset in the lower file to which to write the data
+ * @size: Number of bytes from @data to write at @offset in the lower file
+ *
+ * Write data to the lower file while holding the inode's lower_file_mutex.
+ *
+ * Returns zero on success; -EINVAL if the write to the lower file fails
+ */
+int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
+			 loff_t offset, size_t size)
+{
+	struct ecryptfs_inode_info *inode_info;
+	ssize_t octets_written;
+	mm_segment_t fs_save;
+	int rc = 0;
+
+	inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
+	mutex_lock(&inode_info->lower_file_mutex);
+	BUG_ON(!inode_info->lower_file);
+	inode_info->lower_file->f_pos = offset;
+	/* get_ds() limit lets vfs_write() accept the kernel buffer @data */
+	fs_save = get_fs();
+	set_fs(get_ds());
+	octets_written = vfs_write(inode_info->lower_file, data, size,
+				   &inode_info->lower_file->f_pos);
+	set_fs(fs_save);
+	if (octets_written < 0) {
+		printk(KERN_ERR "%s: octets_written = [%zd]; "
+		       "expected [%zu]\n", __FUNCTION__, octets_written, size);
+		rc = -EINVAL;
+	}
+	mutex_unlock(&inode_info->lower_file_mutex);
+	mark_inode_dirty_sync(ecryptfs_inode);
+	return rc;
+}
+
+/**
+ * ecryptfs_write_lower_page_segment
+ * @ecryptfs_inode: The eCryptfs inode
+ * @page_for_lower: The page containing the data to be written to the
+ *                  lower file
+ * @offset_in_page: The offset in the @page_for_lower from which to
+ *                  start writing the data
+ * @size: The amount of data from @page_for_lower to write to the lower file
+ *
+ * Determines the byte offset in the file for the given page and
+ * offset within the page, maps the page, and makes the call to write
+ * the contents of @page_for_lower to the lower inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
+				      struct page *page_for_lower,
+				      size_t offset_in_page, size_t size)
+{
+	char *virt;
+	loff_t offset;
+	int rc;
+
+	/* Widen the index to loff_t first so the shift cannot truncate */
+	offset = ((((loff_t)page_for_lower->index) << PAGE_CACHE_SHIFT)
+		  + offset_in_page);
+	virt = kmap(page_for_lower);
+	rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
+	kunmap(page_for_lower);
+	return rc;
+}
+
+/**
+ * ecryptfs_write
+ * @ecryptfs_file: The eCryptfs file into which to write
+ * @data: Virtual address where data to write is located
+ * @offset: Offset in the eCryptfs file at which to begin writing the
+ *          data from @data
+ * @size: The number of bytes to write from @data
+ *
+ * Write an arbitrary amount of data to an arbitrary location in the
+ * eCryptfs inode page cache. This is done on a page-by-page, and then
+ * by an extent-by-extent, basis; individual extents are encrypted and
+ * written to the lower page cache (via VFS writes). This function
+ * takes care of all the address translation to locations in the lower
+ * filesystem; it also handles truncate events, writing out zeros
+ * where necessary.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
+		   size_t size)
+{
+	struct page *ecryptfs_page;
+	char *ecryptfs_page_virt;
+	loff_t ecryptfs_file_size =
+		i_size_read(ecryptfs_file->f_dentry->d_inode);
+	loff_t data_offset = 0;
+	loff_t pos;
+	int rc = 0;
+
+	if (offset > ecryptfs_file_size)
+		pos = ecryptfs_file_size;
+	else
+		pos = offset;
+	while (pos < (offset + size)) {
+		pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT);
+		size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK);
+		size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page);
+		size_t total_remaining_bytes = ((offset + size) - pos);
+
+		if (num_bytes > total_remaining_bytes)
+			num_bytes = total_remaining_bytes;
+		if (pos < offset) {
+			size_t total_remaining_zeros = (offset - pos);
+
+			if (num_bytes > total_remaining_zeros)
+				num_bytes = total_remaining_zeros;
+		}
+		ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file,
+							 ecryptfs_page_idx);
+		if (IS_ERR(ecryptfs_page)) {
+			rc = PTR_ERR(ecryptfs_page);
+			printk(KERN_ERR "%s: Error getting page at "
+			       "index [%ld] from eCryptfs inode "
+			       "mapping; rc = [%d]\n", __FUNCTION__,
+			       ecryptfs_page_idx, rc);
+			goto out;
+		}
+		if (start_offset_in_page) {
+			/* Read in the page from the lower
+			 * into the eCryptfs inode page cache,
+			 * decrypting */
+			rc = ecryptfs_decrypt_page(ecryptfs_page);
+			if (rc) {
+				printk(KERN_ERR "%s: Error decrypting "
+				       "page; rc = [%d]\n", __FUNCTION__, rc);
+				ClearPageUptodate(ecryptfs_page);
+				unlock_page(ecryptfs_page);
+				page_cache_release(ecryptfs_page);
+				goto out;
+			}
+		}
+		ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
+		if (pos >= offset) {
+			memcpy(((char *)ecryptfs_page_virt
+				+ start_offset_in_page),
+			       (data + data_offset), num_bytes);
+			data_offset += num_bytes;
+		} else {
+			/* We are extending past the previous end of the file.
+			 * Fill in zero values up to the start of where we
+			 * will be writing data. */
+			memset(((char *)ecryptfs_page_virt
+				+ start_offset_in_page), 0, num_bytes);
+		}
+		kunmap_atomic(ecryptfs_page_virt, KM_USER0);
+		flush_dcache_page(ecryptfs_page);
+		SetPageUptodate(ecryptfs_page);
+		unlock_page(ecryptfs_page);
+		rc = ecryptfs_encrypt_page(ecryptfs_page);
+		page_cache_release(ecryptfs_page);
+		if (rc) {
+			printk(KERN_ERR "%s: Error encrypting "
+			       "page; rc = [%d]\n", __FUNCTION__, rc);
+			goto out;
+		}
+		pos += num_bytes;
+	}
+	if ((offset + size) > ecryptfs_file_size) {
+		i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size));
+		rc = ecryptfs_write_inode_size_to_metadata(
+			ecryptfs_file->f_dentry->d_inode);
+		if (rc) {
+			printk(KERN_ERR	"Problem with "
+			       "ecryptfs_write_inode_size_to_metadata; "
+			       "rc = [%d]\n", rc);
+			goto out;
+		}
+	}
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_read_lower
+ * @data: The read data is stored here by this function
+ * @offset: Byte offset in the lower file from which to read the data
+ * @size: Number of bytes to read from the lower file and store into @data
+ * @ecryptfs_inode: The eCryptfs inode
+ *
+ * Read @size bytes of data at byte offset @offset from the lower
+ * inode into memory location @data, holding the lower_file_mutex.
+ *
+ * Returns zero on success; -EINVAL if the read from the lower file fails
+ */
+int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
+			struct inode *ecryptfs_inode)
+{
+	struct ecryptfs_inode_info *inode_info =
+		ecryptfs_inode_to_private(ecryptfs_inode);
+	ssize_t octets_read;
+	mm_segment_t fs_save;
+	int rc = 0;
+
+	mutex_lock(&inode_info->lower_file_mutex);
+	BUG_ON(!inode_info->lower_file);
+	inode_info->lower_file->f_pos = offset;
+	/* get_ds() limit lets vfs_read() fill the kernel buffer @data */
+	fs_save = get_fs();
+	set_fs(get_ds());
+	octets_read = vfs_read(inode_info->lower_file, data, size,
+			       &inode_info->lower_file->f_pos);
+	set_fs(fs_save);
+	if (octets_read < 0) {
+		printk(KERN_ERR "%s: octets_read = [%zd]; "
+		       "expected [%zu]\n", __FUNCTION__, octets_read, size);
+		rc = -EINVAL;
+	}
+	mutex_unlock(&inode_info->lower_file_mutex);
+	return rc;
+}
+
+/**
+ * ecryptfs_read_lower_page_segment
+ * @page_for_ecryptfs: The page into which data for eCryptfs will be
+ *                     written
+ * @page_index: Index of the page; shifted to a byte offset in the lower file
+ * @offset_in_page: Offset added to the page's byte offset in the lower file
+ * @size: The number of bytes to write into @page_for_ecryptfs
+ * @ecryptfs_inode: The eCryptfs inode
+ *
+ * Determines the byte offset in the file for the given page and
+ * offset within the page, maps the page, and makes the call to read
+ * the contents of @page_for_ecryptfs from the lower inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
+				     pgoff_t page_index,
+				     size_t offset_in_page, size_t size,
+				     struct inode *ecryptfs_inode)
+{
+	char *virt;
+	loff_t offset;
+	int rc;
+
+	offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page);
+	virt = kmap(page_for_ecryptfs);
+	rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
+	kunmap(page_for_ecryptfs);
+	flush_dcache_page(page_for_ecryptfs);
+	return rc;
+}
+
+/**
+ * ecryptfs_read
+ * @data: The virtual address into which to write the data read (and
+ *        possibly decrypted) from the lower file
+ * @offset: The offset in the decrypted view of the file from which to
+ *          read into @data
+ * @size: The number of bytes to read into @data
+ * @ecryptfs_file: The eCryptfs file from which to read
+ *
+ * Read an arbitrary amount of data from an arbitrary location in the
+ * eCryptfs page cache. This is done on an extent-by-extent basis;
+ * individual extents are decrypted and read from the lower page
+ * cache (via VFS reads). This function takes care of all the
+ * address translation to locations in the lower filesystem.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_read(char *data, loff_t offset, size_t size,
+		  struct file *ecryptfs_file)
+{
+	struct page *ecryptfs_page;
+	char *ecryptfs_page_virt;
+	loff_t ecryptfs_file_size =
+		i_size_read(ecryptfs_file->f_dentry->d_inode);
+	loff_t data_offset = 0;
+	loff_t pos;
+	int rc = 0;
+
+	if ((offset + size) > ecryptfs_file_size) {
+		rc = -EINVAL;
+		printk(KERN_ERR "%s: Attempt to read data past the end of the "
+			"file; offset = [%lld]; size = [%zu]; "
+		       "ecryptfs_file_size = [%lld]\n",
+		       __FUNCTION__, offset, size, ecryptfs_file_size);
+		goto out;
+	}
+	pos = offset;
+	while (pos < (offset + size)) {
+		pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT);
+		size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK);
+		size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page);
+		size_t total_remaining_bytes = ((offset + size) - pos);
+
+		if (num_bytes > total_remaining_bytes)
+			num_bytes = total_remaining_bytes;
+		ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_file,
+							 ecryptfs_page_idx);
+		if (IS_ERR(ecryptfs_page)) {
+			rc = PTR_ERR(ecryptfs_page);
+			printk(KERN_ERR "%s: Error getting page at "
+			       "index [%ld] from eCryptfs inode "
+			       "mapping; rc = [%d]\n", __FUNCTION__,
+			       ecryptfs_page_idx, rc);
+			goto out;
+		}
+		rc = ecryptfs_decrypt_page(ecryptfs_page);
+		if (rc) {
+			printk(KERN_ERR "%s: Error decrypting "
+			       "page; rc = [%d]\n", __FUNCTION__, rc);
+			ClearPageUptodate(ecryptfs_page);
+			unlock_page(ecryptfs_page);
+			page_cache_release(ecryptfs_page);
+			goto out;
+		}
+		ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
+		memcpy((data + data_offset),
+		       (ecryptfs_page_virt + start_offset_in_page), num_bytes);
+		kunmap_atomic(ecryptfs_page_virt, KM_USER0);
+		flush_dcache_page(ecryptfs_page);
+		SetPageUptodate(ecryptfs_page);
+		unlock_page(ecryptfs_page);
+		page_cache_release(ecryptfs_page);
+		pos += num_bytes;
+		data_offset += num_bytes;
+	}
+out:
+	return rc;
+}
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 7b3f0cc09a6..f8cdab2bee3 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -27,6 +27,7 @@
 #include <linux/mount.h>
 #include <linux/key.h>
 #include <linux/seq_file.h>
+#include <linux/file.h>
 #include <linux/crypto.h>
 #include "ecryptfs_kernel.h"
 
@@ -46,15 +47,16 @@ struct kmem_cache *ecryptfs_inode_info_cache;
  */
 static struct inode *ecryptfs_alloc_inode(struct super_block *sb)
 {
-	struct ecryptfs_inode_info *ecryptfs_inode;
+	struct ecryptfs_inode_info *inode_info;
 	struct inode *inode = NULL;
 
-	ecryptfs_inode = kmem_cache_alloc(ecryptfs_inode_info_cache,
-					  GFP_KERNEL);
-	if (unlikely(!ecryptfs_inode))
+	inode_info = kmem_cache_alloc(ecryptfs_inode_info_cache, GFP_KERNEL);
+	if (unlikely(!inode_info))
 		goto out;
-	ecryptfs_init_crypt_stat(&ecryptfs_inode->crypt_stat);
-	inode = &ecryptfs_inode->vfs_inode;
+	ecryptfs_init_crypt_stat(&inode_info->crypt_stat);
+	mutex_init(&inode_info->lower_file_mutex);
+	inode_info->lower_file = NULL;
+	inode = &inode_info->vfs_inode;
 out:
 	return inode;
 }
@@ -63,9 +65,10 @@ out:
  * ecryptfs_destroy_inode
  * @inode: The ecryptfs inode
  *
- * This is used during the final destruction of the inode.
- * All allocation of memory related to the inode, including allocated
- * memory in the crypt_stat struct, will be released here.
+ * This is used during the final destruction of the inode.  All
+ * allocation of memory related to the inode, including allocated
+ * memory in the crypt_stat struct, will be released here. This
+ * function also fput()'s the persistent file for the lower inode.
  * There should be no chance that this deallocation will be missed.
  */
 static void ecryptfs_destroy_inode(struct inode *inode)
@@ -73,7 +76,21 @@ static void ecryptfs_destroy_inode(struct inode *inode)
 	struct ecryptfs_inode_info *inode_info;
 
 	inode_info = ecryptfs_inode_to_private(inode);
-	ecryptfs_destruct_crypt_stat(&inode_info->crypt_stat);
+	mutex_lock(&inode_info->lower_file_mutex);
+	if (inode_info->lower_file) {
+		struct dentry *lower_dentry =
+			inode_info->lower_file->f_dentry;
+
+		BUG_ON(!lower_dentry);
+		if (lower_dentry->d_inode) {
+			fput(inode_info->lower_file);
+			inode_info->lower_file = NULL;
+			d_drop(lower_dentry);
+			d_delete(lower_dentry);
+		}
+	}
+	mutex_unlock(&inode_info->lower_file_mutex);
+	ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
 	kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
 }
 
@@ -104,7 +121,7 @@ static void ecryptfs_put_super(struct super_block *sb)
 {
 	struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
 
-	ecryptfs_destruct_mount_crypt_stat(&sb_info->mount_crypt_stat);
+	ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
 	kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
 	ecryptfs_set_superblock_private(sb, NULL);
 }
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2bf49d7ef84..05d9342bb64 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -22,7 +22,9 @@
  */
 
 #include "ext2.h"
+#include <linux/buffer_head.h>
 #include <linux/pagemap.h>
+#include <linux/swap.h>
 
 typedef struct ext2_dir_entry_2 ext2_dirent;
 
@@ -61,16 +63,25 @@ ext2_last_byte(struct inode *inode, unsigned long page_nr)
 	return last_byte;
 }
 
-static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to)
+static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	struct inode *dir = page->mapping->host;
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
 	int err = 0;
+
 	dir->i_version++;
-	page->mapping->a_ops->commit_write(NULL, page, from, to);
+	block_write_end(NULL, mapping, pos, len, len, page, NULL);
+
+	if (pos+len > dir->i_size) {
+		i_size_write(dir, pos+len);
+		mark_inode_dirty(dir);
+	}
+
 	if (IS_DIRSYNC(dir))
 		err = write_one_page(page, 1);
 	else
 		unlock_page(page);
+
 	return err;
 }
 
@@ -412,16 +423,18 @@ ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry)
 void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 			struct page *page, struct inode *inode)
 {
-	unsigned from = (char *) de - (char *) page_address(page);
-	unsigned to = from + le16_to_cpu(de->rec_len);
+	loff_t pos = page_offset(page) +
+			(char *) de - (char *) page_address(page);
+	unsigned len = le16_to_cpu(de->rec_len);
 	int err;
 
 	lock_page(page);
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = __ext2_write_begin(NULL, page->mapping, pos, len,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	BUG_ON(err);
 	de->inode = cpu_to_le32(inode->i_ino);
-	ext2_set_de_type (de, inode);
-	err = ext2_commit_chunk(page, from, to);
+	ext2_set_de_type(de, inode);
+	err = ext2_commit_chunk(page, pos, len);
 	ext2_put_page(page);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
@@ -444,7 +457,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 	unsigned long npages = dir_pages(dir);
 	unsigned long n;
 	char *kaddr;
-	unsigned from, to;
+	loff_t pos;
 	int err;
 
 	/*
@@ -497,9 +510,10 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 	return -EINVAL;
 
 got_it:
-	from = (char*)de - (char*)page_address(page);
-	to = from + rec_len;
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	pos = page_offset(page) +
+		(char*)de - (char*)page_address(page);
+	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
+							&page, NULL);
 	if (err)
 		goto out_unlock;
 	if (de->inode) {
@@ -509,10 +523,10 @@ got_it:
 		de = de1;
 	}
 	de->name_len = namelen;
-	memcpy (de->name, name, namelen);
+	memcpy(de->name, name, namelen);
 	de->inode = cpu_to_le32(inode->i_ino);
 	ext2_set_de_type (de, inode);
-	err = ext2_commit_chunk(page, from, to);
+	err = ext2_commit_chunk(page, pos, rec_len);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
 	mark_inode_dirty(dir);
@@ -537,6 +551,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
 	unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len);
+	loff_t pos;
 	ext2_dirent * pde = NULL;
 	ext2_dirent * de = (ext2_dirent *) (kaddr + from);
 	int err;
@@ -553,13 +568,15 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 	}
 	if (pde)
 		from = (char*)pde - (char*)page_address(page);
+	pos = page_offset(page) + from;
 	lock_page(page);
-	err = mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0,
+							&page, NULL);
 	BUG_ON(err);
 	if (pde)
-		pde->rec_len = cpu_to_le16(to-from);
+		pde->rec_len = cpu_to_le16(to - from);
 	dir->inode = 0;
-	err = ext2_commit_chunk(page, from, to);
+	err = ext2_commit_chunk(page, pos, to - from);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
 	EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
 	mark_inode_dirty(inode);
@@ -582,7 +599,9 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
 
 	if (!page)
 		return -ENOMEM;
-	err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
+
+	err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0,
+							&page, NULL);
 	if (err) {
 		unlock_page(page);
 		goto fail;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9fd0ec5ba0d..a08052d2c00 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -134,6 +134,9 @@ extern void ext2_truncate (struct inode *);
 extern int ext2_setattr (struct dentry *, struct iattr *);
 extern void ext2_set_inode_flags(struct inode *inode);
 extern void ext2_get_inode_flags(struct ext2_inode_info *);
+int __ext2_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata);
 
 /* ioctl.c */
 extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0079b2cd531..1b102a1cceb 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -642,18 +642,35 @@ ext2_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
 }
 
+int __ext2_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
+{
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+							ext2_get_block);
+}
+
 static int
-ext2_prepare_write(struct file *file, struct page *page,
-			unsigned from, unsigned to)
+ext2_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
 {
-	return block_prepare_write(page,from,to,ext2_get_block);
+	*pagep = NULL;
+	return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
 }
 
 static int
-ext2_nobh_prepare_write(struct file *file, struct page *page,
-			unsigned from, unsigned to)
+ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata)
 {
-	return nobh_prepare_write(page,from,to,ext2_get_block);
+	/*
+	 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
+	 * directory handling code to pass around offsets rather than struct
+	 * pages in order to make this work easily.
+	 */
+	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+							ext2_get_block);
 }
 
 static int ext2_nobh_writepage(struct page *page,
@@ -689,8 +706,8 @@ const struct address_space_operations ext2_aops = {
 	.readpages		= ext2_readpages,
 	.writepage		= ext2_writepage,
 	.sync_page		= block_sync_page,
-	.prepare_write		= ext2_prepare_write,
-	.commit_write		= generic_commit_write,
+	.write_begin		= ext2_write_begin,
+	.write_end		= generic_write_end,
 	.bmap			= ext2_bmap,
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
@@ -707,8 +724,8 @@ const struct address_space_operations ext2_nobh_aops = {
 	.readpages		= ext2_readpages,
 	.writepage		= ext2_nobh_writepage,
 	.sync_page		= block_sync_page,
-	.prepare_write		= ext2_nobh_prepare_write,
-	.commit_write		= nobh_commit_write,
+	.write_begin		= ext2_nobh_write_begin,
+	.write_end		= nobh_write_end,
 	.bmap			= ext2_bmap,
 	.direct_IO		= ext2_direct_IO,
 	.writepages		= ext2_writepages,
@@ -925,7 +942,8 @@ void ext2_truncate (struct inode * inode)
 	if (mapping_is_xip(inode->i_mapping))
 		xip_truncate_page(inode->i_mapping, inode->i_size);
 	else if (test_opt(inode->i_sb, NOBH))
-		nobh_truncate_page(inode->i_mapping, inode->i_size);
+		nobh_truncate_page(inode->i_mapping,
+				inode->i_size, ext2_get_block);
 	else
 		block_truncate_page(inode->i_mapping,
 				inode->i_size, ext2_get_block);
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index c00723a99f4..c2c3491b18c 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -143,7 +143,7 @@ static int ext3_readdir(struct file * filp,
 					sb->s_bdev->bd_inode->i_mapping,
 					&filp->f_ra, filp,
 					index, 1);
-			filp->f_ra.prev_index = index;
+			filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 			bh = ext3_bread(NULL, inode, blk, 0, &err);
 		}
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index de4e3161e47..2f2b6864db1 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1147,51 +1147,68 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext3_journal_get_write_access(handle, bh);
 }
 
-static int ext3_prepare_write(struct file *file, struct page *page,
-			      unsigned from, unsigned to)
+static int ext3_write_begin(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;
 	int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
+	struct page *page;
+	pgoff_t index;
+	unsigned from, to;
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
 
 retry:
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+
 	handle = ext3_journal_start(inode, needed_blocks);
 	if (IS_ERR(handle)) {
+		unlock_page(page);
+		page_cache_release(page);
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
-		ret = nobh_prepare_write(page, from, to, ext3_get_block);
-	else
-		ret = block_prepare_write(page, from, to, ext3_get_block);
+	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+							ext3_get_block);
 	if (ret)
-		goto prepare_write_failed;
+		goto write_begin_failed;
 
 	if (ext3_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
 	}
-prepare_write_failed:
-	if (ret)
+write_begin_failed:
+	if (ret) {
 		ext3_journal_stop(handle);
+		unlock_page(page);
+		page_cache_release(page);
+	}
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 out:
 	return ret;
 }
 
+
 int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 {
 	int err = journal_dirty_data(handle, bh);
 	if (err)
 		ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__,
-						bh, handle,err);
+						bh, handle, err);
 	return err;
 }
 
-/* For commit_write() in data=journal mode */
-static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
+/* For write_end() in data=journal mode */
+static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 {
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
@@ -1200,84 +1217,130 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
+ * Generic write_end handler for ordered and writeback ext3 journal modes.
+ * generic_write_end is unusable here: it unlocks the page, but the page must
+ * stay locked until ext3_journal_stop, which itself must run after
+ * block_write_end.  `file' may be NULL, so the inode comes from `mapping'.
+ */
+static int ext3_generic_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+
+	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	if (pos+copied > inode->i_size) {
+		i_size_write(inode, pos+copied);
+		mark_inode_dirty(inode);
+	}
+
+	return copied;
+}
+
+/*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
  *
  * ext3 never places buffers on inode->i_mapping->private_list.  metadata
  * buffers are managed internally.
  */
-static int ext3_ordered_commit_write(struct file *file, struct page *page,
-			     unsigned from, unsigned to)
+static int ext3_ordered_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
 {
 	handle_t *handle = ext3_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;	/* file may be NULL */
+	unsigned from, to;
 	int ret = 0, ret2;
 
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
+
 	ret = walk_page_buffers(handle, page_buffers(page),
 		from, to, NULL, ext3_journal_dirty_data);
 
 	if (ret == 0) {
 		/*
-		 * generic_commit_write() will run mark_inode_dirty() if i_size
+		 * generic_write_end() will run mark_inode_dirty() if i_size
 		 * changes.  So let's piggyback the i_disksize mark_inode_dirty
 		 * into that.
 		 */
 		loff_t new_i_size;
 
-		new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+		new_i_size = pos + copied;
 		if (new_i_size > EXT3_I(inode)->i_disksize)
 			EXT3_I(inode)->i_disksize = new_i_size;
-		ret = generic_commit_write(file, page, from, to);
+		copied = ext3_generic_write_end(file, mapping, pos, len, copied,
+							page, fsdata);
+		if (copied < 0)
+			ret = copied;
 	}
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	return ret;
+	unlock_page(page);
+	page_cache_release(page);
+
+	return ret ? ret : copied;
 }
 
-static int ext3_writeback_commit_write(struct file *file, struct page *page,
-			     unsigned from, unsigned to)
+static int ext3_writeback_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
 {
 	handle_t *handle = ext3_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;	/* file may be NULL */
 	int ret = 0, ret2;
 	loff_t new_i_size;
 
-	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	new_i_size = pos + copied;
 	if (new_i_size > EXT3_I(inode)->i_disksize)
 		EXT3_I(inode)->i_disksize = new_i_size;
 
-	if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
-		ret = nobh_commit_write(file, page, from, to);
-	else
-		ret = generic_commit_write(file, page, from, to);
+	copied = ext3_generic_write_end(file, mapping, pos, len, copied,
+							page, fsdata);
+	if (copied < 0)
+		ret = copied;
 
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	return ret;
+	unlock_page(page);
+	page_cache_release(page);
+
+	return ret ? ret : copied;
 }
 
-static int ext3_journalled_commit_write(struct file *file,
-			struct page *page, unsigned from, unsigned to)
+static int ext3_journalled_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
 {
 	handle_t *handle = ext3_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 	int partial = 0;
-	loff_t pos;
+	unsigned from, to;
 
-	/*
-	 * Here we duplicate the generic_commit_write() functionality
-	 */
-	pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
+
+	if (copied < len) {
+		if (!PageUptodate(page))
+			copied = 0;
+		page_zero_new_buffers(page, from+copied, to);
+	}
 
 	ret = walk_page_buffers(handle, page_buffers(page), from,
-				to, &partial, commit_write_fn);
+				to, &partial, write_end_fn);
 	if (!partial)
 		SetPageUptodate(page);
-	if (pos > inode->i_size)
-		i_size_write(inode, pos);
+	if (pos+copied > inode->i_size)
+		i_size_write(inode, pos+copied);
 	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
 	if (inode->i_size > EXT3_I(inode)->i_disksize) {
 		EXT3_I(inode)->i_disksize = inode->i_size;
@@ -1285,10 +1348,14 @@ static int ext3_journalled_commit_write(struct file *file,
 		if (!ret)
 			ret = ret2;
 	}
+
 	ret2 = ext3_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	return ret;
+	unlock_page(page);
+	page_cache_release(page);
+
+	return ret ? ret : copied;
 }
 
 /*
@@ -1546,7 +1613,7 @@ static int ext3_journalled_writepage(struct page *page,
 			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
 
 		err = walk_page_buffers(handle, page_buffers(page), 0,
-				PAGE_CACHE_SIZE, NULL, commit_write_fn);
+				PAGE_CACHE_SIZE, NULL, write_end_fn);
 		if (ret == 0)
 			ret = err;
 		EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
@@ -1706,8 +1773,8 @@ static const struct address_space_operations ext3_ordered_aops = {
 	.readpages	= ext3_readpages,
 	.writepage	= ext3_ordered_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext3_prepare_write,
-	.commit_write	= ext3_ordered_commit_write,
+	.write_begin	= ext3_write_begin,
+	.write_end	= ext3_ordered_write_end,
 	.bmap		= ext3_bmap,
 	.invalidatepage	= ext3_invalidatepage,
 	.releasepage	= ext3_releasepage,
@@ -1720,8 +1787,8 @@ static const struct address_space_operations ext3_writeback_aops = {
 	.readpages	= ext3_readpages,
 	.writepage	= ext3_writeback_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext3_prepare_write,
-	.commit_write	= ext3_writeback_commit_write,
+	.write_begin	= ext3_write_begin,
+	.write_end	= ext3_writeback_write_end,
 	.bmap		= ext3_bmap,
 	.invalidatepage	= ext3_invalidatepage,
 	.releasepage	= ext3_releasepage,
@@ -1734,8 +1801,8 @@ static const struct address_space_operations ext3_journalled_aops = {
 	.readpages	= ext3_readpages,
 	.writepage	= ext3_journalled_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext3_prepare_write,
-	.commit_write	= ext3_journalled_commit_write,
+	.write_begin	= ext3_write_begin,
+	.write_end	= ext3_journalled_write_end,
 	.set_page_dirty	= ext3_journalled_set_page_dirty,
 	.bmap		= ext3_bmap,
 	.invalidatepage	= ext3_invalidatepage,
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 3ab01c04e00..e11890acfa2 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -142,7 +142,7 @@ static int ext4_readdir(struct file * filp,
 					sb->s_bdev->bd_inode->i_mapping,
 					&filp->f_ra, filp,
 					index, 1);
-			filp->f_ra.prev_index = index;
+			filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 			bh = ext4_bread(NULL, inode, blk, 0, &err);
 		}
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a4848e04a5e..0df2b1e06d0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1146,34 +1146,50 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ext4_journal_get_write_access(handle, bh);
 }
 
-static int ext4_prepare_write(struct file *file, struct page *page,
-			      unsigned from, unsigned to)
+static int ext4_write_begin(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;
 	int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
 	handle_t *handle;
 	int retries = 0;
+	struct page *page;
+	pgoff_t index;
+	unsigned from, to;
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
 
 retry:
-	handle = ext4_journal_start(inode, needed_blocks);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+
+	handle = ext4_journal_start(inode, needed_blocks);
+	if (IS_ERR(handle)) {
+		unlock_page(page);
+		page_cache_release(page);
+		ret = PTR_ERR(handle);
+		goto out;
 	}
-	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-		ret = nobh_prepare_write(page, from, to, ext4_get_block);
-	else
-		ret = block_prepare_write(page, from, to, ext4_get_block);
-	if (ret)
-		goto prepare_write_failed;
 
-	if (ext4_should_journal_data(inode)) {
+	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+							ext4_get_block);
+
+	if (!ret && ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
 				from, to, NULL, do_journal_get_write_access);
 	}
-prepare_write_failed:
-	if (ret)
+
+	if (ret) {
 		ext4_journal_stop(handle);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 out:
@@ -1185,12 +1201,12 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	int err = jbd2_journal_dirty_data(handle, bh);
 	if (err)
 		ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
-						bh, handle,err);
+						bh, handle, err);
 	return err;
 }
 
-/* For commit_write() in data=journal mode */
-static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
+/* For write_end() in data=journal mode */
+static int write_end_fn(handle_t *handle, struct buffer_head *bh)
 {
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
@@ -1199,84 +1215,130 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
+ * Generic write_end handler for ordered and writeback ext4 journal modes.
+ * generic_write_end is unusable here: it unlocks the page, but the page must
+ * stay locked until ext4_journal_stop, which itself must run after
+ * block_write_end.  `file' may be NULL, so the inode comes from `mapping'.
+ */
+static int ext4_generic_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+
+	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	if (pos+copied > inode->i_size) {
+		i_size_write(inode, pos+copied);
+		mark_inode_dirty(inode);
+	}
+
+	return copied;
+}
+
+/*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
  *
  * ext4 never places buffers on inode->i_mapping->private_list.  metadata
  * buffers are managed internally.
  */
-static int ext4_ordered_commit_write(struct file *file, struct page *page,
-			     unsigned from, unsigned to)
+static int ext4_ordered_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;	/* file may be NULL */
+	unsigned from, to;
 	int ret = 0, ret2;
 
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
+
 	ret = walk_page_buffers(handle, page_buffers(page),
 		from, to, NULL, ext4_journal_dirty_data);
 
 	if (ret == 0) {
 		/*
-		 * generic_commit_write() will run mark_inode_dirty() if i_size
+		 * generic_write_end() will run mark_inode_dirty() if i_size
 		 * changes.  So let's piggyback the i_disksize mark_inode_dirty
 		 * into that.
 		 */
 		loff_t new_i_size;
 
-		new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+		new_i_size = pos + copied;
 		if (new_i_size > EXT4_I(inode)->i_disksize)
 			EXT4_I(inode)->i_disksize = new_i_size;
-		ret = generic_commit_write(file, page, from, to);
+		copied = ext4_generic_write_end(file, mapping, pos, len, copied,
+							page, fsdata);
+		if (copied < 0)
+			ret = copied;
 	}
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	return ret;
+	unlock_page(page);
+	page_cache_release(page);
+
+	return ret ? ret : copied;
 }
 
-static int ext4_writeback_commit_write(struct file *file, struct page *page,
-			     unsigned from, unsigned to)
+static int ext4_writeback_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;	/* file may be NULL */
 	int ret = 0, ret2;
 	loff_t new_i_size;
 
-	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	new_i_size = pos + copied;
 	if (new_i_size > EXT4_I(inode)->i_disksize)
 		EXT4_I(inode)->i_disksize = new_i_size;
 
-	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-		ret = nobh_commit_write(file, page, from, to);
-	else
-		ret = generic_commit_write(file, page, from, to);
+	copied = ext4_generic_write_end(file, mapping, pos, len, copied,
+							page, fsdata);
+	if (copied < 0)
+		ret = copied;
 
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	return ret;
+	unlock_page(page);
+	page_cache_release(page);
+
+	return ret ? ret : copied;
 }
 
-static int ext4_journalled_commit_write(struct file *file,
-			struct page *page, unsigned from, unsigned to)
+static int ext4_journalled_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
 {
 	handle_t *handle = ext4_journal_current_handle();
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = mapping->host;
 	int ret = 0, ret2;
 	int partial = 0;
-	loff_t pos;
+	unsigned from, to;
 
-	/*
-	 * Here we duplicate the generic_commit_write() functionality
-	 */
-	pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	from = pos & (PAGE_CACHE_SIZE - 1);
+	to = from + len;
+
+	if (copied < len) {
+		if (!PageUptodate(page))
+			copied = 0;
+		page_zero_new_buffers(page, from+copied, to);
+	}
 
 	ret = walk_page_buffers(handle, page_buffers(page), from,
-				to, &partial, commit_write_fn);
+				to, &partial, write_end_fn);
 	if (!partial)
 		SetPageUptodate(page);
-	if (pos > inode->i_size)
-		i_size_write(inode, pos);
+	if (pos+copied > inode->i_size)
+		i_size_write(inode, pos+copied);
 	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
 	if (inode->i_size > EXT4_I(inode)->i_disksize) {
 		EXT4_I(inode)->i_disksize = inode->i_size;
@@ -1284,10 +1346,14 @@ static int ext4_journalled_commit_write(struct file *file,
 		if (!ret)
 			ret = ret2;
 	}
+
 	ret2 = ext4_journal_stop(handle);
 	if (!ret)
 		ret = ret2;
-	return ret;
+	unlock_page(page);
+	page_cache_release(page);
+
+	return ret ? ret : copied;
 }
 
 /*
@@ -1545,7 +1611,7 @@ static int ext4_journalled_writepage(struct page *page,
 			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
 
 		err = walk_page_buffers(handle, page_buffers(page), 0,
-				PAGE_CACHE_SIZE, NULL, commit_write_fn);
+				PAGE_CACHE_SIZE, NULL, write_end_fn);
 		if (ret == 0)
 			ret = err;
 		EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
@@ -1705,8 +1771,8 @@ static const struct address_space_operations ext4_ordered_aops = {
 	.readpages	= ext4_readpages,
 	.writepage	= ext4_ordered_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext4_prepare_write,
-	.commit_write	= ext4_ordered_commit_write,
+	.write_begin	= ext4_write_begin,
+	.write_end	= ext4_ordered_write_end,
 	.bmap		= ext4_bmap,
 	.invalidatepage	= ext4_invalidatepage,
 	.releasepage	= ext4_releasepage,
@@ -1719,8 +1785,8 @@ static const struct address_space_operations ext4_writeback_aops = {
 	.readpages	= ext4_readpages,
 	.writepage	= ext4_writeback_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext4_prepare_write,
-	.commit_write	= ext4_writeback_commit_write,
+	.write_begin	= ext4_write_begin,
+	.write_end	= ext4_writeback_write_end,
 	.bmap		= ext4_bmap,
 	.invalidatepage	= ext4_invalidatepage,
 	.releasepage	= ext4_releasepage,
@@ -1733,8 +1799,8 @@ static const struct address_space_operations ext4_journalled_aops = {
 	.readpages	= ext4_readpages,
 	.writepage	= ext4_journalled_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= ext4_prepare_write,
-	.commit_write	= ext4_journalled_commit_write,
+	.write_begin	= ext4_write_begin,
+	.write_end	= ext4_journalled_write_end,
 	.set_page_dirty	= ext4_journalled_set_page_dirty,
 	.bmap		= ext4_bmap,
 	.invalidatepage	= ext4_invalidatepage,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 4baa5f20536..46b8a67f55c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -141,19 +141,24 @@ static int fat_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
 }
 
-static int fat_prepare_write(struct file *file, struct page *page,
-			     unsigned from, unsigned to)
+static int fat_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return cont_prepare_write(page, from, to, fat_get_block,
-				  &MSDOS_I(page->mapping->host)->mmu_private);
+	*pagep = NULL;
+	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				fat_get_block,
+				&MSDOS_I(mapping->host)->mmu_private);
 }
 
-static int fat_commit_write(struct file *file, struct page *page,
-			    unsigned from, unsigned to)
+static int fat_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *pagep, void *fsdata)
 {
-	struct inode *inode = page->mapping->host;
-	int err = generic_commit_write(file, page, from, to);
-	if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
+	struct inode *inode = mapping->host;
+	int err;
+	err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+	if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
 		inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 		MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
 		mark_inode_dirty(inode);
@@ -202,8 +207,8 @@ static const struct address_space_operations fat_aops = {
 	.writepage	= fat_writepage,
 	.writepages	= fat_writepages,
 	.sync_page	= block_sync_page,
-	.prepare_write	= fat_prepare_write,
-	.commit_write	= fat_commit_write,
+	.write_begin	= fat_write_begin,
+	.write_end	= fat_write_end,
 	.direct_IO	= fat_direct_IO,
 	.bmap		= _fat_bmap
 };
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f79de7c8cdf..11f22a3d728 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -444,22 +444,25 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
 	return outarg.size;
 }
 
-static int fuse_prepare_write(struct file *file, struct page *page,
-			      unsigned offset, unsigned to)
+static int fuse_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	/* No op */
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+
+	*pagep = __grab_cache_page(mapping, index);
+	if (!*pagep)
+		return -ENOMEM;
 	return 0;
 }
 
-static int fuse_commit_write(struct file *file, struct page *page,
-			     unsigned offset, unsigned to)
+static int fuse_buffered_write(struct file *file, struct inode *inode,
+			       loff_t pos, unsigned count, struct page *page)
 {
 	int err;
 	size_t nres;
-	unsigned count = to - offset;
-	struct inode *inode = page->mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
-	loff_t pos = page_offset(page) + offset;
+	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
 	struct fuse_req *req;
 
 	if (is_bad_inode(inode))
@@ -475,20 +478,35 @@ static int fuse_commit_write(struct file *file, struct page *page,
 	nres = fuse_send_write(req, file, inode, pos, count);
 	err = req->out.h.error;
 	fuse_put_request(fc, req);
-	if (!err && nres != count)
+	if (!err && !nres)
 		err = -EIO;
 	if (!err) {
-		pos += count;
+		pos += nres;
 		spin_lock(&fc->lock);
 		if (pos > inode->i_size)
 			i_size_write(inode, pos);
 		spin_unlock(&fc->lock);
 
-		if (offset == 0 && to == PAGE_CACHE_SIZE)
+		if (count == PAGE_CACHE_SIZE)
 			SetPageUptodate(page);
 	}
 	fuse_invalidate_attr(inode);
-	return err;
+	return err ? err : nres;
+}
+
+static int fuse_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	int res = 0;
+
+	if (copied)
+		res = fuse_buffered_write(file, inode, pos, copied, page);
+
+	unlock_page(page);
+	page_cache_release(page);
+	return res;
 }
 
 static void fuse_release_user_pages(struct fuse_req *req, int write)
@@ -819,8 +837,8 @@ static const struct file_operations fuse_direct_io_file_operations = {
 
 static const struct address_space_operations fuse_file_aops  = {
 	.readpage	= fuse_readpage,
-	.prepare_write	= fuse_prepare_write,
-	.commit_write	= fuse_commit_write,
+	.write_begin	= fuse_write_begin,
+	.write_end	= fuse_write_end,
 	.readpages	= fuse_readpages,
 	.set_page_dirty	= fuse_set_page_dirty,
 	.bmap		= fuse_bmap,
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 873a511ef2b..9679f8b9870 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -17,6 +17,7 @@
 #include <linux/mpage.h>
 #include <linux/fs.h>
 #include <linux/writeback.h>
+#include <linux/swap.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
 
@@ -349,45 +350,49 @@ out_unlock:
 }
 
 /**
- * gfs2_prepare_write - Prepare to write a page to a file
+ * gfs2_write_begin - Begin to write to a file
  * @file: The file to write to
- * @page: The page which is to be prepared for writing
- * @from: From (byte range within page)
- * @to: To (byte range within page)
+ * @mapping: The mapping in which to write
+ * @pos: The file offset at which to start writing
+ * @len: Length of the write
+ * @flags: Various flags
+ * @pagep: Pointer to return the page
+ * @fsdata: Pointer to return fs data (unused by GFS2)
  *
  * Returns: errno
  */
 
-static int gfs2_prepare_write(struct file *file, struct page *page,
-			      unsigned from, unsigned to)
+static int gfs2_write_begin(struct file *file, struct address_space *mapping,
+			    loff_t pos, unsigned len, unsigned flags,
+			    struct page **pagep, void **fsdata)
 {
-	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+	struct gfs2_inode *ip = GFS2_I(mapping->host);
+	struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
 	unsigned int data_blocks, ind_blocks, rblocks;
 	int alloc_required;
 	int error = 0;
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
-	loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	struct gfs2_alloc *al;
-	unsigned int write_len = to - from;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned to = from + len;
+	struct page *page;
 
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
 	error = gfs2_glock_nq_atime(&ip->i_gh);
-	if (unlikely(error)) {
-		if (error == GLR_TRYFAILED) {
-			unlock_page(page);
-			error = AOP_TRUNCATED_PAGE;
-			yield();
-		}
+	if (unlikely(error))
 		goto out_uninit;
-	}
 
-	gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks);
+	error = -ENOMEM;
+	page = __grab_cache_page(mapping, index);
+	*pagep = page;
+	if (!page)
+		goto out_unlock;
+
+	gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
 
-	error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required);
+	error = gfs2_write_alloc_required(ip, pos, len, &alloc_required);
 	if (error)
-		goto out_unlock;
+		goto out_putpage;
 
 
 	ip->i_alloc.al_requested = 0;
@@ -420,7 +425,7 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
 		goto out_trans_fail;
 
 	if (gfs2_is_stuffed(ip)) {
-		if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
+		if (pos + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
 			error = gfs2_unstuff_dinode(ip, page);
 			if (error == 0)
 				goto prepare_write;
@@ -443,6 +448,10 @@ out_qunlock:
 out_alloc_put:
 			gfs2_alloc_put(ip);
 		}
+out_putpage:
+		page_cache_release(page);
+		if (pos + len > ip->i_inode.i_size)
+			vmtruncate(&ip->i_inode, ip->i_inode.i_size);
 out_unlock:
 		gfs2_glock_dq_m(1, &ip->i_gh);
 out_uninit:
@@ -478,65 +487,117 @@ static void adjust_fs_space(struct inode *inode)
 }
 
 /**
- * gfs2_commit_write - Commit write to a file
+ * gfs2_stuffed_write_end - Write end for stuffed files
+ * @inode: The inode
+ * @dibh: The buffer_head containing the on-disk inode
+ * @pos: The file position
+ * @len: The length of the write
+ * @copied: How much was actually copied by the VFS
+ * @page: The page
+ *
+ * This copies the data from the page into the inode block after
+ * the inode data structure itself.
+ *
+ * Returns: the number of bytes copied (always @copied)
+ */
+static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
+				  loff_t pos, unsigned len, unsigned copied,
+				  struct page *page)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
+	u64 to = pos + copied;
+	void *kaddr;
+	unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
+	struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
+
+	BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
+	kaddr = kmap_atomic(page, KM_USER0);
+	memcpy(buf + pos, kaddr + pos, copied);
+	memset(kaddr + pos + copied, 0, len - copied);
+	flush_dcache_page(page);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	if (!PageUptodate(page))
+		SetPageUptodate(page);
+	unlock_page(page);
+	page_cache_release(page);
+
+	if (inode->i_size < to) {
+		i_size_write(inode, to);
+		ip->i_di.di_size = inode->i_size;
+		di->di_size = cpu_to_be64(inode->i_size);
+		mark_inode_dirty(inode);
+	}
+
+	if (inode == sdp->sd_rindex)
+		adjust_fs_space(inode);
+
+	brelse(dibh);
+	gfs2_trans_end(sdp);
+	gfs2_glock_dq(&ip->i_gh);
+	gfs2_holder_uninit(&ip->i_gh);
+	return copied;
+}
+
+/**
+ * gfs2_write_end - Finish a write to a file
  * @file: The file to write to
- * @page: The page containing the data
- * @from: From (byte range within page)
- * @to: To (byte range within page)
+ * @mapping: The address space to write to
+ * @pos: The file position
+ * @len: The length of the data
+ * @copied: How much was actually copied by the VFS
+ * @page: The page that has been written
+ * @fsdata: The fsdata (unused in GFS2)
+ *
+ * The main write_end function for GFS2. We have a separate one for
+ * stuffed files as they are slightly different, otherwise we just
+ * put our locking around the VFS provided functions.
  *
  * Returns: errno
  */
 
-static int gfs2_commit_write(struct file *file, struct page *page,
-			     unsigned from, unsigned to)
+static int gfs2_write_end(struct file *file, struct address_space *mapping,
+			  loff_t pos, unsigned len, unsigned copied,
+			  struct page *page, void *fsdata)
 {
 	struct inode *inode = page->mapping->host;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	int error = -EOPNOTSUPP;
 	struct buffer_head *dibh;
 	struct gfs2_alloc *al = &ip->i_alloc;
 	struct gfs2_dinode *di;
+	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned int to = from + len;
+	int ret;
 
-	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
-                goto fail_nounlock;
+	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == 0);
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error)
-		goto fail_endtrans;
+	ret = gfs2_meta_inode_buffer(ip, &dibh);
+	if (unlikely(ret)) {
+		unlock_page(page);
+		page_cache_release(page);
+		goto failed;
+	}
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-	di = (struct gfs2_dinode *)dibh->b_data;
-
-	if (gfs2_is_stuffed(ip)) {
-		u64 file_size;
-		void *kaddr;
 
-		file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to;
+	if (gfs2_is_stuffed(ip))
+		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
 
-		kaddr = kmap_atomic(page, KM_USER0);
-		memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
-		       kaddr + from, to - from);
-		kunmap_atomic(kaddr, KM_USER0);
+	if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+		gfs2_page_add_databufs(ip, page, from, to);
 
-		SetPageUptodate(page);
+	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
 
-		if (inode->i_size < file_size) {
-			i_size_write(inode, file_size);
+	if (likely(ret >= 0)) {
+		copied = ret;
+		if  ((pos + copied) > inode->i_size) {
+			di = (struct gfs2_dinode *)dibh->b_data;
+			ip->i_di.di_size = inode->i_size;
+			di->di_size = cpu_to_be64(inode->i_size);
 			mark_inode_dirty(inode);
 		}
-	} else {
-		if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
-		    gfs2_is_jdata(ip))
-			gfs2_page_add_databufs(ip, page, from, to);
-		error = generic_commit_write(file, page, from, to);
-		if (error)
-			goto fail;
-	}
-
-	if (ip->i_di.di_size < inode->i_size) {
-		ip->i_di.di_size = inode->i_size;
-		di->di_size = cpu_to_be64(inode->i_size);
 	}
 
 	if (inode == sdp->sd_rindex)
@@ -544,33 +605,15 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 
 	brelse(dibh);
 	gfs2_trans_end(sdp);
+failed:
 	if (al->al_requested) {
 		gfs2_inplace_release(ip);
 		gfs2_quota_unlock(ip);
 		gfs2_alloc_put(ip);
 	}
-	unlock_page(page);
-	gfs2_glock_dq_m(1, &ip->i_gh);
-	lock_page(page);
+	gfs2_glock_dq(&ip->i_gh);
 	gfs2_holder_uninit(&ip->i_gh);
-	return 0;
-
-fail:
-	brelse(dibh);
-fail_endtrans:
-	gfs2_trans_end(sdp);
-	if (al->al_requested) {
-		gfs2_inplace_release(ip);
-		gfs2_quota_unlock(ip);
-		gfs2_alloc_put(ip);
-	}
-	unlock_page(page);
-	gfs2_glock_dq_m(1, &ip->i_gh);
-	lock_page(page);
-	gfs2_holder_uninit(&ip->i_gh);
-fail_nounlock:
-	ClearPageUptodate(page);
-	return error;
+	return ret;
 }
 
 /**
@@ -799,8 +842,8 @@ const struct address_space_operations gfs2_file_aops = {
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
 	.sync_page = block_sync_page,
-	.prepare_write = gfs2_prepare_write,
-	.commit_write = gfs2_commit_write,
+	.write_begin = gfs2_write_begin,
+	.write_end = gfs2_write_end,
 	.set_page_dirty = gfs2_set_page_dirty,
 	.bmap = gfs2_bmap,
 	.invalidatepage = gfs2_invalidatepage,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index 5ea6b3d45ea..c176f67ba0a 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -464,23 +464,20 @@ void hfs_file_truncate(struct inode *inode)
 	       (long long)HFS_I(inode)->phys_size, inode->i_size);
 	if (inode->i_size > HFS_I(inode)->phys_size) {
 		struct address_space *mapping = inode->i_mapping;
+		void *fsdata;
 		struct page *page;
 		int res;
 
+		/* XXX: Can use generic_cont_expand? */
 		size = inode->i_size - 1;
-		page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT);
-		if (!page)
-			return;
-		size &= PAGE_CACHE_SIZE - 1;
-		size++;
-		res = mapping->a_ops->prepare_write(NULL, page, size, size);
-		if (!res)
-			res = mapping->a_ops->commit_write(NULL, page, size, size);
+		res = pagecache_write_begin(NULL, mapping, size+1, 0,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+		if (!res) {
+			res = pagecache_write_end(NULL, mapping, size+1, 0, 0,
+					page, fsdata);
+		}
 		if (res)
 			inode->i_size = HFS_I(inode)->phys_size;
-		unlock_page(page);
-		page_cache_release(page);
-		mark_inode_dirty(inode);
 		return;
 	} else if (inode->i_size == HFS_I(inode)->phys_size)
 		return;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index bc835f272a6..97f8446c4ff 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -35,10 +35,14 @@ static int hfs_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page, hfs_get_block);
 }
 
-static int hfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+static int hfs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return cont_prepare_write(page, from, to, hfs_get_block,
-				  &HFS_I(page->mapping->host)->phys_size);
+	*pagep = NULL;
+	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				hfs_get_block,
+				&HFS_I(mapping->host)->phys_size);
 }
 
 static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
@@ -119,8 +123,8 @@ const struct address_space_operations hfs_btree_aops = {
 	.readpage	= hfs_readpage,
 	.writepage	= hfs_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= hfs_prepare_write,
-	.commit_write	= generic_commit_write,
+	.write_begin	= hfs_write_begin,
+	.write_end	= generic_write_end,
 	.bmap		= hfs_bmap,
 	.releasepage	= hfs_releasepage,
 };
@@ -129,8 +133,8 @@ const struct address_space_operations hfs_aops = {
 	.readpage	= hfs_readpage,
 	.writepage	= hfs_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= hfs_prepare_write,
-	.commit_write	= generic_commit_write,
+	.write_begin	= hfs_write_begin,
+	.write_end	= generic_write_end,
 	.bmap		= hfs_bmap,
 	.direct_IO	= hfs_direct_IO,
 	.writepages	= hfs_writepages,
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 1a7480089e8..12e899cd788 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -443,21 +443,18 @@ void hfsplus_file_truncate(struct inode *inode)
 	if (inode->i_size > HFSPLUS_I(inode).phys_size) {
 		struct address_space *mapping = inode->i_mapping;
 		struct page *page;
-		u32 size = inode->i_size - 1;
+		void *fsdata;
+		u32 size = inode->i_size;
 		int res;
 
-		page = grab_cache_page(mapping, size >> PAGE_CACHE_SHIFT);
-		if (!page)
-			return;
-		size &= PAGE_CACHE_SIZE - 1;
-		size++;
-		res = mapping->a_ops->prepare_write(NULL, page, size, size);
-		if (!res)
-			res = mapping->a_ops->commit_write(NULL, page, size, size);
+		res = pagecache_write_begin(NULL, mapping, size, 0,
+						AOP_FLAG_UNINTERRUPTIBLE,
+						&page, &fsdata);
 		if (res)
-			inode->i_size = HFSPLUS_I(inode).phys_size;
-		unlock_page(page);
-		page_cache_release(page);
+			return;
+		res = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
+		if (res < 0)
+			return;
 		mark_inode_dirty(inode);
 		return;
 	} else if (inode->i_size == HFSPLUS_I(inode).phys_size)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 6f7c662174d..37744cf3706 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -27,10 +27,14 @@ static int hfsplus_writepage(struct page *page, struct writeback_control *wbc)
 	return block_write_full_page(page, hfsplus_get_block, wbc);
 }
 
-static int hfsplus_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return cont_prepare_write(page, from, to, hfsplus_get_block,
-		&HFSPLUS_I(page->mapping->host).phys_size);
+	*pagep = NULL;
+	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				hfsplus_get_block,
+				&HFSPLUS_I(mapping->host).phys_size);
 }
 
 static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
@@ -114,8 +118,8 @@ const struct address_space_operations hfsplus_btree_aops = {
 	.readpage	= hfsplus_readpage,
 	.writepage	= hfsplus_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= hfsplus_prepare_write,
-	.commit_write	= generic_commit_write,
+	.write_begin	= hfsplus_write_begin,
+	.write_end	= generic_write_end,
 	.bmap		= hfsplus_bmap,
 	.releasepage	= hfsplus_releasepage,
 };
@@ -124,8 +128,8 @@ const struct address_space_operations hfsplus_aops = {
 	.readpage	= hfsplus_readpage,
 	.writepage	= hfsplus_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= hfsplus_prepare_write,
-	.commit_write	= generic_commit_write,
+	.write_begin	= hfsplus_write_begin,
+	.write_end	= generic_write_end,
 	.bmap		= hfsplus_bmap,
 	.direct_IO	= hfsplus_direct_IO,
 	.writepages	= hfsplus_writepages,
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 06e5930515f..6ae9011b95e 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -3,7 +3,8 @@
 
 #include "os.h"
 
-/* These are exactly the same definitions as in fs.h, but the names are
+/*
+ * These are exactly the same definitions as in fs.h, but the names are
  * changed so that this file can be included in both kernel and user files.
  */
 
@@ -21,7 +22,8 @@
 #define HOSTFS_ATTR_FORCE	512	/* Not a change, but a change it */
 #define HOSTFS_ATTR_ATTR_FLAG	1024
 
-/* If you are very careful, you'll notice that these two are missing:
+/*
+ * If you are very careful, you'll notice that these two are missing:
  *
  * #define ATTR_KILL_SUID	2048
  * #define ATTR_KILL_SGID	4096
@@ -76,7 +78,8 @@ extern int make_symlink(const char *from, const char *to);
 extern int unlink_file(const char *file);
 extern int do_mkdir(const char *file, int mode);
 extern int do_rmdir(const char *file);
-extern int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor);
+extern int do_mknod(const char *file, int mode, unsigned int major,
+		    unsigned int minor);
 extern int link_file(const char *from, const char *to);
 extern int do_readlink(char *file, char *buf, int size);
 extern int rename_file(char *from, char *to);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index c77862032e8..8966b050196 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -6,21 +6,14 @@
  * 2003-02-10 Petr Baudis <pasky@ucw.cz>
  */
 
-#include <linux/stddef.h>
 #include <linux/fs.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/pagemap.h>
-#include <linux/blkdev.h>
-#include <linux/list.h>
 #include <linux/statfs.h>
-#include <linux/kdev_t.h>
-#include <asm/uaccess.h>
 #include "hostfs.h"
-#include "kern_util.h"
-#include "kern.h"
 #include "init.h"
+#include "kern.h"
 
 struct hostfs_inode_info {
 	char *host_filename;
@@ -61,18 +54,18 @@ static int __init hostfs_args(char *options, int *add)
 	char *ptr;
 
 	ptr = strchr(options, ',');
-	if(ptr != NULL)
+	if (ptr != NULL)
 		*ptr++ = '\0';
-	if(*options != '\0')
+	if (*options != '\0')
 		root_ino = options;
 
 	options = ptr;
-	while(options){
+	while (options) {
 		ptr = strchr(options, ',');
-		if(ptr != NULL)
+		if (ptr != NULL)
 			*ptr++ = '\0';
-		if(*options != '\0'){
-			if(!strcmp(options, "append"))
+		if (*options != '\0') {
+			if (!strcmp(options, "append"))
 				append = 1;
 			else printf("hostfs_args - unsupported option - %s\n",
 				    options);
@@ -102,7 +95,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
 
 	len = 0;
 	parent = dentry;
-	while(parent->d_parent != parent){
+	while (parent->d_parent != parent) {
 		len += parent->d_name.len + 1;
 		parent = parent->d_parent;
 	}
@@ -110,12 +103,12 @@ static char *dentry_name(struct dentry *dentry, int extra)
 	root = HOSTFS_I(parent->d_inode)->host_filename;
 	len += strlen(root);
 	name = kmalloc(len + extra + 1, GFP_KERNEL);
-	if(name == NULL)
+	if (name == NULL)
 		return NULL;
 
 	name[len] = '\0';
 	parent = dentry;
-	while(parent->d_parent != parent){
+	while (parent->d_parent != parent) {
 		len -= parent->d_name.len + 1;
 		name[len] = '/';
 		strncpy(&name[len + 1], parent->d_name.name,
@@ -136,7 +129,8 @@ static char *inode_name(struct inode *ino, int extra)
 
 static int read_name(struct inode *ino, char *name)
 {
-	/* The non-int inode fields are copied into ints by stat_file and
+	/*
+	 * The non-int inode fields are copied into ints by stat_file and
 	 * then copied into the inode because passing the actual pointers
 	 * in and having them treated as int * breaks on big-endian machines
 	 */
@@ -149,7 +143,7 @@ static int read_name(struct inode *ino, char *name)
 	err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
 			&ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
 			&ino->i_ctime, &i_blksize, &i_blocks, -1);
-	if(err)
+	if (err)
 		return err;
 
 	ino->i_ino = i_ino;
@@ -166,33 +160,33 @@ static char *follow_link(char *link)
 	char *name, *resolved, *end;
 
 	len = 64;
-	while(1){
+	while (1) {
 		n = -ENOMEM;
 		name = kmalloc(len, GFP_KERNEL);
-		if(name == NULL)
+		if (name == NULL)
 			goto out;
 
 		n = do_readlink(link, name, len);
-		if(n < len)
+		if (n < len)
 			break;
 		len *= 2;
 		kfree(name);
 	}
-	if(n < 0)
+	if (n < 0)
 		goto out_free;
 
-	if(*name == '/')
+	if (*name == '/')
 		return name;
 
 	end = strrchr(link, '/');
-	if(end == NULL)
+	if (end == NULL)
 		return name;
 
 	*(end + 1) = '\0';
 	len = strlen(link) + strlen(name) + 1;
 
 	resolved = kmalloc(len, GFP_KERNEL);
-	if(resolved == NULL){
+	if (resolved == NULL) {
 		n = -ENOMEM;
 		goto out_free;
 	}
@@ -213,20 +207,21 @@ static int read_inode(struct inode *ino)
 	char *name;
 	int err = 0;
 
-	/* Unfortunately, we are called from iget() when we don't have a dentry
+	/*
+	 * Unfortunately, we are called from iget() when we don't have a dentry
 	 * allocated yet.
 	 */
-	if(list_empty(&ino->i_dentry))
+	if (list_empty(&ino->i_dentry))
 		goto out;
 
 	err = -ENOMEM;
 	name = inode_name(ino, 0);
-	if(name == NULL)
+	if (name == NULL)
 		goto out;
 
-	if(file_type(name, NULL, NULL) == OS_TYPE_SYMLINK){
+	if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) {
 		name = follow_link(name);
-		if(IS_ERR(name)){
+		if (IS_ERR(name)) {
 			err = PTR_ERR(name);
 			goto out;
 		}
@@ -240,7 +235,8 @@ static int read_inode(struct inode *ino)
 
 int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
 {
-	/* do_statfs uses struct statfs64 internally, but the linux kernel
+	/*
+	 * do_statfs uses struct statfs64 internally, but the linux kernel
 	 * struct statfs still has 32-bit versions for most of these fields,
 	 * so we convert them here
 	 */
@@ -255,7 +251,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
 			&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
 			&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
 			&sf->f_namelen, sf->f_spare);
-	if(err)
+	if (err)
 		return err;
 	sf->f_blocks = f_blocks;
 	sf->f_bfree = f_bfree;
@@ -271,7 +267,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 	struct hostfs_inode_info *hi;
 
 	hi = kmalloc(sizeof(*hi), GFP_KERNEL);
-	if(hi == NULL)
+	if (hi == NULL)
 		return NULL;
 
 	*hi = ((struct hostfs_inode_info) { .host_filename	= NULL,
@@ -284,7 +280,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 static void hostfs_delete_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	if(HOSTFS_I(inode)->fd != -1) {
+	if (HOSTFS_I(inode)->fd != -1) {
 		close_file(&HOSTFS_I(inode)->fd);
 		HOSTFS_I(inode)->fd = -1;
 	}
@@ -295,9 +291,11 @@ static void hostfs_destroy_inode(struct inode *inode)
 {
 	kfree(HOSTFS_I(inode)->host_filename);
 
-	/*XXX: This should not happen, probably. The check is here for
-	 * additional safety.*/
-	if(HOSTFS_I(inode)->fd != -1) {
+	/*
+	 * XXX: This should not happen, probably. The check is here for
+	 * additional safety.
+	 */
+	if (HOSTFS_I(inode)->fd != -1) {
 		close_file(&HOSTFS_I(inode)->fd);
 		printk(KERN_DEBUG "Closing host fd in .destroy_inode\n");
 	}
@@ -327,17 +325,17 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
 	int error, len;
 
 	name = dentry_name(file->f_path.dentry, 0);
-	if(name == NULL)
+	if (name == NULL)
 		return -ENOMEM;
 	dir = open_dir(name, &error);
 	kfree(name);
-	if(dir == NULL)
+	if (dir == NULL)
 		return -error;
 	next = file->f_pos;
-	while((name = read_dir(dir, &next, &ino, &len)) != NULL){
+	while ((name = read_dir(dir, &next, &ino, &len)) != NULL) {
 		error = (*filldir)(ent, name, len, file->f_pos,
 				   ino, DT_UNKNOWN);
-		if(error) break;
+		if (error) break;
 		file->f_pos = next;
 	}
 	close_dir(dir);
@@ -350,32 +348,33 @@ int hostfs_file_open(struct inode *ino, struct file *file)
 	int mode = 0, r = 0, w = 0, fd;
 
 	mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
-	if((mode & HOSTFS_I(ino)->mode) == mode)
+	if ((mode & HOSTFS_I(ino)->mode) == mode)
 		return 0;
 
-	/* The file may already have been opened, but with the wrong access,
+	/*
+	 * The file may already have been opened, but with the wrong access,
 	 * so this resets things and reopens the file with the new access.
 	 */
-	if(HOSTFS_I(ino)->fd != -1){
+	if (HOSTFS_I(ino)->fd != -1) {
 		close_file(&HOSTFS_I(ino)->fd);
 		HOSTFS_I(ino)->fd = -1;
 	}
 
 	HOSTFS_I(ino)->mode |= mode;
-	if(HOSTFS_I(ino)->mode & FMODE_READ)
+	if (HOSTFS_I(ino)->mode & FMODE_READ)
 		r = 1;
-	if(HOSTFS_I(ino)->mode & FMODE_WRITE)
+	if (HOSTFS_I(ino)->mode & FMODE_WRITE)
 		w = 1;
-	if(w)
+	if (w)
 		r = 1;
 
 	name = dentry_name(file->f_path.dentry, 0);
-	if(name == NULL)
+	if (name == NULL)
 		return -ENOMEM;
 
 	fd = open_file(name, r, w, append);
 	kfree(name);
-	if(fd < 0)
+	if (fd < 0)
 		return fd;
 	FILE_HOSTFS_I(file)->fd = fd;
 
@@ -423,7 +422,7 @@ int hostfs_writepage(struct page *page, struct writeback_control *wbc)
 	base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;
 
 	err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
-	if(err != count){
+	if (err != count) {
 		ClearPageUptodate(page);
 		goto out;
 	}
@@ -452,7 +451,8 @@ int hostfs_readpage(struct file *file, struct page *page)
 	buffer = kmap(page);
 	err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
 			PAGE_CACHE_SIZE);
-	if(err < 0) goto out;
+	if (err < 0)
+		goto out;
 
 	memset(&buffer[err], 0, PAGE_CACHE_SIZE - err);
 
@@ -466,56 +466,43 @@ int hostfs_readpage(struct file *file, struct page *page)
 	return err;
 }
 
-int hostfs_prepare_write(struct file *file, struct page *page,
-			 unsigned int from, unsigned int to)
+int hostfs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	char *buffer;
-	long long start, tmp;
-	int err;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 
-	start = (long long) page->index << PAGE_CACHE_SHIFT;
-	buffer = kmap(page);
-	if(from != 0){
-		tmp = start;
-		err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer,
-				from);
-		if(err < 0) goto out;
-	}
-	if(to != PAGE_CACHE_SIZE){
-		start += to;
-		err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to,
-				PAGE_CACHE_SIZE - to);
-		if(err < 0) goto out;
-	}
-	err = 0;
- out:
-	kunmap(page);
-	return err;
+	*pagep = __grab_cache_page(mapping, index);
+	if (!*pagep)
+		return -ENOMEM;
+	return 0;
 }
 
-int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
-		 unsigned to)
+int hostfs_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
-	struct address_space *mapping = page->mapping;
 	struct inode *inode = mapping->host;
-	char *buffer;
-	long long start;
-	int err = 0;
+	void *buffer;
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	int err;
 
-	start = (((long long) page->index) << PAGE_CACHE_SHIFT) + from;
 	buffer = kmap(page);
-	err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from,
-			 to - from);
-	if(err > 0) err = 0;
+	err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied);
+	kunmap(page);
 
-	/* Actually, if !err, write_file has added to-from to start, so, despite
-	 * the appearance, we are comparing i_size against the _last_ written
-	 * location, as we should. */
+	if (!PageUptodate(page) && err == PAGE_CACHE_SIZE)
+		SetPageUptodate(page);
 
-	if(!err && (start > inode->i_size))
-		inode->i_size = start;
+	/*
+	 * If err > 0, write_file has added err to pos, so we are comparing
+	 * i_size against the last byte written.
+	 */
+	if (err > 0 && (pos > inode->i_size))
+		inode->i_size = pos;
+	unlock_page(page);
+	page_cache_release(page);
 
-	kunmap(page);
 	return err;
 }
 
@@ -523,8 +510,8 @@ static const struct address_space_operations hostfs_aops = {
 	.writepage 	= hostfs_writepage,
 	.readpage	= hostfs_readpage,
 	.set_page_dirty = __set_page_dirty_nobuffers,
-	.prepare_write	= hostfs_prepare_write,
-	.commit_write	= hostfs_commit_write
+	.write_begin	= hostfs_write_begin,
+	.write_end	= hostfs_write_end,
 };
 
 static int init_inode(struct inode *inode, struct dentry *dentry)
@@ -534,28 +521,28 @@ static int init_inode(struct inode *inode, struct dentry *dentry)
 	int maj, min;
 	dev_t rdev = 0;
 
-	if(dentry){
+	if (dentry) {
 		name = dentry_name(dentry, 0);
-		if(name == NULL)
+		if (name == NULL)
 			goto out;
 		type = file_type(name, &maj, &min);
-		/*Reencode maj and min with the kernel encoding.*/
+		/* Reencode maj and min with the kernel encoding.*/
 		rdev = MKDEV(maj, min);
 		kfree(name);
 	}
 	else type = OS_TYPE_DIR;
 
 	err = 0;
-	if(type == OS_TYPE_SYMLINK)
+	if (type == OS_TYPE_SYMLINK)
 		inode->i_op = &page_symlink_inode_operations;
-	else if(type == OS_TYPE_DIR)
+	else if (type == OS_TYPE_DIR)
 		inode->i_op = &hostfs_dir_iops;
 	else inode->i_op = &hostfs_iops;
 
-	if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
+	if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
 	else inode->i_fop = &hostfs_file_fops;
 
-	if(type == OS_TYPE_SYMLINK)
+	if (type == OS_TYPE_SYMLINK)
 		inode->i_mapping->a_ops = &hostfs_link_aops;
 	else inode->i_mapping->a_ops = &hostfs_aops;
 
@@ -578,7 +565,7 @@ static int init_inode(struct inode *inode, struct dentry *dentry)
 }
 
 int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
-                 struct nameidata *nd)
+		  struct nameidata *nd)
 {
 	struct inode *inode;
 	char *name;
@@ -586,27 +573,28 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
 	error = -ENOMEM;
 	inode = iget(dir->i_sb, 0);
-	if(inode == NULL) goto out;
+	if (inode == NULL)
+		goto out;
 
 	error = init_inode(inode, dentry);
-	if(error)
+	if (error)
 		goto out_put;
 
 	error = -ENOMEM;
 	name = dentry_name(dentry, 0);
-	if(name == NULL)
+	if (name == NULL)
 		goto out_put;
 
 	fd = file_create(name,
 			 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
 			 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
 			 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
-	if(fd < 0)
+	if (fd < 0)
 		error = fd;
 	else error = read_name(inode, name);
 
 	kfree(name);
-	if(error)
+	if (error)
 		goto out_put;
 
 	HOSTFS_I(inode)->fd = fd;
@@ -629,25 +617,25 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 
 	err = -ENOMEM;
 	inode = iget(ino->i_sb, 0);
-	if(inode == NULL)
+	if (inode == NULL)
 		goto out;
 
 	err = init_inode(inode, dentry);
-	if(err)
+	if (err)
 		goto out_put;
 
 	err = -ENOMEM;
 	name = dentry_name(dentry, 0);
-	if(name == NULL)
+	if (name == NULL)
 		goto out_put;
 
 	err = read_name(inode, name);
 	kfree(name);
-	if(err == -ENOENT){
+	if (err == -ENOENT) {
 		iput(inode);
 		inode = NULL;
 	}
-	else if(err)
+	else if (err)
 		goto out_put;
 
 	d_add(dentry, inode);
@@ -666,7 +654,7 @@ static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
 	int len;
 
 	file = inode_name(ino, dentry->d_name.len + 1);
-	if(file == NULL)
+	if (file == NULL)
 		return NULL;
 	strcat(file, "/");
 	len = strlen(file);
@@ -680,10 +668,10 @@ int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
 	char *from_name, *to_name;
 	int err;
 
-	if((from_name = inode_dentry_name(ino, from)) == NULL)
+	if ((from_name = inode_dentry_name(ino, from)) == NULL)
 		return -ENOMEM;
 	to_name = dentry_name(to, 0);
-	if(to_name == NULL){
+	if (to_name == NULL) {
 		kfree(from_name);
 		return -ENOMEM;
 	}
@@ -698,9 +686,9 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
 	char *file;
 	int err;
 
-	if((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = inode_dentry_name(ino, dentry)) == NULL)
 		return -ENOMEM;
-	if(append)
+	if (append)
 		return -EPERM;
 
 	err = unlink_file(file);
@@ -713,7 +701,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
 	char *file;
 	int err;
 
-	if((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = inode_dentry_name(ino, dentry)) == NULL)
 		return -ENOMEM;
 	err = make_symlink(file, to);
 	kfree(file);
@@ -725,7 +713,7 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
 	char *file;
 	int err;
 
-	if((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = inode_dentry_name(ino, dentry)) == NULL)
 		return -ENOMEM;
 	err = do_mkdir(file, mode);
 	kfree(file);
@@ -737,7 +725,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
 	char *file;
 	int err;
 
-	if((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = inode_dentry_name(ino, dentry)) == NULL)
 		return -ENOMEM;
 	err = do_rmdir(file);
 	kfree(file);
@@ -751,26 +739,26 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	int err = -ENOMEM;
 
 	inode = iget(dir->i_sb, 0);
-	if(inode == NULL)
+	if (inode == NULL)
 		goto out;
 
 	err = init_inode(inode, dentry);
-	if(err)
+	if (err)
 		goto out_put;
 
 	err = -ENOMEM;
 	name = dentry_name(dentry, 0);
-	if(name == NULL)
+	if (name == NULL)
 		goto out_put;
 
 	init_special_inode(inode, mode, dev);
 	err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
-	if(err)
+	if (err)
 		goto out_free;
 
 	err = read_name(inode, name);
 	kfree(name);
-	if(err)
+	if (err)
 		goto out_put;
 
 	d_instantiate(dentry, inode);
@@ -790,9 +778,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
 	char *from_name, *to_name;
 	int err;
 
-	if((from_name = inode_dentry_name(from_ino, from)) == NULL)
+	if ((from_name = inode_dentry_name(from_ino, from)) == NULL)
 		return -ENOMEM;
-	if((to_name = inode_dentry_name(to_ino, to)) == NULL){
+	if ((to_name = inode_dentry_name(to_ino, to)) == NULL) {
 		kfree(from_name);
 		return -ENOMEM;
 	}
@@ -815,12 +803,12 @@ int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
 		return -ENOMEM;
 
 	if (S_ISCHR(ino->i_mode) || S_ISBLK(ino->i_mode) ||
-			S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode))
+	    S_ISFIFO(ino->i_mode) || S_ISSOCK(ino->i_mode))
 		err = 0;
 	else
 		err = access_file(name, r, w, x);
 	kfree(name);
-	if(!err)
+	if (!err)
 		err = generic_permission(ino, desired, NULL);
 	return err;
 }
@@ -837,62 +825,55 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		return err;
 
-	if(append)
+	if (append)
 		attr->ia_valid &= ~ATTR_SIZE;
 
 	attrs.ia_valid = 0;
-	if(attr->ia_valid & ATTR_MODE){
+	if (attr->ia_valid & ATTR_MODE) {
 		attrs.ia_valid |= HOSTFS_ATTR_MODE;
 		attrs.ia_mode = attr->ia_mode;
 	}
-	if(attr->ia_valid & ATTR_UID){
+	if (attr->ia_valid & ATTR_UID) {
 		attrs.ia_valid |= HOSTFS_ATTR_UID;
 		attrs.ia_uid = attr->ia_uid;
 	}
-	if(attr->ia_valid & ATTR_GID){
+	if (attr->ia_valid & ATTR_GID) {
 		attrs.ia_valid |= HOSTFS_ATTR_GID;
 		attrs.ia_gid = attr->ia_gid;
 	}
-	if(attr->ia_valid & ATTR_SIZE){
+	if (attr->ia_valid & ATTR_SIZE) {
 		attrs.ia_valid |= HOSTFS_ATTR_SIZE;
 		attrs.ia_size = attr->ia_size;
 	}
-	if(attr->ia_valid & ATTR_ATIME){
+	if (attr->ia_valid & ATTR_ATIME) {
 		attrs.ia_valid |= HOSTFS_ATTR_ATIME;
 		attrs.ia_atime = attr->ia_atime;
 	}
-	if(attr->ia_valid & ATTR_MTIME){
+	if (attr->ia_valid & ATTR_MTIME) {
 		attrs.ia_valid |= HOSTFS_ATTR_MTIME;
 		attrs.ia_mtime = attr->ia_mtime;
 	}
-	if(attr->ia_valid & ATTR_CTIME){
+	if (attr->ia_valid & ATTR_CTIME) {
 		attrs.ia_valid |= HOSTFS_ATTR_CTIME;
 		attrs.ia_ctime = attr->ia_ctime;
 	}
-	if(attr->ia_valid & ATTR_ATIME_SET){
+	if (attr->ia_valid & ATTR_ATIME_SET) {
 		attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET;
 	}
-	if(attr->ia_valid & ATTR_MTIME_SET){
+	if (attr->ia_valid & ATTR_MTIME_SET) {
 		attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
 	}
 	name = dentry_name(dentry, 0);
-	if(name == NULL)
+	if (name == NULL)
 		return -ENOMEM;
 	err = set_attr(name, &attrs, fd);
 	kfree(name);
-	if(err)
+	if (err)
 		return err;
 
 	return inode_setattr(dentry->d_inode, attr);
 }
 
-int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
-	   struct kstat *stat)
-{
-	generic_fillattr(dentry->d_inode, stat);
-	return 0;
-}
-
 static const struct inode_operations hostfs_iops = {
 	.create		= hostfs_create,
 	.link		= hostfs_link,
@@ -904,7 +885,6 @@ static const struct inode_operations hostfs_iops = {
 	.rename		= hostfs_rename,
 	.permission	= hostfs_permission,
 	.setattr	= hostfs_setattr,
-	.getattr	= hostfs_getattr,
 };
 
 static const struct inode_operations hostfs_dir_iops = {
@@ -919,7 +899,6 @@ static const struct inode_operations hostfs_dir_iops = {
 	.rename		= hostfs_rename,
 	.permission	= hostfs_permission,
 	.setattr	= hostfs_setattr,
-	.getattr	= hostfs_getattr,
 };
 
 int hostfs_link_readpage(struct file *file, struct page *page)
@@ -929,13 +908,13 @@ int hostfs_link_readpage(struct file *file, struct page *page)
 
 	buffer = kmap(page);
 	name = inode_name(page->mapping->host, 0);
-	if(name == NULL)
+	if (name == NULL)
 		return -ENOMEM;
 	err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
 	kfree(name);
-	if(err == PAGE_CACHE_SIZE)
+	if (err == PAGE_CACHE_SIZE)
 		err = -E2BIG;
-	else if(err > 0){
+	else if (err > 0) {
 		flush_dcache_page(page);
 		SetPageUptodate(page);
 		if (PageError(page)) ClearPageError(page);
@@ -968,31 +947,33 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	err = -ENOMEM;
 	host_root_path = kmalloc(strlen(root_ino) + 1
 				 + strlen(req_root) + 1, GFP_KERNEL);
-	if(host_root_path == NULL)
+	if (host_root_path == NULL)
 		goto out;
 
 	sprintf(host_root_path, "%s/%s", root_ino, req_root);
 
 	root_inode = iget(sb, 0);
-	if(root_inode == NULL)
+	if (root_inode == NULL)
 		goto out_free;
 
 	err = init_inode(root_inode, NULL);
-	if(err)
+	if (err)
 		goto out_put;
 
 	HOSTFS_I(root_inode)->host_filename = host_root_path;
-	/* Avoid that in the error path, iput(root_inode) frees again
-	 * host_root_path through hostfs_destroy_inode! */
+	/*
+	 * Avoid a double free in the error path: iput(root_inode) already
+	 * frees host_root_path through hostfs_destroy_inode!
+	 */
 	host_root_path = NULL;
 
 	err = -ENOMEM;
 	sb->s_root = d_alloc_root(root_inode);
-	if(sb->s_root == NULL)
+	if (sb->s_root == NULL)
 		goto out_put;
 
 	err = read_inode(root_inode);
-	if(err){
+	if (err) {
 		/* No iput in this case because the dput does that for us */
 		dput(sb->s_root);
 		sb->s_root = NULL;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 5625e2481dd..35c1a9f33f4 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -3,19 +3,21 @@
  * Licensed under the GPL
  */
 
-#include <unistd.h>
 #include <stdio.h>
-#include <fcntl.h>
+#include <stddef.h>
+#include <unistd.h>
 #include <dirent.h>
 #include <errno.h>
-#include <utime.h>
+#include <fcntl.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <sys/time.h>
+#include <sys/types.h>
 #include <sys/vfs.h>
 #include "hostfs.h"
-#include "kern_util.h"
+#include "os.h"
 #include "user.h"
+#include <utime.h>
 
 int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
 	      int *nlink_out, int *uid_out, int *gid_out,
@@ -25,33 +27,41 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
 {
 	struct stat64 buf;
 
-	if(fd >= 0) {
+	if (fd >= 0) {
 		if (fstat64(fd, &buf) < 0)
 			return -errno;
-	} else if(lstat64(path, &buf) < 0) {
+	} else if (lstat64(path, &buf) < 0) {
 		return -errno;
 	}
 
-	if(inode_out != NULL) *inode_out = buf.st_ino;
-	if(mode_out != NULL) *mode_out = buf.st_mode;
-	if(nlink_out != NULL) *nlink_out = buf.st_nlink;
-	if(uid_out != NULL) *uid_out = buf.st_uid;
-	if(gid_out != NULL) *gid_out = buf.st_gid;
-	if(size_out != NULL) *size_out = buf.st_size;
-	if(atime_out != NULL) {
+	if (inode_out != NULL)
+		*inode_out = buf.st_ino;
+	if (mode_out != NULL)
+		*mode_out = buf.st_mode;
+	if (nlink_out != NULL)
+		*nlink_out = buf.st_nlink;
+	if (uid_out != NULL)
+		*uid_out = buf.st_uid;
+	if (gid_out != NULL)
+		*gid_out = buf.st_gid;
+	if (size_out != NULL)
+		*size_out = buf.st_size;
+	if (atime_out != NULL) {
 		atime_out->tv_sec = buf.st_atime;
 		atime_out->tv_nsec = 0;
 	}
-	if(mtime_out != NULL) {
+	if (mtime_out != NULL) {
 		mtime_out->tv_sec = buf.st_mtime;
 		mtime_out->tv_nsec = 0;
 	}
-	if(ctime_out != NULL) {
+	if (ctime_out != NULL) {
 		ctime_out->tv_sec = buf.st_ctime;
 		ctime_out->tv_nsec = 0;
 	}
-	if(blksize_out != NULL) *blksize_out = buf.st_blksize;
-	if(blocks_out != NULL) *blocks_out = buf.st_blocks;
+	if (blksize_out != NULL)
+		*blksize_out = buf.st_blksize;
+	if (blocks_out != NULL)
+		*blocks_out = buf.st_blocks;
 	return 0;
 }
 
@@ -59,21 +69,29 @@ int file_type(const char *path, int *maj, int *min)
 {
  	struct stat64 buf;
 
-	if(lstat64(path, &buf) < 0)
+	if (lstat64(path, &buf) < 0)
 		return -errno;
-	/*We cannot pass rdev as is because glibc and the kernel disagree
-	 *about its definition.*/
-	if(maj != NULL)
+	/*
+	 * We cannot pass rdev as is because glibc and the kernel disagree
+	 * about its definition.
+	 */
+	if (maj != NULL)
 		*maj = major(buf.st_rdev);
-	if(min != NULL)
+	if (min != NULL)
 		*min = minor(buf.st_rdev);
 
-	if(S_ISDIR(buf.st_mode)) return OS_TYPE_DIR;
-	else if(S_ISLNK(buf.st_mode)) return OS_TYPE_SYMLINK;
-	else if(S_ISCHR(buf.st_mode)) return OS_TYPE_CHARDEV;
-	else if(S_ISBLK(buf.st_mode)) return OS_TYPE_BLOCKDEV;
-	else if(S_ISFIFO(buf.st_mode))return OS_TYPE_FIFO;
-	else if(S_ISSOCK(buf.st_mode))return OS_TYPE_SOCK;
+	if (S_ISDIR(buf.st_mode))
+		return OS_TYPE_DIR;
+	else if (S_ISLNK(buf.st_mode))
+		return OS_TYPE_SYMLINK;
+	else if (S_ISCHR(buf.st_mode))
+		return OS_TYPE_CHARDEV;
+	else if (S_ISBLK(buf.st_mode))
+		return OS_TYPE_BLOCKDEV;
+	else if (S_ISFIFO(buf.st_mode))
+		return OS_TYPE_FIFO;
+	else if (S_ISSOCK(buf.st_mode))
+		return OS_TYPE_SOCK;
 	else return OS_TYPE_FILE;
 }
 
@@ -81,10 +99,13 @@ int access_file(char *path, int r, int w, int x)
 {
 	int mode = 0;
 
-	if(r) mode = R_OK;
-	if(w) mode |= W_OK;
-	if(x) mode |= X_OK;
-	if(access(path, mode) != 0)
+	if (r)
+		mode = R_OK;
+	if (w)
+		mode |= W_OK;
+	if (x)
+		mode |= X_OK;
+	if (access(path, mode) != 0)
 		return -errno;
 	else return 0;
 }
@@ -93,18 +114,18 @@ int open_file(char *path, int r, int w, int append)
 {
 	int mode = 0, fd;
 
-	if(r && !w)
+	if (r && !w)
 		mode = O_RDONLY;
-	else if(!r && w)
+	else if (!r && w)
 		mode = O_WRONLY;
-	else if(r && w)
+	else if (r && w)
 		mode = O_RDWR;
 	else panic("Impossible mode in open_file");
 
-	if(append)
+	if (append)
 		mode |= O_APPEND;
 	fd = open64(path, mode);
-	if(fd < 0)
+	if (fd < 0)
 		return -errno;
 	else return fd;
 }
@@ -115,7 +136,7 @@ void *open_dir(char *path, int *err_out)
 
 	dir = opendir(path);
 	*err_out = errno;
-	if(dir == NULL)
+	if (dir == NULL)
 		return NULL;
 	return dir;
 }
@@ -128,7 +149,7 @@ char *read_dir(void *stream, unsigned long long *pos,
 
 	seekdir(dir, *pos);
 	ent = readdir(dir);
-	if(ent == NULL)
+	if (ent == NULL)
 		return NULL;
 	*len_out = strlen(ent->d_name);
 	*ino_out = ent->d_ino;
@@ -141,7 +162,7 @@ int read_file(int fd, unsigned long long *offset, char *buf, int len)
 	int n;
 
 	n = pread64(fd, buf, len, *offset);
-	if(n < 0)
+	if (n < 0)
 		return -errno;
 	*offset += n;
 	return n;
@@ -152,7 +173,7 @@ int write_file(int fd, unsigned long long *offset, const char *buf, int len)
 	int n;
 
 	n = pwrite64(fd, buf, len, *offset);
-	if(n < 0)
+	if (n < 0)
 		return -errno;
 	*offset += n;
 	return n;
@@ -163,7 +184,7 @@ int lseek_file(int fd, long long offset, int whence)
 	int ret;
 
 	ret = lseek64(fd, offset, whence);
-	if(ret < 0)
+	if (ret < 0)
 		return -errno;
 	return 0;
 }
@@ -207,7 +228,7 @@ int file_create(char *name, int ur, int uw, int ux, int gr,
 	mode |= ow ? S_IWOTH : 0;
 	mode |= ox ? S_IXOTH : 0;
 	fd = open64(name, O_CREAT | O_RDWR, mode);
-	if(fd < 0)
+	if (fd < 0)
 		return -errno;
 	return fd;
 }
@@ -230,7 +251,7 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 		if (fd >= 0) {
 			if (fchown(fd, attrs->ia_uid, -1))
 				return -errno;
-		} else if(chown(file, attrs->ia_uid, -1)) {
+		} else if (chown(file, attrs->ia_uid, -1)) {
 			return -errno;
 		}
 	}
@@ -251,9 +272,11 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 		}
 	}
 
-	/* Update accessed and/or modified time, in two parts: first set
+	/*
+	 * Update accessed and/or modified time, in two parts: first set
 	 * times according to the changes to perform, and then call futimes()
-	 * or utimes() to apply them. */
+	 * or utimes() to apply them.
+	 */
 	ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET);
 	if (attrs->ia_valid & ma) {
 		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
@@ -283,12 +306,12 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 		}
 	}
 
-	if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ;
-	if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){
+	/* Note: ctime is not handled */
+	if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) {
 		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
 				&attrs->ia_atime, &attrs->ia_mtime, NULL,
 				NULL, NULL, fd);
-		if(err != 0)
+		if (err != 0)
 			return err;
 	}
 	return 0;
@@ -299,7 +322,7 @@ int make_symlink(const char *from, const char *to)
 	int err;
 
 	err = symlink(to, from);
-	if(err)
+	if (err)
 		return -errno;
 	return 0;
 }
@@ -309,7 +332,7 @@ int unlink_file(const char *file)
 	int err;
 
 	err = unlink(file);
-	if(err)
+	if (err)
 		return -errno;
 	return 0;
 }
@@ -319,7 +342,7 @@ int do_mkdir(const char *file, int mode)
 	int err;
 
 	err = mkdir(file, mode);
-	if(err)
+	if (err)
 		return -errno;
 	return 0;
 }
@@ -329,7 +352,7 @@ int do_rmdir(const char *file)
 	int err;
 
 	err = rmdir(file);
-	if(err)
+	if (err)
 		return -errno;
 	return 0;
 }
@@ -339,7 +362,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
 	int err;
 
 	err = mknod(file, mode, makedev(major, minor));
-	if(err)
+	if (err)
 		return -errno;
 	return 0;
 }
@@ -349,7 +372,7 @@ int link_file(const char *to, const char *from)
 	int err;
 
 	err = link(to, from);
-	if(err)
+	if (err)
 		return -errno;
 	return 0;
 }
@@ -359,9 +382,9 @@ int do_readlink(char *file, char *buf, int size)
 	int n;
 
 	n = readlink(file, buf, size);
-	if(n < 0)
+	if (n < 0)
 		return -errno;
-	if(n < size)
+	if (n < size)
 		buf[n] = '\0';
 	return n;
 }
@@ -371,7 +394,7 @@ int rename_file(char *from, char *to)
 	int err;
 
 	err = rename(from, to);
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 	return 0;
 }
@@ -386,7 +409,7 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out,
 	int err;
 
 	err = statfs64(root, &buf);
-	if(err < 0)
+	if (err < 0)
 		return -errno;
 
 	*bsize_out = buf.f_bsize;
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 5b53e5c5d8d..be8be5040e0 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -86,25 +86,33 @@ static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page,hpfs_get_block, wbc);
 }
+
 static int hpfs_readpage(struct file *file, struct page *page)
 {
 	return block_read_full_page(page,hpfs_get_block);
 }
-static int hpfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+
+static int hpfs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return cont_prepare_write(page,from,to,hpfs_get_block,
-		&hpfs_i(page->mapping->host)->mmu_private);
+	*pagep = NULL;
+	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				hpfs_get_block,
+				&hpfs_i(mapping->host)->mmu_private);
 }
+
 static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,hpfs_get_block);
 }
+
 const struct address_space_operations hpfs_aops = {
 	.readpage = hpfs_readpage,
 	.writepage = hpfs_writepage,
 	.sync_page = block_sync_page,
-	.prepare_write = hpfs_prepare_write,
-	.commit_write = generic_commit_write,
+	.write_begin = hpfs_write_begin,
+	.write_end = generic_write_end,
 	.bmap = _hpfs_bmap
 };
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 950c2fbb815..04598e12c48 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -179,6 +179,130 @@ full_search:
 }
 #endif
 
+static int
+hugetlbfs_read_actor(struct page *page, unsigned long offset,
+			char __user *buf, unsigned long count,
+			unsigned long size)
+{
+	char *kaddr;
+	unsigned long left, copied = 0;
+	int i, chunksize;
+
+	if (size > count)
+		size = count;
+
+	/* Find which 4k chunk and offset within that chunk */
+	i = offset >> PAGE_CACHE_SHIFT;
+	offset = offset & ~PAGE_CACHE_MASK;
+
+	while (size) {
+		chunksize = PAGE_CACHE_SIZE;
+		if (offset)
+			chunksize -= offset;
+		if (chunksize > size)
+			chunksize = size;
+		kaddr = kmap(&page[i]);
+		left = __copy_to_user(buf, kaddr + offset, chunksize);
+		kunmap(&page[i]);
+		if (left) {
+			copied += (chunksize - left);
+			break;
+		}
+		offset = 0;
+		size -= chunksize;
+		buf += chunksize;
+		copied += chunksize;
+		i++;
+	}
+	return copied ? copied : -EFAULT;
+}
+
+/*
+ * Support for read() - Find the page attached to f_mapping and copy out the
+ * data. It's *very* similar to do_generic_mapping_read(), we can't use that
+ * since it has PAGE_CACHE_SIZE assumptions.
+ */
+static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
+			      size_t len, loff_t *ppos)
+{
+	struct address_space *mapping = filp->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long index = *ppos >> HPAGE_SHIFT;
+	unsigned long offset = *ppos & ~HPAGE_MASK;
+	unsigned long end_index;
+	loff_t isize;
+	ssize_t retval = 0;
+
+	mutex_lock(&inode->i_mutex);
+
+	/* validate length */
+	if (len == 0)
+		goto out;
+
+	isize = i_size_read(inode);
+	if (!isize)
+		goto out;
+
+	end_index = (isize - 1) >> HPAGE_SHIFT;
+	for (;;) {
+		struct page *page;
+		int nr, ret;
+
+		/* nr is the maximum number of bytes to copy from this page */
+		nr = HPAGE_SIZE;
+		if (index >= end_index) {
+			if (index > end_index)
+				goto out;
+			nr = ((isize - 1) & ~HPAGE_MASK) + 1;
+			if (nr <= offset) {
+				goto out;
+			}
+		}
+		nr = nr - offset;
+
+		/* Find the page */
+		page = find_get_page(mapping, index);
+		if (unlikely(page == NULL)) {
+			/*
+			 * We have a HOLE, zero out the user-buffer for the
+			 * length of the hole or request.
+			 */
+			ret = len < nr ? len : nr;
+			if (clear_user(buf, ret))
+				ret = -EFAULT;
+		} else {
+			/*
+			 * We have the page, copy it to user space buffer.
+			 */
+			ret = hugetlbfs_read_actor(page, offset, buf, len, nr);
+		}
+		if (ret < 0) {
+			if (retval == 0)
+				retval = ret;
+			if (page)
+				page_cache_release(page);
+			goto out;
+		}
+
+		offset += ret;
+		retval += ret;
+		len -= ret;
+		index += offset >> HPAGE_SHIFT;
+		offset &= ~HPAGE_MASK;
+
+		if (page)
+			page_cache_release(page);
+
+		/* short read or no more work */
+		if ((ret != nr) || (len == 0))
+			break;
+	}
+out:
+	*ppos = ((loff_t)index << HPAGE_SHIFT) + offset;
+	mutex_unlock(&inode->i_mutex);
+	return retval;
+}
+
 /*
  * Read a page. Again trivial. If it didn't already exist
  * in the page cache, it is zero-filled.
@@ -189,15 +313,19 @@ static int hugetlbfs_readpage(struct file *file, struct page * page)
 	return -EINVAL;
 }
 
-static int hugetlbfs_prepare_write(struct file *file,
-			struct page *page, unsigned offset, unsigned to)
+static int hugetlbfs_write_begin(struct file *file,
+			struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
 	return -EINVAL;
 }
 
-static int hugetlbfs_commit_write(struct file *file,
-			struct page *page, unsigned offset, unsigned to)
+static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
+	BUG();
 	return -EINVAL;
 }
 
@@ -318,21 +446,15 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
 	}
 }
 
-/*
- * Expanding truncates are not allowed.
- */
 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	pgoff_t pgoff;
 	struct address_space *mapping = inode->i_mapping;
 
-	if (offset > inode->i_size)
-		return -EINVAL;
-
 	BUG_ON(offset & ~HPAGE_MASK);
 	pgoff = offset >> PAGE_SHIFT;
 
-	inode->i_size = offset;
+	i_size_write(inode, offset);
 	spin_lock(&mapping->i_mmap_lock);
 	if (!prio_tree_empty(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
@@ -569,8 +691,8 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
 
 static const struct address_space_operations hugetlbfs_aops = {
 	.readpage	= hugetlbfs_readpage,
-	.prepare_write	= hugetlbfs_prepare_write,
-	.commit_write	= hugetlbfs_commit_write,
+	.write_begin	= hugetlbfs_write_begin,
+	.write_end	= hugetlbfs_write_end,
 	.set_page_dirty	= hugetlbfs_set_page_dirty,
 };
 
@@ -583,6 +705,7 @@ static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
 }
 
 const struct file_operations hugetlbfs_file_operations = {
+	.read			= hugetlbfs_read,
 	.mmap			= hugetlbfs_file_mmap,
 	.fsync			= simple_sync_file,
 	.get_unmapped_area	= hugetlb_get_unmapped_area,
diff --git a/fs/inode.c b/fs/inode.c
index f97de0aeb3b..21dab18b2f1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -568,16 +568,16 @@ EXPORT_SYMBOL(new_inode);
 void unlock_new_inode(struct inode *inode)
 {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct file_system_type *type = inode->i_sb->s_type;
-	/*
-	 * ensure nobody is actually holding i_mutex
-	 */
-	mutex_destroy(&inode->i_mutex);
-	mutex_init(&inode->i_mutex);
-	if (inode->i_mode & S_IFDIR)
+	if (inode->i_mode & S_IFDIR) {
+		struct file_system_type *type = inode->i_sb->s_type;
+
+		/*
+		 * ensure nobody is actually holding i_mutex
+		 */
+		mutex_destroy(&inode->i_mutex);
+		mutex_init(&inode->i_mutex);
 		lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
-	else
-		lockdep_set_class(&inode->i_mutex, &type->i_mutex_key);
+	}
 #endif
 	/*
 	 * This is special!  We do not need the spinlock
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06ab3c10b1b..a6be78c05dc 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1710,7 +1710,7 @@ static int journal_init_journal_head_cache(void)
 	journal_head_cache = kmem_cache_create("journal_head",
 				sizeof(struct journal_head),
 				0,		/* offset */
-				0,		/* flags */
+				SLAB_TEMPORARY,	/* flags */
 				NULL);		/* ctor */
 	retval = 0;
 	if (journal_head_cache == 0) {
@@ -2006,7 +2006,7 @@ static int __init journal_init_handle_cache(void)
 	jbd_handle_cache = kmem_cache_create("journal_handle",
 				sizeof(handle_t),
 				0,		/* offset */
-				0,		/* flags */
+				SLAB_TEMPORARY,	/* flags */
 				NULL);		/* ctor */
 	if (jbd_handle_cache == NULL) {
 		printk(KERN_EMERG "JBD: failed to create handle cache\n");
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 62e13c8db13..ad2eacf570c 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -170,13 +170,15 @@ int __init journal_init_revoke_caches(void)
 {
 	revoke_record_cache = kmem_cache_create("revoke_record",
 					   sizeof(struct jbd_revoke_record_s),
-					   0, SLAB_HWCACHE_ALIGN, NULL);
+					   0,
+					   SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
+					   NULL);
 	if (revoke_record_cache == 0)
 		return -ENOMEM;
 
 	revoke_table_cache = kmem_cache_create("revoke_table",
 					   sizeof(struct jbd_revoke_table_s),
-					   0, 0, NULL);
+					   0, SLAB_TEMPORARY, NULL);
 	if (revoke_table_cache == 0) {
 		kmem_cache_destroy(revoke_record_cache);
 		revoke_record_cache = NULL;
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c2530197be0..023a17539dd 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -19,10 +19,12 @@
 #include <linux/jffs2.h>
 #include "nodelist.h"
 
-static int jffs2_commit_write (struct file *filp, struct page *pg,
-			       unsigned start, unsigned end);
-static int jffs2_prepare_write (struct file *filp, struct page *pg,
-				unsigned start, unsigned end);
+static int jffs2_write_end(struct file *filp, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *pg, void *fsdata);
+static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata);
 static int jffs2_readpage (struct file *filp, struct page *pg);
 
 int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync)
@@ -65,8 +67,8 @@ const struct inode_operations jffs2_file_inode_operations =
 const struct address_space_operations jffs2_file_address_operations =
 {
 	.readpage =	jffs2_readpage,
-	.prepare_write =jffs2_prepare_write,
-	.commit_write =	jffs2_commit_write
+	.write_begin =	jffs2_write_begin,
+	.write_end =	jffs2_write_end,
 };
 
 static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
@@ -119,15 +121,23 @@ static int jffs2_readpage (struct file *filp, struct page *pg)
 	return ret;
 }
 
-static int jffs2_prepare_write (struct file *filp, struct page *pg,
-				unsigned start, unsigned end)
+static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	struct inode *inode = pg->mapping->host;
+	struct page *pg;
+	struct inode *inode = mapping->host;
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
-	uint32_t pageofs = pg->index << PAGE_CACHE_SHIFT;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	uint32_t pageofs = pos & (PAGE_CACHE_SIZE - 1);
 	int ret = 0;
 
-	D1(printk(KERN_DEBUG "jffs2_prepare_write()\n"));
+	pg = __grab_cache_page(mapping, index);
+	if (!pg)
+		return -ENOMEM;
+	*pagep = pg;
+
+	D1(printk(KERN_DEBUG "jffs2_write_begin()\n"));
 
 	if (pageofs > inode->i_size) {
 		/* Make new hole frag from old EOF to new page */
@@ -142,7 +152,7 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg,
 		ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
 					  ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
 		if (ret)
-			return ret;
+			goto out_page;
 
 		down(&f->sem);
 		memset(&ri, 0, sizeof(ri));
@@ -172,7 +182,7 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg,
 			ret = PTR_ERR(fn);
 			jffs2_complete_reservation(c);
 			up(&f->sem);
-			return ret;
+			goto out_page;
 		}
 		ret = jffs2_add_full_dnode_to_inode(c, f, fn);
 		if (f->metadata) {
@@ -181,65 +191,79 @@ static int jffs2_prepare_write (struct file *filp, struct page *pg,
 			f->metadata = NULL;
 		}
 		if (ret) {
-			D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in prepare_write, returned %d\n", ret));
+			D1(printk(KERN_DEBUG "Eep. add_full_dnode_to_inode() failed in write_begin, returned %d\n", ret));
 			jffs2_mark_node_obsolete(c, fn->raw);
 			jffs2_free_full_dnode(fn);
 			jffs2_complete_reservation(c);
 			up(&f->sem);
-			return ret;
+			goto out_page;
 		}
 		jffs2_complete_reservation(c);
 		inode->i_size = pageofs;
 		up(&f->sem);
 	}
 
-	/* Read in the page if it wasn't already present, unless it's a whole page */
-	if (!PageUptodate(pg) && (start || end < PAGE_CACHE_SIZE)) {
+	/*
+	 * Read in the page if it wasn't already present. Cannot optimize away
+	 * the whole page write case until jffs2_write_end can handle the
+	 * case of a short-copy.
+	 */
+	if (!PageUptodate(pg)) {
 		down(&f->sem);
 		ret = jffs2_do_readpage_nolock(inode, pg);
 		up(&f->sem);
+		if (ret)
+			goto out_page;
 	}
-	D1(printk(KERN_DEBUG "end prepare_write(). pg->flags %lx\n", pg->flags));
+	D1(printk(KERN_DEBUG "end write_begin(). pg->flags %lx\n", pg->flags));
+	return ret;
+
+out_page:
+	unlock_page(pg);
+	page_cache_release(pg);
 	return ret;
 }
 
-static int jffs2_commit_write (struct file *filp, struct page *pg,
-			       unsigned start, unsigned end)
+static int jffs2_write_end(struct file *filp, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *pg, void *fsdata)
 {
 	/* Actually commit the write from the page cache page we're looking at.
 	 * For now, we write the full page out each time. It sucks, but it's simple
 	 */
-	struct inode *inode = pg->mapping->host;
+	struct inode *inode = mapping->host;
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
 	struct jffs2_raw_inode *ri;
+	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned end = start + copied;
 	unsigned aligned_start = start & ~3;
 	int ret = 0;
 	uint32_t writtenlen = 0;
 
-	D1(printk(KERN_DEBUG "jffs2_commit_write(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
+	D1(printk(KERN_DEBUG "jffs2_write_end(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
 		  inode->i_ino, pg->index << PAGE_CACHE_SHIFT, start, end, pg->flags));
 
+	/* We need to avoid deadlock with page_cache_read() in
+	   jffs2_garbage_collect_pass(). So the page must be
+	   up to date to prevent page_cache_read() from trying
+	   to re-lock it. */
+	BUG_ON(!PageUptodate(pg));
+
 	if (end == PAGE_CACHE_SIZE) {
-		if (!start) {
-			/* We need to avoid deadlock with page_cache_read() in
-			   jffs2_garbage_collect_pass(). So we have to mark the
-			   page up to date, to prevent page_cache_read() from
-			   trying to re-lock it. */
-			SetPageUptodate(pg);
-		} else {
-			/* When writing out the end of a page, write out the 
-			   _whole_ page. This helps to reduce the number of
-			   nodes in files which have many short writes, like
-			   syslog files. */
-			start = aligned_start = 0;
-		}
+		/* When writing out the end of a page, write out the
+		   _whole_ page. This helps to reduce the number of
+		   nodes in files which have many short writes, like
+		   syslog files. */
+		start = aligned_start = 0;
 	}
 
 	ri = jffs2_alloc_raw_inode();
 
 	if (!ri) {
-		D1(printk(KERN_DEBUG "jffs2_commit_write(): Allocation of raw inode failed\n"));
+		D1(printk(KERN_DEBUG "jffs2_write_end(): Allocation of raw inode failed\n"));
+		unlock_page(pg);
+		page_cache_release(pg);
 		return -ENOMEM;
 	}
 
@@ -287,11 +311,14 @@ static int jffs2_commit_write (struct file *filp, struct page *pg,
 		/* generic_file_write has written more to the page cache than we've
 		   actually written to the medium. Mark the page !Uptodate so that
 		   it gets reread */
-		D1(printk(KERN_DEBUG "jffs2_commit_write(): Not all bytes written. Marking page !uptodate\n"));
+		D1(printk(KERN_DEBUG "jffs2_write_end(): Not all bytes written. Marking page !uptodate\n"));
 		SetPageError(pg);
 		ClearPageUptodate(pg);
 	}
 
-	D1(printk(KERN_DEBUG "jffs2_commit_write() returning %d\n",start+writtenlen==end?0:ret));
-	return start+writtenlen==end?0:ret;
+	D1(printk(KERN_DEBUG "jffs2_write_end() returning %d\n",
+					writtenlen > 0 ? writtenlen : ret));
+	unlock_page(pg);
+	page_cache_release(pg);
+	return writtenlen > 0 ? writtenlen : ret;
 }
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 3467dde27e5..4672013802e 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -255,7 +255,7 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
 
 static int jfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	return nobh_writepage(page, jfs_get_block, wbc);
+	return block_write_full_page(page, jfs_get_block, wbc);
 }
 
 static int jfs_writepages(struct address_space *mapping,
@@ -275,10 +275,12 @@ static int jfs_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, jfs_get_block);
 }
 
-static int jfs_prepare_write(struct file *file,
-			     struct page *page, unsigned from, unsigned to)
+static int jfs_write_begin(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata)
 {
-	return nobh_prepare_write(page, from, to, jfs_get_block);
+	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				jfs_get_block);
 }
 
 static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
@@ -302,8 +304,8 @@ const struct address_space_operations jfs_aops = {
 	.writepage	= jfs_writepage,
 	.writepages	= jfs_writepages,
 	.sync_page	= block_sync_page,
-	.prepare_write	= jfs_prepare_write,
-	.commit_write	= nobh_commit_write,
+	.write_begin	= jfs_write_begin,
+	.write_end	= nobh_write_end,
 	.bmap		= jfs_bmap,
 	.direct_IO	= jfs_direct_IO,
 };
@@ -356,7 +358,7 @@ void jfs_truncate(struct inode *ip)
 {
 	jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
 
-	nobh_truncate_page(ip->i_mapping, ip->i_size);
+	nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
 
 	IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
 	jfs_truncate_nolock(ip, ip->i_size);
diff --git a/fs/libfs.c b/fs/libfs.c
index 5294de1f40c..f2b32d3a909 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -351,6 +351,26 @@ int simple_prepare_write(struct file *file, struct page *page,
 	return 0;
 }
 
+int simple_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
+{
+	struct page *page;
+	pgoff_t index;
+	unsigned from;
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	from = pos & (PAGE_CACHE_SIZE - 1);
+
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+
+	*pagep = page;
+
+	return simple_prepare_write(file, page, from, from+len);
+}
+
 int simple_commit_write(struct file *file, struct page *page,
 			unsigned from, unsigned to)
 {
@@ -369,6 +389,28 @@ int simple_commit_write(struct file *file, struct page *page,
 	return 0;
 }
 
+int simple_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+
+	/* zero the stale part of the page if we did a short copy */
+	if (copied < len) {
+		void *kaddr = kmap_atomic(page, KM_USER0);
+		memset(kaddr + from + copied, 0, len - copied);
+		flush_dcache_page(page);
+		kunmap_atomic(kaddr, KM_USER0);
+	}
+
+	simple_commit_write(file, page, from, from+copied);
+
+	unlock_page(page);
+	page_cache_release(page);
+
+	return copied;
+}
+
 /*
  * the inodes created here are not hashed. If you use iunique to generate
  * unique inode values later for this filesystem, then you must take care
@@ -642,6 +684,8 @@ EXPORT_SYMBOL(dcache_dir_open);
 EXPORT_SYMBOL(dcache_readdir);
 EXPORT_SYMBOL(generic_read_dir);
 EXPORT_SYMBOL(get_sb_pseudo);
+EXPORT_SYMBOL(simple_write_begin);
+EXPORT_SYMBOL(simple_write_end);
 EXPORT_SYMBOL(simple_commit_write);
 EXPORT_SYMBOL(simple_dir_inode_operations);
 EXPORT_SYMBOL(simple_dir_operations);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index e207cbe7095..f70433816a3 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -9,8 +9,10 @@
  */
 
 #include "minix.h"
+#include <linux/buffer_head.h>
 #include <linux/highmem.h>
 #include <linux/smp_lock.h>
+#include <linux/swap.h>
 
 typedef struct minix_dir_entry minix_dirent;
 typedef struct minix3_dir_entry minix3_dirent;
@@ -48,11 +50,17 @@ static inline unsigned long dir_pages(struct inode *inode)
 	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
 }
 
-static int dir_commit_chunk(struct page *page, unsigned from, unsigned to)
+static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	struct inode *dir = (struct inode *)page->mapping->host;
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
 	int err = 0;
-	page->mapping->a_ops->commit_write(NULL, page, from, to);
+	block_write_end(NULL, mapping, pos, len, len, page, NULL);
+
+	if (pos+len > dir->i_size) {
+		i_size_write(dir, pos+len);
+		mark_inode_dirty(dir);
+	}
 	if (IS_DIRSYNC(dir))
 		err = write_one_page(page, 1);
 	else
@@ -220,7 +228,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
 	char *kaddr, *p;
 	minix_dirent *de;
 	minix3_dirent *de3;
-	unsigned from, to;
+	loff_t pos;
 	int err;
 	char *namx = NULL;
 	__u32 inumber;
@@ -272,9 +280,9 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
 	return -EINVAL;
 
 got_it:
-	from = p - (char*)page_address(page);
-	to = from + sbi->s_dirsize;
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	pos = page_offset(page) + p - (char *)page_address(page);
+	err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize,
+					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err)
 		goto out_unlock;
 	memcpy (namx, name, namelen);
@@ -285,7 +293,7 @@ got_it:
 		memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2);
 		de->inode = inode->i_ino;
 	}
-	err = dir_commit_chunk(page, from, to);
+	err = dir_commit_chunk(page, pos, sbi->s_dirsize);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
 out_put:
@@ -302,15 +310,16 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = (struct inode*)mapping->host;
 	char *kaddr = page_address(page);
-	unsigned from = (char*)de - kaddr;
-	unsigned to = from + minix_sb(inode->i_sb)->s_dirsize;
+	loff_t pos = page_offset(page) + (char*)de - kaddr;
+	unsigned len = minix_sb(inode->i_sb)->s_dirsize;
 	int err;
 
 	lock_page(page);
-	err = mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = __minix_write_begin(NULL, mapping, pos, len,
+					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err == 0) {
 		de->inode = 0;
-		err = dir_commit_chunk(page, from, to);
+		err = dir_commit_chunk(page, pos, len);
 	} else {
 		unlock_page(page);
 	}
@@ -330,7 +339,8 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
 
 	if (!page)
 		return -ENOMEM;
-	err = mapping->a_ops->prepare_write(NULL, page, 0, 2 * sbi->s_dirsize);
+	err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize,
+					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err) {
 		unlock_page(page);
 		goto fail;
@@ -421,17 +431,20 @@ not_empty:
 void minix_set_link(struct minix_dir_entry *de, struct page *page,
 	struct inode *inode)
 {
-	struct inode *dir = (struct inode*)page->mapping->host;
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
 	struct minix_sb_info *sbi = minix_sb(dir->i_sb);
-	unsigned from = (char *)de-(char*)page_address(page);
-	unsigned to = from + sbi->s_dirsize;
+	loff_t pos = page_offset(page) +
+			(char *)de-(char*)page_address(page);
 	int err;
 
 	lock_page(page);
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+
+	err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize,
+					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err == 0) {
 		de->inode = inode->i_ino;
-		err = dir_commit_chunk(page, from, to);
+		err = dir_commit_chunk(page, pos, sbi->s_dirsize);
 	} else {
 		unlock_page(page);
 	}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 43668d7d668..f4f3343b180 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -346,24 +346,39 @@ static int minix_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, minix_get_block, wbc);
 }
+
 static int minix_readpage(struct file *file, struct page *page)
 {
 	return block_read_full_page(page,minix_get_block);
 }
-static int minix_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+
+int __minix_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return block_prepare_write(page,from,to,minix_get_block);
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				minix_get_block);
 }
+
+static int minix_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
+{
+	*pagep = NULL;
+	return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+}
+
 static sector_t minix_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,minix_get_block);
 }
+
 static const struct address_space_operations minix_aops = {
 	.readpage = minix_readpage,
 	.writepage = minix_writepage,
 	.sync_page = block_sync_page,
-	.prepare_write = minix_prepare_write,
-	.commit_write = generic_commit_write,
+	.write_begin = minix_write_begin,
+	.write_end = generic_write_end,
 	.bmap = minix_bmap
 };
 
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 73ef84f8fb0..ac5d3a75cb0 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -54,6 +54,9 @@ extern int minix_new_block(struct inode * inode);
 extern void minix_free_block(struct inode *inode, unsigned long block);
 extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
 extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int __minix_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata);
 
 extern void V1_minix_truncate(struct inode *);
 extern void V2_minix_truncate(struct inode *);
diff --git a/fs/mpage.c b/fs/mpage.c
index b1c3e589050..d54f8f89722 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -379,31 +379,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
 	struct bio *bio = NULL;
 	unsigned page_idx;
 	sector_t last_block_in_bio = 0;
-	struct pagevec lru_pvec;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
 	clear_buffer_mapped(&map_bh);
-	pagevec_init(&lru_pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
 		prefetchw(&page->flags);
 		list_del(&page->lru);
-		if (!add_to_page_cache(page, mapping,
+		if (!add_to_page_cache_lru(page, mapping,
 					page->index, GFP_KERNEL)) {
 			bio = do_mpage_readpage(bio, page,
 					nr_pages - page_idx,
 					&last_block_in_bio, &map_bh,
 					&first_logical_block,
 					get_block);
-			if (!pagevec_add(&lru_pvec, page))
-				__pagevec_lru_add(&lru_pvec);
-		} else {
-			page_cache_release(page);
 		}
+		page_cache_release(page);
 	}
-	pagevec_lru_add(&lru_pvec);
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		mpage_bio_submit(READ, bio);
diff --git a/fs/namei.c b/fs/namei.c
index a83160acd74..b40b8084eef 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2729,53 +2729,29 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
+	void *fsdata;
 	int err;
 	char *kaddr;
 
 retry:
-	err = -ENOMEM;
-	page = find_or_create_page(mapping, 0, gfp_mask);
-	if (!page)
-		goto fail;
-	err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
-	if (err == AOP_TRUNCATED_PAGE) {
-		page_cache_release(page);
-		goto retry;
-	}
+	err = pagecache_write_begin(NULL, mapping, 0, len-1,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
 	if (err)
-		goto fail_map;
+		goto fail;
+
 	kaddr = kmap_atomic(page, KM_USER0);
 	memcpy(kaddr, symname, len-1);
 	kunmap_atomic(kaddr, KM_USER0);
-	err = mapping->a_ops->commit_write(NULL, page, 0, len-1);
-	if (err == AOP_TRUNCATED_PAGE) {
-		page_cache_release(page);
-		goto retry;
-	}
-	if (err)
-		goto fail_map;
-	/*
-	 * Notice that we are _not_ going to block here - end of page is
-	 * unmapped, so this will only try to map the rest of page, see
-	 * that it is unmapped (typically even will not look into inode -
-	 * ->i_size will be enough for everything) and zero it out.
-	 * OTOH it's obviously correct and should make the page up-to-date.
-	 */
-	if (!PageUptodate(page)) {
-		err = mapping->a_ops->readpage(NULL, page);
-		if (err != AOP_TRUNCATED_PAGE)
-			wait_on_page_locked(page);
-	} else {
-		unlock_page(page);
-	}
-	page_cache_release(page);
+
+	err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
+							page, fsdata);
 	if (err < 0)
 		goto fail;
+	if (err < len-1)
+		goto retry;
+
 	mark_inode_dirty(inode);
 	return 0;
-fail_map:
-	unlock_page(page);
-	page_cache_release(page);
 fail:
 	return err;
 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 08c7c7387fc..d29f90d00aa 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -306,27 +306,50 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 }
 
 /*
- * This does the "real" work of the write. The generic routine has
- * allocated the page, locked it, done all the page alignment stuff
- * calculations etc. Now we should just copy the data from user
- * space and write it back to the real medium..
+ * This does the "real" work of the write. We must allocate and lock the
+ * page to be sent back to the generic routine, which then copies the
+ * data from user space.
  *
  * If the writer ends up delaying the write, the writer needs to
  * increment the page use counts until he is done with the page.
  */
-static int nfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+static int nfs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return nfs_flush_incompatible(file, page);
+	int ret;
+	pgoff_t index;
+	struct page *page;
+	index = pos >> PAGE_CACHE_SHIFT;
+
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+
+	ret = nfs_flush_incompatible(file, page);
+	if (ret) {
+		unlock_page(page);
+		page_cache_release(page);
+	}
+	return ret;
 }
 
-static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+static int nfs_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
-	long status;
+	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+	int status;
 
 	lock_kernel();
-	status = nfs_updatepage(file, page, offset, to-offset);
+	status = nfs_updatepage(file, page, offset, copied);
 	unlock_kernel();
-	return status;
+
+	unlock_page(page);
+	page_cache_release(page);
+
+	return status < 0 ? status : copied;
 }
 
 static void nfs_invalidate_page(struct page *page, unsigned long offset)
@@ -354,8 +377,8 @@ const struct address_space_operations nfs_file_aops = {
 	.set_page_dirty = __set_page_dirty_nobuffers,
 	.writepage = nfs_writepage,
 	.writepages = nfs_writepages,
-	.prepare_write = nfs_prepare_write,
-	.commit_write = nfs_commit_write,
+	.write_begin = nfs_write_begin,
+	.write_end = nfs_write_end,
 	.invalidatepage = nfs_invalidate_page,
 	.releasepage = nfs_release_page,
 #ifdef CONFIG_NFS_DIRECTIO
@@ -369,18 +392,35 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	struct file *filp = vma->vm_file;
 	unsigned pagelen;
 	int ret = -EINVAL;
+	void *fsdata;
+	struct address_space *mapping;
+	loff_t offset;
 
 	lock_page(page);
-	if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
-		goto out_unlock;
+	mapping = page->mapping;
+	if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) {
+		unlock_page(page);
+		return -EINVAL;
+	}
 	pagelen = nfs_page_length(page);
-	if (pagelen == 0)
-		goto out_unlock;
-	ret = nfs_prepare_write(filp, page, 0, pagelen);
-	if (!ret)
-		ret = nfs_commit_write(filp, page, 0, pagelen);
-out_unlock:
+	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
 	unlock_page(page);
+
+	/*
+	 * we can use mapping after releasing the page lock, because:
+	 * we hold mmap_sem on the fault path, which should pin the vma
+	 * which should pin the file, which pins the dentry which should
+	 * hold a reference on inode.
+	 */
+
+	if (pagelen) {
+		struct page *page2 = NULL;
+		ret = nfs_write_begin(filp, mapping, offset, pagelen,
+			       	0, &page2, &fsdata);
+		if (!ret)
+			ret = nfs_write_end(filp, mapping, offset, pagelen,
+				       	pagelen, page2, fsdata);
+	}
 	return ret;
 }
 
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index cba899a3494..04b26672980 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -861,9 +861,9 @@ exp_get_fsid_key(svc_client *clp, int fsid)
 	return exp_find_key(clp, FSID_NUM, fsidv, NULL);
 }
 
-svc_export *
-exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
-		struct cache_req *reqp)
+static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
+				   struct dentry *dentry,
+				   struct cache_req *reqp)
 {
 	struct svc_export *exp, key;
 	int err;
@@ -887,9 +887,9 @@ exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
 /*
  * Find the export entry for a given dentry.
  */
-struct svc_export *
-exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
-	   struct cache_req *reqp)
+static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
+				     struct dentry *dentry,
+				     struct cache_req *reqp)
 {
 	svc_export *exp;
 
@@ -1214,9 +1214,8 @@ out:
 	return err;
 }
 
-struct svc_export *
-exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
-	 struct cache_req *reqp)
+static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type,
+				   u32 *fsidv, struct cache_req *reqp)
 {
 	struct svc_export *exp;
 	struct svc_expkey *ek = exp_find_key(clp, fsid_type, fsidv, reqp);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 34d10452c56..c69c1b30015 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1724,9 +1724,9 @@ out:
 	return ret;
 }
 
-int ocfs2_write_begin(struct file *file, struct address_space *mapping,
-		      loff_t pos, unsigned len, unsigned flags,
-		      struct page **pagep, void **fsdata)
+static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
+			     loff_t pos, unsigned len, unsigned flags,
+			     struct page **pagep, void **fsdata)
 {
 	int ret;
 	struct buffer_head *di_bh = NULL;
@@ -1877,9 +1877,9 @@ out_write_size:
 	return copied;
 }
 
-int ocfs2_write_end(struct file *file, struct address_space *mapping,
-		    loff_t pos, unsigned len, unsigned copied,
-		    struct page *page, void *fsdata)
+static int ocfs2_write_end(struct file *file, struct address_space *mapping,
+			   loff_t pos, unsigned len, unsigned copied,
+			   struct page *page, void *fsdata)
 {
 	int ret;
 	struct inode *inode = mapping->host;
@@ -1896,6 +1896,8 @@ int ocfs2_write_end(struct file *file, struct address_space *mapping,
 const struct address_space_operations ocfs2_aops = {
 	.readpage	= ocfs2_readpage,
 	.writepage	= ocfs2_writepage,
+	.write_begin	= ocfs2_write_begin,
+	.write_end	= ocfs2_write_end,
 	.bmap		= ocfs2_bmap,
 	.sync_page	= block_sync_page,
 	.direct_IO	= ocfs2_direct_IO,
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 113560877db..503e49232e1 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -44,14 +44,6 @@ int walk_page_buffers(	handle_t *handle,
 			int (*fn)(	handle_t *handle,
 					struct buffer_head *bh));
 
-int ocfs2_write_begin(struct file *file, struct address_space *mapping,
-		      loff_t pos, unsigned len, unsigned flags,
-		      struct page **pagep, void **fsdata);
-
-int ocfs2_write_end(struct file *file, struct address_space *mapping,
-		    loff_t pos, unsigned len, unsigned copied,
-		    struct page *page, void *fsdata);
-
 int ocfs2_write_end_nolock(struct address_space *mapping,
 			   loff_t pos, unsigned len, unsigned copied,
 			   struct page *page, void *fsdata);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a62b14eb406..f92fe91ff26 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1881,143 +1881,13 @@ out:
 	return ret;
 }
 
-static inline void
-ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
-{
-	const struct iovec *iov = *iovp;
-	size_t base = *basep;
-
-	do {
-		int copy = min(bytes, iov->iov_len - base);
-
-		bytes -= copy;
-		base += copy;
-		if (iov->iov_len == base) {
-			iov++;
-			base = 0;
-		}
-	} while (bytes);
-	*iovp = iov;
-	*basep = base;
-}
-
-static struct page * ocfs2_get_write_source(char **ret_src_buf,
-					    const struct iovec *cur_iov,
-					    size_t iov_offset)
-{
-	int ret;
-	char *buf = cur_iov->iov_base + iov_offset;
-	struct page *src_page = NULL;
-	unsigned long off;
-
-	off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;
-
-	if (!segment_eq(get_fs(), KERNEL_DS)) {
-		/*
-		 * Pull in the user page. We want to do this outside
-		 * of the meta data locks in order to preserve locking
-		 * order in case of page fault.
-		 */
-		ret = get_user_pages(current, current->mm,
-				     (unsigned long)buf & PAGE_CACHE_MASK, 1,
-				     0, 0, &src_page, NULL);
-		if (ret == 1)
-			*ret_src_buf = kmap(src_page) + off;
-		else
-			src_page = ERR_PTR(-EFAULT);
-	} else {
-		*ret_src_buf = buf;
-	}
-
-	return src_page;
-}
-
-static void ocfs2_put_write_source(struct page *page)
-{
-	if (page) {
-		kunmap(page);
-		page_cache_release(page);
-	}
-}
-
-static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
-					 const struct iovec *iov,
-					 unsigned long nr_segs,
-					 size_t count,
-					 ssize_t o_direct_written)
-{
-	int ret = 0;
-	ssize_t copied, total = 0;
-	size_t iov_offset = 0, bytes;
-	loff_t pos;
-	const struct iovec *cur_iov = iov;
-	struct page *user_page, *page;
-	char * uninitialized_var(buf);
-	char *dst;
-	void *fsdata;
-
-	/*
-	 * handle partial DIO write.  Adjust cur_iov if needed.
-	 */
-	ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);
-
-	do {
-		pos = *ppos;
-
-		user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
-		if (IS_ERR(user_page)) {
-			ret = PTR_ERR(user_page);
-			goto out;
-		}
-
-		/* Stay within our page boundaries */
-		bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
-			    (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
-		/* Stay within the vector boundary */
-		bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
-		/* Stay within count */
-		bytes = min(bytes, count);
-
-		page = NULL;
-		ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
-					&page, &fsdata);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-
-		dst = kmap_atomic(page, KM_USER0);
-		memcpy(dst + (pos & (loff_t)(PAGE_CACHE_SIZE - 1)), buf, bytes);
-		kunmap_atomic(dst, KM_USER0);
-		flush_dcache_page(page);
-		ocfs2_put_write_source(user_page);
-
-		copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
-					 bytes, page, fsdata);
-		if (copied < 0) {
-			mlog_errno(copied);
-			ret = copied;
-			goto out;
-		}
-
-		total += copied;
-		*ppos = pos + copied;
-		count -= copied;
-
-		ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
-	} while(count);
-
-out:
-	return total ? total : ret;
-}
-
 static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 				    const struct iovec *iov,
 				    unsigned long nr_segs,
 				    loff_t pos)
 {
 	int ret, direct_io, appending, rw_level, have_alloc_sem  = 0;
-	int can_do_direct, sync = 0;
+	int can_do_direct;
 	ssize_t written = 0;
 	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
@@ -2033,12 +1903,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	if (iocb->ki_left == 0)
 		return 0;
 
-	ret = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (ret)
-		return ret;
-
-	count = ocount;
-
 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 
 	appending = file->f_flags & O_APPEND ? 1 : 0;
@@ -2082,33 +1946,23 @@ relock:
 		rw_level = -1;
 
 		direct_io = 0;
-		sync = 1;
 		goto relock;
 	}
 
-	if (!sync && ((file->f_flags & O_SYNC) || IS_SYNC(inode)))
-		sync = 1;
-
-	/*
-	 * XXX: Is it ok to execute these checks a second time?
-	 */
-	ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode));
-	if (ret)
-		goto out;
-
-	/*
-	 * Set pos so that sync_page_range_nolock() below understands
-	 * where to start from. We might've moved it around via the
-	 * calls above. The range we want to actually sync starts from
-	 * *ppos here.
-	 *
-	 */
-	pos = *ppos;
-
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
 	if (direct_io) {
+		ret = generic_segment_checks(iov, &nr_segs, &ocount,
+					     VERIFY_READ);
+		if (ret)
+			goto out_dio;
+		count = ocount;	/* generic_write_checks() reads *count */
+		ret = generic_write_checks(file, ppos, &count,
+					   S_ISBLK(inode->i_mode));
+		if (ret)
+			goto out_dio;
+
 		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
 						    ppos, count, ocount);
 		if (written < 0) {
@@ -2116,14 +1970,8 @@ relock:
 			goto out_dio;
 		}
 	} else {
-		written = ocfs2_file_buffered_write(file, ppos, iov, nr_segs,
-						    count, written);
-		if (written < 0) {
-			ret = written;
-			if (ret != -EFAULT || ret != -ENOSPC)
-				mlog_errno(ret);
-			goto out;
-		}
+		written = generic_file_aio_write_nolock(iocb, iov, nr_segs,
+							*ppos);
 	}
 
 out_dio:
@@ -2153,97 +2001,12 @@ out_sems:
 	if (have_alloc_sem)
 		up_read(&inode->i_alloc_sem);
 
-	if (written > 0 && sync) {
-		ssize_t err;
-
-		err = sync_page_range_nolock(inode, file->f_mapping, pos, count);
-		if (err < 0)
-			written = err;
-	}
-
 	mutex_unlock(&inode->i_mutex);
 
 	mlog_exit(ret);
 	return written ? written : ret;
 }
 
-static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
-				    struct pipe_buffer *buf,
-				    struct splice_desc *sd)
-{
-	int ret, count;
-	ssize_t copied = 0;
-	struct file *file = sd->u.file;
-	unsigned int offset;
-	struct page *page = NULL;
-	void *fsdata;
-	char *src, *dst;
-
-	ret = buf->ops->confirm(pipe, buf);
-	if (ret)
-		goto out;
-
-	offset = sd->pos & ~PAGE_CACHE_MASK;
-	count = sd->len;
-	if (count + offset > PAGE_CACHE_SIZE)
-		count = PAGE_CACHE_SIZE - offset;
-
-	ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
-				&page, &fsdata);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	src = buf->ops->map(pipe, buf, 1);
-	dst = kmap_atomic(page, KM_USER1);
-	memcpy(dst + offset, src + buf->offset, count);
-	kunmap_atomic(dst, KM_USER1);
-	buf->ops->unmap(pipe, buf, src);
-
-	copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
-				 page, fsdata);
-	if (copied < 0) {
-		mlog_errno(copied);
-		ret = copied;
-		goto out;
-	}
-out:
-
-	return copied ? copied : ret;
-}
-
-static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
-					 struct file *out,
-					 loff_t *ppos,
-					 size_t len,
-					 unsigned int flags)
-{
-	int ret, err;
-	struct address_space *mapping = out->f_mapping;
-	struct inode *inode = mapping->host;
-	struct splice_desc sd = {
-		.total_len = len,
-		.flags = flags,
-		.pos = *ppos,
-		.u.file = out,
-	};
-
-	ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
-	if (ret > 0) {
-		*ppos += ret;
-
-		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
-			err = generic_osync_inode(inode, mapping,
-						  OSYNC_METADATA|OSYNC_DATA);
-			if (err)
-				ret = err;
-		}
-	}
-
-	return ret;
-}
-
 static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 				       struct file *out,
 				       loff_t *ppos,
@@ -2273,8 +2036,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
 		goto out_unlock;
 	}
 
-	/* ok, we're done with i_size and alloc work */
-	ret = __ocfs2_file_splice_write(pipe, out, ppos, len, flags);
+	ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
 
 out_unlock:
 	ocfs2_rw_unlock(inode, 1);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e5d0953d4db..78fdfea1a7f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -492,7 +492,7 @@ static ssize_t proc_info_read(struct file * file, char __user * buf,
 		count = PROC_BLOCK_SIZE;
 
 	length = -ENOMEM;
-	if (!(page = __get_free_page(GFP_KERNEL)))
+	if (!(page = __get_free_page(GFP_TEMPORARY)))
 		goto out;
 
 	length = PROC_I(inode)->op.proc_read(task, (char*)page);
@@ -532,7 +532,7 @@ static ssize_t mem_read(struct file * file, char __user * buf,
 		goto out;
 
 	ret = -ENOMEM;
-	page = (char *)__get_free_page(GFP_USER);
+	page = (char *)__get_free_page(GFP_TEMPORARY);
 	if (!page)
 		goto out;
 
@@ -602,7 +602,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
 		goto out;
 
 	copied = -ENOMEM;
-	page = (char *)__get_free_page(GFP_USER);
+	page = (char *)__get_free_page(GFP_TEMPORARY);
 	if (!page)
 		goto out;
 
@@ -788,7 +788,7 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
 		/* No partial writes. */
 		return -EINVAL;
 	}
-	page = (char*)__get_free_page(GFP_USER);
+	page = (char*)__get_free_page(GFP_TEMPORARY);
 	if (!page)
 		return -ENOMEM;
 	length = -EFAULT;
@@ -954,7 +954,8 @@ static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
 			    char __user *buffer, int buflen)
 {
 	struct inode * inode;
-	char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
+	char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
+	char *path;
 	int len;
 
 	if (!tmp)
@@ -1726,7 +1727,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 		goto out;
 
 	length = -ENOMEM;
-	page = (char*)__get_free_page(GFP_USER);
+	page = (char*)__get_free_page(GFP_TEMPORARY);
 	if (!page)
 		goto out;
 
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b5e7155d30d..1bdb6243575 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -74,7 +74,7 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
 		nbytes = MAX_NON_LFS - pos;
 
 	dp = PDE(inode);
-	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+	if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
 		return -ENOMEM;
 
 	while ((nbytes > 0) && !eof) {
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 0071939c009..5de7f874d95 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -229,6 +229,19 @@ static const struct file_operations fragmentation_file_operations = {
 	.release	= seq_release,
 };
 
+extern struct seq_operations pagetypeinfo_op;
+static int pagetypeinfo_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &pagetypeinfo_op);
+}
+
+static const struct file_operations pagetypeinfo_file_ops = {
+	.open		= pagetypeinfo_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
 extern struct seq_operations zoneinfo_op;
 static int zoneinfo_open(struct inode *inode, struct file *file)
 {
@@ -724,6 +737,7 @@ void __init proc_misc_init(void)
 #endif
 #endif
 	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
+	create_seq_entry("pagetypeinfo", S_IRUGO, &pagetypeinfo_file_ops);
 	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
 	create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
 #ifdef CONFIG_BLOCK
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 1bc8d873a9e..df8bd87e49b 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -433,16 +433,21 @@ static int qnx4_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page,qnx4_get_block, wbc);
 }
+
 static int qnx4_readpage(struct file *file, struct page *page)
 {
 	return block_read_full_page(page,qnx4_get_block);
 }
-static int qnx4_prepare_write(struct file *file, struct page *page,
-			      unsigned from, unsigned to)
+
+static int qnx4_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	struct qnx4_inode_info *qnx4_inode = qnx4_i(page->mapping->host);
-	return cont_prepare_write(page, from, to, qnx4_get_block,
-				  &qnx4_inode->mmu_private);
+	struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
+	*pagep = NULL;
+	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				qnx4_get_block,
+				&qnx4_inode->mmu_private);
 }
 static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
 {
@@ -452,8 +457,8 @@ static const struct address_space_operations qnx4_aops = {
 	.readpage	= qnx4_readpage,
 	.writepage	= qnx4_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= qnx4_prepare_write,
-	.commit_write	= generic_commit_write,
+	.write_begin	= qnx4_write_begin,
+	.write_end	= generic_write_end,
 	.bmap		= qnx4_bmap
 };
 
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 97bdc0b2f9d..b41a514b097 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -29,8 +29,8 @@
 
 const struct address_space_operations ramfs_aops = {
 	.readpage	= simple_readpage,
-	.prepare_write	= simple_prepare_write,
-	.commit_write	= simple_commit_write,
+	.write_begin	= simple_write_begin,
+	.write_end	= simple_write_end,
 	.set_page_dirty = __set_page_dirty_no_writeback,
 };
 
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 237fe8b8e81..0989bc2c2f6 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -29,8 +29,8 @@ static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
 
 const struct address_space_operations ramfs_aops = {
 	.readpage		= simple_readpage,
-	.prepare_write		= simple_prepare_write,
-	.commit_write		= simple_commit_write,
+	.write_begin		= simple_write_begin,
+	.write_end		= simple_write_end,
 	.set_page_dirty		= __set_page_dirty_no_writeback,
 };
 
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2070aeee2a5..a804903d31d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -153,608 +153,6 @@ static int reiserfs_sync_file(struct file *p_s_filp,
 	return (n_err < 0) ? -EIO : 0;
 }
 
-/* I really do not want to play with memory shortage right now, so
-   to simplify the code, we are not going to write more than this much pages at
-   a time. This still should considerably improve performance compared to 4k
-   at a time case. This is 32 pages of 4k size. */
-#define REISERFS_WRITE_PAGES_AT_A_TIME (128 * 1024) / PAGE_CACHE_SIZE
-
-/* Allocates blocks for a file to fulfil write request.
-   Maps all unmapped but prepared pages from the list.
-   Updates metadata with newly allocated blocknumbers as needed */
-static int reiserfs_allocate_blocks_for_region(struct reiserfs_transaction_handle *th, struct inode *inode,	/* Inode we work with */
-					       loff_t pos,	/* Writing position */
-					       int num_pages,	/* number of pages write going
-								   to touch */
-					       int write_bytes,	/* amount of bytes to write */
-					       struct page **prepared_pages,	/* array of
-										   prepared pages
-										 */
-					       int blocks_to_allocate	/* Amount of blocks we
-									   need to allocate to
-									   fit the data into file
-									 */
-    )
-{
-	struct cpu_key key;	// cpu key of item that we are going to deal with
-	struct item_head *ih;	// pointer to item head that we are going to deal with
-	struct buffer_head *bh;	// Buffer head that contains items that we are going to deal with
-	__le32 *item;		// pointer to item we are going to deal with
-	INITIALIZE_PATH(path);	// path to item, that we are going to deal with.
-	b_blocknr_t *allocated_blocks;	// Pointer to a place where allocated blocknumbers would be stored.
-	reiserfs_blocknr_hint_t hint;	// hint structure for block allocator.
-	size_t res;		// return value of various functions that we call.
-	int curr_block;		// current block used to keep track of unmapped blocks.
-	int i;			// loop counter
-	int itempos;		// position in item
-	unsigned int from = (pos & (PAGE_CACHE_SIZE - 1));	// writing position in
-	// first page
-	unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;	/* last modified byte offset in last page */
-	__u64 hole_size;	// amount of blocks for a file hole, if it needed to be created.
-	int modifying_this_item = 0;	// Flag for items traversal code to keep track
-	// of the fact that we already prepared
-	// current block for journal
-	int will_prealloc = 0;
-	RFALSE(!blocks_to_allocate,
-	       "green-9004: tried to allocate zero blocks?");
-
-	/* only preallocate if this is a small write */
-	if (REISERFS_I(inode)->i_prealloc_count ||
-	    (!(write_bytes & (inode->i_sb->s_blocksize - 1)) &&
-	     blocks_to_allocate <
-	     REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
-		will_prealloc =
-		    REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;
-
-	allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
-				   sizeof(b_blocknr_t), GFP_NOFS);
-	if (!allocated_blocks)
-		return -ENOMEM;
-
-	/* First we compose a key to point at the writing position, we want to do
-	   that outside of any locking region. */
-	make_cpu_key(&key, inode, pos + 1, TYPE_ANY, 3 /*key length */ );
-
-	/* If we came here, it means we absolutely need to open a transaction,
-	   since we need to allocate some blocks */
-	reiserfs_write_lock(inode->i_sb);	// Journaling stuff and we need that.
-	res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));	// Wish I know if this number enough
-	if (res)
-		goto error_exit;
-	reiserfs_update_inode_transaction(inode);
-
-	/* Look for the in-tree position of our write, need path for block allocator */
-	res = search_for_position_by_key(inode->i_sb, &key, &path);
-	if (res == IO_ERROR) {
-		res = -EIO;
-		goto error_exit;
-	}
-
-	/* Allocate blocks */
-	/* First fill in "hint" structure for block allocator */
-	hint.th = th;		// transaction handle.
-	hint.path = &path;	// Path, so that block allocator can determine packing locality or whatever it needs to determine.
-	hint.inode = inode;	// Inode is needed by block allocator too.
-	hint.search_start = 0;	// We have no hint on where to search free blocks for block allocator.
-	hint.key = key.on_disk_key;	// on disk key of file.
-	hint.block = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);	// Number of disk blocks this file occupies already.
-	hint.formatted_node = 0;	// We are allocating blocks for unformatted node.
-	hint.preallocate = will_prealloc;
-
-	/* Call block allocator to allocate blocks */
-	res =
-	    reiserfs_allocate_blocknrs(&hint, allocated_blocks,
-				       blocks_to_allocate, blocks_to_allocate);
-	if (res != CARRY_ON) {
-		if (res == NO_DISK_SPACE) {
-			/* We flush the transaction in case of no space. This way some
-			   blocks might become free */
-			SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
-			res = restart_transaction(th, inode, &path);
-			if (res)
-				goto error_exit;
-
-			/* We might have scheduled, so search again */
-			res =
-			    search_for_position_by_key(inode->i_sb, &key,
-						       &path);
-			if (res == IO_ERROR) {
-				res = -EIO;
-				goto error_exit;
-			}
-
-			/* update changed info for hint structure. */
-			res =
-			    reiserfs_allocate_blocknrs(&hint, allocated_blocks,
-						       blocks_to_allocate,
-						       blocks_to_allocate);
-			if (res != CARRY_ON) {
-				res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
-				pathrelse(&path);
-				goto error_exit;
-			}
-		} else {
-			res = res == QUOTA_EXCEEDED ? -EDQUOT : -ENOSPC;
-			pathrelse(&path);
-			goto error_exit;
-		}
-	}
-#ifdef __BIG_ENDIAN
-	// Too bad, I have not found any way to convert a given region from
-	// cpu format to little endian format
-	{
-		int i;
-		for (i = 0; i < blocks_to_allocate; i++)
-			allocated_blocks[i] = cpu_to_le32(allocated_blocks[i]);
-	}
-#endif
-
-	/* Blocks allocating well might have scheduled and tree might have changed,
-	   let's search the tree again */
-	/* find where in the tree our write should go */
-	res = search_for_position_by_key(inode->i_sb, &key, &path);
-	if (res == IO_ERROR) {
-		res = -EIO;
-		goto error_exit_free_blocks;
-	}
-
-	bh = get_last_bh(&path);	// Get a bufferhead for last element in path.
-	ih = get_ih(&path);	// Get a pointer to last item head in path.
-	item = get_item(&path);	// Get a pointer to last item in path
-
-	/* Let's see what we have found */
-	if (res != POSITION_FOUND) {	/* position not found, this means that we
-					   might need to append file with holes
-					   first */
-		// Since we are writing past the file's end, we need to find out if
-		// there is a hole that needs to be inserted before our writing
-		// position, and how many blocks it is going to cover (we need to
-		//  populate pointers to file blocks representing the hole with zeros)
-
-		{
-			int item_offset = 1;
-			/*
-			 * if ih is stat data, its offset is 0 and we don't want to
-			 * add 1 to pos in the hole_size calculation
-			 */
-			if (is_statdata_le_ih(ih))
-				item_offset = 0;
-			hole_size = (pos + item_offset -
-				     (le_key_k_offset
-				      (get_inode_item_key_version(inode),
-				       &(ih->ih_key)) + op_bytes_number(ih,
-									inode->
-									i_sb->
-									s_blocksize)))
-			    >> inode->i_sb->s_blocksize_bits;
-		}
-
-		if (hole_size > 0) {
-			int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize) / UNFM_P_SIZE);	// How much data to insert first time.
-			/* area filled with zeroes, to supply as list of zero blocknumbers
-			   We allocate it outside of loop just in case loop would spin for
-			   several iterations. */
-			char *zeros = kzalloc(to_paste * UNFM_P_SIZE, GFP_ATOMIC);	// We cannot insert more than MAX_ITEM_LEN bytes anyway.
-			if (!zeros) {
-				res = -ENOMEM;
-				goto error_exit_free_blocks;
-			}
-			do {
-				to_paste =
-				    min_t(__u64, hole_size,
-					  MAX_ITEM_LEN(inode->i_sb->
-						       s_blocksize) /
-					  UNFM_P_SIZE);
-				if (is_indirect_le_ih(ih)) {
-					/* Ok, there is existing indirect item already. Need to append it */
-					/* Calculate position past inserted item */
-					make_cpu_key(&key, inode,
-						     le_key_k_offset
-						     (get_inode_item_key_version
-						      (inode),
-						      &(ih->ih_key)) +
-						     op_bytes_number(ih,
-								     inode->
-								     i_sb->
-								     s_blocksize),
-						     TYPE_INDIRECT, 3);
-					res =
-					    reiserfs_paste_into_item(th, &path,
-								     &key,
-								     inode,
-								     (char *)
-								     zeros,
-								     UNFM_P_SIZE
-								     *
-								     to_paste);
-					if (res) {
-						kfree(zeros);
-						goto error_exit_free_blocks;
-					}
-				} else if (is_statdata_le_ih(ih)) {
-					/* No existing item, create it */
-					/* item head for new item */
-					struct item_head ins_ih;
-
-					/* create a key for our new item */
-					make_cpu_key(&key, inode, 1,
-						     TYPE_INDIRECT, 3);
-
-					/* Create new item head for our new item */
-					make_le_item_head(&ins_ih, &key,
-							  key.version, 1,
-							  TYPE_INDIRECT,
-							  to_paste *
-							  UNFM_P_SIZE,
-							  0 /* free space */ );
-
-					/* Find where such item should live in the tree */
-					res =
-					    search_item(inode->i_sb, &key,
-							&path);
-					if (res != ITEM_NOT_FOUND) {
-						/* item should not exist, otherwise we have error */
-						if (res != -ENOSPC) {
-							reiserfs_warning(inode->
-									 i_sb,
-									 "green-9008: search_by_key (%K) returned %d",
-									 &key,
-									 res);
-						}
-						res = -EIO;
-						kfree(zeros);
-						goto error_exit_free_blocks;
-					}
-					res =
-					    reiserfs_insert_item(th, &path,
-								 &key, &ins_ih,
-								 inode,
-								 (char *)zeros);
-				} else {
-					reiserfs_panic(inode->i_sb,
-						       "green-9011: Unexpected key type %K\n",
-						       &key);
-				}
-				if (res) {
-					kfree(zeros);
-					goto error_exit_free_blocks;
-				}
-				/* Now we want to check if transaction is too full, and if it is
-				   we restart it. This will also free the path. */
-				if (journal_transaction_should_end
-				    (th, th->t_blocks_allocated)) {
-					inode->i_size = cpu_key_k_offset(&key) +
-						(to_paste << inode->i_blkbits);
-					res =
-					    restart_transaction(th, inode,
-								&path);
-					if (res) {
-						pathrelse(&path);
-						kfree(zeros);
-						goto error_exit;
-					}
-				}
-
-				/* Well, need to recalculate path and stuff */
-				set_cpu_key_k_offset(&key,
-						     cpu_key_k_offset(&key) +
-						     (to_paste << inode->
-						      i_blkbits));
-				res =
-				    search_for_position_by_key(inode->i_sb,
-							       &key, &path);
-				if (res == IO_ERROR) {
-					res = -EIO;
-					kfree(zeros);
-					goto error_exit_free_blocks;
-				}
-				bh = get_last_bh(&path);
-				ih = get_ih(&path);
-				item = get_item(&path);
-				hole_size -= to_paste;
-			} while (hole_size);
-			kfree(zeros);
-		}
-	}
-	// Go through existing indirect items first
-	// replace all zeroes with blocknumbers from list
-	// Note that if no corresponding item was found, by previous search,
-	// it means there are no existing in-tree representation for file area
-	// we are going to overwrite, so there is nothing to scan through for holes.
-	for (curr_block = 0, itempos = path.pos_in_item;
-	     curr_block < blocks_to_allocate && res == POSITION_FOUND;) {
-	      retry:
-
-		if (itempos >= ih_item_len(ih) / UNFM_P_SIZE) {
-			/* We run out of data in this indirect item, let's look for another
-			   one. */
-			/* First if we are already modifying current item, log it */
-			if (modifying_this_item) {
-				journal_mark_dirty(th, inode->i_sb, bh);
-				modifying_this_item = 0;
-			}
-			/* Then set the key to look for a new indirect item (offset of old
-			   item is added to old item length */
-			set_cpu_key_k_offset(&key,
-					     le_key_k_offset
-					     (get_inode_item_key_version(inode),
-					      &(ih->ih_key)) +
-					     op_bytes_number(ih,
-							     inode->i_sb->
-							     s_blocksize));
-			/* Search ofor position of new key in the tree. */
-			res =
-			    search_for_position_by_key(inode->i_sb, &key,
-						       &path);
-			if (res == IO_ERROR) {
-				res = -EIO;
-				goto error_exit_free_blocks;
-			}
-			bh = get_last_bh(&path);
-			ih = get_ih(&path);
-			item = get_item(&path);
-			itempos = path.pos_in_item;
-			continue;	// loop to check all kinds of conditions and so on.
-		}
-		/* Ok, we have correct position in item now, so let's see if it is
-		   representing file hole (blocknumber is zero) and fill it if needed */
-		if (!item[itempos]) {
-			/* Ok, a hole. Now we need to check if we already prepared this
-			   block to be journaled */
-			while (!modifying_this_item) {	// loop until succeed
-				/* Well, this item is not journaled yet, so we must prepare
-				   it for journal first, before we can change it */
-				struct item_head tmp_ih;	// We copy item head of found item,
-				// here to detect if fs changed under
-				// us while we were preparing for
-				// journal.
-				int fs_gen;	// We store fs generation here to find if someone
-				// changes fs under our feet
-
-				copy_item_head(&tmp_ih, ih);	// Remember itemhead
-				fs_gen = get_generation(inode->i_sb);	// remember fs generation
-				reiserfs_prepare_for_journal(inode->i_sb, bh, 1);	// Prepare a buffer within which indirect item is stored for changing.
-				if (fs_changed(fs_gen, inode->i_sb)
-				    && item_moved(&tmp_ih, &path)) {
-					// Sigh, fs was changed under us, we need to look for new
-					// location of item we are working with
-
-					/* unmark prepaerd area as journaled and search for it's
-					   new position */
-					reiserfs_restore_prepared_buffer(inode->
-									 i_sb,
-									 bh);
-					res =
-					    search_for_position_by_key(inode->
-								       i_sb,
-								       &key,
-								       &path);
-					if (res == IO_ERROR) {
-						res = -EIO;
-						goto error_exit_free_blocks;
-					}
-					bh = get_last_bh(&path);
-					ih = get_ih(&path);
-					item = get_item(&path);
-					itempos = path.pos_in_item;
-					goto retry;
-				}
-				modifying_this_item = 1;
-			}
-			item[itempos] = allocated_blocks[curr_block];	// Assign new block
-			curr_block++;
-		}
-		itempos++;
-	}
-
-	if (modifying_this_item) {	// We need to log last-accessed block, if it
-		// was modified, but not logged yet.
-		journal_mark_dirty(th, inode->i_sb, bh);
-	}
-
-	if (curr_block < blocks_to_allocate) {
-		// Oh, well need to append to indirect item, or to create indirect item
-		// if there weren't any
-		if (is_indirect_le_ih(ih)) {
-			// Existing indirect item - append. First calculate key for append
-			// position. We do not need to recalculate path as it should
-			// already point to correct place.
-			make_cpu_key(&key, inode,
-				     le_key_k_offset(get_inode_item_key_version
-						     (inode),
-						     &(ih->ih_key)) +
-				     op_bytes_number(ih,
-						     inode->i_sb->s_blocksize),
-				     TYPE_INDIRECT, 3);
-			res =
-			    reiserfs_paste_into_item(th, &path, &key, inode,
-						     (char *)(allocated_blocks +
-							      curr_block),
-						     UNFM_P_SIZE *
-						     (blocks_to_allocate -
-						      curr_block));
-			if (res) {
-				goto error_exit_free_blocks;
-			}
-		} else if (is_statdata_le_ih(ih)) {
-			// Last found item was statdata. That means we need to create indirect item.
-			struct item_head ins_ih;	/* itemhead for new item */
-
-			/* create a key for our new item */
-			make_cpu_key(&key, inode, 1, TYPE_INDIRECT, 3);	// Position one,
-			// because that's
-			// where first
-			// indirect item
-			// begins
-			/* Create new item head for our new item */
-			make_le_item_head(&ins_ih, &key, key.version, 1,
-					  TYPE_INDIRECT,
-					  (blocks_to_allocate -
-					   curr_block) * UNFM_P_SIZE,
-					  0 /* free space */ );
-			/* Find where such item should live in the tree */
-			res = search_item(inode->i_sb, &key, &path);
-			if (res != ITEM_NOT_FOUND) {
-				/* Well, if we have found such item already, or some error
-				   occured, we need to warn user and return error */
-				if (res != -ENOSPC) {
-					reiserfs_warning(inode->i_sb,
-							 "green-9009: search_by_key (%K) "
-							 "returned %d", &key,
-							 res);
-				}
-				res = -EIO;
-				goto error_exit_free_blocks;
-			}
-			/* Insert item into the tree with the data as its body */
-			res =
-			    reiserfs_insert_item(th, &path, &key, &ins_ih,
-						 inode,
-						 (char *)(allocated_blocks +
-							  curr_block));
-		} else {
-			reiserfs_panic(inode->i_sb,
-				       "green-9010: unexpected item type for key %K\n",
-				       &key);
-		}
-	}
-	// the caller is responsible for closing the transaction
-	// unless we return an error, they are also responsible for logging
-	// the inode.
-	//
-	pathrelse(&path);
-	/*
-	 * cleanup prellocation from previous writes
-	 * if this is a partial block write
-	 */
-	if (write_bytes & (inode->i_sb->s_blocksize - 1))
-		reiserfs_discard_prealloc(th, inode);
-	reiserfs_write_unlock(inode->i_sb);
-
-	// go through all the pages/buffers and map the buffers to newly allocated
-	// blocks (so that system knows where to write these pages later).
-	curr_block = 0;
-	for (i = 0; i < num_pages; i++) {
-		struct page *page = prepared_pages[i];	//current page
-		struct buffer_head *head = page_buffers(page);	// first buffer for a page
-		int block_start, block_end;	// in-page offsets for buffers.
-
-		if (!page_buffers(page))
-			reiserfs_panic(inode->i_sb,
-				       "green-9005: No buffers for prepared page???");
-
-		/* For each buffer in page */
-		for (bh = head, block_start = 0; bh != head || !block_start;
-		     block_start = block_end, bh = bh->b_this_page) {
-			if (!bh)
-				reiserfs_panic(inode->i_sb,
-					       "green-9006: Allocated but absent buffer for a page?");
-			block_end = block_start + inode->i_sb->s_blocksize;
-			if (i == 0 && block_end <= from)
-				/* if this buffer is before requested data to map, skip it */
-				continue;
-			if (i == num_pages - 1 && block_start >= to)
-				/* If this buffer is after requested data to map, abort
-				   processing of current page */
-				break;
-
-			if (!buffer_mapped(bh)) {	// Ok, unmapped buffer, need to map it
-				map_bh(bh, inode->i_sb,
-				       le32_to_cpu(allocated_blocks
-						   [curr_block]));
-				curr_block++;
-				set_buffer_new(bh);
-			}
-		}
-	}
-
-	RFALSE(curr_block > blocks_to_allocate,
-	       "green-9007: Used too many blocks? weird");
-
-	kfree(allocated_blocks);
-	return 0;
-
-// Need to deal with transaction here.
-      error_exit_free_blocks:
-	pathrelse(&path);
-	// free blocks
-	for (i = 0; i < blocks_to_allocate; i++)
-		reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]),
-				    1);
-
-      error_exit:
-	if (th->t_trans_id) {
-		int err;
-		// update any changes we made to blk count
-		mark_inode_dirty(inode);
-		err =
-		    journal_end(th, inode->i_sb,
-				JOURNAL_PER_BALANCE_CNT * 3 + 1 +
-				2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
-		if (err)
-			res = err;
-	}
-	reiserfs_write_unlock(inode->i_sb);
-	kfree(allocated_blocks);
-
-	return res;
-}
-
-/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
-static void reiserfs_unprepare_pages(struct page **prepared_pages,	/* list of locked pages */
-				     size_t num_pages /* amount of pages */ )
-{
-	int i;			// loop counter
-
-	for (i = 0; i < num_pages; i++) {
-		struct page *page = prepared_pages[i];
-
-		try_to_free_buffers(page);
-		unlock_page(page);
-		page_cache_release(page);
-	}
-}
-
-/* This function will copy data from userspace to specified pages within
-   supplied byte range */
-static int reiserfs_copy_from_user_to_file_region(loff_t pos,	/* In-file position */
-						  int num_pages,	/* Number of pages affected */
-						  int write_bytes,	/* Amount of bytes to write */
-						  struct page **prepared_pages,	/* pointer to 
-										   array to
-										   prepared pages
-										 */
-						  const char __user * buf	/* Pointer to user-supplied
-										   data */
-    )
-{
-	long page_fault = 0;	// status of copy_from_user.
-	int i;			// loop counter.
-	int offset;		// offset in page
-
-	for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
-	     i++, offset = 0) {
-		size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes);	// How much of bytes to write to this page
-		struct page *page = prepared_pages[i];	// Current page we process.
-
-		fault_in_pages_readable(buf, count);
-
-		/* Copy data from userspace to the current page */
-		kmap(page);
-		page_fault = __copy_from_user(page_address(page) + offset, buf, count);	// Copy the data.
-		/* Flush processor's dcache for this page */
-		flush_dcache_page(page);
-		kunmap(page);
-		buf += count;
-		write_bytes -= count;
-
-		if (page_fault)
-			break;	// Was there a fault? abort.
-	}
-
-	return page_fault ? -EFAULT : 0;
-}
-
 /* taken fs/buffer.c:__block_commit_write */
 int reiserfs_commit_page(struct inode *inode, struct page *page,
 			 unsigned from, unsigned to)
@@ -824,432 +222,6 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
 	return ret;
 }
 
-/* Submit pages for write. This was separated from actual file copying
-   because we might want to allocate block numbers in-between.
-   This function assumes that caller will adjust file size to correct value. */
-static int reiserfs_submit_file_region_for_write(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t pos,	/* Writing position offset */
-						 size_t num_pages,	/* Number of pages to write */
-						 size_t write_bytes,	/* number of bytes to write */
-						 struct page **prepared_pages	/* list of pages */
-    )
-{
-	int status;		// return status of block_commit_write.
-	int retval = 0;		// Return value we are going to return.
-	int i;			// loop counter
-	int offset;		// Writing offset in page.
-	int orig_write_bytes = write_bytes;
-	int sd_update = 0;
-
-	for (i = 0, offset = (pos & (PAGE_CACHE_SIZE - 1)); i < num_pages;
-	     i++, offset = 0) {
-		int count = min_t(int, PAGE_CACHE_SIZE - offset, write_bytes);	// How much of bytes to write to this page
-		struct page *page = prepared_pages[i];	// Current page we process.
-
-		status =
-		    reiserfs_commit_page(inode, page, offset, offset + count);
-		if (status)
-			retval = status;	// To not overcomplicate matters We are going to
-		// submit all the pages even if there was error.
-		// we only remember error status to report it on
-		// exit.
-		write_bytes -= count;
-	}
-	/* now that we've gotten all the ordered buffers marked dirty,
-	 * we can safely update i_size and close any running transaction
-	 */
-	if (pos + orig_write_bytes > inode->i_size) {
-		inode->i_size = pos + orig_write_bytes;	// Set new size
-		/* If the file have grown so much that tail packing is no
-		 * longer possible, reset "need to pack" flag */
-		if ((have_large_tails(inode->i_sb) &&
-		     inode->i_size > i_block_size(inode) * 4) ||
-		    (have_small_tails(inode->i_sb) &&
-		     inode->i_size > i_block_size(inode)))
-			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
-		else if ((have_large_tails(inode->i_sb) &&
-			  inode->i_size < i_block_size(inode) * 4) ||
-			 (have_small_tails(inode->i_sb) &&
-			  inode->i_size < i_block_size(inode)))
-			REISERFS_I(inode)->i_flags |= i_pack_on_close_mask;
-
-		if (th->t_trans_id) {
-			reiserfs_write_lock(inode->i_sb);
-			// this sets the proper flags for O_SYNC to trigger a commit
-			mark_inode_dirty(inode);
-			reiserfs_write_unlock(inode->i_sb);
-		} else {
-			reiserfs_write_lock(inode->i_sb);
-			reiserfs_update_inode_transaction(inode);
-			mark_inode_dirty(inode);
-			reiserfs_write_unlock(inode->i_sb);
-		}
-
-		sd_update = 1;
-	}
-	if (th->t_trans_id) {
-		reiserfs_write_lock(inode->i_sb);
-		if (!sd_update)
-			mark_inode_dirty(inode);
-		status = journal_end(th, th->t_super, th->t_blocks_allocated);
-		if (status)
-			retval = status;
-		reiserfs_write_unlock(inode->i_sb);
-	}
-	th->t_trans_id = 0;
-
-	/* 
-	 * we have to unlock the pages after updating i_size, otherwise
-	 * we race with writepage
-	 */
-	for (i = 0; i < num_pages; i++) {
-		struct page *page = prepared_pages[i];
-		unlock_page(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
-	}
-	return retval;
-}
-
-/* Look if passed writing region is going to touch file's tail
-   (if it is present). And if it is, convert the tail to unformatted node */
-static int reiserfs_check_for_tail_and_convert(struct inode *inode,	/* inode to deal with */
-					       loff_t pos,	/* Writing position */
-					       int write_bytes	/* amount of bytes to write */
-    )
-{
-	INITIALIZE_PATH(path);	// needed for search_for_position
-	struct cpu_key key;	// Key that would represent last touched writing byte.
-	struct item_head *ih;	// item header of found block;
-	int res;		// Return value of various functions we call.
-	int cont_expand_offset;	// We will put offset for generic_cont_expand here
-	// This can be int just because tails are created
-	// only for small files.
-
-/* this embodies a dependency on a particular tail policy */
-	if (inode->i_size >= inode->i_sb->s_blocksize * 4) {
-		/* such a big files do not have tails, so we won't bother ourselves
-		   to look for tails, simply return */
-		return 0;
-	}
-
-	reiserfs_write_lock(inode->i_sb);
-	/* find the item containing the last byte to be written, or if
-	 * writing past the end of the file then the last item of the
-	 * file (and then we check its type). */
-	make_cpu_key(&key, inode, pos + write_bytes + 1, TYPE_ANY,
-		     3 /*key length */ );
-	res = search_for_position_by_key(inode->i_sb, &key, &path);
-	if (res == IO_ERROR) {
-		reiserfs_write_unlock(inode->i_sb);
-		return -EIO;
-	}
-	ih = get_ih(&path);
-	res = 0;
-	if (is_direct_le_ih(ih)) {
-		/* Ok, closest item is file tail (tails are stored in "direct"
-		 * items), so we need to unpack it. */
-		/* To not overcomplicate matters, we just call generic_cont_expand
-		   which will in turn call other stuff and finally will boil down to
-		   reiserfs_get_block() that would do necessary conversion. */
-		cont_expand_offset =
-		    le_key_k_offset(get_inode_item_key_version(inode),
-				    &(ih->ih_key));
-		pathrelse(&path);
-		res = generic_cont_expand(inode, cont_expand_offset);
-	} else
-		pathrelse(&path);
-
-	reiserfs_write_unlock(inode->i_sb);
-	return res;
-}
-
-/* This function locks pages starting from @pos for @inode.
-   @num_pages pages are locked and stored in
-   @prepared_pages array. Also buffers are allocated for these pages.
-   First and last page of the region is read if it is overwritten only
-   partially. If last page did not exist before write (file hole or file
-   append), it is zeroed, then. 
-   Returns number of unallocated blocks that should be allocated to cover
-   new file data.*/
-static int reiserfs_prepare_file_region_for_write(struct inode *inode
-						  /* Inode of the file */ ,
-						  loff_t pos,	/* position in the file */
-						  size_t num_pages,	/* number of pages to
-									   prepare */
-						  size_t write_bytes,	/* Amount of bytes to be
-									   overwritten from
-									   @pos */
-						  struct page **prepared_pages	/* pointer to array
-										   where to store
-										   prepared pages */
-    )
-{
-	int res = 0;		// Return values of different functions we call.
-	unsigned long index = pos >> PAGE_CACHE_SHIFT;	// Offset in file in pages.
-	int from = (pos & (PAGE_CACHE_SIZE - 1));	// Writing offset in first page
-	int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
-	/* offset of last modified byte in last
-	   page */
-	struct address_space *mapping = inode->i_mapping;	// Pages are mapped here.
-	int i;			// Simple counter
-	int blocks = 0;		/* Return value (blocks that should be allocated) */
-	struct buffer_head *bh, *head;	// Current bufferhead and first bufferhead
-	// of a page.
-	unsigned block_start, block_end;	// Starting and ending offsets of current
-	// buffer in the page.
-	struct buffer_head *wait[2], **wait_bh = wait;	// Buffers for page, if
-	// Page appeared to be not up
-	// to date. Note how we have
-	// at most 2 buffers, this is
-	// because we at most may
-	// partially overwrite two
-	// buffers for one page. One at                                                 // the beginning of write area
-	// and one at the end.
-	// Everything inthe middle gets                                                 // overwritten totally.
-
-	struct cpu_key key;	// cpu key of item that we are going to deal with
-	struct item_head *ih = NULL;	// pointer to item head that we are going to deal with
-	struct buffer_head *itembuf = NULL;	// Buffer head that contains items that we are going to deal with
-	INITIALIZE_PATH(path);	// path to item, that we are going to deal with.
-	__le32 *item = NULL;	// pointer to item we are going to deal with
-	int item_pos = -1;	/* Position in indirect item */
-
-	if (num_pages < 1) {
-		reiserfs_warning(inode->i_sb,
-				 "green-9001: reiserfs_prepare_file_region_for_write "
-				 "called with zero number of pages to process");
-		return -EFAULT;
-	}
-
-	/* We have 2 loops for pages. In first loop we grab and lock the pages, so
-	   that nobody would touch these until we release the pages. Then
-	   we'd start to deal with mapping buffers to blocks. */
-	for (i = 0; i < num_pages; i++) {
-		prepared_pages[i] = grab_cache_page(mapping, index + i);	// locks the page
-		if (!prepared_pages[i]) {
-			res = -ENOMEM;
-			goto failed_page_grabbing;
-		}
-		if (!page_has_buffers(prepared_pages[i]))
-			create_empty_buffers(prepared_pages[i],
-					     inode->i_sb->s_blocksize, 0);
-	}
-
-	/* Let's count amount of blocks for a case where all the blocks
-	   overwritten are new (we will substract already allocated blocks later) */
-	if (num_pages > 2)
-		/* These are full-overwritten pages so we count all the blocks in
-		   these pages are counted as needed to be allocated */
-		blocks =
-		    (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	/* count blocks needed for first page (possibly partially written) */
-	blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) + !!(from & (inode->i_sb->s_blocksize - 1));	/* roundup */
-
-	/* Now we account for last page. If last page == first page (we
-	   overwrite only one page), we substract all the blocks past the
-	   last writing position in a page out of already calculated number
-	   of blocks */
-	blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT - inode->i_blkbits)) -
-	    ((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
-	/* Note how we do not roundup here since partial blocks still
-	   should be allocated */
-
-	/* Now if all the write area lies past the file end, no point in
-	   maping blocks, since there is none, so we just zero out remaining
-	   parts of first and last pages in write area (if needed) */
-	if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) {
-		if (from != 0)		/* First page needs to be partially zeroed */
-			zero_user_page(prepared_pages[0], 0, from, KM_USER0);
-
-		if (to != PAGE_CACHE_SIZE)	/* Last page needs to be partially zeroed */
-			zero_user_page(prepared_pages[num_pages-1], to,
-					PAGE_CACHE_SIZE - to, KM_USER0);
-
-		/* Since all blocks are new - use already calculated value */
-		return blocks;
-	}
-
-	/* Well, since we write somewhere into the middle of a file, there is
-	   possibility we are writing over some already allocated blocks, so
-	   let's map these blocks and substract number of such blocks out of blocks
-	   we need to allocate (calculated above) */
-	/* Mask write position to start on blocksize, we do it out of the
-	   loop for performance reasons */
-	pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
-	/* Set cpu key to the starting position in a file (on left block boundary) */
-	make_cpu_key(&key, inode,
-		     1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)),
-		     TYPE_ANY, 3 /*key length */ );
-
-	reiserfs_write_lock(inode->i_sb);	// We need that for at least search_by_key()
-	for (i = 0; i < num_pages; i++) {
-
-		head = page_buffers(prepared_pages[i]);
-		/* For each buffer in the page */
-		for (bh = head, block_start = 0; bh != head || !block_start;
-		     block_start = block_end, bh = bh->b_this_page) {
-			if (!bh)
-				reiserfs_panic(inode->i_sb,
-					       "green-9002: Allocated but absent buffer for a page?");
-			/* Find where this buffer ends */
-			block_end = block_start + inode->i_sb->s_blocksize;
-			if (i == 0 && block_end <= from)
-				/* if this buffer is before requested data to map, skip it */
-				continue;
-
-			if (i == num_pages - 1 && block_start >= to) {
-				/* If this buffer is after requested data to map, abort
-				   processing of current page */
-				break;
-			}
-
-			if (buffer_mapped(bh) && bh->b_blocknr != 0) {
-				/* This is optimisation for a case where buffer is mapped
-				   and have blocknumber assigned. In case significant amount
-				   of such buffers are present, we may avoid some amount
-				   of search_by_key calls.
-				   Probably it would be possible to move parts of this code
-				   out of BKL, but I afraid that would overcomplicate code
-				   without any noticeable benefit.
-				 */
-				item_pos++;
-				/* Update the key */
-				set_cpu_key_k_offset(&key,
-						     cpu_key_k_offset(&key) +
-						     inode->i_sb->s_blocksize);
-				blocks--;	// Decrease the amount of blocks that need to be
-				// allocated
-				continue;	// Go to the next buffer
-			}
-
-			if (!itembuf ||	/* if first iteration */
-			    item_pos >= ih_item_len(ih) / UNFM_P_SIZE) {	/* or if we progressed past the
-										   current unformatted_item */
-				/* Try to find next item */
-				res =
-				    search_for_position_by_key(inode->i_sb,
-							       &key, &path);
-				/* Abort if no more items */
-				if (res != POSITION_FOUND) {
-					/* make sure later loops don't use this item */
-					itembuf = NULL;
-					item = NULL;
-					break;
-				}
-
-				/* Update information about current indirect item */
-				itembuf = get_last_bh(&path);
-				ih = get_ih(&path);
-				item = get_item(&path);
-				item_pos = path.pos_in_item;
-
-				RFALSE(!is_indirect_le_ih(ih),
-				       "green-9003: indirect item expected");
-			}
-
-			/* See if there is some block associated with the file
-			   at that position, map the buffer to this block */
-			if (get_block_num(item, item_pos)) {
-				map_bh(bh, inode->i_sb,
-				       get_block_num(item, item_pos));
-				blocks--;	// Decrease the amount of blocks that need to be
-				// allocated
-			}
-			item_pos++;
-			/* Update the key */
-			set_cpu_key_k_offset(&key,
-					     cpu_key_k_offset(&key) +
-					     inode->i_sb->s_blocksize);
-		}
-	}
-	pathrelse(&path);	// Free the path
-	reiserfs_write_unlock(inode->i_sb);
-
-	/* Now zero out unmappend buffers for the first and last pages of
-	   write area or issue read requests if page is mapped. */
-	/* First page, see if it is not uptodate */
-	if (!PageUptodate(prepared_pages[0])) {
-		head = page_buffers(prepared_pages[0]);
-
-		/* For each buffer in page */
-		for (bh = head, block_start = 0; bh != head || !block_start;
-		     block_start = block_end, bh = bh->b_this_page) {
-
-			if (!bh)
-				reiserfs_panic(inode->i_sb,
-					       "green-9002: Allocated but absent buffer for a page?");
-			/* Find where this buffer ends */
-			block_end = block_start + inode->i_sb->s_blocksize;
-			if (block_end <= from)
-				/* if this buffer is before requested data to map, skip it */
-				continue;
-			if (block_start < from) {	/* Aha, our partial buffer */
-				if (buffer_mapped(bh)) {	/* If it is mapped, we need to
-								   issue READ request for it to
-								   not loose data */
-					ll_rw_block(READ, 1, &bh);
-					*wait_bh++ = bh;
-				} else {	/* Not mapped, zero it */
-					zero_user_page(prepared_pages[0],
-						       block_start,
-						       from - block_start, KM_USER0);
-					set_buffer_uptodate(bh);
-				}
-			}
-		}
-	}
-
-	/* Last page, see if it is not uptodate, or if the last page is past the end of the file. */
-	if (!PageUptodate(prepared_pages[num_pages - 1]) ||
-	    ((pos + write_bytes) >> PAGE_CACHE_SHIFT) >
-	    (inode->i_size >> PAGE_CACHE_SHIFT)) {
-		head = page_buffers(prepared_pages[num_pages - 1]);
-
-		/* for each buffer in page */
-		for (bh = head, block_start = 0; bh != head || !block_start;
-		     block_start = block_end, bh = bh->b_this_page) {
-
-			if (!bh)
-				reiserfs_panic(inode->i_sb,
-					       "green-9002: Allocated but absent buffer for a page?");
-			/* Find where this buffer ends */
-			block_end = block_start + inode->i_sb->s_blocksize;
-			if (block_start >= to)
-				/* if this buffer is after requested data to map, skip it */
-				break;
-			if (block_end > to) {	/* Aha, our partial buffer */
-				if (buffer_mapped(bh)) {	/* If it is mapped, we need to
-								   issue READ request for it to
-								   not loose data */
-					ll_rw_block(READ, 1, &bh);
-					*wait_bh++ = bh;
-				} else {	/* Not mapped, zero it */
-					zero_user_page(prepared_pages[num_pages-1],
-							to, block_end - to, KM_USER0);
-					set_buffer_uptodate(bh);
-				}
-			}
-		}
-	}
-
-	/* Wait for read requests we made to happen, if necessary */
-	while (wait_bh > wait) {
-		wait_on_buffer(*--wait_bh);
-		if (!buffer_uptodate(*wait_bh)) {
-			res = -EIO;
-			goto failed_read;
-		}
-	}
-
-	return blocks;
-      failed_page_grabbing:
-	num_pages = i;
-      failed_read:
-	reiserfs_unprepare_pages(prepared_pages, num_pages);
-	return res;
-}
-
 /* Write @count bytes at position @ppos in a file indicated by @file
    from the buffer @buf.  
 
@@ -1284,14 +256,9 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 							 * new current position before returning. */
 				   )
 {
-	size_t already_written = 0;	// Number of bytes already written to the file.
-	loff_t pos;		// Current position in the file.
-	ssize_t res;		// return value of various functions that we call.
-	int err = 0;
 	struct inode *inode = file->f_path.dentry->d_inode;	// Inode of the file that we are writing to.
 	/* To simplify coding at this time, we store
 	   locked pages in array for now */
-	struct page *prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
 	struct reiserfs_transaction_handle th;
 	th.t_trans_id = 0;
 
@@ -1311,212 +278,7 @@ static ssize_t reiserfs_file_write(struct file *file,	/* the file we are going t
 			count = MAX_NON_LFS - (unsigned long)*ppos;
 	}
 
-	if (file->f_flags & O_DIRECT)
-		return do_sync_write(file, buf, count, ppos);
-
-	if (unlikely((ssize_t) count < 0))
-		return -EINVAL;
-
-	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
-		return -EFAULT;
-
-	mutex_lock(&inode->i_mutex);	// locks the entire file for just us
-
-	pos = *ppos;
-
-	/* Check if we can write to specified region of file, file
-	   is not overly big and this kind of stuff. Adjust pos and
-	   count, if needed */
-	res = generic_write_checks(file, &pos, &count, 0);
-	if (res)
-		goto out;
-
-	if (count == 0)
-		goto out;
-
-	res = remove_suid(file->f_path.dentry);
-	if (res)
-		goto out;
-
-	file_update_time(file);
-
-	// Ok, we are done with all the checks.
-
-	// Now we should start real work
-
-	/* If we are going to write past the file's packed tail or if we are going
-	   to overwrite part of the tail, we need that tail to be converted into
-	   unformatted node */
-	res = reiserfs_check_for_tail_and_convert(inode, pos, count);
-	if (res)
-		goto out;
-
-	while (count > 0) {
-		/* This is the main loop in which we running until some error occures
-		   or until we write all of the data. */
-		size_t num_pages;	/* amount of pages we are going to write this iteration */
-		size_t write_bytes;	/* amount of bytes to write during this iteration */
-		size_t blocks_to_allocate;	/* how much blocks we need to allocate for this iteration */
-
-		/*  (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos */
-		num_pages = !!((pos + count) & (PAGE_CACHE_SIZE - 1)) +	/* round up partial
-									   pages */
-		    ((count +
-		      (pos & (PAGE_CACHE_SIZE - 1))) >> PAGE_CACHE_SHIFT);
-		/* convert size to amount of
-		   pages */
-		reiserfs_write_lock(inode->i_sb);
-		if (num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
-		    || num_pages > reiserfs_can_fit_pages(inode->i_sb)) {
-			/* If we were asked to write more data than we want to or if there
-			   is not that much space, then we shorten amount of data to write
-			   for this iteration. */
-			num_pages =
-			    min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME,
-				  reiserfs_can_fit_pages(inode->i_sb));
-			/* Also we should not forget to set size in bytes accordingly */
-			write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
-			    (pos & (PAGE_CACHE_SIZE - 1));
-			/* If position is not on the
-			   start of the page, we need
-			   to substract the offset
-			   within page */
-		} else
-			write_bytes = count;
-
-		/* reserve the blocks to be allocated later, so that later on
-		   we still have the space to write the blocks to */
-		reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
-						      num_pages <<
-						      (PAGE_CACHE_SHIFT -
-						       inode->i_blkbits));
-		reiserfs_write_unlock(inode->i_sb);
-
-		if (!num_pages) {	/* If we do not have enough space even for a single page... */
-			if (pos >
-			    inode->i_size + inode->i_sb->s_blocksize -
-			    (pos & (inode->i_sb->s_blocksize - 1))) {
-				res = -ENOSPC;
-				break;	// In case we are writing past the end of the last file block, break.
-			}
-			// Otherwise we are possibly overwriting the file, so
-			// let's set write size to be equal or less than blocksize.
-			// This way we get it correctly for file holes.
-			// But overwriting files on absolutelly full volumes would not
-			// be very efficient. Well, people are not supposed to fill
-			// 100% of disk space anyway.
-			write_bytes =
-			    min_t(size_t, count,
-				  inode->i_sb->s_blocksize -
-				  (pos & (inode->i_sb->s_blocksize - 1)));
-			num_pages = 1;
-			// No blocks were claimed before, so do it now.
-			reiserfs_claim_blocks_to_be_allocated(inode->i_sb,
-							      1 <<
-							      (PAGE_CACHE_SHIFT
-							       -
-							       inode->
-							       i_blkbits));
-		}
-
-		/* Prepare for writing into the region, read in all the
-		   partially overwritten pages, if needed. And lock the pages,
-		   so that nobody else can access these until we are done.
-		   We get number of actual blocks needed as a result. */
-		res = reiserfs_prepare_file_region_for_write(inode, pos,
-							     num_pages,
-							     write_bytes,
-							     prepared_pages);
-		if (res < 0) {
-			reiserfs_release_claimed_blocks(inode->i_sb,
-							num_pages <<
-							(PAGE_CACHE_SHIFT -
-							 inode->i_blkbits));
-			break;
-		}
-
-		blocks_to_allocate = res;
-
-		/* First we correct our estimate of how many blocks we need */
-		reiserfs_release_claimed_blocks(inode->i_sb,
-						(num_pages <<
-						 (PAGE_CACHE_SHIFT -
-						  inode->i_sb->
-						  s_blocksize_bits)) -
-						blocks_to_allocate);
-
-		if (blocks_to_allocate > 0) {	/*We only allocate blocks if we need to */
-			/* Fill in all the possible holes and append the file if needed */
-			res =
-			    reiserfs_allocate_blocks_for_region(&th, inode, pos,
-								num_pages,
-								write_bytes,
-								prepared_pages,
-								blocks_to_allocate);
-		}
-
-		/* well, we have allocated the blocks, so it is time to free
-		   the reservation we made earlier. */
-		reiserfs_release_claimed_blocks(inode->i_sb,
-						blocks_to_allocate);
-		if (res) {
-			reiserfs_unprepare_pages(prepared_pages, num_pages);
-			break;
-		}
-
-/* NOTE that allocating blocks and filling blocks can be done in reverse order
-   and probably we would do that just to get rid of garbage in files after a
-   crash */
-
-		/* Copy data from user-supplied buffer to file's pages */
-		res =
-		    reiserfs_copy_from_user_to_file_region(pos, num_pages,
-							   write_bytes,
-							   prepared_pages, buf);
-		if (res) {
-			reiserfs_unprepare_pages(prepared_pages, num_pages);
-			break;
-		}
-
-		/* Send the pages to disk and unlock them. */
-		res =
-		    reiserfs_submit_file_region_for_write(&th, inode, pos,
-							  num_pages,
-							  write_bytes,
-							  prepared_pages);
-		if (res)
-			break;
-
-		already_written += write_bytes;
-		buf += write_bytes;
-		*ppos = pos += write_bytes;
-		count -= write_bytes;
-		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
-	}
-
-	/* this is only true on error */
-	if (th.t_trans_id) {
-		reiserfs_write_lock(inode->i_sb);
-		err = journal_end(&th, th.t_super, th.t_blocks_allocated);
-		reiserfs_write_unlock(inode->i_sb);
-		if (err) {
-			res = err;
-			goto out;
-		}
-	}
-
-	if (likely(res >= 0) &&
-	    (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))))
-		res = generic_osync_inode(inode, file->f_mapping,
-		                          OSYNC_METADATA | OSYNC_DATA);
-
-	mutex_unlock(&inode->i_mutex);
-	reiserfs_async_progress_wait(inode->i_sb);
-	return (already_written != 0) ? already_written : res;
-
-      out:
-	mutex_unlock(&inode->i_mutex);	// unlock the file on exit.
-	return res;
+	return do_sync_write(file, buf, count, ppos);
 }
 
 const struct file_operations reiserfs_file_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ddde489f1cb..95051d44a91 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -17,11 +17,12 @@
 #include <linux/mpage.h>
 #include <linux/writeback.h>
 #include <linux/quotaops.h>
+#include <linux/swap.h>
 
-static int reiserfs_commit_write(struct file *f, struct page *page,
-				 unsigned from, unsigned to);
-static int reiserfs_prepare_write(struct file *f, struct page *page,
-				  unsigned from, unsigned to);
+int reiserfs_commit_write(struct file *f, struct page *page,
+			  unsigned from, unsigned to);
+int reiserfs_prepare_write(struct file *f, struct page *page,
+			   unsigned from, unsigned to);
 
 void reiserfs_delete_inode(struct inode *inode)
 {
@@ -2550,8 +2551,78 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
 	return reiserfs_write_full_page(page, wbc);
 }
 
-static int reiserfs_prepare_write(struct file *f, struct page *page,
-				  unsigned from, unsigned to)
+static int reiserfs_write_begin(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata)
+{
+	struct inode *inode;
+	struct page *page;
+	pgoff_t index;
+	int ret;
+	int old_ref = 0;
+
+ 	inode = mapping->host;
+	*fsdata = 0;
+ 	if (flags & AOP_FLAG_CONT_EXPAND &&
+ 	    (pos & (inode->i_sb->s_blocksize - 1)) == 0) {
+ 		pos ++;
+		*fsdata = (void *)(unsigned long)flags;
+	}
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+
+	reiserfs_wait_on_write_block(inode->i_sb);
+	fix_tail_page_for_writing(page);
+	if (reiserfs_transaction_running(inode->i_sb)) {
+		struct reiserfs_transaction_handle *th;
+		th = (struct reiserfs_transaction_handle *)current->
+		    journal_info;
+		BUG_ON(!th->t_refcount);
+		BUG_ON(!th->t_trans_id);
+		old_ref = th->t_refcount;
+		th->t_refcount++;
+	}
+	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				reiserfs_get_block);
+	if (ret && reiserfs_transaction_running(inode->i_sb)) {
+		struct reiserfs_transaction_handle *th = current->journal_info;
+		/* this gets a little ugly.  If reiserfs_get_block returned an
+		 * error and left a transaction running, we've got to close it,
+		 * and we've got to free handle if it was a persistent transaction.
+		 *
+		 * But, if we had nested into an existing transaction, we need
+		 * to just drop the ref count on the handle.
+		 *
+		 * If old_ref == 0, the transaction is from reiserfs_get_block,
+		 * and it was a persistent trans.  Otherwise, it was nested above.
+		 */
+		if (th->t_refcount > old_ref) {
+			if (old_ref)
+				th->t_refcount--;
+			else {
+				int err;
+				reiserfs_write_lock(inode->i_sb);
+				err = reiserfs_end_persistent_transaction(th);
+				reiserfs_write_unlock(inode->i_sb);
+				if (err)
+					ret = err;
+			}
+		}
+	}
+	if (ret) {
+		unlock_page(page);
+		page_cache_release(page);
+	}
+	return ret;
+}
+
+int reiserfs_prepare_write(struct file *f, struct page *page,
+			   unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
 	int ret;
@@ -2604,8 +2675,102 @@ static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block)
 	return generic_block_bmap(as, block, reiserfs_bmap);
 }
 
-static int reiserfs_commit_write(struct file *f, struct page *page,
-				 unsigned from, unsigned to)
+static int reiserfs_write_end(struct file *file, struct address_space *mapping,
+			      loff_t pos, unsigned len, unsigned copied,
+			      struct page *page, void *fsdata)
+{
+	struct inode *inode = page->mapping->host;
+	int ret = 0;
+	int update_sd = 0;
+	struct reiserfs_transaction_handle *th;
+	unsigned start;
+
+	if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND)
+		pos ++;
+
+	reiserfs_wait_on_write_block(inode->i_sb);
+	if (reiserfs_transaction_running(inode->i_sb))
+		th = current->journal_info;
+	else
+		th = NULL;
+
+	start = pos & (PAGE_CACHE_SIZE - 1);
+	if (unlikely(copied < len)) {
+		if (!PageUptodate(page))
+			copied = 0;
+
+		page_zero_new_buffers(page, start + copied, start + len);
+	}
+	flush_dcache_page(page);
+
+	reiserfs_commit_page(inode, page, start, start + copied);
+
+	/* generic_commit_write does this for us, but does not update the
+	 ** transaction tracking stuff when the size changes.  So, we have
+	 ** to do the i_size updates here.
+	 */
+	pos += copied;
+	if (pos > inode->i_size) {
+		struct reiserfs_transaction_handle myth;
+		reiserfs_write_lock(inode->i_sb);
+		/* If the file has grown beyond the border where it
+		   can have a tail, unmark it as needing tail
+		   packing */
+		if ((have_large_tails(inode->i_sb)
+		     && inode->i_size > i_block_size(inode) * 4)
+		    || (have_small_tails(inode->i_sb)
+			&& inode->i_size > i_block_size(inode)))
+			REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
+
+		ret = journal_begin(&myth, inode->i_sb, 1);
+		if (ret) {
+			reiserfs_write_unlock(inode->i_sb);
+			goto journal_error;
+		}
+		reiserfs_update_inode_transaction(inode);
+		inode->i_size = pos;
+		/*
+		 * this will just nest into our transaction.  It's important
+		 * to use mark_inode_dirty so the inode gets pushed around on the
+		 * dirty lists, and so that O_SYNC works as expected
+		 */
+		mark_inode_dirty(inode);
+		reiserfs_update_sd(&myth, inode);
+		update_sd = 1;
+		ret = journal_end(&myth, inode->i_sb, 1);
+		reiserfs_write_unlock(inode->i_sb);
+		if (ret)
+			goto journal_error;
+	}
+	if (th) {
+		reiserfs_write_lock(inode->i_sb);
+		if (!update_sd)
+			mark_inode_dirty(inode);
+		ret = reiserfs_end_persistent_transaction(th);
+		reiserfs_write_unlock(inode->i_sb);
+		if (ret)
+			goto out;
+	}
+
+      out:
+	unlock_page(page);
+	page_cache_release(page);
+	return ret == 0 ? copied : ret;
+
+      journal_error:
+	if (th) {
+		reiserfs_write_lock(inode->i_sb);
+		if (!update_sd)
+			reiserfs_update_sd(th, inode);
+		ret = reiserfs_end_persistent_transaction(th);
+		reiserfs_write_unlock(inode->i_sb);
+	}
+
+	goto out;
+}
+
+int reiserfs_commit_write(struct file *f, struct page *page,
+			  unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
 	loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
@@ -2909,7 +3074,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 		}
 		/* fill in hole pointers in the expanding truncate case. */
 		if (attr->ia_size > inode->i_size) {
-			error = generic_cont_expand(inode, attr->ia_size);
+			error = generic_cont_expand_simple(inode, attr->ia_size);
 			if (REISERFS_I(inode)->i_prealloc_count > 0) {
 				int err;
 				struct reiserfs_transaction_handle th;
@@ -2999,8 +3164,8 @@ const struct address_space_operations reiserfs_address_space_operations = {
 	.releasepage = reiserfs_releasepage,
 	.invalidatepage = reiserfs_invalidatepage,
 	.sync_page = block_sync_page,
-	.prepare_write = reiserfs_prepare_write,
-	.commit_write = reiserfs_commit_write,
+	.write_begin = reiserfs_write_begin,
+	.write_end = reiserfs_write_end,
 	.bmap = reiserfs_aop_bmap,
 	.direct_IO = reiserfs_direct_IO,
 	.set_page_dirty = reiserfs_set_page_dirty,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 11a0fcc2d40..c438a8f83f2 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -128,6 +128,10 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
 }
 #endif
 
+int reiserfs_commit_write(struct file *f, struct page *page,
+			  unsigned from, unsigned to);
+int reiserfs_prepare_write(struct file *f, struct page *page,
+			   unsigned from, unsigned to);
 /*
 ** reiserfs_unpack
 ** Function try to convert tail from direct item into indirect.
@@ -175,15 +179,13 @@ static int reiserfs_unpack(struct inode *inode, struct file *filp)
 	if (!page) {
 		goto out;
 	}
-	retval =
-	    mapping->a_ops->prepare_write(NULL, page, write_from, write_from);
+	retval = reiserfs_prepare_write(NULL, page, write_from, write_from);
 	if (retval)
 		goto out_unlock;
 
 	/* conversion can change page contents, must flush */
 	flush_dcache_page(page);
-	retval =
-	    mapping->a_ops->commit_write(NULL, page, write_from, write_from);
+	retval = reiserfs_commit_write(NULL, page, write_from, write_from);
 	REISERFS_I(inode)->i_flags |= i_nopack_mask;
 
       out_unlock:
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index bf6e5821453..fab4b9b2664 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -426,6 +426,12 @@ static inline __u32 xattr_hash(const char *msg, int len)
 	return csum_partial(msg, len, 0);
 }
 
+int reiserfs_commit_write(struct file *f, struct page *page,
+			  unsigned from, unsigned to);
+int reiserfs_prepare_write(struct file *f, struct page *page,
+			   unsigned from, unsigned to);
+
+
 /* Generic extended attribute operations that can be used by xa plugins */
 
 /*
@@ -512,15 +518,15 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer,
 			rxh->h_hash = cpu_to_le32(xahash);
 		}
 
-		err = mapping->a_ops->prepare_write(fp, page, page_offset,
-						    page_offset + chunk + skip);
+		err = reiserfs_prepare_write(fp, page, page_offset,
+					    page_offset + chunk + skip);
 		if (!err) {
 			if (buffer)
 				memcpy(data + skip, buffer + buffer_pos, chunk);
 			err =
-			    mapping->a_ops->commit_write(fp, page, page_offset,
-							 page_offset + chunk +
-							 skip);
+			    reiserfs_commit_write(fp, page, page_offset,
+						  page_offset + chunk +
+						  skip);
 		}
 		unlock_page(page);
 		reiserfs_put_page(page);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index c5d78a7e492..f5d14cebc75 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -292,29 +292,45 @@ out:
  * If the writer ends up delaying the write, the writer needs to
  * increment the page use counts until he is done with the page.
  */
-static int smb_prepare_write(struct file *file, struct page *page, 
-			     unsigned offset, unsigned to)
+static int smb_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	*pagep = __grab_cache_page(mapping, index);
+	if (!*pagep)
+		return -ENOMEM;
 	return 0;
 }
 
-static int smb_commit_write(struct file *file, struct page *page,
-			    unsigned offset, unsigned to)
+static int smb_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
 	int status;
+	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
 
-	status = -EFAULT;
 	lock_kernel();
-	status = smb_updatepage(file, page, offset, to-offset);
+	status = smb_updatepage(file, page, offset, copied);
 	unlock_kernel();
+
+	if (!status) {
+		if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
+			SetPageUptodate(page);
+		status = copied;
+	}
+
+	unlock_page(page);
+	page_cache_release(page);
+
 	return status;
 }
 
 const struct address_space_operations smb_file_aops = {
 	.readpage = smb_readpage,
 	.writepage = smb_writepage,
-	.prepare_write = smb_prepare_write,
-	.commit_write = smb_commit_write
+	.write_begin = smb_write_begin,
+	.write_end = smb_write_end,
 };
 
 /* 
diff --git a/fs/splice.c b/fs/splice.c
index e95a3622886..59a941d404d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -447,7 +447,7 @@ fill_it:
 	 */
 	while (page_nr < nr_pages)
 		page_cache_release(pages[page_nr++]);
-	in->f_ra.prev_index = index;
+	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 
 	if (spd.nr_pages)
 		return splice_to_pipe(pipe, &spd);
@@ -563,7 +563,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 	struct address_space *mapping = file->f_mapping;
 	unsigned int offset, this_len;
 	struct page *page;
-	pgoff_t index;
+	void *fsdata;
 	int ret;
 
 	/*
@@ -573,49 +573,16 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 	if (unlikely(ret))
 		return ret;
 
-	index = sd->pos >> PAGE_CACHE_SHIFT;
 	offset = sd->pos & ~PAGE_CACHE_MASK;
 
 	this_len = sd->len;
 	if (this_len + offset > PAGE_CACHE_SIZE)
 		this_len = PAGE_CACHE_SIZE - offset;
 
-find_page:
-	page = find_lock_page(mapping, index);
-	if (!page) {
-		ret = -ENOMEM;
-		page = page_cache_alloc_cold(mapping);
-		if (unlikely(!page))
-			goto out_ret;
-
-		/*
-		 * This will also lock the page
-		 */
-		ret = add_to_page_cache_lru(page, mapping, index,
-					    GFP_KERNEL);
-		if (unlikely(ret))
-			goto out_release;
-	}
-
-	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
-	if (unlikely(ret)) {
-		loff_t isize = i_size_read(mapping->host);
-
-		if (ret != AOP_TRUNCATED_PAGE)
-			unlock_page(page);
-		page_cache_release(page);
-		if (ret == AOP_TRUNCATED_PAGE)
-			goto find_page;
-
-		/*
-		 * prepare_write() may have instantiated a few blocks
-		 * outside i_size.  Trim these off again.
-		 */
-		if (sd->pos + this_len > isize)
-			vmtruncate(mapping->host, isize);
-
-		goto out_ret;
-	}
+	ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+	if (unlikely(ret))
+		goto out;
 
 	if (buf->page != page) {
 		/*
@@ -629,31 +596,9 @@ find_page:
 		kunmap_atomic(dst, KM_USER1);
 		buf->ops->unmap(pipe, buf, src);
 	}
-
-	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);
-	if (ret) {
-		if (ret == AOP_TRUNCATED_PAGE) {
-			page_cache_release(page);
-			goto find_page;
-		}
-		if (ret < 0)
-			goto out;
-		/*
-		 * Partial write has happened, so 'ret' already initialized by
-		 * number of bytes written, Where is nothing we have to do here.
-		 */
-	} else
-		ret = this_len;
-	/*
-	 * Return the number of bytes written and mark page as
-	 * accessed, we are now done!
-	 */
-	mark_page_accessed(page);
+	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
+				page, fsdata);
 out:
-	unlock_page(page);
-out_release:
-	page_cache_release(page);
-out_ret:
 	return ret;
 }
 
@@ -1390,10 +1335,10 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 	if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
 		ret = -EFAULT;
 
+	buf->ops->unmap(pipe, buf, src);
 out:
 	if (ret > 0)
 		sd->u.userptr += ret;
-	buf->ops->unmap(pipe, buf, src);
 	return ret;
 }
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 9236635111f..c4ef945d39c 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -24,8 +24,8 @@ extern struct super_block * sysfs_sb;
 
 static const struct address_space_operations sysfs_aops = {
 	.readpage	= simple_readpage,
-	.prepare_write	= simple_prepare_write,
-	.commit_write	= simple_commit_write
+	.write_begin	= simple_write_begin,
+	.write_end	= simple_write_end,
 };
 
 static struct backing_dev_info sysfs_backing_dev_info = {
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index e566b387fcf..56f655254bf 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/smp_lock.h>
+#include <linux/swap.h>
 #include "sysv.h"
 
 static int sysv_readdir(struct file *, void *, filldir_t);
@@ -37,12 +38,17 @@ static inline unsigned long dir_pages(struct inode *inode)
 	return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT;
 }
 
-static int dir_commit_chunk(struct page *page, unsigned from, unsigned to)
+static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	struct inode *dir = (struct inode *)page->mapping->host;
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
 	int err = 0;
 
-	page->mapping->a_ops->commit_write(NULL, page, from, to);
+	block_write_end(NULL, mapping, pos, len, len, page, NULL);
+	if (pos+len > dir->i_size) {
+		i_size_write(dir, pos+len);
+		mark_inode_dirty(dir);
+	}
 	if (IS_DIRSYNC(dir))
 		err = write_one_page(page, 1);
 	else
@@ -186,7 +192,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
 	unsigned long npages = dir_pages(dir);
 	unsigned long n;
 	char *kaddr;
-	unsigned from, to;
+	loff_t pos;
 	int err;
 
 	/* We take care of directory expansion in the same loop */
@@ -212,16 +218,17 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
 	return -EINVAL;
 
 got_it:
-	from = (char*)de - (char*)page_address(page);
-	to = from + SYSV_DIRSIZE;
+	pos = page_offset(page) +
+			(char*)de - (char*)page_address(page);
 	lock_page(page);
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err)
 		goto out_unlock;
 	memcpy (de->name, name, namelen);
 	memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2);
 	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
-	err = dir_commit_chunk(page, from, to);
+	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
 out_page:
@@ -238,15 +245,15 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
 	struct address_space *mapping = page->mapping;
 	struct inode *inode = (struct inode*)mapping->host;
 	char *kaddr = (char*)page_address(page);
-	unsigned from = (char*)de - kaddr;
-	unsigned to = from + SYSV_DIRSIZE;
+	loff_t pos = page_offset(page) + (char *)de - kaddr;
 	int err;
 
 	lock_page(page);
-	err = mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	BUG_ON(err);
 	de->inode = 0;
-	err = dir_commit_chunk(page, from, to);
+	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
 	dir_put_page(page);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
@@ -263,12 +270,13 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
 
 	if (!page)
 		return -ENOMEM;
-	kmap(page);
-	err = mapping->a_ops->prepare_write(NULL, page, 0, 2 * SYSV_DIRSIZE);
+	err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err) {
 		unlock_page(page);
 		goto fail;
 	}
+	kmap(page);
 
 	base = (char*)page_address(page);
 	memset(base, 0, PAGE_CACHE_SIZE);
@@ -280,9 +288,9 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
 	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), dir->i_ino);
 	strcpy(de->name,"..");
 
+	kunmap(page);
 	err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE);
 fail:
-	kunmap(page);
 	page_cache_release(page);
 	return err;
 }
@@ -336,16 +344,18 @@ not_empty:
 void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
 	struct inode *inode)
 {
-	struct inode *dir = (struct inode*)page->mapping->host;
-	unsigned from = (char *)de-(char*)page_address(page);
-	unsigned to = from + SYSV_DIRSIZE;
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
+	loff_t pos = page_offset(page) +
+			(char *)de-(char*)page_address(page);
 	int err;
 
 	lock_page(page);
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	BUG_ON(err);
 	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
-	err = dir_commit_chunk(page, from, to);
+	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
 	dir_put_page(page);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index f2bcccd1d6f..f042eec464c 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -453,23 +453,38 @@ static int sysv_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page,get_block,wbc);
 }
+
 static int sysv_readpage(struct file *file, struct page *page)
 {
 	return block_read_full_page(page,get_block);
 }
-static int sysv_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+
+int __sysv_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return block_prepare_write(page,from,to,get_block);
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				get_block);
 }
+
+static int sysv_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
+{
+	*pagep = NULL;
+	return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+}
+
 static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,get_block);
 }
+
 const struct address_space_operations sysv_aops = {
 	.readpage = sysv_readpage,
 	.writepage = sysv_writepage,
 	.sync_page = block_sync_page,
-	.prepare_write = sysv_prepare_write,
-	.commit_write = generic_commit_write,
+	.write_begin = sysv_write_begin,
+	.write_end = generic_write_end,
 	.bmap = sysv_bmap
 };
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 5b4fedf17cc..64c03bdf06a 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -136,6 +136,9 @@ extern unsigned long sysv_count_free_blocks(struct super_block *);
 
 /* itree.c */
 extern void sysv_truncate(struct inode *);
+extern int __sysv_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata);
 
 /* inode.c */
 extern int sysv_write_inode(struct inode *, int);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 5d7a4ea2775..7c7a1b39d56 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -76,36 +76,29 @@ static int udf_adinicb_writepage(struct page *page, struct writeback_control *wb
 	return 0;
 }
 
-static int udf_adinicb_prepare_write(struct file *file, struct page *page,
-				     unsigned offset, unsigned to)
+static int udf_adinicb_write_end(struct file *file,
+			struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
-	kmap(page);
-	return 0;
-}
-
-static int udf_adinicb_commit_write(struct file *file, struct page *page,
-				    unsigned offset, unsigned to)
-{
-	struct inode *inode = page->mapping->host;
-	char *kaddr = page_address(page);
+	struct inode *inode = mapping->host;
+	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+	char *kaddr;
 
+	kaddr = kmap_atomic(page, KM_USER0);
 	memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode) + offset,
-	       kaddr + offset, to - offset);
-	mark_inode_dirty(inode);
-	SetPageUptodate(page);
-	kunmap(page);
-	/* only one page here */
-	if (to > inode->i_size)
-		inode->i_size = to;
-	return 0;
+		kaddr + offset, copied);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	return simple_write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 
 const struct address_space_operations udf_adinicb_aops = {
 	.readpage	= udf_adinicb_readpage,
 	.writepage	= udf_adinicb_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= udf_adinicb_prepare_write,
-	.commit_write	= udf_adinicb_commit_write,
+	.write_begin = simple_write_begin,
+	.write_end = udf_adinicb_write_end,
 };
 
 static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1652b2c665b..6ff8151984c 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -133,10 +133,13 @@ static int udf_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page, udf_get_block);
 }
 
-static int udf_prepare_write(struct file *file, struct page *page,
-			     unsigned from, unsigned to)
+static int udf_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return block_prepare_write(page, from, to, udf_get_block);
+	*pagep = NULL;
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				udf_get_block);
 }
 
 static sector_t udf_bmap(struct address_space *mapping, sector_t block)
@@ -148,8 +151,8 @@ const struct address_space_operations udf_aops = {
 	.readpage	= udf_readpage,
 	.writepage	= udf_writepage,
 	.sync_page	= block_sync_page,
-	.prepare_write	= udf_prepare_write,
-	.commit_write	= generic_commit_write,
+	.write_begin		= udf_write_begin,
+	.write_end		= generic_write_end,
 	.bmap		= udf_bmap,
 };
 
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 154452172f4..2410ec6002d 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -19,6 +19,7 @@
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/ufs_fs.h>
+#include <linux/swap.h>
 
 #include "swab.h"
 #include "util.h"
@@ -38,12 +39,18 @@ static inline int ufs_match(struct super_block *sb, int len,
 	return !memcmp(name, de->d_name, len);
 }
 
-static int ufs_commit_chunk(struct page *page, unsigned from, unsigned to)
+static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	struct inode *dir = page->mapping->host;
+	struct address_space *mapping = page->mapping;
+	struct inode *dir = mapping->host;
 	int err = 0;
+
 	dir->i_version++;
-	page->mapping->a_ops->commit_write(NULL, page, from, to);
+	block_write_end(NULL, mapping, pos, len, len, page, NULL);
+	if (pos+len > dir->i_size) {
+		i_size_write(dir, pos+len);
+		mark_inode_dirty(dir);
+	}
 	if (IS_DIRSYNC(dir))
 		err = write_one_page(page, 1);
 	else
@@ -81,16 +88,20 @@ ino_t ufs_inode_by_name(struct inode *dir, struct dentry *dentry)
 void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
 		  struct page *page, struct inode *inode)
 {
-	unsigned from = (char *) de - (char *) page_address(page);
-	unsigned to = from + fs16_to_cpu(dir->i_sb, de->d_reclen);
+	loff_t pos = page_offset(page) +
+			(char *) de - (char *) page_address(page);
+	unsigned len = fs16_to_cpu(dir->i_sb, de->d_reclen);
 	int err;
 
 	lock_page(page);
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = __ufs_write_begin(NULL, page->mapping, pos, len,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	BUG_ON(err);
+
 	de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino);
 	ufs_set_de_type(dir->i_sb, de, inode->i_mode);
-	err = ufs_commit_chunk(page, from, to);
+
+	err = ufs_commit_chunk(page, pos, len);
 	ufs_put_page(page);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(dir);
@@ -312,7 +323,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
 	unsigned long npages = ufs_dir_pages(dir);
 	unsigned long n;
 	char *kaddr;
-	unsigned from, to;
+	loff_t pos;
 	int err;
 
 	UFSD("ENTER, name %s, namelen %u\n", name, namelen);
@@ -367,9 +378,10 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
 	return -EINVAL;
 
 got_it:
-	from = (char*)de - (char*)page_address(page);
-	to = from + rec_len;
-	err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
+	pos = page_offset(page) +
+			(char*)de - (char*)page_address(page);
+	err = __ufs_write_begin(NULL, page->mapping, pos, rec_len,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err)
 		goto out_unlock;
 	if (de->d_ino) {
@@ -386,7 +398,7 @@ got_it:
 	de->d_ino = cpu_to_fs32(sb, inode->i_ino);
 	ufs_set_de_type(sb, de, inode->i_mode);
 
-	err = ufs_commit_chunk(page, from, to);
+	err = ufs_commit_chunk(page, pos, rec_len);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
 
 	mark_inode_dirty(dir);
@@ -509,6 +521,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
 	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
 	unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
+	loff_t pos;
 	struct ufs_dir_entry *pde = NULL;
 	struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from);
 	int err;
@@ -532,13 +545,16 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
 	}
 	if (pde)
 		from = (char*)pde - (char*)page_address(page);
+
+	pos = page_offset(page) + from;
 	lock_page(page);
-	err = mapping->a_ops->prepare_write(NULL, page, from, to);
+	err = __ufs_write_begin(NULL, mapping, pos, to - from,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	BUG_ON(err);
 	if (pde)
-		pde->d_reclen = cpu_to_fs16(sb, to-from);
+		pde->d_reclen = cpu_to_fs16(sb, to - from);
 	dir->d_ino = 0;
-	err = ufs_commit_chunk(page, from, to);
+	err = ufs_commit_chunk(page, pos, to - from);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
 out:
@@ -559,14 +575,15 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
 
 	if (!page)
 		return -ENOMEM;
-	kmap(page);
-	err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
+
+	err = __ufs_write_begin(NULL, mapping, 0, chunk_size,
+				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
 	if (err) {
 		unlock_page(page);
 		goto fail;
 	}
 
-
+	kmap(page);
 	base = (char*)page_address(page);
 	memset(base, 0, PAGE_CACHE_SIZE);
 
@@ -584,10 +601,10 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
 	de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1));
 	ufs_set_de_namlen(sb, de, 2);
 	strcpy (de->d_name, "..");
+	kunmap(page);
 
 	err = ufs_commit_chunk(page, 0, chunk_size);
 fail:
-	kunmap(page);
 	page_cache_release(page);
 	return err;
 }
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index f18b79122fa..d84d4b0f477 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -558,24 +558,39 @@ static int ufs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page,ufs_getfrag_block,wbc);
 }
+
 static int ufs_readpage(struct file *file, struct page *page)
 {
 	return block_read_full_page(page,ufs_getfrag_block);
 }
-static int ufs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
+
+int __ufs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	return block_prepare_write(page,from,to,ufs_getfrag_block);
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				ufs_getfrag_block);
 }
+
+static int ufs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
+{
+	*pagep = NULL;
+	return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+}
+
 static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,ufs_getfrag_block);
 }
+
 const struct address_space_operations ufs_aops = {
 	.readpage = ufs_readpage,
 	.writepage = ufs_writepage,
 	.sync_page = block_sync_page,
-	.prepare_write = ufs_prepare_write,
-	.commit_write = generic_commit_write,
+	.write_begin = ufs_write_begin,
+	.write_end = generic_write_end,
 	.bmap = ufs_bmap
 };
 
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 06d344839c4..79a340a1909 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -231,6 +231,9 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value)
 
 extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *);
 extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t);
+extern int __ufs_write_begin(struct file *file, struct address_space *mapping,
+		loff_t pos, unsigned len, unsigned flags,
+		struct page **pagep, void **fsdata);
 
 /*
  * These functions manipulate ufs buffers
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 6f4c29e9c3d..354d68a32d4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1508,13 +1508,18 @@ xfs_vm_direct_IO(
 }
 
 STATIC int
-xfs_vm_prepare_write(
+xfs_vm_write_begin(
 	struct file		*file,
-	struct page		*page,
-	unsigned int		from,
-	unsigned int		to)
+	struct address_space	*mapping,
+	loff_t			pos,
+	unsigned		len,
+	unsigned		flags,
+	struct page		**pagep,
+	void			**fsdata)
 {
-	return block_prepare_write(page, from, to, xfs_get_blocks);
+	*pagep = NULL;
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+								xfs_get_blocks);
 }
 
 STATIC sector_t
@@ -1568,8 +1573,8 @@ const struct address_space_operations xfs_address_space_operations = {
 	.sync_page		= block_sync_page,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.prepare_write		= xfs_vm_prepare_write,
-	.commit_write		= generic_commit_write,
+	.write_begin		= xfs_vm_write_begin,
+	.write_end		= generic_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 765ec16a6e3..7e7aeb4c8a0 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -134,45 +134,34 @@ xfs_iozero(
 	loff_t			pos,	/* offset in file		*/
 	size_t			count)	/* size of data to zero		*/
 {
-	unsigned		bytes;
 	struct page		*page;
 	struct address_space	*mapping;
 	int			status;
 
 	mapping = ip->i_mapping;
 	do {
-		unsigned long index, offset;
+		unsigned offset, bytes;
+		void *fsdata;
 
 		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
-		index = pos >> PAGE_CACHE_SHIFT;
 		bytes = PAGE_CACHE_SIZE - offset;
 		if (bytes > count)
 			bytes = count;
 
-		status = -ENOMEM;
-		page = grab_cache_page(mapping, index);
-		if (!page)
-			break;
-
-		status = mapping->a_ops->prepare_write(NULL, page, offset,
-							offset + bytes);
+		status = pagecache_write_begin(NULL, mapping, pos, bytes,
+					AOP_FLAG_UNINTERRUPTIBLE,
+					&page, &fsdata);
 		if (status)
-			goto unlock;
+			break;
 
 		zero_user_page(page, offset, bytes, KM_USER0);
 
-		status = mapping->a_ops->commit_write(NULL, page, offset,
-							offset + bytes);
-		if (!status) {
-			pos += bytes;
-			count -= bytes;
-		}
-
-unlock:
-		unlock_page(page);
-		page_cache_release(page);
-		if (status)
-			break;
+		status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
+					page, fsdata);
+		WARN_ON(status <= 0); /* can't return less than zero! */
+		pos += bytes;
+		count -= bytes;
+		status = 0;
 	} while (count);
 
 	return (-status);
diff --git a/include/asm-alpha/page.h b/include/asm-alpha/page.h
index bae7f05716d..8cc97bfd378 100644
--- a/include/asm-alpha/page.h
+++ b/include/asm-alpha/page.h
@@ -3,11 +3,12 @@
 
 #ifdef __KERNEL__
 
+#include <linux/const.h>
 #include <asm/pal.h>
 
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT	13
-#define PAGE_SIZE	(1UL << PAGE_SHIFT)
+#define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
 #ifndef __ASSEMBLY__
diff --git a/include/asm-alpha/ptrace.h b/include/asm-alpha/ptrace.h
index 9933b8b3612..32c7a5cddd5 100644
--- a/include/asm-alpha/ptrace.h
+++ b/include/asm-alpha/ptrace.h
@@ -68,8 +68,6 @@ struct switch_stack {
 
 #ifdef __KERNEL__
 
-#define __ARCH_SYS_PTRACE	1
-
 #define user_mode(regs) (((regs)->ps & 8) != 0)
 #define instruction_pointer(regs) ((regs)->pc)
 #define profile_pc(regs) instruction_pointer(regs)
diff --git a/include/asm-arm/arch-imx/imxfb.h b/include/asm-arm/arch-imx/imxfb.h
index 7dbc7bbba65..3ed9ec8b9f0 100644
--- a/include/asm-arm/arch-imx/imxfb.h
+++ b/include/asm-arm/arch-imx/imxfb.h
@@ -7,6 +7,7 @@ struct imxfb_mach_info {
 	u_short		xres;
 	u_short		yres;
 
+	u_int		nonstd;
 	u_char		bpp;
 	u_char		hsync_len;
 	u_char		left_margin;
diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h
index 67f53e07db8..bb68b598c43 100644
--- a/include/asm-arm/arch-pxa/pxa-regs.h
+++ b/include/asm-arm/arch-pxa/pxa-regs.h
@@ -1823,6 +1823,7 @@
 #define LCCR1		__REG(0x44000004)  /* LCD Controller Control Register 1 */
 #define LCCR2		__REG(0x44000008)  /* LCD Controller Control Register 2 */
 #define LCCR3		__REG(0x4400000C)  /* LCD Controller Control Register 3 */
+#define LCCR4		__REG(0x44000010)  /* LCD Controller Control Register 4 */
 #define DFBR0		__REG(0x44000020)  /* DMA Channel 0 Frame Branch Register */
 #define DFBR1		__REG(0x44000024)  /* DMA Channel 1 Frame Branch Register */
 #define LCSR		__REG(0x44000038)  /* LCD Controller Status Register */
@@ -1836,6 +1837,16 @@
 #define LCCR3_8BPP (3 << 24)
 #define LCCR3_16BPP (4 << 24)
 
+#define LCCR3_PDFOR_0 (0 << 30)
+#define LCCR3_PDFOR_1 (1 << 30)
+#define LCCR3_PDFOR_2 (2 << 30)
+#define LCCR3_PDFOR_3 (3 << 30)
+
+#define LCCR4_PAL_FOR_0 (0 << 15)
+#define LCCR4_PAL_FOR_1 (1 << 15)
+#define LCCR4_PAL_FOR_2 (2 << 15)
+#define LCCR4_PAL_FOR_MASK (3 << 15)
+
 #define FDADR0		__REG(0x44000200)  /* DMA Channel 0 Frame Descriptor Address Register */
 #define FSADR0		__REG(0x44000204)  /* DMA Channel 0 Frame Source Address Register */
 #define FIDR0		__REG(0x44000208)  /* DMA Channel 0 Frame ID Register */
diff --git a/include/asm-arm/arch-pxa/pxafb.h b/include/asm-arm/arch-pxa/pxafb.h
index 81c3928d608..ea2336aa70e 100644
--- a/include/asm-arm/arch-pxa/pxafb.h
+++ b/include/asm-arm/arch-pxa/pxafb.h
@@ -70,7 +70,12 @@ struct pxafb_mach_info {
 	 *      LCCR3_HSP, LCCR3_VSP, LCCR0_Pcd(x), LCCR3_Bpp
 	 */
 	u_int		lccr3;
-
+	/* The following should be defined in LCCR4
+	 *	LCCR4_PAL_FOR_0 or LCCR4_PAL_FOR_1 or LCCR4_PAL_FOR_2
+	 *
+	 * All other bits in LCCR4 should be left alone.
+	 */
+	u_int		lccr4;
 	void (*pxafb_backlight_power)(int);
 	void (*pxafb_lcd_power)(int, struct fb_var_screeninfo *);
 
diff --git a/include/asm-arm/arch-s3c2410/fb.h b/include/asm-arm/arch-s3c2410/fb.h
index 93a58e7862b..5d0262601a7 100644
--- a/include/asm-arm/arch-s3c2410/fb.h
+++ b/include/asm-arm/arch-s3c2410/fb.h
@@ -14,12 +14,6 @@
 
 #include <asm/arch/regs-lcd.h>
 
-struct s3c2410fb_val {
-	unsigned int	defval;
-	unsigned int	min;
-	unsigned int	max;
-};
-
 struct s3c2410fb_hw {
 	unsigned long	lcdcon1;
 	unsigned long	lcdcon2;
@@ -28,23 +22,37 @@ struct s3c2410fb_hw {
 	unsigned long	lcdcon5;
 };
 
-struct s3c2410fb_mach_info {
-	unsigned char	fixed_syncs;	/* do not update sync/border */
-
-	/* LCD types */
-	int		type;
+/* LCD description */
+struct s3c2410fb_display {
+	/* LCD type */
+	unsigned type;
 
 	/* Screen size */
-	int		width;
-	int		height;
+	unsigned short width;
+	unsigned short height;
 
 	/* Screen info */
-	struct s3c2410fb_val xres;
-	struct s3c2410fb_val yres;
-	struct s3c2410fb_val bpp;
+	unsigned short xres;
+	unsigned short yres;
+	unsigned short bpp;
+
+	unsigned pixclock;		/* pixclock in picoseconds */
+	unsigned short left_margin;  /* value in pixels (TFT) or HCLKs (STN) */
+	unsigned short right_margin; /* value in pixels (TFT) or HCLKs (STN) */
+	unsigned short hsync_len;    /* value in pixels (TFT) or HCLKs (STN) */
+	unsigned short upper_margin;	/* value in lines (TFT) or 0 (STN) */
+	unsigned short lower_margin;	/* value in lines (TFT) or 0 (STN) */
+	unsigned short vsync_len;	/* value in lines (TFT) or 0 (STN) */
 
 	/* lcd configuration registers */
-	struct s3c2410fb_hw  regs;
+	unsigned long	lcdcon5;
+};
+
+struct s3c2410fb_mach_info {
+
+	struct s3c2410fb_display *displays;	/* attached displays info */
+	unsigned num_displays;			/* number of defined displays */
+	unsigned default_display;
 
 	/* GPIOs */
 
diff --git a/include/asm-avr32/kdebug.h b/include/asm-avr32/kdebug.h
index 7f54e2b15d1..fd7e99046b2 100644
--- a/include/asm-avr32/kdebug.h
+++ b/include/asm-avr32/kdebug.h
@@ -1,26 +1,10 @@
 #ifndef __ASM_AVR32_KDEBUG_H
 #define __ASM_AVR32_KDEBUG_H
 
-#include <linux/notifier.h>
-
 /* Grossly misnamed. */
 enum die_val {
 	DIE_BREAKPOINT,
 	DIE_SSTEP,
 };
 
-/*
- * These are only here because kprobes.c wants them to implement a
- * blatant layering violation.  Will hopefully go away soon once all
- * architectures are updated.
- */
-static inline int register_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-static inline int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-
 #endif /* __ASM_AVR32_KDEBUG_H */
diff --git a/include/asm-avr32/kprobes.h b/include/asm-avr32/kprobes.h
index 190a6377c80..996cb656474 100644
--- a/include/asm-avr32/kprobes.h
+++ b/include/asm-avr32/kprobes.h
@@ -17,7 +17,7 @@ typedef u16	kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION	0xd673	/* breakpoint */
 #define MAX_INSN_SIZE		2
 
-#define ARCH_INACTIVE_KPROBE_COUNT 1
+#define kretprobe_blacklist_size 0
 
 #define arch_remove_kprobe(p)	do { } while (0)
 
diff --git a/include/asm-blackfin/mach-bf548/bf54x-lq043.h b/include/asm-blackfin/mach-bf548/bf54x-lq043.h
new file mode 100644
index 00000000000..9c7ca62a45e
--- /dev/null
+++ b/include/asm-blackfin/mach-bf548/bf54x-lq043.h
@@ -0,0 +1,30 @@
+#ifndef BF54X_LQ043_H
+#define BF54X_LQ043_H
+
+struct bfin_bf54xfb_val {
+	unsigned int	defval;
+	unsigned int	min;
+	unsigned int	max;
+};
+
+struct bfin_bf54xfb_mach_info {
+	unsigned char	fixed_syncs;	/* do not update sync/border */
+
+	/* LCD types */
+	int		type;
+
+	/* Screen size */
+	int		width;
+	int		height;
+
+	/* Screen info */
+	struct bfin_bf54xfb_val xres;
+	struct bfin_bf54xfb_val yres;
+	struct bfin_bf54xfb_val bpp;
+
+	/* GPIOs */
+	unsigned short 		disp;
+
+};
+
+#endif /* BF54X_LQ043_H */
diff --git a/include/asm-frv/thread_info.h b/include/asm-frv/thread_info.h
index cc5433e78b5..348b8f1df17 100644
--- a/include/asm-frv/thread_info.h
+++ b/include/asm-frv/thread_info.h
@@ -88,9 +88,8 @@ register struct thread_info *__current_thread_info asm("gr15");
 	({							\
 		struct thread_info *ret;			\
 								\
-		ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
-		if (ret)					\
-			memset(ret, 0, THREAD_SIZE);		\
+		ret = kzalloc(THREAD_SIZE, GFP_KERNEL);		\
+								\
 		ret;						\
 	})
 #else
diff --git a/include/asm-frv/tlbflush.h b/include/asm-frv/tlbflush.h
index da3a3179a85..8370f97e41e 100644
--- a/include/asm-frv/tlbflush.h
+++ b/include/asm-frv/tlbflush.h
@@ -57,8 +57,7 @@ do {								\
 #define __flush_tlb_global()			flush_tlb_all()
 #define flush_tlb()				flush_tlb_all()
 #define flush_tlb_kernel_range(start, end)	flush_tlb_all()
-#define flush_tlb_pgtables(mm,start,end) \
-	asm volatile("movgs %0,scr0 ! movgs %0,scr1" :: "r"(ULONG_MAX) : "memory");
+#define flush_tlb_pgtables(mm,start,end)	do { } while(0)
 
 #else
 
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 30d8d33491d..52226e14bd7 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -46,6 +46,12 @@
 	 __pgdat->node_start_pfn;					\
 })
 
+#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
+
+/* memmap is virtually contiguous.  */
+#define __pfn_to_page(pfn)	(vmemmap + (pfn))
+#define __page_to_pfn(page)	((page) - vmemmap)
+
 #elif defined(CONFIG_SPARSEMEM)
 /*
  * Note: section's mem_map is encorded to reflect its start_pfn.
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 5f0d797d33f..44ef329531c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -125,10 +125,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
 #endif
 
-#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
-#define lazy_mmu_prot_update(pte)	do { } while (0)
-#endif
-
 #ifndef __HAVE_ARCH_MOVE_PTE
 #define move_pte(pte, prot, old_addr, new_addr)	(pte)
 #endif
diff --git a/include/asm-ia64/dma-mapping.h b/include/asm-ia64/dma-mapping.h
index 3ca6d5c14b2..f1735a22d0e 100644
--- a/include/asm-ia64/dma-mapping.h
+++ b/include/asm-ia64/dma-mapping.h
@@ -6,7 +6,7 @@
  *	David Mosberger-Tang <davidm@hpl.hp.com>
  */
 #include <asm/machvec.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 
 #define dma_alloc_coherent	platform_dma_alloc_coherent
 /* coherent mem. is cheap */
diff --git a/include/asm-ia64/kdebug.h b/include/asm-ia64/kdebug.h
index 320cd8e754e..35e49407d06 100644
--- a/include/asm-ia64/kdebug.h
+++ b/include/asm-ia64/kdebug.h
@@ -26,21 +26,6 @@
  * 2005-Oct	Keith Owens <kaos@sgi.com>.  Expand notify_die to cover more
  *		events.
  */
-#include <linux/notifier.h>
-
-/*
- * These are only here because kprobes.c wants them to implement a
- * blatant layering violation.  Will hopefully go away soon once all
- * architectures are updated.
- */
-static inline int register_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-static inline int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
 
 enum die_val {
 	DIE_BREAK = 1,
diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
index 067d9dea68f..a93ce9ef07f 100644
--- a/include/asm-ia64/kprobes.h
+++ b/include/asm-ia64/kprobes.h
@@ -83,7 +83,7 @@ struct kprobe_ctlblk {
 };
 
 #define ARCH_SUPPORTS_KRETPROBES
-#define  ARCH_INACTIVE_KPROBE_COUNT 1
+#define kretprobe_blacklist_size 0
 
 #define SLOT0_OPCODE_SHIFT	(37)
 #define SLOT1_p1_OPCODE_SHIFT	(37 - (64-46))
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index de6d01e24dd..0971ec90807 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -223,12 +223,6 @@ ia64_phys_addr_valid (unsigned long addr)
  * page table.
  */
 
-/*
- * On some architectures, special things need to be done when setting
- * the PTE in a page table.  Nothing special needs to be on IA-64.
- */
-#define set_pte(ptep, pteval)	(*(ptep) = (pteval))
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 
 #define VMALLOC_START		(RGN_BASE(RGN_GATE) + 0x200000000UL)
 #ifdef CONFIG_VIRTUAL_MEM_MAP
@@ -236,8 +230,14 @@ ia64_phys_addr_valid (unsigned long addr)
 # define VMALLOC_END		vmalloc_end
   extern unsigned long vmalloc_end;
 #else
+#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+/* SPARSEMEM_VMEMMAP uses half of vmalloc... */
+# define VMALLOC_END		(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 10)))
+# define vmemmap		((struct page *)VMALLOC_END)
+#else
 # define VMALLOC_END		(RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9)))
 #endif
+#endif
 
 /* fs/proc/kcore.c */
 #define	kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE))
@@ -315,6 +315,36 @@ ia64_phys_addr_valid (unsigned long addr)
 #define pte_mkhuge(pte)		(__pte(pte_val(pte)))
 
 /*
+ * Because ia64's Icache and Dcache are not coherent (on a cpu), we need to
+ * sync icache and dcache when we insert a *new* executable page.
+ *  __ia64_sync_icache_dcache() checks the Pg_arch_1 bit and flushes icache
+ * if necessary.
+ *
+ *  set_pte() is also called by the kernel, but we can expect that the kernel
+ *  flushes icache explicitly if necessary.
+ */
+#define pte_present_exec_user(pte)\
+	((pte_val(pte) & (_PAGE_P | _PAGE_PL_MASK | _PAGE_AR_RX)) == \
+		(_PAGE_P | _PAGE_PL_3 | _PAGE_AR_RX))
+
+extern void __ia64_sync_icache_dcache(pte_t pteval);
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	/* page is present && page is user  && page is executable
+	 * && (page swapin or new page or page migration
+	 *	|| copy_on_write with page copying.)
+	 */
+	if (pte_present_exec_user(pteval) &&
+	    (!pte_present(*ptep) ||
+		pte_pfn(*ptep) != pte_pfn(pteval)))
+		/* load_module() calls flush_icache_range() explicitly */
+		__ia64_sync_icache_dcache(pteval);
+	*ptep = pteval;
+}
+
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+/*
  * Make page protection values cacheable, uncacheable, or write-
  * combining.  Note that "protection" is really a misnomer here as the
  * protection value contains the memory attribute bits, dirty bits, and
@@ -483,12 +513,6 @@ extern struct page *zero_page_memmap_ptr;
 #define HUGETLB_PGDIR_MASK	(~(HUGETLB_PGDIR_SIZE-1))
 #endif
 
-/*
- * IA-64 doesn't have any external MMU info: the page tables contain all the necessary
- * information.  However, we use this routine to take care of any (delayed) i-cache
- * flushing that may be necessary.
- */
-extern void lazy_mmu_prot_update (pte_t pte);
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 /*
@@ -578,7 +602,7 @@ extern void lazy_mmu_prot_update (pte_t pte);
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
 #define __HAVE_ARCH_PGD_OFFSET_GATE
-#define __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
+
 
 #ifndef CONFIG_PGTABLE_4
 #include <asm-generic/pgtable-nopud.h>
diff --git a/include/asm-ia64/scatterlist.h b/include/asm-ia64/scatterlist.h
index a452ea24205..7d5234d5031 100644
--- a/include/asm-ia64/scatterlist.h
+++ b/include/asm-ia64/scatterlist.h
@@ -30,4 +30,6 @@ struct scatterlist {
 #define sg_dma_len(sg)		((sg)->dma_length)
 #define sg_dma_address(sg)	((sg)->dma_address)
 
+#define	ARCH_HAS_SG_CHAIN
+
 #endif /* _ASM_IA64_SCATTERLIST_H */
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 6314b29e8c4..1703c9d885b 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -58,7 +58,7 @@ extern char no_int_routing __devinitdata;
 
 extern cpumask_t cpu_online_map;
 extern cpumask_t cpu_core_map[NR_CPUS];
-extern cpumask_t cpu_sibling_map[NR_CPUS];
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 extern int smp_num_siblings;
 extern int smp_num_cpucores;
 extern void __iomem *ipi_base_addr;
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index 233f1caae04..2d67b72b18d 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -112,7 +112,7 @@ void build_cpu_to_node_map(void);
 #define topology_physical_package_id(cpu)	(cpu_data(cpu)->socket_id)
 #define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
 #define topology_core_siblings(cpu)		(cpu_core_map[cpu])
-#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
+#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #define smt_capable() 				(smp_num_siblings > 1)
 #endif
 
diff --git a/include/asm-m32r/ptrace.h b/include/asm-m32r/ptrace.h
index 632b4ce4269..a0755b98202 100644
--- a/include/asm-m32r/ptrace.h
+++ b/include/asm-m32r/ptrace.h
@@ -120,7 +120,10 @@ struct pt_regs {
 
 #include <asm/m32r.h>		/* M32R_PSW_BSM, M32R_PSW_BPM */
 
-#define __ARCH_SYS_PTRACE	1
+struct task_struct;
+extern void init_debug_traps(struct task_struct *);
+#define arch_ptrace_attach(child) \
+	init_debug_traps(child)
 
 #if defined(CONFIG_ISA_M32R2) || defined(CONFIG_CHIP_VDEC2)
 #define user_mode(regs) ((M32R_PSW_BPM & (regs)->psw) != 0)
diff --git a/include/asm-m32r/thread_info.h b/include/asm-m32r/thread_info.h
index b7ccc3e6860..c039820dba7 100644
--- a/include/asm-m32r/thread_info.h
+++ b/include/asm-m32r/thread_info.h
@@ -100,9 +100,8 @@ static inline struct thread_info *current_thread_info(void)
 	({							\
 		struct thread_info *ret;			\
 	 							\
-	 	ret = kmalloc(THREAD_SIZE, GFP_KERNEL);		\
-	 	if (ret)					\
-	 		memset(ret, 0, THREAD_SIZE);		\
+	 	ret = kzalloc(THREAD_SIZE, GFP_KERNEL);		\
+								\
 	 	ret;						\
 	 })
 #else
diff --git a/include/asm-m68knommu/system.h b/include/asm-m68knommu/system.h
index 5da43a5d12a..1bd1142685e 100644
--- a/include/asm-m68knommu/system.h
+++ b/include/asm-m68knommu/system.h
@@ -253,8 +253,7 @@ cmpxchg(volatile int *p, int old, int new)
         ");				\
 })
 #elif defined(CONFIG_NETtel) || defined(CONFIG_eLIA) || \
-      defined(CONFIG_DISKtel) || defined(CONFIG_SECUREEDGEMP3) || \
-      defined(CONFIG_CLEOPATRA)
+      defined(CONFIG_SECUREEDGEMP3) || defined(CONFIG_CLEOPATRA)
 #define HARD_RESET_NOW() ({		\
         asm("				\
 	movew #0x2700, %sr;		\
diff --git a/include/asm-mips/xxs1500.h b/include/asm-mips/xxs1500.h
deleted file mode 100644
index 4d84a90b0f2..00000000000
--- a/include/asm-mips/xxs1500.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * MyCable XXS1500 Referrence Board
- *
- * Copyright 2003 MontaVista Software Inc.
- * Author: Pete Popov, MontaVista Software, Inc.
- *         	ppopov@mvista.com or source@mvista.com
- *
- * ########################################################################
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * ########################################################################
- *
- *
- */
-#ifndef __ASM_XXS1500_H
-#define __ASM_XXS1500_H
-
-/* PCMCIA XXS1500 specific defines */
-#define PCMCIA_MAX_SOCK 0
-#define PCMCIA_NUM_SOCKS (PCMCIA_MAX_SOCK+1)
-#define PCMCIA_IRQ AU1000_GPIO_4
-
-#endif /* __ASM_XXS1500_ */
diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h
index d05891608f7..2af321f36ab 100644
--- a/include/asm-powerpc/dma-mapping.h
+++ b/include/asm-powerpc/dma-mapping.h
@@ -6,149 +6,6 @@
  */
 #ifndef _ASM_DMA_MAPPING_H
 #define _ASM_DMA_MAPPING_H
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/cache.h>
-/* need struct page definitions */
-#include <linux/mm.h>
-#include <asm/scatterlist.h>
-#include <asm/io.h>
-
-#define DMA_ERROR_CODE		(~(dma_addr_t)0x0)
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * DMA-consistent mapping functions for PowerPCs that don't support
- * cache snooping.  These allocate/free a region of uncached mapped
- * memory space for use with DMA devices.  Alternatively, you could
- * allocate the space "normally" and use the cache management functions
- * to ensure it is consistent.
- */
-extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp);
-extern void __dma_free_coherent(size_t size, void *vaddr);
-extern void __dma_sync(void *vaddr, size_t size, int direction);
-extern void __dma_sync_page(struct page *page, unsigned long offset,
-				 size_t size, int direction);
-
-#else /* ! CONFIG_NOT_COHERENT_CACHE */
-/*
- * Cache coherent cores.
- */
-
-#define __dma_alloc_coherent(gfp, size, handle)	NULL
-#define __dma_free_coherent(size, addr)		((void)0)
-#define __dma_sync(addr, size, rw)		((void)0)
-#define __dma_sync_page(pg, off, sz, rw)	((void)0)
-
-#endif /* ! CONFIG_NOT_COHERENT_CACHE */
-
-#ifdef CONFIG_PPC64
-/*
- * DMA operations are abstracted for G5 vs. i/pSeries, PCI vs. VIO
- */
-struct dma_mapping_ops {
-	void *		(*alloc_coherent)(struct device *dev, size_t size,
-				dma_addr_t *dma_handle, gfp_t flag);
-	void		(*free_coherent)(struct device *dev, size_t size,
-				void *vaddr, dma_addr_t dma_handle);
-	dma_addr_t	(*map_single)(struct device *dev, void *ptr,
-				size_t size, enum dma_data_direction direction);
-	void		(*unmap_single)(struct device *dev, dma_addr_t dma_addr,
-				size_t size, enum dma_data_direction direction);
-	int		(*map_sg)(struct device *dev, struct scatterlist *sg,
-				int nents, enum dma_data_direction direction);
-	void		(*unmap_sg)(struct device *dev, struct scatterlist *sg,
-				int nents, enum dma_data_direction direction);
-	int		(*dma_supported)(struct device *dev, u64 mask);
-	int		(*set_dma_mask)(struct device *dev, u64 dma_mask);
-};
-
-static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
-{
-	/* We don't handle the NULL dev case for ISA for now. We could
-	 * do it via an out of line call but it is not needed for now. The
-	 * only ISA DMA device we support is the floppy and we have a hack
-	 * in the floppy driver directly to get a device for us.
-	 */
-	if (unlikely(dev == NULL || dev->archdata.dma_ops == NULL))
-		return NULL;
-	return dev->archdata.dma_ops;
-}
-
-static inline int dma_supported(struct device *dev, u64 mask)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (unlikely(dma_ops == NULL))
-		return 0;
-	if (dma_ops->dma_supported == NULL)
-		return 1;
-	return dma_ops->dma_supported(dev, mask);
-}
-
-static inline int dma_set_mask(struct device *dev, u64 dma_mask)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	if (unlikely(dma_ops == NULL))
-		return -EIO;
-	if (dma_ops->set_dma_mask != NULL)
-		return dma_ops->set_dma_mask(dev, dma_mask);
-	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
-		return -EIO;
-	*dev->dma_mask = dma_mask;
-	return 0;
-}
-
-static inline void *dma_alloc_coherent(struct device *dev, size_t size,
-				       dma_addr_t *dma_handle, gfp_t flag)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-	return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
-}
-
-static inline void dma_free_coherent(struct device *dev, size_t size,
-				     void *cpu_addr, dma_addr_t dma_handle)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-	dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
-}
-
-static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
-					size_t size,
-					enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-	return dma_ops->map_single(dev, cpu_addr, size, direction);
-}
-
-static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
-				    size_t size,
-				    enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-	dma_ops->unmap_single(dev, dma_addr, size, direction);
-}
-
-static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
-				      unsigned long offset, size_t size,
-				      enum dma_data_direction direction)
-{
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-	return dma_ops->map_single(dev, page_address(page) + offset, size,
-			direction);
-}
 
 static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 				  size_t size,
@@ -276,14 +133,15 @@ static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
 }
 
 static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+dma_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 	   enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(direction == DMA_NONE);
 
-	for (i = 0; i < nents; i++, sg++) {
+	for_each_sg(sgl, sg, nents, i) {
 		BUG_ON(!sg->page);
 		__dma_sync_page(sg->page, sg->offset, sg->length, direction);
 		sg->dma_address = page_to_bus(sg->page) + sg->offset;
@@ -318,26 +176,28 @@ static inline void dma_sync_single_for_device(struct device *dev,
 }
 
 static inline void dma_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sg, int nents,
+		struct scatterlist *sgl, int nents,
 		enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(direction == DMA_NONE);
 
-	for (i = 0; i < nents; i++, sg++)
+	for_each_sg(sgl, sg, nents, i)
 		__dma_sync_page(sg->page, sg->offset, sg->length, direction);
 }
 
 static inline void dma_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sg, int nents,
+		struct scatterlist *sgl, int nents,
 		enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(direction == DMA_NONE);
 
-	for (i = 0; i < nents; i++, sg++)
+	for_each_sg(sgl, sg, nents, i)
 		__dma_sync_page(sg->page, sg->offset, sg->length, direction);
 }
 
diff --git a/include/asm-powerpc/kdebug.h b/include/asm-powerpc/kdebug.h
index 295f0162c60..ae6d206728a 100644
--- a/include/asm-powerpc/kdebug.h
+++ b/include/asm-powerpc/kdebug.h
@@ -2,25 +2,6 @@
 #define _ASM_POWERPC_KDEBUG_H
 #ifdef __KERNEL__
 
-/* nearly identical to x86_64/i386 code */
-
-#include <linux/notifier.h>
-
-/*
- * These are only here because kprobes.c wants them to implement a
- * blatant layering violation.  Will hopefully go away soon once all
- * architectures are updated.
- */
-static inline int register_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-static inline int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-extern struct atomic_notifier_head powerpc_die_chain;
-
 /* Grossly misnamed. */
 enum die_val {
 	DIE_OOPS = 1,
diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h
index 8b08b447d6f..afabad230db 100644
--- a/include/asm-powerpc/kprobes.h
+++ b/include/asm-powerpc/kprobes.h
@@ -81,8 +81,8 @@ typedef unsigned int kprobe_opcode_t;
 #endif
 
 #define ARCH_SUPPORTS_KRETPROBES
-#define  ARCH_INACTIVE_KPROBE_COUNT 1
 #define flush_insn_slot(p)	do { } while (0)
+#define kretprobe_blacklist_size 0
 
 void kretprobe_trampoline(void);
 extern void arch_remove_kprobe(struct kprobe *p);
diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h
index 300f9a199bf..dd4c26dc57d 100644
--- a/include/asm-powerpc/pgtable-ppc64.h
+++ b/include/asm-powerpc/pgtable-ppc64.h
@@ -68,6 +68,14 @@
 #define USER_REGION_ID		(0UL)
 
 /*
+ * Defines the address of the vmemmap area, in the top 16th of the
+ * kernel region.
+ */
+#define VMEMMAP_BASE (ASM_CONST(CONFIG_KERNEL_START) + \
+					(0xfUL << (REGION_SHIFT - 4)))
+#define vmemmap ((struct page *)VMEMMAP_BASE)
+
+/*
  * Common bits in a linux-style PTE.  These match the bits in the
  * (hardware-defined) PowerPC PTE as closely as possible. Additional
  * bits may be defined in pgtable-*.h
diff --git a/include/asm-powerpc/ps3av.h b/include/asm-powerpc/ps3av.h
index 7df4250802d..967930b82ed 100644
--- a/include/asm-powerpc/ps3av.h
+++ b/include/asm-powerpc/ps3av.h
@@ -283,7 +283,7 @@
 #define PS3AV_CMD_VIDEO_CS_YUV422			0x0002
 #define PS3AV_CMD_VIDEO_CS_YUV444			0x0003
 
-/* for automode */
+/* for broadcast automode */
 #define PS3AV_RESBIT_720x480P			0x0003	/* 0x0001 | 0x0002 */
 #define PS3AV_RESBIT_720x576P			0x0003	/* 0x0001 | 0x0002 */
 #define PS3AV_RESBIT_1280x720P			0x0004
@@ -298,13 +298,22 @@
 						| PS3AV_RESBIT_1920x1080I \
 						| PS3AV_RESBIT_1920x1080P)
 
+/* for VESA automode */
+#define PS3AV_RESBIT_VGA			0x0001
+#define PS3AV_RESBIT_WXGA			0x0002
+#define PS3AV_RESBIT_SXGA			0x0004
+#define PS3AV_RESBIT_WUXGA			0x0008
+#define PS3AV_RES_MASK_VESA			(PS3AV_RESBIT_WXGA |\
+						 PS3AV_RESBIT_SXGA |\
+						 PS3AV_RESBIT_WUXGA)
+
 #define PS3AV_MONITOR_TYPE_HDMI			1	/* HDMI */
 #define PS3AV_MONITOR_TYPE_DVI			2	/* DVI */
-#define PS3AV_DEFAULT_HDMI_VID_REG_60		PS3AV_CMD_VIDEO_VID_480P
-#define PS3AV_DEFAULT_AVMULTI_VID_REG_60	PS3AV_CMD_VIDEO_VID_480I
-#define PS3AV_DEFAULT_HDMI_VID_REG_50		PS3AV_CMD_VIDEO_VID_576P
-#define PS3AV_DEFAULT_AVMULTI_VID_REG_50	PS3AV_CMD_VIDEO_VID_576I
-#define PS3AV_DEFAULT_DVI_VID			PS3AV_CMD_VIDEO_VID_480P
+
+#define PS3AV_DEFAULT_HDMI_MODE_ID_REG_60	2	/* 480p */
+#define PS3AV_DEFAULT_AVMULTI_MODE_ID_REG_60	1	/* 480i */
+#define PS3AV_DEFAULT_HDMI_MODE_ID_REG_50	7	/* 576p */
+#define PS3AV_DEFAULT_AVMULTI_MODE_ID_REG_50	6	/* 576i */
 
 #define PS3AV_REGION_60				0x01
 #define PS3AV_REGION_50				0x02
@@ -697,20 +706,12 @@ extern int ps3av_cmd_audio_mute(int, u32 *, u32);
 extern int ps3av_cmd_audio_active(int, u32);
 extern int ps3av_cmd_avb_param(struct ps3av_pkt_avb_param *, u32);
 extern int ps3av_cmd_av_get_hw_conf(struct ps3av_pkt_av_get_hw_conf *);
-#ifdef PS3AV_DEBUG
-extern void ps3av_cmd_av_hw_conf_dump(const struct ps3av_pkt_av_get_hw_conf *);
-extern void ps3av_cmd_av_monitor_info_dump(const struct ps3av_pkt_av_get_monitor_info *);
-#else
-static inline void ps3av_cmd_av_hw_conf_dump(const struct ps3av_pkt_av_get_hw_conf *hw_conf) {}
-static inline void ps3av_cmd_av_monitor_info_dump(const struct ps3av_pkt_av_get_monitor_info *monitor_info) {}
-#endif
 extern int ps3av_cmd_video_get_monitor_info(struct ps3av_pkt_av_get_monitor_info *,
 					    u32);
 
-extern int ps3av_set_video_mode(u32, int);
+extern int ps3av_set_video_mode(u32);
 extern int ps3av_set_audio_mode(u32, u32, u32, u32, u32);
-extern int ps3av_get_auto_mode(int);
-extern int ps3av_set_mode(u32, int);
+extern int ps3av_get_auto_mode(void);
 extern int ps3av_get_mode(void);
 extern int ps3av_get_scanmode(int);
 extern int ps3av_get_refresh_rate(int);
diff --git a/include/asm-powerpc/scatterlist.h b/include/asm-powerpc/scatterlist.h
index 8c992d1491d..b075f619c3b 100644
--- a/include/asm-powerpc/scatterlist.h
+++ b/include/asm-powerpc/scatterlist.h
@@ -41,5 +41,7 @@ struct scatterlist {
 #define ISA_DMA_THRESHOLD	(~0UL)
 #endif
 
+#define ARCH_HAS_SG_CHAIN
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SCATTERLIST_H */
diff --git a/include/asm-powerpc/smp.h b/include/asm-powerpc/smp.h
index 19102bfc14c..505f35bacaa 100644
--- a/include/asm-powerpc/smp.h
+++ b/include/asm-powerpc/smp.h
@@ -26,6 +26,7 @@
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #endif
+#include <asm/percpu.h>
 
 extern int boot_cpuid;
 
@@ -58,7 +59,7 @@ extern int smp_hw_index[];
 					(smp_hw_index[(cpu)] = (phys))
 #endif
 
-extern cpumask_t cpu_sibling_map[NR_CPUS];
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 
 /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
  *
@@ -77,6 +78,7 @@ void smp_init_pSeries(void);
 void smp_init_cell(void);
 void smp_init_celleb(void);
 void smp_setup_cpu_maps(void);
+void smp_setup_cpu_sibling_map(void);
 
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h
index 0ad21a849b5..ca23b681ad0 100644
--- a/include/asm-powerpc/topology.h
+++ b/include/asm-powerpc/topology.h
@@ -108,7 +108,7 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
 #ifdef CONFIG_PPC64
 #include <asm/smp.h>
 
-#define topology_thread_siblings(cpu)	(cpu_sibling_map[cpu])
+#define topology_thread_siblings(cpu)	(per_cpu(cpu_sibling_map, cpu))
 #endif
 #endif
 
diff --git a/include/asm-s390/kdebug.h b/include/asm-s390/kdebug.h
index 04418af08f8..40db27cd6e6 100644
--- a/include/asm-s390/kdebug.h
+++ b/include/asm-s390/kdebug.h
@@ -4,24 +4,9 @@
 /*
  * Feb 2006 Ported to s390 <grundym@us.ibm.com>
  */
-#include <linux/notifier.h>
 
 struct pt_regs;
 
-/*
- * These are only here because kprobes.c wants them to implement a
- * blatant layering violation. Will hopefully go away soon once all
- * architectures are updated.
- */
-static inline int register_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-static inline int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-
 enum die_val {
 	DIE_OOPS = 1,
 	DIE_BPT,
diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h
index 340ba10446e..948db3d0d05 100644
--- a/include/asm-s390/kprobes.h
+++ b/include/asm-s390/kprobes.h
@@ -47,7 +47,7 @@ typedef u16 kprobe_opcode_t;
 	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
 
 #define ARCH_SUPPORTS_KRETPROBES
-#define ARCH_INACTIVE_KPROBE_COUNT 0
+#define kretprobe_blacklist_size 0
 
 #define KPROBE_SWAP_INST	0x10
 
diff --git a/include/asm-sh/kdebug.h b/include/asm-sh/kdebug.h
index 382cfc7deb7..49cd69051a8 100644
--- a/include/asm-sh/kdebug.h
+++ b/include/asm-sh/kdebug.h
@@ -1,8 +1,6 @@
 #ifndef __ASM_SH_KDEBUG_H
 #define __ASM_SH_KDEBUG_H
 
-#include <linux/notifier.h>
-
 /* Grossly misnamed. */
 enum die_val {
 	DIE_TRAP,
diff --git a/include/asm-sparc/scatterlist.h b/include/asm-sparc/scatterlist.h
index a4fcf9ac964..4055af90ad7 100644
--- a/include/asm-sparc/scatterlist.h
+++ b/include/asm-sparc/scatterlist.h
@@ -19,4 +19,6 @@ struct scatterlist {
 
 #define ISA_DMA_THRESHOLD (~0UL)
 
+#define ARCH_HAS_SG_CHAIN
+
 #endif /* !(_SPARC_SCATTERLIST_H) */
diff --git a/include/asm-sparc64/kdebug.h b/include/asm-sparc64/kdebug.h
index 9974c7b0aeb..f905b773235 100644
--- a/include/asm-sparc64/kdebug.h
+++ b/include/asm-sparc64/kdebug.h
@@ -1,26 +1,8 @@
 #ifndef _SPARC64_KDEBUG_H
 #define _SPARC64_KDEBUG_H
 
-/* Nearly identical to x86_64/i386 code. */
-
-#include <linux/notifier.h>
-
 struct pt_regs;
 
-/*
- * These are only here because kprobes.c wants them to implement a
- * blatant layering violation.  Will hopefully go away soon once all
- * architectures are updated.
- */
-static inline int register_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-static inline int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return 0;
-}
-
 extern void bad_trap(struct pt_regs *, long);
 
 /* Grossly misnamed. */
diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h
index 7f6774dca5f..5020eaf67c2 100644
--- a/include/asm-sparc64/kprobes.h
+++ b/include/asm-sparc64/kprobes.h
@@ -10,8 +10,9 @@ typedef u32 kprobe_opcode_t;
 #define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */
 #define MAX_INSN_SIZE 2
 
+#define kretprobe_blacklist_size 0
+
 #define arch_remove_kprobe(p)	do {} while (0)
-#define  ARCH_INACTIVE_KPROBE_COUNT 0
 
 #define flush_insn_slot(p)		\
 do { 	flushi(&(p)->ainsn.insn[0]);	\
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index 0393380d754..3167ccff64f 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -42,6 +42,9 @@
 #define HI_OBP_ADDRESS		_AC(0x0000000100000000,UL)
 #define VMALLOC_START		_AC(0x0000000100000000,UL)
 #define VMALLOC_END		_AC(0x0000000200000000,UL)
+#define VMEMMAP_BASE		_AC(0x0000000200000000,UL)
+
+#define vmemmap			((struct page *)VMEMMAP_BASE)
 
 /* XXX All of this needs to be rethought so we can take advantage
  * XXX cheetah's full 64-bit virtual address space, ie. no more hole
diff --git a/include/asm-sparc64/scatterlist.h b/include/asm-sparc64/scatterlist.h
index 048fdb40e81..703c5bbe6c8 100644
--- a/include/asm-sparc64/scatterlist.h
+++ b/include/asm-sparc64/scatterlist.h
@@ -20,4 +20,6 @@ struct scatterlist {
 
 #define ISA_DMA_THRESHOLD	(~0UL)
 
+#define ARCH_HAS_SG_CHAIN
+
 #endif /* !(_SPARC64_SCATTERLIST_H) */
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index e8a96a31761..42c09949526 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -28,8 +28,9 @@
  
 #include <asm/bitops.h>
 #include <asm/atomic.h>
+#include <asm/percpu.h>
 
-extern cpumask_t cpu_sibling_map[NR_CPUS];
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 extern cpumask_t cpu_core_map[NR_CPUS];
 extern int sparc64_multi_core;
 
diff --git a/include/asm-sparc64/topology.h b/include/asm-sparc64/topology.h
index 290ac75f385..c6b557034f6 100644
--- a/include/asm-sparc64/topology.h
+++ b/include/asm-sparc64/topology.h
@@ -5,7 +5,7 @@
 #define topology_physical_package_id(cpu)	(cpu_data(cpu).proc_id)
 #define topology_core_id(cpu)			(cpu_data(cpu).core_id)
 #define topology_core_siblings(cpu)		(cpu_core_map[cpu])
-#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
+#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #define mc_capable()				(sparc64_multi_core)
 #define smt_capable()				(sparc64_multi_core)
 #endif /* CONFIG_SMP */
diff --git a/include/asm-um/a.out.h b/include/asm-um/a.out.h
index 78bc9eed26b..9281dd8eb33 100644
--- a/include/asm-um/a.out.h
+++ b/include/asm-um/a.out.h
@@ -1,8 +1,12 @@
+/*
+ * Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
 #ifndef __UM_A_OUT_H
 #define __UM_A_OUT_H
 
 #include "asm/arch/a.out.h"
-#include "choose-mode.h"
 
 #undef STACK_TOP
 #undef STACK_TOP_MAX
@@ -13,10 +17,8 @@ extern unsigned long host_task_size;
 
 #define STACK_ROOM (stacksizelim)
 
-extern int honeypot;
-#define STACK_TOP \
-	CHOOSE_MODE((honeypot ? host_task_size : task_size), task_size)
+#define STACK_TOP task_size
 
-#define STACK_TOP_MAX	STACK_TOP
+#define STACK_TOP_MAX STACK_TOP
 
 #endif
diff --git a/include/asm-um/elf-i386.h b/include/asm-um/elf-i386.h
index 9bab712dc5c..ca94a136dfe 100644
--- a/include/asm-um/elf-i386.h
+++ b/include/asm-um/elf-i386.h
@@ -5,7 +5,8 @@
 #ifndef __UM_ELF_I386_H
 #define __UM_ELF_I386_H
 
-#include <asm/user.h>
+#include <linux/sched.h>
+#include "skas.h"
 
 #define R_386_NONE	0
 #define R_386_32	1
@@ -75,6 +76,15 @@ typedef struct user_i387_struct elf_fpregset_t;
 	pr_reg[16] = PT_REGS_SS(regs);		\
 } while(0);
 
+static inline int elf_core_copy_fpregs(struct task_struct *t,
+				       elf_fpregset_t *fpu)
+{
+	int cpu = ((struct thread_info *) t->stack)->cpu;
+	return save_fp_registers(userspace_pid[cpu], (unsigned long *) fpu);
+}
+
+#define ELF_CORE_COPY_FPREGS(t, fpu) elf_core_copy_fpregs(t, fpu)
+
 extern long elf_aux_hwcap;
 #define ELF_HWCAP (elf_aux_hwcap)
 
diff --git a/include/asm-um/elf-x86_64.h b/include/asm-um/elf-x86_64.h
index 857471c49da..3c9d543eb61 100644
--- a/include/asm-um/elf-x86_64.h
+++ b/include/asm-um/elf-x86_64.h
@@ -1,5 +1,6 @@
 /*
  * Copyright 2003 PathScale, Inc.
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  *
  * Licensed under the GPL
  */
@@ -36,7 +37,7 @@ typedef unsigned long elf_greg_t;
 #define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
-typedef struct { } elf_fpregset_t;
+typedef struct user_i387_struct elf_fpregset_t;
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
@@ -67,27 +68,27 @@ typedef struct { } elf_fpregset_t;
 } while (0)
 
 #define ELF_CORE_COPY_REGS(pr_reg, regs)		\
-	(pr_reg)[0] = (regs)->regs.skas.regs[0];			\
-	(pr_reg)[1] = (regs)->regs.skas.regs[1];			\
-	(pr_reg)[2] = (regs)->regs.skas.regs[2];			\
-	(pr_reg)[3] = (regs)->regs.skas.regs[3];			\
-	(pr_reg)[4] = (regs)->regs.skas.regs[4];			\
-	(pr_reg)[5] = (regs)->regs.skas.regs[5];			\
-	(pr_reg)[6] = (regs)->regs.skas.regs[6];			\
-	(pr_reg)[7] = (regs)->regs.skas.regs[7];			\
-	(pr_reg)[8] = (regs)->regs.skas.regs[8];			\
-	(pr_reg)[9] = (regs)->regs.skas.regs[9];			\
-	(pr_reg)[10] = (regs)->regs.skas.regs[10];			\
-	(pr_reg)[11] = (regs)->regs.skas.regs[11];			\
-	(pr_reg)[12] = (regs)->regs.skas.regs[12];			\
-	(pr_reg)[13] = (regs)->regs.skas.regs[13];			\
-	(pr_reg)[14] = (regs)->regs.skas.regs[14];			\
-	(pr_reg)[15] = (regs)->regs.skas.regs[15];			\
-	(pr_reg)[16] = (regs)->regs.skas.regs[16];			\
-	(pr_reg)[17] = (regs)->regs.skas.regs[17];			\
-	(pr_reg)[18] = (regs)->regs.skas.regs[18];			\
-	(pr_reg)[19] = (regs)->regs.skas.regs[19];			\
-	(pr_reg)[20] = (regs)->regs.skas.regs[20];			\
+	(pr_reg)[0] = (regs)->regs.gp[0];			\
+	(pr_reg)[1] = (regs)->regs.gp[1];			\
+	(pr_reg)[2] = (regs)->regs.gp[2];			\
+	(pr_reg)[3] = (regs)->regs.gp[3];			\
+	(pr_reg)[4] = (regs)->regs.gp[4];			\
+	(pr_reg)[5] = (regs)->regs.gp[5];			\
+	(pr_reg)[6] = (regs)->regs.gp[6];			\
+	(pr_reg)[7] = (regs)->regs.gp[7];			\
+	(pr_reg)[8] = (regs)->regs.gp[8];			\
+	(pr_reg)[9] = (regs)->regs.gp[9];			\
+	(pr_reg)[10] = (regs)->regs.gp[10];			\
+	(pr_reg)[11] = (regs)->regs.gp[11];			\
+	(pr_reg)[12] = (regs)->regs.gp[12];			\
+	(pr_reg)[13] = (regs)->regs.gp[13];			\
+	(pr_reg)[14] = (regs)->regs.gp[14];			\
+	(pr_reg)[15] = (regs)->regs.gp[15];			\
+	(pr_reg)[16] = (regs)->regs.gp[16];			\
+	(pr_reg)[17] = (regs)->regs.gp[17];			\
+	(pr_reg)[18] = (regs)->regs.gp[18];			\
+	(pr_reg)[19] = (regs)->regs.gp[19];			\
+	(pr_reg)[20] = (regs)->regs.gp[20];			\
 	(pr_reg)[21] = current->thread.arch.fs;			\
 	(pr_reg)[22] = 0;					\
 	(pr_reg)[23] = 0;					\
@@ -122,14 +123,3 @@ extern long elf_aux_hwcap;
 #define SET_PERSONALITY(ex, ibcs2) do ; while(0)
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/include/asm-um/ldt.h b/include/asm-um/ldt.h
index 96f82a456ce..b2553f3e87e 100644
--- a/include/asm-um/ldt.h
+++ b/include/asm-um/ldt.h
@@ -11,11 +11,7 @@
 #include "asm/semaphore.h"
 #include "asm/host_ldt.h"
 
-struct mmu_context_skas;
 extern void ldt_host_info(void);
-extern long init_new_ldt(struct mmu_context_skas * to_mm,
-			 struct mmu_context_skas * from_mm);
-extern void free_ldt(struct mmu_context_skas * mm);
 
 #define LDT_PAGES_MAX \
 	((LDT_ENTRIES * LDT_ENTRY_SIZE)/PAGE_SIZE)
diff --git a/include/asm-um/mmu_context.h b/include/asm-um/mmu_context.h
index 9aa4b44e8cc..5f3b863aef9 100644
--- a/include/asm-um/mmu_context.h
+++ b/include/asm-um/mmu_context.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -9,7 +9,6 @@
 #include <asm-generic/mm_hooks.h>
 
 #include "linux/sched.h"
-#include "choose-mode.h"
 #include "um_mmu.h"
 
 #define get_mmu_context(task) do ; while(0)
@@ -30,8 +29,7 @@ static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
 	 * possible.
 	 */
 	if (old != new && (current->flags & PF_BORROWED_MM))
-		CHOOSE_MODE(force_flush_all(),
-			    switch_mm_skas(&new->context.skas.id));
+		__switch_mm(&new->context.id);
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
@@ -43,8 +41,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		cpu_clear(cpu, prev->cpu_vm_mask);
 		cpu_set(cpu, next->cpu_vm_mask);
 		if(next != &init_mm)
-			CHOOSE_MODE((void) 0, 
-				    switch_mm_skas(&next->context.skas.id));
+			__switch_mm(&next->context.id);
 	}
 }
 
@@ -53,38 +50,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 {
 }
 
-extern int init_new_context_skas(struct task_struct *task, 
-				 struct mm_struct *mm);
+extern int init_new_context(struct task_struct *task, struct mm_struct *mm);
 
-static inline int init_new_context_tt(struct task_struct *task, 
-				      struct mm_struct *mm)
-{
-	return(0);
-}
-
-static inline int init_new_context(struct task_struct *task, 
-				   struct mm_struct *mm)
-{
-	return(CHOOSE_MODE_PROC(init_new_context_tt, init_new_context_skas, 
-				task, mm));
-}
-
-extern void destroy_context_skas(struct mm_struct *mm);
-
-static inline void destroy_context(struct mm_struct *mm)
-{
-	CHOOSE_MODE((void) 0, destroy_context_skas(mm));
-}
+extern void destroy_context(struct mm_struct *mm);
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
diff --git a/include/asm-um/page.h b/include/asm-um/page.h
index 8e310d81e5b..4b424c75fca 100644
--- a/include/asm-um/page.h
+++ b/include/asm-um/page.h
@@ -9,6 +9,7 @@
 
 struct page;
 
+#include <linux/types.h>
 #include <asm/vm-flags.h>
 
 /* PAGE_SHIFT determines the page size */
diff --git a/include/asm-um/pgalloc.h b/include/asm-um/pgalloc.h
index 34ab268ef40..14904876e8f 100644
--- a/include/asm-um/pgalloc.h
+++ b/include/asm-um/pgalloc.h
@@ -42,7 +42,7 @@ static inline void pte_free(struct page *pte)
 
 #ifdef CONFIG_3_LEVEL_PGTABLES
 
-extern __inline__ void pmd_free(pmd_t *pmd)
+static inline void pmd_free(pmd_t *pmd)
 {
 	free_page((unsigned long)pmd);
 }
diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h
index ca0c2a92a11..aa82b88db80 100644
--- a/include/asm-um/pgtable-3level.h
+++ b/include/asm-um/pgtable-3level.h
@@ -69,7 +69,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
         return pmd;
 }
 
-extern inline void pud_clear (pud_t *pud)
+static inline void pud_clear (pud_t *pud)
 {
         set_pud(pud, __pud(0));
 }
diff --git a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h
index d99bbddffdb..78c0599cc80 100644
--- a/include/asm-um/processor-generic.h
+++ b/include/asm-um/processor-generic.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -11,44 +11,32 @@ struct pt_regs;
 struct task_struct;
 
 #include "asm/ptrace.h"
-#include "choose-mode.h"
 #include "registers.h"
 #include "sysdep/archsetjmp.h"
 
 struct mm_struct;
 
 struct thread_struct {
-	/* This flag is set to 1 before calling do_fork (and analyzed in
+	struct task_struct *saved_task;
+	/*
+	 * This flag is set to 1 before calling do_fork (and analyzed in
 	 * copy_thread) to mark that we are begin called from userspace (fork /
 	 * vfork / clone), and reset to 0 after. It is left to 0 when called
-	 * from kernelspace (i.e. kernel_thread() or fork_idle(), as of 2.6.11). */
-	struct task_struct *saved_task;
+	 * from kernelspace (i.e. kernel_thread() or fork_idle(),
+	 * as of 2.6.11).
+	 */
 	int forking;
 	int nsyscalls;
 	struct pt_regs regs;
 	int singlestep_syscall;
 	void *fault_addr;
-	void *fault_catcher;
+	jmp_buf *fault_catcher;
 	struct task_struct *prev_sched;
 	unsigned long temp_stack;
-	void *exec_buf;
+	jmp_buf *exec_buf;
 	struct arch_thread arch;
-	union {
-#ifdef CONFIG_MODE_TT
-		struct {
-			int extern_pid;
-			int tracing;
-			int switch_pipe[2];
-			int vm_seq;
-		} tt;
-#endif
-#ifdef CONFIG_MODE_SKAS
-		struct {
-			jmp_buf switch_buf;
-			int mm_count;
-		} skas;
-#endif
-	} mode;
+	jmp_buf switch_buf;
+	int mm_count;
 	struct {
 		int op;
 		union {
@@ -71,7 +59,7 @@ struct thread_struct {
 { \
 	.forking		= 0, \
 	.nsyscalls		= 0, \
-        .regs		   	= EMPTY_REGS, \
+	.regs		   	= EMPTY_REGS,	\
 	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
 	.temp_stack		= 0, \
@@ -86,7 +74,10 @@ typedef struct {
 
 extern struct task_struct *alloc_task_struct(void);
 
-extern void release_thread(struct task_struct *);
+static inline void release_thread(struct task_struct *task)
+{
+}
+
 extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 static inline void prepare_to_copy(struct task_struct *tsk)
@@ -136,12 +127,7 @@ extern struct cpuinfo_um cpu_data[];
 #endif
 
 
-#ifdef CONFIG_MODE_SKAS
-#define KSTK_REG(tsk, reg) \
-	get_thread_reg(reg, &tsk->thread.mode.skas.switch_buf)
-#else
-#define KSTK_REG(tsk, reg) (0xbadbabe)
-#endif
+#define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
 #define get_wchan(p) (0)
 
 #endif
diff --git a/include/asm-um/processor-x86_64.h b/include/asm-um/processor-x86_64.h
index 31c2d4d685b..d946bf2d334 100644
--- a/include/asm-um/processor-x86_64.h
+++ b/include/asm-um/processor-x86_64.h
@@ -18,7 +18,7 @@ struct arch_thread {
 };
 
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-extern inline void rep_nop(void)
+static inline void rep_nop(void)
 {
 	__asm__ __volatile__("rep;nop": : :"memory");
 }
diff --git a/include/asm-um/ptrace-generic.h b/include/asm-um/ptrace-generic.h
index 99c87c5ce99..6aefcd32fc6 100644
--- a/include/asm-um/ptrace-generic.h
+++ b/include/asm-um/ptrace-generic.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -9,10 +9,11 @@
 #ifndef __ASSEMBLY__
 
 #include "asm/arch/ptrace-abi.h"
+#include <asm/user.h>
 #include "sysdep/ptrace.h"
 
 struct pt_regs {
-	union uml_pt_regs regs;
+	struct uml_pt_regs regs;
 };
 
 #define EMPTY_REGS { .regs = EMPTY_UML_PT_REGS }
@@ -35,16 +36,18 @@ struct pt_regs {
 
 struct task_struct;
 
+extern long subarch_ptrace(struct task_struct *child, long request, long addr,
+			   long data);
 extern unsigned long getreg(struct task_struct *child, int regno);
 extern int putreg(struct task_struct *child, int regno, unsigned long value);
-extern int get_fpregs(unsigned long buf, struct task_struct *child);
-extern int set_fpregs(unsigned long buf, struct task_struct *child);
-extern int get_fpxregs(unsigned long buf, struct task_struct *child);
-extern int set_fpxregs(unsigned long buf, struct task_struct *tsk);
+extern int get_fpregs(struct user_i387_struct __user *buf,
+		      struct task_struct *child);
+extern int set_fpregs(struct user_i387_struct __user *buf,
+		      struct task_struct *child);
 
 extern void show_regs(struct pt_regs *regs);
 
-extern void send_sigtrap(struct task_struct *tsk, union uml_pt_regs *regs,
+extern void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs,
 			 int error_code);
 
 extern int arch_copy_tls(struct task_struct *new);
diff --git a/include/asm-um/ptrace-i386.h b/include/asm-um/ptrace-i386.h
index 6e2528bb008..b2d24c5ea2c 100644
--- a/include/asm-um/ptrace-i386.h
+++ b/include/asm-um/ptrace-i386.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -9,10 +9,9 @@
 #define HOST_AUDIT_ARCH AUDIT_ARCH_I386
 
 #include "linux/compiler.h"
-#include "sysdep/ptrace.h"
 #include "asm/ptrace-generic.h"
-#include "asm/host_ldt.h"
-#include "choose-mode.h"
+#include <asm/user.h>
+#include "sysdep/ptrace.h"
 
 #define PT_REGS_EAX(r) UPT_EAX(&(r)->regs)
 #define PT_REGS_EBX(r) UPT_EBX(&(r)->regs)
@@ -41,34 +40,21 @@
 
 #define user_mode(r) UPT_IS_USER(&(r)->regs)
 
+/*
+ * Forward declaration to avoid including sysdep/tls.h, which causes a
+ * circular include, and compilation failures.
+ */
+struct user_desc;
+
+extern int get_fpxregs(struct user_fxsr_struct __user *buf,
+		       struct task_struct *child);
+extern int set_fpxregs(struct user_fxsr_struct __user *buf,
+		       struct task_struct *tsk);
+
 extern int ptrace_get_thread_area(struct task_struct *child, int idx,
                                   struct user_desc __user *user_desc);
 
 extern int ptrace_set_thread_area(struct task_struct *child, int idx,
                                   struct user_desc __user *user_desc);
 
-extern int do_set_thread_area_skas(struct user_desc *info);
-extern int do_get_thread_area_skas(struct user_desc *info);
-
-extern int do_set_thread_area_tt(struct user_desc *info);
-extern int do_get_thread_area_tt(struct user_desc *info);
-
-extern int arch_switch_tls_skas(struct task_struct *from, struct task_struct *to);
-extern int arch_switch_tls_tt(struct task_struct *from, struct task_struct *to);
-
-extern void arch_switch_to_tt(struct task_struct *from, struct task_struct *to);
-extern void arch_switch_to_skas(struct task_struct *from, struct task_struct *to);
-
-static inline int do_get_thread_area(struct user_desc *info)
-{
-	return CHOOSE_MODE_PROC(do_get_thread_area_tt, do_get_thread_area_skas, info);
-}
-
-static inline int do_set_thread_area(struct user_desc *info)
-{
-	return CHOOSE_MODE_PROC(do_set_thread_area_tt, do_set_thread_area_skas, info);
-}
-
-struct task_struct;
-
 #endif
diff --git a/include/asm-um/ptrace-x86_64.h b/include/asm-um/ptrace-x86_64.h
index bf61d17de3f..4c475350dcf 100644
--- a/include/asm-um/ptrace-x86_64.h
+++ b/include/asm-um/ptrace-x86_64.h
@@ -76,15 +76,6 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
         return -ENOSYS;
 }
 
-static inline void arch_switch_to_tt(struct task_struct *from,
-                                     struct task_struct *to)
-{
-}
-
-extern void arch_switch_to_skas(struct task_struct *from,
-				struct task_struct *to);
-
-extern long arch_prctl_skas(struct task_struct *task, int code,
-			    unsigned long __user *addr);
-
+extern long arch_prctl(struct task_struct *task, int code,
+		       unsigned long __user *addr);
 #endif
diff --git a/include/asm-um/smp.h b/include/asm-um/smp.h
index 84f8cf29324..f27a9631317 100644
--- a/include/asm-um/smp.h
+++ b/include/asm-um/smp.h
@@ -18,7 +18,7 @@ extern int hard_smp_processor_id(void);
 extern int ncpus;
 
 
-extern inline void smp_cpus_done(unsigned int maxcpus)
+static inline void smp_cpus_done(unsigned int maxcpus)
 {
 }
 
diff --git a/include/asm-um/tlbflush.h b/include/asm-um/tlbflush.h
index e78c28c1f35..9d647c55350 100644
--- a/include/asm-um/tlbflush.h
+++ b/include/asm-um/tlbflush.h
@@ -1,5 +1,5 @@
-/* 
- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
+/*
+ * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
@@ -7,7 +7,6 @@
 #define __UM_TLBFLUSH_H
 
 #include <linux/mm.h>
-#include "choose-mode.h"
 
 /*
  * TLB flushing:
@@ -25,19 +24,7 @@ extern void flush_tlb_all(void);
 extern void flush_tlb_mm(struct mm_struct *mm);
 extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 
 			    unsigned long end);
-extern void flush_tlb_page_skas(struct vm_area_struct *vma,
-				unsigned long address);
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long address)
-{
-	address &= PAGE_MASK;
-
-	CHOOSE_MODE(flush_tlb_range(vma, address, address + PAGE_SIZE),
-		    flush_tlb_page_skas(vma, address));
-}
-
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long address);
 extern void flush_tlb_kernel_vm(void);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void __flush_tlb_one(unsigned long addr);
diff --git a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h
index 16c734af919..077032d4fc4 100644
--- a/include/asm-um/uaccess.h
+++ b/include/asm-um/uaccess.h
@@ -80,7 +80,7 @@
 	 __put_user(x, private_ptr) : -EFAULT); \
 })
 
-#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+#define strlen_user(str) strnlen_user(str, ~0U >> 1)
 
 struct exception_table_entry
 {
diff --git a/include/asm-x86/cpufeature_32.h b/include/asm-x86/cpufeature_32.h
index 7b3aa28ebc6..f17e688dfb0 100644
--- a/include/asm-x86/cpufeature_32.h
+++ b/include/asm-x86/cpufeature_32.h
@@ -92,6 +92,7 @@
 #define X86_FEATURE_CID		(4*32+10) /* Context ID */
 #define X86_FEATURE_CX16        (4*32+13) /* CMPXCHG16B */
 #define X86_FEATURE_XTPR	(4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_DCA		(4*32+18) /* Direct Cache Access */
 
 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
 #define X86_FEATURE_XSTORE	(5*32+ 2) /* on-CPU RNG present (xstore insn) */
diff --git a/include/asm-x86/dma-mapping_32.h b/include/asm-x86/dma-mapping_32.h
index f1d72d177f6..6a2d26cb5da 100644
--- a/include/asm-x86/dma-mapping_32.h
+++ b/include/asm-x86/dma-mapping_32.h
@@ -2,10 +2,10 @@
 #define _ASM_I386_DMA_MAPPING_H
 
 #include <linux/mm.h>
+#include <linux/scatterlist.h>
 
 #include <asm/cache.h>
 #include <asm/io.h>
-#include <asm/scatterlist.h>
 #include <asm/bug.h>
 
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
@@ -35,18 +35,19 @@ dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
 }
 
 static inline int
-dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
 	   enum dma_data_direction direction)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(!valid_dma_direction(direction));
-	WARN_ON(nents == 0 || sg[0].length == 0);
+	WARN_ON(nents == 0 || sglist[0].length == 0);
 
-	for (i = 0; i < nents; i++ ) {
-		BUG_ON(!sg[i].page);
+	for_each_sg(sglist, sg, nents, i) {
+		BUG_ON(!sg->page);
 
-		sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
+		sg->dma_address = page_to_phys(sg->page) + sg->offset;
 	}
 
 	flush_write_buffers();
diff --git a/include/asm-x86/dma-mapping_64.h b/include/asm-x86/dma-mapping_64.h
index 6897e2a436e..ecd0f6125ba 100644
--- a/include/asm-x86/dma-mapping_64.h
+++ b/include/asm-x86/dma-mapping_64.h
@@ -6,8 +6,7 @@
  * documentation.
  */
 
-
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 #include <asm/io.h>
 #include <asm/swiotlb.h>
 
diff --git a/include/asm-x86/kdebug_32.h b/include/asm-x86/kdebug_32.h
index a185b5f73e7..181d437eef4 100644
--- a/include/asm-x86/kdebug_32.h
+++ b/include/asm-x86/kdebug_32.h
@@ -5,14 +5,9 @@
  * Aug-05 2004 Ported by Prasanna S Panchamukhi <prasanna@in.ibm.com>
  * from x86_64 architecture.
  */
-#include <linux/notifier.h>
 
 struct pt_regs;
 
-extern int register_page_fault_notifier(struct notifier_block *);
-extern int unregister_page_fault_notifier(struct notifier_block *);
-
-
 /* Grossly misnamed. */
 enum die_val {
 	DIE_OOPS = 1,
@@ -27,7 +22,6 @@ enum die_val {
 	DIE_GPF,
 	DIE_CALL,
 	DIE_NMI_IPI,
-	DIE_PAGE_FAULT,
 };
 
 #endif
diff --git a/include/asm-x86/kdebug_64.h b/include/asm-x86/kdebug_64.h
index d7e2bcf49e4..df413e05375 100644
--- a/include/asm-x86/kdebug_64.h
+++ b/include/asm-x86/kdebug_64.h
@@ -1,13 +1,10 @@
 #ifndef _X86_64_KDEBUG_H
 #define _X86_64_KDEBUG_H 1
 
-#include <linux/notifier.h>
+#include <linux/compiler.h>
 
 struct pt_regs;
 
-extern int register_page_fault_notifier(struct notifier_block *);
-extern int unregister_page_fault_notifier(struct notifier_block *);
-
 /* Grossly misnamed. */
 enum die_val {
 	DIE_OOPS = 1,
@@ -22,7 +19,6 @@ enum die_val {
 	DIE_GPF,
 	DIE_CALL,
 	DIE_NMI_IPI,
-	DIE_PAGE_FAULT,
 };
 
 extern void printk_address(unsigned long address);
diff --git a/include/asm-x86/kprobes_32.h b/include/asm-x86/kprobes_32.h
index 06f7303c30c..b772d5b3868 100644
--- a/include/asm-x86/kprobes_32.h
+++ b/include/asm-x86/kprobes_32.h
@@ -43,9 +43,10 @@ typedef u8 kprobe_opcode_t;
 	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
 
 #define ARCH_SUPPORTS_KRETPROBES
-#define  ARCH_INACTIVE_KPROBE_COUNT 0
 #define flush_insn_slot(p)	do { } while (0)
 
+extern const int kretprobe_blacklist_size;
+
 void arch_remove_kprobe(struct kprobe *p);
 void kretprobe_trampoline(void);
 
@@ -89,4 +90,5 @@ static inline void restore_interrupts(struct pt_regs *regs)
 
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 #endif				/* _ASM_KPROBES_H */
diff --git a/include/asm-x86/kprobes_64.h b/include/asm-x86/kprobes_64.h
index 7db825403e0..53f4d850735 100644
--- a/include/asm-x86/kprobes_64.h
+++ b/include/asm-x86/kprobes_64.h
@@ -42,7 +42,7 @@ typedef u8 kprobe_opcode_t;
 	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
 
 #define ARCH_SUPPORTS_KRETPROBES
-#define  ARCH_INACTIVE_KPROBE_COUNT 1
+extern const int kretprobe_blacklist_size;
 
 void kretprobe_trampoline(void);
 extern void arch_remove_kprobe(struct kprobe *p);
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 88adf1afb0a..c3b52bcb171 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -134,6 +134,7 @@ extern unsigned long __phys_addr(unsigned long);
 	 VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
 #define __HAVE_ARCH_GATE_AREA 1	
+#define vmemmap ((struct page *)VMEMMAP_START)
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/page.h>
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 57dd6b3107e..a79f5355e3b 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -137,6 +137,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
 #define MAXMEM		 _AC(0x3fffffffffff, UL)
 #define VMALLOC_START    _AC(0xffffc20000000000, UL)
 #define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
+#define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
 #define MODULES_VADDR    _AC(0xffffffff88000000, UL)
 #define MODULES_END      _AC(0xfffffffffff00000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
diff --git a/include/asm-x86/scatterlist_32.h b/include/asm-x86/scatterlist_32.h
index d7e45a8f1aa..bd5164aa8f6 100644
--- a/include/asm-x86/scatterlist_32.h
+++ b/include/asm-x86/scatterlist_32.h
@@ -10,6 +10,8 @@ struct scatterlist {
     unsigned int	length;
 };
 
+#define ARCH_HAS_SG_CHAIN
+
 /* These macros should be used after a pci_map_sg call has been done
  * to get bus addresses of each of the SG entries and their lengths.
  * You should only work with the number of sg entries pci_map_sg
diff --git a/include/asm-x86/scatterlist_64.h b/include/asm-x86/scatterlist_64.h
index eaf7ada27e1..ef3986ba4b7 100644
--- a/include/asm-x86/scatterlist_64.h
+++ b/include/asm-x86/scatterlist_64.h
@@ -11,6 +11,8 @@ struct scatterlist {
     unsigned int        dma_length;
 };
 
+#define ARCH_HAS_SG_CHAIN
+
 #define ISA_DMA_THRESHOLD (0x00ffffff)
 
 /* These macros should be used after a pci_map_sg call has been done
diff --git a/include/asm-x86/smp_32.h b/include/asm-x86/smp_32.h
index 1f73bde165b..955dd7c8538 100644
--- a/include/asm-x86/smp_32.h
+++ b/include/asm-x86/smp_32.h
@@ -30,8 +30,8 @@
 extern void smp_alloc_memory(void);
 extern int pic_mode;
 extern int smp_num_siblings;
-extern cpumask_t cpu_sibling_map[];
-extern cpumask_t cpu_core_map[];
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 
 extern void (*mtrr_hook) (void);
 extern void zap_low_mappings (void);
diff --git a/include/asm-x86/smp_64.h b/include/asm-x86/smp_64.h
index 3f303d2365e..f5bcee1c092 100644
--- a/include/asm-x86/smp_64.h
+++ b/include/asm-x86/smp_64.h
@@ -38,8 +38,15 @@ extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
 
-extern cpumask_t cpu_sibling_map[NR_CPUS];
-extern cpumask_t cpu_core_map[NR_CPUS];
+/*
+ * cpu_sibling_map and cpu_core_map now live
+ * in the per cpu area
+ *
+ * extern cpumask_t cpu_sibling_map[NR_CPUS];
+ * extern cpumask_t cpu_core_map[NR_CPUS];
+ */
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 extern u8 cpu_llc_id[NR_CPUS];
 
 #define SMP_TRAMPOLINE_BASE 0x6000
diff --git a/include/asm-x86/topology_32.h b/include/asm-x86/topology_32.h
index 19b2dafd0c8..ae1074603c4 100644
--- a/include/asm-x86/topology_32.h
+++ b/include/asm-x86/topology_32.h
@@ -30,8 +30,8 @@
 #ifdef CONFIG_X86_HT
 #define topology_physical_package_id(cpu)	(cpu_data[cpu].phys_proc_id)
 #define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
-#define topology_core_siblings(cpu)		(cpu_core_map[cpu])
-#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
+#define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
+#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #endif
 
 #ifdef CONFIG_NUMA
diff --git a/include/asm-x86/topology_64.h b/include/asm-x86/topology_64.h
index 36e52fba796..848c17f9222 100644
--- a/include/asm-x86/topology_64.h
+++ b/include/asm-x86/topology_64.h
@@ -58,8 +58,8 @@ extern int __node_distance(int, int);
 #ifdef CONFIG_SMP
 #define topology_physical_package_id(cpu)	(cpu_data[cpu].phys_proc_id)
 #define topology_core_id(cpu)			(cpu_data[cpu].cpu_core_id)
-#define topology_core_siblings(cpu)		(cpu_core_map[cpu])
-#define topology_thread_siblings(cpu)		(cpu_sibling_map[cpu])
+#define topology_core_siblings(cpu)		(per_cpu(cpu_core_map, cpu))
+#define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
 #define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
 #define smt_capable() 			(smp_num_siblings > 1)
 #endif
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 089a8bc55dd..4da441337d6 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -176,13 +176,28 @@ struct bio {
 #define bio_offset(bio)		bio_iovec((bio))->bv_offset
 #define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
-#define bio_cur_sectors(bio)	(bio_iovec(bio)->bv_len >> 9)
-#define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
 #define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
 #define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
 #define bio_failfast(bio)	((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
 #define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))
 #define bio_rw_meta(bio)	((bio)->bi_rw & (1 << BIO_RW_META))
+#define bio_empty_barrier(bio)	(bio_barrier(bio) && !(bio)->bi_size)
+
+/* bv_len of the current bio_iovec() in 512-byte units; 0 when bi_vcnt is 0. */
+static inline unsigned int bio_cur_sectors(struct bio *bio)
+{
+	if (!bio->bi_vcnt)
+		return 0;
+	return bio_iovec(bio)->bv_len >> 9;
+}
+
+/* page_address() of bio_page() plus bio_offset(); NULL when bi_vcnt is 0. */
+static inline void *bio_data(struct bio *bio)
+{
+	if (!bio->bi_vcnt)
+		return NULL;
+	return page_address(bio_page(bio)) + bio_offset(bio);
+}
 
 /*
  * will die
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 638165f571d..b9fb8ee3308 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -8,6 +8,12 @@
  */
 #include <asm/bitops.h>
 
+/* Loop @bit over find_first_bit()/find_next_bit() results that are < @size. */
+#define for_each_bit(bit, addr, size) \
+	for ((bit) = find_first_bit((addr), (size)); \
+	     (bit) < (size); \
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
 static __inline__ int get_bitmask_order(unsigned int count)
 {
 	int order;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5ed888b04b2..bbf906a0b41 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -330,7 +330,6 @@ typedef void (unplug_fn) (struct request_queue *);
 
 struct bio_vec;
 typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *);
-typedef int (issue_flush_fn) (struct request_queue *, struct gendisk *, sector_t *);
 typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
 
@@ -368,7 +367,6 @@ struct request_queue
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
 	merge_bvec_fn		*merge_bvec_fn;
-	issue_flush_fn		*issue_flush_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
 
@@ -540,6 +538,7 @@ enum {
 #define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
 #define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
+#define blk_empty_barrier(rq)	(blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
@@ -729,7 +728,9 @@ static inline void blk_run_address_space(struct address_space *mapping)
 extern int end_that_request_first(struct request *, int, int);
 extern int end_that_request_chunk(struct request *, int, int);
 extern void end_that_request_last(struct request *, int);
-extern void end_request(struct request *req, int uptodate);
+extern void end_request(struct request *, int);
+extern void end_queued_request(struct request *, int);
+extern void end_dequeued_request(struct request *, int);
 extern void blk_complete_request(struct request *);
 
 /*
@@ -767,7 +768,6 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
-extern void blk_queue_issue_flush_fn(struct request_queue *, issue_flush_fn *);
 extern int blk_do_ordered(struct request_queue *, struct request **);
 extern unsigned blk_ordered_cur_seq(struct request_queue *);
 extern unsigned blk_ordered_req_seq(struct request *);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 35cadad84b1..da0d83fbadc 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -203,10 +203,20 @@ void block_invalidatepage(struct page *page, unsigned long offset);
 int block_write_full_page(struct page *page, get_block_t *get_block,
 				struct writeback_control *wbc);
 int block_read_full_page(struct page*, get_block_t*);
+int block_write_begin(struct file *, struct address_space *,
+				loff_t, unsigned, unsigned,
+				struct page **, void **, get_block_t*);
+int block_write_end(struct file *, struct address_space *,
+				loff_t, unsigned, unsigned,
+				struct page *, void *);
+int generic_write_end(struct file *, struct address_space *,
+				loff_t, unsigned, unsigned,
+				struct page *, void *);
+void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
 int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
-int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
-				loff_t *);
-int generic_cont_expand(struct inode *inode, loff_t size);
+int cont_write_begin(struct file *, struct address_space *, loff_t,
+			unsigned, unsigned, struct page **, void **,
+			get_block_t *, loff_t *);
 int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
 int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
@@ -216,9 +226,13 @@ sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
 int file_fsync(struct file *, struct dentry *, int);
-int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
-int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
-int nobh_truncate_page(struct address_space *, loff_t);
+int nobh_write_begin(struct file *, struct address_space *,
+				loff_t, unsigned, unsigned,
+				struct page **, void **, get_block_t*);
+int nobh_write_end(struct file *, struct address_space *,
+				loff_t, unsigned, unsigned,
+				struct page *, void *);
+int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
 int nobh_writepage(struct page *page, get_block_t *get_block,
                         struct writeback_control *wbc);
 
diff --git a/include/linux/connector.h b/include/linux/connector.h
index b62f823e90c..13fc4541bf2 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -36,14 +36,15 @@
 #define CN_VAL_CIFS                     0x1
 #define CN_W1_IDX			0x3	/* w1 communication */
 #define CN_W1_VAL			0x1
+#define CN_IDX_V86D			0x4
+#define CN_VAL_V86D_UVESAFB		0x1
 
-
-#define CN_NETLINK_USERS		4
+#define CN_NETLINK_USERS		5
 
 /*
  * Maximum connector's message size.
  */
-#define CONNECTOR_MAX_MSG_SIZE 	1024
+#define CONNECTOR_MAX_MSG_SIZE		16384
 
 /*
  * idx and val are unique identifiers which 
diff --git a/include/linux/console.h b/include/linux/console.h
index 56a7bcda49c..0a4542ddb73 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -45,7 +45,8 @@ struct consw {
 	int	(*con_font_get)(struct vc_data *, struct console_font *);
 	int	(*con_font_default)(struct vc_data *, struct console_font *, char *);
 	int	(*con_font_copy)(struct vc_data *, int);
-	int	(*con_resize)(struct vc_data *, unsigned int, unsigned int);
+	int     (*con_resize)(struct vc_data *, unsigned int, unsigned int,
+			       unsigned int);
 	int	(*con_set_palette)(struct vc_data *, unsigned char *);
 	int	(*con_scrolldelta)(struct vc_data *, int);
 	int	(*con_set_origin)(struct vc_data *);
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index dc77fed7b28..d71f7c0f931 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -100,6 +100,7 @@ struct vc_data {
 	unsigned char 	vc_G1_charset;
 	unsigned char 	vc_saved_G0;
 	unsigned char 	vc_saved_G1;
+	unsigned int    vc_resize_user;         /* resize request from user */
 	unsigned int	vc_bell_pitch;		/* Console bell pitch */
 	unsigned int	vc_bell_duration;	/* Console bell duration */
 	struct vc_data **vc_display_fg;		/* [!] Ptr to var holding fg console for this display */
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 826b15e914e..9e633ea103c 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -93,7 +93,7 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 	return node_possible_map;
 }
 
-#define cpuset_current_mems_allowed (node_online_map)
+#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
 static inline void cpuset_init_current_mems_allowed(void) {}
 static inline void cpuset_update_task_memory_state(void) {}
 #define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
diff --git a/include/linux/dca.h b/include/linux/dca.h
new file mode 100644
index 00000000000..83eaecc6f8a
--- /dev/null
+++ b/include/linux/dca.h
@@ -0,0 +1,61 @@
+#ifndef DCA_H
+#define DCA_H
+
+#include <linux/init.h>		/* __init, __exit */
+#include <linux/types.h>	/* u8 */
+
+/* Only pointers to these are used here, so forward declarations suffice. */
+struct class_device;
+struct device;
+struct notifier_block;
+
+/* DCA Provider API */
+
+/* DCA Notifier Interface */
+void dca_register_notify(struct notifier_block *nb);
+void dca_unregister_notify(struct notifier_block *nb);
+
+#define DCA_PROVIDER_ADD     0x0001
+#define DCA_PROVIDER_REMOVE  0x0002
+
+struct dca_provider {
+	struct dca_ops		*ops;
+	struct class_device 	*cd;
+	int			 id;
+};
+
+struct dca_ops {
+	int	(*add_requester)    (struct dca_provider *, struct device *);
+	int	(*remove_requester) (struct dca_provider *, struct device *);
+	u8	(*get_tag)	    (struct dca_provider *, int cpu);
+};
+
+struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
+void free_dca_provider(struct dca_provider *dca);
+int register_dca_provider(struct dca_provider *dca, struct device *dev);
+void unregister_dca_provider(struct dca_provider *dca);
+
+/*
+ * Return the address sizeof(struct dca_provider) bytes past @dca.
+ * NOTE(review): presumably the priv_size area requested through
+ * alloc_dca_provider() -- confirm against the allocator.
+ */
+static inline void *dca_priv(struct dca_provider *dca)
+{
+	return (void *)dca + sizeof(struct dca_provider);
+}
+
+/* Requester API */
+int dca_add_requester(struct device *dev);
+int dca_remove_requester(struct device *dev);
+u8 dca_get_tag(int cpu);
+
+/* internal stuff */
+int __init dca_sysfs_init(void);
+void __exit dca_sysfs_exit(void);
+int dca_sysfs_add_provider(struct dca_provider *dca, struct device *dev);
+void dca_sysfs_remove_provider(struct dca_provider *dca);
+int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot);
+void dca_sysfs_remove_req(struct dca_provider *dca, int slot);
+
+#endif /* DCA_H */
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 2dc21cbeb30..0ebfafbd338 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -24,6 +24,8 @@ enum dma_data_direction {
 #define DMA_28BIT_MASK	0x000000000fffffffULL
 #define DMA_24BIT_MASK	0x0000000000ffffffULL
 
+#define DMA_MASK_NONE	0x0ULL
+
 static inline int valid_dma_direction(int dma_direction)
 {
 	return ((dma_direction == DMA_BIDIRECTIONAL) ||
@@ -31,6 +33,11 @@ static inline int valid_dma_direction(int dma_direction)
 		(dma_direction == DMA_FROM_DEVICE));
 }
 
+static inline int is_device_dma_capable(struct device *dev)
+{
+	return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
+}
+
 #ifdef CONFIG_HAS_DMA
 #include <asm/dma-mapping.h>
 #else
diff --git a/include/linux/fb.h b/include/linux/fb.h
index cec54106aa8..58c57a33e5d 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -180,6 +180,7 @@ struct fb_bitfield {
 };
 
 #define FB_NONSTD_HAM		1	/* Hold-And-Modify (HAM)        */
+#define FB_NONSTD_REV_PIX_IN_B	2	/* order of pixels in each byte is reversed */
 
 #define FB_ACTIVATE_NOW		0	/* set values immediately (or vbl)*/
 #define FB_ACTIVATE_NXTOPEN	1	/* activate on next open	*/
@@ -206,6 +207,7 @@ struct fb_bitfield {
 #define FB_VMODE_NONINTERLACED  0	/* non interlaced */
 #define FB_VMODE_INTERLACED	1	/* interlaced	*/
 #define FB_VMODE_DOUBLE		2	/* double scan */
+#define FB_VMODE_ODD_FLD_FIRST	4	/* interlaced: top line first */
 #define FB_VMODE_MASK		255
 
 #define FB_VMODE_YWRAP		256	/* ywrap instead of panning     */
@@ -1054,6 +1056,7 @@ struct fb_videomode {
 	u32 flag;
 };
 
+extern const char *fb_mode_option;
 extern const struct fb_videomode vesa_modes[];
 
 struct fb_modelist {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4a6a21077ba..f70d52c4661 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -381,7 +381,7 @@ struct iattr {
  *  			trying again.  The aop will be taking reasonable
  *  			precautions not to livelock.  If the caller held a page
  *  			reference, it should drop it before retrying.  Returned
- *  			by readpage(), prepare_write(), and commit_write().
+ *  			by readpage().
  *
  * address_space_operation functions return these large constants to indicate
  * special semantics to the caller.  These are much larger than the bytes in a
@@ -394,6 +394,9 @@ enum positive_aop_returns {
 	AOP_TRUNCATED_PAGE	= 0x80001,
 };
 
+#define AOP_FLAG_UNINTERRUPTIBLE	0x0001 /* will not do a short write */
+#define AOP_FLAG_CONT_EXPAND		0x0002 /* called from cont_expand */
+
 /*
  * oh the beauties of C type declarations.
  */
@@ -401,6 +404,39 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+struct iov_iter {
+	const struct iovec *iov;
+	unsigned long nr_segs;
+	size_t iov_offset;
+	size_t count;
+};
+
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes);
+size_t iov_iter_copy_from_user(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes);
+void iov_iter_advance(struct iov_iter *i, size_t bytes);
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
+size_t iov_iter_single_seg_count(struct iov_iter *i);
+
+/* Point @i at @iov with count + written bytes, then advance it by @written. */
+static inline void iov_iter_init(struct iov_iter *i,
+			const struct iovec *iov, unsigned long nr_segs,
+			size_t count, size_t written)
+{
+	i->iov = iov;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count + written;
+	iov_iter_advance(i, written);
+}
+
+/* Return the current value of @i->count. */
+static inline size_t iov_iter_count(struct iov_iter *i)
+{
+	return i->count;
+}
+
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*readpage)(struct file *, struct page *);
@@ -421,6 +457,14 @@ struct address_space_operations {
 	 */
 	int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
 	int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
+
+	int (*write_begin)(struct file *, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata);
+	int (*write_end)(struct file *, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata);
+
 	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
 	sector_t (*bmap)(struct address_space *, sector_t);
 	void (*invalidatepage) (struct page *, unsigned long);
@@ -435,6 +479,18 @@ struct address_space_operations {
 	int (*launder_page) (struct page *);
 };
 
+/*
+ * pagecache_write_begin/pagecache_write_end must be used by general code
+ * to write into the pagecache.
+ */
+int pagecache_write_begin(struct file *, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata);
+
+int pagecache_write_end(struct file *, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata);
+
 struct backing_dev_info;
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
@@ -697,16 +753,14 @@ struct fown_struct {
  * Track a single file's readahead state
  */
 struct file_ra_state {
-	pgoff_t start;                  /* where readahead started */
-	unsigned long size;             /* # of readahead pages */
-	unsigned long async_size;       /* do asynchronous readahead when
+	pgoff_t start;			/* where readahead started */
+	unsigned int size;		/* # of readahead pages */
+	unsigned int async_size;	/* do asynchronous readahead when
 					   there are only # of pages ahead */
 
-	unsigned long ra_pages;		/* Maximum readahead window */
-	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
-	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
-	unsigned long prev_index;	/* Cache last read() position */
-	unsigned int prev_offset;	/* Offset where last read() ended in a page */
+	unsigned int ra_pages;		/* Maximum readahead window */
+	int mmap_miss;			/* Cache miss stat for mmap accesses */
+	loff_t prev_pos;		/* Cache last read() position */
 };
 
 /*
@@ -1835,6 +1889,12 @@ extern int simple_prepare_write(struct file *file, struct page *page,
 			unsigned offset, unsigned to);
 extern int simple_commit_write(struct file *file, struct page *page,
 				unsigned offset, unsigned to);
+extern int simple_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata);
+extern int simple_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata);
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index bc68dd9a6d4..7e93a9ae706 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -48,18 +48,12 @@ struct vm_area_struct;
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
 #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
-#define __GFP_MOVABLE	((__force gfp_t)0x80000u) /* Page is movable */
+#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
+#define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
 
-#define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 21	/* Room for 21 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
-/* if you forget to add the bitmask here kernel will crash, period */
-#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
-			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
-			__GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE| \
-			__GFP_MOVABLE)
-
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
 /* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
@@ -67,6 +61,8 @@ struct vm_area_struct;
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
 #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_TEMPORARY	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
+			 __GFP_RECLAIMABLE)
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
@@ -86,6 +82,19 @@ struct vm_area_struct;
 #define GFP_THISNODE	((__force gfp_t)0)
 #endif
 
+/* This mask makes up all the page movable related flags */
+#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+
+/* Control page allocator reclaim behavior */
+#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
+			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
+			__GFP_NORETRY|__GFP_NOMEMALLOC)
+
+/* Control allocation constraints */
+#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
+
+/* Do not use these with a slab allocator */
+#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */
@@ -95,25 +104,50 @@ struct vm_area_struct;
 /* 4GB DMA on some platforms */
 #define GFP_DMA32	__GFP_DMA32
 
+/* Convert GFP flags to a migrate type; warns if both mobility flags are set */
+static inline int allocflags_to_migratetype(gfp_t gfp_flags)
+{
+	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+
+	if (unlikely(page_group_by_mobility_disabled))
+		return MIGRATE_UNMOVABLE;	/* grouping disabled */
+
+	/* Group based on mobility: movable -> 2, reclaimable -> 1, neither -> 0 */
+	return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
+		((gfp_flags & __GFP_RECLAIMABLE) != 0);
+}
 
 static inline enum zone_type gfp_zone(gfp_t flags)
 {
+	int base = 0;
+
+#ifdef CONFIG_NUMA
+	if (flags & __GFP_THISNODE)
+		base = MAX_NR_ZONES;
+#endif
+
 #ifdef CONFIG_ZONE_DMA
 	if (flags & __GFP_DMA)
-		return ZONE_DMA;
+		return base + ZONE_DMA;
 #endif
 #ifdef CONFIG_ZONE_DMA32
 	if (flags & __GFP_DMA32)
-		return ZONE_DMA32;
+		return base + ZONE_DMA32;
 #endif
 	if ((flags & (__GFP_HIGHMEM | __GFP_MOVABLE)) ==
 			(__GFP_HIGHMEM | __GFP_MOVABLE))
-		return ZONE_MOVABLE;
+		return base + ZONE_MOVABLE;
 #ifdef CONFIG_HIGHMEM
 	if (flags & __GFP_HIGHMEM)
-		return ZONE_HIGHMEM;
+		return base + ZONE_HIGHMEM;
 #endif
-	return ZONE_NORMAL;
+	return base + ZONE_NORMAL;
+}
+
+static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags)
+{	/* Replace @gfp's mobility bits with @migrate_flags; BUGs if @gfp has both. */
+	BUG_ON((gfp & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+	return (gfp & ~(GFP_MOVABLE_MASK)) | migrate_flags;	/* OR-ed in unmasked */
 }
 
 /*
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3a19b032c0e..ea0f50bfbe0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -33,6 +33,7 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 
 extern unsigned long max_huge_pages;
 extern unsigned long hugepages_treat_as_movable;
+extern int hugetlb_dynamic_pool;
 extern const unsigned long hugetlb_zero, hugetlb_infinity;
 extern int sysctl_hugetlb_shm_group;
 
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 9752307d16b..7da5b98d90e 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -32,6 +32,7 @@
 #include <linux/workqueue.h>	/* work_struct */
 #include <linux/mempool.h>
 #include <linux/mutex.h>
+#include <linux/scatterlist.h>
 
 #include <asm/io.h>
 #include <asm/semaphore.h>	/* Needed for MUTEX init macros */
@@ -837,7 +838,7 @@ static inline int i2o_dma_map_sg(struct i2o_controller *c,
 		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
 			*mptr++ = cpu_to_le32(i2o_dma_high(sg_dma_address(sg)));
 #endif
-		sg++;
+		sg = sg_next(sg);
 	}
 	*sg_ptr = mptr;
 
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 02a27e8cbad..30a1931466a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -772,7 +772,7 @@ typedef struct hwif_s {
 
 	unsigned int nsect;
 	unsigned int nleft;
-	unsigned int cursg;
+	struct scatterlist *cursg;
 	unsigned int cursg_ofs;
 
 	int		rqsize;		/* max sectors per request */
@@ -1093,11 +1093,6 @@ extern ide_startstop_t ide_do_reset (ide_drive_t *);
 extern void ide_init_drive_cmd (struct request *rq);
 
 /*
- * this function returns error location sector offset in case of a write error
- */
-extern u64 ide_get_error_location(ide_drive_t *, char *);
-
-/*
  * "action" parameter type for ide_do_drive_cmd() below.
  */
 typedef enum {
diff --git a/include/linux/init.h b/include/linux/init.h
index f8d9d0b5cff..9b7a2ba8237 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -67,8 +67,10 @@
 
 /* For assembly routines */
 #define __INIT		.section	".init.text","ax"
+#define __INIT_REFOK	.section	".text.init.refok","ax"
 #define __FINIT		.previous
 #define __INITDATA	.section	".init.data","aw"
+#define __INITDATA_REFOK .section	".data.init.refok","aw"
 
 #ifndef __ASSEMBLY__
 /*
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5523f19d88d..8e5f289052a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -205,6 +205,15 @@ static inline int disable_irq_wake(unsigned int irq)
 						enable_irq(irq)
 # endif
 
+static inline int enable_irq_wake(unsigned int irq)
+{
+	return 0;	/* stub: @irq is unused */
+}
+
+static inline int disable_irq_wake(unsigned int irq)
+{
+	return 0;	/* stub: @irq is unused */
+}
 #endif /* CONFIG_GENERIC_HARDIRQS */
 
 #ifndef __ARCH_SET_SOFTIRQ_PENDING
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 71ea9231924..6187a8567bc 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -110,9 +110,6 @@ extern int allocate_resource(struct resource *root, struct resource *new,
 int adjust_resource(struct resource *res, resource_size_t start,
 		    resource_size_t size);
 
-/* get registered SYSTEM_RAM resources in specified area */
-extern int find_next_system_ram(struct resource *res);
-
 /* Convenience shorthand with allocation */
 #define request_region(start,n,name)	__request_region(&ioport_resource, (start), (n), (name))
 #define request_mem_region(start,n,name) __request_region(&iomem_resource, (start), (n), (name))
diff --git a/include/linux/isdn.h b/include/linux/isdn.h
index d5dda4b643a..d0ecc8eebfb 100644
--- a/include/linux/isdn.h
+++ b/include/linux/isdn.h
@@ -167,6 +167,7 @@ typedef struct {
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/tcp.h>
+#include <linux/mutex.h>
 
 #define ISDN_TTY_MAJOR    43
 #define ISDN_TTYAUX_MAJOR 44
@@ -616,7 +617,7 @@ typedef struct isdn_devt {
 	int               v110emu[ISDN_MAX_CHANNELS]; /* V.110 emulator-mode 0=none */
 	atomic_t          v110use[ISDN_MAX_CHANNELS]; /* Usage-Semaphore for stream */
 	isdn_v110_stream  *v110[ISDN_MAX_CHANNELS];   /* V.110 private data         */
-	struct semaphore  sem;                        /* serialize list access*/
+	struct mutex      mtx;                        /* serialize list access*/
 	unsigned long     global_features;
 } isdn_dev;
 
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 700a93b7918..72f52237292 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -372,6 +372,7 @@ struct jbd_revoke_table_s;
  * @h_sync: flag for sync-on-close
  * @h_jdata: flag to force data journaling
  * @h_aborted: flag indicating fatal error on handle
+ * @h_lockdep_map: lockdep info for debugging lock problems
  **/
 
 /* Docbook can't yet cope with the bit fields, but will leave the documentation
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index e757a74b9d1..8b080024bbc 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -148,6 +148,8 @@ static inline u64 get_jiffies_64(void)
  */
 #define MAX_JIFFY_OFFSET ((LONG_MAX >> 1)-1)
 
+extern unsigned long preset_lpj;
+
 /*
  * We want to do realistic conversions of time so we need to use the same
  * values the update wall clock code uses as the jiffies size.  This value
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d9725a28a26..5fdbc814c2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -35,6 +35,7 @@ extern const char linux_proc_banner[];
 #define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
 #define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
 #define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
+#define IS_ALIGNED(x,a)		(((x) % ((typeof(x))(a))) == 0)
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
 
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 51464d12a4e..81891581e89 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -166,6 +166,12 @@ struct kretprobe_instance {
 	struct task_struct *task;
 };
 
+struct kretprobe_blackpoint {
+	const char *name;
+	void *addr;
+};
+extern struct kretprobe_blackpoint kretprobe_blacklist[];
+
 static inline void kretprobe_assert(struct kretprobe_instance *ri,
 	unsigned long orig_ret_address, unsigned long trampoline_address)
 {
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 229a9ff9f92..377e6d4d9be 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -29,7 +29,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 #include <linux/io.h>
 #include <linux/ata.h>
 #include <linux/workqueue.h>
@@ -416,6 +416,7 @@ struct ata_queued_cmd {
 	unsigned long		flags;		/* ATA_QCFLAG_xxx */
 	unsigned int		tag;
 	unsigned int		n_elem;
+	unsigned int		n_iter;
 	unsigned int		orig_n_elem;
 
 	int			dma_dir;
@@ -426,7 +427,7 @@ struct ata_queued_cmd {
 	unsigned int		nbytes;
 	unsigned int		curbytes;
 
-	unsigned int		cursg;
+	struct scatterlist	*cursg;
 	unsigned int		cursg_ofs;
 
 	struct scatterlist	sgent;
@@ -1043,7 +1044,7 @@ ata_sg_is_last(struct scatterlist *sg, struct ata_queued_cmd *qc)
 		return 1;
 	if (qc->pad_len)
 		return 0;
-	if (((sg - qc->__sg) + 1) == qc->n_elem)
+	if (qc->n_iter == qc->n_elem)
 		return 1;
 	return 0;
 }
@@ -1051,6 +1052,7 @@ ata_sg_is_last(struct scatterlist *sg, struct ata_queued_cmd *qc)
 static inline struct scatterlist *
 ata_qc_first_sg(struct ata_queued_cmd *qc)
 {
+	qc->n_iter = 0;
 	if (qc->n_elem)
 		return qc->__sg;
 	if (qc->pad_len)
@@ -1063,8 +1065,8 @@ ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc)
 {
 	if (sg == &qc->pad_sgent)
 		return NULL;
-	if (++sg - qc->__sg < qc->n_elem)
-		return sg;
+	if (++qc->n_iter < qc->n_elem)
+		return sg_next(sg);
 	if (qc->pad_len)
 		return &qc->pad_sgent;
 	return NULL;
@@ -1309,9 +1311,11 @@ static inline void ata_qc_reinit(struct ata_queued_cmd *qc)
 	qc->dma_dir = DMA_NONE;
 	qc->__sg = NULL;
 	qc->flags = 0;
-	qc->cursg = qc->cursg_ofs = 0;
+	qc->cursg = NULL;
+	qc->cursg_ofs = 0;
 	qc->nbytes = qc->curbytes = 0;
 	qc->n_elem = 0;
+	qc->n_iter = 0;
 	qc->err_mask = 0;
 	qc->pad_len = 0;
 	qc->sect_size = ATA_SECT_SIZE;
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 7b54666cea8..8fee7a45736 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -3,7 +3,6 @@
 
 #include <linux/mmzone.h>
 #include <linux/spinlock.h>
-#include <linux/mmzone.h>
 #include <linux/notifier.h>
 
 struct page;
@@ -59,11 +58,21 @@ extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 extern void online_page(struct page *page);
 /* VM interface that may be used by firmware interface */
 extern int online_pages(unsigned long, unsigned long);
+extern void __offline_isolated_pages(unsigned long, unsigned long);
+extern int offline_pages(unsigned long, unsigned long, unsigned long);
 
 /* reasonably generic interface to expand the physical pages in a zone  */
 extern int __add_pages(struct zone *zone, unsigned long start_pfn,
 	unsigned long nr_pages);
 
+/*
+ * Walk through all memory which is registered as resource.
+ * arg is (start_pfn, nr_pages, private_arg_pointer)
+ */
+extern int walk_memory_resource(unsigned long start_pfn,
+			unsigned long nr_pages, void *arg,
+			int (*func)(unsigned long, unsigned long, void *));
+
 #ifdef CONFIG_NUMA
 extern int memory_add_physaddr_to_nid(u64 start);
 #else
@@ -161,13 +170,6 @@ static inline int mhp_notimplemented(const char *func)
 }
 
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
-static inline int __remove_pages(struct zone *zone, unsigned long start_pfn,
-	unsigned long nr_pages)
-{
-	printk(KERN_WARNING "%s() called, not yet supported\n", __FUNCTION__);
-	dump_stack();
-	return -ENOSYS;
-}
 
 extern int add_memory(int nid, u64 start, u64 size);
 extern int arch_add_memory(int nid, u64 start, u64 size);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index a020eb2d4e2..38c04d61ee0 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -19,6 +19,7 @@
 /* Flags for get_mem_policy */
 #define MPOL_F_NODE	(1<<0)	/* return next IL mode instead of node mask */
 #define MPOL_F_ADDR	(1<<1)	/* look up vma using address */
+#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
@@ -143,7 +144,6 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
-extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
 extern void mpol_rebind_task(struct task_struct *tsk,
 					const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
@@ -235,11 +235,6 @@ static inline void numa_default_policy(void)
 {
 }
 
-static inline void mpol_rebind_policy(struct mempolicy *pol,
-					const nodemask_t *new)
-{
-}
-
 static inline void mpol_rebind_task(struct task_struct *tsk,
 					const nodemask_t *new)
 {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1692dd6cb91..7e87e1b1662 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -50,69 +50,6 @@ extern int sysctl_legacy_va_layout;
  * mmap() functions).
  */
 
-/*
- * This struct defines a memory VMM memory area. There is one of these
- * per VM-area/task.  A VM area is any part of the process virtual memory
- * space that has a special rule for the page-fault handlers (ie a shared
- * library, the executable area etc).
- */
-struct vm_area_struct {
-	struct mm_struct * vm_mm;	/* The address space we belong to. */
-	unsigned long vm_start;		/* Our start address within vm_mm. */
-	unsigned long vm_end;		/* The first byte after our end address
-					   within vm_mm. */
-
-	/* linked list of VM areas per task, sorted by address */
-	struct vm_area_struct *vm_next;
-
-	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
-	unsigned long vm_flags;		/* Flags, listed below. */
-
-	struct rb_node vm_rb;
-
-	/*
-	 * For areas with an address space and backing store,
-	 * linkage into the address_space->i_mmap prio tree, or
-	 * linkage to the list of like vmas hanging off its node, or
-	 * linkage of vma in the address_space->i_mmap_nonlinear list.
-	 */
-	union {
-		struct {
-			struct list_head list;
-			void *parent;	/* aligns with prio_tree_node parent */
-			struct vm_area_struct *head;
-		} vm_set;
-
-		struct raw_prio_tree_node prio_tree_node;
-	} shared;
-
-	/*
-	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
-	 * list, after a COW of one of the file pages.  A MAP_SHARED vma
-	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
-	 * or brk vma (with NULL file) can only be in an anon_vma list.
-	 */
-	struct list_head anon_vma_node;	/* Serialized by anon_vma->lock */
-	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
-
-	/* Function pointers to deal with this struct. */
-	struct vm_operations_struct * vm_ops;
-
-	/* Information about our backing store: */
-	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
-					   units, *not* PAGE_CACHE_SIZE */
-	struct file * vm_file;		/* File we map to (can be NULL). */
-	void * vm_private_data;		/* was vm_pte (shared mem) */
-	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
-
-#ifndef CONFIG_MMU
-	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
-#endif
-#ifdef CONFIG_NUMA
-	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
-#endif
-};
-
 extern struct kmem_cache *vm_area_cachep;
 
 /*
@@ -631,10 +568,6 @@ static inline struct address_space *page_mapping(struct page *page)
 	VM_BUG_ON(PageSlab(page));
 	if (unlikely(PageSwapCache(page)))
 		mapping = &swapper_space;
-#ifdef CONFIG_SLUB
-	else if (unlikely(PageSlab(page)))
-		mapping = NULL;
-#endif
 	else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
 		mapping = NULL;
 	return mapping;
@@ -715,9 +648,6 @@ static inline int page_mapped(struct page *page)
 extern void show_free_areas(void);
 
 #ifdef CONFIG_SHMEM
-int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
-struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
-					unsigned long addr);
 int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 static inline int shmem_lock(struct file *file, int lock,
@@ -725,18 +655,6 @@ static inline int shmem_lock(struct file *file, int lock,
 {
 	return 0;
 }
-
-static inline int shmem_set_policy(struct vm_area_struct *vma,
-				   struct mempolicy *new)
-{
-	return 0;
-}
-
-static inline struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
-						 unsigned long addr)
-{
-	return NULL;
-}
 #endif
 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);
 
@@ -779,8 +697,6 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
 			struct vm_area_struct *vma);
-int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
-			unsigned long size, pgprot_t prot);
 void unmap_mapping_range(struct address_space *mapping,
 		loff_t const holebegin, loff_t const holelen, int even_cows);
 
@@ -1106,8 +1022,6 @@ int write_one_page(struct page *page, int wait);
 /* readahead.c */
 #define VM_MAX_READAHEAD	128	/* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
-#define VM_MAX_CACHE_HIT    	256	/* max pages in a row in cache before
-					 * turning readahead off */
 
 int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
@@ -1218,5 +1132,16 @@ extern int randomize_va_space;
 
 const char * arch_vma_name(struct vm_area_struct *vma);
 
+struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
+pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
+pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
+pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
+pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
+void *vmemmap_alloc_block(unsigned long size, int node);
+void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+int vmemmap_populate_basepages(struct page *start_page,
+						unsigned long pages, int node);
+int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d5bb1796e12..87766791845 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1,13 +1,26 @@
 #ifndef _LINUX_MM_TYPES_H
 #define _LINUX_MM_TYPES_H
 
+#include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
 #include <linux/types.h>
 #include <linux/threads.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/prio_tree.h>
+#include <linux/rbtree.h>
+#include <linux/rwsem.h>
+#include <linux/completion.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
 
 struct address_space;
 
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+typedef atomic_long_t mm_counter_t;
+#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+typedef unsigned long mm_counter_t;
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
 /*
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
@@ -24,10 +37,7 @@ struct page {
 					 * to show when page is mapped
 					 * & limit reverse map searches.
 					 */
-		struct {	/* SLUB uses */
-			short unsigned int inuse;
-			short unsigned int offset;
-		};
+		unsigned int inuse;	/* SLUB: Nr of objects */
 	};
 	union {
 	    struct {
@@ -49,13 +59,8 @@ struct page {
 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
 	    spinlock_t ptl;
 #endif
-	    struct {			/* SLUB uses */
-	    	void **lockless_freelist;
-		struct kmem_cache *slab;	/* Pointer to slab */
-	    };
-	    struct {
-		struct page *first_page;	/* Compound pages */
-	    };
+	    struct kmem_cache *slab;	/* SLUB: Pointer to slab */
+	    struct page *first_page;	/* Compound tail pages */
 	};
 	union {
 		pgoff_t index;		/* Our offset within mapping. */
@@ -80,4 +85,135 @@ struct page {
 #endif /* WANT_PAGE_VIRTUAL */
 };
 
+/*
+ * This struct defines a memory VMM memory area. There is one of these
+ * per VM-area/task.  A VM area is any part of the process virtual memory
+ * space that has a special rule for the page-fault handlers (ie a shared
+ * library, the executable area etc).
+ */
+struct vm_area_struct {
+	struct mm_struct * vm_mm;	/* The address space we belong to. */
+	unsigned long vm_start;		/* Our start address within vm_mm. */
+	unsigned long vm_end;		/* The first byte after our end address
+					   within vm_mm. */
+
+	/* linked list of VM areas per task, sorted by address */
+	struct vm_area_struct *vm_next;
+
+	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
+	unsigned long vm_flags;		/* Flags, listed below. */
+
+	struct rb_node vm_rb;
+
+	/*
+	 * For areas with an address space and backing store,
+	 * linkage into the address_space->i_mmap prio tree, or
+	 * linkage to the list of like vmas hanging off its node, or
+	 * linkage of vma in the address_space->i_mmap_nonlinear list.
+	 */
+	union {
+		struct {
+			struct list_head list;
+			void *parent;	/* aligns with prio_tree_node parent */
+			struct vm_area_struct *head;
+		} vm_set;
+
+		struct raw_prio_tree_node prio_tree_node;
+	} shared;
+
+	/*
+	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
+	 * list, after a COW of one of the file pages.	A MAP_SHARED vma
+	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
+	 * or brk vma (with NULL file) can only be in an anon_vma list.
+	 */
+	struct list_head anon_vma_node;	/* Serialized by anon_vma->lock */
+	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
+
+	/* Function pointers to deal with this struct. */
+	struct vm_operations_struct * vm_ops;
+
+	/* Information about our backing store: */
+	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
+					   units, *not* PAGE_CACHE_SIZE */
+	struct file * vm_file;		/* File we map to (can be NULL). */
+	void * vm_private_data;		/* was vm_pte (shared mem) */
+	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
+
+#ifndef CONFIG_MMU
+	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
+#endif
+#ifdef CONFIG_NUMA
+	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
+#endif
+};
+
+struct mm_struct {
+	struct vm_area_struct * mmap;		/* list of VMAs */
+	struct rb_root mm_rb;
+	struct vm_area_struct * mmap_cache;	/* last find_vma result */
+	unsigned long (*get_unmapped_area) (struct file *filp,
+				unsigned long addr, unsigned long len,
+				unsigned long pgoff, unsigned long flags);
+	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
+	unsigned long mmap_base;		/* base of mmap area */
+	unsigned long task_size;		/* size of task vm space */
+	unsigned long cached_hole_size; 	/* if non-zero, the largest hole below free_area_cache */
+	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
+	pgd_t * pgd;
+	atomic_t mm_users;			/* How many users with user space? */
+	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
+	int map_count;				/* number of VMAs */
+	struct rw_semaphore mmap_sem;
+	spinlock_t page_table_lock;		/* Protects page tables and some counters */
+
+	struct list_head mmlist;		/* List of maybe swapped mm's.	These are globally strung
+						 * together off init_mm.mmlist, and are protected
+						 * by mmlist_lock
+						 */
+
+	/* Special counters, in some configurations protected by the
+	 * page_table_lock, in other configurations by being atomic.
+	 */
+	mm_counter_t _file_rss;
+	mm_counter_t _anon_rss;
+
+	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
+	unsigned long hiwater_vm;	/* High-water virtual memory usage */
+
+	unsigned long total_vm, locked_vm, shared_vm, exec_vm;
+	unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
+	unsigned long start_code, end_code, start_data, end_data;
+	unsigned long start_brk, brk, start_stack;
+	unsigned long arg_start, arg_end, env_start, env_end;
+
+	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
+
+	cpumask_t cpu_vm_mask;
+
+	/* Architecture-specific MM context */
+	mm_context_t context;
+
+	/* Swap token stuff */
+	/*
+	 * Last value of global fault stamp as seen by this process.
+	 * In other words, this value gives an indication of how long
+	 * it has been since this task got the token.
+	 * Look at mm/thrash.c
+	 */
+	unsigned int faultstamp;
+	unsigned int token_priority;
+	unsigned int last_interval;
+
+	unsigned long flags; /* Must use atomic bitops to access the bits */
+
+	/* coredumping support */
+	int core_waiters;
+	struct completion *core_startup_done, core_done;
+
+	/* aio bits */
+	rwlock_t		ioctx_list_lock;
+	struct kioctx		*ioctx_list;
+};
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4e5627379b0..f4bfe824834 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/seqlock.h>
 #include <linux/nodemask.h>
+#include <linux/pageblock-flags.h>
 #include <asm/atomic.h>
 #include <asm/page.h>
 
@@ -32,8 +33,29 @@
  */
 #define PAGE_ALLOC_COSTLY_ORDER 3
 
+#define MIGRATE_UNMOVABLE     0
+#define MIGRATE_RECLAIMABLE   1
+#define MIGRATE_MOVABLE       2
+#define MIGRATE_RESERVE       3
+#define MIGRATE_ISOLATE       4 /* can't allocate from here */
+#define MIGRATE_TYPES         5
+
+#define for_each_migratetype_order(order, type) \
+	for (order = 0; order < MAX_ORDER; order++) \
+		for (type = 0; type < MIGRATE_TYPES; type++)
+
+extern int page_group_by_mobility_disabled;
+
+static inline int get_pageblock_migratetype(struct page *page)
+{	/* Returns the PB_migrate..PB_migrate_end flag group looked up for @page. */
+	if (unlikely(page_group_by_mobility_disabled))
+		return MIGRATE_UNMOVABLE;	/* grouping disabled */
+
+	return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
+}
+
 struct free_area {
-	struct list_head	free_list;
+	struct list_head	free_list[MIGRATE_TYPES];
 	unsigned long		nr_free;
 };
 
@@ -222,6 +244,14 @@ struct zone {
 #endif
 	struct free_area	free_area[MAX_ORDER];
 
+#ifndef CONFIG_SPARSEMEM
+	/*
+	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
+	 * In SPARSEMEM, this map is stored in struct mem_section
+	 */
+	unsigned long		*pageblock_flags;
+#endif /* CONFIG_SPARSEMEM */
+
 
 	ZONE_PADDING(_pad1_)
 
@@ -324,6 +354,17 @@ struct zone {
 #define MAX_ZONES_PER_ZONELIST (MAX_NUMNODES * MAX_NR_ZONES)
 
 #ifdef CONFIG_NUMA
+
+/*
+ * The NUMA zonelists are doubled because we need zonelists that restrict the
+ * allocations to a single node for GFP_THISNODE.
+ *
+ * [0 .. MAX_NR_ZONES -1] 		: Zonelists with fallback
+ * [MAX_NR_ZONES ... MAX_ZONELISTS -1]  : No fallback (GFP_THISNODE)
+ */
+#define MAX_ZONELISTS (2 * MAX_NR_ZONES)
+
+
 /*
  * We cache key information from each zonelist for smaller cache
  * footprint when scanning for free pages in get_page_from_freelist().
@@ -389,6 +430,7 @@ struct zonelist_cache {
 	unsigned long last_full_zap;		/* when last zap'd (jiffies) */
 };
 #else
+#define MAX_ZONELISTS MAX_NR_ZONES
 struct zonelist_cache;
 #endif
 
@@ -455,7 +497,7 @@ extern struct page *mem_map;
 struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
-	struct zonelist node_zonelists[MAX_NR_ZONES];
+	struct zonelist node_zonelists[MAX_ZONELISTS];
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
 	struct page *node_mem_map;
@@ -708,6 +750,9 @@ extern struct zone *next_zone(struct zone *zone);
 #define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
 #define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION-1))
 
+#define SECTION_BLOCKFLAGS_BITS \
+	((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
+
 #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
 #error Allocator MAX_ORDER exceeds SECTION_SIZE
 #endif
@@ -727,6 +772,9 @@ struct mem_section {
 	 * before using it wrong.
 	 */
 	unsigned long section_mem_map;
+
+	/* See declaration of similar field in struct zone */
+	unsigned long *pageblock_flags;
 };
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
@@ -771,12 +819,17 @@ static inline struct page *__section_mem_map_addr(struct mem_section *section)
 	return (struct page *)map;
 }
 
-static inline int valid_section(struct mem_section *section)
+static inline int present_section(struct mem_section *section)
 {
 	return (section && (section->section_mem_map & SECTION_MARKED_PRESENT));
 }
 
-static inline int section_has_mem_map(struct mem_section *section)
+static inline int present_section_nr(unsigned long nr)
+{
+	return present_section(__nr_to_section(nr));
+}
+
+static inline int valid_section(struct mem_section *section)
 {
 	return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP));
 }
@@ -798,6 +851,13 @@ static inline int pfn_valid(unsigned long pfn)
 	return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
 }
 
+static inline int pfn_present(unsigned long pfn)
+{
+	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
+		return 0;
+	return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
+}
+
 /*
  * These are _only_ used during initialisation, therefore they
  * can use __initdata ...  They could have names to indicate
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 5cd19246909..bcb7abafbca 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -127,17 +127,9 @@ void			nfsd_export_shutdown(void);
 void			nfsd_export_flush(void);
 void			exp_readlock(void);
 void			exp_readunlock(void);
-struct svc_export *	exp_get_by_name(struct auth_domain *clp,
-					struct vfsmount *mnt,
-					struct dentry *dentry,
-					struct cache_req *reqp);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
 					     struct vfsmount *,
 					     struct dentry *);
-struct svc_export *	exp_parent(struct auth_domain *clp,
-				   struct vfsmount *mnt,
-				   struct dentry *dentry,
-				   struct cache_req *reqp);
 struct svc_export *	rqst_exp_parent(struct svc_rqst *,
 					struct vfsmount *mnt,
 					struct dentry *dentry);
@@ -157,9 +149,6 @@ static inline void exp_get(struct svc_export *exp)
 {
 	cache_get(&exp->h);
 }
-extern struct svc_export *
-exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv,
-	 struct cache_req *reqp);
 struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 52c54a5720f..905e18f4b41 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -338,31 +338,88 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
 #endif /* MAX_NUMNODES */
 
 /*
+ * Bitmasks that are kept for all the nodes.
+ */
+enum node_states {
+	N_POSSIBLE,		/* The node could become online at some point */
+	N_ONLINE,		/* The node is online */
+	N_NORMAL_MEMORY,	/* The node has regular memory */
+#ifdef CONFIG_HIGHMEM
+	N_HIGH_MEMORY,		/* The node has regular or high memory */
+#else
+	N_HIGH_MEMORY = N_NORMAL_MEMORY,
+#endif
+	N_CPU,		/* The node has one or more cpus */
+	NR_NODE_STATES
+};
+
+/*
  * The following particular system nodemasks and operations
  * on them manage all possible and online nodes.
  */
 
-extern nodemask_t node_online_map;
-extern nodemask_t node_possible_map;
+extern nodemask_t node_states[NR_NODE_STATES];
 
 #if MAX_NUMNODES > 1
-#define num_online_nodes()	nodes_weight(node_online_map)
-#define num_possible_nodes()	nodes_weight(node_possible_map)
-#define node_online(node)	node_isset((node), node_online_map)
-#define node_possible(node)	node_isset((node), node_possible_map)
-#define first_online_node	first_node(node_online_map)
-#define next_online_node(nid)	next_node((nid), node_online_map)
+static inline int node_state(int node, enum node_states state)
+{
+	return node_isset(node, node_states[state]);
+}
+
+static inline void node_set_state(int node, enum node_states state)
+{
+	__node_set(node, &node_states[state]);
+}
+
+static inline void node_clear_state(int node, enum node_states state)
+{
+	__node_clear(node, &node_states[state]);
+}
+
+static inline int num_node_state(enum node_states state)
+{
+	return nodes_weight(node_states[state]);
+}
+
+#define for_each_node_state(__node, __state) \
+	for_each_node_mask((__node), node_states[__state])
+
+#define first_online_node	first_node(node_states[N_ONLINE])
+#define next_online_node(nid)	next_node((nid), node_states[N_ONLINE])
+
 extern int nr_node_ids;
 #else
-#define num_online_nodes()	1
-#define num_possible_nodes()	1
-#define node_online(node)	((node) == 0)
-#define node_possible(node)	((node) == 0)
+
+static inline int node_state(int node, enum node_states state)
+{
+	return node == 0;
+}
+
+static inline void node_set_state(int node, enum node_states state)
+{
+}
+
+static inline void node_clear_state(int node, enum node_states state)
+{
+}
+
+static inline int num_node_state(enum node_states state)
+{
+	return 1;
+}
+
+#define for_each_node_state(node, __state) \
+	for ( (node) = 0; (node) == 0; (node) = 1)
+
 #define first_online_node	0
 #define next_online_node(nid)	(MAX_NUMNODES)
 #define nr_node_ids		1
+
 #endif
 
+#define node_online_map 	node_states[N_ONLINE]
+#define node_possible_map 	node_states[N_POSSIBLE]
+
 #define any_online_node(mask)			\
 ({						\
 	int node;				\
@@ -372,10 +429,15 @@ extern int nr_node_ids;
 	node;					\
 })
 
-#define node_set_online(node)	   set_bit((node), node_online_map.bits)
-#define node_set_offline(node)	   clear_bit((node), node_online_map.bits)
+#define num_online_nodes()	num_node_state(N_ONLINE)
+#define num_possible_nodes()	num_node_state(N_POSSIBLE)
+#define node_online(node)	node_state((node), N_ONLINE)
+#define node_possible(node)	node_state((node), N_POSSIBLE)
+
+#define node_set_online(node)	   node_set_state((node), N_ONLINE)
+#define node_set_offline(node)	   node_clear_state((node), N_ONLINE)
 
-#define for_each_node(node)	   for_each_node_mask((node), node_possible_map)
-#define for_each_online_node(node) for_each_node_mask((node), node_online_map)
+#define for_each_node(node)	   for_each_node_state(node, N_POSSIBLE)
+#define for_each_online_node(node) for_each_node_state(node, N_ONLINE)
 
 #endif /* __LINUX_NODEMASK_H */
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
new file mode 100644
index 00000000000..051c1b1ede4
--- /dev/null
+++ b/include/linux/page-isolation.h
@@ -0,0 +1,37 @@
+#ifndef __LINUX_PAGEISOLATION_H
+#define __LINUX_PAGEISOLATION_H
+
+/*
+ * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
+ * If specified range includes migrate types other than MOVABLE,
+ * this will fail with -EBUSY.
+ *
+ * To complete the isolation of all pages in the range, the caller has to
+ * free all pages in the range. test_pages_isolated() can be used to
+ * check that.
+ */
+extern int
+start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn);
+
+/*
+ * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.
+ * target range is [start_pfn, end_pfn)
+ */
+extern int
+undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn);
+
+/*
+ * Test whether all pages in [start_pfn, end_pfn) are isolated or not.
+ */
+extern int
+test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
+
+/*
+ * Internal functions. These change a pageblock's migrate type.
+ * Please use start_isolate_page_range()/undo_isolate_page_range() instead.
+ */
+extern int set_migratetype_isolate(struct page *page);
+extern void unset_migratetype_isolate(struct page *page);
+
+
+#endif
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
new file mode 100644
index 00000000000..e875905f7b1
--- /dev/null
+++ b/include/linux/pageblock-flags.h
@@ -0,0 +1,75 @@
+/*
+ * Macros for manipulating and testing flags related to a
+ * pageblock_nr_pages number of pages.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Original author, Mel Gorman
+ * Major cleanups and reduction of bit operations, Andy Whitcroft
+ */
+#ifndef PAGEBLOCK_FLAGS_H
+#define PAGEBLOCK_FLAGS_H
+
+#include <linux/types.h>
+
+/* Macro to aid the definition of ranges of bits */
+#define PB_range(name, required_bits) \
+	name, name ## _end = (name + required_bits) - 1
+
+/* Bit indices that affect a whole block of pages */
+enum pageblock_bits {
+	PB_range(PB_migrate, 3), /* 3 bits required for migrate types */
+	NR_PAGEBLOCK_BITS
+};
+
+#ifdef CONFIG_HUGETLB_PAGE
+
+#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
+
+/* Huge page sizes are variable */
+extern int pageblock_order;
+
+#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+
+/* Huge pages are a constant size */
+#define pageblock_order		HUGETLB_PAGE_ORDER
+
+#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+
+#else /* CONFIG_HUGETLB_PAGE */
+
+/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
+#define pageblock_order		(MAX_ORDER-1)
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+#define pageblock_nr_pages	(1UL << pageblock_order)
+
+/* Forward declaration */
+struct page;
+
+/* Declarations for getting and setting flags. See mm/page_alloc.c */
+unsigned long get_pageblock_flags_group(struct page *page,
+					int start_bitidx, int end_bitidx);
+void set_pageblock_flags_group(struct page *page, unsigned long flags,
+					int start_bitidx, int end_bitidx);
+/* Accessors covering every pageblock bit; the setter takes the new flags */
+#define get_pageblock_flags(page) \
+			get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1)
+#define set_pageblock_flags(page, flags) \
+	set_pageblock_flags_group(page, flags, 0, NR_PAGEBLOCK_BITS-1)
+
+#endif	/* PAGEBLOCK_FLAGS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 8a83537d697..db8a410ae9e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -84,11 +84,11 @@ static inline struct page *page_cache_alloc_cold(struct address_space *x)
 typedef int filler_t(void *, struct page *);
 
 extern struct page * find_get_page(struct address_space *mapping,
-				unsigned long index);
+				pgoff_t index);
 extern struct page * find_lock_page(struct address_space *mapping,
-				unsigned long index);
+				pgoff_t index);
 extern struct page * find_or_create_page(struct address_space *mapping,
-				unsigned long index, gfp_t gfp_mask);
+				pgoff_t index, gfp_t gfp_mask);
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
@@ -96,44 +96,47 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages);
 
+struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index);
+
 /*
  * Returns locked page at given index in given cache, creating it if needed.
  */
-static inline struct page *grab_cache_page(struct address_space *mapping, unsigned long index)
+static inline struct page *grab_cache_page(struct address_space *mapping,
+								pgoff_t index)
 {
 	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
 }
 
 extern struct page * grab_cache_page_nowait(struct address_space *mapping,
-				unsigned long index);
+				pgoff_t index);
 extern struct page * read_cache_page_async(struct address_space *mapping,
-				unsigned long index, filler_t *filler,
+				pgoff_t index, filler_t *filler,
 				void *data);
 extern struct page * read_cache_page(struct address_space *mapping,
-				unsigned long index, filler_t *filler,
+				pgoff_t index, filler_t *filler,
 				void *data);
 extern int read_cache_pages(struct address_space *mapping,
 		struct list_head *pages, filler_t *filler, void *data);
 
 static inline struct page *read_mapping_page_async(
 						struct address_space *mapping,
-					     unsigned long index, void *data)
+						     pgoff_t index, void *data)
 {
 	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
 	return read_cache_page_async(mapping, index, filler, data);
 }
 
 static inline struct page *read_mapping_page(struct address_space *mapping,
-					     unsigned long index, void *data)
+					     pgoff_t index, void *data)
 {
 	filler_t *filler = (filler_t *)mapping->a_ops->readpage;
 	return read_cache_page(mapping, index, filler, data);
 }
 
 int add_to_page_cache(struct page *page, struct address_space *mapping,
-				unsigned long index, gfp_t gfp_mask);
+				pgoff_t index, gfp_t gfp_mask);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
-				unsigned long index, gfp_t gfp_mask);
+				pgoff_t index, gfp_t gfp_mask);
 extern void remove_from_page_cache(struct page *page);
 extern void __remove_from_page_cache(struct page *page);
 
@@ -218,6 +221,9 @@ static inline int fault_in_pages_writeable(char __user *uaddr, int size)
 {
 	int ret;
 
+	if (unlikely(size == 0))
+		return 0;
+
 	/*
 	 * Writing zeroes into userspace here is OK, because we know that if
 	 * the zero gets there, we'll be overwriting it.
@@ -237,19 +243,23 @@ static inline int fault_in_pages_writeable(char __user *uaddr, int size)
 	return ret;
 }
 
-static inline void fault_in_pages_readable(const char __user *uaddr, int size)
+static inline int fault_in_pages_readable(const char __user *uaddr, int size)
 {
 	volatile char c;
 	int ret;
 
+	if (unlikely(size == 0))
+		return 0;
+
 	ret = __get_user(c, uaddr);
 	if (ret == 0) {
 		const char __user *end = uaddr + size - 1;
 
 		if (((unsigned long)uaddr & PAGE_MASK) !=
 				((unsigned long)end & PAGE_MASK))
-		 	__get_user(c, end);
+		 	ret = __get_user(c, end);
 	}
+	return ret;
 }
 
 #endif /* _LINUX_PAGEMAP_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 2c49561f9b4..df948b44eda 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1995,6 +1995,8 @@
 #define PCI_VENDOR_ID_TOPIC		0x151f
 #define PCI_DEVICE_ID_TOPIC_TP560	0x0000
 
+#define PCI_VENDOR_ID_MAINPINE		0x1522
+#define PCI_DEVICE_ID_MAINPINE_PBRIDGE	0x0100
 #define PCI_VENDOR_ID_ENE		0x1524
 #define PCI_DEVICE_ID_ENE_CB712_SD	0x0550
 #define PCI_DEVICE_ID_ENE_CB712_SD_2	0x0551
@@ -2324,6 +2326,8 @@
 #define PCI_DEVICE_ID_INTEL_MCH_PC	0x3599
 #define PCI_DEVICE_ID_INTEL_MCH_PC1	0x359a
 #define PCI_DEVICE_ID_INTEL_E7525_MCH	0x359e
+#define PCI_DEVICE_ID_INTEL_IOAT_CNB	0x360b
+#define PCI_DEVICE_ID_INTEL_IOAT_SCNB	0x65ff
 #define PCI_DEVICE_ID_INTEL_TOLAPAI_0	0x5031
 #define PCI_DEVICE_ID_INTEL_TOLAPAI_1	0x5032
 #define PCI_DEVICE_ID_INTEL_82371SB_0	0x7000
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index f9e77d2ee32..b6116b4445c 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -26,28 +26,31 @@
 #include <linux/rcupdate.h>
 
 /*
- * A direct pointer (root->rnode pointing directly to a data item,
- * rather than another radix_tree_node) is signalled by the low bit
- * set in the root->rnode pointer.
+ * An indirect pointer (root->rnode pointing to a radix_tree_node, rather
+ * than a data item) is signalled by the low bit set in the root->rnode
+ * pointer.
  *
- * In this case root->height is also NULL, but the direct pointer tests are
- * needed for RCU lookups when root->height is unreliable.
+ * In this case root->height is > 0, but the indirect pointer tests are
+ * needed for RCU lookups (because root->height is unreliable). The only
+ * time callers need worry about this is when doing a lookup_slot under
+ * RCU.
  */
-#define RADIX_TREE_DIRECT_PTR	1
+#define RADIX_TREE_INDIRECT_PTR	1
+#define RADIX_TREE_RETRY ((void *)-1UL)
 
-static inline void *radix_tree_ptr_to_direct(void *ptr)
+static inline void *radix_tree_ptr_to_indirect(void *ptr)
 {
-	return (void *)((unsigned long)ptr | RADIX_TREE_DIRECT_PTR);
+	return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
 }
 
-static inline void *radix_tree_direct_to_ptr(void *ptr)
+static inline void *radix_tree_indirect_to_ptr(void *ptr)
 {
-	return (void *)((unsigned long)ptr & ~RADIX_TREE_DIRECT_PTR);
+	return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
 }
 
-static inline int radix_tree_is_direct_ptr(void *ptr)
+static inline int radix_tree_is_indirect_ptr(void *ptr)
 {
-	return (int)((unsigned long)ptr & RADIX_TREE_DIRECT_PTR);
+	return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
 }
 
 /*** radix-tree API starts here ***/
@@ -130,7 +133,10 @@ do {									\
  */
 static inline void *radix_tree_deref_slot(void **pslot)
 {
-	return radix_tree_direct_to_ptr(*pslot);
+	void *ret = *pslot;
+	if (unlikely(radix_tree_is_indirect_ptr(ret)))
+		ret = RADIX_TREE_RETRY;
+	return ret;
 }
 /**
  * radix_tree_replace_slot	- replace item in a slot
@@ -142,10 +148,8 @@ static inline void *radix_tree_deref_slot(void **pslot)
  */
 static inline void radix_tree_replace_slot(void **pslot, void *item)
 {
-	BUG_ON(radix_tree_is_direct_ptr(item));
-	rcu_assign_pointer(*pslot,
-		(void *)((unsigned long)item |
-			((unsigned long)*pslot & RADIX_TREE_DIRECT_PTR)));
+	BUG_ON(radix_tree_is_indirect_ptr(item));
+	rcu_assign_pointer(*pslot, item);
 }
 
 int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
@@ -155,6 +159,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
 unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 			unsigned long first_index, unsigned int max_items);
+unsigned long radix_tree_next_hole(struct radix_tree_root *root,
+				unsigned long index, unsigned long max_scan);
 int radix_tree_preload(gfp_t gfp_mask);
 void radix_tree_init(void);
 void *radix_tree_tag_set(struct radix_tree_root *root,
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 4efbd9c445f..2dc7464cce5 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -20,4 +20,88 @@ static inline void sg_init_one(struct scatterlist *sg, const void *buf,
 	sg_set_buf(sg, buf, buflen);
 }
 
+/*
+ * We overload the LSB of the page pointer to indicate whether it's
+ * a valid sg entry, or whether it points to the start of a new scatterlist.
+ * Those low bits are there for everyone! (thanks mason :-)
+ */
+#define sg_is_chain(sg)		((unsigned long) (sg)->page & 0x01)
+#define sg_chain_ptr(sg)	\
+	((struct scatterlist *) ((unsigned long) (sg)->page & ~0x01))
+
+/**
+ * sg_next - return the next scatterlist entry in a list
+ * @sg:		The current sg entry
+ *
+ * Usually the next entry will be @sg@ + 1, but if this sg element is part
+ * of a chained scatterlist, it could jump to the start of a new
+ * scatterlist array.
+ *
+ * Note that the caller must ensure that there are further entries after
+ * the current entry, this function will NOT return NULL for an end-of-list.
+ *
+ */
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+	sg++;
+
+	if (unlikely(sg_is_chain(sg)))
+		sg = sg_chain_ptr(sg);
+
+	return sg;
+}
+
+/*
+ * Loop over each sg element, following the pointer to a new list if necessary
+ */
+#define for_each_sg(sglist, sg, nr, __i)	\
+	for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+
+/**
+ * sg_last - return the last scatterlist entry in a list
+ * @sgl:	First entry in the scatterlist
+ * @nents:	Number of entries in the scatterlist
+ *
+ * Should only be used casually, it (currently) scans the entire list
+ * to get the last entry.
+ *
+ * Note that the @sgl@ pointer passed in need not be the first one,
+ * the important bit is that @nents@ denotes the number of entries that
+ * exist from @sgl@.
+ *
+ */
+static inline struct scatterlist *sg_last(struct scatterlist *sgl,
+					  unsigned int nents)
+{
+#ifndef ARCH_HAS_SG_CHAIN
+	struct scatterlist *ret = &sgl[nents - 1];
+#else
+	struct scatterlist *sg, *ret = NULL;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		ret = sg;
+
+#endif
+	return ret;
+}
+
+/**
+ * sg_chain - Chain two sglists together
+ * @prv:	First scatterlist
+ * @prv_nents:	Number of entries in prv
+ * @sgl:	Second scatterlist
+ *
+ * Links @prv@ and @sgl@ together, to form a longer scatterlist.
+ *
+ */
+static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
+			    struct scatterlist *sgl)
+{
+#ifndef ARCH_HAS_SG_CHAIN
+	BUG();
+#endif
+	prv[prv_nents - 1].page = (struct page *) ((unsigned long) sgl | 0x01);
+}
+
 #endif /* _LINUX_SCATTERLIST_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 228e0a8ce24..592e3a55f81 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_SCHED_H
 #define _LINUX_SCHED_H
 
-#include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
-
 /*
  * cloning flags:
  */
@@ -58,12 +56,12 @@ struct sched_param {
 #include <linux/cpumask.h>
 #include <linux/errno.h>
 #include <linux/nodemask.h>
+#include <linux/mm_types.h>
 
 #include <asm/system.h>
 #include <asm/semaphore.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
-#include <asm/mmu.h>
 #include <asm/cputime.h>
 
 #include <linux/smp.h>
@@ -319,7 +317,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 #define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
 #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
 #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
-typedef atomic_long_t mm_counter_t;
 
 #else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
 /*
@@ -331,7 +328,6 @@ typedef atomic_long_t mm_counter_t;
 #define add_mm_counter(mm, member, value) (mm)->_##member += (value)
 #define inc_mm_counter(mm, member) (mm)->_##member++
 #define dec_mm_counter(mm, member) (mm)->_##member--
-typedef unsigned long mm_counter_t;
 
 #endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
 
@@ -368,74 +364,6 @@ extern int get_dumpable(struct mm_struct *mm);
 #define MMF_DUMP_FILTER_DEFAULT \
 	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED))
 
-struct mm_struct {
-	struct vm_area_struct * mmap;		/* list of VMAs */
-	struct rb_root mm_rb;
-	struct vm_area_struct * mmap_cache;	/* last find_vma result */
-	unsigned long (*get_unmapped_area) (struct file *filp,
-				unsigned long addr, unsigned long len,
-				unsigned long pgoff, unsigned long flags);
-	void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
-	unsigned long mmap_base;		/* base of mmap area */
-	unsigned long task_size;		/* size of task vm space */
-	unsigned long cached_hole_size;         /* if non-zero, the largest hole below free_area_cache */
-	unsigned long free_area_cache;		/* first hole of size cached_hole_size or larger */
-	pgd_t * pgd;
-	atomic_t mm_users;			/* How many users with user space? */
-	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
-	int map_count;				/* number of VMAs */
-	struct rw_semaphore mmap_sem;
-	spinlock_t page_table_lock;		/* Protects page tables and some counters */
-
-	struct list_head mmlist;		/* List of maybe swapped mm's.  These are globally strung
-						 * together off init_mm.mmlist, and are protected
-						 * by mmlist_lock
-						 */
-
-	/* Special counters, in some configurations protected by the
-	 * page_table_lock, in other configurations by being atomic.
-	 */
-	mm_counter_t _file_rss;
-	mm_counter_t _anon_rss;
-
-	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
-	unsigned long hiwater_vm;	/* High-water virtual memory usage */
-
-	unsigned long total_vm, locked_vm, shared_vm, exec_vm;
-	unsigned long stack_vm, reserved_vm, def_flags, nr_ptes;
-	unsigned long start_code, end_code, start_data, end_data;
-	unsigned long start_brk, brk, start_stack;
-	unsigned long arg_start, arg_end, env_start, env_end;
-
-	unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
-
-	cpumask_t cpu_vm_mask;
-
-	/* Architecture-specific MM context */
-	mm_context_t context;
-
-	/* Swap token stuff */
-	/*
-	 * Last value of global fault stamp as seen by this process.
-	 * In other words, this value gives an indication of how long
-	 * it has been since this task got the token.
-	 * Look at mm/thrash.c
-	 */
-	unsigned int faultstamp;
-	unsigned int token_priority;
-	unsigned int last_interval;
-
-	unsigned long flags; /* Must use atomic bitops to access the bits */
-
-	/* coredumping support */
-	int core_waiters;
-	struct completion *core_startup_done, core_done;
-
-	/* aio bits */
-	rwlock_t		ioctx_list_lock;
-	struct kioctx		*ioctx_list;
-};
-
 struct sighand_struct {
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
@@ -801,9 +729,6 @@ struct sched_domain {
 #endif
 };
 
-extern int partition_sched_domains(cpumask_t *partition1,
-				    cpumask_t *partition2);
-
 #endif	/* CONFIG_SMP */
 
 /*
diff --git a/include/linux/selection.h b/include/linux/selection.h
index f9457861937..8cdaa1151d2 100644
--- a/include/linux/selection.h
+++ b/include/linux/selection.h
@@ -13,6 +13,7 @@
 struct tty_struct;
 
 extern struct vc_data *sel_cons;
+struct tty_struct;
 
 extern void clear_selection(void);
 extern int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 09d17b06bf0..4db77249281 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -291,7 +291,8 @@ struct uart_port {
 	resource_size_t		mapbase;		/* for ioremap */
 	struct device		*dev;			/* parent device */
 	unsigned char		hub6;			/* this should be in the 8250 driver */
-	unsigned char		unused[3];
+	unsigned char		suspended;
+	unsigned char		unused[2];
 	void			*private_data;		/* generic platform data pointer */
 };
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d859354b9e5..3a5bad3ad12 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -24,12 +24,14 @@
 #define SLAB_HWCACHE_ALIGN	0x00002000UL	/* Align objs on cache lines */
 #define SLAB_CACHE_DMA		0x00004000UL	/* Use GFP_DMA memory */
 #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
-#define SLAB_RECLAIM_ACCOUNT	0x00020000UL	/* Objects are reclaimable */
 #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
 #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
 #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
 #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */
 
+/* The following flags affect the page allocator grouping pages by mobility */
+#define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
+#define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
 /*
  * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
  *
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 74962077f63..d65159d1d4f 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,6 +11,14 @@
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
 
+struct kmem_cache_cpu {
+	void **freelist;
+	struct page *page;
+	int node;
+	unsigned int offset;
+	unsigned int objsize;
+};
+
 struct kmem_cache_node {
 	spinlock_t list_lock;	/* Protect partial list and nr_partial */
 	unsigned long nr_partial;
@@ -54,7 +62,11 @@ struct kmem_cache {
 	int defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
-	struct page *cpu_slab[NR_CPUS];
+#ifdef CONFIG_SMP
+	struct kmem_cache_cpu *cpu_slab[NR_CPUS];
+#else
+	struct kmem_cache_cpu cpu_slab;
+#endif
 };
 
 /*
@@ -72,7 +84,7 @@ struct kmem_cache {
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
+extern struct kmem_cache kmalloc_caches[PAGE_SHIFT];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -83,9 +95,6 @@ static __always_inline int kmalloc_index(size_t size)
 	if (!size)
 		return 0;
 
-	if (size > KMALLOC_MAX_SIZE)
-		return -1;
-
 	if (size <= KMALLOC_MIN_SIZE)
 		return KMALLOC_SHIFT_LOW;
 
@@ -102,6 +111,10 @@ static __always_inline int kmalloc_index(size_t size)
 	if (size <=        512) return 9;
 	if (size <=       1024) return 10;
 	if (size <=   2 * 1024) return 11;
+/*
+ * The following is only needed to support architectures with a larger page
+ * size than 4k.
+ */
 	if (size <=   4 * 1024) return 12;
 	if (size <=   8 * 1024) return 13;
 	if (size <=  16 * 1024) return 14;
@@ -109,13 +122,9 @@ static __always_inline int kmalloc_index(size_t size)
 	if (size <=  64 * 1024) return 16;
 	if (size <= 128 * 1024) return 17;
 	if (size <= 256 * 1024) return 18;
-	if (size <=  512 * 1024) return 19;
+	if (size <= 512 * 1024) return 19;
 	if (size <= 1024 * 1024) return 20;
 	if (size <=  2 * 1024 * 1024) return 21;
-	if (size <=  4 * 1024 * 1024) return 22;
-	if (size <=  8 * 1024 * 1024) return 23;
-	if (size <= 16 * 1024 * 1024) return 24;
-	if (size <= 32 * 1024 * 1024) return 25;
 	return -1;
 
 /*
@@ -140,19 +149,6 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 	if (index == 0)
 		return NULL;
 
-	/*
-	 * This function only gets expanded if __builtin_constant_p(size), so
-	 * testing it here shouldn't be needed.  But some versions of gcc need
-	 * help.
-	 */
-	if (__builtin_constant_p(size) && index < 0) {
-		/*
-		 * Generate a link failure. Would be great if we could
-		 * do something to stop the compile here.
-		 */
-		extern void __kmalloc_size_too_large(void);
-		__kmalloc_size_too_large();
-	}
 	return &kmalloc_caches[index];
 }
 
@@ -168,15 +164,21 @@ void *__kmalloc(size_t size, gfp_t flags);
 
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
-	if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) {
-		struct kmem_cache *s = kmalloc_slab(size);
+	if (__builtin_constant_p(size)) {
+		if (size > PAGE_SIZE / 2)
+			return (void *)__get_free_pages(flags | __GFP_COMP,
+							get_order(size));
 
-		if (!s)
-			return ZERO_SIZE_PTR;
+		if (!(flags & SLUB_DMA)) {
+			struct kmem_cache *s = kmalloc_slab(size);
 
-		return kmem_cache_alloc(s, flags);
-	} else
-		return __kmalloc(size, flags);
+			if (!s)
+				return ZERO_SIZE_PTR;
+
+			return kmem_cache_alloc(s, flags);
+		}
+	}
+	return __kmalloc(size, flags);
 }
 
 #ifdef CONFIG_NUMA
@@ -185,15 +187,16 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
-	if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) {
-		struct kmem_cache *s = kmalloc_slab(size);
+	if (__builtin_constant_p(size) &&
+		size <= PAGE_SIZE / 2 && !(flags & SLUB_DMA)) {
+			struct kmem_cache *s = kmalloc_slab(size);
 
 		if (!s)
 			return ZERO_SIZE_PTR;
 
 		return kmem_cache_alloc_node(s, flags, node);
-	} else
-		return __kmalloc_node(size, flags, node);
+	}
+	return __kmalloc_node(size, flags, node);
 }
 #endif
 
diff --git a/include/linux/sm501-regs.h b/include/linux/sm501-regs.h
index 014e73b31fc..df7620dd8f3 100644
--- a/include/linux/sm501-regs.h
+++ b/include/linux/sm501-regs.h
@@ -15,6 +15,24 @@
 
 /* config 1 */
 #define SM501_SYSTEM_CONTROL 		(0x000000)
+
+#define SM501_SYSCTRL_PANEL_TRISTATE	(1<<0)
+#define SM501_SYSCTRL_MEM_TRISTATE	(1<<1)
+#define SM501_SYSCTRL_CRT_TRISTATE	(1<<2)
+
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_MASK (3<<4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_1	(0<<4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_2	(1<<4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_4	(2<<4)
+#define SM501_SYSCTRL_PCI_SLAVE_BURST_8	(3<<4)
+
+#define SM501_SYSCTRL_PCI_CLOCK_RUN_EN	(1<<6)
+#define SM501_SYSCTRL_PCI_RETRY_DISABLE	(1<<7)
+#define SM501_SYSCTRL_PCI_SUBSYS_LOCK	(1<<11)
+#define SM501_SYSCTRL_PCI_BURST_READ_EN	(1<<15)
+
+/* miscellaneous control */
+
 #define SM501_MISC_CONTROL		(0x000004)
 
 #define SM501_MISC_BUS_SH		(0x0)
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 002a3cddbdd..387e428f1cd 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -195,7 +195,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
 
 /**
  * struct spi_master - interface to SPI master controller
- * @cdev: class interface to this driver
+ * @dev: device interface to this driver
  * @bus_num: board-specific (and often SOC-specific) identifier for a
  *	given SPI controller.
  * @num_chipselect: chipselects are used to distinguish individual
@@ -222,7 +222,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * message's completion function when the transaction completes.
  */
 struct spi_master {
-	struct class_device	cdev;
+	struct device	dev;
 
 	/* other than negative (== assign one dynamically), bus_num is fully
 	 * board-specific.  usually that simplifies to being SOC-specific.
@@ -268,17 +268,17 @@ struct spi_master {
 
 static inline void *spi_master_get_devdata(struct spi_master *master)
 {
-	return class_get_devdata(&master->cdev);
+	return dev_get_drvdata(&master->dev);
 }
 
 static inline void spi_master_set_devdata(struct spi_master *master, void *data)
 {
-	class_set_devdata(&master->cdev, data);
+	dev_set_drvdata(&master->dev, data);
 }
 
 static inline struct spi_master *spi_master_get(struct spi_master *master)
 {
-	if (!master || !class_device_get(&master->cdev))
+	if (!master || !get_device(&master->dev))
 		return NULL;
 	return master;
 }
@@ -286,7 +286,7 @@ static inline struct spi_master *spi_master_get(struct spi_master *master)
 static inline void spi_master_put(struct spi_master *master)
 {
 	if (master)
-		class_device_put(&master->cdev);
+		put_device(&master->dev);
 }
 
 
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 46705e91573..c1527c2ef3c 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -481,7 +481,7 @@ static inline void *get_gadget_data (struct usb_gadget *gadget)
 
 /**
  * gadget_is_dualspeed - return true iff the hardware handles high speed
- * @gadget: controller that might support both high and full speeds
+ * @g: controller that might support both high and full speeds
  */
 static inline int gadget_is_dualspeed(struct usb_gadget *g)
 {
@@ -497,7 +497,7 @@ static inline int gadget_is_dualspeed(struct usb_gadget *g)
 
 /**
  * gadget_is_otg - return true iff the hardware is OTG-ready
- * @gadget: controller that might have a Mini-AB connector
+ * @g: controller that might have a Mini-AB connector
  *
  * This is a runtime test, since kernels with a USB-OTG stack sometimes
  * run on boards which only have a Mini-B (or Mini-A) connector.
diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h
index 90ef552c42d..f047a1fd64f 100644
--- a/include/pcmcia/ds.h
+++ b/include/pcmcia/ds.h
@@ -184,6 +184,7 @@ struct pcmcia_device {
 
 	char *			prod_id[4];
 
+	u64			dma_mask;
 	struct device		dev;
 
 #ifdef CONFIG_PCMCIA_IOCTL
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 9f8f80ab0c8..702fcfeb37f 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -11,13 +11,6 @@
 #include <linux/types.h>
 
 /*
- *	The maximum sg list length SCSI can cope with
- *	(currently must be a power of 2 between 32 and 256)
- */
-#define SCSI_MAX_PHYS_SEGMENTS	MAX_PHYS_SEGMENTS
-
-
-/*
  *	SCSI command lengths
  */
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 65ab5145a09..3f47e522a1e 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -5,6 +5,7 @@
 #include <linux/list.h>
 #include <linux/types.h>
 #include <linux/timer.h>
+#include <linux/scatterlist.h>
 
 struct request;
 struct scatterlist;
@@ -68,7 +69,7 @@ struct scsi_cmnd {
 
 	/* These elements define the operation we ultimately want to perform */
 	unsigned short use_sg;	/* Number of pieces of scatter-gather */
-	unsigned short sglist_len;	/* size of malloc'd scatter-gather list */
+	unsigned short __use_sg;
 
 	unsigned underflow;	/* Return error if less than
 				   this amount is transferred */
@@ -128,7 +129,7 @@ extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 extern void scsi_kunmap_atomic_sg(void *virt);
 
 extern struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *, gfp_t);
-extern void scsi_free_sgtable(struct scatterlist *, int);
+extern void scsi_free_sgtable(struct scsi_cmnd *);
 
 extern int scsi_dma_map(struct scsi_cmnd *cmd);
 extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
@@ -148,6 +149,6 @@ static inline int scsi_get_resid(struct scsi_cmnd *cmd)
 }
 
 #define scsi_for_each_sg(cmd, sg, nseg, __i)			\
-	for (__i = 0, sg = scsi_sglist(cmd); __i < (nseg); __i++, (sg)++)
+	for_each_sg(scsi_sglist(cmd), sg, nseg, __i)
 
 #endif /* _SCSI_SCSI_CMND_H */
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 7d210cd6c38..0fd4746ee39 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -39,6 +39,9 @@ struct blk_queue_tags;
 #define DISABLE_CLUSTERING 0
 #define ENABLE_CLUSTERING 1
 
+#define DISABLE_SG_CHAINING 0
+#define ENABLE_SG_CHAINING 1
+
 enum scsi_eh_timer_return {
 	EH_NOT_HANDLED,
 	EH_HANDLED,
@@ -443,6 +446,15 @@ struct scsi_host_template {
 	unsigned ordered_tag:1;
 
 	/*
+	 * True if the low-level driver can support sg chaining. This
+	 * will be removed eventually when all the drivers are
+	 * converted to support sg chaining.
+	 *
+	 * Status: OBSOLETE
+	 */
+	unsigned use_sg_chaining:1;
+
+	/*
 	 * Countdown for host blocking with no commands outstanding
 	 */
 	unsigned int max_host_blocked;
@@ -586,6 +598,7 @@ struct Scsi_Host {
 	unsigned unchecked_isa_dma:1;
 	unsigned use_clustering:1;
 	unsigned use_blk_tcq:1;
+	unsigned use_sg_chaining:1;
 
 	/*
 	 * Host has requested that no further requests come through for the
diff --git a/include/video/Kbuild b/include/video/Kbuild
index a14f9c045b8..53a6c7310e6 100644
--- a/include/video/Kbuild
+++ b/include/video/Kbuild
@@ -1 +1 @@
-unifdef-y += sisfb.h
+unifdef-y += sisfb.h uvesafb.h
diff --git a/include/video/mbxfb.h b/include/video/mbxfb.h
index 20b9002712e..ea18961fc5e 100644
--- a/include/video/mbxfb.h
+++ b/include/video/mbxfb.h
@@ -29,18 +29,18 @@ struct mbxfb_platform_data {
 };
 
 /* planar */
-#define MBXFB_FMT_YUV12		0
+#define MBXFB_FMT_YUV16		0
+#define MBXFB_FMT_YUV12		1
 
 /* packed */
-#define MBXFB_FMT_UY0VY1	1
-#define MBXFB_FMT_VY0UY1	2
-#define MBXFB_FMT_Y0UY1V	3
-#define MBXFB_FMT_Y0VY1U	4
+#define MBXFB_FMT_UY0VY1	2
+#define MBXFB_FMT_VY0UY1	3
+#define MBXFB_FMT_Y0UY1V	4
+#define MBXFB_FMT_Y0VY1U	5
 struct mbxfb_overlaySetup {
 	__u32 enable;
 	__u32 x, y;
 	__u32 width, height;
-	__u32 alpha;
 	__u32 fmt;
 	__u32 mem_offset;
 	__u32 scaled_width;
@@ -54,6 +54,45 @@ struct mbxfb_overlaySetup {
 	__u16 UV_stride;
 };
 
-#define MBXFB_IOCX_OVERLAY	_IOWR(0xF4, 0x00,struct mbxfb_overlaySetup)
+#define MBXFB_ALPHABLEND_NONE		0
+#define MBXFB_ALPHABLEND_GLOBAL		1
+#define MBXFB_ALPHABLEND_PIXEL		2
+
+#define MBXFB_COLORKEY_DISABLED		0
+#define MBXFB_COLORKEY_PREVIOUS		1
+#define MBXFB_COLORKEY_CURRENT		2
+struct mbxfb_alphaCtl {
+	__u8 overlay_blend_mode;
+	__u8 overlay_colorkey_mode;
+	__u8 overlay_global_alpha;
+	__u32 overlay_colorkey;
+	__u32 overlay_colorkey_mask;
+
+	__u8 graphics_blend_mode;
+	__u8 graphics_colorkey_mode;
+	__u8 graphics_global_alpha;
+	__u32 graphics_colorkey;
+	__u32 graphics_colorkey_mask;
+};
+
+#define MBXFB_PLANE_GRAPHICS	0
+#define MBXFB_PLANE_VIDEO	1
+struct mbxfb_planeorder {
+	__u8 bottom;
+	__u8 top;
+};
+
+struct mbxfb_reg {
+	__u32 addr; 	/* offset from 0x03fe 0000 */
+	__u32 val;		/* value */
+	__u32 mask;		/* which bits to touch (for write) */
+};
+
+#define MBXFB_IOCX_OVERLAY		_IOWR(0xF4, 0x00,struct mbxfb_overlaySetup)
+#define MBXFB_IOCG_ALPHA		_IOR(0xF4, 0x01,struct mbxfb_alphaCtl)
+#define MBXFB_IOCS_ALPHA		_IOW(0xF4, 0x02,struct mbxfb_alphaCtl)
+#define MBXFB_IOCS_PLANEORDER	_IOR(0xF4, 0x03,struct mbxfb_planeorder)
+#define MBXFB_IOCS_REG			_IOW(0xF4, 0x04,struct mbxfb_reg)
+#define MBXFB_IOCX_REG			_IOWR(0xF4, 0x05,struct mbxfb_reg)
 
 #endif /* __MBX_FB_H */
diff --git a/include/video/permedia2.h b/include/video/permedia2.h
index 9e49c9571ec..9ce9adbfda2 100644
--- a/include/video/permedia2.h
+++ b/include/video/permedia2.h
@@ -58,7 +58,14 @@
 #define PM2R_RD_PALETTE_DATA				0x4008
 #define PM2R_RD_PIXEL_MASK				0x4010
 #define PM2R_RD_PALETTE_READ_ADDRESS			0x4018
+#define PM2R_RD_CURSOR_COLOR_ADDRESS		        0x4020
+#define PM2R_RD_CURSOR_COLOR_DATA		        0x4028
 #define PM2R_RD_INDEXED_DATA				0x4050
+#define PM2R_RD_CURSOR_DATA				0x4058
+#define PM2R_RD_CURSOR_X_LSB				0x4060
+#define PM2R_RD_CURSOR_X_MSB				0x4068
+#define PM2R_RD_CURSOR_Y_LSB				0x4070
+#define PM2R_RD_CURSOR_Y_MSB				0x4078
 
 #define PM2R_START_X_DOM				0x8000
 #define PM2R_D_X_DOM					0x8008
@@ -68,11 +75,14 @@
 #define PM2R_D_Y					0x8028
 #define PM2R_COUNT					0x8030
 #define PM2R_RENDER					0x8038
+#define PM2R_BIT_MASK_PATTERN				0x8068
 #define PM2R_RASTERIZER_MODE				0x80a0
 #define PM2R_RECTANGLE_ORIGIN				0x80d0
 #define PM2R_RECTANGLE_SIZE				0x80d8
 #define PM2R_PACKED_DATA_LIMITS				0x8150
 #define PM2R_SCISSOR_MODE				0x8180
+#define PM2R_SCISSOR_MIN_XY				0x8188
+#define PM2R_SCISSOR_MAX_XY				0x8190
 #define PM2R_SCREEN_SIZE				0x8198
 #define PM2R_AREA_STIPPLE_MODE				0x81a0
 #define PM2R_WINDOW_ORIGIN				0x81c8
@@ -83,7 +93,9 @@
 #define PM2R_TEXEL_LUT_MODE				0x8678
 #define PM2R_TEXTURE_COLOR_MODE				0x8680
 #define PM2R_FOG_MODE					0x8690
+#define PM2R_TEXEL0					0x8760
 #define PM2R_COLOR_DDA_MODE				0x87e0
+#define PM2R_CONSTANT_COLOR				0x87e8
 #define PM2R_ALPHA_BLEND_MODE				0x8810
 #define PM2R_DITHER_MODE				0x8818
 #define PM2R_FB_SOFT_WRITE_MASK				0x8820
@@ -148,6 +160,7 @@
 #define PM2VI_RD_CURSOR_Y_HIGH				0x00A
 #define PM2VI_RD_CURSOR_X_HOT				0x00B
 #define PM2VI_RD_CURSOR_Y_HOT				0x00C
+#define PM2VI_RD_OVERLAY_KEY				0x00D
 #define PM2VI_RD_CLK0_PRESCALE				0x201
 #define PM2VI_RD_CLK0_FEEDBACK				0x202
 #define PM2VI_RD_CLK0_POSTSCALE				0x203
@@ -169,6 +182,8 @@
 #define PM2F_RENDER_TRAPEZOID				(1L<<6)
 #define PM2F_RENDER_POINT				(2L<<6)
 #define PM2F_RENDER_RECTANGLE				(3L<<6)
+#define PM2F_RENDER_SYNC_ON_BIT_MASK			(1L<<11)
+#define PM2F_RENDER_TEXTURE_ENABLE			(1L<<13)
 #define PM2F_SYNCHRONIZATION				(1L<<10)
 #define PM2F_PLL_LOCKED					0x10
 #define PM2F_BEING_RESET				(1L<<31)
@@ -224,6 +239,8 @@
 #define PM2F_APERTURE_STANDARD				0
 #define PM2F_APERTURE_BYTESWAP				1
 #define PM2F_APERTURE_HALFWORDSWAP			2
+#define PM2F_CURSORMODE_CURSOR_ENABLE			(1 << 0)
+#define PM2F_CURSORMODE_TYPE_X				(1 << 4)
 
 typedef enum {
 	PM2_TYPE_PERMEDIA2,
diff --git a/include/video/pm3fb.h b/include/video/pm3fb.h
index d52e45a1e9b..2b85134fe96 100644
--- a/include/video/pm3fb.h
+++ b/include/video/pm3fb.h
@@ -1,6 +1,6 @@
 /*
  *  linux/drivers/video/pm3fb.h -- 3DLabs Permedia3 frame buffer device
- *  
+ *
  *  Copyright (C) 2001 Romain Dolbeau <dolbeau@irisa.fr>
  *  Copyright (C) 2001 Sven Luther, <luther@dpt-info.u-strasbg.fr>
  *
@@ -51,37 +51,36 @@
 *  GLINT Permedia3 Region 0 Bypass Controls   *
 ***********************************************/
 #define PM3ByAperture1Mode					0x0300
-	#define PM3ByApertureMode_BYTESWAP_ABCD			(0<<0)
-	#define PM3ByApertureMode_BYTESWAP_BADC			(1<<0)
-	#define PM3ByApertureMode_BYTESWAP_CDAB			(2<<0)
-	#define PM3ByApertureMode_BYTESWAP_DCBA			(3<<0)
-	#define PM3ByApertureMode_PATCH_DISABLE			(0<<2)
-	#define PM3ByApertureMode_PATCH_ENABLE			(1<<2)
-	#define PM3ByApertureMode_FORMAT_RAW			(0<<3)
-	#define PM3ByApertureMode_FORMAT_YUYV			(1<<3)
-	#define PM3ByApertureMode_FORMAT_UYVY			(2<<3)
-	#define PM3ByApertureMode_PIXELSIZE_8BIT		(0<<5)
-	#define PM3ByApertureMode_PIXELSIZE_16BIT		(1<<5)
-	#define PM3ByApertureMode_PIXELSIZE_32BIT		(2<<5)
-                #define PM3ByApertureMode_PIXELSIZE_MASK        (3<<5)
-	#define PM3ByApertureMode_EFFECTIVE_STRIDE_1024		(0<<7)
-	#define PM3ByApertureMode_EFFECTIVE_STRIDE_2048		(1<<7)
-	#define PM3ByApertureMode_EFFECTIVE_STRIDE_4096		(2<<7)
-	#define PM3ByApertureMode_EFFECTIVE_STRIDE_8192		(3<<7)
-	#define PM3ByApertureMode_PATCH_OFFSET_X(off)	(((off)&7f)<<9)
-	#define PM3ByApertureMode_PATCH_OFFSET_Y(off)	(((off)&7f)<<16)
-	#define PM3ByApertureMode_FRAMEBUFFER			(0<<21)
-	#define PM3ByApertureMode_LOCALBUFFER			(1<<21)
-	#define PM3ByApertureMode_DOUBLE_WRITE_OFF		(0<<22)
-	#define PM3ByApertureMode_DOUBLE_WRITE_1MB		(1<<22)
-	#define PM3ByApertureMode_DOUBLE_WRITE_2MB		(2<<22)
-	#define PM3ByApertureMode_DOUBLE_WRITE_4MB		(3<<22)
-	#define PM3ByApertureMode_DOUBLE_WRITE_8MB		(4<<22)
-	#define PM3ByApertureMode_DOUBLE_WRITE_16MB		(5<<22)
-	#define PM3ByApertureMode_DOUBLE_WRITE_32MB		(6<<22)
+	#define PM3ByApertureMode_BYTESWAP_ABCD			(0 << 0)
+	#define PM3ByApertureMode_BYTESWAP_BADC			(1 << 0)
+	#define PM3ByApertureMode_BYTESWAP_CDAB			(2 << 0)
+	#define PM3ByApertureMode_BYTESWAP_DCBA			(3 << 0)
+	#define PM3ByApertureMode_PATCH_ENABLE			(1 << 2)
+	#define PM3ByApertureMode_FORMAT_RAW			(0 << 3)
+	#define PM3ByApertureMode_FORMAT_YUYV			(1 << 3)
+	#define PM3ByApertureMode_FORMAT_UYVY			(2 << 3)
+	#define PM3ByApertureMode_PIXELSIZE_8BIT		(0 << 5)
+	#define PM3ByApertureMode_PIXELSIZE_16BIT		(1 << 5)
+	#define PM3ByApertureMode_PIXELSIZE_32BIT		(2 << 5)
+		#define PM3ByApertureMode_PIXELSIZE_MASK	(3 << 5)
+	#define PM3ByApertureMode_EFFECTIVE_STRIDE_1024		(0 << 7)
+	#define PM3ByApertureMode_EFFECTIVE_STRIDE_2048		(1 << 7)
+	#define PM3ByApertureMode_EFFECTIVE_STRIDE_4096		(2 << 7)
+	#define PM3ByApertureMode_EFFECTIVE_STRIDE_8192		(3 << 7)
+	#define PM3ByApertureMode_PATCH_OFFSET_X(off)	(((off) & 0x7f) << 9)
+	#define PM3ByApertureMode_PATCH_OFFSET_Y(off)	(((off) & 0x7f) << 16)
+	#define PM3ByApertureMode_FRAMEBUFFER			(0 << 21)
+	#define PM3ByApertureMode_LOCALBUFFER			(1 << 21)
+	#define PM3ByApertureMode_DOUBLE_WRITE_OFF		(0 << 22)
+	#define PM3ByApertureMode_DOUBLE_WRITE_1MB		(1 << 22)
+	#define PM3ByApertureMode_DOUBLE_WRITE_2MB		(2 << 22)
+	#define PM3ByApertureMode_DOUBLE_WRITE_4MB		(3 << 22)
+	#define PM3ByApertureMode_DOUBLE_WRITE_8MB		(4 << 22)
+	#define PM3ByApertureMode_DOUBLE_WRITE_16MB		(5 << 22)
+	#define PM3ByApertureMode_DOUBLE_WRITE_32MB		(6 << 22)
 
 #define PM3ByAperture2Mode					0x0328
-	
+
 /**********************************************
 *  GLINT Permedia3 Memory Control (0x1000)    *
 ***********************************************/
@@ -89,7 +88,7 @@
 #define PM3MemBypassWriteMask					0x1008
 #define PM3MemScratch						0x1010
 #define PM3LocalMemCaps						0x1018
-        #define PM3LocalMemCaps_NoWriteMask                     (1 << 28)
+	#define PM3LocalMemCaps_NoWriteMask			(1 << 28)
 #define PM3LocalMemTimings					0x1020
 #define PM3LocalMemControl					0x1028
 #define PM3LocalMemRefresh					0x1030
@@ -112,45 +111,41 @@
 #define PM3VsStart						0x3048
 #define PM3VsEnd						0x3050
 #define PM3VideoControl						0x3058
-	#define PM3VideoControl_DISABLE				(0<<0)
-	#define PM3VideoControl_ENABLE				(1<<0)
-	#define PM3VideoControl_BLANK_ACTIVE_HIGH		(0<<1)
-	#define PM3VideoControl_BLANK_ACTIVE_LOW		(1<<1)
-	#define PM3VideoControl_LINE_DOUBLE_OFF			(0<<2)
-	#define PM3VideoControl_LINE_DOUBLE_ON			(1<<2)
-	#define PM3VideoControl_HSYNC_FORCE_HIGH		(0<<3)
-	#define PM3VideoControl_HSYNC_ACTIVE_HIGH		(1<<3)
-	#define PM3VideoControl_HSYNC_FORCE_LOW			(2<<3)
-	#define PM3VideoControl_HSYNC_ACTIVE_LOW		(3<<3)
-        #define PM3VideoControl_HSYNC_MASK          (3<<3)
-	#define PM3VideoControl_VSYNC_FORCE_HIGH		(0<<5)
-	#define PM3VideoControl_VSYNC_ACTIVE_HIGH		(1<<5)
-	#define PM3VideoControl_VSYNC_FORCE_LOW			(2<<5)
-	#define PM3VideoControl_VSYNC_ACTIVE_LOW		(3<<5)
-        #define PM3VideoControl_VSYNC_MASK          (3<<5)
-	#define PM3VideoControl_BYTE_DOUBLE_OFF			(0<<7)
-	#define PM3VideoControl_BYTE_DOUBLE_ON			(1<<7)
-	#define PM3VideoControl_BUFFER_SWAP_SYNCON_FRAMEBLANK	(0<<9)
-	#define PM3VideoControl_BUFFER_SWAP_FREE_RUNNING	(1<<9)
-	#define PM3VideoControl_BUFFER_SWAP_LIMITETO_FRAMERATE	(2<<9)
-	#define PM3VideoControl_STEREO_DISABLE			(0<<11)
-	#define PM3VideoControl_STEREO_ENABLE			(1<<11)
-	#define PM3VideoControl_RIGHT_EYE_ACTIVE_HIGH		(0<<12)
-	#define PM3VideoControl_RIGHT_EYE_ACTIVE_LOW		(1<<12)
-	#define PM3VideoControl_VIDEO_EXT_LOW			(0<<14)
-	#define PM3VideoControl_VIDEO_EXT_HIGH			(1<<14)
-	#define PM3VideoControl_SYNC_MODE_INDEPENDENT		(0<<16)
-	#define PM3VideoControl_SYNC_MODE_SYNCTO_VSA		(1<<16)
-	#define PM3VideoControl_SYNC_MODE_SYNCTO_VSB		(2<<16)
-	#define PM3VideoControl_PATCH_DISABLE			(0<<18)
-	#define PM3VideoControl_PATCH_ENABLE			(1<<18)
-	#define PM3VideoControl_PIXELSIZE_8BIT			(0<<19)
-	#define PM3VideoControl_PIXELSIZE_16BIT			(1<<19)
-	#define PM3VideoControl_PIXELSIZE_32BIT			(2<<19)
-	#define PM3VideoControl_DISPLAY_DISABLE			(0<<21)
-	#define PM3VideoControl_DISPLAY_ENABLE			(1<<21)
-	#define PM3VideoControl_PATCH_OFFSET_X(off)	(((off)&0x3f)<<22)
-	#define PM3VideoControl_PATCH_OFFSET_Y(off)	(((off)&0x3f)<<28)
+	#define PM3VideoControl_ENABLE				(1 << 0)
+	#define PM3VideoControl_BLANK_ACTIVE_HIGH		(0 << 1)
+	#define PM3VideoControl_BLANK_ACTIVE_LOW		(1 << 1)
+	#define PM3VideoControl_LINE_DOUBLE_OFF			(0 << 2)
+	#define PM3VideoControl_LINE_DOUBLE_ON			(1 << 2)
+	#define PM3VideoControl_HSYNC_FORCE_HIGH		(0 << 3)
+	#define PM3VideoControl_HSYNC_ACTIVE_HIGH		(1 << 3)
+	#define PM3VideoControl_HSYNC_FORCE_LOW			(2 << 3)
+	#define PM3VideoControl_HSYNC_ACTIVE_LOW		(3 << 3)
+	#define PM3VideoControl_HSYNC_MASK			(3 << 3)
+	#define PM3VideoControl_VSYNC_FORCE_HIGH		(0 << 5)
+	#define PM3VideoControl_VSYNC_ACTIVE_HIGH		(1 << 5)
+	#define PM3VideoControl_VSYNC_FORCE_LOW			(2 << 5)
+	#define PM3VideoControl_VSYNC_ACTIVE_LOW		(3 << 5)
+	#define PM3VideoControl_VSYNC_MASK			(3 << 5)
+	#define PM3VideoControl_BYTE_DOUBLE_OFF			(0 << 7)
+	#define PM3VideoControl_BYTE_DOUBLE_ON			(1 << 7)
+	#define PM3VideoControl_BUFFER_SWAP_SYNCON_FRAMEBLANK	(0 << 9)
+	#define PM3VideoControl_BUFFER_SWAP_FREE_RUNNING	(1 << 9)
+	#define PM3VideoControl_BUFFER_SWAP_LIMITETO_FRAMERATE	(2 << 9)
+	#define PM3VideoControl_STEREO_ENABLE			(1 << 11)
+	#define PM3VideoControl_RIGHT_EYE_ACTIVE_HIGH		(0 << 12)
+	#define PM3VideoControl_RIGHT_EYE_ACTIVE_LOW		(1 << 12)
+	#define PM3VideoControl_VIDEO_EXT_LOW			(0 << 14)
+	#define PM3VideoControl_VIDEO_EXT_HIGH			(1 << 14)
+	#define PM3VideoControl_SYNC_MODE_INDEPENDENT		(0 << 16)
+	#define PM3VideoControl_SYNC_MODE_SYNCTO_VSA		(1 << 16)
+	#define PM3VideoControl_SYNC_MODE_SYNCTO_VSB		(2 << 16)
+	#define PM3VideoControl_PATCH_ENABLE			(1 << 18)
+	#define PM3VideoControl_PIXELSIZE_8BIT			(0 << 19)
+	#define PM3VideoControl_PIXELSIZE_16BIT			(1 << 19)
+	#define PM3VideoControl_PIXELSIZE_32BIT			(2 << 19)
+	#define PM3VideoControl_DISPLAY_ENABLE			(1 << 21)
+	#define PM3VideoControl_PATCH_OFFSET_X(off)	(((off) & 0x3f) << 22)
+	#define PM3VideoControl_PATCH_OFFSET_Y(off)	(((off) & 0x3f) << 28)
 #define PM3InterruptLine					0x3060
 #define PM3DisplayData						0x3068
 #define PM3VerticalLineCount					0x3070
@@ -159,80 +154,93 @@
 #define PM3MiscControl						0x3088
 
 #define PM3VideoOverlayUpdate					0x3100
-        #define PM3VideoOverlayUpdate_DISABLE                   (0<<0)
-        #define PM3VideoOverlayUpdate_ENABLE                    (1<<0)
+	#define PM3VideoOverlayUpdate_ENABLE			(1 << 0)
 #define PM3VideoOverlayMode					0x3108
-	#define PM3VideoOverlayMode_DISABLE			(0<<0)
-	#define PM3VideoOverlayMode_ENABLE			(1<<0)
-        #define PM3VideoOverlayMode_BUFFERSYNC_MANUAL           (0<<1)
-        #define PM3VideoOverlayMode_BUFFERSYNC_VIDEOSTREAMA     (1<<1)
-        #define PM3VideoOverlayMode_BUFFERSYNC_VIDEOSTREAMB     (2<<1)
-        #define PM3VideoOverlayMode_FIELDPOLARITY_NORMAL        (0<<4)
-        #define PM3VideoOverlayMode_FIELDPOLARITY_INVERT        (1<<4)
-        #define PM3VideoOverlayMode_PIXELSIZE_8BIT              (0<<5)
-        #define PM3VideoOverlayMode_PIXELSIZE_16BIT             (1<<5)
-        #define PM3VideoOverlayMode_PIXELSIZE_32BIT             (2<<5)
-        #define PM3VideoOverlayMode_COLORFORMAT_RGB8888 ((0<<7)|(1<<12)|(2<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_RGB4444 ((1<<7)|(1<<12)|(1<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_RGB5551 ((2<<7)|(1<<12)|(1<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_RGB565  ((3<<7)|(1<<12)|(1<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_RGB332  ((4<<7)|(1<<12)|(0<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_BGR8888 ((0<<7)|(2<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_BGR4444 ((1<<7)|(1<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_BGR5551 ((2<<7)|(1<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_BGR565  ((3<<7)|(1<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_BGR332  ((4<<7)|(0<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_CI8     ((5<<7)|(1<<12)|(0<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_VUY444  ((2<<10)|(1<<12)|(2<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_YUV444  ((2<<10)|(2<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_VUY422  ((1<<10)|(1<<12)|(1<<5))
-        #define PM3VideoOverlayMode_COLORFORMAT_YUV422  ((1<<10)|(1<<5))
-        #define PM3VideoOverlayMode_COLORORDER_BGR              (0<<12)
-        #define PM3VideoOverlayMode_COLORORDER_RGB              (1<<12)
-        #define PM3VideoOverlayMode_LINEARCOLOREXT_OFF          (0<<13)
-        #define PM3VideoOverlayMode_LINEARCOLOREXT_ON           (1<<13)
-        #define PM3VideoOverlayMode_FILTER_MASK                 (3<<14)
-        #define PM3VideoOverlayMode_FILTER_OFF                  (0<<14)
-        #define PM3VideoOverlayMode_FILTER_FULL                 (1<<14)
-        #define PM3VideoOverlayMode_FILTER_PARTIAL              (2<<14)
-        #define PM3VideoOverlayMode_DEINTERLACE_OFF             (0<<16)
-        #define PM3VideoOverlayMode_DEINTERLACE_BOB             (1<<16)
-        #define PM3VideoOverlayMode_PATCHMODE_OFF               (0<<18)
-        #define PM3VideoOverlayMode_PATCHMODE_ON                (1<<18)
-        #define PM3VideoOverlayMode_FLIP_VIDEO                  (0<<20)
-        #define PM3VideoOverlayMode_FLIP_VIDEOSTREAMA           (1<<20)
-        #define PM3VideoOverlayMode_FLIP_VIDEOSTREAMB           (2<<20)
-        #define PM3VideoOverlayMode_MIRROR_MASK                 (3<<23)
-        #define PM3VideoOverlayMode_MIRRORX_OFF                 (0<<23)
-        #define PM3VideoOverlayMode_MIRRORX_ON                  (1<<23)
-        #define PM3VideoOverlayMode_MIRRORY_OFF                 (0<<24)
-        #define PM3VideoOverlayMode_MIRRORY_ON                  (1<<24)
+	#define PM3VideoOverlayMode_ENABLE			(1 << 0)
+	#define PM3VideoOverlayMode_BUFFERSYNC_MANUAL		(0 << 1)
+	#define PM3VideoOverlayMode_BUFFERSYNC_VIDEOSTREAMA	(1 << 1)
+	#define PM3VideoOverlayMode_BUFFERSYNC_VIDEOSTREAMB	(2 << 1)
+	#define PM3VideoOverlayMode_FIELDPOLARITY_NORMAL	(0 << 4)
+	#define PM3VideoOverlayMode_FIELDPOLARITY_INVERT	(1 << 4)
+	#define PM3VideoOverlayMode_PIXELSIZE_8BIT		(0 << 5)
+	#define PM3VideoOverlayMode_PIXELSIZE_16BIT		(1 << 5)
+	#define PM3VideoOverlayMode_PIXELSIZE_32BIT		(2 << 5)
+	#define PM3VideoOverlayMode_COLORFORMAT_RGB8888		\
+					((0 << 7)|(1 << 12)|(2 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_RGB4444		\
+					((1 << 7)|(1 << 12)|(1 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_RGB5551		\
+					((2 << 7)|(1 << 12)|(1 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_RGB565		\
+					((3 << 7)|(1 << 12)|(1 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_RGB332		\
+					((4 << 7)|(1 << 12)|(0 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_BGR8888		\
+					((0 << 7)|(2 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_BGR4444		\
+					((1 << 7)|(1 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_BGR5551		\
+					((2 << 7)|(1 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_BGR565		\
+					((3 << 7)|(1 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_BGR332		\
+					((4 << 7)|(0 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_CI8		\
+					((5 << 7)|(1 << 12)|(0 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_VUY444		\
+					((2 << 10)|(1 << 12)|(2 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_YUV444		\
+					((2 << 10)|(2 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_VUY422		\
+					((1 << 10)|(1 << 12)|(1 << 5))
+	#define PM3VideoOverlayMode_COLORFORMAT_YUV422		\
+					((1 << 10)|(1 << 5))
+	#define PM3VideoOverlayMode_COLORORDER_BGR		(0 << 12)
+	#define PM3VideoOverlayMode_COLORORDER_RGB		(1 << 12)
+	#define PM3VideoOverlayMode_LINEARCOLOREXT_OFF		(0 << 13)
+	#define PM3VideoOverlayMode_LINEARCOLOREXT_ON		(1 << 13)
+	#define PM3VideoOverlayMode_FILTER_MASK			(3 << 14)
+	#define PM3VideoOverlayMode_FILTER_OFF			(0 << 14)
+	#define PM3VideoOverlayMode_FILTER_FULL			(1 << 14)
+	#define PM3VideoOverlayMode_FILTER_PARTIAL		(2 << 14)
+	#define PM3VideoOverlayMode_DEINTERLACE_OFF		(0 << 16)
+	#define PM3VideoOverlayMode_DEINTERLACE_BOB		(1 << 16)
+	#define PM3VideoOverlayMode_PATCHMODE_OFF		(0 << 18)
+	#define PM3VideoOverlayMode_PATCHMODE_ON		(1 << 18)
+	#define PM3VideoOverlayMode_FLIP_VIDEO			(0 << 20)
+	#define PM3VideoOverlayMode_FLIP_VIDEOSTREAMA		(1 << 20)
+	#define PM3VideoOverlayMode_FLIP_VIDEOSTREAMB		(2 << 20)
+	#define PM3VideoOverlayMode_MIRROR_MASK			(3 << 23)
+	#define PM3VideoOverlayMode_MIRRORX_OFF			(0 << 23)
+	#define PM3VideoOverlayMode_MIRRORX_ON			(1 << 23)
+	#define PM3VideoOverlayMode_MIRRORY_OFF			(0 << 24)
+	#define PM3VideoOverlayMode_MIRRORY_ON			(1 << 24)
 #define PM3VideoOverlayFifoControl				0x3110
 #define PM3VideoOverlayIndex					0x3118
 #define PM3VideoOverlayBase0					0x3120
 #define PM3VideoOverlayBase1					0x3128
 #define PM3VideoOverlayBase2					0x3130
 #define PM3VideoOverlayStride					0x3138
-        #define PM3VideoOverlayStride_STRIDE(s)         (((s)&0xfff)<<0)
-#define PM3VideoOverlayWidth                                    0x3140
-        #define PM3VideoOverlayWidth_WIDTH(w)           (((w)&0xfff)<<0)
-#define PM3VideoOverlayHeight                                   0x3148
-        #define PM3VideoOverlayHeight_HEIGHT(h)         (((h)&0xfff)<<0)
-#define PM3VideoOverlayOrigin                                   0x3150
-        #define PM3VideoOverlayOrigin_XORIGIN(x)        (((x)&0xfff)<<0)
-        #define PM3VideoOverlayOrigin_YORIGIN(y)        (((y)&0xfff)<<16)
-#define PM3VideoOverlayShrinkXDelta                             0x3158
-        #define PM3VideoOverlayShrinkXDelta_NONE                (1<<16)
-        #define PM3VideoOverlayShrinkXDelta_DELTA(s,d)          \
-                ((((s)<<16)/(d))&0x0ffffff0)
-#define PM3VideoOverlayZoomXDelta                               0x3160
-        #define PM3VideoOverlayZoomXDelta_NONE                  (1<<16)
-        #define PM3VideoOverlayZoomXDelta_DELTA(s,d)            \
-                ((((s)<<16)/(d))&0x0001fff0)
-#define PM3VideoOverlayYDelta                                   0x3168
-        #define PM3VideoOverlayYDelta_NONE                      (1<<16)
-        #define PM3VideoOverlayYDelta_DELTA(s,d)                        \
-                ((((s)<<16)/(d))&0x0ffffff0)
+	#define PM3VideoOverlayStride_STRIDE(s)		(((s) & 0xfff) << 0)
+#define PM3VideoOverlayWidth					0x3140
+	#define PM3VideoOverlayWidth_WIDTH(w)		(((w) & 0xfff) << 0)
+#define PM3VideoOverlayHeight					0x3148
+	#define PM3VideoOverlayHeight_HEIGHT(h)		(((h) & 0xfff) << 0)
+#define PM3VideoOverlayOrigin					0x3150
+	#define PM3VideoOverlayOrigin_XORIGIN(x)	(((x) & 0xfff) << 0)
+	#define PM3VideoOverlayOrigin_YORIGIN(y)	(((y) & 0xfff) << 16)
+#define PM3VideoOverlayShrinkXDelta				0x3158
+	#define PM3VideoOverlayShrinkXDelta_NONE		(1 << 16)
+	#define PM3VideoOverlayShrinkXDelta_DELTA(s,d)	\
+		((((s) << 16)/(d)) & 0x0ffffff0)
+#define PM3VideoOverlayZoomXDelta				0x3160
+	#define PM3VideoOverlayZoomXDelta_NONE			(1 << 16)
+	#define PM3VideoOverlayZoomXDelta_DELTA(s,d)	\
+		((((s) << 16)/(d)) & 0x0001fff0)
+#define PM3VideoOverlayYDelta					0x3168
+	#define PM3VideoOverlayYDelta_NONE			(1 << 16)
+	#define PM3VideoOverlayYDelta_DELTA(s,d)	\
+		((((s) << 16)/(d)) & 0x0ffffff0)
 #define PM3VideoOverlayFieldOffset				0x3170
 #define PM3VideoOverlayStatus					0x3178
 
@@ -249,102 +257,82 @@
 #define PM3RD_IndexHigh						0x4028
 #define PM3RD_IndexedData					0x4030
 #define PM3RD_IndexControl					0x4038
-	#define PM3RD_IndexControl_AUTOINCREMENT_ENABLE		(1<<0)
-	#define PM3RD_IndexControl_AUTOINCREMENT_DISABLE	(0<<0)
+	#define PM3RD_IndexControl_AUTOINCREMENT_ENABLE		(1 << 0)
 
 /* Indirect Registers */
 #define PM3RD_MiscControl					0x000
-	#define PM3RD_MiscControl_HIGHCOLOR_RES_DISABLE		(0<<0)
-	#define PM3RD_MiscControl_HIGHCOLOR_RES_ENABLE		(1<<0)
-	#define PM3RD_MiscControl_PIXELDOUBLE_DISABLE		(0<<1)
-	#define PM3RD_MiscControl_PIXELDOUBLE_ENABLE		(1<<1)
-	#define PM3RD_MiscControl_LASTREAD_ADDR_DISABLE		(0<<2)
-	#define PM3RD_MiscControl_LASTREAD_ADDR_ENABLE		(1<<2)
-	#define PM3RD_MiscControl_DIRECTCOLOR_DISABLE		(0<<3)
-	#define PM3RD_MiscControl_DIRECTCOLOR_ENABLE		(1<<3)
-	#define PM3RD_MiscControl_OVERLAY_DISABLE		(0<<4)
-	#define PM3RD_MiscControl_OVERLAY_ENABLE		(1<<4)
-	#define PM3RD_MiscControl_PIXELDOUBLE_BUFFER_DISABLE	(0<<5)
-	#define PM3RD_MiscControl_PIXELDOUBLE_BUFFER_ENABLE	(1<<5)
-	#define PM3RD_MiscControl_VSB_OUTPUT_DISABLE		(0<<6)
-	#define PM3RD_MiscControl_VSB_OUTPUT_ENABLE		(1<<6)
-	#define PM3RD_MiscControl_STEREODOUBLE_BUFFER_DISABLE	(0<<7)
-	#define PM3RD_MiscControl_STEREODOUBLE_BUFFER_ENABLE	(1<<7)
+	#define PM3RD_MiscControl_HIGHCOLOR_RES_ENABLE		(1 << 0)
+	#define PM3RD_MiscControl_PIXELDOUBLE_ENABLE		(1 << 1)
+	#define PM3RD_MiscControl_LASTREAD_ADDR_ENABLE		(1 << 2)
+	#define PM3RD_MiscControl_DIRECTCOLOR_ENABLE		(1 << 3)
+	#define PM3RD_MiscControl_OVERLAY_ENABLE		(1 << 4)
+	#define PM3RD_MiscControl_PIXELDOUBLE_BUFFER_ENABLE	(1 << 5)
+	#define PM3RD_MiscControl_VSB_OUTPUT_ENABLE		(1 << 6)
+	#define PM3RD_MiscControl_STEREODOUBLE_BUFFER_ENABLE	(1 << 7)
 #define PM3RD_SyncControl					0x001
-	#define PM3RD_SyncControl_HSYNC_ACTIVE_LOW		(0<<0)
-	#define PM3RD_SyncControl_HSYNC_ACTIVE_HIGH		(1<<0)
-	#define PM3RD_SyncControl_HSYNC_FORCE_ACTIVE		(3<<0)
-	#define PM3RD_SyncControl_HSYNC_FORCE_INACTIVE		(4<<0)
-	#define PM3RD_SyncControl_HSYNC_TRI_STATE		(2<<0)
-	#define PM3RD_SyncControl_VSYNC_ACTIVE_LOW		(0<<3)
-	#define PM3RD_SyncControl_VSYNC_ACTIVE_HIGH		(1<<3)
-	#define PM3RD_SyncControl_VSYNC_TRI_STATE		(2<<3)
-	#define PM3RD_SyncControl_VSYNC_FORCE_ACTIVE		(3<<3)
-	#define PM3RD_SyncControl_VSYNC_FORCE_INACTIVE		(4<<3)
-	#define PM3RD_SyncControl_HSYNC_OVERRIDE_SETBY_HSYNC	(0<<6)
-	#define PM3RD_SyncControl_HSYNC_OVERRIDE_FORCE_HIGH	(1<<6)
-	#define PM3RD_SyncControl_VSYNC_OVERRIDE_SETBY_VSYNC	(0<<7)
-	#define PM3RD_SyncControl_VSYNC_OVERRIDE_FORCE_HIGH	(1<<7)
+	#define PM3RD_SyncControl_HSYNC_ACTIVE_LOW		(0 << 0)
+	#define PM3RD_SyncControl_HSYNC_ACTIVE_HIGH		(1 << 0)
+	#define PM3RD_SyncControl_HSYNC_FORCE_ACTIVE		(3 << 0)
+	#define PM3RD_SyncControl_HSYNC_FORCE_INACTIVE		(4 << 0)
+	#define PM3RD_SyncControl_HSYNC_TRI_STATE		(2 << 0)
+	#define PM3RD_SyncControl_VSYNC_ACTIVE_LOW		(0 << 3)
+	#define PM3RD_SyncControl_VSYNC_ACTIVE_HIGH		(1 << 3)
+	#define PM3RD_SyncControl_VSYNC_TRI_STATE		(2 << 3)
+	#define PM3RD_SyncControl_VSYNC_FORCE_ACTIVE		(3 << 3)
+	#define PM3RD_SyncControl_VSYNC_FORCE_INACTIVE		(4 << 3)
+	#define PM3RD_SyncControl_HSYNC_OVERRIDE_SETBY_HSYNC	(0 << 6)
+	#define PM3RD_SyncControl_HSYNC_OVERRIDE_FORCE_HIGH	(1 << 6)
+	#define PM3RD_SyncControl_VSYNC_OVERRIDE_SETBY_VSYNC	(0 << 7)
+	#define PM3RD_SyncControl_VSYNC_OVERRIDE_FORCE_HIGH	(1 << 7)
 #define PM3RD_DACControl					0x002
-	#define PM3RD_DACControl_DAC_POWER_ON			(0<<0)
-	#define PM3RD_DACControl_DAC_POWER_OFF			(1<<0)
-	#define PM3RD_DACControl_SYNC_ON_GREEN_DISABLE		(0<<3)
-	#define PM3RD_DACControl_SYNC_ON_GREEN_ENABLE		(1<<3)
-	#define PM3RD_DACControl_BLANK_RED_DAC_DISABLE		(0<<4)
-	#define PM3RD_DACControl_BLANK_RED_DAC_ENABLE		(1<<4)
-	#define PM3RD_DACControl_BLANK_GREEN_DAC_DISABLE	(0<<5)
-	#define PM3RD_DACControl_BLANK_GREEN_DAC_ENABLE		(1<<5)
-	#define PM3RD_DACControl_BLANK_BLUE_DAC_DISABLE		(0<<6)
-	#define PM3RD_DACControl_BLANK_BLUE_DAC_ENABLE		(1<<6)
-	#define PM3RD_DACControl_BLANK_PEDESTAL_DISABLE		(0<<7)
-	#define PM3RD_DACControl_BLANK_PEDESTAL_ENABLE		(1<<7)
+	#define PM3RD_DACControl_DAC_POWER_ON			(0 << 0)
+	#define PM3RD_DACControl_DAC_POWER_OFF			(1 << 0)
+	#define PM3RD_DACControl_SYNC_ON_GREEN_ENABLE		(1 << 3)
+	#define PM3RD_DACControl_BLANK_RED_DAC_ENABLE		(1 << 4)
+	#define PM3RD_DACControl_BLANK_GREEN_DAC_ENABLE		(1 << 5)
+	#define PM3RD_DACControl_BLANK_BLUE_DAC_ENABLE		(1 << 6)
+	#define PM3RD_DACControl_BLANK_PEDESTAL_ENABLE		(1 << 7)
 #define PM3RD_PixelSize						0x003
-	#define PM3RD_PixelSize_24_BIT_PIXELS			(4<<0)
-	#define PM3RD_PixelSize_32_BIT_PIXELS			(2<<0)
-	#define PM3RD_PixelSize_16_BIT_PIXELS			(1<<0)
-	#define PM3RD_PixelSize_8_BIT_PIXELS			(0<<0)
+	#define PM3RD_PixelSize_24_BIT_PIXELS			(4 << 0)
+	#define PM3RD_PixelSize_32_BIT_PIXELS			(2 << 0)
+	#define PM3RD_PixelSize_16_BIT_PIXELS			(1 << 0)
+	#define PM3RD_PixelSize_8_BIT_PIXELS			(0 << 0)
 #define PM3RD_ColorFormat					0x004
-	#define PM3RD_ColorFormat_LINEAR_COLOR_EXT_ENABLE	(1<<6)
-	#define PM3RD_ColorFormat_LINEAR_COLOR_EXT_DISABLE	(0<<6)
-	#define PM3RD_ColorFormat_COLOR_ORDER_BLUE_LOW		(1<<5)
-	#define PM3RD_ColorFormat_COLOR_ORDER_RED_LOW		(0<<5)
-	#define PM3RD_ColorFormat_COLOR_FORMAT_MASK		(0x1f<<0)
-	#define PM3RD_ColorFormat_8888_COLOR			(0<<0)
-	#define PM3RD_ColorFormat_5551_FRONT_COLOR		(1<<0)
-	#define PM3RD_ColorFormat_4444_COLOR			(2<<0)
-	#define PM3RD_ColorFormat_332_FRONT_COLOR		(5<<0)
-	#define PM3RD_ColorFormat_332_BACK_COLOR		(6<<0)
-	#define PM3RD_ColorFormat_2321_FRONT_COLOR		(9<<0)
-	#define PM3RD_ColorFormat_2321_BACK_COLOR		(10<<0)
-	#define PM3RD_ColorFormat_232_FRONTOFF_COLOR		(11<<0)
-	#define PM3RD_ColorFormat_232_BACKOFF_COLOR		(12<<0)
-	#define PM3RD_ColorFormat_5551_BACK_COLOR		(13<<0)
-	#define PM3RD_ColorFormat_CI8_COLOR			(14<<0)
-	#define PM3RD_ColorFormat_565_FRONT_COLOR		(16<<0)
-	#define PM3RD_ColorFormat_565_BACK_COLOR		(17<<0)
+	#define PM3RD_ColorFormat_LINEAR_COLOR_EXT_ENABLE	(1 << 6)
+	#define PM3RD_ColorFormat_COLOR_ORDER_BLUE_LOW		(1 << 5)
+	#define PM3RD_ColorFormat_COLOR_ORDER_RED_LOW		(0 << 5)
+	#define PM3RD_ColorFormat_COLOR_FORMAT_MASK		(0x1f << 0)
+	#define PM3RD_ColorFormat_8888_COLOR			(0 << 0)
+	#define PM3RD_ColorFormat_5551_FRONT_COLOR		(1 << 0)
+	#define PM3RD_ColorFormat_4444_COLOR			(2 << 0)
+	#define PM3RD_ColorFormat_332_FRONT_COLOR		(5 << 0)
+	#define PM3RD_ColorFormat_332_BACK_COLOR		(6 << 0)
+	#define PM3RD_ColorFormat_2321_FRONT_COLOR		(9 << 0)
+	#define PM3RD_ColorFormat_2321_BACK_COLOR		(10 << 0)
+	#define PM3RD_ColorFormat_232_FRONTOFF_COLOR		(11 << 0)
+	#define PM3RD_ColorFormat_232_BACKOFF_COLOR		(12 << 0)
+	#define PM3RD_ColorFormat_5551_BACK_COLOR		(13 << 0)
+	#define PM3RD_ColorFormat_CI8_COLOR			(14 << 0)
+	#define PM3RD_ColorFormat_565_FRONT_COLOR		(16 << 0)
+	#define PM3RD_ColorFormat_565_BACK_COLOR		(17 << 0)
 #define PM3RD_CursorMode					0x005
-	#define PM3RD_CursorMode_CURSOR_DISABLE			(0<<0)
-	#define PM3RD_CursorMode_CURSOR_ENABLE			(1<<0)
-	#define PM3RD_CursorMode_FORMAT_64x64_2BPE_P0123	(0<<2)
-	#define PM3RD_CursorMode_FORMAT_32x32_2BPE_P0		(1<<2)
-	#define PM3RD_CursorMode_FORMAT_32x32_2BPE_P1		(2<<2)
-	#define PM3RD_CursorMode_FORMAT_32x32_2BPE_P2		(3<<2)
-	#define PM3RD_CursorMode_FORMAT_32x32_2BPE_P3		(4<<2)
-	#define PM3RD_CursorMode_FORMAT_32x32_4BPE_P01		(5<<2)
-	#define PM3RD_CursorMode_FORMAT_32x32_4BPE_P23		(6<<2)
-	#define PM3RD_CursorMode_TYPE_MS			(0<<4)
-	#define PM3RD_CursorMode_TYPE_X				(1<<4)
-	#define PM3RD_CursorMode_REVERSE_PIXEL_ORDER_DISABLE	(0<<6)
-	#define PM3RD_CursorMode_REVERSE_PIXEL_ORDER_ENABLE	(1<<6)
-	#define PM3RD_CursorMode_REVERSE_PIXEL_ORDER_3_COLOR	(2<<6)
-	#define PM3RD_CursorMode_REVERSE_PIXEL_ORDER_15_COLOR	(3<<6)
+	#define PM3RD_CursorMode_CURSOR_ENABLE			(1 << 0)
+	#define PM3RD_CursorMode_FORMAT_64x64_2BPE_P0123	(0 << 2)
+	#define PM3RD_CursorMode_FORMAT_32x32_2BPE_P0		(1 << 2)
+	#define PM3RD_CursorMode_FORMAT_32x32_2BPE_P1		(2 << 2)
+	#define PM3RD_CursorMode_FORMAT_32x32_2BPE_P2		(3 << 2)
+	#define PM3RD_CursorMode_FORMAT_32x32_2BPE_P3		(4 << 2)
+	#define PM3RD_CursorMode_FORMAT_32x32_4BPE_P01		(5 << 2)
+	#define PM3RD_CursorMode_FORMAT_32x32_4BPE_P23		(6 << 2)
+	#define PM3RD_CursorMode_TYPE_MS			(0 << 4)
+	#define PM3RD_CursorMode_TYPE_X				(1 << 4)
+	#define PM3RD_CursorMode_REVERSE_PIXEL_ORDER_ENABLE	(1 << 6)
+	#define PM3RD_CursorMode_REVERSE_PIXEL_ORDER_3_COLOR	(2 << 6)
+	#define PM3RD_CursorMode_REVERSE_PIXEL_ORDER_15_COLOR	(3 << 6)
 #define PM3RD_CursorControl					0x006
-	#define PM3RD_CursorControl_DOUBLE_X_DISABLED		(0<<0)
-	#define PM3RD_CursorControl_DOUBLE_X_ENABLED		(1<<0)
-	#define PM3RD_CursorControl_DOUBLE_Y_DISABLED		(0<<1)
-	#define PM3RD_CursorControl_DOUBLE_Y_ENABLED		(1<<1)
-	#define PM3RD_CursorControl_READBACK_POS_DISABLED	(0<<2)
-	#define PM3RD_CursorControl_READBACK_POS_ENABLED	(1<<2)
+	#define PM3RD_CursorControl_DOUBLE_X_ENABLED		(1 << 0)
+	#define PM3RD_CursorControl_DOUBLE_Y_ENABLED		(1 << 1)
+	#define PM3RD_CursorControl_READBACK_POS_ENABLED	(1 << 2)
 
 #define PM3RD_CursorXLow					0x007
 #define PM3RD_CursorXHigh					0x008
@@ -354,17 +342,13 @@
 #define PM3RD_CursorHotSpotY					0x00c
 #define PM3RD_OverlayKey					0x00d
 #define PM3RD_Pan						0x00e
-	#define PM3RD_Pan_DISABLE				(0<<0)
-	#define PM3RD_Pan_ENABLE				(1<<0)
-	#define PM3RD_Pan_GATE_DISABLE				(0<<1)
-	#define PM3RD_Pan_GATE_ENABLE				(1<<1)
+	#define PM3RD_Pan_ENABLE				(1 << 0)
+	#define PM3RD_Pan_GATE_ENABLE				(1 << 1)
 #define PM3RD_Sense						0x00f
 
 #define PM3RD_CheckControl					0x018
-	#define PM3RD_CheckControl_PIXEL_DISABLED		(0<<0)
-	#define PM3RD_CheckControl_PIXEL_ENABLED		(1<<0)
-	#define PM3RD_CheckControl_LUT_DISABLED			(0<<1)
-	#define PM3RD_CheckControl_LUT_ENABLED			(1<<1)
+	#define PM3RD_CheckControl_PIXEL_ENABLED		(1 << 0)
+	#define PM3RD_CheckControl_LUT_ENABLED			(1 << 1)
 #define PM3RD_CheckPixelRed					0x019
 #define PM3RD_CheckPixelGreen					0x01a
 #define PM3RD_CheckPixelBlue					0x01b
@@ -374,19 +358,17 @@
 #define PM3RD_Scratch						0x01f
 
 #define PM3RD_VideoOverlayControl				0x020
-        #define PM3RD_VideoOverlayControl_DISABLE               (0<<0)
-        #define PM3RD_VideoOverlayControl_ENABLE                (1<<0)
-        #define PM3RD_VideoOverlayControl_MODE_MASK             (3<<1)
-        #define PM3RD_VideoOverlayControl_MODE_MAINKEY          (0<<1)
-        #define PM3RD_VideoOverlayControl_MODE_OVERLAYKEY       (1<<1)
-        #define PM3RD_VideoOverlayControl_MODE_ALWAYS           (2<<1)
-        #define PM3RD_VideoOverlayControl_MODE_BLEND            (3<<1)
-        #define PM3RD_VideoOverlayControl_DIRECTCOLOR_DISABLED  (0<<3)
-        #define PM3RD_VideoOverlayControl_DIRECTCOLOR_ENABLED   (1<<3)
-        #define PM3RD_VideoOverlayControl_BLENDSRC_MAIN         (0<<4)
-        #define PM3RD_VideoOverlayControl_BLENDSRC_REGISTER     (1<<4)
-        #define PM3RD_VideoOverlayControl_KEY_COLOR             (0<<5)
-        #define PM3RD_VideoOverlayControl_KEY_ALPHA             (1<<5)
+	#define PM3RD_VideoOverlayControl_ENABLE		(1 << 0)
+	#define PM3RD_VideoOverlayControl_MODE_MASK		(3 << 1)
+	#define PM3RD_VideoOverlayControl_MODE_MAINKEY		(0 << 1)
+	#define PM3RD_VideoOverlayControl_MODE_OVERLAYKEY	(1 << 1)
+	#define PM3RD_VideoOverlayControl_MODE_ALWAYS		(2 << 1)
+	#define PM3RD_VideoOverlayControl_MODE_BLEND		(3 << 1)
+	#define PM3RD_VideoOverlayControl_DIRECTCOLOR_ENABLED	(1 << 3)
+	#define PM3RD_VideoOverlayControl_BLENDSRC_MAIN		(0 << 4)
+	#define PM3RD_VideoOverlayControl_BLENDSRC_REGISTER	(1 << 4)
+	#define PM3RD_VideoOverlayControl_KEY_COLOR		(0 << 5)
+	#define PM3RD_VideoOverlayControl_KEY_ALPHA		(1 << 5)
 #define PM3RD_VideoOverlayXStartLow				0x021
 #define PM3RD_VideoOverlayXStartHigh				0x022
 #define PM3RD_VideoOverlayYStartLow				0x023
@@ -399,10 +381,10 @@
 #define PM3RD_VideoOverlayKeyG					0x02a
 #define PM3RD_VideoOverlayKeyB					0x02b
 #define PM3RD_VideoOverlayBlend					0x02c
-        #define PM3RD_VideoOverlayBlend_FACTOR_0_PERCENT        (0<<6)
-        #define PM3RD_VideoOverlayBlend_FACTOR_25_PERCENT       (1<<6)
-        #define PM3RD_VideoOverlayBlend_FACTOR_75_PERCENT       (2<<6)
-        #define PM3RD_VideoOverlayBlend_FACTOR_100_PERCENT      (3<<6)
+	#define PM3RD_VideoOverlayBlend_FACTOR_0_PERCENT	(0 << 6)
+	#define PM3RD_VideoOverlayBlend_FACTOR_25_PERCENT	(1 << 6)
+	#define PM3RD_VideoOverlayBlend_FACTOR_75_PERCENT	(2 << 6)
+	#define PM3RD_VideoOverlayBlend_FACTOR_100_PERCENT	(3 << 6)
 
 #define PM3RD_DClkSetup1					0x1f0
 #define PM3RD_DClkSetup2					0x1f1
@@ -410,21 +392,20 @@
 #define PM3RD_KClkSetup2					0x1f3
 
 #define PM3RD_DClkControl					0x200
-	#define PM3RD_DClkControl_SOURCE_PLL			(0<<4)
-	#define PM3RD_DClkControl_SOURCE_VSA			(1<<4)
-	#define PM3RD_DClkControl_SOURCE_VSB			(2<<4)
-	#define PM3RD_DClkControl_SOURCE_EXT			(3<<4)
-	#define PM3RD_DClkControl_STATE_RUN			(2<<2)
-	#define PM3RD_DClkControl_STATE_HIGH			(1<<2)
-	#define PM3RD_DClkControl_STATE_LOW			(0<<2)
-	#define PM3RD_DClkControl_LOCKED			(1<<1)
-	#define PM3RD_DClkControl_NOT_LOCKED			(0<<1)
-	#define PM3RD_DClkControl_ENABLE			(1<<0)
-	#define PM3RD_DClkControl_DISABLE			(0<<0)
+	#define PM3RD_DClkControl_SOURCE_PLL			(0 << 4)
+	#define PM3RD_DClkControl_SOURCE_VSA			(1 << 4)
+	#define PM3RD_DClkControl_SOURCE_VSB			(2 << 4)
+	#define PM3RD_DClkControl_SOURCE_EXT			(3 << 4)
+	#define PM3RD_DClkControl_STATE_RUN			(2 << 2)
+	#define PM3RD_DClkControl_STATE_HIGH			(1 << 2)
+	#define PM3RD_DClkControl_STATE_LOW			(0 << 2)
+	#define PM3RD_DClkControl_LOCKED			(1 << 1)
+	#define PM3RD_DClkControl_NOT_LOCKED			(0 << 1)
+	#define PM3RD_DClkControl_ENABLE			(1 << 0)
 #define PM3RD_DClk0PreScale					0x201
 #define PM3RD_DClk0FeedbackScale				0x202
 #define PM3RD_DClk0PostScale					0x203
-        #define PM3_REF_CLOCK                                   14318
+	#define PM3_REF_CLOCK					14318
 #define PM3RD_DClk1PreScale					0x204
 #define PM3RD_DClk1FeedbackScale				0x205
 #define PM3RD_DClk1PostScale					0x206
@@ -435,59 +416,56 @@
 #define PM3RD_DClk3FeedbackScale				0x20b
 #define PM3RD_DClk3PostScale					0x20c
 #define PM3RD_KClkControl					0x20d
-	#define PM3RD_KClkControl_DISABLE			(0<<0)
-	#define PM3RD_KClkControl_ENABLE			(1<<0)
-	#define PM3RD_KClkControl_NOT_LOCKED			(0<<1)
-	#define PM3RD_KClkControl_LOCKED			(1<<1)
-	#define PM3RD_KClkControl_STATE_LOW			(0<<2)
-	#define PM3RD_KClkControl_STATE_HIGH			(1<<2)
-	#define PM3RD_KClkControl_STATE_RUN			(2<<2)
-	#define PM3RD_KClkControl_STATE_LOW_POWER		(3<<2)
-	#define PM3RD_KClkControl_SOURCE_PCLK			(0<<4)
-	#define PM3RD_KClkControl_SOURCE_HALF_PCLK		(1<<4)
-	#define PM3RD_KClkControl_SOURCE_PLL			(2<<4)
+	#define PM3RD_KClkControl_ENABLE			(1 << 0)
+	#define PM3RD_KClkControl_NOT_LOCKED			(0 << 1)
+	#define PM3RD_KClkControl_LOCKED			(1 << 1)
+	#define PM3RD_KClkControl_STATE_LOW			(0 << 2)
+	#define PM3RD_KClkControl_STATE_HIGH			(1 << 2)
+	#define PM3RD_KClkControl_STATE_RUN			(2 << 2)
+	#define PM3RD_KClkControl_STATE_LOW_POWER		(3 << 2)
+	#define PM3RD_KClkControl_SOURCE_PCLK			(0 << 4)
+	#define PM3RD_KClkControl_SOURCE_HALF_PCLK		(1 << 4)
+	#define PM3RD_KClkControl_SOURCE_PLL			(2 << 4)
 #define PM3RD_KClkPreScale					0x20e
 #define PM3RD_KClkFeedbackScale					0x20f
 #define PM3RD_KClkPostScale					0x210
 #define PM3RD_MClkControl					0x211
-	#define PM3RD_MClkControl_DISABLE			(0<<0)
-	#define PM3RD_MClkControl_ENABLE			(1<<0)
-	#define PM3RD_MClkControl_NOT_LOCKED			(0<<1)
-	#define PM3RD_MClkControl_LOCKED			(1<<1)
-	#define PM3RD_MClkControl_STATE_LOW			(0<<2)
-	#define PM3RD_MClkControl_STATE_HIGH			(1<<2)
-	#define PM3RD_MClkControl_STATE_RUN			(2<<2)
-	#define PM3RD_MClkControl_STATE_LOW_POWER		(3<<2)
-	#define PM3RD_MClkControl_SOURCE_PCLK			(0<<4)
-	#define PM3RD_MClkControl_SOURCE_HALF_PCLK		(1<<4)
-	#define PM3RD_MClkControl_SOURCE_HALF_EXT		(3<<4)
-	#define PM3RD_MClkControl_SOURCE_EXT			(4<<4)
-	#define PM3RD_MClkControl_SOURCE_HALF_KCLK		(5<<4)
-	#define PM3RD_MClkControl_SOURCE_KCLK			(6<<4)
+	#define PM3RD_MClkControl_ENABLE			(1 << 0)
+	#define PM3RD_MClkControl_NOT_LOCKED			(0 << 1)
+	#define PM3RD_MClkControl_LOCKED			(1 << 1)
+	#define PM3RD_MClkControl_STATE_LOW			(0 << 2)
+	#define PM3RD_MClkControl_STATE_HIGH			(1 << 2)
+	#define PM3RD_MClkControl_STATE_RUN			(2 << 2)
+	#define PM3RD_MClkControl_STATE_LOW_POWER		(3 << 2)
+	#define PM3RD_MClkControl_SOURCE_PCLK			(0 << 4)
+	#define PM3RD_MClkControl_SOURCE_HALF_PCLK		(1 << 4)
+	#define PM3RD_MClkControl_SOURCE_HALF_EXT		(3 << 4)
+	#define PM3RD_MClkControl_SOURCE_EXT			(4 << 4)
+	#define PM3RD_MClkControl_SOURCE_HALF_KCLK		(5 << 4)
+	#define PM3RD_MClkControl_SOURCE_KCLK			(6 << 4)
 #define PM3RD_MClkPreScale					0x212
 #define PM3RD_MClkFeedbackScale					0x213
 #define PM3RD_MClkPostScale					0x214
 #define PM3RD_SClkControl					0x215
-	#define PM3RD_SClkControl_DISABLE			(0<<0)
-	#define PM3RD_SClkControl_ENABLE			(1<<0)
-	#define PM3RD_SClkControl_NOT_LOCKED			(0<<1)
-	#define PM3RD_SClkControl_LOCKED			(1<<1)
-	#define PM3RD_SClkControl_STATE_LOW			(0<<2)
-	#define PM3RD_SClkControl_STATE_HIGH			(1<<2)
-	#define PM3RD_SClkControl_STATE_RUN			(2<<2)
-	#define PM3RD_SClkControl_STATE_LOW_POWER		(3<<2)
-	#define PM3RD_SClkControl_SOURCE_PCLK			(0<<4)
-	#define PM3RD_SClkControl_SOURCE_HALF_PCLK		(1<<4)
-	#define PM3RD_SClkControl_SOURCE_HALF_EXT		(3<<4)
-	#define PM3RD_SClkControl_SOURCE_EXT			(4<<4)
-	#define PM3RD_SClkControl_SOURCE_HALF_KCLK		(5<<4)
-	#define PM3RD_SClkControl_SOURCE_KCLK			(6<<4)
+	#define PM3RD_SClkControl_ENABLE			(1 << 0)
+	#define PM3RD_SClkControl_NOT_LOCKED			(0 << 1)
+	#define PM3RD_SClkControl_LOCKED			(1 << 1)
+	#define PM3RD_SClkControl_STATE_LOW			(0 << 2)
+	#define PM3RD_SClkControl_STATE_HIGH			(1 << 2)
+	#define PM3RD_SClkControl_STATE_RUN			(2 << 2)
+	#define PM3RD_SClkControl_STATE_LOW_POWER		(3 << 2)
+	#define PM3RD_SClkControl_SOURCE_PCLK			(0 << 4)
+	#define PM3RD_SClkControl_SOURCE_HALF_PCLK		(1 << 4)
+	#define PM3RD_SClkControl_SOURCE_HALF_EXT		(3 << 4)
+	#define PM3RD_SClkControl_SOURCE_EXT			(4 << 4)
+	#define PM3RD_SClkControl_SOURCE_HALF_KCLK		(5 << 4)
+	#define PM3RD_SClkControl_SOURCE_KCLK			(6 << 4)
 #define PM3RD_SClkPreScale					0x216
 #define PM3RD_SClkFeedbackScale					0x217
 #define PM3RD_SClkPostScale					0x218
 
-#define PM3RD_CursorPalette(p)					(0x303+(p))
-#define PM3RD_CursorPattern(p)					(0x400+(p))
+#define PM3RD_CursorPalette(p)				(0x303 + (p))
+#define PM3RD_CursorPattern(p)				(0x400 + (p))
 /******************************************************
 *  GLINT Permedia3 Video Streaming Registers (0x5000) *
 *******************************************************/
@@ -521,10 +499,10 @@
 #define PM3ColorDDAModeOr					0xabe8
 #define PM3CommandInterrupt					0xa990
 #define PM3ConstantColorDDA					0xafb0
-	#define PM3ConstantColorDDA_R(r)		((r)&0xff)
-	#define PM3ConstantColorDDA_G(g)		(((g)&0xff)<<8)
-	#define PM3ConstantColorDDA_B(b)		(((b)&0xff)<<16)
-	#define PM3ConstantColorDDA_A(a)		(((a)&0xff)<<24)
+	#define PM3ConstantColorDDA_R(r)		((r) & 0xff)
+	#define PM3ConstantColorDDA_G(g)		(((g) & 0xff) << 8)
+	#define PM3ConstantColorDDA_B(b)		(((b) & 0xff) << 16)
+	#define PM3ConstantColorDDA_A(a)		(((a) & 0xff) << 24)
 #define PM3ContextData						0x8dd0
 #define PM3ContextDump						0x8dc0
 #define PM3ContextRestore					0x8dc8
@@ -568,59 +546,59 @@
 #define PM3FBDestReadBufferOffset1				0xaea8
 #define PM3FBDestReadBufferOffset2				0xaeb0
 #define PM3FBDestReadBufferOffset3				0xaeb8
-	#define PM3FBDestReadBufferOffset_XOffset(x)	((x)&0xffff)
-	#define PM3FBDestReadBufferOffset_YOffset(y)	(((y)&0xffff)<<16)
+	#define PM3FBDestReadBufferOffset_XOffset(x)	((x) & 0xffff)
+	#define PM3FBDestReadBufferOffset_YOffset(y)	(((y) & 0xffff) << 16)
 #define PM3FBDestReadBufferWidth0				0xaec0
 #define PM3FBDestReadBufferWidth1				0xaec8
 #define PM3FBDestReadBufferWidth2				0xaed0
 #define PM3FBDestReadBufferWidth3				0xaed8
-	#define PM3FBDestReadBufferWidth_Width(w)	((w)&0x0fff)
+	#define PM3FBDestReadBufferWidth_Width(w)	((w) & 0x0fff)
 
 #define PM3FBDestReadEnables					0xaee8
 #define PM3FBDestReadEnablesAnd					0xad20
 #define PM3FBDestReadEnablesOr					0xad28
-	#define PM3FBDestReadEnables_E(e)		((e)&0xff)
-	#define PM3FBDestReadEnables_E0				1<<0
-	#define PM3FBDestReadEnables_E1				1<<1
-	#define PM3FBDestReadEnables_E2				1<<2
-	#define PM3FBDestReadEnables_E3				1<<3
-	#define PM3FBDestReadEnables_E4				1<<4
-	#define PM3FBDestReadEnables_E5				1<<5
-	#define PM3FBDestReadEnables_E6				1<<6
-	#define PM3FBDestReadEnables_E7				1<<7
-	#define PM3FBDestReadEnables_R(r)		(((r)&0xff)<<8)
-	#define PM3FBDestReadEnables_R0				1<<8
-	#define PM3FBDestReadEnables_R1				1<<9
-	#define PM3FBDestReadEnables_R2				1<<10
-	#define PM3FBDestReadEnables_R3				1<<11
-	#define PM3FBDestReadEnables_R4				1<<12
-	#define PM3FBDestReadEnables_R5				1<<13
-	#define PM3FBDestReadEnables_R6				1<<14
-	#define PM3FBDestReadEnables_R7				1<<15
-	#define PM3FBDestReadEnables_ReferenceAlpha(a)	(((a)&0xff)<<24)
+	#define PM3FBDestReadEnables_E(e)		((e) & 0xff)
+	#define PM3FBDestReadEnables_E0				(1 << 0)
+	#define PM3FBDestReadEnables_E1				(1 << 1)
+	#define PM3FBDestReadEnables_E2				(1 << 2)
+	#define PM3FBDestReadEnables_E3				(1 << 3)
+	#define PM3FBDestReadEnables_E4				(1 << 4)
+	#define PM3FBDestReadEnables_E5				(1 << 5)
+	#define PM3FBDestReadEnables_E6				(1 << 6)
+	#define PM3FBDestReadEnables_E7				(1 << 7)
+	#define PM3FBDestReadEnables_R(r)		(((r) & 0xff) << 8)
+	#define PM3FBDestReadEnables_R0				(1 << 8)
+	#define PM3FBDestReadEnables_R1				(1 << 9)
+	#define PM3FBDestReadEnables_R2				(1 << 10)
+	#define PM3FBDestReadEnables_R3				(1 << 11)
+	#define PM3FBDestReadEnables_R4				(1 << 12)
+	#define PM3FBDestReadEnables_R5				(1 << 13)
+	#define PM3FBDestReadEnables_R6				(1 << 14)
+	#define PM3FBDestReadEnables_R7				(1 << 15)
+	#define PM3FBDestReadEnables_ReferenceAlpha(a)	(((a) & 0xff) << 24)
 
 #define PM3FBDestReadMode					0xaee0
 #define PM3FBDestReadModeAnd					0xac90
 #define PM3FBDestReadModeOr					0xac98
-	#define PM3FBDestReadMode_ReadDisable			0<<0
-	#define PM3FBDestReadMode_ReadEnable			1<<0
-	#define PM3FBDestReadMode_StripePitch(sp)	(((sp)&0x7)<<2)
-	#define PM3FBDestReadMode_StripeHeight(sh)	(((sh)&0x7)<<7)
-	#define PM3FBDestReadMode_Enable0			1<<8
-	#define PM3FBDestReadMode_Enable1			1<<9
-	#define PM3FBDestReadMode_Enable2			1<<10
-	#define PM3FBDestReadMode_Enable3			1<<11
-	#define PM3FBDestReadMode_Layout0(l)		(((l)&0x3)<<12)
-	#define PM3FBDestReadMode_Layout1(l)		(((l)&0x3)<<14)
-	#define PM3FBDestReadMode_Layout2(l)		(((l)&0x3)<<16)
-	#define PM3FBDestReadMode_Layout3(l)		(((l)&0x3)<<18)
-	#define PM3FBDestReadMode_Origin0			1<<20
-	#define PM3FBDestReadMode_Origin1			1<<21
-	#define PM3FBDestReadMode_Origin2			1<<22
-	#define PM3FBDestReadMode_Origin3			1<<23
-	#define PM3FBDestReadMode_Blocking			1<<24
-	#define PM3FBDestReadMode_UseReadEnabled		1<<26
-	#define PM3FBDestReadMode_AlphaFiltering		1<<27
+	#define PM3FBDestReadMode_ReadDisable			(0 << 0)
+	#define PM3FBDestReadMode_ReadEnable			(1 << 0)
+	#define PM3FBDestReadMode_StripePitch(sp)	(((sp) & 0x7) << 2)
+	#define PM3FBDestReadMode_StripeHeight(sh)	(((sh) & 0x7) << 7)
+	#define PM3FBDestReadMode_Enable0			(1 << 8)
+	#define PM3FBDestReadMode_Enable1			(1 << 9)
+	#define PM3FBDestReadMode_Enable2			(1 << 10)
+	#define PM3FBDestReadMode_Enable3			(1 << 11)
+	#define PM3FBDestReadMode_Layout0(l)		(((l) & 0x3) << 12)
+	#define PM3FBDestReadMode_Layout1(l)		(((l) & 0x3) << 14)
+	#define PM3FBDestReadMode_Layout2(l)		(((l) & 0x3) << 16)
+	#define PM3FBDestReadMode_Layout3(l)		(((l) & 0x3) << 18)
+	#define PM3FBDestReadMode_Origin0			(1 << 20)
+	#define PM3FBDestReadMode_Origin1			(1 << 21)
+	#define PM3FBDestReadMode_Origin2			(1 << 22)
+	#define PM3FBDestReadMode_Origin3			(1 << 23)
+	#define PM3FBDestReadMode_Blocking			(1 << 24)
+	#define PM3FBDestReadMode_UseReadEnabled		(1 << 26)
+	#define PM3FBDestReadMode_AlphaFiltering		(1 << 27)
 
 #define PM3FBHardwareWriteMask					0x8ac0
 #define PM3FBSoftwareWriteMask					0x8820
@@ -628,65 +606,65 @@
 #define PM3FBSourceData						0x8aa8
 #define PM3FBSourceReadBufferAddr				0xaf08
 #define PM3FBSourceReadBufferOffset				0xaf10
-	#define PM3FBSourceReadBufferOffset_XOffset(x)	((x)&0xffff)
-	#define PM3FBSourceReadBufferOffset_YOffset(y)	(((y)&0xffff)<<16)
+	#define PM3FBSourceReadBufferOffset_XOffset(x)	((x) & 0xffff)
+	#define PM3FBSourceReadBufferOffset_YOffset(y)	(((y) & 0xffff) << 16)
 #define PM3FBSourceReadBufferWidth				0xaf18
-	#define PM3FBSourceReadBufferWidth_Width(w)	((w)&0x0fff)
+	#define PM3FBSourceReadBufferWidth_Width(w)	((w) & 0x0fff)
 #define PM3FBSourceReadMode					0xaf00
 #define PM3FBSourceReadModeAnd					0xaca0
 #define PM3FBSourceReadModeOr					0xaca8
-	#define PM3FBSourceReadMode_ReadDisable			(0<<0)
-	#define PM3FBSourceReadMode_ReadEnable			(1<<0)
-	#define PM3FBSourceReadMode_StripePitch(sp)	(((sp)&0x7)<<2)
-	#define PM3FBSourceReadMode_StripeHeight(sh)	(((sh)&0x7)<<7)
-	#define PM3FBSourceReadMode_Layout(l)		(((l)&0x3)<<8)
-	#define PM3FBSourceReadMode_Origin			1<<10
-	#define PM3FBSourceReadMode_Blocking			1<<11
-	#define PM3FBSourceReadMode_UserTexelCoord		1<<13
-	#define PM3FBSourceReadMode_WrapXEnable			1<<14
-	#define PM3FBSourceReadMode_WrapYEnable			1<<15
-	#define PM3FBSourceReadMode_WrapX(w)		(((w)&0xf)<<16)
-	#define PM3FBSourceReadMode_WrapY(w)		(((w)&0xf)<<20)
-	#define PM3FBSourceReadMode_ExternalSourceData		1<<24
-#define PM3FBWriteBufferAddr0                                   0xb000
-#define PM3FBWriteBufferAddr1                                   0xb008
-#define PM3FBWriteBufferAddr2                                   0xb010
-#define PM3FBWriteBufferAddr3                                   0xb018
+	#define PM3FBSourceReadMode_ReadDisable			(0 << 0)
+	#define PM3FBSourceReadMode_ReadEnable			(1 << 0)
+	#define PM3FBSourceReadMode_StripePitch(sp)	(((sp) & 0x7) << 2)
+	#define PM3FBSourceReadMode_StripeHeight(sh)	(((sh) & 0x7) << 7)
+	#define PM3FBSourceReadMode_Layout(l)		(((l) & 0x3) << 8)
+	#define PM3FBSourceReadMode_Origin			(1 << 10)
+	#define PM3FBSourceReadMode_Blocking			(1 << 11)
+	#define PM3FBSourceReadMode_UserTexelCoord		(1 << 13)
+	#define PM3FBSourceReadMode_WrapXEnable			(1 << 14)
+	#define PM3FBSourceReadMode_WrapYEnable			(1 << 15)
+	#define PM3FBSourceReadMode_WrapX(w)		(((w) & 0xf) << 16)
+	#define PM3FBSourceReadMode_WrapY(w)		(((w) & 0xf) << 20)
+	#define PM3FBSourceReadMode_ExternalSourceData		(1 << 24)
+#define PM3FBWriteBufferAddr0					0xb000
+#define PM3FBWriteBufferAddr1					0xb008
+#define PM3FBWriteBufferAddr2					0xb010
+#define PM3FBWriteBufferAddr3					0xb018
 
-#define PM3FBWriteBufferOffset0                                 0xb020
-#define PM3FBWriteBufferOffset1                                 0xb028
-#define PM3FBWriteBufferOffset2                                 0xb030
-#define PM3FBWriteBufferOffset3                                 0xb038
-	#define PM3FBWriteBufferOffset_XOffset(x)		((x)&0xffff)
-	#define PM3FBWriteBufferOffset_YOffset(y)		(((y)&0xffff)<<16)
+#define PM3FBWriteBufferOffset0					0xb020
+#define PM3FBWriteBufferOffset1					0xb028
+#define PM3FBWriteBufferOffset2					0xb030
+#define PM3FBWriteBufferOffset3					0xb038
+	#define PM3FBWriteBufferOffset_XOffset(x)	((x) & 0xffff)
+	#define PM3FBWriteBufferOffset_YOffset(y)	(((y) & 0xffff) << 16)
 
-#define PM3FBWriteBufferWidth0                                  0xb040
-#define PM3FBWriteBufferWidth1                                  0xb048
-#define PM3FBWriteBufferWidth2                                  0xb050
-#define PM3FBWriteBufferWidth3                                  0xb058
-	#define PM3FBWriteBufferWidth_Width(w)			((w)&0x0fff)
+#define PM3FBWriteBufferWidth0					0xb040
+#define PM3FBWriteBufferWidth1					0xb048
+#define PM3FBWriteBufferWidth2					0xb050
+#define PM3FBWriteBufferWidth3					0xb058
+	#define PM3FBWriteBufferWidth_Width(w)		((w) & 0x0fff)
 
-#define PM3FBWriteMode                                          0x8ab8
-#define PM3FBWriteModeAnd                                       0xacf0
-#define PM3FBWriteModeOr                                        0xacf8
-	#define PM3FBWriteMode_WriteDisable                     0<<0
-	#define PM3FBWriteMode_WriteEnable                      1<<0
-	#define PM3FBWriteMode_Replicate                        1<<4
-	#define PM3FBWriteMode_OpaqueSpan                       1<<5
-	#define PM3FBWriteMode_StripePitch(p)            (((p)&0x7)<<6)
-	#define PM3FBWriteMode_StripeHeight(h)           (((h)&0x7)<<9)
-	#define PM3FBWriteMode_Enable0                          1<<12
-	#define PM3FBWriteMode_Enable1                          1<<13
-	#define PM3FBWriteMode_Enable2                          1<<14
-	#define PM3FBWriteMode_Enable3                          1<<15
-	#define PM3FBWriteMode_Layout0(l)               (((l)&0x3)<<16)
-	#define PM3FBWriteMode_Layout1(l)               (((l)&0x3)<<18)
-	#define PM3FBWriteMode_Layout2(l)               (((l)&0x3)<<20)
-	#define PM3FBWriteMode_Layout3(l)               (((l)&0x3)<<22)
-	#define PM3FBWriteMode_Origin0                          1<<24
-	#define PM3FBWriteMode_Origin1                          1<<25
-	#define PM3FBWriteMode_Origin2                          1<<26
-	#define PM3FBWriteMode_Origin3                          1<<27
+#define PM3FBWriteMode						0x8ab8
+#define PM3FBWriteModeAnd					0xacf0
+#define PM3FBWriteModeOr					0xacf8
+	#define PM3FBWriteMode_WriteDisable			(0 << 0)
+	#define PM3FBWriteMode_WriteEnable			(1 << 0)
+	#define PM3FBWriteMode_Replicate			(1 << 4)
+	#define PM3FBWriteMode_OpaqueSpan			(1 << 5)
+	#define PM3FBWriteMode_StripePitch(p)		(((p) & 0x7) << 6)
+	#define PM3FBWriteMode_StripeHeight(h)		(((h) & 0x7) << 9)
+	#define PM3FBWriteMode_Enable0				(1 << 12)
+	#define PM3FBWriteMode_Enable1				(1 << 13)
+	#define PM3FBWriteMode_Enable2				(1 << 14)
+	#define PM3FBWriteMode_Enable3				(1 << 15)
+	#define PM3FBWriteMode_Layout0(l)		(((l) & 0x3) << 16)
+	#define PM3FBWriteMode_Layout1(l)		(((l) & 0x3) << 18)
+	#define PM3FBWriteMode_Layout2(l)		(((l) & 0x3) << 20)
+	#define PM3FBWriteMode_Layout3(l)		(((l) & 0x3) << 22)
+	#define PM3FBWriteMode_Origin0				(1 << 24)
+	#define PM3FBWriteMode_Origin1				(1 << 25)
+	#define PM3FBWriteMode_Origin2				(1 << 26)
+	#define PM3FBWriteMode_Origin3				(1 << 27)
 #define PM3ForegroundColor					0xb0c0
 /* ... */
 #define PM3GIDMode						0xb538
@@ -701,55 +679,55 @@
 #define PM3LBDestReadMode					0xb500
 #define PM3LBDestReadModeAnd					0xb580
 #define PM3LBDestReadModeOr					0xb588
-	#define PM3LBDestReadMode_Disable			0<<0
-	#define PM3LBDestReadMode_Enable			1<<0
-	#define PM3LBDestReadMode_StripePitch(p)		(((p)&0x7)<<2)
-	#define PM3LBDestReadMode_StripeHeight(h)		(((h)&0x7)<<5)
-	#define PM3LBDestReadMode_Layout			1<<8
-	#define PM3LBDestReadMode_Origin			1<<9
-	#define PM3LBDestReadMode_UserReadEnables		1<<10
-	#define PM3LBDestReadMode_Packed16			1<<11
-	#define PM3LBDestReadMode_Width(w)			(((w)&0xfff)<<12)
+	#define PM3LBDestReadMode_Disable			(0 << 0)
+	#define PM3LBDestReadMode_Enable			(1 << 0)
+	#define PM3LBDestReadMode_StripePitch(p)	(((p) & 0x7) << 2)
+	#define PM3LBDestReadMode_StripeHeight(h)	(((h) & 0x7) << 5)
+	#define PM3LBDestReadMode_Layout			(1 << 8)
+	#define PM3LBDestReadMode_Origin			(1 << 9)
+	#define PM3LBDestReadMode_UserReadEnables		(1 << 10)
+	#define PM3LBDestReadMode_Packed16			(1 << 11)
+	#define PM3LBDestReadMode_Width(w)		(((w) & 0xfff) << 12)
 #define PM3LBReadFormat						0x8888
-	#define PM3LBReadFormat_DepthWidth(w)			(((w)&0x3)<<0)
-	#define PM3LBReadFormat_StencilWidth(w)			(((w)&0xf)<<2)
-	#define PM3LBReadFormat_StencilPosition(p)		(((p)&0x1f)<<6)
-	#define PM3LBReadFormat_FCPWidth(w)			(((w)&0xf)<<11)
-	#define PM3LBReadFormat_FCPPosition(p)			(((p)&0x1f)<<15)
-	#define PM3LBReadFormat_GIDWidth(w)			(((w)&0x7)<<20)
-	#define PM3LBReadFormat_GIDPosition(p)			(((p)&0x1f)<<23)
+	#define PM3LBReadFormat_DepthWidth(w)		(((w) & 0x3) << 0)
+	#define PM3LBReadFormat_StencilWidth(w)		(((w) & 0xf) << 2)
+	#define PM3LBReadFormat_StencilPosition(p)	(((p) & 0x1f) << 6)
+	#define PM3LBReadFormat_FCPWidth(w)		(((w) & 0xf) << 11)
+	#define PM3LBReadFormat_FCPPosition(p)		(((p) & 0x1f) << 15)
+	#define PM3LBReadFormat_GIDWidth(w)		(((w) & 0x7) << 20)
+	#define PM3LBReadFormat_GIDPosition(p)		(((p) & 0x1f) << 23)
 #define PM3LBSourceReadBufferAddr				0xb528
 #define PM3LBSourceReadBufferOffset				0xb530
 #define PM3LBSourceReadMode					0xb520
 #define PM3LBSourceReadModeAnd					0xb5a0
 #define PM3LBSourceReadModeOr					0xb5a8
-	#define PM3LBSourceReadMode_Enable			1<<0
-	#define PM3LBSourceReadMode_StripePitch(p)		(((p)&0x7)<<2)
-	#define PM3LBSourceReadMode_StripeHeight(h)		(((h)&0x7)<<5)
-	#define PM3LBSourceReadMode_Layout			1<<8
-	#define PM3LBSourceReadMode_Origin			1<<9
-	#define PM3LBSourceReadMode_Packed16			1<<10
-	#define PM3LBSourceReadMode_Width(w)			(((w)&0xfff)<<11)
+	#define PM3LBSourceReadMode_Enable			(1 << 0)
+	#define PM3LBSourceReadMode_StripePitch(p)	(((p) & 0x7) << 2)
+	#define PM3LBSourceReadMode_StripeHeight(h)	(((h) & 0x7) << 5)
+	#define PM3LBSourceReadMode_Layout			(1 << 8)
+	#define PM3LBSourceReadMode_Origin			(1 << 9)
+	#define PM3LBSourceReadMode_Packed16			(1 << 10)
+	#define PM3LBSourceReadMode_Width(w)		(((w) & 0xfff) << 11)
 #define PM3LBStencil						0x88a8
 #define PM3LBWriteBufferAddr					0xb540
 #define PM3LBWriteBufferOffset					0xb548
 #define PM3LBWriteFormat					0x88c8
-	#define PM3LBWriteFormat_DepthWidth(w)			(((w)&0x3)<<0)
-	#define PM3LBWriteFormat_StencilWidth(w)		(((w)&0xf)<<2)
-	#define PM3LBWriteFormat_StencilPosition(p)		(((p)&0x1f)<<6)
-	#define PM3LBWriteFormat_GIDWidth(w)			(((w)&0x7)<<20)
-	#define PM3LBWriteFormat_GIDPosition(p)			(((p)&0x1f)<<23)
+	#define PM3LBWriteFormat_DepthWidth(w)		(((w) & 0x3) << 0)
+	#define PM3LBWriteFormat_StencilWidth(w)	(((w) & 0xf) << 2)
+	#define PM3LBWriteFormat_StencilPosition(p)	(((p) & 0x1f) << 6)
+	#define PM3LBWriteFormat_GIDWidth(w)		(((w) & 0x7) << 20)
+	#define PM3LBWriteFormat_GIDPosition(p)		(((p) & 0x1f) << 23)
 #define PM3LBWriteMode						0x88c0
 #define PM3LBWriteModeAnd					0xac80
 #define PM3LBWriteModeOr					0xac88
-	#define PM3LBWriteMode_WriteDisable			0<<0
-	#define PM3LBWriteMode_WriteEnable			1<<0
-	#define PM3LBWriteMode_StripePitch(p)			(((p)&0x7)<<3)
-	#define PM3LBWriteMode_StripeHeight(h)			(((h)&0x7)<<6)
-	#define PM3LBWriteMode_Layout				1<<9
-	#define PM3LBWriteMode_Origin				1<<10
-	#define PM3LBWriteMode_Packed16				1<<11
-	#define PM3LBWriteMode_Width(w)				(((w)&0xfff)<<12)
+	#define PM3LBWriteMode_WriteDisable			(0 << 0)
+	#define PM3LBWriteMode_WriteEnable			(1 << 0)
+	#define PM3LBWriteMode_StripePitch(p)		(((p) & 0x7) << 3)
+	#define PM3LBWriteMode_StripeHeight(h)		(((h) & 0x7) << 6)
+	#define PM3LBWriteMode_Layout				(1 << 9)
+	#define PM3LBWriteMode_Origin				(1 << 10)
+	#define PM3LBWriteMode_Packed16				(1 << 11)
+	#define PM3LBWriteMode_Width(w)			(((w) & 0xfff) << 12)
 /* ... */
 #define PM3LineStippleMode					0x81a8
 #define PM3LineStippleModeAnd					0xabc0
@@ -759,19 +737,16 @@
 #define PM3LogicalOpMode					0x8828
 #define PM3LogicalOpModeAnd					0xace0
 #define PM3LogicalOpModeOr					0xace8
-	#define PM3LogicalOpMode_Disable			(0<<0)
-	#define PM3LogicalOpMode_Enable				(1<<0)
-	#define PM3LogicalOpMode_LogicOp(op)			(((op)&0xf)<<1)
-	#define PM3LogicalOpMode_UseConstantWriteData_Disable	(0<<5)
-	#define PM3LogicalOpMode_UseConstantWriteData_Enable	(1<<5)
-	#define PM3LogicalOpMode_Background_Disable		(0<<6)
-	#define PM3LogicalOpMode_Background_Enable		(1<<6)
-	#define PM3LogicalOpMode_Background_LogicOp(op)		(((op)&0xf)<<7)
-	#define PM3LogicalOpMode_UseConstantSource_Disable	(0<<11)
-	#define PM3LogicalOpMode_UseConstantSource_Enable	(1<<11)
-
-/* ... */
-#define PM3LUT							0x8e80
+	#define PM3LogicalOpMode_Disable			(0 << 0)
+	#define PM3LogicalOpMode_Enable				(1 << 0)
+	#define PM3LogicalOpMode_LogicOp(op)		(((op) & 0xf) << 1)
+	#define PM3LogicalOpMode_UseConstantWriteData_Disable	(0 << 5)
+	#define PM3LogicalOpMode_UseConstantWriteData_Enable	(1 << 5)
+	#define PM3LogicalOpMode_Background_Disable		(0 << 6)
+	#define PM3LogicalOpMode_Background_Enable		(1 << 6)
+	#define PM3LogicalOpMode_Background_LogicOp(op)	(((op) & 0xf) << 7)
+	#define PM3LogicalOpMode_UseConstantSource_Disable	(0 << 11)
+	#define PM3LogicalOpMode_UseConstantSource_Enable	(1 << 11)
 /* ... */
 #define PM3LUT							0x8e80
 #define PM3LUTAddress						0x84d0
@@ -783,75 +758,74 @@
 #define PM3LUTTransfer						0x84d8
 /* ... */
 #define PM3PixelSize						0x80c0
-	#define PM3PixelSize_GLOBAL_32BIT			(0<<0)
-	#define PM3PixelSize_GLOBAL_16BIT			(1<<0)
-	#define PM3PixelSize_GLOBAL_8BIT			(2<<0)
-	#define PM3PixelSize_RASTERIZER_32BIT			(0<<2)
-	#define PM3PixelSize_RASTERIZER_16BIT			(1<<2)
-	#define PM3PixelSize_RASTERIZER_8BIT			(2<<2)
-	#define PM3PixelSize_SCISSOR_AND_STIPPLE_32BIT		(0<<4)
-	#define PM3PixelSize_SCISSOR_AND_STIPPLE_16BIT		(1<<4)
-	#define PM3PixelSize_SCISSOR_AND_STIPPLE_8BIT		(2<<4)
-	#define PM3PixelSize_TEXTURE_32BIT			(0<<6)
-	#define PM3PixelSize_TEXTURE_16BIT			(1<<6)
-	#define PM3PixelSize_TEXTURE_8BIT			(2<<6)
-	#define PM3PixelSize_LUT_32BIT				(0<<8)
-	#define PM3PixelSize_LUT_16BIT				(1<<8)
-	#define PM3PixelSize_LUT_8BIT				(2<<8)
-	#define PM3PixelSize_FRAMEBUFFER_32BIT			(0<<10)
-	#define PM3PixelSize_FRAMEBUFFER_16BIT			(1<<10)
-	#define PM3PixelSize_FRAMEBUFFER_8BIT			(2<<10)
-	#define PM3PixelSize_LOGICAL_OP_32BIT			(0<<12)
-	#define PM3PixelSize_LOGICAL_OP_16BIT			(1<<12)
-	#define PM3PixelSize_LOGICAL_OP_8BIT			(2<<12)
-	#define PM3PixelSize_LOCALBUFFER_32BIT			(0<<14)
-	#define PM3PixelSize_LOCALBUFFER_16BIT			(1<<14)
-	#define PM3PixelSize_LOCALBUFFER_8BIT			(2<<14)
-	#define PM3PixelSize_SETUP_32BIT			(0<<16)
-	#define PM3PixelSize_SETUP_16BIT			(1<<16)
-	#define PM3PixelSize_SETUP_8BIT				(2<<16)
-	#define PM3PixelSize_GLOBAL				(0<<31)
-	#define PM3PixelSize_INDIVIDUAL				(1<<31)
+	#define PM3PixelSize_GLOBAL_32BIT			(0 << 0)
+	#define PM3PixelSize_GLOBAL_16BIT			(1 << 0)
+	#define PM3PixelSize_GLOBAL_8BIT			(2 << 0)
+	#define PM3PixelSize_RASTERIZER_32BIT			(0 << 2)
+	#define PM3PixelSize_RASTERIZER_16BIT			(1 << 2)
+	#define PM3PixelSize_RASTERIZER_8BIT			(2 << 2)
+	#define PM3PixelSize_SCISSOR_AND_STIPPLE_32BIT		(0 << 4)
+	#define PM3PixelSize_SCISSOR_AND_STIPPLE_16BIT		(1 << 4)
+	#define PM3PixelSize_SCISSOR_AND_STIPPLE_8BIT		(2 << 4)
+	#define PM3PixelSize_TEXTURE_32BIT			(0 << 6)
+	#define PM3PixelSize_TEXTURE_16BIT			(1 << 6)
+	#define PM3PixelSize_TEXTURE_8BIT			(2 << 6)
+	#define PM3PixelSize_LUT_32BIT				(0 << 8)
+	#define PM3PixelSize_LUT_16BIT				(1 << 8)
+	#define PM3PixelSize_LUT_8BIT				(2 << 8)
+	#define PM3PixelSize_FRAMEBUFFER_32BIT			(0 << 10)
+	#define PM3PixelSize_FRAMEBUFFER_16BIT			(1 << 10)
+	#define PM3PixelSize_FRAMEBUFFER_8BIT			(2 << 10)
+	#define PM3PixelSize_LOGICAL_OP_32BIT			(0 << 12)
+	#define PM3PixelSize_LOGICAL_OP_16BIT			(1 << 12)
+	#define PM3PixelSize_LOGICAL_OP_8BIT			(2 << 12)
+	#define PM3PixelSize_LOCALBUFFER_32BIT			(0 << 14)
+	#define PM3PixelSize_LOCALBUFFER_16BIT			(1 << 14)
+	#define PM3PixelSize_LOCALBUFFER_8BIT			(2 << 14)
+	#define PM3PixelSize_SETUP_32BIT			(0 << 16)
+	#define PM3PixelSize_SETUP_16BIT			(1 << 16)
+	#define PM3PixelSize_SETUP_8BIT				(2 << 16)
+	#define PM3PixelSize_GLOBAL				(0 << 31)
+	#define PM3PixelSize_INDIVIDUAL				(1U << 31)
 /* ... */
 #define PM3Render						0x8038
-	#define PM3Render_AreaStipple_Disable			(0<<0)
-	#define PM3Render_AreaStipple_Enable			(1<<0)
-	#define PM3Render_LineStipple_Disable			(0<<1)
-	#define PM3Render_LineStipple_Enable			(1<<1)
-	#define PM3Render_ResetLine_Disable			(0<<2)
-	#define PM3Render_ResetLine_Enable			(1<<2)
-	#define PM3Render_FastFill_Disable			(0<<3)
-	#define PM3Render_FastFill_Enable			(1<<3)
-	#define PM3Render_Primitive_Line			(0<<6)
-	#define PM3Render_Primitive_Trapezoid			(1<<6)
-	#define PM3Render_Primitive_Point			(2<<6)
-	#define PM3Render_Antialias_Disable			(0<<8)
-	#define PM3Render_Antialias_Enable			(1<<8)
-	#define PM3Render_Antialias_SubPixelRes_4x4		(0<<9)
-	#define PM3Render_Antialias_SubPixelRes_8x8		(1<<9)
-	#define PM3Render_UsePointTable_Disable			(0<<10)
-	#define PM3Render_UsePointTable_Enable			(1<<10)
-	#define PM3Render_SyncOnbitMask_Disable			(0<<11)
-	#define PM3Render_SyncOnBitMask_Enable			(1<<11)
-	#define PM3Render_SyncOnHostData_Disable		(0<<12)
-	#define PM3Render_SyncOnHostData_Enable			(1<<12)
-	#define PM3Render_Texture_Disable			(0<<13)
-	#define PM3Render_Texture_Enable			(1<<13)
-	#define PM3Render_Fog_Disable				(0<<14)
-	#define PM3Render_Fog_Enable				(1<<14)
-	#define PM3Render_Coverage_Disable			(0<<15)
-	#define PM3Render_Coverage_Enable			(1<<15)
-	#define PM3Render_SubPixelCorrection_Disable		(0<<16)
-	#define PM3Render_SubPixelCorrection_Enable		(1<<16)
-	#define PM3Render_SpanOperation_Disable			(0<<18)
-	#define PM3Render_SpanOperation_Enable			(1<<18)
-	#define PM3Render_FBSourceRead_Disable			(0<<27)
-	#define PM3Render_FBSourceRead_Enable			(1<<27)
+	#define PM3Render_AreaStipple_Disable			(0 << 0)
+	#define PM3Render_AreaStipple_Enable			(1 << 0)
+	#define PM3Render_LineStipple_Disable			(0 << 1)
+	#define PM3Render_LineStipple_Enable			(1 << 1)
+	#define PM3Render_ResetLine_Disable			(0 << 2)
+	#define PM3Render_ResetLine_Enable			(1 << 2)
+	#define PM3Render_FastFill_Disable			(0 << 3)
+	#define PM3Render_FastFill_Enable			(1 << 3)
+	#define PM3Render_Primitive_Line			(0 << 6)
+	#define PM3Render_Primitive_Trapezoid			(1 << 6)
+	#define PM3Render_Primitive_Point			(2 << 6)
+	#define PM3Render_Antialias_Disable			(0 << 8)
+	#define PM3Render_Antialias_Enable			(1 << 8)
+	#define PM3Render_Antialias_SubPixelRes_4x4		(0 << 9)
+	#define PM3Render_Antialias_SubPixelRes_8x8		(1 << 9)
+	#define PM3Render_UsePointTable_Disable			(0 << 10)
+	#define PM3Render_UsePointTable_Enable			(1 << 10)
+	#define PM3Render_SyncOnbitMask_Disable			(0 << 11)
+	#define PM3Render_SyncOnBitMask_Enable			(1 << 11)
+	#define PM3Render_SyncOnHostData_Disable		(0 << 12)
+	#define PM3Render_SyncOnHostData_Enable			(1 << 12)
+	#define PM3Render_Texture_Disable			(0 << 13)
+	#define PM3Render_Texture_Enable			(1 << 13)
+	#define PM3Render_Fog_Disable				(0 << 14)
+	#define PM3Render_Fog_Enable				(1 << 14)
+	#define PM3Render_Coverage_Disable			(0 << 15)
+	#define PM3Render_Coverage_Enable			(1 << 15)
+	#define PM3Render_SubPixelCorrection_Disable		(0 << 16)
+	#define PM3Render_SubPixelCorrection_Enable		(1 << 16)
+	#define PM3Render_SpanOperation_Disable			(0 << 18)
+	#define PM3Render_SpanOperation_Enable			(1 << 18)
+	#define PM3Render_FBSourceRead_Disable			(0 << 27)
+	#define PM3Render_FBSourceRead_Enable			(1 << 27)
 #define PM3RasterizerMode					0x80a0
 #define PM3RasterizerModeAnd					0xaba0
-#define PM3RasterizerModeOr					0xabb8
+#define PM3RasterizerModeOr					0xaba8
 #define PM3RectangleHeight					0x94e0
-#define PM3Render						0x8038
 #define PM3RepeatLine						0x9328
 #define PM3ResetPickResult					0x8c20
 #define PM3RLEMask						0x8c48
@@ -918,31 +892,31 @@
 #define PM3TextureIndexMode1And					0xb3d0
 #define PM3TextureIndexMode1Or					0xb3d8
 /* ... */
-#define PM3TextureMapSize                                       0xb428
-#define PM3TextureMapWidth0                                     0x8580
-#define PM3TextureMapWidth1                                     0x8588
-        #define PM3TextureMapWidth_Width(w)             ((w&0xfff)<<0)
-        #define PM3TextureMapWidth_BorderLayout                 (1<<12)
-        #define PM3TextureMapWidth_Layout_Linear                (0<<13)
-        #define PM3TextureMapWidth_Layout_Patch64               (1<<13)
-        #define PM3TextureMapWidth_Layout_Patch32_2             (2<<13)
-        #define PM3TextureMapWidth_Layout_Patch2                (3<<13)
-        #define PM3TextureMapWidth_HostTexture                  (1<<15)
-#define PM3TextureReadMode0                                     0xb400
-#define PM3TextureReadMode0And                                  0xac30
-#define PM3TextureReadMode0Or                                   0xac38
-#define PM3TextureReadMode1                                     0xb408
-#define PM3TextureReadMode1And                                  0xad40
-#define PM3TextureReadMode1Or                                   0xad48
+#define PM3TextureMapSize					0xb428
+#define PM3TextureMapWidth0					0x8580
+#define PM3TextureMapWidth1					0x8588
+	#define PM3TextureMapWidth_Width(w)		(((w) & 0xfff) << 0)
+	#define PM3TextureMapWidth_BorderLayout			(1 << 12)
+	#define PM3TextureMapWidth_Layout_Linear		(0 << 13)
+	#define PM3TextureMapWidth_Layout_Patch64		(1 << 13)
+	#define PM3TextureMapWidth_Layout_Patch32_2		(2 << 13)
+	#define PM3TextureMapWidth_Layout_Patch2		(3 << 13)
+	#define PM3TextureMapWidth_HostTexture			(1 << 15)
+#define PM3TextureReadMode0					0xb400
+#define PM3TextureReadMode0And					0xac30
+#define PM3TextureReadMode0Or					0xac38
+#define PM3TextureReadMode1					0xb408
+#define PM3TextureReadMode1And					0xad40
+#define PM3TextureReadMode1Or					0xad48
 /* ... */
 #define PM3WaitForCompletion					0x80b8
 #define PM3Window						0x8980
-	#define PM3Window_ForceLBUpdate				1<<3
-	#define PM3Window_LBUpdateSource			1<<4
-	#define PM3Window_FrameCount(c)				(((c)&0xff)<<9)
-	#define PM3Window_StencilFCP				1<<17
-	#define PM3Window_DepthFCP				1<<18
-	#define PM3Window_OverrideWriteFiltering		1<<19
+	#define PM3Window_ForceLBUpdate				(1 << 3)
+	#define PM3Window_LBUpdateSource			(1 << 4)
+	#define PM3Window_FrameCount(c)			(((c) & 0xff) << 9)
+	#define PM3Window_StencilFCP				(1 << 17)
+	#define PM3Window_DepthFCP				(1 << 18)
+	#define PM3Window_OverrideWriteFiltering		(1 << 19)
 #define PM3WindowAnd						0xab80
 #define PM3WindowOr						0xab88
 #define PM3WindowOrigin						0x81c8
@@ -957,169 +931,131 @@
 
 
 /**********************************************
-*  GLINT Permedia3 2D setup Unit              *
+*  GLINT Permedia3 2D setup Unit	      *
 ***********************************************/
 #define PM3Config2D						0xb618
-	#define PM3Config2D_OpaqueSpan				1<<0
-	#define PM3Config2D_MultiRXBlit				1<<1
-	#define PM3Config2D_UserScissorEnable			1<<2
-	#define PM3Config2D_FBDestReadEnable			1<<3
-	#define PM3Config2D_AlphaBlendEnable			1<<4
-	#define PM3Config2D_DitherEnable			1<<5
-	#define PM3Config2D_ForegroundROPEnable			1<<6
-	#define PM3Config2D_ForegroundROP(rop)		(((rop)&0xf)<<7)
-	#define PM3Config2D_BackgroundROPEnable			1<<11
-	#define PM3Config2D_BackgroundROP(rop)		(((rop)&0xf)<<12)
-	#define PM3Config2D_UseConstantSource			1<<16
-	#define PM3Config2D_FBWriteEnable			1<<17
-	#define PM3Config2D_Blocking				1<<18
-	#define PM3Config2D_ExternalSourceData			1<<19
-	#define PM3Config2D_LUTModeEnable			1<<20
+	#define PM3Config2D_OpaqueSpan				(1 << 0)
+	#define PM3Config2D_MultiRXBlit				(1 << 1)
+	#define PM3Config2D_UserScissorEnable			(1 << 2)
+	#define PM3Config2D_FBDestReadEnable			(1 << 3)
+	#define PM3Config2D_AlphaBlendEnable			(1 << 4)
+	#define PM3Config2D_DitherEnable			(1 << 5)
+	#define PM3Config2D_ForegroundROPEnable			(1 << 6)
+	#define PM3Config2D_ForegroundROP(rop)		(((rop) & 0xf) << 7)
+	#define PM3Config2D_BackgroundROPEnable			(1 << 11)
+	#define PM3Config2D_BackgroundROP(rop)		(((rop) & 0xf) << 12)
+	#define PM3Config2D_UseConstantSource			(1 << 16)
+	#define PM3Config2D_FBWriteEnable			(1 << 17)
+	#define PM3Config2D_Blocking				(1 << 18)
+	#define PM3Config2D_ExternalSourceData			(1 << 19)
+	#define PM3Config2D_LUTModeEnable			(1 << 20)
 #define PM3DownloadGlyphwidth					0xb658
-	#define PM3DownloadGlyphwidth_GlyphWidth(gw)	((gw)&0xffff)
+	#define PM3DownloadGlyphwidth_GlyphWidth(gw)	((gw) & 0xffff)
 #define PM3DownloadTarget					0xb650
-	#define PM3DownloadTarget_TagName(tag)		((tag)&0x1fff)
+	#define PM3DownloadTarget_TagName(tag)		((tag) & 0x1fff)
 #define PM3GlyphData						0xb660
 #define PM3GlyphPosition					0xb608
-	#define PM3GlyphPosition_XOffset(x)		((x)&0xffff)
-	#define PM3GlyphPosition_YOffset(y)		(((y)&0xffff)<<16)
+	#define PM3GlyphPosition_XOffset(x)		((x) & 0xffff)
+	#define PM3GlyphPosition_YOffset(y)		(((y) & 0xffff) << 16)
 #define PM3Packed4Pixels					0xb668
 #define PM3Packed8Pixels					0xb630
 #define PM3Packed16Pixels					0xb638
 #define PM3RectanglePosition					0xb600
-	#define PM3RectanglePosition_XOffset(x)		((x)&0xffff)
-	#define PM3RectanglePosition_YOffset(y)		(((y)&0xffff)<<16)
+	#define PM3RectanglePosition_XOffset(x)		((x) & 0xffff)
+	#define PM3RectanglePosition_YOffset(y)		(((y) & 0xffff) << 16)
 #define PM3Render2D						0xb640
-	#define PM3Render2D_Width(w)			((w)&0x0fff)
-	#define PM3Render2D_Operation_Normal			0<<12
-	#define PM3Render2D_Operation_SyncOnHostData		1<<12
-	#define PM3Render2D_Operation_SyncOnBitMask		2<<12
-	#define PM3Render2D_Operation_PatchOrderRendering	3<<12
-	#define PM3Render2D_FBSourceReadEnable			1<<14
-	#define PM3Render2D_SpanOperation			1<<15
-	#define PM3Render2D_Height(h)			(((h)&0x0fff)<<16)
-	#define PM3Render2D_XPositive				1<<28
-	#define PM3Render2D_YPositive				1<<29
-	#define PM3Render2D_AreaStippleEnable			1<<30
-	#define PM3Render2D_TextureEnable			1<<31
+	#define PM3Render2D_Width(w)			((w) & 0x0fff)
+	#define PM3Render2D_Operation_Normal			(0 << 12)
+	#define PM3Render2D_Operation_SyncOnHostData		(1 << 12)
+	#define PM3Render2D_Operation_SyncOnBitMask		(2 << 12)
+	#define PM3Render2D_Operation_PatchOrderRendering	(3 << 12)
+	#define PM3Render2D_FBSourceReadEnable			(1 << 14)
+	#define PM3Render2D_SpanOperation			(1 << 15)
+	#define PM3Render2D_Height(h)			(((h) & 0x0fff) << 16)
+	#define PM3Render2D_XPositive				(1 << 28)
+	#define PM3Render2D_YPositive				(1 << 29)
+	#define PM3Render2D_AreaStippleEnable			(1 << 30)
+	#define PM3Render2D_TextureEnable			(1U << 31)
 #define PM3Render2DGlyph					0xb648
-	#define PM3Render2DGlyph_Width(w)		((w)&0x7f)
-	#define PM3Render2DGlyph_Height(h)		(((h)&0x7f)<<7)
-	#define PM3Render2DGlyph_XOffset(x)		(((x)&0x1ff)<<14)
-	#define PM3Render2DGlyph_YOffset(y)		(((y)&0x1ff)<<23)
+	#define PM3Render2DGlyph_Width(w)		((w) & 0x7f)
+	#define PM3Render2DGlyph_Height(h)		(((h) & 0x7f) << 7)
+	#define PM3Render2DGlyph_XOffset(x)		(((x) & 0x1ff) << 14)
+	#define PM3Render2DGlyph_YOffset(y)		(((y) & 0x1ff) << 23)
 #define PM3RenderPatchOffset					0xb610
-	#define PM3RenderPatchOffset_XOffset(x)		((x)&0xffff)
-	#define PM3RenderPatchOffset_YOffset(y)		(((y)&0xffff)<<16)
+	#define PM3RenderPatchOffset_XOffset(x)		((x) & 0xffff)
+	#define PM3RenderPatchOffset_YOffset(y)		(((y) & 0xffff) << 16)
 #define PM3RLCount						0xb678
-	#define PM3RLCount_Count(c)			((c)&0x0fff)
+	#define PM3RLCount_Count(c)			((c) & 0x0fff)
 #define PM3RLData						0xb670
 
 /**********************************************
-*  GLINT Permedia3 Alias Register             *
+*  GLINT Permedia3 Alias Register	     *
 ***********************************************/
-#define PM3FillBackgroundColor                                  0x8330
-#define PM3FillConfig2D0                                        0x8338
-#define PM3FillConfig2D1                                        0x8360
-	#define PM3FillConfig2D_OpaqueSpan                      1<<0
-	#define PM3FillConfig2D_MultiRXBlit                     1<<1
-	#define PM3FillConfig2D_UserScissorEnable               1<<2
-	#define PM3FillConfig2D_FBDestReadEnable                1<<3
-	#define PM3FillConfig2D_AlphaBlendEnable                1<<4
-	#define PM3FillConfig2D_DitherEnable                    1<<5
-	#define PM3FillConfig2D_ForegroundROPEnable             1<<6
-	#define PM3FillConfig2D_ForegroundROP(rop)              (((rop)&0xf)<<7)
-	#define PM3FillConfig2D_BackgroundROPEnable             1<<11
-	#define PM3FillConfig2D_BackgroundROP(rop)              (((rop)&0xf)<<12)
-	#define PM3FillConfig2D_UseConstantSource               1<<16
-	#define PM3FillConfig2D_FBWriteEnable                   1<<17
-	#define PM3FillConfig2D_Blocking                        1<<18
-	#define PM3FillConfig2D_ExternalSourceData              1<<19
-	#define PM3FillConfig2D_LUTModeEnable                   1<<20
-#define PM3FillFBDestReadBufferAddr                             0x8310
-#define PM3FillFBSourceReadBufferAddr                           0x8308
-#define PM3FillFBSourceReadBufferOffset                         0x8340
-	#define PM3FillFBSourceReadBufferOffset_XOffset(x)     ((x)&0xffff)
-	#define PM3FillFBSourceReadBufferOffset_YOffset(y)      (((y)&0xffff)<<16)
-#define PM3FillFBWriteBufferAddr                                0x8300
-#define PM3FillForegroundColor0                                 0x8328
-#define PM3FillForegroundColor1                                 0x8358
-#define PM3FillGlyphPosition                                    0x8368
-        #define PM3FillGlyphPosition_XOffset(x)                        ((x)&0xffff)
-	#define PM3FillGlyphPosition_YOffset(y)                        (((y)&0xffff)<<16)
-#define PM3FillRectanglePosition                                0x8348
-	#define PM3FillRectanglePosition_XOffset(x)            ((x)&0xffff)
-	#define PM3FillRectanglePosition_YOffset(y)            (((y)&0xffff)<<16)
+#define PM3FillBackgroundColor					0x8330
+#define PM3FillConfig2D0					0x8338
+#define PM3FillConfig2D1					0x8360
+	#define PM3FillConfig2D_OpaqueSpan			(1 << 0)
+	#define PM3FillConfig2D_MultiRXBlit			(1 << 1)
+	#define PM3FillConfig2D_UserScissorEnable		(1 << 2)
+	#define PM3FillConfig2D_FBDestReadEnable		(1 << 3)
+	#define PM3FillConfig2D_AlphaBlendEnable		(1 << 4)
+	#define PM3FillConfig2D_DitherEnable			(1 << 5)
+	#define PM3FillConfig2D_ForegroundROPEnable		(1 << 6)
+	#define PM3FillConfig2D_ForegroundROP(rop)	(((rop) & 0xf) << 7)
+	#define PM3FillConfig2D_BackgroundROPEnable		(1 << 11)
+	#define PM3FillConfig2D_BackgroundROP(rop)	(((rop) & 0xf) << 12)
+	#define PM3FillConfig2D_UseConstantSource		(1 << 16)
+	#define PM3FillConfig2D_FBWriteEnable			(1 << 17)
+	#define PM3FillConfig2D_Blocking			(1 << 18)
+	#define PM3FillConfig2D_ExternalSourceData		(1 << 19)
+	#define PM3FillConfig2D_LUTModeEnable			(1 << 20)
+#define PM3FillFBDestReadBufferAddr				0x8310
+#define PM3FillFBSourceReadBufferAddr				0x8308
+#define PM3FillFBSourceReadBufferOffset				0x8340
+	#define PM3FillFBSourceReadBufferOffset_XOffset(x) ((x) & 0xffff)
+	#define PM3FillFBSourceReadBufferOffset_YOffset(y)	\
+						(((y) & 0xffff) << 16)
+#define PM3FillFBWriteBufferAddr				0x8300
+#define PM3FillForegroundColor0					0x8328
+#define PM3FillForegroundColor1					0x8358
+#define PM3FillGlyphPosition					0x8368
+	#define PM3FillGlyphPosition_XOffset(x)		((x) & 0xffff)
+	#define PM3FillGlyphPosition_YOffset(y)		(((y) & 0xffff) << 16)
+#define PM3FillRectanglePosition				0x8348
+	#define PM3FillRectanglePosition_XOffset(x)	((x) & 0xffff)
+	#define PM3FillRectanglePosition_YOffset(y)	(((y) & 0xffff) << 16)
 
-#define PM3_REGS_SIZE           0x10000
-#define PM3_MAX_PIXCLOCK        300000
 /* a few more useful registers & regs value... */
-#define PM3Sync 0x8c40
-        #define PM3Sync_Tag 0x188
-#define PM3FilterMode 0x8c00
-        #define PM3FilterModeSync 0x400
-#define PM3OutputFifo 0x2000
-#define PM3StatisticMode 0x8c08
-#define PM3AreaStippleMode 0x81a0
-        #define AreaStipplePattern0					(0x8200)
-        #define AreaStipplePattern1					(0x8208)
-        #define AreaStipplePattern2					(0x8210)
-        #define AreaStipplePattern3					(0x8218)
-        #define AreaStipplePattern4					(0x8220)
-        #define AreaStipplePattern5					(0x8228)
-        #define AreaStipplePattern6					(0x8230)
-        #define AreaStipplePattern7					(0x8238)
-        #define AreaStipplePattern8					(0x8240)
-        #define AreaStipplePattern9					(0x8248)
-        #define AreaStipplePattern10					(0x8250)
-        #define AreaStipplePattern11					(0x8258)
-        #define AreaStipplePattern12					(0x8260)
-        #define AreaStipplePattern13					(0x8268)
-        #define AreaStipplePattern14					(0x8270)
-        #define AreaStipplePattern15					(0x8278)
-        #define AreaStipplePattern16					(0x8280)
-        #define AreaStipplePattern17					(0x8288)
-        #define AreaStipplePattern18					(0x8290)
-        #define AreaStipplePattern19					(0x8298)
-        #define AreaStipplePattern20					(0x82a0)
-        #define AreaStipplePattern21					(0x82a8)
-        #define AreaStipplePattern22					(0x82b0)
-        #define AreaStipplePattern23					(0x82b8)
-        #define AreaStipplePattern24					(0x82c0)
-        #define AreaStipplePattern25					(0x82c8)
-        #define AreaStipplePattern26					(0x82d0)
-        #define AreaStipplePattern27					(0x82d8)
-        #define AreaStipplePattern28					(0x82eo)
-        #define AreaStipplePattern29					(0x82e8)
-        #define AreaStipplePattern30					(0x82f0)
-        #define AreaStipplePattern31					(0x82f8)
-        #define AreaStipplePattern_indexed(i)             (0x8200 + ((i) * 0x8))
+#define PM3Sync							0x8c40
+	#define PM3Sync_Tag					0x188
+#define PM3FilterMode						0x8c00
+	#define PM3FilterModeSync				0x400
+#define PM3OutputFifo						0x2000
+#define PM3StatisticMode					0x8c08
+#define PM3AreaStippleMode					0x81a0
+#define AreaStipplePattern_indexed(i)		(0x8200 + ((i) * 0x8))
 
-#define PM3DepthMode 0x89a0
-#define PM3StencilMode 0x8988
-#define PM3StencilData 0x8990
-#define PM3TextureReadMode 0x8670
-#define PM3FogMode 0x8690
-#define PM3ChromaTestMode 0x8f18
-#define PM3YUVMode 0x8f00
-#define PM3BitMaskPattern 0x8068
+#define PM3DepthMode						0x89a0
+#define PM3StencilMode						0x8988
+#define PM3StencilData						0x8990
+#define PM3TextureReadMode					0x8670
+#define PM3FogMode						0x8690
+#define PM3ChromaTestMode					0x8f18
+#define PM3YUVMode						0x8f00
+#define PM3BitMaskPattern					0x8068
 
 /* ***************************** */
 /* ***** pm3fb IOCTL const ***** */
 /* ***************************** */
-/* debug-only IOCTL */
-#define PM3FBIO_CLEARMEMORY 0x504D3300 /* 'PM3\000' */
-#define PM3FBIO_CLEARCMAP   0x504D3301 /* 'PM3\001' */
-/* common use IOCTL */
-#define PM3FBIO_RESETCHIP   0x504D33FF /* 'PM3\377' */
+#define PM3FBIO_RESETCHIP		0x504D33FF /* 'PM3\377' */
 
 /* ***************************************** */
 /* ***** pm3fb useful define and macro ***** */
 /* ***************************************** */
 
-/* max size of options */
-#define PM3_OPTIONS_SIZE 256
-
-/* max size of font name */
-#define PM3_FONTNAME_SIZE 40
+/* fifo size in chip */
+#define PM3_FIFO_SIZE						120
+#define PM3_REGS_SIZE						0x10000
+#define PM3_MAX_PIXCLOCK					300000
 
 #endif /* PM3FB_H */
diff --git a/include/video/tdfx.h b/include/video/tdfx.h
index c1cc94ba3fd..05b63c2a5ab 100644
--- a/include/video/tdfx.h
+++ b/include/video/tdfx.h
@@ -2,140 +2,140 @@
 #define _TDFX_H
 
 /* membase0 register offsets */
-#define STATUS          0x00
-#define PCIINIT0        0x04
-#define SIPMONITOR      0x08
-#define LFBMEMORYCONFIG 0x0c
-#define MISCINIT0       0x10
-#define MISCINIT1       0x14
-#define DRAMINIT0       0x18
-#define DRAMINIT1       0x1c
-#define AGPINIT         0x20
-#define TMUGBEINIT      0x24
-#define VGAINIT0        0x28
-#define VGAINIT1        0x2c
-#define DRAMCOMMAND     0x30
-#define DRAMDATA        0x34
-/* reserved             0x38 */
-/* reserved             0x3c */
-#define PLLCTRL0        0x40
-#define PLLCTRL1        0x44
-#define PLLCTRL2        0x48
-#define DACMODE         0x4c
-#define DACADDR         0x50
-#define DACDATA         0x54
-#define RGBMAXDELTA     0x58
-#define VIDPROCCFG      0x5c
-#define HWCURPATADDR    0x60
-#define HWCURLOC        0x64
-#define HWCURC0         0x68
-#define HWCURC1         0x6c
-#define VIDINFORMAT     0x70
-#define VIDINSTATUS     0x74
-#define VIDSERPARPORT   0x78
-#define VIDINXDELTA     0x7c
-#define VIDININITERR    0x80
-#define VIDINYDELTA     0x84
-#define VIDPIXBUFTHOLD  0x88
-#define VIDCHRMIN       0x8c
-#define VIDCHRMAX       0x90
-#define VIDCURLIN       0x94
-#define VIDSCREENSIZE   0x98
-#define VIDOVRSTARTCRD  0x9c
-#define VIDOVRENDCRD    0xa0
-#define VIDOVRDUDX      0xa4
-#define VIDOVRDUDXOFF   0xa8
-#define VIDOVRDVDY      0xac
-/*  ... */
-#define VIDOVRDVDYOFF   0xe0
-#define VIDDESKSTART    0xe4
-#define VIDDESKSTRIDE   0xe8
-#define VIDINADDR0      0xec
-#define VIDINADDR1      0xf0
-#define VIDINADDR2      0xf4
-#define VIDINSTRIDE     0xf8
-#define VIDCUROVRSTART  0xfc
-
-#define INTCTRL         (0x00100000 + 0x04)
-#define CLIP0MIN        (0x00100000 + 0x08)
-#define CLIP0MAX        (0x00100000 + 0x0c)
-#define DSTBASE         (0x00100000 + 0x10)
-#define DSTFORMAT       (0x00100000 + 0x14)
-#define SRCBASE         (0x00100000 + 0x34)
-#define COMMANDEXTRA_2D (0x00100000 + 0x38)
-#define CLIP1MIN        (0x00100000 + 0x4c)
-#define CLIP1MAX        (0x00100000 + 0x50)
-#define SRCFORMAT       (0x00100000 + 0x54)
-#define SRCSIZE         (0x00100000 + 0x58)
-#define SRCXY           (0x00100000 + 0x5c)
-#define COLORBACK       (0x00100000 + 0x60)
-#define COLORFORE       (0x00100000 + 0x64)
-#define DSTSIZE         (0x00100000 + 0x68)
-#define DSTXY           (0x00100000 + 0x6c)
-#define COMMAND_2D      (0x00100000 + 0x70)
-#define LAUNCH_2D       (0x00100000 + 0x80)
-
-#define COMMAND_3D      (0x00200000 + 0x120)
+#define STATUS		0x00
+#define PCIINIT0	0x04
+#define SIPMONITOR	0x08
+#define LFBMEMORYCONFIG	0x0c
+#define MISCINIT0	0x10
+#define MISCINIT1	0x14
+#define DRAMINIT0	0x18
+#define DRAMINIT1	0x1c
+#define AGPINIT		0x20
+#define TMUGBEINIT	0x24
+#define VGAINIT0	0x28
+#define VGAINIT1	0x2c
+#define DRAMCOMMAND	0x30
+#define DRAMDATA	0x34
+/* reserved	0x38 */
+/* reserved	0x3c */
+#define PLLCTRL0	0x40
+#define PLLCTRL1	0x44
+#define PLLCTRL2	0x48
+#define DACMODE		0x4c
+#define DACADDR		0x50
+#define DACDATA		0x54
+#define RGBMAXDELTA	0x58
+#define VIDPROCCFG	0x5c
+#define HWCURPATADDR	0x60
+#define HWCURLOC	0x64
+#define HWCURC0		0x68
+#define HWCURC1		0x6c
+#define VIDINFORMAT	0x70
+#define VIDINSTATUS	0x74
+#define VIDSERPARPORT	0x78
+#define VIDINXDELTA	0x7c
+#define VIDININITERR	0x80
+#define VIDINYDELTA	0x84
+#define VIDPIXBUFTHOLD	0x88
+#define VIDCHRMIN	0x8c
+#define VIDCHRMAX	0x90
+#define VIDCURLIN	0x94
+#define VIDSCREENSIZE	0x98
+#define VIDOVRSTARTCRD	0x9c
+#define VIDOVRENDCRD	0xa0
+#define VIDOVRDUDX	0xa4
+#define VIDOVRDUDXOFF	0xa8
+#define VIDOVRDVDY	0xac
+/* ... */
+#define VIDOVRDVDYOFF	0xe0
+#define VIDDESKSTART	0xe4
+#define VIDDESKSTRIDE	0xe8
+#define VIDINADDR0	0xec
+#define VIDINADDR1	0xf0
+#define VIDINADDR2	0xf4
+#define VIDINSTRIDE	0xf8
+#define VIDCUROVRSTART	0xfc
+
+#define INTCTRL		(0x00100000 + 0x04)
+#define CLIP0MIN	(0x00100000 + 0x08)
+#define CLIP0MAX	(0x00100000 + 0x0c)
+#define DSTBASE		(0x00100000 + 0x10)
+#define DSTFORMAT	(0x00100000 + 0x14)
+#define SRCBASE		(0x00100000 + 0x34)
+#define COMMANDEXTRA_2D	(0x00100000 + 0x38)
+#define CLIP1MIN	(0x00100000 + 0x4c)
+#define CLIP1MAX	(0x00100000 + 0x50)
+#define SRCFORMAT	(0x00100000 + 0x54)
+#define SRCSIZE		(0x00100000 + 0x58)
+#define SRCXY		(0x00100000 + 0x5c)
+#define COLORBACK	(0x00100000 + 0x60)
+#define COLORFORE	(0x00100000 + 0x64)
+#define DSTSIZE		(0x00100000 + 0x68)
+#define DSTXY		(0x00100000 + 0x6c)
+#define COMMAND_2D	(0x00100000 + 0x70)
+#define LAUNCH_2D	(0x00100000 + 0x80)
+
+#define COMMAND_3D	(0x00200000 + 0x120)
 
 /* register bitfields (not all, only as needed) */
 
-#define BIT(x) (1UL << (x))
+#define BIT(x)	(1UL << (x))
 
 /* COMMAND_2D reg. values */
-#define TDFX_ROP_COPY        0xcc     // src
-#define TDFX_ROP_INVERT      0x55     // NOT dst
-#define TDFX_ROP_XOR         0x66     // src XOR dst
-
-#define AUTOINC_DSTX                    BIT(10)
-#define AUTOINC_DSTY                    BIT(11)
-#define COMMAND_2D_FILLRECT             0x05
-#define COMMAND_2D_S2S_BITBLT           0x01      // screen to screen
-#define COMMAND_2D_H2S_BITBLT           0x03       // host to screen
-
-#define COMMAND_3D_NOP                  0x00
-#define STATUS_RETRACE                  BIT(6)
-#define STATUS_BUSY                     BIT(9)
-#define MISCINIT1_CLUT_INV              BIT(0)
-#define MISCINIT1_2DBLOCK_DIS           BIT(15)
-#define DRAMINIT0_SGRAM_NUM             BIT(26)
-#define DRAMINIT0_SGRAM_TYPE            BIT(27)
-#define DRAMINIT0_SGRAM_TYPE_MASK       (BIT(27)|BIT(28)|BIT(29))
+#define TDFX_ROP_COPY		0xcc	/* src */
+#define TDFX_ROP_INVERT		0x55	/* NOT dst */
+#define TDFX_ROP_XOR		0x66	/* src XOR dst */
+
+#define AUTOINC_DSTX			BIT(10)
+#define AUTOINC_DSTY			BIT(11)
+#define COMMAND_2D_FILLRECT		0x05
+#define COMMAND_2D_S2S_BITBLT		0x01	/* screen to screen */
+#define COMMAND_2D_H2S_BITBLT		0x03	/* host to screen */
+
+#define COMMAND_3D_NOP			0x00
+#define STATUS_RETRACE			BIT(6)
+#define STATUS_BUSY			BIT(9)
+#define MISCINIT1_CLUT_INV		BIT(0)
+#define MISCINIT1_2DBLOCK_DIS		BIT(15)
+#define DRAMINIT0_SGRAM_NUM		BIT(26)
+#define DRAMINIT0_SGRAM_TYPE		BIT(27)
+#define DRAMINIT0_SGRAM_TYPE_MASK       (BIT(27) | BIT(28) | BIT(29))
 #define DRAMINIT0_SGRAM_TYPE_SHIFT      27
-#define DRAMINIT1_MEM_SDRAM             BIT(30)
-#define VGAINIT0_VGA_DISABLE            BIT(0)
-#define VGAINIT0_EXT_TIMING             BIT(1)
-#define VGAINIT0_8BIT_DAC               BIT(2)
-#define VGAINIT0_EXT_ENABLE             BIT(6)
-#define VGAINIT0_WAKEUP_3C3             BIT(8)
-#define VGAINIT0_LEGACY_DISABLE         BIT(9)
-#define VGAINIT0_ALT_READBACK           BIT(10)
-#define VGAINIT0_FAST_BLINK             BIT(11)
-#define VGAINIT0_EXTSHIFTOUT            BIT(12)
-#define VGAINIT0_DECODE_3C6             BIT(13)
-#define VGAINIT0_SGRAM_HBLANK_DISABLE   BIT(22)
-#define VGAINIT1_MASK                   0x1fffff
-#define VIDCFG_VIDPROC_ENABLE           BIT(0)
-#define VIDCFG_CURS_X11                 BIT(1)
-#define VIDCFG_INTERLACE                BIT(3)
-#define VIDCFG_HALF_MODE                BIT(4)
-#define VIDCFG_DESK_ENABLE              BIT(7)
-#define VIDCFG_CLUT_BYPASS              BIT(10)
-#define VIDCFG_2X                       BIT(26)
-#define VIDCFG_HWCURSOR_ENABLE          BIT(27)
+#define DRAMINIT1_MEM_SDRAM		BIT(30)
+#define VGAINIT0_VGA_DISABLE		BIT(0)
+#define VGAINIT0_EXT_TIMING		BIT(1)
+#define VGAINIT0_8BIT_DAC		BIT(2)
+#define VGAINIT0_EXT_ENABLE		BIT(6)
+#define VGAINIT0_WAKEUP_3C3		BIT(8)
+#define VGAINIT0_LEGACY_DISABLE		BIT(9)
+#define VGAINIT0_ALT_READBACK		BIT(10)
+#define VGAINIT0_FAST_BLINK		BIT(11)
+#define VGAINIT0_EXTSHIFTOUT		BIT(12)
+#define VGAINIT0_DECODE_3C6		BIT(13)
+#define VGAINIT0_SGRAM_HBLANK_DISABLE	BIT(22)
+#define VGAINIT1_MASK			0x1fffff
+#define VIDCFG_VIDPROC_ENABLE		BIT(0)
+#define VIDCFG_CURS_X11			BIT(1)
+#define VIDCFG_INTERLACE		BIT(3)
+#define VIDCFG_HALF_MODE		BIT(4)
+#define VIDCFG_DESK_ENABLE		BIT(7)
+#define VIDCFG_CLUT_BYPASS		BIT(10)
+#define VIDCFG_2X			BIT(26)
+#define VIDCFG_HWCURSOR_ENABLE		BIT(27)
 #define VIDCFG_PIXFMT_SHIFT             18
-#define DACMODE_2X                      BIT(0)
+#define DACMODE_2X			BIT(0)
 
 /* VGA rubbish, need to change this for multihead support */
-#define MISC_W  0x3c2
-#define MISC_R  0x3cc
-#define SEQ_I   0x3c4
-#define SEQ_D   0x3c5
-#define CRT_I   0x3d4
-#define CRT_D   0x3d5
-#define ATT_IW  0x3c0
-#define IS1_R   0x3da
-#define GRA_I   0x3ce
-#define GRA_D   0x3cf
+#define MISC_W		0x3c2
+#define MISC_R		0x3cc
+#define SEQ_I		0x3c4
+#define SEQ_D		0x3c5
+#define CRT_I		0x3d4
+#define CRT_D		0x3d5
+#define ATT_IW		0x3c0
+#define IS1_R		0x3da
+#define GRA_I		0x3ce
+#define GRA_D		0x3cf
 
 #ifdef __KERNEL__
 
@@ -143,9 +143,9 @@ struct banshee_reg {
 	/* VGA rubbish */
 	unsigned char att[21];
 	unsigned char crt[25];
-	unsigned char gra[ 9];
+	unsigned char gra[9];
 	unsigned char misc[1];
-	unsigned char seq[ 5];
+	unsigned char seq[5];
 
 	/* Banshee extensions */
 	unsigned char ext[2];
@@ -167,8 +167,6 @@ struct banshee_reg {
 	unsigned long clip0max;
 	unsigned long clip1min;
 	unsigned long clip1max;
-	unsigned long srcbase;
-	unsigned long dstbase;
 	unsigned long miscinit0;
 };
 
@@ -177,18 +175,10 @@ struct tdfx_par {
 	u32 palette[16];
 	void __iomem *regbase_virt;
 	unsigned long iobase;
-	u32 baseline;
-
-	struct {
-		int w,u,d;
-		unsigned long enable,disable;
-		struct timer_list timer;
-	} hwcursor;
-
-	spinlock_t DAClock;
+	int mtrr_handle;
 };
 
-#endif /* __KERNEL__ */
+#endif	/* __KERNEL__ */
 
-#endif /* _TDFX_H */
+#endif	/* _TDFX_H */
 
diff --git a/include/video/uvesafb.h b/include/video/uvesafb.h
new file mode 100644
index 00000000000..95bcef19395
--- /dev/null
+++ b/include/video/uvesafb.h
@@ -0,0 +1,193 @@
+#ifndef _UVESAFB_H
+#define _UVESAFB_H
+
+struct v86_regs {
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 esi;
+	__u32 edi;
+	__u32 ebp;
+	__u32 eax;
+	__u32 eip;
+	__u32 eflags;
+	__u32 esp;
+	__u16 cs;
+	__u16 ss;
+	__u16 es;
+	__u16 ds;
+	__u16 fs;
+	__u16 gs;
+};
+
+/* Task flags */
+#define TF_VBEIB	0x01
+#define TF_BUF_ESDI	0x02
+#define TF_BUF_ESBX	0x04
+#define TF_BUF_RET	0x08
+#define TF_EXIT		0x10
+
+struct uvesafb_task {
+	__u8 flags;
+	int buf_len;
+	struct v86_regs regs;
+};
+
+/* Constants for the capabilities field
+ * in vbe_ib */
+#define VBE_CAP_CAN_SWITCH_DAC	0x01
+#define VBE_CAP_VGACOMPAT	0x02
+
+/* The VBE Info Block */
+struct vbe_ib {
+	char  vbe_signature[4];
+	__u16 vbe_version;
+	__u32 oem_string_ptr;
+	__u32 capabilities;
+	__u32 mode_list_ptr;
+	__u16 total_memory;
+	__u16 oem_software_rev;
+	__u32 oem_vendor_name_ptr;
+	__u32 oem_product_name_ptr;
+	__u32 oem_product_rev_ptr;
+	__u8  reserved[222];
+	char  oem_data[256];
+	char  misc_data[512];
+} __attribute__ ((packed));
+
+#ifdef __KERNEL__
+
+/* VBE CRTC Info Block */
+struct vbe_crtc_ib {
+	u16 horiz_total;
+	u16 horiz_start;
+	u16 horiz_end;
+	u16 vert_total;
+	u16 vert_start;
+	u16 vert_end;
+	u8  flags;
+	u32 pixel_clock;
+	u16 refresh_rate;
+	u8  reserved[40];
+} __attribute__ ((packed));
+
+#define VBE_MODE_VGACOMPAT	0x20
+#define VBE_MODE_COLOR		0x08
+#define VBE_MODE_SUPPORTEDHW	0x01
+#define VBE_MODE_GRAPHICS	0x10
+#define VBE_MODE_LFB		0x80
+
+#define VBE_MODE_MASK		(VBE_MODE_COLOR | VBE_MODE_SUPPORTEDHW | \
+				VBE_MODE_GRAPHICS | VBE_MODE_LFB)
+
+/* VBE Mode Info Block */
+struct vbe_mode_ib {
+	/* for all VBE revisions */
+	u16 mode_attr;
+	u8  winA_attr;
+	u8  winB_attr;
+	u16 win_granularity;
+	u16 win_size;
+	u16 winA_seg;
+	u16 winB_seg;
+	u32 win_func_ptr;
+	u16 bytes_per_scan_line;
+
+	/* for VBE 1.2+ */
+	u16 x_res;
+	u16 y_res;
+	u8  x_char_size;
+	u8  y_char_size;
+	u8  planes;
+	u8  bits_per_pixel;
+	u8  banks;
+	u8  memory_model;
+	u8  bank_size;
+	u8  image_pages;
+	u8  reserved1;
+
+	/* Direct color fields for direct/6 and YUV/7 memory models. */
+	/* Offsets are bit positions of lsb in the mask. */
+	u8  red_len;
+	u8  red_off;
+	u8  green_len;
+	u8  green_off;
+	u8  blue_len;
+	u8  blue_off;
+	u8  rsvd_len;
+	u8  rsvd_off;
+	u8  direct_color_info;	/* direct color mode attributes */
+
+	/* for VBE 2.0+ */
+	u32 phys_base_ptr;
+	u8  reserved2[6];
+
+	/* for VBE 3.0+ */
+	u16 lin_bytes_per_scan_line;
+	u8  bnk_image_pages;
+	u8  lin_image_pages;
+	u8  lin_red_len;
+	u8  lin_red_off;
+	u8  lin_green_len;
+	u8  lin_green_off;
+	u8  lin_blue_len;
+	u8  lin_blue_off;
+	u8  lin_rsvd_len;
+	u8  lin_rsvd_off;
+	u32 max_pixel_clock;
+	u16 mode_id;
+	u8  depth;
+} __attribute__ ((packed));
+
+#define UVESAFB_DEFAULT_MODE "640x480-16"
+
+/* How long to wait for a reply from userspace [ms] */
+#define UVESAFB_TIMEOUT 5000
+
+/* Max number of concurrent tasks */
+#define UVESAFB_TASKS_MAX 16
+
+#define dac_reg	(0x3c8)
+#define dac_val	(0x3c9)
+
+struct uvesafb_pal_entry {
+	u_char blue, green, red, pad;
+} __attribute__ ((packed));
+
+struct uvesafb_ktask {
+	struct uvesafb_task t;
+	void *buf;
+	struct completion *done;
+	u32 ack;
+};
+
+static int uvesafb_exec(struct uvesafb_ktask *tsk);
+
+#define UVESAFB_EXACT_RES	1
+#define UVESAFB_EXACT_DEPTH	2
+
+struct uvesafb_par {
+	struct vbe_ib vbe_ib;		/* VBE Info Block */
+	struct vbe_mode_ib *vbe_modes;	/* list of supported VBE modes */
+	int vbe_modes_cnt;
+
+	u8 nocrtc;
+	u8 ypan;			/* 0 - nothing, 1 - ypan, 2 - ywrap */
+	u8 pmi_setpal;			/* PMI for palette changes */
+	u16 *pmi_base;			/* protected mode interface location */
+	void *pmi_start;
+	void *pmi_pal;
+	u8 *vbe_state_orig;		/*
+					 * original hardware state, before the
+					 * driver was loaded
+					 */
+	u8 *vbe_state_saved;		/* state saved by fb_save_state */
+	int vbe_state_size;
+	atomic_t ref_count;
+
+	int mode_idx;
+	struct vbe_crtc_ib crtc;
+};
+
+#endif /* __KERNEL__ */
+#endif /* _UVESAFB_H */
diff --git a/init/calibrate.c b/init/calibrate.c
index 40ff3b40489..2d3d73bd4ce 100644
--- a/init/calibrate.c
+++ b/init/calibrate.c
@@ -10,7 +10,7 @@
 
 #include <asm/timex.h>
 
-static unsigned long preset_lpj;
+unsigned long preset_lpj;
 static int __init lpj_setup(char *str)
 {
 	preset_lpj = simple_strtoul(str,NULL,0);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 57e6448b171..0864f409793 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
 
 /*
  * Return in *pmask the portion of a cpusets's mems_allowed that
- * are online.  If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online mems.  If we get
- * all the way to the top and still haven't found any online mems,
- * return node_online_map.
+ * are online, with memory.  If none are online with memory, walk
+ * up the cpuset hierarchy until we find one that does have some
+ * online mems.  If we get all the way to the top and still haven't
+ * found any online mems, return node_states[N_HIGH_MEMORY].
  *
  * One way or another, we guarantee to return some non-empty subset
- * of node_online_map.
+ * of node_states[N_HIGH_MEMORY].
  *
  * Call with callback_mutex held.
  */
 
 static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
 {
-	while (cs && !nodes_intersects(cs->mems_allowed, node_online_map))
+	while (cs && !nodes_intersects(cs->mems_allowed,
+					node_states[N_HIGH_MEMORY]))
 		cs = cs->parent;
 	if (cs)
-		nodes_and(*pmask, cs->mems_allowed, node_online_map);
+		nodes_and(*pmask, cs->mems_allowed,
+					node_states[N_HIGH_MEMORY]);
 	else
-		*pmask = node_online_map;
-	BUG_ON(!nodes_intersects(*pmask, node_online_map));
+		*pmask = node_states[N_HIGH_MEMORY];
+	BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
 }
 
 /**
@@ -753,68 +755,13 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 }
 
 /*
- * For a given cpuset cur, partition the system as follows
- * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
- *    exclusive child cpusets
- * b. All cpus in the current cpuset's cpus_allowed that are not part of any
- *    exclusive child cpusets
- * Build these two partitions by calling partition_sched_domains
- *
- * Call with manage_mutex held.  May nest a call to the
- * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
- * Must not be called holding callback_mutex, because we must
- * not call lock_cpu_hotplug() while holding callback_mutex.
- */
-
-static void update_cpu_domains(struct cpuset *cur)
-{
-	struct cpuset *c, *par = cur->parent;
-	cpumask_t pspan, cspan;
-
-	if (par == NULL || cpus_empty(cur->cpus_allowed))
-		return;
-
-	/*
-	 * Get all cpus from parent's cpus_allowed not part of exclusive
-	 * children
-	 */
-	pspan = par->cpus_allowed;
-	list_for_each_entry(c, &par->children, sibling) {
-		if (is_cpu_exclusive(c))
-			cpus_andnot(pspan, pspan, c->cpus_allowed);
-	}
-	if (!is_cpu_exclusive(cur)) {
-		cpus_or(pspan, pspan, cur->cpus_allowed);
-		if (cpus_equal(pspan, cur->cpus_allowed))
-			return;
-		cspan = CPU_MASK_NONE;
-	} else {
-		if (cpus_empty(pspan))
-			return;
-		cspan = cur->cpus_allowed;
-		/*
-		 * Get all cpus from current cpuset's cpus_allowed not part
-		 * of exclusive children
-		 */
-		list_for_each_entry(c, &cur->children, sibling) {
-			if (is_cpu_exclusive(c))
-				cpus_andnot(cspan, cspan, c->cpus_allowed);
-		}
-	}
-
-	lock_cpu_hotplug();
-	partition_sched_domains(&pspan, &cspan);
-	unlock_cpu_hotplug();
-}
-
-/*
  * Call with manage_mutex held.  May take callback_mutex during call.
  */
 
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
-	int retval, cpus_unchanged;
+	int retval;
 
 	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
 	if (cs == &top_cpuset)
@@ -841,12 +788,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	retval = validate_change(cs, &trialcs);
 	if (retval < 0)
 		return retval;
-	cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
 	mutex_lock(&callback_mutex);
 	cs->cpus_allowed = trialcs.cpus_allowed;
 	mutex_unlock(&callback_mutex);
-	if (is_cpu_exclusive(cs) && !cpus_unchanged)
-		update_cpu_domains(cs);
 	return 0;
 }
 
@@ -924,7 +868,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	int fudge;
 	int retval;
 
-	/* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+	/*
+	 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
+	 * it's read-only
+	 */
 	if (cs == &top_cpuset)
 		return -EACCES;
 
@@ -941,8 +888,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 		retval = nodelist_parse(buf, trialcs.mems_allowed);
 		if (retval < 0)
 			goto done;
+		if (!nodes_intersects(trialcs.mems_allowed,
+						node_states[N_HIGH_MEMORY])) {
+			/*
+			 * error if only memoryless nodes specified.
+			 */
+			retval = -ENOSPC;
+			goto done;
+		}
 	}
-	nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
+	/*
+	 * Exclude memoryless nodes.  We know that trialcs.mems_allowed
+	 * contains at least one node with memory.
+	 */
+	nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
+						node_states[N_HIGH_MEMORY]);
 	oldmem = cs->mems_allowed;
 	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
 		retval = 0;		/* Too easy - nothing to do */
@@ -1067,7 +1027,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 {
 	int turning_on;
 	struct cpuset trialcs;
-	int err, cpu_exclusive_changed;
+	int err;
 
 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -1080,14 +1040,10 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 	err = validate_change(cs, &trialcs);
 	if (err < 0)
 		return err;
-	cpu_exclusive_changed =
-		(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
 	mutex_lock(&callback_mutex);
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
-	if (cpu_exclusive_changed)
-                update_cpu_domains(cs);
 	return 0;
 }
 
@@ -1445,7 +1401,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	ssize_t retval = 0;
 	char *s;
 
-	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+	if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
 		return -ENOMEM;
 
 	s = page;
@@ -1947,17 +1903,6 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
 }
 
-/*
- * Locking note on the strange update_flag() call below:
- *
- * If the cpuset being removed is marked cpu_exclusive, then simulate
- * turning cpu_exclusive off, which will call update_cpu_domains().
- * The lock_cpu_hotplug() call in update_cpu_domains() must not be
- * made while holding callback_mutex.  Elsewhere the kernel nests
- * callback_mutex inside lock_cpu_hotplug() calls.  So the reverse
- * nesting would risk an ABBA deadlock.
- */
-
 static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cpuset *cs = dentry->d_fsdata;
@@ -1977,13 +1922,6 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		mutex_unlock(&manage_mutex);
 		return -EBUSY;
 	}
-	if (is_cpu_exclusive(cs)) {
-		int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
-		if (retval < 0) {
-			mutex_unlock(&manage_mutex);
-			return retval;
-		}
-	}
 	parent = cs->parent;
 	mutex_lock(&callback_mutex);
 	set_bit(CS_REMOVED, &cs->flags);
@@ -2098,8 +2036,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
 
 /*
  * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
- * cpu_online_map and node_online_map.  Force the top cpuset to track
- * whats online after any CPU or memory node hotplug or unplug event.
+ * cpu_online_map and node_states[N_HIGH_MEMORY].  Force the top cpuset to
+ * track what's online after any CPU or memory node hotplug or unplug
+ * event.
  *
  * To ensure that we don't remove a CPU or node from the top cpuset
  * that is currently in use by a child cpuset (which would violate
@@ -2119,7 +2058,7 @@ static void common_cpu_mem_hotplug_unplug(void)
 
 	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
 	top_cpuset.cpus_allowed = cpu_online_map;
-	top_cpuset.mems_allowed = node_online_map;
+	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	mutex_unlock(&callback_mutex);
 	mutex_unlock(&manage_mutex);
@@ -2147,8 +2086,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
- * Keep top_cpuset.mems_allowed tracking node_online_map.
- * Call this routine anytime after you change node_online_map.
+ * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
+ * Call this routine anytime after you change
+ * node_states[N_HIGH_MEMORY].
  * See also the previous routine cpuset_handle_cpuhp().
  */
 
@@ -2167,7 +2107,7 @@ void cpuset_track_online_nodes(void)
 void __init cpuset_init_smp(void)
 {
 	top_cpuset.cpus_allowed = cpu_online_map;
-	top_cpuset.mems_allowed = node_online_map;
+	top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
 	hotcpu_notifier(cpuset_handle_cpuhp, 0);
 }
@@ -2309,7 +2249,7 @@ void cpuset_init_current_mems_allowed(void)
  *
  * Description: Returns the nodemask_t mems_allowed of the cpuset
  * attached to the specified @tsk.  Guaranteed to return some non-empty
- * subset of node_online_map, even if this means going outside the
+ * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
  * tasks cpuset.
  **/
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4b8a4493c54..e3a5d817ac9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -64,7 +64,6 @@
 
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
-static atomic_t kprobe_count;
 
 /* NOTE: change this value only with kprobe_mutex held */
 static bool kprobe_enabled;
@@ -73,11 +72,6 @@ DEFINE_MUTEX(kprobe_mutex);		/* Protects kprobe_table */
 DEFINE_SPINLOCK(kretprobe_lock);	/* Protects kretprobe_inst_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
 
-static struct notifier_block kprobe_page_fault_nb = {
-	.notifier_call = kprobe_exceptions_notify,
-	.priority = 0x7fffffff /* we need to notified first */
-};
-
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
  * kprobe->ainsn.insn points to the copy of the instruction to be
@@ -556,8 +550,6 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 	old_p = get_kprobe(p->addr);
 	if (old_p) {
 		ret = register_aggr_kprobe(old_p, p);
-		if (!ret)
-			atomic_inc(&kprobe_count);
 		goto out;
 	}
 
@@ -569,13 +561,9 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 	hlist_add_head_rcu(&p->hlist,
 		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
 
-	if (kprobe_enabled) {
-		if (atomic_add_return(1, &kprobe_count) == \
-				(ARCH_INACTIVE_KPROBE_COUNT + 1))
-			register_page_fault_notifier(&kprobe_page_fault_nb);
-
+	if (kprobe_enabled)
 		arch_arm_kprobe(p);
-	}
+
 out:
 	mutex_unlock(&kprobe_mutex);
 
@@ -658,16 +646,6 @@ valid_p:
 		}
 		mutex_unlock(&kprobe_mutex);
 	}
-
-	/* Call unregister_page_fault_notifier()
-	 * if no probes are active
-	 */
-	mutex_lock(&kprobe_mutex);
-	if (atomic_add_return(-1, &kprobe_count) == \
-				ARCH_INACTIVE_KPROBE_COUNT)
-		unregister_page_fault_notifier(&kprobe_page_fault_nb);
-	mutex_unlock(&kprobe_mutex);
-	return;
 }
 
 static struct notifier_block kprobe_exceptions_nb = {
@@ -738,6 +716,18 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 	int ret = 0;
 	struct kretprobe_instance *inst;
 	int i;
+	void *addr = rp->kp.addr;
+
+	if (kretprobe_blacklist_size) {
+		if (addr == NULL && rp->kp.symbol_name)
+			kprobe_lookup_name(rp->kp.symbol_name, addr);
+		addr += rp->kp.offset;
+		/* reject probes whose target address is on the blacklist */
+		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
+			if (kretprobe_blacklist[i].addr == addr)
+				return -EINVAL;
+		}
+	}
 
 	rp->kp.pre_handler = pre_handler_kretprobe;
 	rp->kp.post_handler = NULL;
@@ -815,7 +805,17 @@ static int __init init_kprobes(void)
 		INIT_HLIST_HEAD(&kprobe_table[i]);
 		INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
 	}
-	atomic_set(&kprobe_count, 0);
+
+	if (kretprobe_blacklist_size) {
+		/* lookup the function address from its name */
+		for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
+			kprobe_lookup_name(kretprobe_blacklist[i].name,
+					   kretprobe_blacklist[i].addr);
+			if (!kretprobe_blacklist[i].addr)
+				printk("kretprobe: lookup failed: %s\n",
+				       kretprobe_blacklist[i].name);
+		}
+	}
 
 	/* By default, kprobes are enabled */
 	kprobe_enabled = true;
@@ -921,13 +921,6 @@ static void __kprobes enable_all_kprobes(void)
 	if (kprobe_enabled)
 		goto already_enabled;
 
-	/*
-	 * Re-register the page fault notifier only if there are any
-	 * active probes at the time of enabling kprobes globally
-	 */
-	if (atomic_read(&kprobe_count) > ARCH_INACTIVE_KPROBE_COUNT)
-		register_page_fault_notifier(&kprobe_page_fault_nb);
-
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist)
@@ -968,10 +961,7 @@ static void __kprobes disable_all_kprobes(void)
 	mutex_unlock(&kprobe_mutex);
 	/* Allow all currently running kprobes to complete */
 	synchronize_sched();
-
-	mutex_lock(&kprobe_mutex);
-	/* Unconditionally unregister the page_fault notifier */
-	unregister_page_fault_notifier(&kprobe_page_fault_nb);
+	return;
 
 already_disabled:
 	mutex_unlock(&kprobe_mutex);
diff --git a/kernel/printk.c b/kernel/printk.c
index 8451dfc31d2..b2b5c3a22a3 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -22,6 +22,8 @@
 #include <linux/tty_driver.h>
 #include <linux/console.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/nmi.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/interrupt.h>			/* For in_interrupt() */
@@ -162,6 +164,61 @@ out:
 
 __setup("log_buf_len=", log_buf_len_setup);
 
+#ifdef CONFIG_BOOT_PRINTK_DELAY
+
+static unsigned int boot_delay; /* msecs delay after each printk during bootup */
+static unsigned long long printk_delay_msec; /* delay loops per msec, from lpj */
+
+static int __init boot_delay_setup(char *str)
+{
+	unsigned long lpj;
+	unsigned long long loops_per_msec;
+
+	lpj = preset_lpj ? preset_lpj : 1000000;	/* some guess */
+	loops_per_msec = (unsigned long long)lpj / 1000 * HZ;
+
+	get_option(&str, &boot_delay);
+	if (boot_delay > 10 * 1000)	/* over 10 seconds: disable the delay */
+		boot_delay = 0;
+
+	printk_delay_msec = loops_per_msec;
+	printk(KERN_DEBUG "boot_delay: %u, preset_lpj: %lu, lpj: %lu, "
+		"HZ: %d, printk_delay_msec: %llu\n",
+		boot_delay, preset_lpj, lpj, HZ, printk_delay_msec);
+	return 1;
+}
+__setup("boot_delay=", boot_delay_setup);
+
+static void boot_delay_msec(void)
+{
+	unsigned long long k;	/* busy-loop iterations still to run */
+	unsigned long timeout;	/* jiffies value that ends the delay */
+
+	if (boot_delay == 0 || system_state != SYSTEM_BOOTING)
+		return;
+
+	k = (unsigned long long)printk_delay_msec * boot_delay;
+
+	timeout = jiffies + msecs_to_jiffies(boot_delay);
+	while (k) {
+		k--;
+		cpu_relax();
+		/*
+		 * Reading (volatile) jiffies keeps the compiler from
+		 * reducing this loop away; ending the loop via jiffies
+		 * is secondary and may or may not happen.
+		 */
+		if (time_after(jiffies, timeout))
+			break;
+		touch_nmi_watchdog();
+	}
+}
+#else
+static inline void boot_delay_msec(void)
+{
+}
+#endif
+
 /*
  * Commands to do_syslog:
  *
@@ -527,6 +584,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	static char printk_buf[1024];
 	static int log_level_unknown = 1;
 
+	boot_delay_msec();
+
 	preempt_disable();
 	if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
 		/* If a crash is occurring during printk() on this CPU,
diff --git a/kernel/profile.c b/kernel/profile.c
index cb1e37d2dac..6f69bf792d9 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -346,7 +346,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
 		per_cpu(cpu_profile_flip, cpu) = 0;
 		if (!per_cpu(cpu_profile_hits, cpu)[1]) {
 			page = alloc_pages_node(node,
-					GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+					GFP_KERNEL | __GFP_ZERO,
 					0);
 			if (!page)
 				return NOTIFY_BAD;
@@ -354,7 +354,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
 		}
 		if (!per_cpu(cpu_profile_hits, cpu)[0]) {
 			page = alloc_pages_node(node,
-					GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+					GFP_KERNEL | __GFP_ZERO,
 					0);
 			if (!page)
 				goto out_free;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 3eca7a55f2e..a73ebd3b9d4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -386,6 +386,9 @@ int ptrace_request(struct task_struct *child, long request,
 	case PTRACE_SETSIGINFO:
 		ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
 		break;
+	case PTRACE_DETACH:	 /* detach a process that was attached. */
+		ret = ptrace_detach(child, data);
+		break;
 	default:
 		break;
 	}
@@ -450,6 +453,10 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
 	return child;
 }
 
+#ifndef arch_ptrace_attach
+#define arch_ptrace_attach(child)	do { } while (0)
+#endif
+
 #ifndef __ARCH_SYS_PTRACE
 asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
@@ -473,6 +480,12 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 
 	if (request == PTRACE_ATTACH) {
 		ret = ptrace_attach(child);
+		/*
+		 * Some architectures need to do book-keeping after
+		 * a ptrace attach.
+		 */
+		if (!ret)
+			arch_ptrace_attach(child);
 		goto out_put_task_struct;
 	}
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 9bd14fd3e6d..a358142ff48 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL(release_resource);
  * the caller must specify res->start, res->end, res->flags.
  * If found, returns 0, res is overwritten, if not found, returns -1.
  */
-int find_next_system_ram(struct resource *res)
+static int find_next_system_ram(struct resource *res)
 {
 	resource_size_t start, end;
 	struct resource *p;
@@ -267,6 +267,30 @@ int find_next_system_ram(struct resource *res)
 		res->end = p->end;
 	return 0;
 }
+int
+walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
+			int (*func)(unsigned long, unsigned long, void *))
+{
+	struct resource res;
+	unsigned long pfn, len;
+	u64 orig_end;
+	int ret = -1;
+	res.start = (u64) start_pfn << PAGE_SHIFT;
+	res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
+	res.flags = IORESOURCE_MEM;
+	orig_end = res.end;
+	while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
+		pfn = (unsigned long)(res.start >> PAGE_SHIFT);
+		len = (unsigned long)((res.end + 1 - res.start) >> PAGE_SHIFT);
+		ret = (*func)(pfn, len, arg);
+		if (ret)
+			break;
+		res.start = res.end + 1;
+		res.end = orig_end;
+	}
+	return ret;
+}
+
 #endif
 
 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index bba57adb950..0da2b2635c5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5869,7 +5869,7 @@ static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map,
 			     struct sched_group **sg)
 {
 	int group;
-	cpumask_t mask = cpu_sibling_map[cpu];
+	cpumask_t mask = per_cpu(cpu_sibling_map, cpu);
 	cpus_and(mask, mask, *cpu_map);
 	group = first_cpu(mask);
 	if (sg)
@@ -5898,7 +5898,7 @@ static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map,
 	cpus_and(mask, mask, *cpu_map);
 	group = first_cpu(mask);
 #elif defined(CONFIG_SCHED_SMT)
-	cpumask_t mask = cpu_sibling_map[cpu];
+	cpumask_t mask = per_cpu(cpu_sibling_map, cpu);
 	cpus_and(mask, mask, *cpu_map);
 	group = first_cpu(mask);
 #else
@@ -6132,7 +6132,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
 		*sd = SD_SIBLING_INIT;
-		sd->span = cpu_sibling_map[i];
+		sd->span = per_cpu(cpu_sibling_map, i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
 		p->child = sd;
@@ -6143,7 +6143,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
 	for_each_cpu_mask(i, *cpu_map) {
-		cpumask_t this_sibling_map = cpu_sibling_map[i];
+		cpumask_t this_sibling_map = per_cpu(cpu_sibling_map, i);
 		cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
 		if (i != first_cpu(this_sibling_map))
 			continue;
@@ -6348,35 +6348,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	arch_destroy_sched_domains(cpu_map);
 }
 
-/*
- * Partition sched domains as specified by the cpumasks below.
- * This attaches all cpus from the cpumasks to the NULL domain,
- * waits for a RCU quiescent period, recalculates sched
- * domain information and then attaches them back to the
- * correct sched domains
- * Call with hotplug lock held
- */
-int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
-{
-	cpumask_t change_map;
-	int err = 0;
-
-	cpus_and(*partition1, *partition1, cpu_online_map);
-	cpus_and(*partition2, *partition2, cpu_online_map);
-	cpus_or(change_map, *partition1, *partition2);
-
-	/* Detach sched domains from all of the affected cpus */
-	detach_destroy_domains(&change_map);
-	if (!cpus_empty(*partition1))
-		err = build_sched_domains(partition1);
-	if (!err && !cpus_empty(*partition2))
-		err = build_sched_domains(partition2);
-
-	register_sched_domain_sysctl();
-
-	return err;
-}
-
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 static int arch_reinit_sched_domains(void)
 {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ec14aa8ac51..96efbb85999 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -880,6 +880,14 @@ static ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &hugetlb_treat_movable_handler,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "hugetlb_dynamic_pool",
+		.data		= &hugetlb_dynamic_pool,
+		.maxlen		= sizeof(hugetlb_dynamic_pool),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
diff --git a/kernel/time.c b/kernel/time.c
index 2289a8d6831..1afcc78dc3b 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -57,11 +57,7 @@ EXPORT_SYMBOL(sys_tz);
  */
 asmlinkage long sys_time(time_t __user * tloc)
 {
-	time_t i;
-	struct timespec tv;
-
-	getnstimeofday(&tv);
-	i = tv.tv_sec;
+	time_t i = get_seconds();
 
 	if (tloc) {
 		if (put_user(i,tloc))
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4ad79f6bdec..7e8983aecf8 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -49,19 +49,12 @@ struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
 static unsigned long total_sleep_time;		/* seconds */
 EXPORT_SYMBOL(xtime);
 
-
-#ifdef CONFIG_NO_HZ
 static struct timespec xtime_cache __attribute__ ((aligned (16)));
 static inline void update_xtime_cache(u64 nsec)
 {
 	xtime_cache = xtime;
 	timespec_add_ns(&xtime_cache, nsec);
 }
-#else
-#define xtime_cache xtime
-/* We do *not* want to evaluate the argument for this case */
-#define update_xtime_cache(n) do { } while (0)
-#endif
 
 static struct clocksource *clock; /* pointer to current clocksource */
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 396c38b3cb6..7d16e643330 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -413,6 +413,24 @@ config FORCED_INLINING
 	  become the default in the future, until then this option is there to
 	  test gcc for this.
 
+config BOOT_PRINTK_DELAY
+	bool "Delay each boot printk message by N milliseconds"
+	depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
+	help
+	  This build option allows you to read kernel boot messages
+	  by inserting a short delay after each one.  The delay is
+	  specified in milliseconds on the kernel command line,
+	  using "boot_delay=N".
+
+	  It is likely that you would also need to use "lpj=M" to preset
+	  the "loops per jiffy" value.
+	  See a previous boot log for the "lpj" value to use for your
+	  system, and then set "lpj=M" before setting "boot_delay=N".
+	  NOTE:  Using this option may adversely affect SMP systems.
+	  I.e., processors other than the first one may not boot up.
+	  BOOT_PRINTK_DELAY also may cause DETECT_SOFTLOCKUP to detect
+	  what it believes to be lockup conditions.
+
 config RCU_TORTURE_TEST
 	tristate "torture tests for RCU"
 	depends on DEBUG_KERNEL
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 514efb200be..6b26f9d3980 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -60,9 +60,14 @@ struct radix_tree_path {
 };
 
 #define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
-#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
+#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
+					  RADIX_TREE_MAP_SHIFT))
 
-static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
+/*
+ * The height_to_maxindex array needs to be one deeper than the maximum
+ * path as height 0 holds only 1 entry.
+ */
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
 
 /*
  * Radix tree node cache.
@@ -93,7 +98,8 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 	struct radix_tree_node *ret;
 	gfp_t gfp_mask = root_gfp_mask(root);
 
-	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+	ret = kmem_cache_alloc(radix_tree_node_cachep,
+				set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
 	if (ret == NULL && !(gfp_mask & __GFP_WAIT)) {
 		struct radix_tree_preload *rtp;
 
@@ -104,7 +110,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
 			rtp->nr--;
 		}
 	}
-	BUG_ON(radix_tree_is_direct_ptr(ret));
+	BUG_ON(radix_tree_is_indirect_ptr(ret));
 	return ret;
 }
 
@@ -137,7 +143,8 @@ int radix_tree_preload(gfp_t gfp_mask)
 	rtp = &__get_cpu_var(radix_tree_preloads);
 	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
 		preempt_enable();
-		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+		node = kmem_cache_alloc(radix_tree_node_cachep,
+				set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
 		if (node == NULL)
 			goto out;
 		preempt_disable();
@@ -240,7 +247,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 			return -ENOMEM;
 
 		/* Increase the height.  */
-		node->slots[0] = radix_tree_direct_to_ptr(root->rnode);
+		node->slots[0] = radix_tree_indirect_to_ptr(root->rnode);
 
 		/* Propagate the aggregated tag info into the new root */
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
@@ -251,6 +258,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 		newheight = root->height+1;
 		node->height = newheight;
 		node->count = 1;
+		node = radix_tree_ptr_to_indirect(node);
 		rcu_assign_pointer(root->rnode, node);
 		root->height = newheight;
 	} while (height > root->height);
@@ -274,7 +282,7 @@ int radix_tree_insert(struct radix_tree_root *root,
 	int offset;
 	int error;
 
-	BUG_ON(radix_tree_is_direct_ptr(item));
+	BUG_ON(radix_tree_is_indirect_ptr(item));
 
 	/* Make sure the tree is high enough.  */
 	if (index > radix_tree_maxindex(root->height)) {
@@ -283,7 +291,8 @@ int radix_tree_insert(struct radix_tree_root *root,
 			return error;
 	}
 
-	slot = root->rnode;
+	slot = radix_tree_indirect_to_ptr(root->rnode);
+
 	height = root->height;
 	shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 
@@ -298,7 +307,8 @@ int radix_tree_insert(struct radix_tree_root *root,
 				rcu_assign_pointer(node->slots[offset], slot);
 				node->count++;
 			} else
-				rcu_assign_pointer(root->rnode, slot);
+				rcu_assign_pointer(root->rnode,
+					radix_tree_ptr_to_indirect(slot));
 		}
 
 		/* Go a level down */
@@ -318,7 +328,7 @@ int radix_tree_insert(struct radix_tree_root *root,
 		BUG_ON(tag_get(node, 0, offset));
 		BUG_ON(tag_get(node, 1, offset));
 	} else {
-		rcu_assign_pointer(root->rnode, radix_tree_ptr_to_direct(item));
+		rcu_assign_pointer(root->rnode, item);
 		BUG_ON(root_tag_get(root, 0));
 		BUG_ON(root_tag_get(root, 1));
 	}
@@ -350,11 +360,12 @@ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
 	if (node == NULL)
 		return NULL;
 
-	if (radix_tree_is_direct_ptr(node)) {
+	if (!radix_tree_is_indirect_ptr(node)) {
 		if (index > 0)
 			return NULL;
 		return (void **)&root->rnode;
 	}
+	node = radix_tree_indirect_to_ptr(node);
 
 	height = node->height;
 	if (index > radix_tree_maxindex(height))
@@ -398,11 +409,12 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
 	if (node == NULL)
 		return NULL;
 
-	if (radix_tree_is_direct_ptr(node)) {
+	if (!radix_tree_is_indirect_ptr(node)) {
 		if (index > 0)
 			return NULL;
-		return radix_tree_direct_to_ptr(node);
+		return node;
 	}
+	node = radix_tree_indirect_to_ptr(node);
 
 	height = node->height;
 	if (index > radix_tree_maxindex(height))
@@ -447,7 +459,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 	height = root->height;
 	BUG_ON(index > radix_tree_maxindex(height));
 
-	slot = root->rnode;
+	slot = radix_tree_indirect_to_ptr(root->rnode);
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 
 	while (height > 0) {
@@ -487,7 +499,11 @@ EXPORT_SYMBOL(radix_tree_tag_set);
 void *radix_tree_tag_clear(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag)
 {
-	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+	/*
+	 * The radix tree path needs to be one longer than the maximum path
+	 * since the "list" is null terminated.
+	 */
+	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
 	struct radix_tree_node *slot = NULL;
 	unsigned int height, shift;
 
@@ -497,7 +513,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
-	slot = root->rnode;
+	slot = radix_tree_indirect_to_ptr(root->rnode);
 
 	while (height > 0) {
 		int offset;
@@ -562,8 +578,9 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 	if (node == NULL)
 		return 0;
 
-	if (radix_tree_is_direct_ptr(node))
+	if (!radix_tree_is_indirect_ptr(node))
 		return (index == 0);
+	node = radix_tree_indirect_to_ptr(node);
 
 	height = node->height;
 	if (index > radix_tree_maxindex(height))
@@ -599,6 +616,42 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 EXPORT_SYMBOL(radix_tree_tag_get);
 #endif
 
+/**
+ *	radix_tree_next_hole    -    find the next hole (not-present entry)
+ *	@root:		tree root
+ *	@index:		index key
+ *	@max_scan:	maximum range to search
+ *
+ *	Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
+ *	indexed hole.
+ *
+ *	Returns: the index of the hole if found, otherwise returns an index
+ *	outside of the set specified (in which case 'return - index >= max_scan'
+ *	will be true).
+ *
+ *	radix_tree_next_hole may be called under rcu_read_lock. However, like
+ *	radix_tree_gang_lookup, this will not atomically search a snapshot of the
+ *	tree at a single point in time. For example, if a hole is created at index
+ *	5, then subsequently a hole is created at index 10, radix_tree_next_hole
+ *	covering both indexes may return 10 if called under rcu_read_lock.
+ */
+unsigned long radix_tree_next_hole(struct radix_tree_root *root,
+				unsigned long index, unsigned long max_scan)
+{
+	unsigned long i;
+
+	for (i = 0; i < max_scan; i++) {
+		if (!radix_tree_lookup(root, index))
+			break;
+		index++;
+		if (index == 0)
+			break;
+	}
+
+	return index;
+}
+EXPORT_SYMBOL(radix_tree_next_hole);
+
 static unsigned int
 __lookup(struct radix_tree_node *slot, void **results, unsigned long index,
 	unsigned int max_items, unsigned long *next_index)
@@ -680,13 +733,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 	if (!node)
 		return 0;
 
-	if (radix_tree_is_direct_ptr(node)) {
+	if (!radix_tree_is_indirect_ptr(node)) {
 		if (first_index > 0)
 			return 0;
-		node = radix_tree_direct_to_ptr(node);
-		results[0] = rcu_dereference(node);
+		results[0] = node;
 		return 1;
 	}
+	node = radix_tree_indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -808,13 +861,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 	if (!node)
 		return 0;
 
-	if (radix_tree_is_direct_ptr(node)) {
+	if (!radix_tree_is_indirect_ptr(node)) {
 		if (first_index > 0)
 			return 0;
-		node = radix_tree_direct_to_ptr(node);
-		results[0] = rcu_dereference(node);
+		results[0] = node;
 		return 1;
 	}
+	node = radix_tree_indirect_to_ptr(node);
 
 	max_index = radix_tree_maxindex(node->height);
 
@@ -844,12 +897,22 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
 static inline void radix_tree_shrink(struct radix_tree_root *root)
 {
 	/* try to shrink tree height */
-	while (root->height > 0 &&
-			root->rnode->count == 1 &&
-			root->rnode->slots[0]) {
+	while (root->height > 0) {
 		struct radix_tree_node *to_free = root->rnode;
 		void *newptr;
 
+		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
+		to_free = radix_tree_indirect_to_ptr(to_free);
+
+		/*
+		 * If the candidate node has more than one child, or its child
+		 * is not at the leftmost slot, we cannot shrink.
+		 */
+		if (to_free->count != 1)
+			break;
+		if (!to_free->slots[0])
+			break;
+
 		/*
 		 * We don't need rcu_assign_pointer(), since we are simply
 		 * moving the node from one part of the tree to another. If
@@ -858,8 +921,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
 		 * one (root->rnode).
 		 */
 		newptr = to_free->slots[0];
-		if (root->height == 1)
-			newptr = radix_tree_ptr_to_direct(newptr);
+		if (root->height > 1)
+			newptr = radix_tree_ptr_to_indirect(newptr);
 		root->rnode = newptr;
 		root->height--;
 		/* must only free zeroed nodes into the slab */
@@ -882,7 +945,11 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
  */
 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 {
-	struct radix_tree_path path[RADIX_TREE_MAX_PATH], *pathp = path;
+	/*
+	 * The radix tree path needs to be one longer than the maximum path
+	 * since the "list" is null terminated.
+	 */
+	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
 	struct radix_tree_node *slot = NULL;
 	struct radix_tree_node *to_free;
 	unsigned int height, shift;
@@ -894,12 +961,12 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 		goto out;
 
 	slot = root->rnode;
-	if (height == 0 && root->rnode) {
-		slot = radix_tree_direct_to_ptr(slot);
+	if (height == 0) {
 		root_tag_clear_all(root);
 		root->rnode = NULL;
 		goto out;
 	}
+	slot = radix_tree_indirect_to_ptr(slot);
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	pathp->node = NULL;
@@ -941,7 +1008,8 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 			radix_tree_node_free(to_free);
 
 		if (pathp->node->count) {
-			if (pathp->node == root->rnode)
+			if (pathp->node ==
+					radix_tree_indirect_to_ptr(root->rnode))
 				radix_tree_shrink(root);
 			goto out;
 		}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 30c1400e749..c419ecf334c 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -677,16 +677,17 @@ swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
  * same here.
  */
 int
-swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	       int dir)
 {
+	struct scatterlist *sg;
 	void *addr;
 	dma_addr_t dev_addr;
 	int i;
 
 	BUG_ON(dir == DMA_NONE);
 
-	for (i = 0; i < nelems; i++, sg++) {
+	for_each_sg(sgl, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
 		dev_addr = virt_to_bus(addr);
 		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
@@ -696,7 +697,7 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
 				   to do proper error handling. */
 				swiotlb_full(hwdev, sg->length, dir, 0);
 				swiotlb_unmap_sg(hwdev, sg - i, i, dir);
-				sg[0].dma_length = 0;
+				sgl[0].dma_length = 0;
 				return 0;
 			}
 			sg->dma_address = virt_to_bus(map);
@@ -712,19 +713,21 @@ swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
  * concerning calls here are the same as for swiotlb_unmap_single() above.
  */
 void
-swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
 		 int dir)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(dir == DMA_NONE);
 
-	for (i = 0; i < nelems; i++, sg++)
+	for_each_sg(sgl, sg, nelems, i) {
 		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
 			unmap_single(hwdev, bus_to_virt(sg->dma_address),
 				     sg->dma_length, dir);
 		else if (dir == DMA_FROM_DEVICE)
 			dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+	}
 }
 
 /*
@@ -735,19 +738,21 @@ swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
  * and usage.
  */
 static void
-swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg,
+swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
 		int nelems, int dir, int target)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(dir == DMA_NONE);
 
-	for (i = 0; i < nelems; i++, sg++)
+	for_each_sg(sgl, sg, nelems, i) {
 		if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
 			sync_single(hwdev, bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir, target);
 		else if (dir == DMA_FROM_DEVICE)
 			dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
+	}
 }
 
 void
diff --git a/mm/Kconfig b/mm/Kconfig
index a7609cbcb00..1cc6cada2bb 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -112,6 +112,19 @@ config SPARSEMEM_EXTREME
 	def_bool y
 	depends on SPARSEMEM && !SPARSEMEM_STATIC
 
+#
+# SPARSEMEM_VMEMMAP uses a virtually mapped mem_map to optimise pfn_to_page
+# and page_to_pfn.  This is the most efficient option when kernel virtual
+# space is not under pressure.
+#
+config SPARSEMEM_VMEMMAP_ENABLE
+	def_bool n
+
+config SPARSEMEM_VMEMMAP
+	bool
+	depends on SPARSEMEM
+	default y if (SPARSEMEM_VMEMMAP_ENABLE)
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
@@ -126,6 +139,11 @@ config MEMORY_HOTPLUG_SPARSE
 	def_bool y
 	depends on SPARSEMEM && MEMORY_HOTPLUG
 
+config MEMORY_HOTREMOVE
+	bool "Allow for memory hot remove"
+	depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
+	depends on MIGRATION
+
 # Heavily threaded applications may benefit from splitting the mm-wide
 # page_table_lock, so that faults on different parts of the user address
 # space can be handled with less contention: split it at this NR_CPUS.
diff --git a/mm/Makefile b/mm/Makefile
index 245e33ab00c..5c0b0ea7572 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -11,13 +11,14 @@ obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 			   page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   $(mmu-y)
+			   page_isolation.o $(mmu-y)
 
 obj-$(CONFIG_BOUNCE)	+= bounce.o
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
 obj-$(CONFIG_SPARSEMEM)	+= sparse.o
+obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
 obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
diff --git a/mm/bounce.c b/mm/bounce.c
index 3b549bf31f7..b6d2d0f1019 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -265,6 +265,12 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	mempool_t *pool;
 
 	/*
+	 * Data-less bio, nothing to bounce
+	 */
+	if (bio_empty_barrier(*bio_orig))
+		return;
+
+	/*
 	 * for non-isa bounce case, just check if the bounce pfn is equal
 	 * to or bigger than the highest pfn in the system -- in that case,
 	 * don't waste time iterating over bio segments
diff --git a/mm/filemap.c b/mm/filemap.c
index 15c8413ee92..c6049e947cd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -30,7 +30,7 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/cpuset.h>
-#include "filemap.h"
+#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
 #include "internal.h"
 
 /*
@@ -593,7 +593,7 @@ void fastcall __lock_page_nosync(struct page *page)
  * Is there a pagecache struct page at the given (mapping, offset) tuple?
  * If yes, increment its refcount and return it; if no, return NULL.
  */
-struct page * find_get_page(struct address_space *mapping, unsigned long offset)
+struct page * find_get_page(struct address_space *mapping, pgoff_t offset)
 {
 	struct page *page;
 
@@ -617,30 +617,31 @@ EXPORT_SYMBOL(find_get_page);
  * Returns zero if the page was not present. find_lock_page() may sleep.
  */
 struct page *find_lock_page(struct address_space *mapping,
-				unsigned long offset)
+				pgoff_t offset)
 {
 	struct page *page;
 
-	read_lock_irq(&mapping->tree_lock);
 repeat:
+	read_lock_irq(&mapping->tree_lock);
 	page = radix_tree_lookup(&mapping->page_tree, offset);
 	if (page) {
 		page_cache_get(page);
 		if (TestSetPageLocked(page)) {
 			read_unlock_irq(&mapping->tree_lock);
 			__lock_page(page);
-			read_lock_irq(&mapping->tree_lock);
 
 			/* Has the page been truncated while we slept? */
-			if (unlikely(page->mapping != mapping ||
-				     page->index != offset)) {
+			if (unlikely(page->mapping != mapping)) {
 				unlock_page(page);
 				page_cache_release(page);
 				goto repeat;
 			}
+			VM_BUG_ON(page->index != offset);
+			goto out;
 		}
 	}
 	read_unlock_irq(&mapping->tree_lock);
+out:
 	return page;
 }
 EXPORT_SYMBOL(find_lock_page);
@@ -663,29 +664,24 @@ EXPORT_SYMBOL(find_lock_page);
  * memory exhaustion.
  */
 struct page *find_or_create_page(struct address_space *mapping,
-		unsigned long index, gfp_t gfp_mask)
+		pgoff_t index, gfp_t gfp_mask)
 {
-	struct page *page, *cached_page = NULL;
+	struct page *page;
 	int err;
 repeat:
 	page = find_lock_page(mapping, index);
 	if (!page) {
-		if (!cached_page) {
-			cached_page =
-				__page_cache_alloc(gfp_mask);
-			if (!cached_page)
-				return NULL;
+		page = __page_cache_alloc(gfp_mask);
+		if (!page)
+			return NULL;
+		err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
+		if (unlikely(err)) {
+			page_cache_release(page);
+			page = NULL;
+			if (err == -EEXIST)
+				goto repeat;
 		}
-		err = add_to_page_cache_lru(cached_page, mapping,
-					index, gfp_mask);
-		if (!err) {
-			page = cached_page;
-			cached_page = NULL;
-		} else if (err == -EEXIST)
-			goto repeat;
 	}
-	if (cached_page)
-		page_cache_release(cached_page);
 	return page;
 }
 EXPORT_SYMBOL(find_or_create_page);
@@ -797,7 +793,7 @@ EXPORT_SYMBOL(find_get_pages_tag);
  * and deadlock against the caller's locked page.
  */
 struct page *
-grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
+grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
 {
 	struct page *page = find_get_page(mapping, index);
 
@@ -859,34 +855,29 @@ static void shrink_readahead_size_eio(struct file *filp,
  * It may be NULL.
  */
 void do_generic_mapping_read(struct address_space *mapping,
-			     struct file_ra_state *_ra,
+			     struct file_ra_state *ra,
 			     struct file *filp,
 			     loff_t *ppos,
 			     read_descriptor_t *desc,
 			     read_actor_t actor)
 {
 	struct inode *inode = mapping->host;
-	unsigned long index;
-	unsigned long offset;
-	unsigned long last_index;
-	unsigned long next_index;
-	unsigned long prev_index;
+	pgoff_t index;
+	pgoff_t last_index;
+	pgoff_t prev_index;
+	unsigned long offset;      /* offset into pagecache page */
 	unsigned int prev_offset;
-	struct page *cached_page;
 	int error;
-	struct file_ra_state ra = *_ra;
 
-	cached_page = NULL;
 	index = *ppos >> PAGE_CACHE_SHIFT;
-	next_index = index;
-	prev_index = ra.prev_index;
-	prev_offset = ra.prev_offset;
+	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
+	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
 	last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 
 	for (;;) {
 		struct page *page;
-		unsigned long end_index;
+		pgoff_t end_index;
 		loff_t isize;
 		unsigned long nr, ret;
 
@@ -895,7 +886,7 @@ find_page:
 		page = find_get_page(mapping, index);
 		if (!page) {
 			page_cache_sync_readahead(mapping,
-					&ra, filp,
+					ra, filp,
 					index, last_index - index);
 			page = find_get_page(mapping, index);
 			if (unlikely(page == NULL))
@@ -903,7 +894,7 @@ find_page:
 		}
 		if (PageReadahead(page)) {
 			page_cache_async_readahead(mapping,
-					&ra, filp, page,
+					ra, filp, page,
 					index, last_index - index);
 		}
 		if (!PageUptodate(page))
@@ -966,7 +957,6 @@ page_ok:
 		index += offset >> PAGE_CACHE_SHIFT;
 		offset &= ~PAGE_CACHE_MASK;
 		prev_offset = offset;
-		ra.prev_offset = offset;
 
 		page_cache_release(page);
 		if (ret == nr && desc->count)
@@ -1015,7 +1005,7 @@ readpage:
 				}
 				unlock_page(page);
 				error = -EIO;
-				shrink_readahead_size_eio(filp, &ra);
+				shrink_readahead_size_eio(filp, ra);
 				goto readpage_error;
 			}
 			unlock_page(page);
@@ -1034,33 +1024,29 @@ no_cached_page:
 		 * Ok, it wasn't cached, so we need to create a new
 		 * page..
 		 */
-		if (!cached_page) {
-			cached_page = page_cache_alloc_cold(mapping);
-			if (!cached_page) {
-				desc->error = -ENOMEM;
-				goto out;
-			}
+		page = page_cache_alloc_cold(mapping);
+		if (!page) {
+			desc->error = -ENOMEM;
+			goto out;
 		}
-		error = add_to_page_cache_lru(cached_page, mapping,
+		error = add_to_page_cache_lru(page, mapping,
 						index, GFP_KERNEL);
 		if (error) {
+			page_cache_release(page);
 			if (error == -EEXIST)
 				goto find_page;
 			desc->error = error;
 			goto out;
 		}
-		page = cached_page;
-		cached_page = NULL;
 		goto readpage;
 	}
 
 out:
-	*_ra = ra;
-	_ra->prev_index = prev_index;
+	ra->prev_pos = prev_index;
+	ra->prev_pos <<= PAGE_CACHE_SHIFT;
+	ra->prev_pos |= prev_offset;
 
-	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
-	if (cached_page)
-		page_cache_release(cached_page);
+	*ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
 	if (filp)
 		file_accessed(filp);
 }
@@ -1220,7 +1206,7 @@ EXPORT_SYMBOL(generic_file_aio_read);
 
 static ssize_t
 do_readahead(struct address_space *mapping, struct file *filp,
-	     unsigned long index, unsigned long nr)
+	     pgoff_t index, unsigned long nr)
 {
 	if (!mapping || !mapping->a_ops || !mapping->a_ops->readpage)
 		return -EINVAL;
@@ -1240,8 +1226,8 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
 	if (file) {
 		if (file->f_mode & FMODE_READ) {
 			struct address_space *mapping = file->f_mapping;
-			unsigned long start = offset >> PAGE_CACHE_SHIFT;
-			unsigned long end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
+			pgoff_t start = offset >> PAGE_CACHE_SHIFT;
+			pgoff_t end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
 			unsigned long len = end - start + 1;
 			ret = do_readahead(mapping, file, start, len);
 		}
@@ -1251,7 +1237,6 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
 }
 
 #ifdef CONFIG_MMU
-static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
 /**
  * page_cache_read - adds requested page to the page cache if not already there
  * @file:	file to read
@@ -1260,7 +1245,7 @@ static int FASTCALL(page_cache_read(struct file * file, unsigned long offset));
  * This adds the requested page to the page cache if it isn't already there,
  * and schedules an I/O to read in its contents from disk.
  */
-static int fastcall page_cache_read(struct file * file, unsigned long offset)
+static int fastcall page_cache_read(struct file * file, pgoff_t offset)
 {
 	struct address_space *mapping = file->f_mapping;
 	struct page *page; 
@@ -1349,7 +1334,7 @@ retry_find:
 		 * Do we miss much more than hit in this file? If so,
 		 * stop bothering with read-ahead. It will only hurt.
 		 */
-		if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
+		if (ra->mmap_miss > MMAP_LOTSAMISS)
 			goto no_cached_page;
 
 		/*
@@ -1375,7 +1360,7 @@ retry_find:
 	}
 
 	if (!did_readaround)
-		ra->mmap_hit++;
+		ra->mmap_miss--;
 
 	/*
 	 * We have a locked page in the page cache, now we need to check
@@ -1396,7 +1381,7 @@ retry_find:
 	 * Found the page and have a reference on it.
 	 */
 	mark_page_accessed(page);
-	ra->prev_index = page->index;
+	ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
 	vmf->page = page;
 	return ret | VM_FAULT_LOCKED;
 
@@ -1501,39 +1486,32 @@ EXPORT_SYMBOL(generic_file_mmap);
 EXPORT_SYMBOL(generic_file_readonly_mmap);
 
 static struct page *__read_cache_page(struct address_space *mapping,
-				unsigned long index,
+				pgoff_t index,
 				int (*filler)(void *,struct page*),
 				void *data)
 {
-	struct page *page, *cached_page = NULL;
+	struct page *page;
 	int err;
 repeat:
 	page = find_get_page(mapping, index);
 	if (!page) {
-		if (!cached_page) {
-			cached_page = page_cache_alloc_cold(mapping);
-			if (!cached_page)
-				return ERR_PTR(-ENOMEM);
-		}
-		err = add_to_page_cache_lru(cached_page, mapping,
-					index, GFP_KERNEL);
-		if (err == -EEXIST)
-			goto repeat;
-		if (err < 0) {
+		page = page_cache_alloc_cold(mapping);
+		if (!page)
+			return ERR_PTR(-ENOMEM);
+		err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+		if (unlikely(err)) {
+			page_cache_release(page);
+			if (err == -EEXIST)
+				goto repeat;
 			/* Presumably ENOMEM for radix tree node */
-			page_cache_release(cached_page);
 			return ERR_PTR(err);
 		}
-		page = cached_page;
-		cached_page = NULL;
 		err = filler(data, page);
 		if (err < 0) {
 			page_cache_release(page);
 			page = ERR_PTR(err);
 		}
 	}
-	if (cached_page)
-		page_cache_release(cached_page);
 	return page;
 }
 
@@ -1542,7 +1520,7 @@ repeat:
  * after submitting it to the filler.
  */
 struct page *read_cache_page_async(struct address_space *mapping,
-				unsigned long index,
+				pgoff_t index,
 				int (*filler)(void *,struct page*),
 				void *data)
 {
@@ -1590,7 +1568,7 @@ EXPORT_SYMBOL(read_cache_page_async);
  * If the page does not get brought uptodate, return -EIO.
  */
 struct page *read_cache_page(struct address_space *mapping,
-				unsigned long index,
+				pgoff_t index,
 				int (*filler)(void *,struct page*),
 				void *data)
 {
@@ -1610,40 +1588,6 @@ struct page *read_cache_page(struct address_space *mapping,
 EXPORT_SYMBOL(read_cache_page);
 
 /*
- * If the page was newly created, increment its refcount and add it to the
- * caller's lru-buffering pagevec.  This function is specifically for
- * generic_file_write().
- */
-static inline struct page *
-__grab_cache_page(struct address_space *mapping, unsigned long index,
-			struct page **cached_page, struct pagevec *lru_pvec)
-{
-	int err;
-	struct page *page;
-repeat:
-	page = find_lock_page(mapping, index);
-	if (!page) {
-		if (!*cached_page) {
-			*cached_page = page_cache_alloc(mapping);
-			if (!*cached_page)
-				return NULL;
-		}
-		err = add_to_page_cache(*cached_page, mapping,
-					index, GFP_KERNEL);
-		if (err == -EEXIST)
-			goto repeat;
-		if (err == 0) {
-			page = *cached_page;
-			page_cache_get(page);
-			if (!pagevec_add(lru_pvec, page))
-				__pagevec_lru_add(lru_pvec);
-			*cached_page = NULL;
-		}
-	}
-	return page;
-}
-
-/*
  * The logic we want is
  *
  *	if suid or (sgid and xgrp)
@@ -1691,8 +1635,7 @@ int remove_suid(struct dentry *dentry)
 }
 EXPORT_SYMBOL(remove_suid);
 
-size_t
-__filemap_copy_from_user_iovec_inatomic(char *vaddr,
+static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
 	size_t copied = 0, left = 0;
@@ -1715,6 +1658,124 @@ __filemap_copy_from_user_iovec_inatomic(char *vaddr,
 }
 
 /*
+ * Copy as much as we can into the page and return the number of bytes which
+ * were successfully copied.  If a fault is encountered, return the number
+ * of bytes which were copied before the fault.
+ */
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t copied;
+
+	BUG_ON(!in_atomic());
+	kaddr = kmap_atomic(page, KM_USER0);
+	if (likely(i->nr_segs == 1)) {
+		int left;
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		left = __copy_from_user_inatomic_nocache(kaddr + offset,
+							buf, bytes);
+		copied = bytes - left;
+	} else {
+		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
+						i->iov, i->iov_offset, bytes);
+	}
+	kunmap_atomic(kaddr, KM_USER0);
+
+	return copied;
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+/*
+ * This has the same side effects and return value as
+ * iov_iter_copy_from_user_atomic().
+ * The difference is that it attempts to resolve faults.
+ * Page must not be locked.
+ */
+size_t iov_iter_copy_from_user(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t copied;
+
+	kaddr = kmap(page);
+	if (likely(i->nr_segs == 1)) {
+		int left;
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
+		copied = bytes - left;
+	} else {
+		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
+						i->iov, i->iov_offset, bytes);
+	}
+	kunmap(page);
+	return copied;
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user);
+
+static void __iov_iter_advance_iov(struct iov_iter *i, size_t bytes)
+{
+	if (likely(i->nr_segs == 1)) {
+		i->iov_offset += bytes;
+	} else {
+		const struct iovec *iov = i->iov;
+		size_t base = i->iov_offset;
+
+		while (bytes) {
+			int copy = min(bytes, iov->iov_len - base);
+
+			bytes -= copy;
+			base += copy;
+			if (iov->iov_len == base) {
+				iov++;
+				base = 0;
+			}
+		}
+		i->iov = iov;
+		i->iov_offset = base;
+	}
+}
+
+void iov_iter_advance(struct iov_iter *i, size_t bytes)
+{
+	BUG_ON(i->count < bytes);
+
+	__iov_iter_advance_iov(i, bytes);
+	i->count -= bytes;
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
+/*
+ * Fault in the first iovec of the given iov_iter, to a maximum length
+ * of bytes. Returns 0 on success, or non-zero if the memory could not be
+ * accessed (ie. because it is an invalid address).
+ *
+ * writev-intensive code may want this to prefault several iovecs -- that
+ * would be possible (callers must not rely on the fact that _only_ the
+ * first iovec will be faulted with the current implementation).
+ */
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+{
+	char __user *buf = i->iov->iov_base + i->iov_offset;
+	bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+	return fault_in_pages_readable(buf, bytes);
+}
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(struct iov_iter *i)
+{
+	const struct iovec *iov = i->iov;
+	if (i->nr_segs == 1)
+		return i->count;
+	else
+		return min(i->count, iov->iov_len - i->iov_offset);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+/*
  * Performs necessary checks before doing a write
  *
  * Can adjust writing position or amount of bytes to write.
@@ -1796,6 +1857,91 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
 }
 EXPORT_SYMBOL(generic_write_checks);
 
+int pagecache_write_begin(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata)
+{
+	const struct address_space_operations *aops = mapping->a_ops;
+
+	if (aops->write_begin) {
+		return aops->write_begin(file, mapping, pos, len, flags,
+							pagep, fsdata);
+	} else {
+		int ret;
+		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+		struct inode *inode = mapping->host;
+		struct page *page;
+again:
+		page = __grab_cache_page(mapping, index);
+		*pagep = page;
+		if (!page)
+			return -ENOMEM;
+
+		if (flags & AOP_FLAG_UNINTERRUPTIBLE && !PageUptodate(page)) {
+			/*
+			 * There is no way to resolve a short write situation
+			 * for a !Uptodate page (except by double copying in
+			 * the caller done by generic_perform_write_2copy).
+			 *
+			 * Instead, we have to bring it uptodate here.
+			 */
+			ret = aops->readpage(file, page);
+			page_cache_release(page);
+			if (ret) {
+				if (ret == AOP_TRUNCATED_PAGE)
+					goto again;
+				return ret;
+			}
+			goto again;
+		}
+
+		ret = aops->prepare_write(file, page, offset, offset+len);
+		if (ret) {
+			unlock_page(page);
+			page_cache_release(page);
+			if (pos + len > inode->i_size)
+				vmtruncate(inode, inode->i_size);
+		}
+		return ret;
+	}
+}
+EXPORT_SYMBOL(pagecache_write_begin);
+
+int pagecache_write_end(struct file *file, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	const struct address_space_operations *aops = mapping->a_ops;
+	int ret;
+
+	if (aops->write_end) {
+		mark_page_accessed(page);
+		ret = aops->write_end(file, mapping, pos, len, copied,
+							page, fsdata);
+	} else {
+		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+		struct inode *inode = mapping->host;
+
+		flush_dcache_page(page);
+		ret = aops->commit_write(file, page, offset, offset+len);
+		unlock_page(page);
+		mark_page_accessed(page);
+		page_cache_release(page);
+
+		if (ret < 0) {
+			if (pos + len > inode->i_size)
+				vmtruncate(inode, inode->i_size);
+		} else if (ret > 0)
+			ret = min_t(size_t, copied, ret);
+		else
+			ret = copied;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(pagecache_write_end);
+
 ssize_t
 generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long *nr_segs, loff_t pos, loff_t *ppos,
@@ -1835,151 +1981,314 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 }
 EXPORT_SYMBOL(generic_file_direct_write);
 
-ssize_t
-generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos, loff_t *ppos,
-		size_t count, ssize_t written)
+/*
+ * Find or create a page at the given pagecache position. Return the locked
+ * page. This function is specifically for buffered writes.
+ */
+struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
 {
-	struct file *file = iocb->ki_filp;
-	struct address_space * mapping = file->f_mapping;
-	const struct address_space_operations *a_ops = mapping->a_ops;
-	struct inode 	*inode = mapping->host;
-	long		status = 0;
-	struct page	*page;
-	struct page	*cached_page = NULL;
-	size_t		bytes;
-	struct pagevec	lru_pvec;
-	const struct iovec *cur_iov = iov; /* current iovec */
-	size_t		iov_base = 0;	   /* offset in the current iovec */
-	char __user	*buf;
-
-	pagevec_init(&lru_pvec, 0);
+	int status;
+	struct page *page;
+repeat:
+	page = find_lock_page(mapping, index);
+	if (likely(page))
+		return page;
 
-	/*
-	 * handle partial DIO write.  Adjust cur_iov if needed.
-	 */
-	if (likely(nr_segs == 1))
-		buf = iov->iov_base + written;
-	else {
-		filemap_set_next_iovec(&cur_iov, &iov_base, written);
-		buf = cur_iov->iov_base + iov_base;
+	page = page_cache_alloc(mapping);
+	if (!page)
+		return NULL;
+	status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+	if (unlikely(status)) {
+		page_cache_release(page);
+		if (status == -EEXIST)
+			goto repeat;
+		return NULL;
 	}
+	return page;
+}
+EXPORT_SYMBOL(__grab_cache_page);
+
+static ssize_t generic_perform_write_2copy(struct file *file,
+				struct iov_iter *i, loff_t pos)
+{
+	struct address_space *mapping = file->f_mapping;
+	const struct address_space_operations *a_ops = mapping->a_ops;
+	struct inode *inode = mapping->host;
+	long status = 0;
+	ssize_t written = 0;
 
 	do {
-		unsigned long index;
-		unsigned long offset;
-		size_t copied;
+		struct page *src_page;
+		struct page *page;
+		pgoff_t index;		/* Pagecache index for current page */
+		unsigned long offset;	/* Offset into pagecache page */
+		unsigned long bytes;	/* Bytes to write to page */
+		size_t copied;		/* Bytes copied from user */
 
-		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+		offset = (pos & (PAGE_CACHE_SIZE - 1));
 		index = pos >> PAGE_CACHE_SHIFT;
-		bytes = PAGE_CACHE_SIZE - offset;
-
-		/* Limit the size of the copy to the caller's write size */
-		bytes = min(bytes, count);
+		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+						iov_iter_count(i));
 
-		/* We only need to worry about prefaulting when writes are from
-		 * user-space.  NFSd uses vfs_writev with several non-aligned
-		 * segments in the vector, and limiting to one segment a time is
-		 * a noticeable performance for re-write
+		/*
+		 * a non-NULL src_page indicates that we're doing the
+		 * copy via get_user_pages and kmap.
 		 */
-		if (!segment_eq(get_fs(), KERNEL_DS)) {
-			/*
-			 * Limit the size of the copy to that of the current
-			 * segment, because fault_in_pages_readable() doesn't
-			 * know how to walk segments.
-			 */
-			bytes = min(bytes, cur_iov->iov_len - iov_base);
+		src_page = NULL;
 
-			/*
-			 * Bring in the user page that we will copy from
-			 * _first_.  Otherwise there's a nasty deadlock on
-			 * copying from the same page as we're writing to,
-			 * without it being marked up-to-date.
-			 */
-			fault_in_pages_readable(buf, bytes);
+		/*
+		 * Bring in the user page that we will copy from _first_.
+		 * Otherwise there's a nasty deadlock on copying from the
+		 * same page as we're writing to, without it being marked
+		 * up-to-date.
+		 *
+		 * Not only is this an optimisation, but it is also required
+		 * to check that the address is actually valid, when atomic
+		 * usercopies are used, below.
+		 */
+		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+			status = -EFAULT;
+			break;
 		}
-		page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
+
+		page = __grab_cache_page(mapping, index);
 		if (!page) {
 			status = -ENOMEM;
 			break;
 		}
 
-		if (unlikely(bytes == 0)) {
-			status = 0;
-			copied = 0;
-			goto zero_length_segment;
-		}
+		/*
+		 * non-uptodate pages cannot cope with short copies, and we
+		 * cannot take a pagefault with the destination page locked.
+		 * So pin the source page to copy it.
+		 */
+		if (!PageUptodate(page) && !segment_eq(get_fs(), KERNEL_DS)) {
+			unlock_page(page);
 
-		status = a_ops->prepare_write(file, page, offset, offset+bytes);
-		if (unlikely(status)) {
-			loff_t isize = i_size_read(inode);
+			src_page = alloc_page(GFP_KERNEL);
+			if (!src_page) {
+				page_cache_release(page);
+				status = -ENOMEM;
+				break;
+			}
+
+			/*
+			 * Cannot get_user_pages with a page locked for the
+			 * same reason as we can't take a page fault with a
+			 * page locked (as explained below).
+			 */
+			copied = iov_iter_copy_from_user(src_page, i,
+								offset, bytes);
+			if (unlikely(copied == 0)) {
+				status = -EFAULT;
+				page_cache_release(page);
+				page_cache_release(src_page);
+				break;
+			}
+			bytes = copied;
 
-			if (status != AOP_TRUNCATED_PAGE)
+			lock_page(page);
+			/*
+			 * Can't handle the page going uptodate here, because
+			 * that means we would use non-atomic usercopies, which
+			 * zero out the tail of the page, which can cause
+			 * zeroes to become transiently visible. We could just
+			 * use a non-zeroing copy, but the APIs aren't too
+			 * consistent.
+			 */
+			if (unlikely(!page->mapping || PageUptodate(page))) {
 				unlock_page(page);
-			page_cache_release(page);
-			if (status == AOP_TRUNCATED_PAGE)
+				page_cache_release(page);
+				page_cache_release(src_page);
 				continue;
+			}
+		}
+
+		status = a_ops->prepare_write(file, page, offset, offset+bytes);
+		if (unlikely(status))
+			goto fs_write_aop_error;
+
+		if (!src_page) {
 			/*
-			 * prepare_write() may have instantiated a few blocks
-			 * outside i_size.  Trim these off again.
+			 * Must not enter the pagefault handler here, because
+			 * we hold the page lock, so we might recursively
+			 * deadlock on the same lock, or get an ABBA deadlock
+			 * against a different lock, or against the mmap_sem
+			 * (which nests outside the page lock).  So increment
+			 * preempt count, and use _atomic usercopies.
+			 *
+			 * The page is uptodate so we are OK to encounter a
+			 * short copy: if unmodified parts of the page are
+			 * marked dirty and written out to disk, it doesn't
+			 * really matter.
 			 */
-			if (pos + bytes > isize)
-				vmtruncate(inode, isize);
-			break;
+			pagefault_disable();
+			copied = iov_iter_copy_from_user_atomic(page, i,
+								offset, bytes);
+			pagefault_enable();
+		} else {
+			void *src, *dst;
+			src = kmap_atomic(src_page, KM_USER0);
+			dst = kmap_atomic(page, KM_USER1);
+			memcpy(dst + offset, src + offset, bytes);
+			kunmap_atomic(dst, KM_USER1);
+			kunmap_atomic(src, KM_USER0);
+			copied = bytes;
 		}
-		if (likely(nr_segs == 1))
-			copied = filemap_copy_from_user(page, offset,
-							buf, bytes);
-		else
-			copied = filemap_copy_from_user_iovec(page, offset,
-						cur_iov, iov_base, bytes);
 		flush_dcache_page(page);
+
 		status = a_ops->commit_write(file, page, offset, offset+bytes);
-		if (status == AOP_TRUNCATED_PAGE) {
-			page_cache_release(page);
-			continue;
-		}
-zero_length_segment:
-		if (likely(copied >= 0)) {
-			if (!status)
-				status = copied;
-
-			if (status >= 0) {
-				written += status;
-				count -= status;
-				pos += status;
-				buf += status;
-				if (unlikely(nr_segs > 1)) {
-					filemap_set_next_iovec(&cur_iov,
-							&iov_base, status);
-					if (count)
-						buf = cur_iov->iov_base +
-							iov_base;
-				} else {
-					iov_base += status;
-				}
-			}
-		}
-		if (unlikely(copied != bytes))
-			if (status >= 0)
-				status = -EFAULT;
+		if (unlikely(status < 0))
+			goto fs_write_aop_error;
+		if (unlikely(status > 0)) /* filesystem did partial write */
+			copied = min_t(size_t, copied, status);
+
 		unlock_page(page);
 		mark_page_accessed(page);
 		page_cache_release(page);
-		if (status < 0)
-			break;
+		if (src_page)
+			page_cache_release(src_page);
+
+		iov_iter_advance(i, copied);
+		pos += copied;
+		written += copied;
+
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
-	} while (count);
-	*ppos = pos;
+		continue;
 
-	if (cached_page)
-		page_cache_release(cached_page);
+fs_write_aop_error:
+		unlock_page(page);
+		page_cache_release(page);
+		if (src_page)
+			page_cache_release(src_page);
+
+		/*
+		 * prepare_write() may have instantiated a few blocks
+		 * outside i_size.  Trim these off again. Don't need
+		 * i_size_read because we hold i_mutex.
+		 */
+		if (pos + bytes > inode->i_size)
+			vmtruncate(inode, inode->i_size);
+		break;
+	} while (iov_iter_count(i));
+
+	return written ? written : status;
+}
+
+static ssize_t generic_perform_write(struct file *file,
+				struct iov_iter *i, loff_t pos)
+{
+	struct address_space *mapping = file->f_mapping;
+	const struct address_space_operations *a_ops = mapping->a_ops;
+	long status = 0;
+	ssize_t written = 0;
+	unsigned int flags = 0;
 
 	/*
-	 * For now, when the user asks for O_SYNC, we'll actually give O_DSYNC
+	 * Copies from kernel address space cannot fail (NFSD is a big user).
 	 */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		flags |= AOP_FLAG_UNINTERRUPTIBLE;
+
+	do {
+		struct page *page;
+		pgoff_t index;		/* Pagecache index for current page */
+		unsigned long offset;	/* Offset into pagecache page */
+		unsigned long bytes;	/* Bytes to write to page */
+		size_t copied;		/* Bytes copied from user */
+		void *fsdata;
+
+		offset = (pos & (PAGE_CACHE_SIZE - 1));
+		index = pos >> PAGE_CACHE_SHIFT;
+		bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+						iov_iter_count(i));
+
+again:
+
+		/*
+		 * Bring in the user page that we will copy from _first_.
+		 * Otherwise there's a nasty deadlock on copying from the
+		 * same page as we're writing to, without it being marked
+		 * up-to-date.
+		 *
+		 * Not only is this an optimisation, but it is also required
+		 * to check that the address is actually valid, when atomic
+		 * usercopies are used, below.
+		 */
+		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+			status = -EFAULT;
+			break;
+		}
+
+		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
+						&page, &fsdata);
+		if (unlikely(status))
+			break;
+
+		pagefault_disable();
+		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+		pagefault_enable();
+		flush_dcache_page(page);
+
+		status = a_ops->write_end(file, mapping, pos, bytes, copied,
+						page, fsdata);
+		if (unlikely(status < 0))
+			break;
+		copied = status;
+
+		cond_resched();
+
+		if (unlikely(copied == 0)) {
+			/*
+			 * If we were unable to copy any data at all, we must
+			 * fall back to a single segment length write.
+			 *
+			 * If we didn't fallback here, we could livelock
+			 * because not all segments in the iov can be copied at
+			 * once without a pagefault.
+			 */
+			bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
+						iov_iter_single_seg_count(i));
+			goto again;
+		}
+		iov_iter_advance(i, copied);
+		pos += copied;
+		written += copied;
+
+		balance_dirty_pages_ratelimited(mapping);
+
+	} while (iov_iter_count(i));
+
+	return written ? written : status;
+}
+
+ssize_t
+generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos, loff_t *ppos,
+		size_t count, ssize_t written)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	const struct address_space_operations *a_ops = mapping->a_ops;
+	struct inode *inode = mapping->host;
+	ssize_t status;
+	struct iov_iter i;
+
+	iov_iter_init(&i, iov, nr_segs, count, written);
+	if (a_ops->write_begin)
+		status = generic_perform_write(file, &i, pos);
+	else
+		status = generic_perform_write_2copy(file, &i, pos);
+
 	if (likely(status >= 0)) {
+		written += status;
+		*ppos = pos + status;
+
+		/*
+		 * For now, when the user asks for O_SYNC, we'll actually give
+		 * O_DSYNC
+		 */
 		if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
 			if (!a_ops->writepage || !is_sync_kiocb(iocb))
 				status = generic_osync_inode(inode, mapping,
@@ -1995,7 +2304,6 @@ zero_length_segment:
 	if (unlikely(file->f_flags & O_DIRECT) && written)
 		status = filemap_write_and_wait(mapping);
 
-	pagevec_lru_add(&lru_pvec);
 	return written ? written : status;
 }
 EXPORT_SYMBOL(generic_file_buffered_write);
diff --git a/mm/filemap.h b/mm/filemap.h
deleted file mode 100644
index c2bff04c84e..00000000000
--- a/mm/filemap.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- *	linux/mm/filemap.h
- *
- * Copyright (C) 1994-1999  Linus Torvalds
- */
-
-#ifndef __FILEMAP_H
-#define __FILEMAP_H
-
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/uio.h>
-#include <linux/uaccess.h>
-
-size_t
-__filemap_copy_from_user_iovec_inatomic(char *vaddr,
-					const struct iovec *iov,
-					size_t base,
-					size_t bytes);
-
-/*
- * Copy as much as we can into the page and return the number of bytes which
- * were sucessfully copied.  If a fault is encountered then clear the page
- * out to (offset+bytes) and return the number of bytes which were copied.
- *
- * NOTE: For this to work reliably we really want copy_from_user_inatomic_nocache
- * to *NOT* zero any tail of the buffer that it failed to copy.  If it does,
- * and if the following non-atomic copy succeeds, then there is a small window
- * where the target page contains neither the data before the write, nor the
- * data after the write (it contains zero).  A read at this time will see
- * data that is inconsistent with any ordering of the read and the write.
- * (This has been detected in practice).
- */
-static inline size_t
-filemap_copy_from_user(struct page *page, unsigned long offset,
-			const char __user *buf, unsigned bytes)
-{
-	char *kaddr;
-	int left;
-
-	kaddr = kmap_atomic(page, KM_USER0);
-	left = __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes);
-	kunmap_atomic(kaddr, KM_USER0);
-
-	if (left != 0) {
-		/* Do it the slow way */
-		kaddr = kmap(page);
-		left = __copy_from_user_nocache(kaddr + offset, buf, bytes);
-		kunmap(page);
-	}
-	return bytes - left;
-}
-
-/*
- * This has the same sideeffects and return value as filemap_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * page (out to offset+bytes), to emulate filemap_copy_from_user()'s
- * single-segment behaviour.
- */
-static inline size_t
-filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
-			const struct iovec *iov, size_t base, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap_atomic(page, KM_USER0);
-	copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
-							 base, bytes);
-	kunmap_atomic(kaddr, KM_USER0);
-	if (copied != bytes) {
-		kaddr = kmap(page);
-		copied = __filemap_copy_from_user_iovec_inatomic(kaddr + offset, iov,
-								 base, bytes);
-		if (bytes - copied)
-			memset(kaddr + offset + copied, 0, bytes - copied);
-		kunmap(page);
-	}
-	return copied;
-}
-
-static inline void
-filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
-{
-	const struct iovec *iov = *iovp;
-	size_t base = *basep;
-
-	do {
-		int copy = min(bytes, iov->iov_len - base);
-
-		bytes -= copy;
-		base += copy;
-		if (iov->iov_len == base) {
-			iov++;
-			base = 0;
-		}
-	} while (bytes);
-	*iovp = iov;
-	*basep = base;
-}
-#endif
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 53ee6a29963..32132f3cd64 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -15,7 +15,6 @@
 #include <linux/rmap.h>
 #include <linux/sched.h>
 #include <asm/tlbflush.h>
-#include "filemap.h"
 
 /*
  * We do use our own empty page to avoid interference with other users
@@ -288,6 +287,7 @@ __xip_file_write(struct file *filp, const char __user *buf,
 		unsigned long index;
 		unsigned long offset;
 		size_t copied;
+		char *kaddr;
 
 		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
 		index = pos >> PAGE_CACHE_SHIFT;
@@ -295,14 +295,6 @@ __xip_file_write(struct file *filp, const char __user *buf,
 		if (bytes > count)
 			bytes = count;
 
-		/*
-		 * Bring in the user page that we will copy from _first_.
-		 * Otherwise there's a nasty deadlock on copying from the
-		 * same page as we're writing to, without it being marked
-		 * up-to-date.
-		 */
-		fault_in_pages_readable(buf, bytes);
-
 		page = a_ops->get_xip_page(mapping,
 					   index*(PAGE_SIZE/512), 0);
 		if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
@@ -319,8 +311,13 @@ __xip_file_write(struct file *filp, const char __user *buf,
 			break;
 		}
 
-		copied = filemap_copy_from_user(page, offset, buf, bytes);
+		fault_in_pages_readable(buf, bytes);
+		kaddr = kmap_atomic(page, KM_USER0);
+		copied = bytes -	/* destination is kaddr + offset */
+			__copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes);
+		kunmap_atomic(kaddr, KM_USER0);
 		flush_dcache_page(page);
+
 		if (likely(copied > 0)) {
 			status = copied;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index eab8c428cc9..ae2959bb59c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -23,12 +23,16 @@
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
+static unsigned long surplus_huge_pages;
 unsigned long max_huge_pages;
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static unsigned int nr_huge_pages_node[MAX_NUMNODES];
 static unsigned int free_huge_pages_node[MAX_NUMNODES];
+static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
+int hugetlb_dynamic_pool;
+static int hugetlb_next_nid;
 
 /*
  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
@@ -85,6 +89,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 			list_del(&page->lru);
 			free_huge_pages--;
 			free_huge_pages_node[nid]--;
+			if (vma && vma->vm_flags & VM_MAYSHARE)
+				resv_huge_pages--;
 			break;
 		}
 	}
@@ -92,58 +98,269 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 	return page;
 }
 
+static void update_and_free_page(struct page *page)
+{
+	int i;
+	nr_huge_pages--;
+	nr_huge_pages_node[page_to_nid(page)]--;
+	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
+		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
+				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
+				1 << PG_private | 1<< PG_writeback);
+	}
+	set_compound_page_dtor(page, NULL);
+	set_page_refcounted(page);
+	__free_pages(page, HUGETLB_PAGE_ORDER);
+}
+
 static void free_huge_page(struct page *page)
 {
-	BUG_ON(page_count(page));
+	int nid = page_to_nid(page);
 
+	BUG_ON(page_count(page));
 	INIT_LIST_HEAD(&page->lru);
 
 	spin_lock(&hugetlb_lock);
-	enqueue_huge_page(page);
+	if (surplus_huge_pages_node[nid]) {
+		update_and_free_page(page);
+		surplus_huge_pages--;
+		surplus_huge_pages_node[nid]--;
+	} else {
+		enqueue_huge_page(page);
+	}
 	spin_unlock(&hugetlb_lock);
 }
 
-static int alloc_fresh_huge_page(void)
+/*
+ * Increment or decrement surplus_huge_pages.  Keep node-specific counters
+ * balanced by operating on them in a round-robin fashion.
+ * Returns 1 if an adjustment was made.
+ */
+static int adjust_pool_surplus(int delta)
 {
 	static int prev_nid;
-	struct page *page;
-	int nid;
+	int nid = prev_nid;
+	int ret = 0;
+
+	VM_BUG_ON(delta != -1 && delta != 1);
+	do {
+		nid = next_node(nid, node_online_map);
+		if (nid == MAX_NUMNODES)
+			nid = first_node(node_online_map);
+
+		/* To shrink on this node, there must be a surplus page */
+		if (delta < 0 && !surplus_huge_pages_node[nid])
+			continue;
+		/* Surplus cannot exceed the total number of pages */
+		if (delta > 0 && surplus_huge_pages_node[nid] >=
+						nr_huge_pages_node[nid])
+			continue;
+
+		surplus_huge_pages += delta;
+		surplus_huge_pages_node[nid] += delta;
+		ret = 1;
+		break;
+	} while (nid != prev_nid);
 
-	/*
-	 * Copy static prev_nid to local nid, work on that, then copy it
-	 * back to prev_nid afterwards: otherwise there's a window in which
-	 * a racer might pass invalid nid MAX_NUMNODES to alloc_pages_node.
-	 * But we don't need to use a spin_lock here: it really doesn't
-	 * matter if occasionally a racer chooses the same nid as we do.
-	 */
-	nid = next_node(prev_nid, node_online_map);
-	if (nid == MAX_NUMNODES)
-		nid = first_node(node_online_map);
 	prev_nid = nid;
+	return ret;
+}
+
+static struct page *alloc_fresh_huge_page_node(int nid)
+{
+	struct page *page;
 
-	page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
+	page = alloc_pages_node(nid,
+		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|__GFP_NOWARN,
+		HUGETLB_PAGE_ORDER);
+	if (page) {
+		set_compound_page_dtor(page, free_huge_page);
+		spin_lock(&hugetlb_lock);
+		nr_huge_pages++;
+		nr_huge_pages_node[nid]++;
+		spin_unlock(&hugetlb_lock);
+		put_page(page); /* free it into the hugepage allocator */
+	}
+
+	return page;
+}
+
+static int alloc_fresh_huge_page(void)
+{
+	struct page *page;
+	int start_nid;
+	int next_nid;
+	int ret = 0;
+
+	start_nid = hugetlb_next_nid;
+
+	do {
+		page = alloc_fresh_huge_page_node(hugetlb_next_nid);
+		if (page)
+			ret = 1;
+		/*
+		 * Use a helper variable to find the next node and then
+		 * copy it back to hugetlb_next_nid afterwards:
+		 * otherwise there's a window in which a racer might
+		 * pass invalid nid MAX_NUMNODES to alloc_pages_node.
+		 * But we don't need to use a spin_lock here: it really
+		 * doesn't matter if occasionally a racer chooses the
+		 * same nid as we do.  Move nid forward in the mask even
+		 * if we just successfully allocated a hugepage so that
+		 * the next caller gets hugepages on the next node.
+		 */
+		next_nid = next_node(hugetlb_next_nid, node_online_map);
+		if (next_nid == MAX_NUMNODES)
+			next_nid = first_node(node_online_map);
+		hugetlb_next_nid = next_nid;
+	} while (!page && hugetlb_next_nid != start_nid);
+
+	return ret;
+}
+
+static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
+						unsigned long address)
+{
+	struct page *page;
+
+	/* Check if the dynamic pool is enabled */
+	if (!hugetlb_dynamic_pool)
+		return NULL;
+
+	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
 					HUGETLB_PAGE_ORDER);
 	if (page) {
 		set_compound_page_dtor(page, free_huge_page);
 		spin_lock(&hugetlb_lock);
 		nr_huge_pages++;
 		nr_huge_pages_node[page_to_nid(page)]++;
+		surplus_huge_pages++;
+		surplus_huge_pages_node[page_to_nid(page)]++;
 		spin_unlock(&hugetlb_lock);
-		put_page(page); /* free it into the hugepage allocator */
-		return 1;
 	}
-	return 0;
+
+	return page;
+}
+
+/*
+ * Increase the hugetlb pool such that it can accommodate a reservation
+ * of size 'delta'.
+ */
+static int gather_surplus_pages(int delta)
+{
+	struct list_head surplus_list;
+	struct page *page, *tmp;
+	int ret, i;
+	int needed, allocated;
+
+	needed = (resv_huge_pages + delta) - free_huge_pages;
+	if (needed <= 0)
+		return 0;
+
+	allocated = 0;
+	INIT_LIST_HEAD(&surplus_list);
+
+	ret = -ENOMEM;
+retry:
+	spin_unlock(&hugetlb_lock);
+	for (i = 0; i < needed; i++) {
+		page = alloc_buddy_huge_page(NULL, 0);
+		if (!page) {
+			/*
+			 * We were not able to allocate enough pages to
+			 * satisfy the entire reservation so we free what
+			 * we've allocated so far.
+			 */
+			spin_lock(&hugetlb_lock);
+			needed = 0;
+			goto free;
+		}
+
+		list_add(&page->lru, &surplus_list);
+	}
+	allocated += needed;
+
+	/*
+	 * After retaking hugetlb_lock, we need to recalculate 'needed'
+	 * because either resv_huge_pages or free_huge_pages may have changed.
+	 */
+	spin_lock(&hugetlb_lock);
+	needed = (resv_huge_pages + delta) - (free_huge_pages + allocated);
+	if (needed > 0)
+		goto retry;
+
+	/*
+	 * The surplus_list now contains _at_least_ the number of extra pages
+	 * needed to accommodate the reservation.  Add the appropriate number
+	 * of pages to the hugetlb pool and free the extras back to the buddy
+	 * allocator.
+	 */
+	needed += allocated;
+	ret = 0;
+free:
+	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+		list_del(&page->lru);
+		if ((--needed) >= 0)
+			enqueue_huge_page(page);
+		else {
+			/*
+			 * Decrement the refcount and free the page using its
+			 * destructor.  This must be done with hugetlb_lock
+			 * unlocked which is safe because free_huge_page takes
+			 * hugetlb_lock before deciding how to free the page.
+			 */
+			spin_unlock(&hugetlb_lock);
+			put_page(page);
+			spin_lock(&hugetlb_lock);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * When releasing a hugetlb pool reservation, any surplus pages that were
+ * allocated to satisfy the reservation must be explicitly freed if they were
+ * never used.
+ */
+void return_unused_surplus_pages(unsigned long unused_resv_pages)
+{
+	static int nid = -1;
+	struct page *page;
+	unsigned long nr_pages;
+
+	nr_pages = min(unused_resv_pages, surplus_huge_pages);
+
+	while (nr_pages) {
+		nid = next_node(nid, node_online_map);
+		if (nid == MAX_NUMNODES)
+			nid = first_node(node_online_map);
+
+		if (!surplus_huge_pages_node[nid])
+			continue;
+
+		if (!list_empty(&hugepage_freelists[nid])) {
+			page = list_entry(hugepage_freelists[nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			update_and_free_page(page);
+			free_huge_pages--;
+			free_huge_pages_node[nid]--;
+			surplus_huge_pages--;
+			surplus_huge_pages_node[nid]--;
+			nr_pages--;
+		}
+	}
 }
 
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr)
 {
-	struct page *page;
+	struct page *page = NULL;
+	int use_reserved_page = vma->vm_flags & VM_MAYSHARE;
 
 	spin_lock(&hugetlb_lock);
-	if (vma->vm_flags & VM_MAYSHARE)
-		resv_huge_pages--;
-	else if (free_huge_pages <= resv_huge_pages)
+	if (!use_reserved_page && (free_huge_pages <= resv_huge_pages))
 		goto fail;
 
 	page = dequeue_huge_page(vma, addr);
@@ -155,10 +372,17 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	return page;
 
 fail:
-	if (vma->vm_flags & VM_MAYSHARE)
-		resv_huge_pages++;
 	spin_unlock(&hugetlb_lock);
-	return NULL;
+
+	/*
+	 * Private mappings do not use reserved huge pages so the allocation
+	 * may have failed due to an undersized hugetlb pool.  Try to grab a
+	 * surplus huge page from the buddy allocator.
+	 */
+	if (!use_reserved_page)
+		page = alloc_buddy_huge_page(vma, addr);
+
+	return page;
 }
 
 static int __init hugetlb_init(void)
@@ -171,6 +395,8 @@ static int __init hugetlb_init(void)
 	for (i = 0; i < MAX_NUMNODES; ++i)
 		INIT_LIST_HEAD(&hugepage_freelists[i]);
 
+	hugetlb_next_nid = first_node(node_online_map);
+
 	for (i = 0; i < max_huge_pages; ++i) {
 		if (!alloc_fresh_huge_page())
 			break;
@@ -201,21 +427,6 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 }
 
 #ifdef CONFIG_SYSCTL
-static void update_and_free_page(struct page *page)
-{
-	int i;
-	nr_huge_pages--;
-	nr_huge_pages_node[page_to_nid(page)]--;
-	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
-		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
-				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
-				1 << PG_private | 1<< PG_writeback);
-	}
-	set_compound_page_dtor(page, NULL);
-	set_page_refcounted(page);
-	__free_pages(page, HUGETLB_PAGE_ORDER);
-}
-
 #ifdef CONFIG_HIGHMEM
 static void try_to_free_low(unsigned long count)
 {
@@ -224,14 +435,14 @@ static void try_to_free_low(unsigned long count)
 	for (i = 0; i < MAX_NUMNODES; ++i) {
 		struct page *page, *next;
 		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
+			if (count >= nr_huge_pages)
+				return;
 			if (PageHighMem(page))
 				continue;
 			list_del(&page->lru);
 			update_and_free_page(page);
 			free_huge_pages--;
 			free_huge_pages_node[page_to_nid(page)]--;
-			if (count >= nr_huge_pages)
-				return;
 		}
 	}
 }
@@ -241,26 +452,61 @@ static inline void try_to_free_low(unsigned long count)
 }
 #endif
 
+#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages)
 static unsigned long set_max_huge_pages(unsigned long count)
 {
-	while (count > nr_huge_pages) {
-		if (!alloc_fresh_huge_page())
-			return nr_huge_pages;
-	}
-	if (count >= nr_huge_pages)
-		return nr_huge_pages;
+	unsigned long min_count, ret;
 
+	/*
+	 * Increase the pool size
+	 * First take pages out of surplus state.  Then make up the
+	 * remaining difference by allocating fresh huge pages.
+	 */
 	spin_lock(&hugetlb_lock);
-	count = max(count, resv_huge_pages);
-	try_to_free_low(count);
-	while (count < nr_huge_pages) {
+	while (surplus_huge_pages && count > persistent_huge_pages) {
+		if (!adjust_pool_surplus(-1))
+			break;
+	}
+
+	while (count > persistent_huge_pages) {
+		int ret;
+		/*
+		 * If this allocation races such that we no longer need the
+		 * page, free_huge_page will handle it by freeing the page
+		 * and reducing the surplus.
+		 */
+		spin_unlock(&hugetlb_lock);
+		ret = alloc_fresh_huge_page();
+		spin_lock(&hugetlb_lock);
+		if (!ret)
+			goto out;
+
+	}
+
+	/*
+	 * Decrease the pool size
+	 * First return free pages to the buddy allocator (being careful
+	 * to keep enough around to satisfy reservations).  Then place
+	 * pages into surplus state as needed so the pool will shrink
+	 * to the desired size as pages become free.
+	 */
+	min_count = resv_huge_pages + nr_huge_pages - free_huge_pages;
+	min_count = max(count, min_count);
+	try_to_free_low(min_count);
+	while (min_count < persistent_huge_pages) {
 		struct page *page = dequeue_huge_page(NULL, 0);
 		if (!page)
 			break;
 		update_and_free_page(page);
 	}
+	while (count < persistent_huge_pages) {
+		if (!adjust_pool_surplus(1))
+			break;
+	}
+out:
+	ret = persistent_huge_pages;
 	spin_unlock(&hugetlb_lock);
-	return nr_huge_pages;
+	return ret;
 }
 
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
@@ -292,10 +538,12 @@ int hugetlb_report_meminfo(char *buf)
 			"HugePages_Total: %5lu\n"
 			"HugePages_Free:  %5lu\n"
 			"HugePages_Rsvd:  %5lu\n"
+			"HugePages_Surp:  %5lu\n"
 			"Hugepagesize:    %5lu kB\n",
 			nr_huge_pages,
 			free_huge_pages,
 			resv_huge_pages,
+			surplus_huge_pages,
 			HPAGE_SIZE/1024);
 }
 
@@ -355,7 +603,6 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 	entry = pte_mkwrite(pte_mkdirty(*ptep));
 	if (ptep_set_access_flags(vma, address, ptep, entry, 1)) {
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 	}
 }
 
@@ -708,7 +955,6 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
 			set_huge_pte_at(mm, address, ptep, pte);
-			lazy_mmu_prot_update(pte);
 		}
 	}
 	spin_unlock(&mm->page_table_lock);
@@ -843,21 +1089,6 @@ static int hugetlb_acct_memory(long delta)
 	int ret = -ENOMEM;
 
 	spin_lock(&hugetlb_lock);
-	if ((delta + resv_huge_pages) <= free_huge_pages) {
-		resv_huge_pages += delta;
-		ret = 0;
-	}
-	spin_unlock(&hugetlb_lock);
-	return ret;
-}
-
-int hugetlb_reserve_pages(struct inode *inode, long from, long to)
-{
-	long ret, chg;
-
-	chg = region_chg(&inode->i_mapping->private_list, from, to);
-	if (chg < 0)
-		return chg;
 	/*
 	 * When cpuset is configured, it breaks the strict hugetlb page
 	 * reservation as the accounting is done on a global variable. Such
@@ -875,8 +1106,31 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 	 * a best attempt and hopefully to minimize the impact of changing
 	 * semantics that cpuset has.
 	 */
-	if (chg > cpuset_mems_nr(free_huge_pages_node))
-		return -ENOMEM;
+	if (delta > 0) {
+		if (gather_surplus_pages(delta) < 0)
+			goto out;
+
+		if (delta > cpuset_mems_nr(free_huge_pages_node))
+			goto out;
+	}
+
+	ret = 0;
+	resv_huge_pages += delta;
+	if (delta < 0)
+		return_unused_surplus_pages((unsigned long) -delta);
+
+out:
+	spin_unlock(&hugetlb_lock);
+	return ret;
+}
+
+int hugetlb_reserve_pages(struct inode *inode, long from, long to)
+{
+	long ret, chg;
+
+	chg = region_chg(&inode->i_mapping->private_list, from, to);
+	if (chg < 0)
+		return chg;
 
 	ret = hugetlb_acct_memory(chg);
 	if (ret < 0)
diff --git a/mm/internal.h b/mm/internal.h
index a3110c02aea..953f941ea86 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -37,4 +37,14 @@ static inline void __put_page(struct page *page)
 extern void fastcall __init __free_pages_bootmem(struct page *page,
 						unsigned int order);
 
+/*
+ * function for dealing with page's order in buddy system.
+ * zone->lock is already acquired when we use these.
+ * So, we don't need atomic page->flags operations here.
+ */
+static inline unsigned long page_order(struct page *page)
+{
+	VM_BUG_ON(!PageBuddy(page));
+	return page_private(page);
+}
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index f82b359b274..bd16dcaeefb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -966,7 +966,7 @@ no_page_table:
 	 * has touched so far, we don't want to allocate page tables.
 	 */
 	if (flags & FOLL_ANON) {
-		page = ZERO_PAGE(address);
+		page = ZERO_PAGE(0);
 		if (flags & FOLL_GET)
 			get_page(page);
 		BUG_ON(flags & FOLL_WRITE);
@@ -1111,95 +1111,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 }
 EXPORT_SYMBOL(get_user_pages);
 
-static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
-			unsigned long addr, unsigned long end, pgprot_t prot)
-{
-	pte_t *pte;
-	spinlock_t *ptl;
-	int err = 0;
-
-	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
-	if (!pte)
-		return -EAGAIN;
-	arch_enter_lazy_mmu_mode();
-	do {
-		struct page *page = ZERO_PAGE(addr);
-		pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
-
-		if (unlikely(!pte_none(*pte))) {
-			err = -EEXIST;
-			pte++;
-			break;
-		}
-		page_cache_get(page);
-		page_add_file_rmap(page);
-		inc_mm_counter(mm, file_rss);
-		set_pte_at(mm, addr, pte, zero_pte);
-	} while (pte++, addr += PAGE_SIZE, addr != end);
-	arch_leave_lazy_mmu_mode();
-	pte_unmap_unlock(pte - 1, ptl);
-	return err;
-}
-
-static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
-			unsigned long addr, unsigned long end, pgprot_t prot)
-{
-	pmd_t *pmd;
-	unsigned long next;
-	int err;
-
-	pmd = pmd_alloc(mm, pud, addr);
-	if (!pmd)
-		return -EAGAIN;
-	do {
-		next = pmd_addr_end(addr, end);
-		err = zeromap_pte_range(mm, pmd, addr, next, prot);
-		if (err)
-			break;
-	} while (pmd++, addr = next, addr != end);
-	return err;
-}
-
-static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
-			unsigned long addr, unsigned long end, pgprot_t prot)
-{
-	pud_t *pud;
-	unsigned long next;
-	int err;
-
-	pud = pud_alloc(mm, pgd, addr);
-	if (!pud)
-		return -EAGAIN;
-	do {
-		next = pud_addr_end(addr, end);
-		err = zeromap_pmd_range(mm, pud, addr, next, prot);
-		if (err)
-			break;
-	} while (pud++, addr = next, addr != end);
-	return err;
-}
-
-int zeromap_page_range(struct vm_area_struct *vma,
-			unsigned long addr, unsigned long size, pgprot_t prot)
-{
-	pgd_t *pgd;
-	unsigned long next;
-	unsigned long end = addr + size;
-	struct mm_struct *mm = vma->vm_mm;
-	int err;
-
-	BUG_ON(addr >= end);
-	pgd = pgd_offset(mm, addr);
-	flush_cache_range(vma, addr, end);
-	do {
-		next = pgd_addr_end(addr, end);
-		err = zeromap_pud_range(mm, pgd, addr, next, prot);
-		if (err)
-			break;
-	} while (pgd++, addr = next, addr != end);
-	return err;
-}
-
 pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
 {
 	pgd_t * pgd = pgd_offset(mm, addr);
@@ -1700,10 +1611,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		if (ptep_set_access_flags(vma, address, page_table, entry,1)) {
+		if (ptep_set_access_flags(vma, address, page_table, entry,1))
 			update_mmu_cache(vma, address, entry);
-			lazy_mmu_prot_update(entry);
-		}
 		ret |= VM_FAULT_WRITE;
 		goto unlock;
 	}
@@ -1717,16 +1626,11 @@ gotten:
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
-	if (old_page == ZERO_PAGE(address)) {
-		new_page = alloc_zeroed_user_highpage_movable(vma, address);
-		if (!new_page)
-			goto oom;
-	} else {
-		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
-		if (!new_page)
-			goto oom;
-		cow_user_page(new_page, old_page, address, vma);
-	}
+	VM_BUG_ON(old_page == ZERO_PAGE(0));
+	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (!new_page)
+		goto oom;
+	cow_user_page(new_page, old_page, address, vma);
 
 	/*
 	 * Re-check the pte - we dropped the lock
@@ -1744,7 +1648,6 @@ gotten:
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		lazy_mmu_prot_update(entry);
 		/*
 		 * Clear the pte entry and flush it first, before updating the
 		 * pte with the new entry. This will avoid a race condition
@@ -2252,44 +2155,28 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	if (write_access) {
-		/* Allocate our own private page. */
-		pte_unmap(page_table);
-
-		if (unlikely(anon_vma_prepare(vma)))
-			goto oom;
-		page = alloc_zeroed_user_highpage_movable(vma, address);
-		if (!page)
-			goto oom;
-
-		entry = mk_pte(page, vma->vm_page_prot);
-		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	/* Allocate our own private page. */
+	pte_unmap(page_table);
 
-		page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-		if (!pte_none(*page_table))
-			goto release;
-		inc_mm_counter(mm, anon_rss);
-		lru_cache_add_active(page);
-		page_add_new_anon_rmap(page, vma, address);
-	} else {
-		/* Map the ZERO_PAGE - vm_page_prot is readonly */
-		page = ZERO_PAGE(address);
-		page_cache_get(page);
-		entry = mk_pte(page, vma->vm_page_prot);
+	if (unlikely(anon_vma_prepare(vma)))
+		goto oom;
+	page = alloc_zeroed_user_highpage_movable(vma, address);
+	if (!page)
+		goto oom;
 
-		ptl = pte_lockptr(mm, pmd);
-		spin_lock(ptl);
-		if (!pte_none(*page_table))
-			goto release;
-		inc_mm_counter(mm, file_rss);
-		page_add_file_rmap(page);
-	}
+	entry = mk_pte(page, vma->vm_page_prot);
+	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
+	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (!pte_none(*page_table))
+		goto release;
+	inc_mm_counter(mm, anon_rss);
+	lru_cache_add_active(page);
+	page_add_new_anon_rmap(page, vma, address);
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, address, entry);
-	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return 0;
@@ -2442,7 +2329,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 		/* no need to invalidate: a not-present page won't be cached */
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 	} else {
 		if (anon)
 			page_cache_release(page);
@@ -2470,7 +2356,7 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		int write_access, pte_t orig_pte)
 {
 	pgoff_t pgoff = (((address & PAGE_MASK)
-			- vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
+			- vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 	unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
 
 	pte_unmap(page_table);
@@ -2614,7 +2500,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	entry = pte_mkyoung(entry);
 	if (ptep_set_access_flags(vma, address, pte, entry, write_access)) {
 		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch code
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index df9d554bea3..091b9c6c252 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -23,6 +23,9 @@
 #include <linux/vmalloc.h>
 #include <linux/ioport.h>
 #include <linux/cpuset.h>
+#include <linux/delay.h>
+#include <linux/migrate.h>
+#include <linux/page-isolation.h>
 
 #include <asm/tlbflush.h>
 
@@ -161,14 +164,27 @@ static void grow_pgdat_span(struct pglist_data *pgdat,
 					pgdat->node_start_pfn;
 }
 
-int online_pages(unsigned long pfn, unsigned long nr_pages)
+static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
+			void *arg)
 {
 	unsigned long i;
+	unsigned long onlined_pages = *(unsigned long *)arg;
+	struct page *page;
+	if (PageReserved(pfn_to_page(start_pfn)))
+		for (i = 0; i < nr_pages; i++) {
+			page = pfn_to_page(start_pfn + i);
+			online_page(page);
+			onlined_pages++;
+		}
+	*(unsigned long *)arg = onlined_pages;
+	return 0;
+}
+
+
+int online_pages(unsigned long pfn, unsigned long nr_pages)
+{
 	unsigned long flags;
 	unsigned long onlined_pages = 0;
-	struct resource res;
-	u64 section_end;
-	unsigned long start_pfn;
 	struct zone *zone;
 	int need_zonelists_rebuild = 0;
 
@@ -191,32 +207,16 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 	if (!populated_zone(zone))
 		need_zonelists_rebuild = 1;
 
-	res.start = (u64)pfn << PAGE_SHIFT;
-	res.end = res.start + ((u64)nr_pages << PAGE_SHIFT) - 1;
-	res.flags = IORESOURCE_MEM; /* we just need system ram */
-	section_end = res.end;
-
-	while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
-		start_pfn = (unsigned long)(res.start >> PAGE_SHIFT);
-		nr_pages = (unsigned long)
-                           ((res.end + 1 - res.start) >> PAGE_SHIFT);
-
-		if (PageReserved(pfn_to_page(start_pfn))) {
-			/* this region's page is not onlined now */
-			for (i = 0; i < nr_pages; i++) {
-				struct page *page = pfn_to_page(start_pfn + i);
-				online_page(page);
-				onlined_pages++;
-			}
-		}
-
-		res.start = res.end + 1;
-		res.end = section_end;
-	}
+	walk_memory_resource(pfn, nr_pages, &onlined_pages,
+		online_pages_range);
 	zone->present_pages += onlined_pages;
 	zone->zone_pgdat->node_present_pages += onlined_pages;
 
 	setup_per_zone_pages_min();
+	if (onlined_pages) {
+		kswapd_run(zone_to_nid(zone));
+		node_set_state(zone_to_nid(zone), N_HIGH_MEMORY);
+	}
 
 	if (need_zonelists_rebuild)
 		build_all_zonelists();
@@ -271,9 +271,6 @@ int add_memory(int nid, u64 start, u64 size)
 		if (!pgdat)
 			return -ENOMEM;
 		new_pgdat = 1;
-		ret = kswapd_run(nid);
-		if (ret)
-			goto error;
 	}
 
 	/* call arch's memory hotadd */
@@ -308,3 +305,260 @@ error:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * Confirm all pages in a range [start, end) belong to the same zone.
+ */
+static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+	struct zone *zone = NULL;
+	struct page *page;
+	int i;
+	for (pfn = start_pfn;
+	     pfn < end_pfn;
+	     pfn += MAX_ORDER_NR_PAGES) {
+		i = 0;
+		/* This is just a CONFIG_HOLES_IN_ZONE check.*/
+		while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i))
+			i++;
+		if (i == MAX_ORDER_NR_PAGES)
+			continue;
+		page = pfn_to_page(pfn + i);
+		if (zone && page_zone(page) != zone)
+			return 0;
+		zone = page_zone(page);
+	}
+	return 1;
+}
+
+/*
+ * Scanning pfns is much easier than scanning the LRU list.
+ * Scan [start, end) and return the pfn of the first LRU page, or 0 if none.
+ */
+int scan_lru_pages(unsigned long start, unsigned long end)
+{
+	unsigned long pfn;
+	struct page *page;
+	for (pfn = start; pfn < end; pfn++) {
+		if (pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			if (PageLRU(page))
+				return pfn;
+		}
+	}
+	return 0;
+}
+
+static struct page *
+hotremove_migrate_alloc(struct page *page,
+			unsigned long private,
+			int **x)
+{
+	/* This should be improoooooved!! */
+	return alloc_page(GFP_HIGHUSER_PAGECACHE);
+}
+
+
+#define NR_OFFLINE_AT_ONCE_PAGES	(256)
+static int
+do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+	struct page *page;
+	int move_pages = NR_OFFLINE_AT_ONCE_PAGES;
+	int not_managed = 0;
+	int ret = 0;
+	LIST_HEAD(source);
+
+	for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
+		if (!pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+		if (!page_count(page))
+			continue;
+		/*
+		 * We can skip free pages. And we can only deal with pages on
+		 * LRU.
+		 */
+		ret = isolate_lru_page(page, &source);
+		if (!ret) { /* Success */
+			move_pages--;
+		} else {
+			/* Because we don't have the big zone->lock, we should
+			   check this again here. */
+			if (page_count(page))
+				not_managed++;
+#ifdef CONFIG_DEBUG_VM
+			printk(KERN_INFO "removing from LRU failed"
+					 " %lx/%d/%lx\n",
+				pfn, page_count(page), page->flags);
+#endif
+		}
+	}
+	ret = -EBUSY;
+	if (not_managed) {
+		if (!list_empty(&source))
+			putback_lru_pages(&source);
+		goto out;
+	}
+	ret = 0;
+	if (list_empty(&source))
+		goto out;
+	/* this function returns # of failed pages */
+	ret = migrate_pages(&source, hotremove_migrate_alloc, 0);
+
+out:
+	return ret;
+}
+
+/*
+ * remove from free_area[] and mark all as Reserved.
+ */
+static int
+offline_isolated_pages_cb(unsigned long start, unsigned long nr_pages,
+			void *data)
+{
+	__offline_isolated_pages(start, start + nr_pages);
+	return 0;
+}
+
+static void
+offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+	walk_memory_resource(start_pfn, end_pfn - start_pfn, NULL,
+				offline_isolated_pages_cb);
+}
+
+/*
+ * Check all pages in range, recorded as memory resource, are isolated.
+ */
+static int
+check_pages_isolated_cb(unsigned long start_pfn, unsigned long nr_pages,
+			void *data)
+{
+	int ret;
+	long offlined = *(long *)data;
+	ret = test_pages_isolated(start_pfn, start_pfn + nr_pages);
+	offlined = nr_pages;
+	if (!ret)
+		*(long *)data += offlined;
+	return ret;
+}
+
+static long
+check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
+{
+	long offlined = 0;
+	int ret;
+
+	ret = walk_memory_resource(start_pfn, end_pfn - start_pfn, &offlined,
+			check_pages_isolated_cb);
+	if (ret < 0)
+		offlined = (long)ret;
+	return offlined;
+}
+
+extern void drain_all_local_pages(void);
+
+int offline_pages(unsigned long start_pfn,
+		  unsigned long end_pfn, unsigned long timeout)
+{
+	unsigned long pfn, nr_pages, expire;
+	long offlined_pages;
+	int ret, drain, retry_max;
+	struct zone *zone;
+
+	BUG_ON(start_pfn >= end_pfn);
+	/* at least, alignment against pageblock is necessary */
+	if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
+		return -EINVAL;
+	if (!IS_ALIGNED(end_pfn, pageblock_nr_pages))
+		return -EINVAL;
+	/* This makes hotplug much easier...and readable.
+	   We assume this for now. */
+	if (!test_pages_in_a_zone(start_pfn, end_pfn))
+		return -EINVAL;
+	/* set above range as isolated */
+	ret = start_isolate_page_range(start_pfn, end_pfn);
+	if (ret)
+		return ret;
+	nr_pages = end_pfn - start_pfn;
+	pfn = start_pfn;
+	expire = jiffies + timeout;
+	drain = 0;
+	retry_max = 5;
+repeat:
+	/* start memory hot removal */
+	ret = -EAGAIN;
+	if (time_after(jiffies, expire))
+		goto failed_removal;
+	ret = -EINTR;
+	if (signal_pending(current))
+		goto failed_removal;
+	ret = 0;
+	if (drain) {
+		lru_add_drain_all();
+		flush_scheduled_work();
+		cond_resched();
+		drain_all_local_pages();
+	}
+
+	pfn = scan_lru_pages(start_pfn, end_pfn);
+	if (pfn) { /* We have page on LRU */
+		ret = do_migrate_range(pfn, end_pfn);
+		if (!ret) {
+			drain = 1;
+			goto repeat;
+		} else {
+			if (ret < 0)
+				if (--retry_max == 0)
+					goto failed_removal;
+			yield();
+			drain = 1;
+			goto repeat;
+		}
+	}
+	/* drain all zone's lru pagevec, this is asynchronous... */
+	lru_add_drain_all();
+	flush_scheduled_work();
+	yield();
+	/* drain pcp pages, this is synchronous. */
+	drain_all_local_pages();
+	/* check again */
+	offlined_pages = check_pages_isolated(start_pfn, end_pfn);
+	if (offlined_pages < 0) {
+		ret = -EBUSY;
+		goto failed_removal;
+	}
+	printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
+	/* Ok, all of our target is isolated.
+	   We cannot do rollback at this point. */
+	offline_isolated_pages(start_pfn, end_pfn);
+	/* reset pagetype flags */
+	start_isolate_page_range(start_pfn, end_pfn);
+	/* removal success */
+	zone = page_zone(pfn_to_page(start_pfn));
+	zone->present_pages -= offlined_pages;
+	zone->zone_pgdat->node_present_pages -= offlined_pages;
+	totalram_pages -= offlined_pages;
+	num_physpages -= offlined_pages;
+	vm_total_pages = nr_free_pagecache_pages();
+	writeback_set_ratelimit();
+	return 0;
+
+failed_removal:
+	printk(KERN_INFO "memory offlining %lx to %lx failed\n",
+		start_pfn, end_pfn);
+	/* pushback to free area */
+	undo_isolate_page_range(start_pfn, end_pfn);
+	return ret;
+}
+#else
+int remove_memory(u64 start, u64 size)
+{
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(remove_memory);
+#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3d6ac9505d0..568152ae6ca 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -72,7 +72,6 @@
 #include <linux/hugetlb.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/mm.h>
 #include <linux/nodemask.h>
 #include <linux/cpuset.h>
 #include <linux/gfp.h>
@@ -82,13 +81,13 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/compat.h>
-#include <linux/mempolicy.h>
 #include <linux/swap.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/migrate.h>
 #include <linux/rmap.h>
 #include <linux/security.h>
+#include <linux/syscalls.h>
 
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
@@ -110,6 +109,9 @@ struct mempolicy default_policy = {
 	.policy = MPOL_DEFAULT,
 };
 
+static void mpol_rebind_policy(struct mempolicy *pol,
+                               const nodemask_t *newmask);
+
 /* Do sanity checking on a policy */
 static int mpol_check_policy(int mode, nodemask_t *nodes)
 {
@@ -128,7 +130,7 @@ static int mpol_check_policy(int mode, nodemask_t *nodes)
 			return -EINVAL;
 		break;
 	}
-	return nodes_subset(*nodes, node_online_map) ? 0 : -EINVAL;
+ 	return nodes_subset(*nodes, node_states[N_HIGH_MEMORY]) ? 0 : -EINVAL;
 }
 
 /* Generate a custom zonelist for the BIND policy. */
@@ -185,7 +187,9 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 	switch (mode) {
 	case MPOL_INTERLEAVE:
 		policy->v.nodes = *nodes;
-		if (nodes_weight(*nodes) == 0) {
+		nodes_and(policy->v.nodes, policy->v.nodes,
+					node_states[N_HIGH_MEMORY]);
+		if (nodes_weight(policy->v.nodes) == 0) {
 			kmem_cache_free(policy_cache, policy);
 			return ERR_PTR(-EINVAL);
 		}
@@ -459,7 +463,7 @@ static void mpol_set_task_struct_flag(void)
 }
 
 /* Set the process memory policy */
-long do_set_mempolicy(int mode, nodemask_t *nodes)
+static long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
 	struct mempolicy *new;
 
@@ -494,9 +498,9 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
 		*nodes = p->v.nodes;
 		break;
 	case MPOL_PREFERRED:
-		/* or use current node instead of online map? */
+		/* or use current node instead of memory_map? */
 		if (p->v.preferred_node < 0)
-			*nodes = node_online_map;
+			*nodes = node_states[N_HIGH_MEMORY];
 		else
 			node_set(p->v.preferred_node, *nodes);
 		break;
@@ -519,8 +523,8 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr)
 }
 
 /* Retrieve NUMA policy */
-long do_get_mempolicy(int *policy, nodemask_t *nmask,
-			unsigned long addr, unsigned long flags)
+static long do_get_mempolicy(int *policy, nodemask_t *nmask,
+			     unsigned long addr, unsigned long flags)
 {
 	int err;
 	struct mm_struct *mm = current->mm;
@@ -528,8 +532,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	struct mempolicy *pol = current->mempolicy;
 
 	cpuset_update_task_memory_state();
-	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
+	if (flags &
+		~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR|MPOL_F_MEMS_ALLOWED))
 		return -EINVAL;
+
+	if (flags & MPOL_F_MEMS_ALLOWED) {
+		if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
+			return -EINVAL;
+		*policy = 0;	/* just so it's initialized */
+		*nmask  = cpuset_current_mems_allowed;
+		return 0;
+	}
+
 	if (flags & MPOL_F_ADDR) {
 		down_read(&mm->mmap_sem);
 		vma = find_vma_intersection(mm, addr, addr+1);
@@ -601,7 +615,8 @@ static struct page *new_node_page(struct page *page, unsigned long node, int **x
  * Migrate pages from one node to a target node.
  * Returns error or the number of pages not migrated.
  */
-int migrate_to_node(struct mm_struct *mm, int source, int dest, int flags)
+static int migrate_to_node(struct mm_struct *mm, int source, int dest,
+			   int flags)
 {
 	nodemask_t nmask;
 	LIST_HEAD(pagelist);
@@ -732,8 +747,9 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
 }
 #endif
 
-long do_mbind(unsigned long start, unsigned long len,
-		unsigned long mode, nodemask_t *nmask, unsigned long flags)
+static long do_mbind(unsigned long start, unsigned long len,
+		     unsigned long mode, nodemask_t *nmask,
+		     unsigned long flags)
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
@@ -955,7 +971,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 		goto out;
 	}
 
-	if (!nodes_subset(new, node_online_map)) {
+	if (!nodes_subset(new, node_states[N_HIGH_MEMORY])) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -978,7 +994,8 @@ asmlinkage long sys_get_mempolicy(int __user *policy,
 				unsigned long maxnode,
 				unsigned long addr, unsigned long flags)
 {
-	int err, pval;
+	int err;
+	int uninitialized_var(pval);
 	nodemask_t nodes;
 
 	if (nmask != NULL && maxnode < MAX_NUMNODES)
@@ -1527,8 +1544,8 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 	kmem_cache_free(sn_cache, n);
 }
 
-struct sp_node *
-sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol)
+static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
+				struct mempolicy *pol)
 {
 	struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);
 
@@ -1677,7 +1694,7 @@ void __init numa_policy_init(void)
 	 * fall back to the largest node if they're all smaller.
 	 */
 	nodes_clear(interleave_nodes);
-	for_each_online_node(nid) {
+	for_each_node_state(nid, N_HIGH_MEMORY) {
 		unsigned long total_pages = node_present_pages(nid);
 
 		/* Preserve the largest node */
@@ -1706,7 +1723,8 @@ void numa_default_policy(void)
 }
 
 /* Migrate a policy to a different set of nodes */
-void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
+static void mpol_rebind_policy(struct mempolicy *pol,
+			       const nodemask_t *newmask)
 {
 	nodemask_t *mpolmask;
 	nodemask_t tmp;
@@ -1963,7 +1981,7 @@ int show_numa_map(struct seq_file *m, void *v)
 		seq_printf(m, " huge");
 	} else {
 		check_pgd_range(vma, vma->vm_start, vma->vm_end,
-				&node_online_map, MPOL_MF_STATS, md);
+			&node_states[N_HIGH_MEMORY], MPOL_MF_STATS, md);
 	}
 
 	if (!md->pages)
@@ -1990,7 +2008,7 @@ int show_numa_map(struct seq_file *m, void *v)
 	if (md->writeback)
 		seq_printf(m," writeback=%lu", md->writeback);
 
-	for_each_online_node(n)
+	for_each_node_state(n, N_HIGH_MEMORY)
 		if (md->node[n])
 			seq_printf(m, " N%d=%lu", n, md->node[n]);
 out:
diff --git a/mm/migrate.c b/mm/migrate.c
index 07f22d4a431..06d0877a66e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -171,6 +171,7 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+	flush_cache_page(vma, addr, pte_pfn(pte));
 	set_pte_at(mm, addr, ptep, pte);
 
 	if (PageAnon(new))
@@ -180,7 +181,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, pte);
-	lazy_mmu_prot_update(pte);
 
 out:
 	pte_unmap_unlock(ptep, ptl);
@@ -986,7 +986,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 				goto out;
 
 			err = -ENODEV;
-			if (!node_online(node))
+			if (!node_state(node, N_HIGH_MEMORY))
 				goto out;
 
 			err = -EACCES;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e8346c30abe..1d4d69790e5 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -53,7 +53,6 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 			if (dirty_accountable && pte_dirty(ptent))
 				ptent = pte_mkwrite(ptent);
 			set_pte_at(mm, addr, pte, ptent);
-			lazy_mmu_prot_update(ptent);
 #ifdef CONFIG_MIGRATION
 		} else if (!pte_file(oldpte)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f9b82ad5047..41b4e362221 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -177,14 +177,7 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 {
 #ifdef CONFIG_NUMA
 	struct zone **z;
-	nodemask_t nodes;
-	int node;
-
-	nodes_clear(nodes);
-	/* node has memory ? */
-	for_each_online_node(node)
-		if (NODE_DATA(node)->node_present_pages)
-			node_set(node, nodes);
+	nodemask_t nodes = node_states[N_HIGH_MEMORY];
 
 	for (z = zonelist->zones; *z; z++)
 		if (cpuset_zone_allowed_softwall(*z, gfp_mask))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 44720363374..d821321326e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -126,7 +126,7 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
 	int node;
 	unsigned long x = 0;
 
-	for_each_online_node(node) {
+	for_each_node_state(node, N_HIGH_MEMORY) {
 		struct zone *z =
 			&NODE_DATA(node)->node_zones[ZONE_HIGHMEM];
 
@@ -1022,17 +1022,15 @@ int test_set_page_writeback(struct page *page)
 EXPORT_SYMBOL(test_set_page_writeback);
 
 /*
- * Return true if any of the pages in the mapping are marged with the
+ * Return true if any of the pages in the mapping are marked with the
  * passed tag.
  */
 int mapping_tagged(struct address_space *mapping, int tag)
 {
-	unsigned long flags;
 	int ret;
-
-	read_lock_irqsave(&mapping->tree_lock, flags);
+	rcu_read_lock();
 	ret = radix_tree_tagged(&mapping->page_tree, tag);
-	read_unlock_irqrestore(&mapping->tree_lock, flags);
+	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL(mapping_tagged);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1a8c59571cb..d315e1127dc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -41,24 +41,37 @@
 #include <linux/pfn.h>
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
+#include <linux/page-isolation.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
 #include "internal.h"
 
 /*
- * MCD - HACK: Find somewhere to initialize this EARLY, or make this
- * initializer cleaner
+ * Array of node states.
  */
-nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
-EXPORT_SYMBOL(node_online_map);
-nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
-EXPORT_SYMBOL(node_possible_map);
+nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
+	[N_POSSIBLE] = NODE_MASK_ALL,
+	[N_ONLINE] = { { [0] = 1UL } },
+#ifndef CONFIG_NUMA
+	[N_NORMAL_MEMORY] = { { [0] = 1UL } },
+#ifdef CONFIG_HIGHMEM
+	[N_HIGH_MEMORY] = { { [0] = 1UL } },
+#endif
+	[N_CPU] = { { [0] = 1UL } },
+#endif	/* NUMA */
+};
+EXPORT_SYMBOL(node_states);
+
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
+#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
+int pageblock_order __read_mostly;
+#endif
+
 static void __free_pages_ok(struct page *page, unsigned int order);
 
 /*
@@ -137,7 +150,7 @@ static unsigned long __meminitdata dma_reserve;
   static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
 #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   unsigned long __initdata required_kernelcore;
-  unsigned long __initdata required_movablecore;
+  static unsigned long __initdata required_movablecore;
   unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
 
   /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
@@ -150,6 +163,14 @@ int nr_node_ids __read_mostly = MAX_NUMNODES;
 EXPORT_SYMBOL(nr_node_ids);
 #endif
 
+int page_group_by_mobility_disabled __read_mostly;
+
+static void set_pageblock_migratetype(struct page *page, int migratetype)
+{
+	set_pageblock_flags_group(page, (unsigned long)migratetype,
+					PB_migrate, PB_migrate_end);
+}
+
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -293,16 +314,6 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
 		clear_highpage(page + i);
 }
 
-/*
- * function for dealing with page's order in buddy system.
- * zone->lock is already acquired when we use these.
- * So, we don't need atomic page->flags operations here.
- */
-static inline unsigned long page_order(struct page *page)
-{
-	return page_private(page);
-}
-
 static inline void set_page_order(struct page *page, int order)
 {
 	set_page_private(page, order);
@@ -404,6 +415,7 @@ static inline void __free_one_page(struct page *page,
 {
 	unsigned long page_idx;
 	int order_size = 1 << order;
+	int migratetype = get_pageblock_migratetype(page);
 
 	if (unlikely(PageCompound(page)))
 		destroy_compound_page(page, order);
@@ -416,7 +428,6 @@ static inline void __free_one_page(struct page *page,
 	__mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
 	while (order < MAX_ORDER-1) {
 		unsigned long combined_idx;
-		struct free_area *area;
 		struct page *buddy;
 
 		buddy = __page_find_buddy(page, page_idx, order);
@@ -424,8 +435,7 @@ static inline void __free_one_page(struct page *page,
 			break;		/* Move the buddy up one level. */
 
 		list_del(&buddy->lru);
-		area = zone->free_area + order;
-		area->nr_free--;
+		zone->free_area[order].nr_free--;
 		rmv_page_order(buddy);
 		combined_idx = __find_combined_index(page_idx, order);
 		page = page + (combined_idx - page_idx);
@@ -433,7 +443,8 @@ static inline void __free_one_page(struct page *page,
 		order++;
 	}
 	set_page_order(page, order);
-	list_add(&page->lru, &zone->free_area[order].free_list);
+	list_add(&page->lru,
+		&zone->free_area[order].free_list[migratetype]);
 	zone->free_area[order].nr_free++;
 }
 
@@ -567,7 +578,8 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
  * -- wli
  */
 static inline void expand(struct zone *zone, struct page *page,
- 	int low, int high, struct free_area *area)
+	int low, int high, struct free_area *area,
+	int migratetype)
 {
 	unsigned long size = 1 << high;
 
@@ -576,7 +588,7 @@ static inline void expand(struct zone *zone, struct page *page,
 		high--;
 		size >>= 1;
 		VM_BUG_ON(bad_range(zone, &page[size]));
-		list_add(&page[size].lru, &area->free_list);
+		list_add(&page[size].lru, &area->free_list[migratetype]);
 		area->nr_free++;
 		set_page_order(&page[size], high);
 	}
@@ -628,49 +640,235 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 	return 0;
 }
 
-/* 
- * Do the hard work of removing an element from the buddy allocator.
- * Call me with the zone->lock already held.
+/*
+ * Go through the free lists for the given migratetype and remove
+ * the smallest available page from the freelists
  */
-static struct page *__rmqueue(struct zone *zone, unsigned int order)
+static struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
+						int migratetype)
 {
-	struct free_area * area;
 	unsigned int current_order;
+	struct free_area * area;
 	struct page *page;
 
+	/* Find a page of the appropriate size in the preferred list */
 	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
-		area = zone->free_area + current_order;
-		if (list_empty(&area->free_list))
+		area = &(zone->free_area[current_order]);
+		if (list_empty(&area->free_list[migratetype]))
 			continue;
 
-		page = list_entry(area->free_list.next, struct page, lru);
+		page = list_entry(area->free_list[migratetype].next,
+							struct page, lru);
 		list_del(&page->lru);
 		rmv_page_order(page);
 		area->nr_free--;
 		__mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
-		expand(zone, page, order, current_order, area);
+		expand(zone, page, order, current_order, area, migratetype);
 		return page;
 	}
 
 	return NULL;
 }
 
+
+/*
+ * This array describes the order lists are fallen back to when
+ * the free lists for the desirable migrate type are depleted
+ */
+static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
+	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_RESERVE },
+	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_RESERVE },
+	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
+	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE,     MIGRATE_RESERVE,   MIGRATE_RESERVE }, /* Never used */
+};
+
+/*
+ * Move the free pages in a range to the free lists of the requested type.
+ * Note that start_page and end_page are not aligned on a pageblock
+ * boundary. If alignment is required, use move_freepages_block()
+ */
+int move_freepages(struct zone *zone,
+			struct page *start_page, struct page *end_page,
+			int migratetype)
+{
+	struct page *page;
+	unsigned long order;
+	int pages_moved = 0;
+
+#ifndef CONFIG_HOLES_IN_ZONE
+	/*
+	 * page_zone is not safe to call in this context when
+	 * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
+	 * anyway as we check zone boundaries in move_freepages_block().
+	 * Remove at a later date when no bug reports exist related to
+	 * grouping pages by mobility
+	 */
+	BUG_ON(page_zone(start_page) != page_zone(end_page));
+#endif
+
+	for (page = start_page; page <= end_page;) {
+		if (!pfn_valid_within(page_to_pfn(page))) {
+			page++;
+			continue;
+		}
+
+		if (!PageBuddy(page)) {
+			page++;
+			continue;
+		}
+
+		order = page_order(page);
+		list_del(&page->lru);
+		list_add(&page->lru,
+			&zone->free_area[order].free_list[migratetype]);
+		page += 1 << order;
+		pages_moved += 1 << order;
+	}
+
+	return pages_moved;
+}
+
+int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
+{
+	unsigned long start_pfn, end_pfn;
+	struct page *start_page, *end_page;
+
+	start_pfn = page_to_pfn(page);
+	start_pfn = start_pfn & ~(pageblock_nr_pages-1);
+	start_page = pfn_to_page(start_pfn);
+	end_page = start_page + pageblock_nr_pages - 1;
+	end_pfn = start_pfn + pageblock_nr_pages - 1;
+
+	/* Do not cross zone boundaries */
+	if (start_pfn < zone->zone_start_pfn)
+		start_page = page;
+	if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
+		return 0;
+
+	return move_freepages(zone, start_page, end_page, migratetype);
+}
+
+/* Return the page with the lowest PFN in the list, or NULL if it is empty */
+static struct page *min_page(struct list_head *list)
+{
+	unsigned long min_pfn = -1UL;
+	struct page *min_page = NULL, *page;
+
+	list_for_each_entry(page, list, lru) {
+		unsigned long pfn = page_to_pfn(page);
+		if (pfn < min_pfn) {
+			min_pfn = pfn;
+			min_page = page;
+		}
+	}
+
+	return min_page;
+}
+
+/* Remove an element from the buddy allocator from the fallback list */
+static struct page *__rmqueue_fallback(struct zone *zone, int order,
+						int start_migratetype)
+{
+	struct free_area * area;
+	int current_order;
+	struct page *page;
+	int migratetype, i;
+
+	/* Find the largest possible block of pages in the other list */
+	for (current_order = MAX_ORDER-1; current_order >= order;
+						--current_order) {
+		for (i = 0; i < MIGRATE_TYPES - 1; i++) {
+			migratetype = fallbacks[start_migratetype][i];
+
+			/* MIGRATE_RESERVE handled later if necessary */
+			if (migratetype == MIGRATE_RESERVE)
+				continue;
+
+			area = &(zone->free_area[current_order]);
+			if (list_empty(&area->free_list[migratetype]))
+				continue;
+
+			/* Bias kernel allocations towards low pfns */
+			page = list_entry(area->free_list[migratetype].next,
+					struct page, lru);
+			if (unlikely(start_migratetype != MIGRATE_MOVABLE))
+				page = min_page(&area->free_list[migratetype]);
+			area->nr_free--;
+
+			/*
+			 * If breaking a large block of pages, move all free
+			 * pages to the preferred allocation list. If falling
+			 * back for a reclaimable kernel allocation, be more
+			 * aggressive about taking ownership of free pages
+			 */
+			if (unlikely(current_order >= (pageblock_order >> 1)) ||
+					start_migratetype == MIGRATE_RECLAIMABLE) {
+				unsigned long pages;
+				pages = move_freepages_block(zone, page,
+								start_migratetype);
+
+				/* Claim the whole block if over half of it is free */
+				if (pages >= (1 << (pageblock_order-1)))
+					set_pageblock_migratetype(page,
+								start_migratetype);
+
+				migratetype = start_migratetype;
+			}
+
+			/* Remove the page from the freelists */
+			list_del(&page->lru);
+			rmv_page_order(page);
+			__mod_zone_page_state(zone, NR_FREE_PAGES,
+							-(1UL << order));
+
+			if (current_order == pageblock_order)
+				set_pageblock_migratetype(page,
+							start_migratetype);
+
+			expand(zone, page, order, current_order, area, migratetype);
+			return page;
+		}
+	}
+
+	/* Use MIGRATE_RESERVE rather than fail an allocation */
+	return __rmqueue_smallest(zone, order, MIGRATE_RESERVE);
+}
+
+/*
+ * Do the hard work of removing an element from the buddy allocator.
+ * Call me with the zone->lock already held.
+ */
+static struct page *__rmqueue(struct zone *zone, unsigned int order,
+						int migratetype)
+{
+	struct page *page;
+
+	page = __rmqueue_smallest(zone, order, migratetype);
+
+	if (unlikely(!page))
+		page = __rmqueue_fallback(zone, order, migratetype);
+
+	return page;
+}
+
 /* 
  * Obtain a specified number of elements from the buddy allocator, all under
  * a single hold of the lock, for efficiency.  Add them to the supplied list.
  * Returns the number of new pages which were placed at *list.
  */
 static int rmqueue_bulk(struct zone *zone, unsigned int order, 
-			unsigned long count, struct list_head *list)
+			unsigned long count, struct list_head *list,
+			int migratetype)
 {
 	int i;
 	
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		struct page *page = __rmqueue(zone, order);
+		struct page *page = __rmqueue(zone, order, migratetype);
 		if (unlikely(page == NULL))
 			break;
-		list_add_tail(&page->lru, list);
+		list_add(&page->lru, list);
+		set_page_private(page, migratetype);
 	}
 	spin_unlock(&zone->lock);
 	return i;
@@ -732,7 +930,7 @@ void mark_free_pages(struct zone *zone)
 {
 	unsigned long pfn, max_zone_pfn;
 	unsigned long flags;
-	int order;
+	int order, t;
 	struct list_head *curr;
 
 	if (!zone->spanned_pages)
@@ -749,17 +947,18 @@ void mark_free_pages(struct zone *zone)
 				swsusp_unset_page_free(page);
 		}
 
-	for (order = MAX_ORDER - 1; order >= 0; --order)
-		list_for_each(curr, &zone->free_area[order].free_list) {
+	for_each_migratetype_order(order, t) {
+		list_for_each(curr, &zone->free_area[order].free_list[t]) {
 			unsigned long i;
 
 			pfn = page_to_pfn(list_entry(curr, struct page, lru));
 			for (i = 0; i < (1UL << order); i++)
 				swsusp_set_page_free(pfn_to_page(pfn + i));
 		}
-
+	}
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
+#endif /* CONFIG_PM */
 
 /*
  * Spill all of this CPU's per-cpu pages back into the buddy allocator.
@@ -772,7 +971,25 @@ void drain_local_pages(void)
 	__drain_pages(smp_processor_id());
 	local_irq_restore(flags);	
 }
-#endif /* CONFIG_HIBERNATION */
+
+void smp_drain_local_pages(void *arg)
+{
+	drain_local_pages();
+}
+
+/*
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ */
+void drain_all_local_pages(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__drain_pages(smp_processor_id());
+	local_irq_restore(flags);
+
+	smp_call_function(smp_drain_local_pages, NULL, 0, 1);
+}
 
 /*
  * Free a 0-order page
@@ -797,6 +1014,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 	local_irq_save(flags);
 	__count_vm_event(PGFREE);
 	list_add(&page->lru, &pcp->list);
+	set_page_private(page, get_pageblock_migratetype(page));
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
@@ -846,6 +1064,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
 	int cpu;
+	int migratetype = allocflags_to_migratetype(gfp_flags);
 
 again:
 	cpu  = get_cpu();
@@ -856,16 +1075,28 @@ again:
 		local_irq_save(flags);
 		if (!pcp->count) {
 			pcp->count = rmqueue_bulk(zone, 0,
-						pcp->batch, &pcp->list);
+					pcp->batch, &pcp->list, migratetype);
 			if (unlikely(!pcp->count))
 				goto failed;
 		}
-		page = list_entry(pcp->list.next, struct page, lru);
+
+		/* Find a page of the appropriate migrate type */
+		list_for_each_entry(page, &pcp->list, lru)
+			if (page_private(page) == migratetype)
+				break;
+
+		/* Allocate more to the pcp list if necessary */
+		if (unlikely(&page->lru == &pcp->list)) {
+			pcp->count += rmqueue_bulk(zone, 0,
+					pcp->batch, &pcp->list, migratetype);
+			page = list_entry(pcp->list.next, struct page, lru);
+		}
+
 		list_del(&page->lru);
 		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
-		page = __rmqueue(zone, order);
+		page = __rmqueue(zone, order, migratetype);
 		spin_unlock(&zone->lock);
 		if (!page)
 			goto failed;
@@ -1032,7 +1263,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
  *
  * If the zonelist cache is present in the passed in zonelist, then
  * returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_online_map.)
+ * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
  *
  * If the zonelist cache is not available for this zonelist, does
  * nothing and returns NULL.
@@ -1061,7 +1292,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
 
 	allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
 					&cpuset_current_mems_allowed :
-					&node_online_map;
+					&node_states[N_HIGH_MEMORY];
 	return allowednodes;
 }
 
@@ -1183,9 +1414,6 @@ zonelist_scan:
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
 		zone = *z;
-		if (unlikely(NUMA_BUILD && (gfp_mask & __GFP_THISNODE) &&
-			zone->zone_pgdat != zonelist->zones[0]->zone_pgdat))
-				break;
 		if ((alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				goto try_next_zone;
@@ -1254,7 +1482,10 @@ restart:
 	z = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 
 	if (unlikely(*z == NULL)) {
-		/* Should this ever happen?? */
+		/*
+		 * Happens if we have an empty zonelist as a result of
+		 * GFP_THISNODE being used on a memoryless node
+		 */
 		return NULL;
 	}
 
@@ -1346,6 +1577,9 @@ nofail_alloc:
 
 	cond_resched();
 
+	if (order != 0)
+		drain_all_local_pages();
+
 	if (likely(did_some_progress)) {
 		page = get_page_from_freelist(gfp_mask, order,
 						zonelist, alloc_flags);
@@ -1794,7 +2028,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
 		return node;
 	}
 
-	for_each_online_node(n) {
+	for_each_node_state(n, N_HIGH_MEMORY) {
 		cpumask_t tmp;
 
 		/* Don't want a node to appear more than once */
@@ -1850,6 +2084,22 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
 }
 
 /*
+ * Build gfp_thisnode zonelists
+ */
+static void build_thisnode_zonelists(pg_data_t *pgdat)
+{
+	enum zone_type i;
+	int j;
+	struct zonelist *zonelist;
+
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		zonelist = pgdat->node_zonelists + MAX_NR_ZONES + i;
+		j = build_zonelists_node(pgdat, zonelist, 0, i);
+		zonelist->zones[j] = NULL;
+	}
+}
+
+/*
  * Build zonelists ordered by zone and nodes within zones.
  * This results in conserving DMA zone[s] until all Normal memory is
  * exhausted, but results in overflowing to remote node while memory
@@ -1915,7 +2165,8 @@ static int default_zonelist_order(void)
   	 * If there is a node whose DMA/DMA32 memory is very big area on
  	 * local memory, NODE_ORDER may be suitable.
          */
-	average_size = total_size / (num_online_nodes() + 1);
+	average_size = total_size /
+				(nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
 	for_each_online_node(nid) {
 		low_kmem_size = 0;
 		total_size = 0;
@@ -1953,7 +2204,7 @@ static void build_zonelists(pg_data_t *pgdat)
 	int order = current_zonelist_order;
 
 	/* initialize zonelists */
-	for (i = 0; i < MAX_NR_ZONES; i++) {
+	for (i = 0; i < MAX_ZONELISTS; i++) {
 		zonelist = pgdat->node_zonelists + i;
 		zonelist->zones[0] = NULL;
 	}
@@ -1998,6 +2249,8 @@ static void build_zonelists(pg_data_t *pgdat)
 		/* calculate node order -- i.e., DMA last! */
 		build_zonelists_in_zone_order(pgdat, j);
 	}
+
+	build_thisnode_zonelists(pgdat);
 }
 
 /* Construct the zonelist performance cache - see further mmzone.h */
@@ -2078,8 +2331,10 @@ static int __build_all_zonelists(void *dummy)
 	int nid;
 
 	for_each_online_node(nid) {
-		build_zonelists(NODE_DATA(nid));
-		build_zonelist_cache(NODE_DATA(nid));
+		pg_data_t *pgdat = NODE_DATA(nid);
+
+		build_zonelists(pgdat);
+		build_zonelist_cache(pgdat);
 	}
 	return 0;
 }
@@ -2098,9 +2353,23 @@ void build_all_zonelists(void)
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
-	printk("Built %i zonelists in %s order.  Total pages: %ld\n",
+	/*
+	 * Disable grouping by mobility if the number of pages in the
+	 * system is too low to allow the mechanism to work. It would be
+	 * more accurate, but expensive to check per-zone. This check is
+	 * made on memory-hotadd so a system can start with mobility
+	 * disabled and enable it later
+	 */
+	if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES))
+		page_group_by_mobility_disabled = 1;
+	else
+		page_group_by_mobility_disabled = 0;
+
+	printk("Built %i zonelists in %s order, mobility grouping %s.  "
+		"Total pages: %ld\n",
 			num_online_nodes(),
 			zonelist_order_name[current_zonelist_order],
+			page_group_by_mobility_disabled ? "off" : "on",
 			vm_total_pages);
 #ifdef CONFIG_NUMA
 	printk("Policy zone: %s\n", zone_names[policy_zone]);
@@ -2176,6 +2445,61 @@ static inline unsigned long wait_table_bits(unsigned long size)
 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
 
 /*
+ * Mark a number of pageblocks as MIGRATE_RESERVE. The number
+ * of blocks reserved is based on zone->pages_min. The memory within the
+ * reserve will tend to store contiguous free pages. Setting min_free_kbytes
+ * higher will lead to a bigger reserve which will get freed as contiguous
+ * blocks as reclaim kicks in
+ */
+static void setup_zone_migrate_reserve(struct zone *zone)
+{
+	unsigned long start_pfn, pfn, end_pfn;
+	struct page *page;
+	unsigned long reserve, block_migratetype;
+
+	/* Get the start pfn, end pfn and the number of blocks to reserve */
+	start_pfn = zone->zone_start_pfn;
+	end_pfn = start_pfn + zone->spanned_pages;
+	reserve = roundup(zone->pages_min, pageblock_nr_pages) >>
+							pageblock_order;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		if (!pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+
+		/* Blocks with reserved pages will never free, skip them. */
+		if (PageReserved(page))
+			continue;
+
+		block_migratetype = get_pageblock_migratetype(page);
+
+		/* If this block is reserved, account for it */
+		if (reserve > 0 && block_migratetype == MIGRATE_RESERVE) {
+			reserve--;
+			continue;
+		}
+
+		/* Suitable for reserving if this block is movable */
+		if (reserve > 0 && block_migratetype == MIGRATE_MOVABLE) {
+			set_pageblock_migratetype(page, MIGRATE_RESERVE);
+			move_freepages_block(zone, page, MIGRATE_RESERVE);
+			reserve--;
+			continue;
+		}
+
+		/*
+		 * If the reserve is met and this is a previously reserved
+		 * block, take it back
+		 */
+		if (block_migratetype == MIGRATE_RESERVE) {
+			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+			move_freepages_block(zone, page, MIGRATE_MOVABLE);
+		}
+	}
+}
+
+/*
  * Initially all pages are reserved - free ones are freed
  * up by free_all_bootmem() once the early boot process is
  * done. Non-atomic initialization, single-pass.
@@ -2204,6 +2528,19 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		init_page_count(page);
 		reset_page_mapcount(page);
 		SetPageReserved(page);
+
+		/*
+		 * Mark the block movable so that blocks are reserved for
+		 * movable at startup. This will force kernel allocations
+		 * to reserve their blocks rather than leaking throughout
+		 * the address space during boot when many long-lived
+		 * kernel allocations are made. Later some blocks near
+		 * the start are marked MIGRATE_RESERVE by
+		 * setup_zone_migrate_reserve()
+		 */
+		if ((pfn & (pageblock_nr_pages-1)))
+			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+
 		INIT_LIST_HEAD(&page->lru);
 #ifdef WANT_PAGE_VIRTUAL
 		/* The shift won't overflow because ZONE_NORMAL is below 4G. */
@@ -2216,9 +2553,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 static void __meminit zone_init_free_lists(struct pglist_data *pgdat,
 				struct zone *zone, unsigned long size)
 {
-	int order;
-	for (order = 0; order < MAX_ORDER ; order++) {
-		INIT_LIST_HEAD(&zone->free_area[order].free_list);
+	int order, t;
+	for_each_migratetype_order(order, t) {
+		INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
 		zone->free_area[order].nr_free = 0;
 	}
 }
@@ -2324,6 +2661,9 @@ static struct per_cpu_pageset boot_pageset[NR_CPUS];
 static int __cpuinit process_zones(int cpu)
 {
 	struct zone *zone, *dzone;
+	int node = cpu_to_node(cpu);
+
+	node_set_state(node, N_CPU);	/* this node has a cpu */
 
 	for_each_zone(zone) {
 
@@ -2331,7 +2671,7 @@ static int __cpuinit process_zones(int cpu)
 			continue;
 
 		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
-					 GFP_KERNEL, cpu_to_node(cpu));
+					 GFP_KERNEL, node);
 		if (!zone_pcp(zone, cpu))
 			goto bad;
 
@@ -2444,7 +2784,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
 		 * To use this new node's memory, further consideration will be
 		 * necessary.
 		 */
-		zone->wait_table = (wait_queue_head_t *)vmalloc(alloc_size);
+		zone->wait_table = vmalloc(alloc_size);
 	}
 	if (!zone->wait_table)
 		return -ENOMEM;
@@ -2680,10 +3020,8 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
 		*end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
 	}
 
-	if (*start_pfn == -1UL) {
-		printk(KERN_WARNING "Node %u active with no memory\n", nid);
+	if (*start_pfn == -1UL)
 		*start_pfn = 0;
-	}
 
 	/* Push the node boundaries out if requested */
 	account_node_boundary(nid, start_pfn, end_pfn);
@@ -2901,6 +3239,62 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
 							realtotalpages);
 }
 
+#ifndef CONFIG_SPARSEMEM
+/*
+ * Calculate the size of zone->pageblock_flags rounded up to an unsigned long.
+ * Start by making sure zonesize is a multiple of pageblock_nr_pages by
+ * rounding up. Then use NR_PAGEBLOCK_BITS worth of bits per pageblock,
+ * finally round what is now in bits up to the nearest long in bits, then
+ * return it in bytes.
+ */
+static unsigned long __init usemap_size(unsigned long zonesize)
+{
+	unsigned long usemapsize;
+
+	usemapsize = roundup(zonesize, pageblock_nr_pages);
+	usemapsize = usemapsize >> pageblock_order;
+	usemapsize *= NR_PAGEBLOCK_BITS;
+	usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long));
+
+	return usemapsize / 8;
+}
+
+static void __init setup_usemap(struct pglist_data *pgdat,
+				struct zone *zone, unsigned long zonesize)
+{
+	unsigned long usemapsize = usemap_size(zonesize);
+	zone->pageblock_flags = NULL;
+	if (usemapsize) {
+		zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
+		memset(zone->pageblock_flags, 0, usemapsize);
+	}
+}
+#else
+static inline void setup_usemap(struct pglist_data *pgdat,
+				struct zone *zone, unsigned long zonesize) {}
+#endif /* CONFIG_SPARSEMEM */
+
+#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
+/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
+static inline void __init set_pageblock_order(unsigned int order)
+{
+	/* Check that pageblock_order has not already been set up */
+	if (pageblock_order)
+		return;
+
+	/*
+	 * Assume the largest contiguous order of interest is a huge page.
+	 * This value may be variable depending on boot parameters on IA64
+	 */
+	pageblock_order = order;
+}
+#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+
+/* Defined this way to avoid accidentally referencing HUGETLB_PAGE_ORDER */
+#define set_pageblock_order(x)	do {} while (0)
+
+#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -2981,6 +3375,8 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		if (!size)
 			continue;
 
+		set_pageblock_order(HUGETLB_PAGE_ORDER);
+		setup_usemap(pgdat, zone, size);
 		ret = init_currently_empty_zone(zone, zone_start_pfn,
 						size, MEMMAP_EARLY);
 		BUG_ON(ret);
@@ -3234,16 +3630,24 @@ unsigned long __init find_max_pfn_with_active_regions(void)
 	return max_pfn;
 }
 
-unsigned long __init early_calculate_totalpages(void)
+/*
+ * early_calculate_totalpages()
+ * Sum pages in active regions for movable zone.
+ * Populate N_HIGH_MEMORY for calculating usable_nodes.
+ */
+static unsigned long __init early_calculate_totalpages(void)
 {
 	int i;
 	unsigned long totalpages = 0;
 
-	for (i = 0; i < nr_nodemap_entries; i++)
-		totalpages += early_node_map[i].end_pfn -
+	for (i = 0; i < nr_nodemap_entries; i++) {
+		unsigned long pages = early_node_map[i].end_pfn -
 						early_node_map[i].start_pfn;
-
-	return totalpages;
+		totalpages += pages;
+		if (pages)
+			node_set_state(early_node_map[i].nid, N_HIGH_MEMORY);
+	}
+  	return totalpages;
 }
 
 /*
@@ -3257,7 +3661,8 @@ void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
 	int i, nid;
 	unsigned long usable_startpfn;
 	unsigned long kernelcore_node, kernelcore_remaining;
-	int usable_nodes = num_online_nodes();
+	unsigned long totalpages = early_calculate_totalpages();
+	int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
 
 	/*
 	 * If movablecore was specified, calculate what size of
@@ -3268,7 +3673,6 @@ void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
 	 * what movablecore would have allowed.
 	 */
 	if (required_movablecore) {
-		unsigned long totalpages = early_calculate_totalpages();
 		unsigned long corepages;
 
 		/*
@@ -3293,7 +3697,7 @@ void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn)
 restart:
 	/* Spread kernelcore memory as evenly as possible throughout nodes */
 	kernelcore_node = required_kernelcore / usable_nodes;
-	for_each_online_node(nid) {
+	for_each_node_state(nid, N_HIGH_MEMORY) {
 		/*
 		 * Recalculate kernelcore_node if the division per node
 		 * now exceeds what is necessary to satisfy the requested
@@ -3385,6 +3789,20 @@ restart:
 			roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
 }
 
+/* Any regular memory on that node ? */
+static void check_for_regular_memory(pg_data_t *pgdat)
+{
+#ifdef CONFIG_HIGHMEM
+	enum zone_type zone_type;
+
+	for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
+		struct zone *zone = &pgdat->node_zones[zone_type];
+		if (zone->present_pages)
+			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+	}
+#endif
+}
+
 /**
  * free_area_init_nodes - Initialise all pg_data_t and zone data
  * @max_zone_pfn: an array of max PFNs for each zone
@@ -3459,6 +3877,11 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 		pg_data_t *pgdat = NODE_DATA(nid);
 		free_area_init_node(nid, pgdat, NULL,
 				find_min_pfn_for_node(nid), NULL);
+
+		/* Any memory on that node */
+		if (pgdat->node_present_pages)
+			node_set_state(nid, N_HIGH_MEMORY);
+		check_for_regular_memory(pgdat);
 	}
 }
 
@@ -3673,6 +4096,7 @@ void setup_per_zone_pages_min(void)
 
 		zone->pages_low   = zone->pages_min + (tmp >> 2);
 		zone->pages_high  = zone->pages_min + (tmp >> 1);
+		setup_zone_migrate_reserve(zone);
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 
@@ -3934,4 +4358,169 @@ EXPORT_SYMBOL(pfn_to_page);
 EXPORT_SYMBOL(page_to_pfn);
 #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
 
+/* Return a pointer to the bitmap storing bits affecting a block of pages */
+static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
+							unsigned long pfn)
+{
+#ifdef CONFIG_SPARSEMEM
+	return __pfn_to_section(pfn)->pageblock_flags;
+#else
+	return zone->pageblock_flags;
+#endif /* CONFIG_SPARSEMEM */
+}
+
+static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
+{
+#ifdef CONFIG_SPARSEMEM
+	pfn &= (PAGES_PER_SECTION-1);
+	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
+#else
+	pfn = pfn - zone->zone_start_pfn;
+	return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
+#endif /* CONFIG_SPARSEMEM */
+}
+
+/**
+ * get_pageblock_flags_group - Return the requested group of flags for the pageblock_nr_pages block of pages
+ * @page: The page within the block of interest
+ * @start_bitidx: The first bit of interest to retrieve
+ * @end_bitidx: The last bit of interest
+ * returns pageblock_bits flags
+ */
+unsigned long get_pageblock_flags_group(struct page *page,
+					int start_bitidx, int end_bitidx)
+{
+	struct zone *zone;
+	unsigned long *bitmap;
+	unsigned long pfn, bitidx;
+	unsigned long flags = 0;
+	unsigned long value = 1;
+
+	zone = page_zone(page);
+	pfn = page_to_pfn(page);
+	bitmap = get_pageblock_bitmap(zone, pfn);
+	bitidx = pfn_to_bitidx(zone, pfn);
+
+	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
+		if (test_bit(bitidx + start_bitidx, bitmap))
+			flags |= value;
+
+	return flags;
+}
 
+/**
+ * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages
+ * @page: The page within the block of interest
+ * @start_bitidx: The first bit of interest
+ * @end_bitidx: The last bit of interest
+ * @flags: The flags to set
+ */
+void set_pageblock_flags_group(struct page *page, unsigned long flags,
+					int start_bitidx, int end_bitidx)
+{
+	struct zone *zone;
+	unsigned long *bitmap;
+	unsigned long pfn, bitidx;
+	unsigned long value = 1;
+
+	zone = page_zone(page);
+	pfn = page_to_pfn(page);
+	bitmap = get_pageblock_bitmap(zone, pfn);
+	bitidx = pfn_to_bitidx(zone, pfn);
+
+	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
+		if (flags & value)
+			__set_bit(bitidx + start_bitidx, bitmap);
+		else
+			__clear_bit(bitidx + start_bitidx, bitmap);
+}
+
+/*
+ * These are designed as helper functions; see page_isolation.c as well.
+ * They set/clear a pageblock's migrate type to be ISOLATE.
+ * The page allocator never allocates memory from an ISOLATE block.
+ */
+
+int set_migratetype_isolate(struct page *page)
+{
+	struct zone *zone;
+	unsigned long flags;
+	int ret = -EBUSY;
+
+	zone = page_zone(page);
+	spin_lock_irqsave(&zone->lock, flags);
+	/*
+	 * In future, more migrate types will be able to be isolation target.
+	 */
+	if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+		goto out;
+	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+	move_freepages_block(zone, page, MIGRATE_ISOLATE);
+	ret = 0;
+out:
+	spin_unlock_irqrestore(&zone->lock, flags);
+	if (!ret)
+		drain_all_local_pages();
+	return ret;
+}
+
+void unset_migratetype_isolate(struct page *page)
+{
+	struct zone *zone;
+	unsigned long flags;
+	zone = page_zone(page);
+	spin_lock_irqsave(&zone->lock, flags);
+	if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
+		goto out;
+	set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+	move_freepages_block(zone, page, MIGRATE_MOVABLE);
+out:
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/*
+ * All pages in the range must be isolated before calling this.
+ */
+void
+__offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct page *page;
+	struct zone *zone;
+	int order, i;
+	unsigned long pfn;
+	unsigned long flags;
+	/* find the first valid pfn */
+	for (pfn = start_pfn; pfn < end_pfn; pfn++)
+		if (pfn_valid(pfn))
+			break;
+	if (pfn == end_pfn)
+		return;
+	zone = page_zone(pfn_to_page(pfn));
+	spin_lock_irqsave(&zone->lock, flags);
+	pfn = start_pfn;
+	while (pfn < end_pfn) {
+		if (!pfn_valid(pfn)) {
+			pfn++;
+			continue;
+		}
+		page = pfn_to_page(pfn);
+		BUG_ON(page_count(page));
+		BUG_ON(!PageBuddy(page));
+		order = page_order(page);
+#ifdef CONFIG_DEBUG_VM
+		printk(KERN_INFO "remove from free list %lx %d %lx\n",
+		       pfn, 1 << order, end_pfn);
+#endif
+		list_del(&page->lru);
+		rmv_page_order(page);
+		zone->free_area[order].nr_free--;
+		__mod_zone_page_state(zone, NR_FREE_PAGES,
+				      - (1UL << order));
+		for (i = 0; i < (1 << order); i++)
+			SetPageReserved((page+i));
+		pfn += (1 << order);
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+}
+#endif
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
new file mode 100644
index 00000000000..8f92a29695c
--- /dev/null
+++ b/mm/page_isolation.c
@@ -0,0 +1,138 @@
+/*
+ * linux/mm/page_isolation.c
+ */
+
+#include <stddef.h>
+#include <linux/mm.h>
+#include <linux/page-isolation.h>
+#include <linux/pageblock-flags.h>
+#include "internal.h"
+
+/* Return the first valid page in [pfn, pfn + nr_pages), or NULL if none. */
+static inline struct page *
+__first_valid_page(unsigned long pfn, unsigned long nr_pages)
+{
+	unsigned long i;	/* unsigned, to match nr_pages */
+
+	for (i = 0; i < nr_pages; i++)
+		if (pfn_valid_within(pfn + i))
+			return pfn_to_page(pfn + i);
+	return NULL;
+}
+
+/*
+ * start_isolate_page_range() -- make page-allocation-type of range of pages
+ * to be MIGRATE_ISOLATE.
+ * @start_pfn: The lower PFN of the range to be isolated.
+ * @end_pfn: The upper PFN of the range to be isolated.
+ *
+ * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
+ * the range will never be allocated. Any free pages and pages freed in the
+ * future will not be allocated again.
+ *
+ * start_pfn/end_pfn must be aligned to pageblock_order.
+ * Returns 0 on success and -EBUSY if any part of range cannot be isolated.
+ */
+int
+start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+	unsigned long undo_pfn;
+	struct page *page;
+
+	BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
+	BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		page = __first_valid_page(pfn, pageblock_nr_pages);
+		if (page && set_migratetype_isolate(page)) {
+			undo_pfn = pfn;
+			goto undo;
+		}
+	}
+	return 0;
+undo:
+	/*
+	 * Roll back only the blocks this call isolated, i.e. those strictly
+	 * below undo_pfn.  The block at undo_pfn is the one that failed: it
+	 * was never isolated here and must be left untouched.
+	 */
+	for (pfn = start_pfn; pfn < undo_pfn; pfn += pageblock_nr_pages)
+		unset_migratetype_isolate(pfn_to_page(pfn));
+	return -EBUSY;
+}
+
+/*
+ * Make isolated pages available again.
+ */
+int
+undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+	struct page *page;
+	BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
+	BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
+	for (pfn = start_pfn;
+	     pfn < end_pfn;
+	     pfn += pageblock_nr_pages) {
+		page = __first_valid_page(pfn, pageblock_nr_pages);
+		if (!page || get_pageblock_flags(page) != MIGRATE_ISOLATE)
+			continue;
+		unset_migratetype_isolate(page);
+	}
+	return 0;
+}
+/*
+ * Test whether all pages in the range are free (i.e. isolated) or not.
+ * All pages in [start_pfn...end_pfn) must be in the same zone.
+ * zone->lock must be held before calling this.
+ *
+ * Returns 1 if all pages in the range are isolated, 0 otherwise.
+ */
+static int
+__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
+{
+	struct page *page;
+
+	while (pfn < end_pfn) {
+		if (!pfn_valid_within(pfn)) {
+			pfn++;
+			continue;
+		}
+		page = pfn_to_page(pfn);
+		if (PageBuddy(page))
+			pfn += 1 << page_order(page);
+		else if (page_count(page) == 0 &&
+				page_private(page) == MIGRATE_ISOLATE)
+			pfn += 1;
+		else
+			break;
+	}
+	if (pfn < end_pfn)
+		return 0;
+	return 1;
+}
+
+int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long pfn;
+	struct page *page;
+
+	pfn = start_pfn;
+	/*
+	 * Note: pageblock_nr_pages != MAX_ORDER, so chunks of free pages
+	 * are not necessarily aligned to pageblock_nr_pages.
+	 * Therefore we just check the pageblock type first.
+	 */
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		page = __first_valid_page(pfn, pageblock_nr_pages);
+		if (page && get_pageblock_flags(page) != MIGRATE_ISOLATE)
+			break;
+	}
+	if (pfn < end_pfn)
+		return -EBUSY;
+	/* Check that all pages are free or marked as ISOLATED */
+	if (__test_page_isolated_in_pageblock(start_pfn, end_pfn))
+		return 0;
+	return -EBUSY;
+}
diff --git a/mm/readahead.c b/mm/readahead.c
index be20c9d699d..22978888401 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -22,16 +22,8 @@ void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 }
 EXPORT_SYMBOL(default_unplug_io_fn);
 
-/*
- * Convienent macros for min/max read-ahead pages.
- * Note that MAX_RA_PAGES is rounded down, while MIN_RA_PAGES is rounded up.
- * The latter is necessary for systems with large page size(i.e. 64k).
- */
-#define MAX_RA_PAGES	(VM_MAX_READAHEAD*1024 / PAGE_CACHE_SIZE)
-#define MIN_RA_PAGES	DIV_ROUND_UP(VM_MIN_READAHEAD*1024, PAGE_CACHE_SIZE)
-
 struct backing_dev_info default_backing_dev_info = {
-	.ra_pages	= MAX_RA_PAGES,
+	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
 	.state		= 0,
 	.capabilities	= BDI_CAP_MAP_COPY,
 	.unplug_io_fn	= default_unplug_io_fn,
@@ -46,7 +38,7 @@ void
 file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
 {
 	ra->ra_pages = mapping->backing_dev_info->ra_pages;
-	ra->prev_index = -1;
+	ra->prev_pos = -1;
 }
 EXPORT_SYMBOL_GPL(file_ra_state_init);
 
@@ -66,28 +58,25 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
 			int (*filler)(void *, struct page *), void *data)
 {
 	struct page *page;
-	struct pagevec lru_pvec;
 	int ret = 0;
 
-	pagevec_init(&lru_pvec, 0);
-
 	while (!list_empty(pages)) {
 		page = list_to_page(pages);
 		list_del(&page->lru);
-		if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) {
+		if (add_to_page_cache_lru(page, mapping,
+					page->index, GFP_KERNEL)) {
 			page_cache_release(page);
 			continue;
 		}
+		page_cache_release(page);
+
 		ret = filler(data, page);
-		if (!pagevec_add(&lru_pvec, page))
-			__pagevec_lru_add(&lru_pvec);
-		if (ret) {
+		if (unlikely(ret)) {
 			put_pages_list(pages);
 			break;
 		}
 		task_io_account_read(PAGE_CACHE_SIZE);
 	}
-	pagevec_lru_add(&lru_pvec);
 	return ret;
 }
 
@@ -97,7 +86,6 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 		struct list_head *pages, unsigned nr_pages)
 {
 	unsigned page_idx;
-	struct pagevec lru_pvec;
 	int ret;
 
 	if (mapping->a_ops->readpages) {
@@ -107,19 +95,15 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 		goto out;
 	}
 
-	pagevec_init(&lru_pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_to_page(pages);
 		list_del(&page->lru);
-		if (!add_to_page_cache(page, mapping,
+		if (!add_to_page_cache_lru(page, mapping,
 					page->index, GFP_KERNEL)) {
 			mapping->a_ops->readpage(filp, page);
-			if (!pagevec_add(&lru_pvec, page))
-				__pagevec_lru_add(&lru_pvec);
-		} else
-			page_cache_release(page);
+		}
+		page_cache_release(page);
 	}
-	pagevec_lru_add(&lru_pvec);
 	ret = 0;
 out:
 	return ret;
@@ -157,20 +141,19 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 	/*
 	 * Preallocate as many pages as we will need.
 	 */
-	read_lock_irq(&mapping->tree_lock);
 	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
 		pgoff_t page_offset = offset + page_idx;
 
 		if (page_offset > end_index)
 			break;
 
+		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->page_tree, page_offset);
+		rcu_read_unlock();
 		if (page)
 			continue;
 
-		read_unlock_irq(&mapping->tree_lock);
 		page = page_cache_alloc_cold(mapping);
-		read_lock_irq(&mapping->tree_lock);
 		if (!page)
 			break;
 		page->index = page_offset;
@@ -179,7 +162,6 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			SetPageReadahead(page);
 		ret++;
 	}
-	read_unlock_irq(&mapping->tree_lock);
 
 	/*
 	 * Now start the IO.  We ignore I/O errors - if the page is not
@@ -327,7 +309,7 @@ static unsigned long get_next_ra_size(struct file_ra_state *ra,
  * indicator. The flag won't be set on already cached pages, to avoid the
  * readahead-for-nothing fuss, saving pointless page cache lookups.
  *
- * prev_index tracks the last visited page in the _previous_ read request.
+ * prev_pos tracks the last visited byte in the _previous_ read request.
  * It should be maintained by the caller, and will be used for detecting
  * small random reads. Note that the readahead algorithm checks loosely
  * for sequential patterns. Hence interleaved reads might be served as
@@ -351,11 +333,9 @@ ondemand_readahead(struct address_space *mapping,
 		   bool hit_readahead_marker, pgoff_t offset,
 		   unsigned long req_size)
 {
-	unsigned long max;	/* max readahead pages */
-	int sequential;
-
-	max = ra->ra_pages;
-	sequential = (offset - ra->prev_index <= 1UL) || (req_size > max);
+	int	max = ra->ra_pages;	/* max readahead pages */
+	pgoff_t prev_offset;
+	int	sequential;
 
 	/*
 	 * It's the expected callback offset, assume sequential access.
@@ -369,6 +349,9 @@ ondemand_readahead(struct address_space *mapping,
 		goto readit;
 	}
 
+	prev_offset = ra->prev_pos >> PAGE_CACHE_SHIFT;
+	sequential = offset - prev_offset <= 1UL || req_size > max;
+
 	/*
 	 * Standalone, small read.
 	 * Read as is, and do not pollute the readahead state.
@@ -379,6 +362,29 @@ ondemand_readahead(struct address_space *mapping,
 	}
 
 	/*
+	 * Hit a marked page without valid readahead state.
+	 * E.g. interleaved reads.
+	 * Query the pagecache for async_size, which normally equals the
+	 * readahead size. Ramp it up and use it as the new readahead size.
+	 */
+	if (hit_readahead_marker) {
+		pgoff_t start;
+
+		read_lock_irq(&mapping->tree_lock);
+		start = radix_tree_next_hole(&mapping->page_tree, offset, max+1);
+		read_unlock_irq(&mapping->tree_lock);
+
+		if (!start || start - offset > max)
+			return 0;
+
+		ra->start = start;
+		ra->size = start - offset;	/* old async_size */
+		ra->size = get_next_ra_size(ra, max);
+		ra->async_size = ra->size;
+		goto readit;
+	}
+
+	/*
 	 * It may be one of
 	 * 	- first read on start of file
 	 * 	- sequential cache miss
@@ -389,16 +395,6 @@ ondemand_readahead(struct address_space *mapping,
 	ra->size = get_init_ra_size(req_size, max);
 	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
 
-	/*
-	 * Hit on a marked page without valid readahead state.
-	 * E.g. interleaved reads.
-	 * Not knowing its readahead pos/size, bet on the minimal possible one.
-	 */
-	if (hit_readahead_marker) {
-		ra->start++;
-		ra->size = get_next_ra_size(ra, max);
-	}
-
 readit:
 	return ra_submit(ra, mapping, filp);
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 41ac39749ef..2b9f413c9c0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -436,7 +436,6 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 		entry = pte_wrprotect(entry);
 		entry = pte_mkclean(entry);
 		set_pte_at(mm, address, pte, entry);
-		lazy_mmu_prot_update(entry);
 		ret = 1;
 	}
 
diff --git a/mm/shmem.c b/mm/shmem.c
index fcd19d323f9..8a82342a859 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -49,7 +49,6 @@
 #include <linux/ctype.h>
 #include <linux/migrate.h>
 #include <linux/highmem.h>
-#include <linux/backing-dev.h>
 
 #include <asm/uaccess.h>
 #include <asm/div64.h>
@@ -96,9 +95,9 @@ static inline struct page *shmem_dir_alloc(gfp_t gfp_mask)
 	 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
 	 * might be reconsidered if it ever diverges from PAGE_SIZE.
 	 *
-	 * __GFP_MOVABLE is masked out as swap vectors cannot move
+	 * Mobility flags are masked out as swap vectors cannot move
 	 */
-	return alloc_pages((gfp_mask & ~__GFP_MOVABLE) | __GFP_ZERO,
+	return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
 				PAGE_CACHE_SHIFT-PAGE_SHIFT);
 }
 
@@ -972,7 +971,7 @@ static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_
 		*nodelist++ = '\0';
 		if (nodelist_parse(nodelist, *policy_nodes))
 			goto out;
-		if (!nodes_subset(*policy_nodes, node_online_map))
+		if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY]))
 			goto out;
 	}
 	if (!strcmp(value, "default")) {
@@ -997,9 +996,11 @@ static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_
 			err = 0;
 	} else if (!strcmp(value, "interleave")) {
 		*policy = MPOL_INTERLEAVE;
-		/* Default to nodes online if no nodelist */
+		/*
+		 * Default to online nodes with memory if no nodelist
+		 */
 		if (!nodelist)
-			*policy_nodes = node_online_map;
+			*policy_nodes = node_states[N_HIGH_MEMORY];
 		err = 0;
 	}
 out:
@@ -1025,8 +1026,8 @@ static struct page *shmem_swapin_async(struct shared_policy *p,
 	return page;
 }
 
-struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry,
-			  unsigned long idx)
+static struct page *shmem_swapin(struct shmem_inode_info *info,
+				 swp_entry_t entry, unsigned long idx)
 {
 	struct shared_policy *p = &info->policy;
 	int i, num;
@@ -1061,7 +1062,8 @@ shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info,
 	return page;
 }
 #else
-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+static inline int shmem_parse_mpol(char *value, int *policy,
+						nodemask_t *policy_nodes)
 {
 	return 1;
 }
@@ -1109,7 +1111,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 	 * Normally, filepage is NULL on entry, and either found
 	 * uptodate immediately, or allocated and zeroed, or read
 	 * in under swappage, which is then assigned to filepage.
-	 * But shmem_readpage and shmem_prepare_write pass in a locked
+	 * But shmem_readpage and shmem_write_begin pass in a locked
 	 * filepage, which may be found not uptodate by other callers
 	 * too, and may need to be copied from the swappage read in.
 	 */
@@ -1327,14 +1329,14 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 }
 
 #ifdef CONFIG_NUMA
-int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
 {
 	struct inode *i = vma->vm_file->f_path.dentry->d_inode;
 	return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new);
 }
 
-struct mempolicy *
-shmem_get_policy(struct vm_area_struct *vma, unsigned long addr)
+static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
+					  unsigned long addr)
 {
 	struct inode *i = vma->vm_file->f_path.dentry->d_inode;
 	unsigned long idx;
@@ -1446,7 +1448,7 @@ static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_symlink_inline_operations;
 
 /*
- * Normally tmpfs avoids the use of shmem_readpage and shmem_prepare_write;
+ * Normally tmpfs avoids the use of shmem_readpage and shmem_write_begin;
  * but providing them allows a tmpfs file to be used for splice, sendfile, and
  * below the loop driver, in the generic fashion that many filesystems support.
  */
@@ -1459,10 +1461,30 @@ static int shmem_readpage(struct file *file, struct page *page)
 }
 
 static int
-shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+shmem_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
-	struct inode *inode = page->mapping->host;
-	return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL);
+	struct inode *inode = mapping->host;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	*pagep = NULL;
+	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+}
+
+static int
+shmem_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+
+	set_page_dirty(page);
+	page_cache_release(page);
+
+	if (pos+copied > inode->i_size)
+		i_size_write(inode, pos+copied);
+
+	return copied;
 }
 
 static ssize_t
@@ -2219,7 +2241,7 @@ static int shmem_fill_super(struct super_block *sb,
 	unsigned long blocks = 0;
 	unsigned long inodes = 0;
 	int policy = MPOL_DEFAULT;
-	nodemask_t policy_nodes = node_online_map;
+	nodemask_t policy_nodes = node_states[N_HIGH_MEMORY];
 
 #ifdef CONFIG_TMPFS
 	/*
@@ -2338,8 +2360,8 @@ static const struct address_space_operations shmem_aops = {
 	.set_page_dirty	= __set_page_dirty_no_writeback,
 #ifdef CONFIG_TMPFS
 	.readpage	= shmem_readpage,
-	.prepare_write	= shmem_prepare_write,
-	.commit_write	= simple_commit_write,
+	.write_begin	= shmem_write_begin,
+	.write_end	= shmem_write_end,
 #endif
 	.migratepage	= migrate_page,
 };
diff --git a/mm/slab.c b/mm/slab.c
index 6f6abef83a1..e34bcb87a6e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1568,7 +1568,7 @@ void __init kmem_cache_init(void)
 		/* Replace the static kmem_list3 structures for the boot cpu */
 		init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node);
 
-		for_each_online_node(nid) {
+		for_each_node_state(nid, N_NORMAL_MEMORY) {
 			init_list(malloc_sizes[INDEX_AC].cs_cachep,
 				  &initkmem_list3[SIZE_AC + nid], nid);
 
@@ -1643,6 +1643,8 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 #endif
 
 	flags |= cachep->gfpflags;
+	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+		flags |= __GFP_RECLAIMABLE;
 
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
 	if (!page)
@@ -1944,7 +1946,7 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index)
 {
 	int node;
 
-	for_each_online_node(node) {
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 		cachep->nodelists[node] = &initkmem_list3[index + node];
 		cachep->nodelists[node]->next_reap = jiffies +
 		    REAPTIMEOUT_LIST3 +
@@ -2075,7 +2077,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
 			g_cpucache_up = PARTIAL_L3;
 		} else {
 			int node;
-			for_each_online_node(node) {
+			for_each_node_state(node, N_NORMAL_MEMORY) {
 				cachep->nodelists[node] =
 				    kmalloc_node(sizeof(struct kmem_list3),
 						GFP_KERNEL, node);
@@ -2746,9 +2748,9 @@ static int cache_grow(struct kmem_cache *cachep,
 	 * Be lazy and only check for valid flags here,  keeping it out of the
 	 * critical path in kmem_cache_alloc().
 	 */
-	BUG_ON(flags & ~(GFP_DMA | __GFP_ZERO | GFP_LEVEL_MASK));
+	BUG_ON(flags & GFP_SLAB_BUG_MASK);
+	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
-	local_flags = (flags & GFP_LEVEL_MASK);
 	/* Take the l3 list lock to change the colour_next on this node */
 	check_irq_off();
 	l3 = cachep->nodelists[nodeid];
@@ -2785,7 +2787,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
 	/* Get slab management. */
 	slabp = alloc_slabmgmt(cachep, objp, offset,
-			local_flags & ~GFP_THISNODE, nodeid);
+			local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
 	if (!slabp)
 		goto opps1;
 
@@ -3225,7 +3227,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 
 	zonelist = &NODE_DATA(slab_node(current->mempolicy))
 			->node_zonelists[gfp_zone(flags)];
-	local_flags = (flags & GFP_LEVEL_MASK);
+	local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
 retry:
 	/*
@@ -3792,7 +3794,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 	struct array_cache *new_shared;
 	struct array_cache **new_alien = NULL;
 
-	for_each_online_node(node) {
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 
                 if (use_alien_caches) {
                         new_alien = alloc_alien_cache(node, cachep->limit);
@@ -4446,7 +4448,8 @@ const struct seq_operations slabstats_op = {
  */
 size_t ksize(const void *objp)
 {
-	if (unlikely(ZERO_OR_NULL_PTR(objp)))
+	BUG_ON(!objp);
+	if (unlikely(objp == ZERO_SIZE_PTR))
 		return 0;
 
 	return obj_size(virt_to_cache(objp));
diff --git a/mm/slob.c b/mm/slob.c
index ec33fcdc852..de5d5563a46 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -360,7 +360,7 @@ static void slob_free(void *block, int size)
 	slobidx_t units;
 	unsigned long flags;
 
-	if (ZERO_OR_NULL_PTR(block))
+	if (unlikely(ZERO_OR_NULL_PTR(block)))
 		return;
 	BUG_ON(!size);
 
@@ -466,7 +466,7 @@ void kfree(const void *block)
 {
 	struct slob_page *sp;
 
-	if (ZERO_OR_NULL_PTR(block))
+	if (unlikely(ZERO_OR_NULL_PTR(block)))
 		return;
 
 	sp = (struct slob_page *)virt_to_page(block);
@@ -484,7 +484,8 @@ size_t ksize(const void *block)
 {
 	struct slob_page *sp;
 
-	if (ZERO_OR_NULL_PTR(block))
+	BUG_ON(!block);
+	if (unlikely(block == ZERO_SIZE_PTR))
 		return 0;
 
 	sp = (struct slob_page *)virt_to_page(block);
diff --git a/mm/slub.c b/mm/slub.c
index addb20a6d67..f426f9bc644 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -90,7 +90,7 @@
  * 			One use of this flag is to mark slabs that are
  * 			used for allocations. Then such a slab becomes a cpu
  * 			slab. The cpu slab may be equipped with an additional
- * 			lockless_freelist that allows lockless access to
+ * 			freelist that allows lockless access to
  * 			free objects in addition to the regular freelist
  * 			that requires the slab lock.
  *
@@ -140,11 +140,6 @@ static inline void ClearSlabDebug(struct page *page)
 /*
  * Issues still to be resolved:
  *
- * - The per cpu array is updated for each new slab and and is a remote
- *   cacheline for most nodes. This could become a bouncing cacheline given
- *   enough frequent updates. There are 16 pointers in a cacheline, so at
- *   max 16 cpus could compete for the cacheline which may be okay.
- *
  * - Support PAGE_ALLOC_DEBUG. Should be easy to do.
  *
  * - Variable sizing of the per node arrays
@@ -205,11 +200,6 @@ static inline void ClearSlabDebug(struct page *page)
 #define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
 #endif
 
-/*
- * The page->inuse field is 16 bit thus we have this limitation
- */
-#define MAX_OBJECTS_PER_SLAB 65535
-
 /* Internal SLUB flags */
 #define __OBJECT_POISON		0x80000000 /* Poison object */
 #define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */
@@ -277,6 +267,15 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 #endif
 }
 
+static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
+{
+#ifdef CONFIG_SMP
+	return s->cpu_slab[cpu];
+#else
+	return &s->cpu_slab;
+#endif
+}
+
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
@@ -729,11 +728,6 @@ static int check_slab(struct kmem_cache *s, struct page *page)
 		slab_err(s, page, "Not a valid slab page");
 		return 0;
 	}
-	if (page->offset * sizeof(void *) != s->offset) {
-		slab_err(s, page, "Corrupted offset %lu",
-			(unsigned long)(page->offset * sizeof(void *)));
-		return 0;
-	}
 	if (page->inuse > s->objects) {
 		slab_err(s, page, "inuse %u > max %u",
 			s->name, page->inuse, s->objects);
@@ -872,8 +866,6 @@ bad:
 		slab_fix(s, "Marking all objects used");
 		page->inuse = s->objects;
 		page->freelist = NULL;
-		/* Fix up fields that may be corrupted */
-		page->offset = s->offset / sizeof(void *);
 	}
 	return 0;
 }
@@ -1055,6 +1047,9 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	if (s->flags & SLAB_CACHE_DMA)
 		flags |= SLUB_DMA;
 
+	if (s->flags & SLAB_RECLAIM_ACCOUNT)
+		flags |= __GFP_RECLAIMABLE;
+
 	if (node == -1)
 		page = alloc_pages(flags, s->order);
 	else
@@ -1088,19 +1083,19 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	void *last;
 	void *p;
 
-	BUG_ON(flags & ~(GFP_DMA | __GFP_ZERO | GFP_LEVEL_MASK));
+	BUG_ON(flags & GFP_SLAB_BUG_MASK);
 
 	if (flags & __GFP_WAIT)
 		local_irq_enable();
 
-	page = allocate_slab(s, flags & GFP_LEVEL_MASK, node);
+	page = allocate_slab(s,
+		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
 	if (!page)
 		goto out;
 
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
-	page->offset = s->offset / sizeof(void *);
 	page->slab = s;
 	page->flags |= 1 << PG_slab;
 	if (s->flags & (SLAB_DEBUG_FREE | SLAB_RED_ZONE | SLAB_POISON |
@@ -1123,7 +1118,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	set_freepointer(s, last, NULL);
 
 	page->freelist = start;
-	page->lockless_freelist = NULL;
 	page->inuse = 0;
 out:
 	if (flags & __GFP_WAIT)
@@ -1149,7 +1143,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		- pages);
 
-	page->mapping = NULL;
 	__free_pages(page, s->order);
 }
 
@@ -1383,33 +1376,34 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page)
 /*
  * Remove the cpu slab
  */
-static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu)
+static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
+	struct page *page = c->page;
 	/*
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
 	 */
-	while (unlikely(page->lockless_freelist)) {
+	while (unlikely(c->freelist)) {
 		void **object;
 
 		/* Retrieve object from cpu_freelist */
-		object = page->lockless_freelist;
-		page->lockless_freelist = page->lockless_freelist[page->offset];
+		object = c->freelist;
+		c->freelist = c->freelist[c->offset];
 
 		/* And put onto the regular freelist */
-		object[page->offset] = page->freelist;
+		object[c->offset] = page->freelist;
 		page->freelist = object;
 		page->inuse--;
 	}
-	s->cpu_slab[cpu] = NULL;
+	c->page = NULL;
 	unfreeze_slab(s, page);
 }
 
-static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
+static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
-	slab_lock(page);
-	deactivate_slab(s, page, cpu);
+	slab_lock(c->page);
+	deactivate_slab(s, c);
 }
 
 /*
@@ -1418,18 +1412,17 @@ static inline void flush_slab(struct kmem_cache *s, struct page *page, int cpu)
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
-	struct page *page = s->cpu_slab[cpu];
+	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
 
-	if (likely(page))
-		flush_slab(s, page, cpu);
+	if (likely(c && c->page))
+		flush_slab(s, c);
 }
 
 static void flush_cpu_slab(void *d)
 {
 	struct kmem_cache *s = d;
-	int cpu = smp_processor_id();
 
-	__flush_cpu_slab(s, cpu);
+	__flush_cpu_slab(s, smp_processor_id());
 }
 
 static void flush_all(struct kmem_cache *s)
@@ -1446,6 +1439,19 @@ static void flush_all(struct kmem_cache *s)
 }
 
 /*
+ * Check if the objects in a per cpu structure fit numa
+ * locality expectations.
+ */
+static inline int node_match(struct kmem_cache_cpu *c, int node)
+{
+#ifdef CONFIG_NUMA
+	if (node != -1 && c->node != node)
+		return 0;
+#endif
+	return 1;
+}
+
+/*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
  *
@@ -1463,45 +1469,46 @@ static void flush_all(struct kmem_cache *s)
  * we need to allocate a new slab. This is slowest path since we may sleep.
  */
 static void *__slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, int node, void *addr, struct page *page)
+		gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
 {
 	void **object;
-	int cpu = smp_processor_id();
+	struct page *new;
 
-	if (!page)
+	if (!c->page)
 		goto new_slab;
 
-	slab_lock(page);
-	if (unlikely(node != -1 && page_to_nid(page) != node))
+	slab_lock(c->page);
+	if (unlikely(!node_match(c, node)))
 		goto another_slab;
 load_freelist:
-	object = page->freelist;
+	object = c->page->freelist;
 	if (unlikely(!object))
 		goto another_slab;
-	if (unlikely(SlabDebug(page)))
+	if (unlikely(SlabDebug(c->page)))
 		goto debug;
 
-	object = page->freelist;
-	page->lockless_freelist = object[page->offset];
-	page->inuse = s->objects;
-	page->freelist = NULL;
-	slab_unlock(page);
+	object = c->page->freelist;
+	c->freelist = object[c->offset];
+	c->page->inuse = s->objects;
+	c->page->freelist = NULL;
+	c->node = page_to_nid(c->page);
+	slab_unlock(c->page);
 	return object;
 
 another_slab:
-	deactivate_slab(s, page, cpu);
+	deactivate_slab(s, c);
 
 new_slab:
-	page = get_partial(s, gfpflags, node);
-	if (page) {
-		s->cpu_slab[cpu] = page;
+	new = get_partial(s, gfpflags, node);
+	if (new) {
+		c->page = new;
 		goto load_freelist;
 	}
 
-	page = new_slab(s, gfpflags, node);
-	if (page) {
-		cpu = smp_processor_id();
-		if (s->cpu_slab[cpu]) {
+	new = new_slab(s, gfpflags, node);
+	if (new) {
+		c = get_cpu_slab(s, smp_processor_id());
+		if (c->page) {
 			/*
 			 * Someone else populated the cpu_slab while we
 			 * enabled interrupts, or we have gotten scheduled
@@ -1509,34 +1516,33 @@ new_slab:
 			 * requested node even if __GFP_THISNODE was
 			 * specified. So we need to recheck.
 			 */
-			if (node == -1 ||
-				page_to_nid(s->cpu_slab[cpu]) == node) {
+			if (node_match(c, node)) {
 				/*
 				 * Current cpuslab is acceptable and we
 				 * want the current one since its cache hot
 				 */
-				discard_slab(s, page);
-				page = s->cpu_slab[cpu];
-				slab_lock(page);
+				discard_slab(s, new);
+				slab_lock(c->page);
 				goto load_freelist;
 			}
 			/* New slab does not fit our expectations */
-			flush_slab(s, s->cpu_slab[cpu], cpu);
+			flush_slab(s, c);
 		}
-		slab_lock(page);
-		SetSlabFrozen(page);
-		s->cpu_slab[cpu] = page;
+		slab_lock(new);
+		SetSlabFrozen(new);
+		c->page = new;
 		goto load_freelist;
 	}
 	return NULL;
 debug:
-	object = page->freelist;
-	if (!alloc_debug_processing(s, page, object, addr))
+	object = c->page->freelist;
+	if (!alloc_debug_processing(s, c->page, object, addr))
 		goto another_slab;
 
-	page->inuse++;
-	page->freelist = object[page->offset];
-	slab_unlock(page);
+	c->page->inuse++;
+	c->page->freelist = object[c->offset];
+	c->node = -1;
+	slab_unlock(c->page);
 	return object;
 }
 
@@ -1553,25 +1559,24 @@ debug:
 static void __always_inline *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, void *addr)
 {
-	struct page *page;
 	void **object;
 	unsigned long flags;
+	struct kmem_cache_cpu *c;
 
 	local_irq_save(flags);
-	page = s->cpu_slab[smp_processor_id()];
-	if (unlikely(!page || !page->lockless_freelist ||
-			(node != -1 && page_to_nid(page) != node)))
+	c = get_cpu_slab(s, smp_processor_id());
+	if (unlikely(!c->freelist || !node_match(c, node)))
 
-		object = __slab_alloc(s, gfpflags, node, addr, page);
+		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-		object = page->lockless_freelist;
-		page->lockless_freelist = object[page->offset];
+		object = c->freelist;
+		c->freelist = object[c->offset];
 	}
 	local_irq_restore(flags);
 
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
-		memset(object, 0, s->objsize);
+		memset(object, 0, c->objsize);
 
 	return object;
 }
@@ -1599,7 +1604,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-					void *x, void *addr)
+				void *x, void *addr, unsigned int offset)
 {
 	void *prior;
 	void **object = (void *)x;
@@ -1609,7 +1614,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	if (unlikely(SlabDebug(page)))
 		goto debug;
 checks_ok:
-	prior = object[page->offset] = page->freelist;
+	prior = object[offset] = page->freelist;
 	page->freelist = object;
 	page->inuse--;
 
@@ -1664,15 +1669,16 @@ static void __always_inline slab_free(struct kmem_cache *s,
 {
 	void **object = (void *)x;
 	unsigned long flags;
+	struct kmem_cache_cpu *c;
 
 	local_irq_save(flags);
 	debug_check_no_locks_freed(object, s->objsize);
-	if (likely(page == s->cpu_slab[smp_processor_id()] &&
-						!SlabDebug(page))) {
-		object[page->offset] = page->lockless_freelist;
-		page->lockless_freelist = object;
+	c = get_cpu_slab(s, smp_processor_id());
+	if (likely(page == c->page && c->node >= 0)) {
+		object[c->offset] = c->freelist;
+		c->freelist = object;
 	} else
-		__slab_free(s, page, x, addr);
+		__slab_free(s, page, x, addr, c->offset);
 
 	local_irq_restore(flags);
 }
@@ -1759,14 +1765,6 @@ static inline int slab_order(int size, int min_objects,
 	int rem;
 	int min_order = slub_min_order;
 
-	/*
-	 * If we would create too many object per slab then reduce
-	 * the slab order even if it goes below slub_min_order.
-	 */
-	while (min_order > 0 &&
-		(PAGE_SIZE << min_order) >= MAX_OBJECTS_PER_SLAB * size)
-			min_order--;
-
 	for (order = max(min_order,
 				fls(min_objects * size - 1) - PAGE_SHIFT);
 			order <= max_order; order++) {
@@ -1781,9 +1779,6 @@ static inline int slab_order(int size, int min_objects,
 		if (rem <= slab_size / fract_leftover)
 			break;
 
-		/* If the next size is too high then exit now */
-		if (slab_size * 2 >= MAX_OBJECTS_PER_SLAB * size)
-			break;
 	}
 
 	return order;
@@ -1858,6 +1853,16 @@ static unsigned long calculate_alignment(unsigned long flags,
 	return ALIGN(align, sizeof(void *));
 }
 
+static void init_kmem_cache_cpu(struct kmem_cache *s,
+			struct kmem_cache_cpu *c)
+{
+	c->page = NULL;
+	c->freelist = NULL;
+	c->node = 0;
+	c->offset = s->offset / sizeof(void *);
+	c->objsize = s->objsize;
+}
+
 static void init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	n->nr_partial = 0;
@@ -1869,6 +1874,131 @@ static void init_kmem_cache_node(struct kmem_cache_node *n)
 #endif
 }
 
+#ifdef CONFIG_SMP
+/*
+ * Per cpu array for per cpu structures.
+ *
+ * The per cpu array places all kmem_cache_cpu structures from one processor
+ * close together meaning that it becomes possible that multiple per cpu
+ * structures are contained in one cacheline. This may be particularly
+ * beneficial for the kmalloc caches.
+ *
+ * A desktop system typically has around 60-80 slabs. With 100 here we are
+ * likely able to get per cpu structures for all caches from the array defined
+ * here. We must be able to cover all kmalloc caches during bootstrap.
+ *
+ * If the per cpu array is exhausted then fall back to kmalloc
+ * of individual cachelines. No sharing is possible then.
+ */
+#define NR_KMEM_CACHE_CPU 100
+
+static DEFINE_PER_CPU(struct kmem_cache_cpu,
+				kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
+
+static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
+static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
+
+static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
+							int cpu, gfp_t flags)
+{
+	struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
+
+	if (c)
+		per_cpu(kmem_cache_cpu_free, cpu) =
+				(void *)c->freelist;
+	else {
+		/* Table overflow: So allocate ourselves */
+		c = kmalloc_node(
+			ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
+			flags, cpu_to_node(cpu));
+		if (!c)
+			return NULL;
+	}
+
+	init_kmem_cache_cpu(s, c);
+	return c;
+}
+
+static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
+{
+	if (c < per_cpu(kmem_cache_cpu, cpu) ||
+			c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
+		kfree(c);	/* not an element of the static per cpu array */
+		return;
+	}
+	c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
+	per_cpu(kmem_cache_cpu_free, cpu) = c;	/* push onto the free list */
+}
+
+static void free_kmem_cache_cpus(struct kmem_cache *s)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+
+		if (c) {
+			s->cpu_slab[cpu] = NULL;
+			free_kmem_cache_cpu(c, cpu);
+		}
+	}
+}
+
+static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu) {
+		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+
+		if (c)
+			continue;
+
+		c = alloc_kmem_cache_cpu(s, cpu, flags);
+		if (!c) {
+			free_kmem_cache_cpus(s);
+			return 0;
+		}
+		s->cpu_slab[cpu] = c;
+	}
+	return 1;
+}
+
+/*
+ * Initialize the per cpu array.
+ */
+static void init_alloc_cpu_cpu(int cpu)
+{
+	int i;
+
+	if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
+		return;
+
+	for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
+		free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
+
+	cpu_set(cpu, kmem_cach_cpu_free_init_once);
+}
+
+static void __init init_alloc_cpu(void)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		init_alloc_cpu_cpu(cpu);
+}
+
+#else
+static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
+static inline void init_alloc_cpu(void) {}
+
+static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
+{
+	init_kmem_cache_cpu(s, &s->cpu_slab);
+	return 1;
+}
+#endif
+
 #ifdef CONFIG_NUMA
 /*
  * No kmalloc_node yet so do it by hand. We know that this is the first
@@ -1876,10 +2006,11 @@ static void init_kmem_cache_node(struct kmem_cache_node *n)
  * possible.
  *
  * Note that this function only works on the kmalloc_node_cache
- * when allocating for the kmalloc_node_cache.
+ * when allocating for the kmalloc_node_cache. This is used for bootstrapping
+ * memory on a fresh node that has no slab structures yet.
  */
-static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflags,
-								int node)
+static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
+							   int node)
 {
 	struct page *page;
 	struct kmem_cache_node *n;
@@ -1921,7 +2052,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
 {
 	int node;
 
-	for_each_online_node(node) {
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = s->node[node];
 		if (n && n != &s->local_node)
 			kmem_cache_free(kmalloc_caches, n);
@@ -1939,7 +2070,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 	else
 		local_node = 0;
 
-	for_each_online_node(node) {
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n;
 
 		if (local_node == node)
@@ -2077,14 +2208,7 @@ static int calculate_sizes(struct kmem_cache *s)
 	 */
 	s->objects = (PAGE_SIZE << s->order) / size;
 
-	/*
-	 * Verify that the number of objects is within permitted limits.
-	 * The page->inuse field is only 16 bit wide! So we cannot have
-	 * more than 64k objects per slab.
-	 */
-	if (!s->objects || s->objects > MAX_OBJECTS_PER_SLAB)
-		return 0;
-	return 1;
+	return !!s->objects;
 
 }
 
@@ -2107,9 +2231,12 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 #ifdef CONFIG_NUMA
 	s->defrag_ratio = 100;
 #endif
+	if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
+		goto error;
 
-	if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
+	if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
 		return 1;
+	free_kmem_cache_nodes(s);
 error:
 	if (flags & SLAB_PANIC)
 		panic("Cannot create slab %s size=%lu realsize=%u "
@@ -2192,7 +2319,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 	flush_all(s);
 
 	/* Attempt to free all objects */
-	for_each_online_node(node) {
+	free_kmem_cache_cpus(s);
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
 
 		n->nr_partial -= free_list(s, n, &n->partial);
@@ -2227,11 +2355,11 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  *		Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[KMALLOC_SHIFT_HIGH + 1];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2397,12 +2525,8 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 			return ZERO_SIZE_PTR;
 
 		index = size_index[(size - 1) / 8];
-	} else {
-		if (size > KMALLOC_MAX_SIZE)
-			return NULL;
-
+	} else
 		index = fls(size - 1);
-	}
 
 #ifdef CONFIG_ZONE_DMA
 	if (unlikely((flags & SLUB_DMA)))
@@ -2414,9 +2538,15 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
 
 void *__kmalloc(size_t size, gfp_t flags)
 {
-	struct kmem_cache *s = get_slab(size, flags);
+	struct kmem_cache *s;
+
+	if (unlikely(size > PAGE_SIZE / 2))
+		return (void *)__get_free_pages(flags | __GFP_COMP,
+							get_order(size));
 
-	if (ZERO_OR_NULL_PTR(s))
+	s = get_slab(size, flags);
+
+	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
 	return slab_alloc(s, flags, -1, __builtin_return_address(0));
@@ -2426,9 +2556,15 @@ EXPORT_SYMBOL(__kmalloc);
 #ifdef CONFIG_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
-	struct kmem_cache *s = get_slab(size, flags);
+	struct kmem_cache *s;
 
-	if (ZERO_OR_NULL_PTR(s))
+	if (unlikely(size > PAGE_SIZE / 2))
+		return (void *)__get_free_pages(flags | __GFP_COMP,
+							get_order(size));
+
+	s = get_slab(size, flags);
+
+	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
 	return slab_alloc(s, flags, node, __builtin_return_address(0));
@@ -2441,7 +2577,8 @@ size_t ksize(const void *object)
 	struct page *page;
 	struct kmem_cache *s;
 
-	if (ZERO_OR_NULL_PTR(object))
+	BUG_ON(!object);
+	if (unlikely(object == ZERO_SIZE_PTR))
 		return 0;
 
 	page = get_object_page(object);
@@ -2473,22 +2610,17 @@ EXPORT_SYMBOL(ksize);
 
 void kfree(const void *x)
 {
-	struct kmem_cache *s;
 	struct page *page;
 
-	/*
-	 * This has to be an unsigned comparison. According to Linus
-	 * some gcc version treat a pointer as a signed entity. Then
-	 * this comparison would be true for all "negative" pointers
-	 * (which would cover the whole upper half of the address space).
-	 */
-	if (ZERO_OR_NULL_PTR(x))
+	if (unlikely(ZERO_OR_NULL_PTR(x)))
 		return;
 
 	page = virt_to_head_page(x);
-	s = page->slab;
-
-	slab_free(s, page, (void *)x, __builtin_return_address(0));
+	if (unlikely(!PageSlab(page))) {
+		put_page(page);
+		return;
+	}
+	slab_free(page->slab, page, (void *)x, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kfree);
 
@@ -2517,7 +2649,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
 		return -ENOMEM;
 
 	flush_all(s);
-	for_each_online_node(node) {
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 		n = get_node(s, node);
 
 		if (!n->nr_partial)
@@ -2575,6 +2707,8 @@ void __init kmem_cache_init(void)
 	int i;
 	int caches = 0;
 
+	init_alloc_cpu();
+
 #ifdef CONFIG_NUMA
 	/*
 	 * Must first have the slab cache available for the allocations of the
@@ -2602,7 +2736,7 @@ void __init kmem_cache_init(void)
 		caches++;
 	}
 
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
+	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
 			"kmalloc", 1 << i, GFP_KERNEL);
 		caches++;
@@ -2629,16 +2763,18 @@ void __init kmem_cache_init(void)
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
+	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
 			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
+	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
+				nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
+#else
+	kmem_size = sizeof(struct kmem_cache);
 #endif
 
-	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
-				nr_cpu_ids * sizeof(struct page *);
 
 	printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
 		" CPUs=%d, Nodes=%d\n",
@@ -2717,12 +2853,21 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 	down_write(&slub_lock);
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
+		int cpu;
+
 		s->refcount++;
 		/*
 		 * Adjust the object sizes so that we clear
 		 * the complete object on kzalloc.
 		 */
 		s->objsize = max(s->objsize, (int)size);
+
+		/*
+		 * And then we need to update the object size in the
+		 * per cpu structures
+		 */
+		for_each_online_cpu(cpu)
+			get_cpu_slab(s, cpu)->objsize = s->objsize;
 		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
 		up_write(&slub_lock);
 		if (sysfs_slab_alias(s, name))
@@ -2765,15 +2910,29 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	unsigned long flags;
 
 	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		init_alloc_cpu_cpu(cpu);
+		down_read(&slub_lock);
+		list_for_each_entry(s, &slab_caches, list)
+			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
+							GFP_KERNEL);
+		up_read(&slub_lock);
+		break;
+
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		down_read(&slub_lock);
 		list_for_each_entry(s, &slab_caches, list) {
+			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+
 			local_irq_save(flags);
 			__flush_cpu_slab(s, cpu);
 			local_irq_restore(flags);
+			free_kmem_cache_cpu(c, cpu);
+			s->cpu_slab[cpu] = NULL;
 		}
 		up_read(&slub_lock);
 		break;
@@ -2790,9 +2949,14 @@ static struct notifier_block __cpuinitdata slab_notifier =
 
 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
-	struct kmem_cache *s = get_slab(size, gfpflags);
+	struct kmem_cache *s;
+
+	if (unlikely(size > PAGE_SIZE / 2))
+		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
+							get_order(size));
+	s = get_slab(size, gfpflags);
 
-	if (ZERO_OR_NULL_PTR(s))
+	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
 	return slab_alloc(s, gfpflags, -1, caller);
@@ -2801,9 +2965,14 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 					int node, void *caller)
 {
-	struct kmem_cache *s = get_slab(size, gfpflags);
+	struct kmem_cache *s;
+
+	if (unlikely(size > PAGE_SIZE / 2))
+		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
+							get_order(size));
+	s = get_slab(size, gfpflags);
 
-	if (ZERO_OR_NULL_PTR(s))
+	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
 	return slab_alloc(s, gfpflags, node, caller);
@@ -2902,7 +3071,7 @@ static long validate_slab_cache(struct kmem_cache *s)
 		return -ENOMEM;
 
 	flush_all(s);
-	for_each_online_node(node) {
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
 
 		count += validate_slab_node(s, n, map);
@@ -3116,13 +3285,13 @@ static int list_locations(struct kmem_cache *s, char *buf,
 	int node;
 
 	if (!alloc_loc_track(&t, PAGE_SIZE / sizeof(struct location),
-			GFP_KERNEL))
+			GFP_TEMPORARY))
 		return sprintf(buf, "Out of memory\n");
 
 	/* Push back cpu slabs */
 	flush_all(s);
 
-	for_each_online_node(node) {
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
 		unsigned long flags;
 		struct page *page;
@@ -3230,11 +3399,18 @@ static unsigned long slab_objects(struct kmem_cache *s,
 	per_cpu = nodes + nr_node_ids;
 
 	for_each_possible_cpu(cpu) {
-		struct page *page = s->cpu_slab[cpu];
+		struct page *page;
 		int node;
+		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
 
+		if (!c)
+			continue;
+
+		page = c->page;
+		node = c->node;
+		if (node < 0)
+			continue;
 		if (page) {
-			node = page_to_nid(page);
 			if (flags & SO_CPU) {
 				int x = 0;
 
@@ -3249,7 +3425,7 @@ static unsigned long slab_objects(struct kmem_cache *s,
 		}
 	}
 
-	for_each_online_node(node) {
+	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
 
 		if (flags & SO_PARTIAL) {
@@ -3277,7 +3453,7 @@ static unsigned long slab_objects(struct kmem_cache *s,
 
 	x = sprintf(buf, "%lu", total);
 #ifdef CONFIG_NUMA
-	for_each_online_node(node)
+	for_each_node_state(node, N_NORMAL_MEMORY)
 		if (nodes[node])
 			x += sprintf(buf + x, " N%d=%lu",
 					node, nodes[node]);
@@ -3291,13 +3467,19 @@ static int any_slab_objects(struct kmem_cache *s)
 	int node;
 	int cpu;
 
-	for_each_possible_cpu(cpu)
-		if (s->cpu_slab[cpu])
+	for_each_possible_cpu(cpu) {
+		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+
+		if (c && c->page)
 			return 1;
+	}
 
-	for_each_node(node) {
+	for_each_online_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
+		if (!n)
+			continue;
+
 		if (n->nr_partial || atomic_long_read(&n->nr_slabs))
 			return 1;
 	}
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
new file mode 100644
index 00000000000..d3b718b0c20
--- /dev/null
+++ b/mm/sparse-vmemmap.c
@@ -0,0 +1,148 @@
+/*
+ * Virtual Memory Map support
+ *
+ * (C) 2007 sgi. Christoph Lameter <clameter@sgi.com>.
+ *
+ * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
+ * virt_to_page, page_address() to be implemented as a base offset
+ * calculation without memory access.
+ *
+ * However, virtual mappings need a page table and TLBs. Many Linux
+ * architectures already map their physical space using 1-1 mappings
+ * via TLBs. For those arches the virtual memory map is essentially
+ * for free if we use the same page size as the 1-1 mappings. In that
+ * case the overhead consists of a few additional pages that are
+ * allocated to create a view of memory for vmemmap.
+ *
+ * The architecture is expected to provide a vmemmap_populate() function
+ * to instantiate the mapping.
+ */
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <asm/dma.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+
+/*
+ * Allocate a block of memory to be used to back the virtual memory map
+ * or to back the page tables that are used to create the mapping.
+ * Uses the main allocators if they are available, else bootmem.
+ */
+void * __meminit vmemmap_alloc_block(unsigned long size, int node)
+{
+	/* If the main allocator is up use that, fallback to bootmem. */
+	if (slab_is_available()) {
+		struct page *page = alloc_pages_node(node,
+				GFP_KERNEL | __GFP_ZERO, get_order(size));
+		if (page)
+			return page_address(page);
+		return NULL;
+	} else
+		return __alloc_bootmem_node(NODE_DATA(node), size, size,
+				__pa(MAX_DMA_ADDRESS));
+}
+
+void __meminit vmemmap_verify(pte_t *pte, int node,
+				unsigned long start, unsigned long end)
+{
+	unsigned long pfn = pte_pfn(*pte);
+	int actual_node = early_pfn_to_nid(pfn);
+
+	if (actual_node != node)
+		printk(KERN_WARNING "[%lx-%lx] potential offnode "
+			"page_structs\n", start, end - 1);
+}
+
+pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
+{
+	pte_t *pte = pte_offset_kernel(pmd, addr);
+	if (pte_none(*pte)) {
+		pte_t entry;
+		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+		if (!p)
+			return NULL;	/* block allocation failed */
+		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+		set_pte_at(&init_mm, addr, pte, entry);
+	}
+	return pte;
+}
+
+pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
+{
+	pmd_t *pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd)) {
+		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+		if (!p)
+			return NULL;	/* block allocation failed */
+		pmd_populate_kernel(&init_mm, pmd, p);
+	}
+	return pmd;
+}
+
+pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
+{
+	pud_t *pud = pud_offset(pgd, addr);
+	if (pud_none(*pud)) {
+		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+		if (!p)
+			return NULL;	/* block allocation failed */
+		pud_populate(&init_mm, pud, p);
+	}
+	return pud;
+}
+
+pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd)) {
+		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
+		if (!p)
+			return NULL;	/* block allocation failed */
+		pgd_populate(&init_mm, pgd, p);
+	}
+	return pgd;
+}
+
+int __meminit vmemmap_populate_basepages(struct page *start_page,
+						unsigned long size, int node)
+{
+	unsigned long addr = (unsigned long)start_page;
+	unsigned long end = (unsigned long)(start_page + size);
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	for (; addr < end; addr += PAGE_SIZE) {
+		pgd = vmemmap_pgd_populate(addr, node);
+		if (!pgd)
+			return -ENOMEM;
+		pud = vmemmap_pud_populate(pgd, addr, node);
+		if (!pud)
+			return -ENOMEM;
+		pmd = vmemmap_pmd_populate(pud, addr, node);
+		if (!pmd)
+			return -ENOMEM;
+		pte = vmemmap_pte_populate(pmd, addr, node);
+		if (!pte)
+			return -ENOMEM;
+		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+	}
+
+	return 0;
+}
+
+struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
+{
+	struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION);
+	int error = vmemmap_populate(map, PAGES_PER_SECTION, nid);
+	if (error)
+		return NULL;
+
+	return map;
+}
diff --git a/mm/sparse.c b/mm/sparse.c
index 239f5a720d3..08fb14f5eea 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -9,6 +9,8 @@
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <asm/dma.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
 
 /*
  * Permanent SPARSEMEM data:
@@ -106,7 +108,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
 
 /*
  * Although written for the SPARSEMEM_EXTREME case, this happens
- * to also work for the flat array case becase
+ * to also work for the flat array case because
  * NR_SECTION_ROOTS==NR_MEM_SECTIONS.
  */
 int __section_nr(struct mem_section* ms)
@@ -176,7 +178,7 @@ unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
 		if (nid != early_pfn_to_nid(pfn))
 			continue;
 
-		if (pfn_valid(pfn))
+		if (pfn_present(pfn))
 			nr_pages += PAGES_PER_SECTION;
 	}
 
@@ -204,13 +206,16 @@ struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pn
 }
 
 static int __meminit sparse_init_one_section(struct mem_section *ms,
-		unsigned long pnum, struct page *mem_map)
+		unsigned long pnum, struct page *mem_map,
+		unsigned long *pageblock_bitmap)
 {
-	if (!valid_section(ms))
+	if (!present_section(ms))
 		return -EINVAL;
 
 	ms->section_mem_map &= ~SECTION_MAP_MASK;
-	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);
+	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) |
+							SECTION_HAS_MEM_MAP;
+	ms->pageblock_flags = pageblock_bitmap;
 
 	return 1;
 }
@@ -221,12 +226,43 @@ void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
 	return NULL;
 }
 
-static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
+static unsigned long usemap_size(void)
 {
-	struct page *map;
+	unsigned long size_bytes;
+	size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
+	size_bytes = roundup(size_bytes, sizeof(unsigned long));
+	return size_bytes;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long *__kmalloc_section_usemap(void)
+{
+	return kmalloc(usemap_size(), GFP_KERNEL);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+static unsigned long *sparse_early_usemap_alloc(unsigned long pnum)
+{
+	unsigned long *usemap;
 	struct mem_section *ms = __nr_to_section(pnum);
 	int nid = sparse_early_nid(ms);
 
+	usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
+	if (usemap)
+		return usemap;
+
+	/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
+	nid = 0;
+
+	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
+	return NULL;
+}
+
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
+struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
+{
+	struct page *map;
+
 	map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
 	if (map)
 		return map;
@@ -238,10 +274,22 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 
 	map = alloc_bootmem_node(NODE_DATA(nid),
 			sizeof(struct page) * PAGES_PER_SECTION);
+	return map;
+}
+#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
+
+struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
+{
+	struct page *map;
+	struct mem_section *ms = __nr_to_section(pnum);
+	int nid = sparse_early_nid(ms);
+
+	map = sparse_mem_map_populate(pnum, nid);
 	if (map)
 		return map;
 
-	printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__);
+	printk(KERN_ERR "%s: sparsemem memory map backing failed "
+			"some memory will not be available.\n", __FUNCTION__);
 	ms->section_mem_map = 0;
 	return NULL;
 }
@@ -254,19 +302,38 @@ void __init sparse_init(void)
 {
 	unsigned long pnum;
 	struct page *map;
+	unsigned long *usemap;
 
 	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
-		if (!valid_section_nr(pnum))
+		if (!present_section_nr(pnum))
 			continue;
 
 		map = sparse_early_mem_map_alloc(pnum);
 		if (!map)
 			continue;
-		sparse_init_one_section(__nr_to_section(pnum), pnum, map);
+
+		usemap = sparse_early_usemap_alloc(pnum);
+		if (!usemap)
+			continue;
+
+		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
+								usemap);
 	}
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+						 unsigned long nr_pages)
+{
+	/* This will make the necessary allocations eventually. */
+	return sparse_mem_map_populate(pnum, nid);
+}
+static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+{
+	return; /* XXX: Not implemented yet */
+}
+#else
 static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 {
 	struct page *page, *ret;
@@ -289,6 +356,12 @@ got_map_ptr:
 	return ret;
 }
 
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+						  unsigned long nr_pages)
+{
+	return __kmalloc_section_memmap(nr_pages);
+}
+
 static int vaddr_in_vmalloc_area(void *addr)
 {
 	if (addr >= (void *)VMALLOC_START &&
@@ -305,6 +378,7 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
 		free_pages((unsigned long)memmap,
 			   get_order(sizeof(struct page) * nr_pages));
 }
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 /*
  * returns the number of sections whose mem_maps were properly
@@ -318,6 +392,7 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 	struct pglist_data *pgdat = zone->zone_pgdat;
 	struct mem_section *ms;
 	struct page *memmap;
+	unsigned long *usemap;
 	unsigned long flags;
 	int ret;
 
@@ -326,7 +401,8 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 	 * plus, it does a kmalloc
 	 */
 	sparse_index_init(section_nr, pgdat->node_id);
-	memmap = __kmalloc_section_memmap(nr_pages);
+	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
+	usemap = __kmalloc_section_usemap();
 
 	pgdat_resize_lock(pgdat, &flags);
 
@@ -335,9 +411,14 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 		ret = -EEXIST;
 		goto out;
 	}
+
+	if (!usemap) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
 
-	ret = sparse_init_one_section(ms, section_nr, memmap);
+	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
 
 out:
 	pgdat_resize_unlock(pgdat, &flags);
diff --git a/mm/swap.c b/mm/swap.c
index d3cb966fe99..d034b2128d2 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -24,16 +24,18 @@
 #include <linux/module.h>
 #include <linux/mm_inline.h>
 #include <linux/buffer_head.h>	/* for try_to_release_page() */
-#include <linux/module.h>
 #include <linux/percpu_counter.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
-#include <linux/init.h>
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
+static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs) = { 0, };
+
 /*
  * This path almost never happens for VM activity - pages are normally
  * freed via pagevecs.  But it gets used by networking.
@@ -94,23 +96,47 @@ void put_pages_list(struct list_head *pages)
 EXPORT_SYMBOL(put_pages_list);
 
 /*
+ * pagevec_move_tail() must be called with IRQ disabled.
+ * Otherwise this may cause nasty races.
+ */
+static void pagevec_move_tail(struct pagevec *pvec)
+{
+	int i;
+	int pgmoved = 0;
+	struct zone *zone = NULL;
+
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		struct zone *pagezone = page_zone(page);
+
+		if (pagezone != zone) {
+			if (zone)
+				spin_unlock(&zone->lru_lock);
+			zone = pagezone;
+			spin_lock(&zone->lru_lock);
+		}
+		if (PageLRU(page) && !PageActive(page)) {
+			list_move_tail(&page->lru, &zone->inactive_list);
+			pgmoved++;
+		}
+	}
+	if (zone)
+		spin_unlock(&zone->lru_lock);
+	__count_vm_events(PGROTATED, pgmoved);
+	release_pages(pvec->pages, pvec->nr, pvec->cold);
+	pagevec_reinit(pvec);
+}
+
+/*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
- * inactive list.  The page still has PageWriteback set, which will pin it.
- *
- * We don't expect many pages to come through here, so don't bother batching
- * things up.
- *
- * To avoid placing the page at the tail of the LRU while PG_writeback is still
- * set, this function will clear PG_writeback before performing the page
- * motion.  Do that inside the lru lock because once PG_writeback is cleared
- * we may not touch the page.
+ * inactive list.
  *
  * Returns zero if it cleared PG_writeback.
  */
 int rotate_reclaimable_page(struct page *page)
 {
-	struct zone *zone;
+	struct pagevec *pvec;
 	unsigned long flags;
 
 	if (PageLocked(page))
@@ -122,15 +148,16 @@ int rotate_reclaimable_page(struct page *page)
 	if (!PageLRU(page))
 		return 1;
 
-	zone = page_zone(page);
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	if (PageLRU(page) && !PageActive(page)) {
-		list_move_tail(&page->lru, &zone->inactive_list);
-		__count_vm_event(PGROTATED);
-	}
+	page_cache_get(page);
+	local_irq_save(flags);
+	pvec = &__get_cpu_var(lru_rotate_pvecs);
+	if (!pagevec_add(pvec, page))
+		pagevec_move_tail(pvec);
+	local_irq_restore(flags);
+
 	if (!test_clear_page_writeback(page))
 		BUG();
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
+
 	return 0;
 }
 
@@ -174,9 +201,6 @@ EXPORT_SYMBOL(mark_page_accessed);
  * lru_cache_add: add a page to the page lists
  * @page: the page to add
  */
-static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
-static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
-
 void fastcall lru_cache_add(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvecs);
@@ -197,21 +221,37 @@ void fastcall lru_cache_add_active(struct page *page)
 	put_cpu_var(lru_add_active_pvecs);
 }
 
-static void __lru_add_drain(int cpu)
+/*
+ * Drain pages out of the cpu's pagevecs.
+ * Either "cpu" is the current CPU, and preemption has already been
+ * disabled; or "cpu" is being hot-unplugged, and is already dead.
+ */
+static void drain_cpu_pagevecs(int cpu)
 {
-	struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
+	struct pagevec *pvec;
 
-	/* CPU is dead, so no locking needed. */
+	pvec = &per_cpu(lru_add_pvecs, cpu);
 	if (pagevec_count(pvec))
 		__pagevec_lru_add(pvec);
+
 	pvec = &per_cpu(lru_add_active_pvecs, cpu);
 	if (pagevec_count(pvec))
 		__pagevec_lru_add_active(pvec);
+
+	pvec = &per_cpu(lru_rotate_pvecs, cpu);
+	if (pagevec_count(pvec)) {
+		unsigned long flags;
+
+		/* No harm done if a racing interrupt already did this */
+		local_irq_save(flags);
+		pagevec_move_tail(pvec);
+		local_irq_restore(flags);
+	}
 }
 
 void lru_add_drain(void)
 {
-	__lru_add_drain(get_cpu());
+	drain_cpu_pagevecs(get_cpu());
 	put_cpu();
 }
 
@@ -258,6 +298,7 @@ void release_pages(struct page **pages, int nr, int cold)
 	int i;
 	struct pagevec pages_to_free;
 	struct zone *zone = NULL;
+	unsigned long uninitialized_var(flags);
 
 	pagevec_init(&pages_to_free, cold);
 	for (i = 0; i < nr; i++) {
@@ -265,7 +306,7 @@ void release_pages(struct page **pages, int nr, int cold)
 
 		if (unlikely(PageCompound(page))) {
 			if (zone) {
-				spin_unlock_irq(&zone->lru_lock);
+				spin_unlock_irqrestore(&zone->lru_lock, flags);
 				zone = NULL;
 			}
 			put_compound_page(page);
@@ -279,9 +320,10 @@ void release_pages(struct page **pages, int nr, int cold)
 			struct zone *pagezone = page_zone(page);
 			if (pagezone != zone) {
 				if (zone)
-					spin_unlock_irq(&zone->lru_lock);
+					spin_unlock_irqrestore(&zone->lru_lock,
+									flags);
 				zone = pagezone;
-				spin_lock_irq(&zone->lru_lock);
+				spin_lock_irqsave(&zone->lru_lock, flags);
 			}
 			VM_BUG_ON(!PageLRU(page));
 			__ClearPageLRU(page);
@@ -290,7 +332,7 @@ void release_pages(struct page **pages, int nr, int cold)
 
 		if (!pagevec_add(&pages_to_free, page)) {
 			if (zone) {
-				spin_unlock_irq(&zone->lru_lock);
+				spin_unlock_irqrestore(&zone->lru_lock, flags);
 				zone = NULL;
 			}
 			__pagevec_free(&pages_to_free);
@@ -298,7 +340,7 @@ void release_pages(struct page **pages, int nr, int cold)
   		}
 	}
 	if (zone)
-		spin_unlock_irq(&zone->lru_lock);
+		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
 	pagevec_free(&pages_to_free);
 }
@@ -491,7 +533,7 @@ static int cpu_swap_callback(struct notifier_block *nfb,
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
 		atomic_add(*committed, &vm_committed_space);
 		*committed = 0;
-		__lru_add_drain((long)hcpu);
+		drain_cpu_pagevecs((long)hcpu);
 	}
 	return NOTIFY_OK;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 67daecb6031..b52635601df 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -74,6 +74,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
 {
 	int error;
 
+	BUG_ON(!PageLocked(page));
 	BUG_ON(PageSwapCache(page));
 	BUG_ON(PagePrivate(page));
 	error = radix_tree_preload(gfp_mask);
@@ -83,7 +84,6 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
 						entry.val, page);
 		if (!error) {
 			page_cache_get(page);
-			SetPageLocked(page);
 			SetPageSwapCache(page);
 			set_page_private(page, entry.val);
 			total_swapcache_pages++;
@@ -99,15 +99,18 @@ static int add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
+	BUG_ON(PageLocked(page));
 	if (!swap_duplicate(entry)) {
 		INC_CACHE_INFO(noent_race);
 		return -ENOENT;
 	}
+	SetPageLocked(page);
 	error = __add_to_swap_cache(page, entry, GFP_KERNEL);
 	/*
 	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
 	 */
 	if (error) {
+		ClearPageLocked(page);
 		swap_free(entry);
 		if (error == -EEXIST)
 			INC_CACHE_INFO(exist_race);
diff --git a/mm/util.c b/mm/util.c
index bf340d80686..5f64026cbb4 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -81,14 +81,16 @@ EXPORT_SYMBOL(kmemdup);
 void *krealloc(const void *p, size_t new_size, gfp_t flags)
 {
 	void *ret;
-	size_t ks;
+	size_t ks = 0;
 
 	if (unlikely(!new_size)) {
 		kfree(p);
 		return ZERO_SIZE_PTR;
 	}
 
-	ks = ksize(p);
+	if (p)
+		ks = ksize(p);
+
 	if (ks >= new_size)
 		return (void *)p;
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3cee76a8c9f..2e01af36584 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -190,7 +190,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long fl
 	if (unlikely(!size))
 		return NULL;
 
-	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_LEVEL_MASK, node);
+	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_RECLAIM_MASK, node);
+
 	if (unlikely(!area))
 		return NULL;
 
@@ -439,7 +440,7 @@ void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		area->flags |= VM_VPAGES;
 	} else {
 		pages = kmalloc_node(array_size,
-				(gfp_mask & GFP_LEVEL_MASK) | __GFP_ZERO,
+				(gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
 				node);
 	}
 	area->pages = pages;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6e65d02499..bbd194630c5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -932,6 +932,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		long mapped_ratio;
 		long distress;
 		long swap_tendency;
+		long imbalance;
 
 		if (zone_is_near_oom(zone))
 			goto force_reclaim_mapped;
@@ -967,6 +968,46 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		swap_tendency = mapped_ratio / 2 + distress + sc->swappiness;
 
 		/*
+		 * If there's a huge imbalance between active and inactive
+		 * (think active 100 times larger than inactive) we should
+		 * become more permissive, or the system will take too much
+		 * cpu before it starts swapping during memory pressure.
+		 * Distress is about avoiding early-oom, this is about
+		 * making swappiness graceful despite setting it to low
+		 * values.
+		 *
+		 * Avoid div by zero with nr_inactive+1, and max resulting
+		 * value is vm_total_pages.
+		 */
+		imbalance  = zone_page_state(zone, NR_ACTIVE);
+		imbalance /= zone_page_state(zone, NR_INACTIVE) + 1;
+
+		/*
+		 * Reduce the effect of imbalance if swappiness is low:
+		 * for a very low swappiness, the imbalance must be
+		 * much higher than 100 for this logic to make a
+		 * difference.
+		 *
+		 * Max temporary value is vm_total_pages*100.
+		 */
+		imbalance *= (vm_swappiness + 1);
+		imbalance /= 100;
+
+		/*
+		 * If not much of the ram is mapped, make the imbalance
+		 * less relevant: refilling the inactive list with mapped
+		 * pages is a high priority only in the presence of a high
+		 * ratio of mapped pages.
+		 *
+		 * Max temporary value is vm_total_pages*100.
+		 */
+		imbalance *= mapped_ratio;
+		imbalance /= 100;
+
+		/* apply imbalance feedback to swap_tendency */
+		swap_tendency += imbalance;
+
+		/*
 		 * Now use this metric to decide whether to start moving mapped
 		 * memory onto the inactive list.
 		 */
@@ -1371,7 +1412,13 @@ loop_again:
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
 			note_zone_scanning_priority(zone, priority);
-			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			/*
+			 * We put equal pressure on every zone, unless one
+			 * zone has way too many pages free already.
+			 */
+			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
+						end_zone, 0))
+				nr_reclaimed += shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);
@@ -1688,9 +1735,11 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 {
 	pg_data_t *pgdat;
 	cpumask_t mask;
+	int nid;
 
 	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
-		for_each_online_pgdat(pgdat) {
+		for_each_node_state(nid, N_HIGH_MEMORY) {
+			pgdat = NODE_DATA(nid);
 			mask = node_to_cpumask(pgdat->node_id);
 			if (any_online_cpu(mask) != NR_CPUS)
 				/* One of our CPUs online: restore mask */
@@ -1727,7 +1776,7 @@ static int __init kswapd_init(void)
 	int nid;
 
 	swap_setup();
-	for_each_online_node(nid)
+	for_each_node_state(nid, N_HIGH_MEMORY)
  		kswapd_run(nid);
 	hotcpu_notifier(cpu_callback, 0);
 	return 0;
@@ -1847,7 +1896,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 {
-	cpumask_t mask;
 	int node_id;
 
 	/*
@@ -1884,8 +1932,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * as wide as possible.
 	 */
 	node_id = zone_to_nid(zone);
-	mask = node_to_cpumask(node_id);
-	if (!cpus_empty(mask) && node_id != numa_node_id())
+	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
 		return 0;
 	return __zone_reclaim(zone, gfp_mask, order);
 }
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c64d169537b..3b5e9043e7d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -353,23 +353,6 @@ void refresh_cpu_vm_stats(int cpu)
 	}
 }
 
-static void __refresh_cpu_vm_stats(void *dummy)
-{
-	refresh_cpu_vm_stats(smp_processor_id());
-}
-
-/*
- * Consolidate all counters.
- *
- * Note that the result is less inaccurate but still inaccurate
- * if concurrent processes are allowed to run.
- */
-void refresh_vm_stats(void)
-{
-	on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1);
-}
-EXPORT_SYMBOL(refresh_vm_stats);
-
 #endif
 
 #ifdef CONFIG_NUMA
@@ -398,6 +381,13 @@ void zone_statistics(struct zonelist *zonelist, struct zone *z)
 
 #include <linux/seq_file.h>
 
+static char * const migratetype_names[MIGRATE_TYPES] = {
+	"Unmovable",
+	"Reclaimable",
+	"Movable",
+	"Reserve",
+};
+
 static void *frag_start(struct seq_file *m, loff_t *pos)
 {
 	pg_data_t *pgdat;
@@ -422,28 +412,144 @@ static void frag_stop(struct seq_file *m, void *arg)
 {
 }
 
-/*
- * This walks the free areas for each zone.
- */
-static int frag_show(struct seq_file *m, void *arg)
+/* Walk all the zones in a node and print using a callback */
+static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
+		void (*print)(struct seq_file *m, pg_data_t *, struct zone *))
 {
-	pg_data_t *pgdat = (pg_data_t *)arg;
 	struct zone *zone;
 	struct zone *node_zones = pgdat->node_zones;
 	unsigned long flags;
-	int order;
 
 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
 		if (!populated_zone(zone))
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
-		seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
-		for (order = 0; order < MAX_ORDER; ++order)
-			seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
+		print(m, pgdat, zone);
 		spin_unlock_irqrestore(&zone->lock, flags);
+	}
+}
+
+static void frag_show_print(struct seq_file *m, pg_data_t *pgdat,
+						struct zone *zone)
+{
+	int order;
+
+	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
+	for (order = 0; order < MAX_ORDER; ++order)
+		seq_printf(m, "%6lu ", zone->free_area[order].nr_free);
+	seq_putc(m, '\n');
+}
+
+/*
+ * This walks the free areas for each zone.
+ */
+static int frag_show(struct seq_file *m, void *arg)
+{
+	pg_data_t *pgdat = (pg_data_t *)arg;
+	walk_zones_in_node(m, pgdat, frag_show_print);
+	return 0;
+}
+
+static void pagetypeinfo_showfree_print(struct seq_file *m,
+					pg_data_t *pgdat, struct zone *zone)
+{
+	int order, mtype;
+
+	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++) {
+		seq_printf(m, "Node %4d, zone %8s, type %12s ",
+					pgdat->node_id,
+					zone->name,
+					migratetype_names[mtype]);
+		for (order = 0; order < MAX_ORDER; ++order) {
+			unsigned long freecount = 0;
+			struct free_area *area;
+			struct list_head *curr;
+
+			area = &(zone->free_area[order]);
+
+			list_for_each(curr, &area->free_list[mtype])
+				freecount++;
+			seq_printf(m, "%6lu ", freecount);
+		}
 		seq_putc(m, '\n');
 	}
+}
+
+/* Print out the free pages at each order for each migratetype */
+static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
+{
+	int order;
+	pg_data_t *pgdat = (pg_data_t *)arg;
+
+	/* Print header */
+	seq_printf(m, "%-43s ", "Free pages count per migrate type at order");
+	for (order = 0; order < MAX_ORDER; ++order)
+		seq_printf(m, "%6d ", order);
+	seq_putc(m, '\n');
+
+	walk_zones_in_node(m, pgdat, pagetypeinfo_showfree_print);
+
+	return 0;
+}
+
+static void pagetypeinfo_showblockcount_print(struct seq_file *m,
+					pg_data_t *pgdat, struct zone *zone)
+{
+	int mtype;
+	unsigned long pfn;
+	unsigned long start_pfn = zone->zone_start_pfn;
+	unsigned long end_pfn = start_pfn + zone->spanned_pages;
+	unsigned long count[MIGRATE_TYPES] = { 0, };
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		struct page *page;
+
+		if (!pfn_valid(pfn))
+			continue;
+
+		page = pfn_to_page(pfn);
+		mtype = get_pageblock_migratetype(page);
+
+		count[mtype]++;
+	}
+
+	/* Print counts */
+	seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name);
+	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
+		seq_printf(m, "%12lu ", count[mtype]);
+	seq_putc(m, '\n');
+}
+
+/* Print out the number of pageblocks for each migratetype */
+static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
+{
+	int mtype;
+	pg_data_t *pgdat = (pg_data_t *)arg;
+
+	seq_printf(m, "\n%-23s", "Number of blocks type ");
+	for (mtype = 0; mtype < MIGRATE_TYPES; mtype++)
+		seq_printf(m, "%12s ", migratetype_names[mtype]);
+	seq_putc(m, '\n');
+	walk_zones_in_node(m, pgdat, pagetypeinfo_showblockcount_print);
+
+	return 0;
+}
+
+/*
+ * This prints out statistics in relation to grouping pages by mobility.
+ * It is expensive to collect so do not constantly read the file.
+ */
+static int pagetypeinfo_show(struct seq_file *m, void *arg)
+{
+	pg_data_t *pgdat = (pg_data_t *)arg;
+
+	seq_printf(m, "Page block order: %d\n", pageblock_order);
+	seq_printf(m, "Pages per block:  %lu\n", pageblock_nr_pages);
+	seq_putc(m, '\n');
+	pagetypeinfo_showfree(m, pgdat);
+	pagetypeinfo_showblockcount(m, pgdat);
+
 	return 0;
 }
 
@@ -454,6 +560,13 @@ const struct seq_operations fragmentation_op = {
 	.show	= frag_show,
 };
 
+const struct seq_operations pagetypeinfo_op = {
+	.start	= frag_start,
+	.next	= frag_next,
+	.stop	= frag_stop,
+	.show	= pagetypeinfo_show,
+};
+
 #ifdef CONFIG_ZONE_DMA
 #define TEXT_FOR_DMA(xx) xx "_dma",
 #else
@@ -532,84 +645,78 @@ static const char * const vmstat_text[] = {
 #endif
 };
 
-/*
- * Output information about zones in @pgdat.
- */
-static int zoneinfo_show(struct seq_file *m, void *arg)
+static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
+							struct zone *zone)
 {
-	pg_data_t *pgdat = arg;
-	struct zone *zone;
-	struct zone *node_zones = pgdat->node_zones;
-	unsigned long flags;
-
-	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
-		int i;
-
-		if (!populated_zone(zone))
-			continue;
-
-		spin_lock_irqsave(&zone->lock, flags);
-		seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
-		seq_printf(m,
-			   "\n  pages free     %lu"
-			   "\n        min      %lu"
-			   "\n        low      %lu"
-			   "\n        high     %lu"
-			   "\n        scanned  %lu (a: %lu i: %lu)"
-			   "\n        spanned  %lu"
-			   "\n        present  %lu",
-			   zone_page_state(zone, NR_FREE_PAGES),
-			   zone->pages_min,
-			   zone->pages_low,
-			   zone->pages_high,
-			   zone->pages_scanned,
-			   zone->nr_scan_active, zone->nr_scan_inactive,
-			   zone->spanned_pages,
-			   zone->present_pages);
+	int i;
+	seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name);
+	seq_printf(m,
+		   "\n  pages free     %lu"
+		   "\n        min      %lu"
+		   "\n        low      %lu"
+		   "\n        high     %lu"
+		   "\n        scanned  %lu (a: %lu i: %lu)"
+		   "\n        spanned  %lu"
+		   "\n        present  %lu",
+		   zone_page_state(zone, NR_FREE_PAGES),
+		   zone->pages_min,
+		   zone->pages_low,
+		   zone->pages_high,
+		   zone->pages_scanned,
+		   zone->nr_scan_active, zone->nr_scan_inactive,
+		   zone->spanned_pages,
+		   zone->present_pages);
 
-		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-			seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
-					zone_page_state(zone, i));
-
-		seq_printf(m,
-			   "\n        protection: (%lu",
-			   zone->lowmem_reserve[0]);
-		for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
-			seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
-		seq_printf(m,
-			   ")"
-			   "\n  pagesets");
-		for_each_online_cpu(i) {
-			struct per_cpu_pageset *pageset;
-			int j;
-
-			pageset = zone_pcp(zone, i);
-			for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-				seq_printf(m,
-					   "\n    cpu: %i pcp: %i"
-					   "\n              count: %i"
-					   "\n              high:  %i"
-					   "\n              batch: %i",
-					   i, j,
-					   pageset->pcp[j].count,
-					   pageset->pcp[j].high,
-					   pageset->pcp[j].batch);
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		seq_printf(m, "\n    %-12s %lu", vmstat_text[i],
+				zone_page_state(zone, i));
+
+	seq_printf(m,
+		   "\n        protection: (%lu",
+		   zone->lowmem_reserve[0]);
+	for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++)
+		seq_printf(m, ", %lu", zone->lowmem_reserve[i]);
+	seq_printf(m,
+		   ")"
+		   "\n  pagesets");
+	for_each_online_cpu(i) {
+		struct per_cpu_pageset *pageset;
+		int j;
+
+		pageset = zone_pcp(zone, i);
+		for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
+			seq_printf(m,
+				   "\n    cpu: %i pcp: %i"
+				   "\n              count: %i"
+				   "\n              high:  %i"
+				   "\n              batch: %i",
+				   i, j,
+				   pageset->pcp[j].count,
+				   pageset->pcp[j].high,
+				   pageset->pcp[j].batch);
 			}
 #ifdef CONFIG_SMP
-			seq_printf(m, "\n  vm stats threshold: %d",
-					pageset->stat_threshold);
+		seq_printf(m, "\n  vm stats threshold: %d",
+				pageset->stat_threshold);
 #endif
-		}
-		seq_printf(m,
-			   "\n  all_unreclaimable: %u"
-			   "\n  prev_priority:     %i"
-			   "\n  start_pfn:         %lu",
-			   zone->all_unreclaimable,
-			   zone->prev_priority,
-			   zone->zone_start_pfn);
-		spin_unlock_irqrestore(&zone->lock, flags);
-		seq_putc(m, '\n');
 	}
+	seq_printf(m,
+		   "\n  all_unreclaimable: %u"
+		   "\n  prev_priority:     %i"
+		   "\n  start_pfn:         %lu",
+		   zone->all_unreclaimable,
+		   zone->prev_priority,
+		   zone->zone_start_pfn);
+	seq_putc(m, '\n');
+}
+
+/*
+ * Output information about zones in @pgdat.
+ */
+static int zoneinfo_show(struct seq_file *m, void *arg)
+{
+	pg_data_t *pgdat = (pg_data_t *)arg;
+	walk_zones_in_node(m, pgdat, zoneinfo_show_print);
 	return 0;
 }
 
@@ -741,7 +848,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb,
 static struct notifier_block __cpuinitdata vmstat_notifier =
 	{ &vmstat_cpuup_callback, NULL, 0 };
 
-int __init setup_vmstat(void)
+static int __init setup_vmstat(void)
 {
 	int cpu;
 
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 9ec8ca4f602..44b0fb942e8 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1263,7 +1263,8 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
 
 	dprintk("RPC:       %s: phys convert: 0x%llx "
 			"registered 0x%llx length %d\n",
-			__func__, ipb.addr, iov->addr, len);
+			__func__, (unsigned long long)ipb.addr,
+			(unsigned long long)iov->addr, len);
 
 	if (IS_ERR(mr)) {
 		*mrp = NULL;