431 files changed, 42908 insertions, 4762 deletions
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index 6367bba32d2..582032eea87 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = {
     </listitem>
     <listitem>
      <para>
-      Function names as strings (__func__).
+      Function names as strings (__FUNCTION__).
      </para>
     </listitem>
     <listitem>
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
new file mode 100644
index 00000000000..dca274ff400
--- /dev/null
+++ b/Documentation/device-mapper/snapshot.txt
@@ -0,0 +1,73 @@
+Device-mapper snapshot support
+==============================
+
+Device-mapper allows you, without massive data copying:
+
+*) To create snapshots of any block device i.e. mountable, saved states of
+the block device which are also writable without interfering with the
+original content;
+*) To create device "forks", i.e. multiple different versions of the
+same data stream.
+
+
+In both cases, dm copies only the chunks of data that get changed and
+uses a separate copy-on-write (COW) block device for storage.
+
+
+There are two dm targets available: snapshot and snapshot-origin.
+
+*) snapshot-origin <origin>
+
+which will normally have one or more snapshots based on it.
+You must create the snapshot-origin device before you can create snapshots.
+Reads will be mapped directly to the backing device. For each write, the
+original data will be saved in the <COW device> of each snapshot to keep
+its visible content unchanged, at least until the <COW device> fills up.
+
+
+*) snapshot <origin> <COW device> <persistent?> <chunksize>
+
+A snapshot is created of the <origin> block device. Changed chunks of
+<chunksize> sectors will be stored on the <COW device>.  Writes will
+only go to the <COW device>.  Reads will come from the <COW device> or
+from <origin> for unchanged data.  <COW device> will often be
+smaller than the origin and if it fills up the snapshot will become
+useless and be disabled, returning errors.  So it is important to monitor
+the amount of free space and expand the <COW device> before it fills up.
+
+<persistent?> is P (Persistent) or N (Not persistent - will not survive
+after reboot).
+
+
+How this is used by LVM2
+========================
+When you create the first LVM2 snapshot of a volume, four dm devices are used:
+
+1) a device containing the original mapping table of the source volume;
+2) a device used as the <COW device>;
+3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
+   volume;
+4) the "original" volume (which uses the device number used by the original
+   source volume), whose table is replaced by a "snapshot-origin" mapping
+   from device #1.
+
+A fixed naming scheme is used, so with the following commands:
+
+lvcreate -L 1G -n base volumeGroup
+lvcreate -L 100M --snapshot -n snap volumeGroup/base
+
+we'll have this situation (with volumes in above order):
+
+# dmsetup table|grep volumeGroup
+
+volumeGroup-base-real: 0 2097152 linear 8:19 384
+volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
+volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
+volumeGroup-base: 0 2097152 snapshot-origin 254:11
+
+# ls -lL /dev/mapper/volumeGroup-*
+brw-------  1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+brw-------  1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
+brw-------  1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
+brw-------  1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
+
diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt
index 5df44dc894e..1829009db77 100644
--- a/Documentation/sparse.txt
+++ b/Documentation/sparse.txt
@@ -51,9 +51,9 @@ or you don't get any checking at all.
 Where to get sparse
 ~~~~~~~~~~~~~~~~~~~
 
-With BK, you can just get it from
+With git, you can just get it from
 
-        bk://sparse.bkbits.net/sparse
+        rsync://rsync.kernel.org/pub/scm/devel/sparse/sparse.git
 
 and DaveJ has tar-balls at
 
diff --git a/Documentation/usb/URB.txt b/Documentation/usb/URB.txt
index d59b95cc6f1..a49e5f2c2b4 100644
--- a/Documentation/usb/URB.txt
+++ b/Documentation/usb/URB.txt
@@ -1,5 +1,6 @@
 Revised: 2000-Dec-05.
 Again:   2002-Jul-06
+Again:   2005-Sep-19
 
     NOTE:
 
@@ -18,8 +19,8 @@ called USB Request Block, or URB for short.
   and deliver the data and status back. 
 
 - Execution of an URB is inherently an asynchronous operation, i.e. the 
-  usb_submit_urb(urb) call returns immediately after it has successfully queued 
-  the requested action. 
+  usb_submit_urb(urb) call returns immediately after it has successfully
+  queued the requested action.
 
 - Transfers for one URB can be canceled with usb_unlink_urb(urb) at any time. 
 
@@ -94,8 +95,9 @@ To free an URB, use
 
 	void usb_free_urb(struct urb *urb)
 
-You may not free an urb that you've submitted, but which hasn't yet been
-returned to you in a completion callback.
+You may free an urb that you've submitted, but which hasn't yet been
+returned to you in a completion callback.  It will automatically be
+deallocated when it is no longer in use.
 
 
 1.4. What has to be filled in?
@@ -145,30 +147,36 @@ to get seamless ISO streaming.
 
 1.6. How to cancel an already running URB?
 
-For an URB which you've submitted, but which hasn't been returned to
-your driver by the host controller, call
+There are two ways to cancel an URB you've submitted but which hasn't
+been returned to your driver yet.  For an asynchronous cancel, call
 
 	int usb_unlink_urb(struct urb *urb)
 
 It removes the urb from the internal list and frees all allocated
-HW descriptors. The status is changed to reflect unlinking. After 
-usb_unlink_urb() returns with that status code, you can free the URB
-with usb_free_urb().
+HW descriptors. The status is changed to reflect unlinking.  Note
+that the URB will not normally have finished when usb_unlink_urb()
+returns; you must still wait for the completion handler to be called.
 
-There is also an asynchronous unlink mode.  To use this, set the
-the URB_ASYNC_UNLINK flag in urb->transfer flags before calling
-usb_unlink_urb().  When using async unlinking, the URB will not
-normally be unlinked when usb_unlink_urb() returns.  Instead, wait
-for the completion handler to be called.
+To cancel an URB synchronously, call
+
+	void usb_kill_urb(struct urb *urb)
+
+It does everything usb_unlink_urb does, and in addition it waits
+until after the URB has been returned and the completion handler
+has finished.  It also marks the URB as temporarily unusable, so
+that if the completion handler or anyone else tries to resubmit it
+they will get a -EPERM error.  Thus you can be sure that when
+usb_kill_urb() returns, the URB is totally idle.
 
 
 1.7. What about the completion handler?
 
 The handler is of the following type:
 
-	typedef void (*usb_complete_t)(struct urb *);
+	typedef void (*usb_complete_t)(struct urb *, struct pt_regs *)
 
-i.e. it gets just the URB that caused the completion call.
+I.e., it gets the URB that caused the completion call, plus the
+register values at the time of the corresponding interrupt (if any).
 In the completion handler, you should have a look at urb->status to
 detect any USB errors. Since the context parameter is included in the URB,
 you can pass information to the completion handler. 
@@ -176,17 +184,11 @@ you can pass information to the completion handler.
 Note that even when an error (or unlink) is reported, data may have been
 transferred.  That's because USB transfers are packetized; it might take
 sixteen packets to transfer your 1KByte buffer, and ten of them might
-have transferred succesfully before the completion is called.
+have transferred succesfully before the completion was called.
 
 
 NOTE:  ***** WARNING *****
-Don't use urb->dev field in your completion handler; it's cleared
-as part of giving urbs back to drivers.  (Addressing an issue with
-ownership of periodic URBs, which was otherwise ambiguous.) Instead,
-use urb->context to hold all the data your driver needs.
-
-NOTE:  ***** WARNING *****
-Also, NEVER SLEEP IN A COMPLETION HANDLER.  These are normally called
+NEVER SLEEP IN A COMPLETION HANDLER.  These are normally called
 during hardware interrupt processing.  If you can, defer substantial
 work to a tasklet (bottom half) to keep system latencies low.  You'll
 probably need to use spinlocks to protect data structures you manipulate
@@ -229,24 +231,10 @@ ISO data with some other event stream.
 Interrupt transfers, like isochronous transfers, are periodic, and happen
 in intervals that are powers of two (1, 2, 4 etc) units.  Units are frames
 for full and low speed devices, and microframes for high speed ones.
-
-Currently, after you submit one interrupt URB, that urb is owned by the
-host controller driver until you cancel it with usb_unlink_urb().  You
-may unlink interrupt urbs in their completion handlers, if you need to.
-
-After a transfer completion is called, the URB is automagically resubmitted.
-THIS BEHAVIOR IS EXPECTED TO BE REMOVED!!
-
-Interrupt transfers may only send (or receive) the "maxpacket" value for
-the given interrupt endpoint; if you need more data, you will need to
-copy that data out of (or into) another buffer.  Similarly, you can't
-queue interrupt transfers.
-THESE RESTRICTIONS ARE EXPECTED TO BE REMOVED!!
-
-Note that this automagic resubmission model does make it awkward to use
-interrupt OUT transfers.  The portable solution involves unlinking those
-OUT urbs after the data is transferred, and perhaps submitting a final
-URB for a short packet.
-
 The usb_submit_urb() call modifies urb->interval to the implemented interval
 value that is less than or equal to the requested interval value.
+
+In Linux 2.6, unlike earlier versions, interrupt URBs are not automagically
+restarted when they complete.  They end when the completion handler is
+called, just like other URBs.  If you want an interrupt URB to be restarted,
+your completion handler must resubmit it.
diff --git a/MAINTAINERS b/MAINTAINERS
index dc8f3babcab..7d1dd5bad39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1063,8 +1063,6 @@ M:	wli@holomorphy.com
 S:	Maintained
 
 I2C SUBSYSTEM
-P:	Greg Kroah-Hartman
-M:	greg@kroah.com
 P:	Jean Delvare
 M:	khali@linux-fr.org
 L:	lm-sensors@lm-sensors.org
@@ -1404,6 +1402,18 @@ L:	linux-kernel@vger.kernel.org
 L:	fastboot@osdl.org
 S:	Maintained
 
+KPROBES
+P:	Prasanna S Panchamukhi
+M:	prasanna@in.ibm.com
+P:	Ananth N Mavinakayanahalli
+M:	ananth@in.ibm.com
+P:	Anil S Keshavamurthy
+M:	anil.s.keshavamurthy@intel.com
+P:	David S. Miller
+M:	davem@davemloft.net
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+
 LANMEDIA WAN CARD DRIVER
 P:	Andrew Stanley-Jones
 M:	asj@lanmedia.com
@@ -2266,6 +2276,12 @@ M:	kristen.c.accardi@intel.com
 L:	pcihpd-discuss@lists.sourceforge.net
 S:	Maintained
 
+SKGE, SKY2 10/100/1000 GIGABIT ETHERNET DRIVERS
+P:	Stephen Hemminger
+M:	shemminger@osdl.org
+L:	netdev@vger.kernel.org
+S:	Maintained
+
 SPARC (sparc32):
 P:	William L. Irwin
 M:	wli@holomorphy.com
diff --git a/Makefile b/Makefile
index 4e0d7c68d22..8cf6becf68d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 14
-EXTRAVERSION =-rc1
+EXTRAVERSION =-rc2
 NAME=Affluent Albatross
 
 # *DOCUMENTATION*
diff --git a/README b/README
index 76dd780d88e..d1edcc7adab 100644
--- a/README
+++ b/README
@@ -149,6 +149,9 @@ CONFIGURING the kernel:
 	"make gconfig"     X windows (Gtk) based configuration tool.
 	"make oldconfig"   Default all questions based on the contents of
 			   your existing ./.config file.
+	"make silentoldconfig"
+			   Like above, but avoids cluttering the screen
+			   with questions already answered.
    
 	NOTES on "make config":
 	- having unnecessary drivers will make the kernel bigger, and can
@@ -169,9 +172,6 @@ CONFIGURING the kernel:
 	  should probably answer 'n' to the questions for
           "development", "experimental", or "debugging" features.
 
- - Check the top Makefile for further site-dependent configuration
-   (default SVGA mode etc). 
-
 COMPILING the kernel:
 
  - Make sure you have gcc 2.95.3 available.
@@ -199,6 +199,9 @@ COMPILING the kernel:
    are installing a new kernel with the same version number as your
    working kernel, make a backup of your modules directory before you
    do a "make modules_install".
+   Alternatively, before compiling, use the kernel config option
+   "LOCALVERSION" to append a unique suffix to the regular kernel version.
+   LOCALVERSION can be set in the "General Setup" menu.
 
  - In order to boot your new kernel, you'll need to copy the kernel
    image (e.g. .../linux/arch/i386/boot/bzImage after compilation)
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index fa98dae3cd9..eb20c3afff5 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -127,6 +127,10 @@ common_shutdown_1(void *generic_ptr)
 	/* If booted from SRM, reset some of the original environment. */
 	if (alpha_using_srm) {
 #ifdef CONFIG_DUMMY_CONSOLE
+		/* If we've gotten here after SysRq-b, leave interrupt
+		   context before taking over the console. */
+		if (in_interrupt())
+			irq_exit();
 		/* This has the effect of resetting the VGA video origin.  */
 		take_over_console(&dummy_con, 0, MAX_NR_CONSOLES-1, 1);
 #endif
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 9e36b07fa94..d5da6b1b28e 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -395,6 +395,22 @@ clipper_init_irq(void)
  */
 
 static int __init
+isa_irq_fixup(struct pci_dev *dev, int irq)
+{
+	u8 irq8;
+
+	if (irq > 0)
+		return irq;
+
+	/* This interrupt is routed via ISA bridge, so we'll
+	   just have to trust whatever value the console might
+	   have assigned.  */
+	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq8);
+
+	return irq8 & 0xf;
+}
+
+static int __init
 dp264_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 {
 	static char irq_tab[6][5] __initdata = {
@@ -407,25 +423,13 @@ dp264_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 		{ 16+ 3, 16+ 3, 16+ 2, 16+ 1, 16+ 0}  /* IdSel 10 slot 3 */
 	};
 	const long min_idsel = 5, max_idsel = 10, irqs_per_slot = 5;
-
 	struct pci_controller *hose = dev->sysdata;
 	int irq = COMMON_TABLE_LOOKUP;
 
-	if (irq > 0) {
+	if (irq > 0)
 		irq += 16 * hose->index;
-	} else {
-		/* ??? The Contaq IDE controller on the ISA bridge uses
-		   "legacy" interrupts 14 and 15.  I don't know if anything
-		   can wind up at the same slot+pin on hose1, so we'll
-		   just have to trust whatever value the console might
-		   have assigned.  */
-
-		u8 irq8;
-		pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq8);
-		irq = irq8;
-	}
 
-	return irq;
+	return isa_irq_fixup(dev, irq);
 }
 
 static int __init
@@ -453,7 +457,8 @@ monet_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 		{    24,    24,    25,    26,    27}  /* IdSel 15 slot 5 PCI2*/
 	};
 	const long min_idsel = 3, max_idsel = 15, irqs_per_slot = 5;
-	return COMMON_TABLE_LOOKUP;
+
+	return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP);
 }
 
 static u8 __init
@@ -507,7 +512,8 @@ webbrick_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 		{    47,    47,    46,    45,    44}, /* IdSel 17 slot 3 */
 	};
 	const long min_idsel = 7, max_idsel = 17, irqs_per_slot = 5;
-	return COMMON_TABLE_LOOKUP;
+
+	return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP);
 }
 
 static int __init
@@ -524,14 +530,13 @@ clipper_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
 		{    -1,    -1,    -1,    -1,    -1}  /* IdSel 7 ISA Bridge */
 	};
 	const long min_idsel = 1, max_idsel = 7, irqs_per_slot = 5;
-
 	struct pci_controller *hose = dev->sysdata;
 	int irq = COMMON_TABLE_LOOKUP;
 
 	if (irq > 0)
 		irq += 16 * hose->index;
 
-	return irq;
+	return isa_irq_fixup(dev, irq);
 }
 
 static void __init
diff --git a/arch/arm/boot/compressed/ofw-shark.c b/arch/arm/boot/compressed/ofw-shark.c
index 7f6f5db0d06..465c54b6b12 100644
--- a/arch/arm/boot/compressed/ofw-shark.c
+++ b/arch/arm/boot/compressed/ofw-shark.c
@@ -256,5 +256,5 @@ asmlinkage void ofw_init(ofw_handle_t o, int *nomr, int *pointer)
 	temp[11]='\0';
 	mem_len = OF_getproplen(o,phandle, temp);
 	OF_getprop(o,phandle, temp, buffer, mem_len);
-	(unsigned char) pointer[32] = ((unsigned char *) buffer)[mem_len-2];
+	* ((unsigned char *) &pointer[32]) = ((unsigned char *) buffer)[mem_len-2];
 }
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7152bfbee58..93b5e8e5292 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -537,7 +537,7 @@ ENTRY(__switch_to)
 #ifdef CONFIG_CPU_MPCORE
 	clrex
 #else
-	strex	r3, r4, [ip]			@ Clear exclusive monitor
+	strex	r5, r4, [ip]			@ Clear exclusive monitor
 #endif
 #endif
 #if defined(CONFIG_CPU_XSCALE) && !defined(CONFIG_IWMMXT)
diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c
index 6c20c1188b6..1f6822dfae7 100644
--- a/arch/arm/kernel/io.c
+++ b/arch/arm/kernel/io.c
@@ -7,7 +7,7 @@
  * Copy data from IO memory space to "real" memory space.
  * This needs to be optimized.
  */
-void _memcpy_fromio(void *to, void __iomem *from, size_t count)
+void _memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
 {
 	unsigned char *t = to;
 	while (count) {
@@ -22,7 +22,7 @@ void _memcpy_fromio(void *to, void __iomem *from, size_t count)
  * Copy data from "real" memory space to IO memory space.
  * This needs to be optimized.
  */
-void _memcpy_toio(void __iomem *to, const void *from, size_t count)
+void _memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
 {
 	const unsigned char *f = from;
 	while (count) {
@@ -37,7 +37,7 @@ void _memcpy_toio(void __iomem *to, const void *from, size_t count)
  * "memset" on IO memory space.
  * This needs to be optimized.
  */
-void _memset_io(void __iomem *dst, int c, size_t count)
+void _memset_io(volatile void __iomem *dst, int c, size_t count)
 {
 	while (count) {
 		count--;
diff --git a/arch/arm/kernel/semaphore.c b/arch/arm/kernel/semaphore.c
index ac423e3e224..4c31f292305 100644
--- a/arch/arm/kernel/semaphore.c
+++ b/arch/arm/kernel/semaphore.c
@@ -178,7 +178,7 @@ int __down_trylock(struct semaphore * sem)
  * registers (r0 to r3 and lr), but not ip, as we use it as a return
  * value in some cases..
  */
-asm("	.section .sched.text,\"ax\"		\n\
+asm("	.section .sched.text,\"ax\",%progbits	\n\
 	.align	5				\n\
 	.globl	__down_failed			\n\
 __down_failed:					\n\
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 4554c961251..e7d22dbcb69 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -624,6 +624,9 @@ void __attribute__((noreturn)) __bug(const char *file, int line, void *data)
 		printk(" - extra data = %p", data);
 	printk("\n");
 	*(int *)0 = 0;
+
+	/* Avoid "noreturn function does return" */
+	for (;;);
 }
 EXPORT_SYMBOL(__bug);
 
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index ad2d66c93a5..08e58ecd44b 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -23,20 +23,20 @@ SECTIONS
 			*(.init.text)
 			_einittext = .;
 		__proc_info_begin = .;
-			*(.proc.info)
+			*(.proc.info.init)
 		__proc_info_end = .;
 		__arch_info_begin = .;
-			*(.arch.info)
+			*(.arch.info.init)
 		__arch_info_end = .;
 		__tagtable_begin = .;
-			*(.taglist)
+			*(.taglist.init)
 		__tagtable_end = .;
 		. = ALIGN(16);
 		__setup_start = .;
 			*(.init.setup)
 		__setup_end = .;
 		__early_begin = .;
-			*(__early_param)
+			*(.early_param.init)
 		__early_end = .;
 		__initcall_start = .;
 			*(.initcall1.init)
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index ae1fa099d5f..39b06ed8064 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -123,6 +123,7 @@ static void __init ixdp425_init(void)
 	platform_add_devices(ixdp425_devices, ARRAY_SIZE(ixdp425_devices));
 }
 
+#ifdef CONFIG_ARCH_IXDP465
 MACHINE_START(IXDP425, "Intel IXDP425 Development Platform")
 	/* Maintainer: MontaVista Software, Inc. */
 	.phys_ram	= PHYS_OFFSET,
@@ -134,7 +135,9 @@ MACHINE_START(IXDP425, "Intel IXDP425 Development Platform")
 	.boot_params	= 0x0100,
 	.init_machine	= ixdp425_init,
 MACHINE_END
+#endif
 
+#ifdef CONFIG_MACH_IXDP465
 MACHINE_START(IXDP465, "Intel IXDP465 Development Platform")
 	/* Maintainer: MontaVista Software, Inc. */
 	.phys_ram	= PHYS_OFFSET,
@@ -146,7 +149,9 @@ MACHINE_START(IXDP465, "Intel IXDP465 Development Platform")
 	.boot_params	= 0x0100,
 	.init_machine	= ixdp425_init,
 MACHINE_END
+#endif
 
+#ifdef CONFIG_ARCH_PRPMC1100
 MACHINE_START(IXCDP1100, "Intel IXCDP1100 Development Platform")
 	/* Maintainer: MontaVista Software, Inc. */
 	.phys_ram	= PHYS_OFFSET,
@@ -158,6 +163,7 @@ MACHINE_START(IXCDP1100, "Intel IXCDP1100 Development Platform")
 	.boot_params	= 0x0100,
 	.init_machine	= ixdp425_init,
 MACHINE_END
+#endif
 
 /*
  * Avila is functionally equivalent to IXDP425 except that it adds
diff --git a/arch/arm/mach-s3c2410/mach-anubis.c b/arch/arm/mach-s3c2410/mach-anubis.c
index f87aa0b669a..7c05f27fe1d 100644
--- a/arch/arm/mach-s3c2410/mach-anubis.c
+++ b/arch/arm/mach-s3c2410/mach-anubis.c
@@ -12,6 +12,7 @@
  *
  * Modifications:
  *	02-May-2005 BJD  Copied from mach-bast.c
+ *	20-Sep-2005 BJD  Added static to non-exported items
 */
 
 #include <linux/kernel.h>
@@ -232,7 +233,7 @@ static struct s3c24xx_board anubis_board __initdata = {
 	.clocks_count  = ARRAY_SIZE(anubis_clocks)
 };
 
-void __init anubis_map_io(void)
+static void __init anubis_map_io(void)
 {
 	/* initialise the clocks */
 
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 1a3367da640..ed1f07d7252 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -31,6 +31,7 @@
  *     17-Jul-2005 BJD  Changed to platform device for SuperIO 16550s
  *     25-Jul-2005 BJD  Removed ASIX static mappings
  *     27-Jul-2005 BJD  Ensure maximum frequency of i2c bus
+ *     20-Sep-2005 BJD  Added static to non-exported items
 */
 
 #include <linux/kernel.h>
@@ -428,7 +429,7 @@ static struct s3c24xx_board bast_board __initdata = {
 	.clocks_count  = ARRAY_SIZE(bast_clocks)
 };
 
-void __init bast_map_io(void)
+static void __init bast_map_io(void)
 {
 	/* initialise the clocks */
 
diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c
index 6ff1889fbd2..fb3cb01266e 100644
--- a/arch/arm/mach-s3c2410/mach-h1940.c
+++ b/arch/arm/mach-s3c2410/mach-h1940.c
@@ -24,6 +24,7 @@
  *     10-Jan-2005 BJD  Removed include of s3c2410.h
  *     14-Jan-2005 BJD  Added clock init
  *     10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA
+ *     20-Sep-2005 BJD  Added static to non-exported items
 */
 
 #include <linux/kernel.h>
@@ -147,7 +148,7 @@ static struct s3c24xx_board h1940_board __initdata = {
 	.devices_count = ARRAY_SIZE(h1940_devices)
 };
 
-void __init h1940_map_io(void)
+static void __init h1940_map_io(void)
 {
 	s3c24xx_init_io(h1940_iodesc, ARRAY_SIZE(h1940_iodesc));
 	s3c24xx_init_clocks(0);
@@ -155,13 +156,13 @@ void __init h1940_map_io(void)
 	s3c24xx_set_board(&h1940_board);
 }
 
-void __init h1940_init_irq(void)
+static void __init h1940_init_irq(void)
 {
 	s3c24xx_init_irq();
 
 }
 
-void __init h1940_init(void)
+static void __init h1940_init(void)
 {
 	set_s3c2410fb_info(&h1940_lcdcfg);
 }
diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c
index 66bf5bb2b3d..5c0f2b091f9 100644
--- a/arch/arm/mach-s3c2410/mach-n30.c
+++ b/arch/arm/mach-s3c2410/mach-n30.c
@@ -97,7 +97,7 @@ static struct s3c24xx_board n30_board __initdata = {
 	.devices_count = ARRAY_SIZE(n30_devices)
 };
 
-void __init n30_map_io(void)
+static void __init n30_map_io(void)
 {
 	s3c24xx_init_io(n30_iodesc, ARRAY_SIZE(n30_iodesc));
 	s3c24xx_init_clocks(0);
@@ -105,14 +105,14 @@ void __init n30_map_io(void)
 	s3c24xx_set_board(&n30_board);
 }
 
-void __init n30_init_irq(void)
+static void __init n30_init_irq(void)
 {
 	s3c24xx_init_irq();
 }
 
 /* GPB3 is the line that controls the pull-up for the USB D+ line */
 
-void __init n30_init(void)
+static void __init n30_init(void)
 {
 	s3c_device_i2c.dev.platform_data = &n30_i2ccfg;
 
diff --git a/arch/arm/mach-s3c2410/mach-nexcoder.c b/arch/arm/mach-s3c2410/mach-nexcoder.c
index d24c242414c..c22f8216032 100644
--- a/arch/arm/mach-s3c2410/mach-nexcoder.c
+++ b/arch/arm/mach-s3c2410/mach-nexcoder.c
@@ -136,7 +136,7 @@ static void __init nexcoder_sensorboard_init(void)
 	s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPF2_OUTP); // CAM_GPIO6 => CAM_PWRDN
 }
 
-void __init nexcoder_map_io(void)
+static void __init nexcoder_map_io(void)
 {
 	s3c24xx_init_io(nexcoder_iodesc, ARRAY_SIZE(nexcoder_iodesc));
 	s3c24xx_init_clocks(0);
diff --git a/arch/arm/mach-s3c2410/mach-otom.c b/arch/arm/mach-s3c2410/mach-otom.c
index d901ed492ff..ad1459e402e 100644
--- a/arch/arm/mach-s3c2410/mach-otom.c
+++ b/arch/arm/mach-s3c2410/mach-otom.c
@@ -105,7 +105,7 @@ static struct s3c24xx_board otom11_board __initdata = {
 };
 
 
-void __init otom11_map_io(void)
+static void __init otom11_map_io(void)
 {
 	s3c24xx_init_io(otom11_iodesc, ARRAY_SIZE(otom11_iodesc));
 	s3c24xx_init_clocks(0);
diff --git a/arch/arm/mach-s3c2410/mach-rx3715.c b/arch/arm/mach-s3c2410/mach-rx3715.c
index a73d61c1de4..22d9e070fd6 100644
--- a/arch/arm/mach-s3c2410/mach-rx3715.c
+++ b/arch/arm/mach-s3c2410/mach-rx3715.c
@@ -16,6 +16,7 @@
  *	14-Jan-2005 BJD  Added new clock init
  *	10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA
  *	14-Mar-2005 BJD  Fixed __iomem warnings
+ *	20-Sep-2005 BJD  Added static to non-exported items
 */
 
 #include <linux/kernel.h>
@@ -108,7 +109,7 @@ static struct s3c24xx_board rx3715_board __initdata = {
 	.devices_count = ARRAY_SIZE(rx3715_devices)
 };
 
-void __init rx3715_map_io(void)
+static void __init rx3715_map_io(void)
 {
 	s3c24xx_init_io(rx3715_iodesc, ARRAY_SIZE(rx3715_iodesc));
 	s3c24xx_init_clocks(16934000);
@@ -116,7 +117,7 @@ void __init rx3715_map_io(void)
 	s3c24xx_set_board(&rx3715_board);
 }
 
-void __init rx3715_init_irq(void)
+static void __init rx3715_init_irq(void)
 {
 	s3c24xx_init_irq();
 }
diff --git a/arch/arm/mach-s3c2410/mach-smdk2410.c b/arch/arm/mach-s3c2410/mach-smdk2410.c
index 67e903a700d..2eda55a6b67 100644
--- a/arch/arm/mach-s3c2410/mach-smdk2410.c
+++ b/arch/arm/mach-s3c2410/mach-smdk2410.c
@@ -28,6 +28,7 @@
  * Ben Dooks <ben@simtec.co.uk>
  *
  * 10-Mar-2005 LCVR  Changed S3C2410_VA to S3C24XX_VA
+ * 20-Sep-2005 BJD  Added static to non-exported items
  *
  ***********************************************************************/
 
@@ -97,7 +98,7 @@ static struct s3c24xx_board smdk2410_board __initdata = {
 	.devices_count = ARRAY_SIZE(smdk2410_devices)
 };
 
-void __init smdk2410_map_io(void)
+static void __init smdk2410_map_io(void)
 {
 	s3c24xx_init_io(smdk2410_iodesc, ARRAY_SIZE(smdk2410_iodesc));
 	s3c24xx_init_clocks(0);
@@ -105,7 +106,7 @@ void __init smdk2410_map_io(void)
 	s3c24xx_set_board(&smdk2410_board);
 }
 
-void __init smdk2410_init_irq(void)
+static void __init smdk2410_init_irq(void)
 {
 	s3c24xx_init_irq();
 }
diff --git a/arch/arm/mach-s3c2410/mach-smdk2440.c b/arch/arm/mach-s3c2410/mach-smdk2440.c
index 357522106f6..722ef46b630 100644
--- a/arch/arm/mach-s3c2410/mach-smdk2440.c
+++ b/arch/arm/mach-s3c2410/mach-smdk2440.c
@@ -18,6 +18,7 @@
  *	22-Feb-2005 BJD   Updated for 2.6.11-rc5 relesa
  *	10-Mar-2005 LCVR  Replaced S3C2410_VA by S3C24XX_VA
  *	14-Mar-2005 BJD	  void __iomem fixes
+ *	20-Sep-2005 BJD   Added static to non-exported items
 */
 
 #include <linux/kernel.h>
@@ -98,7 +99,7 @@ static struct s3c24xx_board smdk2440_board __initdata = {
 	.devices_count = ARRAY_SIZE(smdk2440_devices)
 };
 
-void __init smdk2440_map_io(void)
+static void __init smdk2440_map_io(void)
 {
 	s3c24xx_init_io(smdk2440_iodesc, ARRAY_SIZE(smdk2440_iodesc));
 	s3c24xx_init_clocks(16934400);
@@ -106,7 +107,7 @@ void __init smdk2440_map_io(void)
 	s3c24xx_set_board(&smdk2440_board);
 }
 
-void __init smdk2440_machine_init(void)
+static void __init smdk2440_machine_init(void)
 {
 	/* Configure the LEDs (even if we have no LED support)*/
 
diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c
index 8f9ab2893df..663a7f98fc0 100644
--- a/arch/arm/mach-s3c2410/mach-vr1000.c
+++ b/arch/arm/mach-s3c2410/mach-vr1000.c
@@ -28,6 +28,7 @@
  *     10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA
  *     14-Mar-2006 BJD  void __iomem fixes
  *     22-Jun-2006 BJD  Added DM9000 platform information
+ *     20-Sep-2005 BJD  Added static to non-exported items
 */
 
 #include <linux/kernel.h>
@@ -347,7 +348,7 @@ static void vr1000_power_off(void)
 	s3c2410_gpio_setpin(S3C2410_GPB9, 1);
 }
 
-void __init vr1000_map_io(void)
+static void __init vr1000_map_io(void)
 {
 	/* initialise clock sources */
 
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index 279e3afa3c3..f085d68e568 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -39,3 +39,6 @@ extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops);
 
 struct irda_platform_data;
 void sa11x0_set_irda_data(struct irda_platform_data *irda);
+
+struct mcp_plat_data;
+void sa11x0_set_mcp_data(struct mcp_plat_data *data);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 0b6c4db44e0..4a884baf3b9 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -233,7 +233,17 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if (in_interrupt() || !mm)
 		goto no_context;
 
-	down_read(&mm->mmap_sem);
+	/*
+	 * As per x86, we may deadlock here.  However, since the kernel only
+	 * validly references user space from well defined areas of the code,
+	 * we can bug out early if this is from code which shouldn't.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
+			goto no_context;
+		down_read(&mm->mmap_sem);
+	}
+
 	fault = __do_page_fault(mm, addr, fsr, tsk);
 	up_read(&mm->mmap_sem);
 
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
index 1d739d282a4..82ec954e45b 100644
--- a/arch/arm/mm/proc-arm1020.S
+++ b/arch/arm/mm/proc-arm1020.S
@@ -509,7 +509,7 @@ cpu_arm1020_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__arm1020_proc_info,#object
 __arm1020_proc_info:
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
index 9b725665b5c..7375fe930f7 100644
--- a/arch/arm/mm/proc-arm1020e.S
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -491,7 +491,7 @@ cpu_arm1020e_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__arm1020e_proc_info,#object
 __arm1020e_proc_info:
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
index 37b70fa21c7..6ca639094d6 100644
--- a/arch/arm/mm/proc-arm1022.S
+++ b/arch/arm/mm/proc-arm1022.S
@@ -473,7 +473,7 @@ cpu_arm1022_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__arm1022_proc_info,#object
 __arm1022_proc_info:
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
index 931b690d1be..10317e4f55d 100644
--- a/arch/arm/mm/proc-arm1026.S
+++ b/arch/arm/mm/proc-arm1026.S
@@ -469,7 +469,7 @@ cpu_arm1026_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__arm1026_proc_info,#object
 __arm1026_proc_info:
diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S
index d0f1bbb48f6..8e7e1e70ab0 100644
--- a/arch/arm/mm/proc-arm6_7.S
+++ b/arch/arm/mm/proc-arm6_7.S
@@ -332,7 +332,7 @@ cpu_arm710_name:
 
 		.align
 
-		.section ".proc.info", #alloc, #execinstr
+		.section ".proc.info.init", #alloc, #execinstr
 
 		.type	__arm6_proc_info, #object
 __arm6_proc_info:
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
index c69c9de3239..a13e0184d34 100644
--- a/arch/arm/mm/proc-arm720.S
+++ b/arch/arm/mm/proc-arm720.S
@@ -222,7 +222,7 @@ cpu_arm720_name:
  * See linux/include/asm-arm/procinfo.h for a definition of this structure.
  */
 	
-		.section ".proc.info", #alloc, #execinstr
+		.section ".proc.info.init", #alloc, #execinstr
 
 		.type	__arm710_proc_info, #object
 __arm710_proc_info:
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 0f490a0fcb7..d1651389999 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -452,7 +452,7 @@ cpu_arm920_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__arm920_proc_info,#object
 __arm920_proc_info:
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
index 62bc34a139e..23b8ed97f4e 100644
--- a/arch/arm/mm/proc-arm922.S
+++ b/arch/arm/mm/proc-arm922.S
@@ -456,7 +456,7 @@ cpu_arm922_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__arm922_proc_info,#object
 __arm922_proc_info:
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
index ee49aa2ca78..ee95c52db51 100644
--- a/arch/arm/mm/proc-arm925.S
+++ b/arch/arm/mm/proc-arm925.S
@@ -521,7 +521,7 @@ cpu_arm925_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__arm925_proc_info,#object
 __arm925_proc_info:
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index bb95cc9fed0..7d042dc20c4 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -471,7 +471,7 @@ cpu_arm926_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__arm926_proc_info,#object
 __arm926_proc_info:
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
index 34f7e7d3f41..bd330c4075a 100644
--- a/arch/arm/mm/proc-sa110.S
+++ b/arch/arm/mm/proc-sa110.S
@@ -249,7 +249,7 @@ cpu_sa110_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__sa110_proc_info,#object
 __sa110_proc_info:
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
index ca14f80d5ab..91b89124c0d 100644
--- a/arch/arm/mm/proc-sa1100.S
+++ b/arch/arm/mm/proc-sa1100.S
@@ -280,7 +280,7 @@ cpu_sa1110_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__sa1100_proc_info,#object
 __sa1100_proc_info:
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index eb34823c9db..caf3b19b167 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -240,7 +240,7 @@ cpu_elf_name:
 	.size	cpu_elf_name, . - cpu_elf_name
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	/*
 	 * Match any ARMv6 processor core.
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index b88de270014..861b3594728 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -578,7 +578,7 @@ cpu_pxa270_name:
 
 	.align
 
-	.section ".proc.info", #alloc, #execinstr
+	.section ".proc.info.init", #alloc, #execinstr
 
 	.type	__80200_proc_info,#object
 __80200_proc_info:
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index 56405dbfd73..a18983a3c93 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -233,6 +233,23 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
 		simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512);
 }
 
+static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len)
+{
+
+	int scatterlen = sc->use_sg;
+	struct scatterlist *slp;
+
+	if (scatterlen == 0)
+		memcpy(sc->request_buffer, buf, len);
+	else for (slp = (struct scatterlist *)sc->buffer; scatterlen-- > 0 && len > 0; slp++) {
+		unsigned thislen = min(len, slp->length);
+
+		memcpy(page_address(slp->page) + slp->offset, buf, thislen);
+		slp++;
+		len -= thislen;
+	}
+}
+
 static int
 simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
@@ -240,6 +257,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 	char fname[MAX_ROOT_LEN+16];
 	size_t disk_size;
 	char *buf;
+	char localbuf[36];
 #if DEBUG_SIMSCSI
 	register long sp asm ("sp");
 
@@ -263,7 +281,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 				/* disk doesn't exist... */
 				break;
 			}
-			buf = sc->request_buffer;
+			buf = localbuf;
 			buf[0] = 0;	/* magnetic disk */
 			buf[1] = 0;	/* not a removable medium */
 			buf[2] = 2;	/* SCSI-2 compliant device */
@@ -273,6 +291,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[6] = 0;	/* reserved */
 			buf[7] = 0;	/* various flags */
 			memcpy(buf + 8, "HP      SIMULATED DISK  0.00",  28);
+			simscsi_fillresult(sc, buf, 36);
 			sc->result = GOOD;
 			break;
 
@@ -304,16 +323,13 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			simscsi_readwrite10(sc, SSC_WRITE);
 			break;
 
-
 		      case READ_CAPACITY:
 			if (desc[target_id] < 0 || sc->request_bufflen < 8) {
 				break;
 			}
-			buf = sc->request_buffer;
-
+			buf = localbuf;
 			disk_size = simscsi_get_disk_size(desc[target_id]);
 
-			/* pretend to be a 1GB disk (partition table contains real stuff): */
 			buf[0] = (disk_size >> 24) & 0xff;
 			buf[1] = (disk_size >> 16) & 0xff;
 			buf[2] = (disk_size >>  8) & 0xff;
@@ -323,13 +339,14 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 			buf[5] = 0;
 			buf[6] = 2;
 			buf[7] = 0;
+			simscsi_fillresult(sc, buf, 8);
 			sc->result = GOOD;
 			break;
 
 		      case MODE_SENSE:
 		      case MODE_SENSE_10:
 			/* sd.c uses this to determine whether disk does write-caching. */
-			memset(sc->request_buffer, 0, 128);
+			simscsi_fillresult(sc, (char *)empty_zero_page, sc->request_bufflen);
 			sc->result = GOOD;
 			break;
 
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 499a065f4e6..db32fc1d393 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -489,24 +489,27 @@ ia64_state_save:
 	;;
 	st8 [temp1]=r17,16	// pal_min_state
 	st8 [temp2]=r6,16	// prev_IA64_KR_CURRENT
+	mov r6=IA64_KR(CURRENT_STACK)
+	;;
+	st8 [temp1]=r6,16	// prev_IA64_KR_CURRENT_STACK
+	st8 [temp2]=r0,16	// prev_task, starts off as NULL
 	mov r6=cr.ifa
 	;;
-	st8 [temp1]=r0,16	// prev_task, starts off as NULL
-	st8 [temp2]=r12,16	// cr.isr
+	st8 [temp1]=r12,16	// cr.isr
+	st8 [temp2]=r6,16	// cr.ifa
 	mov r12=cr.itir
 	;;
-	st8 [temp1]=r6,16	// cr.ifa
-	st8 [temp2]=r12,16	// cr.itir
+	st8 [temp1]=r12,16	// cr.itir
+	st8 [temp2]=r11,16	// cr.iipa
 	mov r12=cr.iim
 	;;
-	st8 [temp1]=r11,16	// cr.iipa
-	st8 [temp2]=r12,16	// cr.iim
-	mov r6=cr.iha
+	st8 [temp1]=r12,16	// cr.iim
 (p1)	mov r12=IA64_MCA_COLD_BOOT
 (p2)	mov r12=IA64_INIT_WARM_BOOT
+	mov r6=cr.iha
 	;;
-	st8 [temp1]=r6,16	// cr.iha
-	st8 [temp2]=r12		// os_status, default is cold boot
+	st8 [temp2]=r6,16	// cr.iha
+	st8 [temp1]=r12		// os_status, default is cold boot
 	mov r6=IA64_MCA_SAME_CONTEXT
 	;;
 	st8 [temp1]=r6		// context, default is same context
@@ -823,9 +826,12 @@ ia64_state_restore:
 	ld8 r12=[temp1],16	// sal_ra
 	ld8 r9=[temp2],16	// sal_gp
 	;;
-	ld8 r22=[temp1],24	// pal_min_state, virtual.  skip prev_task
+	ld8 r22=[temp1],16	// pal_min_state, virtual
 	ld8 r21=[temp2],16	// prev_IA64_KR_CURRENT
 	;;
+	ld8 r16=[temp1],16	// prev_IA64_KR_CURRENT_STACK
+	ld8 r20=[temp2],16	// prev_task
+	;;
 	ld8 temp3=[temp1],16	// cr.isr
 	ld8 temp4=[temp2],16	// cr.ifa
 	;;
@@ -846,6 +852,45 @@ ia64_state_restore:
 	ld8 r8=[temp1]		// os_status
 	ld8 r10=[temp2]		// context
 
+	/* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to.  To
+	 * avoid any dependencies on the algorithm in ia64_switch_to(), just
+	 * purge any existing CURRENT_STACK mapping and insert the new one.
+	 *
+	 * r16 contains prev_IA64_KR_CURRENT_STACK, r21 contains
+	 * prev_IA64_KR_CURRENT, these values may have been changed by the C
+	 * code.  Do not use r8, r9, r10, r22, they contain values ready for
+	 * the return to SAL.
+	 */
+
+	mov r15=IA64_KR(CURRENT_STACK)		// physical granule mapped by IA64_TR_CURRENT_STACK
+	;;
+	shl r15=r15,IA64_GRANULE_SHIFT
+	;;
+	dep r15=-1,r15,61,3			// virtual granule
+	mov r18=IA64_GRANULE_SHIFT<<2		// for cr.itir.ps
+	;;
+	ptr.d r15,r18
+	;;
+	srlz.d
+
+	extr.u r19=r21,61,3			// r21 = prev_IA64_KR_CURRENT
+	shl r20=r16,IA64_GRANULE_SHIFT		// r16 = prev_IA64_KR_CURRENT_STACK
+	movl r21=PAGE_KERNEL			// page properties
+	;;
+	mov IA64_KR(CURRENT_STACK)=r16
+	cmp.ne p6,p0=RGN_KERNEL,r19		// new stack is in the kernel region?
+	or r21=r20,r21				// construct PA | page properties
+(p6)	br.spnt 1f				// the dreaded cpu 0 idle task in region 5:(
+	;;
+	mov cr.itir=r18
+	mov cr.ifa=r21
+	mov r20=IA64_TR_CURRENT_STACK
+	;;
+	itr.d dtr[r20]=r21
+	;;
+	srlz.d
+1:
+
 	br.sptk b0
 
 //EndStub//////////////////////////////////////////////////////////////////////
@@ -982,6 +1027,7 @@ ia64_set_kernel_registers:
 	add temp4=temp4, temp1	// &struct ia64_sal_os_state.os_gp
 	add r12=temp1, temp3	// kernel stack pointer on MCA/INIT stack
 	add r13=temp1, r3	// set current to start of MCA/INIT stack
+	add r20=temp1, r3	// physical start of MCA/INIT stack
 	;;
 	ld8 r1=[temp4]		// OS GP from SAL OS state
 	;;
@@ -991,7 +1037,35 @@ ia64_set_kernel_registers:
 	;;
 	mov IA64_KR(CURRENT)=r13
 
-	// FIXME: do I need to wire IA64_KR_CURRENT_STACK and IA64_TR_CURRENT_STACK?
+	/* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack.  To avoid
+	 * any dependencies on the algorithm in ia64_switch_to(), just purge
+	 * any existing CURRENT_STACK mapping and insert the new one.
+	 */
+
+	mov r16=IA64_KR(CURRENT_STACK)		// physical granule mapped by IA64_TR_CURRENT_STACK
+	;;
+	shl r16=r16,IA64_GRANULE_SHIFT
+	;;
+	dep r16=-1,r16,61,3			// virtual granule
+	mov r18=IA64_GRANULE_SHIFT<<2		// for cr.itir.ps
+	;;
+	ptr.d r16,r18
+	;;
+	srlz.d
+
+	shr.u r16=r20,IA64_GRANULE_SHIFT	// r20 = physical start of MCA/INIT stack
+	movl r21=PAGE_KERNEL			// page properties
+	;;
+	mov IA64_KR(CURRENT_STACK)=r16
+	or r21=r20,r21				// construct PA | page properties
+	;;
+	mov cr.itir=r18
+	mov cr.ifa=r13
+	mov r20=IA64_TR_CURRENT_STACK
+	;;
+	itr.d dtr[r20]=r21
+	;;
+	srlz.d
 
 	br.sptk b0
 
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 80f83d6cdbf..f081c60ab20 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -56,8 +56,9 @@ static struct page *page_isolate[MAX_PAGE_ISOLATE];
 static int num_page_isolate = 0;
 
 typedef enum {
-	ISOLATE_NG = 0,
-	ISOLATE_OK = 1
+	ISOLATE_NG,
+	ISOLATE_OK,
+	ISOLATE_NONE
 } isolate_status_t;
 
 /*
@@ -74,7 +75,7 @@ static struct {
  * @paddr:	poisoned memory location
  *
  * Return value:
- *	ISOLATE_OK / ISOLATE_NG
+ *	one of isolate_status_t, ISOLATE_OK/NG/NONE.
  */
 
 static isolate_status_t
@@ -85,7 +86,10 @@ mca_page_isolate(unsigned long paddr)
 
 	/* whether physical address is valid or not */
 	if (!ia64_phys_addr_valid(paddr))
-		return ISOLATE_NG;
+		return ISOLATE_NONE;
+
+	if (!pfn_valid(paddr))
+		return ISOLATE_NONE;
 
 	/* convert physical address to physical page number */
 	p = pfn_to_page(paddr>>PAGE_SHIFT);
@@ -122,10 +126,15 @@ mca_handler_bh(unsigned long paddr)
 		current->pid, current->comm);
 
 	spin_lock(&mca_bh_lock);
-	if (mca_page_isolate(paddr) == ISOLATE_OK) {
+	switch (mca_page_isolate(paddr)) {
+	case ISOLATE_OK:
 		printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
-	} else {
+		break;
+	case ISOLATE_NG:
 		printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr);
+		break;
+	default:
+		break;
 	}
 	spin_unlock(&mca_bh_lock);
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
new file mode 100644
index 00000000000..edfac467b9e
--- /dev/null
+++ b/arch/powerpc/Kconfig
@@ -0,0 +1,861 @@
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+
+mainmenu "Linux/PowerPC Kernel Configuration"
+
+config PPC64
+	bool "64-bit kernel"
+	default n
+	help
+	  This option selects whether a 32-bit or a 64-bit kernel
+	  will be built.
+
+config PPC32
+	bool
+	default y if !PPC64
+
+config 64BIT
+	bool
+	default y if PPC64
+
+config PPC_MERGE
+	def_bool y
+
+config MMU
+	bool
+	default y
+
+config UID16
+	bool
+
+config GENERIC_HARDIRQS
+	bool
+	default y
+
+config RWSEM_GENERIC_SPINLOCK
+	bool
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+	default y
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+config PPC
+	bool
+	default y
+
+config EARLY_PRINTK
+	bool
+	default y if PPC64
+
+config COMPAT
+	bool
+	default y if PPC64
+
+config SYSVIPC_COMPAT
+	bool
+	depends on COMPAT && SYSVIPC
+	default y
+
+# All PPC32s use generic nvram driver through ppc_md
+config GENERIC_NVRAM
+	bool
+	default y if PPC32
+
+config SCHED_NO_NO_OMIT_FRAME_POINTER
+	bool
+	default y
+
+config ARCH_MAY_HAVE_PC_FDC
+	bool
+	default y
+
+menu "Processor support"
+choice
+	prompt "Processor Type"
+	depends on PPC32
+	default 6xx
+
+config 6xx
+	bool "6xx/7xx/74xx"
+	select PPC_FPU
+	help
+	  There are four families of PowerPC chips supported.  The more common
+	  types (601, 603, 604, 740, 750, 7400), the Motorola embedded
+	  versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC
+	  embedded versions (403 and 405) and the high end 64 bit Power
+	  processors (POWER 3, POWER4, and IBM PPC970 also known as G5).
+	  
+	  Unless you are building a kernel for one of the embedded processor
+	  systems, 64 bit IBM RS/6000 or an Apple G5, choose 6xx.
+	  Note that the kernel runs in 32-bit mode even on 64-bit chips.
+
+config PPC_52xx
+	bool "Freescale 52xx"
+	
+config PPC_82xx
+	bool "Freescale 82xx"
+
+config PPC_83xx
+	bool "Freescale 83xx"
+
+config 40x
+	bool "AMCC 40x"
+
+config 44x
+	bool "AMCC 44x"
+
+config PPC64BRIDGE
+	select PPC_FPU
+	bool "POWER3, POWER4 and PPC970 (G5)"
+
+config 8xx
+	bool "Freescale 8xx"
+
+config E200
+	bool "Freescale e200"
+
+config E500
+	bool "Freescale e500"
+endchoice
+
+config POWER4_ONLY
+	bool "Optimize for POWER4"
+	depends on PPC64 || PPC64BRIDGE
+	default n
+	---help---
+	  Cause the compiler to optimize for POWER4/POWER5/PPC970 processors.
+	  The resulting binary will not work on POWER3 or RS64 processors
+	  when compiled with binutils 2.15 or later.
+
+config POWER3
+	bool
+	depends on PPC64 || PPC64BRIDGE
+	default y if !POWER4_ONLY
+
+config POWER4
+	depends on PPC64 || PPC64BRIDGE
+	def_bool y
+
+config PPC_FPU
+	bool
+	default y if PPC64
+
+config BOOKE
+	bool
+	depends on E200 || E500
+	default y
+
+config FSL_BOOKE
+	bool
+	depends on E200 || E500
+	default y
+
+config PTE_64BIT
+	bool
+	depends on 44x || E500
+	default y if 44x
+	default y if E500 && PHYS_64BIT
+
+config PHYS_64BIT
+	bool 'Large physical address support' if E500
+	depends on 44x || E500
+	default y if 44x
+	---help---
+	  This option enables kernel support for larger than 32-bit physical
+	  addresses.  This features is not be available on all e500 cores.
+
+	  If in doubt, say N here.
+
+config ALTIVEC
+	bool "AltiVec Support"
+	depends on 6xx || POWER4
+	---help---
+	  This option enables kernel support for the Altivec extensions to the
+	  PowerPC processor. The kernel currently supports saving and restoring
+	  altivec registers, and turning on the 'altivec enable' bit so user
+	  processes can execute altivec instructions.
+
+	  This option is only usefully if you have a processor that supports
+	  altivec (G4, otherwise known as 74xx series), but does not have
+	  any affect on a non-altivec cpu (it does, however add code to the
+	  kernel).
+
+	  If in doubt, say Y here.
+
+config SPE
+	bool "SPE Support"
+	depends on E200 || E500
+	---help---
+	  This option enables kernel support for the Signal Processing
+	  Extensions (SPE) to the PowerPC processor. The kernel currently
+	  supports saving and restoring SPE registers, and turning on the
+	  'spe enable' bit so user processes can execute SPE instructions.
+
+	  This option is only useful if you have a processor that supports
+	  SPE (e500, otherwise known as 85xx series), but does not have any
+	  effect on a non-spe cpu (it does, however add code to the kernel).
+
+	  If in doubt, say Y here.
+
+config PPC_STD_MMU
+	bool
+	depends on 6xx || POWER3 || POWER4 || PPC64
+	default y
+
+config PPC_STD_MMU_32
+	def_bool y
+	depends on PPC_STD_MMU && PPC32
+
+config SMP
+	depends on PPC_STD_MMU
+	bool "Symmetric multi-processing support"
+	---help---
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, say N. If you have a system with more
+	  than one CPU, say Y.  Note that the kernel does not currently
+	  support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
+	  since they have inadequate hardware support for multiprocessor
+	  operation.
+
+	  If you say N here, the kernel will run on single and multiprocessor
+	  machines, but will use only one CPU of a multiprocessor machine. If
+	  you say Y here, the kernel will run on single-processor machines.
+	  On a single-processor machine, the kernel will run faster if you say
+	  N here.
+
+	  If you don't know what to do here, say N.
+
+config NR_CPUS
+	int "Maximum number of CPUs (2-32)"
+	range 2 128
+	depends on SMP
+	default "32" if PPC64
+	default "4"
+
+config NOT_COHERENT_CACHE
+	bool
+	depends on 4xx || 8xx || E200
+	default y
+endmenu
+
+source "init/Kconfig"
+
+menu "Platform support"
+	depends on PPC64 || 6xx
+
+choice
+	prompt "Machine type"
+	default PPC_MULTIPLATFORM
+
+config PPC_MULTIPLATFORM
+	bool "Generic desktop/server/laptop"
+	help
+	  Select this option if configuring for an IBM pSeries or
+	  RS/6000 machine, an Apple machine, or a PReP, CHRP,
+	  Maple or Cell-based machine.
+
+config PPC_ISERIES
+	bool "IBM Legacy iSeries"
+	depends on PPC64
+
+config EMBEDDED6xx
+	bool "Embedded 6xx/7xx/7xxx-based board"
+	depends on PPC32
+
+config APUS
+	bool "Amiga-APUS"
+	depends on PPC32 && BROKEN
+	help
+	  Select APUS if configuring for a PowerUP Amiga.
+	  More information is available at:
+	  <http://linux-apus.sourceforge.net/>.
+endchoice
+
+config PPC_PSERIES
+	depends on PPC_MULTIPLATFORM && PPC64
+	bool "  IBM pSeries & new (POWER5-based) iSeries"
+	default y
+
+config PPC_CHRP
+	bool "  Common Hardware Reference Platform (CHRP) based machines"
+	depends on PPC_MULTIPLATFORM && PPC32
+	default y
+
+config PPC_PMAC
+	bool "  Apple PowerMac based machines"
+	depends on PPC_MULTIPLATFORM
+	default y
+
+config PPC_PMAC64
+	bool
+	depends on PPC_PMAC && POWER4
+	default y
+
+config PPC_PREP
+	bool "  PowerPC Reference Platform (PReP) based machines"
+	depends on PPC_MULTIPLATFORM && PPC32
+	default y
+
+config PPC_MAPLE
+	depends on PPC_MULTIPLATFORM && PPC64
+	bool "  Maple 970FX Evaluation Board"
+	select U3_DART
+	select MPIC_BROKEN_U3
+	default n
+	help
+          This option enables support for the Maple 970FX Evaluation Board.
+	  For more informations, refer to <http://www.970eval.com>
+
+config PPC_BPA
+	bool "  Broadband Processor Architecture"
+	depends on PPC_MULTIPLATFORM && PPC64
+
+config PPC_OF
+	bool
+	depends on PPC_MULTIPLATFORM	# for now
+	default y
+
+config XICS
+	depends on PPC_PSERIES
+	bool
+	default y
+
+config U3_DART
+	bool 
+	depends on PPC_MULTIPLATFORM && PPC64
+	default n
+
+config MPIC
+	depends on PPC_PSERIES || PPC_PMAC || PPC_MAPLE
+	bool
+	default y
+
+config MPIC_BROKEN_U3
+	bool
+	depends on PPC_MAPLE
+	default y
+
+config BPA_IIC
+	depends on PPC_BPA
+	bool
+	default y
+
+config IBMVIO
+	depends on PPC_PSERIES || PPC_ISERIES
+	bool
+	default y
+
+source "drivers/cpufreq/Kconfig"
+
+config CPU_FREQ_PMAC
+	bool "Support for Apple PowerBooks"
+	depends on CPU_FREQ && ADB_PMU && PPC32
+	select CPU_FREQ_TABLE
+	help
+	  This adds support for frequency switching on Apple PowerBooks,
+	  this currently includes some models of iBook & Titanium
+	  PowerBook.
+
+config PPC601_SYNC_FIX
+	bool "Workarounds for PPC601 bugs"
+	depends on 6xx && (PPC_PREP || PPC_PMAC)
+	help
+	  Some versions of the PPC601 (the first PowerPC chip) have bugs which
+	  mean that extra synchronization instructions are required near
+	  certain instructions, typically those that make major changes to the
+	  CPU state.  These extra instructions reduce performance slightly.
+	  If you say N here, these extra instructions will not be included,
+	  resulting in a kernel which will run faster but may not run at all
+	  on some systems with the PPC601 chip.
+
+	  If in doubt, say Y here.
+
+config TAU
+	bool "Thermal Management Support"
+	depends on 6xx
+	help
+	  G3 and G4 processors have an on-chip temperature sensor called the
+	  'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die
+	  temperature within 2-4 degrees Celsius. This option shows the current
+	  on-die temperature in /proc/cpuinfo if the cpu supports it.
+
+	  Unfortunately, on some chip revisions, this sensor is very inaccurate
+	  and in some cases, does not work at all, so don't assume the cpu
+	  temp is actually what /proc/cpuinfo says it is.
+
+config TAU_INT
+	bool "Interrupt driven TAU driver (DANGEROUS)"
+	depends on TAU
+	---help---
+	  The TAU supports an interrupt driven mode which causes an interrupt
+	  whenever the temperature goes out of range. This is the fastest way
+	  to get notified the temp has exceeded a range. With this option off,
+	  a timer is used to re-check the temperature periodically.
+
+	  However, on some cpus it appears that the TAU interrupt hardware
+	  is buggy and can cause a situation which would lead unexplained hard
+	  lockups.
+
+	  Unless you are extending the TAU driver, or enjoy kernel/hardware
+	  debugging, leave this option off.
+
+config TAU_AVERAGE
+	bool "Average high and low temp"
+	depends on TAU
+	---help---
+	  The TAU hardware can compare the temperature to an upper and lower
+	  bound.  The default behavior is to show both the upper and lower
+	  bound in /proc/cpuinfo. If the range is large, the temperature is
+	  either changing a lot, or the TAU hardware is broken (likely on some
+	  G4's). If the range is small (around 4 degrees), the temperature is
+	  relatively stable.  If you say Y here, a single temperature value,
+	  halfway between the upper and lower bounds, will be reported in
+	  /proc/cpuinfo.
+
+	  If in doubt, say N here.
+endmenu
+
+source arch/powerpc/platforms/embedded6xx/Kconfig
+source arch/powerpc/platforms/4xx/Kconfig
+source arch/powerpc/platforms/85xx/Kconfig
+source arch/powerpc/platforms/8xx/Kconfig
+
+menu "Kernel options"
+
+config HIGHMEM
+	bool "High memory support"
+	depends on PPC32
+
+source kernel/Kconfig.hz
+source kernel/Kconfig.preempt
+source "fs/Kconfig.binfmt"
+
+# We optimistically allocate largepages from the VM, so make the limit
+# large enough (16MB). This badly named config option is actually
+# max order + 1
+config FORCE_MAX_ZONEORDER
+	int
+	depends on PPC64
+	default "13"
+
+config MATH_EMULATION
+	bool "Math emulation"
+	depends on 4xx || 8xx || E200 || E500
+	---help---
+	  Some PowerPC chips designed for embedded applications do not have
+	  a floating-point unit and therefore do not implement the
+	  floating-point instructions in the PowerPC instruction set.  If you
+	  say Y here, the kernel will include code to emulate a floating-point
+	  unit, which will allow programs that use floating-point
+	  instructions to run.
+
+config IOMMU_VMERGE
+	bool "Enable IOMMU virtual merging (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && PPC64
+	default n
+	help
+	  Cause IO segments sent to a device for DMA to be merged virtually
+	  by the IOMMU when they happen to have been allocated contiguously.
+	  This doesn't add pressure to the IOMMU allocator. However, some
+	  drivers don't support getting large merged segments coming back
+	  from *_map_sg(). Say Y if you know the drivers you are using are
+	  properly handling this case.
+
+config HOTPLUG_CPU
+	bool "Support for enabling/disabling CPUs"
+	depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
+	---help---
+	  Say Y here to be able to disable and re-enable individual
+	  CPUs at runtime on SMP machines.
+
+	  Say N if you are unsure.
+
+config KEXEC
+	bool "kexec system call (EXPERIMENTAL)"
+	depends on PPC_MULTIPLATFORM && EXPERIMENTAL
+	help
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is indepedent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similiarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shutdown, so do not be surprised if this code does not
+	  initially work for you.  It may help to enable device hotplugging
+	  support.  As of this writing the exact hardware interface is
+	  strongly in flux, so no good recommendation can be made.
+
+config EMBEDDEDBOOT
+	bool
+	depends on 8xx || 8260
+	default y
+
+config PC_KEYBOARD
+	bool "PC PS/2 style Keyboard"
+	depends on 4xx || CPM2
+
+config PPCBUG_NVRAM
+	bool "Enable reading PPCBUG NVRAM during boot" if PPLUS || LOPEC
+	default y if PPC_PREP
+
+config IRQ_ALL_CPUS
+	bool "Distribute interrupts on all CPUs by default"
+	depends on SMP && !MV64360
+	help
+	  This option gives the kernel permission to distribute IRQs across
+	  multiple CPUs.  Saying N here will route all IRQs to the first
+	  CPU.  Generally saying Y is safe, although some problems have been
+	  reported with SMP Power Macintoshes with this option enabled.
+
+source "arch/powerpc/platforms/pseries/Kconfig"
+
+config ARCH_SELECT_MEMORY_MODEL
+	def_bool y
+	depends on PPC64
+
+config ARCH_FLATMEM_ENABLE
+       def_bool y
+       depends on PPC64 && !NUMA
+
+config ARCH_DISCONTIGMEM_ENABLE
+	def_bool y
+	depends on SMP && PPC_PSERIES
+
+config ARCH_DISCONTIGMEM_DEFAULT
+	def_bool y
+	depends on ARCH_DISCONTIGMEM_ENABLE
+
+config ARCH_FLATMEM_ENABLE
+	def_bool y
+	depends on PPC64
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	depends on ARCH_DISCONTIGMEM_ENABLE
+
+source "mm/Kconfig"
+
+config HAVE_ARCH_EARLY_PFN_TO_NID
+	def_bool y
+	depends on NEED_MULTIPLE_NODES
+
+# Some NUMA nodes have memory ranges that span
+# other nodes.  Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node.
+#
+# This is a relatively temporary hack that should
+# be able to go away when sparsemem is fully in
+# place
+
+config NODES_SPAN_OTHER_NODES
+	def_bool y
+	depends on NEED_MULTIPLE_NODES
+
+config NUMA
+	bool "NUMA support"
+	default y if DISCONTIGMEM || SPARSEMEM
+
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on PPC64 && SMP
+	default off
+	help
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with POWER5 cpus at a cost of slightly increased
+	  overhead in some places. If unsure say N here.
+
+config PROC_DEVICETREE
+	bool "Support for Open Firmware device tree in /proc"
+	depends on PPC_OF && PROC_FS
+	help
+	  This option adds a device-tree directory under /proc which contains
+	  an image of the device tree that the kernel copies from Open
+	  Firmware. If unsure, say Y here.
+
+source "arch/powerpc/platforms/prep/Kconfig"
+
+config CMDLINE_BOOL
+	bool "Default bootloader kernel arguments"
+	depends on !PPC_ISERIES
+
+config CMDLINE
+	string "Initial kernel command string"
+	depends on CMDLINE_BOOL
+	default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
+	help
+	  On some platforms, there is currently no way for the boot loader to
+	  pass arguments to the kernel. For these platforms, you can supply
+	  some command-line options at build time by entering them here.  In
+	  most cases you will need to specify the root device here.
+
+if !44x || BROKEN
+source kernel/power/Kconfig
+endif
+
+config SECCOMP
+	bool "Enable seccomp to safely compute untrusted bytecode"
+	depends on PROC_FS
+	default y
+	help
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via /proc/<pid>/seccomp, it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
+	  If unsure, say Y. Only embedded should say N here.
+
+endmenu
+
+config ISA_DMA_API
+	bool
+	default y
+
+menu "Bus options"
+
+config ISA
+	bool "Support for ISA-bus hardware"
+	depends on PPC_PREP || PPC_CHRP
+	help
+	  Find out whether you have ISA slots on your motherboard.  ISA is the
+	  name of a bus system, i.e. the way the CPU talks to the other stuff
+	  inside your box.  If you have an Apple machine, say N here; if you
+	  have an IBM RS/6000 or pSeries machine or a PReP machine, say Y.  If
+	  you have an embedded board, consult your board documentation.
+
+config GENERIC_ISA_DMA
+	bool
+	depends on PPC64 || POWER4 || 6xx && !CPM2
+	default y
+
+config EISA
+	bool
+
+config SBUS
+	bool
+
+# Yes MCA RS/6000s exist but Linux-PPC does not currently support any
+config MCA
+	bool
+
+config PCI
+	bool "PCI support" if 40x || CPM2 || 83xx || 85xx || PPC_MPC52xx || (EMBEDDED && PPC_ISERIES)
+	default y if !40x && !CPM2 && !8xx && !APUS && !83xx && !85xx
+	default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS
+	default PCI_QSPAN if !4xx && !CPM2 && 8xx
+	help
+	  Find out whether your system includes a PCI bus. PCI is the name of
+	  a bus system, i.e. the way the CPU talks to the other stuff inside
+	  your box.  If you say Y here, the kernel will include drivers and
+	  infrastructure code to support PCI bus devices.
+
+config PCI_DOMAINS
+	bool
+	default PCI
+
+config MPC83xx_PCI2
+	bool "  Supprt for 2nd PCI host controller"
+	depends on PCI && MPC834x
+	default y if MPC834x_SYS
+
+config PCI_QSPAN
+	bool "QSpan PCI"
+	depends on !4xx && !CPM2 && 8xx
+	help
+	  Say Y here if you have a system based on a Motorola 8xx-series
+	  embedded processor with a QSPAN PCI interface, otherwise say N.
+
+config PCI_8260
+	bool
+	depends on PCI && 8260
+	default y
+
+config 8260_PCI9
+	bool "  Enable workaround for MPC826x erratum PCI 9"
+	depends on PCI_8260 && !ADS8272
+	default y
+
+choice
+	prompt "  IDMA channel for PCI 9 workaround"
+	depends on 8260_PCI9
+
+config 8260_PCI9_IDMA1
+	bool "IDMA1"
+
+config 8260_PCI9_IDMA2
+	bool "IDMA2"
+
+config 8260_PCI9_IDMA3
+	bool "IDMA3"
+
+config 8260_PCI9_IDMA4
+	bool "IDMA4"
+
+endchoice
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+source "drivers/pci/hotplug/Kconfig"
+
+endmenu
+
+menu "Advanced setup"
+	depends on PPC32
+
+config ADVANCED_OPTIONS
+	bool "Prompt for advanced kernel configuration options"
+	help
+	  This option will enable prompting for a variety of advanced kernel
+	  configuration options.  These options can cause the kernel to not
+	  work if they are set incorrectly, but can be used to optimize certain
+	  aspects of kernel memory management.
+
+	  Unless you know what you are doing, say N here.
+
+comment "Default settings for advanced configuration options are used"
+	depends on !ADVANCED_OPTIONS
+
+config HIGHMEM_START_BOOL
+	bool "Set high memory pool address"
+	depends on ADVANCED_OPTIONS && HIGHMEM
+	help
+	  This option allows you to set the base address of the kernel virtual
+	  area used to map high memory pages.  This can be useful in
+	  optimizing the layout of kernel virtual memory.
+
+	  Say N here unless you know what you are doing.
+
+config HIGHMEM_START
+	hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL
+	default "0xfe000000"
+
+config LOWMEM_SIZE_BOOL
+	bool "Set maximum low memory"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the maximum amount of memory which
+	  will be used as "low memory", that is, memory which the kernel can
+	  access directly, without having to set up a kernel virtual mapping.
+	  This can be useful in optimizing the layout of kernel virtual
+	  memory.
+
+	  Say N here unless you know what you are doing.
+
+config LOWMEM_SIZE
+	hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL
+	default "0x30000000"
+
+config KERNEL_START_BOOL
+	bool "Set custom kernel base address"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the kernel virtual address at which
+	  the kernel will map low memory (the kernel image will be linked at
+	  this address).  This can be useful in optimizing the virtual memory
+	  layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config KERNEL_START
+	hex "Virtual address of kernel base" if KERNEL_START_BOOL
+	default "0xc0000000"
+
+config TASK_SIZE_BOOL
+	bool "Set custom user task size"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the amount of virtual address space
+	  allocated to user tasks.  This can be useful in optimizing the
+	  virtual memory layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config TASK_SIZE
+	hex "Size of user task space" if TASK_SIZE_BOOL
+	default "0x80000000"
+
+config CONSISTENT_START_BOOL
+	bool "Set custom consistent memory pool address"
+	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+	help
+	  This option allows you to set the base virtual address
+	  of the the consistent memory pool.  This pool of virtual
+	  memory is used to make consistent memory allocations.
+
+config CONSISTENT_START
+	hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL
+	default "0xff100000" if NOT_COHERENT_CACHE
+
+config CONSISTENT_SIZE_BOOL
+	bool "Set custom consistent memory pool size"
+	depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+	help
+	  This option allows you to set the size of the the
+	  consistent memory pool.  This pool of virtual memory
+	  is used to make consistent memory allocations.
+
+config CONSISTENT_SIZE
+	hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
+	default "0x00200000" if NOT_COHERENT_CACHE
+
+config BOOT_LOAD_BOOL
+	bool "Set the boot link/load address"
+	depends on ADVANCED_OPTIONS && !PPC_MULTIPLATFORM
+	help
+	  This option allows you to set the initial load address of the zImage
+	  or zImage.initrd file.  This can be useful if you are on a board
+	  which has a small amount of memory.
+
+	  Say N here unless you know what you are doing.
+
+config BOOT_LOAD
+	hex "Link/load address for booting" if BOOT_LOAD_BOOL
+	default "0x00400000" if 40x || 8xx || 8260
+	default "0x01000000" if 44x
+	default "0x00800000"
+
+config PIN_TLB
+	bool "Pinned Kernel TLBs (860 ONLY)"
+	depends on ADVANCED_OPTIONS && 8xx
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+# XXX source "arch/ppc/8xx_io/Kconfig"
+
+# XXX source "arch/ppc/8260_io/Kconfig"
+
+source "arch/powerpc/platforms/iseries/Kconfig"
+
+source "lib/Kconfig"
+
+source "arch/powerpc/oprofile/Kconfig"
+
+source "arch/powerpc/Kconfig.debug"
+
+source "security/Kconfig"
+
+config KEYS_COMPAT
+	bool
+	depends on COMPAT && KEYS
+	default y
+
+source "crypto/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
new file mode 100644
index 00000000000..61653cb60c4
--- /dev/null
+++ b/arch/powerpc/Kconfig.debug
@@ -0,0 +1,73 @@
+menu "Kernel hacking"
+
+source "lib/Kconfig.debug"
+
+config KGDB
+	bool "Include kgdb kernel debugger"
+	depends on DEBUG_KERNEL && (BROKEN || PPC_GEN550 || 4xx)
+	select DEBUG_INFO
+	help
+	  Include in-kernel hooks for kgdb, the Linux kernel source level
+	  debugger.  See <http://kgdb.sourceforge.net/> for more information.
+	  Unless you are intending to debug the kernel, say N here.
+
+choice
+	prompt "Serial Port"
+	depends on KGDB
+	default KGDB_TTYS1
+
+config KGDB_TTYS0
+	bool "ttyS0"
+
+config KGDB_TTYS1
+	bool "ttyS1"
+
+config KGDB_TTYS2
+	bool "ttyS2"
+
+config KGDB_TTYS3
+	bool "ttyS3"
+
+endchoice
+
+config KGDB_CONSOLE
+	bool "Enable serial console thru kgdb port"
+	depends on KGDB && 8xx || CPM2
+	help
+	  If you enable this, all serial console messages will be sent
+	  over the gdb stub.
+	  If unsure, say N.
+
+config XMON
+	bool "Include xmon kernel debugger"
+	depends on DEBUG_KERNEL
+	help
+	  Include in-kernel hooks for the xmon kernel monitor/debugger.
+	  Unless you are intending to debug the kernel, say N here.
+
+config BDI_SWITCH
+	bool "Include BDI-2000 user context switcher"
+	depends on DEBUG_KERNEL
+	help
+	  Include in-kernel support for the Abatron BDI2000 debugger.
+	  Unless you are intending to debug the kernel with one of these
+	  machines, say N here.
+
+config BOOTX_TEXT
+	bool "Support for early boot text console (BootX or OpenFirmware only)"
+	depends PPC_OF
+	help
+	  Say Y here to see progress messages from the boot firmware in text
+	  mode. Requires either BootX or Open Firmware.
+
+config SERIAL_TEXT_DEBUG
+	bool "Support for early boot texts over serial port"
+	depends on 4xx || LOPEC || MV64X60 || PPLUS || PRPMC800 || \
+		PPC_GEN550 || PPC_MPC52xx
+
+config PPC_OCP
+	bool
+	depends on IBM_OCP || XILINX_OCP
+	default y
+
+endmenu
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
new file mode 100644
index 00000000000..8a65e112211
--- /dev/null
+++ b/arch/powerpc/Makefile
@@ -0,0 +1,222 @@
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to do have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+# Changes for PPC by Gary Thomas
+# Rewritten by Cort Dougan and Paul Mackerras
+#
+
+# This must match PAGE_OFFSET in include/asm-powerpc/page.h.
+KERNELLOAD	:= $(CONFIG_KERNEL_START)
+
+HAS_BIARCH	:= $(call cc-option-yn, -m32)
+
+ifeq ($(CONFIG_PPC64),y)
+SZ	:= 64
+
+# Set default 32 bits cross compilers for vdso and boot wrapper
+CROSS32_COMPILE ?=
+
+CROSS32CC		:= $(CROSS32_COMPILE)gcc
+CROSS32AS		:= $(CROSS32_COMPILE)as
+CROSS32LD		:= $(CROSS32_COMPILE)ld
+CROSS32OBJCOPY		:= $(CROSS32_COMPILE)objcopy
+
+ifeq ($(HAS_BIARCH),y)
+ifeq ($(CROSS32_COMPILE),)
+CROSS32CC	:= $(CC) -m32
+CROSS32AS	:= $(AS) -a32
+CROSS32LD	:= $(LD) -m elf32ppc
+CROSS32OBJCOPY	:= $(OBJCOPY)
+endif
+endif
+
+export CROSS32CC CROSS32AS CROSS32LD CROSS32OBJCOPY
+
+new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
+
+ifeq ($(new_nm),y)
+NM		:= $(NM) --synthetic
+endif
+
+else
+SZ	:= 32
+endif
+
+ifeq ($(HAS_BIARCH),y)
+override AS	+= -a$(SZ)
+override LD	+= -m elf$(SZ)ppc
+override CC	+= -m$(SZ)
+endif
+
+LDFLAGS_vmlinux	:= -Ttext $(KERNELLOAD) -Bstatic -e $(KERNELLOAD)
+
+# The -Iarch/$(ARCH)/include is temporary while we are merging
+CPPFLAGS	+= -Iarch/$(ARCH) -Iarch/$(ARCH)/include
+AFLAGS		+= -Iarch/$(ARCH)
+CFLAGS		+= -Iarch/$(ARCH) -msoft-float -pipe
+ifeq ($(CONFIG_PPC64),y)
+CFLAGS		+= -mminimal-toc -mtraceback=none  -mcall-aixdesc
+else
+CFLAGS		+= -ffixed-r2 -mmultiple
+endif
+CPP		= $(CC) -E $(CFLAGS)
+# Temporary hack until we have migrated to asm-powerpc
+LINUXINCLUDE    += -Iarch/$(ARCH)/include
+
+CHECKFLAGS	+= -m$(SZ) -D__powerpc__ -D__powerpc$(SZ)__
+
+ifeq ($(CONFIG_PPC64),y)
+GCC_VERSION     := $(call cc-version)
+GCC_BROKEN_VEC	:= $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi)
+
+ifeq ($(CONFIG_POWER4_ONLY),y)
+ifeq ($(CONFIG_ALTIVEC),y)
+ifeq ($(GCC_BROKEN_VEC),y)
+	CFLAGS += $(call cc-option,-mcpu=970)
+else
+	CFLAGS += $(call cc-option,-mcpu=power4)
+endif
+else
+	CFLAGS += $(call cc-option,-mcpu=power4)
+endif
+else
+	CFLAGS += $(call cc-option,-mtune=power4)
+endif
+endif
+
+# Enable unit-at-a-time mode when possible. It shrinks the
+# kernel considerably.
+CFLAGS += $(call cc-option,-funit-at-a-time)
+
+ifndef CONFIG_FSL_BOOKE
+CFLAGS		+= -mstring
+endif
+
+cpu-as-$(CONFIG_PPC64BRIDGE)	+= -Wa,-mppc64bridge
+cpu-as-$(CONFIG_4xx)		+= -Wa,-m405
+cpu-as-$(CONFIG_6xx)		+= -Wa,-maltivec
+cpu-as-$(CONFIG_POWER4)		+= -Wa,-maltivec
+cpu-as-$(CONFIG_E500)		+= -Wa,-me500
+cpu-as-$(CONFIG_E200)		+= -Wa,-me200
+
+AFLAGS += $(cpu-as-y)
+CFLAGS += $(cpu-as-y)
+
+# Default to the common case.
+KBUILD_DEFCONFIG := common_defconfig
+
+head-y				:= arch/powerpc/kernel/head.o
+head-$(CONFIG_PPC64)		:= arch/powerpc/kernel/head_64.o
+head-$(CONFIG_8xx)		:= arch/powerpc/kernel/head_8xx.o
+head-$(CONFIG_4xx)		:= arch/powerpc/kernel/head_4xx.o
+head-$(CONFIG_44x)		:= arch/powerpc/kernel/head_44x.o
+head-$(CONFIG_FSL_BOOKE)	:= arch/powerpc/kernel/head_fsl_booke.o
+
+ifeq ($(CONFIG_PPC32),y)
+head-$(CONFIG_6xx)		+= arch/powerpc/kernel/idle_6xx.o
+head-$(CONFIG_POWER4)		+= arch/powerpc/kernel/idle_power4.o
+head-$(CONFIG_PPC_FPU)		+= arch/powerpc/kernel/fpu.o
+endif
+
+core-y				+= arch/powerpc/kernel/ \
+				   arch/powerpc/mm/ \
+				   arch/powerpc/lib/ \
+				   arch/powerpc/sysdev/
+core-$(CONFIG_PPC32)		+= arch/ppc/kernel/ \
+				   arch/ppc/syslib/
+core-$(CONFIG_PPC64)		+= arch/ppc64/kernel/
+core-$(CONFIG_PPC_PMAC)		+= arch/powerpc/platforms/powermac/
+core-$(CONFIG_4xx)		+= arch/ppc/platforms/4xx/
+core-$(CONFIG_83xx)		+= arch/ppc/platforms/83xx/
+core-$(CONFIG_85xx)		+= arch/ppc/platforms/85xx/
+core-$(CONFIG_MATH_EMULATION)	+= arch/ppc/math-emu/
+core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
+core-$(CONFIG_APUS)		+= arch/ppc/amiga/
+drivers-$(CONFIG_8xx)		+= arch/ppc/8xx_io/
+drivers-$(CONFIG_4xx)		+= arch/ppc/4xx_io/
+drivers-$(CONFIG_CPM2)		+= arch/ppc/8260_io/
+
+drivers-$(CONFIG_OPROFILE)	+= arch/powerpc/oprofile/
+
+BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm
+
+.PHONY: $(BOOT_TARGETS)
+
+all: uImage zImage
+
+CPPFLAGS_vmlinux.lds	:= -Upowerpc
+
+# All the instructions talk about "make bzImage".
+bzImage: zImage
+
+boot := arch/$(ARCH)/boot
+
+$(BOOT_TARGETS): vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+uImage: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot)/images $(boot)/images/$@
+
+define archhelp
+  @echo '* zImage          - Compressed kernel image (arch/$(ARCH)/boot/images/zImage.*)'
+  @echo '  uImage          - Create a bootable image for U-Boot / PPCBoot'
+  @echo '  install         - Install kernel using'
+  @echo '                    (your) ~/bin/installkernel or'
+  @echo '                    (distribution) /sbin/installkernel or'
+  @echo '                    install to $$(INSTALL_PATH) and run lilo'
+  @echo '  *_defconfig     - Select default config from arch/$(ARCH)/ppc/configs'
+endef
+
+archclean:
+	$(Q)$(MAKE) $(clean)=arch/ppc/boot
+	# Temporary hack until we have migrated to asm-powerpc
+	$(Q)rm -rf arch/$(ARCH)/include
+
+archprepare: checkbin
+
+# Temporary hack until we have migrated to asm-powerpc
+ifeq ($(CONFIG_PPC64),y)
+include/asm: arch/$(ARCH)/include/asm
+arch/$(ARCH)/include/asm:
+	$(Q)if [ ! -d arch/$(ARCH)/include ]; then mkdir -p arch/$(ARCH)/include; fi
+	$(Q)ln -fsn $(srctree)/include/asm-ppc64 arch/$(ARCH)/include/asm
+else
+include/asm: arch/$(ARCH)/include/asm
+arch/$(ARCH)/include/asm:
+	$(Q)if [ ! -d arch/$(ARCH)/include ]; then mkdir -p arch/$(ARCH)/include; fi
+	$(Q)ln -fsn $(srctree)/include/asm-ppc arch/$(ARCH)/include/asm
+endif
+
+# Use the file '.tmp_gas_check' for binutils tests, as gas won't output
+# to stdout and these checks are run even on install targets.
+TOUT	:= .tmp_gas_check
+# Ensure this is binutils 2.12.1 (or 2.12.90.0.7) or later for altivec
+# instructions.
+# gcc-3.4 and binutils-2.14 are a fatal combination.
+GCC_VERSION	:= $(call cc-version)
+
+checkbin:
+	@if test "$(GCC_VERSION)" = "0304" ; then \
+		if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
+			echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
+			echo 'correctly with gcc-3.4 and your version of binutils.'; \
+			echo '*** Please upgrade your binutils or downgrade your gcc'; \
+			false; \
+		fi ; \
+	fi
+	@if ! /bin/echo dssall | $(AS) -many -o $(TOUT) >/dev/null 2>&1 ; then \
+		echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build ' ; \
+		echo 'correctly with old versions of binutils.' ; \
+		echo '*** Please upgrade your binutils to 2.12.1 or newer' ; \
+		false ; \
+	fi
+
+CLEAN_FILES += $(TOUT)
+
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
new file mode 100644
index 00000000000..62c4a51a23d
--- /dev/null
+++ b/arch/powerpc/kernel/Makefile
@@ -0,0 +1,18 @@
+#
+# Makefile for the linux kernel.
+#
+
+extra-$(CONFIG_PPC_STD_MMU)	:= head.o
+extra_$(CONFIG_PPC64)		:= head_64.o
+extra-$(CONFIG_40x)		:= head_4xx.o
+extra-$(CONFIG_44x)		:= head_44x.o
+extra-$(CONFIG_FSL_BOOKE)	:= head_fsl_booke.o
+extra-$(CONFIG_8xx)		:= head_8xx.o
+extra-$(CONFIG_6xx)		+= idle_6xx.o
+extra-$(CONFIG_POWER4)		+= idle_power4.o
+extra-$(CONFIG_PPC_FPU)		+= fpu.o
+extra-y				+= vmlinux.lds
+
+obj-y				:= semaphore.o traps.o process.o
+
+obj-$(CONFIG_ALTIVEC)		+= vecemu.o vector.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
new file mode 100644
index 00000000000..16cf0b7ee2b
--- /dev/null
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -0,0 +1,262 @@
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ * We use the technique used in the OSF Mach kernel code:
+ * generate asm statements containing #defines,
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/suspend.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/time.h>
+#include <linux/hardirq.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/iSeries/HvLpEvent.h>
+#include <asm/rtas.h>
+#include <asm/cache.h>
+#include <asm/systemcfg.h>
+#include <asm/compat.h>
+#endif
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+int main(void)
+{
+	/* thread struct on stack */
+	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+	DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+#ifdef CONFIG_PPC32
+	DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
+#endif
+#ifdef CONFIG_PPC64
+	DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror));
+	DEFINE(THREAD_SHIFT, THREAD_SHIFT);
+#endif
+	DEFINE(THREAD_SIZE, THREAD_SIZE);
+
+	/* task_struct->thread */
+	DEFINE(THREAD, offsetof(struct task_struct, thread));
+	DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info));
+	DEFINE(MM, offsetof(struct task_struct, mm));
+	DEFINE(PTRACE, offsetof(struct task_struct, ptrace));
+	DEFINE(KSP, offsetof(struct thread_struct, ksp));
+	DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
+	DEFINE(LAST_SYSCALL, offsetof(struct thread_struct, last_syscall));
+	DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
+	DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
+	DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
+	DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+	DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
+	DEFINE(PT_PTRACED, PT_PTRACED);
+#endif
+#ifdef CONFIG_PPC64
+	DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
+#endif
+
+#ifdef CONFIG_ALTIVEC
+	DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
+	DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
+	DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
+	DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+	DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
+	DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
+	DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr));
+	DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
+#endif /* CONFIG_SPE */
+	/* Interrupt register frame */
+	DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);
+#ifndef CONFIG_PPC64
+	DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+#else
+	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+
+	/* 288 = # of volatile regs, int & fp, for leaf routines */
+	/* which do not stack a frame.  See the PPC64 ABI.       */
+	DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288);
+#endif
+	/* in fact we only use gpr0 - gpr9 and gpr20 - gpr23 */
+	DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
+	DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
+	DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2]));
+	DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3]));
+	DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4]));
+	DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5]));
+	DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6]));
+	DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7]));
+	DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8]));
+	DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9]));
+	DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10]));
+	DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11]));
+	DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12]));
+	DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13]));
+	DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14]));
+	DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15]));
+	DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16]));
+	DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17]));
+	DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18]));
+	DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19]));
+	DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20]));
+	DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21]));
+	DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22]));
+	DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23]));
+	DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24]));
+	DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25]));
+	DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26]));
+	DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27]));
+	DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28]));
+	DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29]));
+	DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30]));
+	DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31]));
+	/*
+	 * Note: these symbols include _ because they overlap with special
+	 * register names
+	 */
+	DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip));
+	DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr));
+	DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr));
+	DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link));
+	DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr));
+	DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq));
+	DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer));
+	DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
+	DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
+	/* The PowerPC 400-class & Book-E processors have neither the DAR nor the DSISR
+	 * SPRs. Hence, we overload them to hold the similar DEAR and ESR SPRs
+	 * for such processors.  For critical interrupts we use them to
+	 * hold SRR0 and SRR1.
+	 */
+	DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
+	DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
+	DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3));
+	DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result));
+	DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
+	DEFINE(CLONE_VM, CLONE_VM);
+	DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
+	DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
+
+	/* About the CPU features table */
+	DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec));
+	DEFINE(CPU_SPEC_PVR_MASK, offsetof(struct cpu_spec, pvr_mask));
+	DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value));
+	DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
+	DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
+
+#ifdef CONFIG_PPC64
+	DEFINE(MM, offsetof(struct task_struct, mm));
+	DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
+
+	DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
+	DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
+	DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
+	DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
+	DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
+	DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
+	DEFINE(PLATFORM, offsetof(struct systemcfg, platform));
+
+	/* paca */
+        DEFINE(PACA_SIZE, sizeof(struct paca_struct));
+        DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
+        DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start));
+        DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
+	DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
+        DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
+        DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
+        DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
+	DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
+        DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
+	DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
+	DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled));
+	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
+	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
+	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+#ifdef CONFIG_HUGETLB_PAGE
+	DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas));
+	DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas));
+#endif /* CONFIG_HUGETLB_PAGE */
+	DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr));
+        DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
+        DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
+        DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
+        DEFINE(PACA_EXDSI, offsetof(struct paca_struct, exdsi));
+        DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
+	DEFINE(PACALPPACA, offsetof(struct paca_struct, lppaca));
+	DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
+	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
+	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
+	DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
+	DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
+
+	/* RTAS */
+	DEFINE(RTASBASE, offsetof(struct rtas_t, base));
+	DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));
+
+	DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
+	DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe));
+
+	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
+	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+
+	/* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
+	DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
+	DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
+
+	/* systemcfg offsets for use by vdso */
+	DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp));
+	DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec));
+	DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs));
+	DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec));
+	DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count));
+	DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest));
+	DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime));
+	DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32));
+	DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64));
+
+	/* timeval/timezone offsets for use by vdso */
+	DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
+	DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
+	DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
+	DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
+	DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
+	DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
+#endif
+
+	DEFINE(pbe_address, offsetof(struct pbe, address));
+	DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
+	DEFINE(pbe_next, offsetof(struct pbe, next));
+
+	DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
+	return 0;
+}
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
new file mode 100644
index 00000000000..665d7d34304
--- /dev/null
+++ b/arch/powerpc/kernel/fpu.S
@@ -0,0 +1,133 @@
+/*
+ *  FPU support code, moved here from head.S so that it can be used
+ *  by chips which use other head-whatever.S files.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * This task wants to use the FPU now.
+ * On UP, disable FP for the task which had the FPU previously,
+ * and save its floating-point registers in its thread_struct.
+ * Load up this task's FP registers from its thread_struct,
+ * enable the FPU for the current task and return to the task.
+ */
+	.globl	load_up_fpu
+load_up_fpu:
+	mfmsr	r5
+	ori	r5,r5,MSR_FP
+#ifdef CONFIG_PPC64BRIDGE
+	clrldi	r5,r5,1			/* turn off 64-bit mode */
+#endif /* CONFIG_PPC64BRIDGE */
+	SYNC
+	MTMSRD(r5)			/* enable use of fpu now */
+	isync
+/*
+ * For SMP, we don't do lazy FPU switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_fpu in switch_to.
+ */
+#ifndef CONFIG_SMP
+	tophys(r6,0)			/* get __pa constant */
+	addis	r3,r6,last_task_used_math@ha
+	lwz	r4,last_task_used_math@l(r3)
+	cmpwi	0,r4,0
+	beq	1f
+	add	r4,r4,r6
+	addi	r4,r4,THREAD		/* want last_task_used_math->thread */
+	SAVE_32FPRS(0, r4)
+	mffs	fr0
+	stfd	fr0,THREAD_FPSCR-4(r4)
+	lwz	r5,PT_REGS(r4)
+	add	r5,r5,r6
+	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	li	r10,MSR_FP|MSR_FE0|MSR_FE1
+	andc	r4,r4,r10		/* disable FP for previous task */
+	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* enable use of FP after return */
+	mfspr	r5,SPRN_SPRG3		/* current task's THREAD (phys) */
+	lwz	r4,THREAD_FPEXC_MODE(r5)
+	ori	r9,r9,MSR_FP		/* enable FP for current */
+	or	r9,r9,r4
+	lfd	fr0,THREAD_FPSCR-4(r5)
+	mtfsf	0xff,fr0
+	REST_32FPRS(0, r5)
+#ifndef CONFIG_SMP
+	subi	r4,r5,THREAD
+	sub	r4,r4,r6
+	stw	r4,last_task_used_math@l(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	/* we haven't used ctr or xer or lr */
+	b	fast_exception_return
+
+/*
+ * FP unavailable trap from kernel - print a message, but let
+ * the task use FP in the kernel until it returns to user mode.
+ */
+ 	.globl	KernelFP
+KernelFP:
+	lwz	r3,_MSR(r1)
+	ori	r3,r3,MSR_FP
+	stw	r3,_MSR(r1)		/* enable use of FP after return */
+	lis	r3,86f@h
+	ori	r3,r3,86f@l
+	mr	r4,r2			/* current */
+	lwz	r5,_NIP(r1)
+	bl	printk
+	b	ret_from_except
+86:	.string	"floating point used in kernel (task=%p, pc=%x)\n"
+	.align	4,0
+
+/*
+ * giveup_fpu(tsk)
+ * Disable FP for the task given as the argument,
+ * and save the floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ */
+	.globl	giveup_fpu
+giveup_fpu:
+	mfmsr	r5
+	ori	r5,r5,MSR_FP
+	SYNC_601
+	ISYNC_601
+	MTMSRD(r5)			/* enable use of fpu now */
+	SYNC_601
+	isync
+	cmpwi	0,r3,0
+	beqlr-				/* if no previous owner, done */
+	addi	r3,r3,THREAD	        /* want THREAD of task */
+	lwz	r5,PT_REGS(r3)
+	cmpwi	0,r5,0
+	SAVE_32FPRS(0, r3)
+	mffs	fr0
+	stfd	fr0,THREAD_FPSCR-4(r3)
+	beq	1f
+	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	li	r3,MSR_FP|MSR_FE0|MSR_FE1
+	andc	r4,r4,r3		/* disable FP for previous task */
+	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+	li	r5,0
+	lis	r4,last_task_used_math@ha
+	stw	r5,last_task_used_math@l(r4)
+#endif /* CONFIG_SMP */
+	blr
diff --git a/arch/powerpc/kernel/head.S b/arch/powerpc/kernel/head.S
new file mode 100644
index 00000000000..d05509f197d
--- /dev/null
+++ b/arch/powerpc/kernel/head.S
@@ -0,0 +1,1545 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  This file contains the low-level support and setup for the
+ *  PowerPC platform, including trap and interrupt dispatch.
+ *  (The PPC 8xx embedded CPUs use head_8xx.S instead.)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_APUS
+#include <asm/amigappc.h>
+#endif
+
+#ifdef CONFIG_PPC64BRIDGE
+#define LOAD_BAT(n, reg, RA, RB)	\
+	ld	RA,(n*32)+0(reg);	\
+	ld	RB,(n*32)+8(reg);	\
+	mtspr	SPRN_IBAT##n##U,RA;	\
+	mtspr	SPRN_IBAT##n##L,RB;	\
+	ld	RA,(n*32)+16(reg);	\
+	ld	RB,(n*32)+24(reg);	\
+	mtspr	SPRN_DBAT##n##U,RA;	\
+	mtspr	SPRN_DBAT##n##L,RB;	\
+
+#else /* CONFIG_PPC64BRIDGE */
+
+/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+#define LOAD_BAT(n, reg, RA, RB)	\
+	/* see the comment for clear_bats() -- Cort */ \
+	li	RA,0;			\
+	mtspr	SPRN_IBAT##n##U,RA;	\
+	mtspr	SPRN_DBAT##n##U,RA;	\
+	lwz	RA,(n*16)+0(reg);	\
+	lwz	RB,(n*16)+4(reg);	\
+	mtspr	SPRN_IBAT##n##U,RA;	\
+	mtspr	SPRN_IBAT##n##L,RB;	\
+	beq	1f;			\
+	lwz	RA,(n*16)+8(reg);	\
+	lwz	RB,(n*16)+12(reg);	\
+	mtspr	SPRN_DBAT##n##U,RA;	\
+	mtspr	SPRN_DBAT##n##L,RB;	\
+1:
+#endif /* CONFIG_PPC64BRIDGE */
+
+	.text
+	.stabs	"arch/ppc/kernel/",N_SO,0,0,0f
+	.stabs	"head.S",N_SO,0,0,0f
+0:
+	.globl	_stext
+_stext:
+
+/*
+ * _start is defined this way because the XCOFF loader in the OpenFirmware
+ * on the powermac expects the entry point to be a procedure descriptor.
+ */
+	.text
+	.globl	_start
+_start:
+	/*
+	 * These are here for legacy reasons, the kernel used to
+	 * need to look like a coff function entry for the pmac
+	 * but we're always started by some kind of bootloader now.
+	 *  -- Cort
+	 */
+	nop	/* used by __secondary_hold on prep (mtx) and chrp smp */
+	nop	/* used by __secondary_hold on prep (mtx) and chrp smp */
+	nop
+
+/* PMAC
+ * Enter here with the kernel text, data and bss loaded starting at
+ * 0, running with virtual == physical mapping.
+ * r5 points to the prom entry point (the client interface handler
+ * address).  Address translation is turned on, with the prom
+ * managing the hash table.  Interrupts are disabled.  The stack
+ * pointer (r1) points to just below the end of the half-meg region
+ * from 0x380000 - 0x400000, which is mapped in already.
+ *
+ * If we are booted from MacOS via BootX, we enter with the kernel
+ * image loaded somewhere, and the following values in registers:
+ *  r3: 'BooX' (0x426f6f58)
+ *  r4: virtual address of boot_infos_t
+ *  r5: 0
+ *
+ * APUS
+ *   r3: 'APUS'
+ *   r4: physical address of memory base
+ *   Linux/m68k style BootInfo structure at &_end.
+ *
+ * PREP
+ * This is jumped to on prep systems right after the kernel is relocated
+ * to its proper place in memory by the boot loader.  The expected layout
+ * of the regs is:
+ *   r3: ptr to residual data
+ *   r4: initrd_start or if no initrd then 0
+ *   r5: initrd_end - unused if r4 is 0
+ *   r6: Start of command line string
+ *   r7: End of command line string
+ *
+ * This just gets a minimal mmu environment setup so we can call
+ * start_here() to do the real work.
+ * -- Cort
+ */
+
+	.globl	__start
+__start:
+/*
+ * We have to do any OF calls before we map ourselves to KERNELBASE,
+ * because OF may have I/O devices mapped into that area
+ * (particularly on CHRP).
+ */
+	mr	r31,r3			/* save parameters */
+	mr	r30,r4
+	mr	r29,r5
+	mr	r28,r6
+	mr	r27,r7
+	li	r24,0			/* cpu # */
+
+/*
+ * early_init() does the early machine identification and does
+ * the necessary low-level setup and clears the BSS
+ *  -- Cort <cort@fsmlabs.com>
+ */
+	bl	early_init
+
+/*
+ * On POWER4, we first need to tweak some CPU configuration registers
+ * like real mode cache inhibit or exception base
+ */
+#ifdef CONFIG_POWER4
+	bl	__970_cpu_preinit
+#endif /* CONFIG_POWER4 */
+
+#ifdef CONFIG_APUS
+/* On APUS the __va/__pa constants need to be set to the correct
+ * values before continuing.
+ */
+	mr	r4,r30
+	bl	fix_mem_constants
+#endif /* CONFIG_APUS */
+
+/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains
+ * the physical address we are running at, returned by early_init()
+ */
+ 	bl	mmu_off
+__after_mmu_off:
+#ifndef CONFIG_POWER4
+	bl	clear_bats
+	bl	flush_tlbs
+
+	bl	initial_bats
+#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT)
+	bl	setup_disp_bat
+#endif
+#else /* CONFIG_POWER4 */
+	bl	reloc_offset
+	bl	initial_mm_power4
+#endif /* CONFIG_POWER4 */
+
+/*
+ * Call setup_cpu for CPU 0 and initialize 6xx Idle
+ */
+	bl	reloc_offset
+	li	r24,0			/* cpu# */
+	bl	call_setup_cpu		/* Call setup_cpu for this CPU */
+#ifdef CONFIG_6xx
+	bl	reloc_offset
+	bl	init_idle_6xx
+#endif /* CONFIG_6xx */
+#ifdef CONFIG_POWER4
+	bl	reloc_offset
+	bl	init_idle_power4
+#endif /* CONFIG_POWER4 */
+
+
+#ifndef CONFIG_APUS
+/*
+ * We need to run with _start at physical address 0.
+ * On CHRP, we are loaded at 0x10000 since OF on CHRP uses
+ * the exception vectors at 0 (and therefore this copy
+ * overwrites OF's exception vectors with our own).
+ * If the MMU is already turned on, we copy stuff to KERNELBASE,
+ * otherwise we copy it to 0.
+ */
+	bl	reloc_offset
+	mr	r26,r3
+	addis	r4,r3,KERNELBASE@h	/* current address of _start */
+	cmpwi	0,r4,0			/* are we already running at 0? */
+	bne	relocate_kernel
+#endif /* CONFIG_APUS */
+/*
+ * we now have the 1st 16M of ram mapped with the bats.
+ * prep needs the mmu to be turned on here, but pmac already has it on.
+ * this shouldn't bother the pmac since it just gets turned on again
+ * as we jump to our code at KERNELBASE. -- Cort
+ * Actually no, pmac doesn't have it on any more. BootX enters with MMU
+ * off, and in other cases, we now turn it off before changing BATs above.
+ */
+turn_on_mmu:
+	mfmsr	r0
+	ori	r0,r0,MSR_DR|MSR_IR
+	mtspr	SPRN_SRR1,r0
+	lis	r0,start_here@h
+	ori	r0,r0,start_here@l
+	mtspr	SPRN_SRR0,r0
+	SYNC
+	RFI				/* enables MMU */
+
+/*
+ * We need __secondary_hold as a place to hold the other cpus on
+ * an SMP machine, even when we are running a UP kernel.
+ */
+	. = 0xc0			/* for prep bootloader */
+	li	r3,1			/* MTX only has 1 cpu */
+	.globl	__secondary_hold
+__secondary_hold:
+	/* tell the master we're here */
+	stw	r3,4(0)
+#ifdef CONFIG_SMP
+100:	lwz	r4,0(0)
+	/* wait until we're told to start */
+	cmpw	0,r4,r3
+	bne	100b
+	/* our cpu # was at addr 0 - go */
+	mr	r24,r3			/* cpu # */
+	b	__secondary_start
+#else
+	b	.
+#endif /* CONFIG_SMP */
+
+/*
+ * Exception entry code.  This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ */
+#define EXCEPTION_PROLOG	\
+	mtspr	SPRN_SPRG0,r10;	\
+	mtspr	SPRN_SPRG1,r11;	\
+	mfcr	r10;		\
+	EXCEPTION_PROLOG_1;	\
+	EXCEPTION_PROLOG_2
+
+#define EXCEPTION_PROLOG_1	\
+	mfspr	r11,SPRN_SRR1;		/* check whether user or kernel */ \
+	andi.	r11,r11,MSR_PR;	\
+	tophys(r11,r1);			/* use tophys(r1) if kernel */ \
+	beq	1f;		\
+	mfspr	r11,SPRN_SPRG3;	\
+	lwz	r11,THREAD_INFO-THREAD(r11);	\
+	addi	r11,r11,THREAD_SIZE;	\
+	tophys(r11,r11);	\
+1:	subi	r11,r11,INT_FRAME_SIZE	/* alloc exc. frame */
+
+
+#define EXCEPTION_PROLOG_2	\
+	CLR_TOP32(r11);		\
+	stw	r10,_CCR(r11);		/* save registers */ \
+	stw	r12,GPR12(r11);	\
+	stw	r9,GPR9(r11);	\
+	mfspr	r10,SPRN_SPRG0;	\
+	stw	r10,GPR10(r11);	\
+	mfspr	r12,SPRN_SPRG1;	\
+	stw	r12,GPR11(r11);	\
+	mflr	r10;		\
+	stw	r10,_LINK(r11);	\
+	mfspr	r12,SPRN_SRR0;	\
+	mfspr	r9,SPRN_SRR1;	\
+	stw	r1,GPR1(r11);	\
+	stw	r1,0(r11);	\
+	tovirt(r1,r11);			/* set new kernel sp */	\
+	li	r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
+	MTMSRD(r10);			/* (except for mach check in rtas) */ \
+	stw	r0,GPR0(r11);	\
+	SAVE_4GPRS(3, r11);	\
+	SAVE_2GPRS(7, r11)
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ */
+#define EXCEPTION(n, label, hdlr, xfer)		\
+	. = n;					\
+label:						\
+	EXCEPTION_PROLOG;			\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;	\
+	xfer(n, hdlr)
+
+#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)	\
+	li	r10,trap;					\
+	stw	r10,TRAP(r11);					\
+	li	r10,MSR_KERNEL;					\
+	copyee(r10, r9);					\
+	bl	tfer;						\
+i##n:								\
+	.long	hdlr;						\
+	.long	ret
+
+#define COPY_EE(d, s)		rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full,	\
+			  ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
+			  ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr)	\
+	EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
+			  ret_from_except)
+
+/* System reset */
+/* core99 pmac starts the seconary here by changing the vector, and
+   putting it back to what it was (UnknownException) when done.  */
+#if defined(CONFIG_GEMINI) && defined(CONFIG_SMP)
+	. = 0x100
+	b	__secondary_start_gemini
+#else
+	EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD)
+#endif
+
+/* Machine check */
+/*
+ * On CHRP, this is complicated by the fact that we could get a
+ * machine check inside RTAS, and we have no guarantee that certain
+ * critical registers will have the values we expect.  The set of
+ * registers that might have bad values includes all the GPRs
+ * and all the BATs.  We indicate that we are in RTAS by putting
+ * a non-zero value, the address of the exception frame to use,
+ * in SPRG2.  The machine check handler checks SPRG2 and uses its
+ * value if it is non-zero.  If we ever needed to free up SPRG2,
+ * we could use a field in the thread_info or thread_struct instead.
+ * (Other exception handlers assume that r1 is a valid kernel stack
+ * pointer when we take an exception from supervisor mode.)
+ *	-- paulus.
+ */
+	. = 0x200
+	mtspr	SPRN_SPRG0,r10
+	mtspr	SPRN_SPRG1,r11
+	mfcr	r10
+#ifdef CONFIG_PPC_CHRP
+	mfspr	r11,SPRN_SPRG2
+	cmpwi	0,r11,0
+	bne	7f
+#endif /* CONFIG_PPC_CHRP */
+	EXCEPTION_PROLOG_1
+7:	EXCEPTION_PROLOG_2
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_CHRP
+	mfspr	r4,SPRN_SPRG2
+	cmpwi	cr1,r4,0
+	bne	cr1,1f
+#endif
+	EXC_XFER_STD(0x200, MachineCheckException)
+#ifdef CONFIG_PPC_CHRP
+1:	b	machine_check_in_rtas
+#endif
+
+/* Data access exception. */
+	. = 0x300
+#ifdef CONFIG_PPC64BRIDGE
+	b	DataAccess
+DataAccessCont:
+#else
+DataAccess:
+	EXCEPTION_PROLOG
+#endif /* CONFIG_PPC64BRIDGE */
+	mfspr	r10,SPRN_DSISR
+	andis.	r0,r10,0xa470		/* weird error? */
+	bne	1f			/* if not, try to put a PTE */
+	mfspr	r4,SPRN_DAR		/* into the hash table */
+	rlwinm	r3,r10,32-15,21,21	/* DSISR_STORE -> _PAGE_RW */
+	bl	hash_page
+1:	stw	r10,_DSISR(r11)
+	mr	r5,r10
+	mfspr	r4,SPRN_DAR
+	EXC_XFER_EE_LITE(0x300, handle_page_fault)
+
+#ifdef CONFIG_PPC64BRIDGE
+/* SLB fault on data access. */
+	. = 0x380
+	b	DataSegment
+#endif /* CONFIG_PPC64BRIDGE */
+
+/* Instruction access exception. */
+	. = 0x400
+#ifdef CONFIG_PPC64BRIDGE
+	b	InstructionAccess
+InstructionAccessCont:
+#else
+InstructionAccess:
+	EXCEPTION_PROLOG
+#endif /* CONFIG_PPC64BRIDGE */
+	andis.	r0,r9,0x4000		/* no pte found? */
+	beq	1f			/* if so, try to put a PTE */
+	li	r3,0			/* into the hash table */
+	mr	r4,r12			/* SRR0 is fault address */
+	bl	hash_page
+1:	mr	r4,r12
+	mr	r5,r9
+	EXC_XFER_EE_LITE(0x400, handle_page_fault)
+
+#ifdef CONFIG_PPC64BRIDGE
+/* SLB fault on instruction access. */
+	. = 0x480
+	b	InstructionSegment
+#endif /* CONFIG_PPC64BRIDGE */
+
+/* External interrupt */
+	EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+
+/* Alignment exception */
+	. = 0x600
+Alignment:
+	EXCEPTION_PROLOG
+	mfspr	r4,SPRN_DAR
+	stw	r4,_DAR(r11)
+	mfspr	r5,SPRN_DSISR
+	stw	r5,_DSISR(r11)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE(0x600, AlignmentException)
+
+/* Program check exception */
+	EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD)
+
+/* Floating-point unavailable */
+	. = 0x800
+FPUnavailable:
+	EXCEPTION_PROLOG
+	bne	load_up_fpu		/* if from user, just load it up */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE_LITE(0x800, KernelFP)
+
+/* Decrementer */
+	EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+
+	EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE)
+
+/* System call */
+	. = 0xc00
+SystemCall:
+	EXCEPTION_PROLOG
+	EXC_XFER_EE_LITE(0xc00, DoSyscall)
+
+/* Single step - not used on 601 */
+	EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD)
+	EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE)
+
+/*
+ * The Altivec unavailable trap is at 0x0f20.  Foo.
+ * We effectively remap it to 0x3000.
+ * We include an altivec unavailable exception vector even if
+ * not configured for Altivec, so that you can't panic a
+ * non-altivec kernel running on a machine with altivec just
+ * by executing an altivec instruction.
+ */
+	. = 0xf00
+	b	Trap_0f
+
+	. = 0xf20
+	b	AltiVecUnavailable
+
+Trap_0f:
+	EXCEPTION_PROLOG
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE(0xf00, UnknownException)
+
+/*
+ * Handle TLB miss for instruction on 603/603e.
+ * Note: we get an alternate set of r0 - r3 to use automatically.
+ */
+	. = 0x1000
+InstructionTLBMiss:
+/*
+ * r0:	stored ctr
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	mfctr	r0
+	/* Get PTE (linux-style) and check access */
+	mfspr	r3,SPRN_IMISS
+	lis	r1,KERNELBASE@h		/* check if kernel address */
+	cmplw	0,r3,r1
+	mfspr	r2,SPRN_SPRG3
+	li	r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
+	lwz	r2,PGDIR(r2)
+	blt+	112f
+	lis	r2,swapper_pg_dir@ha	/* if kernel address, use */
+	addi	r2,r2,swapper_pg_dir@l	/* kernel page table */
+	mfspr	r1,SPRN_SRR1		/* and MSR_PR bit from SRR1 */
+	rlwinm	r1,r1,32-12,29,29	/* shift MSR_PR to _PAGE_USER posn */
+112:	tophys(r2,r2)
+	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	InstructionAddressInvalid	/* return if no mapping */
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r3,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r3		/* check access & ~permission */
+	bne-	InstructionAddressInvalid /* return if access not permitted */
+	ori	r3,r3,_PAGE_ACCESSED	/* set _PAGE_ACCESSED in pte */
+	/*
+	 * NOTE! We are assuming this is not an SMP system, otherwise
+	 * we would need to update the pte atomically with lwarx/stwcx.
+	 */
+	stw	r3,0(r2)		/* update PTE (accessed bit) */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	rlwinm	r1,r3,32-10,31,31	/* _PAGE_RW -> PP lsb */
+	rlwinm	r2,r3,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
+	and	r1,r1,r2		/* writable if _RW and _DIRTY */
+	rlwimi	r3,r3,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r3,r3,32-1,31,31	/* _PAGE_USER -> PP lsb */
+	ori	r1,r1,0xe14		/* clear out reserved bits and M */
+	andc	r1,r3,r1		/* PP = user? (rw&dirty? 2: 3): 0 */
+	mtspr	SPRN_RPA,r1
+	mfspr	r3,SPRN_IMISS
+	tlbli	r3
+	mfspr	r3,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r3
+	rfi
+InstructionAddressInvalid:
+	mfspr	r3,SPRN_SRR1
+	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
+
+	addis	r1,r1,0x2000
+	mtspr	SPRN_DSISR,r1	/* (shouldn't be needed) */
+	mtctr	r0		/* Restore CTR */
+	andi.	r2,r3,0xFFFF	/* Clear upper bits of SRR1 */
+	or	r2,r2,r1
+	mtspr	SPRN_SRR1,r2
+	mfspr	r1,SPRN_IMISS	/* Get failing address */
+	rlwinm.	r2,r2,0,31,31	/* Check for little endian access */
+	rlwimi	r2,r2,1,30,30	/* change 1 -> 3 */
+	xor	r1,r1,r2
+	mtspr	SPRN_DAR,r1	/* Set fault address */
+	mfmsr	r0		/* Restore "normal" registers */
+	xoris	r0,r0,MSR_TGPR>>16
+	mtcrf	0x80,r3		/* Restore CR0 */
+	mtmsr	r0
+	b	InstructionAccess
+
+/*
+ * Handle TLB miss for DATA Load operation on 603/603e
+ */
+	. = 0x1100
+DataLoadTLBMiss:
+/*
+ * r0:	stored ctr
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	mfctr	r0
+	/* Get PTE (linux-style) and check access */
+	mfspr	r3,SPRN_DMISS
+	lis	r1,KERNELBASE@h		/* check if kernel address */
+	cmplw	0,r3,r1
+	mfspr	r2,SPRN_SPRG3
+	li	r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
+	lwz	r2,PGDIR(r2)
+	blt+	112f
+	lis	r2,swapper_pg_dir@ha	/* if kernel address, use */
+	addi	r2,r2,swapper_pg_dir@l	/* kernel page table */
+	mfspr	r1,SPRN_SRR1		/* and MSR_PR bit from SRR1 */
+	rlwinm	r1,r1,32-12,29,29	/* shift MSR_PR to _PAGE_USER posn */
+112:	tophys(r2,r2)
+	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	DataAddressInvalid	/* return if no mapping */
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r3,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r3		/* check access & ~permission */
+	bne-	DataAddressInvalid	/* return if access not permitted */
+	ori	r3,r3,_PAGE_ACCESSED	/* set _PAGE_ACCESSED in pte */
+	/*
+	 * NOTE! We are assuming this is not an SMP system, otherwise
+	 * we would need to update the pte atomically with lwarx/stwcx.
+	 */
+	stw	r3,0(r2)		/* update PTE (accessed bit) */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	rlwinm	r1,r3,32-10,31,31	/* _PAGE_RW -> PP lsb */
+	rlwinm	r2,r3,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
+	and	r1,r1,r2		/* writable if _RW and _DIRTY */
+	rlwimi	r3,r3,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r3,r3,32-1,31,31	/* _PAGE_USER -> PP lsb */
+	ori	r1,r1,0xe14		/* clear out reserved bits and M */
+	andc	r1,r3,r1		/* PP = user? (rw&dirty? 2: 3): 0 */
+	mtspr	SPRN_RPA,r1
+	mfspr	r3,SPRN_DMISS
+	tlbld	r3
+	mfspr	r3,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r3
+	rfi
+DataAddressInvalid:
+	mfspr	r3,SPRN_SRR1
+	rlwinm	r1,r3,9,6,6	/* Get load/store bit */
+	addis	r1,r1,0x2000
+	mtspr	SPRN_DSISR,r1
+	mtctr	r0		/* Restore CTR */
+	andi.	r2,r3,0xFFFF	/* Clear upper bits of SRR1 */
+	mtspr	SPRN_SRR1,r2
+	mfspr	r1,SPRN_DMISS	/* Get failing address */
+	rlwinm.	r2,r2,0,31,31	/* Check for little endian access */
+	beq	20f		/* Jump if big endian */
+	xori	r1,r1,3
+20:	mtspr	SPRN_DAR,r1	/* Set fault address */
+	mfmsr	r0		/* Restore "normal" registers */
+	xoris	r0,r0,MSR_TGPR>>16
+	mtcrf	0x80,r3		/* Restore CR0 */
+	mtmsr	r0
+	b	DataAccess
+
+/*
+ * Handle TLB miss for DATA Store on 603/603e
+ */
+	. = 0x1200
+DataStoreTLBMiss:
+/*
+ * r0:	stored ctr
+ * r1:	linux style pte ( later becomes ppc hardware pte )
+ * r2:	ptr to linux-style pte
+ * r3:	scratch
+ */
+	mfctr	r0
+	/* Get PTE (linux-style) and check access */
+	mfspr	r3,SPRN_DMISS
+	lis	r1,KERNELBASE@h		/* check if kernel address */
+	cmplw	0,r3,r1
+	mfspr	r2,SPRN_SPRG3
+	li	r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */
+	lwz	r2,PGDIR(r2)
+	blt+	112f
+	lis	r2,swapper_pg_dir@ha	/* if kernel address, use */
+	addi	r2,r2,swapper_pg_dir@l	/* kernel page table */
+	mfspr	r1,SPRN_SRR1		/* and MSR_PR bit from SRR1 */
+	rlwinm	r1,r1,32-12,29,29	/* shift MSR_PR to _PAGE_USER posn */
+112:	tophys(r2,r2)
+	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
+	lwz	r2,0(r2)		/* get pmd entry */
+	rlwinm.	r2,r2,0,0,19		/* extract address of pte page */
+	beq-	DataAddressInvalid	/* return if no mapping */
+	rlwimi	r2,r3,22,20,29		/* insert next 10 bits of address */
+	lwz	r3,0(r2)		/* get linux-style pte */
+	andc.	r1,r1,r3		/* check access & ~permission */
+	bne-	DataAddressInvalid	/* return if access not permitted */
+	ori	r3,r3,_PAGE_ACCESSED|_PAGE_DIRTY
+	/*
+	 * NOTE! We are assuming this is not an SMP system, otherwise
+	 * we would need to update the pte atomically with lwarx/stwcx.
+	 */
+	stw	r3,0(r2)		/* update PTE (accessed/dirty bits) */
+	/* Convert linux-style PTE to low word of PPC-style PTE */
+	rlwimi	r3,r3,32-1,30,30	/* _PAGE_USER -> PP msb */
+	li	r1,0xe15		/* clear out reserved bits and M */
+	andc	r1,r3,r1		/* PP = user? 2: 0 */
+	mtspr	SPRN_RPA,r1
+	mfspr	r3,SPRN_DMISS
+	tlbld	r3
+	mfspr	r3,SPRN_SRR1		/* Need to restore CR0 */
+	mtcrf	0x80,r3
+	rfi
+
+#ifndef CONFIG_ALTIVEC
+#define AltivecAssistException	UnknownException
+#endif
+
+	EXCEPTION(0x1300, Trap_13, InstructionBreakpoint, EXC_XFER_EE)
+	EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE)
+	EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE)
+#ifdef CONFIG_POWER4
+	EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1700, Trap_17, AltivecAssistException, EXC_XFER_EE)
+	EXCEPTION(0x1800, Trap_18, TAUException, EXC_XFER_STD)
+#else /* !CONFIG_POWER4 */
+	EXCEPTION(0x1600, Trap_16, AltivecAssistException, EXC_XFER_EE)
+	EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
+	EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE)
+#endif /* CONFIG_POWER4 */
+	EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE)
+	EXCEPTION(0x2100, Trap_21, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2200, Trap_22, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2300, Trap_23, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2400, Trap_24, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2500, Trap_25, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2600, Trap_26, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2700, Trap_27, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2800, Trap_28, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2900, Trap_29, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2a00, Trap_2a, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2b00, Trap_2b, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2c00, Trap_2c, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2d00, Trap_2d, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2e00, Trap_2e, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x2f00, MOLTrampoline, UnknownException, EXC_XFER_EE_LITE)
+
+	.globl mol_trampoline
+	.set mol_trampoline, i0x2f00
+
+	. = 0x3000
+
+AltiVecUnavailable:
+	EXCEPTION_PROLOG
+#ifdef CONFIG_ALTIVEC
+	bne	load_up_altivec		/* if from user, just load it up */
+#endif /* CONFIG_ALTIVEC */
+	EXC_XFER_EE_LITE(0xf20, AltivecUnavailException)
+
+#ifdef CONFIG_PPC64BRIDGE
+DataAccess:
+	EXCEPTION_PROLOG
+	b	DataAccessCont
+
+InstructionAccess:
+	EXCEPTION_PROLOG
+	b	InstructionAccessCont
+
+DataSegment:
+	EXCEPTION_PROLOG
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	mfspr	r4,SPRN_DAR
+	stw	r4,_DAR(r11)
+	EXC_XFER_STD(0x380, UnknownException)
+
+InstructionSegment:
+	EXCEPTION_PROLOG
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_STD(0x480, UnknownException)
+#endif /* CONFIG_PPC64BRIDGE */
+
+#ifdef CONFIG_ALTIVEC
+/* Note that the AltiVec support is closely modeled after the FP
+ * support.  Changes to one are likely to be applicable to the
+ * other!  */
+load_up_altivec:
+/*
+ * Disable AltiVec for the task which had AltiVec previously,
+ * and save its AltiVec registers in its thread_struct.
+ * Enables AltiVec for use in the kernel on return.
+ * On SMP we know the AltiVec units are free, since we give it up every
+ * switch.  -- Kumar
+ */
+	mfmsr	r5
+	oris	r5,r5,MSR_VEC@h
+	MTMSRD(r5)			/* enable use of AltiVec now */
+	isync
+/*
+ * For SMP, we don't do lazy AltiVec switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_altivec in switch_to.
+ */
+#ifndef CONFIG_SMP
+	tophys(r6,0)
+	addis	r3,r6,last_task_used_altivec@ha
+	lwz	r4,last_task_used_altivec@l(r3)
+	cmpwi	0,r4,0
+	beq	1f
+	add	r4,r4,r6
+	addi	r4,r4,THREAD	/* want THREAD of last_task_used_altivec */
+	SAVE_32VRS(0,r10,r4)
+	mfvscr	vr0
+	li	r10,THREAD_VSCR
+	stvx	vr0,r10,r4
+	lwz	r5,PT_REGS(r4)
+	add	r5,r5,r6
+	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r10,MSR_VEC@h
+	andc	r4,r4,r10	/* disable altivec for previous task */
+	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* enable use of AltiVec after return */
+	oris	r9,r9,MSR_VEC@h
+	mfspr	r5,SPRN_SPRG3		/* current task's THREAD (phys) */
+	li	r4,1
+	li	r10,THREAD_VSCR
+	stw	r4,THREAD_USED_VR(r5)
+	lvx	vr0,r10,r5
+	mtvscr	vr0
+	REST_32VRS(0,r10,r5)
+#ifndef CONFIG_SMP
+	subi	r4,r5,THREAD
+	sub	r4,r4,r6
+	stw	r4,last_task_used_altivec@l(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	/* we haven't used ctr or xer or lr */
+	b	fast_exception_return
+
+/*
+ * AltiVec unavailable trap from kernel - print a message, but let
+ * the task use AltiVec in the kernel until it returns to user mode.
+ */
+KernelAltiVec:
+	lwz	r3,_MSR(r1)
+	oris	r3,r3,MSR_VEC@h
+	stw	r3,_MSR(r1)	/* enable use of AltiVec after return */
+	lis	r3,87f@h
+	ori	r3,r3,87f@l
+	mr	r4,r2		/* current */
+	lwz	r5,_NIP(r1)
+	bl	printk
+	b	ret_from_except
+87:	.string	"AltiVec used in kernel  (task=%p, pc=%x)  \n"
+	.align	4,0
+
+/*
+ * giveup_altivec(tsk)
+ * Disable AltiVec for the task given as the argument,
+ * and save the AltiVec registers in its thread_struct.
+ * Enables AltiVec for use in the kernel on return.
+ */
+
+	.globl	giveup_altivec
+giveup_altivec:
+	mfmsr	r5
+	oris	r5,r5,MSR_VEC@h
+	SYNC
+	MTMSRD(r5)			/* enable use of AltiVec now */
+	isync
+	cmpwi	0,r3,0
+	beqlr-				/* if no previous owner, done */
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	lwz	r5,PT_REGS(r3)
+	cmpwi	0,r5,0
+	SAVE_32VRS(0, r4, r3)
+	mfvscr	vr0
+	li	r4,THREAD_VSCR
+	stvx	vr0,r4,r3
+	beq	1f
+	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r3,MSR_VEC@h
+	andc	r4,r4,r3		/* disable AltiVec for previous task */
+	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+	li	r5,0
+	lis	r4,last_task_used_altivec@ha
+	stw	r5,last_task_used_altivec@l(r4)
+#endif /* CONFIG_SMP */
+	blr
+#endif /* CONFIG_ALTIVEC */
+
+/*
+ * This code is jumped to from the startup code to copy
+ * the kernel image to physical address 0.
+ */
+relocate_kernel:
+	addis	r9,r26,klimit@ha	/* fetch klimit */
+	lwz	r25,klimit@l(r9)
+	addis	r25,r25,-KERNELBASE@h
+	li	r3,0			/* Destination base address */
+	li	r6,0			/* Destination offset */
+	li	r5,0x4000		/* # bytes of memory to copy */
+	bl	copy_and_flush		/* copy the first 0x4000 bytes */
+	addi	r0,r3,4f@l		/* jump to the address of 4f */
+	mtctr	r0			/* in copy and do the rest. */
+	bctr				/* jump to the copy */
+4:	mr	r5,r25
+	bl	copy_and_flush		/* copy the rest */
+	b	turn_on_mmu
+
+/*
+ * Copy routine used to copy the kernel to start at physical address 0
+ * and flush and invalidate the caches as needed.
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
+ * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
+ */
+copy_and_flush:
+	addi	r5,r5,-4
+	addi	r6,r6,-4
+4:	li	r0,L1_CACHE_LINE_SIZE/4
+	mtctr	r0
+3:	addi	r6,r6,4			/* copy a cache line */
+	lwzx	r0,r6,r4
+	stwx	r0,r6,r3
+	bdnz	3b
+	dcbst	r6,r3			/* write it to memory */
+	sync
+	icbi	r6,r3			/* flush the icache line */
+	cmplw	0,r6,r5
+	blt	4b
+	sync				/* additional sync needed on g4 */
+	isync
+	addi	r5,r5,4
+	addi	r6,r6,4
+	blr
+
+#ifdef CONFIG_APUS
+/*
+ * On APUS the physical base address of the kernel is not known at compile
+ * time, which means the __pa/__va constants used are incorrect. In the
+ * __init section is recorded the virtual addresses of instructions using
+ * these constants, so all that has to be done is fix these before
+ * continuing the kernel boot.
+ *
+ * r4 = The physical address of the kernel base.
+ */
+fix_mem_constants:
+	mr	r10,r4
+	addis	r10,r10,-KERNELBASE@h    /* virt_to_phys constant */
+	neg	r11,r10	                 /* phys_to_virt constant */
+
+	lis	r12,__vtop_table_begin@h
+	ori	r12,r12,__vtop_table_begin@l
+	add	r12,r12,r10	         /* table begin phys address */
+	lis	r13,__vtop_table_end@h
+	ori	r13,r13,__vtop_table_end@l
+	add	r13,r13,r10	         /* table end phys address */
+	subi	r12,r12,4
+	subi	r13,r13,4
+1:	lwzu	r14,4(r12)               /* virt address of instruction */
+	add     r14,r14,r10              /* phys address of instruction */
+	lwz     r15,0(r14)               /* instruction, now insert top */
+	rlwimi  r15,r10,16,16,31         /* half of vp const in low half */
+	stw	r15,0(r14)               /* of instruction and restore. */
+	dcbst	r0,r14			 /* write it to memory */
+	sync
+	icbi	r0,r14			 /* flush the icache line */
+	cmpw	r12,r13
+	bne     1b
+	sync				/* additional sync needed on g4 */
+	isync
+
+/*
+ * Map the memory where the exception handlers will
+ * be copied to when hash constants have been patched.
+ */
+#ifdef CONFIG_APUS_FAST_EXCEPT
+	lis	r8,0xfff0
+#else
+	lis	r8,0
+#endif
+	ori	r8,r8,0x2		/* 128KB, supervisor */
+	mtspr	SPRN_DBAT3U,r8
+	mtspr	SPRN_DBAT3L,r8
+
+	lis	r12,__ptov_table_begin@h
+	ori	r12,r12,__ptov_table_begin@l
+	add	r12,r12,r10	         /* table begin phys address */
+	lis	r13,__ptov_table_end@h
+	ori	r13,r13,__ptov_table_end@l
+	add	r13,r13,r10	         /* table end phys address */
+	subi	r12,r12,4
+	subi	r13,r13,4
+1:	lwzu	r14,4(r12)               /* virt address of instruction */
+	add     r14,r14,r10              /* phys address of instruction */
+	lwz     r15,0(r14)               /* instruction, now insert top */
+	rlwimi  r15,r11,16,16,31         /* half of pv const in low half*/
+	stw	r15,0(r14)               /* of instruction and restore. */
+	dcbst	r0,r14			 /* write it to memory */
+	sync
+	icbi	r0,r14			 /* flush the icache line */
+	cmpw	r12,r13
+	bne     1b
+
+	sync				/* additional sync needed on g4 */
+	isync				/* No speculative loading until now */
+	blr
+
+/***********************************************************************
+ *  Please note that on APUS the exception handlers are located at the
+ *  physical address 0xfff0000. For this reason, the exception handlers
+ *  cannot use relative branches to access the code below.
+ ***********************************************************************/
+#endif /* CONFIG_APUS */
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_GEMINI
+	.globl	__secondary_start_gemini
+__secondary_start_gemini:
+        mfspr   r4,SPRN_HID0
+        ori     r4,r4,HID0_ICFI
+        li      r3,0
+        ori     r3,r3,HID0_ICE
+        andc    r4,r4,r3
+        mtspr   SPRN_HID0,r4
+        sync
+        b       __secondary_start
+#endif /* CONFIG_GEMINI */
+
+	.globl	__secondary_start_pmac_0
+__secondary_start_pmac_0:
+	/* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */
+	li	r24,0
+	b	1f
+	li	r24,1
+	b	1f
+	li	r24,2
+	b	1f
+	li	r24,3
+1:
+	/* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0
+	   set to map the 0xf0000000 - 0xffffffff region */
+	mfmsr	r0
+	rlwinm	r0,r0,0,28,26		/* clear DR (0x10) */
+	SYNC
+	mtmsr	r0
+	isync
+
+	.globl	__secondary_start
+__secondary_start:
+#ifdef CONFIG_PPC64BRIDGE
+	mfmsr	r0
+	clrldi	r0,r0,1			/* make sure it's in 32-bit mode */
+	SYNC
+	MTMSRD(r0)
+	isync
+#endif
+	/* Copy some CPU settings from CPU 0 */
+	bl	__restore_cpu_setup
+
+	lis	r3,-KERNELBASE@h
+	mr	r4,r24
+	bl	identify_cpu
+	bl	call_setup_cpu		/* Call setup_cpu for this CPU */
+#ifdef CONFIG_6xx
+	lis	r3,-KERNELBASE@h
+	bl	init_idle_6xx
+#endif /* CONFIG_6xx */
+#ifdef CONFIG_POWER4
+	lis	r3,-KERNELBASE@h
+	bl	init_idle_power4
+#endif /* CONFIG_POWER4 */
+
+	/* get current_thread_info and current */
+	lis	r1,secondary_ti@ha
+	tophys(r1,r1)
+	lwz	r1,secondary_ti@l(r1)
+	tophys(r2,r1)
+	lwz	r2,TI_TASK(r2)
+
+	/* stack */
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	li	r0,0
+	tophys(r3,r1)
+	stw	r0,0(r3)
+
+	/* load up the MMU */
+	bl	load_up_mmu
+
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* phys address of our thread_struct */
+	CLR_TOP32(r4)
+	mtspr	SPRN_SPRG3,r4
+	li	r3,0
+	mtspr	SPRN_SPRG2,r3	/* 0 => not in RTAS */
+
+	/* enable MMU and jump to start_secondary */
+	li	r4,MSR_KERNEL
+	FIX_SRR1(r4,r5)
+	lis	r3,start_secondary@h
+	ori	r3,r3,start_secondary@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	SYNC
+	RFI
+#endif /* CONFIG_SMP */
+
+/*
+ * Those generic dummy functions are kept for CPUs not
+ * included in CONFIG_6xx
+ */
+_GLOBAL(__setup_cpu_power3)
+	blr
+_GLOBAL(__setup_cpu_generic)
+	blr
+
+#if !defined(CONFIG_6xx) && !defined(CONFIG_POWER4)
+_GLOBAL(__save_cpu_setup)
+	blr
+_GLOBAL(__restore_cpu_setup)
+	blr
+#endif /* !defined(CONFIG_6xx) && !defined(CONFIG_POWER4) */
+
+
+/*
+ * Load stuff into the MMU.  Intended to be called with
+ * IR=0 and DR=0.
+ */
+load_up_mmu:
+	sync			/* Force all PTE updates to finish */
+	isync
+	tlbia			/* Clear all TLB entries */
+	sync			/* wait for tlbia/tlbie to finish */
+	TLBSYNC			/* ... on all CPUs */
+	/* Load the SDR1 register (hash table base & size) */
+	lis	r6,_SDR1@ha
+	tophys(r6,r6)
+	lwz	r6,_SDR1@l(r6)
+	mtspr	SPRN_SDR1,r6
+#ifdef CONFIG_PPC64BRIDGE
+	/* clear the ASR so we only use the pseudo-segment registers. */
+	li	r6,0
+	mtasr	r6
+#endif /* CONFIG_PPC64BRIDGE */
+	li	r0,16		/* load up segment register values */
+	mtctr	r0		/* for context 0 */
+	lis	r3,0x2000	/* Ku = 1, VSID = 0 */
+	li	r4,0
+3:	mtsrin	r3,r4
+	addi	r3,r3,0x111	/* increment VSID */
+	addis	r4,r4,0x1000	/* address of next segment */
+	bdnz	3b
+#ifndef CONFIG_POWER4
+/* Load the BAT registers with the values set up by MMU_init.
+   MMU_init takes care of whether we're on a 601 or not. */
+	mfpvr	r3
+	srwi	r3,r3,16
+	cmpwi	r3,1
+	lis	r3,BATS@ha
+	addi	r3,r3,BATS@l
+	tophys(r3,r3)
+	LOAD_BAT(0,r3,r4,r5)
+	LOAD_BAT(1,r3,r4,r5)
+	LOAD_BAT(2,r3,r4,r5)
+	LOAD_BAT(3,r3,r4,r5)
+#endif /* CONFIG_POWER4 */
+	blr
+
+/*
+ * This is where the main kernel code starts.
+ */
+start_here:
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+	/* Set up for using our exception vectors */
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* init task's THREAD */
+	CLR_TOP32(r4)
+	mtspr	SPRN_SPRG3,r4
+	li	r3,0
+	mtspr	SPRN_SPRG2,r3	/* 0 => not in RTAS */
+
+	/* stack */
+	lis	r1,init_thread_union@ha
+	addi	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+/*
+ * Do early bootinfo parsing, platform-specific initialization,
+ * and set up the MMU.
+ */
+	mr	r3,r31
+	mr	r4,r30
+	mr	r5,r29
+	mr	r6,r28
+	mr	r7,r27
+	bl	machine_init
+	bl	MMU_init
+
+#ifdef CONFIG_APUS
+	/* Copy exception code to exception vector base on APUS. */
+	lis	r4,KERNELBASE@h
+#ifdef CONFIG_APUS_FAST_EXCEPT
+	lis	r3,0xfff0		/* Copy to 0xfff00000 */
+#else
+	lis	r3,0			/* Copy to 0x00000000 */
+#endif
+	li	r5,0x4000		/* # bytes of memory to copy */
+	li	r6,0
+	bl	copy_and_flush		/* copy the first 0x4000 bytes */
+#endif  /* CONFIG_APUS */
+
+/*
+ * Go back to running unmapped so we can load up new values
+ * for SDR1 (hash table pointer) and the segment registers
+ * and change to using our exception vectors.
+ */
+	lis	r4,2f@h
+	ori	r4,r4,2f@l
+	tophys(r4,r4)
+	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+	FIX_SRR1(r3,r5)
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	SYNC
+	RFI
+/* Load up the kernel context */
+2:	bl	load_up_mmu
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Add helper information for the Abatron bdiGDB debugger.
+	 * We do this here because we know the mmu is disabled, and
+	 * will be enabled for real in just a few instructions.
+	 */
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r5, 0xf0(r0)	/* This much match your Abatron config */
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	tophys(r5, r5)
+	stw	r6, 0(r5)
+#endif /* CONFIG_BDI_SWITCH */
+
+/* Now turn on the MMU for real! */
+	li	r4,MSR_KERNEL
+	FIX_SRR1(r4,r5)
+	lis	r3,start_kernel@h
+	ori	r3,r3,start_kernel@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	SYNC
+	RFI
+
+/*
+ * Set up the segment registers for a new context.
+ */
+_GLOBAL(set_context)
+	mulli	r3,r3,897	/* multiply context by skew factor */
+	rlwinm	r3,r3,4,8,27	/* VSID = (context & 0xfffff) << 4 */
+	addis	r3,r3,0x6000	/* Set Ks, Ku bits */
+	li	r0,NUM_USER_SEGMENTS
+	mtctr	r0
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Context switch the PTE pointer for the Abatron BDI2000.
+	 * The PGDIR is passed as second argument.
+	 */
+	lis	r5, KERNELBASE@h
+	lwz	r5, 0xf0(r5)
+	stw	r4, 0x4(r5)
+#endif
+	li	r4,0
+	isync
+3:
+#ifdef CONFIG_PPC64BRIDGE
+	slbie	r4
+#endif /* CONFIG_PPC64BRIDGE */
+	mtsrin	r3,r4
+	addi	r3,r3,0x111	/* next VSID */
+	rlwinm	r3,r3,0,8,3	/* clear out any overflow from VSID field */
+	addis	r4,r4,0x1000	/* address of next segment */
+	bdnz	3b
+	sync
+	isync
+	blr
+
+/*
+ * An undocumented "feature" of 604e requires that the v bit
+ * be cleared before changing BAT values.
+ *
+ * Also, newer IBM firmware does not clear bat3 and 4 so
+ * this makes sure it's done.
+ *  -- Cort
+ */
+clear_bats:
+	li	r10,0
+	mfspr	r9,SPRN_PVR
+	rlwinm	r9,r9,16,16,31		/* r9 = 1 for 601, 4 for 604 */
+	cmpwi	r9, 1
+	beq	1f
+
+	mtspr	SPRN_DBAT0U,r10
+	mtspr	SPRN_DBAT0L,r10
+	mtspr	SPRN_DBAT1U,r10
+	mtspr	SPRN_DBAT1L,r10
+	mtspr	SPRN_DBAT2U,r10
+	mtspr	SPRN_DBAT2L,r10
+	mtspr	SPRN_DBAT3U,r10
+	mtspr	SPRN_DBAT3L,r10
+1:
+	mtspr	SPRN_IBAT0U,r10
+	mtspr	SPRN_IBAT0L,r10
+	mtspr	SPRN_IBAT1U,r10
+	mtspr	SPRN_IBAT1L,r10
+	mtspr	SPRN_IBAT2U,r10
+	mtspr	SPRN_IBAT2L,r10
+	mtspr	SPRN_IBAT3U,r10
+	mtspr	SPRN_IBAT3L,r10
+BEGIN_FTR_SECTION
+	/* Here's a tweak: at this point, CPU setup have
+	 * not been called yet, so HIGH_BAT_EN may not be
+	 * set in HID0 for the 745x processors. However, it
+	 * seems that doesn't affect our ability to actually
+	 * write to these SPRs.
+	 */
+	mtspr	SPRN_DBAT4U,r10
+	mtspr	SPRN_DBAT4L,r10
+	mtspr	SPRN_DBAT5U,r10
+	mtspr	SPRN_DBAT5L,r10
+	mtspr	SPRN_DBAT6U,r10
+	mtspr	SPRN_DBAT6L,r10
+	mtspr	SPRN_DBAT7U,r10
+	mtspr	SPRN_DBAT7L,r10
+	mtspr	SPRN_IBAT4U,r10
+	mtspr	SPRN_IBAT4L,r10
+	mtspr	SPRN_IBAT5U,r10
+	mtspr	SPRN_IBAT5L,r10
+	mtspr	SPRN_IBAT6U,r10
+	mtspr	SPRN_IBAT6L,r10
+	mtspr	SPRN_IBAT7U,r10
+	mtspr	SPRN_IBAT7L,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS)
+	blr
+
+flush_tlbs:
+	lis	r10, 0x40
+1:	addic.	r10, r10, -0x1000
+	tlbie	r10
+	blt	1b
+	sync
+	blr
+
+mmu_off:
+ 	addi	r4, r3, __after_mmu_off - _start
+	mfmsr	r3
+	andi.	r0,r3,MSR_DR|MSR_IR		/* MMU enabled? */
+	beqlr
+	andc	r3,r3,r0
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	sync
+	RFI
+
+#ifndef CONFIG_POWER4
+/*
+ * Use the first pair of BAT registers to map the 1st 16MB
+ * of RAM to KERNELBASE.  From this point on we can't safely
+ * call OF any more.
+ */
+initial_bats:
+	lis	r11,KERNELBASE@h
+#ifndef CONFIG_PPC64BRIDGE
+	mfspr	r9,SPRN_PVR
+	rlwinm	r9,r9,16,16,31		/* r9 = 1 for 601, 4 for 604 */
+	cmpwi	0,r9,1
+	bne	4f
+	ori	r11,r11,4		/* set up BAT registers for 601 */
+	li	r8,0x7f			/* valid, block length = 8MB */
+	oris	r9,r11,0x800000@h	/* set up BAT reg for 2nd 8M */
+	oris	r10,r8,0x800000@h	/* set up BAT reg for 2nd 8M */
+	mtspr	SPRN_IBAT0U,r11		/* N.B. 601 has valid bit in */
+	mtspr	SPRN_IBAT0L,r8		/* lower BAT register */
+	mtspr	SPRN_IBAT1U,r9
+	mtspr	SPRN_IBAT1L,r10
+	isync
+	blr
+#endif /* CONFIG_PPC64BRIDGE */
+
+4:	tophys(r8,r11)
+#ifdef CONFIG_SMP
+	ori	r8,r8,0x12		/* R/W access, M=1 */
+#else
+	ori	r8,r8,2			/* R/W access */
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_APUS
+	ori	r11,r11,BL_8M<<2|0x2	/* set up 8MB BAT registers for 604 */
+#else
+	ori	r11,r11,BL_256M<<2|0x2	/* set up BAT registers for 604 */
+#endif /* CONFIG_APUS */
+
+#ifdef CONFIG_PPC64BRIDGE
+	/* clear out the high 32 bits in the BAT */
+	clrldi	r11,r11,32
+	clrldi	r8,r8,32
+#endif /* CONFIG_PPC64BRIDGE */
+	mtspr	SPRN_DBAT0L,r8		/* N.B. 6xx (not 601) have valid */
+	mtspr	SPRN_DBAT0U,r11		/* bit in upper BAT register */
+	mtspr	SPRN_IBAT0L,r8
+	mtspr	SPRN_IBAT0U,r11
+	isync
+	blr
+
+#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT)
+setup_disp_bat:
+	/*
+	 * setup the display bat prepared for us in prom.c
+	 */
+	mflr	r8
+	bl	reloc_offset
+	mtlr	r8
+	addis	r8,r3,disp_BAT@ha
+	addi	r8,r8,disp_BAT@l
+	lwz	r11,0(r8)
+	lwz	r8,4(r8)
+	mfspr	r9,SPRN_PVR
+	rlwinm	r9,r9,16,16,31		/* r9 = 1 for 601, 4 for 604 */
+	cmpwi	0,r9,1
+	beq	1f
+	mtspr	SPRN_DBAT3L,r8
+	mtspr	SPRN_DBAT3U,r11
+	blr
+1:	mtspr	SPRN_IBAT3L,r8
+	mtspr	SPRN_IBAT3U,r11
+	blr
+
+#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */
+
+#else /* CONFIG_POWER4 */
+/*
+ * Load up the SDR1 and segment register values now
+ * since we don't have the BATs.
+ * Also make sure we are running in 32-bit mode.
+ */
+
+initial_mm_power4:
+	addis	r14,r3,_SDR1@ha		/* get the value from _SDR1 */
+	lwz	r14,_SDR1@l(r14)	/* assume hash table below 4GB */
+	mtspr	SPRN_SDR1,r14
+	slbia
+	lis	r4,0x2000		/* set pseudo-segment reg 12 */
+	ori	r5,r4,0x0ccc
+	mtsr	12,r5
+#if 0
+	ori	r5,r4,0x0888		/* set pseudo-segment reg 8 */
+	mtsr	8,r5			/* (for access to serial port) */
+#endif
+#ifdef CONFIG_BOOTX_TEXT
+	ori	r5,r4,0x0999		/* set pseudo-segment reg 9 */
+	mtsr	9,r5			/* (for access to screen) */
+#endif
+	mfmsr	r0
+	clrldi	r0,r0,1
+	sync
+	mtmsr	r0
+	isync
+	blr
+
+#endif /* CONFIG_POWER4 */
+
+#ifdef CONFIG_8260
+/* Jump into the system reset for the rom.
+ * We first disable the MMU, and then jump to the ROM reset address.
+ *
+ * r3 is the board info structure, r4 is the location for starting.
+ * I use this for building a small kernel that can load other kernels,
+ * rather than trying to write or rely on a rom monitor that can tftp load.
+ */
+       .globl  m8260_gorom
+m8260_gorom:
+	mfmsr	r0
+	rlwinm	r0,r0,0,17,15	/* clear MSR_EE in r0 */
+	sync
+	mtmsr	r0
+	sync
+	mfspr	r11, SPRN_HID0
+	lis	r10, 0
+	ori	r10,r10,HID0_ICE|HID0_DCE
+	andc	r11, r11, r10
+	mtspr	SPRN_HID0, r11
+	isync
+	li	r5, MSR_ME|MSR_RI
+	lis	r6,2f@h
+	addis	r6,r6,-KERNELBASE@h
+	ori	r6,r6,2f@l
+	mtspr	SPRN_SRR0,r6
+	mtspr	SPRN_SRR1,r5
+	isync
+	sync
+	rfi
+2:
+	mtlr	r4
+	blr
+#endif
+
+
+/*
+ * We put a few things here that have to be page-aligned.
+ * This stuff goes at the beginning of the data segment,
+ * which is page-aligned.
+ */
+	.data
+	.globl	sdata
+sdata:
+	.globl	empty_zero_page
+empty_zero_page:
+	.space	4096
+
+	.globl	swapper_pg_dir
+swapper_pg_dir:
+	.space	4096
+
+/*
+ * This space gets a copy of optional info passed to us by the bootstrap
+ * Used to pass parameters into the kernel like root=/dev/sda1, etc.
+ */
+	.globl	cmd_line
+cmd_line:
+	.space	512
+
+	.globl intercept_table
+intercept_table:
+	.long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700
+	.long i0x800, 0, 0, 0, 0, i0xd00, 0, 0
+	.long 0, 0, 0, i0x1300, 0, 0, 0, 0
+	.long 0, 0, 0, 0, 0, 0, 0, 0
+	.long 0, 0, 0, 0, 0, 0, 0, 0
+	.long 0, 0, 0, 0, 0, 0, 0, 0
+
+/* Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+	.space	8
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
new file mode 100644
index 00000000000..599245b0407
--- /dev/null
+++ b/arch/powerpc/kernel/head_44x.S
@@ -0,0 +1,778 @@
+/*
+ * arch/ppc/kernel/head_44x.S
+ *
+ * Kernel execution entry point code.
+ *
+ *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *      Initial PowerPC version.
+ *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *      Rewritten for PReP
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Low-level exception handers, MMU support, and rewrite.
+ *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *      PowerPC 8xx modifications.
+ *    Copyright (c) 1998-1999 TiVo, Inc.
+ *      PowerPC 403GCX modifications.
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *      PowerPC 403GCX/405GP modifications.
+ *    Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *      PowerPC 403GCX/405GP modifications.
+ * 	Author: MontaVista Software, Inc.
+ *         	frank_rowand@mvista.com or source@mvista.com
+ * 	   	debbie_chu@mvista.com
+ *    Copyright 2002-2005 MontaVista Software, Inc.
+ *      PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/ibm4xx.h>
+#include <asm/ibm44x.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include "head_booke.h"
+
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ *   r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ *   r4 - Starting address of the init RAM disk
+ *   r5 - Ending address of the init RAM disk
+ *   r6 - Start of kernel command line string (e.g. "mem=128")
+ *   r7 - End of kernel command line string
+ *
+ */
+	.text
+_GLOBAL(_stext)
+_GLOBAL(_start)
+	/*
+	 * Reserve a word at a fixed location to store the address
+	 * of abatron_pteptrs
+	 */
+	nop
+/*
+ * Save parameters we are passed
+ */
+	mr	r31,r3
+	mr	r30,r4
+	mr	r29,r5
+	mr	r28,r6
+	mr	r27,r7
+	li	r24,0		/* CPU number */
+
+/*
+ * Set up the initial MMU state
+ *
+ * We are still executing code at the virtual address
+ * mappings set by the firmware for the base of RAM.
+ *
+ * We first invalidate all TLB entries but the one
+ * we are running from.  We then load the KERNELBASE
+ * mappings so we can begin to use kernel addresses
+ * natively and so the interrupt vector locations are
+ * permanently pinned (necessary since Book E
+ * implementations always have translation enabled).
+ *
+ * TODO: Use the known TLB entry we are running from to
+ *	 determine which physical region we are located
+ *	 in.  This can be used to determine where in RAM
+ *	 (on a shared CPU system) or PCI memory space
+ *	 (on a DRAMless system) we are located.
+ *       For now, we assume a perfect world which means
+ *	 we are located at the base of DRAM (physical 0).
+ */
+
+/*
+ * Search TLB for entry that we are currently using.
+ * Invalidate all entries but the one we are using.
+ */
+	/* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */
+	mfspr	r3,SPRN_PID			/* Get PID */
+	mfmsr	r4				/* Get MSR */
+	andi.	r4,r4,MSR_IS@l			/* TS=1? */
+	beq	wmmucr				/* If not, leave STS=0 */
+	oris	r3,r3,PPC44x_MMUCR_STS@h	/* Set STS=1 */
+wmmucr:	mtspr	SPRN_MMUCR,r3			/* Put MMUCR */
+	sync
+
+	bl	invstr				/* Find our address */
+invstr:	mflr	r5				/* Make it accessible */
+	tlbsx	r23,0,r5			/* Find entry we are in */
+	li	r4,0				/* Start at TLB entry 0 */
+	li	r3,0				/* Set PAGEID inval value */
+1:	cmpw	r23,r4				/* Is this our entry? */
+	beq	skpinv				/* If so, skip the inval */
+	tlbwe	r3,r4,PPC44x_TLB_PAGEID		/* If not, inval the entry */
+skpinv:	addi	r4,r4,1				/* Increment */
+	cmpwi	r4,64				/* Are we done? */
+	bne	1b				/* If not, repeat */
+	isync					/* If so, context change */
+
+/*
+ * Configure and load pinned entry into TLB slot 63.
+ */
+
+	lis	r3,KERNELBASE@h		/* Load the kernel virtual address */
+	ori	r3,r3,KERNELBASE@l
+
+	/* Kernel is at the base of RAM */
+	li r4, 0			/* Load the kernel physical address */
+
+	/* Load the kernel PID = 0 */
+	li	r0,0
+	mtspr	SPRN_PID,r0
+	sync
+
+	/* Initialize MMUCR */
+	li	r5,0
+	mtspr	SPRN_MMUCR,r5
+	sync
+
+ 	/* pageid fields */
+	clrrwi	r3,r3,10		/* Mask off the effective page number */
+	ori	r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M
+
+	/* xlat fields */
+	clrrwi	r4,r4,10		/* Mask off the real page number */
+					/* ERPN is 0 for first 4GB page */
+
+	/* attrib fields */
+	/* Added guarded bit to protect against speculative loads/stores */
+	li	r5,0
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+        li      r0,63                    /* TLB slot 63 */
+
+	tlbwe	r3,r0,PPC44x_TLB_PAGEID	/* Load the pageid fields */
+	tlbwe	r4,r0,PPC44x_TLB_XLAT	/* Load the translation fields */
+	tlbwe	r5,r0,PPC44x_TLB_ATTRIB	/* Load the attrib/access fields */
+
+	/* Force context change */
+	mfmsr	r0
+	mtspr	SPRN_SRR1, r0
+	lis	r0,3f@h
+	ori	r0,r0,3f@l
+	mtspr	SPRN_SRR0,r0
+	sync
+	rfi
+
+	/* If necessary, invalidate original entry we used */
+3:	cmpwi	r23,63
+	beq	4f
+	li	r6,0
+	tlbwe   r6,r23,PPC44x_TLB_PAGEID
+	isync
+
+4:
+#ifdef CONFIG_SERIAL_TEXT_DEBUG
+	/*
+	 * Add temporary UART mapping for early debug.
+	 * We can map UART registers wherever we want as long as they don't
+	 * interfere with other system mappings (e.g. with pinned entries).
+	 * For an example of how we handle this - see ocotea.h.       --ebs
+	 */
+ 	/* pageid fields */
+	lis	r3,UART0_IO_BASE@h
+	ori	r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_4K
+
+	/* xlat fields */
+	lis	r4,UART0_PHYS_IO_BASE@h		/* RPN depends on SoC */
+#ifndef CONFIG_440EP
+	ori	r4,r4,0x0001		/* ERPN is 1 for second 4GB page */
+#endif
+
+	/* attrib fields */
+	li	r5,0
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_I | PPC44x_TLB_G)
+
+        li      r0,0                    /* TLB slot 0 */
+
+	tlbwe	r3,r0,PPC44x_TLB_PAGEID	/* Load the pageid fields */
+	tlbwe	r4,r0,PPC44x_TLB_XLAT	/* Load the translation fields */
+	tlbwe	r5,r0,PPC44x_TLB_ATTRIB	/* Load the attrib/access fields */
+
+	/* Force context change */
+	isync
+#endif /* CONFIG_SERIAL_TEXT_DEBUG */
+
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheck);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError);
+	SET_IVOR(14, InstructionTLBError);
+	SET_IVOR(15, Debug);
+
+	/* Establish the interrupt vector base */
+	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
+	mtspr	SPRN_IVPR,r4
+
+#ifdef CONFIG_440EP
+	/* Clear DAPUIB flag in CCR0 (enable APU between CPU and FPU) */
+	mfspr	r2,SPRN_CCR0
+	lis	r3,0xffef
+	ori	r3,r3,0xffff
+	and	r2,r2,r3
+	mtspr	SPRN_CCR0,r2
+	isync
+#endif
+
+	/*
+	 * This is where the main kernel code starts.
+	 */
+
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+
+	/* ptr to current thread */
+	addi	r4,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG3,r4
+
+	/* stack */
+	lis	r1,init_thread_union@h
+	ori	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+	bl	early_init
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+	mr	r3,r31
+	mr	r4,r30
+	mr	r5,r29
+	mr	r6,r28
+	mr	r7,r27
+	bl	machine_init
+	bl	MMU_init
+
+	/* Setup PTE pointers for the Abatron bdiGDB */
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	lis	r4, KERNELBASE@h
+	ori	r4, r4, KERNELBASE@l
+	stw	r5, 0(r4)	/* Save abatron_pteptrs at a fixed location */
+	stw	r6, 0(r5)
+
+	/* Let's move on */
+	lis	r4,start_kernel@h
+	ori	r4,r4,start_kernel@l
+	lis	r3,MSR_KERNEL@h
+	ori	r3,r3,MSR_KERNEL@l
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi			/* change context and jump to start_kernel */
+
+/*
+ * Interrupt vector entry code
+ *
+ * The Book E MMUs are always on so we don't need to handle
+ * interrupts in real mode as with previous PPC processors. In
+ * this case we handle interrupts in the kernel virtual address
+ * space.
+ *
+ * Interrupt vectors are dynamically placed relative to the
+ * interrupt prefix as determined by the address of interrupt_base.
+ * The interrupt vectors offsets are programmed using the labels
+ * for each interrupt vector entry.
+ *
+ * Interrupt vectors must be aligned on a 16 byte boundary.
+ * We align on a 32 byte cache line boundary for good measure.
+ */
+
+interrupt_base:
+	/* Critical Input Interrupt */
+	CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException)
+
+	/* Machine Check Interrupt */
+#ifdef CONFIG_440A
+	MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException)
+#else
+	CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException)
+#endif
+
+	/* Data Storage Interrupt */
+	START_EXCEPTION(DataStorage)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+	mtspr	SPRN_SPRG4W, r12
+	mtspr	SPRN_SPRG5W, r13
+	mfcr	r11
+	mtspr	SPRN_SPRG7W, r11
+
+	/*
+	 * Check if it was a store fault, if not then bail
+	 * because a user tried to access a kernel or
+	 * read-protected page.  Otherwise, get the
+	 * offending address and handle it.
+	 */
+	mfspr	r10, SPRN_ESR
+	andis.	r10, r10, ESR_ST@h
+	beq	2f
+
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr   r12,SPRN_MMUCR
+	rlwinm	r12,r12,0,0,23		/* Clear TID */
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+
+	/* Load PID into MMUCR TID */
+	mfspr	r12,SPRN_MMUCR		/* Get MMUCR */
+	mfspr   r13,SPRN_PID		/* Get PID */
+	rlwimi	r12,r13,0,24,31		/* Set TID */
+
+4:
+	mtspr   SPRN_MMUCR,r12
+
+	rlwinm  r12, r10, 13, 19, 29    /* Compute pgdir/pmd offset */
+	lwzx    r11, r12, r11           /* Get pgd/pmd entry */
+	rlwinm. r12, r11, 0, 0, 20      /* Extract pt base address */
+	beq     2f                      /* Bail if no table */
+
+	rlwimi  r12, r10, 23, 20, 28    /* Compute pte address */
+	lwz     r11, 4(r12)             /* Get pte entry */
+
+	andi.	r13, r11, _PAGE_RW	/* Is it writeable? */
+	beq	2f			/* Bail if not */
+
+	/* Update 'changed'.
+	*/
+	ori	r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
+	stw	r11, 4(r12)		/* Update Linux page table */
+
+	li	r13, PPC44x_TLB_SR@l	/* Set SR */
+	rlwimi	r13, r11, 29, 29, 29	/* SX = _PAGE_HWEXEC */
+	rlwimi	r13, r11, 0, 30, 30	/* SW = _PAGE_RW */
+	rlwimi	r13, r11, 29, 28, 28	/* UR = _PAGE_USER */
+	rlwimi	r12, r11, 31, 26, 26	/* (_PAGE_USER>>1)->r12 */
+	rlwimi	r12, r11, 29, 30, 30	/* (_PAGE_USER>>3)->r12 */
+	and	r12, r12, r11		/* HWEXEC/RW & USER */
+	rlwimi	r13, r12, 0, 26, 26	/* UX = HWEXEC & USER */
+	rlwimi	r13, r12, 3, 27, 27	/* UW = RW & USER */
+
+	rlwimi	r11,r13,0,26,31		/* Insert static perms */
+
+	rlwinm	r11,r11,0,20,15		/* Clear U0-U3 */
+
+	/* find the TLB index that caused the fault.  It has to be here. */
+	tlbsx	r10, 0, r10
+
+	tlbwe	r11, r10, PPC44x_TLB_ATTRIB	/* Write ATTRIB */
+
+	/* Done...restore registers and get out of here.
+	*/
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	rfi			/* Force context change */
+
+2:
+	/*
+	 * The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	data_access
+
+	/* Instruction Storage Interrupt */
+	INSTRUCTION_STORAGE_EXCEPTION
+
+	/* External Input Interrupt */
+	EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+
+	/* Alignment Interrupt */
+	ALIGNMENT_EXCEPTION
+
+	/* Program Interrupt */
+	PROGRAM_EXCEPTION
+
+	/* Floating Point Unavailable Interrupt */
+#ifdef CONFIG_PPC_FPU
+	FP_UNAVAILABLE_EXCEPTION
+#else
+	EXCEPTION(0x2010, FloatingPointUnavailable, UnknownException, EXC_XFER_EE)
+#endif
+
+	/* System Call Interrupt */
+	START_EXCEPTION(SystemCall)
+	NORMAL_EXCEPTION_PROLOG
+	EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+
+	/* Auxillary Processor Unavailable Interrupt */
+	EXCEPTION(0x2020, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE)
+
+	/* Decrementer Interrupt */
+	DECREMENTER_EXCEPTION
+
+	/* Fixed Internal Timer Interrupt */
+	/* TODO: Add FIT support */
+	EXCEPTION(0x1010, FixedIntervalTimer, UnknownException, EXC_XFER_EE)
+
+	/* Watchdog Timer Interrupt */
+	/* TODO: Add watchdog support */
+#ifdef CONFIG_BOOKE_WDT
+	CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException)
+#else
+	CRITICAL_EXCEPTION(0x1020, WatchdogTimer, UnknownException)
+#endif
+
+	/* Data TLB Error Interrupt */
+	START_EXCEPTION(DataTLBError)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+	mtspr	SPRN_SPRG4W, r12
+	mtspr	SPRN_SPRG5W, r13
+	mfcr	r11
+	mtspr	SPRN_SPRG7W, r11
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MMUCR
+	rlwinm	r12,r12,0,0,23		/* Clear TID */
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+
+	/* Load PID into MMUCR TID */
+	mfspr	r12,SPRN_MMUCR
+	mfspr   r13,SPRN_PID		/* Get PID */
+	rlwimi	r12,r13,0,24,31		/* Set TID */
+
+4:
+	mtspr	SPRN_MMUCR,r12
+
+	rlwinm 	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
+	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
+	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
+	beq	2f			/* Bail if no table */
+
+	rlwimi	r12, r10, 23, 20, 28	/* Compute pte address */
+	lwz	r11, 4(r12)		/* Get pte entry */
+	andi.	r13, r11, _PAGE_PRESENT	/* Is the page present? */
+	beq	2f			/* Bail if not present */
+
+	ori	r11, r11, _PAGE_ACCESSED
+	stw	r11, 4(r12)
+
+	 /* Jump to common tlb load */
+	b	finish_tlb_load
+
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	data_access
+
+	/* Instruction TLB Error Interrupt */
+	/*
+	 * Nearly the same as above, except we get our
+	 * information from different registers and bailout
+	 * to a different point.
+	 */
+	START_EXCEPTION(InstructionTLBError)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+	mtspr	SPRN_SPRG4W, r12
+	mtspr	SPRN_SPRG5W, r13
+	mfcr	r11
+	mtspr	SPRN_SPRG7W, r11
+	mfspr	r10, SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MMUCR
+	rlwinm	r12,r12,0,0,23		/* Clear TID */
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+
+	/* Load PID into MMUCR TID */
+	mfspr	r12,SPRN_MMUCR
+	mfspr   r13,SPRN_PID		/* Get PID */
+	rlwimi	r12,r13,0,24,31		/* Set TID */
+
+4:
+	mtspr	SPRN_MMUCR,r12
+
+	rlwinm	r12, r10, 13, 19, 29	/* Compute pgdir/pmd offset */
+	lwzx	r11, r12, r11		/* Get pgd/pmd entry */
+	rlwinm.	r12, r11, 0, 0, 20	/* Extract pt base address */
+	beq	2f			/* Bail if no table */
+
+	rlwimi	r12, r10, 23, 20, 28	/* Compute pte address */
+	lwz	r11, 4(r12)		/* Get pte entry */
+	andi.	r13, r11, _PAGE_PRESENT	/* Is the page present? */
+	beq	2f			/* Bail if not present */
+
+	ori	r11, r11, _PAGE_ACCESSED
+	stw	r11, 4(r12)
+
+	/* Jump to common TLB load point */
+	b	finish_tlb_load
+
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	InstructionStorage
+
+	/* Debug Interrupt */
+	DEBUG_EXCEPTION
+
+/*
+ * Local functions
+ */
+	/*
+	 * Data TLB exceptions will bail out to this point
+	 * if they can't resolve the lightweight TLB fault.
+	 */
+data_access:
+	NORMAL_EXCEPTION_PROLOG
+	mfspr	r5,SPRN_ESR		/* Grab the ESR, save it, pass arg3 */
+	stw	r5,_ESR(r11)
+	mfspr	r4,SPRN_DEAR		/* Grab the DEAR, save it, pass arg2 */
+	EXC_XFER_EE_LITE(0x0300, handle_page_fault)
+
+/*
+
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * 	r10 - EA of fault
+ * 	r11 - available to use
+ *	r12 - Pointer to the 64-bit PTE
+ *	r13 - available to use
+ *	MMUCR - loaded with proper value when we get here
+ *	Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load:
+	/*
+	 * We set execute, because we don't have the granularity to
+	 * properly set this at the page level (Linux problem).
+	 * If shared is set, we cause a zero PID->TID load.
+	 * Many of these bits are software only.  Bits we don't set
+	 * here we (properly should) assume have the appropriate value.
+	 */
+
+	/* Load the next available TLB index */
+	lis	r13, tlb_44x_index@ha
+	lwz	r13, tlb_44x_index@l(r13)
+	/* Load the TLB high watermark */
+	lis	r11, tlb_44x_hwater@ha
+	lwz	r11, tlb_44x_hwater@l(r11)
+
+	/* Increment, rollover, and store TLB index */
+	addi	r13, r13, 1
+	cmpw	0, r13, r11			/* reserve entries */
+	ble	7f
+	li	r13, 0
+7:
+	/* Store the next available TLB index */
+	lis	r11, tlb_44x_index@ha
+	stw	r13, tlb_44x_index@l(r11)
+
+	lwz	r11, 0(r12)			/* Get MS word of PTE */
+	lwz	r12, 4(r12)			/* Get LS word of PTE */
+	rlwimi	r11, r12, 0, 0 , 19		/* Insert RPN */
+	tlbwe	r11, r13, PPC44x_TLB_XLAT	/* Write XLAT */
+
+	/*
+	 * Create PAGEID. This is the faulting address,
+	 * page size, and valid flag.
+	 */
+	li	r11, PPC44x_TLB_VALID | PPC44x_TLB_4K
+	rlwimi	r10, r11, 0, 20, 31		/* Insert valid and page size */
+	tlbwe	r10, r13, PPC44x_TLB_PAGEID	/* Write PAGEID */
+
+	li	r10, PPC44x_TLB_SR@l		/* Set SR */
+	rlwimi	r10, r12, 0, 30, 30		/* Set SW = _PAGE_RW */
+	rlwimi	r10, r12, 29, 29, 29		/* SX = _PAGE_HWEXEC */
+	rlwimi	r10, r12, 29, 28, 28		/* UR = _PAGE_USER */
+	rlwimi	r11, r12, 31, 26, 26		/* (_PAGE_USER>>1)->r12 */
+	and	r11, r12, r11			/* HWEXEC & USER */
+	rlwimi	r10, r11, 0, 26, 26		/* UX = HWEXEC & USER */
+
+	rlwimi	r12, r10, 0, 26, 31		/* Insert static perms */
+	rlwinm	r12, r12, 0, 20, 15		/* Clear U0-U3 */
+	tlbwe	r12, r13, PPC44x_TLB_ATTRIB	/* Write ATTRIB */
+
+	/* Done...restore registers and get out of here.
+	*/
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	rfi					/* Force context change */
+
+/*
+ * Global functions
+ */
+
+/*
+ * extern void giveup_altivec(struct task_struct *prev)
+ *
+ * The 44x core does not have an AltiVec unit.
+ */
+_GLOBAL(giveup_altivec)
+	blr
+
+/*
+ * extern void giveup_fpu(struct task_struct *prev)
+ *
+ * The 44x core does not have an FPU.
+ */
+#ifndef CONFIG_PPC_FPU
+_GLOBAL(giveup_fpu)
+	blr
+#endif
+
+/*
+ * extern void abort(void)
+ *
+ * At present, this routine just applies a system reset.
+ */
+_GLOBAL(abort)
+        mfspr   r13,SPRN_DBCR0
+        oris    r13,r13,DBCR0_RST_SYSTEM@h
+        mtspr   SPRN_DBCR0,r13
+
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Context switch the PTE pointer for the Abatron BDI2000.
+	 * The PGDIR is the second parameter.
+	 */
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r4, 0x4(r5)
+#endif
+	mtspr	SPRN_PID,r3
+	isync			/* Force context change */
+	blr
+
+/*
+ * We put a few things here that have to be page-aligned. This stuff
+ * goes at the beginning of the data segment, which is page-aligned.
+ */
+	.data
+_GLOBAL(sdata)
+_GLOBAL(empty_zero_page)
+	.space	4096
+
+/*
+ * To support >32-bit physical addresses, we use an 8KB pgdir.
+ */
+_GLOBAL(swapper_pg_dir)
+	.space	8192
+
+/* Reserved 4k for the critical exception stack & 4k for the machine
+ * check stack per CPU for kernel mode exceptions */
+	.section .bss
+        .align 12
+exception_stack_bottom:
+	.space	BOOKE_EXCEPTION_STACK_SIZE
+_GLOBAL(exception_stack_top)
+
+/*
+ * This space gets a copy of optional info passed to us by the bootstrap
+ * which is used to pass parameters into the kernel like root=/dev/sda1, etc.
+ */
+_GLOBAL(cmd_line)
+	.space	512
+
+/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+	.space	8
+
+
diff --git a/arch/powerpc/kernel/head_4xx.S b/arch/powerpc/kernel/head_4xx.S
new file mode 100644
index 00000000000..8562b807b37
--- /dev/null
+++ b/arch/powerpc/kernel/head_4xx.S
@@ -0,0 +1,1016 @@
+/*
+ *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *      Initial PowerPC version.
+ *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *      Rewritten for PReP
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Low-level exception handers, MMU support, and rewrite.
+ *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *      PowerPC 8xx modifications.
+ *    Copyright (c) 1998-1999 TiVo, Inc.
+ *      PowerPC 403GCX modifications.
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *      PowerPC 403GCX/405GP modifications.
+ *    Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *      PowerPC 403GCX/405GP modifications.
+ * 	Author: MontaVista Software, Inc.
+ *         	frank_rowand@mvista.com or source@mvista.com
+ * 	   	debbie_chu@mvista.com
+ *
+ *
+ *    Module name: head_4xx.S
+ *
+ *    Description:
+ *      Kernel execution entry point code.
+ *
+ *    This program is free software; you can redistribute it and/or
+ *    modify it under the terms of the GNU General Public License
+ *    as published by the Free Software Foundation; either version
+ *    2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/ibm4xx.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ *   r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ *   r4 - Starting address of the init RAM disk
+ *   r5 - Ending address of the init RAM disk
+ *   r6 - Start of kernel command line string (e.g. "mem=96m")
+ *   r7 - End of kernel command line string
+ *
+ * This is all going to change RSN when we add bi_recs.......  -- Dan
+ */
+	.text
+_GLOBAL(_stext)
+_GLOBAL(_start)
+
+	/* Save parameters we are passed.
+	*/
+	mr	r31,r3
+	mr	r30,r4
+	mr	r29,r5
+	mr	r28,r6
+	mr	r27,r7
+
+	/* We have to turn on the MMU right away so we get cache modes
+	 * set correctly.
+	 */
+	bl	initial_mmu
+
+/* We now have the lower 16 Meg mapped into TLB entries, and the caches
+ * ready to work.
+ */
+turn_on_mmu:
+	lis	r0,MSR_KERNEL@h
+	ori	r0,r0,MSR_KERNEL@l
+	mtspr	SPRN_SRR1,r0
+	lis	r0,start_here@h
+	ori	r0,r0,start_here@l
+	mtspr	SPRN_SRR0,r0
+	SYNC
+	rfi				/* enables MMU */
+	b	.			/* prevent prefetch past rfi */
+
+/*
+ * This area is used for temporarily saving registers during the
+ * critical exception prolog.
+ */
+	. = 0xc0
+crit_save:
+_GLOBAL(crit_r10)
+	.space	4
+_GLOBAL(crit_r11)
+	.space	4
+
+/*
+ * Exception vector entry code. This code runs with address translation
+ * turned off (i.e. using physical addresses). We assume SPRG3 has the
+ * physical address of the current task thread_struct.
+ * Note that we have to have decremented r1 before we write to any fields
+ * of the exception frame, since a critical interrupt could occur at any
+ * time, and it will write to the area immediately below the current r1.
+ */
+#define NORMAL_EXCEPTION_PROLOG						     \
+	mtspr	SPRN_SPRG0,r10;		/* save two registers to work with */\
+	mtspr	SPRN_SPRG1,r11;						     \
+	mtspr	SPRN_SPRG2,r1;						     \
+	mfcr	r10;			/* save CR in r10 for now	   */\
+	mfspr	r11,SPRN_SRR1;		/* check whether user or kernel    */\
+	andi.	r11,r11,MSR_PR;						     \
+	beq	1f;							     \
+	mfspr	r1,SPRN_SPRG3;		/* if from user, start at top of   */\
+	lwz	r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack   */\
+	addi	r1,r1,THREAD_SIZE;					     \
+1:	subi	r1,r1,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
+	tophys(r11,r1);							     \
+	stw	r10,_CCR(r11);          /* save various registers	   */\
+	stw	r12,GPR12(r11);						     \
+	stw	r9,GPR9(r11);						     \
+	mfspr	r10,SPRN_SPRG0;						     \
+	stw	r10,GPR10(r11);						     \
+	mfspr	r12,SPRN_SPRG1;						     \
+	stw	r12,GPR11(r11);						     \
+	mflr	r10;							     \
+	stw	r10,_LINK(r11);						     \
+	mfspr	r10,SPRN_SPRG2;						     \
+	mfspr	r12,SPRN_SRR0;						     \
+	stw	r10,GPR1(r11);						     \
+	mfspr	r9,SPRN_SRR1;						     \
+	stw	r10,0(r11);						     \
+	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
+	stw	r0,GPR0(r11);						     \
+	SAVE_4GPRS(3, r11);						     \
+	SAVE_2GPRS(7, r11)
+
+/*
+ * Exception prolog for critical exceptions.  This is a little different
+ * from the normal exception prolog above since a critical exception
+ * can potentially occur at any point during normal exception processing.
+ * Thus we cannot use the same SPRG registers as the normal prolog above.
+ * Instead we use a couple of words of memory at low physical addresses.
+ * This is OK since we don't support SMP on these processors.
+ */
+#define CRITICAL_EXCEPTION_PROLOG					     \
+	stw	r10,crit_r10@l(0);	/* save two registers to work with */\
+	stw	r11,crit_r11@l(0);					     \
+	mfcr	r10;			/* save CR in r10 for now	   */\
+	mfspr	r11,SPRN_SRR3;		/* check whether user or kernel    */\
+	andi.	r11,r11,MSR_PR;						     \
+	lis	r11,critical_stack_top@h;				     \
+	ori	r11,r11,critical_stack_top@l;				     \
+	beq	1f;							     \
+	/* COMING FROM USER MODE */					     \
+	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
+	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+	addi	r11,r11,THREAD_SIZE;					     \
+1:	subi	r11,r11,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
+	tophys(r11,r11);						     \
+	stw	r10,_CCR(r11);          /* save various registers	   */\
+	stw	r12,GPR12(r11);						     \
+	stw	r9,GPR9(r11);						     \
+	mflr	r10;							     \
+	stw	r10,_LINK(r11);						     \
+	mfspr	r12,SPRN_DEAR;		/* save DEAR and ESR in the frame  */\
+	stw	r12,_DEAR(r11);		/* since they may have had stuff   */\
+	mfspr	r9,SPRN_ESR;		/* in them at the point where the  */\
+	stw	r9,_ESR(r11);		/* exception was taken		   */\
+	mfspr	r12,SPRN_SRR2;						     \
+	stw	r1,GPR1(r11);						     \
+	mfspr	r9,SPRN_SRR3;						     \
+	stw	r1,0(r11);						     \
+	tovirt(r1,r11);							     \
+	rlwinm	r9,r9,0,14,12;		/* clear MSR_WE (necessary?)	   */\
+	stw	r0,GPR0(r11);						     \
+	SAVE_4GPRS(3, r11);						     \
+	SAVE_2GPRS(7, r11)
+
+	/*
+	 * State at this point:
+	 * r9 saved in stack frame, now saved SRR3 & ~MSR_WE
+	 * r10 saved in crit_r10 and in stack frame, trashed
+	 * r11 saved in crit_r11 and in stack frame,
+	 *	now phys stack/exception frame pointer
+	 * r12 saved in stack frame, now saved SRR2
+	 * CR saved in stack frame, CR0.EQ = !SRR3.PR
+	 * LR, DEAR, ESR in stack frame
+	 * r1 saved in stack frame, now virt stack/excframe pointer
+	 * r0, r3-r8 saved in stack frame
+	 */
+
+/*
+ * Exception vectors.
+ */
+#define	START_EXCEPTION(n, label)					     \
+	. = n;								     \
+label:
+
+#define EXCEPTION(n, label, hdlr, xfer)				\
+	START_EXCEPTION(n, label);				\
+	NORMAL_EXCEPTION_PROLOG;				\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	xfer(n, hdlr)
+
+#define CRITICAL_EXCEPTION(n, label, hdlr)			\
+	START_EXCEPTION(n, label);				\
+	CRITICAL_EXCEPTION_PROLOG;				\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;			\
+	EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+			  NOCOPY, crit_transfer_to_handler,	\
+			  ret_from_crit_exc)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret)	\
+	li	r10,trap;					\
+	stw	r10,TRAP(r11);					\
+	lis	r10,msr@h;					\
+	ori	r10,r10,msr@l;					\
+	copyee(r10, r9);					\
+	bl	tfer;		 				\
+	.long	hdlr;						\
+	.long	ret
+
+#define COPY_EE(d, s)		rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
+			  ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr)	\
+	EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+			  ret_from_except)
+
+
+/*
+ * 0x0100 - Critical Interrupt Exception
+ */
+	CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, UnknownException)
+
+/*
+ * 0x0200 - Machine Check Exception
+ */
+	CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException)
+
+/*
+ * 0x0300 - Data Storage Exception
+ * This happens for just a few reasons.  U0 set (but we don't do that),
+ * or zone protection fault (user violation, write to protected page).
+ * If this is just an update of modified status, we do that quickly
+ * and exit.  Otherwise, we call heavywight functions to do the work.
+ */
+	START_EXCEPTION(0x0300,	DataStorage)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+#ifdef CONFIG_403GCX
+	stw     r12, 0(r0)
+	stw     r9, 4(r0)
+	mfcr    r11
+	mfspr   r12, SPRN_PID
+	stw     r11, 8(r0)
+	stw     r12, 12(r0)
+#else
+	mtspr	SPRN_SPRG4, r12
+	mtspr	SPRN_SPRG5, r9
+	mfcr	r11
+	mfspr	r12, SPRN_PID
+	mtspr	SPRN_SPRG7, r11
+	mtspr	SPRN_SPRG6, r12
+#endif
+
+	/* First, check if it was a zone fault (which means a user
+	* tried to access a kernel or read-protected page - always
+	* a SEGV).  All other faults here must be stores, so no
+	* need to check ESR_DST as well. */
+	mfspr	r10, SPRN_ESR
+	andis.	r10, r10, ESR_DIZ@h
+	bne	2f
+
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	li	r9, 0
+	mtspr	SPRN_PID, r9		/* TLB will have 0 TID */
+	b	4f
+
+	/* Get the PGD for the current thread.
+	 */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+4:
+	tophys(r11, r11)
+	rlwimi	r11, r10, 12, 20, 29	/* Create L1 (pgdir/pmd) address */
+	lwz	r11, 0(r11)		/* Get L1 entry */
+	rlwinm.	r12, r11, 0, 0, 19	/* Extract L2 (pte) base address */
+	beq	2f			/* Bail if no table */
+
+	rlwimi	r12, r10, 22, 20, 29	/* Compute PTE address */
+	lwz	r11, 0(r12)		/* Get Linux PTE */
+
+	andi.	r9, r11, _PAGE_RW	/* Is it writeable? */
+	beq	2f			/* Bail if not */
+
+	/* Update 'changed'.
+	*/
+	ori	r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
+	stw	r11, 0(r12)		/* Update Linux page table */
+
+	/* Most of the Linux PTE is ready to load into the TLB LO.
+	 * We set ZSEL, where only the LS-bit determines user access.
+	 * We set execute, because we don't have the granularity to
+	 * properly set this at the page level (Linux problem).
+	 * If shared is set, we cause a zero PID->TID load.
+	 * Many of these bits are software only.  Bits we don't set
+	 * here we (properly should) assume have the appropriate value.
+	 */
+	li	r12, 0x0ce2
+	andc	r11, r11, r12		/* Make sure 20, 21 are zero */
+
+	/* find the TLB index that caused the fault.  It has to be here.
+	*/
+	tlbsx	r9, 0, r10
+
+	tlbwe	r11, r9, TLB_DATA		/* Load TLB LO */
+
+	/* Done...restore registers and get out of here.
+	*/
+#ifdef CONFIG_403GCX
+	lwz     r12, 12(r0)
+	lwz     r11, 8(r0)
+	mtspr   SPRN_PID, r12
+	mtcr    r11
+	lwz     r9, 4(r0)
+	lwz     r12, 0(r0)
+#else
+	mfspr	r12, SPRN_SPRG6
+	mfspr	r11, SPRN_SPRG7
+	mtspr	SPRN_PID, r12
+	mtcr	r11
+	mfspr	r9, SPRN_SPRG5
+	mfspr	r12, SPRN_SPRG4
+#endif
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	PPC405_ERR77_SYNC
+	rfi			/* Should sync shadow TLBs */
+	b	.		/* prevent prefetch past rfi */
+
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+#ifdef CONFIG_403GCX
+	lwz     r12, 12(r0)
+	lwz     r11, 8(r0)
+	mtspr   SPRN_PID, r12
+	mtcr    r11
+	lwz     r9, 4(r0)
+	lwz     r12, 0(r0)
+#else
+	mfspr	r12, SPRN_SPRG6
+	mfspr	r11, SPRN_SPRG7
+	mtspr	SPRN_PID, r12
+	mtcr	r11
+	mfspr	r9, SPRN_SPRG5
+	mfspr	r12, SPRN_SPRG4
+#endif
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	DataAccess
+
+/*
+ * 0x0400 - Instruction Storage Exception
+ * This is caused by a fetch from non-execute or guarded pages.
+ */
+	START_EXCEPTION(0x0400, InstructionAccess)
+	NORMAL_EXCEPTION_PROLOG
+	mr	r4,r12			/* Pass SRR0 as arg2 */
+	li	r5,0			/* Pass zero as arg3 */
+	EXC_XFER_EE_LITE(0x400, handle_page_fault)
+
+/* 0x0500 - External Interrupt Exception */
+	EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+
+/* 0x0600 - Alignment Exception */
+	START_EXCEPTION(0x0600, Alignment)
+	NORMAL_EXCEPTION_PROLOG
+	mfspr	r4,SPRN_DEAR		/* Grab the DEAR and save it */
+	stw	r4,_DEAR(r11)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE(0x600, AlignmentException)
+
+/* 0x0700 - Program Exception */
+	START_EXCEPTION(0x0700, ProgramCheck)
+	NORMAL_EXCEPTION_PROLOG
+	mfspr	r4,SPRN_ESR		/* Grab the ESR and save it */
+	stw	r4,_ESR(r11)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_STD(0x700, ProgramCheckException)
+
+	EXCEPTION(0x0800, Trap_08, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x0900, Trap_09, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x0A00, Trap_0A, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x0B00, Trap_0B, UnknownException, EXC_XFER_EE)
+
+/* 0x0C00 - System Call Exception */
+	START_EXCEPTION(0x0C00,	SystemCall)
+	NORMAL_EXCEPTION_PROLOG
+	EXC_XFER_EE_LITE(0xc00, DoSyscall)
+
+	EXCEPTION(0x0D00, Trap_0D, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x0E00, Trap_0E, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x0F00, Trap_0F, UnknownException, EXC_XFER_EE)
+
+/* 0x1000 - Programmable Interval Timer (PIT) Exception */
+	START_EXCEPTION(0x1000, Decrementer)
+	NORMAL_EXCEPTION_PROLOG
+	lis	r0,TSR_PIS@h
+	mtspr	SPRN_TSR,r0		/* Clear the PIT exception */
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_LITE(0x1000, timer_interrupt)
+
+#if 0
+/* NOTE:
+ * FIT and WDT handlers are not implemented yet.
+ */
+
+/* 0x1010 - Fixed Interval Timer (FIT) Exception
+*/
+	STND_EXCEPTION(0x1010,	FITException,		UnknownException)
+
+/* 0x1020 - Watchdog Timer (WDT) Exception
+*/
+#ifdef CONFIG_BOOKE_WDT
+	CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException)
+#else
+	CRITICAL_EXCEPTION(0x1020, WDTException, UnknownException)
+#endif
+#endif
+
+/* 0x1100 - Data TLB Miss Exception
+ * As the name implies, translation is not in the MMU, so search the
+ * page tables and fix it.  The only purpose of this function is to
+ * load TLB entries from the page table if they exist.
+ */
+	START_EXCEPTION(0x1100,	DTLBMiss)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+#ifdef CONFIG_403GCX
+	stw     r12, 0(r0)
+	stw     r9, 4(r0)
+	mfcr    r11
+	mfspr   r12, SPRN_PID
+	stw     r11, 8(r0)
+	stw     r12, 12(r0)
+#else
+	mtspr	SPRN_SPRG4, r12
+	mtspr	SPRN_SPRG5, r9
+	mfcr	r11
+	mfspr	r12, SPRN_PID
+	mtspr	SPRN_SPRG7, r11
+	mtspr	SPRN_SPRG6, r12
+#endif
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	li	r9, 0
+	mtspr	SPRN_PID, r9		/* TLB will have 0 TID */
+	b	4f
+
+	/* Get the PGD for the current thread.
+	 */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+4:
+	tophys(r11, r11)
+	rlwimi	r11, r10, 12, 20, 29	/* Create L1 (pgdir/pmd) address */
+	lwz	r12, 0(r11)		/* Get L1 entry */
+	andi.	r9, r12, _PMD_PRESENT	/* Check if it points to a PTE page */
+	beq	2f			/* Bail if no table */
+
+	rlwimi	r12, r10, 22, 20, 29	/* Compute PTE address */
+	lwz	r11, 0(r12)		/* Get Linux PTE */
+	andi.	r9, r11, _PAGE_PRESENT
+	beq	5f
+
+	ori	r11, r11, _PAGE_ACCESSED
+	stw	r11, 0(r12)
+
+	/* Create TLB tag.  This is the faulting address plus a static
+	 * set of bits.  These are size, valid, E, U0.
+	*/
+	li	r12, 0x00c0
+	rlwimi	r10, r12, 0, 20, 31
+
+	b	finish_tlb_load
+
+2:	/* Check for possible large-page pmd entry */
+	rlwinm.	r9, r12, 2, 22, 24
+	beq	5f
+
+	/* Create TLB tag.  This is the faulting address, plus a static
+	 * set of bits (valid, E, U0) plus the size from the PMD.
+	 */
+	ori	r9, r9, 0x40
+	rlwimi	r10, r9, 0, 20, 31
+	mr	r11, r12
+
+	b	finish_tlb_load
+
+5:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+#ifdef CONFIG_403GCX
+	lwz     r12, 12(r0)
+	lwz     r11, 8(r0)
+	mtspr   SPRN_PID, r12
+	mtcr    r11
+	lwz     r9, 4(r0)
+	lwz     r12, 0(r0)
+#else
+	mfspr	r12, SPRN_SPRG6
+	mfspr	r11, SPRN_SPRG7
+	mtspr	SPRN_PID, r12
+	mtcr	r11
+	mfspr	r9, SPRN_SPRG5
+	mfspr	r12, SPRN_SPRG4
+#endif
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	DataAccess
+
+/* 0x1200 - Instruction TLB Miss Exception
+ * Nearly the same as above, except we get our information from different
+ * registers and bailout to a different point.
+ */
+	START_EXCEPTION(0x1200,	ITLBMiss)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+#ifdef CONFIG_403GCX
+	stw     r12, 0(r0)
+	stw     r9, 4(r0)
+	mfcr    r11
+	mfspr   r12, SPRN_PID
+	stw     r11, 8(r0)
+	stw     r12, 12(r0)
+#else
+	mtspr	SPRN_SPRG4, r12
+	mtspr	SPRN_SPRG5, r9
+	mfcr	r11
+	mfspr	r12, SPRN_PID
+	mtspr	SPRN_SPRG7, r11
+	mtspr	SPRN_SPRG6, r12
+#endif
+	mfspr	r10, SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	cmplw	r10, r11
+	blt+	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	li	r9, 0
+	mtspr	SPRN_PID, r9		/* TLB will have 0 TID */
+	b	4f
+
+	/* Get the PGD for the current thread.
+	 */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+4:
+	tophys(r11, r11)
+	rlwimi	r11, r10, 12, 20, 29	/* Create L1 (pgdir/pmd) address */
+	lwz	r12, 0(r11)		/* Get L1 entry */
+	andi.	r9, r12, _PMD_PRESENT	/* Check if it points to a PTE page */
+	beq	2f			/* Bail if no table */
+
+	rlwimi	r12, r10, 22, 20, 29	/* Compute PTE address */
+	lwz	r11, 0(r12)		/* Get Linux PTE */
+	andi.	r9, r11, _PAGE_PRESENT
+	beq	5f
+
+	ori	r11, r11, _PAGE_ACCESSED
+	stw	r11, 0(r12)
+
+	/* Create TLB tag.  This is the faulting address plus a static
+	 * set of bits.  These are size, valid, E, U0.
+	*/
+	li	r12, 0x00c0
+	rlwimi	r10, r12, 0, 20, 31
+
+	b	finish_tlb_load
+
+2:	/* Check for possible large-page pmd entry */
+	rlwinm.	r9, r12, 2, 22, 24
+	beq	5f
+
+	/* Create TLB tag.  This is the faulting address, plus a static
+	 * set of bits (valid, E, U0) plus the size from the PMD.
+	 */
+	ori	r9, r9, 0x40
+	rlwimi	r10, r9, 0, 20, 31
+	mr	r11, r12
+
+	b	finish_tlb_load
+
+5:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+#ifdef CONFIG_403GCX
+	lwz     r12, 12(r0)
+	lwz     r11, 8(r0)
+	mtspr   SPRN_PID, r12
+	mtcr    r11
+	lwz     r9, 4(r0)
+	lwz     r12, 0(r0)
+#else
+	mfspr	r12, SPRN_SPRG6
+	mfspr	r11, SPRN_SPRG7
+	mtspr	SPRN_PID, r12
+	mtcr	r11
+	mfspr	r9, SPRN_SPRG5
+	mfspr	r12, SPRN_SPRG4
+#endif
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	InstructionAccess
+
+	EXCEPTION(0x1300, Trap_13, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1400, Trap_14, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE)
+#ifdef CONFIG_IBM405_ERR51
+	/* 405GP errata 51 */
+	START_EXCEPTION(0x1700, Trap_17)
+	b DTLBMiss
+#else
+	EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE)
+#endif
+	EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1A00, Trap_1A, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1B00, Trap_1B, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1C00, Trap_1C, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1D00, Trap_1D, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1E00, Trap_1E, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1F00, Trap_1F, UnknownException, EXC_XFER_EE)
+
+/* Check for a single step debug exception while in an exception
+ * handler before state has been saved.  This is to catch the case
+ * where an instruction that we are trying to single step causes
+ * an exception (eg ITLB/DTLB miss) and thus the first instruction of
+ * the exception handler generates a single step debug exception.
+ *
+ * If we get a debug trap on the first instruction of an exception handler,
+ * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
+ * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
+ * The exception handler was handling a non-critical interrupt, so it will
+ * save (and later restore) the MSR via SPRN_SRR1, which will still have
+ * the MSR_DE bit set.
+ */
+	/* 0x2000 - Debug Exception */
+	START_EXCEPTION(0x2000, DebugTrap)
+	CRITICAL_EXCEPTION_PROLOG
+
+	/*
+	 * If this is a single step or branch-taken exception in an
+	 * exception entry sequence, it was probably meant to apply to
+	 * the code where the exception occurred (since exception entry
+	 * doesn't turn off DE automatically).  We simulate the effect
+	 * of turning off DE on entry to an exception handler by turning
+	 * off DE in the SRR3 value and clearing the debug status.
+	 */
+	mfspr	r10,SPRN_DBSR		/* check single-step/branch taken */
+	andis.	r10,r10,DBSR_IC@h
+	beq+	2f
+
+	andi.	r10,r9,MSR_IR|MSR_PR	/* check supervisor + MMU off */
+	beq	1f			/* branch and fix it up */
+
+	mfspr   r10,SPRN_SRR2		/* Faulting instruction address */
+	cmplwi  r10,0x2100
+	bgt+    2f			/* address above exception vectors */
+
+	/* here it looks like we got an inappropriate debug exception. */
+1:	rlwinm	r9,r9,0,~MSR_DE		/* clear DE in the SRR3 value */
+	lis	r10,DBSR_IC@h		/* clear the IC event */
+	mtspr	SPRN_DBSR,r10
+	/* restore state and get out */
+	lwz	r10,_CCR(r11)
+	lwz	r0,GPR0(r11)
+	lwz	r1,GPR1(r11)
+	mtcrf	0x80,r10
+	mtspr	SPRN_SRR2,r12
+	mtspr	SPRN_SRR3,r9
+	lwz	r9,GPR9(r11)
+	lwz	r12,GPR12(r11)
+	lwz	r10,crit_r10@l(0)
+	lwz	r11,crit_r11@l(0)
+	PPC405_ERR77_SYNC
+	rfci
+	b	.
+
+	/* continue normal handling for a critical exception... */
+2:	mfspr	r4,SPRN_DBSR
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_TEMPLATE(DebugException, 0x2002, \
+		(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
+		NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+
+/*
+ * The other Data TLB exceptions bail out to this point
+ * if they can't resolve the lightweight TLB fault.
+ */
+DataAccess:
+	NORMAL_EXCEPTION_PROLOG
+	mfspr	r5,SPRN_ESR		/* Grab the ESR, save it, pass arg3 */
+	stw	r5,_ESR(r11)
+	mfspr	r4,SPRN_DEAR		/* Grab the DEAR, save it, pass arg2 */
+	EXC_XFER_EE_LITE(0x300, handle_page_fault)
+
+/* Other PowerPC processors, namely those derived from the 6xx-series
+ * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
+ * However, for the 4xx-series processors these are neither defined nor
+ * reserved.
+ */
+
+	/* Damn, I came up one instruction too many to fit into the
+	 * exception space :-).  Both the instruction and data TLB
+	 * miss get to this point to load the TLB.
+	 * 	r10 - TLB_TAG value
+	 * 	r11 - Linux PTE
+	 *	r12, r9 - avilable to use
+	 *	PID - loaded with proper value when we get here
+	 *	Upon exit, we reload everything and RFI.
+	 * Actually, it will fit now, but oh well.....a common place
+	 * to load the TLB.
+	 */
+tlb_4xx_index:
+	.long	0
+finish_tlb_load:
+	/* load the next available TLB index.
+	*/
+	lwz	r9, tlb_4xx_index@l(0)
+	addi	r9, r9, 1
+	andi.	r9, r9, (PPC4XX_TLB_SIZE-1)
+	stw	r9, tlb_4xx_index@l(0)
+
+6:
+	/*
+	 * Clear out the software-only bits in the PTE to generate the
+	 * TLB_DATA value.  These are the bottom 2 bits of the RPM, the
+	 * top 3 bits of the zone field, and M.
+	 */
+	li	r12, 0x0ce2
+	andc	r11, r11, r12
+
+	tlbwe	r11, r9, TLB_DATA		/* Load TLB LO */
+	tlbwe	r10, r9, TLB_TAG		/* Load TLB HI */
+
+	/* Done...restore registers and get out of here.
+	*/
+#ifdef CONFIG_403GCX
+	lwz     r12, 12(r0)
+	lwz     r11, 8(r0)
+	mtspr   SPRN_PID, r12
+	mtcr    r11
+	lwz     r9, 4(r0)
+	lwz     r12, 0(r0)
+#else
+	mfspr	r12, SPRN_SPRG6
+	mfspr	r11, SPRN_SPRG7
+	mtspr	SPRN_PID, r12
+	mtcr	r11
+	mfspr	r9, SPRN_SPRG5
+	mfspr	r12, SPRN_SPRG4
+#endif
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	PPC405_ERR77_SYNC
+	rfi			/* Should sync shadow TLBs */
+	b	.		/* prevent prefetch past rfi */
+
+/* extern void giveup_fpu(struct task_struct *prev)
+ *
+ * The PowerPC 4xx family of processors do not have an FPU, so this just
+ * returns.
+ */
+_GLOBAL(giveup_fpu)
+	blr
+
+/* This is where the main kernel code starts.
+ */
+start_here:
+
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG3,r4
+
+	/* stack */
+	lis	r1,init_thread_union@ha
+	addi	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+	bl	early_init	/* We have to do this with MMU on */
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+	mr	r3,r31
+	mr	r4,r30
+	mr	r5,r29
+	mr	r6,r28
+	mr	r7,r27
+	bl	machine_init
+	bl	MMU_init
+
+/* Go back to running unmapped so we can load up new values
+ * and change to using our exception vectors.
+ * On the 4xx, all we have to do is invalidate the TLB to clear
+ * the old 16M byte TLB mappings.
+ */
+	lis	r4,2f@h
+	ori	r4,r4,2f@l
+	tophys(r4,r4)
+	lis	r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h
+	ori	r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi
+	b	.		/* prevent prefetch past rfi */
+
+/* Load up the kernel context */
+2:
+	sync			/* Flush to memory before changing TLB */
+	tlbia
+	isync			/* Flush shadow TLBs */
+
+	/* set up the PTE pointers for the Abatron bdiGDB.
+	*/
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r5, 0xf0(r0)	/* Must match your Abatron config file */
+	tophys(r5,r5)
+	stw	r6, 0(r5)
+
+/* Now turn on the MMU for real! */
+	lis	r4,MSR_KERNEL@h
+	ori	r4,r4,MSR_KERNEL@l
+	lis	r3,start_kernel@h
+	ori	r3,r3,start_kernel@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	rfi			/* enable MMU and jump to start_kernel */
+	b	.		/* prevent prefetch past rfi */
+
+/* Set up the initial MMU state so we can do the first level of
+ * kernel initialization.  This maps the first 16 MBytes of memory 1:1
+ * virtual to physical and more importantly sets the cache mode.
+ */
+initial_mmu:
+	tlbia			/* Invalidate all TLB entries */
+	isync
+
+	/* We should still be executing code at physical address 0x0000xxxx
+	 * at this point. However, start_here is at virtual address
+	 * 0xC000xxxx. So, set up a TLB mapping to cover this once
+	 * translation is enabled.
+	 */
+
+	lis	r3,KERNELBASE@h		/* Load the kernel virtual address */
+	ori	r3,r3,KERNELBASE@l
+	tophys(r4,r3)			/* Load the kernel physical address */
+
+	iccci	r0,r3			/* Invalidate the i-cache before use */
+
+	/* Load the kernel PID.
+	*/
+	li	r0,0
+	mtspr	SPRN_PID,r0
+	sync
+
+	/* Configure and load two entries into TLB slots 62 and 63.
+	 * In case we are pinning TLBs, these are reserved in by the
+	 * other TLB functions.  If not reserving, then it doesn't
+	 * matter where they are loaded.
+	 */
+	clrrwi	r4,r4,10		/* Mask off the real page number */
+	ori	r4,r4,(TLB_WR | TLB_EX)	/* Set the write and execute bits */
+
+	clrrwi	r3,r3,10		/* Mask off the effective page number */
+	ori	r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M))
+
+        li      r0,63                    /* TLB slot 63 */
+
+	tlbwe	r4,r0,TLB_DATA		/* Load the data portion of the entry */
+	tlbwe	r3,r0,TLB_TAG		/* Load the tag portion of the entry */
+
+#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE)
+
+	/* Load a TLB entry for the UART, so that ppc4xx_progress() can use
+	 * the UARTs nice and early.  We use a 4k real==virtual mapping. */
+
+	lis	r3,SERIAL_DEBUG_IO_BASE@h
+	ori	r3,r3,SERIAL_DEBUG_IO_BASE@l
+	mr	r4,r3
+	clrrwi	r4,r4,12
+	ori	r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G)
+
+	clrrwi	r3,r3,12
+	ori	r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K))
+
+	li	r0,0			/* TLB slot 0 */
+	tlbwe	r4,r0,TLB_DATA
+	tlbwe	r3,r0,TLB_TAG
+#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */
+
+	isync
+
+	/* Establish the exception vector base
+	*/
+	lis	r4,KERNELBASE@h		/* EVPR only uses the high 16-bits */
+	tophys(r0,r4)			/* Use the physical address */
+	mtspr	SPRN_EVPR,r0
+
+	blr
+
+_GLOBAL(abort)
+        mfspr   r13,SPRN_DBCR0
+        oris    r13,r13,DBCR0_RST_SYSTEM@h
+        mtspr   SPRN_DBCR0,r13
+
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Context switch the PTE pointer for the Abatron BDI2000.
+	 * The PGDIR is the second parameter.
+	 */
+	lis	r5, KERNELBASE@h
+	lwz	r5, 0xf0(r5)
+	stw	r4, 0x4(r5)
+#endif
+	sync
+	mtspr	SPRN_PID,r3
+	isync				/* Need an isync to flush shadow */
+					/* TLBs after changing PID */
+	blr
+
+/* We put a few things here that have to be page-aligned. This stuff
+ * goes at the beginning of the data segment, which is page-aligned.
+ */
+	.data
+_GLOBAL(sdata)
+_GLOBAL(empty_zero_page)
+	.space	4096
+_GLOBAL(swapper_pg_dir)
+	.space	4096
+
+
+/* Stack for handling critical exceptions from kernel mode */
+	.section .bss
+        .align 12
+exception_stack_bottom:
+	.space	4096
+critical_stack_top:
+_GLOBAL(exception_stack_top)
+
+/* This space gets a copy of optional info passed to us by the bootstrap
+ * which is used to pass parameters into the kernel like root=/dev/sda1, etc.
+ */
+_GLOBAL(cmd_line)
+	.space	512
+
+/* Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+	.space	8
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
new file mode 100644
index 00000000000..22a5ee07e1e
--- /dev/null
+++ b/arch/powerpc/kernel/head_64.S
@@ -0,0 +1,2011 @@
+/*
+ *  arch/ppc64/kernel/head.S
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *
+ *  Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and
+ *    Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com
+ *
+ *  This file contains the low-level support and setup for the
+ *  PowerPC-64 platform, including trap and interrupt dispatch.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/systemcfg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#include <asm/cputable.h>
+#include <asm/setup.h>
+#include <asm/hvcall.h>
+#include <asm/iSeries/LparMap.h>
+
+#ifdef CONFIG_PPC_ISERIES
+#define DO_SOFT_DISABLE
+#endif
+
+/*
+ * We layout physical memory as follows:
+ * 0x0000 - 0x00ff : Secondary processor spin code
+ * 0x0100 - 0x2fff : pSeries Interrupt prologs
+ * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs
+ * 0x6000 - 0x6fff : Initial (CPU0) segment table
+ * 0x7000 - 0x7fff : FWNMI data area
+ * 0x8000 -        : Early init and support code
+ */
+
+/*
+ *   SPRG Usage
+ *
+ *   Register	Definition
+ *
+ *   SPRG0	reserved for hypervisor
+ *   SPRG1	temp - used to save gpr
+ *   SPRG2	temp - used to save gpr
+ *   SPRG3	virt addr of paca
+ */
+
+/*
+ * Entering into this code we make the following assumptions:
+ *  For pSeries:
+ *   1. The MMU is off & open firmware is running in real mode.
+ *   2. The kernel is entered at __start
+ *
+ *  For iSeries:
+ *   1. The MMU is on (as it always is for iSeries)
+ *   2. The kernel is entered at system_reset_iSeries
+ */
+
+	.text
+	.globl  _stext
+_stext:
+#ifdef CONFIG_PPC_MULTIPLATFORM
+_GLOBAL(__start)
+	/* NOP this out unconditionally */
+BEGIN_FTR_SECTION
+	b .__start_initialization_multiplatform
+END_FTR_SECTION(0, 1)
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+	/* Catch branch to 0 in real mode */
+	trap
+
+#ifdef CONFIG_PPC_ISERIES
+	/*
+	 * At offset 0x20, there is a pointer to iSeries LPAR data.
+	 * This is required by the hypervisor
+	 */
+	. = 0x20
+	.llong hvReleaseData-KERNELBASE
+
+	/*
+	 * At offset 0x28 and 0x30 are offsets to the mschunks_map
+	 * array (used by the iSeries LPAR debugger to do translation
+	 * between physical addresses and absolute addresses) and
+	 * to the pidhash table (also used by the debugger)
+	 */
+	.llong mschunks_map-KERNELBASE
+	.llong 0	/* pidhash-KERNELBASE SFRXXX */
+
+	/* Offset 0x38 - Pointer to start of embedded System.map */
+	.globl	embedded_sysmap_start
+embedded_sysmap_start:
+	.llong	0
+	/* Offset 0x40 - Pointer to end of embedded System.map */
+	.globl	embedded_sysmap_end
+embedded_sysmap_end:
+	.llong	0
+
+#endif /* CONFIG_PPC_ISERIES */
+
+	/* Secondary processors spin on this value until it goes to 1. */
+	.globl  __secondary_hold_spinloop
+__secondary_hold_spinloop:
+	.llong	0x0
+
+	/* Secondary processors write this value with their cpu # */
+	/* after they enter the spin loop immediately below.	  */
+	.globl	__secondary_hold_acknowledge
+__secondary_hold_acknowledge:
+	.llong	0x0
+
+	. = 0x60
+/*
+ * The following code is used on pSeries to hold secondary processors
+ * in a spin loop after they have been freed from OpenFirmware, but
+ * before the bulk of the kernel has been relocated.  This code
+ * is relocated to physical address 0x60 before prom_init is run.
+ * All of it must fit below the first exception vector at 0x100.
+ */
+_GLOBAL(__secondary_hold)
+	mfmsr	r24
+	ori	r24,r24,MSR_RI
+	mtmsrd	r24			/* RI on */
+
+	/* Grab our linux cpu number */
+	mr	r24,r3
+
+	/* Tell the master cpu we're here */
+	/* Relocation is off & we are located at an address less */
+	/* than 0x100, so only need to grab low order offset.    */
+	std	r24,__secondary_hold_acknowledge@l(0)
+	sync
+
+	/* All secondary cpus wait here until told to start. */
+100:	ld	r4,__secondary_hold_spinloop@l(0)
+	cmpdi	0,r4,1
+	bne	100b
+
+#ifdef CONFIG_HMT
+	b	.hmt_init
+#else
+#ifdef CONFIG_SMP
+	mr	r3,r24
+	b	.pSeries_secondary_smp_init
+#else
+	BUG_OPCODE
+#endif
+#endif
+
+/* This value is used to mark exception frames on the stack. */
+	.section ".toc","aw"
+exception_marker:
+	.tc	ID_72656773_68657265[TC],0x7265677368657265
+	.text
+
+/*
+ * The following macros define the code that appears as
+ * the prologue to each of the exception handlers.  They
+ * are split into two parts to allow a single kernel binary
+ * to be used for pSeries and iSeries.
+ * LOL.  One day... - paulus
+ */
+
+/*
+ * We make as much of the exception code common between native
+ * exception handlers (including pSeries LPAR) and iSeries LPAR
+ * implementations as possible.
+ */
+
+/*
+ * This is the start of the interrupt handlers for pSeries
+ * This code runs with relocation off.
+ */
+#define EX_R9		0
+#define EX_R10		8
+#define EX_R11		16
+#define EX_R12		24
+#define EX_R13		32
+#define EX_SRR0		40
+#define EX_R3		40	/* SLB miss saves R3, but not SRR0 */
+#define EX_DAR		48
+#define EX_LR		48	/* SLB miss saves LR, but not DAR */
+#define EX_DSISR	56
+#define EX_CCR		60
+
+#define EXCEPTION_PROLOG_PSERIES(area, label)				\
+	mfspr	r13,SPRG3;		/* get paca address into r13 */	\
+	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
+	std	r10,area+EX_R10(r13);					\
+	std	r11,area+EX_R11(r13);					\
+	std	r12,area+EX_R12(r13);					\
+	mfspr	r9,SPRG1;						\
+	std	r9,area+EX_R13(r13);					\
+	mfcr	r9;							\
+	clrrdi	r12,r13,32;		/* get high part of &label */	\
+	mfmsr	r10;							\
+	mfspr	r11,SRR0;		/* save SRR0 */			\
+	ori	r12,r12,(label)@l;	/* virt addr of handler */	\
+	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI;				\
+	mtspr	SRR0,r12;						\
+	mfspr	r12,SRR1;		/* and SRR1 */			\
+	mtspr	SRR1,r10;						\
+	rfid;								\
+	b	.	/* prevent speculative execution */
+
+/*
+ * This is the start of the interrupt handlers for iSeries
+ * This code runs with relocation on.
+ */
+#define EXCEPTION_PROLOG_ISERIES_1(area)				\
+	mfspr	r13,SPRG3;		/* get paca address into r13 */	\
+	std	r9,area+EX_R9(r13);	/* save r9 - r12 */		\
+	std	r10,area+EX_R10(r13);					\
+	std	r11,area+EX_R11(r13);					\
+	std	r12,area+EX_R12(r13);					\
+	mfspr	r9,SPRG1;						\
+	std	r9,area+EX_R13(r13);					\
+	mfcr	r9
+
+#define EXCEPTION_PROLOG_ISERIES_2					\
+	mfmsr	r10;							\
+	ld	r11,PACALPPACA+LPPACASRR0(r13);				\
+	ld	r12,PACALPPACA+LPPACASRR1(r13);				\
+	ori	r10,r10,MSR_RI;						\
+	mtmsrd	r10,1
+
+/*
+ * The common exception prolog is used for all except a few exceptions
+ * such as a segment miss on a kernel address.  We have to be prepared
+ * to take another exception from the point where we first touch the
+ * kernel stack onwards.
+ *
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ */
+#define EXCEPTION_PROLOG_COMMON(n, area)				   \
+	andi.	r10,r12,MSR_PR;		/* See if coming from user	*/ \
+	mr	r10,r1;			/* Save r1			*/ \
+	subi	r1,r1,INT_FRAME_SIZE;	/* alloc frame on kernel stack	*/ \
+	beq-	1f;							   \
+	ld	r1,PACAKSAVE(r13);	/* kernel stack to use		*/ \
+1:	cmpdi	cr1,r1,0;		/* check if r1 is in userspace	*/ \
+	bge-	cr1,bad_stack;		/* abort if it is		*/ \
+	std	r9,_CCR(r1);		/* save CR in stackframe	*/ \
+	std	r11,_NIP(r1);		/* save SRR0 in stackframe	*/ \
+	std	r12,_MSR(r1);		/* save SRR1 in stackframe	*/ \
+	std	r10,0(r1);		/* make stack chain pointer	*/ \
+	std	r0,GPR0(r1);		/* save r0 in stackframe	*/ \
+	std	r10,GPR1(r1);		/* save r1 in stackframe	*/ \
+	std	r2,GPR2(r1);		/* save r2 in stackframe	*/ \
+	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe	*/ \
+	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe	*/ \
+	ld	r9,area+EX_R9(r13);	/* move r9, r10 to stackframe	*/ \
+	ld	r10,area+EX_R10(r13);					   \
+	std	r9,GPR9(r1);						   \
+	std	r10,GPR10(r1);						   \
+	ld	r9,area+EX_R11(r13);	/* move r11 - r13 to stackframe	*/ \
+	ld	r10,area+EX_R12(r13);					   \
+	ld	r11,area+EX_R13(r13);					   \
+	std	r9,GPR11(r1);						   \
+	std	r10,GPR12(r1);						   \
+	std	r11,GPR13(r1);						   \
+	ld	r2,PACATOC(r13);	/* get kernel TOC into r2	*/ \
+	mflr	r9;			/* save LR in stackframe	*/ \
+	std	r9,_LINK(r1);						   \
+	mfctr	r10;			/* save CTR in stackframe	*/ \
+	std	r10,_CTR(r1);						   \
+	mfspr	r11,XER;		/* save XER in stackframe	*/ \
+	std	r11,_XER(r1);						   \
+	li	r9,(n)+1;						   \
+	std	r9,_TRAP(r1);		/* set trap number		*/ \
+	li	r10,0;							   \
+	ld	r11,exception_marker@toc(r2);				   \
+	std	r10,RESULT(r1);		/* clear regs->result		*/ \
+	std	r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame	*/
+
+/*
+ * Exception vectors.
+ */
+#define STD_EXCEPTION_PSERIES(n, label)			\
+	. = n;						\
+	.globl label##_pSeries;				\
+label##_pSeries:					\
+	HMT_MEDIUM;					\
+	mtspr	SPRG1,r13;		/* save r13 */	\
+	RUNLATCH_ON(r13);				\
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common)
+
+#define STD_EXCEPTION_ISERIES(n, label, area)		\
+	.globl label##_iSeries;				\
+label##_iSeries:					\
+	HMT_MEDIUM;					\
+	mtspr	SPRG1,r13;		/* save r13 */	\
+	RUNLATCH_ON(r13);				\
+	EXCEPTION_PROLOG_ISERIES_1(area);		\
+	EXCEPTION_PROLOG_ISERIES_2;			\
+	b	label##_common
+
+#define MASKABLE_EXCEPTION_ISERIES(n, label)				\
+	.globl label##_iSeries;						\
+label##_iSeries:							\
+	HMT_MEDIUM;							\
+	mtspr	SPRG1,r13;		/* save r13 */			\
+	RUNLATCH_ON(r13);						\
+	EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN);				\
+	lbz	r10,PACAPROCENABLED(r13);				\
+	cmpwi	0,r10,0;						\
+	beq-	label##_iSeries_masked;					\
+	EXCEPTION_PROLOG_ISERIES_2;					\
+	b	label##_common;						\
+
+#ifdef DO_SOFT_DISABLE
+#define DISABLE_INTS				\
+	lbz	r10,PACAPROCENABLED(r13);	\
+	li	r11,0;				\
+	std	r10,SOFTE(r1);			\
+	mfmsr	r10;				\
+	stb	r11,PACAPROCENABLED(r13);	\
+	ori	r10,r10,MSR_EE;			\
+	mtmsrd	r10,1
+
+#define ENABLE_INTS				\
+	lbz	r10,PACAPROCENABLED(r13);	\
+	mfmsr	r11;				\
+	std	r10,SOFTE(r1);			\
+	ori	r11,r11,MSR_EE;			\
+	mtmsrd	r11,1
+
+#else	/* hard enable/disable interrupts */
+#define DISABLE_INTS
+
+#define ENABLE_INTS				\
+	ld	r12,_MSR(r1);			\
+	mfmsr	r11;				\
+	rlwimi	r11,r12,0,MSR_EE;		\
+	mtmsrd	r11,1
+
+#endif
+
+#define STD_EXCEPTION_COMMON(trap, label, hdlr)		\
+	.align	7;					\
+	.globl label##_common;				\
+label##_common:						\
+	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);	\
+	DISABLE_INTS;					\
+	bl	.save_nvgprs;				\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;		\
+	bl	hdlr;					\
+	b	.ret_from_except
+
+#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr)	\
+	.align	7;					\
+	.globl label##_common;				\
+label##_common:						\
+	EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN);	\
+	DISABLE_INTS;					\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;		\
+	bl	hdlr;					\
+	b	.ret_from_except_lite
+
+/*
+ * Start of pSeries system interrupt routines
+ */
+	. = 0x100
+	.globl __start_interrupts
+__start_interrupts:
+
+	STD_EXCEPTION_PSERIES(0x100, system_reset)
+
+	. = 0x200
+_machine_check_pSeries:
+	HMT_MEDIUM
+	mtspr	SPRG1,r13		/* save r13 */
+	RUNLATCH_ON(r13)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+
+	. = 0x300
+	.globl data_access_pSeries
+data_access_pSeries:
+	HMT_MEDIUM
+	mtspr	SPRG1,r13
+BEGIN_FTR_SECTION
+	mtspr	SPRG2,r12
+	mfspr	r13,DAR
+	mfspr	r12,DSISR
+	srdi	r13,r13,60
+	rlwimi	r13,r12,16,0x20
+	mfcr	r12
+	cmpwi	r13,0x2c
+	beq	.do_stab_bolted_pSeries
+	mtcrf	0x80,r12
+	mfspr	r12,SPRG2
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common)
+
+	. = 0x380
+	.globl data_access_slb_pSeries
+data_access_slb_pSeries:
+	HMT_MEDIUM
+	mtspr	SPRG1,r13
+	RUNLATCH_ON(r13)
+	mfspr	r13,SPRG3		/* get paca address into r13 */
+	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
+	std	r10,PACA_EXSLB+EX_R10(r13)
+	std	r11,PACA_EXSLB+EX_R11(r13)
+	std	r12,PACA_EXSLB+EX_R12(r13)
+	std	r3,PACA_EXSLB+EX_R3(r13)
+	mfspr	r9,SPRG1
+	std	r9,PACA_EXSLB+EX_R13(r13)
+	mfcr	r9
+	mfspr	r12,SRR1		/* and SRR1 */
+	mfspr	r3,DAR
+	b	.do_slb_miss		/* Rel. branch works in real mode */
+
+	STD_EXCEPTION_PSERIES(0x400, instruction_access)
+
+	. = 0x480
+	.globl instruction_access_slb_pSeries
+instruction_access_slb_pSeries:
+	HMT_MEDIUM
+	mtspr	SPRG1,r13
+	RUNLATCH_ON(r13)
+	mfspr	r13,SPRG3		/* get paca address into r13 */
+	std	r9,PACA_EXSLB+EX_R9(r13)	/* save r9 - r12 */
+	std	r10,PACA_EXSLB+EX_R10(r13)
+	std	r11,PACA_EXSLB+EX_R11(r13)
+	std	r12,PACA_EXSLB+EX_R12(r13)
+	std	r3,PACA_EXSLB+EX_R3(r13)
+	mfspr	r9,SPRG1
+	std	r9,PACA_EXSLB+EX_R13(r13)
+	mfcr	r9
+	mfspr	r12,SRR1		/* and SRR1 */
+	mfspr	r3,SRR0			/* SRR0 is faulting address */
+	b	.do_slb_miss		/* Rel. branch works in real mode */
+
+	STD_EXCEPTION_PSERIES(0x500, hardware_interrupt)
+	STD_EXCEPTION_PSERIES(0x600, alignment)
+	STD_EXCEPTION_PSERIES(0x700, program_check)
+	STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
+	STD_EXCEPTION_PSERIES(0x900, decrementer)
+	STD_EXCEPTION_PSERIES(0xa00, trap_0a)
+	STD_EXCEPTION_PSERIES(0xb00, trap_0b)
+
+	. = 0xc00
+	.globl	system_call_pSeries
+system_call_pSeries:
+	HMT_MEDIUM
+	RUNLATCH_ON(r9)
+	mr	r9,r13
+	mfmsr	r10
+	mfspr	r13,SPRG3
+	mfspr	r11,SRR0
+	clrrdi	r12,r13,32
+	oris	r12,r12,system_call_common@h
+	ori	r12,r12,system_call_common@l
+	mtspr	SRR0,r12
+	ori	r10,r10,MSR_IR|MSR_DR|MSR_RI
+	mfspr	r12,SRR1
+	mtspr	SRR1,r10
+	rfid
+	b	.	/* prevent speculative execution */
+
+	STD_EXCEPTION_PSERIES(0xd00, single_step)
+	STD_EXCEPTION_PSERIES(0xe00, trap_0e)
+
+	/* We need to deal with the Altivec unavailable exception
+	 * here which is at 0xf20, thus in the middle of the
+	 * prolog code of the PerformanceMonitor one. A little
+	 * trickery is thus necessary
+	 */
+	. = 0xf00
+	b	performance_monitor_pSeries
+
+	STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable)
+
+	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
+	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
+
+	. = 0x3000
+
+/*** pSeries interrupt support ***/
+
+	/* moved from 0xf00 */
+	STD_EXCEPTION_PSERIES(., performance_monitor)
+
+	.align	7
+_GLOBAL(do_stab_bolted_pSeries)
+	mtcrf	0x80,r12
+	mfspr	r12,SPRG2
+	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
+
+/*
+ * Vectors for the FWNMI option.  Share common code.
+ */
+      .globl system_reset_fwnmi
+system_reset_fwnmi:
+      HMT_MEDIUM
+      mtspr   SPRG1,r13               /* save r13 */
+      RUNLATCH_ON(r13)
+      EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common)
+
+      .globl machine_check_fwnmi
+machine_check_fwnmi:
+      HMT_MEDIUM
+      mtspr   SPRG1,r13               /* save r13 */
+      RUNLATCH_ON(r13)
+      EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common)
+
+#ifdef CONFIG_PPC_ISERIES
+/***  ISeries-LPAR interrupt handlers ***/
+
+	STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC)
+
+	.globl data_access_iSeries
+data_access_iSeries:
+	mtspr	SPRG1,r13
+BEGIN_FTR_SECTION
+	mtspr	SPRG2,r12
+	mfspr	r13,DAR
+	mfspr	r12,DSISR
+	srdi	r13,r13,60
+	rlwimi	r13,r12,16,0x20
+	mfcr	r12
+	cmpwi	r13,0x2c
+	beq	.do_stab_bolted_iSeries
+	mtcrf	0x80,r12
+	mfspr	r12,SPRG2
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+	EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
+	EXCEPTION_PROLOG_ISERIES_2
+	b	data_access_common
+
+.do_stab_bolted_iSeries:
+	mtcrf	0x80,r12
+	mfspr	r12,SPRG2
+	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+	EXCEPTION_PROLOG_ISERIES_2
+	b	.do_stab_bolted
+
+	.globl	data_access_slb_iSeries
+data_access_slb_iSeries:
+	mtspr	SPRG1,r13		/* save r13 */
+	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+	std	r3,PACA_EXSLB+EX_R3(r13)
+	ld	r12,PACALPPACA+LPPACASRR1(r13)
+	mfspr	r3,DAR
+	b	.do_slb_miss
+
+	STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN)
+
+	.globl	instruction_access_slb_iSeries
+instruction_access_slb_iSeries:
+	mtspr	SPRG1,r13		/* save r13 */
+	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
+	std	r3,PACA_EXSLB+EX_R3(r13)
+	ld	r12,PACALPPACA+LPPACASRR1(r13)
+	ld	r3,PACALPPACA+LPPACASRR0(r13)
+	b	.do_slb_miss
+
+	MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt)
+	STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN)
+	STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN)
+	STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN)
+	MASKABLE_EXCEPTION_ISERIES(0x900, decrementer)
+	STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN)
+	STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN)
+
+	.globl	system_call_iSeries
+system_call_iSeries:
+	mr	r9,r13
+	mfspr	r13,SPRG3
+	EXCEPTION_PROLOG_ISERIES_2
+	b	system_call_common
+
+	STD_EXCEPTION_ISERIES( 0xd00, single_step, PACA_EXGEN)
+	STD_EXCEPTION_ISERIES( 0xe00, trap_0e, PACA_EXGEN)
+	STD_EXCEPTION_ISERIES( 0xf00, performance_monitor, PACA_EXGEN)
+
+	.globl system_reset_iSeries
+system_reset_iSeries:
+	mfspr	r13,SPRG3		/* Get paca address */
+	mfmsr	r24
+	ori	r24,r24,MSR_RI
+	mtmsrd	r24			/* RI on */
+	lhz	r24,PACAPACAINDEX(r13)	/* Get processor # */
+	cmpwi	0,r24,0			/* Are we processor 0? */
+	beq	.__start_initialization_iSeries	/* Start up the first processor */
+	mfspr	r4,SPRN_CTRLF
+	li	r5,CTRL_RUNLATCH	/* Turn off the run light */
+	andc	r4,r4,r5
+	mtspr	SPRN_CTRLT,r4
+
+1:
+	HMT_LOW
+#ifdef CONFIG_SMP
+	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor
+					 * should start */
+	sync
+	LOADADDR(r3,current_set)
+	sldi	r28,r24,3		/* get current_set[cpu#] */
+	ldx	r3,r3,r28
+	addi	r1,r3,THREAD_SIZE
+	subi	r1,r1,STACK_FRAME_OVERHEAD
+
+	cmpwi	0,r23,0
+	beq	iSeries_secondary_smp_loop	/* Loop until told to go */
+	bne	.__secondary_start		/* Loop until told to go */
+iSeries_secondary_smp_loop:
+	/* Let the Hypervisor know we are alive */
+	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
+	lis	r3,0x8002
+	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
+#else /* CONFIG_SMP */
+	/* Yield the processor.  This is required for non-SMP kernels
+		which are running on multi-threaded machines. */
+	lis	r3,0x8000
+	rldicr	r3,r3,32,15		/* r3 = (r3 << 32) & 0xffff000000000000 */
+	addi	r3,r3,18		/* r3 = 0x8000000000000012 which is "yield" */
+	li	r4,0			/* "yield timed" */
+	li	r5,-1			/* "yield forever" */
+#endif /* CONFIG_SMP */
+	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
+	sc				/* Invoke the hypervisor via a system call */
+	mfspr	r13,SPRG3		/* Put r13 back ???? */
+	b	1b			/* If SMP not configured, secondaries
+					 * loop forever */
+
+	.globl decrementer_iSeries_masked
+decrementer_iSeries_masked:
+	li	r11,1
+	stb	r11,PACALPPACA+LPPACADECRINT(r13)
+	lwz	r12,PACADEFAULTDECR(r13)
+	mtspr	SPRN_DEC,r12
+	/* fall through */
+
+	.globl hardware_interrupt_iSeries_masked
+hardware_interrupt_iSeries_masked:
+	mtcrf	0x80,r9		/* Restore regs */
+	ld	r11,PACALPPACA+LPPACASRR0(r13)
+	ld	r12,PACALPPACA+LPPACASRR1(r13)
+	mtspr	SRR0,r11
+	mtspr	SRR1,r12
+	ld	r9,PACA_EXGEN+EX_R9(r13)
+	ld	r10,PACA_EXGEN+EX_R10(r13)
+	ld	r11,PACA_EXGEN+EX_R11(r13)
+	ld	r12,PACA_EXGEN+EX_R12(r13)
+	ld	r13,PACA_EXGEN+EX_R13(r13)
+	rfid
+	b	.	/* prevent speculative execution */
+#endif /* CONFIG_PPC_ISERIES */
+
+/*** Common interrupt handlers ***/
+
+	STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
+
+	/*
+	 * Machine check is different because we use a different
+	 * save area: PACA_EXMC instead of PACA_EXGEN.
+	 */
+	.align	7
+	.globl machine_check_common
+machine_check_common:
+	EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
+	DISABLE_INTS
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.machine_check_exception
+	b	.ret_from_except
+
+	STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt)
+	STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception)
+	STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception)
+	STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception)
+	STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception)
+	STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception)
+	STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception)
+#ifdef CONFIG_ALTIVEC
+	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception)
+#else
+	STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception)
+#endif
+
+/*
+ * Here we have detected that the kernel stack pointer is bad.
+ * R9 contains the saved CR, r13 points to the paca,
+ * r10 contains the (bad) kernel stack pointer,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * We switch to using an emergency stack, save the registers there,
+ * and call kernel_bad_stack(), which panics.
+ */
+bad_stack:
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,64+INT_FRAME_SIZE
+	std	r9,_CCR(r1)
+	std	r10,GPR1(r1)
+	std	r11,_NIP(r1)
+	std	r12,_MSR(r1)
+	mfspr	r11,DAR
+	mfspr	r12,DSISR
+	std	r11,_DAR(r1)
+	std	r12,_DSISR(r1)
+	mflr	r10
+	mfctr	r11
+	mfxer	r12
+	std	r10,_LINK(r1)
+	std	r11,_CTR(r1)
+	std	r12,_XER(r1)
+	SAVE_GPR(0,r1)
+	SAVE_GPR(2,r1)
+	SAVE_4GPRS(3,r1)
+	SAVE_2GPRS(7,r1)
+	SAVE_10GPRS(12,r1)
+	SAVE_10GPRS(22,r1)
+	addi	r11,r1,INT_FRAME_SIZE
+	std	r11,0(r1)
+	li	r12,0
+	std	r12,0(r11)
+	ld	r2,PACATOC(r13)
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.kernel_bad_stack
+	b	1b
+
+/*
+ * Return from an exception with minimal checks.
+ * The caller is assumed to have done EXCEPTION_PROLOG_COMMON.
+ * If interrupts have been enabled, or anything has been
+ * done that might have changed the scheduling status of
+ * any task or sent any task a signal, you should use
+ * ret_from_except or ret_from_except_lite instead of this.
+ */
+fast_exception_return:
+	ld	r12,_MSR(r1)
+	ld	r11,_NIP(r1)
+	andi.	r3,r12,MSR_RI		/* check if RI is set */
+	beq-	unrecov_fer
+	ld	r3,_CCR(r1)
+	ld	r4,_LINK(r1)
+	ld	r5,_CTR(r1)
+	ld	r6,_XER(r1)
+	mtcr	r3
+	mtlr	r4
+	mtctr	r5
+	mtxer	r6
+	REST_GPR(0, r1)
+	REST_8GPRS(2, r1)
+
+	mfmsr	r10
+	clrrdi	r10,r10,2		/* clear RI (LE is 0 already) */
+	mtmsrd	r10,1
+
+	mtspr	SRR1,r12
+	mtspr	SRR0,r11
+	REST_4GPRS(10, r1)
+	ld	r1,GPR1(r1)
+	rfid
+	b	.	/* prevent speculative execution */
+
+unrecov_fer:
+	bl	.save_nvgprs
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.unrecoverable_exception
+	b	1b
+
+/*
+ * Here r13 points to the paca, r9 contains the saved CR,
+ * SRR0 and SRR1 are saved in r11 and r12,
+ * r9 - r13 are saved in paca->exgen.
+ */
+	.align	7
+	.globl data_access_common
+data_access_common:
+	RUNLATCH_ON(r10)		/* It wont fit in the 0x300 handler */
+	mfspr	r10,DAR
+	std	r10,PACA_EXGEN+EX_DAR(r13)
+	mfspr	r10,DSISR
+	stw	r10,PACA_EXGEN+EX_DSISR(r13)
+	EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
+	ld	r3,PACA_EXGEN+EX_DAR(r13)
+	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
+	li	r5,0x300
+	b	.do_hash_page	 	/* Try to handle as hpte fault */
+
+	.align	7
+	.globl instruction_access_common
+instruction_access_common:
+	EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
+	ld	r3,_NIP(r1)
+	andis.	r4,r12,0x5820
+	li	r5,0x400
+	b	.do_hash_page		/* Try to handle as hpte fault */
+
+	.align	7
+	.globl hardware_interrupt_common
+	.globl hardware_interrupt_entry
+hardware_interrupt_common:
+	EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN)
+hardware_interrupt_entry:
+	DISABLE_INTS
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.do_IRQ
+	b	.ret_from_except_lite
+
+	.align	7
+	.globl alignment_common
+alignment_common:
+	mfspr	r10,DAR
+	std	r10,PACA_EXGEN+EX_DAR(r13)
+	mfspr	r10,DSISR
+	stw	r10,PACA_EXGEN+EX_DSISR(r13)
+	EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
+	ld	r3,PACA_EXGEN+EX_DAR(r13)
+	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
+	std	r3,_DAR(r1)
+	std	r4,_DSISR(r1)
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	ENABLE_INTS
+	bl	.alignment_exception
+	b	.ret_from_except
+
+	.align	7
+	.globl program_check_common
+program_check_common:
+	EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	ENABLE_INTS
+	bl	.program_check_exception
+	b	.ret_from_except
+
+	.align	7
+	.globl fp_unavailable_common
+fp_unavailable_common:
+	EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
+	bne	.load_up_fpu		/* if from user, just load it up */
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	ENABLE_INTS
+	bl	.kernel_fp_unavailable_exception
+	BUG_OPCODE
+
+/*
+ * load_up_fpu(unused, unused, tsk)
+ * Disable FP for the task which had the FPU previously,
+ * and save its floating-point registers in its thread_struct.
+ * Enables the FPU for use in the kernel on return.
+ * On SMP we know the fpu is free, since we give it up every
+ * switch (ie, no lazy save of the FP registers).
+ * On entry: r13 == 'current' && last_task_used_math != 'current'
+ */
+_STATIC(load_up_fpu)
+	mfmsr	r5			/* grab the current MSR */
+	ori	r5,r5,MSR_FP
+	mtmsrd	r5			/* enable use of fpu now */
+	isync
+/*
+ * For SMP, we don't do lazy FPU switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_fpu in switch_to.
+ *
+ */
+#ifndef CONFIG_SMP
+	ld	r3,last_task_used_math@got(r2)
+	ld	r4,0(r3)
+	cmpdi	0,r4,0
+	beq	1f
+	/* Save FP state to last_task_used_math's THREAD struct */
+	addi	r4,r4,THREAD
+	SAVE_32FPRS(0, r4)
+	mffs	fr0
+	stfd	fr0,THREAD_FPSCR(r4)
+	/* Disable FP for last_task_used_math */
+	ld	r5,PT_REGS(r4)
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	li	r6,MSR_FP|MSR_FE0|MSR_FE1
+	andc	r4,r4,r6
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* enable use of FP after return */
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	ld	r4,THREAD_FPEXC_MODE(r5)
+	ori	r12,r12,MSR_FP
+	or	r12,r12,r4
+	std	r12,_MSR(r1)
+	lfd	fr0,THREAD_FPSCR(r5)
+	mtfsf	0xff,fr0
+	REST_32FPRS(0, r5)
+#ifndef CONFIG_SMP
+	/* Update last_task_used_math to 'current' */
+	subi	r4,r5,THREAD		/* Back to 'current' */
+	std	r4,0(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	b	fast_exception_return
+
+	.align	7
+	.globl altivec_unavailable_common
+altivec_unavailable_common:
+	EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	bne	.load_up_altivec	/* if from user, just load it up */
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	ENABLE_INTS
+	bl	.altivec_unavailable_exception
+	b	.ret_from_except
+
+#ifdef CONFIG_ALTIVEC
+/*
+ * load_up_altivec(unused, unused, tsk)
+ * Disable VMX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Enables the VMX for use in the kernel on return.
+ * On SMP we know the VMX is free, since we give it up every
+ * switch (ie, no lazy save of the vector registers).
+ * On entry: r13 == 'current' && last_task_used_altivec != 'current'
+ */
+_STATIC(load_up_altivec)
+	mfmsr	r5			/* grab the current MSR */
+	oris	r5,r5,MSR_VEC@h
+	mtmsrd	r5			/* enable use of VMX now */
+	isync
+
+/*
+ * For SMP, we don't do lazy VMX switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_altvec in switch_to.
+ * VRSAVE isn't dealt with here, that is done in the normal context
+ * switch code. Note that we could rely on vrsave value to eventually
+ * avoid saving all of the VREGs here...
+ */
+#ifndef CONFIG_SMP
+	ld	r3,last_task_used_altivec@got(r2)
+	ld	r4,0(r3)
+	cmpdi	0,r4,0
+	beq	1f
+	/* Save VMX state to last_task_used_altivec's THREAD struct */
+	addi	r4,r4,THREAD
+	SAVE_32VRS(0,r5,r4)
+	mfvscr	vr0
+	li	r10,THREAD_VSCR
+	stvx	vr0,r10,r4
+	/* Disable VMX for last_task_used_altivec */
+	ld	r5,PT_REGS(r4)
+	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r6,MSR_VEC@h
+	andc	r4,r4,r6
+	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* Hack: if we get an altivec unavailable trap with VRSAVE
+	 * set to all zeros, we assume this is a broken application
+	 * that fails to set it properly, and thus we switch it to
+	 * all 1's
+	 */
+	mfspr	r4,SPRN_VRSAVE
+	cmpdi	0,r4,0
+	bne+	1f
+	li	r4,-1
+	mtspr	SPRN_VRSAVE,r4
+1:
+	/* enable use of VMX after return */
+	ld	r4,PACACURRENT(r13)
+	addi	r5,r4,THREAD		/* Get THREAD */
+	oris	r12,r12,MSR_VEC@h
+	std	r12,_MSR(r1)
+	li	r4,1
+	li	r10,THREAD_VSCR
+	stw	r4,THREAD_USED_VR(r5)
+	lvx	vr0,r10,r5
+	mtvscr	vr0
+	REST_32VRS(0,r4,r5)
+#ifndef CONFIG_SMP
+	/* Update last_task_used_math to 'current' */
+	subi	r4,r5,THREAD		/* Back to 'current' */
+	std	r4,0(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+	b	fast_exception_return
+#endif /* CONFIG_ALTIVEC */
+
+/*
+ * Hash table stuff
+ */
+	.align	7
+_GLOBAL(do_hash_page)
+	std	r3,_DAR(r1)
+	std	r4,_DSISR(r1)
+
+	andis.	r0,r4,0xa450		/* weird error? */
+	bne-	.handle_page_fault	/* if not, try to insert a HPTE */
+BEGIN_FTR_SECTION
+	andis.	r0,r4,0x0020		/* Is it a segment table fault? */
+	bne-	.do_ste_alloc		/* If so handle it */
+END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
+
+	/*
+	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
+	 * accessing a userspace segment (even from the kernel). We assume
+	 * kernel addresses always have the high bit set.
+	 */
+	rlwinm	r4,r4,32-25+9,31-9,31-9	/* DSISR_STORE -> _PAGE_RW */
+	rotldi	r0,r3,15		/* Move high bit into MSR_PR posn */
+	orc	r0,r12,r0		/* MSR_PR | ~high_bit */
+	rlwimi	r4,r0,32-13,30,30	/* becomes _PAGE_USER access bit */
+	ori	r4,r4,1			/* add _PAGE_PRESENT */
+	rlwimi	r4,r5,22+2,31-2,31-2	/* Set _PAGE_EXEC if trap is 0x400 */
+
+	/*
+	 * On iSeries, we soft-disable interrupts here, then
+	 * hard-enable interrupts so that the hash_page code can spin on
+	 * the hash_table_lock without problems on a shared processor.
+	 */
+	DISABLE_INTS
+
+	/*
+	 * r3 contains the faulting address
+	 * r4 contains the required access permissions
+	 * r5 contains the trap number
+	 *
+	 * at return r3 = 0 for success
+	 */
+	bl	.hash_page		/* build HPTE if possible */
+	cmpdi	r3,0			/* see if hash_page succeeded */
+
+#ifdef DO_SOFT_DISABLE
+	/*
+	 * If we had interrupts soft-enabled at the point where the
+	 * DSI/ISI occurred, and an interrupt came in during hash_page,
+	 * handle it now.
+	 * We jump to ret_from_except_lite rather than fast_exception_return
+	 * because ret_from_except_lite will check for and handle pending
+	 * interrupts if necessary.
+	 */
+	beq	.ret_from_except_lite
+	/* For a hash failure, we don't bother re-enabling interrupts */
+	ble-	12f
+
+	/*
+	 * hash_page couldn't handle it, set soft interrupt enable back
+	 * to what it was before the trap.  Note that .local_irq_restore
+	 * handles any interrupts pending at this point.
+	 */
+	ld	r3,SOFTE(r1)
+	bl	.local_irq_restore
+	b	11f
+#else
+	beq	fast_exception_return   /* Return from exception on success */
+	ble-	12f			/* Failure return from hash_page */
+
+	/* fall through */
+#endif
+
+/* Here we have a page fault that hash_page can't handle. */
+_GLOBAL(handle_page_fault)
+	ENABLE_INTS
+11:	ld	r4,_DAR(r1)
+	ld	r5,_DSISR(r1)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.do_page_fault
+	cmpdi	r3,0
+	beq+	.ret_from_except_lite
+	bl	.save_nvgprs
+	mr	r5,r3
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	lwz	r4,_DAR(r1)
+	bl	.bad_page_fault
+	b	.ret_from_except
+
+/* We have a page fault that hash_page could handle but HV refused
+ * the PTE insertion
+ */
+12:	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	lwz	r4,_DAR(r1)
+	bl	.low_hash_fault
+	b	.ret_from_except
+
+	/* here we have a segment miss */
+_GLOBAL(do_ste_alloc)
+	bl	.ste_allocate		/* try to insert stab entry */
+	cmpdi	r3,0
+	beq+	fast_exception_return
+	b	.handle_page_fault
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * r9 - r13 are saved in paca->exslb.
+ * We assume we aren't going to take any exceptions during this procedure.
+ * We assume (DAR >> 60) == 0xc.
+ */
+	.align	7
+_GLOBAL(do_stab_bolted)
+	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
+	std	r11,PACA_EXSLB+EX_SRR0(r13)	/* save SRR0 in exc. frame */
+
+	/* Hash to the primary group */
+	ld	r10,PACASTABVIRT(r13)
+	mfspr	r11,DAR
+	srdi	r11,r11,28
+	rldimi	r10,r11,7,52	/* r10 = first ste of the group */
+
+	/* Calculate VSID */
+	/* This is a kernel address, so protovsid = ESID */
+	ASM_VSID_SCRAMBLE(r11, r9)
+	rldic	r9,r11,12,16	/* r9 = vsid << 12 */
+
+	/* Search the primary group for a free entry */
+1:	ld	r11,0(r10)	/* Test valid bit of the current ste	*/
+	andi.	r11,r11,0x80
+	beq	2f
+	addi	r10,r10,16
+	andi.	r11,r10,0x70
+	bne	1b
+
+	/* Stick for only searching the primary group for now.		*/
+	/* At least for now, we use a very simple random castout scheme */
+	/* Use the TB as a random number ;  OR in 1 to avoid entry 0	*/
+	mftb	r11
+	rldic	r11,r11,4,57	/* r11 = (r11 << 4) & 0x70 */
+	ori	r11,r11,0x10
+
+	/* r10 currently points to an ste one past the group of interest */
+	/* make it point to the randomly selected entry			*/
+	subi	r10,r10,128
+	or 	r10,r10,r11	/* r10 is the entry to invalidate	*/
+
+	isync			/* mark the entry invalid		*/
+	ld	r11,0(r10)
+	rldicl	r11,r11,56,1	/* clear the valid bit */
+	rotldi	r11,r11,8
+	std	r11,0(r10)
+	sync
+
+	clrrdi	r11,r11,28	/* Get the esid part of the ste		*/
+	slbie	r11
+
+2:	std	r9,8(r10)	/* Store the vsid part of the ste	*/
+	eieio
+
+	mfspr	r11,DAR		/* Get the new esid			*/
+	clrrdi	r11,r11,28	/* Permits a full 32b of ESID		*/
+	ori	r11,r11,0x90	/* Turn on valid and kp			*/
+	std	r11,0(r10)	/* Put new entry back into the stab	*/
+
+	sync
+
+	/* All done -- return from exception. */
+	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
+	ld	r11,PACA_EXSLB+EX_SRR0(r13)	/* get saved SRR0 */
+
+	andi.	r10,r12,MSR_RI
+	beq-	unrecov_slb
+
+	mtcrf	0x80,r9			/* restore CR */
+
+	mfmsr	r10
+	clrrdi	r10,r10,2
+	mtmsrd	r10,1
+
+	mtspr	SRR0,r11
+	mtspr	SRR1,r12
+	ld	r9,PACA_EXSLB+EX_R9(r13)
+	ld	r10,PACA_EXSLB+EX_R10(r13)
+	ld	r11,PACA_EXSLB+EX_R11(r13)
+	ld	r12,PACA_EXSLB+EX_R12(r13)
+	ld	r13,PACA_EXSLB+EX_R13(r13)
+	rfid
+	b	.	/* prevent speculative execution */
+
+/*
+ * r13 points to the PACA, r9 contains the saved CR,
+ * r11 and r12 contain the saved SRR0 and SRR1.
+ * r3 has the faulting address
+ * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
+ * We assume we aren't going to take any exceptions during this procedure.
+ */
+_GLOBAL(do_slb_miss)
+	mflr	r10
+
+	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
+	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */
+
+	bl	.slb_allocate			/* handle it */
+
+	/* All done -- return from exception. */
+
+	ld	r10,PACA_EXSLB+EX_LR(r13)
+	ld	r3,PACA_EXSLB+EX_R3(r13)
+	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
+#ifdef CONFIG_PPC_ISERIES
+	ld	r11,PACALPPACA+LPPACASRR0(r13)	/* get SRR0 value */
+#endif /* CONFIG_PPC_ISERIES */
+
+	mtlr	r10
+
+	andi.	r10,r12,MSR_RI	/* check for unrecoverable exception */
+	beq-	unrecov_slb
+
+.machine	push
+.machine	"power4"
+	mtcrf	0x80,r9
+	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
+.machine	pop
+
+#ifdef CONFIG_PPC_ISERIES
+	mtspr	SRR0,r11
+	mtspr	SRR1,r12
+#endif /* CONFIG_PPC_ISERIES */
+	ld	r9,PACA_EXSLB+EX_R9(r13)
+	ld	r10,PACA_EXSLB+EX_R10(r13)
+	ld	r11,PACA_EXSLB+EX_R11(r13)
+	ld	r12,PACA_EXSLB+EX_R12(r13)
+	ld	r13,PACA_EXSLB+EX_R13(r13)
+	rfid
+	b	.	/* prevent speculative execution */
+
+unrecov_slb:
+	EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
+	DISABLE_INTS
+	bl	.save_nvgprs
+1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.unrecoverable_exception
+	b	1b
+
+/*
+ * Space for CPU0's segment table.
+ *
+ * On iSeries, the hypervisor must fill in at least one entry before
+ * we get control (with relocate on).  The address is give to the hv
+ * as a page number (see xLparMap in LparData.c), so this must be at a
+ * fixed address (the linker can't compute (u64)&initial_stab >>
+ * PAGE_SHIFT).
+ */
+	. = STAB0_PHYS_ADDR	/* 0x6000 */
+	.globl initial_stab
+initial_stab:
+	.space	4096
+
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ */
+	.= 0x7000
+	.globl fwnmi_data_area
+fwnmi_data_area:
+
+	/* iSeries does not use the FWNMI stuff, so it is safe to put
+	 * this here, even if we later allow kernels that will boot on
+	 * both pSeries and iSeries */
+#ifdef CONFIG_PPC_ISERIES
+        . = LPARMAP_PHYS
+#include "lparmap.s"
+/*
+ * This ".text" is here for old compilers that generate a trailing
+ * .note section when compiling .c files to .s
+ */
+	.text
+#endif /* CONFIG_PPC_ISERIES */
+
+        . = 0x8000
+
+/*
+ * On pSeries, secondary processors spin in the following code.
+ * At entry, r3 = this processor's number (physical cpu id)
+ */
+_GLOBAL(pSeries_secondary_smp_init)
+	mr	r24,r3
+	
+	/* turn on 64-bit mode */
+	bl	.enable_64b_mode
+	isync
+
+	/* Copy some CPU settings from CPU 0 */
+	bl	.__restore_cpu_setup
+
+	/* Set up a paca value for this processor. Since we have the
+	 * physical cpu id in r24, we need to search the pacas to find
+	 * which logical id maps to our physical one.
+	 */
+	LOADADDR(r13, paca) 		/* Get base vaddr of paca array	 */
+	li	r5,0			/* logical cpu id                */
+1:	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
+	cmpw	r6,r24			/* Compare to our id             */
+	beq	2f
+	addi	r13,r13,PACA_SIZE	/* Loop to next PACA on miss     */
+	addi	r5,r5,1
+	cmpwi	r5,NR_CPUS
+	blt	1b
+
+	mr	r3,r24			/* not found, copy phys to r3	 */
+	b	.kexec_wait		/* next kernel might do better	 */
+
+2:	mtspr	SPRG3,r13		/* Save vaddr of paca in SPRG3	 */
+	/* From now on, r24 is expected to be logical cpuid */
+	mr	r24,r5
+3:	HMT_LOW
+	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor should */
+					/* start.			 */
+	sync
+
+	/* Create a temp kernel stack for use before relocation is on.	*/
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,STACK_FRAME_OVERHEAD
+
+	cmpwi	0,r23,0
+#ifdef CONFIG_SMP
+	bne	.__secondary_start
+#endif
+	b 	3b			/* Loop until told to go	 */
+
+#ifdef CONFIG_PPC_ISERIES
+_STATIC(__start_initialization_iSeries)
+	/* Clear out the BSS */
+	LOADADDR(r11,__bss_stop)
+	LOADADDR(r8,__bss_start)
+	sub	r11,r11,r8		/* bss size			*/
+	addi	r11,r11,7		/* round up to an even double word */
+	rldicl. r11,r11,61,3		/* shift right by 3		*/
+	beq	4f
+	addi	r8,r8,-8
+	li	r0,0
+	mtctr	r11			/* zero this many doublewords	*/
+3:	stdu	r0,8(r8)
+	bdnz	3b
+4:
+	LOADADDR(r1,init_thread_union)
+	addi	r1,r1,THREAD_SIZE
+	li	r0,0
+	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
+
+	LOADADDR(r3,cpu_specs)
+	LOADADDR(r4,cur_cpu_spec)
+	li	r5,0
+	bl	.identify_cpu
+
+	LOADADDR(r2,__toc_start)
+	addi	r2,r2,0x4000
+	addi	r2,r2,0x4000
+
+	bl	.iSeries_early_setup
+
+	/* relocation is on at this point */
+
+	b	.start_here_common
+#endif /* CONFIG_PPC_ISERIES */
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+
+_STATIC(__mmu_off)
+	mfmsr	r3
+	andi.	r0,r3,MSR_IR|MSR_DR
+	beqlr
+	andc	r3,r3,r0
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	sync
+	rfid
+	b	.	/* prevent speculative execution */
+
+
+/*
+ * Here is our main kernel entry point. We support currently 2 kind of entries
+ * depending on the value of r5.
+ *
+ *   r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content
+ *                 in r3...r7
+ *   
+ *   r5 == NULL -> kexec style entry. r3 is a physical pointer to the
+ *                 DT block, r4 is a physical pointer to the kernel itself
+ *
+ */
+_GLOBAL(__start_initialization_multiplatform)
+	/*
+	 * Are we booted from a PROM Of-type client-interface ?
+	 */
+	cmpldi	cr0,r5,0
+	bne	.__boot_from_prom		/* yes -> prom */
+
+	/* Save parameters */
+	mr	r31,r3
+	mr	r30,r4
+
+	/* Make sure we are running in 64 bits mode */
+	bl	.enable_64b_mode
+
+	/* Setup some critical 970 SPRs before switching MMU off */
+	bl	.__970_cpu_preinit
+
+	/* cpu # */
+	li	r24,0
+
+	/* Switch off MMU if not already */
+	LOADADDR(r4, .__after_prom_start - KERNELBASE)
+	add	r4,r4,r30
+	bl	.__mmu_off
+	b	.__after_prom_start
+
+_STATIC(__boot_from_prom)
+	/* Save parameters */
+	mr	r31,r3
+	mr	r30,r4
+	mr	r29,r5
+	mr	r28,r6
+	mr	r27,r7
+
+	/* Make sure we are running in 64 bits mode */
+	bl	.enable_64b_mode
+
+	/* put a relocation offset into r3 */
+	bl	.reloc_offset
+
+	LOADADDR(r2,__toc_start)
+	addi	r2,r2,0x4000
+	addi	r2,r2,0x4000
+
+	/* Relocate the TOC from a virt addr to a real addr */
+	sub	r2,r2,r3
+
+	/* Restore parameters */
+	mr	r3,r31
+	mr	r4,r30
+	mr	r5,r29
+	mr	r6,r28
+	mr	r7,r27
+
+	/* Do all of the interaction with OF client interface */
+	bl	.prom_init
+	/* We never return */
+	trap
+
+/*
+ * At this point, r3 contains the physical address we are running at,
+ * returned by prom_init()
+ */
+_STATIC(__after_prom_start)
+
+/*
+ * We need to run with __start at physical address 0.
+ * This will leave some code in the first 256B of
+ * real memory, which are reserved for software use.
+ * The remainder of the first page is loaded with the fixed
+ * interrupt vectors.  The next two pages are filled with
+ * unknown exception placeholders.
+ *
+ * Note: This process overwrites the OF exception vectors.
+ *	r26 == relocation offset
+ *	r27 == KERNELBASE
+ */
+	bl	.reloc_offset
+	mr	r26,r3
+	SET_REG_TO_CONST(r27,KERNELBASE)
+
+	li	r3,0			/* target addr */
+
+	// XXX FIXME: Use phys returned by OF (r30)
+	sub	r4,r27,r26 		/* source addr			 */
+					/* current address of _start	 */
+					/*   i.e. where we are running	 */
+					/*	the source addr		 */
+
+	LOADADDR(r5,copy_to_here)	/* # bytes of memory to copy	 */
+	sub	r5,r5,r27
+
+	li	r6,0x100		/* Start offset, the first 0x100 */
+					/* bytes were copied earlier.	 */
+
+	bl	.copy_and_flush		/* copy the first n bytes	 */
+					/* this includes the code being	 */
+					/* executed here.		 */
+
+	LOADADDR(r0, 4f)		/* Jump to the copy of this code */
+	mtctr	r0			/* that we just made/relocated	 */
+	bctr
+
+4:	LOADADDR(r5,klimit)
+	sub	r5,r5,r26
+	ld	r5,0(r5)		/* get the value of klimit */
+	sub	r5,r5,r27
+	bl	.copy_and_flush		/* copy the rest */
+	b	.start_here_multiplatform
+
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+
+/*
+ * Copy routine used to copy the kernel to start at physical address 0
+ * and flush and invalidate the caches as needed.
+ * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
+ * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
+ *
+ * Note: this routine *only* clobbers r0, r6 and lr
+ */
+_GLOBAL(copy_and_flush)
+	addi	r5,r5,-8
+	addi	r6,r6,-8
+4:	li	r0,16			/* Use the least common		*/
+					/* denominator cache line	*/
+					/* size.  This results in	*/
+					/* extra cache line flushes	*/
+					/* but operation is correct.	*/
+					/* Can't get cache line size	*/
+					/* from NACA as it is being	*/
+					/* moved too.			*/
+
+	mtctr	r0			/* put # words/line in ctr	*/
+3:	addi	r6,r6,8			/* copy a cache line		*/
+	ldx	r0,r6,r4
+	stdx	r0,r6,r3
+	bdnz	3b
+	dcbst	r6,r3			/* write it to memory		*/
+	sync
+	icbi	r6,r3			/* flush the icache line	*/
+	cmpld	0,r6,r5
+	blt	4b
+	sync
+	addi	r5,r5,8
+	addi	r6,r6,8
+	blr
+
+.align 8
+copy_to_here:
+
+#ifdef CONFIG_SMP
+#ifdef CONFIG_PPC_PMAC
+/*
+ * On PowerMac, secondary processors starts from the reset vector, which
+ * is temporarily turned into a call to one of the functions below.
+ */
+	.section ".text";
+	.align 2 ;
+
+	.globl	pmac_secondary_start_1	
+pmac_secondary_start_1:	
+	li	r24, 1
+	b	.pmac_secondary_start
+	
+	.globl pmac_secondary_start_2
+pmac_secondary_start_2:	
+	li	r24, 2
+	b	.pmac_secondary_start
+	
+	.globl pmac_secondary_start_3
+pmac_secondary_start_3:
+	li	r24, 3
+	b	.pmac_secondary_start
+	
+_GLOBAL(pmac_secondary_start)
+	/* turn on 64-bit mode */
+	bl	.enable_64b_mode
+	isync
+
+	/* Copy some CPU settings from CPU 0 */
+	bl	.__restore_cpu_setup
+
+	/* pSeries do that early though I don't think we really need it */
+	mfmsr	r3
+	ori	r3,r3,MSR_RI
+	mtmsrd	r3			/* RI on */
+
+	/* Set up a paca value for this processor. */
+	LOADADDR(r4, paca) 		 /* Get base vaddr of paca array	*/
+	mulli	r13,r24,PACA_SIZE	 /* Calculate vaddr of right paca */
+	add	r13,r13,r4		/* for this processor.		*/
+	mtspr	SPRG3,r13		 /* Save vaddr of paca in SPRG3	*/
+
+	/* Create a temp kernel stack for use before relocation is on.	*/
+	ld	r1,PACAEMERGSP(r13)
+	subi	r1,r1,STACK_FRAME_OVERHEAD
+
+	b	.__secondary_start
+
+#endif /* CONFIG_PPC_PMAC */
+
+/*
+ * This function is called after the master CPU has released the
+ * secondary processors.  The execution environment is relocation off.
+ * The paca for this processor has the following fields initialized at
+ * this point:
+ *   1. Processor number
+ *   2. Segment table pointer (virtual address)
+ * On entry the following are set:
+ *   r1	= stack pointer.  vaddr for iSeries, raddr (temp stack) for pSeries
+ *   r24   = cpu# (in Linux terms)
+ *   r13   = paca virtual address
+ *   SPRG3 = paca virtual address
+ */
+_GLOBAL(__secondary_start)
+
+	HMT_MEDIUM			/* Set thread priority to MEDIUM */
+
+	ld	r2,PACATOC(r13)
+	li	r6,0
+	stb	r6,PACAPROCENABLED(r13)
+
+#ifndef CONFIG_PPC_ISERIES
+	/* Initialize the page table pointer register. */
+	LOADADDR(r6,_SDR1)
+	ld	r6,0(r6)		/* get the value of _SDR1	 */
+	mtspr	SDR1,r6			/* set the htab location	 */
+#endif
+	/* Initialize the first segment table (or SLB) entry		 */
+	ld	r3,PACASTABVIRT(r13)	/* get addr of segment table	 */
+	bl	.stab_initialize
+
+	/* Initialize the kernel stack.  Just a repeat for iSeries.	 */
+	LOADADDR(r3,current_set)
+	sldi	r28,r24,3		/* get current_set[cpu#]	 */
+	ldx	r1,r3,r28
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	std	r1,PACAKSAVE(r13)
+
+	ld	r3,PACASTABREAL(r13)	/* get raddr of segment table	 */
+	ori	r4,r3,1			/* turn on valid bit		 */
+
+#ifdef CONFIG_PPC_ISERIES
+	li	r0,-1			/* hypervisor call */
+	li	r3,1
+	sldi	r3,r3,63		/* 0x8000000000000000 */
+	ori	r3,r3,4			/* 0x8000000000000004 */
+	sc				/* HvCall_setASR */
+#else
+	/* set the ASR */
+	ld	r3,systemcfg@got(r2)	/* r3 = ptr to systemcfg	 */
+	ld	r3,0(r3)
+	lwz	r3,PLATFORM(r3)		/* r3 = platform flags		 */
+	andi.	r3,r3,PLATFORM_LPAR	/* Test if bit 0 is set (LPAR bit) */
+	beq	98f			/* branch if result is 0  */
+	mfspr	r3,PVR
+	srwi	r3,r3,16
+	cmpwi	r3,0x37			/* SStar  */
+	beq	97f
+	cmpwi	r3,0x36			/* IStar  */
+	beq	97f
+	cmpwi	r3,0x34			/* Pulsar */
+	bne	98f
+97:	li	r3,H_SET_ASR		/* hcall = H_SET_ASR */
+	HVSC				/* Invoking hcall */
+	b	99f
+98:					/* !(rpa hypervisor) || !(star)  */
+	mtasr	r4			/* set the stab location	 */
+99:
+#endif
+	li	r7,0
+	mtlr	r7
+
+	/* enable MMU and jump to start_secondary */
+	LOADADDR(r3,.start_secondary_prolog)
+	SET_REG_TO_CONST(r4, MSR_KERNEL)
+#ifdef DO_SOFT_DISABLE
+	ori	r4,r4,MSR_EE
+#endif
+	mtspr	SRR0,r3
+	mtspr	SRR1,r4
+	rfid
+	b	.	/* prevent speculative execution */
+
+/* 
+ * Running with relocation on at this point.  All we want to do is
+ * zero the stack back-chain pointer before going into C code.
+ */
+_GLOBAL(start_secondary_prolog)
+	li	r3,0
+	std	r3,0(r1)		/* Zero the stack frame pointer	*/
+	bl	.start_secondary
+#endif
+
+/*
+ * This subroutine clobbers r11 and r12
+ */
+_GLOBAL(enable_64b_mode)
+	mfmsr	r11			/* grab the current MSR */
+	li	r12,1
+	rldicr	r12,r12,MSR_SF_LG,(63-MSR_SF_LG)
+	or	r11,r11,r12
+	li	r12,1
+	rldicr	r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG)
+	or	r11,r11,r12
+	mtmsrd	r11
+	isync
+	blr
+
+#ifdef CONFIG_PPC_MULTIPLATFORM
+/*
+ * This is where the main kernel code starts.
+ */
+_STATIC(start_here_multiplatform)
+	/* get a new offset, now that the kernel has moved. */
+	bl	.reloc_offset
+	mr	r26,r3
+
+	/* Clear out the BSS. It may have been done in prom_init,
+	 * already but that's irrelevant since prom_init will soon
+	 * be detached from the kernel completely. Besides, we need
+	 * to clear it now for kexec-style entry.
+	 */
+	LOADADDR(r11,__bss_stop)
+	LOADADDR(r8,__bss_start)
+	sub	r11,r11,r8		/* bss size			*/
+	addi	r11,r11,7		/* round up to an even double word */
+	rldicl. r11,r11,61,3		/* shift right by 3		*/
+	beq	4f
+	addi	r8,r8,-8
+	li	r0,0
+	mtctr	r11			/* zero this many doublewords	*/
+3:	stdu	r0,8(r8)
+	bdnz	3b
+4:
+
+	mfmsr	r6
+	ori	r6,r6,MSR_RI
+	mtmsrd	r6			/* RI on */
+
+#ifdef CONFIG_HMT
+	/* Start up the second thread on cpu 0 */
+	mfspr	r3,PVR
+	srwi	r3,r3,16
+	cmpwi	r3,0x34			/* Pulsar  */
+	beq	90f
+	cmpwi	r3,0x36			/* Icestar */
+	beq	90f
+	cmpwi	r3,0x37			/* SStar   */
+	beq	90f
+	b	91f			/* HMT not supported */
+90:	li	r3,0
+	bl	.hmt_start_secondary
+91:
+#endif
+
+	/* The following gets the stack and TOC set up with the regs */
+	/* pointing to the real addr of the kernel stack.  This is   */
+	/* all done to support the C function call below which sets  */
+	/* up the htab.  This is done because we have relocated the  */
+	/* kernel but are still running in real mode. */
+
+	LOADADDR(r3,init_thread_union)
+	sub	r3,r3,r26
+
+	/* set up a stack pointer (physical address) */
+	addi	r1,r3,THREAD_SIZE
+	li	r0,0
+	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
+
+	/* set up the TOC (physical address) */
+	LOADADDR(r2,__toc_start)
+	addi	r2,r2,0x4000
+	addi	r2,r2,0x4000
+	sub	r2,r2,r26
+
+	LOADADDR(r3,cpu_specs)
+	sub	r3,r3,r26
+	LOADADDR(r4,cur_cpu_spec)
+	sub	r4,r4,r26
+	mr	r5,r26
+	bl	.identify_cpu
+
+	/* Save some low level config HIDs of CPU0 to be copied to
+	 * other CPUs later on, or used for suspend/resume
+	 */
+	bl	.__save_cpu_setup
+	sync
+
+	/* Setup a valid physical PACA pointer in SPRG3 for early_setup
+	 * note that boot_cpuid can always be 0 nowadays since there is
+	 * nowhere it can be initialized differently before we reach this
+	 * code
+	 */
+	LOADADDR(r27, boot_cpuid)
+	sub	r27,r27,r26
+	lwz	r27,0(r27)
+
+	LOADADDR(r24, paca) 		/* Get base vaddr of paca array	 */
+	mulli	r13,r27,PACA_SIZE	/* Calculate vaddr of right paca */
+	add	r13,r13,r24		/* for this processor.		 */
+	sub	r13,r13,r26		/* convert to physical addr	 */
+	mtspr	SPRG3,r13		/* PPPBBB: Temp... -Peter */
+	
+	/* Do very early kernel initializations, including initial hash table,
+	 * stab and slb setup before we turn on relocation.	*/
+
+	/* Restore parameters passed from prom_init/kexec */
+	mr	r3,r31
+ 	bl	.early_setup
+
+	/* set the ASR */
+	ld	r3,PACASTABREAL(r13)
+	ori	r4,r3,1			/* turn on valid bit		 */
+	ld	r3,systemcfg@got(r2)	/* r3 = ptr to systemcfg */
+	ld	r3,0(r3)
+	lwz	r3,PLATFORM(r3)		/* r3 = platform flags */
+	andi.	r3,r3,PLATFORM_LPAR	/* Test if bit 0 is set (LPAR bit) */
+	beq	98f			/* branch if result is 0  */
+	mfspr	r3,PVR
+	srwi	r3,r3,16
+	cmpwi	r3,0x37			/* SStar */
+	beq	97f
+	cmpwi	r3,0x36			/* IStar  */
+	beq	97f
+	cmpwi	r3,0x34			/* Pulsar */
+	bne	98f
+97:	li	r3,H_SET_ASR		/* hcall = H_SET_ASR */
+	HVSC				/* Invoking hcall */
+	b	99f
+98:					/* !(rpa hypervisor) || !(star) */
+	mtasr	r4			/* set the stab location	*/
+99:
+	/* Set SDR1 (hash table pointer) */
+	ld	r3,systemcfg@got(r2)	/* r3 = ptr to systemcfg */
+	ld	r3,0(r3)
+	lwz	r3,PLATFORM(r3)		/* r3 = platform flags */
+	/* Test if bit 0 is set (LPAR bit) */
+	andi.	r3,r3,PLATFORM_LPAR
+	bne	98f			/* branch if result is !0  */
+	LOADADDR(r6,_SDR1)		/* Only if NOT LPAR */
+	sub	r6,r6,r26
+	ld	r6,0(r6)		/* get the value of _SDR1 */
+	mtspr	SDR1,r6			/* set the htab location  */
+98: 
+	LOADADDR(r3,.start_here_common)
+	SET_REG_TO_CONST(r4, MSR_KERNEL)
+	mtspr	SRR0,r3
+	mtspr	SRR1,r4
+	rfid
+	b	.	/* prevent speculative execution */
+#endif /* CONFIG_PPC_MULTIPLATFORM */
+	
+	/* This is where all platforms converge execution */
+_STATIC(start_here_common)
+	/* relocation is on at this point */
+
+	/* The following code sets up the SP and TOC now that we are */
+	/* running with translation enabled. */
+
+	LOADADDR(r3,init_thread_union)
+
+	/* set up the stack */
+	addi	r1,r3,THREAD_SIZE
+	li	r0,0
+	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
+
+	/* Apply the CPUs-specific fixups (nop out sections not relevant
+	 * to this CPU
+	 */
+	li	r3,0
+	bl	.do_cpu_ftr_fixups
+
+	LOADADDR(r26, boot_cpuid)
+	lwz	r26,0(r26)
+
+	LOADADDR(r24, paca) 		/* Get base vaddr of paca array  */
+	mulli	r13,r26,PACA_SIZE	/* Calculate vaddr of right paca */
+	add	r13,r13,r24		/* for this processor.		 */
+	mtspr	SPRG3,r13
+
+	/* ptr to current */
+	LOADADDR(r4,init_task)
+	std	r4,PACACURRENT(r13)
+
+	/* Load the TOC */
+	ld	r2,PACATOC(r13)
+	std	r1,PACAKSAVE(r13)
+
+	bl	.setup_system
+
+	/* Load up the kernel context */
+5:
+#ifdef DO_SOFT_DISABLE
+	li	r5,0
+	stb	r5,PACAPROCENABLED(r13)	/* Soft Disabled */
+	mfmsr	r5
+	ori	r5,r5,MSR_EE		/* Hard Enabled */
+	mtmsrd	r5
+#endif
+
+	bl .start_kernel
+
+_GLOBAL(hmt_init)
+#ifdef CONFIG_HMT
+	LOADADDR(r5, hmt_thread_data)
+	mfspr	r7,PVR
+	srwi	r7,r7,16
+	cmpwi	r7,0x34			/* Pulsar  */
+	beq	90f
+	cmpwi	r7,0x36			/* Icestar */
+	beq	91f
+	cmpwi	r7,0x37			/* SStar   */
+	beq	91f
+	b	101f
+90:	mfspr	r6,PIR
+	andi.	r6,r6,0x1f
+	b	92f
+91:	mfspr	r6,PIR
+	andi.	r6,r6,0x3ff
+92:	sldi	r4,r24,3
+	stwx	r6,r5,r4
+	bl	.hmt_start_secondary
+	b	101f
+
+__hmt_secondary_hold:
+	LOADADDR(r5, hmt_thread_data)
+	clrldi	r5,r5,4
+	li	r7,0
+	mfspr	r6,PIR
+	mfspr	r8,PVR
+	srwi	r8,r8,16
+	cmpwi	r8,0x34
+	bne	93f
+	andi.	r6,r6,0x1f
+	b	103f
+93:	andi.	r6,r6,0x3f
+
+103:	lwzx	r8,r5,r7
+	cmpw	r8,r6
+	beq	104f
+	addi	r7,r7,8
+	b	103b
+
+104:	addi	r7,r7,4
+	lwzx	r9,r5,r7
+	mr	r24,r9
+101:
+#endif
+	mr	r3,r24
+	b	.pSeries_secondary_smp_init
+
+#ifdef CONFIG_HMT
+_GLOBAL(hmt_start_secondary)
+	LOADADDR(r4,__hmt_secondary_hold)
+	clrldi	r4,r4,4
+	mtspr	NIADORM, r4
+	mfspr	r4, MSRDORM
+	li	r5, -65
+	and	r4, r4, r5
+	mtspr	MSRDORM, r4
+	lis	r4,0xffef
+	ori	r4,r4,0x7403
+	mtspr	TSC, r4
+	li	r4,0x1f4
+	mtspr	TST, r4
+	mfspr	r4, HID0
+	ori	r4, r4, 0x1
+	mtspr	HID0, r4
+	mfspr	r4, SPRN_CTRLF
+	oris	r4, r4, 0x40
+	mtspr	SPRN_CTRLT, r4
+	blr
+#endif
+
+#if defined(CONFIG_KEXEC) || (defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES))
+_GLOBAL(smp_release_cpus)
+	/* All secondary cpus are spinning on a common
+	 * spinloop, release them all now so they can start
+	 * to spin on their individual paca spinloops.
+	 * For non SMP kernels, the secondary cpus never
+	 * get out of the common spinloop.
+	 */
+	li	r3,1
+	LOADADDR(r5,__secondary_hold_spinloop)
+	std	r3,0(r5)
+	sync
+	blr
+#endif /* CONFIG_SMP && !CONFIG_PPC_ISERIES */
+
+
+/*
+ * We put a few things here that have to be page-aligned.
+ * This stuff goes at the beginning of the bss, which is page-aligned.
+ */
+	.section ".bss"
+
+	.align	PAGE_SHIFT
+
+	.globl	empty_zero_page
+empty_zero_page:
+	.space	PAGE_SIZE
+
+	.globl	swapper_pg_dir
+swapper_pg_dir:
+	.space	PAGE_SIZE
+
+/*
+ * This space gets a copy of optional info passed to us by the bootstrap
+ * Used to pass parameters into the kernel like root=/dev/sda1, etc.
+ */
+	.globl	cmd_line
+cmd_line:
+	.space	COMMAND_LINE_SIZE
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
new file mode 100644
index 00000000000..cb1a3a54a02
--- /dev/null
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -0,0 +1,860 @@
+/*
+ *  arch/ppc/kernel/except_8xx.S
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *  MPC8xx modifications by Dan Malek
+ *    Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ *  This file contains low-level support and setup for PowerPC 8xx
+ *  embedded processors, including trap and interrupt dispatch.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/cache.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* Macro to make the code more readable. */
+#ifdef CONFIG_8xx_CPU6
+#define DO_8xx_CPU6(val, reg)	\
+	li	reg, val;	\
+	stw	reg, 12(r0);	\
+	lwz	reg, 12(r0);
+#else
+#define DO_8xx_CPU6(val, reg)
+#endif
+	.text
+	.globl	_stext
+_stext:
+	.text
+	.globl	_start
+_start:
+
+/* MPC8xx
+ * This port was done on an MBX board with an 860.  Right now I only
+ * support an ELF compressed (zImage) boot from EPPC-Bug because the
+ * code there loads up some registers before calling us:
+ *   r3: ptr to board info data
+ *   r4: initrd_start or if no initrd then 0
+ *   r5: initrd_end - unused if r4 is 0
+ *   r6: Start of command line string
+ *   r7: End of command line string
+ *
+ * I decided to use conditional compilation instead of checking PVR and
+ * adding more processor specific branches around code I don't need.
+ * Since this is an embedded processor, I also appreciate any memory
+ * savings I can get.
+ *
+ * The MPC8xx does not have any BATs, but it supports large page sizes.
+ * We first initialize the MMU to support 8M byte pages, then load one
+ * entry into each of the instruction and data TLBs to map the first
+ * 8M 1:1.  I also mapped an additional I/O space 1:1 so we can get to
+ * the "internal" processor registers before MMU_init is called.
+ *
+ * The TLB code currently contains a major hack.  Since I use the condition
+ * code register, I have to save and restore it.  I am out of registers, so
+ * I just store it in memory location 0 (the TLB handlers are not reentrant).
+ * To avoid making any decisions, I need to use the "segment" valid bit
+ * in the first level table, but that would require many changes to the
+ * Linux page directory/table functions that I don't want to do right now.
+ *
+ * I used to use SPRG2 for a temporary register in the TLB handler, but it
+ * has since been put to other uses.  I now use a hack to save a register
+ * and the CCR at memory location 0.....Someday I'll fix this.....
+ *	-- Dan
+ */
+	.globl	__start
+__start:
+	mr	r31,r3			/* save parameters */
+	mr	r30,r4
+	mr	r29,r5
+	mr	r28,r6
+	mr	r27,r7
+
+	/* We have to turn on the MMU right away so we get cache modes
+	 * set correctly.
+	 */
+	bl	initial_mmu
+
+/* We now have the lower 8 Meg mapped into TLB entries, and the caches
+ * ready to work.
+ */
+
+turn_on_mmu:
+	mfmsr	r0
+	ori	r0,r0,MSR_DR|MSR_IR
+	mtspr	SPRN_SRR1,r0
+	lis	r0,start_here@h
+	ori	r0,r0,start_here@l
+	mtspr	SPRN_SRR0,r0
+	SYNC
+	rfi				/* enables MMU */
+
+/*
+ * Exception entry code.  This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ */
+#define EXCEPTION_PROLOG	\
+	mtspr	SPRN_SPRG0,r10;	\
+	mtspr	SPRN_SPRG1,r11;	\
+	mfcr	r10;		\
+	EXCEPTION_PROLOG_1;	\
+	EXCEPTION_PROLOG_2
+
+#define EXCEPTION_PROLOG_1	\
+	mfspr	r11,SPRN_SRR1;		/* check whether user or kernel */ \
+	andi.	r11,r11,MSR_PR;	\
+	tophys(r11,r1);			/* use tophys(r1) if kernel */ \
+	beq	1f;		\
+	mfspr	r11,SPRN_SPRG3;	\
+	lwz	r11,THREAD_INFO-THREAD(r11);	\
+	addi	r11,r11,THREAD_SIZE;	\
+	tophys(r11,r11);	\
+1:	subi	r11,r11,INT_FRAME_SIZE	/* alloc exc. frame */
+
+
+#define EXCEPTION_PROLOG_2	\
+	CLR_TOP32(r11);		\
+	stw	r10,_CCR(r11);		/* save registers */ \
+	stw	r12,GPR12(r11);	\
+	stw	r9,GPR9(r11);	\
+	mfspr	r10,SPRN_SPRG0;	\
+	stw	r10,GPR10(r11);	\
+	mfspr	r12,SPRN_SPRG1;	\
+	stw	r12,GPR11(r11);	\
+	mflr	r10;		\
+	stw	r10,_LINK(r11);	\
+	mfspr	r12,SPRN_SRR0;	\
+	mfspr	r9,SPRN_SRR1;	\
+	stw	r1,GPR1(r11);	\
+	stw	r1,0(r11);	\
+	tovirt(r1,r11);			/* set new kernel sp */	\
+	li	r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
+	MTMSRD(r10);			/* (except for mach check in rtas) */ \
+	stw	r0,GPR0(r11);	\
+	SAVE_4GPRS(3, r11);	\
+	SAVE_2GPRS(7, r11)
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ */
+#define EXCEPTION(n, label, hdlr, xfer)		\
+	. = n;					\
+label:						\
+	EXCEPTION_PROLOG;			\
+	addi	r3,r1,STACK_FRAME_OVERHEAD;	\
+	xfer(n, hdlr)
+
+#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret)	\
+	li	r10,trap;					\
+	stw	r10,TRAP(r11);					\
+	li	r10,MSR_KERNEL;					\
+	copyee(r10, r9);					\
+	bl	tfer;						\
+i##n:								\
+	.long	hdlr;						\
+	.long	ret
+
+#define COPY_EE(d, s)		rlwimi d,s,0,16,16
+#define NOCOPY(d, s)
+
+#define EXC_XFER_STD(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full,	\
+			  ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
+			  ret_from_except)
+
+#define EXC_XFER_EE(n, hdlr)		\
+	EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
+			  ret_from_except_full)
+
+#define EXC_XFER_EE_LITE(n, hdlr)	\
+	EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
+			  ret_from_except)
+
+/* System reset */
+	EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD)
+
+/* Machine check */
+	. = 0x200
+MachineCheck:
+	EXCEPTION_PROLOG
+	mfspr r4,SPRN_DAR
+	stw r4,_DAR(r11)
+	mfspr r5,SPRN_DSISR
+	stw r5,_DSISR(r11)
+	addi r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_STD(0x200, MachineCheckException)
+
+/* Data access exception.
+ * This is "never generated" by the MPC8xx.  We jump to it for other
+ * translation errors.
+ */
+	. = 0x300
+DataAccess:
+	EXCEPTION_PROLOG
+	mfspr	r10,SPRN_DSISR
+	stw	r10,_DSISR(r11)
+	mr	r5,r10
+	mfspr	r4,SPRN_DAR
+	EXC_XFER_EE_LITE(0x300, handle_page_fault)
+
+/* Instruction access exception.
+ * This is "never generated" by the MPC8xx.  We jump to it for other
+ * translation errors.
+ */
+	. = 0x400
+InstructionAccess:
+	EXCEPTION_PROLOG
+	mr	r4,r12
+	mr	r5,r9
+	EXC_XFER_EE_LITE(0x400, handle_page_fault)
+
+/* External interrupt */
+	EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+
+/* Alignment exception */
+	. = 0x600
+Alignment:
+	EXCEPTION_PROLOG
+	mfspr	r4,SPRN_DAR
+	stw	r4,_DAR(r11)
+	mfspr	r5,SPRN_DSISR
+	stw	r5,_DSISR(r11)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE(0x600, AlignmentException)
+
+/* Program check exception */
+	EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD)
+
+/* No FPU on MPC8xx.  This exception is not supposed to happen.
+*/
+	EXCEPTION(0x800, FPUnavailable, UnknownException, EXC_XFER_STD)
+
+/* Decrementer */
+	EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+
+	EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE)
+
+/* System call */
+	. = 0xc00
+SystemCall:
+	EXCEPTION_PROLOG
+	EXC_XFER_EE_LITE(0xc00, DoSyscall)
+
+/* Single step - not used on 601 */
+	EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD)
+	EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0xf00, Trap_0f, UnknownException, EXC_XFER_EE)
+
+/* On the MPC8xx, this is a software emulation interrupt.  It occurs
+ * for all unimplemented and illegal instructions.
+ */
+	EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD)
+
+	. = 0x1100
+/*
+ * For the MPC8xx, this is a software tablewalk to load the instruction
+ * TLB.  It is modelled after the example in the Motorola manual.  The task
+ * switch loads the M_TWB register with the pointer to the first level table.
+ * If we discover there is no second level table (value is zero) or if there
+ * is an invalid pte, we load that into the TLB, which causes another fault
+ * into the TLB Error interrupt where we can handle such problems.
+ * We have to use the MD_xxx registers for the tablewalk because the
+ * equivalent MI_xxx registers only perform the attribute functions.
+ */
+InstructionTLBMiss:
+#ifdef CONFIG_8xx_CPU6
+	stw	r3, 8(r0)
+#endif
+	DO_8xx_CPU6(0x3f80, r3)
+	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
+	mfcr	r10
+	stw	r10, 0(r0)
+	stw	r11, 4(r0)
+	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
+	DO_8xx_CPU6(0x3780, r3)
+	mtspr	SPRN_MD_EPN, r10	/* Have to use MD_EPN for walk, MI_EPN can't */
+	mfspr	r10, SPRN_M_TWB	/* Get level 1 table entry address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	andi.	r11, r10, 0x0800	/* Address >= 0x80000000 */
+	beq	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	rlwimi	r10, r11, 0, 2, 19
+3:
+	lwz	r11, 0(r10)	/* Get the level 1 entry */
+	rlwinm.	r10, r11,0,0,19	/* Extract page descriptor page address */
+	beq	2f		/* If zero, don't try to find a pte */
+
+	/* We have a pte table, so load the MI_TWC with the attributes
+	 * for this "segment."
+	 */
+	ori	r11,r11,1		/* Set valid bit */
+	DO_8xx_CPU6(0x2b80, r3)
+	mtspr	SPRN_MI_TWC, r11	/* Set segment attributes */
+	DO_8xx_CPU6(0x3b80, r3)
+	mtspr	SPRN_MD_TWC, r11	/* Load pte table base address */
+	mfspr	r11, SPRN_MD_TWC	/* ....and get the pte address */
+	lwz	r10, 0(r11)	/* Get the pte */
+
+	ori	r10, r10, _PAGE_ACCESSED
+	stw	r10, 0(r11)
+
+	/* The Linux PTE won't go exactly into the MMU TLB.
+	 * Software indicator bits 21, 22 and 28 must be clear.
+	 * Software indicator bits 24, 25, 26, and 27 must be
+	 * set.  All other Linux PTE bits control the behavior
+	 * of the MMU.
+	 */
+2:	li	r11, 0x00f0
+	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
+	DO_8xx_CPU6(0x2d80, r3)
+	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
+
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	rfi
+
+	. = 0x1200
+DataStoreTLBMiss:
+#ifdef CONFIG_8xx_CPU6
+	stw	r3, 8(r0)
+#endif
+	DO_8xx_CPU6(0x3f80, r3)
+	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
+	mfcr	r10
+	stw	r10, 0(r0)
+	stw	r11, 4(r0)
+	mfspr	r10, SPRN_M_TWB	/* Get level 1 table entry address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	andi.	r11, r10, 0x0800
+	beq	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	rlwimi	r10, r11, 0, 2, 19
+3:
+	lwz	r11, 0(r10)	/* Get the level 1 entry */
+	rlwinm.	r10, r11,0,0,19	/* Extract page descriptor page address */
+	beq	2f		/* If zero, don't try to find a pte */
+
+	/* We have a pte table, so load fetch the pte from the table.
+	 */
+	ori	r11, r11, 1	/* Set valid bit in physical L2 page */
+	DO_8xx_CPU6(0x3b80, r3)
+	mtspr	SPRN_MD_TWC, r11	/* Load pte table base address */
+	mfspr	r10, SPRN_MD_TWC	/* ....and get the pte address */
+	lwz	r10, 0(r10)	/* Get the pte */
+
+	/* Insert the Guarded flag into the TWC from the Linux PTE.
+	 * It is bit 27 of both the Linux PTE and the TWC (at least
+	 * I got that right :-).  It will be better when we can put
+	 * this into the Linux pgd/pmd and load it in the operation
+	 * above.
+	 */
+	rlwimi	r11, r10, 0, 27, 27
+	DO_8xx_CPU6(0x3b80, r3)
+	mtspr	SPRN_MD_TWC, r11
+
+	mfspr	r11, SPRN_MD_TWC	/* get the pte address again */
+	ori	r10, r10, _PAGE_ACCESSED
+	stw	r10, 0(r11)
+
+	/* The Linux PTE won't go exactly into the MMU TLB.
+	 * Software indicator bits 21, 22 and 28 must be clear.
+	 * Software indicator bits 24, 25, 26, and 27 must be
+	 * set.  All other Linux PTE bits control the behavior
+	 * of the MMU.
+	 */
+2:	li	r11, 0x00f0
+	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
+	DO_8xx_CPU6(0x3d80, r3)
+	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	rfi
+
+/* This is an instruction TLB error on the MPC8xx.  This could be due
+ * to many reasons, such as executing guarded memory or illegal instruction
+ * addresses.  There is nothing to do but handle a big time error fault.
+ */
+	. = 0x1300
+InstructionTLBError:
+	b	InstructionAccess
+
+/* This is the data TLB error on the MPC8xx.  This could be due to
+ * many reasons, including a dirty update to a pte.  We can catch that
+ * one here, but anything else is an error.  First, we track down the
+ * Linux pte.  If it is valid, write access is allowed, but the
+ * page dirty bit is not set, we will set it and reload the TLB.  For
+ * any other case, we bail out to a higher level function that can
+ * handle it.
+ */
+	. = 0x1400
+DataTLBError:
+#ifdef CONFIG_8xx_CPU6
+	stw	r3, 8(r0)
+#endif
+	DO_8xx_CPU6(0x3f80, r3)
+	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
+	mfcr	r10
+	stw	r10, 0(r0)
+	stw	r11, 4(r0)
+
+	/* First, make sure this was a store operation.
+	*/
+	mfspr	r10, SPRN_DSISR
+	andis.	r11, r10, 0x0200	/* If set, indicates store op */
+	beq	2f
+
+	/* The EA of a data TLB miss is automatically stored in the MD_EPN
+	 * register.  The EA of a data TLB error is automatically stored in
+	 * the DAR, but not the MD_EPN register.  We must copy the 20 most
+	 * significant bits of the EA from the DAR to MD_EPN before we
+	 * start walking the page tables.  We also need to copy the CASID
+	 * value from the M_CASID register.
+	 * Addendum:  The EA of a data TLB error is _supposed_ to be stored
+	 * in DAR, but it seems that this doesn't happen in some cases, such
+	 * as when the error is due to a dcbi instruction to a page with a
+	 * TLB that doesn't have the changed bit set.  In such cases, there
+	 * does not appear to be any way  to recover the EA of the error
+	 * since it is neither in DAR nor MD_EPN.  As a workaround, the
+	 * _PAGE_HWWRITE bit is set for all kernel data pages when the PTEs
+	 * are initialized in mapin_ram().  This will avoid the problem,
+	 * assuming we only use the dcbi instruction on kernel addresses.
+	 */
+	mfspr	r10, SPRN_DAR
+	rlwinm	r11, r10, 0, 0, 19
+	ori	r11, r11, MD_EVALID
+	mfspr	r10, SPRN_M_CASID
+	rlwimi	r11, r10, 0, 28, 31
+	DO_8xx_CPU6(0x3780, r3)
+	mtspr	SPRN_MD_EPN, r11
+
+	mfspr	r10, SPRN_M_TWB	/* Get level 1 table entry address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	andi.	r11, r10, 0x0800
+	beq	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	rlwimi	r10, r11, 0, 2, 19
+3:
+	lwz	r11, 0(r10)	/* Get the level 1 entry */
+	rlwinm.	r10, r11,0,0,19	/* Extract page descriptor page address */
+	beq	2f		/* If zero, bail */
+
+	/* We have a pte table, so fetch the pte from the table.
+	 */
+	ori	r11, r11, 1		/* Set valid bit in physical L2 page */
+	DO_8xx_CPU6(0x3b80, r3)
+	mtspr	SPRN_MD_TWC, r11		/* Load pte table base address */
+	mfspr	r11, SPRN_MD_TWC		/* ....and get the pte address */
+	lwz	r10, 0(r11)		/* Get the pte */
+
+	andi.	r11, r10, _PAGE_RW	/* Is it writeable? */
+	beq	2f			/* Bail out if not */
+
+	/* Update 'changed', among others.
+	*/
+	ori	r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
+	mfspr	r11, SPRN_MD_TWC		/* Get pte address again */
+	stw	r10, 0(r11)		/* and update pte in table */
+
+	/* The Linux PTE won't go exactly into the MMU TLB.
+	 * Software indicator bits 21, 22 and 28 must be clear.
+	 * Software indicator bits 24, 25, 26, and 27 must be
+	 * set.  All other Linux PTE bits control the behavior
+	 * of the MMU.
+	 */
+	li	r11, 0x00f0
+	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
+	DO_8xx_CPU6(0x3d80, r3)
+	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	rfi
+2:
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	DataAccess
+
+	EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE)
+
+/* On the MPC8xx, these next four traps are used for development
+ * support of breakpoints and such.  Someday I will get around to
+ * using them.
+ */
+	EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE)
+	EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE)
+
+	. = 0x2000
+
+	.globl	giveup_fpu
+giveup_fpu:
+	blr
+
+/*
+ * This is where the main kernel code starts.
+ */
+start_here:
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+
+	/* ptr to phys current thread */
+	tophys(r4,r2)
+	addi	r4,r4,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG3,r4
+	li	r3,0
+	mtspr	SPRN_SPRG2,r3	/* 0 => r1 has kernel sp */
+
+	/* stack */
+	lis	r1,init_thread_union@ha
+	addi	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+	bl	early_init	/* We have to do this with MMU on */
+
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+	mr	r3,r31
+	mr	r4,r30
+	mr	r5,r29
+	mr	r6,r28
+	mr	r7,r27
+	bl	machine_init
+	bl	MMU_init
+
+/*
+ * Go back to running unmapped so we can load up new values
+ * and change to using our exception vectors.
+ * On the 8xx, all we have to do is invalidate the TLB to clear
+ * the old 8M byte TLB mappings and load the page table base register.
+ */
+	/* The right way to do this would be to track it down through
+	 * init's THREAD like the context switch code does, but this is
+	 * easier......until someone changes init's static structures.
+	 */
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	tophys(r6,r6)
+#ifdef CONFIG_8xx_CPU6
+	lis	r4, cpu6_errata_word@h
+	ori	r4, r4, cpu6_errata_word@l
+	li	r3, 0x3980
+	stw	r3, 12(r4)
+	lwz	r3, 12(r4)
+#endif
+	mtspr	SPRN_M_TWB, r6
+	lis	r4,2f@h
+	ori	r4,r4,2f@l
+	tophys(r4,r4)
+	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi
+/* Load up the kernel context */
+2:
+	SYNC			/* Force all PTE updates to finish */
+	tlbia			/* Clear all TLB entries */
+	sync			/* wait for tlbia/tlbie to finish */
+	TLBSYNC			/* ... on all CPUs */
+
+	/* set up the PTE pointers for the Abatron bdiGDB.
+	*/
+	tovirt(r6,r6)
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r5, 0xf0(r0)	/* Must match your Abatron config file */
+	tophys(r5,r5)
+	stw	r6, 0(r5)
+
+/* Now turn on the MMU for real! */
+	li	r4,MSR_KERNEL
+	lis	r3,start_kernel@h
+	ori	r3,r3,start_kernel@l
+	mtspr	SPRN_SRR0,r3
+	mtspr	SPRN_SRR1,r4
+	rfi			/* enable MMU and jump to start_kernel */
+
+/* Set up the initial MMU state so we can do the first level of
+ * kernel initialization.  This maps the first 8 MBytes of memory 1:1
+ * virtual to physical.  Also, set the cache mode since that is defined
+ * by TLB entries and perform any additional mapping (like of the IMMR).
+ * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel,
+ * 24 Mbytes of data, and the 8M IMMR space.  Anything not covered by
+ * these mappings is mapped by page tables.
+ */
+initial_mmu:
+	tlbia			/* Invalidate all TLB entries */
+#ifdef CONFIG_PIN_TLB
+	lis	r8, MI_RSV4I@h
+	ori	r8, r8, 0x1c00
+#else
+	li	r8, 0
+#endif
+	mtspr	SPRN_MI_CTR, r8	/* Set instruction MMU control */
+
+#ifdef CONFIG_PIN_TLB
+	lis	r10, (MD_RSV4I | MD_RESETVAL)@h
+	ori	r10, r10, 0x1c00
+	mr	r8, r10
+#else
+	lis	r10, MD_RESETVAL@h
+#endif
+#ifndef CONFIG_8xx_COPYBACK
+	oris	r10, r10, MD_WTDEF@h
+#endif
+	mtspr	SPRN_MD_CTR, r10	/* Set data TLB control */
+
+	/* Now map the lower 8 Meg into the TLBs.  For this quick hack,
+	 * we can load the instruction and data TLB registers with the
+	 * same values.
+	 */
+	lis	r8, KERNELBASE@h	/* Create vaddr for TLB */
+	ori	r8, r8, MI_EVALID	/* Mark it valid */
+	mtspr	SPRN_MI_EPN, r8
+	mtspr	SPRN_MD_EPN, r8
+	li	r8, MI_PS8MEG		/* Set 8M byte page */
+	ori	r8, r8, MI_SVALID	/* Make it valid */
+	mtspr	SPRN_MI_TWC, r8
+	mtspr	SPRN_MD_TWC, r8
+	li	r8, MI_BOOTINIT		/* Create RPN for address 0 */
+	mtspr	SPRN_MI_RPN, r8		/* Store TLB entry */
+	mtspr	SPRN_MD_RPN, r8
+	lis	r8, MI_Kp@h		/* Set the protection mode */
+	mtspr	SPRN_MI_AP, r8
+	mtspr	SPRN_MD_AP, r8
+
+	/* Map another 8 MByte at the IMMR to get the processor
+	 * internal registers (among other things).
+	 */
+#ifdef CONFIG_PIN_TLB
+	addi	r10, r10, 0x0100
+	mtspr	SPRN_MD_CTR, r10
+#endif
+	mfspr	r9, 638			/* Get current IMMR */
+	andis.	r9, r9, 0xff80		/* Get 8Mbyte boundary */
+
+	mr	r8, r9			/* Create vaddr for TLB */
+	ori	r8, r8, MD_EVALID	/* Mark it valid */
+	mtspr	SPRN_MD_EPN, r8
+	li	r8, MD_PS8MEG		/* Set 8M byte page */
+	ori	r8, r8, MD_SVALID	/* Make it valid */
+	mtspr	SPRN_MD_TWC, r8
+	mr	r8, r9			/* Create paddr for TLB */
+	ori	r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
+	mtspr	SPRN_MD_RPN, r8
+
+#ifdef CONFIG_PIN_TLB
+	/* Map two more 8M kernel data pages.
+	*/
+	addi	r10, r10, 0x0100
+	mtspr	SPRN_MD_CTR, r10
+
+	lis	r8, KERNELBASE@h	/* Create vaddr for TLB */
+	addis	r8, r8, 0x0080		/* Add 8M */
+	ori	r8, r8, MI_EVALID	/* Mark it valid */
+	mtspr	SPRN_MD_EPN, r8
+	li	r9, MI_PS8MEG		/* Set 8M byte page */
+	ori	r9, r9, MI_SVALID	/* Make it valid */
+	mtspr	SPRN_MD_TWC, r9
+	li	r11, MI_BOOTINIT	/* Create RPN for address 0 */
+	addis	r11, r11, 0x0080	/* Add 8M */
+	mtspr	SPRN_MD_RPN, r8
+
+	addis	r8, r8, 0x0080		/* Add 8M */
+	mtspr	SPRN_MD_EPN, r8
+	mtspr	SPRN_MD_TWC, r9
+	addis	r11, r11, 0x0080	/* Add 8M */
+	mtspr	SPRN_MD_RPN, r8
+#endif
+
+	/* Since the cache is enabled according to the information we
+	 * just loaded into the TLB, invalidate and enable the caches here.
+	 * We should probably check/set other modes....later.
+	 */
+	lis	r8, IDC_INVALL@h
+	mtspr	SPRN_IC_CST, r8
+	mtspr	SPRN_DC_CST, r8
+	lis	r8, IDC_ENABLE@h
+	mtspr	SPRN_IC_CST, r8
+#ifdef CONFIG_8xx_COPYBACK
+	mtspr	SPRN_DC_CST, r8
+#else
+	/* For a debug option, I left this here to easily enable
+	 * the write through cache mode
+	 */
+	lis	r8, DC_SFWT@h
+	mtspr	SPRN_DC_CST, r8
+	lis	r8, IDC_ENABLE@h
+	mtspr	SPRN_DC_CST, r8
+#endif
+	blr
+
+
+/*
+ * Set up to use a given MMU context.
+ * r3 is context number, r4 is PGD pointer.
+ *
+ * We place the physical address of the new task page directory loaded
+ * into the MMU base register, and set the ASID compare register with
+ * the new "context."
+ */
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Context switch the PTE pointer for the Abatron BDI2000.
+	 * The PGDIR is passed as second argument.
+	 */
+	lis	r5, KERNELBASE@h
+	lwz	r5, 0xf0(r5)
+	stw	r4, 0x4(r5)
+#endif
+
+#ifdef CONFIG_8xx_CPU6
+	lis	r6, cpu6_errata_word@h
+	ori	r6, r6, cpu6_errata_word@l
+	tophys	(r4, r4)
+	li	r7, 0x3980
+	stw	r7, 12(r6)
+	lwz	r7, 12(r6)
+        mtspr   SPRN_M_TWB, r4               /* Update MMU base address */
+	li	r7, 0x3380
+	stw	r7, 12(r6)
+	lwz	r7, 12(r6)
+        mtspr   SPRN_M_CASID, r3             /* Update context */
+#else
+        mtspr   SPRN_M_CASID,r3		/* Update context */
+	tophys	(r4, r4)
+	mtspr	SPRN_M_TWB, r4		/* and pgd */
+#endif
+	SYNC
+	blr
+
+#ifdef CONFIG_8xx_CPU6
+/* It's here because it is unique to the 8xx.
+ * It is important we get called with interrupts disabled.  I used to
+ * do that, but it appears that all code that calls this already had
+ * interrupt disabled.
+ */
+	.globl	set_dec_cpu6
+set_dec_cpu6:
+	lis	r7, cpu6_errata_word@h
+	ori	r7, r7, cpu6_errata_word@l
+	li	r4, 0x2c00
+	stw	r4, 8(r7)
+	lwz	r4, 8(r7)
+        mtspr   22, r3		/* Update Decrementer */
+	SYNC
+	blr
+#endif
+
+/*
+ * We put a few things here that have to be page-aligned.
+ * This stuff goes at the beginning of the data segment,
+ * which is page-aligned.
+ */
+	.data
+	.globl	sdata
+sdata:
+	.globl	empty_zero_page
+empty_zero_page:
+	.space	4096
+
+	.globl	swapper_pg_dir
+swapper_pg_dir:
+	.space	4096
+
+/*
+ * This space gets a copy of optional info passed to us by the bootstrap
+ * Used to pass parameters into the kernel like root=/dev/sda1, etc.
+ */
+	.globl	cmd_line
+cmd_line:
+	.space	512
+
+/* Room for two PTE table poiners, usually the kernel and current user
+ * pointer to their respective root page table (pgdir).
+ */
+abatron_pteptrs:
+	.space	8
+
+#ifdef CONFIG_8xx_CPU6
+	.globl	cpu6_errata_word
+cpu6_errata_word:
+	.space	16
+#endif
+
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
new file mode 100644
index 00000000000..eba5a5f8ff0
--- /dev/null
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -0,0 +1,1058 @@
+/*
+ * arch/ppc/kernel/head_fsl_booke.S
+ *
+ * Kernel execution entry point code.
+ *
+ *    Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
+ *      Initial PowerPC version.
+ *    Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *      Rewritten for PReP
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Low-level exception handers, MMU support, and rewrite.
+ *    Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
+ *      PowerPC 8xx modifications.
+ *    Copyright (c) 1998-1999 TiVo, Inc.
+ *      PowerPC 403GCX modifications.
+ *    Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *      PowerPC 403GCX/405GP modifications.
+ *    Copyright 2000 MontaVista Software Inc.
+ *	PPC405 modifications
+ *      PowerPC 403GCX/405GP modifications.
+ * 	Author: MontaVista Software, Inc.
+ *         	frank_rowand@mvista.com or source@mvista.com
+ * 	   	debbie_chu@mvista.com
+ *    Copyright 2002-2004 MontaVista Software, Inc.
+ *      PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
+ *    Copyright 2004 Freescale Semiconductor, Inc
+ *      PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include "head_booke.h"
+
+/* As with the other PowerPC ports, it is expected that when code
+ * execution begins here, the following registers contain valid, yet
+ * optional, information:
+ *
+ *   r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
+ *   r4 - Starting address of the init RAM disk
+ *   r5 - Ending address of the init RAM disk
+ *   r6 - Start of kernel command line string (e.g. "mem=128")
+ *   r7 - End of kernel command line string
+ *
+ */
+	.text
+_GLOBAL(_stext)
+_GLOBAL(_start)
+	/*
+	 * Reserve a word at a fixed location to store the address
+	 * of abatron_pteptrs
+	 */
+	nop
+/*
+ * Save parameters we are passed
+ */
+	mr	r31,r3
+	mr	r30,r4
+	mr	r29,r5
+	mr	r28,r6
+	mr	r27,r7
+	li	r24,0		/* CPU number */
+
+/* We try to not make any assumptions about how the boot loader
+ * setup or used the TLBs.  We invalidate all mappings from the
+ * boot loader and load a single entry in TLB1[0] to map the
+ * first 16M of kernel memory.  Any boot info passed from the
+ * bootloader needs to live in this first 16M.
+ *
+ * Requirement on bootloader:
+ *  - The page we're executing in needs to reside in TLB1 and
+ *    have IPROT=1.  If not an invalidate broadcast could
+ *    evict the entry we're currently executing in.
+ *
+ *  r3 = Index of TLB1 were executing in
+ *  r4 = Current MSR[IS]
+ *  r5 = Index of TLB1 temp mapping
+ *
+ * Later in mapin_ram we will correctly map lowmem, and resize TLB1[0]
+ * if needed
+ */
+
+/* 1. Find the index of the entry we're executing in */
+	bl	invstr				/* Find our address */
+invstr:	mflr	r6				/* Make it accessible */
+	mfmsr	r7
+	rlwinm	r4,r7,27,31,31			/* extract MSR[IS] */
+	mfspr	r7, SPRN_PID0
+	slwi	r7,r7,16
+	or	r7,r7,r4
+	mtspr	SPRN_MAS6,r7
+	tlbsx	0,r6				/* search MSR[IS], SPID=PID0 */
+#ifndef CONFIG_E200
+	mfspr	r7,SPRN_MAS1
+	andis.	r7,r7,MAS1_VALID@h
+	bne	match_TLB
+	mfspr	r7,SPRN_PID1
+	slwi	r7,r7,16
+	or	r7,r7,r4
+	mtspr	SPRN_MAS6,r7
+	tlbsx	0,r6				/* search MSR[IS], SPID=PID1 */
+	mfspr	r7,SPRN_MAS1
+	andis.	r7,r7,MAS1_VALID@h
+	bne	match_TLB
+	mfspr	r7, SPRN_PID2
+	slwi	r7,r7,16
+	or	r7,r7,r4
+	mtspr	SPRN_MAS6,r7
+	tlbsx	0,r6				/* Fall through, we had to match */
+#endif
+match_TLB:
+	mfspr	r7,SPRN_MAS0
+	rlwinm	r3,r7,16,20,31			/* Extract MAS0(Entry) */
+
+	mfspr	r7,SPRN_MAS1			/* Insure IPROT set */
+	oris	r7,r7,MAS1_IPROT@h
+	mtspr	SPRN_MAS1,r7
+	tlbwe
+
+/* 2. Invalidate all entries except the entry we're executing in */
+	mfspr	r9,SPRN_TLB1CFG
+	andi.	r9,r9,0xfff
+	li	r6,0				/* Set Entry counter to 0 */
+1:	lis	r7,0x1000			/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r6,16,4,15			/* Setup MAS0 = TLBSEL | ESEL(r6) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+	mfspr	r7,SPRN_MAS1
+	rlwinm	r7,r7,0,2,31			/* Clear MAS1 Valid and IPROT */
+	cmpw	r3,r6
+	beq	skpinv				/* Dont update the current execution TLB */
+	mtspr	SPRN_MAS1,r7
+	tlbwe
+	isync
+skpinv:	addi	r6,r6,1				/* Increment */
+	cmpw	r6,r9				/* Are we done? */
+	bne	1b				/* If not, repeat */
+
+	/* Invalidate TLB0 */
+	li      r6,0x04
+	tlbivax 0,r6
+#ifdef CONFIG_SMP
+	tlbsync
+#endif
+	/* Invalidate TLB1 */
+	li      r6,0x0c
+	tlbivax 0,r6
+#ifdef CONFIG_SMP
+	tlbsync
+#endif
+	msync
+
+/* 3. Setup a temp mapping and jump to it */
+	andi.	r5, r3, 0x1	/* Find an entry not used and is non-zero */
+	addi	r5, r5, 0x1
+	lis	r7,0x1000	/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r3,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+
+	/* Just modify the entry ID and EPN for the temp mapping */
+	lis	r7,0x1000	/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r5,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r5) */
+	mtspr	SPRN_MAS0,r7
+	xori	r6,r4,1		/* Setup TMP mapping in the other Address space */
+	slwi	r6,r6,12
+	oris	r6,r6,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r6,r6,(MAS1_TSIZE(BOOKE_PAGESZ_4K))@l
+	mtspr	SPRN_MAS1,r6
+	mfspr	r6,SPRN_MAS2
+	li	r7,0		/* temp EPN = 0 */
+	rlwimi	r7,r6,0,20,31
+	mtspr	SPRN_MAS2,r7
+	tlbwe
+
+	xori	r6,r4,1
+	slwi	r6,r6,5		/* setup new context with other address space */
+	bl	1f		/* Find our address */
+1:	mflr	r9
+	rlwimi	r7,r9,0,20,31
+	addi	r7,r7,24
+	mtspr	SPRN_SRR0,r7
+	mtspr	SPRN_SRR1,r6
+	rfi
+
+/* 4. Clear out PIDs & Search info */
+	li	r6,0
+	mtspr	SPRN_PID0,r6
+#ifndef CONFIG_E200
+	mtspr	SPRN_PID1,r6
+	mtspr	SPRN_PID2,r6
+#endif
+	mtspr	SPRN_MAS6,r6
+
+/* 5. Invalidate mapping we started in */
+	lis	r7,0x1000	/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r3,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r3) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+	li	r6,0
+	mtspr	SPRN_MAS1,r6
+	tlbwe
+	/* Invalidate TLB1 */
+	li      r9,0x0c
+	tlbivax 0,r9
+#ifdef CONFIG_SMP
+	tlbsync
+#endif
+	msync
+
+/* 6. Setup KERNELBASE mapping in TLB1[0] */
+	lis	r6,0x1000		/* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
+	mtspr	SPRN_MAS0,r6
+	lis	r6,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r6,r6,(MAS1_TSIZE(BOOKE_PAGESZ_16M))@l
+	mtspr	SPRN_MAS1,r6
+	li	r7,0
+	lis	r6,KERNELBASE@h
+	ori	r6,r6,KERNELBASE@l
+	rlwimi	r6,r7,0,20,31
+	mtspr	SPRN_MAS2,r6
+	li	r7,(MAS3_SX|MAS3_SW|MAS3_SR)
+	mtspr	SPRN_MAS3,r7
+	tlbwe
+
+/* 7. Jump to KERNELBASE mapping */
+	lis	r7,MSR_KERNEL@h
+	ori	r7,r7,MSR_KERNEL@l
+	bl	1f			/* Find our address */
+1:	mflr	r9
+	rlwimi	r6,r9,0,20,31
+	addi	r6,r6,24
+	mtspr	SPRN_SRR0,r6
+	mtspr	SPRN_SRR1,r7
+	rfi				/* start execution out of TLB1[0] entry */
+
+/* 8. Clear out the temp mapping */
+	lis	r7,0x1000	/* Set MAS0(TLBSEL) = 1 */
+	rlwimi	r7,r5,16,4,15	/* Setup MAS0 = TLBSEL | ESEL(r5) */
+	mtspr	SPRN_MAS0,r7
+	tlbre
+	mtspr	SPRN_MAS1,r8
+	tlbwe
+	/* Invalidate TLB1 */
+	li      r9,0x0c
+	tlbivax 0,r9
+#ifdef CONFIG_SMP
+	tlbsync
+#endif
+	msync
+
+	/* Establish the interrupt vector offsets */
+	SET_IVOR(0,  CriticalInput);
+	SET_IVOR(1,  MachineCheck);
+	SET_IVOR(2,  DataStorage);
+	SET_IVOR(3,  InstructionStorage);
+	SET_IVOR(4,  ExternalInput);
+	SET_IVOR(5,  Alignment);
+	SET_IVOR(6,  Program);
+	SET_IVOR(7,  FloatingPointUnavailable);
+	SET_IVOR(8,  SystemCall);
+	SET_IVOR(9,  AuxillaryProcessorUnavailable);
+	SET_IVOR(10, Decrementer);
+	SET_IVOR(11, FixedIntervalTimer);
+	SET_IVOR(12, WatchdogTimer);
+	SET_IVOR(13, DataTLBError);
+	SET_IVOR(14, InstructionTLBError);
+	SET_IVOR(15, Debug);
+	SET_IVOR(32, SPEUnavailable);
+	SET_IVOR(33, SPEFloatingPointData);
+	SET_IVOR(34, SPEFloatingPointRound);
+#ifndef CONFIG_E200
+	SET_IVOR(35, PerformanceMonitor);
+#endif
+
+	/* Establish the interrupt vector base */
+	lis	r4,interrupt_base@h	/* IVPR only uses the high 16-bits */
+	mtspr	SPRN_IVPR,r4
+
+	/* Setup the defaults for TLB entries */
+	li	r2,(MAS4_TSIZED(BOOKE_PAGESZ_4K))@l
+#ifdef CONFIG_E200
+	oris	r2,r2,MAS4_TLBSELD(1)@h
+#endif
+   	mtspr	SPRN_MAS4, r2
+
+#if 0
+	/* Enable DOZE */
+	mfspr	r2,SPRN_HID0
+	oris	r2,r2,HID0_DOZE@h
+	mtspr	SPRN_HID0, r2
+#endif
+#ifdef CONFIG_E200
+	/* enable dedicated debug exception handling resources (Debug APU) */
+	mfspr	r2,SPRN_HID0
+	ori 	r2,r2,HID0_DAPUEN@l
+	mtspr	SPRN_HID0,r2
+#endif
+
+#if !defined(CONFIG_BDI_SWITCH)
+	/*
+	 * The Abatron BDI JTAG debugger does not tolerate others
+	 * mucking with the debug registers.
+	 */
+	lis	r2,DBCR0_IDM@h
+	mtspr	SPRN_DBCR0,r2
+	/* clear any residual debug events */
+	li	r2,-1
+	mtspr	SPRN_DBSR,r2
+#endif
+
+	/*
+	 * This is where the main kernel code starts.
+	 */
+
+	/* ptr to current */
+	lis	r2,init_task@h
+	ori	r2,r2,init_task@l
+
+	/* ptr to current thread */
+	addi	r4,r2,THREAD	/* init task's THREAD */
+	mtspr	SPRN_SPRG3,r4
+
+	/* stack */
+	lis	r1,init_thread_union@h
+	ori	r1,r1,init_thread_union@l
+	li	r0,0
+	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+	bl	early_init
+
+	mfspr	r3,SPRN_TLB1CFG
+	andi.	r3,r3,0xfff
+	lis	r4,num_tlbcam_entries@ha
+	stw	r3,num_tlbcam_entries@l(r4)
+/*
+ * Decide what sort of machine this is and initialize the MMU.
+ */
+	mr	r3,r31
+	mr	r4,r30
+	mr	r5,r29
+	mr	r6,r28
+	mr	r7,r27
+	bl	machine_init
+	bl	MMU_init
+
+	/* Setup PTE pointers for the Abatron bdiGDB */
+	lis	r6, swapper_pg_dir@h
+	ori	r6, r6, swapper_pg_dir@l
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	lis	r4, KERNELBASE@h
+	ori	r4, r4, KERNELBASE@l
+	stw	r5, 0(r4)	/* Save abatron_pteptrs at a fixed location */
+	stw	r6, 0(r5)
+
+	/* Let's move on */
+	lis	r4,start_kernel@h
+	ori	r4,r4,start_kernel@l
+	lis	r3,MSR_KERNEL@h
+	ori	r3,r3,MSR_KERNEL@l
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r3
+	rfi			/* change context and jump to start_kernel */
+
+/* Macros to hide the PTE size differences
+ *
+ * FIND_PTE -- walks the page tables given EA & pgdir pointer
+ *   r10 -- EA of fault
+ *   r11 -- PGDIR pointer
+ *   r12 -- free
+ *   label 2: is the bailout case
+ *
+ * if we find the pte (fall through):
+ *   r11 is low pte word
+ *   r12 is pointer to the pte
+ */
+#ifdef CONFIG_PTE_64BIT
+#define PTE_FLAGS_OFFSET	4
+#define FIND_PTE	\
+	rlwinm 	r12, r10, 13, 19, 29;	/* Compute pgdir/pmd offset */	\
+	lwzx	r11, r12, r11;		/* Get pgd/pmd entry */		\
+	rlwinm.	r12, r11, 0, 0, 20;	/* Extract pt base address */	\
+	beq	2f;			/* Bail if no table */		\
+	rlwimi	r12, r10, 23, 20, 28;	/* Compute pte address */	\
+	lwz	r11, 4(r12);		/* Get pte entry */
+#else
+#define PTE_FLAGS_OFFSET	0
+#define FIND_PTE	\
+	rlwimi	r11, r10, 12, 20, 29;	/* Create L1 (pgdir/pmd) address */	\
+	lwz	r11, 0(r11);		/* Get L1 entry */			\
+	rlwinm.	r12, r11, 0, 0, 19;	/* Extract L2 (pte) base address */	\
+	beq	2f;			/* Bail if no table */			\
+	rlwimi	r12, r10, 22, 20, 29;	/* Compute PTE address */		\
+	lwz	r11, 0(r12);		/* Get Linux PTE */
+#endif
+
+/*
+ * Interrupt vector entry code
+ *
+ * The Book E MMUs are always on so we don't need to handle
+ * interrupts in real mode as with previous PPC processors. In
+ * this case we handle interrupts in the kernel virtual address
+ * space.
+ *
+ * Interrupt vectors are dynamically placed relative to the
+ * interrupt prefix as determined by the address of interrupt_base.
+ * The interrupt vectors offsets are programmed using the labels
+ * for each interrupt vector entry.
+ *
+ * Interrupt vectors must be aligned on a 16 byte boundary.
+ * We align on a 32 byte cache line boundary for good measure.
+ */
+
+interrupt_base:
+	/* Critical Input Interrupt */
+	CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException)
+
+	/* Machine Check Interrupt */
+#ifdef CONFIG_E200
+	/* no RFMCI, MCSRRs on E200 */
+	CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException)
+#else
+	MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException)
+#endif
+
+	/* Data Storage Interrupt */
+	START_EXCEPTION(DataStorage)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+	mtspr	SPRN_SPRG4W, r12
+	mtspr	SPRN_SPRG5W, r13
+	mfcr	r11
+	mtspr	SPRN_SPRG7W, r11
+
+	/*
+	 * Check if it was a store fault, if not then bail
+	 * because a user tried to access a kernel or
+	 * read-protected page.  Otherwise, get the
+	 * offending address and handle it.
+	 */
+	mfspr	r10, SPRN_ESR
+	andis.	r10, r10, ESR_ST@h
+	beq	2f
+
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	ori	r11, r11, TASK_SIZE@l
+	cmplw	0, r10, r11
+	bge	2f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+4:
+	FIND_PTE
+
+	/* Are _PAGE_USER & _PAGE_RW set & _PAGE_HWWRITE not? */
+	andi.	r13, r11, _PAGE_RW|_PAGE_USER|_PAGE_HWWRITE
+	cmpwi	0, r13, _PAGE_RW|_PAGE_USER
+	bne	2f			/* Bail if not */
+
+	/* Update 'changed'. */
+	ori	r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
+	stw	r11, PTE_FLAGS_OFFSET(r12) /* Update Linux page table */
+
+	/* MAS2 not updated as the entry does exist in the tlb, this
+	   fault taken to detect state transition (eg: COW -> DIRTY)
+	 */
+	andi.	r11, r11, _PAGE_HWEXEC
+	rlwimi	r11, r11, 31, 27, 27	/* SX <- _PAGE_HWEXEC */
+	ori     r11, r11, (MAS3_UW|MAS3_SW|MAS3_UR|MAS3_SR)@l /* set static perms */
+
+	/* update search PID in MAS6, AS = 0 */
+	mfspr	r12, SPRN_PID0
+	slwi	r12, r12, 16
+	mtspr	SPRN_MAS6, r12
+
+	/* find the TLB index that caused the fault.  It has to be here. */
+	tlbsx	0, r10
+
+	/* only update the perm bits, assume the RPN is fine */
+	mfspr	r12, SPRN_MAS3
+	rlwimi	r12, r11, 0, 20, 31
+	mtspr	SPRN_MAS3,r12
+	tlbwe
+
+	/* Done...restore registers and get out of here.  */
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	rfi			/* Force context change */
+
+2:
+	/*
+	 * The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	data_access
+
+	/* Instruction Storage Interrupt */
+	INSTRUCTION_STORAGE_EXCEPTION
+
+	/* External Input Interrupt */
+	EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE)
+
+	/* Alignment Interrupt */
+	ALIGNMENT_EXCEPTION
+
+	/* Program Interrupt */
+	PROGRAM_EXCEPTION
+
+	/* Floating Point Unavailable Interrupt */
+#ifdef CONFIG_PPC_FPU
+	FP_UNAVAILABLE_EXCEPTION
+#else
+#ifdef CONFIG_E200
+	/* E200 treats 'normal' floating point instructions as FP Unavail exception */
+	EXCEPTION(0x0800, FloatingPointUnavailable, ProgramCheckException, EXC_XFER_EE)
+#else
+	EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE)
+#endif
+#endif
+
+	/* System Call Interrupt */
+	START_EXCEPTION(SystemCall)
+	NORMAL_EXCEPTION_PROLOG
+	EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+
+	/* Auxillary Processor Unavailable Interrupt */
+	EXCEPTION(0x2900, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE)
+
+	/* Decrementer Interrupt */
+	DECREMENTER_EXCEPTION
+
+	/* Fixed Internal Timer Interrupt */
+	/* TODO: Add FIT support */
+	EXCEPTION(0x3100, FixedIntervalTimer, UnknownException, EXC_XFER_EE)
+
+	/* Watchdog Timer Interrupt */
+#ifdef CONFIG_BOOKE_WDT
+	CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException)
+#else
+	CRITICAL_EXCEPTION(0x3200, WatchdogTimer, UnknownException)
+#endif
+
+	/* Data TLB Error Interrupt */
+	START_EXCEPTION(DataTLBError)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+	mtspr	SPRN_SPRG4W, r12
+	mtspr	SPRN_SPRG5W, r13
+	mfcr	r11
+	mtspr	SPRN_SPRG7W, r11
+	mfspr	r10, SPRN_DEAR		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	ori	r11, r11, TASK_SIZE@l
+	cmplw	5, r10, r11
+	blt	5, 3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MAS1		/* Set TID to 0 */
+	rlwinm	r12,r12,0,16,1
+	mtspr	SPRN_MAS1,r12
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+
+4:
+	FIND_PTE
+	andi.	r13, r11, _PAGE_PRESENT	/* Is the page present? */
+	beq	2f			/* Bail if not present */
+
+#ifdef CONFIG_PTE_64BIT
+	lwz	r13, 0(r12)
+#endif
+	ori	r11, r11, _PAGE_ACCESSED
+	stw	r11, PTE_FLAGS_OFFSET(r12)
+
+	 /* Jump to common tlb load */
+	b	finish_tlb_load
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	data_access
+
+	/* Instruction TLB Error Interrupt */
+	/*
+	 * Nearly the same as above, except we get our
+	 * information from different registers and bailout
+	 * to a different point.
+	 */
+	START_EXCEPTION(InstructionTLBError)
+	mtspr	SPRN_SPRG0, r10		/* Save some working registers */
+	mtspr	SPRN_SPRG1, r11
+	mtspr	SPRN_SPRG4W, r12
+	mtspr	SPRN_SPRG5W, r13
+	mfcr	r11
+	mtspr	SPRN_SPRG7W, r11
+	mfspr	r10, SPRN_SRR0		/* Get faulting address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	lis	r11, TASK_SIZE@h
+	ori	r11, r11, TASK_SIZE@l
+	cmplw	5, r10, r11
+	blt	5, 3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+
+	mfspr	r12,SPRN_MAS1		/* Set TID to 0 */
+	rlwinm	r12,r12,0,16,1
+	mtspr	SPRN_MAS1,r12
+
+	b	4f
+
+	/* Get the PGD for the current thread */
+3:
+	mfspr	r11,SPRN_SPRG3
+	lwz	r11,PGDIR(r11)
+
+4:
+	FIND_PTE
+	andi.	r13, r11, _PAGE_PRESENT	/* Is the page present? */
+	beq	2f			/* Bail if not present */
+
+#ifdef CONFIG_PTE_64BIT
+	lwz	r13, 0(r12)
+#endif
+	ori	r11, r11, _PAGE_ACCESSED
+	stw	r11, PTE_FLAGS_OFFSET(r12)
+
+	/* Jump to common TLB load point */
+	b	finish_tlb_load
+
+2:
+	/* The bailout.  Restore registers to pre-exception conditions
+	 * and call the heavyweights to help us out.
+	 */
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	b	InstructionStorage
+
+#ifdef CONFIG_SPE
+	/* SPE Unavailable */
+	START_EXCEPTION(SPEUnavailable)
+	NORMAL_EXCEPTION_PROLOG
+	bne	load_up_spe
+	addi    r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE_LITE(0x2010, KernelSPE)
+#else
+	EXCEPTION(0x2020, SPEUnavailable, UnknownException, EXC_XFER_EE)
+#endif /* CONFIG_SPE */
+
+	/* SPE Floating Point Data */
+#ifdef CONFIG_SPE
+	EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE);
+#else
+	EXCEPTION(0x2040, SPEFloatingPointData, UnknownException, EXC_XFER_EE)
+#endif /* CONFIG_SPE */
+
+	/* SPE Floating Point Round */
+	EXCEPTION(0x2050, SPEFloatingPointRound, UnknownException, EXC_XFER_EE)
+
+	/* Performance Monitor */
+	EXCEPTION(0x2060, PerformanceMonitor, PerformanceMonitorException, EXC_XFER_STD)
+
+
+	/* Debug Interrupt */
+	DEBUG_EXCEPTION
+
+/*
+ * Local functions
+ */
+
+	/*
+	 * Data TLB exceptions will bail out to this point
+	 * if they can't resolve the lightweight TLB fault.
+	 */
+data_access:
+	NORMAL_EXCEPTION_PROLOG
+	mfspr	r5,SPRN_ESR		/* Grab the ESR, save it, pass arg3 */
+	stw	r5,_ESR(r11)
+	mfspr	r4,SPRN_DEAR		/* Grab the DEAR, save it, pass arg2 */
+	andis.	r10,r5,(ESR_ILK|ESR_DLK)@h
+	bne	1f
+	EXC_XFER_EE_LITE(0x0300, handle_page_fault)
+1:
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	EXC_XFER_EE_LITE(0x0300, CacheLockingException)
+
+/*
+
+ * Both the instruction and data TLB miss get to this
+ * point to load the TLB.
+ * 	r10 - EA of fault
+ * 	r11 - TLB (info from Linux PTE)
+ * 	r12, r13 - available to use
+ * 	CR5 - results of addr < TASK_SIZE
+ *	MAS0, MAS1 - loaded with proper value when we get here
+ *	MAS2, MAS3 - will need additional info from Linux PTE
+ *	Upon exit, we reload everything and RFI.
+ */
+finish_tlb_load:
+	/*
+	 * We set execute, because we don't have the granularity to
+	 * properly set this at the page level (Linux problem).
+	 * Many of these bits are software only.  Bits we don't set
+	 * here we (properly should) assume have the appropriate value.
+	 */
+
+	mfspr	r12, SPRN_MAS2
+#ifdef CONFIG_PTE_64BIT
+	rlwimi	r12, r11, 26, 24, 31	/* extract ...WIMGE from pte */
+#else
+	rlwimi	r12, r11, 26, 27, 31	/* extract WIMGE from pte */
+#endif
+	mtspr	SPRN_MAS2, r12
+
+	bge	5, 1f
+
+	/* is user addr */
+	andi.	r12, r11, (_PAGE_USER | _PAGE_HWWRITE | _PAGE_HWEXEC)
+	andi.	r10, r11, _PAGE_USER	/* Test for _PAGE_USER */
+	srwi	r10, r12, 1
+	or	r12, r12, r10	/* Copy user perms into supervisor */
+	iseleq	r12, 0, r12
+	b	2f
+
+	/* is kernel addr */
+1:	rlwinm	r12, r11, 31, 29, 29	/* Extract _PAGE_HWWRITE into SW */
+	ori	r12, r12, (MAS3_SX | MAS3_SR)
+
+#ifdef CONFIG_PTE_64BIT
+2:	rlwimi	r12, r13, 24, 0, 7	/* grab RPN[32:39] */
+	rlwimi	r12, r11, 24, 8, 19	/* grab RPN[40:51] */
+	mtspr	SPRN_MAS3, r12
+BEGIN_FTR_SECTION
+	srwi	r10, r13, 8		/* grab RPN[8:31] */
+	mtspr	SPRN_MAS7, r10
+END_FTR_SECTION_IFSET(CPU_FTR_BIG_PHYS)
+#else
+2:	rlwimi	r11, r12, 0, 20, 31	/* Extract RPN from PTE and merge with perms */
+	mtspr	SPRN_MAS3, r11
+#endif
+#ifdef CONFIG_E200
+	/* Round robin TLB1 entries assignment */
+	mfspr	r12, SPRN_MAS0
+
+	/* Extract TLB1CFG(NENTRY) */
+	mfspr	r11, SPRN_TLB1CFG
+	andi.	r11, r11, 0xfff
+
+	/* Extract MAS0(NV) */
+	andi.	r13, r12, 0xfff
+	addi	r13, r13, 1
+	cmpw	0, r13, r11
+	addi	r12, r12, 1
+
+	/* check if we need to wrap */
+	blt	7f
+
+	/* wrap back to first free tlbcam entry */
+	lis	r13, tlbcam_index@ha
+	lwz	r13, tlbcam_index@l(r13)
+	rlwimi	r12, r13, 0, 20, 31
+7:
+	mtspr   SPRN_MAS0,r12
+#endif /* CONFIG_E200 */
+
+	tlbwe
+
+	/* Done...restore registers and get out of here.  */
+	mfspr	r11, SPRN_SPRG7R
+	mtcr	r11
+	mfspr	r13, SPRN_SPRG5R
+	mfspr	r12, SPRN_SPRG4R
+	mfspr	r11, SPRN_SPRG1
+	mfspr	r10, SPRN_SPRG0
+	rfi					/* Force context change */
+
+#ifdef CONFIG_SPE
+/* Note that the SPE support is closely modeled after the AltiVec
+ * support.  Changes to one are likely to be applicable to the
+ * other!  */
+load_up_spe:
+/*
+ * Disable SPE for the task which had SPE previously,
+ * and save its SPE registers in its thread_struct.
+ * Enables SPE for use in the kernel on return.
+ * On SMP we know the SPE units are free, since we give it up every
+ * switch.  -- Kumar
+ */
+	mfmsr	r5
+	oris	r5,r5,MSR_SPE@h
+	mtmsr	r5			/* enable use of SPE now */
+	isync
+/*
+ * For SMP, we don't do lazy SPE switching because it just gets too
+ * horrendously complex, especially when a task switches from one CPU
+ * to another.  Instead we call giveup_spe in switch_to.
+ */
+#ifndef CONFIG_SMP
+	lis	r3,last_task_used_spe@ha
+	lwz	r4,last_task_used_spe@l(r3)
+	cmpi	0,r4,0
+	beq	1f
+	addi	r4,r4,THREAD	/* want THREAD of last_task_used_spe */
+	SAVE_32EVRS(0,r10,r4)
+   	evxor	evr10, evr10, evr10	/* clear out evr10 */
+	evmwumiaa evr10, evr10, evr10	/* evr10 <- ACC = 0 * 0 + ACC */
+	li	r5,THREAD_ACC
+   	evstddx	evr10, r4, r5		/* save off accumulator */
+	lwz	r5,PT_REGS(r4)
+	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r10,MSR_SPE@h
+	andc	r4,r4,r10	/* disable SPE for previous task */
+	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+	/* enable use of SPE after return */
+	oris	r9,r9,MSR_SPE@h
+	mfspr	r5,SPRN_SPRG3		/* current task's THREAD (phys) */
+	li	r4,1
+	li	r10,THREAD_ACC
+	stw	r4,THREAD_USED_SPE(r5)
+	evlddx	evr4,r10,r5
+	evmra	evr4,evr4
+	REST_32EVRS(0,r10,r5)
+#ifndef CONFIG_SMP
+	subi	r4,r5,THREAD
+	stw	r4,last_task_used_spe@l(r3)
+#endif /* CONFIG_SMP */
+	/* restore registers and return */
+2:	REST_4GPRS(3, r11)
+	lwz	r10,_CCR(r11)
+	REST_GPR(1, r11)
+	mtcr	r10
+	lwz	r10,_LINK(r11)
+	mtlr	r10
+	REST_GPR(10, r11)
+	mtspr	SPRN_SRR1,r9
+	mtspr	SPRN_SRR0,r12
+	REST_GPR(9, r11)
+	REST_GPR(12, r11)
+	lwz	r11,GPR11(r11)
+	SYNC
+	rfi
+
+/*
+ * SPE unavailable trap from kernel - print a message, but let
+ * the task use SPE in the kernel until it returns to user mode.
+ */
+KernelSPE:
+	lwz	r3,_MSR(r1)
+	oris	r3,r3,MSR_SPE@h
+	stw	r3,_MSR(r1)	/* enable use of SPE after return */
+	lis	r3,87f@h
+	ori	r3,r3,87f@l
+	mr	r4,r2		/* current */
+	lwz	r5,_NIP(r1)
+	bl	printk
+	b	ret_from_except
+87:	.string	"SPE used in kernel  (task=%p, pc=%x)  \n"
+	.align	4,0
+
+#endif /* CONFIG_SPE */
+
+/*
+ * Global functions
+ */
+
+/*
+ * extern void loadcam_entry(unsigned int index)
+ *
+ * Load TLBCAM[index] entry in to the L2 CAM MMU
+ */
+_GLOBAL(loadcam_entry)
+	lis	r4,TLBCAM@ha
+	addi	r4,r4,TLBCAM@l
+	mulli	r5,r3,20
+	add	r3,r5,r4
+	lwz	r4,0(r3)
+	mtspr	SPRN_MAS0,r4
+	lwz	r4,4(r3)
+	mtspr	SPRN_MAS1,r4
+	lwz	r4,8(r3)
+	mtspr	SPRN_MAS2,r4
+	lwz	r4,12(r3)
+	mtspr	SPRN_MAS3,r4
+	tlbwe
+	isync
+	blr
+
+/*
+ * extern void giveup_altivec(struct task_struct *prev)
+ *
+ * The e500 core does not have an AltiVec unit.
+ */
+_GLOBAL(giveup_altivec)
+	blr
+
+#ifdef CONFIG_SPE
+/*
+ * extern void giveup_spe(struct task_struct *prev)
+ *
+ */
+_GLOBAL(giveup_spe)
+	mfmsr	r5
+	oris	r5,r5,MSR_SPE@h
+	SYNC
+	mtmsr	r5			/* enable use of SPE now */
+	isync
+	cmpi	0,r3,0
+	beqlr-				/* if no previous owner, done */
+	addi	r3,r3,THREAD		/* want THREAD of task */
+	lwz	r5,PT_REGS(r3)
+	cmpi	0,r5,0
+	SAVE_32EVRS(0, r4, r3)
+   	evxor	evr6, evr6, evr6	/* clear out evr6 */
+	evmwumiaa evr6, evr6, evr6	/* evr6 <- ACC = 0 * 0 + ACC */
+	li	r4,THREAD_ACC
+   	evstddx	evr6, r4, r3		/* save off accumulator */
+	mfspr	r6,SPRN_SPEFSCR
+	stw	r6,THREAD_SPEFSCR(r3)	/* save spefscr register value */
+	beq	1f
+	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lis	r3,MSR_SPE@h
+	andc	r4,r4,r3		/* disable SPE for previous task */
+	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+	li	r5,0
+	lis	r4,last_task_used_spe@ha
+	stw	r5,last_task_used_spe@l(r4)
+#endif /* CONFIG_SMP */
+	blr
+#endif /* CONFIG_SPE */
+
+/*
+ * extern void giveup_fpu(struct task_struct *prev)
+ *
+ * Not all FSL Book-E cores have an FPU
+ */
+#ifndef CONFIG_PPC_FPU
+_GLOBAL(giveup_fpu)
+	blr
+#endif
+
+/*
+ * extern void abort(void)
+ *
+ * At present, this routine just applies a system reset.
+ */
+_GLOBAL(abort)
+	li	r13,0
+        mtspr   SPRN_DBCR0,r13		/* disable all debug events */
+	mfmsr	r13
+	ori	r13,r13,MSR_DE@l	/* Enable Debug Events */
+	mtmsr	r13
+        mfspr   r13,SPRN_DBCR0
+        lis	r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h
+        mtspr   SPRN_DBCR0,r13
+
+_GLOBAL(set_context)
+
+#ifdef CONFIG_BDI_SWITCH
+	/* Context switch the PTE pointer for the Abatron BDI2000.
+	 * The PGDIR is the second parameter.
+	 */
+	lis	r5, abatron_pteptrs@h
+	ori	r5, r5, abatron_pteptrs@l
+	stw	r4, 0x4(r5)
+#endif
+	mtspr	SPRN_PID,r3
+	isync			/* Force context change */
+	blr
+
+/*
+ * We put a few things here that have to be page-aligned. This stuff
+ * goes at the beginning of the data segment, which is page-aligned.
+ */
+	.data
+_GLOBAL(sdata)
+_GLOBAL(empty_zero_page)
+	.space	4096
+_GLOBAL(swapper_pg_dir)
+	.space	4096
+
+/* Reserved 4k for the critical exception stack & 4k for the machine
+ * check stack per CPU for kernel mode exceptions */
+	.section .bss
+        .align 12
+exception_stack_bottom:
+	.space	BOOKE_EXCEPTION_STACK_SIZE * NR_CPUS
+_GLOBAL(exception_stack_top)
+
+/*
+ * This space gets a copy of optional info passed to us by the bootstrap
+ * which is used to pass parameters into the kernel like root=/dev/sda1, etc.
+ */
+_GLOBAL(cmd_line)
+	.space	512
+
+/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+abatron_pteptrs:
+	.space	8
+
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
new file mode 100644
index 00000000000..1a2194cf682
--- /dev/null
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -0,0 +1,233 @@
+/*
+ *  This file contains the power_save function for 6xx & 7xxx CPUs
+ *  rewritten in assembler
+ *
+ *  Warning ! This code assumes that if your machine has a 750fx
+ *  it will have PLL 1 set to low speed mode (used during NAP/DOZE).
+ *  if this is not the case some additional changes will have to
+ *  be done to check a runtime var (a bit like powersave-nap)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#undef DEBUG
+
+	.text
+
+/*
+ * Init idle, called at early CPU setup time from head.S for each CPU
+ * Make sure no rest of NAP mode remains in HID0, save default
+ * values for some CPU specific registers. Called with r24
+ * containing CPU number and r3 reloc offset
+ */
+_GLOBAL(init_idle_6xx)
+BEGIN_FTR_SECTION
+	mfspr	r4,SPRN_HID0
+	rlwinm	r4,r4,0,10,8	/* Clear NAP */
+	mtspr	SPRN_HID0, r4
+	b	1f
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+	blr
+1:
+	slwi	r5,r24,2
+	add	r5,r5,r3
+BEGIN_FTR_SECTION
+	mfspr	r4,SPRN_MSSCR0
+	addis	r6,r5, nap_save_msscr0@ha
+	stw	r4,nap_save_msscr0@l(r6)
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+BEGIN_FTR_SECTION
+	mfspr	r4,SPRN_HID1
+	addis	r6,r5,nap_save_hid1@ha
+	stw	r4,nap_save_hid1@l(r6)
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+	blr
+
+/*
+ * Here is the power_save_6xx function. This could eventually be
+ * split into several functions & changing the function pointer
+ * depending on the various features.
+ */
+_GLOBAL(ppc6xx_idle)
+	/* Check if we can nap or doze, put HID0 mask in r3
+	 */
+	lis	r3, 0
+BEGIN_FTR_SECTION
+	lis	r3,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+BEGIN_FTR_SECTION
+	/* We must dynamically check for the NAP feature as it
+	 * can be cleared by CPU init after the fixups are done
+	 */
+	lis	r4,cur_cpu_spec@ha
+	lwz	r4,cur_cpu_spec@l(r4)
+	lwz	r4,CPU_SPEC_FEATURES(r4)
+	andi.	r0,r4,CPU_FTR_CAN_NAP
+	beq	1f
+	/* Now check if user or arch enabled NAP mode */
+	lis	r4,powersave_nap@ha
+	lwz	r4,powersave_nap@l(r4)
+	cmpwi	0,r4,0
+	beq	1f
+	lis	r3,HID0_NAP@h
+1:	
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+	cmpwi	0,r3,0
+	beqlr
+
+	/* Clear MSR:EE */
+	mfmsr	r7
+	rlwinm	r0,r7,0,17,15
+	mtmsr	r0
+
+	/* Check current_thread_info()->flags */
+	rlwinm	r4,r1,0,0,18
+	lwz	r4,TI_FLAGS(r4)
+	andi.	r0,r4,_TIF_NEED_RESCHED
+	beq	1f
+	mtmsr	r7	/* out of line this ? */
+	blr
+1:	
+	/* Some pre-nap cleanups needed on some CPUs */
+	andis.	r0,r3,HID0_NAP@h
+	beq	2f
+BEGIN_FTR_SECTION
+	/* Disable L2 prefetch on some 745x and try to ensure
+	 * L2 prefetch engines are idle. As explained by errata
+	 * text, we can't be sure they are, we just hope very hard
+	 * that well be enough (sic !). At least I noticed Apple
+	 * doesn't even bother doing the dcbf's here...
+	 */
+	mfspr	r4,SPRN_MSSCR0
+	rlwinm	r4,r4,0,0,29
+	sync
+	mtspr	SPRN_MSSCR0,r4
+	sync
+	isync
+	lis	r4,KERNELBASE@h
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+#ifdef DEBUG
+	lis	r6,nap_enter_count@ha
+	lwz	r4,nap_enter_count@l(r6)
+	addi	r4,r4,1
+	stw	r4,nap_enter_count@l(r6)
+#endif	
+2:
+BEGIN_FTR_SECTION
+	/* Go to low speed mode on some 750FX */
+	lis	r4,powersave_lowspeed@ha
+	lwz	r4,powersave_lowspeed@l(r4)
+	cmpwi	0,r4,0
+	beq	1f
+	mfspr	r4,SPRN_HID1
+	oris	r4,r4,0x0001
+	mtspr	SPRN_HID1,r4
+1:	
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+
+	/* Go to NAP or DOZE now */	
+	mfspr	r4,SPRN_HID0
+	lis	r5,(HID0_NAP|HID0_SLEEP)@h
+BEGIN_FTR_SECTION
+	oris	r5,r5,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+	andc	r4,r4,r5
+	or	r4,r4,r3
+BEGIN_FTR_SECTION
+	oris	r4,r4,HID0_DPM@h	/* that should be done once for all  */
+END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
+	mtspr	SPRN_HID0,r4
+BEGIN_FTR_SECTION
+	DSSALL
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	ori	r7,r7,MSR_EE /* Could be ommited (already set) */
+	oris	r7,r7,MSR_POW@h
+	sync
+	isync
+	mtmsr	r7
+	isync
+	sync
+	blr
+	
+/*
+ * Return from NAP/DOZE mode, restore some CPU specific registers,
+ * we are called with DR/IR still off and r2 containing physical
+ * address of current.
+ */
+_GLOBAL(power_save_6xx_restore)
+	mfspr	r11,SPRN_HID0
+	rlwinm.	r11,r11,0,10,8	/* Clear NAP & copy NAP bit !state to cr1 EQ */
+	cror	4*cr1+eq,4*cr0+eq,4*cr0+eq
+BEGIN_FTR_SECTION
+	rlwinm	r11,r11,0,9,7	/* Clear DOZE */
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+	mtspr	SPRN_HID0, r11
+
+#ifdef DEBUG
+	beq	cr1,1f
+	lis	r11,(nap_return_count-KERNELBASE)@ha
+	lwz	r9,nap_return_count@l(r11)
+	addi	r9,r9,1
+	stw	r9,nap_return_count@l(r11)
+1:
+#endif
+	
+	rlwinm	r9,r1,0,0,18
+	tophys(r9,r9)
+	lwz	r11,TI_CPU(r9)
+	slwi	r11,r11,2
+	/* Todo make sure all these are in the same page
+	 * and load r22 (@ha part + CPU offset) only once
+	 */
+BEGIN_FTR_SECTION
+	beq	cr1,1f
+	addis	r9,r11,(nap_save_msscr0-KERNELBASE)@ha
+	lwz	r9,nap_save_msscr0@l(r9)
+	mtspr	SPRN_MSSCR0, r9
+	sync
+	isync
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
+BEGIN_FTR_SECTION
+	addis	r9,r11,(nap_save_hid1-KERNELBASE)@ha
+	lwz	r9,nap_save_hid1@l(r9)
+	mtspr	SPRN_HID1, r9
+END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
+	b	transfer_to_handler_cont
+
+	.data
+
+_GLOBAL(nap_save_msscr0)
+	.space	4*NR_CPUS
+
+_GLOBAL(nap_save_hid1)
+	.space	4*NR_CPUS
+
+_GLOBAL(powersave_nap)
+	.long	0
+_GLOBAL(powersave_lowspeed)
+	.long	0
+
+#ifdef DEBUG
+_GLOBAL(nap_enter_count)
+	.space	4
+_GLOBAL(nap_return_count)
+	.space	4
+#endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
new file mode 100644
index 00000000000..f5a9d2a84fa
--- /dev/null
+++ b/arch/powerpc/kernel/process.c
@@ -0,0 +1,724 @@
+/*
+ *  arch/ppc/kernel/process.c
+ *
+ *  Derived from "arch/i386/kernel/process.c"
+ *    Copyright (C) 1995  Linus Torvalds
+ *
+ *  Updated and modified by Cort Dougan (cort@cs.nmt.edu) and
+ *  Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/elf.h>
+#include <linux/init.h>
+#include <linux/prctl.h>
+#include <linux/init_task.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/mqueue.h>
+#include <linux/hardirq.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/prom.h>
+
+extern unsigned long _get_SP(void);
+
+#ifndef CONFIG_SMP
+struct task_struct *last_task_used_math = NULL;
+struct task_struct *last_task_used_altivec = NULL;
+struct task_struct *last_task_used_spe = NULL;
+#endif
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+EXPORT_SYMBOL(init_mm);
+
+/* this is 8kB-aligned so we can get to the thread_info struct
+   at the base of it from the stack pointer with 1 integer instruction. */
+union thread_union init_thread_union
+	__attribute__((__section__(".data.init_task"))) =
+{ INIT_THREAD_INFO(init_task) };
+
+/* initial task structure */
+struct task_struct init_task = INIT_TASK(init_task);
+EXPORT_SYMBOL(init_task);
+
+/* only used to get secondary processor up */
+struct task_struct *current_set[NR_CPUS] = {&init_task, };
+
+/*
+ * Make sure the floating-point register state in the
+ * the thread_struct is up to date for task tsk.
+ */
+void flush_fp_to_thread(struct task_struct *tsk)
+{
+	if (tsk->thread.regs) {
+		/*
+		 * We need to disable preemption here because if we didn't,
+		 * another process could get scheduled after the regs->msr
+		 * test but before we have finished saving the FP registers
+		 * to the thread_struct.  That process could take over the
+		 * FPU, and then when we get scheduled again we would store
+		 * bogus values for the remaining FP registers.
+		 */
+		preempt_disable();
+		if (tsk->thread.regs->msr & MSR_FP) {
+#ifdef CONFIG_SMP
+			/*
+			 * This should only ever be called for current or
+			 * for a stopped child process.  Since we save away
+			 * the FP register state on context switch on SMP,
+			 * there is something wrong if a stopped child appears
+			 * to still have its FP state in the CPU registers.
+			 */
+			BUG_ON(tsk != current);
+#endif
+			giveup_fpu(current);
+		}
+		preempt_enable();
+	}
+}
+
+void enable_kernel_fp(void)
+{
+	WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+	if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
+		giveup_fpu(current);
+	else
+		giveup_fpu(NULL);	/* just enables FP for kernel */
+#else
+	giveup_fpu(last_task_used_math);
+#endif /* CONFIG_SMP */
+}
+EXPORT_SYMBOL(enable_kernel_fp);
+
+int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
+{
+	if (!tsk->thread.regs)
+		return 0;
+	flush_fp_to_thread(current);
+
+	memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs));
+
+	return 1;
+}
+
+#ifdef CONFIG_ALTIVEC
+void enable_kernel_altivec(void)
+{
+	WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
+		giveup_altivec(current);
+	else
+		giveup_altivec(NULL);	/* just enable AltiVec for kernel - force */
+#else
+	giveup_altivec(last_task_used_altivec);
+#endif /* CONFIG_SMP */
+}
+EXPORT_SYMBOL(enable_kernel_altivec);
+
+/*
+ * Make sure the VMX/Altivec register state in the
+ * the thread_struct is up to date for task tsk.
+ */
+void flush_altivec_to_thread(struct task_struct *tsk)
+{
+	if (tsk->thread.regs) {
+		preempt_disable();
+		if (tsk->thread.regs->msr & MSR_VEC) {
+#ifdef CONFIG_SMP
+			BUG_ON(tsk != current);
+#endif
+			giveup_altivec(current);
+		}
+		preempt_enable();
+	}
+}
+
+int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs)
+{
+	flush_altivec_to_thread(current);
+	memcpy(vrregs, &current->thread.vr[0], sizeof(*vrregs));
+	return 1;
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_SPE
+
+void enable_kernel_spe(void)
+{
+	WARN_ON(preemptible());
+
+#ifdef CONFIG_SMP
+	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
+		giveup_spe(current);
+	else
+		giveup_spe(NULL);	/* just enable SPE for kernel - force */
+#else
+	giveup_spe(last_task_used_spe);
+#endif /* __SMP __ */
+}
+EXPORT_SYMBOL(enable_kernel_spe);
+
+void flush_spe_to_thread(struct task_struct *tsk)
+{
+	if (tsk->thread.regs) {
+		preempt_disable();
+		if (tsk->thread.regs->msr & MSR_SPE) {
+#ifdef CONFIG_SMP
+			BUG_ON(tsk != current);
+#endif
+			giveup_spe(current);
+		}
+		preempt_enable();
+	}
+}
+
+int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs)
+{
+	flush_spe_to_thread(current);
+	/* We copy u32 evr[32] + u64 acc + u32 spefscr -> 35 */
+	memcpy(evrregs, &current->thread.evr[0], sizeof(u32) * 35);
+	return 1;
+}
+#endif /* CONFIG_SPE */
+
+static void set_dabr_spr(unsigned long val)
+{
+	mtspr(SPRN_DABR, val);
+}
+
+int set_dabr(unsigned long dabr)
+{
+	int ret = 0;
+
+#ifdef CONFIG_PPC64
+	if (firmware_has_feature(FW_FEATURE_XDABR)) {
+		/* We want to catch accesses from kernel and userspace */
+		unsigned long flags = H_DABRX_KERNEL|H_DABRX_USER;
+		ret = plpar_set_xdabr(dabr, flags);
+	} else if (firmware_has_feature(FW_FEATURE_DABR)) {
+		ret = plpar_set_dabr(dabr);
+	} else
+#endif
+		set_dabr_spr(dabr);
+
+	return ret;
+}
+
+static DEFINE_PER_CPU(unsigned long, current_dabr);
+
+struct task_struct *__switch_to(struct task_struct *prev,
+	struct task_struct *new)
+{
+	struct thread_struct *new_thread, *old_thread;
+	unsigned long flags;
+	struct task_struct *last;
+
+#ifdef CONFIG_SMP
+	/* avoid complexity of lazy save/restore of fpu
+	 * by just saving it every time we switch out if
+	 * this task used the fpu during the last quantum.
+	 *
+	 * If it tries to use the fpu again, it'll trap and
+	 * reload its fp regs.  So we don't have to do a restore
+	 * every switch, just a save.
+	 *  -- Cort
+	 */
+	if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
+		giveup_fpu(prev);
+#ifdef CONFIG_ALTIVEC
+	/*
+	 * If the previous thread used altivec in the last quantum
+	 * (thus changing altivec regs) then save them.
+	 * We used to check the VRSAVE register but not all apps
+	 * set it, so we don't rely on it now (and in fact we need
+	 * to save & restore VSCR even if VRSAVE == 0).  -- paulus
+	 *
+	 * On SMP we always save/restore altivec regs just to avoid the
+	 * complexity of changing processors.
+	 *  -- Cort
+	 */
+	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
+		giveup_altivec(prev);
+	/* Avoid the trap.  On smp this this never happens since
+	 * we don't set last_task_used_altivec -- Cort
+	 */
+	if (new->thread.regs && last_task_used_altivec == new)
+		new->thread.regs->msr |= MSR_VEC;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+	/*
+	 * If the previous thread used spe in the last quantum
+	 * (thus changing spe regs) then save them.
+	 *
+	 * On SMP we always save/restore spe regs just to avoid the
+	 * complexity of changing processors.
+	 */
+	if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE)))
+		giveup_spe(prev);
+	/* Avoid the trap.  On smp this this never happens since
+	 * we don't set last_task_used_spe
+	 */
+	if (new->thread.regs && last_task_used_spe == new)
+		new->thread.regs->msr |= MSR_SPE;
+#endif /* CONFIG_SPE */
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC64	/* for now */
+	if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) {
+		set_dabr(new->thread.dabr);
+		__get_cpu_var(current_dabr) = new->thread.dabr;
+	}
+#endif
+
+	new_thread = &new->thread;
+	old_thread = &current->thread;
+	local_irq_save(flags);
+	last = _switch(old_thread, new_thread);
+
+	local_irq_restore(flags);
+
+	return last;
+}
+
+void show_regs(struct pt_regs * regs)
+{
+	int i, trap;
+
+	printk("NIP: %08lX LR: %08lX SP: %08lX REGS: %p TRAP: %04lx    %s\n",
+	       regs->nip, regs->link, regs->gpr[1], regs, regs->trap,
+	       print_tainted());
+	printk("MSR: %08lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n",
+	       regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0,
+	       regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0,
+	       regs->msr&MSR_IR ? 1 : 0,
+	       regs->msr&MSR_DR ? 1 : 0);
+	trap = TRAP(regs);
+	if (trap == 0x300 || trap == 0x600)
+		printk("DAR: %08lX, DSISR: %08lX\n", regs->dar, regs->dsisr);
+	printk("TASK = %p[%d] '%s' THREAD: %p\n",
+	       current, current->pid, current->comm, current->thread_info);
+	printk("Last syscall: %ld ", current->thread.last_syscall);
+
+#ifdef CONFIG_SMP
+	printk(" CPU: %d", smp_processor_id());
+#endif /* CONFIG_SMP */
+
+	for (i = 0;  i < 32;  i++) {
+		long r;
+		if ((i % 8) == 0)
+			printk("\n" KERN_INFO "GPR%02d: ", i);
+		if (__get_user(r, &regs->gpr[i]))
+			break;
+		printk("%08lX ", r);
+		if (i == 12 && !FULL_REGS(regs))
+			break;
+	}
+	printk("\n");
+#ifdef CONFIG_KALLSYMS
+	/*
+	 * Lookup NIP late so we have the best change of getting the
+	 * above info out without failing
+	 */
+	printk("NIP [%08lx] ", regs->nip);
+	print_symbol("%s\n", regs->nip);
+	printk("LR [%08lx] ", regs->link);
+	print_symbol("%s\n", regs->link);
+#endif
+	show_stack(current, (unsigned long *) regs->gpr[1]);
+}
+
+void exit_thread(void)
+{
+#ifndef CONFIG_SMP
+	if (last_task_used_math == current)
+		last_task_used_math = NULL;
+#ifdef CONFIG_ALTIVEC
+	if (last_task_used_altivec == current)
+		last_task_used_altivec = NULL;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+	if (last_task_used_spe == current)
+		last_task_used_spe = NULL;
+#endif
+#endif /* CONFIG_SMP */
+}
+
+void flush_thread(void)
+{
+#ifndef CONFIG_SMP
+	if (last_task_used_math == current)
+		last_task_used_math = NULL;
+#ifdef CONFIG_ALTIVEC
+	if (last_task_used_altivec == current)
+		last_task_used_altivec = NULL;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+	if (last_task_used_spe == current)
+		last_task_used_spe = NULL;
+#endif
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_PPC64	/* for now */
+	if (current->thread.dabr) {
+		current->thread.dabr = 0;
+		set_dabr(0);
+	}
+#endif
+}
+
+void
+release_thread(struct task_struct *t)
+{
+}
+
+/*
+ * This gets called before we allocate a new thread and copy
+ * the current task into it.
+ */
+void prepare_to_copy(struct task_struct *tsk)
+{
+	flush_fp_to_thread(current);
+	flush_altivec_to_thread(current);
+	flush_spe_to_thread(current);
+}
+
+/*
+ * Copy a thread..
+ */
+int
+copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+	    unsigned long unused,
+	    struct task_struct *p, struct pt_regs *regs)
+{
+	struct pt_regs *childregs, *kregs;
+	extern void ret_from_fork(void);
+	unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE;
+	unsigned long childframe;
+
+	CHECK_FULL_REGS(regs);
+	/* Copy registers */
+	sp -= sizeof(struct pt_regs);
+	childregs = (struct pt_regs *) sp;
+	*childregs = *regs;
+	if ((childregs->msr & MSR_PR) == 0) {
+		/* for kernel thread, set `current' and stackptr in new task */
+		childregs->gpr[1] = sp + sizeof(struct pt_regs);
+		childregs->gpr[2] = (unsigned long) p;
+		p->thread.regs = NULL;	/* no user register state */
+	} else {
+		childregs->gpr[1] = usp;
+		p->thread.regs = childregs;
+		if (clone_flags & CLONE_SETTLS)
+			childregs->gpr[2] = childregs->gpr[6];
+	}
+	childregs->gpr[3] = 0;  /* Result from fork() */
+	sp -= STACK_FRAME_OVERHEAD;
+	childframe = sp;
+
+	/*
+	 * The way this works is that at some point in the future
+	 * some task will call _switch to switch to the new task.
+	 * That will pop off the stack frame created below and start
+	 * the new task running at ret_from_fork.  The new task will
+	 * do some house keeping and then return from the fork or clone
+	 * system call, using the stack frame created above.
+	 */
+	sp -= sizeof(struct pt_regs);
+	kregs = (struct pt_regs *) sp;
+	sp -= STACK_FRAME_OVERHEAD;
+	p->thread.ksp = sp;
+	kregs->nip = (unsigned long)ret_from_fork;
+
+	p->thread.last_syscall = -1;
+
+	return 0;
+}
+
+/*
+ * Set up a thread for executing a new program
+ */
+void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp)
+{
+	set_fs(USER_DS);
+	memset(regs->gpr, 0, sizeof(regs->gpr));
+	regs->ctr = 0;
+	regs->link = 0;
+	regs->xer = 0;
+	regs->ccr = 0;
+	regs->mq = 0;
+	regs->nip = nip;
+	regs->gpr[1] = sp;
+	regs->msr = MSR_USER;
+#ifndef CONFIG_SMP
+	if (last_task_used_math == current)
+		last_task_used_math = NULL;
+#ifdef CONFIG_ALTIVEC
+	if (last_task_used_altivec == current)
+		last_task_used_altivec = NULL;
+#endif
+#ifdef CONFIG_SPE
+	if (last_task_used_spe == current)
+		last_task_used_spe = NULL;
+#endif
+#endif /* CONFIG_SMP */
+	memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
+	current->thread.fpscr = 0;
+#ifdef CONFIG_ALTIVEC
+	memset(current->thread.vr, 0, sizeof(current->thread.vr));
+	memset(&current->thread.vscr, 0, sizeof(current->thread.vscr));
+	current->thread.vrsave = 0;
+	current->thread.used_vr = 0;
+#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_SPE
+	memset(current->thread.evr, 0, sizeof(current->thread.evr));
+	current->thread.acc = 0;
+	current->thread.spefscr = 0;
+	current->thread.used_spe = 0;
+#endif /* CONFIG_SPE */
+}
+
+#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
+		| PR_FP_EXC_RES | PR_FP_EXC_INV)
+
+int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
+{
+	struct pt_regs *regs = tsk->thread.regs;
+
+	/* This is a bit hairy.  If we are an SPE enabled  processor
+	 * (have embedded fp) we store the IEEE exception enable flags in
+	 * fpexc_mode.  fpexc_mode is also used for setting FP exception
+	 * mode (asyn, precise, disabled) for 'Classic' FP. */
+	if (val & PR_FP_EXC_SW_ENABLE) {
+#ifdef CONFIG_SPE
+		tsk->thread.fpexc_mode = val &
+			(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+#else
+		return -EINVAL;
+#endif
+	} else {
+		/* on a CONFIG_SPE this does not hurt us.  The bits that
+		 * __pack_fe01 use do not overlap with bits used for
+		 * PR_FP_EXC_SW_ENABLE.  Additionally, the MSR[FE0,FE1] bits
+		 * on CONFIG_SPE implementations are reserved so writing to
+		 * them does not change anything */
+		if (val > PR_FP_EXC_PRECISE)
+			return -EINVAL;
+		tsk->thread.fpexc_mode = __pack_fe01(val);
+		if (regs != NULL && (regs->msr & MSR_FP) != 0)
+			regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
+				| tsk->thread.fpexc_mode;
+	}
+	return 0;
+}
+
+int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
+{
+	unsigned int val;
+
+	if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
+#ifdef CONFIG_SPE
+		val = tsk->thread.fpexc_mode;
+#else
+		return -EINVAL;
+#endif
+	else
+		val = __unpack_fe01(tsk->thread.fpexc_mode);
+	return put_user(val, (unsigned int __user *) adr);
+}
+
+int sys_clone(unsigned long clone_flags, unsigned long usp,
+	      int __user *parent_tidp, void __user *child_threadptr,
+	      int __user *child_tidp, int p6,
+	      struct pt_regs *regs)
+{
+	CHECK_FULL_REGS(regs);
+	if (usp == 0)
+		usp = regs->gpr[1];	/* stack pointer for child */
+ 	return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp);
+}
+
+int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3,
+	     unsigned long p4, unsigned long p5, unsigned long p6,
+	     struct pt_regs *regs)
+{
+	CHECK_FULL_REGS(regs);
+	return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
+}
+
+int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
+	      unsigned long p4, unsigned long p5, unsigned long p6,
+	      struct pt_regs *regs)
+{
+	CHECK_FULL_REGS(regs);
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1],
+			regs, 0, NULL, NULL);
+}
+
+int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
+	       unsigned long a3, unsigned long a4, unsigned long a5,
+	       struct pt_regs *regs)
+{
+	int error;
+	char * filename;
+
+	filename = getname((char __user *) a0);
+	error = PTR_ERR(filename);
+	if (IS_ERR(filename))
+		goto out;
+	flush_fp_to_thread(current);
+	flush_altivec_to_thread(current);
+	flush_spe_to_thread(current);
+	if (error == 0) {
+		task_lock(current);
+		current->ptrace &= ~PT_DTRACE;
+		task_unlock(current);
+	}
+	putname(filename);
+out:
+	return error;
+}
+
+static int validate_sp(unsigned long sp, struct task_struct *p,
+		       unsigned long nbytes)
+{
+	unsigned long stack_page = (unsigned long)p->thread_info;
+
+	if (sp >= stack_page + sizeof(struct thread_struct)
+	    && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+#ifdef CONFIG_IRQSTACKS
+	stack_page = (unsigned long) hardirq_ctx[task_cpu(p)];
+	if (sp >= stack_page + sizeof(struct thread_struct)
+	    && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+
+	stack_page = (unsigned long) softirq_ctx[task_cpu(p)];
+	if (sp >= stack_page + sizeof(struct thread_struct)
+	    && sp <= stack_page + THREAD_SIZE - nbytes)
+		return 1;
+#endif
+
+	return 0;
+}
+
+void dump_stack(void)
+{
+	show_stack(current, NULL);
+}
+
+EXPORT_SYMBOL(dump_stack);
+
+void show_stack(struct task_struct *tsk, unsigned long *stack)
+{
+	unsigned long sp, stack_top, prev_sp, ret;
+	int count = 0;
+	unsigned long next_exc = 0;
+	struct pt_regs *regs;
+	extern char ret_from_except, ret_from_except_full, ret_from_syscall;
+
+	sp = (unsigned long) stack;
+	if (tsk == NULL)
+		tsk = current;
+	if (sp == 0) {
+		if (tsk == current)
+			asm("mr %0,1" : "=r" (sp));
+		else
+			sp = tsk->thread.ksp;
+	}
+
+	prev_sp = (unsigned long) (tsk->thread_info + 1);
+	stack_top = (unsigned long) tsk->thread_info + THREAD_SIZE;
+	while (count < 16 && sp > prev_sp && sp < stack_top && (sp & 3) == 0) {
+		if (count == 0) {
+			printk("Call trace:");
+#ifdef CONFIG_KALLSYMS
+			printk("\n");
+#endif
+		} else {
+			if (next_exc) {
+				ret = next_exc;
+				next_exc = 0;
+			} else
+				ret = *(unsigned long *)(sp + 4);
+			printk(" [%08lx] ", ret);
+#ifdef CONFIG_KALLSYMS
+			print_symbol("%s", ret);
+			printk("\n");
+#endif
+			if (ret == (unsigned long) &ret_from_except
+			    || ret == (unsigned long) &ret_from_except_full
+			    || ret == (unsigned long) &ret_from_syscall) {
+				/* sp + 16 points to an exception frame */
+				regs = (struct pt_regs *) (sp + 16);
+				if (sp + 16 + sizeof(*regs) <= stack_top)
+					next_exc = regs->nip;
+			}
+		}
+		++count;
+		sp = *(unsigned long *)sp;
+	}
+#ifndef CONFIG_KALLSYMS
+	if (count > 0)
+		printk("\n");
+#endif
+}
+
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long ip, sp;
+	int count = 0;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	sp = p->thread.ksp;
+	if (!validate_sp(sp, p, 16))
+		return 0;
+
+	do {
+		sp = *(unsigned long *)sp;
+		if (!validate_sp(sp, p, 16))
+			return 0;
+		if (count > 0) {
+			ip = *(unsigned long *)(sp + 4);
+			if (!in_sched_functions(ip))
+				return ip;
+		}
+	} while (count++ < 16);
+	return 0;
+}
+EXPORT_SYMBOL(get_wchan);
diff --git a/arch/powerpc/kernel/semaphore.c b/arch/powerpc/kernel/semaphore.c
new file mode 100644
index 00000000000..2f8c3c95139
--- /dev/null
+++ b/arch/powerpc/kernel/semaphore.c
@@ -0,0 +1,135 @@
+/*
+ * PowerPC-specific semaphore code.
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
+ * to eliminate the SMP races in the old version between the updates
+ * of `count' and `waking'.  Now we use negative `count' values to
+ * indicate that some process(es) are waiting for the semaphore.
+ */
+
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+#include <asm/errno.h>
+
+/*
+ * Atomically update sem->count.
+ * This does the equivalent of the following:
+ *
+ *	old_count = sem->count;
+ *	tmp = MAX(old_count, 0) + incr;
+ *	sem->count = tmp;
+ *	return old_count;
+ */
+static inline int __sem_update_count(struct semaphore *sem, int incr)
+{
+	int old_count, tmp;
+
+	__asm__ __volatile__("\n"
+"1:	lwarx	%0,0,%3\n"
+"	srawi	%1,%0,31\n"
+"	andc	%1,%0,%1\n"
+"	add	%1,%1,%4\n"
+	PPC405_ERR77(0,%3)
+"	stwcx.	%1,0,%3\n"
+"	bne	1b"
+	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
+	: "r" (&sem->count), "r" (incr), "m" (sem->count)
+	: "cc");
+
+	return old_count;
+}
+
+void __up(struct semaphore *sem)
+{
+	/*
+	 * Note that we incremented count in up() before we came here,
+	 * but that was ineffective since the result was <= 0, and
+	 * any negative value of count is equivalent to 0.
+	 * This ends up setting count to 1, unless count is now > 0
+	 * (i.e. because some other cpu has called up() in the meantime),
+	 * in which case we just increment count.
+	 */
+	__sem_update_count(sem, 1);
+	wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__up);
+
+/*
+ * Note that when we come in to __down or __down_interruptible,
+ * we have already decremented count, but that decrement was
+ * ineffective since the result was < 0, and any negative value
+ * of count is equivalent to 0.
+ * Thus it is only when we decrement count from some value > 0
+ * that we have actually got the semaphore.
+ */
+void __sched __down(struct semaphore *sem)
+{
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+
+	__set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	add_wait_queue_exclusive(&sem->wait, &wait);
+
+	/*
+	 * Try to get the semaphore.  If the count is > 0, then we've
+	 * got the semaphore; we decrement count and exit the loop.
+	 * If the count is 0 or negative, we set it to -1, indicating
+	 * that we are asleep, and then sleep.
+	 */
+	while (__sem_update_count(sem, -1) <= 0) {
+		schedule();
+		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	}
+	remove_wait_queue(&sem->wait, &wait);
+	__set_task_state(tsk, TASK_RUNNING);
+
+	/*
+	 * If there are any more sleepers, wake one of them up so
+	 * that it can either get the semaphore, or set count to -1
+	 * indicating that there are still processes sleeping.
+	 */
+	wake_up(&sem->wait);
+}
+EXPORT_SYMBOL(__down);
+
+int __sched __down_interruptible(struct semaphore * sem)
+{
+	int retval = 0;
+	struct task_struct *tsk = current;
+	DECLARE_WAITQUEUE(wait, tsk);
+
+	__set_task_state(tsk, TASK_INTERRUPTIBLE);
+	add_wait_queue_exclusive(&sem->wait, &wait);
+
+	while (__sem_update_count(sem, -1) <= 0) {
+		if (signal_pending(current)) {
+			/*
+			 * A signal is pending - give up trying.
+			 * Set sem->count to 0 if it is negative,
+			 * since we are no longer sleeping.
+			 */
+			__sem_update_count(sem, 0);
+			retval = -EINTR;
+			break;
+		}
+		schedule();
+		set_task_state(tsk, TASK_INTERRUPTIBLE);
+	}
+	remove_wait_queue(&sem->wait, &wait);
+	__set_task_state(tsk, TASK_RUNNING);
+
+	wake_up(&sem->wait);
+	return retval;
+}
+EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
new file mode 100644
index 00000000000..c7afbbba0f3
--- /dev/null
+++ b/arch/powerpc/kernel/traps.c
@@ -0,0 +1,1047 @@
+/*
+ *  arch/powerpc/kernel/traps.c
+ *
+ *  Copyright (C) 1995-1996  Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ *  Modified by Cort Dougan (cort@cs.nmt.edu)
+ *  and Paul Mackerras (paulus@samba.org)
+ */
+
+/*
+ * This file handles the architecture-dependent parts of hardware exceptions
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/prctl.h>
+#include <linux/delay.h>
+#include <linux/kprobes.h>
+#include <asm/kdebug.h>
+
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/reg.h>
+#include <asm/xmon.h>
+#ifdef CONFIG_PMAC_BACKLIGHT
+#include <asm/backlight.h>
+#endif
+#include <asm/perfmon.h>
+
+#ifdef CONFIG_DEBUGGER
+int (*__debugger)(struct pt_regs *regs);
+int (*__debugger_ipi)(struct pt_regs *regs);
+int (*__debugger_bpt)(struct pt_regs *regs);
+int (*__debugger_sstep)(struct pt_regs *regs);
+int (*__debugger_iabr_match)(struct pt_regs *regs);
+int (*__debugger_dabr_match)(struct pt_regs *regs);
+int (*__debugger_fault_handler)(struct pt_regs *regs);
+
+EXPORT_SYMBOL(__debugger);
+EXPORT_SYMBOL(__debugger_ipi);
+EXPORT_SYMBOL(__debugger_bpt);
+EXPORT_SYMBOL(__debugger_sstep);
+EXPORT_SYMBOL(__debugger_iabr_match);
+EXPORT_SYMBOL(__debugger_dabr_match);
+EXPORT_SYMBOL(__debugger_fault_handler);
+#endif
+
+struct notifier_block *powerpc_die_chain;
+static DEFINE_SPINLOCK(die_notifier_lock);
+
+int register_die_notifier(struct notifier_block *nb)
+{
+	int err = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&die_notifier_lock, flags);
+	err = notifier_chain_register(&powerpc_die_chain, nb);
+	spin_unlock_irqrestore(&die_notifier_lock, flags);
+	return err;
+}
+
+/*
+ * Trap & Exception support
+ */
+
+static DEFINE_SPINLOCK(die_lock);
+
+int die(const char *str, struct pt_regs *regs, long err)
+{
+	static int die_counter;
+	int nl = 0;
+
+	if (debugger(regs))
+		return 1;
+
+	console_verbose();
+	spin_lock_irq(&die_lock);
+	bust_spinlocks(1);
+#ifdef CONFIG_PMAC_BACKLIGHT
+	if (_machine == _MACH_Pmac) {
+		set_backlight_enable(1);
+		set_backlight_level(BACKLIGHT_MAX);
+	}
+#endif
+	printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
+#ifdef CONFIG_PREEMPT
+	printk("PREEMPT ");
+	nl = 1;
+#endif
+#ifdef CONFIG_SMP
+	printk("SMP NR_CPUS=%d ", NR_CPUS);
+	nl = 1;
+#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	printk("DEBUG_PAGEALLOC ");
+	nl = 1;
+#endif
+#ifdef CONFIG_NUMA
+	printk("NUMA ");
+	nl = 1;
+#endif
+#ifdef CONFIG_PPC64
+	switch (systemcfg->platform) {
+	case PLATFORM_PSERIES:
+		printk("PSERIES ");
+		nl = 1;
+		break;
+	case PLATFORM_PSERIES_LPAR:
+		printk("PSERIES LPAR ");
+		nl = 1;
+		break;
+	case PLATFORM_ISERIES_LPAR:
+		printk("ISERIES LPAR ");
+		nl = 1;
+		break;
+	case PLATFORM_POWERMAC:
+		printk("POWERMAC ");
+		nl = 1;
+		break;
+	case PLATFORM_BPA:
+		printk("BPA ");
+		nl = 1;
+		break;
+	}
+#endif
+	if (nl)
+		printk("\n");
+	print_modules();
+	show_regs(regs);
+	bust_spinlocks(0);
+	spin_unlock_irq(&die_lock);
+
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+
+	if (panic_on_oops) {
+		panic("Fatal exception");
+	}
+	do_exit(err);
+
+	return 0;
+}
+
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+	siginfo_t info;
+
+	if (!user_mode(regs)) {
+		if (die("Exception in kernel mode", regs, signr))
+			return;
+	}
+
+	memset(&info, 0, sizeof(info));
+	info.si_signo = signr;
+	info.si_code = code;
+	info.si_addr = (void __user *) addr;
+	force_sig_info(signr, &info, current);
+
+	/*
+	 * Init gets no signals that it doesn't have a handler for.
+	 * That's all very well, but if it has caused a synchronous
+	 * exception and we ignore the resulting signal, it will just
+	 * generate the same exception over and over again and we get
+	 * nowhere.  Better to kill it and let the kernel panic.
+	 */
+	if (current->pid == 1) {
+		__sighandler_t handler;
+
+		spin_lock_irq(&current->sighand->siglock);
+		handler = current->sighand->action[signr-1].sa.sa_handler;
+		spin_unlock_irq(&current->sighand->siglock);
+		if (handler == SIG_DFL) {
+			/* init has generated a synchronous exception
+			   and it doesn't have a handler for the signal */
+			printk(KERN_CRIT "init has generated signal %d "
+			       "but has no handler for it\n", signr);
+			do_exit(signr);
+		}
+	}
+}
+
+#ifdef CONFIG_PPC64
+void system_reset_exception(struct pt_regs *regs)
+{
+	/* See if any machine dependent calls */
+	if (ppc_md.system_reset_exception)
+		ppc_md.system_reset_exception(regs);
+
+	die("System Reset", regs, SIGABRT);
+
+	/* Must die if the interrupt is not recoverable */
+	if (!(regs->msr & MSR_RI))
+		panic("Unrecoverable System Reset");
+
+	/* What should we do here? We could issue a shutdown or hard reset. */
+}
+#endif
+
+/*
+ * I/O accesses can cause machine checks on powermacs.
+ * Check if the NIP corresponds to the address of a sync
+ * instruction for which there is an entry in the exception
+ * table.
+ * Note that the 601 only takes a machine check on TEA
+ * (transfer error ack) signal assertion, and does not
+ * set any of the top 16 bits of SRR1.
+ *  -- paulus.
+ */
+static inline int check_io_access(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_PMAC
+	unsigned long msr = regs->msr;
+	const struct exception_table_entry *entry;
+	unsigned int *nip = (unsigned int *)regs->nip;
+
+	if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000)))
+	    && (entry = search_exception_tables(regs->nip)) != NULL) {
+		/*
+		 * Check that it's a sync instruction, or somewhere
+		 * in the twi; isync; nop sequence that inb/inw/inl uses.
+		 * As the address is in the exception table
+		 * we should be able to read the instr there.
+		 * For the debug message, we look at the preceding
+		 * load or store.
+		 */
+		if (*nip == 0x60000000)		/* nop */
+			nip -= 2;
+		else if (*nip == 0x4c00012c)	/* isync */
+			--nip;
+		if (*nip == 0x7c0004ac || (*nip >> 26) == 3) {
+			/* sync or twi */
+			unsigned int rb;
+
+			--nip;
+			rb = (*nip >> 11) & 0x1f;
+			printk(KERN_DEBUG "%s bad port %lx at %p\n",
+			       (*nip & 0x100)? "OUT to": "IN from",
+			       regs->gpr[rb] - _IO_BASE, nip);
+			regs->msr |= MSR_RI;
+			regs->nip = entry->fixup;
+			return 1;
+		}
+	}
+#endif /* CONFIG_PPC_PMAC */
+	return 0;
+}
+
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+/* On 4xx, the reason for the machine check or program exception
+   is in the ESR. */
+#define get_reason(regs)	((regs)->dsisr)
+#ifndef CONFIG_FSL_BOOKE
+#define get_mc_reason(regs)	((regs)->dsisr)
+#else
+#define get_mc_reason(regs)	(mfspr(SPRN_MCSR))
+#endif
+#define REASON_FP		ESR_FP
+#define REASON_ILLEGAL		(ESR_PIL | ESR_PUO)
+#define REASON_PRIVILEGED	ESR_PPR
+#define REASON_TRAP		ESR_PTR
+
+/* single-step stuff */
+#define single_stepping(regs)	(current->thread.dbcr0 & DBCR0_IC)
+#define clear_single_step(regs)	(current->thread.dbcr0 &= ~DBCR0_IC)
+
+#else
+/* On non-4xx, the reason for the machine check or program
+   exception is in the MSR. */
+#define get_reason(regs)	((regs)->msr)
+#define get_mc_reason(regs)	((regs)->msr)
+#define REASON_FP		0x100000
+#define REASON_ILLEGAL		0x80000
+#define REASON_PRIVILEGED	0x40000
+#define REASON_TRAP		0x20000
+
+#define single_stepping(regs)	((regs)->msr & MSR_SE)
+#define clear_single_step(regs)	((regs)->msr &= ~MSR_SE)
+#endif
+
+/*
+ * This is "fall-back" implementation for configurations
+ * which don't provide platform-specific machine check info
+ */
+void __attribute__ ((weak))
+platform_machine_check(struct pt_regs *regs)
+{
+}
+
+void MachineCheckException(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC64
+	int recover = 0;
+
+	/* See if any machine dependent calls */
+	if (ppc_md.machine_check_exception)
+		recover = ppc_md.machine_check_exception(regs);
+
+	if (recover)
+		return;
+#else
+	unsigned long reason = get_mc_reason(regs);
+
+	if (user_mode(regs)) {
+		regs->msr |= MSR_RI;
+		_exception(SIGBUS, regs, BUS_ADRERR, regs->nip);
+		return;
+	}
+
+#if defined(CONFIG_8xx) && defined(CONFIG_PCI)
+	/* the qspan pci read routines can cause machine checks -- Cort */
+	bad_page_fault(regs, regs->dar, SIGBUS);
+	return;
+#endif
+
+	if (debugger_fault_handler(regs)) {
+		regs->msr |= MSR_RI;
+		return;
+	}
+
+	if (check_io_access(regs))
+		return;
+
+#if defined(CONFIG_4xx) && !defined(CONFIG_440A)
+	if (reason & ESR_IMCP) {
+		printk("Instruction");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	} else
+		printk("Data");
+	printk(" machine check in kernel mode.\n");
+#elif defined(CONFIG_440A)
+	printk("Machine check in kernel mode.\n");
+	if (reason & ESR_IMCP){
+		printk("Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	}
+	else {
+		u32 mcsr = mfspr(SPRN_MCSR);
+		if (mcsr & MCSR_IB)
+			printk("Instruction Read PLB Error\n");
+		if (mcsr & MCSR_DRB)
+			printk("Data Read PLB Error\n");
+		if (mcsr & MCSR_DWB)
+			printk("Data Write PLB Error\n");
+		if (mcsr & MCSR_TLBP)
+			printk("TLB Parity Error\n");
+		if (mcsr & MCSR_ICP){
+			flush_instruction_cache();
+			printk("I-Cache Parity Error\n");
+		}
+		if (mcsr & MCSR_DCSP)
+			printk("D-Cache Search Parity Error\n");
+		if (mcsr & MCSR_DCFP)
+			printk("D-Cache Flush Parity Error\n");
+		if (mcsr & MCSR_IMPE)
+			printk("Machine Check exception is imprecise\n");
+
+		/* Clear MCSR */
+		mtspr(SPRN_MCSR, mcsr);
+	}
+#elif defined (CONFIG_E500)
+	printk("Machine check in kernel mode.\n");
+	printk("Caused by (from MCSR=%lx): ", reason);
+
+	if (reason & MCSR_MCP)
+		printk("Machine Check Signal\n");
+	if (reason & MCSR_ICPERR)
+		printk("Instruction Cache Parity Error\n");
+	if (reason & MCSR_DCP_PERR)
+		printk("Data Cache Push Parity Error\n");
+	if (reason & MCSR_DCPERR)
+		printk("Data Cache Parity Error\n");
+	if (reason & MCSR_GL_CI)
+		printk("Guarded Load or Cache-Inhibited stwcx.\n");
+	if (reason & MCSR_BUS_IAERR)
+		printk("Bus - Instruction Address Error\n");
+	if (reason & MCSR_BUS_RAERR)
+		printk("Bus - Read Address Error\n");
+	if (reason & MCSR_BUS_WAERR)
+		printk("Bus - Write Address Error\n");
+	if (reason & MCSR_BUS_IBERR)
+		printk("Bus - Instruction Data Error\n");
+	if (reason & MCSR_BUS_RBERR)
+		printk("Bus - Read Data Bus Error\n");
+	if (reason & MCSR_BUS_WBERR)
+		printk("Bus - Read Data Bus Error\n");
+	if (reason & MCSR_BUS_IPERR)
+		printk("Bus - Instruction Parity Error\n");
+	if (reason & MCSR_BUS_RPERR)
+		printk("Bus - Read Parity Error\n");
+#elif defined (CONFIG_E200)
+	printk("Machine check in kernel mode.\n");
+	printk("Caused by (from MCSR=%lx): ", reason);
+
+	if (reason & MCSR_MCP)
+		printk("Machine Check Signal\n");
+	if (reason & MCSR_CP_PERR)
+		printk("Cache Push Parity Error\n");
+	if (reason & MCSR_CPERR)
+		printk("Cache Parity Error\n");
+	if (reason & MCSR_EXCP_ERR)
+		printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
+	if (reason & MCSR_BUS_IRERR)
+		printk("Bus - Read Bus Error on instruction fetch\n");
+	if (reason & MCSR_BUS_DRERR)
+		printk("Bus - Read Bus Error on data load\n");
+	if (reason & MCSR_BUS_WRERR)
+		printk("Bus - Write Bus Error on buffered store or cache line push\n");
+#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */
+	printk("Machine check in kernel mode.\n");
+	printk("Caused by (from SRR1=%lx): ", reason);
+	switch (reason & 0x601F0000) {
+	case 0x80000:
+		printk("Machine check signal\n");
+		break;
+	case 0:		/* for 601 */
+	case 0x40000:
+	case 0x140000:	/* 7450 MSS error and TEA */
+		printk("Transfer error ack signal\n");
+		break;
+	case 0x20000:
+		printk("Data parity error signal\n");
+		break;
+	case 0x10000:
+		printk("Address parity error signal\n");
+		break;
+	case 0x20000000:
+		printk("L1 Data Cache error\n");
+		break;
+	case 0x40000000:
+		printk("L1 Instruction Cache error\n");
+		break;
+	case 0x00100000:
+		printk("L2 data cache parity error\n");
+		break;
+	default:
+		printk("Unknown values in msr\n");
+	}
+#endif /* CONFIG_4xx */
+
+	/*
+	 * Optional platform-provided routine to print out
+	 * additional info, e.g. bus error registers.
+	 */
+	platform_machine_check(regs);
+#endif /* CONFIG_PPC64 */
+
+	if (debugger_fault_handler(regs))
+		return;
+	die("Machine check", regs, SIGBUS);
+
+	/* Must die if the interrupt is not recoverable */
+	if (!(regs->msr & MSR_RI))
+		panic("Unrecoverable Machine check");
+}
+
+void SMIException(struct pt_regs *regs)
+{
+	die("System Management Interrupt", regs, SIGABRT);
+}
+
+void UnknownException(struct pt_regs *regs)
+{
+	printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+	       regs->nip, regs->msr, regs->trap);
+
+	_exception(SIGTRAP, regs, 0, 0);
+}
+
+void InstructionBreakpoint(struct pt_regs *regs)
+{
+	if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
+					5, SIGTRAP) == NOTIFY_STOP)
+		return;
+	if (debugger_iabr_match(regs))
+		return;
+	_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+}
+
+void RunModeException(struct pt_regs *regs)
+{
+	_exception(SIGTRAP, regs, 0, 0);
+}
+
+void SingleStepException(struct pt_regs *regs)
+{
+	regs->msr &= ~(MSR_SE | MSR_BE);  /* Turn off 'trace' bits */
+
+	if (notify_die(DIE_SSTEP, "single_step", regs, 5,
+					5, SIGTRAP) == NOTIFY_STOP)
+		return;
+	if (debugger_sstep(regs))
+		return;
+
+	_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+}
+
+/*
+ * After we have successfully emulated an instruction, we have to
+ * check if the instruction was being single-stepped, and if so,
+ * pretend we got a single-step exception.  This was pointed out
+ * by Kumar Gala.  -- paulus
+ */
+static void emulate_single_step(struct pt_regs *regs)
+{
+	if (single_stepping(regs)) {
+		clear_single_step(regs);
+		_exception(SIGTRAP, regs, TRAP_TRACE, 0);
+	}
+}
+
+/* Illegal instruction emulation support.  Originally written to
+ * provide the PVR to user applications using the mfspr rd, PVR.
+ * Return non-zero if we can't emulate, or -EFAULT if the associated
+ * memory access caused an access fault.  Return zero on success.
+ *
+ * There are a couple of ways to do this, either "decode" the instruction
+ * or directly match lots of bits.  In this case, matching lots of
+ * bits is faster and easier.
+ *
+ */
+#define INST_MFSPR_PVR		0x7c1f42a6
+#define INST_MFSPR_PVR_MASK	0xfc1fffff
+
+#define INST_DCBA		0x7c0005ec
+#define INST_DCBA_MASK		0x7c0007fe
+
+#define INST_MCRXR		0x7c000400
+#define INST_MCRXR_MASK		0x7c0007fe
+
+#define INST_STRING		0x7c00042a
+#define INST_STRING_MASK	0x7c0007fe
+#define INST_STRING_GEN_MASK	0x7c00067e
+#define INST_LSWI		0x7c0004aa
+#define INST_LSWX		0x7c00042a
+#define INST_STSWI		0x7c0005aa
+#define INST_STSWX		0x7c00052a
+
+static int emulate_string_inst(struct pt_regs *regs, u32 instword)
+{
+	u8 rT = (instword >> 21) & 0x1f;
+	u8 rA = (instword >> 16) & 0x1f;
+	u8 NB_RB = (instword >> 11) & 0x1f;
+	u32 num_bytes;
+	unsigned long EA;
+	int pos = 0;
+
+	/* Early out if we are an invalid form of lswx */
+	if ((instword & INST_STRING_MASK) == INST_LSWX)
+		if ((rT == rA) || (rT == NB_RB))
+			return -EINVAL;
+
+	EA = (rA == 0) ? 0 : regs->gpr[rA];
+
+	switch (instword & INST_STRING_MASK) {
+		case INST_LSWX:
+		case INST_STSWX:
+			EA += NB_RB;
+			num_bytes = regs->xer & 0x7f;
+			break;
+		case INST_LSWI:
+		case INST_STSWI:
+			num_bytes = (NB_RB == 0) ? 32 : NB_RB;
+			break;
+		default:
+			return -EINVAL;
+	}
+
+	while (num_bytes != 0)
+	{
+		u8 val;
+		u32 shift = 8 * (3 - (pos & 0x3));
+
+		switch ((instword & INST_STRING_MASK)) {
+			case INST_LSWX:
+			case INST_LSWI:
+				if (get_user(val, (u8 __user *)EA))
+					return -EFAULT;
+				/* first time updating this reg,
+				 * zero it out */
+				if (pos == 0)
+					regs->gpr[rT] = 0;
+				regs->gpr[rT] |= val << shift;
+				break;
+			case INST_STSWI:
+			case INST_STSWX:
+				val = regs->gpr[rT] >> shift;
+				if (put_user(val, (u8 __user *)EA))
+					return -EFAULT;
+				break;
+		}
+		/* move EA to next address */
+		EA += 1;
+		num_bytes--;
+
+		/* manage our position within the register */
+		if (++pos == 4) {
+			pos = 0;
+			if (++rT == 32)
+				rT = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int emulate_instruction(struct pt_regs *regs)
+{
+	u32 instword;
+	u32 rd;
+
+	if (!user_mode(regs))
+		return -EINVAL;
+	CHECK_FULL_REGS(regs);
+
+	if (get_user(instword, (u32 __user *)(regs->nip)))
+		return -EFAULT;
+
+	/* Emulate the mfspr rD, PVR. */
+	if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) {
+		rd = (instword >> 21) & 0x1f;
+		regs->gpr[rd] = mfspr(SPRN_PVR);
+		return 0;
+	}
+
+	/* Emulating the dcba insn is just a no-op.  */
+	if ((instword & INST_DCBA_MASK) == INST_DCBA)
+		return 0;
+
+	/* Emulate the mcrxr insn.  */
+	if ((instword & INST_MCRXR_MASK) == INST_MCRXR) {
+		int shift = (instword >> 21) & 0x1c;
+		unsigned long msk = 0xf0000000UL >> shift;
+
+		regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk);
+		regs->xer &= ~0xf0000000UL;
+		return 0;
+	}
+
+	/* Emulate load/store string insn. */
+	if ((instword & INST_STRING_GEN_MASK) == INST_STRING)
+		return emulate_string_inst(regs, instword);
+
+	return -EINVAL;
+}
+
+/*
+ * Look through the list of trap instructions that are used for BUG(),
+ * BUG_ON() and WARN_ON() and see if we hit one.  At this point we know
+ * that the exception was caused by a trap instruction of some kind.
+ * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0
+ * otherwise.
+ */
+extern struct bug_entry __start___bug_table[], __stop___bug_table[];
+
+#ifndef CONFIG_MODULES
+#define module_find_bug(x)	NULL
+#endif
+
+struct bug_entry *find_bug(unsigned long bugaddr)
+{
+	struct bug_entry *bug;
+
+	for (bug = __start___bug_table; bug < __stop___bug_table; ++bug)
+		if (bugaddr == bug->bug_addr)
+			return bug;
+	return module_find_bug(bugaddr);
+}
+
+int check_bug_trap(struct pt_regs *regs)
+{
+	struct bug_entry *bug;
+	unsigned long addr;
+
+	if (regs->msr & MSR_PR)
+		return 0;	/* not in kernel */
+	addr = regs->nip;	/* address of trap instruction */
+	if (addr < PAGE_OFFSET)
+		return 0;
+	bug = find_bug(regs->nip);
+	if (bug == NULL)
+		return 0;
+	if (bug->line & BUG_WARNING_TRAP) {
+		/* this is a WARN_ON rather than BUG/BUG_ON */
+#ifdef CONFIG_XMON
+		xmon_printf(KERN_ERR "Badness in %s at %s:%d\n",
+		       bug->function, bug->file,
+		       bug->line & ~BUG_WARNING_TRAP);
+#endif /* CONFIG_XMON */		
+		printk(KERN_ERR "Badness in %s at %s:%d\n",
+		       bug->function, bug->file,
+		       bug->line & ~BUG_WARNING_TRAP);
+		dump_stack();
+		return 1;
+	}
+#ifdef CONFIG_XMON
+	xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n",
+	       bug->function, bug->file, bug->line);
+	xmon(regs);
+#endif /* CONFIG_XMON */
+	printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n",
+	       bug->function, bug->file, bug->line);
+
+	return 0;
+}
+
+void ProgramCheckException(struct pt_regs *regs)
+{
+	unsigned int reason = get_reason(regs);
+	extern int do_mathemu(struct pt_regs *regs);
+
+#ifdef CONFIG_MATH_EMULATION
+	/* (reason & REASON_ILLEGAL) would be the obvious thing here,
+	 * but there seems to be a hardware bug on the 405GP (RevD)
+	 * that means ESR is sometimes set incorrectly - either to
+	 * ESR_DST (!?) or 0.  In the process of chasing this with the
+	 * hardware people - not sure if it can happen on any illegal
+	 * instruction or only on FP instructions, whether there is a
+	 * pattern to occurences etc. -dgibson 31/Mar/2003 */
+	if (!(reason & REASON_TRAP) && do_mathemu(regs) == 0) {
+		emulate_single_step(regs);
+		return;
+	}
+#endif /* CONFIG_MATH_EMULATION */
+
+	if (reason & REASON_FP) {
+		/* IEEE FP exception */
+		int code = 0;
+		u32 fpscr;
+
+		/* We must make sure the FP state is consistent with
+		 * our MSR_FP in regs
+		 */
+		preempt_disable();
+		if (regs->msr & MSR_FP)
+			giveup_fpu(current);
+		preempt_enable();
+
+		fpscr = current->thread.fpscr;
+		fpscr &= fpscr << 22;	/* mask summary bits with enables */
+		if (fpscr & FPSCR_VX)
+			code = FPE_FLTINV;
+		else if (fpscr & FPSCR_OX)
+			code = FPE_FLTOVF;
+		else if (fpscr & FPSCR_UX)
+			code = FPE_FLTUND;
+		else if (fpscr & FPSCR_ZX)
+			code = FPE_FLTDIV;
+		else if (fpscr & FPSCR_XX)
+			code = FPE_FLTRES;
+		_exception(SIGFPE, regs, code, regs->nip);
+		return;
+	}
+
+	if (reason & REASON_TRAP) {
+		/* trap exception */
+		if (debugger_bpt(regs))
+			return;
+		if (check_bug_trap(regs)) {
+			regs->nip += 4;
+			return;
+		}
+		_exception(SIGTRAP, regs, TRAP_BRKPT, 0);
+		return;
+	}
+
+	/* Try to emulate it if we should. */
+	if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
+		switch (emulate_instruction(regs)) {
+		case 0:
+			regs->nip += 4;
+			emulate_single_step(regs);
+			return;
+		case -EFAULT:
+			_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+			return;
+		}
+	}
+
+	if (reason & REASON_PRIVILEGED)
+		_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
+	else
+		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+
+void AlignmentException(struct pt_regs *regs)
+{
+	int fixed;
+
+	fixed = fix_alignment(regs);
+
+	if (fixed == 1) {
+		regs->nip += 4;	/* skip over emulated instruction */
+		emulate_single_step(regs);
+		return;
+	}
+
+	/* Operand address was bad */	
+	if (fixed == -EFAULT) {
+		if (user_mode(regs))
+			_exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar);
+		else
+			/* Search exception table */
+			bad_page_fault(regs, regs->dar, SIGSEGV);
+		return;
+	}
+	_exception(SIGBUS, regs, BUS_ADRALN, regs->dar);
+}
+
+void StackOverflow(struct pt_regs *regs)
+{
+	printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
+	       current, regs->gpr[1]);
+	debugger(regs);
+	show_regs(regs);
+	panic("kernel stack overflow");
+}
+
+void nonrecoverable_exception(struct pt_regs *regs)
+{
+	printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n",
+	       regs->nip, regs->msr);
+	debugger(regs);
+	die("nonrecoverable exception", regs, SIGKILL);
+}
+
+void trace_syscall(struct pt_regs *regs)
+{
+	printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
+	       current, current->pid, regs->nip, regs->link, regs->gpr[0],
+	       regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
+}
+
+#ifdef CONFIG_8xx
+void SoftwareEmulation(struct pt_regs *regs)
+{
+	extern int do_mathemu(struct pt_regs *);
+	extern int Soft_emulate_8xx(struct pt_regs *);
+	int errcode;
+
+	CHECK_FULL_REGS(regs);
+
+	if (!user_mode(regs)) {
+		debugger(regs);
+		die("Kernel Mode Software FPU Emulation", regs, SIGFPE);
+	}
+
+#ifdef CONFIG_MATH_EMULATION
+	errcode = do_mathemu(regs);
+#else
+	errcode = Soft_emulate_8xx(regs);
+#endif
+	if (errcode) {
+		if (errcode > 0)
+			_exception(SIGFPE, regs, 0, 0);
+		else if (errcode == -EFAULT)
+			_exception(SIGSEGV, regs, 0, 0);
+		else
+			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+	} else
+		emulate_single_step(regs);
+}
+#endif /* CONFIG_8xx */
+
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+
+void DebugException(struct pt_regs *regs, unsigned long debug_status)
+{
+	if (debug_status & DBSR_IC) {	/* instruction completion */
+		regs->msr &= ~MSR_DE;
+		if (user_mode(regs)) {
+			current->thread.dbcr0 &= ~DBCR0_IC;
+		} else {
+			/* Disable instruction completion */
+			mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
+			/* Clear the instruction completion event */
+			mtspr(SPRN_DBSR, DBSR_IC);
+			if (debugger_sstep(regs))
+				return;
+		}
+		_exception(SIGTRAP, regs, TRAP_TRACE, 0);
+	}
+}
+#endif /* CONFIG_4xx || CONFIG_BOOKE */
+
+#if !defined(CONFIG_TAU_INT)
+void TAUException(struct pt_regs *regs)
+{
+	printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx    %s\n",
+	       regs->nip, regs->msr, regs->trap, print_tainted());
+}
+#endif /* CONFIG_INT_TAU */
+
+void AltivecUnavailException(struct pt_regs *regs)
+{
+	static int kernel_altivec_count;
+
+#ifndef CONFIG_ALTIVEC
+	if (user_mode(regs)) {
+		/* A user program has executed an altivec instruction,
+		   but this kernel doesn't support altivec. */
+		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+		return;
+	}
+#endif
+	/* The kernel has executed an altivec instruction without
+	   first enabling altivec.  Whinge but let it do it. */
+	if (++kernel_altivec_count < 10)
+		printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n",
+		       current, regs->nip);
+	regs->msr |= MSR_VEC;
+}
+
+#ifdef CONFIG_ALTIVEC
+void AltivecAssistException(struct pt_regs *regs)
+{
+	int err;
+
+	preempt_disable();
+	if (regs->msr & MSR_VEC)
+		giveup_altivec(current);
+	preempt_enable();
+	if (!user_mode(regs)) {
+		printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode"
+		       " at %lx\n", regs->nip);
+		die("Kernel Altivec assist exception", regs, SIGILL);
+	}
+
+	err = emulate_altivec(regs);
+	if (err == 0) {
+		regs->nip += 4;		/* skip emulated instruction */
+		emulate_single_step(regs);
+		return;
+	}
+
+	if (err == -EFAULT) {
+		/* got an error reading the instruction */
+		_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
+	} else {
+		/* didn't recognize the instruction */
+		/* XXX quick hack for now: set the non-Java bit in the VSCR */
+		if (printk_ratelimit())
+			printk(KERN_ERR "Unrecognized altivec instruction "
+			       "in %s at %lx\n", current->comm, regs->nip);
+		current->thread.vscr.u[3] |= 0x10000;
+	}
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_E500
+void PerformanceMonitorException(struct pt_regs *regs)
+{
+	perf_irq(regs);
+}
+#endif
+
+#ifdef CONFIG_FSL_BOOKE
+void CacheLockingException(struct pt_regs *regs, unsigned long address,
+			   unsigned long error_code)
+{
+	/* We treat cache locking instructions from the user
+	 * as priv ops, in the future we could try to do
+	 * something smarter
+	 */
+	if (error_code & (ESR_DLK|ESR_ILK))
+		_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
+	return;
+}
+#endif /* CONFIG_FSL_BOOKE */
+
+#ifdef CONFIG_SPE
+void SPEFloatingPointException(struct pt_regs *regs)
+{
+	unsigned long spefscr;
+	int fpexc_mode;
+	int code = 0;
+
+	spefscr = current->thread.spefscr;
+	fpexc_mode = current->thread.fpexc_mode;
+
+	/* Hardware does not neccessarily set sticky
+	 * underflow/overflow/invalid flags */
+	if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) {
+		code = FPE_FLTOVF;
+		spefscr |= SPEFSCR_FOVFS;
+	}
+	else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) {
+		code = FPE_FLTUND;
+		spefscr |= SPEFSCR_FUNFS;
+	}
+	else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV))
+		code = FPE_FLTDIV;
+	else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) {
+		code = FPE_FLTINV;
+		spefscr |= SPEFSCR_FINVS;
+	}
+	else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES))
+		code = FPE_FLTRES;
+
+	current->thread.spefscr = spefscr;
+
+	_exception(SIGFPE, regs, code, regs->nip);
+	return;
+}
+#endif
+
+#ifdef CONFIG_BOOKE_WDT
+/*
+ * Default handler for a Watchdog exception,
+ * spins until a reboot occurs
+ */
+void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
+{
+	/* Generic WatchdogHandler, implement your own */
+	mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE));
+	return;
+}
+
+void WatchdogException(struct pt_regs *regs)
+{
+	printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
+	WatchdogHandler(regs);
+}
+#endif
+
+void __init trap_init(void)
+{
+}
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
new file mode 100644
index 00000000000..12cb90bc209
--- /dev/null
+++ b/arch/powerpc/kernel/vector.S
@@ -0,0 +1,197 @@
+#include <linux/config.h>
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+/*
+ * The routines below are in assembler so we can closely control the
+ * usage of floating-point registers.  These routines must be called
+ * with preempt disabled.
+ */
+#ifdef CONFIG_PPC32
+	.data
+fpzero:
+	.long	0
+fpone:
+	.long	0x3f800000	/* 1.0 in single-precision FP */
+fphalf:
+	.long	0x3f000000	/* 0.5 in single-precision FP */
+
+#define LDCONST(fr, name)	\
+	lis	r11,name@ha;	\
+	lfs	fr,name@l(r11)
+#else
+
+	.section ".toc","aw"
+fpzero:
+	.tc	FD_0_0[TC],0
+fpone:
+	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
+fphalf:
+	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */
+
+#define LDCONST(fr, name)	\
+	lfd	fr,name@toc(r2)
+#endif
+
+	.text
+/*
+ * Internal routine to enable floating point and set FPSCR to 0.
+ * Don't call it from C; it doesn't use the normal calling convention.
+ */
+fpenable:
+#ifdef CONFIG_PPC32
+	stwu	r1,-64(r1)
+#else
+	stdu	r1,-64(r1)
+#endif
+	mfmsr	r10
+	ori	r11,r10,MSR_FP
+	mtmsr	r11
+	isync
+	stfd	fr0,24(r1)
+	stfd	fr1,16(r1)
+	stfd	fr31,8(r1)
+	LDCONST(fr1, fpzero)
+	mffs	fr31
+	mtfsf	0xff,fr1
+	blr
+
+fpdisable:
+	mtlr	r12
+	mtfsf	0xff,fr31
+	lfd	fr31,8(r1)
+	lfd	fr1,16(r1)
+	lfd	fr0,24(r1)
+	mtmsr	r10
+	isync
+	addi	r1,r1,64
+	blr
+
+/*
+ * Vector add, floating point.
+ */
+_GLOBAL(vaddfp)
+	mflr	r12
+	bl	fpenable
+	li	r0,4
+	mtctr	r0
+	li	r6,0
+1:	lfsx	fr0,r4,r6
+	lfsx	fr1,r5,r6
+	fadds	fr0,fr0,fr1
+	stfsx	fr0,r3,r6
+	addi	r6,r6,4
+	bdnz	1b
+	b	fpdisable
+
+/*
+ * Vector subtract, floating point.
+ */
+_GLOBAL(vsubfp)
+	mflr	r12
+	bl	fpenable
+	li	r0,4
+	mtctr	r0
+	li	r6,0
+1:	lfsx	fr0,r4,r6
+	lfsx	fr1,r5,r6
+	fsubs	fr0,fr0,fr1
+	stfsx	fr0,r3,r6
+	addi	r6,r6,4
+	bdnz	1b
+	b	fpdisable
+
+/*
+ * Vector multiply and add, floating point.
+ */
+_GLOBAL(vmaddfp)
+	mflr	r12
+	bl	fpenable
+	stfd	fr2,32(r1)
+	li	r0,4
+	mtctr	r0
+	li	r7,0
+1:	lfsx	fr0,r4,r7
+	lfsx	fr1,r5,r7
+	lfsx	fr2,r6,r7
+	fmadds	fr0,fr0,fr2,fr1
+	stfsx	fr0,r3,r7
+	addi	r7,r7,4
+	bdnz	1b
+	lfd	fr2,32(r1)
+	b	fpdisable
+
+/*
+ * Vector negative multiply and subtract, floating point.
+ */
+_GLOBAL(vnmsubfp)
+	mflr	r12
+	bl	fpenable
+	stfd	fr2,32(r1)
+	li	r0,4
+	mtctr	r0
+	li	r7,0
+1:	lfsx	fr0,r4,r7
+	lfsx	fr1,r5,r7
+	lfsx	fr2,r6,r7
+	fnmsubs	fr0,fr0,fr2,fr1
+	stfsx	fr0,r3,r7
+	addi	r7,r7,4
+	bdnz	1b
+	lfd	fr2,32(r1)
+	b	fpdisable
+
+/*
+ * Vector reciprocal estimate.  We just compute 1.0/x.
+ * r3 -> destination, r4 -> source.
+ */
+_GLOBAL(vrefp)
+	mflr	r12
+	bl	fpenable
+	li	r0,4
+	LDCONST(fr1, fpone)
+	mtctr	r0
+	li	r6,0
+1:	lfsx	fr0,r4,r6
+	fdivs	fr0,fr1,fr0
+	stfsx	fr0,r3,r6
+	addi	r6,r6,4
+	bdnz	1b
+	b	fpdisable
+
+/*
+ * Vector reciprocal square-root estimate, floating point.
+ * We use the frsqrte instruction for the initial estimate followed
+ * by 2 iterations of Newton-Raphson to get sufficient accuracy.
+ * r3 -> destination, r4 -> source.
+ */
+_GLOBAL(vrsqrtefp)
+	mflr	r12
+	bl	fpenable
+	stfd	fr2,32(r1)
+	stfd	fr3,40(r1)
+	stfd	fr4,48(r1)
+	stfd	fr5,56(r1)
+	li	r0,4
+	LDCONST(fr4, fpone)
+	LDCONST(fr5, fphalf)
+	mtctr	r0
+	li	r6,0
+1:	lfsx	fr0,r4,r6
+	frsqrte	fr1,fr0		/* r = frsqrte(s) */
+	fmuls	fr3,fr1,fr0	/* r * s */
+	fmuls	fr2,fr1,fr5	/* r * 0.5 */
+	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
+	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
+	fmuls	fr3,fr1,fr0	/* r * s */
+	fmuls	fr2,fr1,fr5	/* r * 0.5 */
+	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
+	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
+	stfsx	fr1,r3,r6
+	addi	r6,r6,4
+	bdnz	1b
+	lfd	fr5,56(r1)
+	lfd	fr4,48(r1)
+	lfd	fr3,40(r1)
+	lfd	fr2,32(r1)
+	b	fpdisable
diff --git a/arch/powerpc/kernel/vmlinux.lds b/arch/powerpc/kernel/vmlinux.lds
new file mode 100644
index 00000000000..d62c288a81d
--- /dev/null
+++ b/arch/powerpc/kernel/vmlinux.lds
@@ -0,0 +1,174 @@
+/* Align . to a 8 byte boundary equals to maximum function alignment. */
+/* sched.text is aling to function alignment to secure we have same
+ * address even at second ld pass when generating System.map */
+/* spinlock.text is aling to function alignment to secure we have same
+ * address even at second ld pass when generating System.map */
+  /* DWARF debug sections.
+		Symbols in the DWARF debugging sections are relative to
+		the beginning of the section so we begin them at 0.  */
+  /* Stabs debugging sections.  */
+OUTPUT_ARCH(powerpc:common)
+jiffies = jiffies_64 + 4;
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  . = + SIZEOF_HEADERS;
+  .interp : { *(.interp) }
+  .hash : { *(.hash) }
+  .dynsym : { *(.dynsym) }
+  .dynstr : { *(.dynstr) }
+  .rel.text : { *(.rel.text) }
+  .rela.text : { *(.rela.text) }
+  .rel.data : { *(.rel.data) }
+  .rela.data : { *(.rela.data) }
+  .rel.rodata : { *(.rel.rodata) }
+  .rela.rodata : { *(.rela.rodata) }
+  .rel.got : { *(.rel.got) }
+  .rela.got : { *(.rela.got) }
+  .rel.ctors : { *(.rel.ctors) }
+  .rela.ctors : { *(.rela.ctors) }
+  .rel.dtors : { *(.rel.dtors) }
+  .rela.dtors : { *(.rela.dtors) }
+  .rel.bss : { *(.rel.bss) }
+  .rela.bss : { *(.rela.bss) }
+  .rel.plt : { *(.rel.plt) }
+  .rela.plt : { *(.rela.plt) }
+/*  .init          : { *(.init)	} =0*/
+  .plt : { *(.plt) }
+  .text :
+  {
+    *(.text)
+    . = ALIGN(8); __sched_text_start = .; *(.sched.text) __sched_text_end = .;
+    . = ALIGN(8); __lock_text_start = .; *(.spinlock.text) __lock_text_end = .;
+    *(.fixup)
+    *(.got1)
+    __got2_start = .;
+    *(.got2)
+    __got2_end = .;
+  }
+  _etext = .;
+  PROVIDE (etext = .);
+  .rodata : AT(ADDR(.rodata) - 0) { *(.rodata) *(.rodata.*) *(__vermagic) } .rodata1 : AT(ADDR(.rodata1) - 0) { *(.rodata1) } .pci_fixup : AT(ADDR(.pci_fixup) - 0) { __start_pci_fixups_early = .; *(.pci_fixup_early) __end_pci_fixups_early = .; __start_pci_fixups_header = .; *(.pci_fixup_header) __end_pci_fixups_header = .; __start_pci_fixups_final = .; *(.pci_fixup_final) __end_pci_fixups_final = .; __start_pci_fixups_enable = .; *(.pci_fixup_enable) __end_pci_fixups_enable = .; } __ksymtab : AT(ADDR(__ksymtab) - 0) { __start___ksymtab = .; *(__ksymtab) __stop___ksymtab = .; } __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - 0) { __start___ksymtab_gpl = .; *(__ksymtab_gpl) __stop___ksymtab_gpl = .; } __kcrctab : AT(ADDR(__kcrctab) - 0) { __start___kcrctab = .; *(__kcrctab) __stop___kcrctab = .; } __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - 0) { __start___kcrctab_gpl = .; *(__kcrctab_gpl) __stop___kcrctab_gpl = .; } __ksymtab_strings : AT(ADDR(__ksymtab_strings) - 0) { *(__ksymtab_strings) } __param : AT(ADDR(__param) - 0) { __start___param = .; *(__param) __stop___param = .; }
+  .fini : { *(.fini) } =0
+  .ctors : { *(.ctors) }
+  .dtors : { *(.dtors) }
+  .fixup : { *(.fixup) }
+ __ex_table : {
+  __start___ex_table = .;
+  *(__ex_table)
+  __stop___ex_table = .;
+ }
+ __bug_table : {
+  __start___bug_table = .;
+  *(__bug_table)
+  __stop___bug_table = .;
+ }
+  /* Read-write section, merged into data segment: */
+  . = ALIGN(4096);
+  .data :
+  {
+    *(.data)
+    *(.data1)
+    *(.sdata)
+    *(.sdata2)
+    *(.got.plt) *(.got)
+    *(.dynamic)
+    CONSTRUCTORS
+  }
+
+  . = ALIGN(4096);
+  __nosave_begin = .;
+  .data_nosave : { *(.data.nosave) }
+  . = ALIGN(4096);
+  __nosave_end = .;
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  _edata = .;
+  PROVIDE (edata = .);
+
+  . = ALIGN(8192);
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);
+  __init_begin = .;
+  .init.text : {
+ _sinittext = .;
+ *(.init.text)
+ _einittext = .;
+  }
+  /* .exit.text is discarded at runtime, not link time,
+     to deal with references from __bug_table */
+  .exit.text : { *(.exit.text) }
+  .init.data : {
+    *(.init.data);
+    __vtop_table_begin = .;
+    *(.vtop_fixup);
+    __vtop_table_end = .;
+    __ptov_table_begin = .;
+    *(.ptov_fixup);
+    __ptov_table_end = .;
+  }
+  . = ALIGN(16);
+  __setup_start = .;
+  .init.setup : { *(.init.setup) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : {
+ *(.initcall1.init)
+ *(.initcall2.init)
+ *(.initcall3.init)
+ *(.initcall4.init)
+ *(.initcall5.init)
+ *(.initcall6.init)
+ *(.initcall7.init)
+  }
+  __initcall_end = .;
+
+  __con_initcall_start = .;
+  .con_initcall.init : { *(.con_initcall.init) }
+  __con_initcall_end = .;
+
+  .security_initcall.init : AT(ADDR(.security_initcall.init) - 0) { __security_initcall_start = .; *(.security_initcall.init) __security_initcall_end = .; }
+
+  __start___ftr_fixup = .;
+  __ftr_fixup : { *(__ftr_fixup) }
+  __stop___ftr_fixup = .;
+
+  . = ALIGN(32);
+  __per_cpu_start = .;
+  .data.percpu : { *(.data.percpu) }
+  __per_cpu_end = .;
+
+  . = ALIGN(4096);
+  __initramfs_start = .;
+  .init.ramfs : { *(.init.ramfs) }
+  __initramfs_end = .;
+
+  . = ALIGN(4096);
+  __init_end = .;
+
+  . = ALIGN(4096);
+  _sextratext = .;
+  _eextratext = .;
+
+  __bss_start = .;
+  .bss :
+  {
+   *(.sbss) *(.scommon)
+   *(.dynbss)
+   *(.bss)
+   *(COMMON)
+  }
+  __bss_stop = .;
+
+  _end = . ;
+  PROVIDE (end = .);
+
+  /* Sections to be discarded. */
+  /DISCARD/ : {
+    *(.exitcall.exit)
+    *(.exit.data)
+  }
+}
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
new file mode 100644
index 00000000000..09c6525cfa6
--- /dev/null
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -0,0 +1,172 @@
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_ARCH(powerpc:common)
+jiffies = jiffies_64 + 4;
+SECTIONS
+{
+  /* Read-only sections, merged into text segment: */
+  . = + SIZEOF_HEADERS;
+  .interp : { *(.interp) }
+  .hash          : { *(.hash)		}
+  .dynsym        : { *(.dynsym)		}
+  .dynstr        : { *(.dynstr)		}
+  .rel.text      : { *(.rel.text)		}
+  .rela.text     : { *(.rela.text) 	}
+  .rel.data      : { *(.rel.data)		}
+  .rela.data     : { *(.rela.data) 	}
+  .rel.rodata    : { *(.rel.rodata) 	}
+  .rela.rodata   : { *(.rela.rodata) 	}
+  .rel.got       : { *(.rel.got)		}
+  .rela.got      : { *(.rela.got)		}
+  .rel.ctors     : { *(.rel.ctors)	}
+  .rela.ctors    : { *(.rela.ctors)	}
+  .rel.dtors     : { *(.rel.dtors)	}
+  .rela.dtors    : { *(.rela.dtors)	}
+  .rel.bss       : { *(.rel.bss)		}
+  .rela.bss      : { *(.rela.bss)		}
+  .rel.plt       : { *(.rel.plt)		}
+  .rela.plt      : { *(.rela.plt)		}
+/*  .init          : { *(.init)	} =0*/
+  .plt : { *(.plt) }
+  .text      :
+  {
+    *(.text)
+    SCHED_TEXT
+    LOCK_TEXT
+    *(.fixup)
+    *(.got1)
+    __got2_start = .;
+    *(.got2)
+    __got2_end = .;
+  }
+  _etext = .;
+  PROVIDE (etext = .);
+
+  RODATA
+  .fini      : { *(.fini)    } =0
+  .ctors     : { *(.ctors)   }
+  .dtors     : { *(.dtors)   }
+
+  .fixup   : { *(.fixup) }
+
+	__ex_table : {
+		__start___ex_table = .;
+		*(__ex_table)
+		__stop___ex_table = .;
+	}
+
+	__bug_table : {
+		__start___bug_table = .;
+		*(__bug_table)
+		__stop___bug_table = .;
+	}
+
+  /* Read-write section, merged into data segment: */
+  . = ALIGN(4096);
+  .data    :
+  {
+    *(.data)
+    *(.data1)
+    *(.sdata)
+    *(.sdata2)
+    *(.got.plt) *(.got)
+    *(.dynamic)
+    CONSTRUCTORS
+  }
+
+  . = ALIGN(4096);
+  __nosave_begin = .;
+  .data_nosave : { *(.data.nosave) }
+  . = ALIGN(4096);
+  __nosave_end = .;
+
+  . = ALIGN(32);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  _edata  =  .;
+  PROVIDE (edata = .);
+
+  . = ALIGN(8192);
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);
+  __init_begin = .;
+  .init.text : {
+	_sinittext = .;
+	*(.init.text)
+	_einittext = .;
+  }
+  /* .exit.text is discarded at runtime, not link time,
+     to deal with references from __bug_table */
+  .exit.text : { *(.exit.text) }
+  .init.data : {
+    *(.init.data);
+    __vtop_table_begin = .;
+    *(.vtop_fixup);
+    __vtop_table_end = .;
+    __ptov_table_begin = .;
+    *(.ptov_fixup);
+    __ptov_table_end = .;
+  }
+  . = ALIGN(16);
+  __setup_start = .;
+  .init.setup : { *(.init.setup) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : {
+	*(.initcall1.init)
+	*(.initcall2.init)
+	*(.initcall3.init)
+	*(.initcall4.init)
+	*(.initcall5.init)
+	*(.initcall6.init)
+	*(.initcall7.init)
+  }
+  __initcall_end = .;
+
+  __con_initcall_start = .;
+  .con_initcall.init : { *(.con_initcall.init) }
+  __con_initcall_end = .;
+
+  SECURITY_INIT
+
+  __start___ftr_fixup = .;
+  __ftr_fixup : { *(__ftr_fixup) }
+  __stop___ftr_fixup = .;
+
+  . = ALIGN(32);
+  __per_cpu_start = .;
+  .data.percpu  : { *(.data.percpu) }
+  __per_cpu_end = .;
+
+  . = ALIGN(4096);
+  __initramfs_start = .;
+  .init.ramfs : { *(.init.ramfs) }
+  __initramfs_end = .;
+
+  . = ALIGN(4096);
+  __init_end = .;
+
+  . = ALIGN(4096);
+  _sextratext = .;
+  _eextratext = .;
+
+  __bss_start = .;
+  .bss       :
+  {
+   *(.sbss) *(.scommon)
+   *(.dynbss)
+   *(.bss)
+   *(COMMON)
+  }
+  __bss_stop = .;
+
+  _end = . ;
+  PROVIDE (end = .);
+
+  /* Sections to be discarded. */
+  /DISCARD/ : {
+    *(.exitcall.exit)
+    *(.exit.data)
+  }
+}
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
new file mode 100644
index 00000000000..347f9798e43
--- /dev/null
+++ b/arch/powerpc/lib/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for ppc-specific library files..
+#
+
+obj-y			:= strcase.o string.o
+obj-$(CONFIG_PPC32)	+= div64.o copy32.o checksum.o
+obj-$(CONFIG_PPC64)	+= copypage.o copyuser.o memcpy.o usercopy.o \
+			   sstep.o checksum64.o
+obj-$(CONFIG_PPC_ISERIES) += e2a.o
diff --git a/arch/powerpc/lib/checksum.S b/arch/powerpc/lib/checksum.S
new file mode 100644
index 00000000000..7874e8a8045
--- /dev/null
+++ b/arch/powerpc/lib/checksum.S
@@ -0,0 +1,225 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+	.text
+
+/*
+ * ip_fast_csum(buf, len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ */
+_GLOBAL(ip_fast_csum)
+	lwz	r0,0(r3)
+	lwzu	r5,4(r3)
+	addic.	r4,r4,-2
+	addc	r0,r0,r5
+	mtctr	r4
+	blelr-
+1:	lwzu	r4,4(r3)
+	adde	r0,r0,r4
+	bdnz	1b
+	addze	r0,r0		/* add in final carry */
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ *   csum_tcpudp_magic(saddr, daddr, len, proto, sum)
+ */	
+_GLOBAL(csum_tcpudp_magic)
+	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
+	addc	r0,r3,r4	/* add 4 32-bit words together */
+	adde	r0,r0,r5
+	adde	r0,r0,r7
+	addze	r0,r0		/* add in final carry */
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * csum_partial(buff, len, sum)
+ */
+_GLOBAL(csum_partial)
+	addic	r0,r5,0
+	subi	r3,r3,4
+	srwi.	r6,r4,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r5,r3,2		/* Align buffer to longword boundary */
+	beq+	1f
+	lhz	r5,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r4,r4,2
+	addc	r0,r0,r5
+	srwi.	r6,r4,2		/* # words to do */
+	beq	3f
+1:	mtctr	r6
+2:	lwzu	r5,4(r3)	/* the bdnz has zero overhead, so it should */
+	adde	r0,r0,r5	/* be unnecessary to unroll this loop */
+	bdnz	2b
+	andi.	r4,r4,3
+3:	cmpwi	0,r4,2
+	blt+	4f
+	lhz	r5,4(r3)
+	addi	r3,r3,2
+	subi	r4,r4,2
+	adde	r0,r0,r5
+4:	cmpwi	0,r4,1
+	bne+	5f
+	lbz	r5,4(r3)
+	slwi	r5,r5,8		/* Upper byte of word */
+	adde	r0,r0,r5
+5:	addze	r3,r0		/* add in final carry */
+	blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	addic	r0,r6,0
+	subi	r3,r3,4
+	subi	r4,r4,4
+	srwi.	r6,r5,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r9,r4,2		/* Align dst to longword boundary */
+	beq+	1f
+81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r5,r5,2
+91:	sth	r6,4(r4)
+	addi	r4,r4,2
+	addc	r0,r0,r6
+	srwi.	r6,r5,2		/* # words to do */
+	beq	3f
+1:	srwi.	r6,r5,4		/* # groups of 4 words to do */
+	beq	10f
+	mtctr	r6
+71:	lwz	r6,4(r3)
+72:	lwz	r9,8(r3)
+73:	lwz	r10,12(r3)
+74:	lwzu	r11,16(r3)
+	adde	r0,r0,r6
+75:	stw	r6,4(r4)
+	adde	r0,r0,r9
+76:	stw	r9,8(r4)
+	adde	r0,r0,r10
+77:	stw	r10,12(r4)
+	adde	r0,r0,r11
+78:	stwu	r11,16(r4)
+	bdnz	71b
+10:	rlwinm.	r6,r5,30,30,31	/* # words left to do */
+	beq	13f
+	mtctr	r6
+82:	lwzu	r9,4(r3)
+92:	stwu	r9,4(r4)
+	adde	r0,r0,r9
+	bdnz	82b
+13:	andi.	r5,r5,3
+3:	cmpwi	0,r5,2
+	blt+	4f
+83:	lhz	r6,4(r3)
+	addi	r3,r3,2
+	subi	r5,r5,2
+93:	sth	r6,4(r4)
+	addi	r4,r4,2
+	adde	r0,r0,r6
+4:	cmpwi	0,r5,1
+	bne+	5f
+84:	lbz	r6,4(r3)
+94:	stb	r6,4(r4)
+	slwi	r6,r6,8		/* Upper byte of word */
+	adde	r0,r0,r6
+5:	addze	r3,r0		/* add in final carry */
+	blr
+
+/* These shouldn't go in the fixup section, since that would
+   cause the ex_table addresses to get out of order. */
+
+src_error_4:
+	mfctr	r6		/* update # bytes remaining from ctr */
+	rlwimi	r5,r6,4,0,27
+	b	79f
+src_error_1:
+	li	r6,0
+	subi	r5,r5,2
+95:	sth	r6,4(r4)
+	addi	r4,r4,2
+79:	srwi.	r6,r5,2
+	beq	3f
+	mtctr	r6
+src_error_2:
+	li	r6,0
+96:	stwu	r6,4(r4)
+	bdnz	96b
+3:	andi.	r5,r5,3
+	beq	src_error
+src_error_3:
+	li	r6,0
+	mtctr	r5
+	addi	r4,r4,3
+97:	stbu	r6,1(r4)
+	bdnz	97b
+src_error:
+	cmpwi	0,r7,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r7)
+1:	addze	r3,r0
+	blr
+
+dst_error:
+	cmpwi	0,r8,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r8)
+1:	addze	r3,r0
+	blr
+
+.section __ex_table,"a"
+	.long	81b,src_error_1
+	.long	91b,dst_error
+	.long	71b,src_error_4
+	.long	72b,src_error_4
+	.long	73b,src_error_4
+	.long	74b,src_error_4
+	.long	75b,dst_error
+	.long	76b,dst_error
+	.long	77b,dst_error
+	.long	78b,dst_error
+	.long	82b,src_error_2
+	.long	92b,dst_error
+	.long	83b,src_error_3
+	.long	93b,dst_error
+	.long	84b,src_error_3
+	.long	94b,dst_error
+	.long	95b,dst_error
+	.long	96b,dst_error
+	.long	97b,dst_error
diff --git a/arch/powerpc/lib/checksum64.S b/arch/powerpc/lib/checksum64.S
new file mode 100644
index 00000000000..ef96c6c58ef
--- /dev/null
+++ b/arch/powerpc/lib/checksum64.S
@@ -0,0 +1,229 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ *
+ * In practice len == 5, but this is not guaranteed.  So this code does not
+ * attempt to use doubleword instructions.
+ */
+_GLOBAL(ip_fast_csum)
+	lwz	r0,0(r3)
+	lwzu	r5,4(r3)
+	addic.	r4,r4,-2
+	addc	r0,r0,r5
+	mtctr	r4
+	blelr-
+1:	lwzu	r4,4(r3)
+	adde	r0,r0,r4
+	bdnz	1b
+	addze	r0,r0		/* add in final carry */
+        rldicl  r4,r0,32,0      /* fold two 32-bit halves together */
+        add     r0,r0,r4
+        srdi    r0,r0,32
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
+ * No real gain trying to do this specially for 64 bit, but
+ * the 32 bit addition may spill into the upper bits of
+ * the doubleword so we still must fold it down from 64.
+ */	
+_GLOBAL(csum_tcpudp_magic)
+	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
+	addc	r0,r3,r4	/* add 4 32-bit words together */
+	adde	r0,r0,r5
+	adde	r0,r0,r7
+        rldicl  r4,r0,32,0      /* fold 64 bit value */
+        add     r0,r4,r0
+        srdi    r0,r0,32
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * This code assumes at least halfword alignment, though the length
+ * can be any number of bytes.  The sum is accumulated in r5.
+ *
+ * csum_partial(r3=buff, r4=len, r5=sum)
+ */
+_GLOBAL(csum_partial)
+        subi	r3,r3,8		/* we'll offset by 8 for the loads */
+        srdi.	r6,r4,3         /* divide by 8 for doubleword count */
+        addic   r5,r5,0         /* clear carry */
+        beq	3f              /* if we're doing < 8 bytes */
+        andi.	r0,r3,2         /* aligned on a word boundary already? */
+        beq+	1f
+        lhz     r6,8(r3)        /* do 2 bytes to get aligned */
+        addi    r3,r3,2
+        subi    r4,r4,2
+        addc    r5,r5,r6
+        srdi.   r6,r4,3         /* recompute number of doublewords */
+        beq     3f              /* any left? */
+1:      mtctr   r6
+2:      ldu     r6,8(r3)        /* main sum loop */
+        adde    r5,r5,r6
+        bdnz    2b
+        andi.	r4,r4,7         /* compute bytes left to sum after doublewords */
+3:	cmpwi	0,r4,4		/* is at least a full word left? */
+	blt	4f
+	lwz	r6,8(r3)	/* sum this word */
+	addi	r3,r3,4
+	subi	r4,r4,4
+	adde	r5,r5,r6
+4:	cmpwi	0,r4,2		/* is at least a halfword left? */
+        blt+	5f
+        lhz     r6,8(r3)        /* sum this halfword */
+        addi    r3,r3,2
+        subi    r4,r4,2
+        adde    r5,r5,r6
+5:	cmpwi	0,r4,1		/* is at least a byte left? */
+        bne+    6f
+        lbz     r6,8(r3)        /* sum this byte */
+        slwi    r6,r6,8         /* this byte is assumed to be the upper byte of a halfword */
+        adde    r5,r5,r6
+6:      addze	r5,r5		/* add in final carry */
+	rldicl  r4,r5,32,0      /* fold two 32-bit halves together */
+        add     r3,r4,r5
+        srdi    r3,r3,32
+        blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * This code needs to be reworked to take advantage of 64 bit sum+copy.
+ * However, due to tokenring halfword alignment problems this will be very
+ * tricky.  For now we'll leave it until we instrument it somehow.
+ *
+ * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	addic	r0,r6,0
+	subi	r3,r3,4
+	subi	r4,r4,4
+	srwi.	r6,r5,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r9,r4,2		/* Align dst to longword boundary */
+	beq+	1f
+81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r5,r5,2
+91:	sth	r6,4(r4)
+	addi	r4,r4,2
+	addc	r0,r0,r6
+	srwi.	r6,r5,2		/* # words to do */
+	beq	3f
+1:	mtctr	r6
+82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
+92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
+	adde	r0,r0,r6
+	bdnz	82b
+	andi.	r5,r5,3
+3:	cmpwi	0,r5,2
+	blt+	4f
+83:	lhz	r6,4(r3)
+	addi	r3,r3,2
+	subi	r5,r5,2
+93:	sth	r6,4(r4)
+	addi	r4,r4,2
+	adde	r0,r0,r6
+4:	cmpwi	0,r5,1
+	bne+	5f
+84:	lbz	r6,4(r3)
+94:	stb	r6,4(r4)
+	slwi	r6,r6,8		/* Upper byte of word */
+	adde	r0,r0,r6
+5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
+        rldicl  r4,r3,32,0      /* fold 64 bit value */
+        add     r3,r4,r3
+        srdi    r3,r3,32
+	blr
+
+/* These shouldn't go in the fixup section, since that would
+   cause the ex_table addresses to get out of order. */
+
+	.globl src_error_1
+src_error_1:
+	li	r6,0
+	subi	r5,r5,2
+95:	sth	r6,4(r4)
+	addi	r4,r4,2
+	srwi.	r6,r5,2
+	beq	3f
+	mtctr	r6
+	.globl src_error_2
+src_error_2:
+	li	r6,0
+96:	stwu	r6,4(r4)
+	bdnz	96b
+3:	andi.	r5,r5,3
+	beq	src_error
+	.globl src_error_3
+src_error_3:
+	li	r6,0
+	mtctr	r5
+	addi	r4,r4,3
+97:	stbu	r6,1(r4)
+	bdnz	97b
+	.globl src_error
+src_error:
+	cmpdi	0,r7,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r7)
+1:	addze	r3,r0
+	blr
+
+	.globl dst_error
+dst_error:
+	cmpdi	0,r8,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r8)
+1:	addze	r3,r0
+	blr
+
+.section __ex_table,"a"
+	.align  3
+	.llong	81b,src_error_1
+	.llong	91b,dst_error
+	.llong	82b,src_error_2
+	.llong	92b,dst_error
+	.llong	83b,src_error_3
+	.llong	93b,dst_error
+	.llong	84b,src_error_3
+	.llong	94b,dst_error
+	.llong	95b,dst_error
+	.llong	96b,dst_error
+	.llong	97b,dst_error
diff --git a/arch/powerpc/lib/copy32.S b/arch/powerpc/lib/copy32.S
new file mode 100644
index 00000000000..420a912198a
--- /dev/null
+++ b/arch/powerpc/lib/copy32.S
@@ -0,0 +1,543 @@
+/*
+ * Memory copy functions for 32-bit PowerPC.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+#define COPY_16_BYTES		\
+	lwz	r7,4(r4);	\
+	lwz	r8,8(r4);	\
+	lwz	r9,12(r4);	\
+	lwzu	r10,16(r4);	\
+	stw	r7,4(r6);	\
+	stw	r8,8(r6);	\
+	stw	r9,12(r6);	\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n)	\
+8 ## n ## 0:			\
+	lwz	r7,4(r4);	\
+8 ## n ## 1:			\
+	lwz	r8,8(r4);	\
+8 ## n ## 2:			\
+	lwz	r9,12(r4);	\
+8 ## n ## 3:			\
+	lwzu	r10,16(r4);	\
+8 ## n ## 4:			\
+	stw	r7,4(r6);	\
+8 ## n ## 5:			\
+	stw	r8,8(r6);	\
+8 ## n ## 6:			\
+	stw	r9,12(r6);	\
+8 ## n ## 7:			\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n)			\
+9 ## n ## 0:					\
+	addi	r5,r5,-(16 * n);		\
+	b	104f;				\
+9 ## n ## 1:					\
+	addi	r5,r5,-(16 * n);		\
+	b	105f;				\
+.section __ex_table,"a";			\
+	.align	2;				\
+	.long	8 ## n ## 0b,9 ## n ## 0b;	\
+	.long	8 ## n ## 1b,9 ## n ## 0b;	\
+	.long	8 ## n ## 2b,9 ## n ## 0b;	\
+	.long	8 ## n ## 3b,9 ## n ## 0b;	\
+	.long	8 ## n ## 4b,9 ## n ## 1b;	\
+	.long	8 ## n ## 5b,9 ## n ## 1b;	\
+	.long	8 ## n ## 6b,9 ## n ## 1b;	\
+	.long	8 ## n ## 7b,9 ## n ## 1b;	\
+	.text
+
+	.text
+	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
+	.stabs	"copy32.S",N_SO,0,0,0f
+0:
+
+CACHELINE_BYTES = L1_CACHE_LINE_SIZE
+LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
+CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)
+
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.  -- paulus
+ */
+_GLOBAL(cacheable_memzero)
+	mr	r5,r4
+	li	r4,0
+	addi	r6,r3,-4
+	cmplwi	0,r5,4
+	blt	7f
+	stwu	r4,4(r6)
+	beqlr
+	andi.	r0,r6,3
+	add	r5,r0,r5
+	subf	r6,r0,r6
+	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
+	add	r8,r7,r5
+	srwi	r9,r8,LG_CACHELINE_BYTES
+	addic.	r9,r9,-1	/* total number of complete cachelines */
+	ble	2f
+	xori	r0,r7,CACHELINE_MASK & ~3
+	srwi.	r0,r0,2
+	beq	3f
+	mtctr	r0
+4:	stwu	r4,4(r6)
+	bdnz	4b
+3:	mtctr	r9
+	li	r7,4
+#if !defined(CONFIG_8xx)
+10:	dcbz	r7,r6
+#else
+10:	stw	r4, 4(r6)
+	stw	r4, 8(r6)
+	stw	r4, 12(r6)
+	stw	r4, 16(r6)
+#if CACHE_LINE_SIZE >= 32
+	stw	r4, 20(r6)
+	stw	r4, 24(r6)
+	stw	r4, 28(r6)
+	stw	r4, 32(r6)
+#endif /* CACHE_LINE_SIZE */
+#endif
+	addi	r6,r6,CACHELINE_BYTES
+	bdnz	10b
+	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
+	addi	r5,r5,4
+2:	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+_GLOBAL(memset)
+	rlwimi	r4,r4,8,16,23
+	rlwimi	r4,r4,16,0,15
+	addi	r6,r3,-4
+	cmplwi	0,r5,4
+	blt	7f
+	stwu	r4,4(r6)
+	beqlr
+	andi.	r0,r6,3
+	add	r5,r0,r5
+	subf	r6,r0,r6
+	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic.  This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBAL(cacheable_memcpy)
+	add	r7,r3,r5		/* test if the src & dst overlap */
+	add	r8,r4,r5
+	cmplw	0,r4,r7
+	cmplw	1,r3,r8
+	crand	0,0,4			/* cr0.lt &= cr1.lt */
+	blt	memcpy			/* if regions overlap */
+
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	subf	r5,r0,r5
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+	stwu	r9,4(r6)
+	bdnz	72b
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	mtctr	r0
+	beq	63f
+53:
+#if !defined(CONFIG_8xx)
+	dcbz	r11,r6
+#endif
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+#endif
+#endif
+#endif
+	bdnz	53b
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	blr
+
+_GLOBAL(memmove)
+	cmplw	0,r3,r4
+	bgt	backwards_memcpy
+	/* fall through */
+
+_GLOBAL(memcpy)
+	srwi.	r7,r5,3
+	addi	r6,r3,-4
+	addi	r4,r4,-4
+	beq	2f			/* if less than 8 bytes to do */
+	andi.	r0,r6,3			/* get dest word aligned */
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,4(r4)
+	lwzu	r8,8(r4)
+	stw	r7,4(r6)
+	stwu	r8,8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,4(r4)
+	addi	r5,r5,-4
+	stwu	r0,4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r4,r4,3
+	addi	r6,r6,3
+4:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
+	bdnz	4b
+	blr
+5:	subfic	r0,r0,4
+	mtctr	r0
+6:	lbz	r7,4(r4)
+	addi	r4,r4,1
+	stb	r7,4(r6)
+	addi	r6,r6,1
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(backwards_memcpy)
+	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
+	add	r6,r3,r5
+	add	r4,r4,r5
+	beq	2f
+	andi.	r0,r6,3
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,-4(r4)
+	lwzu	r8,-8(r4)
+	stw	r7,-4(r6)
+	stwu	r8,-8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,-4(r4)
+	subi	r5,r5,4
+	stwu	r0,-4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+4:	lbzu	r0,-1(r4)
+	stbu	r0,-1(r6)
+	bdnz	4b
+	blr
+5:	mtctr	r0
+6:	lbzu	r7,-1(r4)
+	stbu	r7,-1(r6)
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(__copy_tofrom_user)
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+71:	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	subf	r5,r0,r5
+	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+73:	stwu	r9,4(r6)
+	bdnz	72b
+
+	.section __ex_table,"a"
+	.align	2
+	.long	70b,100f
+	.long	71b,101f
+	.long	72b,102f
+	.long	73b,103f
+	.text
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	beq	63f
+
+#ifdef CONFIG_8xx
+	/* Don't use prefetch on 8xx */
+	mtctr	r0
+	li	r0,0
+53:	COPY_16_BYTES_WITHEX(0)
+	bdnz	53b
+
+#else /* not CONFIG_8xx */
+	/* Here we decide how far ahead to prefetch the source */
+	li	r3,4
+	cmpwi	r0,1
+	li	r7,0
+	ble	114f
+	li	r7,1
+#if MAX_COPY_PREFETCH > 1
+	/* Heuristically, for large transfers we prefetch
+	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
+	   we prefetch 1 cacheline ahead. */
+	cmpwi	r0,MAX_COPY_PREFETCH
+	ble	112f
+	li	r7,MAX_COPY_PREFETCH
+112:	mtctr	r7
+111:	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+	bdnz	111b
+#else
+	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114:	subf	r8,r7,r0
+	mr	r0,r7
+	mtctr	r8
+
+53:	dcbt	r3,r4
+54:	dcbz	r11,r6
+	.section __ex_table,"a"
+	.align	2
+	.long	54b,105f
+	.text
+/* the main body of the cacheline loop */
+	COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES_WITHEX(2)
+	COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES_WITHEX(4)
+	COPY_16_BYTES_WITHEX(5)
+	COPY_16_BYTES_WITHEX(6)
+	COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+	bdnz	53b
+	cmpwi	r0,0
+	li	r3,4
+	li	r7,0
+	bne	114b
+#endif /* CONFIG_8xx */
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+31:	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+41:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	li	r3,0
+	blr
+
+/* read fault, initial single-byte copy */
+100:	li	r9,0
+	b	90f
+/* write fault, initial single-byte copy */
+101:	li	r9,1
+90:	subf	r5,r8,r5
+	li	r3,0
+	b	99f
+/* read fault, initial word copy */
+102:	li	r9,0
+	b	91f
+/* write fault, initial word copy */
+103:	li	r9,1
+91:	li	r3,2
+	b	99f
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+	COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES_EXCODE(2)
+	COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES_EXCODE(4)
+	COPY_16_BYTES_EXCODE(5)
+	COPY_16_BYTES_EXCODE(6)
+	COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
+104:	li	r9,0
+	b	92f
+/* fault on dcbz (effectively a write fault) */
+/* or write fault in cacheline loop */
+105:	li	r9,1
+92:	li	r3,LG_CACHELINE_BYTES
+	mfctr	r8
+	add	r0,r0,r8
+	b	106f
+/* read fault in final word loop */
+108:	li	r9,0
+	b	93f
+/* write fault in final word loop */
+109:	li	r9,1
+93:	andi.	r5,r5,3
+	li	r3,2
+	b	99f
+/* read fault in final byte loop */
+110:	li	r9,0
+	b	94f
+/* write fault in final byte loop */
+111:	li	r9,1
+94:	li	r5,0
+	li	r3,0
+/*
+ * At this stage the number of bytes not copied is
+ * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
+ */
+99:	mfctr	r0
+106:	slw	r3,r0,r3
+	add.	r3,r3,r5
+	beq	120f			/* shouldn't happen */
+	cmpwi	0,r9,0
+	bne	120f
+/* for a read fault, first try to continue the copy one byte at a time */
+	mtctr	r3
+130:	lbz	r0,4(r4)
+131:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	130b
+/* then clear out the destination: r3 bytes starting at 4(r6) */
+132:	mfctr	r3
+	srwi.	r0,r3,2
+	li	r9,0
+	mtctr	r0
+	beq	113f
+112:	stwu	r9,4(r6)
+	bdnz	112b
+113:	andi.	r0,r3,3
+	mtctr	r0
+	beq	120f
+114:	stb	r9,4(r6)
+	addi	r6,r6,1
+	bdnz	114b
+120:	blr
+
+	.section __ex_table,"a"
+	.align	2
+	.long	30b,108b
+	.long	31b,109b
+	.long	40b,110b
+	.long	41b,111b
+	.long	130b,132b
+	.long	131b,120b
+	.long	112b,120b
+	.long	114b,120b
+	.text
diff --git a/arch/powerpc/lib/copypage.S b/arch/powerpc/lib/copypage.S
new file mode 100644
index 00000000000..733d61618bb
--- /dev/null
+++ b/arch/powerpc/lib/copypage.S
@@ -0,0 +1,121 @@
+/*
+ * arch/ppc64/lib/copypage.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copy_page)
+	std	r31,-8(1)
+	std	r30,-16(1)
+	std	r29,-24(1)
+	std	r28,-32(1)
+	std	r27,-40(1)
+	std	r26,-48(1)
+	std	r25,-56(1)
+	std	r24,-64(1)
+	std	r23,-72(1)
+	std	r22,-80(1)
+	std	r21,-88(1)
+	std	r20,-96(1)
+	li	r5,4096/32 - 1
+	addi	r3,r3,-8
+	li	r12,5
+0:	addi	r5,r5,-24
+	mtctr	r12
+	ld	r22,640(4)
+	ld	r21,512(4)
+	ld	r20,384(4)
+	ld	r11,256(4)
+	ld	r9,128(4)
+	ld	r7,0(4)
+	ld	r25,648(4)
+	ld	r24,520(4)
+	ld	r23,392(4)
+	ld	r10,264(4)
+	ld	r8,136(4)
+	ldu	r6,8(4)
+	cmpwi	r5,24
+1:	std	r22,648(3)
+	std	r21,520(3)
+	std	r20,392(3)
+	std	r11,264(3)
+	std	r9,136(3)
+	std	r7,8(3)
+	ld	r28,648(4)
+	ld	r27,520(4)
+	ld	r26,392(4)
+	ld	r31,264(4)
+	ld	r30,136(4)
+	ld	r29,8(4)
+	std	r25,656(3)
+	std	r24,528(3)
+	std	r23,400(3)
+	std	r10,272(3)
+	std	r8,144(3)
+	std	r6,16(3)
+	ld	r22,656(4)
+	ld	r21,528(4)
+	ld	r20,400(4)
+	ld	r11,272(4)
+	ld	r9,144(4)
+	ld	r7,16(4)
+	std	r28,664(3)
+	std	r27,536(3)
+	std	r26,408(3)
+	std	r31,280(3)
+	std	r30,152(3)
+	stdu	r29,24(3)
+	ld	r25,664(4)
+	ld	r24,536(4)
+	ld	r23,408(4)
+	ld	r10,280(4)
+	ld	r8,152(4)
+	ldu	r6,24(4)
+	bdnz	1b
+	std	r22,648(3)
+	std	r21,520(3)
+	std	r20,392(3)
+	std	r11,264(3)
+	std	r9,136(3)
+	std	r7,8(3)
+	addi	r4,r4,640
+	addi	r3,r3,648
+	bge	0b
+	mtctr	r5
+	ld	r7,0(4)
+	ld	r8,8(4)
+	ldu	r9,16(4)
+3:	ld	r10,8(4)
+	std	r7,8(3)
+	ld	r7,16(4)
+	std	r8,16(3)
+	ld	r8,24(4)
+	std	r9,24(3)
+	ldu	r9,32(4)
+	stdu	r10,32(3)
+	bdnz	3b
+4:	ld	r10,8(4)
+	std	r7,8(3)
+	std	r8,16(3)
+	std	r9,24(3)
+	std	r10,32(3)
+9:	ld	r20,-96(1)
+	ld	r21,-88(1)
+	ld	r22,-80(1)
+	ld	r23,-72(1)
+	ld	r24,-64(1)
+	ld	r25,-56(1)
+	ld	r26,-48(1)
+	ld	r27,-40(1)
+	ld	r28,-32(1)
+	ld	r29,-24(1)
+	ld	r30,-16(1)
+	ld	r31,-8(1)
+	blr
diff --git a/arch/powerpc/lib/copyuser.S b/arch/powerpc/lib/copyuser.S
new file mode 100644
index 00000000000..a0b3fbbd6fb
--- /dev/null
+++ b/arch/powerpc/lib/copyuser.S
@@ -0,0 +1,576 @@
+/*
+ * arch/ppc64/lib/copyuser.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+	.align	7
+_GLOBAL(__copy_tofrom_user)
+	/* first check for a whole page copy on a page boundary */
+	cmpldi	cr1,r5,16
+	cmpdi	cr6,r5,4096
+	or	r0,r3,r4
+	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
+	andi.	r0,r0,4095
+	std	r3,-24(r1)
+	crand	cr0*4+2,cr0*4+2,cr6*4+2
+	std	r4,-16(r1)
+	std	r5,-8(r1)
+	dcbt	0,r4
+	beq	.Lcopy_page
+	andi.	r6,r6,7
+	mtcrf	0x01,r5
+	blt	cr1,.Lshort_copy
+	bne	.Ldst_unaligned
+.Ldst_aligned:
+	andi.	r0,r4,7
+	addi	r3,r3,-16
+	bne	.Lsrc_unaligned
+	srdi	r7,r5,4
+20:	ld	r9,0(r4)
+	addi	r4,r4,-8
+	mtctr	r7
+	andi.	r5,r5,7
+	bf	cr7*4+0,22f
+	addi	r3,r3,8
+	addi	r4,r4,8
+	mr	r8,r9
+	blt	cr1,72f
+21:	ld	r9,8(r4)
+70:	std	r8,8(r3)
+22:	ldu	r8,16(r4)
+71:	stdu	r9,16(r3)
+	bdnz	21b
+72:	std	r8,8(r3)
+	beq+	3f
+	addi	r3,r3,16
+23:	ld	r9,8(r4)
+.Ldo_tail:
+	bf	cr7*4+1,1f
+	rotldi	r9,r9,32
+73:	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+74:	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+75:	stb	r9,0(r3)
+3:	li	r3,0
+	blr
+
+.Lsrc_unaligned:
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4
+	srdi	r7,r5,4
+	sldi	r10,r0,3
+	cmpldi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64
+	add	r5,r5,r0
+	bt	cr7*4+0,28f
+
+24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
+25:	ld	r0,8(r4)
+	sld	r6,r9,r10
+26:	ldu	r9,16(r4)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,79f
+27:	ld	r0,8(r4)
+	b	2f
+
+28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
+29:	ldu	r9,8(r4)
+	sld	r8,r0,r10
+	addi	r3,r3,-8
+	blt	cr6,5f
+30:	ld	r0,8(r4)
+	srd	r12,r9,r11
+	sld	r6,r9,r10
+31:	ldu	r9,16(r4)
+	or	r12,r8,r12
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	addi	r3,r3,16
+	beq	cr6,78f
+
+1:	or	r7,r7,r6
+32:	ld	r0,8(r4)
+76:	std	r12,8(r3)
+2:	srd	r12,r9,r11
+	sld	r6,r9,r10
+33:	ldu	r9,16(r4)
+	or	r12,r8,r12
+77:	stdu	r7,16(r3)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	bdnz	1b
+
+78:	std	r12,8(r3)
+	or	r7,r7,r6
+79:	std	r7,16(r3)
+5:	srd	r12,r9,r11
+	or	r12,r8,r12
+80:	std	r12,24(r3)
+	bne	6f
+	li	r3,0
+	blr
+6:	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+	sld	r9,r9,r10
+	ble	cr1,.Ldo_tail
+34:	ld	r0,8(r4)
+	srd	r7,r0,r11
+	or	r9,r7,r9
+	b	.Ldo_tail
+
+.Ldst_unaligned:
+	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
+	subf	r5,r6,r5
+	li	r7,0
+	cmpldi	r1,r5,16
+	bf	cr7*4+3,1f
+35:	lbz	r0,0(r4)
+81:	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+36:	lhzx	r0,r7,r4
+82:	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+37:	lwzx	r0,r7,r4
+83:	stwx	r0,r7,r3
+3:	mtcrf	0x01,r5
+	add	r4,r6,r4
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:
+	bf	cr7*4+0,1f
+38:	lwz	r0,0(r4)
+39:	lwz	r9,4(r4)
+	addi	r4,r4,8
+84:	stw	r0,0(r3)
+85:	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f
+40:	lwz	r0,0(r4)
+	addi	r4,r4,4
+86:	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f
+41:	lhz	r0,0(r4)
+	addi	r4,r4,2
+87:	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f
+42:	lbz	r0,0(r4)
+88:	stb	r0,0(r3)
+4:	li	r3,0
+	blr
+
+/*
+ * exception handlers follow
+ * we have to return the number of bytes not copied
+ * for an exception on a load, we set the rest of the destination to 0
+ */
+
+136:
+137:
+	add	r3,r3,r7
+	b	1f
+130:
+131:
+	addi	r3,r3,8
+120:
+122:
+124:
+125:
+126:
+127:
+128:
+129:
+133:
+	addi	r3,r3,8
+121:
+132:
+	addi	r3,r3,8
+123:
+134:
+135:
+138:
+139:
+140:
+141:
+142:
+
+/*
+ * here we have had a fault on a load and r3 points to the first
+ * unmodified byte of the destination
+ */
+1:	ld	r6,-24(r1)
+	ld	r4,-16(r1)
+	ld	r5,-8(r1)
+	subf	r6,r6,r3
+	add	r4,r4,r6
+	subf	r5,r6,r5	/* #bytes left to go */
+
+/*
+ * first see if we can copy any more bytes before hitting another exception
+ */
+	mtctr	r5
+43:	lbz	r0,0(r4)
+	addi	r4,r4,1
+89:	stb	r0,0(r3)
+	addi	r3,r3,1
+	bdnz	43b
+	li	r3,0		/* huh? all copied successfully this time? */
+	blr
+
+/*
+ * here we have trapped again, need to clear ctr bytes starting at r3
+ */
+143:	mfctr	r5
+	li	r0,0
+	mr	r4,r3
+	mr	r3,r5		/* return the number of bytes not copied */
+1:	andi.	r9,r4,7
+	beq	3f
+90:	stb	r0,0(r4)
+	addic.	r5,r5,-1
+	addi	r4,r4,1
+	bne	1b
+	blr
+3:	cmpldi	cr1,r5,8
+	srdi	r9,r5,3
+	andi.	r5,r5,7
+	blt	cr1,93f
+	mtctr	r9
+91:	std	r0,0(r4)
+	addi	r4,r4,8
+	bdnz	91b
+93:	beqlr
+	mtctr	r5	
+92:	stb	r0,0(r4)
+	addi	r4,r4,1
+	bdnz	92b
+	blr
+
+/*
+ * exception handlers for stores: we just need to work
+ * out how many bytes weren't copied
+ */
+182:
+183:
+	add	r3,r3,r7
+	b	1f
+180:
+	addi	r3,r3,8
+171:
+177:
+	addi	r3,r3,8
+170:
+172:
+176:
+178:
+	addi	r3,r3,4
+185:
+	addi	r3,r3,4
+173:
+174:
+175:
+179:
+181:
+184:
+186:
+187:
+188:
+189:	
+1:
+	ld	r6,-24(r1)
+	ld	r5,-8(r1)
+	add	r6,r6,r5
+	subf	r3,r3,r6	/* #bytes not copied */
+190:
+191:
+192:
+	blr			/* #bytes not copied in r3 */
+
+	.section __ex_table,"a"
+	.align	3
+	.llong	20b,120b
+	.llong	21b,121b
+	.llong	70b,170b
+	.llong	22b,122b
+	.llong	71b,171b
+	.llong	72b,172b
+	.llong	23b,123b
+	.llong	73b,173b
+	.llong	74b,174b
+	.llong	75b,175b
+	.llong	24b,124b
+	.llong	25b,125b
+	.llong	26b,126b
+	.llong	27b,127b
+	.llong	28b,128b
+	.llong	29b,129b
+	.llong	30b,130b
+	.llong	31b,131b
+	.llong	32b,132b
+	.llong	76b,176b
+	.llong	33b,133b
+	.llong	77b,177b
+	.llong	78b,178b
+	.llong	79b,179b
+	.llong	80b,180b
+	.llong	34b,134b
+	.llong	35b,135b
+	.llong	81b,181b
+	.llong	36b,136b
+	.llong	82b,182b
+	.llong	37b,137b
+	.llong	83b,183b
+	.llong	38b,138b
+	.llong	39b,139b
+	.llong	84b,184b
+	.llong	85b,185b
+	.llong	40b,140b
+	.llong	86b,186b
+	.llong	41b,141b
+	.llong	87b,187b
+	.llong	42b,142b
+	.llong	88b,188b
+	.llong	43b,143b
+	.llong	89b,189b
+	.llong	90b,190b
+	.llong	91b,191b
+	.llong	92b,192b
+	
+	.text
+
+/*
+ * Routine to copy a whole page of data, optimized for POWER4.
+ * On POWER4 it is more than 50% faster than the simple loop
+ * above (following the .Ldst_aligned label) but it runs slightly
+ * slower on POWER3.
+ */
+.Lcopy_page:
+	std	r31,-32(1)
+	std	r30,-40(1)
+	std	r29,-48(1)
+	std	r28,-56(1)
+	std	r27,-64(1)
+	std	r26,-72(1)
+	std	r25,-80(1)
+	std	r24,-88(1)
+	std	r23,-96(1)
+	std	r22,-104(1)
+	std	r21,-112(1)
+	std	r20,-120(1)
+	li	r5,4096/32 - 1
+	addi	r3,r3,-8
+	li	r0,5
+0:	addi	r5,r5,-24
+	mtctr	r0
+20:	ld	r22,640(4)
+21:	ld	r21,512(4)
+22:	ld	r20,384(4)
+23:	ld	r11,256(4)
+24:	ld	r9,128(4)
+25:	ld	r7,0(4)
+26:	ld	r25,648(4)
+27:	ld	r24,520(4)
+28:	ld	r23,392(4)
+29:	ld	r10,264(4)
+30:	ld	r8,136(4)
+31:	ldu	r6,8(4)
+	cmpwi	r5,24
+1:
+32:	std	r22,648(3)
+33:	std	r21,520(3)
+34:	std	r20,392(3)
+35:	std	r11,264(3)
+36:	std	r9,136(3)
+37:	std	r7,8(3)
+38:	ld	r28,648(4)
+39:	ld	r27,520(4)
+40:	ld	r26,392(4)
+41:	ld	r31,264(4)
+42:	ld	r30,136(4)
+43:	ld	r29,8(4)
+44:	std	r25,656(3)
+45:	std	r24,528(3)
+46:	std	r23,400(3)
+47:	std	r10,272(3)
+48:	std	r8,144(3)
+49:	std	r6,16(3)
+50:	ld	r22,656(4)
+51:	ld	r21,528(4)
+52:	ld	r20,400(4)
+53:	ld	r11,272(4)
+54:	ld	r9,144(4)
+55:	ld	r7,16(4)
+56:	std	r28,664(3)
+57:	std	r27,536(3)
+58:	std	r26,408(3)
+59:	std	r31,280(3)
+60:	std	r30,152(3)
+61:	stdu	r29,24(3)
+62:	ld	r25,664(4)
+63:	ld	r24,536(4)
+64:	ld	r23,408(4)
+65:	ld	r10,280(4)
+66:	ld	r8,152(4)
+67:	ldu	r6,24(4)
+	bdnz	1b
+68:	std	r22,648(3)
+69:	std	r21,520(3)
+70:	std	r20,392(3)
+71:	std	r11,264(3)
+72:	std	r9,136(3)
+73:	std	r7,8(3)
+74:	addi	r4,r4,640
+75:	addi	r3,r3,648
+	bge	0b
+	mtctr	r5
+76:	ld	r7,0(4)
+77:	ld	r8,8(4)
+78:	ldu	r9,16(4)
+3:
+79:	ld	r10,8(4)
+80:	std	r7,8(3)
+81:	ld	r7,16(4)
+82:	std	r8,16(3)
+83:	ld	r8,24(4)
+84:	std	r9,24(3)
+85:	ldu	r9,32(4)
+86:	stdu	r10,32(3)
+	bdnz	3b
+4:
+87:	ld	r10,8(4)
+88:	std	r7,8(3)
+89:	std	r8,16(3)
+90:	std	r9,24(3)
+91:	std	r10,32(3)
+9:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	li	r3,0
+	blr
+
+/*
+ * on an exception, reset to the beginning and jump back into the
+ * standard __copy_tofrom_user
+ */
+100:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	ld	r3,-24(r1)
+	ld	r4,-16(r1)
+	li	r5,4096
+	b	.Ldst_aligned
+
+	.section __ex_table,"a"
+	.align	3
+	.llong	20b,100b
+	.llong	21b,100b
+	.llong	22b,100b
+	.llong	23b,100b
+	.llong	24b,100b
+	.llong	25b,100b
+	.llong	26b,100b
+	.llong	27b,100b
+	.llong	28b,100b
+	.llong	29b,100b
+	.llong	30b,100b
+	.llong	31b,100b
+	.llong	32b,100b
+	.llong	33b,100b
+	.llong	34b,100b
+	.llong	35b,100b
+	.llong	36b,100b
+	.llong	37b,100b
+	.llong	38b,100b
+	.llong	39b,100b
+	.llong	40b,100b
+	.llong	41b,100b
+	.llong	42b,100b
+	.llong	43b,100b
+	.llong	44b,100b
+	.llong	45b,100b
+	.llong	46b,100b
+	.llong	47b,100b
+	.llong	48b,100b
+	.llong	49b,100b
+	.llong	50b,100b
+	.llong	51b,100b
+	.llong	52b,100b
+	.llong	53b,100b
+	.llong	54b,100b
+	.llong	55b,100b
+	.llong	56b,100b
+	.llong	57b,100b
+	.llong	58b,100b
+	.llong	59b,100b
+	.llong	60b,100b
+	.llong	61b,100b
+	.llong	62b,100b
+	.llong	63b,100b
+	.llong	64b,100b
+	.llong	65b,100b
+	.llong	66b,100b
+	.llong	67b,100b
+	.llong	68b,100b
+	.llong	69b,100b
+	.llong	70b,100b
+	.llong	71b,100b
+	.llong	72b,100b
+	.llong	73b,100b
+	.llong	74b,100b
+	.llong	75b,100b
+	.llong	76b,100b
+	.llong	77b,100b
+	.llong	78b,100b
+	.llong	79b,100b
+	.llong	80b,100b
+	.llong	81b,100b
+	.llong	82b,100b
+	.llong	83b,100b
+	.llong	84b,100b
+	.llong	85b,100b
+	.llong	86b,100b
+	.llong	87b,100b
+	.llong	88b,100b
+	.llong	89b,100b
+	.llong	90b,100b
+	.llong	91b,100b
diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S
new file mode 100644
index 00000000000..3527569e992
--- /dev/null
+++ b/arch/powerpc/lib/div64.S
@@ -0,0 +1,58 @@
+/*
+ * Divide a 64-bit unsigned number by a 32-bit unsigned number.
+ * This routine assumes that the top 32 bits of the dividend are
+ * non-zero to start with.
+ * On entry, r3 points to the dividend, which get overwritten with
+ * the 64-bit quotient, and r4 contains the divisor.
+ * On exit, r3 contains the remainder.
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+_GLOBAL(__div64_32)
+	lwz	r5,0(r3)	# get the dividend into r5/r6
+	lwz	r6,4(r3)
+	cmplw	r5,r4
+	li	r7,0
+	li	r8,0
+	blt	1f
+	divwu	r7,r5,r4	# if dividend.hi >= divisor,
+	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
+	subf.	r5,r0,r5	# dividend.hi %= divisor
+	beq	3f
+1:	mr	r11,r5		# here dividend.hi != 0
+	andis.	r0,r5,0xc000
+	bne	2f
+	cntlzw	r0,r5		# we are shifting the dividend right
+	li	r10,-1		# to make it < 2^32, and shifting
+	srw	r10,r10,r0	# the divisor right the same amount,
+	add	r9,r4,r10	# rounding up (so the estimate cannot
+	andc	r11,r6,r10	# ever be too large, only too small)
+	andc	r9,r9,r10
+	or	r11,r5,r11
+	rotlw	r9,r9,r0
+	rotlw	r11,r11,r0
+	divwu	r11,r11,r9	# then we divide the shifted quantities
+2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
+	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
+	subfc	r6,r10,r6	# take the product from the divisor,
+	add	r8,r8,r11	# and add the estimate to the accumulated
+	subfe.	r5,r9,r5	# quotient
+	bne	1b
+3:	cmplw	r6,r4
+	blt	4f
+	divwu	r0,r6,r4	# perform the remaining 32-bit division
+	mullw	r10,r0,r4	# and get the remainder
+	add	r8,r8,r0
+	subf	r6,r10,r6
+4:	stw	r7,0(r3)	# return the quotient in *r3
+	stw	r8,4(r3)
+	mr	r3,r6		# return the remainder in r3
+	blr
diff --git a/arch/powerpc/lib/e2a.c b/arch/powerpc/lib/e2a.c
new file mode 100644
index 00000000000..d2b83488792
--- /dev/null
+++ b/arch/powerpc/lib/e2a.c
@@ -0,0 +1,108 @@
+/*
+ *  arch/ppc64/lib/e2a.c
+ *
+ *  EBCDIC to ASCII conversion
+ *
+ * This function moved here from arch/ppc64/kernel/viopath.c
+ *
+ * (C) Copyright 2000-2004 IBM Corporation
+ *
+ * This program is free software;  you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) anyu later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/module.h>
+
+unsigned char e2a(unsigned char x)
+{
+	switch (x) {
+	case 0xF0:
+		return '0';
+	case 0xF1:
+		return '1';
+	case 0xF2:
+		return '2';
+	case 0xF3:
+		return '3';
+	case 0xF4:
+		return '4';
+	case 0xF5:
+		return '5';
+	case 0xF6:
+		return '6';
+	case 0xF7:
+		return '7';
+	case 0xF8:
+		return '8';
+	case 0xF9:
+		return '9';
+	case 0xC1:
+		return 'A';
+	case 0xC2:
+		return 'B';
+	case 0xC3:
+		return 'C';
+	case 0xC4:
+		return 'D';
+	case 0xC5:
+		return 'E';
+	case 0xC6:
+		return 'F';
+	case 0xC7:
+		return 'G';
+	case 0xC8:
+		return 'H';
+	case 0xC9:
+		return 'I';
+	case 0xD1:
+		return 'J';
+	case 0xD2:
+		return 'K';
+	case 0xD3:
+		return 'L';
+	case 0xD4:
+		return 'M';
+	case 0xD5:
+		return 'N';
+	case 0xD6:
+		return 'O';
+	case 0xD7:
+		return 'P';
+	case 0xD8:
+		return 'Q';
+	case 0xD9:
+		return 'R';
+	case 0xE2:
+		return 'S';
+	case 0xE3:
+		return 'T';
+	case 0xE4:
+		return 'U';
+	case 0xE5:
+		return 'V';
+	case 0xE6:
+		return 'W';
+	case 0xE7:
+		return 'X';
+	case 0xE8:
+		return 'Y';
+	case 0xE9:
+		return 'Z';
+	}
+	return ' ';
+}
+EXPORT_SYMBOL(e2a);
+
+
diff --git a/arch/powerpc/lib/memcpy.S b/arch/powerpc/lib/memcpy.S
new file mode 100644
index 00000000000..9ccacdf5bcb
--- /dev/null
+++ b/arch/powerpc/lib/memcpy.S
@@ -0,0 +1,172 @@
+/*
+ * arch/ppc64/lib/memcpy.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+	.align	7
+_GLOBAL(memcpy)
+	mtcrf	0x01,r5
+	cmpldi	cr1,r5,16
+	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
+	andi.	r6,r6,7
+	dcbt	0,r4
+	blt	cr1,.Lshort_copy
+	bne	.Ldst_unaligned
+.Ldst_aligned:
+	andi.	r0,r4,7
+	addi	r3,r3,-16
+	bne	.Lsrc_unaligned
+	srdi	r7,r5,4
+	ld	r9,0(r4)
+	addi	r4,r4,-8
+	mtctr	r7
+	andi.	r5,r5,7
+	bf	cr7*4+0,2f
+	addi	r3,r3,8
+	addi	r4,r4,8
+	mr	r8,r9
+	blt	cr1,3f
+1:	ld	r9,8(r4)
+	std	r8,8(r3)
+2:	ldu	r8,16(r4)
+	stdu	r9,16(r3)
+	bdnz	1b
+3:	std	r8,8(r3)
+	beqlr
+	addi	r3,r3,16
+	ld	r9,8(r4)
+.Ldo_tail:
+	bf	cr7*4+1,1f
+	rotldi	r9,r9,32
+	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+	stb	r9,0(r3)
+3:	blr
+
+.Lsrc_unaligned:
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4
+	srdi	r7,r5,4
+	sldi	r10,r0,3
+	cmpdi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64
+	add	r5,r5,r0
+
+	bt	cr7*4+0,0f
+
+	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
+	ld	r0,8(r4)
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,4f
+	ld	r0,8(r4)
+	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
+	b	2f
+
+0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
+	ldu	r9,8(r4)
+	sld	r8,r0,r10
+	addi	r3,r3,-8
+	blt	cr6,5f
+	ld	r0,8(r4)
+	srd	r12,r9,r11
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	or	r12,r8,r12
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	addi	r3,r3,16
+	beq	cr6,3f
+
+	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
+1:	or	r7,r7,r6
+	ld	r0,8(r4)
+	std	r12,8(r3)
+2:	srd	r12,r9,r11
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	or	r12,r8,r12
+	stdu	r7,16(r3)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	bdnz	1b
+
+3:	std	r12,8(r3)
+	or	r7,r7,r6
+4:	std	r7,16(r3)
+5:	srd	r12,r9,r11
+	or	r12,r8,r12
+	std	r12,24(r3)
+	beqlr
+	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+	sld	r9,r9,r10
+	ble	cr1,.Ldo_tail
+	ld	r0,8(r4)
+	srd	r7,r0,r11
+	or	r9,r7,r9
+	b	.Ldo_tail
+
+.Ldst_unaligned:
+	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
+	subf	r5,r6,r5
+	li	r7,0
+	cmpldi	r1,r5,16
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+	lhzx	r0,r7,r4
+	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+	lwzx	r0,r7,r4
+	stwx	r0,r7,r3
+3:	mtcrf	0x01,r5
+	add	r4,r6,r4
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:
+	bf	cr7*4+0,1f
+	lwz	r0,0(r4)
+	lwz	r9,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+4:	blr
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
new file mode 100644
index 00000000000..42c5de2c898
--- /dev/null
+++ b/arch/powerpc/lib/rheap.c
@@ -0,0 +1,693 @@
+/*
+ * arch/ppc/syslib/rheap.c
+ *
+ * A Remote Heap.  Remote means that we don't touch the memory that the
+ * heap points to. Normal heap implementations use the memory they manage
+ * to place their list. We cannot do that because the memory we manage may
+ * have special properties, for example it is uncachable or of different
+ * endianess.
+ *
+ * Author: Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2004 (c) INTRACOM S.A. Greece. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/rheap.h>
+
+/*
+ * Fixup a list_head, needed when copying lists.  If the pointers fall
+ * between s and e, apply the delta.  This assumes that
+ * sizeof(struct list_head *) == sizeof(unsigned long *).
+ */
+static inline void fixup(unsigned long s, unsigned long e, int d,
+			 struct list_head *l)
+{
+	unsigned long *pp;
+
+	pp = (unsigned long *)&l->next;
+	if (*pp >= s && *pp < e)
+		*pp += d;
+
+	pp = (unsigned long *)&l->prev;
+	if (*pp >= s && *pp < e)
+		*pp += d;
+}
+
+/* Grow the allocated blocks */
+static int grow(rh_info_t * info, int max_blocks)
+{
+	rh_block_t *block, *blk;
+	int i, new_blocks;
+	int delta;
+	unsigned long blks, blke;
+
+	if (max_blocks <= info->max_blocks)
+		return -EINVAL;
+
+	new_blocks = max_blocks - info->max_blocks;
+
+	block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_KERNEL);
+	if (block == NULL)
+		return -ENOMEM;
+
+	if (info->max_blocks > 0) {
+
+		/* copy old block area */
+		memcpy(block, info->block,
+		       sizeof(rh_block_t) * info->max_blocks);
+
+		delta = (char *)block - (char *)info->block;
+
+		/* and fixup list pointers */
+		blks = (unsigned long)info->block;
+		blke = (unsigned long)(info->block + info->max_blocks);
+
+		for (i = 0, blk = block; i < info->max_blocks; i++, blk++)
+			fixup(blks, blke, delta, &blk->list);
+
+		fixup(blks, blke, delta, &info->empty_list);
+		fixup(blks, blke, delta, &info->free_list);
+		fixup(blks, blke, delta, &info->taken_list);
+
+		/* free the old allocated memory */
+		if ((info->flags & RHIF_STATIC_BLOCK) == 0)
+			kfree(info->block);
+	}
+
+	info->block = block;
+	info->empty_slots += new_blocks;
+	info->max_blocks = max_blocks;
+	info->flags &= ~RHIF_STATIC_BLOCK;
+
+	/* add all new blocks to the free list */
+	for (i = 0, blk = block + info->max_blocks; i < new_blocks; i++, blk++)
+		list_add(&blk->list, &info->empty_list);
+
+	return 0;
+}
+
+/*
+ * Assure at least the required amount of empty slots.  If this function
+ * causes a grow in the block area then all pointers kept to the block
+ * area are invalid!
+ */
+static int assure_empty(rh_info_t * info, int slots)
+{
+	int max_blocks;
+
+	/* This function is not meant to be used to grow uncontrollably */
+	if (slots >= 4)
+		return -EINVAL;
+
+	/* Enough space */
+	if (info->empty_slots >= slots)
+		return 0;
+
+	/* Next 16 sized block */
+	max_blocks = ((info->max_blocks + slots) + 15) & ~15;
+
+	return grow(info, max_blocks);
+}
+
+static rh_block_t *get_slot(rh_info_t * info)
+{
+	rh_block_t *blk;
+
+	/* If no more free slots, and failure to extend. */
+	/* XXX: You should have called assure_empty before */
+	if (info->empty_slots == 0) {
+		printk(KERN_ERR "rh: out of slots; crash is imminent.\n");
+		return NULL;
+	}
+
+	/* Get empty slot to use */
+	blk = list_entry(info->empty_list.next, rh_block_t, list);
+	list_del_init(&blk->list);
+	info->empty_slots--;
+
+	/* Initialize */
+	blk->start = NULL;
+	blk->size = 0;
+	blk->owner = NULL;
+
+	return blk;
+}
+
+static inline void release_slot(rh_info_t * info, rh_block_t * blk)
+{
+	list_add(&blk->list, &info->empty_list);
+	info->empty_slots++;
+}
+
+static void attach_free_block(rh_info_t * info, rh_block_t * blkn)
+{
+	rh_block_t *blk;
+	rh_block_t *before;
+	rh_block_t *after;
+	rh_block_t *next;
+	int size;
+	unsigned long s, e, bs, be;
+	struct list_head *l;
+
+	/* We assume that they are aligned properly */
+	size = blkn->size;
+	s = (unsigned long)blkn->start;
+	e = s + size;
+
+	/* Find the blocks immediately before and after the given one
+	 * (if any) */
+	before = NULL;
+	after = NULL;
+	next = NULL;
+
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+
+		bs = (unsigned long)blk->start;
+		be = bs + blk->size;
+
+		if (next == NULL && s >= bs)
+			next = blk;
+
+		if (be == s)
+			before = blk;
+
+		if (e == bs)
+			after = blk;
+
+		/* If both are not null, break now */
+		if (before != NULL && after != NULL)
+			break;
+	}
+
+	/* Now check if they are really adjacent */
+	if (before != NULL && s != (unsigned long)before->start + before->size)
+		before = NULL;
+
+	if (after != NULL && e != (unsigned long)after->start)
+		after = NULL;
+
+	/* No coalescing; list insert and return */
+	if (before == NULL && after == NULL) {
+
+		if (next != NULL)
+			list_add(&blkn->list, &next->list);
+		else
+			list_add(&blkn->list, &info->free_list);
+
+		return;
+	}
+
+	/* We don't need it anymore */
+	release_slot(info, blkn);
+
+	/* Grow the before block */
+	if (before != NULL && after == NULL) {
+		before->size += size;
+		return;
+	}
+
+	/* Grow the after block backwards */
+	if (before == NULL && after != NULL) {
+		after->start = (int8_t *)after->start - size;
+		after->size += size;
+		return;
+	}
+
+	/* Grow the before block, and release the after block */
+	before->size += size + after->size;
+	list_del(&after->list);
+	release_slot(info, after);
+}
+
+static void attach_taken_block(rh_info_t * info, rh_block_t * blkn)
+{
+	rh_block_t *blk;
+	struct list_head *l;
+
+	/* Find the block immediately before the given one (if any) */
+	list_for_each(l, &info->taken_list) {
+		blk = list_entry(l, rh_block_t, list);
+		if (blk->start > blkn->start) {
+			list_add_tail(&blkn->list, &blk->list);
+			return;
+		}
+	}
+
+	list_add_tail(&blkn->list, &info->taken_list);
+}
+
+/*
+ * Create a remote heap dynamically.  Note that no memory for the blocks
+ * are allocated.  It will upon the first allocation
+ */
+rh_info_t *rh_create(unsigned int alignment)
+{
+	rh_info_t *info;
+
+	/* Alignment must be a power of two */
+	if ((alignment & (alignment - 1)) != 0)
+		return ERR_PTR(-EINVAL);
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	info->alignment = alignment;
+
+	/* Initially everything as empty */
+	info->block = NULL;
+	info->max_blocks = 0;
+	info->empty_slots = 0;
+	info->flags = 0;
+
+	INIT_LIST_HEAD(&info->empty_list);
+	INIT_LIST_HEAD(&info->free_list);
+	INIT_LIST_HEAD(&info->taken_list);
+
+	return info;
+}
+
+/*
+ * Destroy a dynamically created remote heap.  Deallocate only if the areas
+ * are not static
+ */
+void rh_destroy(rh_info_t * info)
+{
+	if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL)
+		kfree(info->block);
+
+	if ((info->flags & RHIF_STATIC_INFO) == 0)
+		kfree(info);
+}
+
+/*
+ * Initialize in place a remote heap info block.  This is needed to support
+ * operation very early in the startup of the kernel, when it is not yet safe
+ * to call kmalloc.
+ */
+void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
+	     rh_block_t * block)
+{
+	int i;
+	rh_block_t *blk;
+
+	/* Alignment must be a power of two */
+	if ((alignment & (alignment - 1)) != 0)
+		return;
+
+	info->alignment = alignment;
+
+	/* Initially everything as empty */
+	info->block = block;
+	info->max_blocks = max_blocks;
+	info->empty_slots = max_blocks;
+	info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK;
+
+	INIT_LIST_HEAD(&info->empty_list);
+	INIT_LIST_HEAD(&info->free_list);
+	INIT_LIST_HEAD(&info->taken_list);
+
+	/* Add all new blocks to the free list */
+	for (i = 0, blk = block; i < max_blocks; i++, blk++)
+		list_add(&blk->list, &info->empty_list);
+}
+
+/* Attach a free memory region, coalesces regions if adjuscent */
+int rh_attach_region(rh_info_t * info, void *start, int size)
+{
+	rh_block_t *blk;
+	unsigned long s, e, m;
+	int r;
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	/* Take final values */
+	start = (void *)s;
+	size = (int)(e - s);
+
+	/* Grow the blocks, if needed */
+	r = assure_empty(info, 1);
+	if (r < 0)
+		return r;
+
+	blk = get_slot(info);
+	blk->start = start;
+	blk->size = size;
+	blk->owner = NULL;
+
+	attach_free_block(info, blk);
+
+	return 0;
+}
+
+/* Detatch given address range, splits free block if needed. */
+void *rh_detach_region(rh_info_t * info, void *start, int size)
+{
+	struct list_head *l;
+	rh_block_t *blk, *newblk;
+	unsigned long s, e, m, bs, be;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	if (assure_empty(info, 1) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		/* The range must lie entirely inside one free block */
+		bs = (unsigned long)blk->start;
+		be = (unsigned long)blk->start + blk->size;
+		if (s >= bs && e <= be)
+			break;
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Perfect fit */
+	if (bs == s && be == e) {
+		/* Delete from free list, release slot */
+		list_del(&blk->list);
+		release_slot(info, blk);
+		return (void *)s;
+	}
+
+	/* blk still in free list, with updated start and/or size */
+	if (bs == s || be == e) {
+		if (bs == s)
+			blk->start = (int8_t *)blk->start + size;
+		blk->size -= size;
+
+	} else {
+		/* The front free fragment */
+		blk->size = s - bs;
+
+		/* the back free fragment */
+		newblk = get_slot(info);
+		newblk->start = (void *)e;
+		newblk->size = be - e;
+
+		list_add(&newblk->list, &blk->list);
+	}
+
+	return (void *)s;
+}
+
+void *rh_alloc(rh_info_t * info, int size, const char *owner)
+{
+	struct list_head *l;
+	rh_block_t *blk;
+	rh_block_t *newblk;
+	void *start;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* Align to configured alignment */
+	size = (size + (info->alignment - 1)) & ~(info->alignment - 1);
+
+	if (assure_empty(info, 1) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		if (size <= blk->size)
+			break;
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Just fits */
+	if (blk->size == size) {
+		/* Move from free list to taken list */
+		list_del(&blk->list);
+		blk->owner = owner;
+		start = blk->start;
+
+		attach_taken_block(info, blk);
+
+		return start;
+	}
+
+	newblk = get_slot(info);
+	newblk->start = blk->start;
+	newblk->size = size;
+	newblk->owner = owner;
+
+	/* blk still in free list, with updated start, size */
+	blk->start = (int8_t *)blk->start + size;
+	blk->size -= size;
+
+	start = newblk->start;
+
+	attach_taken_block(info, newblk);
+
+	return start;
+}
+
+/* allocate at precisely the given address */
+void *rh_alloc_fixed(rh_info_t * info, void *start, int size, const char *owner)
+{
+	struct list_head *l;
+	rh_block_t *blk, *newblk1, *newblk2;
+	unsigned long s, e, m, bs, be;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	if (assure_empty(info, 2) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		/* The range must lie entirely inside one free block */
+		bs = (unsigned long)blk->start;
+		be = (unsigned long)blk->start + blk->size;
+		if (s >= bs && e <= be)
+			break;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Perfect fit */
+	if (bs == s && be == e) {
+		/* Move from free list to taken list */
+		list_del(&blk->list);
+		blk->owner = owner;
+
+		start = blk->start;
+		attach_taken_block(info, blk);
+
+		return start;
+
+	}
+
+	/* blk still in free list, with updated start and/or size */
+	if (bs == s || be == e) {
+		if (bs == s)
+			blk->start = (int8_t *)blk->start + size;
+		blk->size -= size;
+
+	} else {
+		/* The front free fragment */
+		blk->size = s - bs;
+
+		/* The back free fragment */
+		newblk2 = get_slot(info);
+		newblk2->start = (void *)e;
+		newblk2->size = be - e;
+
+		list_add(&newblk2->list, &blk->list);
+	}
+
+	newblk1 = get_slot(info);
+	newblk1->start = (void *)s;
+	newblk1->size = e - s;
+	newblk1->owner = owner;
+
+	start = newblk1->start;
+	attach_taken_block(info, newblk1);
+
+	return start;
+}
+
+int rh_free(rh_info_t * info, void *start)
+{
+	rh_block_t *blk, *blk2;
+	struct list_head *l;
+	int size;
+
+	/* Linear search for block */
+	blk = NULL;
+	list_for_each(l, &info->taken_list) {
+		blk2 = list_entry(l, rh_block_t, list);
+		if (start < blk2->start)
+			break;
+		blk = blk2;
+	}
+
+	if (blk == NULL || start > (blk->start + blk->size))
+		return -EINVAL;
+
+	/* Remove from taken list */
+	list_del(&blk->list);
+
+	/* Get size of freed block */
+	size = blk->size;
+	attach_free_block(info, blk);
+
+	return size;
+}
+
+int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats)
+{
+	rh_block_t *blk;
+	struct list_head *l;
+	struct list_head *h;
+	int nr;
+
+	switch (what) {
+
+	case RHGS_FREE:
+		h = &info->free_list;
+		break;
+
+	case RHGS_TAKEN:
+		h = &info->taken_list;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Linear search for block */
+	nr = 0;
+	list_for_each(l, h) {
+		blk = list_entry(l, rh_block_t, list);
+		if (stats != NULL && nr < max_stats) {
+			stats->start = blk->start;
+			stats->size = blk->size;
+			stats->owner = blk->owner;
+			stats++;
+		}
+		nr++;
+	}
+
+	return nr;
+}
+
+int rh_set_owner(rh_info_t * info, void *start, const char *owner)
+{
+	rh_block_t *blk, *blk2;
+	struct list_head *l;
+	int size;
+
+	/* Linear search for block */
+	blk = NULL;
+	list_for_each(l, &info->taken_list) {
+		blk2 = list_entry(l, rh_block_t, list);
+		if (start < blk2->start)
+			break;
+		blk = blk2;
+	}
+
+	if (blk == NULL || start > (blk->start + blk->size))
+		return -EINVAL;
+
+	blk->owner = owner;
+	size = blk->size;
+
+	return size;
+}
+
+void rh_dump(rh_info_t * info)
+{
+	static rh_stats_t st[32];	/* XXX maximum 32 blocks */
+	int maxnr;
+	int i, nr;
+
+	maxnr = sizeof(st) / sizeof(st[0]);
+
+	printk(KERN_INFO
+	       "info @0x%p (%d slots empty / %d max)\n",
+	       info, info->empty_slots, info->max_blocks);
+
+	printk(KERN_INFO "  Free:\n");
+	nr = rh_get_stats(info, RHGS_FREE, maxnr, st);
+	if (nr > maxnr)
+		nr = maxnr;
+	for (i = 0; i < nr; i++)
+		printk(KERN_INFO
+		       "    0x%p-0x%p (%u)\n",
+		       st[i].start, (int8_t *) st[i].start + st[i].size,
+		       st[i].size);
+	printk(KERN_INFO "\n");
+
+	printk(KERN_INFO "  Taken:\n");
+	nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st);
+	if (nr > maxnr)
+		nr = maxnr;
+	for (i = 0; i < nr; i++)
+		printk(KERN_INFO
+		       "    0x%p-0x%p (%u) %s\n",
+		       st[i].start, (int8_t *) st[i].start + st[i].size,
+		       st[i].size, st[i].owner != NULL ? st[i].owner : "");
+	printk(KERN_INFO "\n");
+}
+
+void rh_dump_blk(rh_info_t * info, rh_block_t * blk)
+{
+	printk(KERN_INFO
+	       "blk @0x%p: 0x%p-0x%p (%u)\n",
+	       blk, blk->start, (int8_t *) blk->start + blk->size, blk->size);
+}
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
new file mode 100644
index 00000000000..e79123d1485
--- /dev/null
+++ b/arch/powerpc/lib/sstep.c
@@ -0,0 +1,141 @@
+/*
+ * Single-step support.
+ *
+ * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <asm/sstep.h>
+#include <asm/processor.h>
+
+extern char system_call_common[];
+
+/* Bits in SRR1 that are copied from MSR */
+#define MSR_MASK	0xffffffff87c0ffff
+
+/*
+ * Determine whether a conditional branch instruction would branch.
+ */
+static int branch_taken(unsigned int instr, struct pt_regs *regs)
+{
+	unsigned int bo = (instr >> 21) & 0x1f;
+	unsigned int bi;
+
+	if ((bo & 4) == 0) {
+		/* decrement counter */
+		--regs->ctr;
+		if (((bo >> 1) & 1) ^ (regs->ctr == 0))
+			return 0;
+	}
+	if ((bo & 0x10) == 0) {
+		/* check bit from CR */
+		bi = (instr >> 16) & 0x1f;
+		if (((regs->ccr >> (31 - bi)) & 1) != ((bo >> 3) & 1))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Emulate instructions that cause a transfer of control.
+ * Returns 1 if the step was emulated, 0 if not,
+ * or -1 if the instruction is one that should not be stepped,
+ * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ */
+int emulate_step(struct pt_regs *regs, unsigned int instr)
+{
+	unsigned int opcode, rd;
+	unsigned long int imm;
+
+	opcode = instr >> 26;
+	switch (opcode) {
+	case 16:	/* bc */
+		imm = (signed short)(instr & 0xfffc);
+		if ((instr & 2) == 0)
+			imm += regs->nip;
+		regs->nip += 4;
+		if ((regs->msr & MSR_SF) == 0)
+			regs->nip &= 0xffffffffUL;
+		if (instr & 1)
+			regs->link = regs->nip;
+		if (branch_taken(instr, regs))
+			regs->nip = imm;
+		return 1;
+	case 17:	/* sc */
+		/*
+		 * N.B. this uses knowledge about how the syscall
+		 * entry code works.  If that is changed, this will
+		 * need to be changed also.
+		 */
+		regs->gpr[9] = regs->gpr[13];
+		regs->gpr[11] = regs->nip + 4;
+		regs->gpr[12] = regs->msr & MSR_MASK;
+		regs->gpr[13] = (unsigned long) get_paca();
+		regs->nip = (unsigned long) &system_call_common;
+		regs->msr = MSR_KERNEL;
+		return 1;
+	case 18:	/* b */
+		imm = instr & 0x03fffffc;
+		if (imm & 0x02000000)
+			imm -= 0x04000000;
+		if ((instr & 2) == 0)
+			imm += regs->nip;
+		if (instr & 1) {
+			regs->link = regs->nip + 4;
+			if ((regs->msr & MSR_SF) == 0)
+				regs->link &= 0xffffffffUL;
+		}
+		if ((regs->msr & MSR_SF) == 0)
+			imm &= 0xffffffffUL;
+		regs->nip = imm;
+		return 1;
+	case 19:
+		switch (instr & 0x7fe) {
+		case 0x20:	/* bclr */
+		case 0x420:	/* bcctr */
+			imm = (instr & 0x400)? regs->ctr: regs->link;
+			regs->nip += 4;
+			if ((regs->msr & MSR_SF) == 0) {
+				regs->nip &= 0xffffffffUL;
+				imm &= 0xffffffffUL;
+			}
+			if (instr & 1)
+				regs->link = regs->nip;
+			if (branch_taken(instr, regs))
+				regs->nip = imm;
+			return 1;
+		case 0x24:	/* rfid, scary */
+			return -1;
+		}
+	case 31:
+		rd = (instr >> 21) & 0x1f;
+		switch (instr & 0x7fe) {
+		case 0xa6:	/* mfmsr */
+			regs->gpr[rd] = regs->msr & MSR_MASK;
+			regs->nip += 4;
+			if ((regs->msr & MSR_SF) == 0)
+				regs->nip &= 0xffffffffUL;
+			return 1;
+		case 0x164:	/* mtmsrd */
+			/* only MSR_EE and MSR_RI get changed if bit 15 set */
+			/* mtmsrd doesn't change MSR_HV and MSR_ME */
+			imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL;
+			imm = (regs->msr & MSR_MASK & ~imm)
+				| (regs->gpr[rd] & imm);
+			if ((imm & MSR_RI) == 0)
+				/* can't step mtmsrd that would clear MSR_RI */
+				return -1;
+			regs->msr = imm;
+			regs->nip += 4;
+			if ((imm & MSR_SF) == 0)
+				regs->nip &= 0xffffffffUL;
+			return 1;
+		}
+	}
+	return 0;
+}
diff --git a/arch/powerpc/lib/strcase.c b/arch/powerpc/lib/strcase.c
new file mode 100644
index 00000000000..36b521091bb
--- /dev/null
+++ b/arch/powerpc/lib/strcase.c
@@ -0,0 +1,23 @@
+#include <linux/ctype.h>
+
+int strcasecmp(const char *s1, const char *s2)
+{
+	int c1, c2;
+
+	do {
+		c1 = tolower(*s1++);
+		c2 = tolower(*s2++);
+	} while (c1 == c2 && c1 != 0);
+	return c1 - c2;
+}
+
+int strncasecmp(const char *s1, const char *s2, int n)
+{
+	int c1, c2;
+
+	do {
+		c1 = tolower(*s1++);
+		c2 = tolower(*s2++);
+	} while ((--n > 0) && c1 == c2 && c1 != 0);
+	return c1 - c2;
+}
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
new file mode 100644
index 00000000000..15d40e9ef8b
--- /dev/null
+++ b/arch/powerpc/lib/string.S
@@ -0,0 +1,203 @@
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+	.text
+	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
+	.stabs	"string.S",N_SO,0,0,0f
+0:
+
+	.section __ex_table,"a"
+#ifdef CONFIG_PPC64
+	.align	3
+#define EXTBL	.llong
+#else
+	.align	2
+#define EXTBL	.long
+#endif
+	.text
+	
+_GLOBAL(strcpy)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r5)
+	bne	1b
+	blr
+
+/* This clears out any unused part of the destination buffer,
+   just as the libc version does.  -- paulus */
+_GLOBAL(strncpy)
+	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r6)
+	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
+	bnelr			/* if we didn't hit a null char, we're done */
+	mfctr	r5
+	cmpwi	0,r5,0		/* any space left in destination buffer? */
+	beqlr			/* we know r0 == 0 here */
+2:	stbu	r0,1(r6)	/* clear it out if so */
+	bdnz	2b
+	blr
+
+_GLOBAL(strcat)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r5)
+	cmpwi	0,r0,0
+	bne	1b
+	addi	r5,r5,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r5)
+	bne	1b
+	blr
+
+_GLOBAL(strcmp)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r5)
+	cmpwi	1,r3,0
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	beqlr	1
+	beq	1b
+	blr
+
+_GLOBAL(strlen)
+	addi	r4,r3,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	bne	1b
+	subf	r3,r3,r4
+	blr
+
+_GLOBAL(memcmp)
+	cmpwi	0,r5,0
+	ble-	2f
+	mtctr	r5
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r6)
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	bdnzt	2,1b
+	blr
+2:	li	r3,0
+	blr
+
+_GLOBAL(memchr)
+	cmpwi	0,r5,0
+	ble-	2f
+	mtctr	r5
+	addi	r3,r3,-1
+1:	lbzu	r0,1(r3)
+	cmpw	0,r0,r4
+	bdnzf	2,1b
+	beqlr
+2:	li	r3,0
+	blr
+
+_GLOBAL(__clear_user)
+	addi	r6,r3,-4
+	li	r3,0
+	li	r5,0
+	cmplwi	0,r4,4
+	blt	7f
+	/* clear a single word */
+11:	stwu	r5,4(r6)
+	beqlr
+	/* clear word sized chunks */
+	andi.	r0,r6,3
+	add	r4,r0,r4
+	subf	r6,r0,r6
+	srwi	r0,r4,2
+	andi.	r4,r4,3
+	mtctr	r0
+	bdz	7f
+1:	stwu	r5,4(r6)
+	bdnz	1b
+	/* clear byte sized chunks */
+7:	cmpwi	0,r4,0
+	beqlr
+	mtctr	r4
+	addi	r6,r6,3
+8:	stbu	r5,1(r6)
+	bdnz	8b
+	blr
+90:	mr	r3,r4
+	blr
+91:	mfctr	r3
+	slwi	r3,r3,2
+	add	r3,r3,r4
+	blr
+92:	mfctr	r3
+	blr
+
+	.section __ex_table,"a"
+	EXTBL	11b,90b
+	EXTBL	1b,91b
+	EXTBL	8b,92b
+	.text
+
+_GLOBAL(__strncpy_from_user)
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+	cmpwi	0,r5,0
+	beq	2f
+	mtctr	r5
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r6)
+	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
+	beq	3f
+2:	addi	r6,r6,1
+3:	subf	r3,r3,r6
+	blr
+99:	li	r3,-EFAULT
+	blr
+
+	.section __ex_table,"a"
+	EXTBL	1b,99b
+	.text
+
+/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
+_GLOBAL(__strnlen_user)
+	addi	r7,r3,-1
+	subf	r6,r7,r5	/* top+1 - str */
+	cmplw	0,r4,r6
+	bge	0f
+	mr	r6,r4
+0:	mtctr	r6		/* ctr = min(len, top - str) */
+1:	lbzu	r0,1(r7)	/* get next byte */
+	cmpwi	0,r0,0
+	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
+	addi	r7,r7,1
+	subf	r3,r3,r7	/* number of bytes we have looked at */
+	beqlr			/* return if we found a 0 byte */
+	cmpw	0,r3,r4		/* did we look at all len bytes? */
+	blt	99f		/* if not, must have hit top */
+	addi	r3,r4,1		/* return len + 1 to indicate no null found */
+	blr
+99:	li	r3,0		/* bad address, return 0 */
+	blr
+
+	.section __ex_table,"a"
+	EXTBL	1b,99b
diff --git a/arch/powerpc/lib/usercopy.c b/arch/powerpc/lib/usercopy.c
new file mode 100644
index 00000000000..5eea6f3c1e0
--- /dev/null
+++ b/arch/powerpc/lib/usercopy.c
@@ -0,0 +1,41 @@
+/*
+ * Functions which are too large to be inlined.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (likely(access_ok(VERIFY_READ, from, n)))
+		n = __copy_from_user(to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
+
+unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (likely(access_ok(VERIFY_WRITE, to, n)))
+		n = __copy_to_user(to, from, n);
+	return n;
+}
+
+unsigned long copy_in_user(void __user *to, const void __user *from,
+			   unsigned long n)
+{
+	might_sleep();
+	if (likely(access_ok(VERIFY_READ, from, n) &&
+	    access_ok(VERIFY_WRITE, to, n)))
+		n =__copy_tofrom_user(to, from, n);
+	return n;
+}
+
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(copy_in_user);
+
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
new file mode 100644
index 00000000000..3d79ce281b6
--- /dev/null
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -0,0 +1,120 @@
+/*
+ * Modifications by Matt Porter (mporter@mvista.com) to support
+ * PPC44x Book E processors.
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+
+#include "mmu_decl.h"
+
+extern char etext[], _stext[];
+
+/* Used by the 44x TLB replacement exception handler.
+ * Just needed it declared someplace.
+ */
+unsigned int tlb_44x_index = 0;
+unsigned int tlb_44x_hwater = 62;
+
+/*
+ * "Pins" a 256MB TLB entry in AS0 for kernel lowmem
+ */
+static void __init
+ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys)
+{
+	unsigned long attrib = 0;
+
+	__asm__ __volatile__("\
+	clrrwi	%2,%2,10\n\
+	ori	%2,%2,%4\n\
+	clrrwi	%1,%1,10\n\
+	li	%0,0\n\
+	ori	%0,%0,%5\n\
+	tlbwe	%2,%3,%6\n\
+	tlbwe	%1,%3,%7\n\
+	tlbwe	%0,%3,%8"
+	:
+	: "r" (attrib), "r" (phys), "r" (virt), "r" (slot),
+	  "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M),
+	  "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
+	  "i" (PPC44x_TLB_PAGEID),
+	  "i" (PPC44x_TLB_XLAT),
+	  "i" (PPC44x_TLB_ATTRIB));
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+	flush_instruction_cache();
+}
+
+unsigned long __init mmu_mapin_ram(void)
+{
+	unsigned int pinned_tlbs = 1;
+	int i;
+
+	/* Determine number of entries necessary to cover lowmem */
+	pinned_tlbs = (unsigned int)
+		(_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT);
+
+	/* Write upper watermark to save location */
+	tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs;
+
+	/* If necessary, set additional pinned TLBs */
+	if (pinned_tlbs > 1)
+		for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) {
+			unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE;
+			ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr);
+		}
+
+	return total_lowmem;
+}
diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c
new file mode 100644
index 00000000000..b7bcbc232f3
--- /dev/null
+++ b/arch/powerpc/mm/4xx_mmu.c
@@ -0,0 +1,141 @@
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+#include "mmu_decl.h"
+
+extern int __map_without_ltlbs;
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+	/*
+	 * The Zone Protection Register (ZPR) defines how protection will
+	 * be applied to every page which is a member of a given zone. At
+	 * present, we utilize only two of the 4xx's zones.
+	 * The zone index bits (of ZSEL) in the PTE are used for software
+	 * indicators, except the LSB.  For user access, zone 1 is used,
+	 * for kernel access, zone 0 is used.  We set all but zone 1
+	 * to zero, allowing only kernel access as indicated in the PTE.
+	 * For zone 1, we set a 01 binary (a value of 10 will not work)
+	 * to allow user access as indicated in the PTE.  This also allows
+	 * kernel access as indicated in the PTE.
+	 */
+
+        mtspr(SPRN_ZPR, 0x10000000);
+
+	flush_instruction_cache();
+
+	/*
+	 * Set up the real-mode cache parameters for the exception vector
+	 * handlers (which are run in real-mode).
+	 */
+
+        mtspr(SPRN_DCWR, 0x00000000);	/* All caching is write-back */
+
+        /*
+	 * Cache instruction and data space where the exception
+	 * vectors and the kernel live in real-mode.
+	 */
+
+        mtspr(SPRN_DCCR, 0xF0000000);	/* 512 MB of data space at 0x0. */
+        mtspr(SPRN_ICCR, 0xF0000000);	/* 512 MB of instr. space at 0x0. */
+}
+
+#define LARGE_PAGE_SIZE_16M	(1<<24)
+#define LARGE_PAGE_SIZE_4M	(1<<22)
+
+unsigned long __init mmu_mapin_ram(void)
+{
+	unsigned long v, s;
+	phys_addr_t p;
+
+	v = KERNELBASE;
+	p = PPC_MEMSTART;
+	s = 0;
+
+	if (__map_without_ltlbs) {
+		return s;
+	}
+
+	while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) {
+		pmd_t *pmdp;
+		unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
+
+		spin_lock(&init_mm.page_table_lock);
+		pmdp = pmd_offset(pgd_offset_k(v), v);
+		pmd_val(*pmdp++) = val;
+		pmd_val(*pmdp++) = val;
+		pmd_val(*pmdp++) = val;
+		pmd_val(*pmdp++) = val;
+		spin_unlock(&init_mm.page_table_lock);
+
+		v += LARGE_PAGE_SIZE_16M;
+		p += LARGE_PAGE_SIZE_16M;
+		s += LARGE_PAGE_SIZE_16M;
+	}
+
+	while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) {
+		pmd_t *pmdp;
+		unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
+
+		spin_lock(&init_mm.page_table_lock);
+		pmdp = pmd_offset(pgd_offset_k(v), v);
+		pmd_val(*pmdp) = val;
+		spin_unlock(&init_mm.page_table_lock);
+
+		v += LARGE_PAGE_SIZE_4M;
+		p += LARGE_PAGE_SIZE_4M;
+		s += LARGE_PAGE_SIZE_4M;
+	}
+
+	return s;
+}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
new file mode 100644
index 00000000000..9f52c26acd8
--- /dev/null
+++ b/arch/powerpc/mm/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the linux ppc-specific parts of the memory manager.
+#
+
+obj-y				:= fault.o mem.o
+obj-$(CONFIG_PPC32)		+= init.o pgtable.o mmu_context.o \
+				   mem_pieces.o tlb.o
+obj-$(CONFIG_PPC64)		+= init64.o pgtable64.o mmu_context64.o
+obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu.o hash_32.o
+obj-$(CONFIG_40x)		+= 4xx_mmu.o
+obj-$(CONFIG_44x)		+= 44x_mmu.o
+obj-$(CONFIG_FSL_BOOKE)		+= fsl_booke_mmu.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
new file mode 100644
index 00000000000..3df641fa789
--- /dev/null
+++ b/arch/powerpc/mm/fault.c
@@ -0,0 +1,391 @@
+/*
+ *  arch/ppc/mm/fault.c
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Derived from "arch/i386/mm/fault.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Modified by Cort Dougan and Paul Mackerras.
+ *
+ *  Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/tlbflush.h>
+#include <asm/kdebug.h>
+#include <asm/siginfo.h>
+
+/*
+ * Check whether the instruction at regs->nip is a store using
+ * an update addressing form which will update r1.
+ */
+static int store_updates_sp(struct pt_regs *regs)
+{
+	unsigned int inst;
+
+	if (get_user(inst, (unsigned int __user *)regs->nip))
+		return 0;
+	/* check for 1 in the rA field */
+	if (((inst >> 16) & 0x1f) != 1)
+		return 0;
+	/* check major opcode */
+	switch (inst >> 26) {
+	case 37:	/* stwu */
+	case 39:	/* stbu */
+	case 45:	/* sthu */
+	case 53:	/* stfsu */
+	case 55:	/* stfdu */
+		return 1;
+	case 62:	/* std or stdu */
+		return (inst & 3) == 1;
+	case 31:
+		/* check minor opcode */
+		switch ((inst >> 1) & 0x3ff) {
+		case 181:	/* stdux */
+		case 183:	/* stwux */
+		case 247:	/* stbux */
+		case 439:	/* sthux */
+		case 695:	/* stfsux */
+		case 759:	/* stfdux */
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static void do_dabr(struct pt_regs *regs, unsigned long error_code)
+{
+	siginfo_t info;
+
+	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+			11, SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (debugger_dabr_match(regs))
+		return;
+
+	/* Clear the DABR */
+	set_dabr(0);
+
+	/* Deliver the signal to userspace */
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code = TRAP_HWBKPT;
+	info.si_addr = (void __user *)regs->nip;
+	force_sig_info(SIGTRAP, &info, current);
+}
+
+/*
+ * For 600- and 800-family processors, the error_code parameter is DSISR
+ * for a data fault, SRR1 for an instruction fault. For 400-family processors
+ * the error_code parameter is ESR for a data fault, 0 for an instruction
+ * fault.
+ * For 64-bit processors, the error_code parameter is
+ *  - DSISR for a non-SLB data access fault,
+ *  - SRR1 & 0x08000000 for a non-SLB instruction access fault
+ *  - 0 any SLB fault.
+ *
+ * The return value is 0 if the fault was handled, or the signal
+ * number if this is a kernel fault that can't be handled here.
+ */
+int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+			    unsigned long error_code)
+{
+	struct vm_area_struct * vma;
+	struct mm_struct *mm = current->mm;
+	siginfo_t info;
+	int code = SEGV_MAPERR;
+	int is_write = 0;
+	int trap = TRAP(regs);
+ 	int is_exec = trap == 0x400;
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+	/*
+	 * Fortunately the bit assignments in SRR1 for an instruction
+	 * fault and DSISR for a data fault are mostly the same for the
+	 * bits we are interested in.  But there are some bits which
+	 * indicate errors in DSISR but can validly be set in SRR1.
+	 */
+	if (trap == 0x400)
+		error_code &= 0x48200000;
+	else
+		is_write = error_code & DSISR_ISSTORE;
+#else
+	is_write = error_code & ESR_DST;
+#endif /* CONFIG_4xx || CONFIG_BOOKE */
+
+	if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
+				11, SIGSEGV) == NOTIFY_STOP)
+		return 0;
+
+	if (trap == 0x300) {
+		if (debugger_fault_handler(regs))
+			return 0;
+	}
+
+	/* On a kernel SLB miss we can only check for a valid exception entry */
+	if (!user_mode(regs) && (address >= TASK_SIZE))
+		return SIGSEGV;
+
+#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+  	if (error_code & DSISR_DABRMATCH) {
+		/* DABR match */
+		do_dabr(regs, error_code);
+		return 0;
+	}
+#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
+
+	if (in_atomic() || mm == NULL) {
+		if (!user_mode(regs))
+			return SIGSEGV;
+		/* in_atomic() in user mode is really bad,
+		   as is current->mm == NULL. */
+		printk(KERN_EMERG "Page fault in user mode with"
+		       "in_atomic() = %d mm = %p\n", in_atomic(), mm);
+		printk(KERN_EMERG "NIP = %lx  MSR = %lx\n",
+		       regs->nip, regs->msr);
+		die("Weird page fault", regs, SIGSEGV);
+	}
+
+	/* When running in the kernel we expect faults to occur only to
+	 * addresses in user space.  All other faults represent errors in the
+	 * kernel and should generate an OOPS.  Unfortunatly, in the case of an
+	 * erroneous fault occuring in a code path which already holds mmap_sem
+	 * we will deadlock attempting to validate the fault against the
+	 * address space.  Luckily the kernel only validly references user
+	 * space from well defined areas of code, which are listed in the
+	 * exceptions table.
+	 *
+	 * As the vast majority of faults will be valid we will only perform
+	 * the source reference check when there is a possibilty of a deadlock.
+	 * Attempt to lock the address space, if we cannot we then validate the
+	 * source.  If this is invalid we can skip the address space check,
+	 * thus avoiding the deadlock.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (!user_mode(regs) && !search_exception_tables(regs->nip))
+			goto bad_area_nosemaphore;
+
+		down_read(&mm->mmap_sem);
+	}
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+
+	/*
+	 * N.B. The POWER/Open ABI allows programs to access up to
+	 * 288 bytes below the stack pointer.
+	 * The kernel signal delivery code writes up to about 1.5kB
+	 * below the stack pointer (r1) before decrementing it.
+	 * The exec code can write slightly over 640kB to the stack
+	 * before setting the user r1.  Thus we allow the stack to
+	 * expand to 1MB without further checks.
+	 */
+	if (address + 0x100000 < vma->vm_end) {
+		/* get user regs even if this fault is in kernel mode */
+		struct pt_regs *uregs = current->thread.regs;
+		if (uregs == NULL)
+			goto bad_area;
+
+		/*
+		 * A user-mode access to an address a long way below
+		 * the stack pointer is only valid if the instruction
+		 * is one which would update the stack pointer to the
+		 * address accessed if the instruction completed,
+		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
+		 * (or the byte, halfword, float or double forms).
+		 *
+		 * If we don't check this then any write to the area
+		 * between the last mapped region and the stack will
+		 * expand the stack rather than segfaulting.
+		 */
+		if (address + 2048 < uregs->gpr[1]
+		    && (!user_mode(regs) || !store_updates_sp(regs)))
+			goto bad_area;
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+good_area:
+	code = SEGV_ACCERR;
+#if defined(CONFIG_6xx)
+	if (error_code & 0x95700000)
+		/* an error such as lwarx to I/O controller space,
+		   address matching DABR, eciwx, etc. */
+		goto bad_area;
+#endif /* CONFIG_6xx */
+#if defined(CONFIG_8xx)
+        /* The MPC8xx seems to always set 0x80000000, which is
+         * "undefined".  Of those that can be set, this is the only
+         * one which seems bad.
+         */
+	if (error_code & 0x10000000)
+                /* Guarded storage error. */
+		goto bad_area;
+#endif /* CONFIG_8xx */
+
+	if (is_exec) {
+#ifdef CONFIG_PPC64
+		/* protection fault */
+		if (error_code & DSISR_PROTFAULT)
+			goto bad_area;
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+#endif
+#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+		pte_t *ptep;
+
+		/* Since 4xx/Book-E supports per-page execute permission,
+		 * we lazily flush dcache to icache. */
+		ptep = NULL;
+		if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
+			struct page *page = pte_page(*ptep);
+
+			if (! test_bit(PG_arch_1, &page->flags)) {
+				flush_dcache_icache_page(page);
+				set_bit(PG_arch_1, &page->flags);
+			}
+			pte_update(ptep, 0, _PAGE_HWEXEC);
+			_tlbie(address);
+			pte_unmap(ptep);
+			up_read(&mm->mmap_sem);
+			return 0;
+		}
+		if (ptep != NULL)
+			pte_unmap(ptep);
+#endif
+	/* a write */
+	} else if (is_write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	/* a read */
+	} else {
+		/* protection fault */
+		if (error_code & 0x08000000)
+			goto bad_area;
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+ survive:
+	switch (handle_mm_fault(mm, vma, address, is_write)) {
+
+	case VM_FAULT_MINOR:
+		current->min_flt++;
+		break;
+	case VM_FAULT_MAJOR:
+		current->maj_flt++;
+		break;
+	case VM_FAULT_SIGBUS:
+		goto do_sigbus;
+	case VM_FAULT_OOM:
+		goto out_of_memory;
+	default:
+		BUG();
+	}
+
+	up_read(&mm->mmap_sem);
+	return 0;
+
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	/* User mode accesses cause a SIGSEGV */
+	if (user_mode(regs)) {
+		_exception(SIGSEGV, regs, code, address);
+		return 0;
+	}
+
+	if (is_exec && (error_code & DSISR_PROTFAULT)
+	    && printk_ratelimit())
+		printk(KERN_CRIT "kernel tried to execute NX-protected"
+		       " page (%lx) - exploit attempt? (uid: %d)\n",
+		       address, current->uid);
+
+	return SIGSEGV;
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk("VM: killing process %s\n", current->comm);
+	if (user_mode(regs))
+		do_exit(SIGKILL);
+	return SIGKILL;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+	if (user_mode(regs)) {
+		info.si_signo = SIGBUS;
+		info.si_errno = 0;
+		info.si_code = BUS_ADRERR;
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGBUS, &info, current);
+		return 0;
+	}
+	return SIGBUS;
+}
+
+/*
+ * bad_page_fault is called when we have a bad access from the kernel.
+ * It is called from the DSI and ISI handlers in head.S and from some
+ * of the procedures in traps.c.
+ */
+void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+{
+	const struct exception_table_entry *entry;
+
+	/* Are we prepared to handle this fault?  */
+	if ((entry = search_exception_tables(regs->nip)) != NULL) {
+		regs->nip = entry->fixup;
+		return;
+	}
+
+	/* kernel has accessed a bad area */
+	die("Kernel access of bad area", regs, sig);
+}
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
new file mode 100644
index 00000000000..af9ca0eb6d5
--- /dev/null
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -0,0 +1,237 @@
+/*
+ * Modifications by Kumar Gala (kumar.gala@freescale.com) to support
+ * E500 Book E processors.
+ *
+ * Copyright 2004 Freescale Semiconductor, Inc
+ *
+ * This file contains the routines for initializing the MMU
+ * on the 4xx series of chips.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/bootx.h>
+#include <asm/machdep.h>
+#include <asm/setup.h>
+
+extern void loadcam_entry(unsigned int index);
+unsigned int tlbcam_index;
+unsigned int num_tlbcam_entries;
+static unsigned long __cam0, __cam1, __cam2;
+extern unsigned long total_lowmem;
+extern unsigned long __max_low_memory;
+#define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
+
+#define NUM_TLBCAMS	(16)
+
+struct tlbcam {
+   	u32	MAS0;
+	u32	MAS1;
+	u32	MAS2;
+	u32	MAS3;
+	u32	MAS7;
+} TLBCAM[NUM_TLBCAMS];
+
+struct tlbcamrange {
+   	unsigned long start;
+	unsigned long limit;
+	phys_addr_t phys;
+} tlbcam_addrs[NUM_TLBCAMS];
+
+extern unsigned int tlbcam_index;
+
+/*
+ * Return PA for this VA if it is mapped by a CAM, or 0
+ */
+unsigned long v_mapped_by_tlbcam(unsigned long va)
+{
+	int b;
+	for (b = 0; b < tlbcam_index; ++b)
+		if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit)
+			return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start);
+	return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_mapped_by_tlbcam(unsigned long pa)
+{
+	int b;
+	for (b = 0; b < tlbcam_index; ++b)
+		if (pa >= tlbcam_addrs[b].phys
+	    	    && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start)
+		              +tlbcam_addrs[b].phys)
+			return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
+	return 0;
+}
+
+/*
+ * Set up one of the I/D BAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 4 between 4k and 256M.
+ */
+void settlbcam(int index, unsigned long virt, phys_addr_t phys,
+		unsigned int size, int flags, unsigned int pid)
+{
+	unsigned int tsize, lz;
+
+	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size));
+	tsize = (21 - lz) / 2;
+
+#ifdef CONFIG_SMP
+	if ((flags & _PAGE_NO_CACHE) == 0)
+		flags |= _PAGE_COHERENT;
+#endif
+
+	TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1);
+	TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid);
+	TLBCAM[index].MAS2 = virt & PAGE_MASK;
+
+	TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
+	TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
+
+	TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR;
+	TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
+
+#ifndef CONFIG_KGDB /* want user access for breakpoints */
+	if (flags & _PAGE_USER) {
+	   TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
+	   TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
+	}
+#else
+	TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
+	TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
+#endif
+
+	tlbcam_addrs[index].start = virt;
+	tlbcam_addrs[index].limit = virt + size - 1;
+	tlbcam_addrs[index].phys = phys;
+
+	loadcam_entry(index);
+}
+
+void invalidate_tlbcam_entry(int index)
+{
+	TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index);
+	TLBCAM[index].MAS1 = ~MAS1_VALID;
+
+	loadcam_entry(index);
+}
+
+void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1,
+		unsigned long cam2)
+{
+	settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0);
+	tlbcam_index++;
+	if (cam1) {
+		tlbcam_index++;
+		settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0);
+	}
+	if (cam2) {
+		tlbcam_index++;
+		settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0);
+	}
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+	flush_instruction_cache();
+}
+
+unsigned long __init mmu_mapin_ram(void)
+{
+	cam_mapin_ram(__cam0, __cam1, __cam2);
+
+	return __cam0 + __cam1 + __cam2;
+}
+
+
+void __init
+adjust_total_lowmem(void)
+{
+	unsigned long max_low_mem = MAX_LOW_MEM;
+	unsigned long cam_max = 0x10000000;
+	unsigned long ram;
+
+	/* adjust CAM size to max_low_mem */
+	if (max_low_mem < cam_max)
+		cam_max = max_low_mem;
+
+	/* adjust lowmem size to max_low_mem */
+	if (max_low_mem < total_lowmem)
+		ram = max_low_mem;
+	else
+		ram = total_lowmem;
+
+	/* Calculate CAM values */
+	__cam0 = 1UL << 2 * (__ilog2(ram) / 2);
+	if (__cam0 > cam_max)
+		__cam0 = cam_max;
+	ram -= __cam0;
+	if (ram) {
+		__cam1 = 1UL << 2 * (__ilog2(ram) / 2);
+		if (__cam1 > cam_max)
+			__cam1 = cam_max;
+		ram -= __cam1;
+	}
+	if (ram) {
+		__cam2 = 1UL << 2 * (__ilog2(ram) / 2);
+		if (__cam2 > cam_max)
+			__cam2 = cam_max;
+		ram -= __cam2;
+	}
+
+	printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb,"
+			" CAM2=%ldMb residual: %ldMb\n",
+			__cam0 >> 20, __cam1 >> 20, __cam2 >> 20,
+			(total_lowmem - __cam0 - __cam1 - __cam2) >> 20);
+	__max_low_memory = max_low_mem = __cam0 + __cam1 + __cam2;
+}
diff --git a/arch/powerpc/mm/hash_32.S b/arch/powerpc/mm/hash_32.S
new file mode 100644
index 00000000000..57278a8dd13
--- /dev/null
+++ b/arch/powerpc/mm/hash_32.S
@@ -0,0 +1,618 @@
+/*
+ *  arch/ppc/kernel/hashtable.S
+ *
+ *  $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ *  Adapted for Power Macintosh by Paul Mackerras.
+ *  Low-level exception handlers and MMU support
+ *  rewritten by Paul Mackerras.
+ *    Copyright (C) 1996 Paul Mackerras.
+ *
+ *  This file contains low-level assembler routines for managing
+ *  the PowerPC MMU hash table.  (PPC 8xx processors don't use a
+ *  hash table, so this file is not used on them.)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/cputable.h>
+#include <asm/ppc_asm.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_SMP
+	.comm	mmu_hash_lock,4
+#endif /* CONFIG_SMP */
+
+/*
+ * Sync CPUs with hash_page taking & releasing the hash
+ * table lock
+ */
+#ifdef CONFIG_SMP
+	.text
+_GLOBAL(hash_page_sync)
+	lis	r8,mmu_hash_lock@h
+	ori	r8,r8,mmu_hash_lock@l
+	lis	r0,0x0fff
+	b	10f
+11:	lwz	r6,0(r8)
+	cmpwi	0,r6,0
+	bne	11b
+10:	lwarx	r6,0,r8
+	cmpwi	0,r6,0
+	bne-	11b
+	stwcx.	r0,0,r8
+	bne-	10b
+	isync
+	eieio
+	li	r0,0
+	stw	r0,0(r8)
+	blr	
+#endif
+
+/*
+ * Load a PTE into the hash table, if possible.
+ * The address is in r4, and r3 contains an access flag:
+ * _PAGE_RW (0x400) if a write.
+ * r9 contains the SRR1 value, from which we use the MSR_PR bit.
+ * SPRG3 contains the physical address of the current task's thread.
+ *
+ * Returns to the caller if the access is illegal or there is no
+ * mapping for the address.  Otherwise it places an appropriate PTE
+ * in the hash table and returns from the exception.
+ * Uses r0, r3 - r8, ctr, lr.
+ */
+	.text
+_GLOBAL(hash_page)
+#ifdef CONFIG_PPC64BRIDGE
+	mfmsr	r0
+	clrldi	r0,r0,1		/* make sure it's in 32-bit mode */
+	MTMSRD(r0)
+	isync
+#endif
+	tophys(r7,0)			/* gets -KERNELBASE into r7 */
+#ifdef CONFIG_SMP
+	addis	r8,r7,mmu_hash_lock@h
+	ori	r8,r8,mmu_hash_lock@l
+	lis	r0,0x0fff
+	b	10f
+11:	lwz	r6,0(r8)
+	cmpwi	0,r6,0
+	bne	11b
+10:	lwarx	r6,0,r8
+	cmpwi	0,r6,0
+	bne-	11b
+	stwcx.	r0,0,r8
+	bne-	10b
+	isync
+#endif
+	/* Get PTE (linux-style) and check access */
+	lis	r0,KERNELBASE@h		/* check if kernel address */
+	cmplw	0,r4,r0
+	mfspr	r8,SPRN_SPRG3		/* current task's THREAD (phys) */
+	ori	r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
+	lwz	r5,PGDIR(r8)		/* virt page-table root */
+	blt+	112f			/* assume user more likely */
+	lis	r5,swapper_pg_dir@ha	/* if kernel address, use */
+	addi	r5,r5,swapper_pg_dir@l	/* kernel page table */
+	rlwimi	r3,r9,32-12,29,29	/* MSR_PR -> _PAGE_USER */
+112:	add	r5,r5,r7		/* convert to phys addr */
+	rlwimi	r5,r4,12,20,29		/* insert top 10 bits of address */
+	lwz	r8,0(r5)		/* get pmd entry */
+	rlwinm.	r8,r8,0,0,19		/* extract address of pte page */
+#ifdef CONFIG_SMP
+	beq-	hash_page_out		/* return if no mapping */
+#else
+	/* XXX it seems like the 601 will give a machine fault on the
+	   rfi if its alignment is wrong (bottom 4 bits of address are
+	   8 or 0xc) and we have had a not-taken conditional branch
+	   to the address following the rfi. */
+	beqlr-
+#endif
+	rlwimi	r8,r4,22,20,29		/* insert next 10 bits of address */
+	rlwinm	r0,r3,32-3,24,24	/* _PAGE_RW access -> _PAGE_DIRTY */
+	ori	r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE
+
+	/*
+	 * Update the linux PTE atomically.  We do the lwarx up-front
+	 * because almost always, there won't be a permission violation
+	 * and there won't already be an HPTE, and thus we will have
+	 * to update the PTE to set _PAGE_HASHPTE.  -- paulus.
+	 */
+retry:
+	lwarx	r6,0,r8			/* get linux-style pte */
+	andc.	r5,r3,r6		/* check access & ~permission */
+#ifdef CONFIG_SMP
+	bne-	hash_page_out		/* return if access not permitted */
+#else
+	bnelr-
+#endif
+	or	r5,r0,r6		/* set accessed/dirty bits */
+	stwcx.	r5,0,r8			/* attempt to update PTE */
+	bne-	retry			/* retry if someone got there first */
+
+	mfsrin	r3,r4			/* get segment reg for segment */
+	mfctr	r0
+	stw	r0,_CTR(r11)
+	bl	create_hpte		/* add the hash table entry */
+
+#ifdef CONFIG_SMP
+	eieio
+	addis	r8,r7,mmu_hash_lock@ha
+	li	r0,0
+	stw	r0,mmu_hash_lock@l(r8)
+#endif
+
+	/* Return from the exception */
+	lwz	r5,_CTR(r11)
+	mtctr	r5
+	lwz	r0,GPR0(r11)
+	lwz	r7,GPR7(r11)
+	lwz	r8,GPR8(r11)
+	b	fast_exception_return
+
+#ifdef CONFIG_SMP
+hash_page_out:
+	eieio
+	addis	r8,r7,mmu_hash_lock@ha
+	li	r0,0
+	stw	r0,mmu_hash_lock@l(r8)
+	blr
+#endif /* CONFIG_SMP */
+
+/*
+ * Add an entry for a particular page to the hash table.
+ *
+ * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval)
+ *
+ * We assume any necessary modifications to the pte (e.g. setting
+ * the accessed bit) have already been done and that there is actually
+ * a hash table in use (i.e. we're not on a 603).
+ */
+_GLOBAL(add_hash_page)
+	mflr	r0
+	stw	r0,4(r1)
+
+	/* Convert context and va to VSID */
+	mulli	r3,r3,897*16		/* multiply context by context skew */
+	rlwinm	r0,r4,4,28,31		/* get ESID (top 4 bits of va) */
+	mulli	r0,r0,0x111		/* multiply by ESID skew */
+	add	r3,r3,r0		/* note create_hpte trims to 24 bits */
+
+#ifdef CONFIG_SMP
+	rlwinm	r8,r1,0,0,18		/* use cpu number to make tag */
+	lwz	r8,TI_CPU(r8)		/* to go in mmu_hash_lock */
+	oris	r8,r8,12
+#endif /* CONFIG_SMP */
+
+	/*
+	 * We disable interrupts here, even on UP, because we don't
+	 * want to race with hash_page, and because we want the
+	 * _PAGE_HASHPTE bit to be a reliable indication of whether
+	 * the HPTE exists (or at least whether one did once).
+	 * We also turn off the MMU for data accesses so that we
+	 * we can't take a hash table miss (assuming the code is
+	 * covered by a BAT).  -- paulus
+	 */
+	mfmsr	r10
+	SYNC
+	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
+	rlwinm	r0,r0,0,28,26		/* clear MSR_DR */
+	mtmsr	r0
+	SYNC_601
+	isync
+
+	tophys(r7,0)
+
+#ifdef CONFIG_SMP
+	addis	r9,r7,mmu_hash_lock@ha
+	addi	r9,r9,mmu_hash_lock@l
+10:	lwarx	r0,0,r9			/* take the mmu_hash_lock */
+	cmpi	0,r0,0
+	bne-	11f
+	stwcx.	r8,0,r9
+	beq+	12f
+11:	lwz	r0,0(r9)
+	cmpi	0,r0,0
+	beq	10b
+	b	11b
+12:	isync
+#endif
+
+	/*
+	 * Fetch the linux pte and test and set _PAGE_HASHPTE atomically.
+	 * If _PAGE_HASHPTE was already set, we don't replace the existing
+	 * HPTE, so we just unlock and return.
+	 */
+	mr	r8,r5
+	rlwimi	r8,r4,22,20,29
+1:	lwarx	r6,0,r8
+	andi.	r0,r6,_PAGE_HASHPTE
+	bne	9f			/* if HASHPTE already set, done */
+	ori	r5,r6,_PAGE_HASHPTE
+	stwcx.	r5,0,r8
+	bne-	1b
+
+	bl	create_hpte
+
+9:
+#ifdef CONFIG_SMP
+	eieio
+	li	r0,0
+	stw	r0,0(r9)		/* clear mmu_hash_lock */
+#endif
+
+	/* reenable interrupts and DR */
+	mtmsr	r10
+	SYNC_601
+	isync
+
+	lwz	r0,4(r1)
+	mtlr	r0
+	blr
+
+/*
+ * This routine adds a hardware PTE to the hash table.
+ * It is designed to be called with the MMU either on or off.
+ * r3 contains the VSID, r4 contains the virtual address,
+ * r5 contains the linux PTE, r6 contains the old value of the
+ * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the
+ * offset to be added to addresses (0 if the MMU is on,
+ * -KERNELBASE if it is off).
+ * On SMP, the caller should have the mmu_hash_lock held.
+ * We assume that the caller has (or will) set the _PAGE_HASHPTE
+ * bit in the linux PTE in memory.  The value passed in r6 should
+ * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set
+ * this routine will skip the search for an existing HPTE.
+ * This procedure modifies r0, r3 - r6, r8, cr0.
+ *  -- paulus.
+ *
+ * For speed, 4 of the instructions get patched once the size and
+ * physical address of the hash table are known.  These definitions
+ * of Hash_base and Hash_bits below are just an example.
+ */
+Hash_base = 0xc0180000
+Hash_bits = 12				/* e.g. 256kB hash table */
+Hash_msk = (((1 << Hash_bits) - 1) * 64)
+
+#ifndef CONFIG_PPC64BRIDGE
+/* defines for the PTE format for 32-bit PPCs */
+#define PTE_SIZE	8
+#define PTEG_SIZE	64
+#define LG_PTEG_SIZE	6
+#define LDPTEu		lwzu
+#define STPTE		stw
+#define CMPPTE		cmpw
+#define PTE_H		0x40
+#define PTE_V		0x80000000
+#define TST_V(r)	rlwinm. r,r,0,0,0
+#define SET_V(r)	oris r,r,PTE_V@h
+#define CLR_V(r,t)	rlwinm r,r,0,1,31
+
+#else
+/* defines for the PTE format for 64-bit PPCs */
+#define PTE_SIZE	16
+#define PTEG_SIZE	128
+#define LG_PTEG_SIZE	7
+#define LDPTEu		ldu
+#define STPTE		std
+#define CMPPTE		cmpd
+#define PTE_H		2
+#define PTE_V		1
+#define TST_V(r)	andi. r,r,PTE_V
+#define SET_V(r)	ori r,r,PTE_V
+#define CLR_V(r,t)	li t,PTE_V; andc r,r,t
+#endif /* CONFIG_PPC64BRIDGE */
+
+#define HASH_LEFT	31-(LG_PTEG_SIZE+Hash_bits-1)
+#define HASH_RIGHT	31-LG_PTEG_SIZE
+
+_GLOBAL(create_hpte)
+	/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
+	rlwinm	r8,r5,32-10,31,31	/* _PAGE_RW -> PP lsb */
+	rlwinm	r0,r5,32-7,31,31	/* _PAGE_DIRTY -> PP lsb */
+	and	r8,r8,r0		/* writable if _RW & _DIRTY */
+	rlwimi	r5,r5,32-1,30,30	/* _PAGE_USER -> PP msb */
+	rlwimi	r5,r5,32-2,31,31	/* _PAGE_USER -> PP lsb */
+	ori	r8,r8,0xe14		/* clear out reserved bits and M */
+	andc	r8,r5,r8		/* PP = user? (rw&dirty? 2: 3): 0 */
+BEGIN_FTR_SECTION
+	ori	r8,r8,_PAGE_COHERENT	/* set M (coherence required) */
+END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT)
+
+	/* Construct the high word of the PPC-style PTE (r5) */
+#ifndef CONFIG_PPC64BRIDGE
+	rlwinm	r5,r3,7,1,24		/* put VSID in 0x7fffff80 bits */
+	rlwimi	r5,r4,10,26,31		/* put in API (abbrev page index) */
+#else /* CONFIG_PPC64BRIDGE */
+	clrlwi	r3,r3,8			/* reduce vsid to 24 bits */
+	sldi	r5,r3,12		/* shift vsid into position */
+	rlwimi	r5,r4,16,20,24		/* put in API (abbrev page index) */
+#endif /* CONFIG_PPC64BRIDGE */
+	SET_V(r5)			/* set V (valid) bit */
+
+	/* Get the address of the primary PTE group in the hash table (r3) */
+_GLOBAL(hash_page_patch_A)
+	addis	r0,r7,Hash_base@h	/* base address of hash table */
+	rlwimi	r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
+	rlwinm	r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+	xor	r3,r3,r0		/* make primary hash */
+	li	r0,8			/* PTEs/group */
+
+	/*
+	 * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search
+	 * if it is clear, meaning that the HPTE isn't there already...
+	 */
+	andi.	r6,r6,_PAGE_HASHPTE
+	beq+	10f			/* no PTE: go look for an empty slot */
+	tlbie	r4
+
+	addis	r4,r7,htab_hash_searches@ha
+	lwz	r6,htab_hash_searches@l(r4)
+	addi	r6,r6,1			/* count how many searches we do */
+	stw	r6,htab_hash_searches@l(r4)
+
+	/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
+	mtctr	r0
+	addi	r4,r3,-PTE_SIZE
+1:	LDPTEu	r6,PTE_SIZE(r4)		/* get next PTE */
+	CMPPTE	0,r6,r5
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	found_slot
+
+	/* Search the secondary PTEG for a matching PTE */
+	ori	r5,r5,PTE_H		/* set H (secondary hash) bit */
+_GLOBAL(hash_page_patch_B)
+	xoris	r4,r3,Hash_msk>>16	/* compute secondary hash */
+	xori	r4,r4,(-PTEG_SIZE & 0xffff)
+	addi	r4,r4,-PTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r6,PTE_SIZE(r4)
+	CMPPTE	0,r6,r5
+	bdnzf	2,2b
+	beq+	found_slot
+	xori	r5,r5,PTE_H		/* clear H bit again */
+
+	/* Search the primary PTEG for an empty slot */
+10:	mtctr	r0
+	addi	r4,r3,-PTE_SIZE		/* search primary PTEG */
+1:	LDPTEu	r6,PTE_SIZE(r4)		/* get next PTE */
+	TST_V(r6)			/* test valid bit */
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	found_empty
+
+	/* update counter of times that the primary PTEG is full */
+	addis	r4,r7,primary_pteg_full@ha
+	lwz	r6,primary_pteg_full@l(r4)
+	addi	r6,r6,1
+	stw	r6,primary_pteg_full@l(r4)
+
+	/* Search the secondary PTEG for an empty slot */
+	ori	r5,r5,PTE_H		/* set H (secondary hash) bit */
+_GLOBAL(hash_page_patch_C)
+	xoris	r4,r3,Hash_msk>>16	/* compute secondary hash */
+	xori	r4,r4,(-PTEG_SIZE & 0xffff)
+	addi	r4,r4,-PTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r6,PTE_SIZE(r4)
+	TST_V(r6)
+	bdnzf	2,2b
+	beq+	found_empty
+	xori	r5,r5,PTE_H		/* clear H bit again */
+
+	/*
+	 * Choose an arbitrary slot in the primary PTEG to overwrite.
+	 * Since both the primary and secondary PTEGs are full, and we
+	 * have no information that the PTEs in the primary PTEG are
+	 * more important or useful than those in the secondary PTEG,
+	 * and we know there is a definite (although small) speed
+	 * advantage to putting the PTE in the primary PTEG, we always
+	 * put the PTE in the primary PTEG.
+	 */
+	addis	r4,r7,next_slot@ha
+	lwz	r6,next_slot@l(r4)
+	addi	r6,r6,PTE_SIZE
+	andi.	r6,r6,7*PTE_SIZE
+	stw	r6,next_slot@l(r4)
+	add	r4,r3,r6
+
+#ifndef CONFIG_SMP
+	/* Store PTE in PTEG */
+found_empty:
+	STPTE	r5,0(r4)
+found_slot:
+	STPTE	r8,PTE_SIZE/2(r4)
+
+#else /* CONFIG_SMP */
+/*
+ * Between the tlbie above and updating the hash table entry below,
+ * another CPU could read the hash table entry and put it in its TLB.
+ * There are 3 cases:
+ * 1. using an empty slot
+ * 2. updating an earlier entry to change permissions (i.e. enable write)
+ * 3. taking over the PTE for an unrelated address
+ *
+ * In each case it doesn't really matter if the other CPUs have the old
+ * PTE in their TLB.  So we don't need to bother with another tlbie here,
+ * which is convenient as we've overwritten the register that had the
+ * address. :-)  The tlbie above is mainly to make sure that this CPU comes
+ * and gets the new PTE from the hash table.
+ *
+ * We do however have to make sure that the PTE is never in an invalid
+ * state with the V bit set.
+ */
+found_empty:
+found_slot:
+	CLR_V(r5,r0)		/* clear V (valid) bit in PTE */
+	STPTE	r5,0(r4)
+	sync
+	TLBSYNC
+	STPTE	r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */
+	sync
+	SET_V(r5)
+	STPTE	r5,0(r4)	/* finally set V bit in PTE */
+#endif /* CONFIG_SMP */
+
+	sync		/* make sure pte updates get to memory */
+	blr
+
+	.comm	next_slot,4
+	.comm	primary_pteg_full,4
+	.comm	htab_hash_searches,4
+
+/*
+ * Flush the entry for a particular page from the hash table.
+ *
+ * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval,
+ *		    int count)
+ *
+ * We assume that there is a hash table in use (Hash != 0).
+ */
+_GLOBAL(flush_hash_pages)
+	tophys(r7,0)
+
+	/*
+	 * We disable interrupts here, even on UP, because we want
+	 * the _PAGE_HASHPTE bit to be a reliable indication of
+	 * whether the HPTE exists (or at least whether one did once).
+	 * We also turn off the MMU for data accesses so that we
+	 * we can't take a hash table miss (assuming the code is
+	 * covered by a BAT).  -- paulus
+	 */
+	mfmsr	r10
+	SYNC
+	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
+	rlwinm	r0,r0,0,28,26		/* clear MSR_DR */
+	mtmsr	r0
+	SYNC_601
+	isync
+
+	/* First find a PTE in the range that has _PAGE_HASHPTE set */
+	rlwimi	r5,r4,22,20,29
+1:	lwz	r0,0(r5)
+	cmpwi	cr1,r6,1
+	andi.	r0,r0,_PAGE_HASHPTE
+	bne	2f
+	ble	cr1,19f
+	addi	r4,r4,0x1000
+	addi	r5,r5,4
+	addi	r6,r6,-1
+	b	1b
+
+	/* Convert context and va to VSID */
+2:	mulli	r3,r3,897*16		/* multiply context by context skew */
+	rlwinm	r0,r4,4,28,31		/* get ESID (top 4 bits of va) */
+	mulli	r0,r0,0x111		/* multiply by ESID skew */
+	add	r3,r3,r0		/* note code below trims to 24 bits */
+
+	/* Construct the high word of the PPC-style PTE (r11) */
+#ifndef CONFIG_PPC64BRIDGE
+	rlwinm	r11,r3,7,1,24		/* put VSID in 0x7fffff80 bits */
+	rlwimi	r11,r4,10,26,31		/* put in API (abbrev page index) */
+#else /* CONFIG_PPC64BRIDGE */
+	clrlwi	r3,r3,8			/* reduce vsid to 24 bits */
+	sldi	r11,r3,12		/* shift vsid into position */
+	rlwimi	r11,r4,16,20,24		/* put in API (abbrev page index) */
+#endif /* CONFIG_PPC64BRIDGE */
+	SET_V(r11)			/* set V (valid) bit */
+
+#ifdef CONFIG_SMP
+	addis	r9,r7,mmu_hash_lock@ha
+	addi	r9,r9,mmu_hash_lock@l
+	rlwinm	r8,r1,0,0,18
+	add	r8,r8,r7
+	lwz	r8,TI_CPU(r8)
+	oris	r8,r8,9
+10:	lwarx	r0,0,r9
+	cmpi	0,r0,0
+	bne-	11f
+	stwcx.	r8,0,r9
+	beq+	12f
+11:	lwz	r0,0(r9)
+	cmpi	0,r0,0
+	beq	10b
+	b	11b
+12:	isync
+#endif
+
+	/*
+	 * Check the _PAGE_HASHPTE bit in the linux PTE.  If it is
+	 * already clear, we're done (for this pte).  If not,
+	 * clear it (atomically) and proceed.  -- paulus.
+	 */
+33:	lwarx	r8,0,r5			/* fetch the pte */
+	andi.	r0,r8,_PAGE_HASHPTE
+	beq	8f			/* done if HASHPTE is already clear */
+	rlwinm	r8,r8,0,31,29		/* clear HASHPTE bit */
+	stwcx.	r8,0,r5			/* update the pte */
+	bne-	33b
+
+	/* Get the address of the primary PTE group in the hash table (r3) */
+_GLOBAL(flush_hash_patch_A)
+	addis	r8,r7,Hash_base@h	/* base address of hash table */
+	rlwimi	r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT    /* VSID -> hash */
+	rlwinm	r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+	xor	r8,r0,r8		/* make primary hash */
+
+	/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
+	li	r0,8			/* PTEs/group */
+	mtctr	r0
+	addi	r12,r8,-PTE_SIZE
+1:	LDPTEu	r0,PTE_SIZE(r12)	/* get next PTE */
+	CMPPTE	0,r0,r11
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	3f
+
+	/* Search the secondary PTEG for a matching PTE */
+	ori	r11,r11,PTE_H		/* set H (secondary hash) bit */
+	li	r0,8			/* PTEs/group */
+_GLOBAL(flush_hash_patch_B)
+	xoris	r12,r8,Hash_msk>>16	/* compute secondary hash */
+	xori	r12,r12,(-PTEG_SIZE & 0xffff)
+	addi	r12,r12,-PTE_SIZE
+	mtctr	r0
+2:	LDPTEu	r0,PTE_SIZE(r12)
+	CMPPTE	0,r0,r11
+	bdnzf	2,2b
+	xori	r11,r11,PTE_H		/* clear H again */
+	bne-	4f			/* should rarely fail to find it */
+
+3:	li	r0,0
+	STPTE	r0,0(r12)		/* invalidate entry */
+4:	sync
+	tlbie	r4			/* in hw tlb too */
+	sync
+
+8:	ble	cr1,9f			/* if all ptes checked */
+81:	addi	r6,r6,-1
+	addi	r5,r5,4			/* advance to next pte */
+	addi	r4,r4,0x1000
+	lwz	r0,0(r5)		/* check next pte */
+	cmpwi	cr1,r6,1
+	andi.	r0,r0,_PAGE_HASHPTE
+	bne	33b
+	bgt	cr1,81b
+
+9:
+#ifdef CONFIG_SMP
+	TLBSYNC
+	li	r0,0
+	stw	r0,0(r9)		/* clear mmu_hash_lock */
+#endif
+
+19:	mtmsr	r10
+	SYNC_601
+	isync
+	blr
diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c
new file mode 100644
index 00000000000..f4d983a6e52
--- /dev/null
+++ b/arch/powerpc/mm/init.c
@@ -0,0 +1,581 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *  PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/initrd.h>
+#include <linux/pagemap.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/btext.h>
+#include <asm/tlb.h>
+#include <asm/bootinfo.h>
+#include <asm/prom.h>
+
+#include "mem_pieces.h"
+#include "mmu_decl.h"
+
+#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
+/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */
+#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE))
+#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL"
+#endif
+#endif
+#define MAX_LOW_MEM	CONFIG_LOWMEM_SIZE
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+unsigned long total_memory;
+unsigned long total_lowmem;
+
+unsigned long ppc_memstart;
+unsigned long ppc_memoffset = PAGE_OFFSET;
+
+int mem_init_done;
+int init_bootmem_done;
+int boot_mapsize;
+#ifdef CONFIG_PPC_PMAC
+unsigned long agp_special_page;
+#endif
+
+extern char _end[];
+extern char etext[], _stext[];
+extern char __init_begin, __init_end;
+
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+#endif
+
+void MMU_init(void);
+void set_phys_avail(unsigned long total_ram);
+
+/* XXX should be in current.h  -- paulus */
+extern struct task_struct *current_set[NR_CPUS];
+
+char *klimit = _end;
+struct mem_pieces phys_avail;
+struct device_node *memory_node;
+
+/*
+ * this tells the system to map all of ram with the segregs
+ * (i.e. page tables) instead of the bats.
+ * -- Cort
+ */
+int __map_without_bats;
+int __map_without_ltlbs;
+
+/* max amount of RAM to use */
+unsigned long __max_memory;
+/* max amount of low RAM to map in */
+unsigned long __max_low_memory = MAX_LOW_MEM;
+
+/*
+ * Read in a property describing some pieces of memory.
+ */
+static int __init get_mem_prop(char *name, struct mem_pieces *mp)
+{
+	struct reg_property *rp;
+	int i, s;
+	unsigned int *ip;
+	int nac = prom_n_addr_cells(memory_node);
+	int nsc = prom_n_size_cells(memory_node);
+
+	ip = (unsigned int *) get_property(memory_node, name, &s);
+	if (ip == NULL) {
+		printk(KERN_ERR "error: couldn't get %s property on /memory\n",
+		       name);
+		return 0;
+	}
+	s /= (nsc + nac) * 4;
+	rp = mp->regions;
+	for (i = 0; i < s; ++i, ip += nac+nsc) {
+		if (nac >= 2 && ip[nac-2] != 0)
+			continue;
+		rp->address = ip[nac-1];
+		if (nsc >= 2 && ip[nac+nsc-2] != 0)
+			rp->size = ~0U;
+		else
+			rp->size = ip[nac+nsc-1];
+		++rp;
+	}
+	mp->n_regions = rp - mp->regions;
+
+	/* Make sure the pieces are sorted. */
+	mem_pieces_sort(mp);
+	mem_pieces_coalesce(mp);
+	return 1;
+}
+
+/*
+ * Collect information about physical RAM and which pieces are
+ * already in use from the device tree.
+ */
+unsigned long __init find_end_of_memory(void)
+{
+	unsigned long a, total;
+	struct mem_pieces phys_mem;
+
+	/*
+	 * Find out where physical memory is, and check that it
+	 * starts at 0 and is contiguous.  It seems that RAM is
+	 * always physically contiguous on Power Macintoshes.
+	 *
+	 * Supporting discontiguous physical memory isn't hard,
+	 * it just makes the virtual <-> physical mapping functions
+	 * more complicated (or else you end up wasting space
+	 * in mem_map).
+	 */
+	memory_node = find_devices("memory");
+	if (memory_node == NULL || !get_mem_prop("reg", &phys_mem)
+	    || phys_mem.n_regions == 0)
+		panic("No RAM??");
+	a = phys_mem.regions[0].address;
+	if (a != 0)
+		panic("RAM doesn't start at physical address 0");
+	total = phys_mem.regions[0].size;
+
+	if (phys_mem.n_regions > 1) {
+		printk("RAM starting at 0x%x is not contiguous\n",
+		       phys_mem.regions[1].address);
+		printk("Using RAM from 0 to 0x%lx\n", total-1);
+	}
+
+	return total;
+}
+
+/*
+ * Check for command-line options that affect what MMU_init will do.
+ */
+void MMU_setup(void)
+{
+	/* Check for nobats option (used in mapin_ram). */
+	if (strstr(cmd_line, "nobats")) {
+		__map_without_bats = 1;
+	}
+
+	if (strstr(cmd_line, "noltlbs")) {
+		__map_without_ltlbs = 1;
+	}
+
+	/* Look for mem= option on command line */
+	if (strstr(cmd_line, "mem=")) {
+		char *p, *q;
+		unsigned long maxmem = 0;
+
+		for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) {
+			q = p + 4;
+			if (p > cmd_line && p[-1] != ' ')
+				continue;
+			maxmem = simple_strtoul(q, &q, 0);
+			if (*q == 'k' || *q == 'K') {
+				maxmem <<= 10;
+				++q;
+			} else if (*q == 'm' || *q == 'M') {
+				maxmem <<= 20;
+				++q;
+			}
+		}
+		__max_memory = maxmem;
+	}
+}
+
+/*
+ * MMU_init sets up the basic memory mappings for the kernel,
+ * including both RAM and possibly some I/O regions,
+ * and sets up the page tables and the MMU hardware ready to go.
+ */
+void __init MMU_init(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:enter", 0x111);
+
+	/* parse args from command line */
+	MMU_setup();
+
+	/*
+	 * Figure out how much memory we have, how much
+	 * is lowmem, and how much is highmem.  If we were
+	 * passed the total memory size from the bootloader,
+	 * just use it.
+	 */
+	if (boot_mem_size)
+		total_memory = boot_mem_size;
+	else
+		total_memory = ppc_md.find_end_of_memory();
+
+	if (__max_memory && total_memory > __max_memory)
+		total_memory = __max_memory;
+	total_lowmem = total_memory;
+#ifdef CONFIG_FSL_BOOKE
+	/* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
+	 * entries, so we need to adjust lowmem to match the amount we can map
+	 * in the fixed entries */
+	adjust_total_lowmem();
+#endif /* CONFIG_FSL_BOOKE */
+	if (total_lowmem > __max_low_memory) {
+		total_lowmem = __max_low_memory;
+#ifndef CONFIG_HIGHMEM
+		total_memory = total_lowmem;
+#endif /* CONFIG_HIGHMEM */
+	}
+	set_phys_avail(total_lowmem);
+
+	/* Initialize the MMU hardware */
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:hw init", 0x300);
+	MMU_init_hw();
+
+	/* Map in all of RAM starting at KERNELBASE */
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:mapin", 0x301);
+	mapin_ram();
+
+#ifdef CONFIG_HIGHMEM
+	ioremap_base = PKMAP_BASE;
+#else
+	ioremap_base = 0xfe000000UL;	/* for now, could be 0xfffff000 */
+#endif /* CONFIG_HIGHMEM */
+	ioremap_bot = ioremap_base;
+
+	/* Map in I/O resources */
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:setio", 0x302);
+	if (ppc_md.setup_io_mappings)
+		ppc_md.setup_io_mappings();
+
+	/* Initialize the context management stuff */
+	mmu_context_init();
+
+	if (ppc_md.progress)
+		ppc_md.progress("MMU:exit", 0x211);
+
+#ifdef CONFIG_BOOTX_TEXT
+	/* By default, we are no longer mapped */
+       	boot_text_mapped = 0;
+	/* Must be done last, or ppc_md.progress will die. */
+	map_boot_text();
+#endif
+}
+
+/* This is only called until mem_init is done. */
+void __init *early_get_page(void)
+{
+	void *p;
+
+	if (init_bootmem_done) {
+		p = alloc_bootmem_pages(PAGE_SIZE);
+	} else {
+		p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE);
+	}
+	return p;
+}
+
+/* Free up now-unused memory */
+static void free_sec(unsigned long start, unsigned long end, const char *name)
+{
+	unsigned long cnt = 0;
+
+	while (start < end) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		cnt++;
+		start += PAGE_SIZE;
+ 	}
+	if (cnt) {
+		printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name);
+		totalram_pages += cnt;
+	}
+}
+
+void free_initmem(void)
+{
+#define FREESEC(TYPE) \
+	free_sec((unsigned long)(&__ ## TYPE ## _begin), \
+		 (unsigned long)(&__ ## TYPE ## _end), \
+		 #TYPE);
+
+	printk ("Freeing unused kernel memory:");
+	FREESEC(init);
+ 	printk("\n");
+	ppc_md.progress = NULL;
+#undef FREESEC
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (start < end)
+		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		totalram_pages++;
+	}
+}
+#endif
+
+/*
+ * Initialize the bootmem system and give it all the memory we
+ * have available.
+ */
+void __init do_init_bootmem(void)
+{
+	unsigned long start, size;
+	int i;
+
+	/*
+	 * Find an area to use for the bootmem bitmap.
+	 * We look for the first area which is at least
+	 * 128kB in length (128kB is enough for a bitmap
+	 * for 4GB of memory, using 4kB pages), plus 1 page
+	 * (in case the address isn't page-aligned).
+	 */
+	start = 0;
+	size = 0;
+	for (i = 0; i < phys_avail.n_regions; ++i) {
+		unsigned long a = phys_avail.regions[i].address;
+		unsigned long s = phys_avail.regions[i].size;
+		if (s <= size)
+			continue;
+		start = a;
+		size = s;
+		if (s >= 33 * PAGE_SIZE)
+			break;
+	}
+	start = PAGE_ALIGN(start);
+
+	min_low_pfn = start >> PAGE_SHIFT;
+	max_low_pfn = (PPC_MEMSTART + total_lowmem) >> PAGE_SHIFT;
+	max_pfn = (PPC_MEMSTART + total_memory) >> PAGE_SHIFT;
+	boot_mapsize = init_bootmem_node(&contig_page_data, min_low_pfn,
+					 PPC_MEMSTART >> PAGE_SHIFT,
+					 max_low_pfn);
+
+	/* remove the bootmem bitmap from the available memory */
+	mem_pieces_remove(&phys_avail, start, boot_mapsize, 1);
+
+	/* add everything in phys_avail into the bootmem map */
+	for (i = 0; i < phys_avail.n_regions; ++i)
+		free_bootmem(phys_avail.regions[i].address,
+			     phys_avail.regions[i].size);
+
+	init_bootmem_done = 1;
+}
+
+/*
+ * paging_init() sets up the page tables - in fact we've already done this.
+ */
+void __init paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES], i;
+
+#ifdef CONFIG_HIGHMEM
+	map_page(PKMAP_BASE, 0, 0);	/* XXX gross */
+	pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k
+			(PKMAP_BASE), PKMAP_BASE), PKMAP_BASE);
+	map_page(KMAP_FIX_BEGIN, 0, 0);	/* XXX gross */
+	kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k
+			(KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN);
+	kmap_prot = PAGE_KERNEL;
+#endif /* CONFIG_HIGHMEM */
+
+	/*
+	 * All pages are DMA-able so we put them all in the DMA zone.
+	 */
+	zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT;
+	for (i = 1; i < MAX_NR_ZONES; i++)
+		zones_size[i] = 0;
+
+#ifdef CONFIG_HIGHMEM
+	zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT;
+#endif /* CONFIG_HIGHMEM */
+
+	free_area_init(zones_size);
+}
+
+void __init mem_init(void)
+{
+	unsigned long addr;
+	int codepages = 0;
+	int datapages = 0;
+	int initpages = 0;
+#ifdef CONFIG_HIGHMEM
+	unsigned long highmem_mapnr;
+
+	highmem_mapnr = total_lowmem >> PAGE_SHIFT;
+#endif /* CONFIG_HIGHMEM */
+	max_mapnr = total_memory >> PAGE_SHIFT;
+
+	high_memory = (void *) __va(PPC_MEMSTART + total_lowmem);
+	num_physpages = max_mapnr;	/* RAM is assumed contiguous */
+
+	totalram_pages += free_all_bootmem();
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/* if we are booted from BootX with an initial ramdisk,
+	   make sure the ramdisk pages aren't reserved. */
+	if (initrd_start) {
+		for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE)
+			ClearPageReserved(virt_to_page(addr));
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+#ifdef CONFIG_PPC_OF
+	/* mark the RTAS pages as reserved */
+	if ( rtas_data )
+		for (addr = (ulong)__va(rtas_data);
+		     addr < PAGE_ALIGN((ulong)__va(rtas_data)+rtas_size) ;
+		     addr += PAGE_SIZE)
+			SetPageReserved(virt_to_page(addr));
+#endif
+#ifdef CONFIG_PPC_PMAC
+	if (agp_special_page)
+		SetPageReserved(virt_to_page(agp_special_page));
+#endif
+	for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory;
+	     addr += PAGE_SIZE) {
+		if (!PageReserved(virt_to_page(addr)))
+			continue;
+		if (addr < (ulong) etext)
+			codepages++;
+		else if (addr >= (unsigned long)&__init_begin
+			 && addr < (unsigned long)&__init_end)
+			initpages++;
+		else if (addr < (ulong) klimit)
+			datapages++;
+	}
+
+#ifdef CONFIG_HIGHMEM
+	{
+		unsigned long pfn;
+
+		for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
+			struct page *page = mem_map + pfn;
+
+			ClearPageReserved(page);
+			set_page_count(page, 1);
+			__free_page(page);
+			totalhigh_pages++;
+		}
+		totalram_pages += totalhigh_pages;
+	}
+#endif /* CONFIG_HIGHMEM */
+
+        printk("Memory: %luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n",
+	       (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10),
+	       codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10),
+	       initpages<< (PAGE_SHIFT-10),
+	       (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)));
+
+#ifdef CONFIG_PPC_PMAC
+	if (agp_special_page)
+		printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page);
+#endif
+
+	mem_init_done = 1;
+}
+
+/*
+ * Set phys_avail to the amount of physical memory,
+ * less the kernel text/data/bss.
+ */
+void __init
+set_phys_avail(unsigned long total_memory)
+{
+	unsigned long kstart, ksize;
+
+	/*
+	 * Initially, available physical memory is equivalent to all
+	 * physical memory.
+	 */
+
+	phys_avail.regions[0].address = PPC_MEMSTART;
+	phys_avail.regions[0].size = total_memory;
+	phys_avail.n_regions = 1;
+
+	/*
+	 * Map out the kernel text/data/bss from the available physical
+	 * memory.
+	 */
+
+	kstart = __pa(_stext);	/* should be 0 */
+	ksize = PAGE_ALIGN(klimit - _stext);
+
+	mem_pieces_remove(&phys_avail, kstart, ksize, 0);
+	mem_pieces_remove(&phys_avail, 0, 0x4000, 0);
+
+#if defined(CONFIG_BLK_DEV_INITRD)
+	/* Remove the init RAM disk from the available memory. */
+	if (initrd_start) {
+		mem_pieces_remove(&phys_avail, __pa(initrd_start),
+				  initrd_end - initrd_start, 1);
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+#ifdef CONFIG_PPC_OF
+	/* remove the RTAS pages from the available memory */
+	if (rtas_data)
+		mem_pieces_remove(&phys_avail, rtas_data, rtas_size, 1);
+#endif
+#ifdef CONFIG_PPC_PMAC
+	/* Because of some uninorth weirdness, we need a page of
+	 * memory as high as possible (it must be outside of the
+	 * bus address seen as the AGP aperture). It will be used
+	 * by the r128 DRM driver
+	 *
+	 * FIXME: We need to make sure that page doesn't overlap any of the\
+	 * above. This could be done by improving mem_pieces_find to be able
+	 * to do a backward search from the end of the list.
+	 */
+	if (_machine == _MACH_Pmac && find_devices("uni-north-agp")) {
+		agp_special_page = (total_memory - PAGE_SIZE);
+		mem_pieces_remove(&phys_avail, agp_special_page, PAGE_SIZE, 0);
+		agp_special_page = (unsigned long)__va(agp_special_page);
+	}
+#endif /* CONFIG_PPC_PMAC */
+}
+
+/* Mark some memory as reserved by removing it from phys_avail. */
+void __init reserve_phys_mem(unsigned long start, unsigned long size)
+{
+	mem_pieces_remove(&phys_avail, start, size, 1);
+}
diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c
new file mode 100644
index 00000000000..81f6745b31e
--- /dev/null
+++ b/arch/powerpc/mm/init64.c
@@ -0,0 +1,385 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/nodemask.h>
+#include <linux/module.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/lmb.h>
+#include <asm/rtas.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/eeh.h>
+#include <asm/processor.h>
+#include <asm/mmzone.h>
+#include <asm/cputable.h>
+#include <asm/ppcdebug.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/vdso.h>
+#include <asm/imalloc.h>
+
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
+int mem_init_done;
+unsigned long ioremap_bot = IMALLOC_BASE;
+static unsigned long phbs_io_bot = PHBS_IO_BASE;
+
+extern pgd_t swapper_pg_dir[];
+extern struct task_struct *current_set[NR_CPUS];
+
+unsigned long klimit = (unsigned long)_end;
+
+unsigned long _SDR1=0;
+unsigned long _ASR=0;
+
+/* max amount of RAM to use */
+unsigned long __max_memory;
+
+/* info on what we think the IO hole is */
+unsigned long 	io_hole_start;
+unsigned long	io_hole_size;
+
+/*
+ * Do very early mm setup.
+ */
+void __init mm_init_ppc64(void)
+{
+#ifndef CONFIG_PPC_ISERIES
+	unsigned long i;
+#endif
+
+	ppc64_boot_msg(0x100, "MM Init");
+
+	/* This is the story of the IO hole... please, keep seated,
+	 * unfortunately, we are out of oxygen masks at the moment.
+	 * So we need some rough way to tell where your big IO hole
+	 * is. On pmac, it's between 2G and 4G, on POWER3, it's around
+	 * that area as well, on POWER4 we don't have one, etc...
+	 * We need that as a "hint" when sizing the TCE table on POWER3
+	 * So far, the simplest way that seem work well enough for us it
+	 * to just assume that the first discontinuity in our physical
+	 * RAM layout is the IO hole. That may not be correct in the future
+	 * (and isn't on iSeries but then we don't care ;)
+	 */
+
+#ifndef CONFIG_PPC_ISERIES
+	for (i = 1; i < lmb.memory.cnt; i++) {
+		unsigned long base, prevbase, prevsize;
+
+		prevbase = lmb.memory.region[i-1].base;
+		prevsize = lmb.memory.region[i-1].size;
+		base = lmb.memory.region[i].base;
+		if (base > (prevbase + prevsize)) {
+			io_hole_start = prevbase + prevsize;
+			io_hole_size = base  - (prevbase + prevsize);
+			break;
+		}
+	}
+#endif /* CONFIG_PPC_ISERIES */
+	if (io_hole_start)
+		printk("IO Hole assumed to be %lx -> %lx\n",
+		       io_hole_start, io_hole_start + io_hole_size - 1);
+
+	ppc64_boot_msg(0x100, "MM Init Done");
+}
+
+void free_initmem(void)
+{
+	unsigned long addr;
+
+	addr = (unsigned long)__init_begin;
+	for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
+		memset((void *)addr, 0xcc, PAGE_SIZE);
+		ClearPageReserved(virt_to_page(addr));
+		set_page_count(virt_to_page(addr), 1);
+		free_page(addr);
+		totalram_pages++;
+	}
+	printk ("Freeing unused kernel memory: %luk freed\n",
+		((unsigned long)__init_end - (unsigned long)__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (start < end)
+		printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+	for (; start < end; start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(start));
+		set_page_count(virt_to_page(start), 1);
+		free_page(start);
+		totalram_pages++;
+	}
+}
+#endif
+
+/*
+ * Initialize the bootmem system and give it all the memory we
+ * have available.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+void __init do_init_bootmem(void)
+{
+	unsigned long i;
+	unsigned long start, bootmap_pages;
+	unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	int boot_mapsize;
+
+	/*
+	 * Find an area to use for the bootmem bitmap.  Calculate the size of
+	 * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
+	 * Add 1 additional page in case the address isn't page-aligned.
+	 */
+	bootmap_pages = bootmem_bootmap_pages(total_pages);
+
+	start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
+	BUG_ON(!start);
+
+	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
+
+	max_pfn = max_low_pfn;
+
+	/* Add all physical memory to the bootmem map, mark each area
+	 * present.
+	 */
+	for (i=0; i < lmb.memory.cnt; i++)
+		free_bootmem(lmb.memory.region[i].base,
+			     lmb_size_bytes(&lmb.memory, i));
+
+	/* reserve the sections we're already using */
+	for (i=0; i < lmb.reserved.cnt; i++)
+		reserve_bootmem(lmb.reserved.region[i].base,
+				lmb_size_bytes(&lmb.reserved, i));
+
+	for (i=0; i < lmb.memory.cnt; i++)
+		memory_present(0, lmb_start_pfn(&lmb.memory, i),
+			       lmb_end_pfn(&lmb.memory, i));
+}
+
+/*
+ * paging_init() sets up the page tables - in fact we've already done this.
+ */
+void __init paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES];
+	unsigned long zholes_size[MAX_NR_ZONES];
+	unsigned long total_ram = lmb_phys_mem_size();
+	unsigned long top_of_ram = lmb_end_of_DRAM();
+
+	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
+	       top_of_ram, total_ram);
+	printk(KERN_INFO "Memory hole size: %ldMB\n",
+	       (top_of_ram - total_ram) >> 20);
+	/*
+	 * All pages are DMA-able so we put them all in the DMA zone.
+	 */
+	memset(zones_size, 0, sizeof(zones_size));
+	memset(zholes_size, 0, sizeof(zholes_size));
+
+	zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
+	zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
+
+	free_area_init_node(0, NODE_DATA(0), zones_size,
+			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
+}
+#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
+
+static struct kcore_list kcore_vmem;
+
+static int __init setup_kcore(void)
+{
+	int i;
+
+	for (i=0; i < lmb.memory.cnt; i++) {
+		unsigned long base, size;
+		struct kcore_list *kcore_mem;
+
+		base = lmb.memory.region[i].base;
+		size = lmb.memory.region[i].size;
+
+		/* GFP_ATOMIC to avoid might_sleep warnings during boot */
+		kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC);
+		if (!kcore_mem)
+			panic("mem_init: kmalloc failed\n");
+
+		kclist_add(kcore_mem, __va(base), size);
+	}
+
+	kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
+
+	return 0;
+}
+module_init(setup_kcore);
+
+void __init mem_init(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int nid;
+#endif
+	pg_data_t *pgdat;
+	unsigned long i;
+	struct page *page;
+	unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize;
+
+	num_physpages = max_low_pfn;	/* RAM is assumed contiguous */
+	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+        for_each_online_node(nid) {
+		if (NODE_DATA(nid)->node_spanned_pages != 0) {
+			printk("freeing bootmem node %x\n", nid);
+			totalram_pages +=
+				free_all_bootmem_node(NODE_DATA(nid));
+		}
+	}
+#else
+	max_mapnr = num_physpages;
+	totalram_pages += free_all_bootmem();
+#endif
+
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			page = pgdat_page_nr(pgdat, i);
+			if (PageReserved(page))
+				reservedpages++;
+		}
+	}
+
+	codesize = (unsigned long)&_etext - (unsigned long)&_stext;
+	initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin;
+	datasize = (unsigned long)&_edata - (unsigned long)&__init_end;
+	bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start;
+
+	printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, "
+	       "%luk reserved, %luk data, %luk bss, %luk init)\n",
+		(unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
+		num_physpages << (PAGE_SHIFT-10),
+		codesize >> 10,
+		reservedpages << (PAGE_SHIFT-10),
+		datasize >> 10,
+		bsssize >> 10,
+		initsize >> 10);
+
+	mem_init_done = 1;
+
+	/* Initialize the vDSO */
+	vdso_init();
+}
+
+void __iomem * reserve_phb_iospace(unsigned long size)
+{
+	void __iomem *virt_addr;
+		
+	if (phbs_io_bot >= IMALLOC_BASE) 
+		panic("reserve_phb_iospace(): phb io space overflow\n");
+			
+	virt_addr = (void __iomem *) phbs_io_bot;
+	phbs_io_bot += size;
+
+	return virt_addr;
+}
+
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+{
+	memset(addr, 0, kmem_cache_size(cache));
+}
+
+static const int pgtable_cache_size[2] = {
+	PTE_TABLE_SIZE, PMD_TABLE_SIZE
+};
+static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
+	"pgd_pte_cache", "pud_pmd_cache",
+};
+
+kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];
+
+void pgtable_cache_init(void)
+{
+	int i;
+
+	BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
+	BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
+	BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
+	BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);
+
+	for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
+		int size = pgtable_cache_size[i];
+		const char *name = pgtable_cache_name[i];
+
+		pgtable_cache[i] = kmem_cache_create(name,
+						     size, size,
+						     SLAB_HWCACHE_ALIGN
+						     | SLAB_MUST_HWCACHE_ALIGN,
+						     zero_ctor,
+						     NULL);
+		if (! pgtable_cache[i])
+			panic("pgtable_cache_init(): could not create %s!\n",
+			      name);
+	}
+}
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (ppc_md.phys_mem_access_prot)
+		return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
+
+	if (!page_is_ram(addr >> PAGE_SHIFT))
+		vma_prot = __pgprot(pgprot_val(vma_prot)
+				    | _PAGE_GUARDED | _PAGE_NO_CACHE);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
new file mode 100644
index 00000000000..345db08e5d2
--- /dev/null
+++ b/arch/powerpc/mm/mem.c
@@ -0,0 +1,299 @@
+/*
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *  PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/initrd.h>
+#include <linux/pagemap.h>
+
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/btext.h>
+#include <asm/tlb.h>
+#include <asm/bootinfo.h>
+#include <asm/prom.h>
+
+#include "mem_pieces.h"
+#include "mmu_decl.h"
+
+#ifndef CPU_FTR_COHERENT_ICACHE
+#define CPU_FTR_COHERENT_ICACHE	0	/* XXX for now */
+#define CPU_FTR_NOEXECUTE	0
+#endif
+
+/*
+ * This is called by /dev/mem to know if a given address has to
+ * be mapped non-cacheable or not
+ */
+int page_is_ram(unsigned long pfn)
+{
+	unsigned long paddr = (pfn << PAGE_SHIFT);
+
+#ifndef CONFIG_PPC64	/* XXX for now */
+	return paddr < __pa(high_memory);
+#else
+	int i;
+	for (i=0; i < lmb.memory.cnt; i++) {
+		unsigned long base;
+
+		base = lmb.memory.region[i].base;
+
+		if ((paddr >= base) &&
+			(paddr < (base + lmb.memory.region[i].size))) {
+			return 1;
+		}
+	}
+
+	return 0;
+#endif
+}
+EXPORT_SYMBOL(page_is_ram);
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (ppc_md.phys_mem_access_prot)
+		return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
+
+	if (!page_is_ram(addr >> PAGE_SHIFT))
+		vma_prot = __pgprot(pgprot_val(vma_prot)
+				    | _PAGE_GUARDED | _PAGE_NO_CACHE);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
+
+void show_mem(void)
+{
+	unsigned long total = 0, reserved = 0;
+	unsigned long shared = 0, cached = 0;
+	unsigned long highmem = 0;
+	struct page *page;
+	pg_data_t *pgdat;
+	unsigned long i;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			page = pgdat_page_nr(pgdat, i);
+			total++;
+			if (PageHighMem(page))
+				highmem++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (page_count(page))
+				shared += page_count(page) - 1;
+		}
+	}
+	printk("%ld pages of RAM\n", total);
+#ifdef CONFIG_HIGHMEM
+	printk("%ld pages of HIGHMEM\n", highmem);
+#endif
+	printk("%ld reserved pages\n", reserved);
+	printk("%ld pages shared\n", shared);
+	printk("%ld pages swap cached\n", cached);
+}
+
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean.  We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+void flush_dcache_page(struct page *page)
+{
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &page->flags))
+		clear_bit(PG_arch_1, &page->flags);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void flush_dcache_icache_page(struct page *page)
+{
+#ifdef CONFIG_BOOKE
+	void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE);
+	__flush_dcache_icache(start);
+	kunmap_atomic(start, KM_PPC_SYNC_ICACHE);
+#elif defined(CONFIG_8xx)
+	/* On 8xx there is no need to kmap since highmem is not supported */
+	__flush_dcache_icache(page_address(page)); 
+#else
+	__flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
+#endif
+
+}
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+	clear_page(page);
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/*
+	 * We shouldnt have to do this, but some versions of glibc
+	 * require it (ld.so assumes zero filled pages are icache clean)
+	 * - Anton
+	 */
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+		    struct page *pg)
+{
+	copy_page(vto, vfrom);
+
+	/*
+	 * We should be able to use the following optimisation, however
+	 * there are two problems.
+	 * Firstly a bug in some versions of binutils meant PLT sections
+	 * were not marked executable.
+	 * Secondly the first word in the GOT section is blrl, used
+	 * to establish the GOT address. Until recently the GOT was
+	 * not marked executable.
+	 * - Anton
+	 */
+#if 0
+	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+		return;
+#endif
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	unsigned long maddr;
+
+	maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
+	flush_icache_range(maddr, maddr + len);
+	kunmap(page);
+}
+EXPORT_SYMBOL(flush_icache_user_range);
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ * 
+ * This must always be called with the mm->page_table_lock held
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+		      pte_t pte)
+{
+	/* handle i-cache coherency */
+	unsigned long pfn = pte_pfn(pte);
+#ifdef CONFIG_PPC32
+	pmd_t *pmd;
+#else
+	unsigned long vsid;
+	void *pgdir;
+	pte_t *ptep;
+	int local = 0;
+	cpumask_t tmp;
+	unsigned long flags;
+#endif
+
+	/* handle i-cache coherency */
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
+	    !cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    pfn_valid(pfn)) {
+		struct page *page = pfn_to_page(pfn);
+		if (!PageReserved(page)
+		    && !test_bit(PG_arch_1, &page->flags)) {
+			if (vma->vm_mm == current->active_mm) {
+#ifdef CONFIG_8xx
+			/* On 8xx, cache control instructions (particularly 
+		 	 * "dcbst" from flush_dcache_icache) fault as write 
+			 * operation if there is an unpopulated TLB entry 
+			 * for the address in question. To workaround that, 
+			 * we invalidate the TLB here, thus avoiding dcbst 
+			 * misbehaviour.
+			 */
+				_tlbie(address);
+#endif
+				__flush_dcache_icache((void *) address);
+			} else
+				flush_dcache_icache_page(page);
+			set_bit(PG_arch_1, &page->flags);
+		}
+	}
+
+#ifdef CONFIG_PPC_STD_MMU
+	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+	if (!pte_young(pte) || address >= TASK_SIZE)
+		return;
+#ifdef CONFIG_PPC32
+	if (Hash == 0)
+		return;
+	pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address);
+	if (!pmd_none(*pmd))
+		add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd));
+#else
+	pgdir = vma->vm_mm->pgd;
+	if (pgdir == NULL)
+		return;
+
+	ptep = find_linux_pte(pgdir, ea);
+	if (!ptep)
+		return;
+
+	vsid = get_vsid(vma->vm_mm->context.id, ea);
+
+	local_irq_save(flags);
+	tmp = cpumask_of_cpu(smp_processor_id());
+	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
+		local = 1;
+
+	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
+		    0x300, local);
+	local_irq_restore(flags);
+#endif
+#endif
+}
diff --git a/arch/powerpc/mm/mem64.c b/arch/powerpc/mm/mem64.c
new file mode 100644
index 00000000000..ef765a84433
--- /dev/null
+++ b/arch/powerpc/mm/mem64.c
@@ -0,0 +1,259 @@
+/*
+ *  PowerPC version 
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/nodemask.h>
+#include <linux/module.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/lmb.h>
+#include <asm/rtas.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/eeh.h>
+#include <asm/processor.h>
+#include <asm/mmzone.h>
+#include <asm/cputable.h>
+#include <asm/ppcdebug.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/vdso.h>
+#include <asm/imalloc.h>
+
+/*
+ * This is called by /dev/mem to know if a given address has to
+ * be mapped non-cacheable or not
+ */
+int page_is_ram(unsigned long pfn)
+{
+	int i;
+	unsigned long paddr = (pfn << PAGE_SHIFT);
+
+	for (i=0; i < lmb.memory.cnt; i++) {
+		unsigned long base;
+
+		base = lmb.memory.region[i].base;
+
+		if ((paddr >= base) &&
+			(paddr < (base + lmb.memory.region[i].size))) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(page_is_ram);
+
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (ppc_md.phys_mem_access_prot)
+		return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot);
+
+	if (!page_is_ram(addr >> PAGE_SHIFT))
+		vma_prot = __pgprot(pgprot_val(vma_prot)
+				    | _PAGE_GUARDED | _PAGE_NO_CACHE);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
+
+void show_mem(void)
+{
+	unsigned long total = 0, reserved = 0;
+	unsigned long shared = 0, cached = 0;
+	struct page *page;
+	pg_data_t *pgdat;
+	unsigned long i;
+
+	printk("Mem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	for_each_pgdat(pgdat) {
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			page = pgdat_page_nr(pgdat, i);
+			total++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (page_count(page))
+				shared += page_count(page) - 1;
+		}
+	}
+	printk("%ld pages of RAM\n", total);
+	printk("%ld reserved pages\n", reserved);
+	printk("%ld pages shared\n", shared);
+	printk("%ld pages swap cached\n", cached);
+}
+
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean.  We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+void flush_dcache_page(struct page *page)
+{
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &page->flags))
+		clear_bit(PG_arch_1, &page->flags);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+	clear_page(page);
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+	/*
+	 * We shouldnt have to do this, but some versions of glibc
+	 * require it (ld.so assumes zero filled pages are icache clean)
+	 * - Anton
+	 */
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+		    struct page *pg)
+{
+	copy_page(vto, vfrom);
+
+	/*
+	 * We should be able to use the following optimisation, however
+	 * there are two problems.
+	 * Firstly a bug in some versions of binutils meant PLT sections
+	 * were not marked executable.
+	 * Secondly the first word in the GOT section is blrl, used
+	 * to establish the GOT address. Until recently the GOT was
+	 * not marked executable.
+	 * - Anton
+	 */
+#if 0
+	if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+		return;
+#endif
+
+	if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		return;
+
+	/* avoid an atomic op if possible */
+	if (test_bit(PG_arch_1, &pg->flags))
+		clear_bit(PG_arch_1, &pg->flags);
+}
+
+void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
+			     unsigned long addr, int len)
+{
+	unsigned long maddr;
+
+	maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK);
+	flush_icache_range(maddr, maddr + len);
+}
+EXPORT_SYMBOL(flush_icache_user_range);
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ * 
+ * This must always be called with the mm->page_table_lock held
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
+		      pte_t pte)
+{
+	unsigned long vsid;
+	void *pgdir;
+	pte_t *ptep;
+	int local = 0;
+	cpumask_t tmp;
+	unsigned long flags;
+
+	/* handle i-cache coherency */
+	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) &&
+	    !cpu_has_feature(CPU_FTR_NOEXECUTE)) {
+		unsigned long pfn = pte_pfn(pte);
+		if (pfn_valid(pfn)) {
+			struct page *page = pfn_to_page(pfn);
+			if (!PageReserved(page)
+			    && !test_bit(PG_arch_1, &page->flags)) {
+				__flush_dcache_icache(page_address(page));
+				set_bit(PG_arch_1, &page->flags);
+			}
+		}
+	}
+
+	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+	if (!pte_young(pte))
+		return;
+
+	pgdir = vma->vm_mm->pgd;
+	if (pgdir == NULL)
+		return;
+
+	ptep = find_linux_pte(pgdir, ea);
+	if (!ptep)
+		return;
+
+	vsid = get_vsid(vma->vm_mm->context.id, ea);
+
+	local_irq_save(flags);
+	tmp = cpumask_of_cpu(smp_processor_id());
+	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
+		local = 1;
+
+	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
+		    0x300, local);
+	local_irq_restore(flags);
+}
diff --git a/arch/powerpc/mm/mem_pieces.c b/arch/powerpc/mm/mem_pieces.c
new file mode 100644
index 00000000000..3d639052017
--- /dev/null
+++ b/arch/powerpc/mm/mem_pieces.c
@@ -0,0 +1,163 @@
+/*
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Changes to accommodate Power Macintoshes.
+ *    Cort Dougan <cort@cs.nmt.edu>
+ *      Rewrites.
+ *    Grant Erickson <grant@lcse.umn.edu>
+ *      General rework and split from mm/init.c.
+ *
+ *    Module name: mem_pieces.c
+ *
+ *    Description:
+ *      Routines and data structures for manipulating and representing
+ *      phyiscal memory extents (i.e. address/length pairs).
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <asm/page.h>
+
+#include "mem_pieces.h"
+
+extern struct mem_pieces phys_avail;
+
+static void mem_pieces_print(struct mem_pieces *);
+
+/*
+ * Scan a region for a piece of a given size with the required alignment.
+ */
+void __init *
+mem_pieces_find(unsigned int size, unsigned int align)
+{
+	int i;
+	unsigned a, e;
+	struct mem_pieces *mp = &phys_avail;
+
+	for (i = 0; i < mp->n_regions; ++i) {
+		a = mp->regions[i].address;
+		e = a + mp->regions[i].size;
+		a = (a + align - 1) & -align;
+		if (a + size <= e) {
+			mem_pieces_remove(mp, a, size, 1);
+			return (void *) __va(a);
+		}
+	}
+	panic("Couldn't find %u bytes at %u alignment\n", size, align);
+
+	return NULL;
+}
+
+/*
+ * Remove some memory from an array of pieces
+ */
+void __init
+mem_pieces_remove(struct mem_pieces *mp, unsigned int start, unsigned int size,
+		  int must_exist)
+{
+	int i, j;
+	unsigned int end, rs, re;
+	struct reg_property *rp;
+
+	end = start + size;
+	for (i = 0, rp = mp->regions; i < mp->n_regions; ++i, ++rp) {
+		if (end > rp->address && start < rp->address + rp->size)
+			break;
+	}
+	if (i >= mp->n_regions) {
+		if (must_exist)
+			printk("mem_pieces_remove: [%x,%x) not in any region\n",
+			       start, end);
+		return;
+	}
+	for (; i < mp->n_regions && end > rp->address; ++i, ++rp) {
+		rs = rp->address;
+		re = rs + rp->size;
+		if (must_exist && (start < rs || end > re)) {
+			printk("mem_pieces_remove: bad overlap [%x,%x) with",
+			       start, end);
+			mem_pieces_print(mp);
+			must_exist = 0;
+		}
+		if (start > rs) {
+			rp->size = start - rs;
+			if (end < re) {
+				/* need to split this entry */
+				if (mp->n_regions >= MEM_PIECES_MAX)
+					panic("eek... mem_pieces overflow");
+				for (j = mp->n_regions; j > i + 1; --j)
+					mp->regions[j] = mp->regions[j-1];
+				++mp->n_regions;
+				rp[1].address = end;
+				rp[1].size = re - end;
+			}
+		} else {
+			if (end < re) {
+				rp->address = end;
+				rp->size = re - end;
+			} else {
+				/* need to delete this entry */
+				for (j = i; j < mp->n_regions - 1; ++j)
+					mp->regions[j] = mp->regions[j+1];
+				--mp->n_regions;
+				--i;
+				--rp;
+			}
+		}
+	}
+}
+
+static void __init
+mem_pieces_print(struct mem_pieces *mp)
+{
+	int i;
+
+	for (i = 0; i < mp->n_regions; ++i)
+		printk(" [%x, %x)", mp->regions[i].address,
+		       mp->regions[i].address + mp->regions[i].size);
+	printk("\n");
+}
+
+void __init
+mem_pieces_sort(struct mem_pieces *mp)
+{
+	unsigned long a, s;
+	int i, j;
+
+	for (i = 1; i < mp->n_regions; ++i) {
+		a = mp->regions[i].address;
+		s = mp->regions[i].size;
+		for (j = i - 1; j >= 0; --j) {
+			if (a >= mp->regions[j].address)
+				break;
+			mp->regions[j+1] = mp->regions[j];
+		}
+		mp->regions[j+1].address = a;
+		mp->regions[j+1].size = s;
+	}
+}
+
+void __init
+mem_pieces_coalesce(struct mem_pieces *mp)
+{
+	unsigned long a, s, ns;
+	int i, j, d;
+
+	d = 0;
+	for (i = 0; i < mp->n_regions; i = j) {
+		a = mp->regions[i].address;
+		s = mp->regions[i].size;
+		for (j = i + 1; j < mp->n_regions
+			     && mp->regions[j].address - a <= s; ++j) {
+			ns = mp->regions[j].address + mp->regions[j].size - a;
+			if (ns > s)
+				s = ns;
+		}
+		mp->regions[d].address = a;
+		mp->regions[d].size = s;
+		++d;
+	}
+	mp->n_regions = d;
+}
diff --git a/arch/powerpc/mm/mem_pieces.h b/arch/powerpc/mm/mem_pieces.h
new file mode 100644
index 00000000000..e2b700dc7f1
--- /dev/null
+++ b/arch/powerpc/mm/mem_pieces.h
@@ -0,0 +1,48 @@
+/*
+ *    Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ *      Changes to accommodate Power Macintoshes.
+ *    Cort Dougan <cort@cs.nmt.edu>
+ *      Rewrites.
+ *    Grant Erickson <grant@lcse.umn.edu>
+ *      General rework and split from mm/init.c.
+ *
+ *    Module name: mem_pieces.h
+ *
+ *    Description:
+ *      Routines and data structures for manipulating and representing
+ *      phyiscal memory extents (i.e. address/length pairs).
+ *
+ */
+
+#ifndef __MEM_PIECES_H__
+#define	__MEM_PIECES_H__
+
+#include <asm/prom.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Type Definitions */
+
+#define	MEM_PIECES_MAX	32
+
+struct mem_pieces {
+    int n_regions;
+    struct reg_property regions[MEM_PIECES_MAX];
+};
+
+/* Function Prototypes */
+
+extern void	*mem_pieces_find(unsigned int size, unsigned int align);
+extern void	 mem_pieces_remove(struct mem_pieces *mp, unsigned int start,
+				   unsigned int size, int must_exist);
+extern void	 mem_pieces_coalesce(struct mem_pieces *mp);
+extern void	 mem_pieces_sort(struct mem_pieces *mp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __MEM_PIECES_H__ */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
new file mode 100644
index 00000000000..a8816e0f6a8
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context.c
@@ -0,0 +1,86 @@
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU substantially follows the
+ * architecture specification.  This includes the 6xx, 7xx, 7xxx,
+ * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+
+mm_context_t next_mmu_context;
+unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
+#ifdef FEW_CONTEXTS
+atomic_t nr_free_contexts;
+struct mm_struct *context_mm[LAST_CONTEXT+1];
+void steal_context(void);
+#endif /* FEW_CONTEXTS */
+
+/*
+ * Initialize the context management stuff.
+ */
+void __init
+mmu_context_init(void)
+{
+	/*
+	 * Some processors have too few contexts to reserve one for
+	 * init_mm, and require using context 0 for a normal task.
+	 * Other processors reserve the use of context zero for the kernel.
+	 * This code assumes FIRST_CONTEXT < 32.
+	 */
+	context_map[0] = (1 << FIRST_CONTEXT) - 1;
+	next_mmu_context = FIRST_CONTEXT;
+#ifdef FEW_CONTEXTS
+	atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1);
+#endif /* FEW_CONTEXTS */
+}
+
+#ifdef FEW_CONTEXTS
+/*
+ * Steal a context from a task that has one at the moment.
+ * This is only used on 8xx and 4xx and we presently assume that
+ * they don't do SMP.  If they do then this will have to check
+ * whether the MM we steal is in use.
+ * We also assume that this is only used on systems that don't
+ * use an MMU hash table - this is true for 8xx and 4xx.
+ * This isn't an LRU system, it just frees up each context in
+ * turn (sort-of pseudo-random replacement :).  This would be the
+ * place to implement an LRU scheme if anyone was motivated to do it.
+ *  -- paulus
+ */
+void
+steal_context(void)
+{
+	struct mm_struct *mm;
+
+	/* free up context `next_mmu_context' */
+	/* if we shouldn't free context 0, don't... */
+	if (next_mmu_context < FIRST_CONTEXT)
+		next_mmu_context = FIRST_CONTEXT;
+	mm = context_mm[next_mmu_context];
+	flush_tlb_mm(mm);
+	destroy_context(mm);
+}
+#endif /* FEW_CONTEXTS */
diff --git a/arch/powerpc/mm/mmu_context64.c b/arch/powerpc/mm/mmu_context64.c
new file mode 100644
index 00000000000..714a84dd8d5
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context64.c
@@ -0,0 +1,63 @@
+/*
+ *  MMU context allocation for 64-bit kernels.
+ *
+ *  Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+
+#include <asm/mmu_context.h>
+
+static DEFINE_SPINLOCK(mmu_context_lock);
+static DEFINE_IDR(mmu_context_idr);
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	int index;
+	int err;
+
+again:
+	if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
+		return -ENOMEM;
+
+	spin_lock(&mmu_context_lock);
+	err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index);
+	spin_unlock(&mmu_context_lock);
+
+	if (err == -EAGAIN)
+		goto again;
+	else if (err)
+		return err;
+
+	if (index > MAX_CONTEXT) {
+		idr_remove(&mmu_context_idr, index);
+		return -ENOMEM;
+	}
+
+	mm->context.id = index;
+
+	return 0;
+}
+
+void destroy_context(struct mm_struct *mm)
+{
+	spin_lock(&mmu_context_lock);
+	idr_remove(&mmu_context_idr, mm->context.id);
+	spin_unlock(&mmu_context_lock);
+
+	mm->context.id = NO_CONTEXT;
+}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
new file mode 100644
index 00000000000..540f3292b22
--- /dev/null
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -0,0 +1,85 @@
+/*
+ * Declarations of procedures and variables shared between files
+ * in arch/ppc/mm/.
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+
+extern void mapin_ram(void);
+extern int map_page(unsigned long va, phys_addr_t pa, int flags);
+extern void setbat(int index, unsigned long virt, unsigned long phys,
+		   unsigned int size, int flags);
+extern void reserve_phys_mem(unsigned long start, unsigned long size);
+extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
+		      unsigned int size, int flags, unsigned int pid);
+extern void invalidate_tlbcam_entry(int index);
+
+extern int __map_without_bats;
+extern unsigned long ioremap_base;
+extern unsigned long ioremap_bot;
+extern unsigned int rtas_data, rtas_size;
+
+extern unsigned long total_memory;
+extern unsigned long total_lowmem;
+extern int mem_init_done;
+
+extern PTE *Hash, *Hash_end;
+extern unsigned long Hash_size, Hash_mask;
+
+extern unsigned int num_tlbcam_entries;
+
+/* ...and now those things that may be slightly different between processor
+ * architectures.  -- Dan
+ */
+#if defined(CONFIG_8xx)
+#define flush_HPTE(X, va, pg)	_tlbie(va)
+#define MMU_init_hw()		do { } while(0)
+#define mmu_mapin_ram()		(0UL)
+
+#elif defined(CONFIG_4xx)
+#define flush_HPTE(X, va, pg)	_tlbie(va)
+extern void MMU_init_hw(void);
+extern unsigned long mmu_mapin_ram(void);
+
+#elif defined(CONFIG_FSL_BOOKE)
+#define flush_HPTE(X, va, pg)	_tlbie(va)
+extern void MMU_init_hw(void);
+extern unsigned long mmu_mapin_ram(void);
+extern void adjust_total_lowmem(void);
+
+#else
+/* anything except 4xx or 8xx */
+extern void MMU_init_hw(void);
+extern unsigned long mmu_mapin_ram(void);
+
+/* Be careful....this needs to be updated if we ever encounter 603 SMPs,
+ * which includes all new 82xx processors.  We need tlbie/tlbsync here
+ * in that case (I think). -- Dan.
+ */
+static inline void flush_HPTE(unsigned context, unsigned long va,
+			      unsigned long pdval)
+{
+	if ((Hash != 0) &&
+	    cpu_has_feature(CPU_FTR_HPTE_TABLE))
+		flush_hash_pages(0, va, pdval, 1);
+	else
+		_tlbie(va);
+}
+#endif
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
new file mode 100644
index 00000000000..81a3d7446d3
--- /dev/null
+++ b/arch/powerpc/mm/pgtable.c
@@ -0,0 +1,470 @@
+/*
+ * This file contains the routines setting up the linux page tables.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/io.h>
+
+#include "mmu_decl.h"
+
+unsigned long ioremap_base;
+unsigned long ioremap_bot;
+int io_bat_index;
+
+#if defined(CONFIG_6xx) || defined(CONFIG_POWER3)
+#define HAVE_BATS	1
+#endif
+
+#if defined(CONFIG_FSL_BOOKE)
+#define HAVE_TLBCAM	1
+#endif
+
+extern char etext[], _stext[];
+
+#ifdef CONFIG_SMP
+extern void hash_page_sync(void);
+#endif
+
+#ifdef HAVE_BATS
+extern unsigned long v_mapped_by_bats(unsigned long va);
+extern unsigned long p_mapped_by_bats(unsigned long pa);
+void setbat(int index, unsigned long virt, unsigned long phys,
+	    unsigned int size, int flags);
+
+#else /* !HAVE_BATS */
+#define v_mapped_by_bats(x)	(0UL)
+#define p_mapped_by_bats(x)	(0UL)
+#endif /* HAVE_BATS */
+
+#ifdef HAVE_TLBCAM
+extern unsigned int tlbcam_index;
+extern unsigned long v_mapped_by_tlbcam(unsigned long va);
+extern unsigned long p_mapped_by_tlbcam(unsigned long pa);
+#else /* !HAVE_TLBCAM */
+#define v_mapped_by_tlbcam(x)	(0UL)
+#define p_mapped_by_tlbcam(x)	(0UL)
+#endif /* HAVE_TLBCAM */
+
+#ifdef CONFIG_PTE_64BIT
+/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */
+#define PGDIR_ORDER	1
+#else
+#define PGDIR_ORDER	0
+#endif
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *ret;
+
+	ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER);
+	return ret;
+}
+
+void pgd_free(pgd_t *pgd)
+{
+	free_pages((unsigned long)pgd, PGDIR_ORDER);
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	pte_t *pte;
+	extern int mem_init_done;
+	extern void *early_get_page(void);
+
+	if (mem_init_done) {
+		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	} else {
+		pte = (pte_t *)early_get_page();
+		if (pte)
+			clear_page(pte);
+	}
+	return pte;
+}
+
+struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *ptepage;
+
+#ifdef CONFIG_HIGHPTE
+	int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+#else
+	int flags = GFP_KERNEL | __GFP_REPEAT;
+#endif
+
+	ptepage = alloc_pages(flags, 0);
+	if (ptepage)
+		clear_highpage(ptepage);
+	return ptepage;
+}
+
+void pte_free_kernel(pte_t *pte)
+{
+#ifdef CONFIG_SMP
+	hash_page_sync();
+#endif
+	free_page((unsigned long)pte);
+}
+
+void pte_free(struct page *ptepage)
+{
+#ifdef CONFIG_SMP
+	hash_page_sync();
+#endif
+	__free_page(ptepage);
+}
+
+#ifndef CONFIG_PHYS_64BIT
+void __iomem *
+ioremap(phys_addr_t addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE);
+}
+#else /* CONFIG_PHYS_64BIT */
+void __iomem *
+ioremap64(unsigned long long addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE);
+}
+
+void __iomem *
+ioremap(phys_addr_t addr, unsigned long size)
+{
+	phys_addr_t addr64 = fixup_bigphys_addr(addr, size);
+
+	return ioremap64(addr64, size);
+}
+#endif /* CONFIG_PHYS_64BIT */
+
+void __iomem *
+__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
+{
+	unsigned long v, i;
+	phys_addr_t p;
+	int err;
+
+	/*
+	 * Choose an address to map it to.
+	 * Once the vmalloc system is running, we use it.
+	 * Before then, we use space going down from ioremap_base
+	 * (ioremap_bot records where we're up to).
+	 */
+	p = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - p;
+
+	/*
+	 * If the address lies within the first 16 MB, assume it's in ISA
+	 * memory space
+	 */
+	if (p < 16*1024*1024)
+		p += _ISA_MEM_BASE;
+
+	/*
+	 * Don't allow anybody to remap normal RAM that we're using.
+	 * mem_init() sets high_memory so only do the check after that.
+	 */
+	if ( mem_init_done && (p < virt_to_phys(high_memory)) )
+	{
+		printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p,
+		       __builtin_return_address(0));
+		return NULL;
+	}
+
+	if (size == 0)
+		return NULL;
+
+	/*
+	 * Is it already mapped?  Perhaps overlapped by a previous
+	 * BAT mapping.  If the whole area is mapped then we're done,
+	 * otherwise remap it since we want to keep the virt addrs for
+	 * each request contiguous.
+	 *
+	 * We make the assumption here that if the bottom and top
+	 * of the range we want are mapped then it's mapped to the
+	 * same virt address (and this is contiguous).
+	 *  -- Cort
+	 */
+	if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ )
+		goto out;
+
+	if ((v = p_mapped_by_tlbcam(p)))
+		goto out;
+
+	if (mem_init_done) {
+		struct vm_struct *area;
+		area = get_vm_area(size, VM_IOREMAP);
+		if (area == 0)
+			return NULL;
+		v = (unsigned long) area->addr;
+	} else {
+		v = (ioremap_bot -= size);
+	}
+
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= _PAGE_KERNEL;
+	if (flags & _PAGE_NO_CACHE)
+		flags |= _PAGE_GUARDED;
+
+	/*
+	 * Should check if it is a candidate for a BAT mapping
+	 */
+
+	err = 0;
+	for (i = 0; i < size && err == 0; i += PAGE_SIZE)
+		err = map_page(v+i, p+i, flags);
+	if (err) {
+		if (mem_init_done)
+			vunmap((void *)v);
+		return NULL;
+	}
+
+out:
+	return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+	/*
+	 * If mapped by BATs then there is nothing to do.
+	 * Calling vfree() generates a benign warning.
+	 */
+	if (v_mapped_by_bats((unsigned long)addr)) return;
+
+	if (addr > high_memory && (unsigned long) addr < ioremap_bot)
+		vunmap((void *) (PAGE_MASK & (unsigned long)addr));
+}
+
+void __iomem *ioport_map(unsigned long port, unsigned int len)
+{
+	return (void __iomem *) (port + _IO_BASE);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+	/* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+int
+map_page(unsigned long va, phys_addr_t pa, int flags)
+{
+	pmd_t *pd;
+	pte_t *pg;
+	int err = -ENOMEM;
+
+	spin_lock(&init_mm.page_table_lock);
+	/* Use upper 10 bits of VA to index the first level map */
+	pd = pmd_offset(pgd_offset_k(va), va);
+	/* Use middle 10 bits of VA to index the second-level map */
+	pg = pte_alloc_kernel(&init_mm, pd, va);
+	if (pg != 0) {
+		err = 0;
+		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
+		if (mem_init_done)
+			flush_HPTE(0, va, pmd_val(*pd));
+	}
+	spin_unlock(&init_mm.page_table_lock);
+	return err;
+}
+
+/*
+ * Map in all of physical memory starting at KERNELBASE.
+ */
+void __init mapin_ram(void)
+{
+	unsigned long v, p, s, f;
+
+	s = mmu_mapin_ram();
+	v = KERNELBASE + s;
+	p = PPC_MEMSTART + s;
+	for (; s < total_lowmem; s += PAGE_SIZE) {
+		if ((char *) v >= _stext && (char *) v < etext)
+			f = _PAGE_RAM_TEXT;
+		else
+			f = _PAGE_RAM;
+		map_page(v, p, f);
+		v += PAGE_SIZE;
+		p += PAGE_SIZE;
+	}
+}
+
+/* is x a power of 2? */
+#define is_power_of_2(x)	((x) != 0 && (((x) & ((x) - 1)) == 0))
+
+/* is x a power of 4? */
+#define is_power_of_4(x)	((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1))
+
+/*
+ * Set up a mapping for a block of I/O.
+ * virt, phys, size must all be page-aligned.
+ * This should only be called before ioremap is called.
+ */
+void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
+			     unsigned int size, int flags)
+{
+	int i;
+
+	if (virt > KERNELBASE && virt < ioremap_bot)
+		ioremap_bot = ioremap_base = virt;
+
+#ifdef HAVE_BATS
+	/*
+	 * Use a BAT for this if possible...
+	 */
+	if (io_bat_index < 2 && is_power_of_2(size)
+	    && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
+		setbat(io_bat_index, virt, phys, size, flags);
+		++io_bat_index;
+		return;
+	}
+#endif /* HAVE_BATS */
+
+#ifdef HAVE_TLBCAM
+	/*
+	 * Use a CAM for this if possible...
+	 */
+	if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size)
+	    && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
+		settlbcam(tlbcam_index, virt, phys, size, flags, 0);
+		++tlbcam_index;
+		return;
+	}
+#endif /* HAVE_TLBCAM */
+
+	/* No BATs available, put it in the page tables. */
+	for (i = 0; i < size; i += PAGE_SIZE)
+		map_page(virt + i, phys + i, flags);
+}
+
+/* Scan the real Linux page tables and return a PTE pointer for
+ * a virtual address in a context.
+ * Returns true (1) if PTE was found, zero otherwise.  The pointer to
+ * the PTE pointer is unmodified if PTE is not found.
+ */
+int
+get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep)
+{
+        pgd_t	*pgd;
+        pmd_t	*pmd;
+        pte_t	*pte;
+        int     retval = 0;
+
+        pgd = pgd_offset(mm, addr & PAGE_MASK);
+        if (pgd) {
+                pmd = pmd_offset(pgd, addr & PAGE_MASK);
+                if (pmd_present(*pmd)) {
+                        pte = pte_offset_map(pmd, addr & PAGE_MASK);
+                        if (pte) {
+				retval = 1;
+				*ptep = pte;
+				/* XXX caller needs to do pte_unmap, yuck */
+                        }
+                }
+        }
+        return(retval);
+}
+
+/* Find physical address for this virtual address.  Normally used by
+ * I/O functions, but anyone can call it.
+ */
+unsigned long iopa(unsigned long addr)
+{
+	unsigned long pa;
+
+	/* I don't know why this won't work on PMacs or CHRP.  It
+	 * appears there is some bug, or there is some implicit
+	 * mapping done not properly represented by BATs or in page
+	 * tables.......I am actively working on resolving this, but
+	 * can't hold up other stuff.  -- Dan
+	 */
+	pte_t *pte;
+	struct mm_struct *mm;
+
+	/* Check the BATs */
+	pa = v_mapped_by_bats(addr);
+	if (pa)
+		return pa;
+
+	/* Allow mapping of user addresses (within the thread)
+	 * for DMA if necessary.
+	 */
+	if (addr < TASK_SIZE)
+		mm = current->mm;
+	else
+		mm = &init_mm;
+
+	pa = 0;
+	if (get_pteptr(mm, addr, &pte)) {
+		pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
+		pte_unmap(pte);
+	}
+
+	return(pa);
+}
+
+/* This is will find the virtual address for a physical one....
+ * Swiped from APUS, could be dangerous :-).
+ * This is only a placeholder until I really find a way to make this
+ * work.  -- Dan
+ */
+unsigned long
+mm_ptov (unsigned long paddr)
+{
+	unsigned long ret;
+#if 0
+	if (paddr < 16*1024*1024)
+		ret = ZTWO_VADDR(paddr);
+	else {
+		int i;
+
+		for (i = 0; i < kmap_chunk_count;){
+			unsigned long phys = kmap_chunks[i++];
+			unsigned long size = kmap_chunks[i++];
+			unsigned long virt = kmap_chunks[i++];
+			if (paddr >= phys
+			    && paddr < (phys + size)){
+				ret = virt + paddr - phys;
+				goto exit;
+			}
+		}
+	
+		ret = (unsigned long) __va(paddr);
+	}
+exit:
+#ifdef DEBUGPV
+	printk ("PTOV(%lx)=%lx\n", paddr, ret);
+#endif
+#else
+	ret = (unsigned long)paddr + KERNELBASE;
+#endif
+	return ret;
+}
+
diff --git a/arch/powerpc/mm/pgtable64.c b/arch/powerpc/mm/pgtable64.c
new file mode 100644
index 00000000000..724f97e5dee
--- /dev/null
+++ b/arch/powerpc/mm/pgtable64.c
@@ -0,0 +1,357 @@
+/*
+ *  This file contains ioremap and related functions for 64-bit machines.
+ *
+ *  Derived from arch/ppc64/mm/init.c
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  Dave Engebretsen <engebret@us.ibm.com>
+ *      Rework for PPC64 port.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/idr.h>
+#include <linux/nodemask.h>
+#include <linux/module.h>
+
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/lmb.h>
+#include <asm/rtas.h>
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/mmu.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/tlb.h>
+#include <asm/eeh.h>
+#include <asm/processor.h>
+#include <asm/mmzone.h>
+#include <asm/cputable.h>
+#include <asm/ppcdebug.h>
+#include <asm/sections.h>
+#include <asm/system.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
+#include <asm/vdso.h>
+#include <asm/imalloc.h>
+
+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif
+
+int mem_init_done;
+unsigned long ioremap_bot = IMALLOC_BASE;
+static unsigned long phbs_io_bot = PHBS_IO_BASE;
+
+extern pgd_t swapper_pg_dir[];
+extern struct task_struct *current_set[NR_CPUS];
+
+unsigned long klimit = (unsigned long)_end;
+
+/* max amount of RAM to use */
+unsigned long __max_memory;
+
+/* info on what we think the IO hole is */
+unsigned long 	io_hole_start;
+unsigned long	io_hole_size;
+
+#ifdef CONFIG_PPC_ISERIES
+
+void __iomem *ioremap(unsigned long addr, unsigned long size)
+{
+	return (void __iomem *)addr;
+}
+
+extern void __iomem *__ioremap(unsigned long addr, unsigned long size,
+		       unsigned long flags)
+{
+	return (void __iomem *)addr;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+	return;
+}
+
+#else
+
+/*
+ * map_io_page currently only called by __ioremap
+ * map_io_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+static int map_io_page(unsigned long ea, unsigned long pa, int flags)
+{
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	unsigned long vsid;
+
+	if (mem_init_done) {
+		spin_lock(&init_mm.page_table_lock);
+		pgdp = pgd_offset_k(ea);
+		pudp = pud_alloc(&init_mm, pgdp, ea);
+		if (!pudp)
+			return -ENOMEM;
+		pmdp = pmd_alloc(&init_mm, pudp, ea);
+		if (!pmdp)
+			return -ENOMEM;
+		ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
+		if (!ptep)
+			return -ENOMEM;
+		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
+							  __pgprot(flags)));
+		spin_unlock(&init_mm.page_table_lock);
+	} else {
+		unsigned long va, vpn, hash, hpteg;
+
+		/*
+		 * If the mm subsystem is not fully up, we cannot create a
+		 * linux page table entry for this mapping.  Simply bolt an
+		 * entry in the hardware page table.
+		 */
+		vsid = get_kernel_vsid(ea);
+		va = (vsid << 28) | (ea & 0xFFFFFFF);
+		vpn = va >> PAGE_SHIFT;
+
+		hash = hpt_hash(vpn, 0);
+
+		hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+		/* Panic if a pte grpup is full */
+		if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT,
+				       HPTE_V_BOLTED,
+				       _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX)
+		    == -1) {
+			panic("map_io_page: could not insert mapping");
+		}
+	}
+	return 0;
+}
+
+
+static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa,
+			    unsigned long ea, unsigned long size,
+			    unsigned long flags)
+{
+	unsigned long i;
+
+	if ((flags & _PAGE_PRESENT) == 0)
+		flags |= pgprot_val(PAGE_KERNEL);
+
+	for (i = 0; i < size; i += PAGE_SIZE)
+		if (map_io_page(ea+i, pa+i, flags))
+			return NULL;
+
+	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
+}
+
+
+void __iomem *
+ioremap(unsigned long addr, unsigned long size)
+{
+	return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+}
+
+void __iomem * __ioremap(unsigned long addr, unsigned long size,
+			 unsigned long flags)
+{
+	unsigned long pa, ea;
+	void __iomem *ret;
+
+	/*
+	 * Choose an address to map it to.
+	 * Once the imalloc system is running, we use it.
+	 * Before that, we map using addresses going
+	 * up from ioremap_bot.  imalloc will use
+	 * the addresses from ioremap_bot through
+	 * IMALLOC_END
+	 * 
+	 */
+	pa = addr & PAGE_MASK;
+	size = PAGE_ALIGN(addr + size) - pa;
+
+	if (size == 0)
+		return NULL;
+
+	if (mem_init_done) {
+		struct vm_struct *area;
+		area = im_get_free_area(size);
+		if (area == NULL)
+			return NULL;
+		ea = (unsigned long)(area->addr);
+		ret = __ioremap_com(addr, pa, ea, size, flags);
+		if (!ret)
+			im_free(area->addr);
+	} else {
+		ea = ioremap_bot;
+		ret = __ioremap_com(addr, pa, ea, size, flags);
+		if (ret)
+			ioremap_bot += size;
+	}
+	return ret;
+}
+
+#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
+
+int __ioremap_explicit(unsigned long pa, unsigned long ea,
+		       unsigned long size, unsigned long flags)
+{
+	struct vm_struct *area;
+	void __iomem *ret;
+	
+	/* For now, require page-aligned values for pa, ea, and size */
+	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
+	    !IS_PAGE_ALIGNED(size)) {
+		printk(KERN_ERR	"unaligned value in %s\n", __FUNCTION__);
+		return 1;
+	}
+	
+	if (!mem_init_done) {
+		/* Two things to consider in this case:
+		 * 1) No records will be kept (imalloc, etc) that the region
+		 *    has been remapped
+		 * 2) It won't be easy to iounmap() the region later (because
+		 *    of 1)
+		 */
+		;
+	} else {
+		area = im_get_area(ea, size,
+			IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
+		if (area == NULL) {
+			/* Expected when PHB-dlpar is in play */
+			return 1;
+		}
+		if (ea != (unsigned long) area->addr) {
+			printk(KERN_ERR "unexpected addr return from "
+			       "im_get_area\n");
+			return 1;
+		}
+	}
+	
+	ret = __ioremap_com(pa, pa, ea, size, flags);
+	if (ret == NULL) {
+		printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
+		return 1;
+	}
+	if (ret != (void *) ea) {
+		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/*  
+ * Unmap an IO region and remove it from imalloc'd list.
+ * Access to IO memory should be serialized by driver.
+ * This code is modeled after vmalloc code - unmap_vm_area()
+ *
+ * XXX	what about calls before mem_init_done (ie python_countermeasures())
+ */
+void iounmap(volatile void __iomem *token)
+{
+	void *addr;
+
+	if (!mem_init_done)
+		return;
+	
+	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
+
+	im_free(addr);
+}
+
+static int iounmap_subset_regions(unsigned long addr, unsigned long size)
+{
+	struct vm_struct *area;
+
+	/* Check whether subsets of this region exist */
+	area = im_get_area(addr, size, IM_REGION_SUPERSET);
+	if (area == NULL)
+		return 1;
+
+	while (area) {
+		iounmap((void __iomem *) area->addr);
+		area = im_get_area(addr, size,
+				IM_REGION_SUPERSET);
+	}
+
+	return 0;
+}
+
+int iounmap_explicit(volatile void __iomem *start, unsigned long size)
+{
+	struct vm_struct *area;
+	unsigned long addr;
+	int rc;
+	
+	addr = (unsigned long __force) start & PAGE_MASK;
+
+	/* Verify that the region either exists or is a subset of an existing
+	 * region.  In the latter case, split the parent region to create 
+	 * the exact region 
+	 */
+	area = im_get_area(addr, size, 
+			    IM_REGION_EXISTS | IM_REGION_SUBSET);
+	if (area == NULL) {
+		/* Determine whether subset regions exist.  If so, unmap */
+		rc = iounmap_subset_regions(addr, size);
+		if (rc) {
+			printk(KERN_ERR
+			       "%s() cannot unmap nonexistent range 0x%lx\n",
+ 				__FUNCTION__, addr);
+			return 1;
+		}
+	} else {
+		iounmap((void __iomem *) area->addr);
+	}
+	/*
+	 * FIXME! This can't be right:
+	iounmap(area->addr);
+	 * Maybe it should be "iounmap(area);"
+	 */
+	return 0;
+}
+
+#endif
+
+EXPORT_SYMBOL(ioremap);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c
new file mode 100644
index 00000000000..9a381ed5eb2
--- /dev/null
+++ b/arch/powerpc/mm/ppc_mmu.c
@@ -0,0 +1,296 @@
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU substantially follows the
+ * architecture specification.  This includes the 6xx, 7xx, 7xxx,
+ * 8260, and POWER3 implementations but excludes the 8xx and 4xx.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+#include <asm/prom.h>
+#include <asm/mmu.h>
+#include <asm/machdep.h>
+
+#include "mmu_decl.h"
+#include "mem_pieces.h"
+
+PTE *Hash, *Hash_end;
+unsigned long Hash_size, Hash_mask;
+unsigned long _SDR1;
+
+union ubat {			/* BAT register values to be loaded */
+	BAT	bat;
+#ifdef CONFIG_PPC64BRIDGE
+	u64	word[2];
+#else
+	u32	word[2];
+#endif
+} BATS[4][2];			/* 4 pairs of IBAT, DBAT */
+
+struct batrange {		/* stores address ranges mapped by BATs */
+	unsigned long start;
+	unsigned long limit;
+	unsigned long phys;
+} bat_addrs[4];
+
+/*
+ * Return PA for this VA if it is mapped by a BAT, or 0
+ */
+unsigned long v_mapped_by_bats(unsigned long va)
+{
+	int b;
+	for (b = 0; b < 4; ++b)
+		if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
+			return bat_addrs[b].phys + (va - bat_addrs[b].start);
+	return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_mapped_by_bats(unsigned long pa)
+{
+	int b;
+	for (b = 0; b < 4; ++b)
+		if (pa >= bat_addrs[b].phys
+	    	    && pa < (bat_addrs[b].limit-bat_addrs[b].start)
+		              +bat_addrs[b].phys)
+			return bat_addrs[b].start+(pa-bat_addrs[b].phys);
+	return 0;
+}
+
+unsigned long __init mmu_mapin_ram(void)
+{
+#ifdef CONFIG_POWER4
+	return 0;
+#else
+	unsigned long tot, bl, done;
+	unsigned long max_size = (256<<20);
+	unsigned long align;
+
+	if (__map_without_bats)
+		return 0;
+
+	/* Set up BAT2 and if necessary BAT3 to cover RAM. */
+
+	/* Make sure we don't map a block larger than the
+	   smallest alignment of the physical address. */
+	/* alignment of PPC_MEMSTART */
+	align = ~(PPC_MEMSTART-1) & PPC_MEMSTART;
+	/* set BAT block size to MIN(max_size, align) */
+	if (align && align < max_size)
+		max_size = align;
+
+	tot = total_lowmem;
+	for (bl = 128<<10; bl < max_size; bl <<= 1) {
+		if (bl * 2 > tot)
+			break;
+	}
+
+	setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM);
+	done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1;
+	if ((done < tot) && !bat_addrs[3].limit) {
+		/* use BAT3 to cover a bit more */
+		tot -= done;
+		for (bl = 128<<10; bl < max_size; bl <<= 1)
+			if (bl * 2 > tot)
+				break;
+		setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM);
+		done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1;
+	}
+
+	return done;
+#endif
+}
+
+/*
+ * Set up one of the I/D BAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ */
+void __init setbat(int index, unsigned long virt, unsigned long phys,
+		   unsigned int size, int flags)
+{
+	unsigned int bl;
+	int wimgxpp;
+	union ubat *bat = BATS[index];
+
+	if (((flags & _PAGE_NO_CACHE) == 0) &&
+	    cpu_has_feature(CPU_FTR_NEED_COHERENT))
+		flags |= _PAGE_COHERENT;
+
+	bl = (size >> 17) - 1;
+	if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
+		/* 603, 604, etc. */
+		/* Do DBAT first */
+		wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+				   | _PAGE_COHERENT | _PAGE_GUARDED);
+		wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
+		bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+		bat[1].word[1] = phys | wimgxpp;
+#ifndef CONFIG_KGDB /* want user access for breakpoints */
+		if (flags & _PAGE_USER)
+#endif
+			bat[1].bat.batu.vp = 1;
+		if (flags & _PAGE_GUARDED) {
+			/* G bit must be zero in IBATs */
+			bat[0].word[0] = bat[0].word[1] = 0;
+		} else {
+			/* make IBAT same as DBAT */
+			bat[0] = bat[1];
+		}
+	} else {
+		/* 601 cpu */
+		if (bl > BL_8M)
+			bl = BL_8M;
+		wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+				   | _PAGE_COHERENT);
+		wimgxpp |= (flags & _PAGE_RW)?
+			((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
+		bat->word[0] = virt | wimgxpp | 4;	/* Ks=0, Ku=1 */
+		bat->word[1] = phys | bl | 0x40;	/* V=1 */
+	}
+
+	bat_addrs[index].start = virt;
+	bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
+	bat_addrs[index].phys = phys;
+}
+
+/*
+ * Initialize the hash table and patch the instructions in hashtable.S.
+ */
+void __init MMU_init_hw(void)
+{
+	unsigned int hmask, mb, mb2;
+	unsigned int n_hpteg, lg_n_hpteg;
+
+	extern unsigned int hash_page_patch_A[];
+	extern unsigned int hash_page_patch_B[], hash_page_patch_C[];
+	extern unsigned int hash_page[];
+	extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
+
+	if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) {
+		/*
+		 * Put a blr (procedure return) instruction at the
+		 * start of hash_page, since we can still get DSI
+		 * exceptions on a 603.
+		 */
+		hash_page[0] = 0x4e800020;
+		flush_icache_range((unsigned long) &hash_page[0],
+				   (unsigned long) &hash_page[1]);
+		return;
+	}
+
+	if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
+
+#ifdef CONFIG_PPC64BRIDGE
+#define LG_HPTEG_SIZE	7		/* 128 bytes per HPTEG */
+#define SDR1_LOW_BITS	(lg_n_hpteg - 11)
+#define MIN_N_HPTEG	2048		/* min 256kB hash table */
+#else
+#define LG_HPTEG_SIZE	6		/* 64 bytes per HPTEG */
+#define SDR1_LOW_BITS	((n_hpteg - 1) >> 10)
+#define MIN_N_HPTEG	1024		/* min 64kB hash table */
+#endif
+
+#ifdef CONFIG_POWER4
+	/* The hash table has already been allocated and initialized
+	   in prom.c */
+	n_hpteg = Hash_size >> LG_HPTEG_SIZE;
+	lg_n_hpteg = __ilog2(n_hpteg);
+
+	/* Remove the hash table from the available memory */
+	if (Hash)
+		reserve_phys_mem(__pa(Hash), Hash_size);
+
+#else /* CONFIG_POWER4 */
+	/*
+	 * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
+	 * This is less than the recommended amount, but then
+	 * Linux ain't AIX.
+	 */
+	n_hpteg = total_memory / (PAGE_SIZE * 8);
+	if (n_hpteg < MIN_N_HPTEG)
+		n_hpteg = MIN_N_HPTEG;
+	lg_n_hpteg = __ilog2(n_hpteg);
+	if (n_hpteg & (n_hpteg - 1)) {
+		++lg_n_hpteg;		/* round up if not power of 2 */
+		n_hpteg = 1 << lg_n_hpteg;
+	}
+	Hash_size = n_hpteg << LG_HPTEG_SIZE;
+
+	/*
+	 * Find some memory for the hash table.
+	 */
+	if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
+	Hash = mem_pieces_find(Hash_size, Hash_size);
+	cacheable_memzero(Hash, Hash_size);
+	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
+#endif /* CONFIG_POWER4 */
+
+	Hash_end = (PTE *) ((unsigned long)Hash + Hash_size);
+
+	printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n",
+	       total_memory >> 20, Hash_size >> 10, Hash);
+
+
+	/*
+	 * Patch up the instructions in hashtable.S:create_hpte
+	 */
+	if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
+	Hash_mask = n_hpteg - 1;
+	hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+	mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+	if (lg_n_hpteg > 16)
+		mb2 = 16 - LG_HPTEG_SIZE;
+
+	hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
+		| ((unsigned int)(Hash) >> 16);
+	hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6);
+	hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6);
+	hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask;
+	hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask;
+
+	/*
+	 * Ensure that the locations we've patched have been written
+	 * out from the data cache and invalidated in the instruction
+	 * cache, on those machines with split caches.
+	 */
+	flush_icache_range((unsigned long) &hash_page_patch_A[0],
+			   (unsigned long) &hash_page_patch_C[1]);
+
+	/*
+	 * Patch up the instructions in hashtable.S:flush_hash_page
+	 */
+	flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff)
+		| ((unsigned int)(Hash) >> 16);
+	flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6);
+	flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6);
+	flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask;
+	flush_icache_range((unsigned long) &flush_hash_patch_A[0],
+			   (unsigned long) &flush_hash_patch_B[1]);
+
+	if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
+}
diff --git a/arch/powerpc/mm/tlb.c b/arch/powerpc/mm/tlb.c
new file mode 100644
index 00000000000..6c3dc3c44c8
--- /dev/null
+++ b/arch/powerpc/mm/tlb.c
@@ -0,0 +1,183 @@
+/*
+ * This file contains the routines for TLB flushing.
+ * On machines where the MMU uses a hash table to store virtual to
+ * physical translations, these routines flush entries from the
+ * hash table also.
+ *  -- paulus
+ *
+ *  Derived from arch/ppc/mm/init.c:
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ *    Copyright (C) 1996 Paul Mackerras
+ *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+ *
+ *  Derived from "arch/i386/mm/init.c"
+ *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#include "mmu_decl.h"
+
+/*
+ * Called when unmapping pages to flush entries from the TLB/hash table.
+ */
+void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
+{
+	unsigned long ptephys;
+
+	if (Hash != 0) {
+		ptephys = __pa(ptep) & PAGE_MASK;
+		flush_hash_pages(mm->context, addr, ptephys, 1);
+	}
+}
+
+/*
+ * Called by ptep_set_access_flags, must flush on CPUs for which the
+ * DSI handler can't just "fixup" the TLB on a write fault
+ */
+void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
+{
+	if (Hash != 0)
+		return;
+	_tlbie(addr);
+}
+
+/*
+ * Called at the end of a mmu_gather operation to make sure the
+ * TLB flush is completely done.
+ */
+void tlb_flush(struct mmu_gather *tlb)
+{
+	if (Hash == 0) {
+		/*
+		 * 603 needs to flush the whole TLB here since
+		 * it doesn't use a hash table.
+		 */
+		_tlbia();
+	}
+}
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ * since the hardware hash table functions as an extension of the
+ * tlb as far as the linux tables are concerned, flush it too.
+ *    -- Cort
+ */
+
+/*
+ * 750 SMP is a Bad Idea because the 750 doesn't broadcast all
+ * the cache operations on the bus.  Hence we need to use an IPI
+ * to get the other CPU(s) to invalidate their TLBs.
+ */
+#ifdef CONFIG_SMP_750
+#define FINISH_FLUSH	smp_send_tlb_invalidate(0)
+#else
+#define FINISH_FLUSH	do { } while (0)
+#endif
+
+static void flush_range(struct mm_struct *mm, unsigned long start,
+			unsigned long end)
+{
+	pmd_t *pmd;
+	unsigned long pmd_end;
+	int count;
+	unsigned int ctx = mm->context;
+
+	if (Hash == 0) {
+		_tlbia();
+		return;
+	}
+	start &= PAGE_MASK;
+	if (start >= end)
+		return;
+	end = (end - 1) | ~PAGE_MASK;
+	pmd = pmd_offset(pgd_offset(mm, start), start);
+	for (;;) {
+		pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
+		if (pmd_end > end)
+			pmd_end = end;
+		if (!pmd_none(*pmd)) {
+			count = ((pmd_end - start) >> PAGE_SHIFT) + 1;
+			flush_hash_pages(ctx, start, pmd_val(*pmd), count);
+		}
+		if (pmd_end == end)
+			break;
+		start = pmd_end + 1;
+		++pmd;
+	}
+}
+
+/*
+ * Flush kernel TLB entries in the given range
+ */
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	flush_range(&init_mm, start, end);
+	FINISH_FLUSH;
+}
+
+/*
+ * Flush all the (user) entries for the address space described by mm.
+ */
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	struct vm_area_struct *mp;
+
+	if (Hash == 0) {
+		_tlbia();
+		return;
+	}
+
+	for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
+		flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
+	FINISH_FLUSH;
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	struct mm_struct *mm;
+	pmd_t *pmd;
+
+	if (Hash == 0) {
+		_tlbie(vmaddr);
+		return;
+	}
+	mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
+	pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr);
+	if (!pmd_none(*pmd))
+		flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1);
+	FINISH_FLUSH;
+}
+
+/*
+ * For each address in the range, find the pte for the address
+ * and check _PAGE_HASHPTE bit; if it is set, find and destroy
+ * the corresponding HPTE.
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	flush_range(vma->vm_mm, start, end);
+	FINISH_FLUSH;
+}
diff --git a/arch/powerpc/platforms/4xx/Kconfig b/arch/powerpc/platforms/4xx/Kconfig
new file mode 100644
index 00000000000..ed39d6a3d22
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/Kconfig
@@ -0,0 +1,280 @@
+config 4xx
+	bool
+	depends on 40x || 44x
+	default y
+
+config WANT_EARLY_SERIAL
+	bool
+	select SERIAL_8250
+	default n
+
+menu "AMCC 4xx options"
+	depends on 4xx
+
+choice
+	prompt "Machine Type"
+	depends on 40x
+	default WALNUT
+
+config BUBINGA
+	bool "Bubinga"
+	select WANT_EARLY_SERIAL
+	help
+	  This option enables support for the IBM 405EP evaluation board.
+
+config CPCI405
+	bool "CPCI405"
+	help
+	  This option enables support for the CPCI405 board.
+
+config EP405
+	bool "EP405/EP405PC"
+	help
+	  This option enables support for the EP405/EP405PC boards.
+
+config REDWOOD_5
+	bool "Redwood-5"
+	help
+	  This option enables support for the IBM STB04 evaluation board.
+
+config REDWOOD_6
+	bool "Redwood-6"
+	help
+	  This option enables support for the IBM STBx25xx evaluation board.
+
+config SYCAMORE
+	bool "Sycamore"
+	help
+	  This option enables support for the IBM PPC405GPr evaluation board.
+
+config WALNUT
+	bool "Walnut"
+	help
+	  This option enables support for the IBM PPC405GP evaluation board.
+
+config XILINX_ML300
+	bool "Xilinx-ML300"
+	help
+	  This option enables support for the Xilinx ML300 evaluation board.
+
+endchoice
+
+choice
+	prompt "Machine Type"
+	depends on 44x
+	default EBONY
+
+config BAMBOO
+	bool "Bamboo"
+	select WANT_EARLY_SERIAL
+	help
+	  This option enables support for the IBM PPC440EP evaluation board.
+
+config EBONY
+	bool "Ebony"
+	select WANT_EARLY_SERIAL
+	help
+	  This option enables support for the IBM PPC440GP evaluation board.
+
+config LUAN
+	bool "Luan"
+	select WANT_EARLY_SERIAL
+	help
+	  This option enables support for the IBM PPC440SP evaluation board.
+
+config OCOTEA
+	bool "Ocotea"
+	select WANT_EARLY_SERIAL
+	help
+	  This option enables support for the IBM PPC440GX evaluation board.
+
+endchoice
+
+config EP405PC
+	bool "EP405PC Support"
+	depends on EP405
+
+
+# It's often necessary to know the specific 4xx processor type.
+# Fortunately, it is impled (so far) from the board type, so we
+# don't need to ask more redundant questions.
+config NP405H
+	bool
+	depends on ASH
+	default y
+
+config 440EP
+	bool
+	depends on BAMBOO
+	select PPC_FPU
+	default y
+
+config 440GP
+	bool
+	depends on EBONY
+	default y
+
+config 440GX
+	bool
+	depends on OCOTEA
+	default y
+
+config 440SP
+	bool
+	depends on LUAN
+	default y
+
+config 440
+	bool
+	depends on 440GP || 440SP || 440EP
+	default y
+
+config 440A
+	bool
+	depends on 440GX
+	default y
+
+config IBM440EP_ERR42
+	bool
+	depends on 440EP
+	default y
+
+# All 405-based cores up until the 405GPR and 405EP have this errata.
+config IBM405_ERR77
+	bool
+	depends on 40x && !403GCX && !405GPR && !405EP
+	default y
+
+# All 40x-based cores, up until the 405GPR and 405EP have this errata.
+config IBM405_ERR51
+	bool
+	depends on 40x && !405GPR && !405EP
+	default y
+
+config BOOKE
+	bool
+	depends on 44x
+	default y
+
+config IBM_OCP
+	bool
+	depends on ASH || BAMBOO || BUBINGA || CPCI405 || EBONY || EP405 || LUAN || OCOTEA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT
+	default y
+
+config XILINX_OCP
+	bool
+	depends on XILINX_ML300
+	default y
+
+config IBM_EMAC4
+	bool
+	depends on 440GX || 440SP
+	default y
+
+config BIOS_FIXUP
+	bool
+	depends on BUBINGA || EP405 || SYCAMORE || WALNUT
+	default y
+
+# OAK doesn't exist but wanted to keep this around for any future 403GCX boards
+config 403GCX
+	bool
+	depends OAK
+	default y
+
+config 405EP
+	bool
+	depends on BUBINGA
+	default y
+
+config 405GP
+	bool
+	depends on CPCI405 || EP405 || WALNUT
+	default y
+
+config 405GPR
+	bool
+	depends on SYCAMORE
+	default y
+
+config VIRTEX_II_PRO
+	bool
+	depends on XILINX_ML300
+	default y
+
+config STB03xxx
+	bool
+	depends on REDWOOD_5 || REDWOOD_6
+	default y
+
+config EMBEDDEDBOOT
+	bool
+	depends on EP405 || XILINX_ML300
+	default y
+
+config IBM_OPENBIOS
+	bool
+	depends on ASH || BUBINGA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT
+	default y
+
+config PPC4xx_DMA
+	bool "PPC4xx DMA controller support"
+	depends on 4xx
+
+config PPC4xx_EDMA
+	bool
+	depends on !STB03xxx && PPC4xx_DMA
+	default y
+
+config PPC_GEN550
+	bool
+	depends on 4xx
+	default y
+
+choice
+	prompt "TTYS0 device and default console"
+	depends on 40x
+	default UART0_TTYS0
+
+config UART0_TTYS0
+	bool "UART0"
+
+config UART0_TTYS1
+	bool "UART1"
+
+endchoice
+
+config SERIAL_SICC
+	bool "SICC Serial port support"
+	depends on STB03xxx
+
+config UART1_DFLT_CONSOLE
+	bool
+	depends on SERIAL_SICC && UART0_TTYS1
+	default y
+
+config SERIAL_SICC_CONSOLE
+	bool
+	depends on SERIAL_SICC && UART0_TTYS1
+	default y
+endmenu
+
+
+menu "IBM 40x options"
+	depends on 40x
+
+config SERIAL_SICC
+	bool "SICC Serial port"
+	depends on STB03xxx
+
+config UART1_DFLT_CONSOLE
+	bool
+	depends on SERIAL_SICC && UART0_TTYS1
+	default y
+
+config SERIAL_SICC_CONSOLE
+	bool
+	depends on SERIAL_SICC && UART0_TTYS1
+	default y
+
+endmenu
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
new file mode 100644
index 00000000000..c5bc2821d99
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -0,0 +1,86 @@
+config 85xx
+	bool
+	depends on E500
+	default y
+
+config PPC_INDIRECT_PCI_BE
+	bool
+	depends on 85xx
+	default y
+
+menu "Freescale 85xx options"
+	depends on E500
+
+choice
+	prompt "Machine Type"
+	depends on 85xx
+	default MPC8540_ADS
+
+config MPC8540_ADS
+	bool "Freescale MPC8540 ADS"
+	help
+	  This option enables support for the MPC 8540 ADS evaluation board.
+
+config MPC8548_CDS
+	bool "Freescale MPC8548 CDS"
+	help
+	  This option enablese support for the MPC8548 CDS evaluation board.
+
+config MPC8555_CDS
+	bool "Freescale MPC8555 CDS"
+	help
+	  This option enablese support for the MPC8555 CDS evaluation board.
+
+config MPC8560_ADS
+	bool "Freescale MPC8560 ADS"
+	help
+	  This option enables support for the MPC 8560 ADS evaluation board.
+
+config SBC8560
+	bool "WindRiver PowerQUICC III SBC8560"
+	help
+	  This option enables support for the WindRiver PowerQUICC III 
+	  SBC8560 board.
+
+config STX_GP3
+	bool "Silicon Turnkey Express GP3"
+	help
+	  This option enables support for the Silicon Turnkey Express GP3
+	  board.
+
+endchoice
+
+# It's often necessary to know the specific 85xx processor type.
+# Fortunately, it is implied (so far) from the board type, so we
+# don't need to ask more redundant questions.
+config MPC8540
+	bool
+	depends on MPC8540_ADS
+	default y
+
+config MPC8548
+	bool
+	depends on MPC8548_CDS
+	default y
+
+config MPC8555
+	bool
+	depends on MPC8555_CDS
+	default y
+
+config MPC8560
+	bool
+	depends on SBC8560 || MPC8560_ADS || STX_GP3
+	default y
+
+config 85xx_PCI2
+	bool "Supprt for 2nd PCI host controller"
+	depends on MPC8555_CDS
+	default y
+
+config PPC_GEN550
+	bool
+	depends on MPC8540 || SBC8560 || MPC8555
+	default y
+
+endmenu
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
new file mode 100644
index 00000000000..c8c0ba3cf8e
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -0,0 +1,352 @@
+config FADS
+	bool
+
+choice
+	prompt "8xx Machine Type"
+	depends on 8xx
+	default RPXLITE
+
+config RPXLITE
+	bool "RPX-Lite"
+	---help---
+	  Single-board computers based around the PowerPC MPC8xx chips and
+	  intended for embedded applications.  The following types are
+	  supported:
+
+	  RPX-Lite:
+	  Embedded Planet RPX Lite. PC104 form-factor SBC based on the MPC823.
+
+	  RPX-Classic:
+	  Embedded Planet RPX Classic Low-fat. Credit-card-size SBC based on
+	  the MPC 860
+
+	  BSE-IP:
+	  Bright Star Engineering ip-Engine.
+
+	  TQM823L:
+	  TQM850L:
+	  TQM855L:
+	  TQM860L:
+	  MPC8xx based family of mini modules, half credit card size,
+	  up to 64 MB of RAM, 8 MB Flash, (Fast) Ethernet, 2 x serial ports,
+	  2 x CAN bus interface, ...
+	  Manufacturer: TQ Components, www.tq-group.de
+	  Date of Release: October (?) 1999
+	  End of Life: not yet :-)
+	  URL:
+	  - module: <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>
+	  - starter kit: <http://www.denx.de/PDF/STK8xxLHWM201.pdf>
+	  - images: <http://www.denx.de/embedded-ppc-en.html>
+
+	  FPS850L:
+	  FingerPrint Sensor System (based on TQM850L)
+	  Manufacturer: IKENDI AG, <http://www.ikendi.com/>
+	  Date of Release: November 1999
+	  End of life: end 2000 ?
+	  URL: see TQM850L
+
+	  IVMS8:
+	  MPC860 based board used in the "Integrated Voice Mail System",
+	  Small Version (8 voice channels)
+	  Manufacturer: Speech Design, <http://www.speech-design.de/>
+	  Date of Release: December 2000 (?)
+	  End of life: -
+	  URL: <http://www.speech-design.de/>
+
+	  IVML24:
+	  MPC860 based board used in the "Integrated Voice Mail System",
+	  Large Version (24 voice channels)
+	  Manufacturer: Speech Design, <http://www.speech-design.de/>
+	  Date of Release: March 2001  (?)
+	  End of life: -
+	  URL: <http://www.speech-design.de/>
+
+	  HERMES:
+	  Hermes-Pro ISDN/LAN router with integrated 8 x hub
+	  Manufacturer: Multidata Gesellschaft fur Datentechnik und Informatik
+	  <http://www.multidata.de/>
+	  Date of Release: 2000 (?)
+	  End of life: -
+	  URL: <http://www.multidata.de/english/products/hpro.htm>
+
+	  IP860:
+	  VMEBus IP (Industry Pack) carrier board with MPC860
+	  Manufacturer: MicroSys GmbH, <http://www.microsys.de/>
+	  Date of Release: ?
+	  End of life: -
+	  URL: <http://www.microsys.de/html/ip860.html>
+
+	  PCU_E:
+	  PCU = Peripheral Controller Unit, Extended
+	  Manufacturer: Siemens AG, ICN (Information and Communication Networks)
+	  	<http://www.siemens.de/page/1,3771,224315-1-999_2_226207-0,00.html>
+	  Date of Release: April 2001
+	  End of life: August 2001
+	  URL: n. a.
+
+config RPXCLASSIC
+	bool "RPX-Classic"
+	help
+	  The RPX-Classic is a single-board computer based on the Motorola
+	  MPC860.  It features 16MB of DRAM and a variable amount of flash,
+	  I2C EEPROM, thermal monitoring, a PCMCIA slot, a DIP switch and two
+	  LEDs.  Variants with Ethernet ports exist.  Say Y here to support it
+	  directly.
+
+config BSEIP
+	bool "BSE-IP"
+	help
+	  Say Y here to support the Bright Star Engineering ipEngine SBC.
+	  This is a credit-card-sized device featuring a MPC823 processor,
+	  26MB DRAM, 4MB flash, Ethernet, a 16K-gate FPGA, USB, an LCD/video
+	  controller, and two RS232 ports.
+
+config MPC8XXFADS
+	bool "FADS"
+	select FADS
+
+config MPC86XADS
+	bool "MPC86XADS"
+	help
+	  MPC86x Application Development System by Freescale Semiconductor.
+	  The MPC86xADS is meant to serve as a platform for s/w and h/w
+	  development around the MPC86X processor families.
+	select FADS
+
+config MPC885ADS
+	bool "MPC885ADS"
+	help
+	  Freescale Semiconductor MPC885 Application Development System (ADS).
+	  Also known as DUET.
+	  The MPC885ADS is meant to serve as a platform for s/w and h/w
+	  development around the MPC885 processor family.
+
+config TQM823L
+	bool "TQM823L"
+	help
+	  Say Y here to support the TQM823L, one of an MPC8xx-based family of
+	  mini SBCs (half credit-card size) from TQ Components first released
+	  in late 1999.  Technical references are at
+	  <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and
+	  <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at
+	  <http://www.denx.de/embedded-ppc-en.html>.
+
+config TQM850L
+	bool "TQM850L"
+	help
+	  Say Y here to support the TQM850L, one of an MPC8xx-based family of
+	  mini SBCs (half credit-card size) from TQ Components first released
+	  in late 1999.  Technical references are at
+	  <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and
+	  <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at
+	  <http://www.denx.de/embedded-ppc-en.html>.
+
+config TQM855L
+	bool "TQM855L"
+	help
+	  Say Y here to support the TQM855L, one of an MPC8xx-based family of
+	  mini SBCs (half credit-card size) from TQ Components first released
+	  in late 1999.  Technical references are at
+	  <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and
+	  <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at
+	  <http://www.denx.de/embedded-ppc-en.html>.
+
+config TQM860L
+	bool "TQM860L"
+	help
+	  Say Y here to support the TQM860L, one of an MPC8xx-based family of
+	  mini SBCs (half credit-card size) from TQ Components first released
+	  in late 1999.  Technical references are at
+	  <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and
+	  <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at
+	  <http://www.denx.de/embedded-ppc-en.html>.
+
+config FPS850L
+	bool "FPS850L"
+
+config IVMS8
+	bool "IVMS8"
+	help
+	  Say Y here to support the Integrated Voice-Mail Small 8-channel SBC
+	  from Speech Design, released March 2001.  The manufacturer's website
+	  is at <http://www.speech-design.de/>.
+
+config IVML24
+	bool "IVML24"
+	help
+	  Say Y here to support the Integrated Voice-Mail Large 24-channel SBC
+	  from Speech Design, released March 2001.  The manufacturer's website
+	  is at <http://www.speech-design.de/>.
+
+config HERMES_PRO
+	bool "HERMES"
+
+config IP860
+	bool "IP860"
+
+config LWMON
+	bool "LWMON"
+
+config PCU_E
+	bool "PCU_E"
+
+config CCM
+	bool "CCM"
+
+config LANTEC
+	bool "LANTEC"
+
+config MBX
+	bool "MBX"
+	help
+	  MBX is a line of Motorola single-board computer based around the
+	  MPC821 and MPC860 processors, and intended for embedded-controller
+	  applications.  Say Y here to support these boards directly.
+
+config WINCEPT
+	bool "WinCept"
+	help
+	  The Wincept 100/110 is a Motorola single-board computer based on the
+	  MPC821 PowerPC, introduced in 1998 and designed to be used in
+	  thin-client machines.  Say Y to support it directly.
+
+endchoice
+
+#
+# MPC8xx Communication options
+#
+
+menu "MPC8xx CPM Options"
+	depends on 8xx
+
+config SCC_ENET
+	bool "CPM SCC Ethernet"
+	depends on NET_ETHERNET
+	help
+	  Enable Ethernet support via the Motorola MPC8xx serial
+	  communications controller.
+
+choice
+	prompt "SCC used for Ethernet"
+	depends on SCC_ENET
+	default SCC1_ENET
+
+config SCC1_ENET
+	bool "SCC1"
+	help
+	  Use MPC8xx serial communications controller 1 to drive Ethernet
+	  (default).
+
+config SCC2_ENET
+	bool "SCC2"
+	help
+	  Use MPC8xx serial communications controller 2 to drive Ethernet.
+
+config SCC3_ENET
+	bool "SCC3"
+	help
+	  Use MPC8xx serial communications controller 3 to drive Ethernet.
+
+endchoice
+
+config FEC_ENET
+	bool "860T FEC Ethernet"
+	depends on NET_ETHERNET
+	help
+	  Enable Ethernet support via the Fast Ethernet Controller (FCC) on
+	  the Motorola MPC8260.
+
+config USE_MDIO
+	bool "Use MDIO for PHY configuration"
+	depends on FEC_ENET
+	help
+	  On some boards the hardware configuration of the ethernet PHY can be
+	  used without any software interaction over the MDIO interface, so
+	  all MII code can be omitted. Say N here if unsure or if you don't
+	  need link status reports.
+
+config  FEC_AM79C874
+	bool "Support AMD79C874 PHY"
+	depends on USE_MDIO
+
+config FEC_LXT970
+	bool "Support LXT970 PHY"
+	depends on USE_MDIO
+
+config FEC_LXT971
+	bool "Support LXT971 PHY"
+	depends on USE_MDIO
+	
+config FEC_QS6612
+	bool "Support QS6612 PHY"
+	depends on USE_MDIO
+	
+config ENET_BIG_BUFFERS
+	bool "Use Big CPM Ethernet Buffers"
+	depends on SCC_ENET || FEC_ENET
+	help
+	  Allocate large buffers for MPC8xx Ethernet. Increases throughput
+	  and decreases the likelihood of dropped packets, but costs memory.
+
+config HTDMSOUND
+	bool "Embedded Planet HIOX Audio"
+	depends on SOUND=y
+
+# This doesn't really belong here, but it is convenient to ask
+# 8xx specific questions.
+comment "Generic MPC8xx Options"
+
+config 8xx_COPYBACK
+	bool "Copy-Back Data Cache (else Writethrough)"
+	help
+	  Saying Y here will cause the cache on an MPC8xx processor to be used
+	  in Copy-Back mode.  If you say N here, it is used in Writethrough
+	  mode.
+
+	  If in doubt, say Y here.
+
+config 8xx_CPU6
+	bool "CPU6 Silicon Errata (860 Pre Rev. C)"
+	help
+	  MPC860 CPUs, prior to Rev C have some bugs in the silicon, which
+	  require workarounds for Linux (and most other OSes to work).  If you
+	  get a BUG() very early in boot, this might fix the problem.  For
+	  more details read the document entitled "MPC860 Family Device Errata
+	  Reference" on Motorola's website.  This option also incurs a
+	  performance hit.
+
+	  If in doubt, say N here.
+
+choice
+	prompt "Microcode patch selection"
+	default NO_UCODE_PATCH
+	help
+	  Help not implemented yet, coming soon.
+
+config NO_UCODE_PATCH
+	bool "None"
+
+config USB_SOF_UCODE_PATCH
+	bool "USB SOF patch"
+	help
+	  Help not implemented yet, coming soon.
+
+config I2C_SPI_UCODE_PATCH
+	bool "I2C/SPI relocation patch"
+	help
+	  Help not implemented yet, coming soon.
+
+config I2C_SPI_SMC1_UCODE_PATCH
+	bool "I2C/SPI/SMC1 relocation patch"
+	help
+	  Help not implemented yet, coming soon.
+
+endchoice
+
+config UCODE_PATCH
+	bool
+	default y
+	depends on !NO_UCODE_PATCH
+
+endmenu
+
diff --git a/arch/powerpc/platforms/apus/Kconfig b/arch/powerpc/platforms/apus/Kconfig
new file mode 100644
index 00000000000..6bde3bffed8
--- /dev/null
+++ b/arch/powerpc/platforms/apus/Kconfig
@@ -0,0 +1,130 @@
+
+config AMIGA
+	bool
+	depends on APUS
+	default y
+	help
+	  This option enables support for the Amiga series of computers.
+
+config ZORRO
+	bool
+	depends on APUS
+	default y
+	help
+	  This enables support for the Zorro bus in the Amiga. If you have
+	  expansion cards in your Amiga that conform to the Amiga
+	  AutoConfig(tm) specification, say Y, otherwise N. Note that even
+	  expansion cards that do not fit in the Zorro slots but fit in e.g.
+	  the CPU slot may fall in this category, so you have to say Y to let
+	  Linux use these.
+
+config ABSTRACT_CONSOLE
+	bool
+	depends on APUS
+	default y
+
+config APUS_FAST_EXCEPT
+	bool
+	depends on APUS
+	default y
+
+config AMIGA_PCMCIA
+	bool "Amiga 1200/600 PCMCIA support"
+	depends on APUS && EXPERIMENTAL
+	help
+	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
+	  600. If you intend to use pcmcia cards say Y; otherwise say N.
+
+config AMIGA_BUILTIN_SERIAL
+	tristate "Amiga builtin serial support"
+	depends on APUS
+	help
+	  If you want to use your Amiga's built-in serial port in Linux,
+	  answer Y.
+
+	  To compile this driver as a module, choose M here.
+
+config GVPIOEXT
+	tristate "GVP IO-Extender support"
+	depends on APUS
+	help
+	  If you want to use a GVP IO-Extender serial card in Linux, say Y.
+	  Otherwise, say N.
+
+config GVPIOEXT_LP
+	tristate "GVP IO-Extender parallel printer support"
+	depends on GVPIOEXT
+	help
+	  Say Y to enable driving a printer from the parallel port on your
+	  GVP IO-Extender card, N otherwise.
+
+config GVPIOEXT_PLIP
+	tristate "GVP IO-Extender PLIP support"
+	depends on GVPIOEXT
+	help
+	  Say Y to enable doing IP over the parallel port on your GVP
+	  IO-Extender card, N otherwise.
+
+config MULTIFACE_III_TTY
+	tristate "Multiface Card III serial support"
+	depends on APUS
+	help
+	  If you want to use a Multiface III card's serial port in Linux,
+	  answer Y.
+
+	  To compile this driver as a module, choose M here.
+
+config A2232
+	tristate "Commodore A2232 serial support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && APUS
+	---help---
+	  This option supports the 2232 7-port serial card shipped with the
+	  Amiga 2000 and other Zorro-bus machines, dating from 1989.  At
+	  a max of 19,200 bps, the ports are served by a 6551 ACIA UART chip
+	  each, plus a 8520 CIA, and a master 6502 CPU and buffer as well. The
+	  ports were connected with 8 pin DIN connectors on the card bracket,
+	  for which 8 pin to DB25 adapters were supplied. The card also had
+	  jumpers internally to toggle various pinning configurations.
+
+	  This driver can be built as a module; but then "generic_serial"
+	  will also be built as a module. This has to be loaded before
+	  "ser_a2232". If you want to do this, answer M here.
+
+config WHIPPET_SERIAL
+	tristate "Hisoft Whippet PCMCIA serial support"
+	depends on AMIGA_PCMCIA
+	help
+	  HiSoft has a web page at <http://www.hisoft.co.uk/>, but there
+	  is no listing for the Whippet in their Amiga section.
+
+config APNE
+	tristate "PCMCIA NE2000 support"
+	depends on AMIGA_PCMCIA
+	help
+	  If you have a PCMCIA NE2000 compatible adapter, say Y.  Otherwise,
+	  say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called apne.
+
+config SERIAL_CONSOLE
+	bool "Support for serial port console"
+	depends on APUS && (AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y)
+
+config HEARTBEAT
+	bool "Use power LED as a heartbeat"
+	depends on APUS
+	help
+	  Use the power-on LED on your machine as a load meter.  The exact
+	  behavior is platform-dependent, but normally the flash frequency is
+	  a hyperbolic function of the 5-minute load average.
+
+config PROC_HARDWARE
+	bool "/proc/hardware support"
+	depends on APUS
+
+source "drivers/zorro/Kconfig"
+
+config PCI_PERMEDIA
+	bool "PCI for Permedia2"
+	depends on !4xx && !8xx && APUS
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
new file mode 100644
index 00000000000..4f355143059
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -0,0 +1,313 @@
+choice
+	prompt "Machine Type"
+	depends on EMBEDDED6xx
+
+config APUS
+	bool "Amiga-APUS"
+	depends on BROKEN
+	help
+	  Select APUS if configuring for a PowerUP Amiga.
+	  More information is available at:
+	  <http://linux-apus.sourceforge.net/>.
+
+config KATANA
+	bool "Artesyn-Katana"
+	help
+	  Select KATANA if configuring an Artesyn KATANA 750i or 3750
+	  cPCI board.
+
+config WILLOW
+	bool "Cogent-Willow"
+
+config CPCI690
+	bool "Force-CPCI690"
+	help
+	  Select CPCI690 if configuring a Force CPCI690 cPCI board.
+
+config POWERPMC250
+	bool "Force-PowerPMC250"
+
+config CHESTNUT
+	bool "IBM 750FX Eval board or 750GX Eval board"
+	help
+	  Select CHESTNUT if configuring an IBM 750FX Eval Board or a
+	  IBM 750GX Eval board.
+
+config SPRUCE
+	bool "IBM-Spruce"
+
+config HDPU
+	bool "Sky-HDPU"
+	help
+	  Select HDPU if configuring a Sky Computers Compute Blade.
+
+config HDPU_FEATURES
+	depends HDPU
+	tristate "HDPU-Features"
+	help
+	  Select to enable HDPU enhanced features.
+
+config EV64260
+	bool "Marvell-EV64260BP"
+	help
+	  Select EV64260 if configuring a Marvell (formerly Galileo)
+	  EV64260BP Evaluation platform.
+
+config LOPEC
+	bool "Motorola-LoPEC"
+
+config MVME5100
+	bool "Motorola-MVME5100"
+
+config PPLUS
+	bool "Motorola-PowerPlus"
+
+config PRPMC750
+	bool "Motorola-PrPMC750"
+
+config PRPMC800
+	bool "Motorola-PrPMC800"
+
+config SANDPOINT
+	bool "Motorola-Sandpoint"
+	help
+	  Select SANDPOINT if configuring for a Motorola Sandpoint X3
+	  (any flavor).
+
+config RADSTONE_PPC7D
+	bool "Radstone Technology PPC7D board"
+
+config PAL4
+	bool "SBS-Palomar4"
+
+config GEMINI
+	bool "Synergy-Gemini"
+	depends on BROKEN
+	help
+	  Select Gemini if configuring for a Synergy Microsystems' Gemini
+	  series Single Board Computer.  More information is available at:
+	  <http://www.synergymicro.com/PressRel/97_10_15.html>.
+
+config EST8260
+	bool "EST8260"
+	---help---
+	  The EST8260 is a single-board computer manufactured by Wind River
+	  Systems, Inc. (formerly Embedded Support Tools Corp.) and based on
+	  the MPC8260.  Wind River Systems has a website at
+	  <http://www.windriver.com/>, but the EST8260 cannot be found on it
+	  and has probably been discontinued or rebadged.
+
+config SBC82xx
+	bool "SBC82xx"
+	---help---
+	  SBC PowerQUICC II, single-board computer with MPC82xx CPU
+	  Manufacturer: Wind River Systems, Inc.
+	  Date of Release: May 2003
+	  End of Life: -
+	  URL: <http://www.windriver.com/>
+
+config SBS8260
+	bool "SBS8260"
+
+config RPX8260
+	bool "RPXSUPER"
+
+config TQM8260
+	bool "TQM8260"
+	---help---
+	  MPC8260 based module, little larger than credit card,
+	  up to 128 MB global + 64 MB local RAM, 32 MB Flash,
+	  32 kB EEPROM, 256 kB L@ Cache, 10baseT + 100baseT Ethernet,
+	  2 x serial ports, ...
+	  Manufacturer: TQ Components, www.tq-group.de
+	  Date of Release: June 2001
+	  End of Life: not yet :-)
+	  URL: <http://www.denx.de/PDF/TQM82xx_SPEC_Rev005.pdf>
+
+config ADS8272
+	bool "ADS8272"
+
+config PQ2FADS
+	bool "Freescale-PQ2FADS"
+	help
+	  Select PQ2FADS if you wish to configure for a Freescale
+	  PQ2FADS board (-VR or -ZU).
+
+config LITE5200
+	bool "Freescale LITE5200 / (IceCube)"
+	select PPC_MPC52xx
+	help
+	  Support for the LITE5200 dev board for the MPC5200 from Freescale.
+	  This is for the LITE5200 version 2.0 board. Don't know if it changes
+	  much but it's only been tested on this board version. I think this
+	  board is also known as IceCube.
+
+config MPC834x_SYS
+	bool "Freescale MPC834x SYS"
+	help
+	  This option enables support for the MPC 834x SYS evaluation board.
+
+	  Be aware that PCI buses can only function when SYS board is plugged
+	  into the PIB (Platform IO Board) board from Freescale which provide
+	  3 PCI slots.  The PIBs PCI initialization is the bootloader's
+	  responsiblilty.
+
+config EV64360
+	bool "Marvell-EV64360BP"
+	help
+	  Select EV64360 if configuring a Marvell EV64360BP Evaluation
+	  platform.
+endchoice
+
+config PQ2ADS
+	bool
+	depends on ADS8272
+	default y
+
+config TQM8xxL
+	bool
+	depends on 8xx && (TQM823L || TQM850L || FPS850L || TQM855L || TQM860L)
+	default y
+
+config PPC_MPC52xx
+	bool
+
+config 8260
+	bool "CPM2 Support" if WILLOW
+	depends on 6xx
+	default y if TQM8260 || RPX8260 || EST8260 || SBS8260 || SBC82xx || PQ2FADS
+	help
+	  The MPC8260 is a typical embedded CPU made by Motorola.  Selecting
+	  this option means that you wish to build a kernel for a machine with
+	  an 8260 class CPU.
+
+config 8272
+	bool
+	depends on 6xx
+	default y if ADS8272
+	select 8260
+	help
+	  The MPC8272 CPM has a different internal dpram setup than other CPM2
+	  devices
+
+config 83xx
+	bool
+	default y if MPC834x_SYS
+
+config MPC834x
+	bool
+	default y if MPC834x_SYS
+
+config CPM2
+	bool
+	depends on 8260 || MPC8560 || MPC8555
+	default y
+	help
+	  The CPM2 (Communications Processor Module) is a coprocessor on
+	  embedded CPUs made by Motorola.  Selecting this option means that
+	  you wish to build a kernel for a machine with a CPM2 coprocessor
+	  on it (826x, 827x, 8560).
+
+config PPC_GEN550
+	bool
+	depends on SANDPOINT || SPRUCE || PPLUS || \
+		PRPMC750 || PRPMC800 || LOPEC || \
+		(EV64260 && !SERIAL_MPSC) || CHESTNUT || RADSTONE_PPC7D || \
+		83xx
+	default y
+
+config FORCE
+	bool
+	depends on 6xx && POWERPMC250
+	default y
+
+config GT64260
+	bool
+	depends on EV64260 || CPCI690
+	default y
+
+config MV64360		# Really MV64360 & MV64460
+	bool
+	depends on CHESTNUT || KATANA || RADSTONE_PPC7D || HDPU || EV64360
+	default y
+
+config MV64X60
+	bool
+	depends on (GT64260 || MV64360)
+	default y
+
+menu "Set bridge options"
+	depends on MV64X60
+
+config NOT_COHERENT_CACHE
+	bool "Turn off Cache Coherency"
+	default n
+	help
+	  Some 64x60 bridges lock up when trying to enforce cache coherency.
+	  When this option is selected, cache coherency will be turned off.
+	  Note that this can cause other problems (e.g., stale data being
+	  speculatively loaded via a cached mapping).  Use at your own risk.
+
+config MV64X60_BASE
+	hex "Set bridge base used by firmware"
+	default "0xf1000000"
+	help
+	  A firmware can leave the base address of the bridge's registers at
+	  a non-standard location.  If so, set this value to reflect the
+	  address of that non-standard location.
+
+config MV64X60_NEW_BASE
+	hex "Set bridge base used by kernel"
+	default "0xf1000000"
+	help
+	  If the current base address of the bridge's registers is not where
+	  you want it, set this value to the address that you want it moved to.
+
+endmenu
+
+config NONMONARCH_SUPPORT
+	bool "Enable Non-Monarch Support"
+	depends on PRPMC800
+
+config HARRIER
+	bool
+	depends on PRPMC800
+	default y
+
+config EPIC_SERIAL_MODE
+	bool
+	depends on 6xx && (LOPEC || SANDPOINT)
+	default y
+
+config MPC10X_BRIDGE
+	bool
+	depends on POWERPMC250 || LOPEC || SANDPOINT
+	default y
+
+config MPC10X_OPENPIC
+	bool
+	depends on POWERPMC250 || LOPEC || SANDPOINT
+	default y
+
+config MPC10X_STORE_GATHERING
+	bool "Enable MPC10x store gathering"
+	depends on MPC10X_BRIDGE
+
+config SANDPOINT_ENABLE_UART1
+	bool "Enable DUART mode on Sandpoint"
+	depends on SANDPOINT
+	help
+	  If this option is enabled then the MPC824x processor will run
+	  in DUART mode instead of UART mode.
+
+config HARRIER_STORE_GATHERING
+	bool "Enable Harrier store gathering"
+	depends on HARRIER
+
+config MVME5100_IPMC761_PRESENT
+	bool "MVME5100 configured with an IPMC761"
+	depends on MVME5100
+
+config SPRUCE_BAUD_33M
+	bool "Spruce baud clock support"
+	depends on SPRUCE
diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig
new file mode 100644
index 00000000000..3d957a30c8c
--- /dev/null
+++ b/arch/powerpc/platforms/iseries/Kconfig
@@ -0,0 +1,31 @@
+
+menu "iSeries device drivers"
+	depends on PPC_ISERIES
+
+config VIOCONS
+	tristate "iSeries Virtual Console Support"
+
+config VIODASD
+	tristate "iSeries Virtual I/O disk support"
+	help
+	  If you are running on an iSeries system and you want to use
+ 	  virtual disks created and managed by OS/400, say Y.
+
+config VIOCD
+	tristate "iSeries Virtual I/O CD support"
+	help
+	  If you are running Linux on an IBM iSeries system and you want to
+	  read a CD drive owned by OS/400, say Y here.
+
+config VIOTAPE
+	tristate "iSeries Virtual Tape Support"
+	help
+	  If you are running Linux on an iSeries system and you want Linux
+	  to read and/or write a tape drive owned by OS/400, say Y here.
+
+endmenu
+
+config VIOPATH
+	bool
+	depends on VIOCONS || VIODASD || VIOCD || VIOTAPE || VETH
+	default y
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
new file mode 100644
index 00000000000..37b7341396e
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_PPC_PMAC)		+= pmac_pic.o pmac_setup.o pmac_time.o \
+				   pmac_feature.o pmac_pci.o pmac_sleep.o \
+				   pmac_low_i2c.o pmac_cache.o
+obj-$(CONFIG_PMAC_BACKLIGHT)	+= pmac_backlight.o
+obj-$(CONFIG_CPU_FREQ_PMAC)	+= pmac_cpufreq.o
+ifeq ($(CONFIG_PPC_PMAC),y)
+obj-$(CONFIG_NVRAM)		+= pmac_nvram.o
+obj-$(CONFIG_SMP)		+= pmac_smp.o
+endif
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
new file mode 100644
index 00000000000..40e1c5030f7
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -0,0 +1,31 @@
+#ifndef __PMAC_H__
+#define __PMAC_H__
+
+#include <linux/pci.h>
+#include <linux/ide.h>
+
+/*
+ * Declaration for the various functions exported by the
+ * pmac_* files. Mostly for use by pmac_setup
+ */
+
+extern void pmac_get_boot_time(struct rtc_time *tm);
+extern void pmac_get_rtc_time(struct rtc_time *tm);
+extern int  pmac_set_rtc_time(struct rtc_time *tm);
+extern void pmac_read_rtc_time(void);
+extern void pmac_calibrate_decr(void);
+
+extern void pmac_pcibios_fixup(void);
+extern void pmac_pci_init(void);
+extern void pmac_setup_pci_dma(void);
+extern void pmac_check_ht_link(void);
+
+extern void pmac_setup_smp(void);
+
+extern unsigned long pmac_ide_get_base(int index);
+extern void pmac_ide_init_hwif_ports(hw_regs_t *hw,
+	unsigned long data_port, unsigned long ctrl_port, int *irq);
+
+extern void pmac_nvram_init(void);
+
+#endif /* __PMAC_H__ */
diff --git a/arch/powerpc/platforms/powermac/pmac_backlight.c b/arch/powerpc/platforms/powermac/pmac_backlight.c
new file mode 100644
index 00000000000..8be2f7d071f
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_backlight.c
@@ -0,0 +1,202 @@
+/*
+ * Miscellaneous procedures for dealing with the PowerMac hardware.
+ * Contains support for the backlight.
+ *
+ *   Copyright (C) 2000 Benjamin Herrenschmidt
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/reboot.h>
+#include <linux/nvram.h>
+#include <linux/console.h>
+#include <asm/sections.h>
+#include <asm/ptrace.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/nvram.h>
+#include <asm/backlight.h>
+
+#include <linux/adb.h>
+#include <linux/pmu.h>
+
+static struct backlight_controller *backlighter;
+static void* backlighter_data;
+static int backlight_autosave;
+static int backlight_level = BACKLIGHT_MAX;
+static int backlight_enabled = 1;
+static int backlight_req_level = -1;
+static int backlight_req_enable = -1;
+
+static void backlight_callback(void *);
+static DECLARE_WORK(backlight_work, backlight_callback, NULL);
+
+void register_backlight_controller(struct backlight_controller *ctrler,
+					  void *data, char *type)
+{
+	struct device_node* bk_node;
+	char *prop;
+	int valid = 0;
+
+	/* There's already a matching controller, bail out */
+	if (backlighter != NULL)
+		return;
+
+	bk_node = find_devices("backlight");
+
+#ifdef CONFIG_ADB_PMU
+	/* Special case for the old PowerBook since I can't test on it */
+	backlight_autosave = machine_is_compatible("AAPL,3400/2400")
+		|| machine_is_compatible("AAPL,3500");
+	if ((backlight_autosave
+	     || machine_is_compatible("AAPL,PowerBook1998")
+	     || machine_is_compatible("PowerBook1,1"))
+	    && !strcmp(type, "pmu"))
+		valid = 1;
+#endif
+	if (bk_node) {
+		prop = get_property(bk_node, "backlight-control", NULL);
+		if (prop && !strncmp(prop, type, strlen(type)))
+			valid = 1;
+	}
+	if (!valid)
+		return;
+	backlighter = ctrler;
+	backlighter_data = data;
+
+	if (bk_node && !backlight_autosave)
+		prop = get_property(bk_node, "bklt", NULL);
+	else
+		prop = NULL;
+	if (prop) {
+		backlight_level = ((*prop)+1) >> 1;
+		if (backlight_level > BACKLIGHT_MAX)
+			backlight_level = BACKLIGHT_MAX;
+	}
+
+#ifdef CONFIG_ADB_PMU
+	if (backlight_autosave) {
+		struct adb_request req;
+		pmu_request(&req, NULL, 2, 0xd9, 0);
+		while (!req.complete)
+			pmu_poll();
+		backlight_level = req.reply[0] >> 4;
+	}
+#endif
+	acquire_console_sem();
+	if (!backlighter->set_enable(1, backlight_level, data))
+		backlight_enabled = 1;
+	release_console_sem();
+
+	printk(KERN_INFO "Registered \"%s\" backlight controller,"
+	       "level: %d/15\n", type, backlight_level);
+}
+EXPORT_SYMBOL(register_backlight_controller);
+
+void unregister_backlight_controller(struct backlight_controller
+					    *ctrler, void *data)
+{
+	/* We keep the current backlight level (for now) */
+	if (ctrler == backlighter && data == backlighter_data)
+		backlighter = NULL;
+}
+EXPORT_SYMBOL(unregister_backlight_controller);
+
+static int __set_backlight_enable(int enable)
+{
+	int rc;
+
+	if (!backlighter)
+		return -ENODEV;
+	acquire_console_sem();
+	rc = backlighter->set_enable(enable, backlight_level,
+				     backlighter_data);
+	if (!rc)
+		backlight_enabled = enable;
+	release_console_sem();
+	return rc;
+}
+int set_backlight_enable(int enable)
+{
+	if (!backlighter)
+		return -ENODEV;
+	backlight_req_enable = enable;
+	schedule_work(&backlight_work);
+	return 0;
+}
+
+EXPORT_SYMBOL(set_backlight_enable);
+
+int get_backlight_enable(void)
+{
+	if (!backlighter)
+		return -ENODEV;
+	return backlight_enabled;
+}
+EXPORT_SYMBOL(get_backlight_enable);
+
+static int __set_backlight_level(int level)
+{
+	int rc = 0;
+
+	if (!backlighter)
+		return -ENODEV;
+	if (level < BACKLIGHT_MIN)
+		level = BACKLIGHT_OFF;
+	if (level > BACKLIGHT_MAX)
+		level = BACKLIGHT_MAX;
+	acquire_console_sem();
+	if (backlight_enabled)
+		rc = backlighter->set_level(level, backlighter_data);
+	if (!rc)
+		backlight_level = level;
+	release_console_sem();
+	if (!rc && !backlight_autosave) {
+		level <<=1;
+		if (level & 0x10)
+			level |= 0x01;
+		// -- todo: save to property "bklt"
+	}
+	return rc;
+}
+int set_backlight_level(int level)
+{
+	if (!backlighter)
+		return -ENODEV;
+	backlight_req_level = level;
+	schedule_work(&backlight_work);
+	return 0;
+}
+
+EXPORT_SYMBOL(set_backlight_level);
+
+int get_backlight_level(void)
+{
+	if (!backlighter)
+		return -ENODEV;
+	return backlight_level;
+}
+EXPORT_SYMBOL(get_backlight_level);
+
+static void backlight_callback(void *dummy)
+{
+	int level, enable;
+
+	do {
+		level = backlight_req_level;
+		enable = backlight_req_enable;
+		mb();
+
+		if (level >= 0)
+			__set_backlight_level(level);
+		if (enable >= 0)
+			__set_backlight_enable(enable);
+	} while(cmpxchg(&backlight_req_level, level, -1) != level ||
+		cmpxchg(&backlight_req_enable, enable, -1) != enable);
+}
diff --git a/arch/powerpc/platforms/powermac/pmac_cache.S b/arch/powerpc/platforms/powermac/pmac_cache.S
new file mode 100644
index 00000000000..fb977de6b70
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_cache.S
@@ -0,0 +1,359 @@
+/*
+ * This file contains low-level cache management functions
+ * used for sleep and CPU speed changes on Apple machines.
+ * (In fact the only thing that is Apple-specific is that we assume
+ * that we can read from ROM at physical address 0xfff00000.)
+ *
+ *    Copyright (C) 2004 Paul Mackerras (paulus@samba.org) and
+ *                       Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+
+/*
+ * Flush and disable all data caches (dL1, L2, L3). This is used
+ * when going to sleep, when doing a PMU based cpufreq transition,
+ * or when "offlining" a CPU on SMP machines. This code is over
+ * paranoid, but I've had enough issues with various CPU revs and
+ * bugs that I decided it was worth beeing over cautious
+ */
+
+_GLOBAL(flush_disable_caches)
+#ifndef CONFIG_6xx
+	blr
+#else
+BEGIN_FTR_SECTION
+	b	flush_disable_745x
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+BEGIN_FTR_SECTION
+	b	flush_disable_75x
+END_FTR_SECTION_IFSET(CPU_FTR_L2CR)
+	b	__flush_disable_L1
+
+/* This is the code for G3 and 74[01]0 */
+flush_disable_75x:
+	mflr	r10
+
+	/* Turn off EE and DR in MSR */
+	mfmsr	r11
+	rlwinm	r0,r11,0,~MSR_EE
+	rlwinm	r0,r0,0,~MSR_DR
+	sync
+	mtmsr	r0
+	isync
+
+	/* Stop DST streams */
+BEGIN_FTR_SECTION
+	DSSALL
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+	/* Stop DPM */
+	mfspr	r8,SPRN_HID0		/* Save SPRN_HID0 in r8 */
+	rlwinm	r4,r8,0,12,10		/* Turn off HID0[DPM] */
+	sync
+	mtspr	SPRN_HID0,r4		/* Disable DPM */
+	sync
+
+	/* Disp-flush L1. We have a weird problem here that I never
+	 * totally figured out. On 750FX, using the ROM for the flush
+	 * results in a non-working flush. We use that workaround for
+	 * now until I finally understand what's going on. --BenH
+	 */
+
+	/* ROM base by default */
+	lis	r4,0xfff0
+	mfpvr	r3
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,0x7000
+	bne+	1f
+	/* RAM base on 750FX */
+	li	r4,0
+1:	li	r4,0x4000
+	mtctr	r4
+1:	lwz	r0,0(r4)
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+
+	/* Disable / invalidate / enable L1 data */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,~(HID0_DCE | HID0_ICE)
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+	ori	r3,r3,(HID0_DCE|HID0_DCI|HID0_ICE|HID0_ICFI)
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	xori	r3,r3,(HID0_DCI|HID0_ICFI)
+	mtspr	SPRN_HID0,r3
+	sync
+
+	/* Get the current enable bit of the L2CR into r4 */
+	mfspr	r5,SPRN_L2CR
+	/* Set to data-only (pre-745x bit) */
+	oris	r3,r5,L2CR_L2DO@h
+	b	2f
+	/* When disabling L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r3
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	/* disp-flush L2. The interesting thing here is that the L2 can be
+	 * up to 2Mb ... so using the ROM, we'll end up wrapping back to memory
+	 * but that is probbaly fine. We disp-flush over 4Mb to be safe
+	 */
+	lis	r4,2
+	mtctr	r4
+	lis	r4,0xfff0
+1:	lwz	r0,0(r4)
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+	lis	r4,2
+	mtctr	r4
+	lis	r4,0xfff0
+1:	dcbf	0,r4
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+
+	/* now disable L2 */
+	rlwinm	r5,r5,0,~L2CR_L2E
+	b	2f
+	/* When disabling L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r5
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	sync
+	isync
+	/* Invalidate L2. This is pre-745x, we clear the L2I bit ourselves */
+	oris	r4,r5,L2CR_L2I@h
+	mtspr	SPRN_L2CR,r4
+	sync
+	isync
+
+	/* Wait for the invalidation to complete */
+1:	mfspr	r3,SPRN_L2CR
+	rlwinm.	r0,r3,0,31,31
+	bne	1b
+
+	/* Clear L2I */
+	xoris	r4,r4,L2CR_L2I@h
+	sync
+	mtspr	SPRN_L2CR,r4
+	sync
+
+	/* now disable the L1 data cache */
+	mfspr	r0,SPRN_HID0
+	rlwinm	r0,r0,0,~(HID0_DCE|HID0_ICE)
+	mtspr	SPRN_HID0,r0
+	sync
+	isync
+
+	/* Restore HID0[DPM] to whatever it was before */
+	sync
+	mfspr	r0,SPRN_HID0
+	rlwimi	r0,r8,0,11,11		/* Turn back HID0[DPM] */
+	mtspr	SPRN_HID0,r0
+	sync
+
+	/* restore DR and EE */
+	sync
+	mtmsr	r11
+	isync
+
+	mtlr	r10
+	blr
+
+/* This code is for 745x processors */
+flush_disable_745x:
+	/* Turn off EE and DR in MSR */
+	mfmsr	r11
+	rlwinm	r0,r11,0,~MSR_EE
+	rlwinm	r0,r0,0,~MSR_DR
+	sync
+	mtmsr	r0
+	isync
+
+	/* Stop prefetch streams */
+	DSSALL
+	sync
+
+	/* Disable L2 prefetching */
+	mfspr	r0,SPRN_MSSCR0
+	rlwinm	r0,r0,0,0,29
+	mtspr	SPRN_MSSCR0,r0
+	sync
+	isync
+	lis	r4,0
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+	dcbf	0,r4
+
+	/* Due to a bug with the HW flush on some CPU revs, we occasionally
+	 * experience data corruption. I'm adding a displacement flush along
+	 * with a dcbf loop over a few Mb to "help". The problem isn't totally
+	 * fixed by this in theory, but at least, in practice, I couldn't reproduce
+	 * it even with a big hammer...
+	 */
+
+        lis     r4,0x0002
+        mtctr   r4
+ 	li      r4,0
+1:
+        lwz     r0,0(r4)
+        addi    r4,r4,32                /* Go to start of next cache line */
+        bdnz    1b
+        isync
+
+        /* Now, flush the first 4MB of memory */
+        lis     r4,0x0002
+        mtctr   r4
+	li      r4,0
+        sync
+1:
+        dcbf    0,r4
+        addi    r4,r4,32                /* Go to start of next cache line */
+        bdnz    1b
+
+	/* Flush and disable the L1 data cache */
+	mfspr	r6,SPRN_LDSTCR
+	lis	r3,0xfff0	/* read from ROM for displacement flush */
+	li	r4,0xfe		/* start with only way 0 unlocked */
+	li	r5,128		/* 128 lines in each way */
+1:	mtctr	r5
+	rlwimi	r6,r4,0,24,31
+	mtspr	SPRN_LDSTCR,r6
+	sync
+	isync
+2:	lwz	r0,0(r3)	/* touch each cache line */
+	addi	r3,r3,32
+	bdnz	2b
+	rlwinm	r4,r4,1,24,30	/* move on to the next way */
+	ori	r4,r4,1
+	cmpwi	r4,0xff		/* all done? */
+	bne	1b
+	/* now unlock the L1 data cache */
+	li	r4,0
+	rlwimi	r6,r4,0,24,31
+	sync
+	mtspr	SPRN_LDSTCR,r6
+	sync
+	isync
+
+	/* Flush the L2 cache using the hardware assist */
+	mfspr	r3,SPRN_L2CR
+	cmpwi	r3,0		/* check if it is enabled first */
+	bge	4f
+	oris	r0,r3,(L2CR_L2IO_745x|L2CR_L2DO_745x)@h
+	b	2f
+	/* When disabling/locking L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r0	/* lock the L2 cache */
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	sync
+	isync
+	ori	r0,r3,L2CR_L2HWF_745x
+	sync
+	mtspr	SPRN_L2CR,r0	/* set the hardware flush bit */
+3:	mfspr	r0,SPRN_L2CR	/* wait for it to go to 0 */
+	andi.	r0,r0,L2CR_L2HWF_745x
+	bne	3b
+	sync
+	rlwinm	r3,r3,0,~L2CR_L2E
+	b	2f
+	/* When disabling L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r3	/* disable the L2 cache */
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	sync
+	isync
+	oris	r4,r3,L2CR_L2I@h
+	mtspr	SPRN_L2CR,r4
+	sync
+	isync
+1:	mfspr	r4,SPRN_L2CR
+	andis.	r0,r4,L2CR_L2I@h
+	bne	1b
+	sync
+
+BEGIN_FTR_SECTION
+	/* Flush the L3 cache using the hardware assist */
+4:	mfspr	r3,SPRN_L3CR
+	cmpwi	r3,0		/* check if it is enabled */
+	bge	6f
+	oris	r0,r3,L3CR_L3IO@h
+	ori	r0,r0,L3CR_L3DO
+	sync
+	mtspr	SPRN_L3CR,r0	/* lock the L3 cache */
+	sync
+	isync
+	ori	r0,r0,L3CR_L3HWF
+	sync
+	mtspr	SPRN_L3CR,r0	/* set the hardware flush bit */
+5:	mfspr	r0,SPRN_L3CR	/* wait for it to go to zero */
+	andi.	r0,r0,L3CR_L3HWF
+	bne	5b
+	rlwinm	r3,r3,0,~L3CR_L3E
+	sync
+	mtspr	SPRN_L3CR,r3	/* disable the L3 cache */
+	sync
+	ori	r4,r3,L3CR_L3I
+	mtspr	SPRN_L3CR,r4
+1:	mfspr	r4,SPRN_L3CR
+	andi.	r0,r4,L3CR_L3I
+	bne	1b
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+
+6:	mfspr	r0,SPRN_HID0	/* now disable the L1 data cache */
+	rlwinm	r0,r0,0,~HID0_DCE
+	mtspr	SPRN_HID0,r0
+	sync
+	isync
+	mtmsr	r11		/* restore DR and EE */
+	isync
+	blr
+#endif	/* CONFIG_6xx */
diff --git a/arch/powerpc/platforms/powermac/pmac_cpufreq.c b/arch/powerpc/platforms/powermac/pmac_cpufreq.c
new file mode 100644
index 00000000000..6d32d99402b
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_cpufreq.c
@@ -0,0 +1,728 @@
+/*
+ *  arch/ppc/platforms/pmac_cpufreq.c
+ *
+ *  Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *  Copyright (C) 2004        John Steele Scott <toojays@toojays.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * TODO: Need a big cleanup here. Basically, we need to have different
+ * cpufreq_driver structures for the different type of HW instead of the
+ * current mess. We also need to better deal with the detection of the
+ * type of machine.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/slab.h>
+#include <linux/cpufreq.h>
+#include <linux/init.h>
+#include <linux/sysdev.h>
+#include <linux/i2c.h>
+#include <linux/hardirq.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/pmac_feature.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/system.h>
+#include <asm/mpic.h>
+#include <asm/keylargo.h>
+
+/* WARNING !!! This will cause calibrate_delay() to be called,
+ * but this is an __init function ! So you MUST go edit
+ * init/main.c to make it non-init before enabling DEBUG_FREQ
+ */
+#undef DEBUG_FREQ
+
+/*
+ * There is a problem with the core cpufreq code on SMP kernels,
+ * it won't recalculate the Bogomips properly
+ */
+#ifdef CONFIG_SMP
+#warning "WARNING, CPUFREQ not recommended on SMP kernels"
+#endif
+
+extern void low_choose_7447a_dfs(int dfs);
+extern void low_choose_750fx_pll(int pll);
+extern void low_sleep_handler(void);
+
+/*
+ * Currently, PowerMac cpufreq supports only high & low frequencies
+ * that are set by the firmware
+ */
+static unsigned int low_freq;
+static unsigned int hi_freq;
+static unsigned int cur_freq;
+static unsigned int sleep_freq;
+
+/*
+ * Different models uses different mecanisms to switch the frequency
+ */
+static int (*set_speed_proc)(int low_speed);
+static unsigned int (*get_speed_proc)(void);
+
+/*
+ * Some definitions used by the various speedprocs
+ */
+static u32 voltage_gpio;
+static u32 frequency_gpio;
+static u32 slew_done_gpio;
+static int no_schedule;
+static int has_cpu_l2lve;
+static int is_pmu_based;
+
+/* There are only two frequency states for each processor. Values
+ * are in kHz for the time being.
+ */
+#define CPUFREQ_HIGH                  0
+#define CPUFREQ_LOW                   1
+
+static struct cpufreq_frequency_table pmac_cpu_freqs[] = {
+	{CPUFREQ_HIGH, 		0},
+	{CPUFREQ_LOW,		0},
+	{0,			CPUFREQ_TABLE_END},
+};
+
+static struct freq_attr* pmac_cpu_freqs_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static inline void local_delay(unsigned long ms)
+{
+	if (no_schedule)
+		mdelay(ms);
+	else
+		msleep(ms);
+}
+
+static inline void wakeup_decrementer(void)
+{
+	set_dec(tb_ticks_per_jiffy);
+	/* No currently-supported powerbook has a 601,
+	 * so use get_tbl, not native
+	 */
+	last_jiffy_stamp(0) = tb_last_stamp = get_tbl();
+}
+
+#ifdef DEBUG_FREQ
+static inline void debug_calc_bogomips(void)
+{
+	/* This will cause a recalc of bogomips and display the
+	 * result. We backup/restore the value to avoid affecting the
+	 * core cpufreq framework's own calculation.
+	 */
+	extern void calibrate_delay(void);
+
+	unsigned long save_lpj = loops_per_jiffy;
+	calibrate_delay();
+	loops_per_jiffy = save_lpj;
+}
+#endif /* DEBUG_FREQ */
+
+/* Switch CPU speed under 750FX CPU control
+ */
+static int cpu_750fx_cpu_speed(int low_speed)
+{
+	u32 hid2;
+
+	if (low_speed == 0) {
+		/* ramping up, set voltage first */
+		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
+		/* Make sure we sleep for at least 1ms */
+		local_delay(10);
+
+		/* tweak L2 for high voltage */
+		if (has_cpu_l2lve) {
+			hid2 = mfspr(SPRN_HID2);
+			hid2 &= ~0x2000;
+			mtspr(SPRN_HID2, hid2);
+		}
+	}
+#ifdef CONFIG_6xx
+	low_choose_750fx_pll(low_speed);
+#endif
+	if (low_speed == 1) {
+		/* tweak L2 for low voltage */
+		if (has_cpu_l2lve) {
+			hid2 = mfspr(SPRN_HID2);
+			hid2 |= 0x2000;
+			mtspr(SPRN_HID2, hid2);
+		}
+
+		/* ramping down, set voltage last */
+		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
+		local_delay(10);
+	}
+
+	return 0;
+}
+
+static unsigned int cpu_750fx_get_cpu_speed(void)
+{
+	if (mfspr(SPRN_HID1) & HID1_PS)
+		return low_freq;
+	else
+		return hi_freq;
+}
+
+/* Switch CPU speed using DFS */
+static int dfs_set_cpu_speed(int low_speed)
+{
+	if (low_speed == 0) {
+		/* ramping up, set voltage first */
+		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
+		/* Make sure we sleep for at least 1ms */
+		local_delay(1);
+	}
+
+	/* set frequency */
+#ifdef CONFIG_6xx
+	low_choose_7447a_dfs(low_speed);
+#endif
+	udelay(100);
+
+	if (low_speed == 1) {
+		/* ramping down, set voltage last */
+		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
+		local_delay(1);
+	}
+
+	return 0;
+}
+
+static unsigned int dfs_get_cpu_speed(void)
+{
+	if (mfspr(SPRN_HID1) & HID1_DFS)
+		return low_freq;
+	else
+		return hi_freq;
+}
+
+
+/* Switch CPU speed using slewing GPIOs
+ */
+static int gpios_set_cpu_speed(int low_speed)
+{
+	int gpio, timeout = 0;
+
+	/* If ramping up, set voltage first */
+	if (low_speed == 0) {
+		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
+		/* Delay is way too big but it's ok, we schedule */
+		local_delay(10);
+	}
+
+	/* Set frequency */
+	gpio = 	pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
+	if (low_speed == ((gpio & 0x01) == 0))
+		goto skip;
+
+	pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio,
+			  low_speed ? 0x04 : 0x05);
+	udelay(200);
+	do {
+		if (++timeout > 100)
+			break;
+		local_delay(1);
+		gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0);
+	} while((gpio & 0x02) == 0);
+ skip:
+	/* If ramping down, set voltage last */
+	if (low_speed == 1) {
+		pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
+		/* Delay is way too big but it's ok, we schedule */
+		local_delay(10);
+	}
+
+#ifdef DEBUG_FREQ
+	debug_calc_bogomips();
+#endif
+
+	return 0;
+}
+
+/* Switch CPU speed under PMU control
+ */
+static int pmu_set_cpu_speed(int low_speed)
+{
+	struct adb_request req;
+	unsigned long save_l2cr;
+	unsigned long save_l3cr;
+	unsigned int pic_prio;
+	unsigned long flags;
+
+	preempt_disable();
+
+#ifdef DEBUG_FREQ
+	printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1));
+#endif
+	pmu_suspend();
+
+	/* Disable all interrupt sources on openpic */
+ 	pic_prio = mpic_cpu_get_priority();
+	mpic_cpu_set_priority(0xf);
+
+	/* Make sure the decrementer won't interrupt us */
+	asm volatile("mtdec %0" : : "r" (0x7fffffff));
+	/* Make sure any pending DEC interrupt occuring while we did
+	 * the above didn't re-enable the DEC */
+	mb();
+	asm volatile("mtdec %0" : : "r" (0x7fffffff));
+
+	/* We can now disable MSR_EE */
+	local_irq_save(flags);
+
+	/* Giveup the FPU & vec */
+	enable_kernel_fp();
+
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		enable_kernel_altivec();
+#endif /* CONFIG_ALTIVEC */
+
+	/* Save & disable L2 and L3 caches */
+	save_l3cr = _get_L3CR();	/* (returns -1 if not available) */
+	save_l2cr = _get_L2CR();	/* (returns -1 if not available) */
+
+	/* Send the new speed command. My assumption is that this command
+	 * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep
+	 */
+	pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed);
+	while (!req.complete)
+		pmu_poll();
+
+	/* Prepare the northbridge for the speed transition */
+	pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1);
+
+	/* Call low level code to backup CPU state and recover from
+	 * hardware reset
+	 */
+	low_sleep_handler();
+
+	/* Restore the northbridge */
+	pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0);
+
+	/* Restore L2 cache */
+	if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0)
+ 		_set_L2CR(save_l2cr);
+	/* Restore L3 cache */
+	if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0)
+ 		_set_L3CR(save_l3cr);
+
+	/* Restore userland MMU context */
+	set_context(current->active_mm->context, current->active_mm->pgd);
+
+#ifdef DEBUG_FREQ
+	printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));
+#endif
+
+	/* Restore low level PMU operations */
+	pmu_unlock();
+
+	/* Restore decrementer */
+	wakeup_decrementer();
+
+	/* Restore interrupts */
+ 	mpic_cpu_set_priority(pic_prio);
+
+	/* Let interrupts flow again ... */
+	local_irq_restore(flags);
+
+#ifdef DEBUG_FREQ
+	debug_calc_bogomips();
+#endif
+
+	pmu_resume();
+
+	preempt_enable();
+
+	return 0;
+}
+
+static int do_set_cpu_speed(int speed_mode, int notify)
+{
+	struct cpufreq_freqs freqs;
+	unsigned long l3cr;
+	static unsigned long prev_l3cr;
+
+	freqs.old = cur_freq;
+	freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq;
+	freqs.cpu = smp_processor_id();
+
+	if (freqs.old == freqs.new)
+		return 0;
+
+	if (notify)
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	if (speed_mode == CPUFREQ_LOW &&
+	    cpu_has_feature(CPU_FTR_L3CR)) {
+		l3cr = _get_L3CR();
+		if (l3cr & L3CR_L3E) {
+			prev_l3cr = l3cr;
+			_set_L3CR(0);
+		}
+	}
+	set_speed_proc(speed_mode == CPUFREQ_LOW);
+	if (speed_mode == CPUFREQ_HIGH &&
+	    cpu_has_feature(CPU_FTR_L3CR)) {
+		l3cr = _get_L3CR();
+		if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr)
+			_set_L3CR(prev_l3cr);
+	}
+	if (notify)
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq;
+
+	return 0;
+}
+
+static unsigned int pmac_cpufreq_get_speed(unsigned int cpu)
+{
+	return cur_freq;
+}
+
+static int pmac_cpufreq_verify(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs);
+}
+
+static int pmac_cpufreq_target(	struct cpufreq_policy *policy,
+					unsigned int target_freq,
+					unsigned int relation)
+{
+	unsigned int    newstate = 0;
+
+	if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs,
+			target_freq, relation, &newstate))
+		return -EINVAL;
+
+	return do_set_cpu_speed(newstate, 1);
+}
+
+unsigned int pmac_get_one_cpufreq(int i)
+{
+	/* Supports only one CPU for now */
+	return (i == 0) ? cur_freq : 0;
+}
+
+static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy)
+{
+	if (policy->cpu != 0)
+		return -ENODEV;
+
+	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+	policy->cpuinfo.transition_latency	= CPUFREQ_ETERNAL;
+	policy->cur = cur_freq;
+
+	cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu);
+	return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs);
+}
+
+static u32 read_gpio(struct device_node *np)
+{
+	u32 *reg = (u32 *)get_property(np, "reg", NULL);
+	u32 offset;
+
+	if (reg == NULL)
+		return 0;
+	/* That works for all keylargos but shall be fixed properly
+	 * some day... The problem is that it seems we can't rely
+	 * on the "reg" property of the GPIO nodes, they are either
+	 * relative to the base of KeyLargo or to the base of the
+	 * GPIO space, and the device-tree doesn't help.
+	 */
+	offset = *reg;
+	if (offset < KEYLARGO_GPIO_LEVELS0)
+		offset += KEYLARGO_GPIO_LEVELS0;
+	return offset;
+}
+
+static int pmac_cpufreq_suspend(struct cpufreq_policy *policy, pm_message_t pmsg)
+{
+	/* Ok, this could be made a bit smarter, but let's be robust for now. We
+	 * always force a speed change to high speed before sleep, to make sure
+	 * we have appropriate voltage and/or bus speed for the wakeup process,
+	 * and to make sure our loops_per_jiffies are "good enough", that is will
+	 * not cause too short delays if we sleep in low speed and wake in high
+	 * speed..
+	 */
+	no_schedule = 1;
+	sleep_freq = cur_freq;
+	if (cur_freq == low_freq && !is_pmu_based)
+		do_set_cpu_speed(CPUFREQ_HIGH, 0);
+	return 0;
+}
+
+static int pmac_cpufreq_resume(struct cpufreq_policy *policy)
+{
+	/* If we resume, first check if we have a get() function */
+	if (get_speed_proc)
+		cur_freq = get_speed_proc();
+	else
+		cur_freq = 0;
+
+	/* We don't, hrm... we don't really know our speed here, best
+	 * is that we force a switch to whatever it was, which is
+	 * probably high speed due to our suspend() routine
+	 */
+	do_set_cpu_speed(sleep_freq == low_freq ?
+			 CPUFREQ_LOW : CPUFREQ_HIGH, 0);
+
+	no_schedule = 0;
+	return 0;
+}
+
+static struct cpufreq_driver pmac_cpufreq_driver = {
+	.verify 	= pmac_cpufreq_verify,
+	.target 	= pmac_cpufreq_target,
+	.get		= pmac_cpufreq_get_speed,
+	.init		= pmac_cpufreq_cpu_init,
+	.suspend	= pmac_cpufreq_suspend,
+	.resume		= pmac_cpufreq_resume,
+	.flags		= CPUFREQ_PM_NO_WARN,
+	.attr		= pmac_cpu_freqs_attr,
+	.name		= "powermac",
+	.owner		= THIS_MODULE,
+};
+
+
+static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode)
+{
+	struct device_node *volt_gpio_np = of_find_node_by_name(NULL,
+								"voltage-gpio");
+	struct device_node *freq_gpio_np = of_find_node_by_name(NULL,
+								"frequency-gpio");
+	struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL,
+								     "slewing-done");
+	u32 *value;
+
+	/*
+	 * Check to see if it's GPIO driven or PMU only
+	 *
+	 * The way we extract the GPIO address is slightly hackish, but it
+	 * works well enough for now. We need to abstract the whole GPIO
+	 * stuff sooner or later anyway
+	 */
+
+	if (volt_gpio_np)
+		voltage_gpio = read_gpio(volt_gpio_np);
+	if (freq_gpio_np)
+		frequency_gpio = read_gpio(freq_gpio_np);
+	if (slew_done_gpio_np)
+		slew_done_gpio = read_gpio(slew_done_gpio_np);
+
+	/* If we use the frequency GPIOs, calculate the min/max speeds based
+	 * on the bus frequencies
+	 */
+	if (frequency_gpio && slew_done_gpio) {
+		int lenp, rc;
+		u32 *freqs, *ratio;
+
+		freqs = (u32 *)get_property(cpunode, "bus-frequencies", &lenp);
+		lenp /= sizeof(u32);
+		if (freqs == NULL || lenp != 2) {
+			printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n");
+			return 1;
+		}
+		ratio = (u32 *)get_property(cpunode, "processor-to-bus-ratio*2", NULL);
+		if (ratio == NULL) {
+			printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n");
+			return 1;
+		}
+
+		/* Get the min/max bus frequencies */
+		low_freq = min(freqs[0], freqs[1]);
+		hi_freq = max(freqs[0], freqs[1]);
+
+		/* Grrrr.. It _seems_ that the device-tree is lying on the low bus
+		 * frequency, it claims it to be around 84Mhz on some models while
+		 * it appears to be approx. 101Mhz on all. Let's hack around here...
+		 * fortunately, we don't need to be too precise
+		 */
+		if (low_freq < 98000000)
+			low_freq = 101000000;
+			
+		/* Convert those to CPU core clocks */
+		low_freq = (low_freq * (*ratio)) / 2000;
+		hi_freq = (hi_freq * (*ratio)) / 2000;
+
+		/* Now we get the frequencies, we read the GPIO to see what is out current
+		 * speed
+		 */
+		rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
+		cur_freq = (rc & 0x01) ? hi_freq : low_freq;
+
+		set_speed_proc = gpios_set_cpu_speed;
+		return 1;
+	}
+
+	/* If we use the PMU, look for the min & max frequencies in the
+	 * device-tree
+	 */
+	value = (u32 *)get_property(cpunode, "min-clock-frequency", NULL);
+	if (!value)
+		return 1;
+	low_freq = (*value) / 1000;
+	/* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree
+	 * here */
+	if (low_freq < 100000)
+		low_freq *= 10;
+
+	value = (u32 *)get_property(cpunode, "max-clock-frequency", NULL);
+	if (!value)
+		return 1;
+	hi_freq = (*value) / 1000;
+	set_speed_proc = pmu_set_cpu_speed;
+	is_pmu_based = 1;
+
+	return 0;
+}
+
+static int pmac_cpufreq_init_7447A(struct device_node *cpunode)
+{
+	struct device_node *volt_gpio_np;
+
+	if (get_property(cpunode, "dynamic-power-step", NULL) == NULL)
+		return 1;
+
+	volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
+	if (volt_gpio_np)
+		voltage_gpio = read_gpio(volt_gpio_np);
+	if (!voltage_gpio){
+		printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n");
+		return 1;
+	}
+
+	/* OF only reports the high frequency */
+	hi_freq = cur_freq;
+	low_freq = cur_freq/2;
+
+	/* Read actual frequency from CPU */
+	cur_freq = dfs_get_cpu_speed();
+	set_speed_proc = dfs_set_cpu_speed;
+	get_speed_proc = dfs_get_cpu_speed;
+
+	return 0;
+}
+
+static int pmac_cpufreq_init_750FX(struct device_node *cpunode)
+{
+	struct device_node *volt_gpio_np;
+	u32 pvr, *value;
+
+	if (get_property(cpunode, "dynamic-power-step", NULL) == NULL)
+		return 1;
+
+	hi_freq = cur_freq;
+	value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL);
+	if (!value)
+		return 1;
+	low_freq = (*value) / 1000;
+
+	volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
+	if (volt_gpio_np)
+		voltage_gpio = read_gpio(volt_gpio_np);
+
+	pvr = mfspr(SPRN_PVR);
+	has_cpu_l2lve = !((pvr & 0xf00) == 0x100);
+
+	set_speed_proc = cpu_750fx_cpu_speed;
+	get_speed_proc = cpu_750fx_get_cpu_speed;
+	cur_freq = cpu_750fx_get_cpu_speed();
+
+	return 0;
+}
+
+/* Currently, we support the following machines:
+ *
+ *  - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz)
+ *  - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz)
+ *  - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz)
+ *  - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz)
+ *  - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz)
+ *  - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage)
+ *  - Recent MacRISC3 laptops
+ *  - All new machines with 7447A CPUs
+ */
+static int __init pmac_cpufreq_setup(void)
+{
+	struct device_node	*cpunode;
+	u32			*value;
+
+	if (strstr(cmd_line, "nocpufreq"))
+		return 0;
+
+	/* Assume only one CPU */
+	cpunode = find_type_devices("cpu");
+	if (!cpunode)
+		goto out;
+
+	/* Get current cpu clock freq */
+	value = (u32 *)get_property(cpunode, "clock-frequency", NULL);
+	if (!value)
+		goto out;
+	cur_freq = (*value) / 1000;
+
+	/*  Check for 7447A based MacRISC3 */
+	if (machine_is_compatible("MacRISC3") &&
+	    get_property(cpunode, "dynamic-power-step", NULL) &&
+	    PVR_VER(mfspr(SPRN_PVR)) == 0x8003) {
+		pmac_cpufreq_init_7447A(cpunode);
+	/* Check for other MacRISC3 machines */
+	} else if (machine_is_compatible("PowerBook3,4") ||
+		   machine_is_compatible("PowerBook3,5") ||
+		   machine_is_compatible("MacRISC3")) {
+		pmac_cpufreq_init_MacRISC3(cpunode);
+	/* Else check for iBook2 500/600 */
+	} else if (machine_is_compatible("PowerBook4,1")) {
+		hi_freq = cur_freq;
+		low_freq = 400000;
+		set_speed_proc = pmu_set_cpu_speed;
+		is_pmu_based = 1;
+	}
+	/* Else check for TiPb 400 & 500 */
+	else if (machine_is_compatible("PowerBook3,2")) {
+		/* We only know about the 400 MHz and the 500Mhz model
+		 * they both have 300 MHz as low frequency
+		 */
+		if (cur_freq < 350000 || cur_freq > 550000)
+			goto out;
+		hi_freq = cur_freq;
+		low_freq = 300000;
+		set_speed_proc = pmu_set_cpu_speed;
+		is_pmu_based = 1;
+	}
+	/* Else check for 750FX */
+	else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000)
+		pmac_cpufreq_init_750FX(cpunode);
+out:
+	if (set_speed_proc == NULL)
+		return -ENODEV;
+
+	pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq;
+	pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq;
+
+	printk(KERN_INFO "Registering PowerMac CPU frequency driver\n");
+	printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n",
+	       low_freq/1000, hi_freq/1000, cur_freq/1000);
+
+	return cpufreq_register_driver(&pmac_cpufreq_driver);
+}
+
+module_init(pmac_cpufreq_setup);
+
diff --git a/arch/powerpc/platforms/powermac/pmac_feature.c b/arch/powerpc/platforms/powermac/pmac_feature.c
new file mode 100644
index 00000000000..2cba670c71b
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_feature.c
@@ -0,0 +1,3062 @@
+/*
+ *  arch/ppc/platforms/pmac_feature.c
+ *
+ *  Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au)
+ *                          Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ *  TODO:
+ *
+ *   - Replace mdelay with some schedule loop if possible
+ *   - Shorten some obfuscated delays on some routines (like modem
+ *     power)
+ *   - Refcount some clocks (see darwin)
+ *   - Split split split...
+ *
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <asm/sections.h>
+#include <asm/errno.h>
+#include <asm/ohare.h>
+#include <asm/heathrow.h>
+#include <asm/keylargo.h>
+#include <asm/uninorth.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/dbdma.h>
+#include <asm/pci-bridge.h>
+#include <asm/pmac_low_i2c.h>
+
+#undef DEBUG_FEATURE
+
+#ifdef DEBUG_FEATURE
+#define DBG(fmt...) printk(KERN_DEBUG fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifdef CONFIG_6xx
+extern int powersave_lowspeed;
+#endif
+
+extern int powersave_nap;
+extern struct device_node *k2_skiplist[2];
+
+
+/*
+ * We use a single global lock to protect accesses. Each driver has
+ * to take care of its own locking
+ */
+static DEFINE_SPINLOCK(feature_lock);
+
+#define LOCK(flags)	spin_lock_irqsave(&feature_lock, flags);
+#define UNLOCK(flags)	spin_unlock_irqrestore(&feature_lock, flags);
+
+
+/*
+ * Instance of some macio stuffs
+ */
+struct macio_chip macio_chips[MAX_MACIO_CHIPS];
+
+struct macio_chip *macio_find(struct device_node *child, int type)
+{
+	while(child) {
+		int	i;
+
+		for (i=0; i < MAX_MACIO_CHIPS && macio_chips[i].of_node; i++)
+			if (child == macio_chips[i].of_node &&
+			    (!type || macio_chips[i].type == type))
+				return &macio_chips[i];
+		child = child->parent;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(macio_find);
+
+static const char *macio_names[] =
+{
+	"Unknown",
+	"Grand Central",
+	"OHare",
+	"OHareII",
+	"Heathrow",
+	"Gatwick",
+	"Paddington",
+	"Keylargo",
+	"Pangea",
+	"Intrepid",
+	"K2"
+};
+
+
+
+/*
+ * Uninorth reg. access. Note that Uni-N regs are big endian
+ */
+
+#define UN_REG(r)	(uninorth_base + ((r) >> 2))
+#define UN_IN(r)	(in_be32(UN_REG(r)))
+#define UN_OUT(r,v)	(out_be32(UN_REG(r), (v)))
+#define UN_BIS(r,v)	(UN_OUT((r), UN_IN(r) | (v)))
+#define UN_BIC(r,v)	(UN_OUT((r), UN_IN(r) & ~(v)))
+
+static struct device_node *uninorth_node;
+static u32 __iomem *uninorth_base;
+static u32 uninorth_rev;
+static int uninorth_u3;
+static void __iomem *u3_ht;
+
+/*
+ * For each motherboard family, we have a table of functions pointers
+ * that handle the various features.
+ */
+
+typedef long (*feature_call)(struct device_node *node, long param, long value);
+
+struct feature_table_entry {
+	unsigned int	selector;
+	feature_call	function;
+};
+
+struct pmac_mb_def
+{
+	const char*			model_string;
+	const char*			model_name;
+	int				model_id;
+	struct feature_table_entry*	features;
+	unsigned long			board_flags;
+};
+static struct pmac_mb_def pmac_mb;
+
+/*
+ * Here are the chip specific feature functions
+ */
+
+static inline int simple_feature_tweak(struct device_node *node, int type,
+				       int reg, u32 mask, int value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	macio = macio_find(node, type);
+	if (!macio)
+		return -ENODEV;
+	LOCK(flags);
+	if (value)
+		MACIO_BIS(reg, mask);
+	else
+		MACIO_BIC(reg, mask);
+	(void)MACIO_IN32(reg);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+#ifndef CONFIG_POWER4
+
+static long ohare_htw_scc_enable(struct device_node *node, long param,
+				 long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		chan_mask;
+	unsigned long		fcr;
+	unsigned long		flags;
+	int			htw, trans;
+	unsigned long		rmask;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	if (!strcmp(node->name, "ch-a"))
+		chan_mask = MACIO_FLAG_SCCA_ON;
+	else if (!strcmp(node->name, "ch-b"))
+		chan_mask = MACIO_FLAG_SCCB_ON;
+	else
+		return -ENODEV;
+
+	htw = (macio->type == macio_heathrow || macio->type == macio_paddington
+		|| macio->type == macio_gatwick);
+	/* On these machines, the HRW_SCC_TRANS_EN_N bit mustn't be touched */
+	trans = (pmac_mb.model_id != PMAC_TYPE_YOSEMITE &&
+		 pmac_mb.model_id != PMAC_TYPE_YIKES);
+	if (value) {
+#ifdef CONFIG_ADB_PMU
+		if ((param & 0xfff) == PMAC_SCC_IRDA)
+			pmu_enable_irled(1);
+#endif /* CONFIG_ADB_PMU */
+		LOCK(flags);
+		fcr = MACIO_IN32(OHARE_FCR);
+		/* Check if scc cell need enabling */
+		if (!(fcr & OH_SCC_ENABLE)) {
+			fcr |= OH_SCC_ENABLE;
+			if (htw) {
+				/* Side effect: this will also power up the
+				 * modem, but it's too messy to figure out on which
+				 * ports this controls the tranceiver and on which
+				 * it controls the modem
+				 */
+				if (trans)
+					fcr &= ~HRW_SCC_TRANS_EN_N;
+				MACIO_OUT32(OHARE_FCR, fcr);
+				fcr |= (rmask = HRW_RESET_SCC);
+				MACIO_OUT32(OHARE_FCR, fcr);
+			} else {
+				fcr |= (rmask = OH_SCC_RESET);
+				MACIO_OUT32(OHARE_FCR, fcr);
+			}
+			UNLOCK(flags);
+			(void)MACIO_IN32(OHARE_FCR);
+			mdelay(15);
+			LOCK(flags);
+			fcr &= ~rmask;
+			MACIO_OUT32(OHARE_FCR, fcr);
+		}
+		if (chan_mask & MACIO_FLAG_SCCA_ON)
+			fcr |= OH_SCCA_IO;
+		if (chan_mask & MACIO_FLAG_SCCB_ON)
+			fcr |= OH_SCCB_IO;
+		MACIO_OUT32(OHARE_FCR, fcr);
+		macio->flags |= chan_mask;
+		UNLOCK(flags);
+		if (param & PMAC_SCC_FLAG_XMON)
+			macio->flags |= MACIO_FLAG_SCC_LOCKED;
+	} else {
+		if (macio->flags & MACIO_FLAG_SCC_LOCKED)
+			return -EPERM;
+		LOCK(flags);
+		fcr = MACIO_IN32(OHARE_FCR);
+		if (chan_mask & MACIO_FLAG_SCCA_ON)
+			fcr &= ~OH_SCCA_IO;
+		if (chan_mask & MACIO_FLAG_SCCB_ON)
+			fcr &= ~OH_SCCB_IO;
+		MACIO_OUT32(OHARE_FCR, fcr);
+		if ((fcr & (OH_SCCA_IO | OH_SCCB_IO)) == 0) {
+			fcr &= ~OH_SCC_ENABLE;
+			if (htw && trans)
+				fcr |= HRW_SCC_TRANS_EN_N;
+			MACIO_OUT32(OHARE_FCR, fcr);
+		}
+		macio->flags &= ~(chan_mask);
+		UNLOCK(flags);
+		mdelay(10);
+#ifdef CONFIG_ADB_PMU
+		if ((param & 0xfff) == PMAC_SCC_IRDA)
+			pmu_enable_irled(0);
+#endif /* CONFIG_ADB_PMU */
+	}
+	return 0;
+}
+
+static long ohare_floppy_enable(struct device_node *node, long param,
+				long value)
+{
+	return simple_feature_tweak(node, macio_ohare,
+		OHARE_FCR, OH_FLOPPY_ENABLE, value);
+}
+
+static long ohare_mesh_enable(struct device_node *node, long param, long value)
+{
+	return simple_feature_tweak(node, macio_ohare,
+		OHARE_FCR, OH_MESH_ENABLE, value);
+}
+
+static long ohare_ide_enable(struct device_node *node, long param, long value)
+{
+	switch(param) {
+	case 0:
+		/* For some reason, setting the bit in set_initial_features()
+		 * doesn't stick. I'm still investigating... --BenH.
+		 */
+		if (value)
+			simple_feature_tweak(node, macio_ohare,
+				OHARE_FCR, OH_IOBUS_ENABLE, 1);
+		return simple_feature_tweak(node, macio_ohare,
+			OHARE_FCR, OH_IDE0_ENABLE, value);
+	case 1:
+		return simple_feature_tweak(node, macio_ohare,
+			OHARE_FCR, OH_BAY_IDE_ENABLE, value);
+	default:
+		return -ENODEV;
+	}
+}
+
+static long ohare_ide_reset(struct device_node *node, long param, long value)
+{
+	switch(param) {
+	case 0:
+		return simple_feature_tweak(node, macio_ohare,
+			OHARE_FCR, OH_IDE0_RESET_N, !value);
+	case 1:
+		return simple_feature_tweak(node, macio_ohare,
+			OHARE_FCR, OH_IDE1_RESET_N, !value);
+	default:
+		return -ENODEV;
+	}
+}
+
+static long ohare_sleep_state(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio = &macio_chips[0];
+
+	if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0)
+		return -EPERM;
+	if (value == 1) {
+		MACIO_BIC(OHARE_FCR, OH_IOBUS_ENABLE);
+	} else if (value == 0) {
+		MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+	}
+
+	return 0;
+}
+
+static long heathrow_modem_enable(struct device_node *node, long param,
+				  long value)
+{
+	struct macio_chip*	macio;
+	u8			gpio;
+	unsigned long		flags;
+
+	macio = macio_find(node, macio_unknown);
+	if (!macio)
+		return -ENODEV;
+	gpio = MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1;
+	if (!value) {
+		LOCK(flags);
+		MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio);
+		UNLOCK(flags);
+		(void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+		mdelay(250);
+	}
+	if (pmac_mb.model_id != PMAC_TYPE_YOSEMITE &&
+	    pmac_mb.model_id != PMAC_TYPE_YIKES) {
+		LOCK(flags);
+		if (value)
+			MACIO_BIC(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+		else
+			MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+		UNLOCK(flags);
+		(void)MACIO_IN32(HEATHROW_FCR);
+		mdelay(250);
+	}
+	if (value) {
+		LOCK(flags);
+		MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1);
+		(void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio);
+		(void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1);
+		(void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250);
+	}
+	return 0;
+}
+
+static long heathrow_floppy_enable(struct device_node *node, long param,
+				   long value)
+{
+	return simple_feature_tweak(node, macio_unknown,
+		HEATHROW_FCR,
+		HRW_SWIM_ENABLE|HRW_BAY_FLOPPY_ENABLE,
+		value);
+}
+
+static long heathrow_mesh_enable(struct device_node *node, long param,
+				 long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	macio = macio_find(node, macio_unknown);
+	if (!macio)
+		return -ENODEV;
+	LOCK(flags);
+	/* Set clear mesh cell enable */
+	if (value)
+		MACIO_BIS(HEATHROW_FCR, HRW_MESH_ENABLE);
+	else
+		MACIO_BIC(HEATHROW_FCR, HRW_MESH_ENABLE);
+	(void)MACIO_IN32(HEATHROW_FCR);
+	udelay(10);
+	/* Set/Clear termination power */
+	if (value)
+		MACIO_BIC(HEATHROW_MBCR, 0x04000000);
+	else
+		MACIO_BIS(HEATHROW_MBCR, 0x04000000);
+	(void)MACIO_IN32(HEATHROW_MBCR);
+	udelay(10);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+static long heathrow_ide_enable(struct device_node *node, long param,
+				long value)
+{
+	switch(param) {
+	case 0:
+		return simple_feature_tweak(node, macio_unknown,
+			HEATHROW_FCR, HRW_IDE0_ENABLE, value);
+	case 1:
+		return simple_feature_tweak(node, macio_unknown,
+			HEATHROW_FCR, HRW_BAY_IDE_ENABLE, value);
+	default:
+		return -ENODEV;
+	}
+}
+
+static long heathrow_ide_reset(struct device_node *node, long param,
+			       long value)
+{
+	switch(param) {
+	case 0:
+		return simple_feature_tweak(node, macio_unknown,
+			HEATHROW_FCR, HRW_IDE0_RESET_N, !value);
+	case 1:
+		return simple_feature_tweak(node, macio_unknown,
+			HEATHROW_FCR, HRW_IDE1_RESET_N, !value);
+	default:
+		return -ENODEV;
+	}
+}
+
+static long heathrow_bmac_enable(struct device_node *node, long param,
+				 long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	if (value) {
+		LOCK(flags);
+		MACIO_BIS(HEATHROW_FCR, HRW_BMAC_IO_ENABLE);
+		MACIO_BIS(HEATHROW_FCR, HRW_BMAC_RESET);
+		UNLOCK(flags);
+		(void)MACIO_IN32(HEATHROW_FCR);
+		mdelay(10);
+		LOCK(flags);
+		MACIO_BIC(HEATHROW_FCR, HRW_BMAC_RESET);
+		UNLOCK(flags);
+		(void)MACIO_IN32(HEATHROW_FCR);
+		mdelay(10);
+	} else {
+		LOCK(flags);
+		MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE);
+		UNLOCK(flags);
+	}
+	return 0;
+}
+
+static long heathrow_sound_enable(struct device_node *node, long param,
+				  long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	/* B&W G3 and Yikes don't support that properly (the
+	 * sound appear to never come back after beeing shut down).
+	 */
+	if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE ||
+	    pmac_mb.model_id == PMAC_TYPE_YIKES)
+		return 0;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	if (value) {
+		LOCK(flags);
+		MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N);
+		UNLOCK(flags);
+		(void)MACIO_IN32(HEATHROW_FCR);
+	} else {
+		LOCK(flags);
+		MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N);
+		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+		UNLOCK(flags);
+	}
+	return 0;
+}
+
+static u32 save_fcr[6];
+static u32 save_mbcr;
+static u32 save_gpio_levels[2];
+static u8 save_gpio_extint[KEYLARGO_GPIO_EXTINT_CNT];
+static u8 save_gpio_normal[KEYLARGO_GPIO_CNT];
+static u32 save_unin_clock_ctl;
+static struct dbdma_regs save_dbdma[13];
+static struct dbdma_regs save_alt_dbdma[13];
+
+static void dbdma_save(struct macio_chip *macio, struct dbdma_regs *save)
+{
+	int i;
+
+	/* Save state & config of DBDMA channels */
+	for (i = 0; i < 13; i++) {
+		volatile struct dbdma_regs __iomem * chan = (void __iomem *)
+			(macio->base + ((0x8000+i*0x100)>>2));
+		save[i].cmdptr_hi = in_le32(&chan->cmdptr_hi);
+		save[i].cmdptr = in_le32(&chan->cmdptr);
+		save[i].intr_sel = in_le32(&chan->intr_sel);
+		save[i].br_sel = in_le32(&chan->br_sel);
+		save[i].wait_sel = in_le32(&chan->wait_sel);
+	}
+}
+
+static void dbdma_restore(struct macio_chip *macio, struct dbdma_regs *save)
+{
+	int i;
+
+	/* Save state & config of DBDMA channels */
+	for (i = 0; i < 13; i++) {
+		volatile struct dbdma_regs __iomem * chan = (void __iomem *)
+			(macio->base + ((0x8000+i*0x100)>>2));
+		out_le32(&chan->control, (ACTIVE|DEAD|WAKE|FLUSH|PAUSE|RUN)<<16);
+		while (in_le32(&chan->status) & ACTIVE)
+			mb();
+		out_le32(&chan->cmdptr_hi, save[i].cmdptr_hi);
+		out_le32(&chan->cmdptr, save[i].cmdptr);
+		out_le32(&chan->intr_sel, save[i].intr_sel);
+		out_le32(&chan->br_sel, save[i].br_sel);
+		out_le32(&chan->wait_sel, save[i].wait_sel);
+	}
+}
+
+static void heathrow_sleep(struct macio_chip *macio, int secondary)
+{
+	if (secondary) {
+		dbdma_save(macio, save_alt_dbdma);
+		save_fcr[2] = MACIO_IN32(0x38);
+		save_fcr[3] = MACIO_IN32(0x3c);
+	} else {
+		dbdma_save(macio, save_dbdma);
+		save_fcr[0] = MACIO_IN32(0x38);
+		save_fcr[1] = MACIO_IN32(0x3c);
+		save_mbcr = MACIO_IN32(0x34);
+		/* Make sure sound is shut down */
+		MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N);
+		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+		/* This seems to be necessary as well or the fan
+		 * keeps coming up and battery drains fast */
+		MACIO_BIC(HEATHROW_FCR, HRW_IOBUS_ENABLE);
+		MACIO_BIC(HEATHROW_FCR, HRW_IDE0_RESET_N);
+		/* Make sure eth is down even if module or sleep
+		 * won't work properly */
+		MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE | HRW_BMAC_RESET);
+	}
+	/* Make sure modem is shut down */
+	MACIO_OUT8(HRW_GPIO_MODEM_RESET,
+		MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1);
+	MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+	MACIO_BIC(HEATHROW_FCR, OH_SCCA_IO|OH_SCCB_IO|HRW_SCC_ENABLE);
+
+	/* Let things settle */
+	(void)MACIO_IN32(HEATHROW_FCR);
+}
+
+static void heathrow_wakeup(struct macio_chip *macio, int secondary)
+{
+	if (secondary) {
+		MACIO_OUT32(0x38, save_fcr[2]);
+		(void)MACIO_IN32(0x38);
+		mdelay(1);
+		MACIO_OUT32(0x3c, save_fcr[3]);
+		(void)MACIO_IN32(0x38);
+		mdelay(10);
+		dbdma_restore(macio, save_alt_dbdma);
+	} else {
+		MACIO_OUT32(0x38, save_fcr[0] | HRW_IOBUS_ENABLE);
+		(void)MACIO_IN32(0x38);
+		mdelay(1);
+		MACIO_OUT32(0x3c, save_fcr[1]);
+		(void)MACIO_IN32(0x38);
+		mdelay(1);
+		MACIO_OUT32(0x34, save_mbcr);
+		(void)MACIO_IN32(0x38);
+		mdelay(10);
+		dbdma_restore(macio, save_dbdma);
+	}
+}
+
+static long heathrow_sleep_state(struct device_node *node, long param,
+				 long value)
+{
+	if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0)
+		return -EPERM;
+	if (value == 1) {
+		if (macio_chips[1].type == macio_gatwick)
+			heathrow_sleep(&macio_chips[0], 1);
+		heathrow_sleep(&macio_chips[0], 0);
+	} else if (value == 0) {
+		heathrow_wakeup(&macio_chips[0], 0);
+		if (macio_chips[1].type == macio_gatwick)
+			heathrow_wakeup(&macio_chips[0], 1);
+	}
+	return 0;
+}
+
+static long core99_scc_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+	unsigned long		chan_mask;
+	u32			fcr;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	if (!strcmp(node->name, "ch-a"))
+		chan_mask = MACIO_FLAG_SCCA_ON;
+	else if (!strcmp(node->name, "ch-b"))
+		chan_mask = MACIO_FLAG_SCCB_ON;
+	else
+		return -ENODEV;
+
+	if (value) {
+		int need_reset_scc = 0;
+		int need_reset_irda = 0;
+
+		LOCK(flags);
+		fcr = MACIO_IN32(KEYLARGO_FCR0);
+		/* Check if scc cell need enabling */
+		if (!(fcr & KL0_SCC_CELL_ENABLE)) {
+			fcr |= KL0_SCC_CELL_ENABLE;
+			need_reset_scc = 1;
+		}
+		if (chan_mask & MACIO_FLAG_SCCA_ON) {
+			fcr |= KL0_SCCA_ENABLE;
+			/* Don't enable line drivers for I2S modem */
+			if ((param & 0xfff) == PMAC_SCC_I2S1)
+				fcr &= ~KL0_SCC_A_INTF_ENABLE;
+			else
+				fcr |= KL0_SCC_A_INTF_ENABLE;
+		}
+		if (chan_mask & MACIO_FLAG_SCCB_ON) {
+			fcr |= KL0_SCCB_ENABLE;
+			/* Perform irda specific inits */
+			if ((param & 0xfff) == PMAC_SCC_IRDA) {
+				fcr &= ~KL0_SCC_B_INTF_ENABLE;
+				fcr |= KL0_IRDA_ENABLE;
+				fcr |= KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE;
+				fcr |= KL0_IRDA_SOURCE1_SEL;
+				fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0);
+				fcr &= ~(KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND);
+				need_reset_irda = 1;
+			} else
+				fcr |= KL0_SCC_B_INTF_ENABLE;
+		}
+		MACIO_OUT32(KEYLARGO_FCR0, fcr);
+		macio->flags |= chan_mask;
+		if (need_reset_scc)  {
+			MACIO_BIS(KEYLARGO_FCR0, KL0_SCC_RESET);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			UNLOCK(flags);
+			mdelay(15);
+			LOCK(flags);
+			MACIO_BIC(KEYLARGO_FCR0, KL0_SCC_RESET);
+		}
+		if (need_reset_irda)  {
+			MACIO_BIS(KEYLARGO_FCR0, KL0_IRDA_RESET);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			UNLOCK(flags);
+			mdelay(15);
+			LOCK(flags);
+			MACIO_BIC(KEYLARGO_FCR0, KL0_IRDA_RESET);
+		}
+		UNLOCK(flags);
+		if (param & PMAC_SCC_FLAG_XMON)
+			macio->flags |= MACIO_FLAG_SCC_LOCKED;
+	} else {
+		if (macio->flags & MACIO_FLAG_SCC_LOCKED)
+			return -EPERM;
+		LOCK(flags);
+		fcr = MACIO_IN32(KEYLARGO_FCR0);
+		if (chan_mask & MACIO_FLAG_SCCA_ON)
+			fcr &= ~KL0_SCCA_ENABLE;
+		if (chan_mask & MACIO_FLAG_SCCB_ON) {
+			fcr &= ~KL0_SCCB_ENABLE;
+			/* Perform irda specific clears */
+			if ((param & 0xfff) == PMAC_SCC_IRDA) {
+				fcr &= ~KL0_IRDA_ENABLE;
+				fcr &= ~(KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE);
+				fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0);
+				fcr &= ~(KL0_IRDA_SOURCE1_SEL|KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND);
+			}
+		}
+		MACIO_OUT32(KEYLARGO_FCR0, fcr);
+		if ((fcr & (KL0_SCCA_ENABLE | KL0_SCCB_ENABLE)) == 0) {
+			fcr &= ~KL0_SCC_CELL_ENABLE;
+			MACIO_OUT32(KEYLARGO_FCR0, fcr);
+		}
+		macio->flags &= ~(chan_mask);
+		UNLOCK(flags);
+		mdelay(10);
+	}
+	return 0;
+}
+
+static long
+core99_modem_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	u8			gpio;
+	unsigned long		flags;
+
+	/* Hack for internal USB modem */
+	if (node == NULL) {
+		if (macio_chips[0].type != macio_keylargo)
+			return -ENODEV;
+		node = macio_chips[0].of_node;
+	}
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	gpio = MACIO_IN8(KL_GPIO_MODEM_RESET);
+	gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE;
+	gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA;
+
+	if (!value) {
+		LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		UNLOCK(flags);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		mdelay(250);
+	}
+	LOCK(flags);
+	if (value) {
+		MACIO_BIC(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+		UNLOCK(flags);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		mdelay(250);
+	} else {
+		MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+		UNLOCK(flags);
+	}
+	if (value) {
+		LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250);
+	}
+	return 0;
+}
+
+static long
+pangea_modem_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	u8			gpio;
+	unsigned long		flags;
+
+	/* Hack for internal USB modem */
+	if (node == NULL) {
+		if (macio_chips[0].type != macio_pangea &&
+		    macio_chips[0].type != macio_intrepid)
+			return -ENODEV;
+		node = macio_chips[0].of_node;
+	}
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+	gpio = MACIO_IN8(KL_GPIO_MODEM_RESET);
+	gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE;
+	gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA;
+
+	if (!value) {
+		LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		UNLOCK(flags);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		mdelay(250);
+	}
+	LOCK(flags);
+	if (value) {
+		MACIO_OUT8(KL_GPIO_MODEM_POWER,
+			KEYLARGO_GPIO_OUTPUT_ENABLE);
+		UNLOCK(flags);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		mdelay(250);
+	} else {
+		MACIO_OUT8(KL_GPIO_MODEM_POWER,
+			KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
+		UNLOCK(flags);
+	}
+	if (value) {
+		LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250); LOCK(flags);
+		MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+		(void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+		UNLOCK(flags); mdelay(250);
+	}
+	return 0;
+}
+
+static long
+core99_ata100_enable(struct device_node *node, long value)
+{
+	unsigned long flags;
+	struct pci_dev *pdev = NULL;
+	u8 pbus, pid;
+
+	if (uninorth_rev < 0x24)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value)
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100);
+	else
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100);
+	(void)UN_IN(UNI_N_CLOCK_CNTL);
+	UNLOCK(flags);
+	udelay(20);
+
+	if (value) {
+		if (pci_device_from_OF_node(node, &pbus, &pid) == 0)
+			pdev = pci_find_slot(pbus, pid);
+		if (pdev == NULL)
+			return 0;
+		pci_enable_device(pdev);
+		pci_set_master(pdev);
+	}
+	return 0;
+}
+
+static long
+core99_ide_enable(struct device_node *node, long param, long value)
+{
+	/* Bus ID 0 to 2 are KeyLargo based IDE, busID 3 is U2
+	 * based ata-100
+	 */
+	switch(param) {
+	    case 0:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_EIDE0_ENABLE, value);
+	    case 1:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_EIDE1_ENABLE, value);
+	    case 2:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_UIDE_ENABLE, value);
+	    case 3:
+		return core99_ata100_enable(node, value);
+	    default:
+		return -ENODEV;
+	}
+}
+
+static long
+core99_ide_reset(struct device_node *node, long param, long value)
+{
+	switch(param) {
+	    case 0:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_EIDE0_RESET_N, !value);
+	    case 1:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_EIDE1_RESET_N, !value);
+	    case 2:
+		return simple_feature_tweak(node, macio_unknown,
+			KEYLARGO_FCR1, KL1_UIDE_RESET_N, !value);
+	    default:
+		return -ENODEV;
+	}
+}
+
+static long
+core99_gmac_enable(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+
+	LOCK(flags);
+	if (value)
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC);
+	else
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC);
+	(void)UN_IN(UNI_N_CLOCK_CNTL);
+	UNLOCK(flags);
+	udelay(20);
+
+	return 0;
+}
+
+static long
+core99_gmac_phy_reset(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+	struct macio_chip *macio;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+
+	LOCK(flags);
+	MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, KEYLARGO_GPIO_OUTPUT_ENABLE);
+	(void)MACIO_IN8(KL_GPIO_ETH_PHY_RESET);
+	UNLOCK(flags);
+	mdelay(10);
+	LOCK(flags);
+	MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, /*KEYLARGO_GPIO_OUTPUT_ENABLE | */
+		KEYLARGO_GPIO_OUTOUT_DATA);
+	UNLOCK(flags);
+	mdelay(10);
+
+	return 0;
+}
+
+static long
+core99_sound_chip_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+
+	/* Do a better probe code, screamer G4 desktops &
+	 * iMacs can do that too, add a recalibrate  in
+	 * the driver as well
+	 */
+	if (pmac_mb.model_id == PMAC_TYPE_PISMO ||
+	    pmac_mb.model_id == PMAC_TYPE_TITANIUM) {
+		LOCK(flags);
+		if (value)
+			MACIO_OUT8(KL_GPIO_SOUND_POWER,
+				KEYLARGO_GPIO_OUTPUT_ENABLE |
+				KEYLARGO_GPIO_OUTOUT_DATA);
+		else
+			MACIO_OUT8(KL_GPIO_SOUND_POWER,
+				KEYLARGO_GPIO_OUTPUT_ENABLE);
+		(void)MACIO_IN8(KL_GPIO_SOUND_POWER);
+		UNLOCK(flags);
+	}
+	return 0;
+}
+
+static long
+core99_airport_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip*	macio;
+	unsigned long		flags;
+	int			state;
+
+	macio = macio_find(node, 0);
+	if (!macio)
+		return -ENODEV;
+
+	/* Hint: we allow passing of macio itself for the sake of the
+	 * sleep code
+	 */
+	if (node != macio->of_node &&
+	    (!node->parent || node->parent != macio->of_node))
+		return -ENODEV;
+	state = (macio->flags & MACIO_FLAG_AIRPORT_ON) != 0;
+	if (value == state)
+		return 0;
+	if (value) {
+		/* This code is a reproduction of OF enable-cardslot
+		 * and init-wireless methods, slightly hacked until
+		 * I got it working.
+		 */
+		LOCK(flags);
+		MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 5);
+		(void)MACIO_IN8(KEYLARGO_GPIO_0+0xf);
+		UNLOCK(flags);
+		mdelay(10);
+		LOCK(flags);
+		MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 4);
+		(void)MACIO_IN8(KEYLARGO_GPIO_0+0xf);
+		UNLOCK(flags);
+
+		mdelay(10);
+
+		LOCK(flags);
+		MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xb, 0);
+		(void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xb);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xa, 0x28);
+		(void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xa);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xd, 0x28);
+		(void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xd);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_0+0xd, 0x28);
+		(void)MACIO_IN8(KEYLARGO_GPIO_0+0xd);
+		udelay(10);
+		MACIO_OUT8(KEYLARGO_GPIO_0+0xe, 0x28);
+		(void)MACIO_IN8(KEYLARGO_GPIO_0+0xe);
+		UNLOCK(flags);
+		udelay(10);
+		MACIO_OUT32(0x1c000, 0);
+		mdelay(1);
+		MACIO_OUT8(0x1a3e0, 0x41);
+		(void)MACIO_IN8(0x1a3e0);
+		udelay(10);
+		LOCK(flags);
+		MACIO_BIS(KEYLARGO_FCR2, KL2_CARDSEL_16);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		UNLOCK(flags);
+		mdelay(100);
+
+		macio->flags |= MACIO_FLAG_AIRPORT_ON;
+	} else {
+		LOCK(flags);
+		MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16);
+		(void)MACIO_IN32(KEYLARGO_FCR2);
+		MACIO_OUT8(KL_GPIO_AIRPORT_0, 0);
+		MACIO_OUT8(KL_GPIO_AIRPORT_1, 0);
+		MACIO_OUT8(KL_GPIO_AIRPORT_2, 0);
+		MACIO_OUT8(KL_GPIO_AIRPORT_3, 0);
+		MACIO_OUT8(KL_GPIO_AIRPORT_4, 0);
+		(void)MACIO_IN8(KL_GPIO_AIRPORT_4);
+		UNLOCK(flags);
+
+		macio->flags &= ~MACIO_FLAG_AIRPORT_ON;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+static long
+core99_reset_cpu(struct device_node *node, long param, long value)
+{
+	unsigned int reset_io = 0;
+	unsigned long flags;
+	struct macio_chip *macio;
+	struct device_node *np;
+	const int dflt_reset_lines[] = {	KL_GPIO_RESET_CPU0,
+						KL_GPIO_RESET_CPU1,
+						KL_GPIO_RESET_CPU2,
+						KL_GPIO_RESET_CPU3 };
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo)
+		return -ENODEV;
+
+	np = find_path_device("/cpus");
+	if (np == NULL)
+		return -ENODEV;
+	for (np = np->child; np != NULL; np = np->sibling) {
+		u32 *num = (u32 *)get_property(np, "reg", NULL);
+		u32 *rst = (u32 *)get_property(np, "soft-reset", NULL);
+		if (num == NULL || rst == NULL)
+			continue;
+		if (param == *num) {
+			reset_io = *rst;
+			break;
+		}
+	}
+	if (np == NULL || reset_io == 0)
+		reset_io = dflt_reset_lines[param];
+
+	LOCK(flags);
+	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE);
+	(void)MACIO_IN8(reset_io);
+	udelay(1);
+	MACIO_OUT8(reset_io, 0);
+	(void)MACIO_IN8(reset_io);
+	UNLOCK(flags);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+static long
+core99_usb_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio;
+	unsigned long flags;
+	char *prop;
+	int number;
+	u32 reg;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+
+	prop = (char *)get_property(node, "AAPL,clock-id", NULL);
+	if (!prop)
+		return -ENODEV;
+	if (strncmp(prop, "usb0u048", 8) == 0)
+		number = 0;
+	else if (strncmp(prop, "usb1u148", 8) == 0)
+		number = 2;
+	else if (strncmp(prop, "usb2u248", 8) == 0)
+		number = 4;
+	else
+		return -ENODEV;
+
+	/* Sorry for the brute-force locking, but this is only used during
+	 * sleep and the timing seem to be critical
+	 */
+	LOCK(flags);
+	if (value) {
+		/* Turn ON */
+		if (number == 0) {
+			MACIO_BIC(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1));
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			UNLOCK(flags);
+			mdelay(1);
+			LOCK(flags);
+			MACIO_BIS(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE);
+		} else if (number == 2) {
+			MACIO_BIC(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1));
+			UNLOCK(flags);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			mdelay(1);
+			LOCK(flags);
+			MACIO_BIS(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE);
+		} else if (number == 4) {
+			MACIO_BIC(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1));
+			UNLOCK(flags);
+			(void)MACIO_IN32(KEYLARGO_FCR1);
+			mdelay(1);
+			LOCK(flags);
+			MACIO_BIS(KEYLARGO_FCR1, KL1_USB2_CELL_ENABLE);
+		}
+		if (number < 4) {
+			reg = MACIO_IN32(KEYLARGO_FCR4);
+			reg &=	~(KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) |
+				KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number));
+			reg &=	~(KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) |
+				KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1));
+			MACIO_OUT32(KEYLARGO_FCR4, reg);
+			(void)MACIO_IN32(KEYLARGO_FCR4);
+			udelay(10);
+		} else {
+			reg = MACIO_IN32(KEYLARGO_FCR3);
+			reg &=	~(KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) |
+				KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0));
+			reg &=	~(KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) |
+				KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1));
+			MACIO_OUT32(KEYLARGO_FCR3, reg);
+			(void)MACIO_IN32(KEYLARGO_FCR3);
+			udelay(10);
+		}
+		if (macio->type == macio_intrepid) {
+			/* wait for clock stopped bits to clear */
+			u32 test0 = 0, test1 = 0;
+			u32 status0, status1;
+			int timeout = 1000;
+
+			UNLOCK(flags);
+			switch (number) {
+			case 0:
+				test0 = UNI_N_CLOCK_STOPPED_USB0;
+				test1 = UNI_N_CLOCK_STOPPED_USB0PCI;
+				break;
+			case 2:
+				test0 = UNI_N_CLOCK_STOPPED_USB1;
+				test1 = UNI_N_CLOCK_STOPPED_USB1PCI;
+				break;
+			case 4:
+				test0 = UNI_N_CLOCK_STOPPED_USB2;
+				test1 = UNI_N_CLOCK_STOPPED_USB2PCI;
+				break;
+			}
+			do {
+				if (--timeout <= 0) {
+					printk(KERN_ERR "core99_usb_enable: "
+					       "Timeout waiting for clocks\n");
+					break;
+				}
+				mdelay(1);
+				status0 = UN_IN(UNI_N_CLOCK_STOP_STATUS0);
+				status1 = UN_IN(UNI_N_CLOCK_STOP_STATUS1);
+			} while ((status0 & test0) | (status1 & test1));
+			LOCK(flags);
+		}
+	} else {
+		/* Turn OFF */
+		if (number < 4) {
+			reg = MACIO_IN32(KEYLARGO_FCR4);
+			reg |=	KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) |
+				KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number);
+			reg |=	KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) |
+				KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1);
+			MACIO_OUT32(KEYLARGO_FCR4, reg);
+			(void)MACIO_IN32(KEYLARGO_FCR4);
+			udelay(1);
+		} else {
+			reg = MACIO_IN32(KEYLARGO_FCR3);
+			reg |=	KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) |
+				KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0);
+			reg |=	KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) |
+				KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1);
+			MACIO_OUT32(KEYLARGO_FCR3, reg);
+			(void)MACIO_IN32(KEYLARGO_FCR3);
+			udelay(1);
+		}
+		if (number == 0) {
+			if (macio->type != macio_intrepid)
+				MACIO_BIC(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			udelay(1);
+			MACIO_BIS(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1));
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+		} else if (number == 2) {
+			if (macio->type != macio_intrepid)
+				MACIO_BIC(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE);
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+			udelay(1);
+			MACIO_BIS(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1));
+			(void)MACIO_IN32(KEYLARGO_FCR0);
+		} else if (number == 4) {
+			udelay(1);
+			MACIO_BIS(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1));
+			(void)MACIO_IN32(KEYLARGO_FCR1);
+		}
+		udelay(1);
+	}
+	UNLOCK(flags);
+
+	return 0;
+}
+
+static long
+core99_firewire_enable(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+	struct macio_chip *macio;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+	if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED))
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value) {
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW);
+		(void)UN_IN(UNI_N_CLOCK_CNTL);
+	} else {
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW);
+		(void)UN_IN(UNI_N_CLOCK_CNTL);
+	}
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+static long
+core99_firewire_cable_power(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+	struct macio_chip *macio;
+
+	/* Trick: we allow NULL node */
+	if ((pmac_mb.board_flags & PMAC_MB_HAS_FW_POWER) == 0)
+		return -ENODEV;
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+	if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED))
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value) {
+		MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 0);
+		MACIO_IN8(KL_GPIO_FW_CABLE_POWER);
+		udelay(10);
+	} else {
+		MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 4);
+		MACIO_IN8(KL_GPIO_FW_CABLE_POWER); udelay(10);
+	}
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+static long
+intrepid_aack_delay_enable(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+
+	if (uninorth_rev < 0xd2)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (param)
+		UN_BIS(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE);
+	else
+		UN_BIC(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+
+#endif /* CONFIG_POWER4 */
+
+static long
+core99_read_gpio(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+
+	return MACIO_IN8(param);
+}
+
+
+static long
+core99_write_gpio(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+
+	MACIO_OUT8(param, (u8)(value & 0xff));
+	return 0;
+}
+
+#ifdef CONFIG_POWER4
+static long g5_gmac_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+
+	if (node == NULL)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value) {
+		MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE);
+		mb();
+		k2_skiplist[0] = NULL;
+	} else {
+		k2_skiplist[0] = node;
+		mb();
+		MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE);
+	}
+	
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+static long g5_fw_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+
+	if (node == NULL)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (value) {
+		MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE);
+		mb();
+		k2_skiplist[1] = NULL;
+	} else {
+		k2_skiplist[1] = node;
+		mb();
+		MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE);
+	}
+	
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+static long g5_mpic_enable(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+
+	if (node->parent == NULL || strcmp(node->parent->name, "u3"))
+		return 0;
+
+	LOCK(flags);
+	UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+static long g5_eth_phy_reset(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	struct device_node *phy;
+	int need_reset;
+
+	/*
+	 * We must not reset the combo PHYs, only the BCM5221 found in
+	 * the iMac G5.
+	 */
+	phy = of_get_next_child(node, NULL);
+	if (!phy)
+		return -ENODEV;
+	need_reset = device_is_compatible(phy, "B5221");
+	of_node_put(phy);
+	if (!need_reset)
+		return 0;
+
+	/* PHY reset is GPIO 29, not in device-tree unfortunately */
+	MACIO_OUT8(K2_GPIO_EXTINT_0 + 29,
+		   KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
+	/* Thankfully, this is now always called at a time when we can
+	 * schedule by sungem.
+	 */
+	msleep(10);
+	MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0);
+
+	return 0;
+}
+
+static long g5_i2s_enable(struct device_node *node, long param, long value)
+{
+	/* Very crude implementation for now */
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+
+	if (value == 0)
+		return 0; /* don't disable yet */
+
+	LOCK(flags);
+	MACIO_BIS(KEYLARGO_FCR3, KL3_CLK45_ENABLE | KL3_CLK49_ENABLE |
+		  KL3_I2S0_CLK18_ENABLE);
+	udelay(10);
+	MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_I2S0_CELL_ENABLE |
+		  K2_FCR1_I2S0_CLK_ENABLE_BIT | K2_FCR1_I2S0_ENABLE);
+	udelay(10);
+	MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_I2S0_RESET);
+	UNLOCK(flags);
+	udelay(10);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_SMP
+static long g5_reset_cpu(struct device_node *node, long param, long value)
+{
+	unsigned int reset_io = 0;
+	unsigned long flags;
+	struct macio_chip *macio;
+	struct device_node *np;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo2)
+		return -ENODEV;
+
+	np = find_path_device("/cpus");
+	if (np == NULL)
+		return -ENODEV;
+	for (np = np->child; np != NULL; np = np->sibling) {
+		u32 *num = (u32 *)get_property(np, "reg", NULL);
+		u32 *rst = (u32 *)get_property(np, "soft-reset", NULL);
+		if (num == NULL || rst == NULL)
+			continue;
+		if (param == *num) {
+			reset_io = *rst;
+			break;
+		}
+	}
+	if (np == NULL || reset_io == 0)
+		return -ENODEV;
+
+	LOCK(flags);
+	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE);
+	(void)MACIO_IN8(reset_io);
+	udelay(1);
+	MACIO_OUT8(reset_io, 0);
+	(void)MACIO_IN8(reset_io);
+	UNLOCK(flags);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * This can be called from pmac_smp so isn't static
+ *
+ * This takes the second CPU off the bus on dual CPU machines
+ * running UP
+ */
+void g5_phy_disable_cpu1(void)
+{
+	UN_OUT(U3_API_PHY_CONFIG_1, 0);
+}
+#endif /* CONFIG_POWER4 */
+
+#ifndef CONFIG_POWER4
+
+static void
+keylargo_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 temp;
+
+	if (sleep_mode) {
+		mdelay(1);
+		MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND);
+		(void)MACIO_IN32(KEYLARGO_FCR0);
+		mdelay(1);
+	}
+
+	MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE |
+				KL0_SCC_CELL_ENABLE |
+				KL0_IRDA_ENABLE | KL0_IRDA_CLK32_ENABLE |
+				KL0_IRDA_CLK19_ENABLE);
+
+	MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK);
+	MACIO_BIS(KEYLARGO_MBCR, KL_MBCR_MB0_IDE_ENABLE);
+
+	MACIO_BIC(KEYLARGO_FCR1,
+		KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT |
+		KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE |
+		KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
+		KL1_EIDE0_ENABLE | KL1_EIDE0_RESET_N |
+		KL1_EIDE1_ENABLE | KL1_EIDE1_RESET_N |
+		KL1_UIDE_ENABLE);
+
+	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+	MACIO_BIC(KEYLARGO_FCR2, KL2_IOBUS_ENABLE);
+
+	temp = MACIO_IN32(KEYLARGO_FCR3);
+	if (macio->rev >= 2) {
+		temp |= KL3_SHUTDOWN_PLL2X;
+		if (sleep_mode)
+			temp |= KL3_SHUTDOWN_PLL_TOTAL;
+	}
+
+	temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 |
+		KL3_SHUTDOWN_PLLKW35;
+	if (sleep_mode)
+		temp |= KL3_SHUTDOWN_PLLKW12;
+	temp &= ~(KL3_CLK66_ENABLE | KL3_CLK49_ENABLE | KL3_CLK45_ENABLE
+		| KL3_CLK31_ENABLE | KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE);
+	if (sleep_mode)
+		temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, temp);
+
+	/* Flush posted writes & wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1);
+}
+
+static void
+pangea_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 temp;
+
+	MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE |
+				KL0_SCC_CELL_ENABLE |
+				KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE);
+
+	MACIO_BIC(KEYLARGO_FCR1,
+		KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT |
+		KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE |
+		KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
+		KL1_UIDE_ENABLE);
+	if (pmac_mb.board_flags & PMAC_MB_MOBILE)
+		MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N);
+
+	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+
+	temp = MACIO_IN32(KEYLARGO_FCR3);
+	temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 |
+		KL3_SHUTDOWN_PLLKW35;
+	temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | KL3_CLK31_ENABLE
+		| KL3_I2S0_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE);
+	if (sleep_mode)
+		temp &= ~(KL3_VIA_CLK16_ENABLE | KL3_TIMER_CLK18_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, temp);
+
+	/* Flush posted writes & wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1);
+}
+
+static void
+intrepid_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 temp;
+
+	MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE |
+		  KL0_SCC_CELL_ENABLE);
+
+	MACIO_BIC(KEYLARGO_FCR1,
+		  /*KL1_USB2_CELL_ENABLE |*/
+		KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE);
+	if (pmac_mb.board_flags & PMAC_MB_MOBILE)
+		MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N);
+
+	temp = MACIO_IN32(KEYLARGO_FCR3);
+	temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE |
+		  KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE);
+	if (sleep_mode)
+		temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_IT_VIA_CLK32_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, temp);
+
+	/* Flush posted writes & wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0);
+	mdelay(10);
+}
+
+
+void pmac_tweak_clock_spreading(int enable)
+{
+	struct macio_chip *macio = &macio_chips[0];
+
+	/* Hack for doing clock spreading on some machines PowerBooks and
+	 * iBooks. This implements the "platform-do-clockspreading" OF
+	 * property as decoded manually on various models. For safety, we also
+	 * check the product ID in the device-tree in cases we'll whack the i2c
+	 * chip to make reasonably sure we won't set wrong values in there
+	 *
+	 * Of course, ultimately, we have to implement a real parser for
+	 * the platform-do-* stuff...
+	 */
+
+	if (macio->type == macio_intrepid) {
+		if (enable)
+			UN_OUT(UNI_N_CLOCK_SPREADING, 2);
+		else
+			UN_OUT(UNI_N_CLOCK_SPREADING, 0);
+		mdelay(40);
+	}
+
+	while (machine_is_compatible("PowerBook5,2") ||
+	       machine_is_compatible("PowerBook5,3") ||
+	       machine_is_compatible("PowerBook6,2") ||
+	       machine_is_compatible("PowerBook6,3")) {
+		struct device_node *ui2c = of_find_node_by_type(NULL, "i2c");
+		struct device_node *dt = of_find_node_by_name(NULL, "device-tree");
+		u8 buffer[9];
+		u32 *productID;
+		int i, rc, changed = 0;
+
+		if (dt == NULL)
+			break;
+		productID = (u32 *)get_property(dt, "pid#", NULL);
+		if (productID == NULL)
+			break;
+		while(ui2c) {
+			struct device_node *p = of_get_parent(ui2c);
+			if (p && !strcmp(p->name, "uni-n"))
+				break;
+			ui2c = of_find_node_by_type(ui2c, "i2c");
+		}
+		if (ui2c == NULL)
+			break;
+		DBG("Trying to bump clock speed for PID: %08x...\n", *productID);
+		rc = pmac_low_i2c_open(ui2c, 1);
+		if (rc != 0)
+			break;
+		pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined);
+		rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9);
+		DBG("read result: %d,", rc);
+		if (rc != 0) {
+			pmac_low_i2c_close(ui2c);
+			break;
+		}
+		for (i=0; i<9; i++)
+			DBG(" %02x", buffer[i]);
+		DBG("\n");
+
+		switch(*productID) {
+		case 0x1182:	/* AlBook 12" rev 2 */
+		case 0x1183:	/* iBook G4 12" */
+			buffer[0] = (buffer[0] & 0x8f) | 0x70;
+			buffer[2] = (buffer[2] & 0x7f) | 0x00;
+			buffer[5] = (buffer[5] & 0x80) | 0x31;
+			buffer[6] = (buffer[6] & 0x40) | 0xb0;
+			buffer[7] = (buffer[7] & 0x00) | (enable ? 0xc0 : 0xba);
+			buffer[8] = (buffer[8] & 0x00) | 0x30;
+			changed = 1;
+			break;
+		case 0x3142:	/* AlBook 15" (ATI M10) */
+		case 0x3143:	/* AlBook 17" (ATI M10) */
+			buffer[0] = (buffer[0] & 0xaf) | 0x50;
+			buffer[2] = (buffer[2] & 0x7f) | 0x00;
+			buffer[5] = (buffer[5] & 0x80) | 0x31;
+			buffer[6] = (buffer[6] & 0x40) | 0xb0;
+			buffer[7] = (buffer[7] & 0x00) | (enable ? 0xd0 : 0xc0);
+			buffer[8] = (buffer[8] & 0x00) | 0x30;
+			changed = 1;
+			break;
+		default:
+			DBG("i2c-hwclock: Machine model not handled\n");
+			break;
+		}
+		if (!changed) {
+			pmac_low_i2c_close(ui2c);
+			break;
+		}
+		pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_stdsub);
+		rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_write, 0x80, buffer, 9);
+		DBG("write result: %d,", rc);
+		pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined);
+		rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9);
+		DBG("read result: %d,", rc);
+		if (rc != 0) {
+			pmac_low_i2c_close(ui2c);
+			break;
+		}
+		for (i=0; i<9; i++)
+			DBG(" %02x", buffer[i]);
+		pmac_low_i2c_close(ui2c);
+		break;
+	}
+}
+
+
+static int
+core99_sleep(void)
+{
+	struct macio_chip *macio;
+	int i;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+
+	/* We power off the wireless slot in case it was not done
+	 * by the driver. We don't power it on automatically however
+	 */
+	if (macio->flags & MACIO_FLAG_AIRPORT_ON)
+		core99_airport_enable(macio->of_node, 0, 0);
+
+	/* We power off the FW cable. Should be done by the driver... */
+	if (macio->flags & MACIO_FLAG_FW_SUPPORTED) {
+		core99_firewire_enable(NULL, 0, 0);
+		core99_firewire_cable_power(NULL, 0, 0);
+	}
+
+	/* We make sure int. modem is off (in case driver lost it) */
+	if (macio->type == macio_keylargo)
+		core99_modem_enable(macio->of_node, 0, 0);
+	else
+		pangea_modem_enable(macio->of_node, 0, 0);
+
+	/* We make sure the sound is off as well */
+	core99_sound_chip_enable(macio->of_node, 0, 0);
+
+	/*
+	 * Save various bits of KeyLargo
+	 */
+
+	/* Save the state of the various GPIOs */
+	save_gpio_levels[0] = MACIO_IN32(KEYLARGO_GPIO_LEVELS0);
+	save_gpio_levels[1] = MACIO_IN32(KEYLARGO_GPIO_LEVELS1);
+	for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++)
+		save_gpio_extint[i] = MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+i);
+	for (i=0; i<KEYLARGO_GPIO_CNT; i++)
+		save_gpio_normal[i] = MACIO_IN8(KEYLARGO_GPIO_0+i);
+
+	/* Save the FCRs */
+	if (macio->type == macio_keylargo)
+		save_mbcr = MACIO_IN32(KEYLARGO_MBCR);
+	save_fcr[0] = MACIO_IN32(KEYLARGO_FCR0);
+	save_fcr[1] = MACIO_IN32(KEYLARGO_FCR1);
+	save_fcr[2] = MACIO_IN32(KEYLARGO_FCR2);
+	save_fcr[3] = MACIO_IN32(KEYLARGO_FCR3);
+	save_fcr[4] = MACIO_IN32(KEYLARGO_FCR4);
+	if (macio->type == macio_pangea || macio->type == macio_intrepid)
+		save_fcr[5] = MACIO_IN32(KEYLARGO_FCR5);
+
+	/* Save state & config of DBDMA channels */
+	dbdma_save(macio, save_dbdma);
+
+	/*
+	 * Turn off as much as we can
+	 */
+	if (macio->type == macio_pangea)
+		pangea_shutdown(macio, 1);
+	else if (macio->type == macio_intrepid)
+		intrepid_shutdown(macio, 1);
+	else if (macio->type == macio_keylargo)
+		keylargo_shutdown(macio, 1);
+
+	/*
+	 * Put the host bridge to sleep
+	 */
+
+	save_unin_clock_ctl = UN_IN(UNI_N_CLOCK_CNTL);
+	/* Note: do not switch GMAC off, driver does it when necessary, WOL must keep it
+	 * enabled !
+	 */
+	UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl &
+	       ~(/*UNI_N_CLOCK_CNTL_GMAC|*/UNI_N_CLOCK_CNTL_FW/*|UNI_N_CLOCK_CNTL_PCI*/));
+	udelay(100);
+	UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING);
+	UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_SLEEP);
+	mdelay(10);
+
+	/*
+	 * FIXME: A bit of black magic with OpenPIC (don't ask me why)
+	 */
+	if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) {
+		MACIO_BIS(0x506e0, 0x00400000);
+		MACIO_BIS(0x506e0, 0x80000000);
+	}
+	return 0;
+}
+
+static int
+core99_wake_up(void)
+{
+	struct macio_chip *macio;
+	int i;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+	    macio->type != macio_intrepid)
+		return -ENODEV;
+
+	/*
+	 * Wakeup the host bridge
+	 */
+	UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL);
+	udelay(10);
+	UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING);
+	udelay(10);
+
+	/*
+	 * Restore KeyLargo
+	 */
+
+	if (macio->type == macio_keylargo) {
+		MACIO_OUT32(KEYLARGO_MBCR, save_mbcr);
+		(void)MACIO_IN32(KEYLARGO_MBCR); udelay(10);
+	}
+	MACIO_OUT32(KEYLARGO_FCR0, save_fcr[0]);
+	(void)MACIO_IN32(KEYLARGO_FCR0); udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR1, save_fcr[1]);
+	(void)MACIO_IN32(KEYLARGO_FCR1); udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR2, save_fcr[2]);
+	(void)MACIO_IN32(KEYLARGO_FCR2); udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR3, save_fcr[3]);
+	(void)MACIO_IN32(KEYLARGO_FCR3); udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR4, save_fcr[4]);
+	(void)MACIO_IN32(KEYLARGO_FCR4); udelay(10);
+	if (macio->type == macio_pangea || macio->type == macio_intrepid) {
+		MACIO_OUT32(KEYLARGO_FCR5, save_fcr[5]);
+		(void)MACIO_IN32(KEYLARGO_FCR5); udelay(10);
+	}
+
+	dbdma_restore(macio, save_dbdma);
+
+	MACIO_OUT32(KEYLARGO_GPIO_LEVELS0, save_gpio_levels[0]);
+	MACIO_OUT32(KEYLARGO_GPIO_LEVELS1, save_gpio_levels[1]);
+	for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++)
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+i, save_gpio_extint[i]);
+	for (i=0; i<KEYLARGO_GPIO_CNT; i++)
+		MACIO_OUT8(KEYLARGO_GPIO_0+i, save_gpio_normal[i]);
+
+	/* FIXME more black magic with OpenPIC ... */
+	if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) {
+		MACIO_BIC(0x506e0, 0x00400000);
+		MACIO_BIC(0x506e0, 0x80000000);
+	}
+
+	UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl);
+	udelay(100);
+
+	return 0;
+}
+
+static long
+core99_sleep_state(struct device_node *node, long param, long value)
+{
+	/* Param == 1 means to enter the "fake sleep" mode that is
+	 * used for CPU speed switch
+	 */
+	if (param == 1) {
+		if (value == 1) {
+			UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING);
+			UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_IDLE2);
+		} else {
+			UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL);
+			udelay(10);
+			UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING);
+			udelay(10);
+		}
+		return 0;
+	}
+	if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0)
+		return -EPERM;
+
+	if (value == 1)
+		return core99_sleep();
+	else if (value == 0)
+		return core99_wake_up();
+	return 0;
+}
+
+#endif /* CONFIG_POWER4 */
+
+static long
+generic_dev_can_wake(struct device_node *node, long param, long value)
+{
+	/* Todo: eventually check we are really dealing with on-board
+	 * video device ...
+	 */
+
+	if (pmac_mb.board_flags & PMAC_MB_MAY_SLEEP)
+		pmac_mb.board_flags |= PMAC_MB_CAN_SLEEP;
+	return 0;
+}
+
+static long generic_get_mb_info(struct device_node *node, long param, long value)
+{
+	switch(param) {
+		case PMAC_MB_INFO_MODEL:
+			return pmac_mb.model_id;
+		case PMAC_MB_INFO_FLAGS:
+			return pmac_mb.board_flags;
+		case PMAC_MB_INFO_NAME:
+			/* hack hack hack... but should work */
+			*((const char **)value) = pmac_mb.model_name;
+			return 0;
+	}
+	return -EINVAL;
+}
+
+
+/*
+ * Table definitions
+ */
+
+/* Used on any machine
+ */
+static struct feature_table_entry any_features[] = {
+	{ PMAC_FTR_GET_MB_INFO,		generic_get_mb_info },
+	{ PMAC_FTR_DEVICE_CAN_WAKE,	generic_dev_can_wake },
+	{ 0, NULL }
+};
+
+#ifndef CONFIG_POWER4
+
+/* OHare based motherboards. Currently, we only use these on the
+ * 2400,3400 and 3500 series powerbooks. Some older desktops seem
+ * to have issues with turning on/off those asic cells
+ */
+static struct feature_table_entry ohare_features[] = {
+	{ PMAC_FTR_SCC_ENABLE,		ohare_htw_scc_enable },
+	{ PMAC_FTR_SWIM3_ENABLE,	ohare_floppy_enable },
+	{ PMAC_FTR_MESH_ENABLE,		ohare_mesh_enable },
+	{ PMAC_FTR_IDE_ENABLE,		ohare_ide_enable},
+	{ PMAC_FTR_IDE_RESET,		ohare_ide_reset},
+	{ PMAC_FTR_SLEEP_STATE,		ohare_sleep_state },
+	{ 0, NULL }
+};
+
+/* Heathrow desktop machines (Beige G3).
+ * Separated as some features couldn't be properly tested
+ * and the serial port control bits appear to confuse it.
+ */
+static struct feature_table_entry heathrow_desktop_features[] = {
+	{ PMAC_FTR_SWIM3_ENABLE,	heathrow_floppy_enable },
+	{ PMAC_FTR_MESH_ENABLE,		heathrow_mesh_enable },
+	{ PMAC_FTR_IDE_ENABLE,		heathrow_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		heathrow_ide_reset },
+	{ PMAC_FTR_BMAC_ENABLE,		heathrow_bmac_enable },
+	{ 0, NULL }
+};
+
+/* Heathrow based laptop, that is the Wallstreet and mainstreet
+ * powerbooks.
+ */
+static struct feature_table_entry heathrow_laptop_features[] = {
+	{ PMAC_FTR_SCC_ENABLE,		ohare_htw_scc_enable },
+	{ PMAC_FTR_MODEM_ENABLE,	heathrow_modem_enable },
+	{ PMAC_FTR_SWIM3_ENABLE,	heathrow_floppy_enable },
+	{ PMAC_FTR_MESH_ENABLE,		heathrow_mesh_enable },
+	{ PMAC_FTR_IDE_ENABLE,		heathrow_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		heathrow_ide_reset },
+	{ PMAC_FTR_BMAC_ENABLE,		heathrow_bmac_enable },
+	{ PMAC_FTR_SOUND_CHIP_ENABLE,	heathrow_sound_enable },
+	{ PMAC_FTR_SLEEP_STATE,		heathrow_sleep_state },
+	{ 0, NULL }
+};
+
+/* Paddington based machines
+ * The lombard (101) powerbook, first iMac models, B&W G3 and Yikes G4.
+ */
+static struct feature_table_entry paddington_features[] = {
+	{ PMAC_FTR_SCC_ENABLE,		ohare_htw_scc_enable },
+	{ PMAC_FTR_MODEM_ENABLE,	heathrow_modem_enable },
+	{ PMAC_FTR_SWIM3_ENABLE,	heathrow_floppy_enable },
+	{ PMAC_FTR_MESH_ENABLE,		heathrow_mesh_enable },
+	{ PMAC_FTR_IDE_ENABLE,		heathrow_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		heathrow_ide_reset },
+	{ PMAC_FTR_BMAC_ENABLE,		heathrow_bmac_enable },
+	{ PMAC_FTR_SOUND_CHIP_ENABLE,	heathrow_sound_enable },
+	{ PMAC_FTR_SLEEP_STATE,		heathrow_sleep_state },
+	{ 0, NULL }
+};
+
+/* Core99 & MacRISC 2 machines (all machines released since the
+ * iBook (included), that is all AGP machines, except pangea
+ * chipset. The pangea chipset is the "combo" UniNorth/KeyLargo
+ * used on iBook2 & iMac "flow power".
+ */
+static struct feature_table_entry core99_features[] = {
+	{ PMAC_FTR_SCC_ENABLE,		core99_scc_enable },
+	{ PMAC_FTR_MODEM_ENABLE,	core99_modem_enable },
+	{ PMAC_FTR_IDE_ENABLE,		core99_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		core99_ide_reset },
+	{ PMAC_FTR_GMAC_ENABLE,		core99_gmac_enable },
+	{ PMAC_FTR_GMAC_PHY_RESET,	core99_gmac_phy_reset },
+	{ PMAC_FTR_SOUND_CHIP_ENABLE,	core99_sound_chip_enable },
+	{ PMAC_FTR_AIRPORT_ENABLE,	core99_airport_enable },
+	{ PMAC_FTR_USB_ENABLE,		core99_usb_enable },
+	{ PMAC_FTR_1394_ENABLE,		core99_firewire_enable },
+	{ PMAC_FTR_1394_CABLE_POWER,	core99_firewire_cable_power },
+	{ PMAC_FTR_SLEEP_STATE,		core99_sleep_state },
+#ifdef CONFIG_SMP
+	{ PMAC_FTR_RESET_CPU,		core99_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ PMAC_FTR_READ_GPIO,		core99_read_gpio },
+	{ PMAC_FTR_WRITE_GPIO,		core99_write_gpio },
+	{ 0, NULL }
+};
+
+/* RackMac
+ */
+static struct feature_table_entry rackmac_features[] = {
+	{ PMAC_FTR_SCC_ENABLE,		core99_scc_enable },
+	{ PMAC_FTR_IDE_ENABLE,		core99_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		core99_ide_reset },
+	{ PMAC_FTR_GMAC_ENABLE,		core99_gmac_enable },
+	{ PMAC_FTR_GMAC_PHY_RESET,	core99_gmac_phy_reset },
+	{ PMAC_FTR_USB_ENABLE,		core99_usb_enable },
+	{ PMAC_FTR_1394_ENABLE,		core99_firewire_enable },
+	{ PMAC_FTR_1394_CABLE_POWER,	core99_firewire_cable_power },
+	{ PMAC_FTR_SLEEP_STATE,		core99_sleep_state },
+#ifdef CONFIG_SMP
+	{ PMAC_FTR_RESET_CPU,		core99_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ PMAC_FTR_READ_GPIO,		core99_read_gpio },
+	{ PMAC_FTR_WRITE_GPIO,		core99_write_gpio },
+	{ 0, NULL }
+};
+
+/* Pangea features
+ */
+static struct feature_table_entry pangea_features[] = {
+	{ PMAC_FTR_SCC_ENABLE,		core99_scc_enable },
+	{ PMAC_FTR_MODEM_ENABLE,	pangea_modem_enable },
+	{ PMAC_FTR_IDE_ENABLE,		core99_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		core99_ide_reset },
+	{ PMAC_FTR_GMAC_ENABLE,		core99_gmac_enable },
+	{ PMAC_FTR_GMAC_PHY_RESET,	core99_gmac_phy_reset },
+	{ PMAC_FTR_SOUND_CHIP_ENABLE,	core99_sound_chip_enable },
+	{ PMAC_FTR_AIRPORT_ENABLE,	core99_airport_enable },
+	{ PMAC_FTR_USB_ENABLE,		core99_usb_enable },
+	{ PMAC_FTR_1394_ENABLE,		core99_firewire_enable },
+	{ PMAC_FTR_1394_CABLE_POWER,	core99_firewire_cable_power },
+	{ PMAC_FTR_SLEEP_STATE,		core99_sleep_state },
+	{ PMAC_FTR_READ_GPIO,		core99_read_gpio },
+	{ PMAC_FTR_WRITE_GPIO,		core99_write_gpio },
+	{ 0, NULL }
+};
+
+/* Intrepid features
+ */
+static struct feature_table_entry intrepid_features[] = {
+	{ PMAC_FTR_SCC_ENABLE,		core99_scc_enable },
+	{ PMAC_FTR_MODEM_ENABLE,	pangea_modem_enable },
+	{ PMAC_FTR_IDE_ENABLE,		core99_ide_enable },
+	{ PMAC_FTR_IDE_RESET,		core99_ide_reset },
+	{ PMAC_FTR_GMAC_ENABLE,		core99_gmac_enable },
+	{ PMAC_FTR_GMAC_PHY_RESET,	core99_gmac_phy_reset },
+	{ PMAC_FTR_SOUND_CHIP_ENABLE,	core99_sound_chip_enable },
+	{ PMAC_FTR_AIRPORT_ENABLE,	core99_airport_enable },
+	{ PMAC_FTR_USB_ENABLE,		core99_usb_enable },
+	{ PMAC_FTR_1394_ENABLE,		core99_firewire_enable },
+	{ PMAC_FTR_1394_CABLE_POWER,	core99_firewire_cable_power },
+	{ PMAC_FTR_SLEEP_STATE,		core99_sleep_state },
+	{ PMAC_FTR_READ_GPIO,		core99_read_gpio },
+	{ PMAC_FTR_WRITE_GPIO,		core99_write_gpio },
+	{ PMAC_FTR_AACK_DELAY_ENABLE,	intrepid_aack_delay_enable },
+	{ 0, NULL }
+};
+
+#else /* CONFIG_POWER4 */
+
+/* G5 features
+ */
+static struct feature_table_entry g5_features[] = {
+	{ PMAC_FTR_GMAC_ENABLE,		g5_gmac_enable },
+	{ PMAC_FTR_1394_ENABLE,		g5_fw_enable },
+	{ PMAC_FTR_ENABLE_MPIC,		g5_mpic_enable },
+	{ PMAC_FTR_GMAC_PHY_RESET,	g5_eth_phy_reset },
+	{ PMAC_FTR_SOUND_CHIP_ENABLE,	g5_i2s_enable },
+#ifdef CONFIG_SMP
+	{ PMAC_FTR_RESET_CPU,		g5_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ PMAC_FTR_READ_GPIO,		core99_read_gpio },
+	{ PMAC_FTR_WRITE_GPIO,		core99_write_gpio },
+	{ 0, NULL }
+};
+
+#endif /* CONFIG_POWER4 */
+
+static struct pmac_mb_def pmac_mb_defs[] = {
+#ifndef CONFIG_POWER4
+	/*
+	 * Desktops
+	 */
+
+	{	"AAPL,8500",			"PowerMac 8500/8600",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,9500",			"PowerMac 9500/9600",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,7200",			"PowerMac 7200",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,7300",			"PowerMac 7200/7300",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,7500",			"PowerMac 7500",
+		PMAC_TYPE_PSURGE,		NULL,
+		0
+	},
+	{	"AAPL,ShinerESB",		"Apple Network Server",
+		PMAC_TYPE_ANS,			NULL,
+		0
+	},
+	{	"AAPL,e407",			"Alchemy",
+		PMAC_TYPE_ALCHEMY,		NULL,
+		0
+	},
+	{	"AAPL,e411",			"Gazelle",
+		PMAC_TYPE_GAZELLE,		NULL,
+		0
+	},
+	{	"AAPL,Gossamer",		"PowerMac G3 (Gossamer)",
+		PMAC_TYPE_GOSSAMER,		heathrow_desktop_features,
+		0
+	},
+	{	"AAPL,PowerMac G3",		"PowerMac G3 (Silk)",
+		PMAC_TYPE_SILK,			heathrow_desktop_features,
+		0
+	},
+	{	"PowerMac1,1",			"Blue&White G3",
+		PMAC_TYPE_YOSEMITE,		paddington_features,
+		0
+	},
+	{	"PowerMac1,2",			"PowerMac G4 PCI Graphics",
+		PMAC_TYPE_YIKES,		paddington_features,
+		0
+	},
+	{	"PowerMac2,1",			"iMac FireWire",
+		PMAC_TYPE_FW_IMAC,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac2,2",			"iMac FireWire",
+		PMAC_TYPE_FW_IMAC,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac3,1",			"PowerMac G4 AGP Graphics",
+		PMAC_TYPE_SAWTOOTH,		core99_features,
+		PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac3,2",			"PowerMac G4 AGP Graphics",
+		PMAC_TYPE_SAWTOOTH,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac3,3",			"PowerMac G4 AGP Graphics",
+		PMAC_TYPE_SAWTOOTH,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac3,4",			"PowerMac G4 Silver",
+		PMAC_TYPE_QUICKSILVER,		core99_features,
+		PMAC_MB_MAY_SLEEP
+	},
+	{	"PowerMac3,5",			"PowerMac G4 Silver",
+		PMAC_TYPE_QUICKSILVER,		core99_features,
+		PMAC_MB_MAY_SLEEP
+	},
+	{	"PowerMac3,6",			"PowerMac G4 Windtunnel",
+		PMAC_TYPE_WINDTUNNEL,		core99_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac4,1",			"iMac \"Flower Power\"",
+		PMAC_TYPE_PANGEA_IMAC,		pangea_features,
+		PMAC_MB_MAY_SLEEP
+	},
+	{	"PowerMac4,2",			"Flat panel iMac",
+		PMAC_TYPE_FLAT_PANEL_IMAC,	pangea_features,
+		PMAC_MB_CAN_SLEEP
+	},
+	{	"PowerMac4,4",			"eMac",
+		PMAC_TYPE_EMAC,			core99_features,
+		PMAC_MB_MAY_SLEEP
+	},
+	{	"PowerMac5,1",			"PowerMac G4 Cube",
+		PMAC_TYPE_CUBE,			core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99
+	},
+	{	"PowerMac6,1",			"Flat panel iMac",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac6,3",			"Flat panel iMac",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac6,4",			"eMac",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP,
+	},
+	{	"PowerMac10,1",			"Mac mini",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER,
+	},
+	{	"iMac,1",			"iMac (first generation)",
+		PMAC_TYPE_ORIG_IMAC,		paddington_features,
+		0
+	},
+
+	/*
+	 * Xserve's
+	 */
+
+	{	"RackMac1,1",			"XServe",
+		PMAC_TYPE_RACKMAC,		rackmac_features,
+		0,
+	},
+	{	"RackMac1,2",			"XServe rev. 2",
+		PMAC_TYPE_RACKMAC,		rackmac_features,
+		0,
+	},
+
+	/*
+	 * Laptops
+	 */
+
+	{	"AAPL,3400/2400",		"PowerBook 3400",
+		PMAC_TYPE_HOOPER,		ohare_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
+	},
+	{	"AAPL,3500",			"PowerBook 3500",
+		PMAC_TYPE_KANGA,		ohare_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
+	},
+	{	"AAPL,PowerBook1998",		"PowerBook Wallstreet",
+		PMAC_TYPE_WALLSTREET,		heathrow_laptop_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
+	},
+	{	"PowerBook1,1",			"PowerBook 101 (Lombard)",
+		PMAC_TYPE_101_PBOOK,		paddington_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE
+	},
+	{	"PowerBook2,1",			"iBook (first generation)",
+		PMAC_TYPE_ORIG_IBOOK,		core99_features,
+		PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
+	},
+	{	"PowerBook2,2",			"iBook FireWire",
+		PMAC_TYPE_FW_IBOOK,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER |
+		PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,1",			"PowerBook Pismo",
+		PMAC_TYPE_PISMO,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER |
+		PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,2",			"PowerBook Titanium",
+		PMAC_TYPE_TITANIUM,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,3",			"PowerBook Titanium II",
+		PMAC_TYPE_TITANIUM2,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,4",			"PowerBook Titanium III",
+		PMAC_TYPE_TITANIUM3,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook3,5",			"PowerBook Titanium IV",
+		PMAC_TYPE_TITANIUM4,		core99_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook4,1",			"iBook 2",
+		PMAC_TYPE_IBOOK2,		pangea_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook4,2",			"iBook 2",
+		PMAC_TYPE_IBOOK2,		pangea_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook4,3",			"iBook 2 rev. 2",
+		PMAC_TYPE_IBOOK2,		pangea_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE
+	},
+	{	"PowerBook5,1",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,2",			"PowerBook G4 15\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,3",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,4",			"PowerBook G4 15\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,5",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,6",			"PowerBook G4 15\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook5,7",			"PowerBook G4 17\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,1",			"PowerBook G4 12\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,2",			"PowerBook G4",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,3",			"iBook G4",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,4",			"PowerBook G4 12\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,5",			"iBook G4",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+	{	"PowerBook6,8",			"PowerBook G4 12\"",
+		PMAC_TYPE_UNKNOWN_INTREPID,	intrepid_features,
+		PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE,
+	},
+#else /* CONFIG_POWER4 */
+	{	"PowerMac7,2",			"PowerMac G5",
+		PMAC_TYPE_POWERMAC_G5,		g5_features,
+		0,
+	},
+#ifdef CONFIG_PPC64
+	{	"PowerMac7,3",			"PowerMac G5",
+		PMAC_TYPE_POWERMAC_G5,		g5_features,
+		0,
+	},
+	{	"PowerMac8,1",			"iMac G5",
+		PMAC_TYPE_IMAC_G5,		g5_features,
+		0,
+	},
+	{	"PowerMac9,1",			"PowerMac G5",
+		PMAC_TYPE_POWERMAC_G5_U3L,	g5_features,
+		0,
+	},
+	{       "RackMac3,1",                   "XServe G5",
+		PMAC_TYPE_XSERVE_G5,		g5_features,
+		0,
+	},
+#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_POWER4 */
+};
+
+/*
+ * The toplevel feature_call callback
+ */
+long pmac_do_feature_call(unsigned int selector, ...)
+{
+	struct device_node *node;
+	long param, value;
+	int i;
+	feature_call func = NULL;
+	va_list args;
+
+	if (pmac_mb.features)
+		for (i=0; pmac_mb.features[i].function; i++)
+			if (pmac_mb.features[i].selector == selector) {
+				func = pmac_mb.features[i].function;
+				break;
+			}
+	if (!func)
+		for (i=0; any_features[i].function; i++)
+			if (any_features[i].selector == selector) {
+				func = any_features[i].function;
+				break;
+			}
+	if (!func)
+		return -ENODEV;
+
+	va_start(args, selector);
+	node = (struct device_node*)va_arg(args, void*);
+	param = va_arg(args, long);
+	value = va_arg(args, long);
+	va_end(args);
+
+	return func(node, param, value);
+}
+
+static int __init probe_motherboard(void)
+{
+	int i;
+	struct macio_chip *macio = &macio_chips[0];
+	const char *model = NULL;
+	struct device_node *dt;
+
+	/* Lookup known motherboard type in device-tree. First try an
+	 * exact match on the "model" property, then try a "compatible"
+	 * match is none is found.
+	 */
+	dt = find_devices("device-tree");
+	if (dt != NULL)
+		model = (const char *) get_property(dt, "model", NULL);
+	for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) {
+	    if (strcmp(model, pmac_mb_defs[i].model_string) == 0) {
+		pmac_mb = pmac_mb_defs[i];
+		goto found;
+	    }
+	}
+	for(i=0; i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) {
+	    if (machine_is_compatible(pmac_mb_defs[i].model_string)) {
+		pmac_mb = pmac_mb_defs[i];
+		goto found;
+	    }
+	}
+
+	/* Fallback to selection depending on mac-io chip type */
+	switch(macio->type) {
+#ifndef CONFIG_POWER4
+	    case macio_grand_central:
+		pmac_mb.model_id = PMAC_TYPE_PSURGE;
+		pmac_mb.model_name = "Unknown PowerSurge";
+		break;
+	    case macio_ohare:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_OHARE;
+		pmac_mb.model_name = "Unknown OHare-based";
+		break;
+	    case macio_heathrow:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_HEATHROW;
+		pmac_mb.model_name = "Unknown Heathrow-based";
+		pmac_mb.features = heathrow_desktop_features;
+		break;
+	    case macio_paddington:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PADDINGTON;
+		pmac_mb.model_name = "Unknown Paddington-based";
+		pmac_mb.features = paddington_features;
+		break;
+	    case macio_keylargo:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_CORE99;
+		pmac_mb.model_name = "Unknown Keylargo-based";
+		pmac_mb.features = core99_features;
+		break;
+	    case macio_pangea:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PANGEA;
+		pmac_mb.model_name = "Unknown Pangea-based";
+		pmac_mb.features = pangea_features;
+		break;
+	    case macio_intrepid:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_INTREPID;
+		pmac_mb.model_name = "Unknown Intrepid-based";
+		pmac_mb.features = intrepid_features;
+		break;
+#else /* CONFIG_POWER4 */
+	case macio_keylargo2:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2;
+		pmac_mb.model_name = "Unknown K2-based";
+		pmac_mb.features = g5_features;
+		break;
+#endif /* CONFIG_POWER4 */
+	default:
+		return -ENODEV;
+	}
+found:
+#ifndef CONFIG_POWER4
+	/* Fixup Hooper vs. Comet */
+	if (pmac_mb.model_id == PMAC_TYPE_HOOPER) {
+		u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4);
+		if (!mach_id_ptr)
+			return -ENODEV;
+		/* Here, I used to disable the media-bay on comet. It
+		 * appears this is wrong, the floppy connector is actually
+		 * a kind of media-bay and works with the current driver.
+		 */
+		if (__raw_readl(mach_id_ptr) & 0x20000000UL)
+			pmac_mb.model_id = PMAC_TYPE_COMET;
+		iounmap(mach_id_ptr);
+	}
+#endif /* CONFIG_POWER4 */
+
+#ifdef CONFIG_6xx
+	/* Set default value of powersave_nap on machines that support it.
+	 * It appears that uninorth rev 3 has a problem with it, we don't
+	 * enable it on those. In theory, the flush-on-lock property is
+	 * supposed to be set when not supported, but I'm not very confident
+	 * that all Apple OF revs did it properly, I do it the paranoid way.
+	 */
+	while (uninorth_base && uninorth_rev > 3) {
+		struct device_node *np = find_path_device("/cpus");
+		if (!np || !np->child) {
+			printk(KERN_WARNING "Can't find CPU(s) in device tree !\n");
+			break;
+		}
+		np = np->child;
+		/* Nap mode not supported on SMP */
+		if (np->sibling)
+			break;
+		/* Nap mode not supported if flush-on-lock property is present */
+		if (get_property(np, "flush-on-lock", NULL))
+			break;
+		powersave_nap = 1;
+		printk(KERN_INFO "Processor NAP mode on idle enabled.\n");
+		break;
+	}
+
+	/* On CPUs that support it (750FX), lowspeed by default during
+	 * NAP mode
+	 */
+	powersave_lowspeed = 1;
+#endif /* CONFIG_6xx */
+#ifdef CONFIG_POWER4
+	powersave_nap = 1;
+#endif
+	/* Check for "mobile" machine */
+	if (model && (strncmp(model, "PowerBook", 9) == 0
+		   || strncmp(model, "iBook", 5) == 0))
+		pmac_mb.board_flags |= PMAC_MB_MOBILE;
+
+
+	printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name);
+	return 0;
+}
+
+/* Initialize the Core99 UniNorth host bridge and memory controller
+ */
+static void __init probe_uninorth(void)
+{
+	unsigned long actrl;
+
+	/* Locate core99 Uni-N */
+	uninorth_node = of_find_node_by_name(NULL, "uni-n");
+	/* Locate G5 u3 */
+	if (uninorth_node == NULL) {
+		uninorth_node = of_find_node_by_name(NULL, "u3");
+		uninorth_u3 = 1;
+	}
+	if (uninorth_node && uninorth_node->n_addrs > 0) {
+		unsigned long address = uninorth_node->addrs[0].address;
+		uninorth_base = ioremap(address, 0x40000);
+		uninorth_rev = in_be32(UN_REG(UNI_N_VERSION));
+		if (uninorth_u3)
+			u3_ht = ioremap(address + U3_HT_CONFIG_BASE, 0x1000);
+	} else
+		uninorth_node = NULL;
+
+	if (!uninorth_node)
+		return;
+
+	printk(KERN_INFO "Found %s memory controller & host bridge, revision: %d\n",
+	       uninorth_u3 ? "U3" : "UniNorth", uninorth_rev);
+	printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base);
+
+	/* Set the arbitrer QAck delay according to what Apple does
+	 */
+	if (uninorth_rev < 0x11) {
+		actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK;
+		actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 :
+			UNI_N_ARB_CTRL_QACK_DELAY) << UNI_N_ARB_CTRL_QACK_DELAY_SHIFT;
+		UN_OUT(UNI_N_ARB_CTRL, actrl);
+	}
+
+	/* Some more magic as done by them in recent MacOS X on UniNorth
+	 * revs 1.5 to 2.O and Pangea. Seem to toggle the UniN Maxbus/PCI
+	 * memory timeout
+	 */
+	if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) || uninorth_rev == 0xc0)
+		UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff);
+}
+
+static void __init probe_one_macio(const char *name, const char *compat, int type)
+{
+	struct device_node*	node;
+	int			i;
+	volatile u32 __iomem *	base;
+	u32*			revp;
+
+	node = find_devices(name);
+	if (!node || !node->n_addrs)
+		return;
+	if (compat)
+		do {
+			if (device_is_compatible(node, compat))
+				break;
+			node = node->next;
+		} while (node);
+	if (!node)
+		return;
+	for(i=0; i<MAX_MACIO_CHIPS; i++) {
+		if (!macio_chips[i].of_node)
+			break;
+		if (macio_chips[i].of_node == node)
+			return;
+	}
+	if (i >= MAX_MACIO_CHIPS) {
+		printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n");
+		printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name);
+		return;
+	}
+	base = ioremap(node->addrs[0].address, node->addrs[0].size);
+	if (!base) {
+		printk(KERN_ERR "pmac_feature: Can't map mac-io chip !\n");
+		return;
+	}
+	if (type == macio_keylargo) {
+		u32 *did = (u32 *)get_property(node, "device-id", NULL);
+		if (*did == 0x00000025)
+			type = macio_pangea;
+		if (*did == 0x0000003e)
+			type = macio_intrepid;
+	}
+	macio_chips[i].of_node	= node;
+	macio_chips[i].type	= type;
+	macio_chips[i].base	= base;
+	macio_chips[i].flags	= MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON;
+	macio_chips[i].name	= macio_names[type];
+	revp = (u32 *)get_property(node, "revision-id", NULL);
+	if (revp)
+		macio_chips[i].rev = *revp;
+	printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n",
+		macio_names[type], macio_chips[i].rev, macio_chips[i].base);
+}
+
+static int __init
+probe_macios(void)
+{
+	/* Warning, ordering is important */
+	probe_one_macio("gc", NULL, macio_grand_central);
+	probe_one_macio("ohare", NULL, macio_ohare);
+	probe_one_macio("pci106b,7", NULL, macio_ohareII);
+	probe_one_macio("mac-io", "keylargo", macio_keylargo);
+	probe_one_macio("mac-io", "paddington", macio_paddington);
+	probe_one_macio("mac-io", "gatwick", macio_gatwick);
+	probe_one_macio("mac-io", "heathrow", macio_heathrow);
+	probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2);
+
+	/* Make sure the "main" macio chip appear first */
+	if (macio_chips[0].type == macio_gatwick
+	    && macio_chips[1].type == macio_heathrow) {
+		struct macio_chip temp = macio_chips[0];
+		macio_chips[0] = macio_chips[1];
+		macio_chips[1] = temp;
+	}
+	if (macio_chips[0].type == macio_ohareII
+	    && macio_chips[1].type == macio_ohare) {
+		struct macio_chip temp = macio_chips[0];
+		macio_chips[0] = macio_chips[1];
+		macio_chips[1] = temp;
+	}
+	macio_chips[0].lbus.index = 0;
+	macio_chips[1].lbus.index = 1;
+
+	return (macio_chips[0].of_node == NULL) ? -ENODEV : 0;
+}
+
+static void __init
+initial_serial_shutdown(struct device_node *np)
+{
+	int len;
+	struct slot_names_prop {
+		int	count;
+		char	name[1];
+	} *slots;
+	char *conn;
+	int port_type = PMAC_SCC_ASYNC;
+	int modem = 0;
+
+	slots = (struct slot_names_prop *)get_property(np, "slot-names", &len);
+	conn = get_property(np, "AAPL,connector", &len);
+	if (conn && (strcmp(conn, "infrared") == 0))
+		port_type = PMAC_SCC_IRDA;
+	else if (device_is_compatible(np, "cobalt"))
+		modem = 1;
+	else if (slots && slots->count > 0) {
+		if (strcmp(slots->name, "IrDA") == 0)
+			port_type = PMAC_SCC_IRDA;
+		else if (strcmp(slots->name, "Modem") == 0)
+			modem = 1;
+	}
+	if (modem)
+		pmac_call_feature(PMAC_FTR_MODEM_ENABLE, np, 0, 0);
+	pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, port_type, 0);
+}
+
+static void __init
+set_initial_features(void)
+{
+	struct device_node *np;
+
+	/* That hack appears to be necessary for some StarMax motherboards
+	 * but I'm not too sure it was audited for side-effects on other
+	 * ohare based machines...
+	 * Since I still have difficulties figuring the right way to
+	 * differenciate them all and since that hack was there for a long
+	 * time, I'll keep it around
+	 */
+	if (macio_chips[0].type == macio_ohare && !find_devices("via-pmu")) {
+		struct macio_chip *macio = &macio_chips[0];
+		MACIO_OUT32(OHARE_FCR, STARMAX_FEATURES);
+	} else if (macio_chips[0].type == macio_ohare) {
+		struct macio_chip *macio = &macio_chips[0];
+		MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+	} else if (macio_chips[1].type == macio_ohare) {
+		struct macio_chip *macio = &macio_chips[1];
+		MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+	}
+
+#ifdef CONFIG_POWER4
+	if (macio_chips[0].type == macio_keylargo2) {
+#ifndef CONFIG_SMP
+		/* On SMP machines running UP, we have the second CPU eating
+		 * bus cycles. We need to take it off the bus. This is done
+		 * from pmac_smp for SMP kernels running on one CPU
+		 */
+		np = of_find_node_by_type(NULL, "cpu");
+		if (np != NULL)
+			np = of_find_node_by_type(np, "cpu");
+		if (np != NULL) {
+			g5_phy_disable_cpu1();
+			of_node_put(np);
+		}
+#endif /* CONFIG_SMP */
+		/* Enable GMAC for now for PCI probing. It will be disabled
+		 * later on after PCI probe
+		 */
+		np = of_find_node_by_name(NULL, "ethernet");
+		while(np) {
+			if (device_is_compatible(np, "K2-GMAC"))
+				g5_gmac_enable(np, 0, 1);
+			np = of_find_node_by_name(np, "ethernet");
+		}
+
+		/* Enable FW before PCI probe. Will be disabled later on
+		 * Note: We should have a batter way to check that we are
+		 * dealing with uninorth internal cell and not a PCI cell
+		 * on the external PCI. The code below works though.
+		 */
+		np = of_find_node_by_name(NULL, "firewire");
+		while(np) {
+			if (device_is_compatible(np, "pci106b,5811")) {
+				macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
+				g5_fw_enable(np, 0, 1);
+			}
+			np = of_find_node_by_name(np, "firewire");
+		}
+	}
+#else /* CONFIG_POWER4 */
+
+	if (macio_chips[0].type == macio_keylargo ||
+	    macio_chips[0].type == macio_pangea ||
+	    macio_chips[0].type == macio_intrepid) {
+		/* Enable GMAC for now for PCI probing. It will be disabled
+		 * later on after PCI probe
+		 */
+		np = of_find_node_by_name(NULL, "ethernet");
+		while(np) {
+			if (np->parent
+			    && device_is_compatible(np->parent, "uni-north")
+			    && device_is_compatible(np, "gmac"))
+				core99_gmac_enable(np, 0, 1);
+			np = of_find_node_by_name(np, "ethernet");
+		}
+
+		/* Enable FW before PCI probe. Will be disabled later on
+		 * Note: We should have a batter way to check that we are
+		 * dealing with uninorth internal cell and not a PCI cell
+		 * on the external PCI. The code below works though.
+		 */
+		np = of_find_node_by_name(NULL, "firewire");
+		while(np) {
+			if (np->parent
+			    && device_is_compatible(np->parent, "uni-north")
+			    && (device_is_compatible(np, "pci106b,18") ||
+			        device_is_compatible(np, "pci106b,30") ||
+			        device_is_compatible(np, "pci11c1,5811"))) {
+				macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
+				core99_firewire_enable(np, 0, 1);
+			}
+			np = of_find_node_by_name(np, "firewire");
+		}
+
+		/* Enable ATA-100 before PCI probe. */
+		np = of_find_node_by_name(NULL, "ata-6");
+		while(np) {
+			if (np->parent
+			    && device_is_compatible(np->parent, "uni-north")
+			    && device_is_compatible(np, "kauai-ata")) {
+				core99_ata100_enable(np, 1);
+			}
+			np = of_find_node_by_name(np, "ata-6");
+		}
+
+		/* Switch airport off */
+		np = find_devices("radio");
+		while(np) {
+			if (np && np->parent == macio_chips[0].of_node) {
+				macio_chips[0].flags |= MACIO_FLAG_AIRPORT_ON;
+				core99_airport_enable(np, 0, 0);
+			}
+			np = np->next;
+		}
+	}
+
+	/* On all machines that support sound PM, switch sound off */
+	if (macio_chips[0].of_node)
+		pmac_do_feature_call(PMAC_FTR_SOUND_CHIP_ENABLE,
+			macio_chips[0].of_node, 0, 0);
+
+	/* While on some desktop G3s, we turn it back on */
+	if (macio_chips[0].of_node && macio_chips[0].type == macio_heathrow
+		&& (pmac_mb.model_id == PMAC_TYPE_GOSSAMER ||
+		    pmac_mb.model_id == PMAC_TYPE_SILK)) {
+		struct macio_chip *macio = &macio_chips[0];
+		MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+		MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N);
+	}
+
+	/* Some machine models need the clock chip to be properly setup for
+	 * clock spreading now. This should be a platform function but we
+	 * don't do these at the moment
+	 */
+	pmac_tweak_clock_spreading(1);
+
+#endif /* CONFIG_POWER4 */
+
+	/* On all machines, switch modem & serial ports off */
+	np = find_devices("ch-a");
+	while(np) {
+		initial_serial_shutdown(np);
+		np = np->next;
+	}
+	np = find_devices("ch-b");
+	while(np) {
+		initial_serial_shutdown(np);
+		np = np->next;
+	}
+}
+
+void __init
+pmac_feature_init(void)
+{
+	/* Detect the UniNorth memory controller */
+	probe_uninorth();
+
+	/* Probe mac-io controllers */
+	if (probe_macios()) {
+		printk(KERN_WARNING "No mac-io chip found\n");
+		return;
+	}
+
+	/* Setup low-level i2c stuffs */
+	pmac_init_low_i2c();
+
+	/* Probe machine type */
+	if (probe_motherboard())
+		printk(KERN_WARNING "Unknown PowerMac !\n");
+
+	/* Set some initial features (turn off some chips that will
+	 * be later turned on)
+	 */
+	set_initial_features();
+}
+
+int __init pmac_feature_late_init(void)
+{
+#if 0
+	struct device_node *np;
+
+	/* Request some resources late */
+	if (uninorth_node)
+		request_OF_resource(uninorth_node, 0, NULL);
+	np = find_devices("hammerhead");
+	if (np)
+		request_OF_resource(np, 0, NULL);
+	np = find_devices("interrupt-controller");
+	if (np)
+		request_OF_resource(np, 0, NULL);
+#endif
+	return 0;
+}
+
+device_initcall(pmac_feature_late_init);
+
+#if 0
+static void dump_HT_speeds(char *name, u32 cfg, u32 frq)
+{
+	int	freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 };
+	int	bits[8] = { 8,16,0,32,2,4,0,0 };
+	int	freq = (frq >> 8) & 0xf;
+
+	if (freqs[freq] == 0)
+		printk("%s: Unknown HT link frequency %x\n", name, freq);
+	else
+		printk("%s: %d MHz on main link, (%d in / %d out) bits width\n",
+		       name, freqs[freq],
+		       bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]);
+}
+
+void __init pmac_check_ht_link(void)
+{
+#if 0 /* Disabled for now */
+	u32	ufreq, freq, ucfg, cfg;
+	struct device_node *pcix_node;
+	u8	px_bus, px_devfn;
+	struct pci_controller *px_hose;
+
+	(void)in_be32(u3_ht + U3_HT_LINK_COMMAND);
+	ucfg = cfg = in_be32(u3_ht + U3_HT_LINK_CONFIG);
+	ufreq = freq = in_be32(u3_ht + U3_HT_LINK_FREQ);
+	dump_HT_speeds("U3 HyperTransport", cfg, freq);
+
+	pcix_node = of_find_compatible_node(NULL, "pci", "pci-x");
+	if (pcix_node == NULL) {
+		printk("No PCI-X bridge found\n");
+		return;
+	}
+	if (pci_device_from_OF_node(pcix_node, &px_bus, &px_devfn) != 0) {
+		printk("PCI-X bridge found but not matched to pci\n");
+		return;
+	}
+	px_hose = pci_find_hose_for_OF_device(pcix_node);
+	if (px_hose == NULL) {
+		printk("PCI-X bridge found but not matched to host\n");
+		return;
+	}	
+	early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg);
+	early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq);
+	dump_HT_speeds("PCI-X HT Uplink", cfg, freq);
+	early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg);
+	early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq);
+	dump_HT_speeds("PCI-X HT Downlink", cfg, freq);
+#endif
+}
+
+#endif /* CONFIG_POWER4 */
+
+/*
+ * Early video resume hook
+ */
+
+static void (*pmac_early_vresume_proc)(void *data);
+static void *pmac_early_vresume_data;
+
+void pmac_set_early_video_resume(void (*proc)(void *data), void *data)
+{
+	if (_machine != _MACH_Pmac)
+		return;
+	preempt_disable();
+	pmac_early_vresume_proc = proc;
+	pmac_early_vresume_data = data;
+	preempt_enable();
+}
+EXPORT_SYMBOL(pmac_set_early_video_resume);
+
+void pmac_call_early_video_resume(void)
+{
+	if (pmac_early_vresume_proc)
+		pmac_early_vresume_proc(pmac_early_vresume_data);
+}
+
+/*
+ * AGP related suspend/resume code
+ */
+
+static struct pci_dev *pmac_agp_bridge;
+static int (*pmac_agp_suspend)(struct pci_dev *bridge);
+static int (*pmac_agp_resume)(struct pci_dev *bridge);
+
+void pmac_register_agp_pm(struct pci_dev *bridge,
+				 int (*suspend)(struct pci_dev *bridge),
+				 int (*resume)(struct pci_dev *bridge))
+{
+	if (suspend || resume) {
+		pmac_agp_bridge = bridge;
+		pmac_agp_suspend = suspend;
+		pmac_agp_resume = resume;
+		return;
+	}
+	if (bridge != pmac_agp_bridge)
+		return;
+	pmac_agp_suspend = pmac_agp_resume = NULL;
+	return;
+}
+EXPORT_SYMBOL(pmac_register_agp_pm);
+
+void pmac_suspend_agp_for_card(struct pci_dev *dev)
+{
+	if (pmac_agp_bridge == NULL || pmac_agp_suspend == NULL)
+		return;
+	if (pmac_agp_bridge->bus != dev->bus)
+		return;
+	pmac_agp_suspend(pmac_agp_bridge);
+}
+EXPORT_SYMBOL(pmac_suspend_agp_for_card);
+
+void pmac_resume_agp_for_card(struct pci_dev *dev)
+{
+	if (pmac_agp_bridge == NULL || pmac_agp_resume == NULL)
+		return;
+	if (pmac_agp_bridge->bus != dev->bus)
+		return;
+	pmac_agp_resume(pmac_agp_bridge);
+}
+EXPORT_SYMBOL(pmac_resume_agp_for_card);
diff --git a/arch/powerpc/platforms/powermac/pmac_low_i2c.c b/arch/powerpc/platforms/powermac/pmac_low_i2c.c
new file mode 100644
index 00000000000..f3f39e8e337
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_low_i2c.c
@@ -0,0 +1,523 @@
+/*
+ *  arch/ppc/platforms/pmac_low_i2c.c
+ *
+ *  Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ *  This file contains some low-level i2c access routines that
+ *  need to be used by various bits of the PowerMac platform code
+ *  at times where the real asynchronous & interrupt driven driver
+ *  cannot be used. The API borrows some semantics from the darwin
+ *  driver in order to ease the implementation of the platform
+ *  properties parser
+ */
+
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <asm/keylargo.h>
+#include <asm/uninorth.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/pmac_low_i2c.h>
+
+#define MAX_LOW_I2C_HOST	4
+
+#ifdef DEBUG
+#define DBG(x...) do {\
+		printk(KERN_DEBUG "KW:" x);	\
+	} while(0)
+#else
+#define DBG(x...)
+#endif
+
+struct low_i2c_host;
+
+typedef int (*low_i2c_func_t)(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len);
+
+struct low_i2c_host
+{
+	struct device_node	*np;		/* OF device node */
+	struct semaphore	mutex;		/* Access mutex for use by i2c-keywest */
+	low_i2c_func_t		func;		/* Access function */
+	unsigned int		is_open : 1;	/* Poor man's access control */
+	int			mode;		/* Current mode */
+	int			channel;	/* Current channel */
+	int			num_channels;	/* Number of channels */
+	void __iomem		*base;		/* For keywest-i2c, base address */
+	int			bsteps;		/* And register stepping */
+	int			speed;		/* And speed */
+};
+
+static struct low_i2c_host	low_i2c_hosts[MAX_LOW_I2C_HOST];
+
+/* No locking is necessary on allocation, we are running way before
+ * anything can race with us
+ */
+static struct low_i2c_host *find_low_i2c_host(struct device_node *np)
+{
+	int i;
+
+	for (i = 0; i < MAX_LOW_I2C_HOST; i++)
+		if (low_i2c_hosts[i].np == np)
+			return &low_i2c_hosts[i];
+	return NULL;
+}
+
+/*
+ *
+ * i2c-keywest implementation (UniNorth, U2, U3, Keylargo's)
+ *
+ */
+
+/*
+ * Keywest i2c definitions borrowed from drivers/i2c/i2c-keywest.h,
+ * should be moved somewhere in include/asm-ppc/
+ */
+/* Register indices */
+typedef enum {
+	reg_mode = 0,
+	reg_control,
+	reg_status,
+	reg_isr,
+	reg_ier,
+	reg_addr,
+	reg_subaddr,
+	reg_data
+} reg_t;
+
+
+/* Mode register */
+#define KW_I2C_MODE_100KHZ	0x00
+#define KW_I2C_MODE_50KHZ	0x01
+#define KW_I2C_MODE_25KHZ	0x02
+#define KW_I2C_MODE_DUMB	0x00
+#define KW_I2C_MODE_STANDARD	0x04
+#define KW_I2C_MODE_STANDARDSUB	0x08
+#define KW_I2C_MODE_COMBINED	0x0C
+#define KW_I2C_MODE_MODE_MASK	0x0C
+#define KW_I2C_MODE_CHAN_MASK	0xF0
+
+/* Control register */
+#define KW_I2C_CTL_AAK		0x01
+#define KW_I2C_CTL_XADDR	0x02
+#define KW_I2C_CTL_STOP		0x04
+#define KW_I2C_CTL_START	0x08
+
+/* Status register */
+#define KW_I2C_STAT_BUSY	0x01
+#define KW_I2C_STAT_LAST_AAK	0x02
+#define KW_I2C_STAT_LAST_RW	0x04
+#define KW_I2C_STAT_SDA		0x08
+#define KW_I2C_STAT_SCL		0x10
+
+/* IER & ISR registers */
+#define KW_I2C_IRQ_DATA		0x01
+#define KW_I2C_IRQ_ADDR		0x02
+#define KW_I2C_IRQ_STOP		0x04
+#define KW_I2C_IRQ_START	0x08
+#define KW_I2C_IRQ_MASK		0x0F
+
+/* State machine states */
+enum {
+	state_idle,
+	state_addr,
+	state_read,
+	state_write,
+	state_stop,
+	state_dead
+};
+
+#define WRONG_STATE(name) do {\
+		printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s (isr: %02x)\n", \
+		       name, __kw_state_names[state], isr); \
+	} while(0)
+
+static const char *__kw_state_names[] = {
+	"state_idle",
+	"state_addr",
+	"state_read",
+	"state_write",
+	"state_stop",
+	"state_dead"
+};
+
+static inline u8 __kw_read_reg(struct low_i2c_host *host, reg_t reg)
+{
+	return readb(host->base + (((unsigned int)reg) << host->bsteps));
+}
+
+static inline void __kw_write_reg(struct low_i2c_host *host, reg_t reg, u8 val)
+{
+	writeb(val, host->base + (((unsigned)reg) << host->bsteps));
+	(void)__kw_read_reg(host, reg_subaddr);
+}
+
+#define kw_write_reg(reg, val)	__kw_write_reg(host, reg, val) 
+#define kw_read_reg(reg)	__kw_read_reg(host, reg) 
+
+
+/* Don't schedule, the g5 fan controller is too
+ * timing sensitive
+ */
+static u8 kw_wait_interrupt(struct low_i2c_host* host)
+{
+	int i, j;
+	u8 isr;
+	
+	for (i = 0; i < 100000; i++) {
+		isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK;
+		if (isr != 0)
+			return isr;
+
+		/* This code is used with the timebase frozen, we cannot rely
+		 * on udelay ! For now, just use a bogus loop
+		 */
+		for (j = 1; j < 10000; j++)
+			mb();
+	}
+	return isr;
+}
+
+static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int *rc, u8 **data, int *len, u8 isr)
+{
+	u8 ack;
+
+	DBG("kw_handle_interrupt(%s, isr: %x)\n", __kw_state_names[state], isr);
+
+	if (isr == 0) {
+		if (state != state_stop) {
+			DBG("KW: Timeout !\n");
+			*rc = -EIO;
+			goto stop;
+		}
+		if (state == state_stop) {
+			ack = kw_read_reg(reg_status);
+			if (!(ack & KW_I2C_STAT_BUSY)) {
+				state = state_idle;
+				kw_write_reg(reg_ier, 0x00);
+			}
+		}
+		return state;
+	}
+
+	if (isr & KW_I2C_IRQ_ADDR) {
+		ack = kw_read_reg(reg_status);
+		if (state != state_addr) {
+			kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR);
+			WRONG_STATE("KW_I2C_IRQ_ADDR"); 
+			*rc = -EIO;
+			goto stop;
+		}
+		if ((ack & KW_I2C_STAT_LAST_AAK) == 0) {			
+			*rc = -ENODEV;
+			DBG("KW: NAK on address\n");
+			return state_stop;		     
+		} else {
+			if (rw) {
+				state = state_read;
+				if (*len > 1)
+					kw_write_reg(reg_control, KW_I2C_CTL_AAK);
+			} else {
+				state = state_write;
+				kw_write_reg(reg_data, **data);
+				(*data)++; (*len)--;
+			}
+		}
+		kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR);
+	}
+
+	if (isr & KW_I2C_IRQ_DATA) {
+		if (state == state_read) {
+			**data = kw_read_reg(reg_data);
+			(*data)++; (*len)--;
+			kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
+			if ((*len) == 0)
+				state = state_stop;
+			else if ((*len) == 1)
+				kw_write_reg(reg_control, 0);
+		} else if (state == state_write) {
+			ack = kw_read_reg(reg_status);
+			if ((ack & KW_I2C_STAT_LAST_AAK) == 0) {
+				DBG("KW: nack on data write\n");
+				*rc = -EIO;
+				goto stop;
+			} else if (*len) {
+				kw_write_reg(reg_data, **data);
+				(*data)++; (*len)--;
+			} else {
+				kw_write_reg(reg_control, KW_I2C_CTL_STOP);
+				state = state_stop;
+				*rc = 0;
+			}
+			kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
+		} else {
+			kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
+			WRONG_STATE("KW_I2C_IRQ_DATA"); 
+			if (state != state_stop) {
+				*rc = -EIO;
+				goto stop;
+			}
+		}
+	}
+
+	if (isr & KW_I2C_IRQ_STOP) {
+		kw_write_reg(reg_isr, KW_I2C_IRQ_STOP);
+		if (state != state_stop) {
+			WRONG_STATE("KW_I2C_IRQ_STOP");
+			*rc = -EIO;
+		}
+		return state_idle;
+	}
+
+	if (isr & KW_I2C_IRQ_START)
+		kw_write_reg(reg_isr, KW_I2C_IRQ_START);
+
+	return state;
+
+ stop:
+	kw_write_reg(reg_control, KW_I2C_CTL_STOP);	
+	return state_stop;
+}
+
+static int keywest_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 subaddr, u8 *data, int len)
+{
+	u8 mode_reg = host->speed;
+	int state = state_addr;
+	int rc = 0;
+
+	/* Setup mode & subaddress if any */
+	switch(host->mode) {
+	case pmac_low_i2c_mode_dumb:
+		printk(KERN_ERR "low_i2c: Dumb mode not supported !\n");
+		return -EINVAL;
+	case pmac_low_i2c_mode_std:
+		mode_reg |= KW_I2C_MODE_STANDARD;
+		break;
+	case pmac_low_i2c_mode_stdsub:
+		mode_reg |= KW_I2C_MODE_STANDARDSUB;
+		break;
+	case pmac_low_i2c_mode_combined:
+		mode_reg |= KW_I2C_MODE_COMBINED;
+		break;
+	}
+
+	/* Setup channel & clear pending irqs */
+	kw_write_reg(reg_isr, kw_read_reg(reg_isr));
+	kw_write_reg(reg_mode, mode_reg | (host->channel << 4));
+	kw_write_reg(reg_status, 0);
+
+	/* Set up address and r/w bit */
+	kw_write_reg(reg_addr, addr);
+
+	/* Set up the sub address */
+	if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB
+	    || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED)
+		kw_write_reg(reg_subaddr, subaddr);
+
+	/* Start sending address & disable interrupt*/
+	kw_write_reg(reg_ier, 0 /*KW_I2C_IRQ_MASK*/);
+	kw_write_reg(reg_control, KW_I2C_CTL_XADDR);
+
+	/* State machine, to turn into an interrupt handler */
+	while(state != state_idle) {
+		u8 isr = kw_wait_interrupt(host);
+		state = kw_handle_interrupt(host, state, addr & 1, &rc, &data, &len, isr);
+	}
+
+	return rc;
+}
+
+static void keywest_low_i2c_add(struct device_node *np)
+{
+	struct low_i2c_host	*host = find_low_i2c_host(NULL);
+	u32			*psteps, *prate, steps, aoffset = 0;
+	struct device_node	*parent;
+
+	if (host == NULL) {
+		printk(KERN_ERR "low_i2c: Can't allocate host for %s\n",
+		       np->full_name);
+		return;
+	}
+	memset(host, 0, sizeof(*host));
+
+	init_MUTEX(&host->mutex);
+	host->np = of_node_get(np);	
+	psteps = (u32 *)get_property(np, "AAPL,address-step", NULL);
+	steps = psteps ? (*psteps) : 0x10;
+	for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++)
+		steps >>= 1;
+	parent = of_get_parent(np);
+	host->num_channels = 1;
+	if (parent && parent->name[0] == 'u') {
+		host->num_channels = 2;
+		aoffset = 3;
+	}
+	/* Select interface rate */
+	host->speed = KW_I2C_MODE_100KHZ;
+	prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL);
+	if (prate) switch(*prate) {
+	case 100:
+		host->speed = KW_I2C_MODE_100KHZ;
+		break;
+	case 50:
+		host->speed = KW_I2C_MODE_50KHZ;
+		break;
+	case 25:
+		host->speed = KW_I2C_MODE_25KHZ;
+		break;
+	}	
+
+	host->mode = pmac_low_i2c_mode_std;
+	host->base = ioremap(np->addrs[0].address + aoffset,
+						np->addrs[0].size);
+	host->func = keywest_low_i2c_func;
+}
+
+/*
+ *
+ * PMU implementation
+ *
+ */
+
+
+#ifdef CONFIG_ADB_PMU
+
+static int pmu_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len)
+{
+	// TODO
+	return -ENODEV;
+}
+
+static void pmu_low_i2c_add(struct device_node *np)
+{
+	struct low_i2c_host	*host = find_low_i2c_host(NULL);
+
+	if (host == NULL) {
+		printk(KERN_ERR "low_i2c: Can't allocate host for %s\n",
+		       np->full_name);
+		return;
+	}
+	memset(host, 0, sizeof(*host));
+
+	init_MUTEX(&host->mutex);
+	host->np = of_node_get(np);	
+	host->num_channels = 3;
+	host->mode = pmac_low_i2c_mode_std;
+	host->func = pmu_low_i2c_func;
+}
+
+#endif /* CONFIG_ADB_PMU */
+
+void __init pmac_init_low_i2c(void)
+{
+	struct device_node *np;
+
+	/* Probe keywest-i2c busses */
+	np = of_find_compatible_node(NULL, "i2c", "keywest-i2c");
+	while(np) {
+		keywest_low_i2c_add(np);
+		np = of_find_compatible_node(np, "i2c", "keywest-i2c");
+	}
+
+#ifdef CONFIG_ADB_PMU
+	/* Probe PMU busses */
+	np = of_find_node_by_name(NULL, "via-pmu");
+	if (np)
+		pmu_low_i2c_add(np);
+#endif /* CONFIG_ADB_PMU */
+
+	/* TODO: Add CUDA support as well */
+}
+
+int pmac_low_i2c_lock(struct device_node *np)
+{
+	struct low_i2c_host *host = find_low_i2c_host(np);
+
+	if (!host)
+		return -ENODEV;
+	down(&host->mutex);
+	return 0;
+}
+EXPORT_SYMBOL(pmac_low_i2c_lock);
+
+int pmac_low_i2c_unlock(struct device_node *np)
+{
+	struct low_i2c_host *host = find_low_i2c_host(np);
+
+	if (!host)
+		return -ENODEV;
+	up(&host->mutex);
+	return 0;
+}
+EXPORT_SYMBOL(pmac_low_i2c_unlock);
+
+
+int pmac_low_i2c_open(struct device_node *np, int channel)
+{
+	struct low_i2c_host *host = find_low_i2c_host(np);
+
+	if (!host)
+		return -ENODEV;
+
+	if (channel >= host->num_channels)
+		return -EINVAL;
+
+	down(&host->mutex);
+	host->is_open = 1;
+	host->channel = channel;
+
+	return 0;
+}
+EXPORT_SYMBOL(pmac_low_i2c_open);
+
+int pmac_low_i2c_close(struct device_node *np)
+{
+	struct low_i2c_host *host = find_low_i2c_host(np);
+
+	if (!host)
+		return -ENODEV;
+
+	host->is_open = 0;
+	up(&host->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(pmac_low_i2c_close);
+
+int pmac_low_i2c_setmode(struct device_node *np, int mode)
+{
+	struct low_i2c_host *host = find_low_i2c_host(np);
+
+	if (!host)
+		return -ENODEV;
+	WARN_ON(!host->is_open);
+	host->mode = mode;
+
+	return 0;
+}
+EXPORT_SYMBOL(pmac_low_i2c_setmode);
+
+int pmac_low_i2c_xfer(struct device_node *np, u8 addrdir, u8 subaddr, u8 *data, int len)
+{
+	struct low_i2c_host *host = find_low_i2c_host(np);
+
+	if (!host)
+		return -ENODEV;
+	WARN_ON(!host->is_open);
+
+	return host->func(host, addrdir, subaddr, data, len);
+}
+EXPORT_SYMBOL(pmac_low_i2c_xfer);
+
diff --git a/arch/powerpc/platforms/powermac/pmac_nvram.c b/arch/powerpc/platforms/powermac/pmac_nvram.c
new file mode 100644
index 00000000000..8c9b008c722
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_nvram.c
@@ -0,0 +1,584 @@
+/*
+ *  arch/ppc/platforms/pmac_nvram.c
+ *
+ *  Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ *  Todo: - add support for the OF persistent properties
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/nvram.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/bootmem.h>
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/system.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/nvram.h>
+
+#define DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define NVRAM_SIZE		0x2000	/* 8kB of non-volatile RAM */
+
+#define CORE99_SIGNATURE	0x5a
+#define CORE99_ADLER_START	0x14
+
+/* On Core99, nvram is either a sharp, a micron or an AMD flash */
+#define SM_FLASH_STATUS_DONE	0x80
+#define SM_FLASH_STATUS_ERR		0x38
+#define SM_FLASH_CMD_ERASE_CONFIRM	0xd0
+#define SM_FLASH_CMD_ERASE_SETUP	0x20
+#define SM_FLASH_CMD_RESET		0xff
+#define SM_FLASH_CMD_WRITE_SETUP	0x40
+#define SM_FLASH_CMD_CLEAR_STATUS	0x50
+#define SM_FLASH_CMD_READ_STATUS	0x70
+
+/* CHRP NVRAM header */
+struct chrp_header {
+  u8		signature;
+  u8		cksum;
+  u16		len;
+  char          name[12];
+  u8		data[0];
+};
+
+struct core99_header {
+  struct chrp_header	hdr;
+  u32			adler;
+  u32			generation;
+  u32			reserved[2];
+};
+
+/*
+ * Read and write the non-volatile RAM on PowerMacs and CHRP machines.
+ */
+static int nvram_naddrs;
+static volatile unsigned char *nvram_addr;
+static volatile unsigned char *nvram_data;
+static int nvram_mult, is_core_99;
+static int core99_bank = 0;
+static int nvram_partitions[3];
+static DEFINE_SPINLOCK(nv_lock);
+
+extern int pmac_newworld;
+extern int system_running;
+
+static int (*core99_write_bank)(int bank, u8* datas);
+static int (*core99_erase_bank)(int bank);
+
+static char *nvram_image;
+
+
+static unsigned char core99_nvram_read_byte(int addr)
+{
+	if (nvram_image == NULL)
+		return 0xff;
+	return nvram_image[addr];
+}
+
+static void core99_nvram_write_byte(int addr, unsigned char val)
+{
+	if (nvram_image == NULL)
+		return;
+	nvram_image[addr] = val;
+}
+
+
+static unsigned char direct_nvram_read_byte(int addr)
+{
+	return in_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult]);
+}
+
+static void direct_nvram_write_byte(int addr, unsigned char val)
+{
+	out_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult], val);
+}
+
+
+static unsigned char indirect_nvram_read_byte(int addr)
+{
+	unsigned char val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&nv_lock, flags);
+	out_8(nvram_addr, addr >> 5);
+	val = in_8(&nvram_data[(addr & 0x1f) << 4]);
+	spin_unlock_irqrestore(&nv_lock, flags);
+
+	return val;
+}
+
+static void indirect_nvram_write_byte(int addr, unsigned char val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&nv_lock, flags);
+	out_8(nvram_addr, addr >> 5);
+	out_8(&nvram_data[(addr & 0x1f) << 4], val);
+	spin_unlock_irqrestore(&nv_lock, flags);
+}
+
+
+#ifdef CONFIG_ADB_PMU
+
+static void pmu_nvram_complete(struct adb_request *req)
+{
+	if (req->arg)
+		complete((struct completion *)req->arg);
+}
+
+static unsigned char pmu_nvram_read_byte(int addr)
+{
+	struct adb_request req;
+	DECLARE_COMPLETION(req_complete); 
+	
+	req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL;
+	if (pmu_request(&req, pmu_nvram_complete, 3, PMU_READ_NVRAM,
+			(addr >> 8) & 0xff, addr & 0xff))
+		return 0xff;
+	if (system_state == SYSTEM_RUNNING)
+		wait_for_completion(&req_complete);
+	while (!req.complete)
+		pmu_poll();
+	return req.reply[0];
+}
+
+static void pmu_nvram_write_byte(int addr, unsigned char val)
+{
+	struct adb_request req;
+	DECLARE_COMPLETION(req_complete); 
+	
+	req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL;
+	if (pmu_request(&req, pmu_nvram_complete, 4, PMU_WRITE_NVRAM,
+			(addr >> 8) & 0xff, addr & 0xff, val))
+		return;
+	if (system_state == SYSTEM_RUNNING)
+		wait_for_completion(&req_complete);
+	while (!req.complete)
+		pmu_poll();
+}
+
+#endif /* CONFIG_ADB_PMU */
+
+
+static u8 chrp_checksum(struct chrp_header* hdr)
+{
+	u8 *ptr;
+	u16 sum = hdr->signature;
+	for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++)
+		sum += *ptr;
+	while (sum > 0xFF)
+		sum = (sum & 0xFF) + (sum>>8);
+	return sum;
+}
+
+static u32 core99_calc_adler(u8 *buffer)
+{
+	int cnt;
+	u32 low, high;
+
+   	buffer += CORE99_ADLER_START;
+	low = 1;
+	high = 0;
+	for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) {
+		if ((cnt % 5000) == 0) {
+			high  %= 65521UL;
+			high %= 65521UL;
+		}
+		low += buffer[cnt];
+		high += low;
+	}
+	low  %= 65521UL;
+	high %= 65521UL;
+
+	return (high << 16) | low;
+}
+
+static u32 core99_check(u8* datas)
+{
+	struct core99_header* hdr99 = (struct core99_header*)datas;
+
+	if (hdr99->hdr.signature != CORE99_SIGNATURE) {
+		DBG("Invalid signature\n");
+		return 0;
+	}
+	if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) {
+		DBG("Invalid checksum\n");
+		return 0;
+	}
+	if (hdr99->adler != core99_calc_adler(datas)) {
+		DBG("Invalid adler\n");
+		return 0;
+	}
+	return hdr99->generation;
+}
+
+static int sm_erase_bank(int bank)
+{
+	int stat, i;
+	unsigned long timeout;
+
+	u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE;
+
+       	DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank);
+
+	out_8(base, SM_FLASH_CMD_ERASE_SETUP);
+	out_8(base, SM_FLASH_CMD_ERASE_CONFIRM);
+	timeout = 0;
+	do {
+		if (++timeout > 1000000) {
+			printk(KERN_ERR "nvram: Sharp/Miron flash erase timeout !\n");
+			break;
+		}
+		out_8(base, SM_FLASH_CMD_READ_STATUS);
+		stat = in_8(base);
+	} while (!(stat & SM_FLASH_STATUS_DONE));
+
+	out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
+	out_8(base, SM_FLASH_CMD_RESET);
+
+	for (i=0; i<NVRAM_SIZE; i++)
+		if (base[i] != 0xff) {
+			printk(KERN_ERR "nvram: Sharp/Micron flash erase failed !\n");
+			return -ENXIO;
+		}
+	return 0;
+}
+
+static int sm_write_bank(int bank, u8* datas)
+{
+	int i, stat = 0;
+	unsigned long timeout;
+
+	u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE;
+
+       	DBG("nvram: Sharp/Micron Writing bank %d...\n", bank);
+
+	for (i=0; i<NVRAM_SIZE; i++) {
+		out_8(base+i, SM_FLASH_CMD_WRITE_SETUP);
+		udelay(1);
+		out_8(base+i, datas[i]);
+		timeout = 0;
+		do {
+			if (++timeout > 1000000) {
+				printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n");
+				break;
+			}
+			out_8(base, SM_FLASH_CMD_READ_STATUS);
+			stat = in_8(base);
+		} while (!(stat & SM_FLASH_STATUS_DONE));
+		if (!(stat & SM_FLASH_STATUS_DONE))
+			break;
+	}
+	out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
+	out_8(base, SM_FLASH_CMD_RESET);
+	for (i=0; i<NVRAM_SIZE; i++)
+		if (base[i] != datas[i]) {
+			printk(KERN_ERR "nvram: Sharp/Micron flash write failed !\n");
+			return -ENXIO;
+		}
+	return 0;
+}
+
+static int amd_erase_bank(int bank)
+{
+	int i, stat = 0;
+	unsigned long timeout;
+
+	u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE;
+
+       	DBG("nvram: AMD Erasing bank %d...\n", bank);
+
+	/* Unlock 1 */
+	out_8(base+0x555, 0xaa);
+	udelay(1);
+	/* Unlock 2 */
+	out_8(base+0x2aa, 0x55);
+	udelay(1);
+
+	/* Sector-Erase */
+	out_8(base+0x555, 0x80);
+	udelay(1);
+	out_8(base+0x555, 0xaa);
+	udelay(1);
+	out_8(base+0x2aa, 0x55);
+	udelay(1);
+	out_8(base, 0x30);
+	udelay(1);
+
+	timeout = 0;
+	do {
+		if (++timeout > 1000000) {
+			printk(KERN_ERR "nvram: AMD flash erase timeout !\n");
+			break;
+		}
+		stat = in_8(base) ^ in_8(base);
+	} while (stat != 0);
+	
+	/* Reset */
+	out_8(base, 0xf0);
+	udelay(1);
+	
+	for (i=0; i<NVRAM_SIZE; i++)
+		if (base[i] != 0xff) {
+			printk(KERN_ERR "nvram: AMD flash erase failed !\n");
+			return -ENXIO;
+		}
+	return 0;
+}
+
+static int amd_write_bank(int bank, u8* datas)
+{
+	int i, stat = 0;
+	unsigned long timeout;
+
+	u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE;
+
+       	DBG("nvram: AMD Writing bank %d...\n", bank);
+
+	for (i=0; i<NVRAM_SIZE; i++) {
+		/* Unlock 1 */
+		out_8(base+0x555, 0xaa);
+		udelay(1);
+		/* Unlock 2 */
+		out_8(base+0x2aa, 0x55);
+		udelay(1);
+
+		/* Write single word */
+		out_8(base+0x555, 0xa0);
+		udelay(1);
+		out_8(base+i, datas[i]);
+		
+		timeout = 0;
+		do {
+			if (++timeout > 1000000) {
+				printk(KERN_ERR "nvram: AMD flash write timeout !\n");
+				break;
+			}
+			stat = in_8(base) ^ in_8(base);
+		} while (stat != 0);
+		if (stat != 0)
+			break;
+	}
+
+	/* Reset */
+	out_8(base, 0xf0);
+	udelay(1);
+
+	for (i=0; i<NVRAM_SIZE; i++)
+		if (base[i] != datas[i]) {
+			printk(KERN_ERR "nvram: AMD flash write failed !\n");
+			return -ENXIO;
+		}
+	return 0;
+}
+
+static void __init lookup_partitions(void)
+{
+	u8 buffer[17];
+	int i, offset;
+	struct chrp_header* hdr;
+
+	if (pmac_newworld) {
+		nvram_partitions[pmac_nvram_OF] = -1;
+		nvram_partitions[pmac_nvram_XPRAM] = -1;
+		nvram_partitions[pmac_nvram_NR] = -1;
+		hdr = (struct chrp_header *)buffer;
+
+		offset = 0;
+		buffer[16] = 0;
+		do {
+			for (i=0;i<16;i++)
+				buffer[i] = nvram_read_byte(offset+i);
+			if (!strcmp(hdr->name, "common"))
+				nvram_partitions[pmac_nvram_OF] = offset + 0x10;
+			if (!strcmp(hdr->name, "APL,MacOS75")) {
+				nvram_partitions[pmac_nvram_XPRAM] = offset + 0x10;
+				nvram_partitions[pmac_nvram_NR] = offset + 0x110;
+			}
+			offset += (hdr->len * 0x10);
+		} while(offset < NVRAM_SIZE);
+	} else {
+		nvram_partitions[pmac_nvram_OF] = 0x1800;
+		nvram_partitions[pmac_nvram_XPRAM] = 0x1300;
+		nvram_partitions[pmac_nvram_NR] = 0x1400;
+	}
+	DBG("nvram: OF partition at 0x%x\n", nvram_partitions[pmac_nvram_OF]);
+	DBG("nvram: XP partition at 0x%x\n", nvram_partitions[pmac_nvram_XPRAM]);
+	DBG("nvram: NR partition at 0x%x\n", nvram_partitions[pmac_nvram_NR]);
+}
+
+static void core99_nvram_sync(void)
+{
+	struct core99_header* hdr99;
+	unsigned long flags;
+
+	if (!is_core_99 || !nvram_data || !nvram_image)
+		return;
+
+	spin_lock_irqsave(&nv_lock, flags);
+	if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE,
+		NVRAM_SIZE))
+		goto bail;
+
+	DBG("Updating nvram...\n");
+
+	hdr99 = (struct core99_header*)nvram_image;
+	hdr99->generation++;
+	hdr99->hdr.signature = CORE99_SIGNATURE;
+	hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr);
+	hdr99->adler = core99_calc_adler(nvram_image);
+	core99_bank = core99_bank ? 0 : 1;
+	if (core99_erase_bank)
+		if (core99_erase_bank(core99_bank)) {
+			printk("nvram: Error erasing bank %d\n", core99_bank);
+			goto bail;
+		}
+	if (core99_write_bank)
+		if (core99_write_bank(core99_bank, nvram_image))
+			printk("nvram: Error writing bank %d\n", core99_bank);
+ bail:
+	spin_unlock_irqrestore(&nv_lock, flags);
+
+#ifdef DEBUG
+       	mdelay(2000);
+#endif
+}
+
+void __init pmac_nvram_init(void)
+{
+	struct device_node *dp;
+
+	nvram_naddrs = 0;
+
+	dp = find_devices("nvram");
+	if (dp == NULL) {
+		printk(KERN_ERR "Can't find NVRAM device\n");
+		return;
+	}
+	nvram_naddrs = dp->n_addrs;
+	is_core_99 = device_is_compatible(dp, "nvram,flash");
+	if (is_core_99) {
+		int i;
+		u32 gen_bank0, gen_bank1;
+
+		if (nvram_naddrs < 1) {
+			printk(KERN_ERR "nvram: no address\n");
+			return;
+		}
+		nvram_image = alloc_bootmem(NVRAM_SIZE);
+		if (nvram_image == NULL) {
+			printk(KERN_ERR "nvram: can't allocate ram image\n");
+			return;
+		}
+		nvram_data = ioremap(dp->addrs[0].address, NVRAM_SIZE*2);
+		nvram_naddrs = 1; /* Make sure we get the correct case */
+
+		DBG("nvram: Checking bank 0...\n");
+
+		gen_bank0 = core99_check((u8 *)nvram_data);
+		gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE);
+		core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0;
+
+		DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1);
+		DBG("nvram: Active bank is: %d\n", core99_bank);
+
+		for (i=0; i<NVRAM_SIZE; i++)
+			nvram_image[i] = nvram_data[i + core99_bank*NVRAM_SIZE];
+
+		ppc_md.nvram_read_val	= core99_nvram_read_byte;
+		ppc_md.nvram_write_val	= core99_nvram_write_byte;
+		ppc_md.nvram_sync	= core99_nvram_sync;
+		/* 
+		 * Maybe we could be smarter here though making an exclusive list
+		 * of known flash chips is a bit nasty as older OF didn't provide us
+		 * with a useful "compatible" entry. A solution would be to really
+		 * identify the chip using flash id commands and base ourselves on
+		 * a list of known chips IDs
+		 */
+		if (device_is_compatible(dp, "amd-0137")) {
+			core99_erase_bank = amd_erase_bank;
+			core99_write_bank = amd_write_bank;
+		} else {
+			core99_erase_bank = sm_erase_bank;
+			core99_write_bank = sm_write_bank;
+		}
+	} else if (_machine == _MACH_chrp && nvram_naddrs == 1) {
+		nvram_data = ioremap(dp->addrs[0].address + isa_mem_base,
+				     dp->addrs[0].size);
+		nvram_mult = 1;
+		ppc_md.nvram_read_val	= direct_nvram_read_byte;
+		ppc_md.nvram_write_val	= direct_nvram_write_byte;
+	} else if (nvram_naddrs == 1) {
+		nvram_data = ioremap(dp->addrs[0].address, dp->addrs[0].size);
+		nvram_mult = (dp->addrs[0].size + NVRAM_SIZE - 1) / NVRAM_SIZE;
+		ppc_md.nvram_read_val	= direct_nvram_read_byte;
+		ppc_md.nvram_write_val	= direct_nvram_write_byte;
+	} else if (nvram_naddrs == 2) {
+		nvram_addr = ioremap(dp->addrs[0].address, dp->addrs[0].size);
+		nvram_data = ioremap(dp->addrs[1].address, dp->addrs[1].size);
+		ppc_md.nvram_read_val	= indirect_nvram_read_byte;
+		ppc_md.nvram_write_val	= indirect_nvram_write_byte;
+	} else if (nvram_naddrs == 0 && sys_ctrler == SYS_CTRLER_PMU) {
+#ifdef CONFIG_ADB_PMU
+		nvram_naddrs = -1;
+		ppc_md.nvram_read_val	= pmu_nvram_read_byte;
+		ppc_md.nvram_write_val	= pmu_nvram_write_byte;
+#endif /* CONFIG_ADB_PMU */
+	} else {
+		printk(KERN_ERR "Don't know how to access NVRAM with %d addresses\n",
+		       nvram_naddrs);
+	}
+	lookup_partitions();
+}
+
+int pmac_get_partition(int partition)
+{
+	return nvram_partitions[partition];
+}
+
+u8 pmac_xpram_read(int xpaddr)
+{
+	int offset = nvram_partitions[pmac_nvram_XPRAM];
+
+	if (offset < 0)
+		return 0xff;
+
+	return ppc_md.nvram_read_val(xpaddr + offset);
+}
+
+void pmac_xpram_write(int xpaddr, u8 data)
+{
+	int offset = nvram_partitions[pmac_nvram_XPRAM];
+
+	if (offset < 0)
+		return;
+
+	ppc_md.nvram_write_val(xpaddr + offset, data);
+}
+
+EXPORT_SYMBOL(pmac_get_partition);
+EXPORT_SYMBOL(pmac_xpram_read);
+EXPORT_SYMBOL(pmac_xpram_write);
diff --git a/arch/powerpc/platforms/powermac/pmac_pci.c b/arch/powerpc/platforms/powermac/pmac_pci.c
new file mode 100644
index 00000000000..40bcd3e55af
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_pci.c
@@ -0,0 +1,1341 @@
+/*
+ * Support for PCI bridges found on Power Macintoshes.
+ * At present the "bandit" and "chaos" bridges are supported.
+ * Fortunately you access configuration space in the same
+ * way with either bridge.
+ *
+ * Copyright (C) 2003 Benjamin Herrenschmuidt (benh@kernel.crashing.org)
+ * Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+static int add_bridge(struct device_node *dev);
+extern void pmac_check_ht_link(void);
+
+/* XXX Could be per-controller, but I don't think we risk anything by
+ * assuming we won't have both UniNorth and Bandit */
+static int has_uninorth;
+#ifdef CONFIG_POWER4
+static struct pci_controller *u3_agp;
+#endif /* CONFIG_POWER4 */
+
+extern u8 pci_cache_line_size;
+extern int pcibios_assign_bus_offset;
+
+struct device_node *k2_skiplist[2];
+
+/*
+ * Magic constants for enabling cache coherency in the bandit/PSX bridge.
+ */
+#define BANDIT_DEVID_2	8
+#define BANDIT_REVID	3
+
+#define BANDIT_DEVNUM	11
+#define BANDIT_MAGIC	0x50
+#define BANDIT_COHERENT	0x40
+
+static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
+{
+	for (; node != 0;node = node->sibling) {
+		int * bus_range;
+		unsigned int *class_code;
+		int len;
+
+		/* For PCI<->PCI bridges or CardBus bridges, we go down */
+		class_code = (unsigned int *) get_property(node, "class-code", NULL);
+		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+			continue;
+		bus_range = (int *) get_property(node, "bus-range", &len);
+		if (bus_range != NULL && len > 2 * sizeof(int)) {
+			if (bus_range[1] > higher)
+				higher = bus_range[1];
+		}
+		higher = fixup_one_level_bus_range(node->child, higher);
+	}
+	return higher;
+}
+
+/* This routine fixes the "bus-range" property of all bridges in the
+ * system since they tend to have their "last" member wrong on macs
+ *
+ * Note that the bus numbers manipulated here are OF bus numbers, they
+ * are not Linux bus numbers.
+ */
+static void __init fixup_bus_range(struct device_node *bridge)
+{
+	int * bus_range;
+	int len;
+
+	/* Lookup the "bus-range" property for the hose */
+	bus_range = (int *) get_property(bridge, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %s\n",
+			       bridge->full_name);
+		return;
+	}
+	bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]);
+}
+
+/*
+ * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers.
+ *
+ * The "Bandit" version is present in all early PCI PowerMacs,
+ * and up to the first ones using Grackle. Some machines may
+ * have 2 bandit controllers (2 PCI busses).
+ *
+ * "Chaos" is used in some "Bandit"-type machines as a bridge
+ * for the separate display bus. It is accessed the same
+ * way as bandit, but cannot be probed for devices. It therefore
+ * has its own config access functions.
+ *
+ * The "UniNorth" version is present in all Core99 machines
+ * (iBook, G4, new IMacs, and all the recent Apple machines).
+ * It contains 3 controllers in one ASIC.
+ *
+ * The U3 is the bridge used on G5 machines. It contains an
+ * AGP bus which is dealt with the old UniNorth access routines
+ * and a HyperTransport bus which uses its own set of access
+ * functions.
+ */
+
+#define MACRISC_CFA0(devfn, off)	\
+	((1 << (unsigned long)PCI_SLOT(dev_fn)) \
+	| (((unsigned long)PCI_FUNC(dev_fn)) << 8) \
+	| (((unsigned long)(off)) & 0xFCUL))
+
+#define MACRISC_CFA1(bus, devfn, off)	\
+	((((unsigned long)(bus)) << 16) \
+	|(((unsigned long)(devfn)) << 8) \
+	|(((unsigned long)(off)) & 0xFCUL) \
+	|1UL)
+
+static unsigned long macrisc_cfg_access(struct pci_controller* hose,
+					       u8 bus, u8 dev_fn, u8 offset)
+{
+	unsigned int caddr;
+
+	if (bus == hose->first_busno) {
+		if (dev_fn < (11 << 3))
+			return 0;
+		caddr = MACRISC_CFA0(dev_fn, offset);
+	} else
+		caddr = MACRISC_CFA1(bus, dev_fn, offset);
+
+	/* Uninorth will return garbage if we don't read back the value ! */
+	do {
+		out_le32(hose->cfg_addr, caddr);
+	} while (in_le32(hose->cfg_addr) != caddr);
+
+	offset &= has_uninorth ? 0x07 : 0x03;
+	return ((unsigned long)hose->cfg_data) + offset;
+}
+
+static int macrisc_read_config(struct pci_bus *bus, unsigned int devfn,
+				      int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = bus->sysdata;
+	unsigned long addr;
+
+	addr = macrisc_cfg_access(hose, bus->number, devfn, offset);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		*val = in_8((u8 *)addr);
+		break;
+	case 2:
+		*val = in_le16((u16 *)addr);
+		break;
+	default:
+		*val = in_le32((u32 *)addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int macrisc_write_config(struct pci_bus *bus, unsigned int devfn,
+				       int offset, int len, u32 val)
+{
+	struct pci_controller *hose = bus->sysdata;
+	unsigned long addr;
+
+	addr = macrisc_cfg_access(hose, bus->number, devfn, offset);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		out_8((u8 *)addr, val);
+		(void) in_8((u8 *)addr);
+		break;
+	case 2:
+		out_le16((u16 *)addr, val);
+		(void) in_le16((u16 *)addr);
+		break;
+	default:
+		out_le32((u32 *)addr, val);
+		(void) in_le32((u32 *)addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops macrisc_pci_ops =
+{
+	macrisc_read_config,
+	macrisc_write_config
+};
+
+/*
+ * Verifiy that a specific (bus, dev_fn) exists on chaos
+ */
+static int
+chaos_validate_dev(struct pci_bus *bus, int devfn, int offset)
+{
+	struct device_node *np;
+	u32 *vendor, *device;
+
+	np = pci_busdev_to_OF_node(bus, devfn);
+	if (np == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	vendor = (u32 *)get_property(np, "vendor-id", NULL);
+	device = (u32 *)get_property(np, "device-id", NULL);
+	if (vendor == NULL || device == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if ((*vendor == 0x106b) && (*device == 3) && (offset >= 0x10)
+	    && (offset != 0x14) && (offset != 0x18) && (offset <= 0x24))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+chaos_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+		  int len, u32 *val)
+{
+	int result = chaos_validate_dev(bus, devfn, offset);
+	if (result == PCIBIOS_BAD_REGISTER_NUMBER)
+		*val = ~0U;
+	if (result != PCIBIOS_SUCCESSFUL)
+		return result;
+	return macrisc_read_config(bus, devfn, offset, len, val);
+}
+
+static int
+chaos_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
+		   int len, u32 val)
+{
+	int result = chaos_validate_dev(bus, devfn, offset);
+	if (result != PCIBIOS_SUCCESSFUL)
+		return result;
+	return macrisc_write_config(bus, devfn, offset, len, val);
+}
+
+static struct pci_ops chaos_pci_ops =
+{
+	chaos_read_config,
+	chaos_write_config
+};
+
+#ifdef CONFIG_POWER4
+
+/*
+ * These versions of U3 HyperTransport config space access ops do not
+ * implement self-view of the HT host yet
+ */
+
+/*
+ * This function deals with some "special cases" devices.
+ *
+ *  0 -> No special case
+ *  1 -> Skip the device but act as if the access was successfull
+ *       (return 0xff's on reads, eventually, cache config space
+ *       accesses in a later version)
+ * -1 -> Hide the device (unsuccessful acess)
+ */
+static int u3_ht_skip_device(struct pci_controller *hose,
+			     struct pci_bus *bus, unsigned int devfn)
+{
+	struct device_node *busdn, *dn;
+	int i;
+
+	/* We only allow config cycles to devices that are in OF device-tree
+	 * as we are apparently having some weird things going on with some
+	 * revs of K2 on recent G5s
+	 */
+	if (bus->self)
+		busdn = pci_device_to_OF_node(bus->self);
+	else
+		busdn = hose->arch_data;
+	for (dn = busdn->child; dn; dn = dn->sibling)
+		if (dn->data && PCI_DN(dn)->devfn == devfn)
+			break;
+	if (dn == NULL)
+		return -1;
+
+	/*
+	 * When a device in K2 is powered down, we die on config
+	 * cycle accesses. Fix that here.
+	 */
+	for (i=0; i<2; i++)
+		if (k2_skiplist[i] == dn)
+			return 1;
+
+	return 0;
+}
+
+#define U3_HT_CFA0(devfn, off)		\
+		((((unsigned long)devfn) << 8) | offset)
+#define U3_HT_CFA1(bus, devfn, off)	\
+		(U3_HT_CFA0(devfn, off) \
+		+ (((unsigned long)bus) << 16) \
+		+ 0x01000000UL)
+
+static unsigned long u3_ht_cfg_access(struct pci_controller* hose,
+					     u8 bus, u8 devfn, u8 offset)
+{
+	if (bus == hose->first_busno) {
+		/* For now, we don't self probe U3 HT bridge */
+		if (PCI_SLOT(devfn) == 0)
+			return 0;
+		return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset);
+	} else
+		return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset);
+}
+
+static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
+				    int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = bus->sysdata;
+	unsigned long addr;
+
+	struct device_node *np = pci_busdev_to_OF_node(bus, devfn);
+	if (np == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	switch (u3_ht_skip_device(hose, bus, devfn)) {
+	case 0:
+		break;
+	case 1:
+			switch (len) {
+			case 1:
+				*val = 0xff; break;
+			case 2:
+				*val = 0xffff; break;
+			default:
+				*val = 0xfffffffful; break;
+			}
+			return PCIBIOS_SUCCESSFUL;
+	default:
+		return PCIBIOS_DEVICE_NOT_FOUND;
+		}
+	    
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		*val = in_8((u8 *)addr);
+		break;
+	case 2:
+		*val = in_le16((u16 *)addr);
+		break;
+	default:
+		*val = in_le32((u32 *)addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
+				     int offset, int len, u32 val)
+{
+	struct pci_controller *hose = bus->sysdata;
+	unsigned long addr;
+
+	struct device_node *np = pci_busdev_to_OF_node(bus, devfn);
+	if (np == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	switch (u3_ht_skip_device(hose, bus, devfn)) {
+	case 0:
+		break;
+	case 1:
+		return PCIBIOS_SUCCESSFUL;
+	default:
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		out_8((u8 *)addr, val);
+		(void) in_8((u8 *)addr);
+		break;
+	case 2:
+		out_le16((u16 *)addr, val);
+		(void) in_le16((u16 *)addr);
+		break;
+	default:
+		out_le32((u32 *)addr, val);
+		(void) in_le32((u32 *)addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops u3_ht_pci_ops =
+{
+	u3_ht_read_config,
+	u3_ht_write_config
+};
+
+#endif /* CONFIG_POWER4 */
+
+/*
+ * For a bandit bridge, turn on cache coherency if necessary.
+ * N.B. we could clean this up using the hose ops directly.
+ */
+static void __init
+init_bandit(struct pci_controller *bp)
+{
+	unsigned int vendev, magic;
+	int rev;
+
+	/* read the word at offset 0 in config space for device 11 */
+	out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + PCI_VENDOR_ID);
+	udelay(2);
+	vendev = in_le32(bp->cfg_data);
+	if (vendev == (PCI_DEVICE_ID_APPLE_BANDIT << 16) +
+			PCI_VENDOR_ID_APPLE) {
+		/* read the revision id */
+		out_le32(bp->cfg_addr,
+			 (1UL << BANDIT_DEVNUM) + PCI_REVISION_ID);
+		udelay(2);
+		rev = in_8(bp->cfg_data);
+		if (rev != BANDIT_REVID)
+			printk(KERN_WARNING
+			       "Unknown revision %d for bandit\n", rev);
+	} else if (vendev != (BANDIT_DEVID_2 << 16) + PCI_VENDOR_ID_APPLE) {
+		printk(KERN_WARNING "bandit isn't? (%x)\n", vendev);
+		return;
+	}
+
+	/* read the word at offset 0x50 */
+	out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + BANDIT_MAGIC);
+	udelay(2);
+	magic = in_le32(bp->cfg_data);
+	if ((magic & BANDIT_COHERENT) != 0)
+		return;
+	magic |= BANDIT_COHERENT;
+	udelay(2);
+	out_le32(bp->cfg_data, magic);
+	printk(KERN_INFO "Cache coherency enabled for bandit/PSX\n");
+}
+
+
+/*
+ * Tweak the PCI-PCI bridge chip on the blue & white G3s.
+ */
+static void __init
+init_p2pbridge(void)
+{
+	struct device_node *p2pbridge;
+	struct pci_controller* hose;
+	u8 bus, devfn;
+	u16 val;
+
+	/* XXX it would be better here to identify the specific
+	   PCI-PCI bridge chip we have. */
+	if ((p2pbridge = find_devices("pci-bridge")) == 0
+	    || p2pbridge->parent == NULL
+	    || strcmp(p2pbridge->parent->name, "pci") != 0)
+		return;
+	if (pci_device_from_OF_node(p2pbridge, &bus, &devfn) < 0) {
+		DBG("Can't find PCI infos for PCI<->PCI bridge\n");
+		return;
+	}
+	/* Warning: At this point, we have not yet renumbered all busses.
+	 * So we must use OF walking to find out hose
+	 */
+	hose = pci_find_hose_for_OF_device(p2pbridge);
+	if (!hose) {
+		DBG("Can't find hose for PCI<->PCI bridge\n");
+		return;
+	}
+	if (early_read_config_word(hose, bus, devfn,
+				   PCI_BRIDGE_CONTROL, &val) < 0) {
+		printk(KERN_ERR "init_p2pbridge: couldn't read bridge control\n");
+		return;
+	}
+	val &= ~PCI_BRIDGE_CTL_MASTER_ABORT;
+	early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val);
+}
+
+/*
+ * Some Apple desktop machines have a NEC PD720100A USB2 controller
+ * on the motherboard. Open Firmware, on these, will disable the
+ * EHCI part of it so it behaves like a pair of OHCI's. This fixup
+ * code re-enables it ;)
+ */
+static void __init
+fixup_nec_usb2(void)
+{
+	struct device_node *nec;
+
+	for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) {
+		struct pci_controller *hose;
+		u32 data, *prop;
+		u8 bus, devfn;
+		
+		prop = (u32 *)get_property(nec, "vendor-id", NULL);
+		if (prop == NULL)
+			continue;
+		if (0x1033 != *prop)
+			continue;
+		prop = (u32 *)get_property(nec, "device-id", NULL);
+		if (prop == NULL)
+			continue;
+		if (0x0035 != *prop)
+			continue;
+		prop = (u32 *)get_property(nec, "reg", NULL);
+		if (prop == NULL)
+			continue;
+		devfn = (prop[0] >> 8) & 0xff;
+		bus = (prop[0] >> 16) & 0xff;
+		if (PCI_FUNC(devfn) != 0)
+			continue;
+		hose = pci_find_hose_for_OF_device(nec);
+		if (!hose)
+			continue;
+		early_read_config_dword(hose, bus, devfn, 0xe4, &data);
+		if (data & 1UL) {
+			printk("Found NEC PD720100A USB2 chip with disabled EHCI, fixing up...\n");
+			data &= ~1UL;
+			early_write_config_dword(hose, bus, devfn, 0xe4, data);
+			early_write_config_byte(hose, bus, devfn | 2, PCI_INTERRUPT_LINE,
+				nec->intrs[0].line);
+		}
+	}
+}
+
+void __init
+pmac_find_bridges(void)
+{
+	struct device_node *np, *root;
+	struct device_node *ht = NULL;
+
+	root = of_find_node_by_path("/");
+	if (root == NULL) {
+		printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n");
+		return;
+	}
+	for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) {
+		if (np->name == NULL)
+			continue;
+		if (strcmp(np->name, "bandit") == 0
+		    || strcmp(np->name, "chaos") == 0
+		    || strcmp(np->name, "pci") == 0) {
+			if (add_bridge(np) == 0)
+				of_node_get(np);
+		}
+		if (strcmp(np->name, "ht") == 0) {
+			of_node_get(np);
+			ht = np;
+		}
+	}
+	of_node_put(root);
+
+	/* Probe HT last as it relies on the agp resources to be already
+	 * setup
+	 */
+	if (ht && add_bridge(ht) != 0)
+		of_node_put(ht);
+
+	init_p2pbridge();
+	fixup_nec_usb2();
+	
+	/* We are still having some issues with the Xserve G4, enabling
+	 * some offset between bus number and domains for now when we
+	 * assign all busses should help for now
+	 */
+	if (pci_assign_all_busses)
+		pcibios_assign_bus_offset = 0x10;
+
+#ifdef CONFIG_POWER4 
+	/* There is something wrong with DMA on U3/HT. I haven't figured out
+	 * the details yet, but if I set the cache line size to 128 bytes like
+	 * it should, I'm getting memory corruption caused by devices like
+	 * sungem (even without the MWI bit set, but maybe sungem doesn't
+	 * care). Right now, it appears that setting up a 64 bytes line size
+	 * works properly, 64 bytes beeing the max transfer size of HT, I
+	 * suppose this is related the way HT/PCI are hooked together. I still
+	 * need to dive into more specs though to be really sure of what's
+	 * going on. --BenH.
+	 *
+	 * Ok, apparently, it's just that HT can't do more than 64 bytes
+	 * transactions. MWI seem to be meaningless there as well, it may
+	 * be worth nop'ing out pci_set_mwi too though I haven't done that
+	 * yet.
+	 *
+	 * Note that it's a bit different for whatever is in the AGP slot.
+	 * For now, I don't care, but this can become a real issue, we
+	 * should probably hook pci_set_mwi anyway to make sure it sets
+	 * the real cache line size in there.
+	 */
+	if (machine_is_compatible("MacRISC4"))
+		pci_cache_line_size = 16; /* 64 bytes */
+
+	pmac_check_ht_link();
+#endif /* CONFIG_POWER4 */
+}
+
+#define GRACKLE_CFA(b, d, o)	(0x80 | ((b) << 8) | ((d) << 16) \
+				 | (((o) & ~3) << 24))
+
+#define GRACKLE_PICR1_STG		0x00000040
+#define GRACKLE_PICR1_LOOPSNOOP		0x00000010
+
+/* N.B. this is called before bridges is initialized, so we can't
+   use grackle_pcibios_{read,write}_config_dword. */
+static inline void grackle_set_stg(struct pci_controller* bp, int enable)
+{
+	unsigned int val;
+
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	val = in_le32(bp->cfg_data);
+	val = enable? (val | GRACKLE_PICR1_STG) :
+		(val & ~GRACKLE_PICR1_STG);
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	out_le32(bp->cfg_data, val);
+	(void)in_le32(bp->cfg_data);
+}
+
+static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable)
+{
+	unsigned int val;
+
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	val = in_le32(bp->cfg_data);
+	val = enable? (val | GRACKLE_PICR1_LOOPSNOOP) :
+		(val & ~GRACKLE_PICR1_LOOPSNOOP);
+	out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8));
+	out_le32(bp->cfg_data, val);
+	(void)in_le32(bp->cfg_data);
+}
+
+static int __init
+setup_uninorth(struct pci_controller* hose, struct reg_property* addr)
+{
+	pci_assign_all_busses = 1;
+	has_uninorth = 1;
+	hose->ops = &macrisc_pci_ops;
+	hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000);
+	/* We "know" that the bridge at f2000000 has the PCI slots. */
+	return addr->address == 0xf2000000;
+}
+
+static void __init
+setup_bandit(struct pci_controller* hose, struct reg_property* addr)
+{
+	hose->ops = &macrisc_pci_ops;
+	hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000);
+	init_bandit(hose);
+}
+
+static void __init
+setup_chaos(struct pci_controller* hose, struct reg_property* addr)
+{
+	/* assume a `chaos' bridge */
+	hose->ops = &chaos_pci_ops;
+	hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000);
+}
+
+#ifdef CONFIG_POWER4
+
+static void __init setup_u3_agp(struct pci_controller* hose)
+{
+	/* On G5, we move AGP up to high bus number so we don't need
+	 * to reassign bus numbers for HT. If we ever have P2P bridges
+	 * on AGP, we'll have to move pci_assign_all_busses to the
+	 * pci_controller structure so we enable it for AGP and not for
+	 * HT childs.
+	 * We hard code the address because of the different size of
+	 * the reg address cell, we shall fix that by killing struct
+	 * reg_property and using some accessor functions instead
+	 */
+       	hose->first_busno = 0xf0;
+	hose->last_busno = 0xff;
+	has_uninorth = 1;
+	hose->ops = &macrisc_pci_ops;
+	hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+	hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+
+	u3_agp = hose;
+}
+
+static void __init setup_u3_ht(struct pci_controller* hose)
+{
+	struct device_node *np = (struct device_node *)hose->arch_data;
+	int i, cur;
+
+	hose->ops = &u3_ht_pci_ops;
+
+	/* We hard code the address because of the different size of
+	 * the reg address cell, we shall fix that by killing struct
+	 * reg_property and using some accessor functions instead
+	 */
+	hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000);
+
+	/*
+	 * /ht node doesn't expose a "ranges" property, so we "remove" regions that
+	 * have been allocated to AGP. So far, this version of the code doesn't assign
+	 * any of the 0xfxxxxxxx "fine" memory regions to /ht.
+	 * We need to fix that sooner or later by either parsing all child "ranges"
+	 * properties or figuring out the U3 address space decoding logic and
+	 * then read its configuration register (if any).
+	 */
+	hose->io_base_phys = 0xf4000000;
+	hose->io_base_virt = ioremap(hose->io_base_phys, 0x00400000);
+	isa_io_base = (unsigned long) hose->io_base_virt;
+	hose->io_resource.name = np->full_name;
+	hose->io_resource.start = 0;
+	hose->io_resource.end = 0x003fffff;
+	hose->io_resource.flags = IORESOURCE_IO;
+	hose->pci_mem_offset = 0;
+	hose->first_busno = 0;
+	hose->last_busno = 0xef;
+	hose->mem_resources[0].name = np->full_name;
+	hose->mem_resources[0].start = 0x80000000;
+	hose->mem_resources[0].end = 0xefffffff;
+	hose->mem_resources[0].flags = IORESOURCE_MEM;
+
+	if (u3_agp == NULL) {
+		DBG("U3 has no AGP, using full resource range\n");
+		return;
+	}
+
+	/* We "remove" the AGP resources from the resources allocated to HT, that
+	 * is we create "holes". However, that code does assumptions that so far
+	 * happen to be true (cross fingers...), typically that resources in the
+	 * AGP node are properly ordered
+	 */
+	cur = 0;
+	for (i=0; i<3; i++) {
+		struct resource *res = &u3_agp->mem_resources[i];
+		if (res->flags != IORESOURCE_MEM)
+			continue;
+		/* We don't care about "fine" resources */
+		if (res->start >= 0xf0000000)
+			continue;
+		/* Check if it's just a matter of "shrinking" us in one direction */
+		if (hose->mem_resources[cur].start == res->start) {
+			DBG("U3/HT: shrink start of %d, %08lx -> %08lx\n",
+			    cur, hose->mem_resources[cur].start, res->end + 1);
+			hose->mem_resources[cur].start = res->end + 1;
+			continue;
+		}
+		if (hose->mem_resources[cur].end == res->end) {
+			DBG("U3/HT: shrink end of %d, %08lx -> %08lx\n",
+			    cur, hose->mem_resources[cur].end, res->start - 1);
+			hose->mem_resources[cur].end = res->start - 1;
+			continue;
+		}
+		/* No, it's not the case, we need a hole */
+		if (cur == 2) {
+			/* not enough resources to make a hole, we drop part of the range */
+			printk(KERN_WARNING "Running out of resources for /ht host !\n");
+			hose->mem_resources[cur].end = res->start - 1;
+			continue;
+		}		
+		cur++;
+       		DBG("U3/HT: hole, %d end at %08lx, %d start at %08lx\n",
+		    cur-1, res->start - 1, cur, res->end + 1);
+		hose->mem_resources[cur].name = np->full_name;
+		hose->mem_resources[cur].flags = IORESOURCE_MEM;
+		hose->mem_resources[cur].start = res->end + 1;
+		hose->mem_resources[cur].end = hose->mem_resources[cur-1].end;
+		hose->mem_resources[cur-1].end = res->start - 1;
+	}
+}
+
+#endif /* CONFIG_POWER4 */
+
+void __init
+setup_grackle(struct pci_controller *hose)
+{
+	setup_indirect_pci(hose, 0xfec00000, 0xfee00000);
+	if (machine_is_compatible("AAPL,PowerBook1998"))
+		grackle_set_loop_snoop(hose, 1);
+#if 0	/* Disabled for now, HW problems ??? */
+	grackle_set_stg(hose, 1);
+#endif
+}
+
+static void __init pmac_process_bridge_OF_ranges(struct pci_controller *hose,
+			   struct device_node *dev, int primary)
+{
+	static unsigned int static_lc_ranges[2024];
+	unsigned int *dt_ranges, *lc_ranges, *ranges, *prev;
+	unsigned int size;
+	int rlen = 0, orig_rlen;
+	int memno = 0;
+	struct resource *res;
+	int np, na = prom_n_addr_cells(dev);
+
+	np = na + 5;
+
+	/* First we try to merge ranges to fix a problem with some pmacs
+	 * that can have more than 3 ranges, fortunately using contiguous
+	 * addresses -- BenH
+	 */
+	dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen);
+	if (!dt_ranges)
+		return;
+	/*	lc_ranges = alloc_bootmem(rlen);*/
+	lc_ranges = static_lc_ranges;
+	if (!lc_ranges)
+		return; /* what can we do here ? */
+	memcpy(lc_ranges, dt_ranges, rlen);
+	orig_rlen = rlen;
+
+	/* Let's work on a copy of the "ranges" property instead of damaging
+	 * the device-tree image in memory
+	 */
+	ranges = lc_ranges;
+	prev = NULL;
+	while ((rlen -= np * sizeof(unsigned int)) >= 0) {
+		if (prev) {
+			if (prev[0] == ranges[0] && prev[1] == ranges[1] &&
+				(prev[2] + prev[na+4]) == ranges[2] &&
+				(prev[na+2] + prev[na+4]) == ranges[na+2]) {
+				prev[na+4] += ranges[na+4];
+				ranges[0] = 0;
+				ranges += np;
+				continue;
+			}
+		}
+		prev = ranges;
+		ranges += np;
+	}
+
+	/*
+	 * The ranges property is laid out as an array of elements,
+	 * each of which comprises:
+	 *   cells 0 - 2:	a PCI address
+	 *   cells 3 or 3+4:	a CPU physical address
+	 *			(size depending on dev->n_addr_cells)
+	 *   cells 4+5 or 5+6:	the size of the range
+	 */
+	ranges = lc_ranges;
+	rlen = orig_rlen;
+	while (ranges && (rlen -= np * sizeof(unsigned int)) >= 0) {
+		res = NULL;
+		size = ranges[na+4];
+		switch (ranges[0] >> 24) {
+		case 1:		/* I/O space */
+			if (ranges[2] != 0)
+				break;
+			hose->io_base_phys = ranges[na+2];
+			/* limit I/O space to 16MB */
+			if (size > 0x01000000)
+				size = 0x01000000;
+			hose->io_base_virt = ioremap(ranges[na+2], size);
+			if (primary)
+				isa_io_base = (unsigned long) hose->io_base_virt;
+			res = &hose->io_resource;
+			res->flags = IORESOURCE_IO;
+			res->start = ranges[2];
+			break;
+		case 2:		/* memory space */
+			memno = 0;
+			if (ranges[1] == 0 && ranges[2] == 0
+			    && ranges[na+4] <= (16 << 20)) {
+				/* 1st 16MB, i.e. ISA memory area */
+#if 0
+				if (primary)
+					isa_mem_base = ranges[na+2];
+#endif
+				memno = 1;
+			}
+			while (memno < 3 && hose->mem_resources[memno].flags)
+				++memno;
+			if (memno == 0)
+				hose->pci_mem_offset = ranges[na+2] - ranges[2];
+			if (memno < 3) {
+				res = &hose->mem_resources[memno];
+				res->flags = IORESOURCE_MEM;
+				res->start = ranges[na+2];
+			}
+			break;
+		}
+		if (res != NULL) {
+			res->name = dev->full_name;
+			res->end = res->start + size - 1;
+			res->parent = NULL;
+			res->sibling = NULL;
+			res->child = NULL;
+		}
+		ranges += np;
+	}
+}
+
+/*
+ * We assume that if we have a G3 powermac, we have one bridge called
+ * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise,
+ * if we have one or more bandit or chaos bridges, we don't have a MPC106.
+ */
+static int __init add_bridge(struct device_node *dev)
+{
+	int len;
+	struct pci_controller *hose;
+	struct reg_property *addr;
+	char* disp_name;
+	int *bus_range;
+	int primary = 1;
+
+	DBG("Adding PCI host bridge %s\n", dev->full_name);
+
+       	addr = (struct reg_property *) get_property(dev, "reg", &len);
+       	if (addr == NULL || len < sizeof(*addr)) {
+       		printk(KERN_WARNING "Can't use %s: no address\n",
+       		       dev->full_name);
+       		return -ENODEV;
+       	}
+       	bus_range = (int *) get_property(dev, "bus-range", &len);
+       	if (bus_range == NULL || len < 2 * sizeof(int)) {
+       		printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n",
+       			       dev->full_name);
+       	}
+
+       	hose = pcibios_alloc_controller();
+       	if (!hose)
+       		return -ENOMEM;
+       	hose->arch_data = dev;
+       	hose->first_busno = bus_range ? bus_range[0] : 0;
+       	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	disp_name = NULL;
+#ifdef CONFIG_POWER4
+       	if (device_is_compatible(dev, "u3-agp")) {
+       		setup_u3_agp(hose, addr);
+       		disp_name = "U3-AGP";
+       		primary = 0;
+       	} else if (device_is_compatible(dev, "u3-ht")) {
+       		setup_u3_ht(hose, addr);
+       		disp_name = "U3-HT";
+       		primary = 1;
+       	} else
+#endif /* CONFIG_POWER4 */
+	if (device_is_compatible(dev, "uni-north")) {
+       		primary = setup_uninorth(hose, addr);
+       		disp_name = "UniNorth";
+       	} else if (strcmp(dev->name, "pci") == 0) {
+       		/* XXX assume this is a mpc106 (grackle) */
+       		setup_grackle(hose);
+       		disp_name = "Grackle (MPC106)";
+       	} else if (strcmp(dev->name, "bandit") == 0) {
+       		setup_bandit(hose, addr);
+       		disp_name = "Bandit";
+       	} else if (strcmp(dev->name, "chaos") == 0) {
+       		setup_chaos(hose, addr);
+       		disp_name = "Chaos";
+       		primary = 0;
+       	}
+       	printk(KERN_INFO "Found %s PCI host bridge at 0x%08x. Firmware bus number: %d->%d\n",
+       		disp_name, addr->address, hose->first_busno, hose->last_busno);
+       	DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n",
+       		hose, hose->cfg_addr, hose->cfg_data);
+
+       	/* Interpret the "ranges" property */
+       	/* This also maps the I/O region and sets isa_io/mem_base */
+       	pci_process_bridge_OF_ranges(hose, dev, primary);
+
+       	/* Fixup "bus-range" OF property */
+       	fixup_bus_range(dev);
+
+	return 0;
+}
+
+static void __init
+pcibios_fixup_OF_interrupts(void)
+{
+	struct pci_dev* dev = NULL;
+
+	/*
+	 * Open Firmware often doesn't initialize the
+	 * PCI_INTERRUPT_LINE config register properly, so we
+	 * should find the device node and apply the interrupt
+	 * obtained from the OF device-tree
+	 */
+	for_each_pci_dev(dev) {
+		struct device_node *node;
+		node = pci_device_to_OF_node(dev);
+		/* this is the node, see if it has interrupts */
+		if (node && node->n_intrs > 0)
+			dev->irq = node->intrs[0].line;
+		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+	}
+}
+
+void __init
+pmac_pcibios_fixup(void)
+{
+	/* Fixup interrupts according to OF tree */
+	pcibios_fixup_OF_interrupts();
+}
+
+int
+pmac_pci_enable_device_hook(struct pci_dev *dev, int initial)
+{
+	struct device_node* node;
+	int updatecfg = 0;
+	int uninorth_child;
+
+	node = pci_device_to_OF_node(dev);
+
+	/* We don't want to enable USB controllers absent from the OF tree
+	 * (iBook second controller)
+	 */
+	if (dev->vendor == PCI_VENDOR_ID_APPLE
+	    && (dev->class == ((PCI_CLASS_SERIAL_USB << 8) | 0x10))
+	    && !node) {
+		printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n",
+		       pci_name(dev));
+		return -EINVAL;
+	}
+
+	if (!node)
+		return 0;
+
+	uninorth_child = node->parent &&
+		device_is_compatible(node->parent, "uni-north");
+	
+	/* Firewire & GMAC were disabled after PCI probe, the driver is
+	 * claiming them, we must re-enable them now.
+	 */
+	if (uninorth_child && !strcmp(node->name, "firewire") &&
+	    (device_is_compatible(node, "pci106b,18") ||
+	     device_is_compatible(node, "pci106b,30") ||
+	     device_is_compatible(node, "pci11c1,5811"))) {
+		pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, node, 0, 1);
+		pmac_call_feature(PMAC_FTR_1394_ENABLE, node, 0, 1);
+		updatecfg = 1;
+	}
+	if (uninorth_child && !strcmp(node->name, "ethernet") &&
+	    device_is_compatible(node, "gmac")) {
+		pmac_call_feature(PMAC_FTR_GMAC_ENABLE, node, 0, 1);
+		updatecfg = 1;
+	}
+
+	if (updatecfg) {
+		u16 cmd;
+	
+		/*
+		 * Make sure PCI is correctly configured
+		 *
+		 * We use old pci_bios versions of the function since, by
+		 * default, gmac is not powered up, and so will be absent
+		 * from the kernel initial PCI lookup.
+		 *
+		 * Should be replaced by 2.4 new PCI mechanisms and really
+		 * register the device.
+		 */
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE;
+    		pci_write_config_word(dev, PCI_COMMAND, cmd);
+    		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16);
+    		pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, pci_cache_line_size);
+	}
+
+	return 0;
+}
+
+/* We power down some devices after they have been probed. They'll
+ * be powered back on later on
+ */
+void __init
+pmac_pcibios_after_init(void)
+{
+	struct device_node* nd;
+
+#ifdef CONFIG_BLK_DEV_IDE
+	struct pci_dev *dev = NULL;
+
+	/* OF fails to initialize IDE controllers on macs
+	 * (and maybe other machines)
+	 *
+	 * Ideally, this should be moved to the IDE layer, but we need
+	 * to check specifically with Andre Hedrick how to do it cleanly
+	 * since the common IDE code seem to care about the fact that the
+	 * BIOS may have disabled a controller.
+	 *
+	 * -- BenH
+	 */
+	for_each_pci_dev(dev) {
+		if ((dev->class >> 16) == PCI_BASE_CLASS_STORAGE)
+			pci_enable_device(dev);
+	}
+#endif /* CONFIG_BLK_DEV_IDE */
+
+	nd = find_devices("firewire");
+	while (nd) {
+		if (nd->parent && (device_is_compatible(nd, "pci106b,18") ||
+				   device_is_compatible(nd, "pci106b,30") ||
+				   device_is_compatible(nd, "pci11c1,5811"))
+		    && device_is_compatible(nd->parent, "uni-north")) {
+			pmac_call_feature(PMAC_FTR_1394_ENABLE, nd, 0, 0);
+			pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, nd, 0, 0);
+		}
+		nd = nd->next;
+	}
+	nd = find_devices("ethernet");
+	while (nd) {
+		if (nd->parent && device_is_compatible(nd, "gmac")
+		    && device_is_compatible(nd->parent, "uni-north"))
+			pmac_call_feature(PMAC_FTR_GMAC_ENABLE, nd, 0, 0);
+		nd = nd->next;
+	}
+}
+
+#ifdef CONFIG_PPC64
+static void __init pmac_fixup_phb_resources(void)
+{
+	struct pci_controller *hose, *tmp;
+	
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
+		hose->io_resource.start += offset;
+		hose->io_resource.end += offset;
+		printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
+		       hose->global_number,
+		       hose->io_resource.start, hose->io_resource.end);
+	}
+}
+
+void __init pmac_pci_init(void)
+{
+	struct device_node *np, *root;
+	struct device_node *ht = NULL;
+
+	/* Probe root PCI hosts, that is on U3 the AGP host and the
+	 * HyperTransport host. That one is actually "kept" around
+	 * and actually added last as it's resource management relies
+	 * on the AGP resources to have been setup first
+	 */
+	root = of_find_node_by_path("/");
+	if (root == NULL) {
+		printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n");
+		return;
+	}
+	for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) {
+		if (np->name == NULL)
+			continue;
+		if (strcmp(np->name, "pci") == 0) {
+			if (add_bridge(np) == 0)
+				of_node_get(np);
+		}
+		if (strcmp(np->name, "ht") == 0) {
+			of_node_get(np);
+			ht = np;
+		}
+	}
+	of_node_put(root);
+
+	/* Now setup the HyperTransport host if we found any
+	 */
+	if (ht && add_bridge(ht) != 0)
+		of_node_put(ht);
+
+	/* Fixup the IO resources on our host bridges as the common code
+	 * does it only for childs of the host bridges
+	 */
+	pmac_fixup_phb_resources();
+
+	/* Setup the linkage between OF nodes and PHBs */ 
+	pci_devs_phb_init();
+
+	/* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We
+	 * assume there is no P2P bridge on the AGP bus, which should be a
+	 * safe assumptions hopefully.
+	 */
+	if (u3_agp) {
+		struct device_node *np = u3_agp->arch_data;
+		PCI_DN(np)->busno = 0xf0;
+		for (np = np->child; np; np = np->sibling)
+			PCI_DN(np)->busno = 0xf0;
+	}
+
+	pmac_check_ht_link();
+
+	/* Tell pci.c to not use the common resource allocation mecanism */
+	pci_probe_only = 1;
+	
+	/* Allow all IO */
+	io_page_mask = -1;
+}
+#endif
+
+#ifdef CONFIG_PPC32
+void pmac_pci_fixup_cardbus(struct pci_dev* dev)
+{
+	if (_machine != _MACH_Pmac)
+		return;
+	/*
+	 * Fix the interrupt routing on the various cardbus bridges
+	 * used on powerbooks
+	 */
+	if (dev->vendor != PCI_VENDOR_ID_TI)
+		return;
+	if (dev->device == PCI_DEVICE_ID_TI_1130 ||
+	    dev->device == PCI_DEVICE_ID_TI_1131) {
+		u8 val;
+	    	/* Enable PCI interrupt */
+		if (pci_read_config_byte(dev, 0x91, &val) == 0)
+			pci_write_config_byte(dev, 0x91, val | 0x30);
+		/* Disable ISA interrupt mode */
+		if (pci_read_config_byte(dev, 0x92, &val) == 0)
+			pci_write_config_byte(dev, 0x92, val & ~0x06);
+	}
+	if (dev->device == PCI_DEVICE_ID_TI_1210 ||
+	    dev->device == PCI_DEVICE_ID_TI_1211 ||
+	    dev->device == PCI_DEVICE_ID_TI_1410 ||
+	    dev->device == PCI_DEVICE_ID_TI_1510) {
+		u8 val;
+		/* 0x8c == TI122X_IRQMUX, 2 says to route the INTA
+		   signal out the MFUNC0 pin */
+		if (pci_read_config_byte(dev, 0x8c, &val) == 0)
+			pci_write_config_byte(dev, 0x8c, (val & ~0x0f) | 2);
+		/* Disable ISA interrupt mode */
+		if (pci_read_config_byte(dev, 0x92, &val) == 0)
+			pci_write_config_byte(dev, 0x92, val & ~0x06);
+	}
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus);
+
+void pmac_pci_fixup_pciata(struct pci_dev* dev)
+{
+       u8 progif = 0;
+
+       /*
+        * On PowerMacs, we try to switch any PCI ATA controller to
+	* fully native mode
+        */
+	if (_machine != _MACH_Pmac)
+		return;
+	/* Some controllers don't have the class IDE */
+	if (dev->vendor == PCI_VENDOR_ID_PROMISE)
+		switch(dev->device) {
+		case PCI_DEVICE_ID_PROMISE_20246:
+		case PCI_DEVICE_ID_PROMISE_20262:
+		case PCI_DEVICE_ID_PROMISE_20263:
+		case PCI_DEVICE_ID_PROMISE_20265:
+		case PCI_DEVICE_ID_PROMISE_20267:
+		case PCI_DEVICE_ID_PROMISE_20268:
+		case PCI_DEVICE_ID_PROMISE_20269:
+		case PCI_DEVICE_ID_PROMISE_20270:
+		case PCI_DEVICE_ID_PROMISE_20271:
+		case PCI_DEVICE_ID_PROMISE_20275:
+		case PCI_DEVICE_ID_PROMISE_20276:
+		case PCI_DEVICE_ID_PROMISE_20277:
+			goto good;
+		}
+	/* Others, check PCI class */
+	if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
+		return;
+ good:
+	pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
+	if ((progif & 5) != 5) {
+		printk(KERN_INFO "Forcing PCI IDE into native mode: %s\n", pci_name(dev));
+		(void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5);
+		if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) ||
+		    (progif & 5) != 5)
+			printk(KERN_ERR "Rewrite of PROGIF failed !\n");
+	}
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pmac_pci_fixup_pciata);
+#endif
+
+/*
+ * Disable second function on K2-SATA, it's broken
+ * and disable IO BARs on first one
+ */
+static void fixup_k2_sata(struct pci_dev* dev)
+{
+	int i;
+	u16 cmd;
+
+	if (PCI_FUNC(dev->devfn) > 0) {
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+		for (i = 0; i < 6; i++) {
+			dev->resource[i].start = dev->resource[i].end = 0;
+			dev->resource[i].flags = 0;
+			pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0);
+		}
+	} else {
+		pci_read_config_word(dev, PCI_COMMAND, &cmd);
+		cmd &= ~PCI_COMMAND_IO;
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+		for (i = 0; i < 5; i++) {
+			dev->resource[i].start = dev->resource[i].end = 0;
+			dev->resource[i].flags = 0;
+			pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0);
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata);
+
diff --git a/arch/powerpc/platforms/powermac/pmac_pic.c b/arch/powerpc/platforms/powermac/pmac_pic.c
new file mode 100644
index 00000000000..bf3e1899a4c
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_pic.c
@@ -0,0 +1,655 @@
+/*
+ *  Support for the interrupt controllers found on Power Macintosh,
+ *  currently Apple's "Grand Central" interrupt controller in all
+ *  it's incarnations. OpenPIC support used on newer machines is
+ *  in a separate file
+ *
+ *  Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
+ *
+ *  Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/sysdev.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/time.h>
+#include <asm/open_pic.h>
+#include <asm/xmon.h>
+#include <asm/pmac_feature.h>
+#include <asm/mpic.h>
+
+#include "pmac_pic.h"
+
+/*
+ * XXX this should be in xmon.h, but putting it there means xmon.h
+ * has to include <linux/interrupt.h> (to get irqreturn_t), which
+ * causes all sorts of problems.  -- paulus
+ */
+extern irqreturn_t xmon_irq(int, void *, struct pt_regs *);
+
+struct pmac_irq_hw {
+        unsigned int    event;
+        unsigned int    enable;
+        unsigned int    ack;
+        unsigned int    level;
+};
+
+/* Default addresses */
+static volatile struct pmac_irq_hw *pmac_irq_hw[4] = {
+        (struct pmac_irq_hw *) 0xf3000020,
+        (struct pmac_irq_hw *) 0xf3000010,
+        (struct pmac_irq_hw *) 0xf4000020,
+        (struct pmac_irq_hw *) 0xf4000010,
+};
+
+#define GC_LEVEL_MASK		0x3ff00000
+#define OHARE_LEVEL_MASK	0x1ff00000
+#define HEATHROW_LEVEL_MASK	0x1ff00000
+
+static int max_irqs;
+static int max_real_irqs;
+static u32 level_mask[4];
+
+static DEFINE_SPINLOCK(pmac_pic_lock);
+
+
+#define GATWICK_IRQ_POOL_SIZE        10
+static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE];
+
+/*
+ * Mark an irq as "lost".  This is only used on the pmac
+ * since it can lose interrupts (see pmac_set_irq_mask).
+ * -- Cort
+ */
+void
+__set_lost(unsigned long irq_nr, int nokick)
+{
+	if (!test_and_set_bit(irq_nr, ppc_lost_interrupts)) {
+		atomic_inc(&ppc_n_lost_interrupts);
+		if (!nokick)
+			set_dec(1);
+	}
+}
+
+static void
+pmac_mask_and_ack_irq(unsigned int irq_nr)
+{
+        unsigned long bit = 1UL << (irq_nr & 0x1f);
+        int i = irq_nr >> 5;
+        unsigned long flags;
+
+        if ((unsigned)irq_nr >= max_irqs)
+                return;
+
+        clear_bit(irq_nr, ppc_cached_irq_mask);
+        if (test_and_clear_bit(irq_nr, ppc_lost_interrupts))
+                atomic_dec(&ppc_n_lost_interrupts);
+	spin_lock_irqsave(&pmac_pic_lock, flags);
+        out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
+        out_le32(&pmac_irq_hw[i]->ack, bit);
+        do {
+                /* make sure ack gets to controller before we enable
+                   interrupts */
+                mb();
+        } while((in_le32(&pmac_irq_hw[i]->enable) & bit)
+                != (ppc_cached_irq_mask[i] & bit));
+	spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
+{
+        unsigned long bit = 1UL << (irq_nr & 0x1f);
+        int i = irq_nr >> 5;
+        unsigned long flags;
+
+        if ((unsigned)irq_nr >= max_irqs)
+                return;
+
+	spin_lock_irqsave(&pmac_pic_lock, flags);
+        /* enable unmasked interrupts */
+        out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
+
+        do {
+                /* make sure mask gets to controller before we
+                   return to user */
+                mb();
+        } while((in_le32(&pmac_irq_hw[i]->enable) & bit)
+                != (ppc_cached_irq_mask[i] & bit));
+
+        /*
+         * Unfortunately, setting the bit in the enable register
+         * when the device interrupt is already on *doesn't* set
+         * the bit in the flag register or request another interrupt.
+         */
+        if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level))
+		__set_lost((ulong)irq_nr, nokicklost);
+	spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+/* When an irq gets requested for the first client, if it's an
+ * edge interrupt, we clear any previous one on the controller
+ */
+static unsigned int pmac_startup_irq(unsigned int irq_nr)
+{
+        unsigned long bit = 1UL << (irq_nr & 0x1f);
+        int i = irq_nr >> 5;
+
+	if ((irq_desc[irq_nr].status & IRQ_LEVEL) == 0)
+		out_le32(&pmac_irq_hw[i]->ack, bit);
+        set_bit(irq_nr, ppc_cached_irq_mask);
+        pmac_set_irq_mask(irq_nr, 0);
+
+	return 0;
+}
+
+static void pmac_mask_irq(unsigned int irq_nr)
+{
+        clear_bit(irq_nr, ppc_cached_irq_mask);
+        pmac_set_irq_mask(irq_nr, 0);
+        mb();
+}
+
+static void pmac_unmask_irq(unsigned int irq_nr)
+{
+        set_bit(irq_nr, ppc_cached_irq_mask);
+        pmac_set_irq_mask(irq_nr, 0);
+}
+
+static void pmac_end_irq(unsigned int irq_nr)
+{
+	if (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS))
+	    && irq_desc[irq_nr].action) {
+        	set_bit(irq_nr, ppc_cached_irq_mask);
+	        pmac_set_irq_mask(irq_nr, 1);
+	}
+}
+
+
+struct hw_interrupt_type pmac_pic = {
+	.typename	= " PMAC-PIC ",
+	.startup	= pmac_startup_irq,
+	.enable		= pmac_unmask_irq,
+	.disable	= pmac_mask_irq,
+	.ack		= pmac_mask_and_ack_irq,
+	.end		= pmac_end_irq,
+};
+
+struct hw_interrupt_type gatwick_pic = {
+	.typename	= " GATWICK  ",
+	.startup	= pmac_startup_irq,
+	.enable		= pmac_unmask_irq,
+	.disable	= pmac_mask_irq,
+	.ack		= pmac_mask_and_ack_irq,
+	.end		= pmac_end_irq,
+};
+
+static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs)
+{
+	int irq, bits;
+
+	for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) {
+		int i = irq >> 5;
+		bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
+		/* We must read level interrupts from the level register */
+		bits |= (in_le32(&pmac_irq_hw[i]->level) & level_mask[i]);
+		bits &= ppc_cached_irq_mask[i];
+		if (bits == 0)
+			continue;
+		irq += __ilog2(bits);
+		__do_IRQ(irq, regs);
+		return IRQ_HANDLED;
+	}
+	printk("gatwick irq not from gatwick pic\n");
+	return IRQ_NONE;
+}
+
+int
+pmac_get_irq(struct pt_regs *regs)
+{
+	int irq;
+	unsigned long bits = 0;
+
+#ifdef CONFIG_SMP
+	void psurge_smp_message_recv(struct pt_regs *);
+
+       	/* IPI's are a hack on the powersurge -- Cort */
+       	if ( smp_processor_id() != 0 ) {
+		psurge_smp_message_recv(regs);
+		return -2;	/* ignore, already handled */
+        }
+#endif /* CONFIG_SMP */
+	for (irq = max_real_irqs; (irq -= 32) >= 0; ) {
+		int i = irq >> 5;
+		bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
+		/* We must read level interrupts from the level register */
+		bits |= (in_le32(&pmac_irq_hw[i]->level) & level_mask[i]);
+		bits &= ppc_cached_irq_mask[i];
+		if (bits == 0)
+			continue;
+		irq += __ilog2(bits);
+		break;
+	}
+
+	return irq;
+}
+
+/* This routine will fix some missing interrupt values in the device tree
+ * on the gatwick mac-io controller used by some PowerBooks
+ */
+static void __init
+pmac_fix_gatwick_interrupts(struct device_node *gw, int irq_base)
+{
+	struct device_node *node;
+	int count;
+
+	memset(gatwick_int_pool, 0, sizeof(gatwick_int_pool));
+	node = gw->child;
+	count = 0;
+	while(node)
+	{
+		/* Fix SCC */
+		if (strcasecmp(node->name, "escc") == 0)
+			if (node->child) {
+				if (node->child->n_intrs < 3) {
+					node->child->intrs = &gatwick_int_pool[count];
+					count += 3;
+				}
+				node->child->n_intrs = 3;
+				node->child->intrs[0].line = 15+irq_base;
+				node->child->intrs[1].line =  4+irq_base;
+				node->child->intrs[2].line =  5+irq_base;
+				printk(KERN_INFO "irq: fixed SCC on second controller (%d,%d,%d)\n",
+					node->child->intrs[0].line,
+					node->child->intrs[1].line,
+					node->child->intrs[2].line);
+			}
+		/* Fix media-bay & left SWIM */
+		if (strcasecmp(node->name, "media-bay") == 0) {
+			struct device_node* ya_node;
+
+			if (node->n_intrs == 0)
+				node->intrs = &gatwick_int_pool[count++];
+			node->n_intrs = 1;
+			node->intrs[0].line = 29+irq_base;
+			printk(KERN_INFO "irq: fixed media-bay on second controller (%d)\n",
+					node->intrs[0].line);
+
+			ya_node = node->child;
+			while(ya_node)
+			{
+				if (strcasecmp(ya_node->name, "floppy") == 0) {
+					if (ya_node->n_intrs < 2) {
+						ya_node->intrs = &gatwick_int_pool[count];
+						count += 2;
+					}
+					ya_node->n_intrs = 2;
+					ya_node->intrs[0].line = 19+irq_base;
+					ya_node->intrs[1].line =  1+irq_base;
+					printk(KERN_INFO "irq: fixed floppy on second controller (%d,%d)\n",
+						ya_node->intrs[0].line, ya_node->intrs[1].line);
+				}
+				if (strcasecmp(ya_node->name, "ata4") == 0) {
+					if (ya_node->n_intrs < 2) {
+						ya_node->intrs = &gatwick_int_pool[count];
+						count += 2;
+					}
+					ya_node->n_intrs = 2;
+					ya_node->intrs[0].line = 14+irq_base;
+					ya_node->intrs[1].line =  3+irq_base;
+					printk(KERN_INFO "irq: fixed ide on second controller (%d,%d)\n",
+						ya_node->intrs[0].line, ya_node->intrs[1].line);
+				}
+				ya_node = ya_node->sibling;
+			}
+		}
+		node = node->sibling;
+	}
+	if (count > 10) {
+		printk("WARNING !! Gatwick interrupt pool overflow\n");
+		printk("  GATWICK_IRQ_POOL_SIZE = %d\n", GATWICK_IRQ_POOL_SIZE);
+		printk("              requested = %d\n", count);
+	}
+}
+
+/*
+ * The PowerBook 3400/2400/3500 can have a combo ethernet/modem
+ * card which includes an ohare chip that acts as a second interrupt
+ * controller.  If we find this second ohare, set it up and fix the
+ * interrupt value in the device tree for the ethernet chip.
+ */
+static int __init enable_second_ohare(void)
+{
+	unsigned char bus, devfn;
+	unsigned short cmd;
+        unsigned long addr;
+	struct device_node *irqctrler = find_devices("pci106b,7");
+	struct device_node *ether;
+
+	if (irqctrler == NULL || irqctrler->n_addrs <= 0)
+		return -1;
+	addr = (unsigned long) ioremap(irqctrler->addrs[0].address, 0x40);
+	pmac_irq_hw[1] = (volatile struct pmac_irq_hw *)(addr + 0x20);
+	max_irqs = 64;
+	if (pci_device_from_OF_node(irqctrler, &bus, &devfn) == 0) {
+		struct pci_controller* hose = pci_find_hose_for_OF_device(irqctrler);
+		if (!hose)
+		    printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
+		else {
+		    early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
+		    cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+	  	    cmd &= ~PCI_COMMAND_IO;
+		    early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
+		}
+	}
+
+	/* Fix interrupt for the modem/ethernet combo controller. The number
+	   in the device tree (27) is bogus (correct for the ethernet-only
+	   board but not the combo ethernet/modem board).
+	   The real interrupt is 28 on the second controller -> 28+32 = 60.
+	*/
+	ether = find_devices("pci1011,14");
+	if (ether && ether->n_intrs > 0) {
+		ether->intrs[0].line = 60;
+		printk(KERN_INFO "irq: Fixed ethernet IRQ to %d\n",
+		       ether->intrs[0].line);
+	}
+
+	/* Return the interrupt number of the cascade */
+	return irqctrler->intrs[0].line;
+}
+
+static int pmac_u3_cascade(struct pt_regs *regs, void *data)
+{
+	return mpic_get_one_irq((struct mpic *)data, regs);
+}
+
+#ifdef CONFIG_XMON
+static struct irqaction xmon_action = {
+	.handler	= xmon_irq,
+	.flags		= 0,
+	.mask		= CPU_MASK_NONE,
+	.name		= "NMI - XMON"
+};
+#endif
+
+static struct irqaction gatwick_cascade_action = {
+	.handler	= gatwick_action,
+	.flags		= SA_INTERRUPT,
+	.mask		= CPU_MASK_NONE,
+	.name		= "cascade",
+};
+
+void __init pmac_pic_init(void)
+{
+        int i;
+        struct device_node *irqctrler  = NULL;
+        struct device_node *irqctrler2 = NULL;
+	struct device_node *np;
+        unsigned long addr;
+	int irq_cascade = -1;
+	struct mpic *mpic1, *mpic2;
+
+	/* We first try to detect Apple's new Core99 chipset, since mac-io
+	 * is quite different on those machines and contains an IBM MPIC2.
+	 */
+	np = find_type_devices("open-pic");
+	while (np) {
+		if (np->parent && !strcmp(np->parent->name, "u3"))
+			irqctrler2 = np;
+		else
+			irqctrler = np;
+		np = np->next;
+	}
+	if (irqctrler != NULL && irqctrler->n_addrs > 0) {
+		unsigned char senses[128];
+
+		printk(KERN_INFO "PowerMac using OpenPIC irq controller at 0x%08x\n",
+		       (unsigned int)irqctrler->addrs[0].address);
+
+		prom_get_irq_senses(senses, 0, 128);
+		mpic1 = mpic_alloc(irqctrler->addrs[0].address,
+				   MPIC_PRIMARY | MPIC_WANTS_RESET,
+				   0, 0, 128, 256, senses, 128, " K2-MPIC  ");
+		BUG_ON(mpic1 == NULL);
+		mpic_init(mpic1);		
+
+		if (irqctrler2 != NULL && irqctrler2->n_intrs > 0 &&
+		    irqctrler2->n_addrs > 0) {
+			printk(KERN_INFO "Slave OpenPIC at 0x%08x hooked on IRQ %d\n",
+			       (u32)irqctrler2->addrs[0].address,
+			       irqctrler2->intrs[0].line);
+
+			pmac_call_feature(PMAC_FTR_ENABLE_MPIC, irqctrler2, 0, 0);
+			prom_get_irq_senses(senses, 128, 128 + 128);
+
+			/* We don't need to set MPIC_BROKEN_U3 here since we don't have
+			 * hypertransport interrupts routed to it
+			 */
+			mpic2 = mpic_alloc(irqctrler2->addrs[0].address,
+					   MPIC_BIG_ENDIAN | MPIC_WANTS_RESET,
+					   0, 128, 128, 0, senses, 128, " U3-MPIC  ");
+			BUG_ON(mpic2 == NULL);
+			mpic_init(mpic2);
+			mpic_setup_cascade(irqctrler2->intrs[0].line,
+					   pmac_u3_cascade, mpic2);
+		}
+	}
+
+	/* Get the level/edge settings, assume if it's not
+	 * a Grand Central nor an OHare, then it's an Heathrow
+	 * (or Paddington).
+	 */
+	if (find_devices("gc"))
+		level_mask[0] = GC_LEVEL_MASK;
+	else if (find_devices("ohare")) {
+		level_mask[0] = OHARE_LEVEL_MASK;
+		/* We might have a second cascaded ohare */
+		level_mask[1] = OHARE_LEVEL_MASK;
+	} else {
+		level_mask[0] = HEATHROW_LEVEL_MASK;
+		level_mask[1] = 0;
+		/* We might have a second cascaded heathrow */
+		level_mask[2] = HEATHROW_LEVEL_MASK;
+		level_mask[3] = 0;
+	}
+
+	/*
+	 * G3 powermacs and 1999 G3 PowerBooks have 64 interrupts,
+	 * 1998 G3 Series PowerBooks have 128,
+	 * other powermacs have 32.
+	 * The combo ethernet/modem card for the Powerstar powerbooks
+	 * (2400/3400/3500, ohare based) has a second ohare chip
+	 * effectively making a total of 64.
+	 */
+	max_irqs = max_real_irqs = 32;
+	irqctrler = find_devices("mac-io");
+	if (irqctrler)
+	{
+		max_real_irqs = 64;
+		if (irqctrler->next)
+			max_irqs = 128;
+		else
+			max_irqs = 64;
+	}
+	for ( i = 0; i < max_real_irqs ; i++ )
+		irq_desc[i].handler = &pmac_pic;
+
+	/* get addresses of first controller */
+	if (irqctrler) {
+		if  (irqctrler->n_addrs > 0) {
+			addr = (unsigned long)
+				ioremap(irqctrler->addrs[0].address, 0x40);
+			for (i = 0; i < 2; ++i)
+				pmac_irq_hw[i] = (volatile struct pmac_irq_hw*)
+					(addr + (2 - i) * 0x10);
+		}
+
+		/* get addresses of second controller */
+		irqctrler = irqctrler->next;
+		if (irqctrler && irqctrler->n_addrs > 0) {
+			addr = (unsigned long)
+				ioremap(irqctrler->addrs[0].address, 0x40);
+			for (i = 2; i < 4; ++i)
+				pmac_irq_hw[i] = (volatile struct pmac_irq_hw*)
+					(addr + (4 - i) * 0x10);
+			irq_cascade = irqctrler->intrs[0].line;
+			if (device_is_compatible(irqctrler, "gatwick"))
+				pmac_fix_gatwick_interrupts(irqctrler, max_real_irqs);
+		}
+	} else {
+		/* older powermacs have a GC (grand central) or ohare at
+		   f3000000, with interrupt control registers at f3000020. */
+		addr = (unsigned long) ioremap(0xf3000000, 0x40);
+		pmac_irq_hw[0] = (volatile struct pmac_irq_hw *) (addr + 0x20);
+	}
+
+	/* PowerBooks 3400 and 3500 can have a second controller in a second
+	   ohare chip, on the combo ethernet/modem card */
+	if (machine_is_compatible("AAPL,3400/2400")
+	     || machine_is_compatible("AAPL,3500"))
+		irq_cascade = enable_second_ohare();
+
+	/* disable all interrupts in all controllers */
+	for (i = 0; i * 32 < max_irqs; ++i)
+		out_le32(&pmac_irq_hw[i]->enable, 0);
+	/* mark level interrupts */
+	for (i = 0; i < max_irqs; i++)
+		if (level_mask[i >> 5] & (1UL << (i & 0x1f)))
+			irq_desc[i].status = IRQ_LEVEL;
+
+	/* get interrupt line of secondary interrupt controller */
+	if (irq_cascade >= 0) {
+		printk(KERN_INFO "irq: secondary controller on irq %d\n",
+			(int)irq_cascade);
+		for ( i = max_real_irqs ; i < max_irqs ; i++ )
+			irq_desc[i].handler = &gatwick_pic;
+		setup_irq(irq_cascade, &gatwick_cascade_action);
+	}
+	printk("System has %d possible interrupts\n", max_irqs);
+	if (max_irqs != max_real_irqs)
+		printk(KERN_DEBUG "%d interrupts on main controller\n",
+			max_real_irqs);
+
+#ifdef CONFIG_XMON
+	setup_irq(20, &xmon_action);
+#endif	/* CONFIG_XMON */
+}
+
+#ifdef CONFIG_PM
+/*
+ * These procedures are used in implementing sleep on the powerbooks.
+ * sleep_save_intrs() saves the states of all interrupt enables
+ * and disables all interrupts except for the nominated one.
+ * sleep_restore_intrs() restores the states of all interrupt enables.
+ */
+unsigned long sleep_save_mask[2];
+
+/* This used to be passed by the PMU driver but that link got
+ * broken with the new driver model. We use this tweak for now...
+ */
+static int pmacpic_find_viaint(void)
+{
+	int viaint = -1;
+
+#ifdef CONFIG_ADB_PMU
+	struct device_node *np;
+
+	if (pmu_get_model() != PMU_OHARE_BASED)
+		goto not_found;
+	np = of_find_node_by_name(NULL, "via-pmu");
+	if (np == NULL)
+		goto not_found;
+	viaint = np->intrs[0].line;
+#endif /* CONFIG_ADB_PMU */
+
+not_found:
+	return viaint;
+}
+
+static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state)
+{
+	int viaint = pmacpic_find_viaint();
+
+	sleep_save_mask[0] = ppc_cached_irq_mask[0];
+	sleep_save_mask[1] = ppc_cached_irq_mask[1];
+	ppc_cached_irq_mask[0] = 0;
+	ppc_cached_irq_mask[1] = 0;
+	if (viaint > 0)
+		set_bit(viaint, ppc_cached_irq_mask);
+	out_le32(&pmac_irq_hw[0]->enable, ppc_cached_irq_mask[0]);
+	if (max_real_irqs > 32)
+		out_le32(&pmac_irq_hw[1]->enable, ppc_cached_irq_mask[1]);
+	(void)in_le32(&pmac_irq_hw[0]->event);
+	/* make sure mask gets to controller before we return to caller */
+	mb();
+        (void)in_le32(&pmac_irq_hw[0]->enable);
+
+        return 0;
+}
+
+static int pmacpic_resume(struct sys_device *sysdev)
+{
+	int i;
+
+	out_le32(&pmac_irq_hw[0]->enable, 0);
+	if (max_real_irqs > 32)
+		out_le32(&pmac_irq_hw[1]->enable, 0);
+	mb();
+	for (i = 0; i < max_real_irqs; ++i)
+		if (test_bit(i, sleep_save_mask))
+			pmac_unmask_irq(i);
+
+	return 0;
+}
+
+#endif /* CONFIG_PM */
+
+static struct sysdev_class pmacpic_sysclass = {
+	set_kset_name("pmac_pic"),
+};
+
+static struct sys_device device_pmacpic = {
+	.id		= 0,
+	.cls		= &pmacpic_sysclass,
+};
+
+static struct sysdev_driver driver_pmacpic = {
+#ifdef CONFIG_PM
+	.suspend	= &pmacpic_suspend,
+	.resume		= &pmacpic_resume,
+#endif /* CONFIG_PM */
+};
+
+static int __init init_pmacpic_sysfs(void)
+{
+	if (max_irqs == 0)
+		return -ENODEV;
+
+	printk(KERN_DEBUG "Registering pmac pic with sysfs...\n");
+	sysdev_class_register(&pmacpic_sysclass);
+	sysdev_register(&device_pmacpic);
+	sysdev_driver_register(&pmacpic_sysclass, &driver_pmacpic);
+	return 0;
+}
+
+subsys_initcall(init_pmacpic_sysfs);
+
diff --git a/arch/powerpc/platforms/powermac/pmac_pic.h b/arch/powerpc/platforms/powermac/pmac_pic.h
new file mode 100644
index 00000000000..664103dfeef
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_pic.h
@@ -0,0 +1,11 @@
+#ifndef __PPC_PLATFORMS_PMAC_PIC_H
+#define __PPC_PLATFORMS_PMAC_PIC_H
+
+#include <linux/irq.h>
+
+extern struct hw_interrupt_type pmac_pic;
+
+void pmac_pic_init(void);
+int pmac_get_irq(struct pt_regs *regs);
+
+#endif /* __PPC_PLATFORMS_PMAC_PIC_H */
diff --git a/arch/powerpc/platforms/powermac/pmac_setup.c b/arch/powerpc/platforms/powermac/pmac_setup.c
new file mode 100644
index 00000000000..dbc921a084c
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_setup.c
@@ -0,0 +1,662 @@
+/*
+ *  arch/ppc/platforms/setup.c
+ *
+ *  PowerPC version
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  Adapted for Power Macintosh by Paul Mackerras
+ *    Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ *  Derived from "arch/alpha/kernel/setup.c"
+ *    Copyright (C) 1995 Linus Torvalds
+ *
+ *  Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/major.h>
+#include <linux/initrd.h>
+#include <linux/vt_kern.h>
+#include <linux/console.h>
+#include <linux/ide.h>
+#include <linux/pci.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/bitops.h>
+#include <linux/suspend.h>
+
+#include <asm/reg.h>
+#include <asm/sections.h>
+#include <asm/prom.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/ohare.h>
+#include <asm/mediabay.h>
+#include <asm/machdep.h>
+#include <asm/dma.h>
+#include <asm/bootx.h>
+#include <asm/cputable.h>
+#include <asm/btext.h>
+#include <asm/pmac_feature.h>
+#include <asm/time.h>
+#include <asm/of_device.h>
+#include <asm/mmu_context.h>
+
+#include "pmac_pic.h"
+
+#undef SHOW_GATWICK_IRQS
+
+extern long pmac_time_init(void);
+extern unsigned long pmac_get_rtc_time(void);
+extern int pmac_set_rtc_time(unsigned long nowtime);
+extern void pmac_read_rtc_time(void);
+extern void pmac_calibrate_decr(void);
+extern void pmac_pcibios_fixup(void);
+extern void pmac_find_bridges(void);
+extern unsigned long pmac_ide_get_base(int index);
+extern void pmac_ide_init_hwif_ports(hw_regs_t *hw,
+	unsigned long data_port, unsigned long ctrl_port, int *irq);
+
+extern void pmac_nvram_update(void);
+extern unsigned char pmac_nvram_read_byte(int addr);
+extern void pmac_nvram_write_byte(int addr, unsigned char val);
+extern int pmac_pci_enable_device_hook(struct pci_dev *dev, int initial);
+extern void pmac_pcibios_after_init(void);
+extern int of_show_percpuinfo(struct seq_file *m, int i);
+
+unsigned char drive_info;
+
+int ppc_override_l2cr = 0;
+int ppc_override_l2cr_value;
+int has_l2cache = 0;
+
+static int current_root_goodness = -1;
+
+extern int pmac_newworld;
+
+#define DEFAULT_ROOT_DEVICE Root_SDA1	/* sda1 - slightly silly choice */
+
+extern void zs_kgdb_hook(int tty_num);
+static void ohare_init(void);
+#ifdef CONFIG_BOOTX_TEXT
+static void pmac_progress(char *s, unsigned short hex);
+#endif
+
+sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN;
+
+#ifdef CONFIG_SMP
+extern struct smp_ops_t psurge_smp_ops;
+extern struct smp_ops_t core99_smp_ops;
+#endif /* CONFIG_SMP */
+
+static int
+pmac_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *np;
+	char *pp;
+	int plen;
+	int mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO,
+		NULL, PMAC_MB_INFO_MODEL, 0);
+	unsigned int mbflags = (unsigned int)pmac_call_feature(PMAC_FTR_GET_MB_INFO,
+		NULL, PMAC_MB_INFO_FLAGS, 0);
+	char* mbname;
+
+	if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME, (int)&mbname) != 0)
+		mbname = "Unknown";
+
+	/* find motherboard type */
+	seq_printf(m, "machine\t\t: ");
+	np = find_devices("device-tree");
+	if (np != NULL) {
+		pp = (char *) get_property(np, "model", NULL);
+		if (pp != NULL)
+			seq_printf(m, "%s\n", pp);
+		else
+			seq_printf(m, "PowerMac\n");
+		pp = (char *) get_property(np, "compatible", &plen);
+		if (pp != NULL) {
+			seq_printf(m, "motherboard\t:");
+			while (plen > 0) {
+				int l = strlen(pp) + 1;
+				seq_printf(m, " %s", pp);
+				plen -= l;
+				pp += l;
+			}
+			seq_printf(m, "\n");
+		}
+	} else
+		seq_printf(m, "PowerMac\n");
+
+	/* print parsed model */
+	seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname);
+	seq_printf(m, "pmac flags\t: %08x\n", mbflags);
+
+	/* find l2 cache info */
+	np = find_devices("l2-cache");
+	if (np == 0)
+		np = find_type_devices("cache");
+	if (np != 0) {
+		unsigned int *ic = (unsigned int *)
+			get_property(np, "i-cache-size", NULL);
+		unsigned int *dc = (unsigned int *)
+			get_property(np, "d-cache-size", NULL);
+		seq_printf(m, "L2 cache\t:");
+		has_l2cache = 1;
+		if (get_property(np, "cache-unified", NULL) != 0 && dc) {
+			seq_printf(m, " %dK unified", *dc / 1024);
+		} else {
+			if (ic)
+				seq_printf(m, " %dK instruction", *ic / 1024);
+			if (dc)
+				seq_printf(m, "%s %dK data",
+					   (ic? " +": ""), *dc / 1024);
+		}
+		pp = get_property(np, "ram-type", NULL);
+		if (pp)
+			seq_printf(m, " %s", pp);
+		seq_printf(m, "\n");
+	}
+
+	/* find ram info */
+	np = find_devices("memory");
+	if (np != 0) {
+		int n;
+		struct reg_property *reg = (struct reg_property *)
+			get_property(np, "reg", &n);
+
+		if (reg != 0) {
+			unsigned long total = 0;
+
+			for (n /= sizeof(struct reg_property); n > 0; --n)
+				total += (reg++)->size;
+			seq_printf(m, "memory\t\t: %luMB\n", total >> 20);
+		}
+	}
+
+	/* Checks "l2cr-value" property in the registry */
+	np = find_devices("cpus");
+	if (np == 0)
+		np = find_type_devices("cpu");
+	if (np != 0) {
+		unsigned int *l2cr = (unsigned int *)
+			get_property(np, "l2cr-value", NULL);
+		if (l2cr != 0) {
+			seq_printf(m, "l2cr override\t: 0x%x\n", *l2cr);
+		}
+	}
+
+	/* Indicate newworld/oldworld */
+	seq_printf(m, "pmac-generation\t: %s\n",
+		   pmac_newworld ? "NewWorld" : "OldWorld");
+
+
+	return 0;
+}
+
+static int
+pmac_show_percpuinfo(struct seq_file *m, int i)
+{
+#ifdef CONFIG_CPU_FREQ_PMAC
+	extern unsigned int pmac_get_one_cpufreq(int i);
+	unsigned int freq = pmac_get_one_cpufreq(i);
+	if (freq != 0) {
+		seq_printf(m, "clock\t\t: %dMHz\n", freq/1000);
+		return 0;
+	}
+#endif /* CONFIG_CPU_FREQ_PMAC */
+	return of_show_percpuinfo(m, i);
+}
+
+static volatile u32 *sysctrl_regs;
+
+void __init
+pmac_setup_arch(void)
+{
+	struct device_node *cpu;
+	int *fp;
+	unsigned long pvr;
+
+	pvr = PVR_VER(mfspr(SPRN_PVR));
+
+	/* Set loops_per_jiffy to a half-way reasonable value,
+	   for use until calibrate_delay gets called. */
+	cpu = find_type_devices("cpu");
+	if (cpu != 0) {
+		fp = (int *) get_property(cpu, "clock-frequency", NULL);
+		if (fp != 0) {
+			if (pvr == 4 || pvr >= 8)
+				/* 604, G3, G4 etc. */
+				loops_per_jiffy = *fp / HZ;
+			else
+				/* 601, 603, etc. */
+				loops_per_jiffy = *fp / (2*HZ);
+		} else
+			loops_per_jiffy = 50000000 / HZ;
+	}
+
+	/* this area has the CPU identification register
+	   and some registers used by smp boards */
+	sysctrl_regs = (volatile u32 *) ioremap(0xf8000000, 0x1000);
+	ohare_init();
+
+	/* Lookup PCI hosts */
+	pmac_find_bridges();
+
+	/* Checks "l2cr-value" property in the registry */
+	if (cpu_has_feature(CPU_FTR_L2CR)) {
+		struct device_node *np = find_devices("cpus");
+		if (np == 0)
+			np = find_type_devices("cpu");
+		if (np != 0) {
+			unsigned int *l2cr = (unsigned int *)
+				get_property(np, "l2cr-value", NULL);
+			if (l2cr != 0) {
+				ppc_override_l2cr = 1;
+				ppc_override_l2cr_value = *l2cr;
+				_set_L2CR(0);
+				_set_L2CR(ppc_override_l2cr_value);
+			}
+		}
+	}
+
+	if (ppc_override_l2cr)
+		printk(KERN_INFO "L2CR overriden (0x%x), backside cache is %s\n",
+			ppc_override_l2cr_value, (ppc_override_l2cr_value & 0x80000000)
+				? "enabled" : "disabled");
+
+#ifdef CONFIG_KGDB
+	zs_kgdb_hook(0);
+#endif
+
+#ifdef CONFIG_ADB_CUDA
+	find_via_cuda();
+#else
+	if (find_devices("via-cuda")) {
+		printk("WARNING ! Your machine is Cuda based but your kernel\n");
+		printk("          wasn't compiled with CONFIG_ADB_CUDA option !\n");
+	}
+#endif
+#ifdef CONFIG_ADB_PMU
+	find_via_pmu();
+#else
+	if (find_devices("via-pmu")) {
+		printk("WARNING ! Your machine is PMU based but your kernel\n");
+		printk("          wasn't compiled with CONFIG_ADB_PMU option !\n");
+	}
+#endif
+#ifdef CONFIG_NVRAM
+	pmac_nvram_init();
+#endif
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (initrd_start)
+		ROOT_DEV = Root_RAM0;
+	else
+#endif
+		ROOT_DEV = DEFAULT_ROOT_DEVICE;
+
+#ifdef CONFIG_SMP
+	/* Check for Core99 */
+	if (find_devices("uni-n") || find_devices("u3"))
+		ppc_md.smp_ops = &core99_smp_ops;
+	else
+		ppc_md.smp_ops = &psurge_smp_ops;
+#endif /* CONFIG_SMP */
+
+	pci_create_OF_bus_map();
+}
+
+static void __init ohare_init(void)
+{
+	/*
+	 * Turn on the L2 cache.
+	 * We assume that we have a PSX memory controller iff
+	 * we have an ohare I/O controller.
+	 */
+	if (find_devices("ohare") != NULL) {
+		if (((sysctrl_regs[2] >> 24) & 0xf) >= 3) {
+			if (sysctrl_regs[4] & 0x10)
+				sysctrl_regs[4] |= 0x04000020;
+			else
+				sysctrl_regs[4] |= 0x04000000;
+			if(has_l2cache)
+				printk(KERN_INFO "Level 2 cache enabled\n");
+		}
+	}
+}
+
+extern char *bootpath;
+extern char *bootdevice;
+void *boot_host;
+int boot_target;
+int boot_part;
+extern dev_t boot_dev;
+
+#ifdef CONFIG_SCSI
+void __init
+note_scsi_host(struct device_node *node, void *host)
+{
+	int l;
+	char *p;
+
+	l = strlen(node->full_name);
+	if (bootpath != NULL && bootdevice != NULL
+	    && strncmp(node->full_name, bootdevice, l) == 0
+	    && (bootdevice[l] == '/' || bootdevice[l] == 0)) {
+		boot_host = host;
+		/*
+		 * There's a bug in OF 1.0.5.  (Why am I not surprised.)
+		 * If you pass a path like scsi/sd@1:0 to canon, it returns
+		 * something like /bandit@F2000000/gc@10/53c94@10000/sd@0,0
+		 * That is, the scsi target number doesn't get preserved.
+		 * So we pick the target number out of bootpath and use that.
+		 */
+		p = strstr(bootpath, "/sd@");
+		if (p != NULL) {
+			p += 4;
+			boot_target = simple_strtoul(p, NULL, 10);
+			p = strchr(p, ':');
+			if (p != NULL)
+				boot_part = simple_strtoul(p + 1, NULL, 10);
+		}
+	}
+}
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC)
+static dev_t __init
+find_ide_boot(void)
+{
+	char *p;
+	int n;
+	dev_t __init pmac_find_ide_boot(char *bootdevice, int n);
+
+	if (bootdevice == NULL)
+		return 0;
+	p = strrchr(bootdevice, '/');
+	if (p == NULL)
+		return 0;
+	n = p - bootdevice;
+
+	return pmac_find_ide_boot(bootdevice, n);
+}
+#endif /* CONFIG_BLK_DEV_IDE && CONFIG_BLK_DEV_IDE_PMAC */
+
+static void __init
+find_boot_device(void)
+{
+#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC)
+	boot_dev = find_ide_boot();
+#endif
+}
+
+static int initializing = 1;
+/* TODO: Merge the suspend-to-ram with the common code !!!
+ * currently, this is a stub implementation for suspend-to-disk
+ * only
+ */
+
+#ifdef CONFIG_SOFTWARE_SUSPEND
+
+static int pmac_pm_prepare(suspend_state_t state)
+{
+	printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state);
+
+	return 0;
+}
+
+static int pmac_pm_enter(suspend_state_t state)
+{
+	printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state);
+
+	/* Giveup the lazy FPU & vec so we don't have to back them
+	 * up from the low level code
+	 */
+	enable_kernel_fp();
+
+#ifdef CONFIG_ALTIVEC
+	if (cur_cpu_spec[0]->cpu_features & CPU_FTR_ALTIVEC)
+		enable_kernel_altivec();
+#endif /* CONFIG_ALTIVEC */
+
+	return 0;
+}
+
+static int pmac_pm_finish(suspend_state_t state)
+{
+	printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state);
+
+	/* Restore userland MMU context */
+	set_context(current->active_mm->context, current->active_mm->pgd);
+
+	return 0;
+}
+
+static struct pm_ops pmac_pm_ops = {
+	.pm_disk_mode	= PM_DISK_SHUTDOWN,
+	.prepare	= pmac_pm_prepare,
+	.enter		= pmac_pm_enter,
+	.finish		= pmac_pm_finish,
+};
+
+#endif /* CONFIG_SOFTWARE_SUSPEND */
+
+static int pmac_late_init(void)
+{
+	initializing = 0;
+#ifdef CONFIG_SOFTWARE_SUSPEND
+	pm_set_ops(&pmac_pm_ops);
+#endif /* CONFIG_SOFTWARE_SUSPEND */
+	return 0;
+}
+
+late_initcall(pmac_late_init);
+
+/* can't be __init - can be called whenever a disk is first accessed */
+void
+note_bootable_part(dev_t dev, int part, int goodness)
+{
+	static int found_boot = 0;
+	char *p;
+
+	if (!initializing)
+		return;
+	if ((goodness <= current_root_goodness) &&
+	    ROOT_DEV != DEFAULT_ROOT_DEVICE)
+		return;
+	p = strstr(saved_command_line, "root=");
+	if (p != NULL && (p == saved_command_line || p[-1] == ' '))
+		return;
+
+	if (!found_boot) {
+		find_boot_device();
+		found_boot = 1;
+	}
+	if (!boot_dev || dev == boot_dev) {
+		ROOT_DEV = dev + part;
+		boot_dev = 0;
+		current_root_goodness = goodness;
+	}
+}
+
+static void
+pmac_restart(char *cmd)
+{
+#ifdef CONFIG_ADB_CUDA
+	struct adb_request req;
+#endif /* CONFIG_ADB_CUDA */
+
+	switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
+	case SYS_CTRLER_CUDA:
+		cuda_request(&req, NULL, 2, CUDA_PACKET,
+			     CUDA_RESET_SYSTEM);
+		for (;;)
+			cuda_poll();
+		break;
+#endif /* CONFIG_ADB_CUDA */
+#ifdef CONFIG_ADB_PMU
+	case SYS_CTRLER_PMU:
+		pmu_restart();
+		break;
+#endif /* CONFIG_ADB_PMU */
+	default: ;
+	}
+}
+
+static void
+pmac_power_off(void)
+{
+#ifdef CONFIG_ADB_CUDA
+	struct adb_request req;
+#endif /* CONFIG_ADB_CUDA */
+
+	switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
+	case SYS_CTRLER_CUDA:
+		cuda_request(&req, NULL, 2, CUDA_PACKET,
+			     CUDA_POWERDOWN);
+		for (;;)
+			cuda_poll();
+		break;
+#endif /* CONFIG_ADB_CUDA */
+#ifdef CONFIG_ADB_PMU
+	case SYS_CTRLER_PMU:
+		pmu_shutdown();
+		break;
+#endif /* CONFIG_ADB_PMU */
+	default: ;
+	}
+}
+
+static void
+pmac_halt(void)
+{
+	pmac_power_off();
+}
+
+void __init
+pmac_init(unsigned long r3, unsigned long r4, unsigned long r5,
+	  unsigned long r6, unsigned long r7)
+{
+	/* isa_io_base gets set in pmac_find_bridges */
+	isa_mem_base = PMAC_ISA_MEM_BASE;
+	pci_dram_offset = PMAC_PCI_DRAM_OFFSET;
+	ISA_DMA_THRESHOLD = ~0L;
+	DMA_MODE_READ = 1;
+	DMA_MODE_WRITE = 2;
+
+	ppc_md.setup_arch     = pmac_setup_arch;
+	ppc_md.show_cpuinfo   = pmac_show_cpuinfo;
+	ppc_md.show_percpuinfo = pmac_show_percpuinfo;
+	ppc_md.irq_canonicalize = NULL;
+	ppc_md.init_IRQ       = pmac_pic_init;
+	ppc_md.get_irq        = pmac_get_irq; /* Changed later on ... */
+
+	ppc_md.pcibios_fixup  = pmac_pcibios_fixup;
+	ppc_md.pcibios_enable_device_hook = pmac_pci_enable_device_hook;
+	ppc_md.pcibios_after_init = pmac_pcibios_after_init;
+	ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot;
+
+	ppc_md.restart        = pmac_restart;
+	ppc_md.power_off      = pmac_power_off;
+	ppc_md.halt           = pmac_halt;
+
+	ppc_md.time_init      = pmac_time_init;
+	ppc_md.set_rtc_time   = pmac_set_rtc_time;
+	ppc_md.get_rtc_time   = pmac_get_rtc_time;
+	ppc_md.calibrate_decr = pmac_calibrate_decr;
+
+	ppc_md.feature_call   = pmac_do_feature_call;
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
+#ifdef CONFIG_BLK_DEV_IDE_PMAC
+        ppc_ide_md.ide_init_hwif	= pmac_ide_init_hwif_ports;
+        ppc_ide_md.default_io_base	= pmac_ide_get_base;
+#endif /* CONFIG_BLK_DEV_IDE_PMAC */
+#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
+
+#ifdef CONFIG_BOOTX_TEXT
+	ppc_md.progress = pmac_progress;
+#endif /* CONFIG_BOOTX_TEXT */
+
+	if (ppc_md.progress) ppc_md.progress("pmac_init(): exit", 0);
+
+}
+
+#ifdef CONFIG_BOOTX_TEXT
+static void __init
+pmac_progress(char *s, unsigned short hex)
+{
+	if (boot_text_mapped) {
+		btext_drawstring(s);
+		btext_drawchar('\n');
+	}
+}
+#endif /* CONFIG_BOOTX_TEXT */
+
+static int __init
+pmac_declare_of_platform_devices(void)
+{
+	struct device_node *np;
+
+	np = find_devices("uni-n");
+	if (np) {
+		for (np = np->child; np != NULL; np = np->sibling)
+			if (strncmp(np->name, "i2c", 3) == 0) {
+				of_platform_device_create(np, "uni-n-i2c",
+							  NULL);
+				break;
+			}
+	}
+	np = find_devices("u3");
+	if (np) {
+		for (np = np->child; np != NULL; np = np->sibling)
+			if (strncmp(np->name, "i2c", 3) == 0) {
+				of_platform_device_create(np, "u3-i2c",
+							  NULL);
+				break;
+			}
+	}
+
+	np = find_devices("valkyrie");
+	if (np)
+		of_platform_device_create(np, "valkyrie", NULL);
+	np = find_devices("platinum");
+	if (np)
+		of_platform_device_create(np, "platinum", NULL);
+
+	return 0;
+}
+
+device_initcall(pmac_declare_of_platform_devices);
diff --git a/arch/powerpc/platforms/powermac/pmac_sleep.S b/arch/powerpc/platforms/powermac/pmac_sleep.S
new file mode 100644
index 00000000000..88419c77ac4
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_sleep.S
@@ -0,0 +1,396 @@
+/*
+ * This file contains sleep low-level functions for PowerBook G3.
+ *    Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *    and Paul Mackerras (paulus@samba.org).
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+#define MAGIC	0x4c617273	/* 'Lars' */
+
+/*
+ * Structure for storing CPU registers on the stack.
+ */
+#define SL_SP		0
+#define SL_PC		4
+#define SL_MSR		8
+#define SL_SDR1		0xc
+#define SL_SPRG0	0x10	/* 4 sprg's */
+#define SL_DBAT0	0x20
+#define SL_IBAT0	0x28
+#define SL_DBAT1	0x30
+#define SL_IBAT1	0x38
+#define SL_DBAT2	0x40
+#define SL_IBAT2	0x48
+#define SL_DBAT3	0x50
+#define SL_IBAT3	0x58
+#define SL_TB		0x60
+#define SL_R2		0x68
+#define SL_CR		0x6c
+#define SL_R12		0x70	/* r12 to r31 */
+#define SL_SIZE		(SL_R12 + 80)
+
+	.section .text
+	.align	5
+
+#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC)
+
+/* This gets called by via-pmu.c late during the sleep process.
+ * The PMU was already send the sleep command and will shut us down
+ * soon. We need to save all that is needed and setup the wakeup
+ * vector that will be called by the ROM on wakeup
+ */
+_GLOBAL(low_sleep_handler)
+#ifndef CONFIG_6xx
+	blr
+#else
+	mflr	r0
+	stw	r0,4(r1)
+	stwu	r1,-SL_SIZE(r1)
+	mfcr	r0
+	stw	r0,SL_CR(r1)
+	stw	r2,SL_R2(r1)
+	stmw	r12,SL_R12(r1)
+
+	/* Save MSR & SDR1 */
+	mfmsr	r4
+	stw	r4,SL_MSR(r1)
+	mfsdr1	r4
+	stw	r4,SL_SDR1(r1)
+
+	/* Get a stable timebase and save it */
+1:	mftbu	r4
+	stw	r4,SL_TB(r1)
+	mftb	r5
+	stw	r5,SL_TB+4(r1)
+	mftbu	r3
+	cmpw	r3,r4
+	bne	1b
+
+	/* Save SPRGs */
+	mfsprg	r4,0
+	stw	r4,SL_SPRG0(r1)
+	mfsprg	r4,1
+	stw	r4,SL_SPRG0+4(r1)
+	mfsprg	r4,2
+	stw	r4,SL_SPRG0+8(r1)
+	mfsprg	r4,3
+	stw	r4,SL_SPRG0+12(r1)
+
+	/* Save BATs */
+	mfdbatu	r4,0
+	stw	r4,SL_DBAT0(r1)
+	mfdbatl	r4,0
+	stw	r4,SL_DBAT0+4(r1)
+	mfdbatu	r4,1
+	stw	r4,SL_DBAT1(r1)
+	mfdbatl	r4,1
+	stw	r4,SL_DBAT1+4(r1)
+	mfdbatu	r4,2
+	stw	r4,SL_DBAT2(r1)
+	mfdbatl	r4,2
+	stw	r4,SL_DBAT2+4(r1)
+	mfdbatu	r4,3
+	stw	r4,SL_DBAT3(r1)
+	mfdbatl	r4,3
+	stw	r4,SL_DBAT3+4(r1)
+	mfibatu	r4,0
+	stw	r4,SL_IBAT0(r1)
+	mfibatl	r4,0
+	stw	r4,SL_IBAT0+4(r1)
+	mfibatu	r4,1
+	stw	r4,SL_IBAT1(r1)
+	mfibatl	r4,1
+	stw	r4,SL_IBAT1+4(r1)
+	mfibatu	r4,2
+	stw	r4,SL_IBAT2(r1)
+	mfibatl	r4,2
+	stw	r4,SL_IBAT2+4(r1)
+	mfibatu	r4,3
+	stw	r4,SL_IBAT3(r1)
+	mfibatl	r4,3
+	stw	r4,SL_IBAT3+4(r1)
+
+	/* Backup various CPU config stuffs */
+	bl	__save_cpu_setup
+
+	/* The ROM can wake us up via 2 different vectors:
+	 *  - On wallstreet & lombard, we must write a magic
+	 *    value 'Lars' at address 4 and a pointer to a
+	 *    memory location containing the PC to resume from
+	 *    at address 0.
+	 *  - On Core99, we must store the wakeup vector at
+	 *    address 0x80 and eventually it's parameters
+	 *    at address 0x84. I've have some trouble with those
+	 *    parameters however and I no longer use them.
+	 */
+	lis	r5,grackle_wake_up@ha
+	addi	r5,r5,grackle_wake_up@l
+	tophys(r5,r5)
+	stw	r5,SL_PC(r1)
+	lis	r4,KERNELBASE@h
+	tophys(r5,r1)
+	addi	r5,r5,SL_PC
+	lis	r6,MAGIC@ha
+	addi	r6,r6,MAGIC@l
+	stw	r5,0(r4)
+	stw	r6,4(r4)
+	/* Setup stuffs at 0x80-0x84 for Core99 */
+	lis	r3,core99_wake_up@ha
+	addi	r3,r3,core99_wake_up@l
+	tophys(r3,r3)
+	stw	r3,0x80(r4)
+	stw	r5,0x84(r4)
+	/* Store a pointer to our backup storage into
+	 * a kernel global
+	 */
+	lis r3,sleep_storage@ha
+	addi r3,r3,sleep_storage@l
+	stw r5,0(r3)
+
+	.globl	low_cpu_die
+low_cpu_die:
+	/* Flush & disable all caches */
+	bl	flush_disable_caches
+
+	/* Turn off data relocation. */
+	mfmsr	r3		/* Save MSR in r7 */
+	rlwinm	r3,r3,0,28,26	/* Turn off DR bit */
+	sync
+	mtmsr	r3
+	isync
+
+BEGIN_FTR_SECTION
+	/* Flush any pending L2 data prefetches to work around HW bug */
+	sync
+	lis	r3,0xfff0
+	lwz	r0,0(r3)	/* perform cache-inhibited load to ROM */
+	sync			/* (caches are disabled at this point) */
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+/*
+ * Set the HID0 and MSR for sleep.
+ */
+	mfspr	r2,SPRN_HID0
+	rlwinm	r2,r2,0,10,7	/* clear doze, nap */
+	oris	r2,r2,HID0_SLEEP@h
+	sync
+	isync
+	mtspr	SPRN_HID0,r2
+	sync
+
+/* This loop puts us back to sleep in case we have a spurrious
+ * wakeup so that the host bridge properly stays asleep. The
+ * CPU will be turned off, either after a known time (about 1
+ * second) on wallstreet & lombard, or as soon as the CPU enters
+ * SLEEP mode on core99
+ */
+	mfmsr	r2
+	oris	r2,r2,MSR_POW@h
+1:	sync
+	mtmsr	r2
+	isync
+	b	1b
+
+/*
+ * Here is the resume code.
+ */
+
+
+/*
+ * Core99 machines resume here
+ * r4 has the physical address of SL_PC(sp) (unused)
+ */
+_GLOBAL(core99_wake_up)
+	/* Make sure HID0 no longer contains any sleep bit and that data cache
+	 * is disabled
+	 */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,11,7		/* clear SLEEP, NAP, DOZE bits */
+	rlwinm	3,r3,0,18,15		/* clear DCE, ICE */
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+
+	/* sanitize MSR */
+	mfmsr	r3
+	ori	r3,r3,MSR_EE|MSR_IP
+	xori	r3,r3,MSR_EE|MSR_IP
+	sync
+	isync
+	mtmsr	r3
+	sync
+	isync
+
+	/* Recover sleep storage */
+	lis	r3,sleep_storage@ha
+	addi	r3,r3,sleep_storage@l
+	tophys(r3,r3)
+	lwz	r1,0(r3)
+
+	/* Pass thru to older resume code ... */
+/*
+ * Here is the resume code for older machines.
+ * r1 has the physical address of SL_PC(sp).
+ */
+
+grackle_wake_up:
+
+	/* Restore the kernel's segment registers before
+	 * we do any r1 memory access as we are not sure they
+	 * are in a sane state above the first 256Mb region
+	 */
+	li	r0,16		/* load up segment register values */
+	mtctr	r0		/* for context 0 */
+	lis	r3,0x2000	/* Ku = 1, VSID = 0 */
+	li	r4,0
+3:	mtsrin	r3,r4
+	addi	r3,r3,0x111	/* increment VSID */
+	addis	r4,r4,0x1000	/* address of next segment */
+	bdnz	3b
+	sync
+	isync
+
+	subi	r1,r1,SL_PC
+
+	/* Restore various CPU config stuffs */
+	bl	__restore_cpu_setup
+
+	/* Make sure all FPRs have been initialized */
+	bl	reloc_offset
+	bl	__init_fpu_registers
+
+	/* Invalidate & enable L1 cache, we don't care about
+	 * whatever the ROM may have tried to write to memory
+	 */
+	bl	__inval_enable_L1
+
+	/* Restore the BATs, and SDR1.  Then we can turn on the MMU. */
+	lwz	r4,SL_SDR1(r1)
+	mtsdr1	r4
+	lwz	r4,SL_SPRG0(r1)
+	mtsprg	0,r4
+	lwz	r4,SL_SPRG0+4(r1)
+	mtsprg	1,r4
+	lwz	r4,SL_SPRG0+8(r1)
+	mtsprg	2,r4
+	lwz	r4,SL_SPRG0+12(r1)
+	mtsprg	3,r4
+
+	lwz	r4,SL_DBAT0(r1)
+	mtdbatu	0,r4
+	lwz	r4,SL_DBAT0+4(r1)
+	mtdbatl	0,r4
+	lwz	r4,SL_DBAT1(r1)
+	mtdbatu	1,r4
+	lwz	r4,SL_DBAT1+4(r1)
+	mtdbatl	1,r4
+	lwz	r4,SL_DBAT2(r1)
+	mtdbatu	2,r4
+	lwz	r4,SL_DBAT2+4(r1)
+	mtdbatl	2,r4
+	lwz	r4,SL_DBAT3(r1)
+	mtdbatu	3,r4
+	lwz	r4,SL_DBAT3+4(r1)
+	mtdbatl	3,r4
+	lwz	r4,SL_IBAT0(r1)
+	mtibatu	0,r4
+	lwz	r4,SL_IBAT0+4(r1)
+	mtibatl	0,r4
+	lwz	r4,SL_IBAT1(r1)
+	mtibatu	1,r4
+	lwz	r4,SL_IBAT1+4(r1)
+	mtibatl	1,r4
+	lwz	r4,SL_IBAT2(r1)
+	mtibatu	2,r4
+	lwz	r4,SL_IBAT2+4(r1)
+	mtibatl	2,r4
+	lwz	r4,SL_IBAT3(r1)
+	mtibatu	3,r4
+	lwz	r4,SL_IBAT3+4(r1)
+	mtibatl	3,r4
+
+BEGIN_FTR_SECTION
+	li	r4,0
+	mtspr	SPRN_DBAT4U,r4
+	mtspr	SPRN_DBAT4L,r4
+	mtspr	SPRN_DBAT5U,r4
+	mtspr	SPRN_DBAT5L,r4
+	mtspr	SPRN_DBAT6U,r4
+	mtspr	SPRN_DBAT6L,r4
+	mtspr	SPRN_DBAT7U,r4
+	mtspr	SPRN_DBAT7L,r4
+	mtspr	SPRN_IBAT4U,r4
+	mtspr	SPRN_IBAT4L,r4
+	mtspr	SPRN_IBAT5U,r4
+	mtspr	SPRN_IBAT5L,r4
+	mtspr	SPRN_IBAT6U,r4
+	mtspr	SPRN_IBAT6L,r4
+	mtspr	SPRN_IBAT7U,r4
+	mtspr	SPRN_IBAT7L,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS)
+
+	/* Flush all TLBs */
+	lis	r4,0x1000
+1:	addic.	r4,r4,-0x1000
+	tlbie	r4
+	blt	1b
+	sync
+
+	/* restore the MSR and turn on the MMU */
+	lwz	r3,SL_MSR(r1)
+	bl	turn_on_mmu
+
+	/* get back the stack pointer */
+	tovirt(r1,r1)
+
+	/* Restore TB */
+	li	r3,0
+	mttbl	r3
+	lwz	r3,SL_TB(r1)
+	lwz	r4,SL_TB+4(r1)
+	mttbu	r3
+	mttbl	r4
+
+	/* Restore the callee-saved registers and return */
+	lwz	r0,SL_CR(r1)
+	mtcr	r0
+	lwz	r2,SL_R2(r1)
+	lmw	r12,SL_R12(r1)
+	addi	r1,r1,SL_SIZE
+	lwz	r0,4(r1)
+	mtlr	r0
+	blr
+
+turn_on_mmu:
+	mflr	r4
+	tovirt(r4,r4)
+	mtsrr0	r4
+	mtsrr1	r3
+	sync
+	isync
+	rfi
+
+#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
+
+	.section .data
+	.balign	L1_CACHE_LINE_SIZE
+sleep_storage:
+	.long 0
+	.balign	L1_CACHE_LINE_SIZE, 0
+
+#endif /* CONFIG_6xx */
+	.section .text
diff --git a/arch/powerpc/platforms/powermac/pmac_smp.c b/arch/powerpc/platforms/powermac/pmac_smp.c
new file mode 100644
index 00000000000..995e9095d86
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_smp.c
@@ -0,0 +1,716 @@
+/*
+ * SMP support for power macintosh.
+ *
+ * We support both the old "powersurge" SMP architecture
+ * and the current Core99 (G4 PowerMac) machines.
+ *
+ * Note that we don't support the very first rev. of
+ * Apple/DayStar 2 CPUs board, the one with the funky
+ * watchdog. Hopefully, none of these should be there except
+ * maybe internally to Apple. I should probably still add some
+ * code to detect this card though and disable SMP. --BenH.
+ *
+ * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net)
+ * and Ben Herrenschmidt <benh@kernel.crashing.org>.
+ *
+ * Support for DayStar quad CPU cards
+ * Copyright (C) XLR8, Inc. 1994-2000
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/cpu.h>
+
+#include <asm/ptrace.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/residual.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/time.h>
+#include <asm/open_pic.h>
+#include <asm/cacheflush.h>
+#include <asm/keylargo.h>
+
+/*
+ * Powersurge (old powermac SMP) support.
+ */
+
+extern void __secondary_start_pmac_0(void);
+
+/* Addresses for powersurge registers */
+#define HAMMERHEAD_BASE		0xf8000000
+#define HHEAD_CONFIG		0x90
+#define HHEAD_SEC_INTR		0xc0
+
+/* register for interrupting the primary processor on the powersurge */
+/* N.B. this is actually the ethernet ROM! */
+#define PSURGE_PRI_INTR		0xf3019000
+
+/* register for storing the start address for the secondary processor */
+/* N.B. this is the PCI config space address register for the 1st bridge */
+#define PSURGE_START		0xf2800000
+
+/* Daystar/XLR8 4-CPU card */
+#define PSURGE_QUAD_REG_ADDR	0xf8800000
+
+#define PSURGE_QUAD_IRQ_SET	0
+#define PSURGE_QUAD_IRQ_CLR	1
+#define PSURGE_QUAD_IRQ_PRIMARY	2
+#define PSURGE_QUAD_CKSTOP_CTL	3
+#define PSURGE_QUAD_PRIMARY_ARB	4
+#define PSURGE_QUAD_BOARD_ID	6
+#define PSURGE_QUAD_WHICH_CPU	7
+#define PSURGE_QUAD_CKSTOP_RDBK	8
+#define PSURGE_QUAD_RESET_CTL	11
+
+#define PSURGE_QUAD_OUT(r, v)	(out_8(quad_base + ((r) << 4) + 4, (v)))
+#define PSURGE_QUAD_IN(r)	(in_8(quad_base + ((r) << 4) + 4) & 0x0f)
+#define PSURGE_QUAD_BIS(r, v)	(PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) | (v)))
+#define PSURGE_QUAD_BIC(r, v)	(PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) & ~(v)))
+
+/* virtual addresses for the above */
+static volatile u8 __iomem *hhead_base;
+static volatile u8 __iomem *quad_base;
+static volatile u32 __iomem *psurge_pri_intr;
+static volatile u8 __iomem *psurge_sec_intr;
+static volatile u32 __iomem *psurge_start;
+
+/* values for psurge_type */
+#define PSURGE_NONE		-1
+#define PSURGE_DUAL		0
+#define PSURGE_QUAD_OKEE	1
+#define PSURGE_QUAD_COTTON	2
+#define PSURGE_QUAD_ICEGRASS	3
+
+/* what sort of powersurge board we have */
+static int psurge_type = PSURGE_NONE;
+
+/* L2 and L3 cache settings to pass from CPU0 to CPU1 */
+volatile static long int core99_l2_cache;
+volatile static long int core99_l3_cache;
+
+/* Timebase freeze GPIO */
+static unsigned int core99_tb_gpio;
+
+/* Sync flag for HW tb sync */
+static volatile int sec_tb_reset = 0;
+static unsigned int pri_tb_hi, pri_tb_lo;
+static unsigned int pri_tb_stamp;
+
+static void __devinit core99_init_caches(int cpu)
+{
+	if (!cpu_has_feature(CPU_FTR_L2CR))
+		return;
+
+	if (cpu == 0) {
+		core99_l2_cache = _get_L2CR();
+		printk("CPU0: L2CR is %lx\n", core99_l2_cache);
+	} else {
+		printk("CPU%d: L2CR was %lx\n", cpu, _get_L2CR());
+		_set_L2CR(0);
+		_set_L2CR(core99_l2_cache);
+		printk("CPU%d: L2CR set to %lx\n", cpu, core99_l2_cache);
+	}
+
+	if (!cpu_has_feature(CPU_FTR_L3CR))
+		return;
+
+	if (cpu == 0){
+		core99_l3_cache = _get_L3CR();
+		printk("CPU0: L3CR is %lx\n", core99_l3_cache);
+	} else {
+		printk("CPU%d: L3CR was %lx\n", cpu, _get_L3CR());
+		_set_L3CR(0);
+		_set_L3CR(core99_l3_cache);
+		printk("CPU%d: L3CR set to %lx\n", cpu, core99_l3_cache);
+	}
+}
+
+/*
+ * Set and clear IPIs for powersurge.
+ */
+static inline void psurge_set_ipi(int cpu)
+{
+	if (psurge_type == PSURGE_NONE)
+		return;
+	if (cpu == 0)
+		in_be32(psurge_pri_intr);
+	else if (psurge_type == PSURGE_DUAL)
+		out_8(psurge_sec_intr, 0);
+	else
+		PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_SET, 1 << cpu);
+}
+
+static inline void psurge_clr_ipi(int cpu)
+{
+	if (cpu > 0) {
+		switch(psurge_type) {
+		case PSURGE_DUAL:
+			out_8(psurge_sec_intr, ~0);
+		case PSURGE_NONE:
+			break;
+		default:
+			PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, 1 << cpu);
+		}
+	}
+}
+
+/*
+ * On powersurge (old SMP powermac architecture) we don't have
+ * separate IPIs for separate messages like openpic does.  Instead
+ * we have a bitmap for each processor, where a 1 bit means that
+ * the corresponding message is pending for that processor.
+ * Ideally each cpu's entry would be in a different cache line.
+ *  -- paulus.
+ */
+static unsigned long psurge_smp_message[NR_CPUS];
+
+void psurge_smp_message_recv(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+	int msg;
+
+	/* clear interrupt */
+	psurge_clr_ipi(cpu);
+
+	if (num_online_cpus() < 2)
+		return;
+
+	/* make sure there is a message there */
+	for (msg = 0; msg < 4; msg++)
+		if (test_and_clear_bit(msg, &psurge_smp_message[cpu]))
+			smp_message_recv(msg, regs);
+}
+
+irqreturn_t psurge_primary_intr(int irq, void *d, struct pt_regs *regs)
+{
+	psurge_smp_message_recv(regs);
+	return IRQ_HANDLED;
+}
+
+static void smp_psurge_message_pass(int target, int msg, unsigned long data,
+					   int wait)
+{
+	int i;
+
+	if (num_online_cpus() < 2)
+		return;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i))
+			continue;
+		if (target == MSG_ALL
+		    || (target == MSG_ALL_BUT_SELF && i != smp_processor_id())
+		    || target == i) {
+			set_bit(msg, &psurge_smp_message[i]);
+			psurge_set_ipi(i);
+		}
+	}
+}
+
+/*
+ * Determine a quad card presence. We read the board ID register, we
+ * force the data bus to change to something else, and we read it again.
+ * It it's stable, then the register probably exist (ugh !)
+ */
+static int __init psurge_quad_probe(void)
+{
+	int type;
+	unsigned int i;
+
+	type = PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID);
+	if (type < PSURGE_QUAD_OKEE || type > PSURGE_QUAD_ICEGRASS
+	    || type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID))
+		return PSURGE_DUAL;
+
+	/* looks OK, try a slightly more rigorous test */
+	/* bogus is not necessarily cacheline-aligned,
+	   though I don't suppose that really matters.  -- paulus */
+	for (i = 0; i < 100; i++) {
+		volatile u32 bogus[8];
+		bogus[(0+i)%8] = 0x00000000;
+		bogus[(1+i)%8] = 0x55555555;
+		bogus[(2+i)%8] = 0xFFFFFFFF;
+		bogus[(3+i)%8] = 0xAAAAAAAA;
+		bogus[(4+i)%8] = 0x33333333;
+		bogus[(5+i)%8] = 0xCCCCCCCC;
+		bogus[(6+i)%8] = 0xCCCCCCCC;
+		bogus[(7+i)%8] = 0x33333333;
+		wmb();
+		asm volatile("dcbf 0,%0" : : "r" (bogus) : "memory");
+		mb();
+		if (type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID))
+			return PSURGE_DUAL;
+	}
+	return type;
+}
+
+static void __init psurge_quad_init(void)
+{
+	int procbits;
+
+	if (ppc_md.progress) ppc_md.progress("psurge_quad_init", 0x351);
+	procbits = ~PSURGE_QUAD_IN(PSURGE_QUAD_WHICH_CPU);
+	if (psurge_type == PSURGE_QUAD_ICEGRASS)
+		PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits);
+	else
+		PSURGE_QUAD_BIC(PSURGE_QUAD_CKSTOP_CTL, procbits);
+	mdelay(33);
+	out_8(psurge_sec_intr, ~0);
+	PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, procbits);
+	PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits);
+	if (psurge_type != PSURGE_QUAD_ICEGRASS)
+		PSURGE_QUAD_BIS(PSURGE_QUAD_CKSTOP_CTL, procbits);
+	PSURGE_QUAD_BIC(PSURGE_QUAD_PRIMARY_ARB, procbits);
+	mdelay(33);
+	PSURGE_QUAD_BIC(PSURGE_QUAD_RESET_CTL, procbits);
+	mdelay(33);
+	PSURGE_QUAD_BIS(PSURGE_QUAD_PRIMARY_ARB, procbits);
+	mdelay(33);
+}
+
+static int __init smp_psurge_probe(void)
+{
+	int i, ncpus;
+
+	/* We don't do SMP on the PPC601 -- paulus */
+	if (PVR_VER(mfspr(SPRN_PVR)) == 1)
+		return 1;
+
+	/*
+	 * The powersurge cpu board can be used in the generation
+	 * of powermacs that have a socket for an upgradeable cpu card,
+	 * including the 7500, 8500, 9500, 9600.
+	 * The device tree doesn't tell you if you have 2 cpus because
+	 * OF doesn't know anything about the 2nd processor.
+	 * Instead we look for magic bits in magic registers,
+	 * in the hammerhead memory controller in the case of the
+	 * dual-cpu powersurge board.  -- paulus.
+	 */
+	if (find_devices("hammerhead") == NULL)
+		return 1;
+
+	hhead_base = ioremap(HAMMERHEAD_BASE, 0x800);
+	quad_base = ioremap(PSURGE_QUAD_REG_ADDR, 1024);
+	psurge_sec_intr = hhead_base + HHEAD_SEC_INTR;
+
+	psurge_type = psurge_quad_probe();
+	if (psurge_type != PSURGE_DUAL) {
+		psurge_quad_init();
+		/* All released cards using this HW design have 4 CPUs */
+		ncpus = 4;
+	} else {
+		iounmap(quad_base);
+		if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) {
+			/* not a dual-cpu card */
+			iounmap(hhead_base);
+			psurge_type = PSURGE_NONE;
+			return 1;
+		}
+		ncpus = 2;
+	}
+
+	psurge_start = ioremap(PSURGE_START, 4);
+	psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
+
+	/* this is not actually strictly necessary -- paulus. */
+	for (i = 1; i < ncpus; ++i)
+		smp_hw_index[i] = i;
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
+
+	return ncpus;
+}
+
+static void __init smp_psurge_kick_cpu(int nr)
+{
+	unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
+	unsigned long a;
+
+	/* may need to flush here if secondary bats aren't setup */
+	for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32)
+		asm volatile("dcbf 0,%0" : : "r" (a) : "memory");
+	asm volatile("sync");
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353);
+
+	out_be32(psurge_start, start);
+	mb();
+
+	psurge_set_ipi(nr);
+	udelay(10);
+	psurge_clr_ipi(nr);
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
+}
+
+/*
+ * With the dual-cpu powersurge board, the decrementers and timebases
+ * of both cpus are frozen after the secondary cpu is started up,
+ * until we give the secondary cpu another interrupt.  This routine
+ * uses this to get the timebases synchronized.
+ *  -- paulus.
+ */
+static void __init psurge_dual_sync_tb(int cpu_nr)
+{
+	int t;
+
+	set_dec(tb_ticks_per_jiffy);
+	set_tb(0, 0);
+	last_jiffy_stamp(cpu_nr) = 0;
+
+	if (cpu_nr > 0) {
+		mb();
+		sec_tb_reset = 1;
+		return;
+	}
+
+	/* wait for the secondary to have reset its TB before proceeding */
+	for (t = 10000000; t > 0 && !sec_tb_reset; --t)
+		;
+
+	/* now interrupt the secondary, starting both TBs */
+	psurge_set_ipi(1);
+
+	smp_tb_synchronized = 1;
+}
+
+static struct irqaction psurge_irqaction = {
+	.handler = psurge_primary_intr,
+	.flags = SA_INTERRUPT,
+	.mask = CPU_MASK_NONE,
+	.name = "primary IPI",
+};
+
+static void __init smp_psurge_setup_cpu(int cpu_nr)
+{
+
+	if (cpu_nr == 0) {
+		/* If we failed to start the second CPU, we should still
+		 * send it an IPI to start the timebase & DEC or we might
+		 * have them stuck.
+		 */
+		if (num_online_cpus() < 2) {
+			if (psurge_type == PSURGE_DUAL)
+				psurge_set_ipi(1);
+			return;
+		}
+		/* reset the entry point so if we get another intr we won't
+		 * try to startup again */
+		out_be32(psurge_start, 0x100);
+		if (setup_irq(30, &psurge_irqaction))
+			printk(KERN_ERR "Couldn't get primary IPI interrupt");
+	}
+
+	if (psurge_type == PSURGE_DUAL)
+		psurge_dual_sync_tb(cpu_nr);
+}
+
+void __init smp_psurge_take_timebase(void)
+{
+	/* Dummy implementation */
+}
+
+void __init smp_psurge_give_timebase(void)
+{
+	/* Dummy implementation */
+}
+
+static int __init smp_core99_probe(void)
+{
+#ifdef CONFIG_6xx
+	extern int powersave_nap;
+#endif
+	struct device_node *cpus, *firstcpu;
+	int i, ncpus = 0, boot_cpu = -1;
+	u32 *tbprop = NULL;
+
+	if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345);
+	cpus = firstcpu = find_type_devices("cpu");
+	while(cpus != NULL) {
+		u32 *regprop = (u32 *)get_property(cpus, "reg", NULL);
+		char *stateprop = (char *)get_property(cpus, "state", NULL);
+		if (regprop != NULL && stateprop != NULL &&
+		    !strncmp(stateprop, "running", 7))
+			boot_cpu = *regprop;
+		++ncpus;
+		cpus = cpus->next;
+	}
+	if (boot_cpu == -1)
+		printk(KERN_WARNING "Couldn't detect boot CPU !\n");
+	if (boot_cpu != 0)
+		printk(KERN_WARNING "Boot CPU is %d, unsupported setup !\n", boot_cpu);
+
+	if (machine_is_compatible("MacRISC4")) {
+		extern struct smp_ops_t core99_smp_ops;
+
+		core99_smp_ops.take_timebase = smp_generic_take_timebase;
+		core99_smp_ops.give_timebase = smp_generic_give_timebase;
+	} else {
+		if (firstcpu != NULL)
+			tbprop = (u32 *)get_property(firstcpu, "timebase-enable", NULL);
+		if (tbprop)
+			core99_tb_gpio = *tbprop;
+		else
+			core99_tb_gpio = KL_GPIO_TB_ENABLE;
+	}
+
+	if (ncpus > 1) {
+		mpic_request_ipis();
+		for (i = 1; i < ncpus; ++i)
+			smp_hw_index[i] = i;
+#ifdef CONFIG_6xx
+		powersave_nap = 0;
+#endif
+		core99_init_caches(0);
+	}
+
+	return ncpus;
+}
+
+static void __devinit smp_core99_kick_cpu(int nr)
+{
+	unsigned long save_vector, new_vector;
+	unsigned long flags;
+
+	volatile unsigned long *vector
+		 = ((volatile unsigned long *)(KERNELBASE+0x100));
+	if (nr < 0 || nr > 3)
+		return;
+	if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346);
+
+	local_irq_save(flags);
+	local_irq_disable();
+
+	/* Save reset vector */
+	save_vector = *vector;
+
+	/* Setup fake reset vector that does	
+	 *   b __secondary_start_pmac_0 + nr*8 - KERNELBASE
+	 */
+	new_vector = (unsigned long) __secondary_start_pmac_0 + nr * 8;
+	*vector = 0x48000002 + new_vector - KERNELBASE;
+
+	/* flush data cache and inval instruction cache */
+	flush_icache_range((unsigned long) vector, (unsigned long) vector + 4);
+
+	/* Put some life in our friend */
+	pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0);
+
+	/* FIXME: We wait a bit for the CPU to take the exception, I should
+	 * instead wait for the entry code to set something for me. Well,
+	 * ideally, all that crap will be done in prom.c and the CPU left
+	 * in a RAM-based wait loop like CHRP.
+	 */
+	mdelay(1);
+
+	/* Restore our exception vector */
+	*vector = save_vector;
+	flush_icache_range((unsigned long) vector, (unsigned long) vector + 4);
+
+	local_irq_restore(flags);
+	if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
+}
+
+static void __devinit smp_core99_setup_cpu(int cpu_nr)
+{
+	/* Setup L2/L3 */
+	if (cpu_nr != 0)
+		core99_init_caches(cpu_nr);
+
+	/* Setup openpic */
+	mpic_setup_this_cpu();
+
+	if (cpu_nr == 0) {
+#ifdef CONFIG_POWER4
+		extern void g5_phy_disable_cpu1(void);
+
+		/* If we didn't start the second CPU, we must take
+		 * it off the bus
+		 */
+		if (machine_is_compatible("MacRISC4") &&
+		    num_online_cpus() < 2)		
+			g5_phy_disable_cpu1();
+#endif /* CONFIG_POWER4 */
+		if (ppc_md.progress) ppc_md.progress("core99_setup_cpu 0 done", 0x349);
+	}
+}
+
+/* not __init, called in sleep/wakeup code */
+void smp_core99_take_timebase(void)
+{
+	unsigned long flags;
+
+	/* tell the primary we're here */
+	sec_tb_reset = 1;
+	mb();
+
+	/* wait for the primary to set pri_tb_hi/lo */
+	while (sec_tb_reset < 2)
+		mb();
+
+	/* set our stuff the same as the primary */
+	local_irq_save(flags);
+	set_dec(1);
+	set_tb(pri_tb_hi, pri_tb_lo);
+	last_jiffy_stamp(smp_processor_id()) = pri_tb_stamp;
+	mb();
+
+	/* tell the primary we're done */
+       	sec_tb_reset = 0;
+	mb();
+	local_irq_restore(flags);
+}
+
+/* not __init, called in sleep/wakeup code */
+void smp_core99_give_timebase(void)
+{
+	unsigned long flags;
+	unsigned int t;
+
+	/* wait for the secondary to be in take_timebase */
+	for (t = 100000; t > 0 && !sec_tb_reset; --t)
+		udelay(10);
+	if (!sec_tb_reset) {
+		printk(KERN_WARNING "Timeout waiting sync on second CPU\n");
+		return;
+	}
+
+	/* freeze the timebase and read it */
+	/* disable interrupts so the timebase is disabled for the
+	   shortest possible time */
+	local_irq_save(flags);
+	pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4);
+	pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0);
+	mb();
+	pri_tb_hi = get_tbu();
+	pri_tb_lo = get_tbl();
+	pri_tb_stamp = last_jiffy_stamp(smp_processor_id());
+	mb();
+
+	/* tell the secondary we're ready */
+	sec_tb_reset = 2;
+	mb();
+
+	/* wait for the secondary to have taken it */
+	for (t = 100000; t > 0 && sec_tb_reset; --t)
+		udelay(10);
+	if (sec_tb_reset)
+		printk(KERN_WARNING "Timeout waiting sync(2) on second CPU\n");
+	else
+		smp_tb_synchronized = 1;
+
+	/* Now, restart the timebase by leaving the GPIO to an open collector */
+       	pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0);
+        pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0);
+	local_irq_restore(flags);
+}
+
+void smp_core99_message_pass(int target, int msg, unsigned long data, int wait)
+{
+	cpumask_t mask = CPU_MASK_ALL;
+	/* make sure we're sending something that translates to an IPI */
+	if (msg > 0x3) {
+		printk("SMP %d: smp_message_pass: unknown msg %d\n",
+		       smp_processor_id(), msg);
+		return;
+	}
+	switch (target) {
+	case MSG_ALL:
+		mpic_send_ipi(msg, mask);
+		break;
+	case MSG_ALL_BUT_SELF:
+		cpu_clear(smp_processor_id(), mask);
+		mpic_send_ipi(msg, mask);
+		break;
+	default:
+		mpic_send_ipi(msg, cpumask_of_cpu(target));
+		break;
+	}
+}
+
+
+/* PowerSurge-style Macs */
+struct smp_ops_t psurge_smp_ops = {
+	.message_pass	= smp_psurge_message_pass,
+	.probe		= smp_psurge_probe,
+	.kick_cpu	= smp_psurge_kick_cpu,
+	.setup_cpu	= smp_psurge_setup_cpu,
+	.give_timebase	= smp_psurge_give_timebase,
+	.take_timebase	= smp_psurge_take_timebase,
+};
+
+/* Core99 Macs (dual G4s) */
+struct smp_ops_t core99_smp_ops = {
+	.message_pass	= smp_core99_message_pass,
+	.probe		= smp_core99_probe,
+	.kick_cpu	= smp_core99_kick_cpu,
+	.setup_cpu	= smp_core99_setup_cpu,
+	.give_timebase	= smp_core99_give_timebase,
+	.take_timebase	= smp_core99_take_timebase,
+};
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+int __cpu_disable(void)
+{
+	cpu_clear(smp_processor_id(), cpu_online_map);
+
+	/* XXX reset cpu affinity here */
+	openpic_set_priority(0xf);
+	asm volatile("mtdec %0" : : "r" (0x7fffffff));
+	mb();
+	udelay(20);
+	asm volatile("mtdec %0" : : "r" (0x7fffffff));
+	return 0;
+}
+
+extern void low_cpu_die(void) __attribute__((noreturn)); /* in pmac_sleep.S */
+static int cpu_dead[NR_CPUS];
+
+void cpu_die(void)
+{
+	local_irq_disable();
+	cpu_dead[smp_processor_id()] = 1;
+	mb();
+	low_cpu_die();
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	int timeout;
+
+	timeout = 1000;
+	while (!cpu_dead[cpu]) {
+		if (--timeout == 0) {
+			printk("CPU %u refused to die!\n", cpu);
+			break;
+		}
+		msleep(1);
+	}
+	cpu_callin_map[cpu] = 0;
+	cpu_dead[cpu] = 0;
+}
+
+#endif
diff --git a/arch/powerpc/platforms/powermac/pmac_time.c b/arch/powerpc/platforms/powermac/pmac_time.c
new file mode 100644
index 00000000000..ff6adff36cb
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac_time.c
@@ -0,0 +1,291 @@
+/*
+ * Support for periodic interrupts (100 per second) and for getting
+ * the current time from the RTC on Power Macintoshes.
+ *
+ * We use the decrementer register for our periodic interrupts.
+ *
+ * Paul Mackerras	August 1996.
+ * Copyright (C) 1996 Paul Mackerras.
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <linux/hardirq.h>
+
+#include <asm/sections.h>
+#include <asm/prom.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+
+/* Apparently the RTC stores seconds since 1 Jan 1904 */
+#define RTC_OFFSET	2082844800
+
+/*
+ * Calibrate the decrementer frequency with the VIA timer 1.
+ */
+#define VIA_TIMER_FREQ_6	4700000	/* time 1 frequency * 6 */
+
+/* VIA registers */
+#define RS		0x200		/* skip between registers */
+#define T1CL		(4*RS)		/* Timer 1 ctr/latch (low 8 bits) */
+#define T1CH		(5*RS)		/* Timer 1 counter (high 8 bits) */
+#define T1LL		(6*RS)		/* Timer 1 latch (low 8 bits) */
+#define T1LH		(7*RS)		/* Timer 1 latch (high 8 bits) */
+#define ACR		(11*RS)		/* Auxiliary control register */
+#define IFR		(13*RS)		/* Interrupt flag register */
+
+/* Bits in ACR */
+#define T1MODE		0xc0		/* Timer 1 mode */
+#define T1MODE_CONT	0x40		/*  continuous interrupts */
+
+/* Bits in IFR and IER */
+#define T1_INT		0x40		/* Timer 1 interrupt */
+
+extern struct timezone sys_tz;
+
+long __init
+pmac_time_init(void)
+{
+#ifdef CONFIG_NVRAM
+	s32 delta = 0;
+	int dst;
+	
+	delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16;
+	delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8;
+	delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb);
+	if (delta & 0x00800000UL)
+		delta |= 0xFF000000UL;
+	dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0);
+	printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60,
+		dst ? "on" : "off");
+	return delta;
+#else
+	return 0;
+#endif
+}
+
+unsigned long
+pmac_get_rtc_time(void)
+{
+#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU)
+	struct adb_request req;
+	unsigned long now;
+#endif
+
+	/* Get the time from the RTC */
+	switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
+	case SYS_CTRLER_CUDA:
+		if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
+			return 0;
+		while (!req.complete)
+			cuda_poll();
+		if (req.reply_len != 7)
+			printk(KERN_ERR "pmac_get_rtc_time: got %d byte reply\n",
+			       req.reply_len);
+		now = (req.reply[3] << 24) + (req.reply[4] << 16)
+			+ (req.reply[5] << 8) + req.reply[6];
+		return now - RTC_OFFSET;
+#endif /* CONFIG_ADB_CUDA */
+#ifdef CONFIG_ADB_PMU
+	case SYS_CTRLER_PMU:
+		if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
+			return 0;
+		while (!req.complete)
+			pmu_poll();
+		if (req.reply_len != 4)
+			printk(KERN_ERR "pmac_get_rtc_time: got %d byte reply\n",
+			       req.reply_len);
+		now = (req.reply[0] << 24) + (req.reply[1] << 16)
+			+ (req.reply[2] << 8) + req.reply[3];
+		return now - RTC_OFFSET;
+#endif /* CONFIG_ADB_PMU */
+	default: ;
+	}
+	return 0;
+}
+
+int
+pmac_set_rtc_time(unsigned long nowtime)
+{
+#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU)
+	struct adb_request req;
+#endif
+
+	nowtime += RTC_OFFSET;
+
+	switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
+	case SYS_CTRLER_CUDA:
+		if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
+				 nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0)
+			return 0;
+		while (!req.complete)
+			cuda_poll();
+		if ((req.reply_len != 3) && (req.reply_len != 7))
+			printk(KERN_ERR "pmac_set_rtc_time: got %d byte reply\n",
+			       req.reply_len);
+		return 1;
+#endif /* CONFIG_ADB_CUDA */
+#ifdef CONFIG_ADB_PMU
+	case SYS_CTRLER_PMU:
+		if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
+				nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0)
+			return 0;
+		while (!req.complete)
+			pmu_poll();
+		if (req.reply_len != 0)
+			printk(KERN_ERR "pmac_set_rtc_time: got %d byte reply\n",
+			       req.reply_len);
+		return 1;
+#endif /* CONFIG_ADB_PMU */
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Calibrate the decrementer register using VIA timer 1.
+ * This is used both on powermacs and CHRP machines.
+ */
+int __init
+via_calibrate_decr(void)
+{
+	struct device_node *vias;
+	volatile unsigned char __iomem *via;
+	int count = VIA_TIMER_FREQ_6 / 100;
+	unsigned int dstart, dend;
+
+	vias = find_devices("via-cuda");
+	if (vias == 0)
+		vias = find_devices("via-pmu");
+	if (vias == 0)
+		vias = find_devices("via");
+	if (vias == 0 || vias->n_addrs == 0)
+		return 0;
+	via = ioremap(vias->addrs[0].address, vias->addrs[0].size);
+
+	/* set timer 1 for continuous interrupts */
+	out_8(&via[ACR], (via[ACR] & ~T1MODE) | T1MODE_CONT);
+	/* set the counter to a small value */
+	out_8(&via[T1CH], 2);
+	/* set the latch to `count' */
+	out_8(&via[T1LL], count);
+	out_8(&via[T1LH], count >> 8);
+	/* wait until it hits 0 */
+	while ((in_8(&via[IFR]) & T1_INT) == 0)
+		;
+	dstart = get_dec();
+	/* clear the interrupt & wait until it hits 0 again */
+	in_8(&via[T1CL]);
+	while ((in_8(&via[IFR]) & T1_INT) == 0)
+		;
+	dend = get_dec();
+
+	tb_ticks_per_jiffy = (dstart - dend) / (6 * (HZ/100));
+	tb_to_us = mulhwu_scale_factor(dstart - dend, 60000);
+
+	printk(KERN_INFO "via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n",
+	       tb_ticks_per_jiffy, dstart - dend);
+
+	iounmap(via);
+	
+	return 1;
+}
+
+#ifdef CONFIG_PM
+/*
+ * Reset the time after a sleep.
+ */
+static int
+time_sleep_notify(struct pmu_sleep_notifier *self, int when)
+{
+	static unsigned long time_diff;
+	unsigned long flags;
+	unsigned long seq;
+
+	switch (when) {
+	case PBOOK_SLEEP_NOW:
+		do {
+			seq = read_seqbegin_irqsave(&xtime_lock, flags);
+			time_diff = xtime.tv_sec - pmac_get_rtc_time();
+		} while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
+		break;
+	case PBOOK_WAKE:
+		write_seqlock_irqsave(&xtime_lock, flags);
+		xtime.tv_sec = pmac_get_rtc_time() + time_diff;
+		xtime.tv_nsec = 0;
+		last_rtc_update = xtime.tv_sec;
+		write_sequnlock_irqrestore(&xtime_lock, flags);
+		break;
+	}
+	return PBOOK_SLEEP_OK;
+}
+
+static struct pmu_sleep_notifier time_sleep_notifier = {
+	time_sleep_notify, SLEEP_LEVEL_MISC,
+};
+#endif /* CONFIG_PM */
+
+/*
+ * Query the OF and get the decr frequency.
+ * This was taken from the pmac time_init() when merging the prep/pmac
+ * time functions.
+ */
+void __init
+pmac_calibrate_decr(void)
+{
+	struct device_node *cpu;
+	unsigned int freq, *fp;
+
+#ifdef CONFIG_PM
+	pmu_register_sleep_notifier(&time_sleep_notifier);
+#endif /* CONFIG_PM */
+
+	/* We assume MacRISC2 machines have correct device-tree
+	 * calibration. That's better since the VIA itself seems
+	 * to be slightly off. --BenH
+	 */
+	if (!machine_is_compatible("MacRISC2") &&
+	    !machine_is_compatible("MacRISC3") &&
+	    !machine_is_compatible("MacRISC4"))
+		if (via_calibrate_decr())
+			return;
+
+	/* Special case: QuickSilver G4s seem to have a badly calibrated
+	 * timebase-frequency in OF, VIA is much better on these. We should
+	 * probably implement calibration based on the KL timer on these
+	 * machines anyway... -BenH
+	 */
+	if (machine_is_compatible("PowerMac3,5"))
+		if (via_calibrate_decr())
+			return;
+	/*
+	 * The cpu node should have a timebase-frequency property
+	 * to tell us the rate at which the decrementer counts.
+	 */
+	cpu = find_type_devices("cpu");
+	if (cpu == 0)
+		panic("can't find cpu node in time_init");
+	fp = (unsigned int *) get_property(cpu, "timebase-frequency", NULL);
+	if (fp == 0)
+		panic("can't get cpu timebase frequency");
+	freq = *fp;
+	printk("time_init: decrementer frequency = %u.%.6u MHz\n",
+	       freq/1000000, freq%1000000);
+	tb_ticks_per_jiffy = freq / HZ;
+	tb_to_us = mulhwu_scale_factor(freq, 1000000);
+}
diff --git a/arch/powerpc/platforms/prep/Kconfig b/arch/powerpc/platforms/prep/Kconfig
new file mode 100644
index 00000000000..673ac47a162
--- /dev/null
+++ b/arch/powerpc/platforms/prep/Kconfig
@@ -0,0 +1,22 @@
+
+config PREP_RESIDUAL
+	bool "Support for PReP Residual Data"
+	depends on PPC_PREP
+	help
+	  Some PReP systems have residual data passed to the kernel by the
+	  firmware.  This allows detection of memory size, devices present and
+	  other useful pieces of information.  Sometimes this information is
+	  not present or incorrect, in which case it could lead to the machine 
+	  behaving incorrectly.  If this happens, either disable PREP_RESIDUAL
+	  or pass the 'noresidual' option to the kernel.
+
+	  If you are running a PReP system, say Y here, otherwise say N.
+
+config PROC_PREPRESIDUAL
+	bool "Support for reading of PReP Residual Data in /proc"
+	depends on PREP_RESIDUAL && PROC_FS
+	help
+	  Enabling this option will create a /proc/residual file which allows
+	  you to get at the residual data on PReP systems.  You will need a tool
+	  (lsresidual) to parse it.  If you aren't on a PReP system, you don't
+	  want this.
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
new file mode 100644
index 00000000000..7a3b6fc4d97
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -0,0 +1,47 @@
+
+config PPC_SPLPAR
+	depends on PPC_PSERIES
+	bool "Support for shared-processor logical partitions"
+	default n
+	help
+	  Enabling this option will make the kernel run more efficiently
+	  on logically-partitioned pSeries systems which use shared
+	  processors, that is, which share physical processors between
+	  two or more partitions.
+
+config HMT
+	bool "Hardware multithreading"
+	depends on SMP && PPC_PSERIES && BROKEN
+	help
+	  This option enables hardware multithreading on RS64 cpus.
+	  pSeries systems p620 and p660 have such a cpu type.
+
+config EEH
+	bool "PCI Extended Error Handling (EEH)" if EMBEDDED
+	depends on PPC_PSERIES
+	default y if !EMBEDDED
+
+config PPC_RTAS
+	bool
+	depends on PPC_PSERIES || PPC_BPA
+	default y
+
+config RTAS_PROC
+	bool "Proc interface to RTAS"
+	depends on PPC_RTAS
+	default y
+
+config RTAS_FLASH
+	tristate "Firmware flash interface"
+	depends on PPC64 && RTAS_PROC
+
+config SCANLOG
+	tristate "Scanlog dump interface"
+	depends on RTAS_PROC && PPC_PSERIES
+
+config LPARCFG
+	tristate "LPAR Configuration Data"
+	depends on PPC_PSERIES || PPC_ISERIES
+	help
+	Provide system capacity information via human readable
+	<key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
new file mode 100644
index 00000000000..26bdcd9a2a4
--- /dev/null
+++ b/arch/powerpc/sysdev/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MPIC)	+= mpic.o
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
new file mode 100644
index 00000000000..c660e7d7c64
--- /dev/null
+++ b/arch/powerpc/sysdev/mpic.c
@@ -0,0 +1,904 @@
+/*
+ *  arch/powerpc/kernel/mpic.c
+ *
+ *  Driver for interrupt controllers following the OpenPIC standard, the
+ *  common implementation beeing IBM's MPIC. This driver also can deal
+ *  with various broken implementations of this HW.
+ *
+ *  Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive
+ *  for more details.
+ */
+
+#undef DEBUG
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/bootmem.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+
+#include <asm/ptrace.h>
+#include <asm/signal.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/machdep.h>
+#include <asm/mpic.h>
+#include <asm/smp.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+static struct mpic *mpics;
+static struct mpic *mpic_primary;
+static DEFINE_SPINLOCK(mpic_lock);
+
+
+/*
+ * Register accessor functions
+ */
+
+
+static inline u32 _mpic_read(unsigned int be, volatile u32 __iomem *base,
+			    unsigned int reg)
+{
+	if (be)
+		return in_be32(base + (reg >> 2));
+	else
+		return in_le32(base + (reg >> 2));
+}
+
+static inline void _mpic_write(unsigned int be, volatile u32 __iomem *base,
+			      unsigned int reg, u32 value)
+{
+	if (be)
+		out_be32(base + (reg >> 2), value);
+	else
+		out_le32(base + (reg >> 2), value);
+}
+
+static inline u32 _mpic_ipi_read(struct mpic *mpic, unsigned int ipi)
+{
+	unsigned int be = (mpic->flags & MPIC_BIG_ENDIAN) != 0;
+	unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10);
+
+	if (mpic->flags & MPIC_BROKEN_IPI)
+		be = !be;
+	return _mpic_read(be, mpic->gregs, offset);
+}
+
+static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 value)
+{
+	unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10);
+
+	_mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->gregs, offset, value);
+}
+
+static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg)
+{
+	unsigned int cpu = 0;
+
+	if (mpic->flags & MPIC_PRIMARY)
+		cpu = hard_smp_processor_id();
+
+	return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg);
+}
+
+static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value)
+{
+	unsigned int cpu = 0;
+
+	if (mpic->flags & MPIC_PRIMARY)
+		cpu = hard_smp_processor_id();
+
+	_mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg, value);
+}
+
+static inline u32 _mpic_irq_read(struct mpic *mpic, unsigned int src_no, unsigned int reg)
+{
+	unsigned int	isu = src_no >> mpic->isu_shift;
+	unsigned int	idx = src_no & mpic->isu_mask;
+
+	return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu],
+			  reg + (idx * MPIC_IRQ_STRIDE));
+}
+
+static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no,
+				   unsigned int reg, u32 value)
+{
+	unsigned int	isu = src_no >> mpic->isu_shift;
+	unsigned int	idx = src_no & mpic->isu_mask;
+
+	_mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu],
+		    reg + (idx * MPIC_IRQ_STRIDE), value);
+}
+
+#define mpic_read(b,r)		_mpic_read(mpic->flags & MPIC_BIG_ENDIAN,(b),(r))
+#define mpic_write(b,r,v)	_mpic_write(mpic->flags & MPIC_BIG_ENDIAN,(b),(r),(v))
+#define mpic_ipi_read(i)	_mpic_ipi_read(mpic,(i))
+#define mpic_ipi_write(i,v)	_mpic_ipi_write(mpic,(i),(v))
+#define mpic_cpu_read(i)	_mpic_cpu_read(mpic,(i))
+#define mpic_cpu_write(i,v)	_mpic_cpu_write(mpic,(i),(v))
+#define mpic_irq_read(s,r)	_mpic_irq_read(mpic,(s),(r))
+#define mpic_irq_write(s,r,v)	_mpic_irq_write(mpic,(s),(r),(v))
+
+
+/*
+ * Low level utility functions
+ */
+
+
+
+/* Check if we have one of those nice broken MPICs with a flipped endian on
+ * reads from IPI registers
+ */
+static void __init mpic_test_broken_ipi(struct mpic *mpic)
+{
+	u32 r;
+
+	mpic_write(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0, MPIC_VECPRI_MASK);
+	r = mpic_read(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0);
+
+	if (r == le32_to_cpu(MPIC_VECPRI_MASK)) {
+		printk(KERN_INFO "mpic: Detected reversed IPI registers\n");
+		mpic->flags |= MPIC_BROKEN_IPI;
+	}
+}
+
+#ifdef CONFIG_MPIC_BROKEN_U3
+
+/* Test if an interrupt is sourced from HyperTransport (used on broken U3s)
+ * to force the edge setting on the MPIC and do the ack workaround.
+ */
+static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source_no)
+{
+	if (source_no >= 128 || !mpic->fixups)
+		return 0;
+	return mpic->fixups[source_no].base != NULL;
+}
+
+static inline void mpic_apic_end_irq(struct mpic *mpic, unsigned int source_no)
+{
+	struct mpic_irq_fixup *fixup = &mpic->fixups[source_no];
+	u32 tmp;
+
+	spin_lock(&mpic->fixup_lock);
+	writeb(0x11 + 2 * fixup->irq, fixup->base);
+	tmp = readl(fixup->base + 2);
+	writel(tmp | 0x80000000ul, fixup->base + 2);
+	/* config writes shouldn't be posted but let's be safe ... */
+	(void)readl(fixup->base + 2);
+	spin_unlock(&mpic->fixup_lock);
+}
+
+
+static void __init mpic_amd8111_read_irq(struct mpic *mpic, u8 __iomem *devbase)
+{
+	int i, irq;
+	u32 tmp;
+
+	printk(KERN_INFO "mpic:    - Workarounds on AMD 8111 @ %p\n", devbase);
+
+	for (i=0; i < 24; i++) {
+		writeb(0x10 + 2*i, devbase + 0xf2);
+		tmp = readl(devbase + 0xf4);
+		if ((tmp & 0x1) || !(tmp & 0x20))
+			continue;
+		irq = (tmp >> 16) & 0xff;
+		mpic->fixups[irq].irq = i;
+		mpic->fixups[irq].base = devbase + 0xf2;
+	}
+}
+ 
+static void __init mpic_amd8131_read_irq(struct mpic *mpic, u8 __iomem *devbase)
+{
+	int i, irq;
+	u32 tmp;
+
+	printk(KERN_INFO "mpic:    - Workarounds on AMD 8131 @ %p\n", devbase);
+
+	for (i=0; i < 4; i++) {
+		writeb(0x10 + 2*i, devbase + 0xba);
+		tmp = readl(devbase + 0xbc);
+		if ((tmp & 0x1) || !(tmp & 0x20))
+			continue;
+		irq = (tmp >> 16) & 0xff;
+		mpic->fixups[irq].irq = i;
+		mpic->fixups[irq].base = devbase + 0xba;
+	}
+}
+ 
+static void __init mpic_scan_ioapics(struct mpic *mpic)
+{
+	unsigned int devfn;
+	u8 __iomem *cfgspace;
+
+	printk(KERN_INFO "mpic: Setting up IO-APICs workarounds for U3\n");
+
+	/* Allocate fixups array */
+	mpic->fixups = alloc_bootmem(128 * sizeof(struct mpic_irq_fixup));
+	BUG_ON(mpic->fixups == NULL);
+	memset(mpic->fixups, 0, 128 * sizeof(struct mpic_irq_fixup));
+
+	/* Init spinlock */
+	spin_lock_init(&mpic->fixup_lock);
+
+	/* Map u3 config space. We assume all IO-APICs are on the primary bus
+	 * and slot will never be above "0xf" so we only need to map 32k
+	 */
+	cfgspace = (unsigned char __iomem *)ioremap(0xf2000000, 0x8000);
+	BUG_ON(cfgspace == NULL);
+
+	/* Now we scan all slots. We do a very quick scan, we read the header type,
+	 * vendor ID and device ID only, that's plenty enough
+	 */
+	for (devfn = 0; devfn < PCI_DEVFN(0x10,0); devfn ++) {
+		u8 __iomem *devbase = cfgspace + (devfn << 8);
+		u8 hdr_type = readb(devbase + PCI_HEADER_TYPE);
+		u32 l = readl(devbase + PCI_VENDOR_ID);
+		u16 vendor_id, device_id;
+		int multifunc = 0;
+
+		DBG("devfn %x, l: %x\n", devfn, l);
+
+		/* If no device, skip */
+		if (l == 0xffffffff || l == 0x00000000 ||
+		    l == 0x0000ffff || l == 0xffff0000)
+			goto next;
+
+		/* Check if it's a multifunction device (only really used
+		 * to function 0 though
+		 */
+		multifunc = !!(hdr_type & 0x80);
+		vendor_id = l & 0xffff;
+		device_id = (l >> 16) & 0xffff;
+
+		/* If a known device, go to fixup setup code */
+		if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7460)
+			mpic_amd8111_read_irq(mpic, devbase);
+		if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7450)
+			mpic_amd8131_read_irq(mpic, devbase);
+	next:
+		/* next device, if function 0 */
+		if ((PCI_FUNC(devfn) == 0) && !multifunc)
+			devfn += 7;
+	}
+}
+
+#endif /* CONFIG_MPIC_BROKEN_U3 */
+
+
+/* Find an mpic associated with a given linux interrupt */
+static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi)
+{
+	struct mpic *mpic = mpics;
+
+	while(mpic) {
+		/* search IPIs first since they may override the main interrupts */
+		if (irq >= mpic->ipi_offset && irq < (mpic->ipi_offset + 4)) {
+			if (is_ipi)
+				*is_ipi = 1;
+			return mpic;
+		}
+		if (irq >= mpic->irq_offset &&
+		    irq < (mpic->irq_offset + mpic->irq_count)) {
+			if (is_ipi)
+				*is_ipi = 0;
+			return mpic;
+		}
+		mpic = mpic -> next;
+	}
+	return NULL;
+}
+
+/* Convert a cpu mask from logical to physical cpu numbers. */
+static inline u32 mpic_physmask(u32 cpumask)
+{
+	int i;
+	u32 mask = 0;
+
+	for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1)
+		mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
+	return mask;
+}
+
+#ifdef CONFIG_SMP
+/* Get the mpic structure from the IPI number */
+static inline struct mpic * mpic_from_ipi(unsigned int ipi)
+{
+	return container_of(irq_desc[ipi].handler, struct mpic, hc_ipi);
+}
+#endif
+
+/* Get the mpic structure from the irq number */
+static inline struct mpic * mpic_from_irq(unsigned int irq)
+{
+	return container_of(irq_desc[irq].handler, struct mpic, hc_irq);
+}
+
+/* Send an EOI */
+static inline void mpic_eoi(struct mpic *mpic)
+{
+	mpic_cpu_write(MPIC_CPU_EOI, 0);
+	(void)mpic_cpu_read(MPIC_CPU_WHOAMI);
+}
+
+#ifdef CONFIG_SMP
+static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct mpic *mpic = dev_id;
+
+	smp_message_recv(irq - mpic->ipi_offset, regs);
+	return IRQ_HANDLED;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Linux descriptor level callbacks
+ */
+
+
+static void mpic_enable_irq(unsigned int irq)
+{
+	unsigned int loops = 100000;
+	struct mpic *mpic = mpic_from_irq(irq);
+	unsigned int src = irq - mpic->irq_offset;
+
+	DBG("%s: enable_irq: %d (src %d)\n", mpic->name, irq, src);
+
+	mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
+		       mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_MASK);
+
+	/* make sure mask gets to controller before we return to user */
+	do {
+		if (!loops--) {
+			printk(KERN_ERR "mpic_enable_irq timeout\n");
+			break;
+		}
+	} while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK);	
+}
+
+static void mpic_disable_irq(unsigned int irq)
+{
+	unsigned int loops = 100000;
+	struct mpic *mpic = mpic_from_irq(irq);
+	unsigned int src = irq - mpic->irq_offset;
+
+	DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
+
+	mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
+		       mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | MPIC_VECPRI_MASK);
+
+	/* make sure mask gets to controller before we return to user */
+	do {
+		if (!loops--) {
+			printk(KERN_ERR "mpic_enable_irq timeout\n");
+			break;
+		}
+	} while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK));
+}
+
+static void mpic_end_irq(unsigned int irq)
+{
+	struct mpic *mpic = mpic_from_irq(irq);
+
+	DBG("%s: end_irq: %d\n", mpic->name, irq);
+
+	/* We always EOI on end_irq() even for edge interrupts since that
+	 * should only lower the priority, the MPIC should have properly
+	 * latched another edge interrupt coming in anyway
+	 */
+
+#ifdef CONFIG_MPIC_BROKEN_U3
+	if (mpic->flags & MPIC_BROKEN_U3) {
+		unsigned int src = irq - mpic->irq_offset;
+		if (mpic_is_ht_interrupt(mpic, src))
+			mpic_apic_end_irq(mpic, src);
+	}
+#endif /* CONFIG_MPIC_BROKEN_U3 */
+
+	mpic_eoi(mpic);
+}
+
+#ifdef CONFIG_SMP
+
+static void mpic_enable_ipi(unsigned int irq)
+{
+	struct mpic *mpic = mpic_from_ipi(irq);
+	unsigned int src = irq - mpic->ipi_offset;
+
+	DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src);
+	mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
+}
+
+static void mpic_disable_ipi(unsigned int irq)
+{
+	/* NEVER disable an IPI... that's just plain wrong! */
+}
+
+static void mpic_end_ipi(unsigned int irq)
+{
+	struct mpic *mpic = mpic_from_ipi(irq);
+
+	/*
+	 * IPIs are marked IRQ_PER_CPU. This has the side effect of
+	 * preventing the IRQ_PENDING/IRQ_INPROGRESS logic from
+	 * applying to them. We EOI them late to avoid re-entering.
+	 * We mark IPI's with SA_INTERRUPT as they must run with
+	 * irqs disabled.
+	 */
+	mpic_eoi(mpic);
+}
+
+#endif /* CONFIG_SMP */
+
+static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask)
+{
+	struct mpic *mpic = mpic_from_irq(irq);
+
+	cpumask_t tmp;
+
+	cpus_and(tmp, cpumask, cpu_online_map);
+
+	mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION,
+		       mpic_physmask(cpus_addr(tmp)[0]));	
+}
+
+
+/*
+ * Exported functions
+ */
+
+
+struct mpic * __init mpic_alloc(unsigned long phys_addr,
+				unsigned int flags,
+				unsigned int isu_size,
+				unsigned int irq_offset,
+				unsigned int irq_count,
+				unsigned int ipi_offset,
+				unsigned char *senses,
+				unsigned int senses_count,
+				const char *name)
+{
+	struct mpic	*mpic;
+	u32		reg;
+	const char	*vers;
+	int		i;
+
+	mpic = alloc_bootmem(sizeof(struct mpic));
+	if (mpic == NULL)
+		return NULL;
+	
+
+	memset(mpic, 0, sizeof(struct mpic));
+	mpic->name = name;
+
+	mpic->hc_irq.typename = name;
+	mpic->hc_irq.enable = mpic_enable_irq;
+	mpic->hc_irq.disable = mpic_disable_irq;
+	mpic->hc_irq.end = mpic_end_irq;
+	if (flags & MPIC_PRIMARY)
+		mpic->hc_irq.set_affinity = mpic_set_affinity;
+#ifdef CONFIG_SMP
+	mpic->hc_ipi.typename = name;
+	mpic->hc_ipi.enable = mpic_enable_ipi;
+	mpic->hc_ipi.disable = mpic_disable_ipi;
+	mpic->hc_ipi.end = mpic_end_ipi;
+#endif /* CONFIG_SMP */
+
+	mpic->flags = flags;
+	mpic->isu_size = isu_size;
+	mpic->irq_offset = irq_offset;
+	mpic->irq_count = irq_count;
+	mpic->ipi_offset = ipi_offset;
+	mpic->num_sources = 0; /* so far */
+	mpic->senses = senses;
+	mpic->senses_count = senses_count;
+
+	/* Map the global registers */
+	mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000);
+	mpic->tmregs = mpic->gregs + (MPIC_TIMER_BASE >> 2);
+	BUG_ON(mpic->gregs == NULL);
+
+	/* Reset */
+	if (flags & MPIC_WANTS_RESET) {
+		mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0,
+			   mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
+			   | MPIC_GREG_GCONF_RESET);
+		while( mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
+		       & MPIC_GREG_GCONF_RESET)
+			mb();
+	}
+
+	/* Read feature register, calculate num CPUs and, for non-ISU
+	 * MPICs, num sources as well. On ISU MPICs, sources are counted
+	 * as ISUs are added
+	 */
+	reg = mpic_read(mpic->gregs, MPIC_GREG_FEATURE_0);
+	mpic->num_cpus = ((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK)
+			  >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1;
+	if (isu_size == 0)
+		mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK)
+				     >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1;
+
+	/* Map the per-CPU registers */
+	for (i = 0; i < mpic->num_cpus; i++) {
+		mpic->cpuregs[i] = ioremap(phys_addr + MPIC_CPU_BASE +
+					   i * MPIC_CPU_STRIDE, 0x1000);
+		BUG_ON(mpic->cpuregs[i] == NULL);
+	}
+
+	/* Initialize main ISU if none provided */
+	if (mpic->isu_size == 0) {
+		mpic->isu_size = mpic->num_sources;
+		mpic->isus[0] = ioremap(phys_addr + MPIC_IRQ_BASE,
+					MPIC_IRQ_STRIDE * mpic->isu_size);
+		BUG_ON(mpic->isus[0] == NULL);
+	}
+	mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1);
+	mpic->isu_mask = (1 << mpic->isu_shift) - 1;
+
+	/* Display version */
+	switch (reg & MPIC_GREG_FEATURE_VERSION_MASK) {
+	case 1:
+		vers = "1.0";
+		break;
+	case 2:
+		vers = "1.2";
+		break;
+	case 3:
+		vers = "1.3";
+		break;
+	default:
+		vers = "<unknown>";
+		break;
+	}
+	printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %lx, max %d CPUs\n",
+	       name, vers, phys_addr, mpic->num_cpus);
+	printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n", mpic->isu_size,
+	       mpic->isu_shift, mpic->isu_mask);
+
+	mpic->next = mpics;
+	mpics = mpic;
+
+	if (flags & MPIC_PRIMARY)
+		mpic_primary = mpic;
+
+	return mpic;
+}
+
+void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
+			    unsigned long phys_addr)
+{
+	unsigned int isu_first = isu_num * mpic->isu_size;
+
+	BUG_ON(isu_num >= MPIC_MAX_ISU);
+
+	mpic->isus[isu_num] = ioremap(phys_addr, MPIC_IRQ_STRIDE * mpic->isu_size);
+	if ((isu_first + mpic->isu_size) > mpic->num_sources)
+		mpic->num_sources = isu_first + mpic->isu_size;
+}
+
+void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler,
+			       void *data)
+{
+	struct mpic *mpic = mpic_find(irq, NULL);
+	unsigned long flags;
+
+	/* Synchronization here is a bit dodgy, so don't try to replace cascade
+	 * interrupts on the fly too often ... but normally it's set up at boot.
+	 */
+	spin_lock_irqsave(&mpic_lock, flags);
+	if (mpic->cascade)	       
+		mpic_disable_irq(mpic->cascade_vec + mpic->irq_offset);
+	mpic->cascade = NULL;
+	wmb();
+	mpic->cascade_vec = irq - mpic->irq_offset;
+	mpic->cascade_data = data;
+	wmb();
+	mpic->cascade = handler;
+	mpic_enable_irq(irq);
+	spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+void __init mpic_init(struct mpic *mpic)
+{
+	int i;
+
+	BUG_ON(mpic->num_sources == 0);
+
+	printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources);
+
+	/* Set current processor priority to max */
+	mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf);
+
+	/* Initialize timers: just disable them all */
+	for (i = 0; i < 4; i++) {
+		mpic_write(mpic->tmregs,
+			   i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0);
+		mpic_write(mpic->tmregs,
+			   i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI,
+			   MPIC_VECPRI_MASK |
+			   (MPIC_VEC_TIMER_0 + i));
+	}
+
+	/* Initialize IPIs to our reserved vectors and mark them disabled for now */
+	mpic_test_broken_ipi(mpic);
+	for (i = 0; i < 4; i++) {
+		mpic_ipi_write(i,
+			       MPIC_VECPRI_MASK |
+			       (10 << MPIC_VECPRI_PRIORITY_SHIFT) |
+			       (MPIC_VEC_IPI_0 + i));
+#ifdef CONFIG_SMP
+		if (!(mpic->flags & MPIC_PRIMARY))
+			continue;
+		irq_desc[mpic->ipi_offset+i].status |= IRQ_PER_CPU;
+		irq_desc[mpic->ipi_offset+i].handler = &mpic->hc_ipi;
+		
+#endif /* CONFIG_SMP */
+	}
+
+	/* Initialize interrupt sources */
+	if (mpic->irq_count == 0)
+		mpic->irq_count = mpic->num_sources;
+
+#ifdef CONFIG_MPIC_BROKEN_U3
+	/* Do the ioapic fixups on U3 broken mpic */
+	DBG("MPIC flags: %x\n", mpic->flags);
+	if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY))
+		mpic_scan_ioapics(mpic);
+#endif /* CONFIG_MPIC_BROKEN_U3 */
+
+	for (i = 0; i < mpic->num_sources; i++) {
+		/* start with vector = source number, and masked */
+		u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT);
+		int level = 0;
+		
+		/* if it's an IPI, we skip it */
+		if ((mpic->irq_offset + i) >= (mpic->ipi_offset + i) &&
+		    (mpic->irq_offset + i) <  (mpic->ipi_offset + i + 4))
+			continue;
+
+		/* do senses munging */
+		if (mpic->senses && i < mpic->senses_count) {
+			if (mpic->senses[i] & IRQ_SENSE_LEVEL)
+				vecpri |= MPIC_VECPRI_SENSE_LEVEL;
+			if (mpic->senses[i] & IRQ_POLARITY_POSITIVE)
+				vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
+		} else
+			vecpri |= MPIC_VECPRI_SENSE_LEVEL;
+
+		/* remember if it was a level interrupts */
+		level = (vecpri & MPIC_VECPRI_SENSE_LEVEL);
+
+		/* deal with broken U3 */
+		if (mpic->flags & MPIC_BROKEN_U3) {
+#ifdef CONFIG_MPIC_BROKEN_U3
+			if (mpic_is_ht_interrupt(mpic, i)) {
+				vecpri &= ~(MPIC_VECPRI_SENSE_MASK |
+					    MPIC_VECPRI_POLARITY_MASK);
+				vecpri |= MPIC_VECPRI_POLARITY_POSITIVE;
+			}
+#else
+			printk(KERN_ERR "mpic: BROKEN_U3 set, but CONFIG doesn't match\n");
+#endif
+		}
+
+		DBG("setup source %d, vecpri: %08x, level: %d\n", i, vecpri,
+		    (level != 0));
+
+		/* init hw */
+		mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri);
+		mpic_irq_write(i, MPIC_IRQ_DESTINATION,
+			       1 << hard_smp_processor_id());
+
+		/* init linux descriptors */
+		if (i < mpic->irq_count) {
+			irq_desc[mpic->irq_offset+i].status = level ? IRQ_LEVEL : 0;
+			irq_desc[mpic->irq_offset+i].handler = &mpic->hc_irq;
+		}
+	}
+	
+	/* Init spurrious vector */
+	mpic_write(mpic->gregs, MPIC_GREG_SPURIOUS, MPIC_VEC_SPURRIOUS);
+
+	/* Disable 8259 passthrough */
+	mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0,
+		   mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0)
+		   | MPIC_GREG_GCONF_8259_PTHROU_DIS);
+
+	/* Set current processor priority to 0 */
+	mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0);
+}
+
+
+
+void mpic_irq_set_priority(unsigned int irq, unsigned int pri)
+{
+	int is_ipi;
+	struct mpic *mpic = mpic_find(irq, &is_ipi);
+	unsigned long flags;
+	u32 reg;
+
+	spin_lock_irqsave(&mpic_lock, flags);
+	if (is_ipi) {
+		reg = mpic_ipi_read(irq - mpic->ipi_offset) & MPIC_VECPRI_PRIORITY_MASK;
+		mpic_ipi_write(irq - mpic->ipi_offset,
+			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+	} else {
+		reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI)
+			& MPIC_VECPRI_PRIORITY_MASK;
+		mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI,
+			       reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT));
+	}
+	spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+unsigned int mpic_irq_get_priority(unsigned int irq)
+{
+	int is_ipi;
+	struct mpic *mpic = mpic_find(irq, &is_ipi);
+	unsigned long flags;
+	u32 reg;
+
+	spin_lock_irqsave(&mpic_lock, flags);
+	if (is_ipi)
+		reg = mpic_ipi_read(irq - mpic->ipi_offset);
+	else
+		reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI);
+	spin_unlock_irqrestore(&mpic_lock, flags);
+	return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT;
+}
+
+void mpic_setup_this_cpu(void)
+{
+#ifdef CONFIG_SMP
+	struct mpic *mpic = mpic_primary;
+	unsigned long flags;
+	u32 msk = 1 << hard_smp_processor_id();
+	unsigned int i;
+
+	BUG_ON(mpic == NULL);
+
+	DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+
+	spin_lock_irqsave(&mpic_lock, flags);
+
+ 	/* let the mpic know we want intrs. default affinity is 0xffffffff
+	 * until changed via /proc. That's how it's done on x86. If we want
+	 * it differently, then we should make sure we also change the default
+	 * values of irq_affinity in irq.c.
+ 	 */
+	if (distribute_irqs) {
+	 	for (i = 0; i < mpic->num_sources ; i++)
+			mpic_irq_write(i, MPIC_IRQ_DESTINATION,
+				mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk);
+	}
+
+	/* Set current processor priority to 0 */
+	mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0);
+
+	spin_unlock_irqrestore(&mpic_lock, flags);
+#endif /* CONFIG_SMP */
+}
+
+int mpic_cpu_get_priority(void)
+{
+	struct mpic *mpic = mpic_primary;
+
+	return mpic_cpu_read(MPIC_CPU_CURRENT_TASK_PRI);
+}
+
+void mpic_cpu_set_priority(int prio)
+{
+	struct mpic *mpic = mpic_primary;
+
+	prio &= MPIC_CPU_TASKPRI_MASK;
+	mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, prio);
+}
+
+/*
+ * XXX: someone who knows mpic should check this.
+ * do we need to eoi the ipi including for kexec cpu here (see xics comments)?
+ * or can we reset the mpic in the new kernel?
+ */
+void mpic_teardown_this_cpu(int secondary)
+{
+	struct mpic *mpic = mpic_primary;
+	unsigned long flags;
+	u32 msk = 1 << hard_smp_processor_id();
+	unsigned int i;
+
+	BUG_ON(mpic == NULL);
+
+	DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+	spin_lock_irqsave(&mpic_lock, flags);
+
+	/* let the mpic know we don't want intrs.  */
+	for (i = 0; i < mpic->num_sources ; i++)
+		mpic_irq_write(i, MPIC_IRQ_DESTINATION,
+			mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk);
+
+	/* Set current processor priority to max */
+	mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf);
+
+	spin_unlock_irqrestore(&mpic_lock, flags);
+}
+
+
+void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask)
+{
+	struct mpic *mpic = mpic_primary;
+
+	BUG_ON(mpic == NULL);
+
+	DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no);
+
+	mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10,
+		       mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0]));
+}
+
+int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs)
+{
+	u32 irq;
+
+	irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK;
+	DBG("%s: get_one_irq(): %d\n", mpic->name, irq);
+
+	if (mpic->cascade && irq == mpic->cascade_vec) {
+		DBG("%s: cascading ...\n", mpic->name);
+		irq = mpic->cascade(regs, mpic->cascade_data);
+		mpic_eoi(mpic);
+		return irq;
+	}
+	if (unlikely(irq == MPIC_VEC_SPURRIOUS))
+		return -1;
+	if (irq < MPIC_VEC_IPI_0) 
+		return irq + mpic->irq_offset;
+       	DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
+	return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset;
+}
+
+int mpic_get_irq(struct pt_regs *regs)
+{
+	struct mpic *mpic = mpic_primary;
+
+	BUG_ON(mpic == NULL);
+
+	return mpic_get_one_irq(mpic, regs);
+}
+
+
+#ifdef CONFIG_SMP
+void mpic_request_ipis(void)
+{
+	struct mpic *mpic = mpic_primary;
+
+	BUG_ON(mpic == NULL);
+	
+	printk("requesting IPIs ... \n");
+
+	/* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */
+	request_irq(mpic->ipi_offset+0, mpic_ipi_action, SA_INTERRUPT,
+		    "IPI0 (call function)", mpic);
+	request_irq(mpic->ipi_offset+1, mpic_ipi_action, SA_INTERRUPT,
+		   "IPI1 (reschedule)", mpic);
+	request_irq(mpic->ipi_offset+2, mpic_ipi_action, SA_INTERRUPT,
+		   "IPI2 (unused)", mpic);
+	request_irq(mpic->ipi_offset+3, mpic_ipi_action, SA_INTERRUPT,
+		   "IPI3 (debugger break)", mpic);
+
+	printk("IPIs requested... \n");
+}
+#endif /* CONFIG_SMP */
diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile
index abf10dcb787..0649540bc7d 100644
--- a/arch/ppc/kernel/Makefile
+++ b/arch/ppc/kernel/Makefile
@@ -1,6 +1,7 @@
 #
 # Makefile for the linux kernel.
 #
+ifneq ($(CONFIG_PPC_MERGE),y)
 
 extra-$(CONFIG_PPC_STD_MMU)	:= head.o
 extra-$(CONFIG_40x)		:= head_4xx.o
@@ -15,9 +16,8 @@ extra-y				+= vmlinux.lds
 obj-y				:= entry.o traps.o irq.o idle.o time.o misc.o \
 					process.o signal.o ptrace.o align.o \
 					semaphore.o syscalls.o setup.o \
-					cputable.o ppc_htab.o
+					cputable.o ppc_htab.o perfmon.o
 obj-$(CONFIG_6xx)		+= l2cr.o cpu_setup_6xx.o
-obj-$(CONFIG_E500)		+= perfmon.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o
 obj-$(CONFIG_POWER4)		+= cpu_setup_power4.o
 obj-$(CONFIG_MODULES)		+= module.o ppc_ksyms.o
@@ -38,3 +38,23 @@ endif
 
 # These are here while we do the architecture merge
 vecemu-y			+= ../../powerpc/kernel/vecemu.o
+
+else
+obj-y				:= entry.o irq.o idle.o time.o misc.o \
+					signal.o ptrace.o align.o \
+					syscalls.o setup.o \
+					cputable.o perfmon.o
+obj-$(CONFIG_6xx)		+= l2cr.o cpu_setup_6xx.o
+obj-$(CONFIG_SOFTWARE_SUSPEND)	+= swsusp.o
+obj-$(CONFIG_POWER4)		+= cpu_setup_power4.o
+obj-$(CONFIG_MODULES)		+= module.o ppc_ksyms.o
+obj-$(CONFIG_NOT_COHERENT_CACHE)	+= dma-mapping.o
+obj-$(CONFIG_PCI)		+= pci.o
+obj-$(CONFIG_KGDB)		+= ppc-stub.o
+obj-$(CONFIG_SMP)		+= smp.o smp-tbsync.o
+obj-$(CONFIG_TAU)		+= temp.o
+ifndef CONFIG_E200
+obj-$(CONFIG_FSL_BOOKE)		+= perfmon_fsl_booke.o
+endif
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+endif
diff --git a/arch/ppc/kernel/perfmon.c b/arch/ppc/kernel/perfmon.c
index 91e2786ea09..f9b27d939f7 100644
--- a/arch/ppc/kernel/perfmon.c
+++ b/arch/ppc/kernel/perfmon.c
@@ -45,7 +45,7 @@ static void dummy_perf(struct pt_regs *regs)
 	mtpmr(PMRN_PMGC0, pmgc0);
 }
 
-#else
+#elif CONFIG_6xx
 /* Ensure exceptions are disabled */
 
 static void dummy_perf(struct pt_regs *regs)
@@ -55,6 +55,10 @@ static void dummy_perf(struct pt_regs *regs)
 	mmcr0 &= ~MMCR0_PMXE;
 	mtspr(SPRN_MMCR0, mmcr0);
 }
+#else
+static void dummy_perf(struct pt_regs *regs)
+{
+}
 #endif
 
 void (*perf_irq)(struct pt_regs *) = dummy_perf;
diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c
index 8b06b8e0033..1b891b806f3 100644
--- a/arch/ppc/kernel/setup.c
+++ b/arch/ppc/kernel/setup.c
@@ -71,7 +71,8 @@ struct ide_machdep_calls ppc_ide_md;
 unsigned long boot_mem_size;
 
 unsigned long ISA_DMA_THRESHOLD;
-unsigned long DMA_MODE_READ, DMA_MODE_WRITE;
+unsigned int DMA_MODE_READ;
+unsigned int DMA_MODE_WRITE;
 
 #ifdef CONFIG_PPC_MULTIPLATFORM
 int _machine = 0;
@@ -82,6 +83,8 @@ extern void pmac_init(unsigned long r3, unsigned long r4,
 		unsigned long r5, unsigned long r6, unsigned long r7);
 extern void chrp_init(unsigned long r3, unsigned long r4,
 		unsigned long r5, unsigned long r6, unsigned long r7);
+
+dev_t boot_dev;
 #endif /* CONFIG_PPC_MULTIPLATFORM */
 
 #ifdef CONFIG_MAGIC_SYSRQ
@@ -404,11 +407,13 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
 			_machine = _MACH_prep;
 	}
 
+#ifdef CONFIG_PPC_PREP
 	/* not much more to do here, if prep */
 	if (_machine == _MACH_prep) {
 		prep_init(r3, r4, r5, r6, r7);
 		return;
 	}
+#endif
 
 	/* prom_init has already been called from __start */
 	if (boot_infos)
@@ -479,12 +484,16 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
 #endif /* CONFIG_ADB */
 
 	switch (_machine) {
+#ifdef CONFIG_PPC_PMAC
 	case _MACH_Pmac:
 		pmac_init(r3, r4, r5, r6, r7);
 		break;
+#endif
+#ifdef CONFIG_PPC_CHRP
 	case _MACH_chrp:
 		chrp_init(r3, r4, r5, r6, r7);
 		break;
+#endif
 	}
 }
 
diff --git a/arch/ppc/platforms/pmac_setup.c b/arch/ppc/platforms/pmac_setup.c
index 12cbc85555d..1ad779ecc8f 100644
--- a/arch/ppc/platforms/pmac_setup.c
+++ b/arch/ppc/platforms/pmac_setup.c
@@ -719,7 +719,8 @@ pmac_declare_of_platform_devices(void)
 	if (np) {
 		for (np = np->child; np != NULL; np = np->sibling)
 			if (strncmp(np->name, "i2c", 3) == 0) {
-				of_platform_device_create(np, "uni-n-i2c");
+				of_platform_device_create(np, "uni-n-i2c",
+							  NULL);
 				break;
 			}
 	}
@@ -727,17 +728,18 @@ pmac_declare_of_platform_devices(void)
 	if (np) {
 		for (np = np->child; np != NULL; np = np->sibling)
 			if (strncmp(np->name, "i2c", 3) == 0) {
-				of_platform_device_create(np, "u3-i2c");
+				of_platform_device_create(np, "u3-i2c",
+							  NULL);
 				break;
 			}
 	}
 
 	np = find_devices("valkyrie");
 	if (np)
-		of_platform_device_create(np, "valkyrie");
+		of_platform_device_create(np, "valkyrie", NULL);
 	np = find_devices("platinum");
 	if (np)
-		of_platform_device_create(np, "platinum");
+		of_platform_device_create(np, "platinum", NULL);
 
 	return 0;
 }
diff --git a/arch/ppc/platforms/prep_setup.c b/arch/ppc/platforms/prep_setup.c
index fccafbcd4b5..8bc734fe668 100644
--- a/arch/ppc/platforms/prep_setup.c
+++ b/arch/ppc/platforms/prep_setup.c
@@ -89,9 +89,6 @@ extern void prep_tiger1_setup_pci(char *irq_edge_mask_lo, char *irq_edge_mask_hi
 #define cached_21	(((char *)(ppc_cached_irq_mask))[3])
 #define cached_A1	(((char *)(ppc_cached_irq_mask))[2])
 
-/* for the mac fs */
-dev_t boot_dev;
-
 #ifdef CONFIG_SOUND_CS4232
 long ppc_cs4232_dma, ppc_cs4232_dma2;
 #endif
diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile
index b8d08f33f7e..1b0a84931af 100644
--- a/arch/ppc/syslib/Makefile
+++ b/arch/ppc/syslib/Makefile
@@ -5,6 +5,7 @@
 CFLAGS_prom_init.o      += -fPIC
 CFLAGS_btext.o          += -fPIC
 
+ifneq ($(CONFIG_PPC_MERGE),y)
 wdt-mpc8xx-$(CONFIG_8xx_WDT)	+= m8xx_wdt.o
 
 obj-$(CONFIG_PPCBUG_NVRAM)	+= prep_nvram.o
@@ -109,3 +110,16 @@ obj-$(CONFIG_PPC_MPC52xx)	+= mpc52xx_setup.o mpc52xx_pic.o \
 ifeq ($(CONFIG_PPC_MPC52xx),y)
 obj-$(CONFIG_PCI)		+= mpc52xx_pci.o
 endif
+
+else
+# Stuff still needed by the merged powerpc sources
+
+obj-$(CONFIG_PPCBUG_NVRAM)	+= prep_nvram.o
+obj-$(CONFIG_PPC_OF)		+= prom_init.o prom.o of_device.o
+obj-$(CONFIG_PPC_PMAC)		+= indirect_pci.o
+obj-$(CONFIG_PPC_CHRP)		+= indirect_pci.o i8259.o
+obj-$(CONFIG_PPC_PREP)		+= indirect_pci.o i8259.o todc_time.o
+obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
+obj-$(CONFIG_MPC10X_BRIDGE)	+= mpc10x_common.o indirect_pci.o ppc_sys.o
+
+endif
diff --git a/arch/ppc/syslib/of_device.c b/arch/ppc/syslib/of_device.c
index da8a0f2128d..93c7231ea70 100644
--- a/arch/ppc/syslib/of_device.c
+++ b/arch/ppc/syslib/of_device.c
@@ -234,7 +234,9 @@ void of_device_unregister(struct of_device *ofdev)
 	device_unregister(&ofdev->dev);
 }
 
-struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id)
+struct of_device* of_platform_device_create(struct device_node *np,
+					    const char *bus_id,
+					    struct device *parent)
 {
 	struct of_device *dev;
 	u32 *reg;
@@ -247,7 +249,7 @@ struct of_device* of_platform_device_create(struct device_node *np, const char *
 	dev->node = of_node_get(np);
 	dev->dma_mask = 0xffffffffUL;
 	dev->dev.dma_mask = &dev->dma_mask;
-	dev->dev.parent = NULL;
+	dev->dev.parent = parent;
 	dev->dev.bus = &of_platform_bus_type;
 	dev->dev.release = of_release_dev;
 
diff --git a/arch/ppc/syslib/ppc85xx_setup.c b/arch/ppc/syslib/ppc85xx_setup.c
index b7242f1bd93..832b8bf99ae 100644
--- a/arch/ppc/syslib/ppc85xx_setup.c
+++ b/arch/ppc/syslib/ppc85xx_setup.c
@@ -184,8 +184,8 @@ mpc85xx_setup_pci1(struct pci_controller *hose)
 	pci->powar1 = 0x80044000 |
 	   (__ilog2(MPC85XX_PCI1_UPPER_MEM - MPC85XX_PCI1_LOWER_MEM + 1) - 1);
 
-	/* Setup outboud IO windows @ MPC85XX_PCI1_IO_BASE */
-	pci->potar2 = 0x00000000;
+	/* Setup outbound IO windows @ MPC85XX_PCI1_IO_BASE */
+	pci->potar2 = (MPC85XX_PCI1_LOWER_IO >> 12) & 0x000fffff;
 	pci->potear2 = 0x00000000;
 	pci->powbar2 = (MPC85XX_PCI1_IO_BASE >> 12) & 0x000fffff;
 	/* Enable, IO R/W */
@@ -235,8 +235,8 @@ mpc85xx_setup_pci2(struct pci_controller *hose)
 	pci->powar1 = 0x80044000 |
 	   (__ilog2(MPC85XX_PCI2_UPPER_MEM - MPC85XX_PCI2_LOWER_MEM + 1) - 1);
 
-	/* Setup outboud IO windows @ MPC85XX_PCI2_IO_BASE */
-	pci->potar2 = 0x00000000;
+	/* Setup outbound IO windows @ MPC85XX_PCI2_IO_BASE */
+	pci->potar2 = (MPC85XX_PCI2_LOWER_IO >> 12) & 0x000fffff;;
 	pci->potear2 = 0x00000000;
 	pci->powbar2 = (MPC85XX_PCI2_IO_BASE >> 12) & 0x000fffff;
 	/* Enable, IO R/W */
diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile
index a8877881bb4..40675b3f924 100644
--- a/arch/ppc64/Makefile
+++ b/arch/ppc64/Makefile
@@ -107,7 +107,7 @@ install: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
 
 defaultimage-$(CONFIG_PPC_PSERIES) := zImage
-defaultimage-$(CONFIG_PPC_PMAC) := vmlinux
+defaultimage-$(CONFIG_PPC_PMAC) := zImage.vmode
 defaultimage-$(CONFIG_PPC_MAPLE) := zImage
 defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux
 KBUILD_IMAGE := $(defaultimage-y)
diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c
index da580812ddf..9f200f0f2ad 100644
--- a/arch/ppc64/kernel/of_device.c
+++ b/arch/ppc64/kernel/of_device.c
@@ -233,7 +233,9 @@ void of_device_unregister(struct of_device *ofdev)
 	device_unregister(&ofdev->dev);
 }
 
-struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id)
+struct of_device* of_platform_device_create(struct device_node *np,
+					    const char *bus_id,
+					    struct device *parent)
 {
 	struct of_device *dev;
 
@@ -245,7 +247,7 @@ struct of_device* of_platform_device_create(struct device_node *np, const char *
 	dev->node = np;
 	dev->dma_mask = 0xffffffffUL;
 	dev->dev.dma_mask = &dev->dma_mask;
-	dev->dev.parent = NULL;
+	dev->dev.parent = parent;
 	dev->dev.bus = &of_platform_bus_type;
 	dev->dev.release = of_release_dev;
 
@@ -259,6 +261,7 @@ struct of_device* of_platform_device_create(struct device_node *np, const char *
 	return dev;
 }
 
+
 EXPORT_SYMBOL(of_match_device);
 EXPORT_SYMBOL(of_platform_bus_type);
 EXPORT_SYMBOL(of_register_driver);
diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c
index 2b5e622732f..5914f61a152 100644
--- a/arch/ppc64/kernel/pSeries_iommu.c
+++ b/arch/ppc64/kernel/pSeries_iommu.c
@@ -281,8 +281,10 @@ static void iommu_table_setparms(struct pci_controller *phb,
 	tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT;
 	
 	/* Test if we are going over 2GB of DMA space */
-	if (phb->dma_window_base_cur + phb->dma_window_size > (1L << 31))
+	if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) {
+		udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
 		panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); 
+	}
 	
 	phb->dma_window_base_cur += phb->dma_window_size;
 
@@ -326,92 +328,85 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb,
 
 static void iommu_bus_setup_pSeries(struct pci_bus *bus)
 {
-	struct device_node *dn, *pdn;
-	struct pci_dn *pci;
+	struct device_node *dn;
 	struct iommu_table *tbl;
+	struct device_node *isa_dn, *isa_dn_orig;
+	struct device_node *tmp;
+	struct pci_dn *pci;
+	int children;
 
 	DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self);
 
-	/* For each (root) bus, we carve up the available DMA space in 256MB
-	 * pieces. Since each piece is used by one (sub) bus/device, that would
-	 * give a maximum of 7 devices per PHB. In most cases, this is plenty.
-	 *
-	 * The exception is on Python PHBs (pre-POWER4). Here we don't have EADS
-	 * bridges below the PHB to allocate the sectioned tables to, so instead
-	 * we allocate a 1GB table at the PHB level.
+	dn = pci_bus_to_OF_node(bus);
+	pci = PCI_DN(dn);
+
+	if (bus->self) {
+		/* This is not a root bus, any setup will be done for the
+		 * device-side of the bridge in iommu_dev_setup_pSeries().
+		 */
+		return;
+	}
+
+	/* Check if the ISA bus on the system is under
+	 * this PHB.
 	 */
+	isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");
 
-	dn = pci_bus_to_OF_node(bus);
-	pci = dn->data;
-
-	if (!bus->self) {
-		/* Root bus */
-		if (is_python(dn)) {
-			unsigned int *iohole;
-
-			DBG("Python root bus %s\n", bus->name);
-
-			iohole = (unsigned int *)get_property(dn, "io-hole", 0);
-
-			if (iohole) {
-				/* On first bus we need to leave room for the
-				 * ISA address space. Just skip the first 256MB
-				 * alltogether. This leaves 768MB for the window.
-				 */
-				DBG("PHB has io-hole, reserving 256MB\n");
-				pci->phb->dma_window_size = 3 << 28;
-				pci->phb->dma_window_base_cur = 1 << 28;
-			} else {
-				/* 1GB window by default */
-				pci->phb->dma_window_size = 1 << 30;
-				pci->phb->dma_window_base_cur = 0;
-			}
-
-			tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
-
-			iommu_table_setparms(pci->phb, dn, tbl);
-			pci->iommu_table = iommu_init_table(tbl);
-		} else {
-			/* Do a 128MB table at root. This is used for the IDE
-			 * controller on some SMP-mode POWER4 machines. It
-			 * doesn't hurt to allocate it on other machines
-			 * -- it'll just be unused since new tables are
-			 * allocated on the EADS level.
-			 *
-			 * Allocate at offset 128MB to avoid having to deal
-			 * with ISA holes; 128MB table for IDE is plenty.
-			 */
-			pci->phb->dma_window_size = 1 << 27;
-			pci->phb->dma_window_base_cur = 1 << 27;
-
-			tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
-
-			iommu_table_setparms(pci->phb, dn, tbl);
-			pci->iommu_table = iommu_init_table(tbl);
-
-			/* All child buses have 256MB tables */
-			pci->phb->dma_window_size = 1 << 28;
-		}
-	} else {
-		pdn = pci_bus_to_OF_node(bus->parent);
+	while (isa_dn && isa_dn != dn)
+		isa_dn = isa_dn->parent;
+
+	if (isa_dn_orig)
+		of_node_put(isa_dn_orig);
 
-		if (!bus->parent->self && !is_python(pdn)) {
-			struct iommu_table *tbl;
-			/* First child and not python means this is the EADS
-			 * level. Allocate new table for this slot with 256MB
-			 * window.
-			 */
+	/* Count number of direct PCI children of the PHB.
+	 * All PCI device nodes have class-code property, so it's
+	 * an easy way to find them.
+	 */
+	for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
+		if (get_property(tmp, "class-code", NULL))
+			children++;
 
-			tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
+	DBG("Children: %d\n", children);
 
-			iommu_table_setparms(pci->phb, dn, tbl);
+	/* Calculate amount of DMA window per slot. Each window must be
+	 * a power of two (due to pci_alloc_consistent requirements).
+	 *
+	 * Keep 256MB aside for PHBs with ISA.
+	 */
 
-			pci->iommu_table = iommu_init_table(tbl);
-		} else {
-			/* Lower than first child or under python, use parent table */
-			pci->iommu_table = PCI_DN(pdn)->iommu_table;
-		}
+	if (!isa_dn) {
+		/* No ISA/IDE - just set window size and return */
+		pci->phb->dma_window_size = 0x80000000ul; /* To be divided */
+
+		while (pci->phb->dma_window_size * children > 0x80000000ul)
+			pci->phb->dma_window_size >>= 1;
+		DBG("No ISA/IDE, window size is 0x%lx\n",
+			pci->phb->dma_window_size);
+		pci->phb->dma_window_base_cur = 0;
+
+		return;
 	}
+
+	/* If we have ISA, then we probably have an IDE
+	 * controller too. Allocate a 128MB table but
+	 * skip the first 128MB to avoid stepping on ISA
+	 * space.
+	 */
+	pci->phb->dma_window_size = 0x8000000ul;
+	pci->phb->dma_window_base_cur = 0x8000000ul;
+
+	tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
+
+	iommu_table_setparms(pci->phb, dn, tbl);
+	pci->iommu_table = iommu_init_table(tbl);
+
+	/* Divide the rest (1.75GB) among the children */
+	pci->phb->dma_window_size = 0x80000000ul;
+	while (pci->phb->dma_window_size * children > 0x70000000ul)
+		pci->phb->dma_window_size >>= 1;
+
+	DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size);
+
 }
 
 
@@ -462,21 +457,36 @@ static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus)
 static void iommu_dev_setup_pSeries(struct pci_dev *dev)
 {
 	struct device_node *dn, *mydn;
+	struct iommu_table *tbl;
+
+	DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, pci_name(dev));
 
-	DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, dev->pretty_name);
-	/* Now copy the iommu_table ptr from the bus device down to the
-	 * pci device_node.  This means get_iommu_table() won't need to search
-	 * up the device tree to find it.
-	 */
 	mydn = dn = pci_device_to_OF_node(dev);
 
+	/* If we're the direct child of a root bus, then we need to allocate
+	 * an iommu table ourselves. The bus setup code should have setup
+	 * the window sizes already.
+	 */
+	if (!dev->bus->self) {
+		DBG(" --> first child, no bridge. Allocating iommu table.\n");
+		tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL);
+		iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl);
+		PCI_DN(mydn)->iommu_table = iommu_init_table(tbl);
+
+		return;
+	}
+
+	/* If this device is further down the bus tree, search upwards until
+	 * an already allocated iommu table is found and use that.
+	 */
+
 	while (dn && dn->data && PCI_DN(dn)->iommu_table == NULL)
 		dn = dn->parent;
 
 	if (dn && dn->data) {
 		PCI_DN(mydn)->iommu_table = PCI_DN(dn)->iommu_table;
 	} else {
-		DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, dev->pretty_name);
+		DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, pci_name(dev));
 	}
 }
 
@@ -510,7 +520,7 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev)
 	int *dma_window = NULL;
 	struct pci_dn *pci;
 
-	DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, dev->pretty_name);
+	DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev));
 
 	/* dev setup for LPAR is a little tricky, since the device tree might
 	 * contain the dma-window properties per-device and not neccesarily
@@ -532,9 +542,8 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev)
 	 * slots on POWER4 machines.
 	 */
 	if (dma_window == NULL || pdn->parent == NULL) {
-		/* Fall back to regular (non-LPAR) dev setup */
-		DBG("No dma window for device, falling back to regular setup\n");
-		iommu_dev_setup_pSeries(dev);
+		DBG("No dma window for device, linking to parent\n");
+		PCI_DN(dn)->iommu_table = PCI_DN(pdn)->iommu_table;
 		return;
 	} else {
 		DBG("Found DMA window, allocating table\n");
diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c
index 861138ad092..ff4be1da69d 100644
--- a/arch/ppc64/kernel/pci.c
+++ b/arch/ppc64/kernel/pci.c
@@ -246,11 +246,14 @@ static unsigned int pci_parse_of_flags(u32 addr0)
 	unsigned int flags = 0;
 
 	if (addr0 & 0x02000000) {
-		flags |= IORESOURCE_MEM;
+		flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+		flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
+		flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
 		if (addr0 & 0x40000000)
-			flags |= IORESOURCE_PREFETCH;
+			flags |= IORESOURCE_PREFETCH
+				 | PCI_BASE_ADDRESS_MEM_PREFETCH;
 	} else if (addr0 & 0x01000000)
-		flags |= IORESOURCE_IO;
+		flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
 	return flags;
 }
 
diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c
index 1f5118078f7..bb0c3bfbb7e 100644
--- a/arch/ppc64/kernel/pmac_setup.c
+++ b/arch/ppc64/kernel/pmac_setup.c
@@ -434,15 +434,23 @@ static int pmac_check_legacy_ioport(unsigned int baseport)
 
 static int __init pmac_declare_of_platform_devices(void)
 {
-	struct device_node *np;
+	struct device_node *np, *npp;
 
-	np = find_devices("u3");
-	if (np) {
-		for (np = np->child; np != NULL; np = np->sibling)
+	npp = of_find_node_by_name(NULL, "u3");
+	if (npp) {
+		for (np = NULL; (np = of_get_next_child(npp, np)) != NULL;) {
 			if (strncmp(np->name, "i2c", 3) == 0) {
-				of_platform_device_create(np, "u3-i2c");
+				of_platform_device_create(np, "u3-i2c", NULL);
+				of_node_put(np);
 				break;
 			}
+		}
+		of_node_put(npp);
+	}
+        npp = of_find_node_by_type(NULL, "smu");
+        if (npp) {
+		of_platform_device_create(npp, "smu", NULL);
+		of_node_put(npp);
 	}
 
 	return 0;
diff --git a/arch/ppc64/kernel/pmac_time.c b/arch/ppc64/kernel/pmac_time.c
index 6c8c99295e7..9d8c97decd3 100644
--- a/arch/ppc64/kernel/pmac_time.c
+++ b/arch/ppc64/kernel/pmac_time.c
@@ -84,7 +84,7 @@ void pmac_get_rtc_time(struct rtc_time *tm)
 
 #ifdef CONFIG_PMAC_SMU
 	case SYS_CTRLER_SMU:
-		smu_get_rtc_time(tm);
+		smu_get_rtc_time(tm, 1);
 		break;
 #endif /* CONFIG_PMAC_SMU */
 	default:
@@ -128,7 +128,7 @@ int pmac_set_rtc_time(struct rtc_time *tm)
 
 #ifdef CONFIG_PMAC_SMU
 	case SYS_CTRLER_SMU:
-		return smu_set_rtc_time(tm);
+		return smu_set_rtc_time(tm, 1);
 #endif /* CONFIG_PMAC_SMU */
 	default:
 		return -ENODEV;
diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c
index 9979919cdf9..f252670874a 100644
--- a/arch/ppc64/kernel/prom_init.c
+++ b/arch/ppc64/kernel/prom_init.c
@@ -1711,6 +1711,7 @@ static void __init flatten_device_tree(void)
 	unsigned long offset = reloc_offset();
 	unsigned long mem_start, mem_end, room;
 	struct boot_param_header *hdr;
+	struct prom_t *_prom = PTRRELOC(&prom);
 	char *namep;
 	u64 *rsvmap;
 
@@ -1765,6 +1766,7 @@ static void __init flatten_device_tree(void)
 	RELOC(dt_struct_end) = PAGE_ALIGN(mem_start);
 
 	/* Finish header */
+	hdr->boot_cpuid_phys = _prom->cpu;
 	hdr->magic = OF_DT_HEADER;
 	hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start);
 	hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start);
@@ -1854,7 +1856,6 @@ static void __init prom_find_boot_cpu(void)
 
 	cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu);
 
-	prom_setprop(cpu_pkg, "linux,boot-cpu", NULL, 0);
 	prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval));
 	_prom->cpu = getprop_rval;
 
diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c
index 85ed3188a91..b1c044ca575 100644
--- a/arch/ppc64/kernel/ptrace.c
+++ b/arch/ppc64/kernel/ptrace.c
@@ -219,6 +219,7 @@ int sys_ptrace(long request, long pid, long addr, long data)
 
 	case PTRACE_SET_DEBUGREG:
 		ret = ptrace_set_debugreg(child, addr, data);
+		break;
 
 	case PTRACE_DETACH:
 		ret = ptrace_detach(child, data);
diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c
index 29b074505d3..874cf96938f 100644
--- a/arch/ppc64/mm/hash_native.c
+++ b/arch/ppc64/mm/hash_native.c
@@ -342,15 +342,14 @@ static void native_flush_hash_range(unsigned long number, int local)
 	hpte_t *hptep;
 	unsigned long hpte_v;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
-
-	/* XXX fix for large ptes */
-	unsigned long large = 0;
+	unsigned long large;
 
 	local_irq_save(flags);
 
 	j = 0;
 	for (i = 0; i < number; i++) {
 		va = batch->vaddr[j];
+		large = pte_huge(batch->pte[i]);
 		if (large)
 			vpn = va >> HPAGE_SHIFT;
 		else
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index 338771ec70d..0ea0994ed97 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -710,10 +710,13 @@ repeat:
 			hpte_group = ((~hash & htab_hash_mask) *
 				      HPTES_PER_GROUP) & ~0x7UL; 
 			slot = ppc_md.hpte_insert(hpte_group, va, prpn,
-						  HPTE_V_LARGE, rflags);
+						  HPTE_V_LARGE |
+						  HPTE_V_SECONDARY,
+						  rflags);
 			if (slot == -1) {
 				if (mftb() & 0x1)
-					hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+					hpte_group = ((hash & htab_hash_mask) *
+						      HPTES_PER_GROUP)&~0x7UL;
 
 				ppc_md.hpte_remove(hpte_group);
 				goto repeat;
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index 3e0badb820c..b4834952785 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -42,19 +42,15 @@
  * executing (see inherit_locked_prom_mappings() rant).
  */
 sparc64_vpte_nucleus:
-	/* Load 0xf0000000, which is LOW_OBP_ADDRESS.  */
-	mov		0xf, %g5
-	sllx		%g5, 28, %g5
-
-	/* Is addr >= LOW_OBP_ADDRESS?  */
+	/* Note that kvmap below has verified that the address is
+	 * in the range MODULES_VADDR --> VMALLOC_END already.  So
+	 * here we need only check if it is an OBP address or not.
+	 */
+	sethi		%hi(LOW_OBP_ADDRESS), %g5
 	cmp		%g4, %g5
 	blu,pn		%xcc, sparc64_vpte_patchme1
 	 mov		0x1, %g5
-
-	/* Load 0x100000000, which is HI_OBP_ADDRESS.  */
 	sllx		%g5, 32, %g5
-
-	/* Is addr < HI_OBP_ADDRESS?  */
 	cmp		%g4, %g5
 	blu,pn		%xcc, obp_iaddr_patch
 	 nop
@@ -156,26 +152,29 @@ obp_daddr_patch:
  * rather, use information saved during inherit_prom_mappings() using 8k
  * pagesize.
  */
+	.align		32
 kvmap:
-	/* Load 0xf0000000, which is LOW_OBP_ADDRESS.  */
-	mov		0xf, %g5
-	sllx		%g5, 28, %g5
+	sethi		%hi(MODULES_VADDR), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, longpath
+	 mov		(VMALLOC_END >> 24), %g5
+	sllx		%g5, 24, %g5
+	cmp		%g4, %g5
+	bgeu,pn		%xcc, longpath
+	 nop
 
-	/* Is addr >= LOW_OBP_ADDRESS?  */
+kvmap_check_obp:
+	sethi		%hi(LOW_OBP_ADDRESS), %g5
 	cmp		%g4, %g5
-	blu,pn		%xcc, vmalloc_addr
+	blu,pn		%xcc, kvmap_vmalloc_addr
 	 mov		0x1, %g5
-
-	/* Load 0x100000000, which is HI_OBP_ADDRESS.  */
 	sllx		%g5, 32, %g5
-
-	/* Is addr < HI_OBP_ADDRESS?  */
 	cmp		%g4, %g5
 	blu,pn		%xcc, obp_daddr_patch
 	 nop
 
-vmalloc_addr:
-	/* If we get here, a vmalloc addr accessed, load kernel VPTE.  */
+kvmap_vmalloc_addr:
+	/* If we get here, a vmalloc addr was accessed, load kernel VPTE.  */
 	ldxa		[%g3 + %g6] ASI_N, %g5
 	brgez,pn	%g5, longpath
 	 nop
diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c
index 23ad839d113..5efbff90d66 100644
--- a/arch/sparc64/kernel/ptrace.c
+++ b/arch/sparc64/kernel/ptrace.c
@@ -30,6 +30,7 @@
 #include <asm/psrcompat.h>
 #include <asm/visasm.h>
 #include <asm/spitfire.h>
+#include <asm/page.h>
 
 /* Returning from ptrace is a bit tricky because the syscall return
  * low level code assumes any value returned which is negative and
@@ -128,20 +129,20 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
 	 * is mapped to in the user's address space, we can skip the
 	 * D-cache flush.
 	 */
-	if ((uaddr ^ kaddr) & (1UL << 13)) {
+	if ((uaddr ^ (unsigned long) kaddr) & (1UL << 13)) {
 		unsigned long start = __pa(kaddr);
 		unsigned long end = start + len;
 
 		if (tlb_type == spitfire) {
 			for (; start < end; start += 32)
-				spitfire_put_dcache_tag(va & 0x3fe0, 0x0);
+				spitfire_put_dcache_tag(start & 0x3fe0, 0x0);
 		} else {
 			for (; start < end; start += 32)
 				__asm__ __volatile__(
 					"stxa %%g0, [%0] %1\n\t"
 					"membar #Sync"
 					: /* no outputs */
-					: "r" (va),
+					: "r" (start),
 					"i" (ASI_DCACHE_INVALIDATE));
 		}
 	}
diff --git a/arch/sparc64/kernel/una_asm.S b/arch/sparc64/kernel/una_asm.S
index cbb40585253..da48400bcc9 100644
--- a/arch/sparc64/kernel/una_asm.S
+++ b/arch/sparc64/kernel/una_asm.S
@@ -17,7 +17,7 @@ kernel_unaligned_trap_fault:
 __do_int_store:
 	rd	%asi, %o4
 	wr	%o3, 0, %asi
-	ldx	[%o2], %g3
+	mov	%o2, %g3
 	cmp	%o1, 2
 	be,pn	%icc, 2f
 	 cmp	%o1, 4
diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c
index da9739f0d43..42718f6a7d3 100644
--- a/arch/sparc64/kernel/unaligned.c
+++ b/arch/sparc64/kernel/unaligned.c
@@ -184,13 +184,14 @@ extern void do_int_load(unsigned long *dest_reg, int size,
 			unsigned long *saddr, int is_signed, int asi);
 	
 extern void __do_int_store(unsigned long *dst_addr, int size,
-			   unsigned long *src_val, int asi);
+			   unsigned long src_val, int asi);
 
 static inline void do_int_store(int reg_num, int size, unsigned long *dst_addr,
-				struct pt_regs *regs, int asi)
+				struct pt_regs *regs, int asi, int orig_asi)
 {
 	unsigned long zero = 0;
-	unsigned long *src_val = &zero;
+	unsigned long *src_val_p = &zero;
+	unsigned long src_val;
 
 	if (size == 16) {
 		size = 8;
@@ -198,7 +199,25 @@ static inline void do_int_store(int reg_num, int size, unsigned long *dst_addr,
 		        (unsigned)fetch_reg(reg_num, regs) : 0)) << 32) |
 			(unsigned)fetch_reg(reg_num + 1, regs);
 	} else if (reg_num) {
-		src_val = fetch_reg_addr(reg_num, regs);
+		src_val_p = fetch_reg_addr(reg_num, regs);
+	}
+	src_val = *src_val_p;
+	if (unlikely(asi != orig_asi)) {
+		switch (size) {
+		case 2:
+			src_val = swab16(src_val);
+			break;
+		case 4:
+			src_val = swab32(src_val);
+			break;
+		case 8:
+			src_val = swab64(src_val);
+			break;
+		case 16:
+		default:
+			BUG();
+			break;
+		};
 	}
 	__do_int_store(dst_addr, size, src_val, asi);
 }
@@ -276,6 +295,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u
 		kernel_mna_trap_fault();
 	} else {
 		unsigned long addr;
+		int orig_asi, asi;
 
 		addr = compute_effective_address(regs, insn,
 						 ((insn >> 25) & 0x1f));
@@ -285,18 +305,48 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u
 		       regs->tpc, dirstrings[dir], addr, size,
 		       regs->u_regs[UREG_RETPC]);
 #endif
+		orig_asi = asi = decode_asi(insn, regs);
+		switch (asi) {
+		case ASI_NL:
+		case ASI_AIUPL:
+		case ASI_AIUSL:
+		case ASI_PL:
+		case ASI_SL:
+		case ASI_PNFL:
+		case ASI_SNFL:
+			asi &= ~0x08;
+			break;
+		};
 		switch (dir) {
 		case load:
 			do_int_load(fetch_reg_addr(((insn>>25)&0x1f), regs),
 				    size, (unsigned long *) addr,
-				    decode_signedness(insn),
-				    decode_asi(insn, regs));
+				    decode_signedness(insn), asi);
+			if (unlikely(asi != orig_asi)) {
+				unsigned long val_in = *(unsigned long *) addr;
+				switch (size) {
+				case 2:
+					val_in = swab16(val_in);
+					break;
+				case 4:
+					val_in = swab32(val_in);
+					break;
+				case 8:
+					val_in = swab64(val_in);
+					break;
+				case 16:
+				default:
+					BUG();
+					break;
+				};
+				*(unsigned long *) addr = val_in;
+			}
 			break;
 
 		case store:
 			do_int_store(((insn>>25)&0x1f), size,
 				     (unsigned long *) addr, regs,
-				     decode_asi(insn, regs));
+				     asi, orig_asi);
 			break;
 
 		default:
diff --git a/arch/um/Makefile b/arch/um/Makefile
index ce987266dac..5b5af95721a 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -53,9 +53,13 @@ SYS_DIR		:= $(ARCH_DIR)/include/sysdep-$(SUBARCH)
 
 # -Dvmap=kernel_vmap affects everything, and prevents anything from
 # referencing the libpcap.o symbol so named.
+#
+# Same things for in6addr_loopback - found in libc.
 
 CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
-	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap
+	$(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \
+	-Din6addr_loopback=kernel_in6addr_loopback
+
 AFLAGS += $(ARCH_INCLUDE)
 
 USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 14a12d6b3df..16e7dc89f61 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -19,18 +19,44 @@
 #include "line.h"
 #include "os.h"
 
-#ifdef CONFIG_NOCONFIG_CHAN
+/* XXX: could well be moved to somewhere else, if needed. */
+static int my_printf(const char * fmt, ...)
+	__attribute__ ((format (printf, 1, 2)));
+
+static int my_printf(const char * fmt, ...)
+{
+	/* Yes, can be called on atomic context.*/
+	char *buf = kmalloc(4096, GFP_ATOMIC);
+	va_list args;
+	int r;
+
+	if (!buf) {
+		/* We print directly fmt.
+		 * Yes, yes, yes, feel free to complain. */
+		r = strlen(fmt);
+	} else {
+		va_start(args, fmt);
+		r = vsprintf(buf, fmt, args);
+		va_end(args);
+		fmt = buf;
+	}
 
-/* The printk's here are wrong because we are complaining that there is no
- * output device, but printk is printing to that output device.  The user will
- * never see the error.  printf would be better, except it can't run on a
- * kernel stack because it will overflow it.
- * Use printk for now since that will avoid crashing.
- */
+	if (r)
+		r = os_write_file(1, fmt, r);
+	return r;
+
+}
+
+#ifdef CONFIG_NOCONFIG_CHAN
+/* Despite its name, there's no added trailing newline. */
+static int my_puts(const char * buf)
+{
+	return os_write_file(1, buf, strlen(buf));
+}
 
 static void *not_configged_init(char *str, int device, struct chan_opts *opts)
 {
-	printk(KERN_ERR "Using a channel type which is configured out of "
+	my_puts("Using a channel type which is configured out of "
 	       "UML\n");
 	return(NULL);
 }
@@ -38,27 +64,27 @@ static void *not_configged_init(char *str, int device, struct chan_opts *opts)
 static int not_configged_open(int input, int output, int primary, void *data,
 			      char **dev_out)
 {
-	printk(KERN_ERR "Using a channel type which is configured out of "
+	my_puts("Using a channel type which is configured out of "
 	       "UML\n");
 	return(-ENODEV);
 }
 
 static void not_configged_close(int fd, void *data)
 {
-	printk(KERN_ERR "Using a channel type which is configured out of "
+	my_puts("Using a channel type which is configured out of "
 	       "UML\n");
 }
 
 static int not_configged_read(int fd, char *c_out, void *data)
 {
-	printk(KERN_ERR "Using a channel type which is configured out of "
+	my_puts("Using a channel type which is configured out of "
 	       "UML\n");
 	return(-EIO);
 }
 
 static int not_configged_write(int fd, const char *buf, int len, void *data)
 {
-	printk(KERN_ERR "Using a channel type which is configured out of "
+	my_puts("Using a channel type which is configured out of "
 	       "UML\n");
 	return(-EIO);
 }
@@ -66,7 +92,7 @@ static int not_configged_write(int fd, const char *buf, int len, void *data)
 static int not_configged_console_write(int fd, const char *buf, int len,
 				       void *data)
 {
-	printk(KERN_ERR "Using a channel type which is configured out of "
+	my_puts("Using a channel type which is configured out of "
 	       "UML\n");
 	return(-EIO);
 }
@@ -74,14 +100,14 @@ static int not_configged_console_write(int fd, const char *buf, int len,
 static int not_configged_window_size(int fd, void *data, unsigned short *rows,
 				     unsigned short *cols)
 {
-	printk(KERN_ERR "Using a channel type which is configured out of "
+	my_puts("Using a channel type which is configured out of "
 	       "UML\n");
 	return(-ENODEV);
 }
 
 static void not_configged_free(void *data)
 {
-	printf(KERN_ERR "Using a channel type which is configured out of "
+	my_puts("Using a channel type which is configured out of "
 	       "UML\n");
 }
 
@@ -457,7 +483,7 @@ static struct chan *parse_chan(char *str, int pri, int device,
 		}
 	}
 	if(ops == NULL){
-		printk(KERN_ERR "parse_chan couldn't parse \"%s\"\n", 
+		my_printf("parse_chan couldn't parse \"%s\"\n",
 		       str);
 		return(NULL);
 	}
@@ -465,7 +491,7 @@ static struct chan *parse_chan(char *str, int pri, int device,
 	data = (*ops->init)(str, device, opts);
 	if(data == NULL) return(NULL);
 
-	chan = kmalloc(sizeof(*chan), GFP_KERNEL);
+	chan = kmalloc(sizeof(*chan), GFP_ATOMIC);
 	if(chan == NULL) return(NULL);
 	*chan = ((struct chan) { .list	 	= LIST_HEAD_INIT(chan->list),
 				 .primary	= 1,
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index 310c1f823f2..04383f98f4d 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -23,7 +23,7 @@ static struct mconsole_command commands[] = {
 	{ "reboot", mconsole_reboot, MCONSOLE_PROC },
 	{ "config", mconsole_config, MCONSOLE_PROC },
 	{ "remove", mconsole_remove, MCONSOLE_PROC },
-	{ "sysrq", mconsole_sysrq, MCONSOLE_INTR },
+	{ "sysrq", mconsole_sysrq, MCONSOLE_PROC },
 	{ "help", mconsole_help, MCONSOLE_INTR },
 	{ "cad", mconsole_cad, MCONSOLE_INTR },
 	{ "stop", mconsole_stop, MCONSOLE_PROC },
diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h
index 0aa620970ad..782ac3a3baf 100644
--- a/arch/um/include/common-offsets.h
+++ b/arch/um/include/common-offsets.h
@@ -12,4 +12,6 @@ DEFINE_STR(UM_KERN_WARNING, KERN_WARNING);
 DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE);
 DEFINE_STR(UM_KERN_INFO, KERN_INFO);
 DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG);
-DEFINE(HOST_ELF_CLASS, ELF_CLASS);
+DEFINE(UM_ELF_CLASS, ELF_CLASS);
+DEFINE(UM_ELFCLASS32, ELFCLASS32);
+DEFINE(UM_ELFCLASS64, ELFCLASS64);
diff --git a/arch/um/include/user.h b/arch/um/include/user.h
index 57ee9e26122..0f865ef4691 100644
--- a/arch/um/include/user.h
+++ b/arch/um/include/user.h
@@ -14,7 +14,9 @@ extern void *um_kmalloc_atomic(int size);
 extern void kfree(void *ptr);
 extern int in_aton(char *str);
 extern int open_gdb_chan(void);
-extern int strlcpy(char *, const char *, int);
+/* These use size_t, however unsigned long is correct on both i386 and x86_64. */
+extern unsigned long strlcpy(char *, const char *, unsigned long);
+extern unsigned long strlcat(char *, const char *, unsigned long);
 extern void *um_vmalloc(int size);
 extern void vfree(void *ptr);
 
diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
index 39cf568ccfa..ea65db679e9 100644
--- a/arch/um/kernel/process_kern.c
+++ b/arch/um/kernel/process_kern.c
@@ -82,7 +82,8 @@ unsigned long alloc_stack(int order, int atomic)
 	unsigned long page;
 	int flags = GFP_KERNEL;
 
-	if(atomic) flags |= GFP_ATOMIC;
+	if (atomic)
+		flags = GFP_ATOMIC;
 	page = __get_free_pages(flags, order);
 	if(page == 0)
 		return(0);
diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c
index e89218958f3..a52751108aa 100644
--- a/arch/um/kernel/sigio_user.c
+++ b/arch/um/kernel/sigio_user.c
@@ -340,7 +340,7 @@ static int setup_initial_poll(int fd)
 {
 	struct pollfd *p;
 
-	p = um_kmalloc(sizeof(struct pollfd));
+	p = um_kmalloc_atomic(sizeof(struct pollfd));
 	if(p == NULL){
 		printk("setup_initial_poll : failed to allocate poll\n");
 		return(-1);
diff --git a/arch/um/kernel/skas/include/uaccess-skas.h b/arch/um/kernel/skas/include/uaccess-skas.h
index 6ee3f3902e6..7da0c2def0e 100644
--- a/arch/um/kernel/skas/include/uaccess-skas.h
+++ b/arch/um/kernel/skas/include/uaccess-skas.h
@@ -18,12 +18,6 @@
 	  ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \
 	  ((unsigned long) (addr) + (size) >= (unsigned long)(addr))))
 
-static inline int verify_area_skas(int type, const void __user * addr,
-                                   unsigned long size)
-{
-	return(access_ok_skas(type, addr, size) ? 0 : -EFAULT);
-}
-
 extern int copy_from_user_skas(void *to, const void __user *from, int n);
 extern int copy_to_user_skas(void __user *to, const void *from, int n);
 extern int strncpy_from_user_skas(char *dst, const char __user *src, int count);
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index 0a562c3c0fd..f5b0636f9ad 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -193,12 +193,12 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                 r = pte_read(*npte);
                 w = pte_write(*npte);
                 x = pte_exec(*npte);
-                if(!pte_dirty(*npte))
-                        w = 0;
-                if(!pte_young(*npte)){
-                        r = 0;
-                        w = 0;
-                }
+		if (!pte_young(*npte)) {
+			r = 0;
+			w = 0;
+		} else if (!pte_dirty(*npte)) {
+			w = 0;
+		}
                 if(force || pte_newpage(*npte)){
                         if(pte_present(*npte))
 			  ret = add_mmap(addr,
diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c
index 87cc6fd76ce..d297429ac36 100644
--- a/arch/um/kernel/trap_kern.c
+++ b/arch/um/kernel/trap_kern.c
@@ -18,6 +18,7 @@
 #include "asm/a.out.h"
 #include "asm/current.h"
 #include "asm/irq.h"
+#include "sysdep/sigcontext.h"
 #include "user_util.h"
 #include "kern_util.h"
 #include "kern.h"
@@ -39,6 +40,12 @@ int handle_page_fault(unsigned long address, unsigned long ip,
 	int err = -EFAULT;
 
 	*code_out = SEGV_MAPERR;
+
+	/* If the fault was during atomic operation, don't take the fault, just
+	 * fail. */
+	if (in_atomic())
+		goto out_nosemaphore;
+
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, address);
 	if(!vma) 
@@ -89,6 +96,7 @@ survive:
 	flush_tlb_page(vma, address);
 out:
 	up_read(&mm->mmap_sem);
+out_nosemaphore:
 	return(err);
 
 /*
@@ -125,7 +133,15 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc)
         }
 	else if(current->mm == NULL)
 		panic("Segfault with no mm");
-	err = handle_page_fault(address, ip, is_write, is_user, &si.si_code);
+
+	if (SEGV_IS_FIXABLE(&fi))
+		err = handle_page_fault(address, ip, is_write, is_user, &si.si_code);
+	else {
+		err = -EFAULT;
+		/* A thread accessed NULL, we get a fault, but CR2 is invalid.
+		 * This code is used in __do_copy_from_user() of TT mode. */
+		address = 0;
+	}
 
 	catcher = current->thread.fault_catcher;
 	if(!err)
diff --git a/arch/um/kernel/tt/include/uaccess-tt.h b/arch/um/kernel/tt/include/uaccess-tt.h
index aa6db384af8..dc2ebfa8c54 100644
--- a/arch/um/kernel/tt/include/uaccess-tt.h
+++ b/arch/um/kernel/tt/include/uaccess-tt.h
@@ -33,12 +33,6 @@ extern unsigned long uml_physmem;
          (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \
           (under_task_size(addr, size) || is_stack(addr, size))))
 
-static inline int verify_area_tt(int type, const void __user * addr,
-                                 unsigned long size)
-{
-	return(access_ok_tt(type, addr, size) ? 0 : -EFAULT);
-}
-
 extern unsigned long get_fault_addr(void);
 
 extern int __do_copy_from_user(void *to, const void *from, int n,
diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
index 0de05a268b2..cfaa373a6e7 100644
--- a/arch/um/kernel/tt/process_kern.c
+++ b/arch/um/kernel/tt/process_kern.c
@@ -23,10 +23,11 @@
 #include "mem_user.h"
 #include "tlb.h"
 #include "mode.h"
+#include "mode_kern.h"
 #include "init.h"
 #include "tt.h"
 
-int switch_to_tt(void *prev, void *next, void *last)
+void switch_to_tt(void *prev, void *next)
 {
 	struct task_struct *from, *to, *prev_sched;
 	unsigned long flags;
diff --git a/arch/um/kernel/tt/uaccess_user.c b/arch/um/kernel/tt/uaccess_user.c
index f01475512ec..8c220f054b6 100644
--- a/arch/um/kernel/tt/uaccess_user.c
+++ b/arch/um/kernel/tt/uaccess_user.c
@@ -22,8 +22,15 @@ int __do_copy_from_user(void *to, const void *from, int n,
 			       __do_copy, &faulted);
 	TASK_REGS(get_current())->tt = save;
 
-	if(!faulted) return(0);
-	else return(n - (fault - (unsigned long) from));
+	if(!faulted)
+		return 0;
+	else if (fault)
+		return n - (fault - (unsigned long) from);
+	else
+		/* In case of a general protection fault, we don't have the
+		 * fault address, so NULL is used instead. Pretend we didn't
+		 * copy anything. */
+		return n;
 }
 
 static void __do_strncpy(void *dst, const void *src, int count)
diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
index 186c2888501..0b21d59ba0c 100644
--- a/arch/um/kernel/umid.c
+++ b/arch/um/kernel/umid.c
@@ -31,6 +31,8 @@ static char *uml_dir = UML_DIR;
 /* Changed by set_umid */
 static int umid_is_random = 1;
 static int umid_inited = 0;
+/* Have we created the files? Should we remove them? */
+static int umid_owned = 0;
 
 static int make_umid(int (*printer)(const char *fmt, ...));
 
@@ -82,20 +84,21 @@ int __init umid_file_name(char *name, char *buf, int len)
 
 extern int tracing_pid;
 
-static int __init create_pid_file(void)
+static void __init create_pid_file(void)
 {
 	char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
 	char pid[sizeof("nnnnn\0")];
 	int fd, n;
 
-	if(umid_file_name("pid", file, sizeof(file))) return 0;
+	if(umid_file_name("pid", file, sizeof(file)))
+		return;
 
 	fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), 
 			  0644);
 	if(fd < 0){
 		printf("Open of machine pid file \"%s\" failed: %s\n",
 		       file, strerror(-fd));
-		return 0;
+		return;
 	}
 
 	sprintf(pid, "%d\n", os_getpid());
@@ -103,7 +106,6 @@ static int __init create_pid_file(void)
 	if(n != strlen(pid))
 		printf("Write of pid file failed - err = %d\n", -n);
 	os_close_file(fd);
-	return 0;
 }
 
 static int actually_do_remove(char *dir)
@@ -147,7 +149,8 @@ static int actually_do_remove(char *dir)
 void remove_umid_dir(void)
 {
 	char dir[strlen(uml_dir) + UMID_LEN + 1];
-	if(!umid_inited) return;
+	if (!umid_owned)
+		return;
 
 	sprintf(dir, "%s%s", uml_dir, umid);
 	actually_do_remove(dir);
@@ -155,11 +158,12 @@ void remove_umid_dir(void)
 
 char *get_umid(int only_if_set)
 {
-	if(only_if_set && umid_is_random) return(NULL);
-	return(umid);
+	if(only_if_set && umid_is_random)
+		return NULL;
+	return umid;
 }
 
-int not_dead_yet(char *dir)
+static int not_dead_yet(char *dir)
 {
 	char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
 	char pid[sizeof("nnnnn\0")], *end;
@@ -193,7 +197,8 @@ int not_dead_yet(char *dir)
 		   (p == CHOOSE_MODE(tracing_pid, os_getpid())))
 			dead = 1;
 	}
-	if(!dead) return(1);
+	if(!dead)
+		return(1);
 	return(actually_do_remove(dir));
 }
 
@@ -232,16 +237,13 @@ static int __init make_uml_dir(void)
 		strlcpy(dir, home, sizeof(dir));
 		uml_dir++;
 	}
+	strlcat(dir, uml_dir, sizeof(dir));
 	len = strlen(dir);
-	strncat(dir, uml_dir, sizeof(dir) - len);
-	len = strlen(dir);
-	if((len > 0) && (len < sizeof(dir) - 1) && (dir[len - 1] != '/')){
-		dir[len] = '/';
-		dir[len + 1] = '\0';
-	}
+	if (len > 0 && dir[len - 1] != '/')
+		strlcat(dir, "/", sizeof(dir));
 
 	uml_dir = malloc(strlen(dir) + 1);
-	if(uml_dir == NULL){
+	if (uml_dir == NULL) {
 		printf("make_uml_dir : malloc failed, errno = %d\n", errno);
 		exit(1);
 	}
@@ -286,6 +288,7 @@ static int __init make_umid(int (*printer)(const char *fmt, ...))
 		if(errno == EEXIST){
 			if(not_dead_yet(tmp)){
 				(*printer)("umid '%s' is in use\n", umid);
+				umid_owned = 0;
 				return(-1);
 			}
 			err = mkdir(tmp, 0777);
@@ -296,7 +299,8 @@ static int __init make_umid(int (*printer)(const char *fmt, ...))
 		return(-1);
 	}
 
-	return(0);
+	umid_owned = 1;
+	return 0;
 }
 
 __uml_setup("uml_dir=", set_uml_dir,
@@ -309,7 +313,8 @@ static int __init make_umid_setup(void)
 	/* one function with the ordering we need ... */
 	make_uml_dir();
 	make_umid(printf);
-	return create_pid_file();
+	create_pid_file();
+	return 0;
 }
 __uml_postsetup(make_umid_setup);
 
diff --git a/arch/um/kernel/user_util.c b/arch/um/kernel/user_util.c
index a25f3ea11fd..41d17c71511 100644
--- a/arch/um/kernel/user_util.c
+++ b/arch/um/kernel/user_util.c
@@ -128,6 +128,12 @@ void setup_machinename(char *machine_out)
 	struct utsname host;
 
 	uname(&host);
+#if defined(UML_CONFIG_UML_X86) && !defined(UML_CONFIG_64BIT)
+	if (!strcmp(host.machine, "x86_64")) {
+		strcpy(machine_out, "i686");
+		return;
+	}
+#endif
 	strcpy(machine_out, host.machine);
 }
 
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index 298d5632128..f6e64026f99 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -144,6 +144,7 @@ static int aio_thread(void *arg)
                                "errno = %d\n", errno);
                 }
                 else {
+			/* This is safe as we've just a pointer here. */
 			aio = (struct aio_context *) (long) event.data;
 			if(update_aio(aio, event.res)){
 				do_aio(ctx, aio);
diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c
index e770cb02957..ab33cb3c74e 100644
--- a/arch/um/os-Linux/elf_aux.c
+++ b/arch/um/os-Linux/elf_aux.c
@@ -14,7 +14,8 @@
 #include "mem_user.h"
 #include <kernel-offsets.h>
 
-#if HOST_ELF_CLASS == ELFCLASS32
+/* Use the one from the kernel - the host may miss it, if having old headers. */
+#if UM_ELF_CLASS == UM_ELFCLASS32
 typedef Elf32_auxv_t elf_auxv_t;
 #else
 typedef Elf64_auxv_t elf_auxv_t;
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index d32413e4b4c..d9c52387c4a 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -3,6 +3,7 @@
  * Licensed under the GPL
  */
 
+#include <unistd.h>
 #include <stdio.h>
 #include <errno.h>
 #include <signal.h>
diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c
index bd3c34aa52e..36b5c2c1328 100644
--- a/arch/um/sys-i386/ldt.c
+++ b/arch/um/sys-i386/ldt.c
@@ -83,6 +83,7 @@ int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
 			goto out;
 		}
 		p = buf;
+		break;
 	default:
 		res = -ENOSYS;
 		goto out;
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 0969d570f3b..21afa69a086 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -308,7 +308,7 @@ config HPET_TIMER
 	  present.  The HPET provides a stable time base on SMP
 	  systems, unlike the TSC, but it is more expensive to access,
 	  as it is off-chip.  You can find the HPET spec at
-	  <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
+	  <http://www.intel.com/hardwaredesign/hpetspec.htm>.
 
 config X86_PM_TIMER
 	bool "PM timer"
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 09887c96e9a..de19501aa80 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -402,8 +402,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	__pci_mmap_set_flags(dev, vma, mmap_state);
 	__pci_mmap_set_pgprot(dev, vma, mmap_state, write_combine);
 
-	ret = io_remap_page_range(vma, vma->vm_start, vma->vm_pgoff<<PAGE_SHIFT,
-			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
+	ret = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			         vma->vm_end - vma->vm_start,vma->vm_page_prot);
 
 	return ret;
 }
diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c
index cf136278444..03674daabc6 100644
--- a/arch/xtensa/kernel/platform.c
+++ b/arch/xtensa/kernel/platform.c
@@ -39,7 +39,7 @@ _F(int,  pcibios_fixup, (void), { return 0; });
 _F(int, get_rtc_time, (time_t* t), { return 0; });
 _F(int, set_rtc_time, (time_t t), { return 0; });
 
-#if CONFIG_XTENSA_CALIBRATE_CCOUNT
+#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
 _F(void, calibrate_ccount, (void),
 {
   printk ("ERROR: Cannot calibrate cpu frequency! Assuming 100MHz.\n");
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index c83bb0d4178..08ef6d82ee5 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -457,7 +457,7 @@ int
 dump_task_fpu(struct pt_regs *regs, struct task_struct *task, elf_fpregset_t *r)
 {
 /* see asm/coprocessor.h for this magic number 16 */
-#if TOTAL_CPEXTRA_SIZE > 16
+#if XTENSA_CP_EXTRA_SIZE > 16
 	do_save_fpregs (r, regs, task);
 
 	/*  For now, bit 16 means some extra state may be present:  */
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 1f5bf5d624e..513ed8d6776 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -304,7 +304,7 @@ void __init setup_arch(char **cmdline_p)
 # endif
 #endif
 
-#if CONFIG_PCI
+#ifdef CONFIG_PCI
 	platform_pcibios_init();
 #endif
 }
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index dc42cede939..e252b61e45a 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -182,7 +182,7 @@ restore_cpextra (struct _cpstate *buf)
 
 	struct task_struct *tsk = current;
 	release_all_cp(tsk);
-	return __copy_from_user(tsk->thread.cpextra, buf, TOTAL_CPEXTRA_SIZE);
+	return __copy_from_user(tsk->thread.cpextra, buf, XTENSA_CP_EXTRA_SIZE);
 #endif
 	return 0;
 }
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 1ac7d5ce745..8e423d1335c 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -68,7 +68,7 @@ void __init time_init(void)
 	 * speed for the CALIBRATE.
 	 */
 
-#if CONFIG_XTENSA_CALIBRATE_CCOUNT
+#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
 	printk("Calibrating CPU frequency ");
 	platform_calibrate_ccount();
 	printk("%d.%02d MHz\n", (int)ccount_per_jiffy/(1000000/HZ),
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 56aace84aae..5a91d6c9e66 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -239,7 +239,7 @@ void __init mem_init(void)
 	high_memory = (void *) __va(max_mapnr << PAGE_SHIFT);
 	highmemsize = 0;
 
-#if CONFIG_HIGHMEM
+#ifdef CONFIG_HIGHMEM
 #error HIGHGMEM not implemented in init.c
 #endif
 
diff --git a/drivers/acorn/char/pcf8583.c b/drivers/acorn/char/pcf8583.c
index 141b4c237a5..2b850e5860a 100644
--- a/drivers/acorn/char/pcf8583.c
+++ b/drivers/acorn/char/pcf8583.c
@@ -23,12 +23,13 @@ static struct i2c_driver pcf8583_driver;
 
 static unsigned short ignore[] = { I2C_CLIENT_END };
 static unsigned short normal_addr[] = { 0x50, I2C_CLIENT_END };
+static unsigned short *forces[] = { NULL };
 
 static struct i2c_client_address_data addr_data = {
 	.normal_i2c		= normal_addr,
 	.probe			= ignore,
 	.ignore			= ignore,
-	.force			= ignore,
+	.forces			= forces,
 };
 
 #define DAT(x) ((unsigned int)(x->dev.driver_data))
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 3b112e3542f..ce23dc8c18c 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -669,6 +669,7 @@ void class_device_destroy(struct class *cls, dev_t devt)
 int class_device_rename(struct class_device *class_dev, char *new_name)
 {
 	int error = 0;
+	char *old_class_name = NULL, *new_class_name = NULL;
 
 	class_dev = class_device_get(class_dev);
 	if (!class_dev)
@@ -677,12 +678,24 @@ int class_device_rename(struct class_device *class_dev, char *new_name)
 	pr_debug("CLASS: renaming '%s' to '%s'\n", class_dev->class_id,
 		 new_name);
 
+	if (class_dev->dev)
+		old_class_name = make_class_name(class_dev);
+
 	strlcpy(class_dev->class_id, new_name, KOBJ_NAME_LEN);
 
 	error = kobject_rename(&class_dev->kobj, new_name);
 
+	if (class_dev->dev) {
+		new_class_name = make_class_name(class_dev);
+		sysfs_create_link(&class_dev->dev->kobj, &class_dev->kobj,
+				  new_class_name);
+		sysfs_remove_link(&class_dev->dev->kobj, old_class_name);
+	}
 	class_device_put(class_dev);
 
+	kfree(old_class_name);
+	kfree(new_class_name);
+
 	return error;
 }
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index d5bbce38282..3565e979530 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -40,6 +40,9 @@
  */
 void device_bind_driver(struct device * dev)
 {
+	if (klist_node_attached(&dev->knode_driver))
+		return;
+
 	pr_debug("bound device '%s' to driver '%s'\n",
 		 dev->bus_id, dev->driver->name);
 	klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index c56f995aada..486b6e1c7df 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -483,9 +483,6 @@ static int cciss_open(struct inode *inode, struct file *filep)
 	printk(KERN_DEBUG "cciss_open %s\n", inode->i_bdev->bd_disk->disk_name);
 #endif /* CCISS_DEBUG */ 
 
-	if (host->busy_initializing)
-		return -EBUSY;
-
 	if (host->busy_initializing || drv->busy_configuring)
 		return -EBUSY;
 	/*
@@ -2991,6 +2988,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);
 
 	cciss_procinit(i);
+	hba[i]->busy_initializing = 0;
 
 	for(j=0; j < NWD; j++) { /* mfm */
 		drive_info_struct *drv = &(hba[i]->drv[j]);
@@ -3033,7 +3031,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 		add_disk(disk);
 	}
 
-	hba[i]->busy_initializing = 0;
 	return(1);
 
 clean4:
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 483d71b10cf..baedac52294 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -2373,44 +2373,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-/**
- * blkdev_scsi_issue_flush_fn - issue flush for SCSI devices
- * @q:		device queue
- * @disk:	gendisk
- * @error_sector:	error offset
- *
- * Description:
- *    Devices understanding the SCSI command set, can use this function as
- *    a helper for issuing a cache flush. Note: driver is required to store
- *    the error offset (in case of error flushing) in ->sector of struct
- *    request.
- */
-int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
-			       sector_t *error_sector)
-{
-	struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	int ret;
-
-	rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER;
-	rq->sector = 0;
-	memset(rq->cmd, 0, sizeof(rq->cmd));
-	rq->cmd[0] = 0x35;
-	rq->cmd_len = 12;
-	rq->data = NULL;
-	rq->data_len = 0;
-	rq->timeout = 60 * HZ;
-
-	ret = blk_execute_rq(q, disk, rq, 0);
-
-	if (ret && error_sector)
-		*error_sector = rq->sector;
-
-	blk_put_request(rq);
-	return ret;
-}
-
-EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn);
-
 static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
 {
 	int rw = rq_data_dir(rq);
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index aa0bf7ee008..ed4d5006fe6 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -172,7 +172,7 @@ struct bulk_cs_wrap {
  */
 struct ub_dev;
 
-#define UB_MAX_REQ_SG	4
+#define UB_MAX_REQ_SG	9	/* cdrecord requires 32KB and maybe a header */
 #define UB_MAX_SECTORS 64
 
 /*
@@ -387,7 +387,7 @@ struct ub_dev {
 	struct bulk_cs_wrap work_bcs;
 	struct usb_ctrlrequest work_cr;
 
-	int sg_stat[UB_MAX_REQ_SG+1];
+	int sg_stat[6];
 	struct ub_scsi_trace tr;
 };
 
@@ -525,12 +525,13 @@ static ssize_t ub_diag_show(struct device *dev, struct device_attribute *attr,
 	    "qlen %d qmax %d\n",
 	    sc->cmd_queue.qlen, sc->cmd_queue.qmax);
 	cnt += sprintf(page + cnt,
-	    "sg %d %d %d %d %d\n",
+	    "sg %d %d %d %d %d .. %d\n",
 	    sc->sg_stat[0],
 	    sc->sg_stat[1],
 	    sc->sg_stat[2],
 	    sc->sg_stat[3],
-	    sc->sg_stat[4]);
+	    sc->sg_stat[4],
+	    sc->sg_stat[5]);
 
 	list_for_each (p, &sc->luns) {
 		lun = list_entry(p, struct ub_lun, link);
@@ -835,7 +836,7 @@ static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun,
 		return -1;
 	}
 	cmd->nsg = n_elem;
-	sc->sg_stat[n_elem]++;
+	sc->sg_stat[n_elem < 5 ? n_elem : 5]++;
 
 	/*
 	 * build the command
@@ -891,7 +892,7 @@ static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun,
 		return -1;
 	}
 	cmd->nsg = n_elem;
-	sc->sg_stat[n_elem]++;
+	sc->sg_stat[n_elem < 5 ? n_elem : 5]++;
 
 	memcpy(&cmd->cdb, rq->cmd, rq->cmd_len);
 	cmd->cdb_len = rq->cmd_len;
@@ -1010,7 +1011,6 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	sc->last_pipe = sc->send_bulk_pipe;
 	usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->send_bulk_pipe,
 	    bcb, US_BULK_CB_WRAP_LEN, ub_urb_complete, sc);
-	sc->work_urb.transfer_flags = 0;
 
 	/* Fill what we shouldn't be filling, because usb-storage did so. */
 	sc->work_urb.actual_length = 0;
@@ -1019,7 +1019,6 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 
 	if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
 		/* XXX Clear stalls */
-		printk("ub: cmd #%d start failed (%d)\n", cmd->tag, rc); /* P3 */
 		ub_complete(&sc->work_done);
 		return rc;
 	}
@@ -1190,11 +1189,9 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 			return;
 		}
 		if (urb->status != 0) {
-			printk("ub: cmd #%d cmd status (%d)\n", cmd->tag, urb->status); /* P3 */
 			goto Bad_End;
 		}
 		if (urb->actual_length != US_BULK_CB_WRAP_LEN) {
-			printk("ub: cmd #%d xferred %d\n", cmd->tag, urb->actual_length); /* P3 */
 			/* XXX Must do reset here to unconfuse the device */
 			goto Bad_End;
 		}
@@ -1395,14 +1392,12 @@ static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe,
 	    page_address(sg->page) + sg->offset, sg->length,
 	    ub_urb_complete, sc);
-	sc->work_urb.transfer_flags = 0;
 	sc->work_urb.actual_length = 0;
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
 
 	if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) {
 		/* XXX Clear stalls */
-		printk("ub: data #%d submit failed (%d)\n", cmd->tag, rc); /* P3 */
 		ub_complete(&sc->work_done);
 		ub_state_done(sc, cmd, rc);
 		return;
@@ -1442,7 +1437,6 @@ static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 	sc->last_pipe = sc->recv_bulk_pipe;
 	usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->recv_bulk_pipe,
 	    &sc->work_bcs, US_BULK_CS_WRAP_LEN, ub_urb_complete, sc);
-	sc->work_urb.transfer_flags = 0;
 	sc->work_urb.actual_length = 0;
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
@@ -1563,7 +1557,6 @@ static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd,
 
 	usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe,
 	    (unsigned char*) cr, NULL, 0, ub_urb_complete, sc);
-	sc->work_urb.transfer_flags = 0;
 	sc->work_urb.actual_length = 0;
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
@@ -2000,17 +1993,16 @@ static int ub_sync_getmaxlun(struct ub_dev *sc)
 
 	usb_fill_control_urb(&sc->work_urb, sc->dev, sc->recv_ctrl_pipe,
 	    (unsigned char*) cr, p, 1, ub_probe_urb_complete, &compl);
-	sc->work_urb.transfer_flags = 0;
 	sc->work_urb.actual_length = 0;
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
 
 	if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) {
 		if (rc == -EPIPE) {
-			printk("%s: Stall at GetMaxLUN, using 1 LUN\n",
+			printk("%s: Stall submitting GetMaxLUN, using 1 LUN\n",
 			     sc->name); /* P3 */
 		} else {
-			printk(KERN_WARNING
+			printk(KERN_NOTICE
 			     "%s: Unable to submit GetMaxLUN (%d)\n",
 			     sc->name, rc);
 		}
@@ -2028,6 +2020,18 @@ static int ub_sync_getmaxlun(struct ub_dev *sc)
 	del_timer_sync(&timer);
 	usb_kill_urb(&sc->work_urb);
 
+	if ((rc = sc->work_urb.status) < 0) {
+		if (rc == -EPIPE) {
+			printk("%s: Stall at GetMaxLUN, using 1 LUN\n",
+			     sc->name); /* P3 */
+		} else {
+			printk(KERN_NOTICE
+			     "%s: Error at GetMaxLUN (%d)\n",
+			     sc->name, rc);
+		}
+		goto err_io;
+	}
+
 	if (sc->work_urb.actual_length != 1) {
 		printk("%s: GetMaxLUN returned %d bytes\n", sc->name,
 		    sc->work_urb.actual_length); /* P3 */
@@ -2048,6 +2052,7 @@ static int ub_sync_getmaxlun(struct ub_dev *sc)
 	kfree(p);
 	return nluns;
 
+err_io:
 err_submit:
 	kfree(p);
 err_alloc:
@@ -2080,7 +2085,6 @@ static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe)
 
 	usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe,
 	    (unsigned char*) cr, NULL, 0, ub_probe_urb_complete, &compl);
-	sc->work_urb.transfer_flags = 0;
 	sc->work_urb.actual_length = 0;
 	sc->work_urb.error_count = 0;
 	sc->work_urb.status = 0;
@@ -2213,8 +2217,10 @@ static int ub_probe(struct usb_interface *intf,
 	 * This is needed to clear toggles. It is a problem only if we do
 	 * `rmmod ub && modprobe ub` without disconnects, but we like that.
 	 */
+#if 0 /* iPod Mini fails if we do this (big white iPod works) */
 	ub_probe_clear_stall(sc, sc->recv_bulk_pipe);
 	ub_probe_clear_stall(sc, sc->send_bulk_pipe);
+#endif
 
 	/*
 	 * The way this is used by the startup code is a little specific.
@@ -2241,10 +2247,10 @@ static int ub_probe(struct usb_interface *intf,
 	for (i = 0; i < 3; i++) {
 		if ((rc = ub_sync_getmaxlun(sc)) < 0) {
 			/* 
-			 * Some devices (i.e. Iomega Zip100) need this --
-			 * apparently the bulk pipes get STALLed when the
-			 * GetMaxLUN request is processed.
-			 * XXX I have a ZIP-100, verify it does this.
+			 * This segment is taken from usb-storage. They say
+			 * that ZIP-100 needs this, but my own ZIP-100 works
+			 * fine without this.
+			 * Still, it does not seem to hurt anything.
 			 */
 			if (rc == -EPIPE) {
 				ub_probe_clear_stall(sc, sc->recv_bulk_pipe);
@@ -2313,7 +2319,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum)
 	disk->first_minor = lun->id * UB_MINORS_PER_MAJOR;
 	disk->fops = &ub_bd_fops;
 	disk->private_data = lun;
-	disk->driverfs_dev = &sc->intf->dev;	/* XXX Many to one ok? */
+	disk->driverfs_dev = &sc->intf->dev;
 
 	rc = -ENOMEM;
 	if ((q = blk_init_queue(ub_request_fn, &sc->lock)) == NULL)
@@ -2466,9 +2472,6 @@ static int __init ub_init(void)
 {
 	int rc;
 
-	/* P3 */ printk("ub: sizeof ub_scsi_cmd %zu ub_dev %zu ub_lun %zu\n",
-			sizeof(struct ub_scsi_cmd), sizeof(struct ub_dev), sizeof(struct ub_lun));
-
 	if ((rc = register_blkdev(UB_MAJOR, DRV_NAME)) != 0)
 		goto err_regblkdev;
 	devfs_mk_dir(DEVFS_NAME);
diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index de0379b6d50..c055bb630ff 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -273,7 +273,6 @@ static int hpet_mmap(struct file *file, struct vm_area_struct *vma)
 
 	vma->vm_flags |= VM_IO;
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	addr = __pa(addr);
 
 	if (io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
 					PAGE_SIZE, vma->vm_page_prot)) {
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 463351d4f94..32fa82c78c7 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -2620,7 +2620,7 @@ void ipmi_smi_msg_received(ipmi_smi_t          intf,
 	spin_lock_irqsave(&(intf->waiting_msgs_lock), flags);
 	if (!list_empty(&(intf->waiting_msgs))) {
 		list_add_tail(&(msg->link), &(intf->waiting_msgs));
-		spin_unlock(&(intf->waiting_msgs_lock));
+		spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);
 		goto out_unlock;
 	}
 	spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);
@@ -2629,9 +2629,9 @@ void ipmi_smi_msg_received(ipmi_smi_t          intf,
 	if (rv > 0) {
 		/* Could not handle the message now, just add it to a
                    list to handle later. */
-		spin_lock(&(intf->waiting_msgs_lock));
+		spin_lock_irqsave(&(intf->waiting_msgs_lock), flags);
 		list_add_tail(&(msg->link), &(intf->waiting_msgs));
-		spin_unlock(&(intf->waiting_msgs_lock));
+		spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);
 	} else if (rv == 0) {
 		ipmi_free_smi_msg(msg);
 	}
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 7e72e922b41..db358cfa7cb 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -418,12 +418,11 @@ config SENSORS_HDAPS
 	help
 	  This driver provides support for the IBM Hard Drive Active Protection
 	  System (hdaps), which provides an accelerometer and other misc. data.
-	  Supported laptops include the IBM ThinkPad T41, T42, T43, and R51.
-	  The accelerometer data is readable via sysfs.
+	  ThinkPads starting with the R50, T41, and X40 are supported.  The
+	  accelerometer data is readable via sysfs.
 
-	  This driver also provides an input class device, allowing the
-	  laptop to act as a pinball machine-esque mouse.  This is off by
-	  default but enabled via sysfs or the module parameter "mousedev".
+	  This driver also provides an absolute input class device, allowing
+	  the laptop to act as a pinball machine-esque joystick.
 
 	  Say Y here if you have an applicable laptop and want to experience
 	  the awesome power of hdaps.
diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c
index 4c56411f399..7f010761382 100644
--- a/drivers/hwmon/hdaps.c
+++ b/drivers/hwmon/hdaps.c
@@ -4,9 +4,9 @@
  * Copyright (C) 2005 Robert Love <rml@novell.com>
  * Copyright (C) 2005 Jesper Juhl <jesper.juhl@gmail.com>
  *
- * The HardDisk Active Protection System (hdaps) is present in the IBM ThinkPad
- * T41, T42, T43, R50, R50p, R51, and X40, at least.  It provides a basic
- * two-axis accelerometer and other data, such as the device's temperature.
+ * The HardDisk Active Protection System (hdaps) is present in IBM ThinkPads
+ * starting with the R40, T41, and X40.  It provides a basic two-axis
+ * accelerometer and other data, such as the device's temperature.
  *
  * This driver is based on the document by Mark A. Smith available at
  * http://www.almaden.ibm.com/cs/people/marksmith/tpaps.html and a lot of trial
@@ -487,24 +487,19 @@ static struct attribute_group hdaps_attribute_group = {
 
 /* Module stuff */
 
-/*
- * XXX: We should be able to return nonzero and halt the detection process.
- * But there is a bug in dmi_check_system() where a nonzero return from the
- * first match will result in a return of failure from dmi_check_system().
- * I fixed this; the patch is 2.6-git.  Once in a released tree, we can make
- * hdaps_dmi_match_invert() return hdaps_dmi_match(), which in turn returns 1.
- */
+/* hdaps_dmi_match - found a match.  return one, short-circuiting the hunt. */
 static int hdaps_dmi_match(struct dmi_system_id *id)
 {
 	printk(KERN_INFO "hdaps: %s detected.\n", id->ident);
-	return 0;
+	return 1;
 }
 
+/* hdaps_dmi_match_invert - found an inverted match. */
 static int hdaps_dmi_match_invert(struct dmi_system_id *id)
 {
 	hdaps_invert = 1;
 	printk(KERN_INFO "hdaps: inverting axis readings.\n");
-	return 0;
+	return hdaps_dmi_match(id);
 }
 
 #define HDAPS_DMI_MATCH_NORMAL(model)	{		\
@@ -534,6 +529,7 @@ static int __init hdaps_init(void)
 		HDAPS_DMI_MATCH_INVERT("ThinkPad R50p"),
 		HDAPS_DMI_MATCH_NORMAL("ThinkPad R50"),
 		HDAPS_DMI_MATCH_NORMAL("ThinkPad R51"),
+		HDAPS_DMI_MATCH_NORMAL("ThinkPad R52"),
 		HDAPS_DMI_MATCH_INVERT("ThinkPad T41p"),
 		HDAPS_DMI_MATCH_NORMAL("ThinkPad T41"),
 		HDAPS_DMI_MATCH_INVERT("ThinkPad T42p"),
@@ -541,6 +537,7 @@ static int __init hdaps_init(void)
 		HDAPS_DMI_MATCH_NORMAL("ThinkPad T43"),
 		HDAPS_DMI_MATCH_NORMAL("ThinkPad X40"),
 		HDAPS_DMI_MATCH_NORMAL("ThinkPad X41 Tablet"),
+		HDAPS_DMI_MATCH_NORMAL("ThinkPad X41"),
 		{ .ident = NULL }
 	};
 
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 8334496a7e0..3badfec75b1 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -245,6 +245,18 @@ config I2C_KEYWEST
 	  This support is also available as a module.  If so, the module 
 	  will be called i2c-keywest.
 
+config I2C_PMAC_SMU
+	tristate "Powermac SMU I2C interface"
+	depends on I2C && PMAC_SMU
+	help
+	  This supports the use of the I2C interface in the SMU
+	  chip on recent Apple machines like the iMac G5.  It is used
+	  among others by the thermal control driver for those machines.
+	  Say Y if you have such a machine.
+
+	  This support is also available as a module.  If so, the module
+	  will be called i2c-pmac-smu.
+
 config I2C_MPC
 	tristate "MPC107/824x/85xx/52xx"
 	depends on I2C && PPC32
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 980b3e98367..f1df00f66c6 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_I2C_ITE)		+= i2c-ite.o
 obj-$(CONFIG_I2C_IXP2000)	+= i2c-ixp2000.o
 obj-$(CONFIG_I2C_IXP4XX)	+= i2c-ixp4xx.o
 obj-$(CONFIG_I2C_KEYWEST)	+= i2c-keywest.o
+obj-$(CONFIG_I2C_PMAC_SMU)	+= i2c-pmac-smu.o
 obj-$(CONFIG_I2C_MPC)		+= i2c-mpc.o
 obj-$(CONFIG_I2C_MV64XXX)	+= i2c-mv64xxx.o
 obj-$(CONFIG_I2C_NFORCE2)	+= i2c-nforce2.o
diff --git a/drivers/i2c/busses/i2c-pmac-smu.c b/drivers/i2c/busses/i2c-pmac-smu.c
new file mode 100644
index 00000000000..8a9f5648a23
--- /dev/null
+++ b/drivers/i2c/busses/i2c-pmac-smu.c
@@ -0,0 +1,316 @@
+/*
+    i2c Support for Apple SMU Controller
+
+    Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp.
+                       <benh@kernel.crashing.org>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <asm/prom.h>
+#include <asm/of_device.h>
+#include <asm/smu.h>
+
+static int probe;
+
+MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
+MODULE_DESCRIPTION("I2C driver for Apple's SMU");
+MODULE_LICENSE("GPL");
+module_param(probe, bool, 0);
+
+
+/* Physical interface */
+struct smu_iface
+{
+	struct i2c_adapter	adapter;
+	struct completion	complete;
+	u32			busid;
+};
+
+static void smu_i2c_done(struct smu_i2c_cmd *cmd, void *misc)
+{
+	struct smu_iface	*iface = misc;
+	complete(&iface->complete);
+}
+
+/*
+ * SMBUS-type transfer entrypoint
+ */
+static s32 smu_smbus_xfer(	struct i2c_adapter*	adap,
+				u16			addr,
+				unsigned short		flags,
+				char			read_write,
+				u8			command,
+				int			size,
+				union i2c_smbus_data*	data)
+{
+	struct smu_iface	*iface = i2c_get_adapdata(adap);
+	struct smu_i2c_cmd	cmd;
+	int			rc = 0;
+	int			read = (read_write == I2C_SMBUS_READ);
+
+	cmd.info.bus = iface->busid;
+	cmd.info.devaddr = (addr << 1) | (read ? 0x01 : 0x00);
+
+	/* Prepare datas & select mode */
+	switch (size) {
+        case I2C_SMBUS_QUICK:
+		cmd.info.type = SMU_I2C_TRANSFER_SIMPLE;
+		cmd.info.datalen = 0;
+	    	break;
+        case I2C_SMBUS_BYTE:
+		cmd.info.type = SMU_I2C_TRANSFER_SIMPLE;
+		cmd.info.datalen = 1;
+		if (!read)
+			cmd.info.data[0] = data->byte;
+	    	break;
+        case I2C_SMBUS_BYTE_DATA:
+		cmd.info.type = SMU_I2C_TRANSFER_STDSUB;
+		cmd.info.datalen = 1;
+		cmd.info.sublen = 1;
+		cmd.info.subaddr[0] = command;
+		cmd.info.subaddr[1] = 0;
+		cmd.info.subaddr[2] = 0;
+		if (!read)
+			cmd.info.data[0] = data->byte;
+	    	break;
+        case I2C_SMBUS_WORD_DATA:
+		cmd.info.type = SMU_I2C_TRANSFER_STDSUB;
+		cmd.info.datalen = 2;
+		cmd.info.sublen = 1;
+		cmd.info.subaddr[0] = command;
+		cmd.info.subaddr[1] = 0;
+		cmd.info.subaddr[2] = 0;
+		if (!read) {
+			cmd.info.data[0] = data->byte & 0xff;
+			cmd.info.data[1] = (data->byte >> 8) & 0xff;
+		}
+		break;
+	/* Note that these are broken vs. the expected smbus API where
+	 * on reads, the lenght is actually returned from the function,
+	 * but I think the current API makes no sense and I don't want
+	 * any driver that I haven't verified for correctness to go
+	 * anywhere near a pmac i2c bus anyway ...
+	 */
+        case I2C_SMBUS_BLOCK_DATA:
+		cmd.info.type = SMU_I2C_TRANSFER_STDSUB;
+		cmd.info.datalen = data->block[0] + 1;
+		if (cmd.info.datalen > 6)
+			return -EINVAL;
+		if (!read)
+			memcpy(cmd.info.data, data->block, cmd.info.datalen);
+		cmd.info.sublen = 1;
+		cmd.info.subaddr[0] = command;
+		cmd.info.subaddr[1] = 0;
+		cmd.info.subaddr[2] = 0;
+		break;
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		cmd.info.type = SMU_I2C_TRANSFER_STDSUB;
+		cmd.info.datalen = data->block[0];
+		if (cmd.info.datalen > 7)
+			return -EINVAL;
+		if (!read)
+			memcpy(cmd.info.data, &data->block[1],
+			       cmd.info.datalen);
+		cmd.info.sublen = 1;
+		cmd.info.subaddr[0] = command;
+		cmd.info.subaddr[1] = 0;
+		cmd.info.subaddr[2] = 0;
+		break;
+
+        default:
+	    	return -EINVAL;
+	}
+
+	/* Turn a standardsub read into a combined mode access */
+ 	if (read_write == I2C_SMBUS_READ &&
+	    cmd.info.type == SMU_I2C_TRANSFER_STDSUB)
+		cmd.info.type = SMU_I2C_TRANSFER_COMBINED;
+
+	/* Finish filling command and submit it */
+	cmd.done = smu_i2c_done;
+	cmd.misc = iface;
+	rc = smu_queue_i2c(&cmd);
+	if (rc < 0)
+		return rc;
+	wait_for_completion(&iface->complete);
+	rc = cmd.status;
+
+	if (!read || rc < 0)
+		return rc;
+
+	switch (size) {
+        case I2C_SMBUS_BYTE:
+        case I2C_SMBUS_BYTE_DATA:
+		data->byte = cmd.info.data[0];
+	    	break;
+        case I2C_SMBUS_WORD_DATA:
+		data->word = ((u16)cmd.info.data[1]) << 8;
+		data->word |= cmd.info.data[0];
+		break;
+	/* Note that these are broken vs. the expected smbus API where
+	 * on reads, the lenght is actually returned from the function,
+	 * but I think the current API makes no sense and I don't want
+	 * any driver that I haven't verified for correctness to go
+	 * anywhere near a pmac i2c bus anyway ...
+	 */
+        case I2C_SMBUS_BLOCK_DATA:
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		memcpy(&data->block[0], cmd.info.data, cmd.info.datalen);
+		break;
+	}
+
+	return rc;
+}
+
+static u32
+smu_smbus_func(struct i2c_adapter * adapter)
+{
+	return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+	       I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+	       I2C_FUNC_SMBUS_BLOCK_DATA;
+}
+
+/* For now, we only handle combined mode (smbus) */
+static struct i2c_algorithm smu_algorithm = {
+	.smbus_xfer	= smu_smbus_xfer,
+	.functionality	= smu_smbus_func,
+};
+
+static int create_iface(struct device_node *np, struct device *dev)
+{
+	struct smu_iface* iface;
+	u32 *reg, busid;
+	int rc;
+
+	reg = (u32 *)get_property(np, "reg", NULL);
+	if (reg == NULL) {
+		printk(KERN_ERR "i2c-pmac-smu: can't find bus number !\n");
+		return -ENXIO;
+	}
+	busid = *reg;
+
+	iface = kmalloc(sizeof(struct smu_iface), GFP_KERNEL);
+	if (iface == NULL) {
+		printk(KERN_ERR "i2c-pmac-smu: can't allocate inteface !\n");
+		return -ENOMEM;
+	}
+	memset(iface, 0, sizeof(struct smu_iface));
+	init_completion(&iface->complete);
+	iface->busid = busid;
+
+	dev_set_drvdata(dev, iface);
+
+	sprintf(iface->adapter.name, "smu-i2c-%02x", busid);
+	iface->adapter.algo = &smu_algorithm;
+	iface->adapter.algo_data = NULL;
+	iface->adapter.client_register = NULL;
+	iface->adapter.client_unregister = NULL;
+	i2c_set_adapdata(&iface->adapter, iface);
+	iface->adapter.dev.parent = dev;
+
+	rc = i2c_add_adapter(&iface->adapter);
+	if (rc) {
+		printk(KERN_ERR "i2c-pamc-smu.c: Adapter %s registration "
+		       "failed\n", iface->adapter.name);
+		i2c_set_adapdata(&iface->adapter, NULL);
+	}
+
+	if (probe) {
+		unsigned char addr;
+		printk("Probe: ");
+		for (addr = 0x00; addr <= 0x7f; addr++) {
+			if (i2c_smbus_xfer(&iface->adapter,addr,
+					   0,0,0,I2C_SMBUS_QUICK,NULL) >= 0)
+				printk("%02x ", addr);
+		}
+		printk("\n");
+	}
+
+	printk(KERN_INFO "SMU i2c bus %x registered\n", busid);
+
+	return 0;
+}
+
+static int dispose_iface(struct device *dev)
+{
+	struct smu_iface *iface = dev_get_drvdata(dev);
+	int rc;
+
+	rc = i2c_del_adapter(&iface->adapter);
+	i2c_set_adapdata(&iface->adapter, NULL);
+	/* We aren't that prepared to deal with this... */
+	if (rc)
+		printk("i2c-pmac-smu.c: Failed to remove bus %s !\n",
+		       iface->adapter.name);
+	dev_set_drvdata(dev, NULL);
+	kfree(iface);
+
+	return 0;
+}
+
+
+static int create_iface_of_platform(struct of_device* dev,
+				    const struct of_device_id *match)
+{
+	return create_iface(dev->node, &dev->dev);
+}
+
+
+static int dispose_iface_of_platform(struct of_device* dev)
+{
+	return dispose_iface(&dev->dev);
+}
+
+
+static struct of_device_id i2c_smu_match[] =
+{
+	{
+		.compatible	= "smu-i2c",
+	},
+	{},
+};
+static struct of_platform_driver i2c_smu_of_platform_driver =
+{
+	.name 		= "i2c-smu",
+	.match_table	= i2c_smu_match,
+	.probe		= create_iface_of_platform,
+	.remove		= dispose_iface_of_platform
+};
+
+
+static int __init i2c_pmac_smu_init(void)
+{
+	of_register_driver(&i2c_smu_of_platform_driver);
+	return 0;
+}
+
+
+static void __exit i2c_pmac_smu_cleanup(void)
+{
+	of_unregister_driver(&i2c_smu_of_platform_driver);
+}
+
+module_init(i2c_pmac_smu_init);
+module_exit(i2c_pmac_smu_cleanup);
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index 2bd8b1cc57c..e23836d0e21 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -412,8 +412,8 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv)
 
 	hdr_size = data_offset(rmpp_mad->mad_hdr.mgmt_class);
 	data_size = sizeof(struct ib_rmpp_mad) - hdr_size;
-	pad = data_size - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
-	if (pad > data_size || pad < 0)
+	pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
+	if (pad > IB_MGMT_RMPP_DATA || pad < 0)
 		pad = 0;
 
 	return hdr_size + rmpp_recv->seg_num * data_size - pad;
@@ -583,6 +583,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_rmpp_mad *rmpp_mad;
 	int timeout;
+	u32 paylen;
 
 	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
@@ -590,11 +591,9 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
 
 	if (mad_send_wr->seg_num == 1) {
 		rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST;
-		rmpp_mad->rmpp_hdr.paylen_newwin =
-			cpu_to_be32(mad_send_wr->total_seg *
-				    (sizeof(struct ib_rmpp_mad) -
-				       offsetof(struct ib_rmpp_mad, data)) -
-				    mad_send_wr->pad);
+		paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA -
+			 mad_send_wr->pad;
+		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
 		mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad);
 	} else {
 		mad_send_wr->send_wr.num_sge = 2;
@@ -608,10 +607,8 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr)
 
 	if (mad_send_wr->seg_num == mad_send_wr->total_seg) {
 		rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST;
-		rmpp_mad->rmpp_hdr.paylen_newwin =
-			cpu_to_be32(sizeof(struct ib_rmpp_mad) -
-				    offsetof(struct ib_rmpp_mad, data) -
-				    mad_send_wr->pad);
+		paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad;
+		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen);
 	}
 
 	/* 2 seconds for an ACK until we can find the packet lifetime */
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 7c2f03057dd..a64d6b4dcc1 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -334,10 +334,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 			ret = -EINVAL;
 			goto err_ah;
 		}
-		/* Validate that management class can support RMPP */
+
+		/* Validate that the management class can support RMPP */
 		if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
 			hdr_len = offsetof(struct ib_sa_mad, data);
-			data_len = length;
+			data_len = length - hdr_len;
 		} else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
 			    (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) {
 				hdr_len = offsetof(struct ib_vendor_mad, data);
diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c
index 18f0981eb0c..78152a8ad17 100644
--- a/drivers/infiniband/hw/mthca/mthca_eq.c
+++ b/drivers/infiniband/hw/mthca/mthca_eq.c
@@ -476,12 +476,8 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	int i;
 	u8 status;
 
-	/* Make sure EQ size is aligned to a power of 2 size. */
-	for (i = 1; i < nent; i <<= 1)
-		; /* nothing */
-	nent = i;
-
-	eq->dev = dev;
+	eq->dev  = dev;
+	eq->nent = roundup_pow_of_two(max(nent, 2));
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -512,7 +508,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 		memset(eq->page_list[i].buf, 0, PAGE_SIZE);
 	}
 
-	for (i = 0; i < nent; ++i)
+	for (i = 0; i < eq->nent; ++i)
 		set_eqe_hw(get_eqe(eq, i));
 
 	eq->eqn = mthca_alloc(&dev->eq_table.alloc);
@@ -528,8 +524,6 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	if (err)
 		goto err_out_free_eq;
 
-	eq->nent = nent;
-
 	memset(eq_context, 0, sizeof *eq_context);
 	eq_context->flags           = cpu_to_be32(MTHCA_EQ_STATUS_OK   |
 						  MTHCA_EQ_OWNER_HW    |
@@ -538,7 +532,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	if (mthca_is_memfree(dev))
 		eq_context->flags  |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL);
 
-	eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+	eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24);
 	if (mthca_is_memfree(dev)) {
 		eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num);
 	} else {
@@ -569,7 +563,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 	dev->eq_table.arm_mask |= eq->eqn_mask;
 
 	mthca_dbg(dev, "Allocated EQ %d with %d entries\n",
-		  eq->eqn, nent);
+		  eq->eqn, eq->nent);
 
 	return err;
 
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index bcef06bf15e..5fa00669f9b 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -227,7 +227,6 @@ static void mthca_wq_init(struct mthca_wq *wq)
 	wq->last_comp = wq->max - 1;
 	wq->head      = 0;
 	wq->tail      = 0;
-	wq->last      = NULL;
 }
 
 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
@@ -687,7 +686,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	}
 
 	if (attr_mask & IB_QP_TIMEOUT) {
-		qp_context->pri_path.ackto = attr->timeout;
+		qp_context->pri_path.ackto = attr->timeout << 3;
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
 	}
 
@@ -1103,6 +1102,9 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
 		}
 	}
 
+	qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
+	qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1);
+
 	return 0;
 }
 
@@ -1583,15 +1585,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			goto out;
 		}
 
-		if (prev_wqe) {
-			((struct mthca_next_seg *) prev_wqe)->nda_op =
-				cpu_to_be32(((ind << qp->sq.wqe_shift) +
-					     qp->send_wqe_offset) |
-					    mthca_opcode[wr->opcode]);
-			wmb();
-			((struct mthca_next_seg *) prev_wqe)->ee_nds =
-				cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
-		}
+		((struct mthca_next_seg *) prev_wqe)->nda_op =
+			cpu_to_be32(((ind << qp->sq.wqe_shift) +
+				     qp->send_wqe_offset) |
+				    mthca_opcode[wr->opcode]);
+		wmb();
+		((struct mthca_next_seg *) prev_wqe)->ee_nds =
+			cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size);
 
 		if (!size0) {
 			size0 = size;
@@ -1688,13 +1688,11 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 		qp->wrid[ind] = wr->wr_id;
 
-		if (likely(prev_wqe)) {
-			((struct mthca_next_seg *) prev_wqe)->nda_op =
-				cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
-			wmb();
-			((struct mthca_next_seg *) prev_wqe)->ee_nds =
-				cpu_to_be32(MTHCA_NEXT_DBD | size);
-		}
+		((struct mthca_next_seg *) prev_wqe)->nda_op =
+			cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
+		wmb();
+		((struct mthca_next_seg *) prev_wqe)->ee_nds =
+			cpu_to_be32(MTHCA_NEXT_DBD | size);
 
 		if (!size0)
 			size0 = size;
@@ -1905,15 +1903,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			goto out;
 		}
 
-		if (likely(prev_wqe)) {
-			((struct mthca_next_seg *) prev_wqe)->nda_op =
-				cpu_to_be32(((ind << qp->sq.wqe_shift) +
-					     qp->send_wqe_offset) |
-					    mthca_opcode[wr->opcode]);
-			wmb();
-			((struct mthca_next_seg *) prev_wqe)->ee_nds =
-				cpu_to_be32(MTHCA_NEXT_DBD | size);
-		}
+		((struct mthca_next_seg *) prev_wqe)->nda_op =
+			cpu_to_be32(((ind << qp->sq.wqe_shift) +
+				     qp->send_wqe_offset) |
+				    mthca_opcode[wr->opcode]);
+		wmb();
+		((struct mthca_next_seg *) prev_wqe)->ee_nds =
+			cpu_to_be32(MTHCA_NEXT_DBD | size);
 
 		if (!size0) {
 			size0 = size;
@@ -2127,5 +2123,6 @@ void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev)
 	for (i = 0; i < 2; ++i)
 		mthca_CONF_SPECIAL_QP(dev, i, 0, &status);
 
+	mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps);
 	mthca_alloc_cleanup(&dev->qp_table.alloc);
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 75cd2d84ef1..18998d48c53 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -172,6 +172,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
 			scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
 	}
 
+	srq->last = get_wqe(srq, srq->max - 1);
+
 	return 0;
 }
 
@@ -189,7 +191,6 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 
 	srq->max      = attr->max_wr;
 	srq->max_gs   = attr->max_sge;
-	srq->last     = NULL;
 	srq->counter  = 0;
 
 	if (mthca_is_memfree(dev))
@@ -409,7 +410,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 			mthca_err(dev, "SRQ %06x full\n", srq->srqn);
 			err = -ENOMEM;
 			*bad_wr = wr;
-			return nreq;
+			break;
 		}
 
 		wqe       = get_wqe(srq, ind);
@@ -427,7 +428,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 			err = -EINVAL;
 			*bad_wr = wr;
 			srq->last = prev_wqe;
-			return nreq;
+			break;
 		}
 
 		for (i = 0; i < wr->num_sge; ++i) {
@@ -446,20 +447,16 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 			((struct mthca_data_seg *) wqe)->addr = 0;
 		}
 
-		if (likely(prev_wqe)) {
-			((struct mthca_next_seg *) prev_wqe)->nda_op =
-				cpu_to_be32((ind << srq->wqe_shift) | 1);
-			wmb();
-			((struct mthca_next_seg *) prev_wqe)->ee_nds =
-				cpu_to_be32(MTHCA_NEXT_DBD);
-		}
+		((struct mthca_next_seg *) prev_wqe)->nda_op =
+			cpu_to_be32((ind << srq->wqe_shift) | 1);
+		wmb();
+		((struct mthca_next_seg *) prev_wqe)->ee_nds =
+			cpu_to_be32(MTHCA_NEXT_DBD);
 
 		srq->wrid[ind]  = wr->wr_id;
 		srq->first_free = next_ind;
 	}
 
-	return nreq;
-
 	if (likely(nreq)) {
 		__be32 doorbell[2];
 
@@ -503,7 +500,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 			mthca_err(dev, "SRQ %06x full\n", srq->srqn);
 			err = -ENOMEM;
 			*bad_wr = wr;
-			return nreq;
+			break;
 		}
 
 		wqe       = get_wqe(srq, ind);
@@ -519,7 +516,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 		if (unlikely(wr->num_sge > srq->max_gs)) {
 			err = -EINVAL;
 			*bad_wr = wr;
-			return nreq;
+			break;
 		}
 
 		for (i = 0; i < wr->num_sge; ++i) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index bea960b8191..4ea1c1ca85b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -257,7 +257,7 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
 
 void ipoib_mcast_restart_task(void *dev_ptr);
 int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev);
+int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
 
 void ipoib_mcast_dev_down(struct net_device *dev);
 void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ef0e3894863..f7440096b5e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -432,7 +432,7 @@ int ipoib_ib_dev_down(struct net_device *dev)
 		flush_workqueue(ipoib_workqueue);
 	}
 
-	ipoib_mcast_stop_thread(dev);
+	ipoib_mcast_stop_thread(dev, 1);
 
 	/*
 	 * Flush the multicast groups first so we stop any multicast joins. The
@@ -599,7 +599,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
 
 	ipoib_dbg(priv, "cleaning up ib_dev\n");
 
-	ipoib_mcast_stop_thread(dev);
+	ipoib_mcast_stop_thread(dev, 1);
 
 	/* Delete the broadcast address and the local address */
 	ipoib_mcast_dev_down(dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 49d120d2b92..704f48e0b6a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1005,6 +1005,7 @@ debug_failed:
 
 register_failed:
 	ib_unregister_event_handler(&priv->event_handler);
+	flush_scheduled_work();
 
 event_failed:
 	ipoib_dev_cleanup(priv->dev);
@@ -1057,6 +1058,7 @@ static void ipoib_remove_one(struct ib_device *device)
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
 		ib_unregister_event_handler(&priv->event_handler);
+		flush_scheduled_work();
 
 		unregister_netdev(priv->dev);
 		ipoib_dev_cleanup(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index aca7aea18a6..36ce29836bf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -145,7 +145,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
 
 	mcast->dev = dev;
 	mcast->created = jiffies;
-	mcast->backoff = HZ;
+	mcast->backoff = 1;
 	mcast->logcount = 0;
 
 	INIT_LIST_HEAD(&mcast->list);
@@ -396,7 +396,7 @@ static void ipoib_mcast_join_complete(int status,
 			IPOIB_GID_ARG(mcast->mcmember.mgid), status);
 
 	if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
-		mcast->backoff = HZ;
+		mcast->backoff = 1;
 		down(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
 			queue_work(ipoib_workqueue, &priv->mcast_task);
@@ -496,7 +496,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
 			queue_delayed_work(ipoib_workqueue,
 					   &priv->mcast_task,
-					   mcast->backoff);
+					   mcast->backoff * HZ);
 		up(&mcast_mutex);
 	} else
 		mcast->query_id = ret;
@@ -598,7 +598,7 @@ int ipoib_mcast_start_thread(struct net_device *dev)
 	return 0;
 }
 
-int ipoib_mcast_stop_thread(struct net_device *dev)
+int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_mcast *mcast;
@@ -610,7 +610,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
 	cancel_delayed_work(&priv->mcast_task);
 	up(&mcast_mutex);
 
-	flush_workqueue(ipoib_workqueue);
+	if (flush)
+		flush_workqueue(ipoib_workqueue);
 
 	if (priv->broadcast && priv->broadcast->query) {
 		ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
@@ -832,7 +833,7 @@ void ipoib_mcast_restart_task(void *dev_ptr)
 
 	ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
-	ipoib_mcast_stop_thread(dev);
+	ipoib_mcast_stop_thread(dev, 0);
 
 	spin_lock_irqsave(&priv->lock, flags);
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 88636a20452..14ae5583e19 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -308,6 +308,7 @@ static struct input_device_id *input_match_device(struct input_device_id *id, st
 		MATCH_BIT(ledbit, LED_MAX);
 		MATCH_BIT(sndbit, SND_MAX);
 		MATCH_BIT(ffbit,  FF_MAX);
+		MATCH_BIT(swbit,  SW_MAX);
 
 		return id;
 	}
diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c
index 0a2536d6240..657817a591f 100644
--- a/drivers/isdn/hisax/st5481_b.c
+++ b/drivers/isdn/hisax/st5481_b.c
@@ -209,9 +209,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode)
 	bcs->mode = mode;
 
 	// Cancel all USB transfers on this B channel
-	b_out->urb[0]->transfer_flags |= URB_ASYNC_UNLINK;
 	usb_unlink_urb(b_out->urb[0]);
-	b_out->urb[1]->transfer_flags |= URB_ASYNC_UNLINK;
 	usb_unlink_urb(b_out->urb[1]);
 	b_out->busy = 0;
 
diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c
index ffd5b2d4555..89fbeb58485 100644
--- a/drivers/isdn/hisax/st5481_usb.c
+++ b/drivers/isdn/hisax/st5481_usb.c
@@ -645,9 +645,7 @@ void st5481_in_mode(struct st5481_in *in, int mode)
 
 	in->mode = mode;
 
-	in->urb[0]->transfer_flags |= URB_ASYNC_UNLINK;
 	usb_unlink_urb(in->urb[0]);
-	in->urb[1]->transfer_flags |= URB_ASYNC_UNLINK;
 	usb_unlink_urb(in->urb[1]);
 
 	if (in->mode != L1_MODE_NULL) {
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index fb535737d17..a85ac18dd21 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -8,21 +8,15 @@
  */
 
 /*
- * For now, this driver includes:
- * - RTC get & set
- * - reboot & shutdown commands
- * all synchronous with IRQ disabled (ugh)
- *
  * TODO:
- *   rework in a way the PMU driver works, that is asynchronous
- *   with a queue of commands. I'll do that as soon as I have an
- *   SMU based machine at hand. Some more cleanup is needed too,
- *   like maybe fitting it into a platform device, etc...
- *   Also check what's up with cache coherency, and if we really
- *   can't do better than flushing the cache, maybe build a table
- *   of command len/reply len like the PMU driver to only flush
- *   what is actually necessary.
- *   --BenH.
+ *  - maybe add timeout to commands ?
+ *  - blocking version of time functions
+ *  - polling version of i2c commands (including timer that works with
+ *    interrutps off)
+ *  - maybe avoid some data copies with i2c by directly using the smu cmd
+ *    buffer and a lower level internal interface
+ *  - understand SMU -> CPU events and implement reception of them via
+ *    the userland interface
  */
 
 #include <linux/config.h>
@@ -36,6 +30,11 @@
 #include <linux/jiffies.h>
 #include <linux/interrupt.h>
 #include <linux/rtc.h>
+#include <linux/completion.h>
+#include <linux/miscdevice.h>
+#include <linux/delay.h>
+#include <linux/sysdev.h>
+#include <linux/poll.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
@@ -45,8 +44,13 @@
 #include <asm/smu.h>
 #include <asm/sections.h>
 #include <asm/abs_addr.h>
+#include <asm/uaccess.h>
+#include <asm/of_device.h>
+
+#define VERSION "0.6"
+#define AUTHOR  "(c) 2005 Benjamin Herrenschmidt, IBM Corp."
 
-#define DEBUG_SMU 1
+#undef DEBUG_SMU
 
 #ifdef DEBUG_SMU
 #define DPRINTK(fmt, args...) do { printk(KERN_DEBUG fmt , ##args); } while (0)
@@ -57,20 +61,30 @@
 /*
  * This is the command buffer passed to the SMU hardware
  */
+#define SMU_MAX_DATA	254
+
 struct smu_cmd_buf {
 	u8 cmd;
 	u8 length;
-	u8 data[0x0FFE];
+	u8 data[SMU_MAX_DATA];
 };
 
 struct smu_device {
 	spinlock_t		lock;
 	struct device_node	*of_node;
-	int			db_ack;		/* doorbell ack GPIO */
-	int			db_req;		/* doorbell req GPIO */
+	struct of_device	*of_dev;
+	int			doorbell;	/* doorbell gpio */
 	u32 __iomem		*db_buf;	/* doorbell buffer */
+	int			db_irq;
+	int			msg;
+	int			msg_irq;
 	struct smu_cmd_buf	*cmd_buf;	/* command buffer virtual */
 	u32			cmd_buf_abs;	/* command buffer absolute */
+	struct list_head	cmd_list;
+	struct smu_cmd		*cmd_cur;	/* pending command */
+	struct list_head	cmd_i2c_list;
+	struct smu_i2c_cmd	*cmd_i2c_cur;	/* pending i2c command */
+	struct timer_list	i2c_timer;
 };
 
 /*
@@ -79,113 +93,243 @@ struct smu_device {
  */
 static struct smu_device	*smu;
 
+
 /*
- * SMU low level communication stuff
+ * SMU driver low level stuff
  */
-static inline int smu_cmd_stat(struct smu_cmd_buf *cmd_buf, u8 cmd_ack)
-{
-	rmb();
-	return cmd_buf->cmd == cmd_ack && cmd_buf->length != 0;
-}
 
-static inline u8 smu_save_ack_cmd(struct smu_cmd_buf *cmd_buf)
+static void smu_start_cmd(void)
 {
-	return (~cmd_buf->cmd) & 0xff;
-}
+	unsigned long faddr, fend;
+	struct smu_cmd *cmd;
 
-static void smu_send_cmd(struct smu_device *dev)
-{
-	/* SMU command buf is currently cacheable, we need a physical
-	 * address. This isn't exactly a DMA mapping here, I suspect
+	if (list_empty(&smu->cmd_list))
+		return;
+
+	/* Fetch first command in queue */
+	cmd = list_entry(smu->cmd_list.next, struct smu_cmd, link);
+	smu->cmd_cur = cmd;
+	list_del(&cmd->link);
+
+	DPRINTK("SMU: starting cmd %x, %d bytes data\n", cmd->cmd,
+		cmd->data_len);
+	DPRINTK("SMU: data buffer: %02x %02x %02x %02x ...\n",
+		((u8 *)cmd->data_buf)[0], ((u8 *)cmd->data_buf)[1],
+		((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3]);
+
+	/* Fill the SMU command buffer */
+	smu->cmd_buf->cmd = cmd->cmd;
+	smu->cmd_buf->length = cmd->data_len;
+	memcpy(smu->cmd_buf->data, cmd->data_buf, cmd->data_len);
+
+	/* Flush command and data to RAM */
+	faddr = (unsigned long)smu->cmd_buf;
+	fend = faddr + smu->cmd_buf->length + 2;
+	flush_inval_dcache_range(faddr, fend);
+
+	/* This isn't exactly a DMA mapping here, I suspect
 	 * the SMU is actually communicating with us via i2c to the
 	 * northbridge or the CPU to access RAM.
 	 */
-	writel(dev->cmd_buf_abs, dev->db_buf);
+	writel(smu->cmd_buf_abs, smu->db_buf);
 
 	/* Ring the SMU doorbell */
-	pmac_do_feature_call(PMAC_FTR_WRITE_GPIO, NULL, dev->db_req, 4);
-	pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, dev->db_req, 4);
+	pmac_do_feature_call(PMAC_FTR_WRITE_GPIO, NULL, smu->doorbell, 4);
 }
 
-static int smu_cmd_done(struct smu_device *dev)
+
+static irqreturn_t smu_db_intr(int irq, void *arg, struct pt_regs *regs)
 {
-	unsigned long wait = 0;
-	int gpio;
+	unsigned long flags;
+	struct smu_cmd *cmd;
+	void (*done)(struct smu_cmd *cmd, void *misc) = NULL;
+	void *misc = NULL;
+	u8 gpio;
+	int rc = 0;
 
-	/* Check the SMU doorbell */
-	do  {
-		gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO,
-					    NULL, dev->db_ack);
-		if ((gpio & 7) == 7)
-			return 0;
-		udelay(100);
-	} while(++wait < 10000);
+	/* SMU completed the command, well, we hope, let's make sure
+	 * of it
+	 */
+	spin_lock_irqsave(&smu->lock, flags);
 
-	printk(KERN_ERR "SMU timeout !\n");
-	return -ENXIO;
+	gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, smu->doorbell);
+	if ((gpio & 7) != 7)
+		return IRQ_HANDLED;
+
+	cmd = smu->cmd_cur;
+	smu->cmd_cur = NULL;
+	if (cmd == NULL)
+		goto bail;
+
+	if (rc == 0) {
+		unsigned long faddr;
+		int reply_len;
+		u8 ack;
+
+		/* CPU might have brought back the cache line, so we need
+		 * to flush again before peeking at the SMU response. We
+		 * flush the entire buffer for now as we haven't read the
+		 * reply lenght (it's only 2 cache lines anyway)
+		 */
+		faddr = (unsigned long)smu->cmd_buf;
+		flush_inval_dcache_range(faddr, faddr + 256);
+
+		/* Now check ack */
+		ack = (~cmd->cmd) & 0xff;
+		if (ack != smu->cmd_buf->cmd) {
+			DPRINTK("SMU: incorrect ack, want %x got %x\n",
+				ack, smu->cmd_buf->cmd);
+			rc = -EIO;
+		}
+		reply_len = rc == 0 ? smu->cmd_buf->length : 0;
+		DPRINTK("SMU: reply len: %d\n", reply_len);
+		if (reply_len > cmd->reply_len) {
+			printk(KERN_WARNING "SMU: reply buffer too small,"
+			       "got %d bytes for a %d bytes buffer\n",
+			       reply_len, cmd->reply_len);
+			reply_len = cmd->reply_len;
+		}
+		cmd->reply_len = reply_len;
+		if (cmd->reply_buf && reply_len)
+			memcpy(cmd->reply_buf, smu->cmd_buf->data, reply_len);
+	}
+
+	/* Now complete the command. Write status last in order as we lost
+	 * ownership of the command structure as soon as it's no longer -1
+	 */
+	done = cmd->done;
+	misc = cmd->misc;
+	mb();
+	cmd->status = rc;
+ bail:
+	/* Start next command if any */
+	smu_start_cmd();
+	spin_unlock_irqrestore(&smu->lock, flags);
+
+	/* Call command completion handler if any */
+	if (done)
+		done(cmd, misc);
+
+	/* It's an edge interrupt, nothing to do */
+	return IRQ_HANDLED;
 }
 
-static int smu_do_cmd(struct smu_device *dev)
+
+static irqreturn_t smu_msg_intr(int irq, void *arg, struct pt_regs *regs)
 {
-	int rc;
-	u8 cmd_ack;
+	/* I don't quite know what to do with this one, we seem to never
+	 * receive it, so I suspect we have to arm it someway in the SMU
+	 * to start getting events that way.
+	 */
+
+	printk(KERN_INFO "SMU: message interrupt !\n");
 
-	DPRINTK("SMU do_cmd %02x len=%d %02x\n",
-		dev->cmd_buf->cmd, dev->cmd_buf->length,
-		dev->cmd_buf->data[0]);
+	/* It's an edge interrupt, nothing to do */
+	return IRQ_HANDLED;
+}
 
-	cmd_ack = smu_save_ack_cmd(dev->cmd_buf);
 
-	/* Clear cmd_buf cache lines */
-	flush_inval_dcache_range((unsigned long)dev->cmd_buf,
-				 ((unsigned long)dev->cmd_buf) +
-				 sizeof(struct smu_cmd_buf));
-	smu_send_cmd(dev);
-	rc = smu_cmd_done(dev);
-	if (rc == 0)
-		rc = smu_cmd_stat(dev->cmd_buf, cmd_ack) ? 0 : -1;
+/*
+ * Queued command management.
+ *
+ */
 
-	DPRINTK("SMU do_cmd %02x len=%d %02x => %d (%02x)\n",
-		dev->cmd_buf->cmd, dev->cmd_buf->length,
-		dev->cmd_buf->data[0], rc, cmd_ack);
+int smu_queue_cmd(struct smu_cmd *cmd)
+{
+	unsigned long flags;
 
-	return rc;
+	if (smu == NULL)
+		return -ENODEV;
+	if (cmd->data_len > SMU_MAX_DATA ||
+	    cmd->reply_len > SMU_MAX_DATA)
+		return -EINVAL;
+
+	cmd->status = 1;
+	spin_lock_irqsave(&smu->lock, flags);
+	list_add_tail(&cmd->link, &smu->cmd_list);
+	if (smu->cmd_cur == NULL)
+		smu_start_cmd();
+	spin_unlock_irqrestore(&smu->lock, flags);
+
+	return 0;
 }
+EXPORT_SYMBOL(smu_queue_cmd);
 
-/* RTC low level commands */
-static inline int bcd2hex (int n)
+
+int smu_queue_simple(struct smu_simple_cmd *scmd, u8 command,
+		     unsigned int data_len,
+		     void (*done)(struct smu_cmd *cmd, void *misc),
+		     void *misc, ...)
 {
-	return (((n & 0xf0) >> 4) * 10) + (n & 0xf);
+	struct smu_cmd *cmd = &scmd->cmd;
+	va_list list;
+	int i;
+
+	if (data_len > sizeof(scmd->buffer))
+		return -EINVAL;
+
+	memset(scmd, 0, sizeof(*scmd));
+	cmd->cmd = command;
+	cmd->data_len = data_len;
+	cmd->data_buf = scmd->buffer;
+	cmd->reply_len = sizeof(scmd->buffer);
+	cmd->reply_buf = scmd->buffer;
+	cmd->done = done;
+	cmd->misc = misc;
+
+	va_start(list, misc);
+	for (i = 0; i < data_len; ++i)
+		scmd->buffer[i] = (u8)va_arg(list, int);
+	va_end(list);
+
+	return smu_queue_cmd(cmd);
 }
+EXPORT_SYMBOL(smu_queue_simple);
 
-static inline int hex2bcd (int n)
+
+void smu_poll(void)
 {
-	return ((n / 10) << 4) + (n % 10);
+	u8 gpio;
+
+	if (smu == NULL)
+		return;
+
+	gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, smu->doorbell);
+	if ((gpio & 7) == 7)
+		smu_db_intr(smu->db_irq, smu, NULL);
 }
+EXPORT_SYMBOL(smu_poll);
+
 
-#if 0
-static inline void smu_fill_set_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf)
+void smu_done_complete(struct smu_cmd *cmd, void *misc)
 {
-	cmd_buf->cmd = 0x8e;
-	cmd_buf->length = 8;
-	cmd_buf->data[0] = 0x00;
-	memset(cmd_buf->data + 1, 0, 7);
+	struct completion *comp = misc;
+
+	complete(comp);
 }
+EXPORT_SYMBOL(smu_done_complete);
+
 
-static inline void smu_fill_get_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf)
+void smu_spinwait_cmd(struct smu_cmd *cmd)
 {
-	cmd_buf->cmd = 0x8e;
-	cmd_buf->length = 1;
-	cmd_buf->data[0] = 0x01;
+	while(cmd->status == 1)
+		smu_poll();
+}
+EXPORT_SYMBOL(smu_spinwait_cmd);
+
+
+/* RTC low level commands */
+static inline int bcd2hex (int n)
+{
+	return (((n & 0xf0) >> 4) * 10) + (n & 0xf);
 }
 
-static inline void smu_fill_dis_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf)
+
+static inline int hex2bcd (int n)
 {
-	cmd_buf->cmd = 0x8e;
-	cmd_buf->length = 1;
-	cmd_buf->data[0] = 0x02;
+	return ((n / 10) << 4) + (n % 10);
 }
-#endif
+
 
 static inline void smu_fill_set_rtc_cmd(struct smu_cmd_buf *cmd_buf,
 					struct rtc_time *time)
@@ -202,100 +346,96 @@ static inline void smu_fill_set_rtc_cmd(struct smu_cmd_buf *cmd_buf,
 	cmd_buf->data[7] = hex2bcd(time->tm_year - 100);
 }
 
-static inline void smu_fill_get_rtc_cmd(struct smu_cmd_buf *cmd_buf)
-{
-	cmd_buf->cmd = 0x8e;
-	cmd_buf->length = 1;
-	cmd_buf->data[0] = 0x81;
-}
 
-static void smu_parse_get_rtc_reply(struct smu_cmd_buf *cmd_buf,
-				    struct rtc_time *time)
+int smu_get_rtc_time(struct rtc_time *time, int spinwait)
 {
-	time->tm_sec = bcd2hex(cmd_buf->data[0]);
-	time->tm_min = bcd2hex(cmd_buf->data[1]);
-	time->tm_hour = bcd2hex(cmd_buf->data[2]);
-	time->tm_wday = bcd2hex(cmd_buf->data[3]);
-	time->tm_mday = bcd2hex(cmd_buf->data[4]);
-	time->tm_mon = bcd2hex(cmd_buf->data[5]) - 1;
-	time->tm_year = bcd2hex(cmd_buf->data[6]) + 100;
-}
-
-int smu_get_rtc_time(struct rtc_time *time)
-{
-	unsigned long flags;
+	struct smu_simple_cmd cmd;
 	int rc;
 
 	if (smu == NULL)
 		return -ENODEV;
 
 	memset(time, 0, sizeof(struct rtc_time));
-	spin_lock_irqsave(&smu->lock, flags);
-	smu_fill_get_rtc_cmd(smu->cmd_buf);
-	rc = smu_do_cmd(smu);
-	if (rc == 0)
-		smu_parse_get_rtc_reply(smu->cmd_buf, time);
-	spin_unlock_irqrestore(&smu->lock, flags);
+	rc = smu_queue_simple(&cmd, SMU_CMD_RTC_COMMAND, 1, NULL, NULL,
+			      SMU_CMD_RTC_GET_DATETIME);
+	if (rc)
+		return rc;
+	smu_spinwait_simple(&cmd);
 
-	return rc;
+	time->tm_sec = bcd2hex(cmd.buffer[0]);
+	time->tm_min = bcd2hex(cmd.buffer[1]);
+	time->tm_hour = bcd2hex(cmd.buffer[2]);
+	time->tm_wday = bcd2hex(cmd.buffer[3]);
+	time->tm_mday = bcd2hex(cmd.buffer[4]);
+	time->tm_mon = bcd2hex(cmd.buffer[5]) - 1;
+	time->tm_year = bcd2hex(cmd.buffer[6]) + 100;
+
+	return 0;
 }
 
-int smu_set_rtc_time(struct rtc_time *time)
+
+int smu_set_rtc_time(struct rtc_time *time, int spinwait)
 {
-	unsigned long flags;
+	struct smu_simple_cmd cmd;
 	int rc;
 
 	if (smu == NULL)
 		return -ENODEV;
 
-	spin_lock_irqsave(&smu->lock, flags);
-	smu_fill_set_rtc_cmd(smu->cmd_buf, time);
-	rc = smu_do_cmd(smu);
-	spin_unlock_irqrestore(&smu->lock, flags);
+	rc = smu_queue_simple(&cmd, SMU_CMD_RTC_COMMAND, 8, NULL, NULL,
+			      SMU_CMD_RTC_SET_DATETIME,
+			      hex2bcd(time->tm_sec),
+			      hex2bcd(time->tm_min),
+			      hex2bcd(time->tm_hour),
+			      time->tm_wday,
+			      hex2bcd(time->tm_mday),
+			      hex2bcd(time->tm_mon) + 1,
+			      hex2bcd(time->tm_year - 100));
+	if (rc)
+		return rc;
+	smu_spinwait_simple(&cmd);
 
-	return rc;
+	return 0;
 }
 
+
 void smu_shutdown(void)
 {
-	const unsigned char *command = "SHUTDOWN";
-	unsigned long flags;
+	struct smu_simple_cmd cmd;
 
 	if (smu == NULL)
 		return;
 
-	spin_lock_irqsave(&smu->lock, flags);
-	smu->cmd_buf->cmd = 0xaa;
-	smu->cmd_buf->length = strlen(command);
-	strcpy(smu->cmd_buf->data, command);
-	smu_do_cmd(smu);
+	if (smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 9, NULL, NULL,
+			     'S', 'H', 'U', 'T', 'D', 'O', 'W', 'N', 0))
+		return;
+	smu_spinwait_simple(&cmd);
 	for (;;)
 		;
-	spin_unlock_irqrestore(&smu->lock, flags);
 }
 
+
 void smu_restart(void)
 {
-	const unsigned char *command = "RESTART";
-	unsigned long flags;
+	struct smu_simple_cmd cmd;
 
 	if (smu == NULL)
 		return;
 
-	spin_lock_irqsave(&smu->lock, flags);
-	smu->cmd_buf->cmd = 0xaa;
-	smu->cmd_buf->length = strlen(command);
-	strcpy(smu->cmd_buf->data, command);
-	smu_do_cmd(smu);
+	if (smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, NULL, NULL,
+			     'R', 'E', 'S', 'T', 'A', 'R', 'T', 0))
+		return;
+	smu_spinwait_simple(&cmd);
 	for (;;)
 		;
-	spin_unlock_irqrestore(&smu->lock, flags);
 }
 
+
 int smu_present(void)
 {
 	return smu != NULL;
 }
+EXPORT_SYMBOL(smu_present);
 
 
 int smu_init (void)
@@ -307,6 +447,8 @@ int smu_init (void)
         if (np == NULL)
 		return -ENODEV;
 
+	printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR);
+
 	if (smu_cmdbuf_abs == 0) {
 		printk(KERN_ERR "SMU: Command buffer not allocated !\n");
 		return -EINVAL;
@@ -318,7 +460,13 @@ int smu_init (void)
 	memset(smu, 0, sizeof(*smu));
 
 	spin_lock_init(&smu->lock);
+	INIT_LIST_HEAD(&smu->cmd_list);
+	INIT_LIST_HEAD(&smu->cmd_i2c_list);
 	smu->of_node = np;
+	smu->db_irq = NO_IRQ;
+	smu->msg_irq = NO_IRQ;
+	init_timer(&smu->i2c_timer);
+
 	/* smu_cmdbuf_abs is in the low 2G of RAM, can be converted to a
 	 * 32 bits value safely
 	 */
@@ -331,8 +479,8 @@ int smu_init (void)
 		goto fail;
 	}
 	data = (u32 *)get_property(np, "reg", NULL);
-	of_node_put(np);
 	if (data == NULL) {
+		of_node_put(np);
 		printk(KERN_ERR "SMU: Can't find doorbell GPIO address !\n");
 		goto fail;
 	}
@@ -341,8 +489,31 @@ int smu_init (void)
 	 * and ack. GPIOs are at 0x50, best would be to find that out
 	 * in the device-tree though.
 	 */
-	smu->db_req = 0x50 + *data;
-	smu->db_ack = 0x50 + *data;
+	smu->doorbell = *data;
+	if (smu->doorbell < 0x50)
+		smu->doorbell += 0x50;
+	if (np->n_intrs > 0)
+		smu->db_irq = np->intrs[0].line;
+
+	of_node_put(np);
+
+	/* Now look for the smu-interrupt GPIO */
+	do {
+		np = of_find_node_by_name(NULL, "smu-interrupt");
+		if (np == NULL)
+			break;
+		data = (u32 *)get_property(np, "reg", NULL);
+		if (data == NULL) {
+			of_node_put(np);
+			break;
+		}
+		smu->msg = *data;
+		if (smu->msg < 0x50)
+			smu->msg += 0x50;
+		if (np->n_intrs > 0)
+			smu->msg_irq = np->intrs[0].line;
+		of_node_put(np);
+	} while(0);
 
 	/* Doorbell buffer is currently hard-coded, I didn't find a proper
 	 * device-tree entry giving the address. Best would probably to use
@@ -362,3 +533,584 @@ int smu_init (void)
 	return -ENXIO;
 
 }
+
+
+static int smu_late_init(void)
+{
+	if (!smu)
+		return 0;
+
+	/*
+	 * Try to request the interrupts
+	 */
+
+	if (smu->db_irq != NO_IRQ) {
+		if (request_irq(smu->db_irq, smu_db_intr,
+				SA_SHIRQ, "SMU doorbell", smu) < 0) {
+			printk(KERN_WARNING "SMU: can't "
+			       "request interrupt %d\n",
+			       smu->db_irq);
+			smu->db_irq = NO_IRQ;
+		}
+	}
+
+	if (smu->msg_irq != NO_IRQ) {
+		if (request_irq(smu->msg_irq, smu_msg_intr,
+				SA_SHIRQ, "SMU message", smu) < 0) {
+			printk(KERN_WARNING "SMU: can't "
+			       "request interrupt %d\n",
+			       smu->msg_irq);
+			smu->msg_irq = NO_IRQ;
+		}
+	}
+
+	return 0;
+}
+arch_initcall(smu_late_init);
+
+/*
+ * sysfs visibility
+ */
+
+static void smu_expose_childs(void *unused)
+{
+	struct device_node *np;
+
+	for (np = NULL; (np = of_get_next_child(smu->of_node, np)) != NULL;) {
+		if (device_is_compatible(np, "smu-i2c")) {
+			char name[32];
+			u32 *reg = (u32 *)get_property(np, "reg", NULL);
+
+			if (reg == NULL)
+				continue;
+			sprintf(name, "smu-i2c-%02x", *reg);
+			of_platform_device_create(np, name, &smu->of_dev->dev);
+		}
+	}
+
+}
+
+static DECLARE_WORK(smu_expose_childs_work, smu_expose_childs, NULL);
+
+static int smu_platform_probe(struct of_device* dev,
+			      const struct of_device_id *match)
+{
+	if (!smu)
+		return -ENODEV;
+	smu->of_dev = dev;
+
+	/*
+	 * Ok, we are matched, now expose all i2c busses. We have to defer
+	 * that unfortunately or it would deadlock inside the device model
+	 */
+	schedule_work(&smu_expose_childs_work);
+
+	return 0;
+}
+
+static struct of_device_id smu_platform_match[] =
+{
+	{
+		.type		= "smu",
+	},
+	{},
+};
+
+static struct of_platform_driver smu_of_platform_driver =
+{
+	.name 		= "smu",
+	.match_table	= smu_platform_match,
+	.probe		= smu_platform_probe,
+};
+
+static int __init smu_init_sysfs(void)
+{
+	int rc;
+
+	/*
+	 * Due to sysfs bogosity, a sysdev is not a real device, so
+	 * we should in fact create both if we want sysdev semantics
+	 * for power management.
+	 * For now, we don't power manage machines with an SMU chip,
+	 * I'm a bit too far from figuring out how that works with those
+	 * new chipsets, but that will come back and bite us
+	 */
+	rc = of_register_driver(&smu_of_platform_driver);
+	return 0;
+}
+
+device_initcall(smu_init_sysfs);
+
+struct of_device *smu_get_ofdev(void)
+{
+	if (!smu)
+		return NULL;
+	return smu->of_dev;
+}
+
+EXPORT_SYMBOL_GPL(smu_get_ofdev);
+
+/*
+ * i2c interface
+ */
+
+static void smu_i2c_complete_command(struct smu_i2c_cmd *cmd, int fail)
+{
+	void (*done)(struct smu_i2c_cmd *cmd, void *misc) = cmd->done;
+	void *misc = cmd->misc;
+	unsigned long flags;
+
+	/* Check for read case */
+	if (!fail && cmd->read) {
+		if (cmd->pdata[0] < 1)
+			fail = 1;
+		else
+			memcpy(cmd->info.data, &cmd->pdata[1],
+			       cmd->info.datalen);
+	}
+
+	DPRINTK("SMU: completing, success: %d\n", !fail);
+
+	/* Update status and mark no pending i2c command with lock
+	 * held so nobody comes in while we dequeue an eventual
+	 * pending next i2c command
+	 */
+	spin_lock_irqsave(&smu->lock, flags);
+	smu->cmd_i2c_cur = NULL;
+	wmb();
+	cmd->status = fail ? -EIO : 0;
+
+	/* Is there another i2c command waiting ? */
+	if (!list_empty(&smu->cmd_i2c_list)) {
+		struct smu_i2c_cmd *newcmd;
+
+		/* Fetch it, new current, remove from list */
+		newcmd = list_entry(smu->cmd_i2c_list.next,
+				    struct smu_i2c_cmd, link);
+		smu->cmd_i2c_cur = newcmd;
+		list_del(&cmd->link);
+
+		/* Queue with low level smu */
+		list_add_tail(&cmd->scmd.link, &smu->cmd_list);
+		if (smu->cmd_cur == NULL)
+			smu_start_cmd();
+	}
+	spin_unlock_irqrestore(&smu->lock, flags);
+
+	/* Call command completion handler if any */
+	if (done)
+		done(cmd, misc);
+
+}
+
+
+static void smu_i2c_retry(unsigned long data)
+{
+	struct smu_i2c_cmd	*cmd = (struct smu_i2c_cmd *)data;
+
+	DPRINTK("SMU: i2c failure, requeuing...\n");
+
+	/* requeue command simply by resetting reply_len */
+	cmd->pdata[0] = 0xff;
+	cmd->scmd.reply_len = 0x10;
+	smu_queue_cmd(&cmd->scmd);
+}
+
+
+static void smu_i2c_low_completion(struct smu_cmd *scmd, void *misc)
+{
+	struct smu_i2c_cmd	*cmd = misc;
+	int			fail = 0;
+
+	DPRINTK("SMU: i2c compl. stage=%d status=%x pdata[0]=%x rlen: %x\n",
+		cmd->stage, scmd->status, cmd->pdata[0], scmd->reply_len);
+
+	/* Check for possible status */
+	if (scmd->status < 0)
+		fail = 1;
+	else if (cmd->read) {
+		if (cmd->stage == 0)
+			fail = cmd->pdata[0] != 0;
+		else
+			fail = cmd->pdata[0] >= 0x80;
+	} else {
+		fail = cmd->pdata[0] != 0;
+	}
+
+	/* Handle failures by requeuing command, after 5ms interval
+	 */
+	if (fail && --cmd->retries > 0) {
+		DPRINTK("SMU: i2c failure, starting timer...\n");
+		smu->i2c_timer.function = smu_i2c_retry;
+		smu->i2c_timer.data = (unsigned long)cmd;
+		smu->i2c_timer.expires = jiffies + msecs_to_jiffies(5);
+		add_timer(&smu->i2c_timer);
+		return;
+	}
+
+	/* If failure or stage 1, command is complete */
+	if (fail || cmd->stage != 0) {
+		smu_i2c_complete_command(cmd, fail);
+		return;
+	}
+
+	DPRINTK("SMU: going to stage 1\n");
+
+	/* Ok, initial command complete, now poll status */
+	scmd->reply_buf = cmd->pdata;
+	scmd->reply_len = 0x10;
+	scmd->data_buf = cmd->pdata;
+	scmd->data_len = 1;
+	cmd->pdata[0] = 0;
+	cmd->stage = 1;
+	cmd->retries = 20;
+	smu_queue_cmd(scmd);
+}
+
+
+int smu_queue_i2c(struct smu_i2c_cmd *cmd)
+{
+	unsigned long flags;
+
+	if (smu == NULL)
+		return -ENODEV;
+
+	/* Fill most fields of scmd */
+	cmd->scmd.cmd = SMU_CMD_I2C_COMMAND;
+	cmd->scmd.done = smu_i2c_low_completion;
+	cmd->scmd.misc = cmd;
+	cmd->scmd.reply_buf = cmd->pdata;
+	cmd->scmd.reply_len = 0x10;
+	cmd->scmd.data_buf = (u8 *)(char *)&cmd->info;
+	cmd->scmd.status = 1;
+	cmd->stage = 0;
+	cmd->pdata[0] = 0xff;
+	cmd->retries = 20;
+	cmd->status = 1;
+
+	/* Check transfer type, sanitize some "info" fields
+	 * based on transfer type and do more checking
+	 */
+	cmd->info.caddr = cmd->info.devaddr;
+	cmd->read = cmd->info.devaddr & 0x01;
+	switch(cmd->info.type) {
+	case SMU_I2C_TRANSFER_SIMPLE:
+		memset(&cmd->info.sublen, 0, 4);
+		break;
+	case SMU_I2C_TRANSFER_COMBINED:
+		cmd->info.devaddr &= 0xfe;
+	case SMU_I2C_TRANSFER_STDSUB:
+		if (cmd->info.sublen > 3)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Finish setting up command based on transfer direction
+	 */
+	if (cmd->read) {
+		if (cmd->info.datalen > SMU_I2C_READ_MAX)
+			return -EINVAL;
+		memset(cmd->info.data, 0xff, cmd->info.datalen);
+		cmd->scmd.data_len = 9;
+	} else {
+		if (cmd->info.datalen > SMU_I2C_WRITE_MAX)
+			return -EINVAL;
+		cmd->scmd.data_len = 9 + cmd->info.datalen;
+	}
+
+	DPRINTK("SMU: i2c enqueuing command\n");
+	DPRINTK("SMU:   %s, len=%d bus=%x addr=%x sub0=%x type=%x\n",
+		cmd->read ? "read" : "write", cmd->info.datalen,
+		cmd->info.bus, cmd->info.caddr,
+		cmd->info.subaddr[0], cmd->info.type);
+
+
+	/* Enqueue command in i2c list, and if empty, enqueue also in
+	 * main command list
+	 */
+	spin_lock_irqsave(&smu->lock, flags);
+	if (smu->cmd_i2c_cur == NULL) {
+		smu->cmd_i2c_cur = cmd;
+		list_add_tail(&cmd->scmd.link, &smu->cmd_list);
+		if (smu->cmd_cur == NULL)
+			smu_start_cmd();
+	} else
+		list_add_tail(&cmd->link, &smu->cmd_i2c_list);
+	spin_unlock_irqrestore(&smu->lock, flags);
+
+	return 0;
+}
+
+
+
+/*
+ * Userland driver interface
+ */
+
+
+static LIST_HEAD(smu_clist);
+static DEFINE_SPINLOCK(smu_clist_lock);
+
+enum smu_file_mode {
+	smu_file_commands,
+	smu_file_events,
+	smu_file_closing
+};
+
+struct smu_private
+{
+	struct list_head	list;
+	enum smu_file_mode	mode;
+	int			busy;
+	struct smu_cmd		cmd;
+	spinlock_t		lock;
+	wait_queue_head_t	wait;
+	u8			buffer[SMU_MAX_DATA];
+};
+
+
+static int smu_open(struct inode *inode, struct file *file)
+{
+	struct smu_private *pp;
+	unsigned long flags;
+
+	pp = kmalloc(sizeof(struct smu_private), GFP_KERNEL);
+	if (pp == 0)
+		return -ENOMEM;
+	memset(pp, 0, sizeof(struct smu_private));
+	spin_lock_init(&pp->lock);
+	pp->mode = smu_file_commands;
+	init_waitqueue_head(&pp->wait);
+
+	spin_lock_irqsave(&smu_clist_lock, flags);
+	list_add(&pp->list, &smu_clist);
+	spin_unlock_irqrestore(&smu_clist_lock, flags);
+	file->private_data = pp;
+
+	return 0;
+}
+
+
+static void smu_user_cmd_done(struct smu_cmd *cmd, void *misc)
+{
+	struct smu_private *pp = misc;
+
+	wake_up_all(&pp->wait);
+}
+
+
+static ssize_t smu_write(struct file *file, const char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	struct smu_private *pp = file->private_data;
+	unsigned long flags;
+	struct smu_user_cmd_hdr hdr;
+	int rc = 0;
+
+	if (pp->busy)
+		return -EBUSY;
+	else if (copy_from_user(&hdr, buf, sizeof(hdr)))
+		return -EFAULT;
+	else if (hdr.cmdtype == SMU_CMDTYPE_WANTS_EVENTS) {
+		pp->mode = smu_file_events;
+		return 0;
+	} else if (hdr.cmdtype != SMU_CMDTYPE_SMU)
+		return -EINVAL;
+	else if (pp->mode != smu_file_commands)
+		return -EBADFD;
+	else if (hdr.data_len > SMU_MAX_DATA)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pp->lock, flags);
+	if (pp->busy) {
+		spin_unlock_irqrestore(&pp->lock, flags);
+		return -EBUSY;
+	}
+	pp->busy = 1;
+	pp->cmd.status = 1;
+	spin_unlock_irqrestore(&pp->lock, flags);
+
+	if (copy_from_user(pp->buffer, buf + sizeof(hdr), hdr.data_len)) {
+		pp->busy = 0;
+		return -EFAULT;
+	}
+
+	pp->cmd.cmd = hdr.cmd;
+	pp->cmd.data_len = hdr.data_len;
+	pp->cmd.reply_len = SMU_MAX_DATA;
+	pp->cmd.data_buf = pp->buffer;
+	pp->cmd.reply_buf = pp->buffer;
+	pp->cmd.done = smu_user_cmd_done;
+	pp->cmd.misc = pp;
+	rc = smu_queue_cmd(&pp->cmd);
+	if (rc < 0)
+		return rc;
+	return count;
+}
+
+
+static ssize_t smu_read_command(struct file *file, struct smu_private *pp,
+				char __user *buf, size_t count)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct smu_user_reply_hdr hdr;
+	unsigned long flags;
+	int size, rc = 0;
+
+	if (!pp->busy)
+		return 0;
+	if (count < sizeof(struct smu_user_reply_hdr))
+		return -EOVERFLOW;
+	spin_lock_irqsave(&pp->lock, flags);
+	if (pp->cmd.status == 1) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+		add_wait_queue(&pp->wait, &wait);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			rc = 0;
+			if (pp->cmd.status != 1)
+				break;
+			rc = -ERESTARTSYS;
+			if (signal_pending(current))
+				break;
+			spin_unlock_irqrestore(&pp->lock, flags);
+			schedule();
+			spin_lock_irqsave(&pp->lock, flags);
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&pp->wait, &wait);
+	}
+	spin_unlock_irqrestore(&pp->lock, flags);
+	if (rc)
+		return rc;
+	if (pp->cmd.status != 0)
+		pp->cmd.reply_len = 0;
+	size = sizeof(hdr) + pp->cmd.reply_len;
+	if (count < size)
+		size = count;
+	rc = size;
+	hdr.status = pp->cmd.status;
+	hdr.reply_len = pp->cmd.reply_len;
+	if (copy_to_user(buf, &hdr, sizeof(hdr)))
+		return -EFAULT;
+	size -= sizeof(hdr);
+	if (size && copy_to_user(buf + sizeof(hdr), pp->buffer, size))
+		return -EFAULT;
+	pp->busy = 0;
+
+	return rc;
+}
+
+
+static ssize_t smu_read_events(struct file *file, struct smu_private *pp,
+			       char __user *buf, size_t count)
+{
+	/* Not implemented */
+	msleep_interruptible(1000);
+	return 0;
+}
+
+
+static ssize_t smu_read(struct file *file, char __user *buf,
+			size_t count, loff_t *ppos)
+{
+	struct smu_private *pp = file->private_data;
+
+	if (pp->mode == smu_file_commands)
+		return smu_read_command(file, pp, buf, count);
+	if (pp->mode == smu_file_events)
+		return smu_read_events(file, pp, buf, count);
+
+	return -EBADFD;
+}
+
+static unsigned int smu_fpoll(struct file *file, poll_table *wait)
+{
+	struct smu_private *pp = file->private_data;
+	unsigned int mask = 0;
+	unsigned long flags;
+
+	if (pp == 0)
+		return 0;
+
+	if (pp->mode == smu_file_commands) {
+		poll_wait(file, &pp->wait, wait);
+
+		spin_lock_irqsave(&pp->lock, flags);
+		if (pp->busy && pp->cmd.status != 1)
+			mask |= POLLIN;
+		spin_unlock_irqrestore(&pp->lock, flags);
+	} if (pp->mode == smu_file_events) {
+		/* Not yet implemented */
+	}
+	return mask;
+}
+
+static int smu_release(struct inode *inode, struct file *file)
+{
+	struct smu_private *pp = file->private_data;
+	unsigned long flags;
+	unsigned int busy;
+
+	if (pp == 0)
+		return 0;
+
+	file->private_data = NULL;
+
+	/* Mark file as closing to avoid races with new request */
+	spin_lock_irqsave(&pp->lock, flags);
+	pp->mode = smu_file_closing;
+	busy = pp->busy;
+
+	/* Wait for any pending request to complete */
+	if (busy && pp->cmd.status == 1) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		add_wait_queue(&pp->wait, &wait);
+		for (;;) {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			if (pp->cmd.status != 1)
+				break;
+			spin_lock_irqsave(&pp->lock, flags);
+			schedule();
+			spin_unlock_irqrestore(&pp->lock, flags);
+		}
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&pp->wait, &wait);
+	}
+	spin_unlock_irqrestore(&pp->lock, flags);
+
+	spin_lock_irqsave(&smu_clist_lock, flags);
+	list_del(&pp->list);
+	spin_unlock_irqrestore(&smu_clist_lock, flags);
+	kfree(pp);
+
+	return 0;
+}
+
+
+static struct file_operations smu_device_fops __pmacdata = {
+	.llseek		= no_llseek,
+	.read		= smu_read,
+	.write		= smu_write,
+	.poll		= smu_fpoll,
+	.open		= smu_open,
+	.release	= smu_release,
+};
+
+static struct miscdevice pmu_device __pmacdata = {
+	MISC_DYNAMIC_MINOR, "smu", &smu_device_fops
+};
+
+static int smu_device_init(void)
+{
+	if (!smu)
+		return -ENODEV;
+	if (misc_register(&pmu_device) < 0)
+		printk(KERN_ERR "via-pmu: cannot register misc device.\n");
+	return 0;
+}
+device_initcall(smu_device_init);
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index c9ca1118e44..f38696622eb 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -599,7 +599,7 @@ thermostat_init(void)
 		sensor_location[2] = "?";
 	}
 
-	of_dev = of_platform_device_create(np, "temperatures");
+	of_dev = of_platform_device_create(np, "temperatures", NULL);
 	
 	if (of_dev == NULL) {
 		printk(KERN_ERR "Can't register temperatures device !\n");
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index 703e3197331..cc507ceef15 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -2051,7 +2051,7 @@ static int __init therm_pm72_init(void)
 			    return -ENODEV;
 		}
 	}
-	of_dev = of_platform_device_create(np, "temperature");
+	of_dev = of_platform_device_create(np, "temperature", NULL);
 	if (of_dev == NULL) {
 		printk(KERN_ERR "Can't register FCU platform device !\n");
 		return -ENODEV;
diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index cbb72eb0426..6aaa1df1a64 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -504,7 +504,7 @@ g4fan_init( void )
 	}
 	if( !(np=of_find_node_by_name(NULL, "fan")) )
 		return -ENODEV;
-	x.of_dev = of_platform_device_create( np, "temperature" );
+	x.of_dev = of_platform_device_create(np, "temperature", NULL);
 	of_node_put( np );
 
 	if( !x.of_dev ) {
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 76719451e38..50346188452 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -405,7 +405,7 @@ static int __init via_pmu_start(void)
 	bright_req_2.complete = 1;
 	batt_req.complete = 1;
 
-#ifdef CONFIG_PPC32
+#if defined(CONFIG_PPC32) && !defined(CONFIG_PPC_MERGE)
 	if (pmu_kind == PMU_KEYLARGO_BASED)
 		openpic_set_irq_priority(vias->intrs[0].line,
 					 OPENPIC_PRIORITY_DEFAULT + 1);
diff --git a/drivers/media/video/bttv-driver.c b/drivers/media/video/bttv-driver.c
index a564321db2f..c062a017491 100644
--- a/drivers/media/video/bttv-driver.c
+++ b/drivers/media/video/bttv-driver.c
@@ -763,21 +763,21 @@ static void set_pll(struct bttv *btv)
                 /* no PLL needed */
                 if (btv->pll.pll_current == 0)
                         return;
-		vprintk(KERN_INFO "bttv%d: PLL can sleep, using XTAL (%d).\n",
-			btv->c.nr,btv->pll.pll_ifreq);
+		bttv_printk(KERN_INFO "bttv%d: PLL can sleep, using XTAL (%d).\n",
+                           btv->c.nr,btv->pll.pll_ifreq);
                 btwrite(0x00,BT848_TGCTRL);
                 btwrite(0x00,BT848_PLL_XCI);
                 btv->pll.pll_current = 0;
                 return;
         }
 
-	vprintk(KERN_INFO "bttv%d: PLL: %d => %d ",btv->c.nr,
-		btv->pll.pll_ifreq, btv->pll.pll_ofreq);
+	bttv_printk(KERN_INFO "bttv%d: PLL: %d => %d ",btv->c.nr,
+                   btv->pll.pll_ifreq, btv->pll.pll_ofreq);
 	set_pll_freq(btv, btv->pll.pll_ifreq, btv->pll.pll_ofreq);
 
         for (i=0; i<10; i++) {
 		/*  Let other people run while the PLL stabilizes */
-		vprintk(".");
+		bttv_printk(".");
 		msleep(10);
 
                 if (btread(BT848_DSTATUS) & BT848_DSTATUS_PLOCK) {
@@ -785,12 +785,12 @@ static void set_pll(struct bttv *btv)
                 } else {
                         btwrite(0x08,BT848_TGCTRL);
                         btv->pll.pll_current = btv->pll.pll_ofreq;
-			vprintk(" ok\n");
+			bttv_printk(" ok\n");
                         return;
                 }
         }
         btv->pll.pll_current = -1;
-	vprintk("failed\n");
+	bttv_printk("failed\n");
         return;
 }
 
diff --git a/drivers/media/video/bttvp.h b/drivers/media/video/bttvp.h
index 9b0b7ca035f..7a312f79340 100644
--- a/drivers/media/video/bttvp.h
+++ b/drivers/media/video/bttvp.h
@@ -221,7 +221,7 @@ extern void bttv_gpio_tracking(struct bttv *btv, char *comment);
 extern int init_bttv_i2c(struct bttv *btv);
 extern int fini_bttv_i2c(struct bttv *btv);
 
-#define vprintk  if (bttv_verbose) printk
+#define bttv_printk if (bttv_verbose) printk
 #define dprintk  if (bttv_debug >= 1) printk
 #define d2printk if (bttv_debug >= 2) printk
 
diff --git a/drivers/message/fusion/Kconfig b/drivers/message/fusion/Kconfig
index 33f209a39cb..1883d22cffe 100644
--- a/drivers/message/fusion/Kconfig
+++ b/drivers/message/fusion/Kconfig
@@ -35,6 +35,23 @@ config FUSION_FC
 	  LSIFC929X
 	  LSIFC929XL
 
+config FUSION_SAS
+	tristate "Fusion MPT ScsiHost drivers for SAS"
+	depends on PCI && SCSI
+ 	select FUSION
+	select SCSI_SAS_ATTRS
+	---help---
+	  SCSI HOST support for a SAS host adapters.
+
+	  List of supported controllers:
+
+	  LSISAS1064
+	  LSISAS1066
+	  LSISAS1068
+	  LSISAS1064E
+	  LSISAS1066E
+	  LSISAS1068E
+
 config FUSION_MAX_SGE
 	int "Maximum number of scatter gather entries (16 - 128)"
 	depends on FUSION
diff --git a/drivers/message/fusion/Makefile b/drivers/message/fusion/Makefile
index 1d2f9db813c..8a2e2657f4c 100644
--- a/drivers/message/fusion/Makefile
+++ b/drivers/message/fusion/Makefile
@@ -34,5 +34,6 @@
 
 obj-$(CONFIG_FUSION_SPI)	+= mptbase.o mptscsih.o mptspi.o
 obj-$(CONFIG_FUSION_FC)		+= mptbase.o mptscsih.o mptfc.o
+obj-$(CONFIG_FUSION_SAS)	+= mptbase.o mptscsih.o mptsas.o
 obj-$(CONFIG_FUSION_CTL)	+= mptctl.o
 obj-$(CONFIG_FUSION_LAN)	+= mptlan.o
diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index f517d0692d5..790a2932ded 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -135,13 +135,12 @@ static void	mpt_adapter_dispose(MPT_ADAPTER *ioc);
 
 static void	MptDisplayIocCapabilities(MPT_ADAPTER *ioc);
 static int	MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag);
-//static u32	mpt_GetIocState(MPT_ADAPTER *ioc, int cooked);
 static int	GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason);
 static int	GetPortFacts(MPT_ADAPTER *ioc, int portnum, int sleepFlag);
 static int	SendIocInit(MPT_ADAPTER *ioc, int sleepFlag);
 static int	SendPortEnable(MPT_ADAPTER *ioc, int portnum, int sleepFlag);
 static int	mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag);
-static int	mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag);
+static int	mpt_downloadboot(MPT_ADAPTER *ioc, MpiFwHeader_t *pFwHeader, int sleepFlag);
 static int	mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag);
 static int	KickStart(MPT_ADAPTER *ioc, int ignore, int sleepFlag);
 static int	SendIocReset(MPT_ADAPTER *ioc, u8 reset_type, int sleepFlag);
@@ -152,6 +151,7 @@ static int	WaitForDoorbellReply(MPT_ADAPTER *ioc, int howlong, int sleepFlag);
 static int	GetLanConfigPages(MPT_ADAPTER *ioc);
 static int	GetFcPortPage0(MPT_ADAPTER *ioc, int portnum);
 static int	GetIoUnitPage2(MPT_ADAPTER *ioc);
+int		mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode);
 static int	mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum);
 static int	mpt_readScsiDevicePageHeaders(MPT_ADAPTER *ioc, int portnum);
 static void 	mpt_read_ioc_pg_1(MPT_ADAPTER *ioc);
@@ -159,6 +159,8 @@ static void 	mpt_read_ioc_pg_4(MPT_ADAPTER *ioc);
 static void	mpt_timer_expired(unsigned long data);
 static int	SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch);
 static int	SendEventAck(MPT_ADAPTER *ioc, EventNotificationReply_t *evnp);
+static int	mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_value, int sleepFlag);
+static int	mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init);
 
 #ifdef CONFIG_PROC_FS
 static int	procmpt_summary_read(char *buf, char **start, off_t offset,
@@ -175,6 +177,7 @@ static int	ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *
 static void	mpt_sp_ioc_info(MPT_ADAPTER *ioc, u32 ioc_status, MPT_FRAME_HDR *mf);
 static void	mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info);
 static void	mpt_sp_log_info(MPT_ADAPTER *ioc, u32 log_info);
+static void	mpt_sas_log_info(MPT_ADAPTER *ioc, u32 log_info);
 
 /* module entry point */
 static int  __init    fusion_init  (void);
@@ -206,6 +209,144 @@ pci_enable_io_access(struct pci_dev *pdev)
 	pci_write_config_word(pdev, PCI_COMMAND, command_reg);
 }
 
+/*
+ *  Process turbo (context) reply...
+ */
+static void
+mpt_turbo_reply(MPT_ADAPTER *ioc, u32 pa)
+{
+	MPT_FRAME_HDR *mf = NULL;
+	MPT_FRAME_HDR *mr = NULL;
+	int req_idx = 0;
+	int cb_idx;
+
+	dmfprintk((MYIOC_s_INFO_FMT "Got TURBO reply req_idx=%08x\n",
+				ioc->name, pa));
+
+	switch (pa >> MPI_CONTEXT_REPLY_TYPE_SHIFT) {
+	case MPI_CONTEXT_REPLY_TYPE_SCSI_INIT:
+		req_idx = pa & 0x0000FFFF;
+		cb_idx = (pa & 0x00FF0000) >> 16;
+		mf = MPT_INDEX_2_MFPTR(ioc, req_idx);
+		break;
+	case MPI_CONTEXT_REPLY_TYPE_LAN:
+		cb_idx = mpt_lan_index;
+		/*
+		 *  Blind set of mf to NULL here was fatal
+		 *  after lan_reply says "freeme"
+		 *  Fix sort of combined with an optimization here;
+		 *  added explicit check for case where lan_reply
+		 *  was just returning 1 and doing nothing else.
+		 *  For this case skip the callback, but set up
+		 *  proper mf value first here:-)
+		 */
+		if ((pa & 0x58000000) == 0x58000000) {
+			req_idx = pa & 0x0000FFFF;
+			mf = MPT_INDEX_2_MFPTR(ioc, req_idx);
+			mpt_free_msg_frame(ioc, mf);
+			mb();
+			return;
+			break;
+		}
+		mr = (MPT_FRAME_HDR *) CAST_U32_TO_PTR(pa);
+		break;
+	case MPI_CONTEXT_REPLY_TYPE_SCSI_TARGET:
+		cb_idx = mpt_stm_index;
+		mr = (MPT_FRAME_HDR *) CAST_U32_TO_PTR(pa);
+		break;
+	default:
+		cb_idx = 0;
+		BUG();
+	}
+
+	/*  Check for (valid) IO callback!  */
+	if (cb_idx < 1 || cb_idx >= MPT_MAX_PROTOCOL_DRIVERS ||
+			MptCallbacks[cb_idx] == NULL) {
+		printk(MYIOC_s_WARN_FMT "%s: Invalid cb_idx (%d)!\n",
+				__FUNCTION__, ioc->name, cb_idx);
+		goto out;
+	}
+
+	if (MptCallbacks[cb_idx](ioc, mf, mr))
+		mpt_free_msg_frame(ioc, mf);
+ out:
+	mb();
+}
+
+static void
+mpt_reply(MPT_ADAPTER *ioc, u32 pa)
+{
+	MPT_FRAME_HDR	*mf;
+	MPT_FRAME_HDR	*mr;
+	int		 req_idx;
+	int		 cb_idx;
+	int		 freeme;
+
+	u32 reply_dma_low;
+	u16 ioc_stat;
+
+	/* non-TURBO reply!  Hmmm, something may be up...
+	 *  Newest turbo reply mechanism; get address
+	 *  via left shift 1 (get rid of MPI_ADDRESS_REPLY_A_BIT)!
+	 */
+
+	/* Map DMA address of reply header to cpu address.
+	 * pa is 32 bits - but the dma address may be 32 or 64 bits
+	 * get offset based only only the low addresses
+	 */
+
+	reply_dma_low = (pa <<= 1);
+	mr = (MPT_FRAME_HDR *)((u8 *)ioc->reply_frames +
+			 (reply_dma_low - ioc->reply_frames_low_dma));
+
+	req_idx = le16_to_cpu(mr->u.frame.hwhdr.msgctxu.fld.req_idx);
+	cb_idx = mr->u.frame.hwhdr.msgctxu.fld.cb_idx;
+	mf = MPT_INDEX_2_MFPTR(ioc, req_idx);
+
+	dmfprintk((MYIOC_s_INFO_FMT "Got non-TURBO reply=%p req_idx=%x cb_idx=%x Function=%x\n",
+			ioc->name, mr, req_idx, cb_idx, mr->u.hdr.Function));
+	DBG_DUMP_REPLY_FRAME(mr)
+
+	 /*  Check/log IOC log info
+	 */
+	ioc_stat = le16_to_cpu(mr->u.reply.IOCStatus);
+	if (ioc_stat & MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {
+		u32	 log_info = le32_to_cpu(mr->u.reply.IOCLogInfo);
+		if (ioc->bus_type == FC)
+			mpt_fc_log_info(ioc, log_info);
+		else if (ioc->bus_type == SCSI)
+			mpt_sp_log_info(ioc, log_info);
+		else if (ioc->bus_type == SAS)
+			mpt_sas_log_info(ioc, log_info);
+	}
+	if (ioc_stat & MPI_IOCSTATUS_MASK) {
+		if (ioc->bus_type == SCSI &&
+		    cb_idx != mpt_stm_index &&
+		    cb_idx != mpt_lan_index)
+			mpt_sp_ioc_info(ioc, (u32)ioc_stat, mf);
+	}
+
+
+	/*  Check for (valid) IO callback!  */
+	if (cb_idx < 1 || cb_idx >= MPT_MAX_PROTOCOL_DRIVERS ||
+			MptCallbacks[cb_idx] == NULL) {
+		printk(MYIOC_s_WARN_FMT "%s: Invalid cb_idx (%d)!\n",
+				__FUNCTION__, ioc->name, cb_idx);
+		freeme = 0;
+		goto out;
+	}
+
+	freeme = MptCallbacks[cb_idx](ioc, mf, mr);
+
+ out:
+	/*  Flush (non-TURBO) reply with a WRITE!  */
+	CHIPREG_WRITE32(&ioc->chip->ReplyFifo, pa);
+
+	if (freeme)
+		mpt_free_msg_frame(ioc, mf);
+	mb();
+}
+
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /*
  *	mpt_interrupt - MPT adapter (IOC) specific interrupt handler.
@@ -227,164 +368,21 @@ pci_enable_io_access(struct pci_dev *pdev)
 static irqreturn_t
 mpt_interrupt(int irq, void *bus_id, struct pt_regs *r)
 {
-	MPT_ADAPTER	*ioc;
-	MPT_FRAME_HDR	*mf;
-	MPT_FRAME_HDR	*mr;
-	u32		 pa;
-	int		 req_idx;
-	int		 cb_idx;
-	int		 type;
-	int		 freeme;
-
-	ioc = (MPT_ADAPTER *)bus_id;
+	MPT_ADAPTER *ioc = bus_id;
+	u32 pa;
 
 	/*
 	 *  Drain the reply FIFO!
-	 *
-	 * NOTES: I've seen up to 10 replies processed in this loop, so far...
-	 * Update: I've seen up to 9182 replies processed in this loop! ??
-	 * Update: Limit ourselves to processing max of N replies
-	 *	(bottom of loop).
 	 */
 	while (1) {
-
-		if ((pa = CHIPREG_READ32_dmasync(&ioc->chip->ReplyFifo)) == 0xFFFFFFFF)
+		pa = CHIPREG_READ32_dmasync(&ioc->chip->ReplyFifo);
+		if (pa == 0xFFFFFFFF)
 			return IRQ_HANDLED;
-
-		cb_idx = 0;
-		freeme = 0;
-
-		/*
-		 *  Check for non-TURBO reply!
-		 */
-		if (pa & MPI_ADDRESS_REPLY_A_BIT) {
-			u32 reply_dma_low;
-			u16 ioc_stat;
-
-			/* non-TURBO reply!  Hmmm, something may be up...
-			 *  Newest turbo reply mechanism; get address
-			 *  via left shift 1 (get rid of MPI_ADDRESS_REPLY_A_BIT)!
-			 */
-
-			/* Map DMA address of reply header to cpu address.
-			 * pa is 32 bits - but the dma address may be 32 or 64 bits
-			 * get offset based only only the low addresses
-			 */
-			reply_dma_low = (pa = (pa << 1));
-			mr = (MPT_FRAME_HDR *)((u8 *)ioc->reply_frames +
-					 (reply_dma_low - ioc->reply_frames_low_dma));
-
-			req_idx = le16_to_cpu(mr->u.frame.hwhdr.msgctxu.fld.req_idx);
-			cb_idx = mr->u.frame.hwhdr.msgctxu.fld.cb_idx;
-			mf = MPT_INDEX_2_MFPTR(ioc, req_idx);
-
-			dmfprintk((MYIOC_s_INFO_FMT "Got non-TURBO reply=%p req_idx=%x cb_idx=%x Function=%x\n",
-					ioc->name, mr, req_idx, cb_idx, mr->u.hdr.Function));
-			DBG_DUMP_REPLY_FRAME(mr)
-
-			 /*  Check/log IOC log info
-			 */
-			ioc_stat = le16_to_cpu(mr->u.reply.IOCStatus);
-			if (ioc_stat & MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {
-				u32	 log_info = le32_to_cpu(mr->u.reply.IOCLogInfo);
-				if (ioc->bus_type == FC)
-					mpt_fc_log_info(ioc, log_info);
-				else if (ioc->bus_type == SCSI)
-					mpt_sp_log_info(ioc, log_info);
-			}
-			if (ioc_stat & MPI_IOCSTATUS_MASK) {
-				if (ioc->bus_type == SCSI)
-					mpt_sp_ioc_info(ioc, (u32)ioc_stat, mf);
-			}
-		} else {
-			/*
-			 *  Process turbo (context) reply...
-			 */
-			dmfprintk((MYIOC_s_INFO_FMT "Got TURBO reply req_idx=%08x\n", ioc->name, pa));
-			type = (pa >> MPI_CONTEXT_REPLY_TYPE_SHIFT);
-			if (type == MPI_CONTEXT_REPLY_TYPE_SCSI_TARGET) {
-				cb_idx = mpt_stm_index;
-				mf = NULL;
-				mr = (MPT_FRAME_HDR *) CAST_U32_TO_PTR(pa);
-			} else if (type == MPI_CONTEXT_REPLY_TYPE_LAN) {
-				cb_idx = mpt_lan_index;
-				 /*  Blind set of mf to NULL here was fatal
-				 *  after lan_reply says "freeme"
-				 *  Fix sort of combined with an optimization here;
-				 *  added explicit check for case where lan_reply
-				 *  was just returning 1 and doing nothing else.
-				 *  For this case skip the callback, but set up
-				 *  proper mf value first here:-)
-				 */
-				if ((pa & 0x58000000) == 0x58000000) {
-					req_idx = pa & 0x0000FFFF;
-					mf = MPT_INDEX_2_MFPTR(ioc, req_idx);
-					freeme = 1;
-					/*
-					 *  IMPORTANT!  Invalidate the callback!
-					 */
-					cb_idx = 0;
-				} else {
-					mf = NULL;
-				}
-				mr = (MPT_FRAME_HDR *) CAST_U32_TO_PTR(pa);
-			} else {
-				req_idx = pa & 0x0000FFFF;
-				cb_idx = (pa & 0x00FF0000) >> 16;
-				mf = MPT_INDEX_2_MFPTR(ioc, req_idx);
-				mr = NULL;
-			}
-			pa = 0;					/* No reply flush! */
-		}
-
-#ifdef MPT_DEBUG_IRQ
-		if (ioc->bus_type == SCSI) {
-			/* Verify mf, mr are reasonable.
-			 */
-			if ((mf) && ((mf >= MPT_INDEX_2_MFPTR(ioc, ioc->req_depth))
-				|| (mf < ioc->req_frames)) ) {
-				printk(MYIOC_s_WARN_FMT
-					"mpt_interrupt: Invalid mf (%p)!\n", ioc->name, (void *)mf);
-				cb_idx = 0;
-				pa = 0;
-				freeme = 0;
-			}
-			if ((pa) && (mr) && ((mr >= MPT_INDEX_2_RFPTR(ioc, ioc->req_depth))
-				|| (mr < ioc->reply_frames)) ) {
-				printk(MYIOC_s_WARN_FMT
-					"mpt_interrupt: Invalid rf (%p)!\n", ioc->name, (void *)mr);
-				cb_idx = 0;
-				pa = 0;
-				freeme = 0;
-			}
-			if (cb_idx > (MPT_MAX_PROTOCOL_DRIVERS-1)) {
-				printk(MYIOC_s_WARN_FMT
-					"mpt_interrupt: Invalid cb_idx (%d)!\n", ioc->name, cb_idx);
-				cb_idx = 0;
-				pa = 0;
-				freeme = 0;
-			}
-		}
-#endif
-
-		/*  Check for (valid) IO callback!  */
-		if (cb_idx) {
-			/*  Do the callback!  */
-			freeme = (*(MptCallbacks[cb_idx]))(ioc, mf, mr);
-		}
-
-		if (pa) {
-			/*  Flush (non-TURBO) reply with a WRITE!  */
-			CHIPREG_WRITE32(&ioc->chip->ReplyFifo, pa);
-		}
-
-		if (freeme) {
-			/*  Put Request back on FreeQ!  */
-			mpt_free_msg_frame(ioc, mf);
-		}
-
-		mb();
-	}	/* drain reply FIFO */
+		else if (pa & MPI_ADDRESS_REPLY_A_BIT)
+			mpt_reply(ioc, pa);
+		else
+			mpt_turbo_reply(ioc, pa);
+	}
 
 	return IRQ_HANDLED;
 }
@@ -509,6 +507,14 @@ mpt_base_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *reply)
 			pCfg->wait_done = 1;
 			wake_up(&mpt_waitq);
 		}
+	} else if (func == MPI_FUNCTION_SAS_IO_UNIT_CONTROL) {
+		/* we should be always getting a reply frame */
+		memcpy(ioc->persist_reply_frame, reply,
+		    min(MPT_DEFAULT_FRAME_SIZE,
+		    4*reply->u.reply.MsgLength));
+		del_timer(&ioc->persist_timer);
+		ioc->persist_wait_done = 1;
+		wake_up(&mpt_waitq);
 	} else {
 		printk(MYIOC_s_ERR_FMT "Unexpected msg function (=%02Xh) reply received!\n",
 				ioc->name, func);
@@ -750,6 +756,7 @@ mpt_get_msg_frame(int handle, MPT_ADAPTER *ioc)
 		mf = list_entry(ioc->FreeQ.next, MPT_FRAME_HDR,
 				u.frame.linkage.list);
 		list_del(&mf->u.frame.linkage.list);
+		mf->u.frame.linkage.arg1 = 0;
 		mf->u.frame.hwhdr.msgctxu.fld.cb_idx = handle;	/* byte */
 		req_offset = (u8 *)mf - (u8 *)ioc->req_frames;
 								/* u16! */
@@ -845,6 +852,7 @@ mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
 
 	/*  Put Request back on FreeQ!  */
 	spin_lock_irqsave(&ioc->FreeQlock, flags);
+	mf->u.frame.linkage.arg1 = 0xdeadbeaf; /* signature to know if this mf is freed */
 	list_add_tail(&mf->u.frame.linkage.list, &ioc->FreeQ);
 #ifdef MFCNT
 	ioc->mfcnt--;
@@ -971,12 +979,123 @@ mpt_send_handshake_request(int handle, MPT_ADAPTER *ioc, int reqBytes, u32 *req,
 
 	/* Make sure there are no doorbells */
 	CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
-	
+
 	return r;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
+ * mpt_host_page_access_control - provides mechanism for the host
+ * driver to control the IOC's Host Page Buffer access.
+ * @ioc: Pointer to MPT adapter structure
+ * @access_control_value: define bits below
+ *
+ * Access Control Value - bits[15:12]
+ * 0h Reserved
+ * 1h Enable Access { MPI_DB_HPBAC_ENABLE_ACCESS }
+ * 2h Disable Access { MPI_DB_HPBAC_DISABLE_ACCESS }
+ * 3h Free Buffer { MPI_DB_HPBAC_FREE_BUFFER }
+ *
+ * Returns 0 for success, non-zero for failure.
+ */
+
+static int
+mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_value, int sleepFlag)
+{
+	int	 r = 0;
+
+	/* return if in use */
+	if (CHIPREG_READ32(&ioc->chip->Doorbell)
+	    & MPI_DOORBELL_ACTIVE)
+	    return -1;
+
+	CHIPREG_WRITE32(&ioc->chip->IntStatus, 0);
+
+	CHIPREG_WRITE32(&ioc->chip->Doorbell,
+		((MPI_FUNCTION_HOST_PAGEBUF_ACCESS_CONTROL
+		 <<MPI_DOORBELL_FUNCTION_SHIFT) |
+		 (access_control_value<<12)));
+
+	/* Wait for IOC to clear Doorbell Status bit */
+	if ((r = WaitForDoorbellAck(ioc, 5, sleepFlag)) < 0) {
+		return -2;
+	}else
+		return 0;
+}
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/**
+ *	mpt_host_page_alloc - allocate system memory for the fw
+ *	If we already allocated memory in past, then resend the same pointer.
+ *	ioc@: Pointer to pointer to IOC adapter
+ *	ioc_init@: Pointer to ioc init config page
+ *
+ *	Returns 0 for success, non-zero for failure.
+ */
+static int
+mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init)
+{
+	char	*psge;
+	int	flags_length;
+	u32	host_page_buffer_sz=0;
+
+	if(!ioc->HostPageBuffer) {
+
+		host_page_buffer_sz =
+		    le32_to_cpu(ioc->facts.HostPageBufferSGE.FlagsLength) & 0xFFFFFF;
+
+		if(!host_page_buffer_sz)
+			return 0; /* fw doesn't need any host buffers */
+
+		/* spin till we get enough memory */
+		while(host_page_buffer_sz > 0) {
+
+			if((ioc->HostPageBuffer = pci_alloc_consistent(
+			    ioc->pcidev,
+			    host_page_buffer_sz,
+			    &ioc->HostPageBuffer_dma)) != NULL) {
+
+				dinitprintk((MYIOC_s_INFO_FMT
+				    "host_page_buffer @ %p, dma @ %x, sz=%d bytes\n",
+				    ioc->name,
+				    ioc->HostPageBuffer,
+				    ioc->HostPageBuffer_dma,
+				    host_page_buffer_sz));
+				ioc->alloc_total += host_page_buffer_sz;
+				ioc->HostPageBuffer_sz = host_page_buffer_sz;
+				break;
+			}
+
+			host_page_buffer_sz -= (4*1024);
+		}
+	}
+
+	if(!ioc->HostPageBuffer) {
+		printk(MYIOC_s_ERR_FMT
+		    "Failed to alloc memory for host_page_buffer!\n",
+		    ioc->name);
+		return -999;
+	}
+
+	psge = (char *)&ioc_init->HostPageBufferSGE;
+	flags_length = MPI_SGE_FLAGS_SIMPLE_ELEMENT |
+	    MPI_SGE_FLAGS_SYSTEM_ADDRESS |
+	    MPI_SGE_FLAGS_32_BIT_ADDRESSING |
+	    MPI_SGE_FLAGS_HOST_TO_IOC |
+	    MPI_SGE_FLAGS_END_OF_BUFFER;
+	if (sizeof(dma_addr_t) == sizeof(u64)) {
+	    flags_length |= MPI_SGE_FLAGS_64_BIT_ADDRESSING;
+	}
+	flags_length = flags_length << MPI_SGE_FLAGS_SHIFT;
+	flags_length |= ioc->HostPageBuffer_sz;
+	mpt_add_sge(psge, flags_length, ioc->HostPageBuffer_dma);
+	ioc->facts.HostPageBufferSGE = ioc_init->HostPageBufferSGE;
+
+return 0;
+}
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/**
  *	mpt_verify_adapter - Given a unique IOC identifier, set pointer to
  *	the associated MPT adapter structure.
  *	@iocid: IOC unique identifier (integer)
@@ -1084,7 +1203,7 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	/* Initilize SCSI Config Data structure
 	 */
-	memset(&ioc->spi_data, 0, sizeof(ScsiCfgData));
+	memset(&ioc->spi_data, 0, sizeof(SpiCfgData));
 
 	/* Initialize the running configQ head.
 	 */
@@ -1213,6 +1332,33 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id)
 		ioc->prod_name = "LSI53C1035";
 		ioc->bus_type = SCSI;
 	}
+	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1064) {
+		ioc->prod_name = "LSISAS1064";
+		ioc->bus_type = SAS;
+		ioc->errata_flag_1064 = 1;
+	}
+	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1066) {
+		ioc->prod_name = "LSISAS1066";
+		ioc->bus_type = SAS;
+		ioc->errata_flag_1064 = 1;
+	}
+	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1068) {
+		ioc->prod_name = "LSISAS1068";
+		ioc->bus_type = SAS;
+		ioc->errata_flag_1064 = 1;
+	}
+	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1064E) {
+		ioc->prod_name = "LSISAS1064E";
+		ioc->bus_type = SAS;
+	}
+	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1066E) {
+		ioc->prod_name = "LSISAS1066E";
+		ioc->bus_type = SAS;
+	}
+	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1068E) {
+		ioc->prod_name = "LSISAS1068E";
+		ioc->bus_type = SAS;
+	}
 
 	if (ioc->errata_flag_1064)
 		pci_disable_io_access(pdev);
@@ -1604,8 +1750,23 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 			 */
 			if (ret == 0) {
 				rc = mpt_do_upload(ioc, sleepFlag);
-				if (rc != 0)
+				if (rc == 0) {
+					if (ioc->alt_ioc && ioc->alt_ioc->cached_fw) {
+						/*
+						 * Maintain only one pointer to FW memory
+						 * so there will not be two attempt to
+						 * downloadboot onboard dual function
+						 * chips (mpt_adapter_disable,
+						 * mpt_diag_reset)
+						 */
+						ioc->cached_fw = NULL;
+						ddlprintk((MYIOC_s_INFO_FMT ": mpt_upload:  alt_%s has cached_fw=%p \n",
+							ioc->name, ioc->alt_ioc->name, ioc->alt_ioc->cached_fw));
+					}
+				} else {
 					printk(KERN_WARNING MYNAM ": firmware upload failure!\n");
+					ret = -5;
+				}
 			}
 		}
 	}
@@ -1640,7 +1801,22 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag)
 	 *	and we try GetLanConfigPages again...
 	 */
 	if ((ret == 0) && (reason == MPT_HOSTEVENT_IOC_BRINGUP)) {
-		if (ioc->bus_type == FC) {
+		if (ioc->bus_type == SAS) {
+
+			/* clear persistency table */
+			if(ioc->facts.IOCExceptions &
+			    MPI_IOCFACTS_EXCEPT_PERSISTENT_TABLE_FULL) {
+				ret = mptbase_sas_persist_operation(ioc,
+				    MPI_SAS_OP_CLEAR_NOT_PRESENT);
+				if(ret != 0)
+					return -1;
+			}
+
+			/* Find IM volumes
+			 */
+			mpt_findImVolumes(ioc);
+
+		} else if (ioc->bus_type == FC) {
 			/*
 			 *  Pre-fetch FC port WWN and stuff...
 			 *  (FCPortPage0_t stuff)
@@ -1783,7 +1959,7 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
 
 	if (ioc->cached_fw != NULL) {
 		ddlprintk((KERN_INFO MYNAM ": mpt_adapter_disable: Pushing FW onto adapter\n"));
-		if ((ret = mpt_downloadboot(ioc, NO_SLEEP)) < 0) {
+		if ((ret = mpt_downloadboot(ioc, (MpiFwHeader_t *)ioc->cached_fw, NO_SLEEP)) < 0) {
 			printk(KERN_WARNING MYNAM
 				": firmware downloadboot failure (%d)!\n", ret);
 		}
@@ -1831,9 +2007,9 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
 	}
 
 	kfree(ioc->spi_data.nvram);
-	kfree(ioc->spi_data.pIocPg3);
+	kfree(ioc->raid_data.pIocPg3);
 	ioc->spi_data.nvram = NULL;
-	ioc->spi_data.pIocPg3 = NULL;
+	ioc->raid_data.pIocPg3 = NULL;
 
 	if (ioc->spi_data.pIocPg4 != NULL) {
 		sz = ioc->spi_data.IocPg4Sz;
@@ -1852,6 +2028,23 @@ mpt_adapter_disable(MPT_ADAPTER *ioc)
 
 	kfree(ioc->ChainToChain);
 	ioc->ChainToChain = NULL;
+
+	if (ioc->HostPageBuffer != NULL) {
+		if((ret = mpt_host_page_access_control(ioc,
+		    MPI_DB_HPBAC_FREE_BUFFER, NO_SLEEP)) != 0) {
+			printk(KERN_ERR MYNAM
+			   ": %s: host page buffers free failed (%d)!\n",
+			    __FUNCTION__, ret);
+		}
+		dexitprintk((KERN_INFO MYNAM ": %s HostPageBuffer free  @ %p, sz=%d bytes\n",
+		 	ioc->name, ioc->HostPageBuffer, ioc->HostPageBuffer_sz));
+		pci_free_consistent(ioc->pcidev, ioc->HostPageBuffer_sz,
+				ioc->HostPageBuffer,
+				ioc->HostPageBuffer_dma);
+		ioc->HostPageBuffer = NULL;
+		ioc->HostPageBuffer_sz = 0;
+		ioc->alloc_total -= ioc->HostPageBuffer_sz;
+	}
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -2034,7 +2227,7 @@ MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag)
 	 *  Loop here waiting for IOC to come READY.
 	 */
 	ii = 0;
-	cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * 15;	/* 15 seconds */
+	cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * 5;	/* 5 seconds */
 
 	while ((ioc_state = mpt_GetIocState(ioc, 1)) != MPI_IOC_STATE_READY) {
 		if (ioc_state == MPI_IOC_STATE_OPERATIONAL) {
@@ -2212,6 +2405,7 @@ GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason)
 				le32_to_cpu(facts->CurrentSenseBufferHighAddr);
 		facts->CurReplyFrameSize =
 				le16_to_cpu(facts->CurReplyFrameSize);
+		facts->IOCCapabilities = le32_to_cpu(facts->IOCCapabilities);
 
 		/*
 		 * Handle NEW (!) IOCFactsReply fields in MPI-1.01.xx
@@ -2383,13 +2577,25 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag)
 	ddlprintk((MYIOC_s_INFO_FMT "upload_fw %d facts.Flags=%x\n",
 		   ioc->name, ioc->upload_fw, ioc->facts.Flags));
 
-	if (ioc->bus_type == FC)
+	if(ioc->bus_type == SAS)
+		ioc_init.MaxDevices = ioc->facts.MaxDevices;
+	else if(ioc->bus_type == FC)
 		ioc_init.MaxDevices = MPT_MAX_FC_DEVICES;
 	else
 		ioc_init.MaxDevices = MPT_MAX_SCSI_DEVICES;
-	
 	ioc_init.MaxBuses = MPT_MAX_BUS;
-
+	dinitprintk((MYIOC_s_INFO_FMT "facts.MsgVersion=%x\n",
+		   ioc->name, ioc->facts.MsgVersion));
+	if (ioc->facts.MsgVersion >= MPI_VERSION_01_05) {
+		// set MsgVersion and HeaderVersion host driver was built with
+		ioc_init.MsgVersion = cpu_to_le16(MPI_VERSION);
+	        ioc_init.HeaderVersion = cpu_to_le16(MPI_HEADER_VERSION);
+
+		if (ioc->facts.Flags & MPI_IOCFACTS_FLAGS_HOST_PAGE_BUFFER_PERSISTENT) {
+			ioc_init.HostPageBufferSGE = ioc->facts.HostPageBufferSGE;
+		} else if(mpt_host_page_alloc(ioc, &ioc_init))
+			return -99;
+	}
 	ioc_init.ReplyFrameSize = cpu_to_le16(ioc->reply_sz);	/* in BYTES */
 
 	if (sizeof(dma_addr_t) == sizeof(u64)) {
@@ -2403,17 +2609,21 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag)
 		ioc_init.HostMfaHighAddr = cpu_to_le32(0);
 		ioc_init.SenseBufferHighAddr = cpu_to_le32(0);
 	}
-		
+
 	ioc->facts.CurrentHostMfaHighAddr = ioc_init.HostMfaHighAddr;
 	ioc->facts.CurrentSenseBufferHighAddr = ioc_init.SenseBufferHighAddr;
+	ioc->facts.MaxDevices = ioc_init.MaxDevices;
+	ioc->facts.MaxBuses = ioc_init.MaxBuses;
 
 	dhsprintk((MYIOC_s_INFO_FMT "Sending IOCInit (req @ %p)\n",
 			ioc->name, &ioc_init));
 
 	r = mpt_handshake_req_reply_wait(ioc, sizeof(IOCInit_t), (u32*)&ioc_init,
 				sizeof(MPIDefaultReply_t), (u16*)&init_reply, 10 /*seconds*/, sleepFlag);
-	if (r != 0)
+	if (r != 0) {
+		printk(MYIOC_s_ERR_FMT "Sending IOCInit failed(%d)!\n",ioc->name, r);
 		return r;
+	}
 
 	/* No need to byte swap the multibyte fields in the reply
 	 * since we don't even look at it's contents.
@@ -2472,7 +2682,7 @@ SendPortEnable(MPT_ADAPTER *ioc, int portnum, int sleepFlag)
 {
 	PortEnable_t		 port_enable;
 	MPIDefaultReply_t	 reply_buf;
-	int	 ii;
+	int	 rc;
 	int	 req_sz;
 	int	 reply_sz;
 
@@ -2494,22 +2704,15 @@ SendPortEnable(MPT_ADAPTER *ioc, int portnum, int sleepFlag)
 
 	/* RAID FW may take a long time to enable
 	 */
-	if (ioc->bus_type == FC) {
-		ii = mpt_handshake_req_reply_wait(ioc, req_sz, (u32*)&port_enable,
-				reply_sz, (u16*)&reply_buf, 65 /*seconds*/, sleepFlag);
-	} else {
-		ii = mpt_handshake_req_reply_wait(ioc, req_sz, (u32*)&port_enable,
+	if ( (ioc->facts.ProductID & MPI_FW_HEADER_PID_PROD_MASK)
+			> MPI_FW_HEADER_PID_PROD_TARGET_SCSI ) {
+		rc = mpt_handshake_req_reply_wait(ioc, req_sz, (u32*)&port_enable,
 				reply_sz, (u16*)&reply_buf, 300 /*seconds*/, sleepFlag);
+	} else {
+		rc = mpt_handshake_req_reply_wait(ioc, req_sz, (u32*)&port_enable,
+				reply_sz, (u16*)&reply_buf, 30 /*seconds*/, sleepFlag);
 	}
-
-	if (ii != 0)
-		return ii;
-
-	/* We do not even look at the reply, so we need not
-	 * swap the multi-byte fields.
-	 */
-
-	return 0;
+	return rc;
 }
 
 /*
@@ -2666,9 +2869,8 @@ mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag)
  *		<0 for fw upload failure.
  */
 static int
-mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag)
+mpt_downloadboot(MPT_ADAPTER *ioc, MpiFwHeader_t *pFwHeader, int sleepFlag)
 {
-	MpiFwHeader_t		*pFwHeader;
 	MpiExtImageHeader_t	*pExtImage;
 	u32			 fwSize;
 	u32			 diag0val;
@@ -2679,18 +2881,8 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag)
 	u32			 load_addr;
 	u32 			 ioc_state=0;
 
-	ddlprintk((MYIOC_s_INFO_FMT "downloadboot: fw size 0x%x, ioc FW Ptr %p\n",
-				ioc->name, ioc->facts.FWImageSize, ioc->cached_fw));
-
-	if ( ioc->facts.FWImageSize == 0 )
-		return -1;
-
-	if (ioc->cached_fw == NULL)
-		return -2;
-
-	/* prevent a second downloadboot and memory free with alt_ioc */
-	if (ioc->alt_ioc && ioc->alt_ioc->cached_fw)
-		ioc->alt_ioc->cached_fw = NULL;
+	ddlprintk((MYIOC_s_INFO_FMT "downloadboot: fw size 0x%x (%d), FW Ptr %p\n",
+				ioc->name, pFwHeader->ImageSize, pFwHeader->ImageSize, pFwHeader));
 
 	CHIPREG_WRITE32(&ioc->chip->WriteSequence, 0xFF);
 	CHIPREG_WRITE32(&ioc->chip->WriteSequence, MPI_WRSEQ_1ST_KEY_VALUE);
@@ -2718,16 +2910,17 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag)
 				ioc->name, count));
 			break;
 		}
-		/* wait 1 sec */
+		/* wait .1 sec */
 		if (sleepFlag == CAN_SLEEP) {
-			msleep_interruptible (1000);
+			msleep_interruptible (100);
 		} else {
-			mdelay (1000);
+			mdelay (100);
 		}
 	}
 
 	if ( count == 30 ) {
-		ddlprintk((MYIOC_s_INFO_FMT "downloadboot failed! Unable to RESET_ADAPTER diag0val=%x\n",
+		ddlprintk((MYIOC_s_INFO_FMT "downloadboot failed! "
+		"Unable to get MPI_DIAG_DRWE mode, diag0val=%x\n",
 		ioc->name, diag0val));
 		return -3;
 	}
@@ -2742,7 +2935,6 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag)
 	/* Set the DiagRwEn and Disable ARM bits */
 	CHIPREG_WRITE32(&ioc->chip->Diagnostic, (MPI_DIAG_RW_ENABLE | MPI_DIAG_DISABLE_ARM));
 
-	pFwHeader = (MpiFwHeader_t *) ioc->cached_fw;
 	fwSize = (pFwHeader->ImageSize + 3)/4;
 	ptrFw = (u32 *) pFwHeader;
 
@@ -2792,19 +2984,38 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag)
 	/* Clear the internal flash bad bit - autoincrementing register,
 	 * so must do two writes.
 	 */
-	CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwAddress, 0x3F000000);
-	diagRwData = CHIPREG_PIO_READ32(&ioc->pio_chip->DiagRwData);
-	diagRwData |= 0x4000000;
-	CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwAddress, 0x3F000000);
-	CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwData, diagRwData);
+	if (ioc->bus_type == SCSI) {
+		/*
+		 * 1030 and 1035 H/W errata, workaround to access
+		 * the ClearFlashBadSignatureBit
+		 */
+		CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwAddress, 0x3F000000);
+		diagRwData = CHIPREG_PIO_READ32(&ioc->pio_chip->DiagRwData);
+		diagRwData |= 0x40000000;
+		CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwAddress, 0x3F000000);
+		CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwData, diagRwData);
+
+	} else /* if((ioc->bus_type == SAS) || (ioc->bus_type == FC)) */ {
+		diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
+		CHIPREG_WRITE32(&ioc->chip->Diagnostic, diag0val |
+		    MPI_DIAG_CLEAR_FLASH_BAD_SIG);
+
+		/* wait 1 msec */
+		if (sleepFlag == CAN_SLEEP) {
+			msleep_interruptible (1);
+		} else {
+			mdelay (1);
+		}
+	}
 
 	if (ioc->errata_flag_1064)
 		pci_disable_io_access(ioc->pcidev);
 
 	diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic);
-	ddlprintk((MYIOC_s_INFO_FMT "downloadboot diag0val=%x, turning off PREVENT_IOC_BOOT, DISABLE_ARM\n",
+	ddlprintk((MYIOC_s_INFO_FMT "downloadboot diag0val=%x, "
+		"turning off PREVENT_IOC_BOOT, DISABLE_ARM, RW_ENABLE\n",
 		ioc->name, diag0val));
-	diag0val &= ~(MPI_DIAG_PREVENT_IOC_BOOT | MPI_DIAG_DISABLE_ARM);
+	diag0val &= ~(MPI_DIAG_PREVENT_IOC_BOOT | MPI_DIAG_DISABLE_ARM | MPI_DIAG_RW_ENABLE);
 	ddlprintk((MYIOC_s_INFO_FMT "downloadboot now diag0val=%x\n",
 		ioc->name, diag0val));
 	CHIPREG_WRITE32(&ioc->chip->Diagnostic, diag0val);
@@ -2812,10 +3023,23 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag)
 	/* Write 0xFF to reset the sequencer */
 	CHIPREG_WRITE32(&ioc->chip->WriteSequence, 0xFF);
 
+	if (ioc->bus_type == SAS) {
+		ioc_state = mpt_GetIocState(ioc, 0);
+		if ( (GetIocFacts(ioc, sleepFlag,
+				MPT_HOSTEVENT_IOC_BRINGUP)) != 0 ) {
+			ddlprintk((MYIOC_s_INFO_FMT "GetIocFacts failed: IocState=%x\n",
+					ioc->name, ioc_state));
+			return -EFAULT;
+		}
+	}
+
 	for (count=0; count<HZ*20; count++) {
 		if ((ioc_state = mpt_GetIocState(ioc, 0)) & MPI_IOC_STATE_READY) {
 			ddlprintk((MYIOC_s_INFO_FMT "downloadboot successful! (count=%d) IocState=%x\n",
 					ioc->name, count, ioc_state));
+			if (ioc->bus_type == SAS) {
+				return 0;
+			}
 			if ((SendIocInit(ioc, sleepFlag)) != 0) {
 				ddlprintk((MYIOC_s_INFO_FMT "downloadboot: SendIocInit failed\n",
 					ioc->name));
@@ -3049,12 +3273,13 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag)
 
 				/* wait 1 sec */
 				if (sleepFlag == CAN_SLEEP) {
-					ssleep(1);
+					msleep_interruptible (1000);
 				} else {
 					mdelay (1000);
 				}
 			}
-			if ((count = mpt_downloadboot(ioc, sleepFlag)) < 0) {
+			if ((count = mpt_downloadboot(ioc,
+				(MpiFwHeader_t *)ioc->cached_fw, sleepFlag)) < 0) {
 				printk(KERN_WARNING MYNAM
 					": firmware downloadboot failure (%d)!\n", count);
 			}
@@ -3637,7 +3862,7 @@ WaitForDoorbellAck(MPT_ADAPTER *ioc, int howlong, int sleepFlag)
 	int count = 0;
 	u32 intstat=0;
 
-	cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * howlong;
+	cntdn = 1000 * howlong;
 
 	if (sleepFlag == CAN_SLEEP) {
 		while (--cntdn) {
@@ -3687,7 +3912,7 @@ WaitForDoorbellInt(MPT_ADAPTER *ioc, int howlong, int sleepFlag)
 	int count = 0;
 	u32 intstat=0;
 
-	cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * howlong;
+	cntdn = 1000 * howlong;
 	if (sleepFlag == CAN_SLEEP) {
 		while (--cntdn) {
 			intstat = CHIPREG_READ32(&ioc->chip->IntStatus);
@@ -4001,6 +4226,85 @@ GetFcPortPage0(MPT_ADAPTER *ioc, int portnum)
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /*
+ *	mptbase_sas_persist_operation - Perform operation on SAS Persitent Table
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@sas_address: 64bit SAS Address for operation.
+ *	@target_id: specified target for operation
+ *	@bus: specified bus for operation
+ *	@persist_opcode: see below
+ *
+ *	MPI_SAS_OP_CLEAR_NOT_PRESENT - Free all persist TargetID mappings for
+ *		devices not currently present.
+ *	MPI_SAS_OP_CLEAR_ALL_PERSISTENT - Clear al persist TargetID mappings
+ *
+ *	NOTE: Don't use not this function during interrupt time.
+ *
+ *	Returns: 0 for success, non-zero error
+ */
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+int
+mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode)
+{
+	SasIoUnitControlRequest_t	*sasIoUnitCntrReq;
+	SasIoUnitControlReply_t		*sasIoUnitCntrReply;
+	MPT_FRAME_HDR			*mf = NULL;
+	MPIHeader_t			*mpi_hdr;
+
+
+	/* insure garbage is not sent to fw */
+	switch(persist_opcode) {
+
+	case MPI_SAS_OP_CLEAR_NOT_PRESENT:
+	case MPI_SAS_OP_CLEAR_ALL_PERSISTENT:
+		break;
+
+	default:
+		return -1;
+		break;
+	}
+
+	printk("%s: persist_opcode=%x\n",__FUNCTION__, persist_opcode);
+
+	/* Get a MF for this command.
+	 */
+	if ((mf = mpt_get_msg_frame(mpt_base_index, ioc)) == NULL) {
+		printk("%s: no msg frames!\n",__FUNCTION__);
+		return -1;
+        }
+
+	mpi_hdr = (MPIHeader_t *) mf;
+	sasIoUnitCntrReq = (SasIoUnitControlRequest_t *)mf;
+	memset(sasIoUnitCntrReq,0,sizeof(SasIoUnitControlRequest_t));
+	sasIoUnitCntrReq->Function = MPI_FUNCTION_SAS_IO_UNIT_CONTROL;
+	sasIoUnitCntrReq->MsgContext = mpi_hdr->MsgContext;
+	sasIoUnitCntrReq->Operation = persist_opcode;
+
+	init_timer(&ioc->persist_timer);
+	ioc->persist_timer.data = (unsigned long) ioc;
+	ioc->persist_timer.function = mpt_timer_expired;
+	ioc->persist_timer.expires = jiffies + HZ*10 /* 10 sec */;
+	ioc->persist_wait_done=0;
+	add_timer(&ioc->persist_timer);
+	mpt_put_msg_frame(mpt_base_index, ioc, mf);
+	wait_event(mpt_waitq, ioc->persist_wait_done);
+
+	sasIoUnitCntrReply =
+	    (SasIoUnitControlReply_t *)ioc->persist_reply_frame;
+	if (le16_to_cpu(sasIoUnitCntrReply->IOCStatus) != MPI_IOCSTATUS_SUCCESS) {
+		printk("%s: IOCStatus=0x%X IOCLogInfo=0x%X\n",
+		    __FUNCTION__,
+		    sasIoUnitCntrReply->IOCStatus,
+		    sasIoUnitCntrReply->IOCLogInfo);
+		return -1;
+	}
+
+	printk("%s: success\n",__FUNCTION__);
+	return 0;
+}
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/*
  *	GetIoUnitPage2 - Retrieve BIOS version and boot order information.
  *	@ioc: Pointer to MPT_ADAPTER structure
  *
@@ -4340,10 +4644,10 @@ mpt_findImVolumes(MPT_ADAPTER *ioc)
 	if (mpt_config(ioc, &cfg) != 0)
 		goto done_and_free;
 
-	if ( (mem = (u8 *)ioc->spi_data.pIocPg2) == NULL ) {
+	if ( (mem = (u8 *)ioc->raid_data.pIocPg2) == NULL ) {
 		mem = kmalloc(iocpage2sz, GFP_ATOMIC);
 		if (mem) {
-			ioc->spi_data.pIocPg2 = (IOCPage2_t *) mem;
+			ioc->raid_data.pIocPg2 = (IOCPage2_t *) mem;
 		} else {
 			goto done_and_free;
 		}
@@ -4360,7 +4664,7 @@ mpt_findImVolumes(MPT_ADAPTER *ioc)
 		/* At least 1 RAID Volume
 		 */
 		pIocRv = pIoc2->RaidVolume;
-		ioc->spi_data.isRaid = 0;
+		ioc->raid_data.isRaid = 0;
 		for (jj = 0; jj < nVols; jj++, pIocRv++) {
 			vid = pIocRv->VolumeID;
 			vbus = pIocRv->VolumeBus;
@@ -4369,7 +4673,7 @@ mpt_findImVolumes(MPT_ADAPTER *ioc)
 			/* find the match
 			 */
 			if (vbus == 0) {
-				ioc->spi_data.isRaid |= (1 << vid);
+				ioc->raid_data.isRaid |= (1 << vid);
 			} else {
 				/* Error! Always bus 0
 				 */
@@ -4404,8 +4708,8 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc)
 
 	/* Free the old page
 	 */
-	kfree(ioc->spi_data.pIocPg3);
-	ioc->spi_data.pIocPg3 = NULL;
+	kfree(ioc->raid_data.pIocPg3);
+	ioc->raid_data.pIocPg3 = NULL;
 
 	/* There is at least one physical disk.
 	 * Read and save IOC Page 3
@@ -4442,7 +4746,7 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc)
 		mem = kmalloc(iocpage3sz, GFP_ATOMIC);
 		if (mem) {
 			memcpy(mem, (u8 *)pIoc3, iocpage3sz);
-			ioc->spi_data.pIocPg3 = (IOCPage3_t *) mem;
+			ioc->raid_data.pIocPg3 = (IOCPage3_t *) mem;
 		}
 	}
 
@@ -5366,8 +5670,8 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag)
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-static char *
-EventDescriptionStr(u8 event, u32 evData0)
+static void
+EventDescriptionStr(u8 event, u32 evData0, char *evStr)
 {
 	char *ds;
 
@@ -5420,8 +5724,95 @@ EventDescriptionStr(u8 event, u32 evData0)
 			ds = "Events(OFF) Change";
 		break;
 	case MPI_EVENT_INTEGRATED_RAID:
-		ds = "Integrated Raid";
+	{
+		u8 ReasonCode = (u8)(evData0 >> 16);
+		switch (ReasonCode) {
+		case MPI_EVENT_RAID_RC_VOLUME_CREATED :
+			ds = "Integrated Raid: Volume Created";
+			break;
+		case MPI_EVENT_RAID_RC_VOLUME_DELETED :
+			ds = "Integrated Raid: Volume Deleted";
+			break;
+		case MPI_EVENT_RAID_RC_VOLUME_SETTINGS_CHANGED :
+			ds = "Integrated Raid: Volume Settings Changed";
+			break;
+		case MPI_EVENT_RAID_RC_VOLUME_STATUS_CHANGED :
+			ds = "Integrated Raid: Volume Status Changed";
+			break;
+		case MPI_EVENT_RAID_RC_VOLUME_PHYSDISK_CHANGED :
+			ds = "Integrated Raid: Volume Physdisk Changed";
+			break;
+		case MPI_EVENT_RAID_RC_PHYSDISK_CREATED :
+			ds = "Integrated Raid: Physdisk Created";
+			break;
+		case MPI_EVENT_RAID_RC_PHYSDISK_DELETED :
+			ds = "Integrated Raid: Physdisk Deleted";
+			break;
+		case MPI_EVENT_RAID_RC_PHYSDISK_SETTINGS_CHANGED :
+			ds = "Integrated Raid: Physdisk Settings Changed";
+			break;
+		case MPI_EVENT_RAID_RC_PHYSDISK_STATUS_CHANGED :
+			ds = "Integrated Raid: Physdisk Status Changed";
+			break;
+		case MPI_EVENT_RAID_RC_DOMAIN_VAL_NEEDED :
+			ds = "Integrated Raid: Domain Validation Needed";
+			break;
+		case MPI_EVENT_RAID_RC_SMART_DATA :
+			ds = "Integrated Raid; Smart Data";
+			break;
+		case MPI_EVENT_RAID_RC_REPLACE_ACTION_STARTED :
+			ds = "Integrated Raid: Replace Action Started";
+			break;
+		default:
+			ds = "Integrated Raid";
+		break;
+		}
+		break;
+	}
+	case MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE:
+		ds = "SCSI Device Status Change";
+		break;
+	case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE:
+	{
+		u8 ReasonCode = (u8)(evData0 >> 16);
+		switch (ReasonCode) {
+		case MPI_EVENT_SAS_DEV_STAT_RC_ADDED:
+			ds = "SAS Device Status Change: Added";
+			break;
+		case MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING:
+			ds = "SAS Device Status Change: Deleted";
+			break;
+		case MPI_EVENT_SAS_DEV_STAT_RC_SMART_DATA:
+			ds = "SAS Device Status Change: SMART Data";
+			break;
+		case MPI_EVENT_SAS_DEV_STAT_RC_NO_PERSIST_ADDED:
+			ds = "SAS Device Status Change: No Persistancy Added";
+			break;
+		default:
+			ds = "SAS Device Status Change: Unknown";
+		break;
+		}
+		break;
+	}
+	case MPI_EVENT_ON_BUS_TIMER_EXPIRED:
+		ds = "Bus Timer Expired";
+		break;
+	case MPI_EVENT_QUEUE_FULL:
+		ds = "Queue Full";
+		break;
+	case MPI_EVENT_SAS_SES:
+		ds = "SAS SES Event";
+		break;
+	case MPI_EVENT_PERSISTENT_TABLE_FULL:
+		ds = "Persistent Table Full";
+		break;
+	case MPI_EVENT_SAS_PHY_LINK_STATUS:
+		ds = "SAS PHY Link Status";
+		break;
+	case MPI_EVENT_SAS_DISCOVERY_ERROR:
+		ds = "SAS Discovery Error";
 		break;
+
 	/*
 	 *  MPT base "custom" events may be added here...
 	 */
@@ -5429,7 +5820,7 @@ EventDescriptionStr(u8 event, u32 evData0)
 		ds = "Unknown";
 		break;
 	}
-	return ds;
+	strcpy(evStr,ds);
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -5451,7 +5842,7 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply
 	int ii;
 	int r = 0;
 	int handlers = 0;
-	char *evStr;
+	char evStr[100];
 	u8 event;
 
 	/*
@@ -5464,7 +5855,7 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply
 		evData0 = le32_to_cpu(pEventReply->Data[0]);
 	}
 
-	evStr = EventDescriptionStr(event, evData0);
+	EventDescriptionStr(event, evData0, evStr);
 	devtprintk((MYIOC_s_INFO_FMT "MPT event (%s=%02Xh) detected!\n",
 			ioc->name,
 			evStr,
@@ -5481,20 +5872,6 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply
 	 *  Do general / base driver event processing
 	 */
 	switch(event) {
-	case MPI_EVENT_NONE:			/* 00 */
-	case MPI_EVENT_LOG_DATA:		/* 01 */
-	case MPI_EVENT_STATE_CHANGE:		/* 02 */
-	case MPI_EVENT_UNIT_ATTENTION:		/* 03 */
-	case MPI_EVENT_IOC_BUS_RESET:		/* 04 */
-	case MPI_EVENT_EXT_BUS_RESET:		/* 05 */
-	case MPI_EVENT_RESCAN:			/* 06 */
-	case MPI_EVENT_LINK_STATUS_CHANGE:	/* 07 */
-	case MPI_EVENT_LOOP_STATE_CHANGE:	/* 08 */
-	case MPI_EVENT_LOGOUT:			/* 09 */
-	case MPI_EVENT_INTEGRATED_RAID:		/* 0B */
-	case MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE:	/* 0C */
-	default:
-		break;
 	case MPI_EVENT_EVENT_CHANGE:		/* 0A */
 		if (evDataLen) {
 			u8 evState = evData0 & 0xFF;
@@ -5507,6 +5884,8 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply
 			}
 		}
 		break;
+	default:
+		break;
 	}
 
 	/*
@@ -5653,6 +6032,111 @@ mpt_sp_log_info(MPT_ADAPTER *ioc, u32 log_info)
 	printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): F/W: %s\n", ioc->name, log_info, desc);
 }
 
+/* strings for sas loginfo */
+	static char *originator_str[] = {
+		"IOP",						/* 00h */
+		"PL",						/* 01h */
+		"IR"						/* 02h */
+	};
+	static char *iop_code_str[] = {
+		NULL,						/* 00h */
+		"Invalid SAS Address",				/* 01h */
+		NULL,						/* 02h */
+		"Invalid Page",					/* 03h */
+		NULL,						/* 04h */
+		"Task Terminated"				/* 05h */
+	};
+	static char *pl_code_str[] = {
+		NULL,						/* 00h */
+		"Open Failure",					/* 01h */
+		"Invalid Scatter Gather List",			/* 02h */
+		"Wrong Relative Offset or Frame Length",	/* 03h */
+		"Frame Transfer Error",				/* 04h */
+		"Transmit Frame Connected Low",			/* 05h */
+		"SATA Non-NCQ RW Error Bit Set",		/* 06h */
+		"SATA Read Log Receive Data Error",		/* 07h */
+		"SATA NCQ Fail All Commands After Error",	/* 08h */
+		"SATA Error in Receive Set Device Bit FIS",	/* 09h */
+		"Receive Frame Invalid Message",		/* 0Ah */
+		"Receive Context Message Valid Error",		/* 0Bh */
+		"Receive Frame Current Frame Error",		/* 0Ch */
+		"SATA Link Down",				/* 0Dh */
+		"Discovery SATA Init W IOS",			/* 0Eh */
+		"Config Invalid Page",				/* 0Fh */
+		"Discovery SATA Init Timeout",			/* 10h */
+		"Reset",					/* 11h */
+		"Abort",					/* 12h */
+		"IO Not Yet Executed",				/* 13h */
+		"IO Executed",					/* 14h */
+		NULL,						/* 15h */
+		NULL,						/* 16h */
+		NULL,						/* 17h */
+		NULL,						/* 18h */
+		NULL,						/* 19h */
+		NULL,						/* 1Ah */
+		NULL,						/* 1Bh */
+		NULL,						/* 1Ch */
+		NULL,						/* 1Dh */
+		NULL,						/* 1Eh */
+		NULL,						/* 1Fh */
+		"Enclosure Management"				/* 20h */
+	};
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/*
+ *	mpt_sas_log_info - Log information returned from SAS IOC.
+ *	@ioc: Pointer to MPT_ADAPTER structure
+ *	@log_info: U32 LogInfo reply word from the IOC
+ *
+ *	Refer to lsi/mpi_log_sas.h.
+ */
+static void
+mpt_sas_log_info(MPT_ADAPTER *ioc, u32 log_info)
+{
+union loginfo_type {
+	u32	loginfo;
+	struct {
+		u32	subcode:16;
+		u32	code:8;
+		u32	originator:4;
+		u32	bus_type:4;
+	}dw;
+};
+	union loginfo_type sas_loginfo;
+	char *code_desc = NULL;
+
+	sas_loginfo.loginfo = log_info;
+	if ((sas_loginfo.dw.bus_type != 3 /*SAS*/) &&
+	    (sas_loginfo.dw.originator < sizeof(originator_str)/sizeof(char*)))
+		return;
+	if ((sas_loginfo.dw.originator == 0 /*IOP*/) &&
+	    (sas_loginfo.dw.code < sizeof(iop_code_str)/sizeof(char*))) {
+		code_desc = iop_code_str[sas_loginfo.dw.code];
+	}else if ((sas_loginfo.dw.originator == 1 /*PL*/) &&
+	    (sas_loginfo.dw.code < sizeof(pl_code_str)/sizeof(char*) )) {
+		code_desc = pl_code_str[sas_loginfo.dw.code];
+	}
+
+	if (code_desc != NULL)
+		printk(MYIOC_s_INFO_FMT
+			"LogInfo(0x%08x): Originator={%s}, Code={%s},"
+			" SubCode(0x%04x)\n",
+			ioc->name,
+			log_info,
+			originator_str[sas_loginfo.dw.originator],
+			code_desc,
+			sas_loginfo.dw.subcode);
+	else
+		printk(MYIOC_s_INFO_FMT
+			"LogInfo(0x%08x): Originator={%s}, Code=(0x%02x),"
+			" SubCode(0x%04x)\n",
+			ioc->name,
+			log_info,
+			originator_str[sas_loginfo.dw.originator],
+			sas_loginfo.dw.code,
+			sas_loginfo.dw.subcode);
+}
+
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /*
  *	mpt_sp_ioc_info - IOC information returned from SCSI Parallel IOC.
@@ -5814,6 +6298,7 @@ EXPORT_SYMBOL(mpt_findImVolumes);
 EXPORT_SYMBOL(mpt_read_ioc_pg_3);
 EXPORT_SYMBOL(mpt_alloc_fw_memory);
 EXPORT_SYMBOL(mpt_free_fw_memory);
+EXPORT_SYMBOL(mptbase_sas_persist_operation);
 
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index f4827d92373..75105277e22 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -65,6 +65,7 @@
 #include "lsi/mpi_fc.h"		/* Fibre Channel (lowlevel) support */
 #include "lsi/mpi_targ.h"	/* SCSI/FCP Target protcol support */
 #include "lsi/mpi_tool.h"	/* Tools support */
+#include "lsi/mpi_sas.h"	/* SAS support */
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 
@@ -76,8 +77,8 @@
 #define COPYRIGHT	"Copyright (c) 1999-2005 " MODULEAUTHOR
 #endif
 
-#define MPT_LINUX_VERSION_COMMON	"3.03.02"
-#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.03.02"
+#define MPT_LINUX_VERSION_COMMON	"3.03.03"
+#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.03.03"
 #define WHAT_MAGIC_STRING		"@" "(" "#" ")"
 
 #define show_mptmod_ver(s,ver)  \
@@ -423,7 +424,7 @@ typedef struct _MPT_IOCTL {
 /*
  *  Event Structure and define
  */
-#define MPTCTL_EVENT_LOG_SIZE		(0x0000000A)
+#define MPTCTL_EVENT_LOG_SIZE		(0x000000032)
 typedef struct _mpt_ioctl_events {
 	u32	event;		/* Specified by define above */
 	u32	eventContext;	/* Index or counter */
@@ -451,16 +452,13 @@ typedef struct _mpt_ioctl_events {
 #define MPT_SCSICFG_ALL_IDS		0x02	/* WriteSDP1 to all IDS */
 /* #define MPT_SCSICFG_BLK_NEGO		0x10	   WriteSDP1 with WDTR and SDTR disabled */
 
-typedef	struct _ScsiCfgData {
+typedef	struct _SpiCfgData {
 	u32		 PortFlags;
 	int		*nvram;			/* table of device NVRAM values */
-	IOCPage2_t	*pIocPg2;		/* table of Raid Volumes */
-	IOCPage3_t	*pIocPg3;		/* table of physical disks */
 	IOCPage4_t	*pIocPg4;		/* SEP devices addressing */
 	dma_addr_t	 IocPg4_dma;		/* Phys Addr of IOCPage4 data */
 	int		 IocPg4Sz;		/* IOCPage4 size */
 	u8		 dvStatus[MPT_MAX_SCSI_DEVICES];
-	int		 isRaid;		/* bit field, 1 if RAID */
 	u8		 minSyncFactor;		/* 0xFF if async */
 	u8		 maxSyncOffset;		/* 0 if async */
 	u8		 maxBusWidth;		/* 0 if narrow, 1 if wide */
@@ -472,10 +470,28 @@ typedef	struct _ScsiCfgData {
 	u8		 dvScheduled;		/* 1 if scheduled */
 	u8		 forceDv;		/* 1 to force DV scheduling */
 	u8		 noQas;			/* Disable QAS for this adapter */
-	u8		 Saf_Te;		/* 1 to force all Processors as SAF-TE if Inquiry data length is too short to check for SAF-TE */
+	u8		 Saf_Te;		/* 1 to force all Processors as
+						 * SAF-TE if Inquiry data length
+						 * is too short to check for SAF-TE
+						 */
 	u8		 mpt_dv;		/* command line option: enhanced=1, basic=0 */
+	u8		 bus_reset;		/* 1 to allow bus reset */
 	u8		 rsvd[1];
-} ScsiCfgData;
+}SpiCfgData;
+
+typedef	struct _SasCfgData {
+	u8		 ptClear;		/* 1 to automatically clear the
+						 * persistent table.
+						 * 0 to disable
+						 * automatic clearing.
+						 */
+}SasCfgData;
+
+typedef	struct _RaidCfgData {
+	IOCPage2_t	*pIocPg2;		/* table of Raid Volumes */
+	IOCPage3_t	*pIocPg3;		/* table of physical disks */
+	int		 isRaid;		/* bit field, 1 if RAID */
+}RaidCfgData;
 
 /*
  *  Adapter Structure - pci_dev specific. Maximum: MPT_MAX_ADAPTERS
@@ -530,11 +546,16 @@ typedef struct _MPT_ADAPTER
 	u8			*sense_buf_pool;
 	dma_addr_t		 sense_buf_pool_dma;
 	u32			 sense_buf_low_dma;
+	u8			*HostPageBuffer; /* SAS - host page buffer support */
+	u32			HostPageBuffer_sz;
+	dma_addr_t		HostPageBuffer_dma;
 	int			 mtrr_reg;
 	struct pci_dev		*pcidev;	/* struct pci_dev pointer */
 	u8			__iomem *memmap;	/* mmap address */
 	struct Scsi_Host	*sh;		/* Scsi Host pointer */
-	ScsiCfgData		spi_data;	/* Scsi config. data */
+	SpiCfgData		spi_data;	/* Scsi config. data */
+	RaidCfgData		raid_data;	/* Raid config. data */
+	SasCfgData		sas_data;	/* Sas config. data */
 	MPT_IOCTL		*ioctl;		/* ioctl data pointer */
 	struct proc_dir_entry	*ioc_dentry;
 	struct _MPT_ADAPTER	*alt_ioc;	/* ptr to 929 bound adapter port */
@@ -554,31 +575,35 @@ typedef struct _MPT_ADAPTER
 #else
 	u32			 mfcnt;
 #endif
-	u32			 NB_for_64_byte_frame;       
+	u32			 NB_for_64_byte_frame;
 	u32			 hs_req[MPT_MAX_FRAME_SIZE/sizeof(u32)];
 	u16			 hs_reply[MPT_MAX_FRAME_SIZE/sizeof(u16)];
 	IOCFactsReply_t		 facts;
 	PortFactsReply_t	 pfacts[2];
 	FCPortPage0_t		 fc_port_page0[2];
+	struct timer_list	 persist_timer;	/* persist table timer */
+	int			 persist_wait_done; /* persist completion flag */
+	u8			 persist_reply_frame[MPT_DEFAULT_FRAME_SIZE]; /* persist reply */
 	LANPage0_t		 lan_cnfg_page0;
 	LANPage1_t		 lan_cnfg_page1;
-	/*  
+	/*
 	 * Description: errata_flag_1064
 	 * If a PCIX read occurs within 1 or 2 cycles after the chip receives
 	 * a split completion for a read data, an internal address pointer incorrectly
 	 * increments by 32 bytes
 	 */
-	int			 errata_flag_1064;	
+	int			 errata_flag_1064;
 	u8			 FirstWhoInit;
 	u8			 upload_fw;	/* If set, do a fw upload */
 	u8			 reload_fw;	/* Force a FW Reload on next reset */
-	u8			 NBShiftFactor;  /* NB Shift Factor based on Block Size (Facts)  */     
+	u8			 NBShiftFactor;  /* NB Shift Factor based on Block Size (Facts)  */
 	u8			 pad1[4];
 	int			 DoneCtx;
 	int			 TaskCtx;
 	int			 InternalCtx;
-	struct list_head	 list; 
+	struct list_head	 list;
 	struct net_device	*netdev;
+	struct list_head	 sas_topology;
 } MPT_ADAPTER;
 
 /*
@@ -964,6 +989,7 @@ extern void	 mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size);
 extern void	 mpt_free_fw_memory(MPT_ADAPTER *ioc);
 extern int	 mpt_findImVolumes(MPT_ADAPTER *ioc);
 extern int	 mpt_read_ioc_pg_3(MPT_ADAPTER *ioc);
+extern int	 mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode);
 
 /*
  *  Public data decl's...
diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index 7577c2417e2..cb2d59d5f5a 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -1326,7 +1326,7 @@ mptctl_gettargetinfo (unsigned long arg)
 		 */
 		if (hd && hd->Targets) {
 			mpt_findImVolumes(ioc);
-			pIoc2 = ioc->spi_data.pIocPg2;
+			pIoc2 = ioc->raid_data.pIocPg2;
 			for ( id = 0; id <= max_id; ) {
 				if ( pIoc2 && pIoc2->NumActiveVolumes ) {
 					if ( id == pIoc2->RaidVolume[0].VolumeID ) {
@@ -1348,7 +1348,7 @@ mptctl_gettargetinfo (unsigned long arg)
 						--maxWordsLeft;
 						goto next_id;
 					} else {
-						pIoc3 = ioc->spi_data.pIocPg3;
+						pIoc3 = ioc->raid_data.pIocPg3;
             					for ( jj = 0; jj < pIoc3->NumPhysDisks; jj++ ) {
                     					if ( pIoc3->PhysDisk[jj].PhysDiskID == id )
 								goto next_id;
diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c
index 13771abea13..a628be9bbba 100644
--- a/drivers/message/fusion/mptfc.c
+++ b/drivers/message/fusion/mptfc.c
@@ -189,7 +189,7 @@ mptfc_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		printk(MYIOC_s_WARN_FMT
 			"Skipping ioc=%p because SCSI Initiator mode is NOT enabled!\n",
 			ioc->name, ioc);
-		return -ENODEV;
+		return 0;
 	}
 
 	sh = scsi_host_alloc(&mptfc_driver_template, sizeof(MPT_SCSI_HOST));
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 52794be5a95..ed3c891e388 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -312,7 +312,12 @@ static int
 mpt_lan_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
 	struct net_device *dev = ioc->netdev;
-	struct mpt_lan_priv *priv = netdev_priv(dev);
+	struct mpt_lan_priv *priv;
+
+	if (dev == NULL)
+		return(1);
+	else
+		priv = netdev_priv(dev);
 
 	dlprintk((KERN_INFO MYNAM ": IOC %s_reset routed to LAN driver!\n",
 			reset_phase==MPT_IOC_SETUP_RESET ? "setup" : (
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
new file mode 100644
index 00000000000..429820e48c6
--- /dev/null
+++ b/drivers/message/fusion/mptsas.c
@@ -0,0 +1,1235 @@
+/*
+ *  linux/drivers/message/fusion/mptsas.c
+ *      For use with LSI Logic PCI chip/adapter(s)
+ *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+ *
+ *  Copyright (c) 1999-2005 LSI Logic Corporation
+ *  (mailto:mpt_linux_developer@lsil.com)
+ *  Copyright (c) 2005 Dell
+ */
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/*
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; version 2 of the License.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    NO WARRANTY
+    THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR
+    CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT
+    LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT,
+    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is
+    solely responsible for determining the appropriateness of using and
+    distributing the Program and assumes all risks associated with its
+    exercise of rights under this Agreement, including but not limited to
+    the risks and costs of program errors, damage to or loss of data,
+    programs or equipment, and unavailability or interruption of operations.
+
+    DISCLAIMER OF LIABILITY
+    NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY
+    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND
+    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+    TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+    USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
+    HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport_sas.h>
+
+#include "mptbase.h"
+#include "mptscsih.h"
+
+
+#define my_NAME		"Fusion MPT SAS Host driver"
+#define my_VERSION	MPT_LINUX_VERSION_COMMON
+#define MYNAM		"mptsas"
+
+MODULE_AUTHOR(MODULEAUTHOR);
+MODULE_DESCRIPTION(my_NAME);
+MODULE_LICENSE("GPL");
+
+static int mpt_pq_filter;
+module_param(mpt_pq_filter, int, 0);
+MODULE_PARM_DESC(mpt_pq_filter,
+		"Enable peripheral qualifier filter: enable=1  "
+		"(default=0)");
+
+static int mpt_pt_clear;
+module_param(mpt_pt_clear, int, 0);
+MODULE_PARM_DESC(mpt_pt_clear,
+		"Clear persistency table: enable=1  "
+		"(default=MPTSCSIH_PT_CLEAR=0)");
+
+static int	mptsasDoneCtx = -1;
+static int	mptsasTaskCtx = -1;
+static int	mptsasInternalCtx = -1; /* Used only for internal commands */
+
+
+/*
+ * SAS topology structures
+ *
+ * The MPT Fusion firmware interface spreads information about the
+ * SAS topology over many manufacture pages, thus we need some data
+ * structure to collect it and process it for the SAS transport class.
+ */
+
+struct mptsas_devinfo {
+	u16	handle;		/* unique id to address this device */
+	u8	phy_id;		/* phy number of parent device */
+	u8	port_id;	/* sas physical port this device
+				   is assoc'd with */
+	u8	target;		/* logical target id of this device */
+	u8	bus;		/* logical bus number of this device */
+	u64	sas_address;    /* WWN of this device,
+				   SATA is assigned by HBA,expander */
+	u32	device_info;	/* bitfield detailed info about this device */
+};
+
+struct mptsas_phyinfo {
+	u8	phy_id; 		/* phy index */
+	u8	port_id; 		/* port number this phy is part of */
+	u8	negotiated_link_rate;	/* nego'd link rate for this phy */
+	u8	hw_link_rate; 		/* hardware max/min phys link rate */
+	u8	programmed_link_rate;	/* programmed max/min phy link rate */
+	struct mptsas_devinfo identify;	/* point to phy device info */
+	struct mptsas_devinfo attached;	/* point to attached device info */
+	struct sas_rphy *rphy;
+};
+
+struct mptsas_portinfo {
+	struct list_head list;
+	u16		handle;		/* unique id to address this */
+	u8		num_phys;	/* number of phys */
+	struct mptsas_phyinfo *phy_info;
+};
+
+/*
+ * This is pretty ugly.  We will be able to seriously clean it up
+ * once the DV code in mptscsih goes away and we can properly
+ * implement ->target_alloc.
+ */
+static int
+mptsas_slave_alloc(struct scsi_device *device)
+{
+	struct Scsi_Host	*host = device->host;
+	MPT_SCSI_HOST		*hd = (MPT_SCSI_HOST *)host->hostdata;
+	struct sas_rphy		*rphy;
+	struct mptsas_portinfo	*p;
+	VirtDevice		*vdev;
+	uint			target = device->id;
+	int i;
+
+	if ((vdev = hd->Targets[target]) != NULL)
+		goto out;
+
+	vdev = kmalloc(sizeof(VirtDevice), GFP_KERNEL);
+	if (!vdev) {
+		printk(MYIOC_s_ERR_FMT "slave_alloc kmalloc(%zd) FAILED!\n",
+				hd->ioc->name, sizeof(VirtDevice));
+		return -ENOMEM;
+	}
+
+	memset(vdev, 0, sizeof(VirtDevice));
+	vdev->tflags = MPT_TARGET_FLAGS_Q_YES|MPT_TARGET_FLAGS_VALID_INQUIRY;
+	vdev->ioc_id = hd->ioc->id;
+
+	rphy = dev_to_rphy(device->sdev_target->dev.parent);
+	list_for_each_entry(p, &hd->ioc->sas_topology, list) {
+		for (i = 0; i < p->num_phys; i++) {
+			if (p->phy_info[i].attached.sas_address ==
+					rphy->identify.sas_address) {
+				vdev->target_id =
+					p->phy_info[i].attached.target;
+				vdev->bus_id = p->phy_info[i].attached.bus;
+				hd->Targets[device->id] = vdev;
+				goto out;
+			}
+		}
+	}
+
+	printk("No matching SAS device found!!\n");
+	kfree(vdev);
+	return -ENODEV;
+
+ out:
+	vdev->num_luns++;
+	device->hostdata = vdev;
+	return 0;
+}
+
+static struct scsi_host_template mptsas_driver_template = {
+	.proc_name			= "mptsas",
+	.proc_info			= mptscsih_proc_info,
+	.name				= "MPT SPI Host",
+	.info				= mptscsih_info,
+	.queuecommand			= mptscsih_qcmd,
+	.slave_alloc			= mptsas_slave_alloc,
+	.slave_configure		= mptscsih_slave_configure,
+	.slave_destroy			= mptscsih_slave_destroy,
+	.change_queue_depth 		= mptscsih_change_queue_depth,
+	.eh_abort_handler		= mptscsih_abort,
+	.eh_device_reset_handler	= mptscsih_dev_reset,
+	.eh_bus_reset_handler		= mptscsih_bus_reset,
+	.eh_host_reset_handler		= mptscsih_host_reset,
+	.bios_param			= mptscsih_bios_param,
+	.can_queue			= MPT_FC_CAN_QUEUE,
+	.this_id			= -1,
+	.sg_tablesize			= MPT_SCSI_SG_DEPTH,
+	.max_sectors			= 8192,
+	.cmd_per_lun			= 7,
+	.use_clustering			= ENABLE_CLUSTERING,
+};
+
+static struct sas_function_template mptsas_transport_functions = {
+};
+
+static struct scsi_transport_template *mptsas_transport_template;
+
+#ifdef SASDEBUG
+static void mptsas_print_phy_data(MPI_SAS_IO_UNIT0_PHY_DATA *phy_data)
+{
+	printk("---- IO UNIT PAGE 0 ------------\n");
+	printk("Handle=0x%X\n",
+		le16_to_cpu(phy_data->AttachedDeviceHandle));
+	printk("Controller Handle=0x%X\n",
+		le16_to_cpu(phy_data->ControllerDevHandle));
+	printk("Port=0x%X\n", phy_data->Port);
+	printk("Port Flags=0x%X\n", phy_data->PortFlags);
+	printk("PHY Flags=0x%X\n", phy_data->PhyFlags);
+	printk("Negotiated Link Rate=0x%X\n", phy_data->NegotiatedLinkRate);
+	printk("Controller PHY Device Info=0x%X\n",
+		le32_to_cpu(phy_data->ControllerPhyDeviceInfo));
+	printk("DiscoveryStatus=0x%X\n",
+		le32_to_cpu(phy_data->DiscoveryStatus));
+	printk("\n");
+}
+
+static void mptsas_print_phy_pg0(SasPhyPage0_t *pg0)
+{
+	__le64 sas_address;
+
+	memcpy(&sas_address, &pg0->SASAddress, sizeof(__le64));
+
+	printk("---- SAS PHY PAGE 0 ------------\n");
+	printk("Attached Device Handle=0x%X\n",
+			le16_to_cpu(pg0->AttachedDevHandle));
+	printk("SAS Address=0x%llX\n",
+			(unsigned long long)le64_to_cpu(sas_address));
+	printk("Attached PHY Identifier=0x%X\n", pg0->AttachedPhyIdentifier);
+	printk("Attached Device Info=0x%X\n",
+			le32_to_cpu(pg0->AttachedDeviceInfo));
+	printk("Programmed Link Rate=0x%X\n", pg0->ProgrammedLinkRate);
+	printk("Change Count=0x%X\n", pg0->ChangeCount);
+	printk("PHY Info=0x%X\n", le32_to_cpu(pg0->PhyInfo));
+	printk("\n");
+}
+
+static void mptsas_print_device_pg0(SasDevicePage0_t *pg0)
+{
+	__le64 sas_address;
+
+	memcpy(&sas_address, &pg0->SASAddress, sizeof(__le64));
+
+	printk("---- SAS DEVICE PAGE 0 ---------\n");
+	printk("Handle=0x%X\n" ,le16_to_cpu(pg0->DevHandle));
+	printk("Enclosure Handle=0x%X\n", le16_to_cpu(pg0->EnclosureHandle));
+	printk("Slot=0x%X\n", le16_to_cpu(pg0->Slot));
+	printk("SAS Address=0x%llX\n", le64_to_cpu(sas_address));
+	printk("Target ID=0x%X\n", pg0->TargetID);
+	printk("Bus=0x%X\n", pg0->Bus);
+	printk("PhyNum=0x%X\n", pg0->PhyNum);
+	printk("AccessStatus=0x%X\n", le16_to_cpu(pg0->AccessStatus));
+	printk("Device Info=0x%X\n", le32_to_cpu(pg0->DeviceInfo));
+	printk("Flags=0x%X\n", le16_to_cpu(pg0->Flags));
+	printk("Physical Port=0x%X\n", pg0->PhysicalPort);
+	printk("\n");
+}
+
+static void mptsas_print_expander_pg1(SasExpanderPage1_t *pg1)
+{
+	printk("---- SAS EXPANDER PAGE 1 ------------\n");
+
+	printk("Physical Port=0x%X\n", pg1->PhysicalPort);
+	printk("PHY Identifier=0x%X\n", pg1->Phy);
+	printk("Negotiated Link Rate=0x%X\n", pg1->NegotiatedLinkRate);
+	printk("Programmed Link Rate=0x%X\n", pg1->ProgrammedLinkRate);
+	printk("Hardware Link Rate=0x%X\n", pg1->HwLinkRate);
+	printk("Owner Device Handle=0x%X\n",
+			le16_to_cpu(pg1->OwnerDevHandle));
+	printk("Attached Device Handle=0x%X\n",
+			le16_to_cpu(pg1->AttachedDevHandle));
+}
+#else
+#define mptsas_print_phy_data(phy_data)		do { } while (0)
+#define mptsas_print_phy_pg0(pg0)		do { } while (0)
+#define mptsas_print_device_pg0(pg0)		do { } while (0)
+#define mptsas_print_expander_pg1(pg1)		do { } while (0)
+#endif
+
+static int
+mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info)
+{
+	ConfigExtendedPageHeader_t hdr;
+	CONFIGPARMS cfg;
+	SasIOUnitPage0_t *buffer;
+	dma_addr_t dma_handle;
+	int error, i;
+
+	hdr.PageVersion = MPI_SASIOUNITPAGE0_PAGEVERSION;
+	hdr.ExtPageLength = 0;
+	hdr.PageNumber = 0;
+	hdr.Reserved1 = 0;
+	hdr.Reserved2 = 0;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
+	hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_IO_UNIT;
+
+	cfg.cfghdr.ehdr = &hdr;
+	cfg.physAddr = -1;
+	cfg.pageAddr = 0;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+	cfg.dir = 0;	/* read */
+	cfg.timeout = 10;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out;
+	if (!hdr.ExtPageLength) {
+		error = -ENXIO;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+					    &dma_handle);
+	if (!buffer) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out_free_consistent;
+
+	port_info->num_phys = buffer->NumPhys;
+	port_info->phy_info = kcalloc(port_info->num_phys,
+		sizeof(struct mptsas_phyinfo),GFP_KERNEL);
+	if (!port_info->phy_info) {
+		error = -ENOMEM;
+		goto out_free_consistent;
+	}
+
+	for (i = 0; i < port_info->num_phys; i++) {
+		mptsas_print_phy_data(&buffer->PhyData[i]);
+		port_info->phy_info[i].phy_id = i;
+		port_info->phy_info[i].port_id =
+		    buffer->PhyData[i].Port;
+		port_info->phy_info[i].negotiated_link_rate =
+		    buffer->PhyData[i].NegotiatedLinkRate;
+	}
+
+ out_free_consistent:
+	pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+			    buffer, dma_handle);
+ out:
+	return error;
+}
+
+static int
+mptsas_sas_phy_pg0(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
+		u32 form, u32 form_specific)
+{
+	ConfigExtendedPageHeader_t hdr;
+	CONFIGPARMS cfg;
+	SasPhyPage0_t *buffer;
+	dma_addr_t dma_handle;
+	int error;
+
+	hdr.PageVersion = MPI_SASPHY0_PAGEVERSION;
+	hdr.ExtPageLength = 0;
+	hdr.PageNumber = 0;
+	hdr.Reserved1 = 0;
+	hdr.Reserved2 = 0;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
+	hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_PHY;
+
+	cfg.cfghdr.ehdr = &hdr;
+	cfg.dir = 0;	/* read */
+	cfg.timeout = 10;
+
+	/* Get Phy Pg 0 for each Phy. */
+	cfg.physAddr = -1;
+	cfg.pageAddr = form + form_specific;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out;
+
+	if (!hdr.ExtPageLength) {
+		error = -ENXIO;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+				      &dma_handle);
+	if (!buffer) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out_free_consistent;
+
+	mptsas_print_phy_pg0(buffer);
+
+	phy_info->hw_link_rate = buffer->HwLinkRate;
+	phy_info->programmed_link_rate = buffer->ProgrammedLinkRate;
+	phy_info->identify.handle = le16_to_cpu(buffer->OwnerDevHandle);
+	phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle);
+
+ out_free_consistent:
+	pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+			    buffer, dma_handle);
+ out:
+	return error;
+}
+
+static int
+mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info,
+		u32 form, u32 form_specific)
+{
+	ConfigExtendedPageHeader_t hdr;
+	CONFIGPARMS cfg;
+	SasDevicePage0_t *buffer;
+	dma_addr_t dma_handle;
+	__le64 sas_address;
+	int error;
+
+	hdr.PageVersion = MPI_SASDEVICE0_PAGEVERSION;
+	hdr.ExtPageLength = 0;
+	hdr.PageNumber = 0;
+	hdr.Reserved1 = 0;
+	hdr.Reserved2 = 0;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
+	hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_DEVICE;
+
+	cfg.cfghdr.ehdr = &hdr;
+	cfg.pageAddr = form + form_specific;
+	cfg.physAddr = -1;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+	cfg.dir = 0;	/* read */
+	cfg.timeout = 10;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out;
+	if (!hdr.ExtPageLength) {
+		error = -ENXIO;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+				      &dma_handle);
+	if (!buffer) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out_free_consistent;
+
+	mptsas_print_device_pg0(buffer);
+
+	device_info->handle = le16_to_cpu(buffer->DevHandle);
+	device_info->phy_id = buffer->PhyNum;
+	device_info->port_id = buffer->PhysicalPort;
+	device_info->target = buffer->TargetID;
+	device_info->bus = buffer->Bus;
+	memcpy(&sas_address, &buffer->SASAddress, sizeof(__le64));
+	device_info->sas_address = le64_to_cpu(sas_address);
+	device_info->device_info =
+	    le32_to_cpu(buffer->DeviceInfo);
+
+ out_free_consistent:
+	pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+			    buffer, dma_handle);
+ out:
+	return error;
+}
+
+static int
+mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info,
+		u32 form, u32 form_specific)
+{
+	ConfigExtendedPageHeader_t hdr;
+	CONFIGPARMS cfg;
+	SasExpanderPage0_t *buffer;
+	dma_addr_t dma_handle;
+	int error;
+
+	hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION;
+	hdr.ExtPageLength = 0;
+	hdr.PageNumber = 0;
+	hdr.Reserved1 = 0;
+	hdr.Reserved2 = 0;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
+	hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_EXPANDER;
+
+	cfg.cfghdr.ehdr = &hdr;
+	cfg.physAddr = -1;
+	cfg.pageAddr = form + form_specific;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+	cfg.dir = 0;	/* read */
+	cfg.timeout = 10;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out;
+
+	if (!hdr.ExtPageLength) {
+		error = -ENXIO;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+				      &dma_handle);
+	if (!buffer) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out_free_consistent;
+
+	/* save config data */
+	port_info->num_phys = buffer->NumPhys;
+	port_info->handle = le16_to_cpu(buffer->DevHandle);
+	port_info->phy_info = kcalloc(port_info->num_phys,
+		sizeof(struct mptsas_phyinfo),GFP_KERNEL);
+	if (!port_info->phy_info) {
+		error = -ENOMEM;
+		goto out_free_consistent;
+	}
+
+ out_free_consistent:
+	pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+			    buffer, dma_handle);
+ out:
+	return error;
+}
+
+static int
+mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
+		u32 form, u32 form_specific)
+{
+	ConfigExtendedPageHeader_t hdr;
+	CONFIGPARMS cfg;
+	SasExpanderPage1_t *buffer;
+	dma_addr_t dma_handle;
+	int error;
+
+	hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION;
+	hdr.ExtPageLength = 0;
+	hdr.PageNumber = 1;
+	hdr.Reserved1 = 0;
+	hdr.Reserved2 = 0;
+	hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED;
+	hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_EXPANDER;
+
+	cfg.cfghdr.ehdr = &hdr;
+	cfg.physAddr = -1;
+	cfg.pageAddr = form + form_specific;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER;
+	cfg.dir = 0;	/* read */
+	cfg.timeout = 10;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out;
+
+	if (!hdr.ExtPageLength) {
+		error = -ENXIO;
+		goto out;
+	}
+
+	buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+				      &dma_handle);
+	if (!buffer) {
+		error = -ENOMEM;
+		goto out;
+	}
+
+	cfg.physAddr = dma_handle;
+	cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT;
+
+	error = mpt_config(ioc, &cfg);
+	if (error)
+		goto out_free_consistent;
+
+
+	mptsas_print_expander_pg1(buffer);
+
+	/* save config data */
+	phy_info->phy_id = buffer->Phy;
+	phy_info->port_id = buffer->PhysicalPort;
+	phy_info->negotiated_link_rate = buffer->NegotiatedLinkRate;
+	phy_info->programmed_link_rate = buffer->ProgrammedLinkRate;
+	phy_info->hw_link_rate = buffer->HwLinkRate;
+	phy_info->identify.handle = le16_to_cpu(buffer->OwnerDevHandle);
+	phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle);
+
+
+ out_free_consistent:
+	pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4,
+			    buffer, dma_handle);
+ out:
+	return error;
+}
+
+static void
+mptsas_parse_device_info(struct sas_identify *identify,
+		struct mptsas_devinfo *device_info)
+{
+	u16 protocols;
+
+	identify->sas_address = device_info->sas_address;
+	identify->phy_identifier = device_info->phy_id;
+
+	/*
+	 * Fill in Phy Initiator Port Protocol.
+	 * Bits 6:3, more than one bit can be set, fall through cases.
+	 */
+	protocols = device_info->device_info & 0x78;
+	identify->initiator_port_protocols = 0;
+	if (protocols & MPI_SAS_DEVICE_INFO_SSP_INITIATOR)
+		identify->initiator_port_protocols |= SAS_PROTOCOL_SSP;
+	if (protocols & MPI_SAS_DEVICE_INFO_STP_INITIATOR)
+		identify->initiator_port_protocols |= SAS_PROTOCOL_STP;
+	if (protocols & MPI_SAS_DEVICE_INFO_SMP_INITIATOR)
+		identify->initiator_port_protocols |= SAS_PROTOCOL_SMP;
+	if (protocols & MPI_SAS_DEVICE_INFO_SATA_HOST)
+		identify->initiator_port_protocols |= SAS_PROTOCOL_SATA;
+
+	/*
+	 * Fill in Phy Target Port Protocol.
+	 * Bits 10:7, more than one bit can be set, fall through cases.
+	 */
+	protocols = device_info->device_info & 0x780;
+	identify->target_port_protocols = 0;
+	if (protocols & MPI_SAS_DEVICE_INFO_SSP_TARGET)
+		identify->target_port_protocols |= SAS_PROTOCOL_SSP;
+	if (protocols & MPI_SAS_DEVICE_INFO_STP_TARGET)
+		identify->target_port_protocols |= SAS_PROTOCOL_STP;
+	if (protocols & MPI_SAS_DEVICE_INFO_SMP_TARGET)
+		identify->target_port_protocols |= SAS_PROTOCOL_SMP;
+	if (protocols & MPI_SAS_DEVICE_INFO_SATA_DEVICE)
+		identify->target_port_protocols |= SAS_PROTOCOL_SATA;
+
+	/*
+	 * Fill in Attached device type.
+	 */
+	switch (device_info->device_info &
+			MPI_SAS_DEVICE_INFO_MASK_DEVICE_TYPE) {
+	case MPI_SAS_DEVICE_INFO_NO_DEVICE:
+		identify->device_type = SAS_PHY_UNUSED;
+		break;
+	case MPI_SAS_DEVICE_INFO_END_DEVICE:
+		identify->device_type = SAS_END_DEVICE;
+		break;
+	case MPI_SAS_DEVICE_INFO_EDGE_EXPANDER:
+		identify->device_type = SAS_EDGE_EXPANDER_DEVICE;
+		break;
+	case MPI_SAS_DEVICE_INFO_FANOUT_EXPANDER:
+		identify->device_type = SAS_FANOUT_EXPANDER_DEVICE;
+		break;
+	}
+}
+
+static int mptsas_probe_one_phy(struct device *dev,
+		struct mptsas_phyinfo *phy_info, int index)
+{
+	struct sas_phy *port;
+	int error;
+
+	port = sas_phy_alloc(dev, index);
+	if (!port)
+		return -ENOMEM;
+
+	port->port_identifier = phy_info->port_id;
+	mptsas_parse_device_info(&port->identify, &phy_info->identify);
+
+	/*
+	 * Set Negotiated link rate.
+	 */
+	switch (phy_info->negotiated_link_rate) {
+	case MPI_SAS_IOUNIT0_RATE_PHY_DISABLED:
+		port->negotiated_linkrate = SAS_PHY_DISABLED;
+		break;
+	case MPI_SAS_IOUNIT0_RATE_FAILED_SPEED_NEGOTIATION:
+		port->negotiated_linkrate = SAS_LINK_RATE_FAILED;
+		break;
+	case MPI_SAS_IOUNIT0_RATE_1_5:
+		port->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	case MPI_SAS_IOUNIT0_RATE_3_0:
+		port->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	case MPI_SAS_IOUNIT0_RATE_SATA_OOB_COMPLETE:
+	case MPI_SAS_IOUNIT0_RATE_UNKNOWN:
+	default:
+		port->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN;
+		break;
+	}
+
+	/*
+	 * Set Max hardware link rate.
+	 */
+	switch (phy_info->hw_link_rate & MPI_SAS_PHY0_PRATE_MAX_RATE_MASK) {
+	case MPI_SAS_PHY0_HWRATE_MAX_RATE_1_5:
+		port->maximum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	case MPI_SAS_PHY0_PRATE_MAX_RATE_3_0:
+		port->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Set Max programmed link rate.
+	 */
+	switch (phy_info->programmed_link_rate &
+			MPI_SAS_PHY0_PRATE_MAX_RATE_MASK) {
+	case MPI_SAS_PHY0_PRATE_MAX_RATE_1_5:
+		port->maximum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	case MPI_SAS_PHY0_PRATE_MAX_RATE_3_0:
+		port->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Set Min hardware link rate.
+	 */
+	switch (phy_info->hw_link_rate & MPI_SAS_PHY0_HWRATE_MIN_RATE_MASK) {
+	case MPI_SAS_PHY0_HWRATE_MIN_RATE_1_5:
+		port->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	case MPI_SAS_PHY0_PRATE_MIN_RATE_3_0:
+		port->minimum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Set Min programmed link rate.
+	 */
+	switch (phy_info->programmed_link_rate &
+			MPI_SAS_PHY0_PRATE_MIN_RATE_MASK) {
+	case MPI_SAS_PHY0_PRATE_MIN_RATE_1_5:
+		port->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+		break;
+	case MPI_SAS_PHY0_PRATE_MIN_RATE_3_0:
+		port->minimum_linkrate = SAS_LINK_RATE_3_0_GBPS;
+		break;
+	default:
+		break;
+	}
+
+	error = sas_phy_add(port);
+	if (error) {
+		sas_phy_free(port);
+		return error;
+	}
+
+	if (phy_info->attached.handle) {
+		struct sas_rphy *rphy;
+
+		rphy = sas_rphy_alloc(port);
+		if (!rphy)
+			return 0; /* non-fatal: an rphy can be added later */
+
+		mptsas_parse_device_info(&rphy->identify, &phy_info->attached);
+		error = sas_rphy_add(rphy);
+		if (error) {
+			sas_rphy_free(rphy);
+			return error;
+		}
+
+		phy_info->rphy = rphy;
+	}
+
+	return 0;
+}
+
+static int
+mptsas_probe_hba_phys(MPT_ADAPTER *ioc, int *index)
+{
+	struct mptsas_portinfo *port_info;
+	u32 handle = 0xFFFF;
+	int error = -ENOMEM, i;
+
+	port_info = kmalloc(sizeof(*port_info), GFP_KERNEL);
+	if (!port_info)
+		goto out;
+	memset(port_info, 0, sizeof(*port_info));
+
+	error = mptsas_sas_io_unit_pg0(ioc, port_info);
+	if (error)
+		goto out_free_port_info;
+
+	list_add_tail(&port_info->list, &ioc->sas_topology);
+
+	for (i = 0; i < port_info->num_phys; i++) {
+		mptsas_sas_phy_pg0(ioc, &port_info->phy_info[i],
+			(MPI_SAS_PHY_PGAD_FORM_PHY_NUMBER <<
+			 MPI_SAS_PHY_PGAD_FORM_SHIFT), i);
+
+		mptsas_sas_device_pg0(ioc, &port_info->phy_info[i].identify,
+			(MPI_SAS_DEVICE_PGAD_FORM_GET_NEXT_HANDLE <<
+			 MPI_SAS_DEVICE_PGAD_FORM_SHIFT), handle);
+		handle = port_info->phy_info[i].identify.handle;
+
+		if (port_info->phy_info[i].attached.handle) {
+			mptsas_sas_device_pg0(ioc,
+				&port_info->phy_info[i].attached,
+				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
+				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+				port_info->phy_info[i].attached.handle);
+		}
+
+		mptsas_probe_one_phy(&ioc->sh->shost_gendev,
+				     &port_info->phy_info[i], *index);
+		(*index)++;
+	}
+
+	return 0;
+
+ out_free_port_info:
+	kfree(port_info);
+ out:
+	return error;
+}
+
+static int
+mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle, int *index)
+{
+	struct mptsas_portinfo *port_info, *p;
+	int error = -ENOMEM, i, j;
+
+	port_info = kmalloc(sizeof(*port_info), GFP_KERNEL);
+	if (!port_info)
+		goto out;
+	memset(port_info, 0, sizeof(*port_info));
+
+	error = mptsas_sas_expander_pg0(ioc, port_info,
+		(MPI_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE <<
+		 MPI_SAS_EXPAND_PGAD_FORM_SHIFT), *handle);
+	if (error)
+		goto out_free_port_info;
+
+	*handle = port_info->handle;
+
+	list_add_tail(&port_info->list, &ioc->sas_topology);
+	for (i = 0; i < port_info->num_phys; i++) {
+		struct device *parent;
+
+		mptsas_sas_expander_pg1(ioc, &port_info->phy_info[i],
+			(MPI_SAS_EXPAND_PGAD_FORM_HANDLE_PHY_NUM <<
+			 MPI_SAS_EXPAND_PGAD_FORM_SHIFT), (i << 16) + *handle);
+
+		if (port_info->phy_info[i].identify.handle) {
+			mptsas_sas_device_pg0(ioc,
+				&port_info->phy_info[i].identify,
+				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
+				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+				port_info->phy_info[i].identify.handle);
+		}
+
+		if (port_info->phy_info[i].attached.handle) {
+			mptsas_sas_device_pg0(ioc,
+				&port_info->phy_info[i].attached,
+				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
+				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
+				port_info->phy_info[i].attached.handle);
+		}
+
+		/*
+		 * If we find a parent port handle this expander is
+		 * attached to another expander, else it hangs of the
+		 * HBA phys.
+		 */
+		parent = &ioc->sh->shost_gendev;
+		list_for_each_entry(p, &ioc->sas_topology, list) {
+			for (j = 0; j < p->num_phys; j++) {
+				if (port_info->phy_info[i].identify.handle ==
+						p->phy_info[j].attached.handle)
+					parent = &p->phy_info[j].rphy->dev;
+			}
+		}
+
+		mptsas_probe_one_phy(parent, &port_info->phy_info[i], *index);
+		(*index)++;
+	}
+
+	return 0;
+
+ out_free_port_info:
+	kfree(port_info);
+ out:
+	return error;
+}
+
+static void
+mptsas_scan_sas_topology(MPT_ADAPTER *ioc)
+{
+	u32 handle = 0xFFFF;
+	int index = 0;
+
+	mptsas_probe_hba_phys(ioc, &index);
+	while (!mptsas_probe_expander_phys(ioc, &handle, &index))
+		;
+}
+
+static int
+mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct Scsi_Host	*sh;
+	MPT_SCSI_HOST		*hd;
+	MPT_ADAPTER 		*ioc;
+	unsigned long		 flags;
+	int			 sz, ii;
+	int			 numSGE = 0;
+	int			 scale;
+	int			 ioc_cap;
+	u8			*mem;
+	int			error=0;
+	int			r;
+
+	r = mpt_attach(pdev,id);
+	if (r)
+		return r;
+
+	ioc = pci_get_drvdata(pdev);
+	ioc->DoneCtx = mptsasDoneCtx;
+	ioc->TaskCtx = mptsasTaskCtx;
+	ioc->InternalCtx = mptsasInternalCtx;
+
+	/*  Added sanity check on readiness of the MPT adapter.
+	 */
+	if (ioc->last_state != MPI_IOC_STATE_OPERATIONAL) {
+		printk(MYIOC_s_WARN_FMT
+		  "Skipping because it's not operational!\n",
+		  ioc->name);
+		return -ENODEV;
+	}
+
+	if (!ioc->active) {
+		printk(MYIOC_s_WARN_FMT "Skipping because it's disabled!\n",
+		  ioc->name);
+		return -ENODEV;
+	}
+
+	/*  Sanity check - ensure at least 1 port is INITIATOR capable
+	 */
+	ioc_cap = 0;
+	for (ii = 0; ii < ioc->facts.NumberOfPorts; ii++) {
+		if (ioc->pfacts[ii].ProtocolFlags &
+				MPI_PORTFACTS_PROTOCOL_INITIATOR)
+			ioc_cap++;
+	}
+
+	if (!ioc_cap) {
+		printk(MYIOC_s_WARN_FMT
+			"Skipping ioc=%p because SCSI Initiator mode "
+			"is NOT enabled!\n", ioc->name, ioc);
+		return 0;
+	}
+
+	sh = scsi_host_alloc(&mptsas_driver_template, sizeof(MPT_SCSI_HOST));
+	if (!sh) {
+		printk(MYIOC_s_WARN_FMT
+			"Unable to register controller with SCSI subsystem\n",
+			ioc->name);
+                return -1;
+        }
+
+	spin_lock_irqsave(&ioc->FreeQlock, flags);
+
+	/* Attach the SCSI Host to the IOC structure
+	 */
+	ioc->sh = sh;
+
+	sh->io_port = 0;
+	sh->n_io_port = 0;
+	sh->irq = 0;
+
+	/* set 16 byte cdb's */
+	sh->max_cmd_len = 16;
+
+	sh->max_id = ioc->pfacts->MaxDevices + 1;
+
+	sh->transportt = mptsas_transport_template;
+
+	sh->max_lun = MPT_LAST_LUN + 1;
+	sh->max_channel = 0;
+	sh->this_id = ioc->pfacts[0].PortSCSIID;
+
+	/* Required entry.
+	 */
+	sh->unique_id = ioc->id;
+
+	INIT_LIST_HEAD(&ioc->sas_topology);
+
+	/* Verify that we won't exceed the maximum
+	 * number of chain buffers
+	 * We can optimize:  ZZ = req_sz/sizeof(SGE)
+	 * For 32bit SGE's:
+	 *  numSGE = 1 + (ZZ-1)*(maxChain -1) + ZZ
+	 *               + (req_sz - 64)/sizeof(SGE)
+	 * A slightly different algorithm is required for
+	 * 64bit SGEs.
+	 */
+	scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32));
+	if (sizeof(dma_addr_t) == sizeof(u64)) {
+		numSGE = (scale - 1) *
+		  (ioc->facts.MaxChainDepth-1) + scale +
+		  (ioc->req_sz - 60) / (sizeof(dma_addr_t) +
+		  sizeof(u32));
+	} else {
+		numSGE = 1 + (scale - 1) *
+		  (ioc->facts.MaxChainDepth-1) + scale +
+		  (ioc->req_sz - 64) / (sizeof(dma_addr_t) +
+		  sizeof(u32));
+	}
+
+	if (numSGE < sh->sg_tablesize) {
+		/* Reset this value */
+		dprintk((MYIOC_s_INFO_FMT
+		  "Resetting sg_tablesize to %d from %d\n",
+		  ioc->name, numSGE, sh->sg_tablesize));
+		sh->sg_tablesize = numSGE;
+	}
+
+	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
+
+	hd = (MPT_SCSI_HOST *) sh->hostdata;
+	hd->ioc = ioc;
+
+	/* SCSI needs scsi_cmnd lookup table!
+	 * (with size equal to req_depth*PtrSz!)
+	 */
+	sz = ioc->req_depth * sizeof(void *);
+	mem = kmalloc(sz, GFP_ATOMIC);
+	if (mem == NULL) {
+		error = -ENOMEM;
+		goto mptsas_probe_failed;
+	}
+
+	memset(mem, 0, sz);
+	hd->ScsiLookup = (struct scsi_cmnd **) mem;
+
+	dprintk((MYIOC_s_INFO_FMT "ScsiLookup @ %p, sz=%d\n",
+		 ioc->name, hd->ScsiLookup, sz));
+
+	/* Allocate memory for the device structures.
+	 * A non-Null pointer at an offset
+	 * indicates a device exists.
+	 * max_id = 1 + maximum id (hosts.h)
+	 */
+	sz = sh->max_id * sizeof(void *);
+	mem = kmalloc(sz, GFP_ATOMIC);
+	if (mem == NULL) {
+		error = -ENOMEM;
+		goto mptsas_probe_failed;
+	}
+
+	memset(mem, 0, sz);
+	hd->Targets = (VirtDevice **) mem;
+
+	dprintk((KERN_INFO
+	  "  Targets @ %p, sz=%d\n", hd->Targets, sz));
+
+	/* Clear the TM flags
+	 */
+	hd->tmPending = 0;
+	hd->tmState = TM_STATE_NONE;
+	hd->resetPending = 0;
+	hd->abortSCpnt = NULL;
+
+	/* Clear the pointer used to store
+	 * single-threaded commands, i.e., those
+	 * issued during a bus scan, dv and
+	 * configuration pages.
+	 */
+	hd->cmdPtr = NULL;
+
+	/* Initialize this SCSI Hosts' timers
+	 * To use, set the timer expires field
+	 * and add_timer
+	 */
+	init_timer(&hd->timer);
+	hd->timer.data = (unsigned long) hd;
+	hd->timer.function = mptscsih_timer_expired;
+
+	hd->mpt_pq_filter = mpt_pq_filter;
+	ioc->sas_data.ptClear = mpt_pt_clear;
+
+	if (ioc->sas_data.ptClear==1) {
+		mptbase_sas_persist_operation(
+		    ioc, MPI_SAS_OP_CLEAR_ALL_PERSISTENT);
+	}
+
+	ddvprintk((MYIOC_s_INFO_FMT
+		"mpt_pq_filter %x mpt_pq_filter %x\n",
+		ioc->name,
+		mpt_pq_filter,
+		mpt_pq_filter));
+
+	init_waitqueue_head(&hd->scandv_waitq);
+	hd->scandv_wait_done = 0;
+	hd->last_queue_full = 0;
+
+	error = scsi_add_host(sh, &ioc->pcidev->dev);
+	if (error) {
+		dprintk((KERN_ERR MYNAM
+		  "scsi_add_host failed\n"));
+		goto mptsas_probe_failed;
+	}
+
+	mptsas_scan_sas_topology(ioc);
+
+	return 0;
+
+mptsas_probe_failed:
+
+	mptscsih_remove(pdev);
+	return error;
+}
+
+static void __devexit mptsas_remove(struct pci_dev *pdev)
+{
+	MPT_ADAPTER *ioc = pci_get_drvdata(pdev);
+	struct mptsas_portinfo *p, *n;
+
+	sas_remove_host(ioc->sh);
+
+	list_for_each_entry_safe(p, n, &ioc->sas_topology, list) {
+		list_del(&p->list);
+		kfree(p);
+	}
+
+	mptscsih_remove(pdev);
+}
+
+static struct pci_device_id mptsas_pci_table[] = {
+	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064,
+		PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1066,
+		PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1068,
+		PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064E,
+		PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1066E,
+		PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1068E,
+		PCI_ANY_ID, PCI_ANY_ID },
+	{0}	/* Terminating entry */
+};
+MODULE_DEVICE_TABLE(pci, mptsas_pci_table);
+
+
+static struct pci_driver mptsas_driver = {
+	.name		= "mptsas",
+	.id_table	= mptsas_pci_table,
+	.probe		= mptsas_probe,
+	.remove		= __devexit_p(mptsas_remove),
+	.shutdown	= mptscsih_shutdown,
+#ifdef CONFIG_PM
+	.suspend	= mptscsih_suspend,
+	.resume		= mptscsih_resume,
+#endif
+};
+
+static int __init
+mptsas_init(void)
+{
+	show_mptmod_ver(my_NAME, my_VERSION);
+
+	mptsas_transport_template =
+	    sas_attach_transport(&mptsas_transport_functions);
+	if (!mptsas_transport_template)
+		return -ENODEV;
+
+	mptsasDoneCtx = mpt_register(mptscsih_io_done, MPTSAS_DRIVER);
+	mptsasTaskCtx = mpt_register(mptscsih_taskmgmt_complete, MPTSAS_DRIVER);
+	mptsasInternalCtx =
+		mpt_register(mptscsih_scandv_complete, MPTSAS_DRIVER);
+
+	if (mpt_event_register(mptsasDoneCtx, mptscsih_event_process) == 0) {
+		devtprintk((KERN_INFO MYNAM
+		  ": Registered for IOC event notifications\n"));
+	}
+
+	if (mpt_reset_register(mptsasDoneCtx, mptscsih_ioc_reset) == 0) {
+		dprintk((KERN_INFO MYNAM
+		  ": Registered for IOC reset notifications\n"));
+	}
+
+	return pci_register_driver(&mptsas_driver);
+}
+
+static void __exit
+mptsas_exit(void)
+{
+	pci_unregister_driver(&mptsas_driver);
+	sas_release_transport(mptsas_transport_template);
+
+	mpt_reset_deregister(mptsasDoneCtx);
+	mpt_event_deregister(mptsasDoneCtx);
+
+	mpt_deregister(mptsasInternalCtx);
+	mpt_deregister(mptsasTaskCtx);
+	mpt_deregister(mptsasDoneCtx);
+}
+
+module_init(mptsas_init);
+module_exit(mptsas_exit);
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index 4a003dc5fde..5cb07eb224d 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -62,6 +62,7 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_tcq.h>
+#include <scsi/scsi_dbg.h>
 
 #include "mptbase.h"
 #include "mptscsih.h"
@@ -93,8 +94,9 @@ typedef struct _BIG_SENSE_BUF {
 
 #define MPT_ICFLAG_BUF_CAP	0x01	/* ReadBuffer Read Capacity format */
 #define MPT_ICFLAG_ECHO		0x02	/* ReadBuffer Echo buffer format */
-#define MPT_ICFLAG_PHYS_DISK	0x04	/* Any SCSI IO but do Phys Disk Format */
-#define MPT_ICFLAG_TAGGED_CMD	0x08	/* Do tagged IO */
+#define MPT_ICFLAG_EBOS		0x04	/* ReadBuffer Echo buffer has EBOS */
+#define MPT_ICFLAG_PHYS_DISK	0x08	/* Any SCSI IO but do Phys Disk Format */
+#define MPT_ICFLAG_TAGGED_CMD	0x10	/* Do tagged IO */
 #define MPT_ICFLAG_DID_RESET	0x20	/* Bus Reset occurred with this command */
 #define MPT_ICFLAG_RESERVED	0x40	/* Reserved has been issued */
 
@@ -159,6 +161,8 @@ int		mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR
 static int	mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *iocmd);
 static int	mptscsih_synchronize_cache(MPT_SCSI_HOST *hd, int portnum);
 
+static struct work_struct   mptscsih_persistTask;
+
 #ifdef MPTSCSIH_ENABLE_DOMAIN_VALIDATION
 static int	mptscsih_do_raid(MPT_SCSI_HOST *hd, u8 action, INTERNAL_CMD *io);
 static void	mptscsih_domainValidation(void *hd);
@@ -167,6 +171,7 @@ static void	mptscsih_qas_check(MPT_SCSI_HOST *hd, int id);
 static int	mptscsih_doDv(MPT_SCSI_HOST *hd, int channel, int target);
 static void	mptscsih_dv_parms(MPT_SCSI_HOST *hd, DVPARAMETERS *dv,void *pPage);
 static void	mptscsih_fillbuf(char *buffer, int size, int index, int width);
+static void	mptscsih_set_dvflags_raid(MPT_SCSI_HOST *hd, int id);
 #endif
 
 void 		mptscsih_remove(struct pci_dev *);
@@ -606,11 +611,24 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 		xfer_cnt = le32_to_cpu(pScsiReply->TransferCount);
 		sc->resid = sc->request_bufflen - xfer_cnt;
 
+		/*
+		 *  if we get a data underrun indication, yet no data was
+		 *  transferred and the SCSI status indicates that the
+		 *  command was never started, change the data underrun
+		 *  to success
+		 */
+		if (status == MPI_IOCSTATUS_SCSI_DATA_UNDERRUN && xfer_cnt == 0 &&
+		    (scsi_status == MPI_SCSI_STATUS_BUSY ||
+		     scsi_status == MPI_SCSI_STATUS_RESERVATION_CONFLICT ||
+		     scsi_status == MPI_SCSI_STATUS_TASK_SET_FULL)) {
+			status = MPI_IOCSTATUS_SUCCESS;
+		}
+
 		dreplyprintk((KERN_NOTICE "Reply ha=%d id=%d lun=%d:\n"
 			"IOCStatus=%04xh SCSIState=%02xh SCSIStatus=%02xh\n"
 			"resid=%d bufflen=%d xfer_cnt=%d\n",
 			ioc->id, pScsiReq->TargetID, pScsiReq->LUN[1],
-			status, scsi_state, scsi_status, sc->resid, 
+			status, scsi_state, scsi_status, sc->resid,
 			sc->request_bufflen, xfer_cnt));
 
 		if (scsi_state & MPI_SCSI_STATE_AUTOSENSE_VALID)
@@ -619,8 +637,11 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 		/*
 		 *  Look for + dump FCP ResponseInfo[]!
 		 */
-		if (scsi_state & MPI_SCSI_STATE_RESPONSE_INFO_VALID) {
-			printk(KERN_NOTICE "  FCP_ResponseInfo=%08xh\n",
+		if (scsi_state & MPI_SCSI_STATE_RESPONSE_INFO_VALID &&
+		    pScsiReply->ResponseInfo) {
+			printk(KERN_NOTICE "ha=%d id=%d lun=%d: "
+			"FCP_ResponseInfo=%08xh\n",
+			ioc->id, pScsiReq->TargetID, pScsiReq->LUN[1],
 			le32_to_cpu(pScsiReply->ResponseInfo));
 		}
 
@@ -661,23 +682,13 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 			break;
 
 		case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH:	/* 0x0049 */
-			if ( xfer_cnt >= sc->underflow ) {
-				/* Sufficient data transfer occurred */
+			sc->resid = sc->request_bufflen - xfer_cnt;
+			if((xfer_cnt==0)||(sc->underflow > xfer_cnt))
+				sc->result=DID_SOFT_ERROR << 16;
+			else /* Sufficient data transfer occurred */
 				sc->result = (DID_OK << 16) | scsi_status;
-			} else if ( xfer_cnt == 0 ) {
-				/* A CRC Error causes this condition; retry */
-				sc->result = (DRIVER_SENSE << 24) | (DID_OK << 16) |
-					(CHECK_CONDITION << 1);
-				sc->sense_buffer[0] = 0x70;
-				sc->sense_buffer[2] = NO_SENSE;
-				sc->sense_buffer[12] = 0;
-				sc->sense_buffer[13] = 0;
-			} else {
-				sc->result = DID_SOFT_ERROR << 16;
-			}
-			dreplyprintk((KERN_NOTICE
-			    "RESIDUAL_MISMATCH: result=%x on id=%d\n",
-			    sc->result, sc->device->id));
+			dreplyprintk((KERN_NOTICE 
+			    "RESIDUAL_MISMATCH: result=%x on id=%d\n", sc->result, sc->device->id));
 			break;
 
 		case MPI_IOCSTATUS_SCSI_DATA_UNDERRUN:		/* 0x0045 */
@@ -692,7 +703,10 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 				;
 			} else {
 				if (xfer_cnt < sc->underflow) {
-					sc->result = DID_SOFT_ERROR << 16;
+					if (scsi_status == SAM_STAT_BUSY)
+						sc->result = SAM_STAT_BUSY;
+					else
+						sc->result = DID_SOFT_ERROR << 16;
 				}
 				if (scsi_state & (MPI_SCSI_STATE_AUTOSENSE_FAILED | MPI_SCSI_STATE_NO_SCSI_STATUS)) {
 					/* What to do?
@@ -717,8 +731,10 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr)
 
 		case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
 		case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
-			scsi_status = pScsiReply->SCSIStatus;
-			sc->result = (DID_OK << 16) | scsi_status;
+			if (scsi_status == MPI_SCSI_STATUS_BUSY)
+				sc->result = (DID_BUS_BUSY << 16) | scsi_status;
+			else
+				sc->result = (DID_OK << 16) | scsi_status;
 			if (scsi_state == 0) {
 				;
 			} else if (scsi_state & MPI_SCSI_STATE_AUTOSENSE_VALID) {
@@ -890,12 +906,13 @@ mptscsih_search_running_cmds(MPT_SCSI_HOST *hd, uint target, uint lun)
 	SCSIIORequest_t	*mf = NULL;
 	int		 ii;
 	int		 max = hd->ioc->req_depth;
+	struct scsi_cmnd *sc;
 
 	dsprintk((KERN_INFO MYNAM ": search_running target %d lun %d max %d\n",
 			target, lun, max));
 
 	for (ii=0; ii < max; ii++) {
-		if (hd->ScsiLookup[ii] != NULL) {
+		if ((sc = hd->ScsiLookup[ii]) != NULL) {
 
 			mf = (SCSIIORequest_t *)MPT_INDEX_2_MFPTR(hd->ioc, ii);
 
@@ -910,9 +927,22 @@ mptscsih_search_running_cmds(MPT_SCSI_HOST *hd, uint target, uint lun)
 			hd->ScsiLookup[ii] = NULL;
 			mptscsih_freeChainBuffers(hd->ioc, ii);
 			mpt_free_msg_frame(hd->ioc, (MPT_FRAME_HDR *)mf);
+			if (sc->use_sg) {
+				pci_unmap_sg(hd->ioc->pcidev,
+				(struct scatterlist *) sc->request_buffer,
+					sc->use_sg,
+					sc->sc_data_direction);
+			} else if (sc->request_bufflen) {
+				pci_unmap_single(hd->ioc->pcidev,
+					sc->SCp.dma_handle,
+					sc->request_bufflen,
+					sc->sc_data_direction);
+			}
+			sc->host_scribble = NULL;
+			sc->result = DID_NO_CONNECT << 16;
+			sc->scsi_done(sc);
 		}
 	}
-
 	return;
 }
 
@@ -967,8 +997,10 @@ mptscsih_remove(struct pci_dev *pdev)
 	unsigned long	 	flags;
 	int sz1;
 
-	if(!host)
+	if(!host) {
+		mpt_detach(pdev);
 		return;
+	}
 
 	scsi_remove_host(host);
 
@@ -1256,8 +1288,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	MPT_SCSI_HOST		*hd;
 	MPT_FRAME_HDR		*mf;
 	SCSIIORequest_t		*pScsiReq;
-	VirtDevice		*pTarget;
-	int	 target;
+	VirtDevice		*pTarget = SCpnt->device->hostdata;
 	int	 lun;
 	u32	 datalen;
 	u32	 scsictl;
@@ -1267,12 +1298,9 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	int	 ii;
 
 	hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata;
-	target = SCpnt->device->id;
 	lun = SCpnt->device->lun;
 	SCpnt->scsi_done = done;
 
-	pTarget = hd->Targets[target];
-
 	dmfprintk((MYIOC_s_INFO_FMT "qcmd: SCpnt=%p, done()=%p\n",
 			(hd && hd->ioc) ? hd->ioc->name : "ioc?", SCpnt, done));
 
@@ -1315,7 +1343,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	/* Default to untagged. Once a target structure has been allocated,
 	 * use the Inquiry data to determine if device supports tagged.
 	 */
-	if (   pTarget
+	if (pTarget
 	    && (pTarget->tflags & MPT_TARGET_FLAGS_Q_YES)
 	    && (SCpnt->device->tagged_supported)) {
 		scsictl = scsidir | MPI_SCSIIO_CONTROL_SIMPLEQ;
@@ -1325,8 +1353,8 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 
 	/* Use the above information to set up the message frame
 	 */
-	pScsiReq->TargetID = (u8) target;
-	pScsiReq->Bus = (u8) SCpnt->device->channel;
+	pScsiReq->TargetID = (u8) pTarget->target_id;
+	pScsiReq->Bus = pTarget->bus_id;
 	pScsiReq->ChainOffset = 0;
 	pScsiReq->Function = MPI_FUNCTION_SCSI_IO_REQUEST;
 	pScsiReq->CDBLength = SCpnt->cmd_len;
@@ -1378,7 +1406,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 
 #ifdef MPTSCSIH_ENABLE_DOMAIN_VALIDATION
 	if (hd->ioc->bus_type == SCSI) {
-		int dvStatus = hd->ioc->spi_data.dvStatus[target];
+		int dvStatus = hd->ioc->spi_data.dvStatus[pTarget->target_id];
 		int issueCmd = 1;
 
 		if (dvStatus || hd->ioc->spi_data.forceDv) {
@@ -1426,6 +1454,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *))
 	return 0;
 
  fail:
+	hd->ScsiLookup[my_idx] = NULL;
 	mptscsih_freeChainBuffers(hd->ioc, my_idx);
 	mpt_free_msg_frame(hd->ioc, mf);
 	return SCSI_MLQUEUE_HOST_BUSY;
@@ -1713,24 +1742,23 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 	MPT_FRAME_HDR	*mf;
 	u32		 ctx2abort;
 	int		 scpnt_idx;
+	int		 retval;
 
 	/* If we can't locate our host adapter structure, return FAILED status.
 	 */
 	if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL) {
 		SCpnt->result = DID_RESET << 16;
 		SCpnt->scsi_done(SCpnt);
-		dfailprintk((KERN_WARNING MYNAM ": mptscsih_abort: "
+		dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: "
 			   "Can't locate host! (sc=%p)\n",
 			   SCpnt));
 		return FAILED;
 	}
 
 	ioc = hd->ioc;
-	if (hd->resetPending)
+	if (hd->resetPending) {
 		return FAILED;
-
-	printk(KERN_WARNING MYNAM ": %s: >> Attempting task abort! (sc=%p)\n",
-	       hd->ioc->name, SCpnt);
+	}
 
 	if (hd->timeouts < -1)
 		hd->timeouts++;
@@ -1738,16 +1766,20 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 	/* Find this command
 	 */
 	if ((scpnt_idx = SCPNT_TO_LOOKUP_IDX(SCpnt)) < 0) {
-		/* Cmd not found in ScsiLookup. 
+		/* Cmd not found in ScsiLookup.
 		 * Do OS callback.
 		 */
 		SCpnt->result = DID_RESET << 16;
-		dtmprintk((KERN_WARNING MYNAM ": %s: mptscsih_abort: "
+		dtmprintk((KERN_INFO MYNAM ": %s: mptscsih_abort: "
 			   "Command not in the active list! (sc=%p)\n",
 			   hd->ioc->name, SCpnt));
 		return SUCCESS;
 	}
 
+	printk(KERN_WARNING MYNAM ": %s: attempting task abort! (sc=%p)\n",
+	       hd->ioc->name, SCpnt);
+	scsi_print_command(SCpnt);
+
 	/* Most important!  Set TaskMsgContext to SCpnt's MsgContext!
 	 * (the IO to be ABORT'd)
 	 *
@@ -1760,38 +1792,22 @@ mptscsih_abort(struct scsi_cmnd * SCpnt)
 
 	hd->abortSCpnt = SCpnt;
 
-	if (mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
+	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
 		SCpnt->device->channel, SCpnt->device->id, SCpnt->device->lun,
-		ctx2abort, 2 /* 2 second timeout */)
-		< 0) {
+		ctx2abort, 2 /* 2 second timeout */);
 
-		/* The TM request failed and the subsequent FW-reload failed!
-		 * Fatal error case.
-		 */
-		printk(MYIOC_s_WARN_FMT "Error issuing abort task! (sc=%p)\n",
-		       hd->ioc->name, SCpnt);
+	printk (KERN_WARNING MYNAM ": %s: task abort: %s (sc=%p)\n",
+		hd->ioc->name,
+		((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
 
-		/* We must clear our pending flag before clearing our state.
-		 */
+	if (retval == 0)
+		return SUCCESS;
+
+	if(retval != FAILED ) {
 		hd->tmPending = 0;
 		hd->tmState = TM_STATE_NONE;
-
-		/* Unmap the DMA buffers, if any. */
-		if (SCpnt->use_sg) {
-			pci_unmap_sg(ioc->pcidev, (struct scatterlist *) SCpnt->request_buffer,
-				    SCpnt->use_sg, SCpnt->sc_data_direction);
-		} else if (SCpnt->request_bufflen) {
-			pci_unmap_single(ioc->pcidev, SCpnt->SCp.dma_handle,
-				SCpnt->request_bufflen, SCpnt->sc_data_direction);
-		}
-		hd->ScsiLookup[scpnt_idx] = NULL;
-		SCpnt->result = DID_RESET << 16;
-		SCpnt->scsi_done(SCpnt);		/* Issue the command callback */
-		mptscsih_freeChainBuffers(ioc, scpnt_idx);
-		mpt_free_msg_frame(ioc, mf);
-		return FAILED;
 	}
-	return SUCCESS;
+	return FAILED;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -1807,11 +1823,12 @@ int
 mptscsih_dev_reset(struct scsi_cmnd * SCpnt)
 {
 	MPT_SCSI_HOST	*hd;
+	int		 retval;
 
 	/* If we can't locate our host adapter structure, return FAILED status.
 	 */
 	if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){
-		dtmprintk((KERN_WARNING MYNAM ": mptscsih_dev_reset: "
+		dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: "
 			   "Can't locate host! (sc=%p)\n",
 			   SCpnt));
 		return FAILED;
@@ -1820,24 +1837,26 @@ mptscsih_dev_reset(struct scsi_cmnd * SCpnt)
 	if (hd->resetPending)
 		return FAILED;
 
-	printk(KERN_WARNING MYNAM ": %s: >> Attempting target reset! (sc=%p)\n",
+	printk(KERN_WARNING MYNAM ": %s: attempting target reset! (sc=%p)\n",
 	       hd->ioc->name, SCpnt);
+	scsi_print_command(SCpnt);
 
-	if (mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
+	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
 		SCpnt->device->channel, SCpnt->device->id,
-		0, 0, 5 /* 5 second timeout */)
-		< 0){
-		/* The TM request failed and the subsequent FW-reload failed!
-		 * Fatal error case.
-		 */
-		printk(MYIOC_s_WARN_FMT "Error processing TaskMgmt request (sc=%p)\n",
-		 		hd->ioc->name, SCpnt);
+		0, 0, 5 /* 5 second timeout */);
+
+	printk (KERN_WARNING MYNAM ": %s: target reset: %s (sc=%p)\n",
+		hd->ioc->name,
+		((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
+
+	if (retval == 0)
+		return SUCCESS;
+
+	if(retval != FAILED ) {
 		hd->tmPending = 0;
 		hd->tmState = TM_STATE_NONE;
-		return FAILED;
 	}
-
-	return SUCCESS;
+	return FAILED;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -1853,41 +1872,39 @@ int
 mptscsih_bus_reset(struct scsi_cmnd * SCpnt)
 {
 	MPT_SCSI_HOST	*hd;
-	spinlock_t	*host_lock = SCpnt->device->host->host_lock;
+	int		 retval;
 
 	/* If we can't locate our host adapter structure, return FAILED status.
 	 */
 	if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){
-		dtmprintk((KERN_WARNING MYNAM ": mptscsih_bus_reset: "
+		dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: "
 			   "Can't locate host! (sc=%p)\n",
 			   SCpnt ) );
 		return FAILED;
 	}
 
-	printk(KERN_WARNING MYNAM ": %s: >> Attempting bus reset! (sc=%p)\n",
+	printk(KERN_WARNING MYNAM ": %s: attempting bus reset! (sc=%p)\n",
 	       hd->ioc->name, SCpnt);
+	scsi_print_command(SCpnt);
 
 	if (hd->timeouts < -1)
 		hd->timeouts++;
 
-	/* We are now ready to execute the task management request. */
-	if (mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
-		SCpnt->device->channel, 0, 0, 0, 5 /* 5 second timeout */)
-	    < 0){
+	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
+		SCpnt->device->channel, 0, 0, 0, 5 /* 5 second timeout */);
 
-		/* The TM request failed and the subsequent FW-reload failed!
-		 * Fatal error case.
-		 */
-		printk(MYIOC_s_WARN_FMT
-		       "Error processing TaskMgmt request (sc=%p)\n",
-		       hd->ioc->name, SCpnt);
+	printk (KERN_WARNING MYNAM ": %s: bus reset: %s (sc=%p)\n",
+		hd->ioc->name,
+		((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
+
+	if (retval == 0)
+		return SUCCESS;
+
+	if(retval != FAILED ) {
 		hd->tmPending = 0;
 		hd->tmState = TM_STATE_NONE;
-		spin_lock_irq(host_lock);
-		return FAILED;
 	}
-
-	return SUCCESS;
+	return FAILED;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -2169,7 +2186,7 @@ mptscsih_slave_alloc(struct scsi_device *device)
 	vdev->raidVolume = 0;
 	hd->Targets[device->id] = vdev;
 	if (hd->ioc->bus_type == SCSI) {
-		if (hd->ioc->spi_data.isRaid & (1 << device->id)) {
+		if (hd->ioc->raid_data.isRaid & (1 << device->id)) {
 			vdev->raidVolume = 1;
 			ddvtprintk((KERN_INFO
 			    "RAID Volume @ id %d\n", device->id));
@@ -2180,22 +2197,7 @@ mptscsih_slave_alloc(struct scsi_device *device)
 
  out:
 	vdev->num_luns++;
-	return 0;
-}
-
-static int 
-mptscsih_is_raid_volume(MPT_SCSI_HOST *hd, uint id)
-{
-	int i;
-
-	if (!hd->ioc->spi_data.isRaid || !hd->ioc->spi_data.pIocPg3)
-		return 0;
-
-	for (i = 0; i < hd->ioc->spi_data.pIocPg3->NumPhysDisks; i++) {
-		if (id == hd->ioc->spi_data.pIocPg3->PhysDisk[i].PhysDiskID)
-			return 1;
-	}
-
+	device->hostdata = vdev;
 	return 0;
 }
 
@@ -2226,7 +2228,7 @@ mptscsih_slave_destroy(struct scsi_device *device)
 	hd->Targets[target] = NULL;
 
 	if (hd->ioc->bus_type == SCSI) {
-		if (mptscsih_is_raid_volume(hd, target)) {
+		if (mptscsih_is_phys_disk(hd->ioc, target)) {
 			hd->ioc->spi_data.forceDv |= MPT_SCSICFG_RELOAD_IOC_PG3;
 		} else {
 			hd->ioc->spi_data.dvStatus[target] =
@@ -2439,6 +2441,7 @@ mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 {
 	MPT_SCSI_HOST	*hd;
 	unsigned long	 flags;
+	int 		ii;
 
 	dtmprintk((KERN_WARNING MYNAM
 			": IOC %s_reset routed to SCSI host driver!\n",
@@ -2496,11 +2499,8 @@ mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 
 		/* ScsiLookup initialization
 		 */
-		{
-			int ii;
-			for (ii=0; ii < hd->ioc->req_depth; ii++)
-				hd->ScsiLookup[ii] = NULL;
-		}
+		for (ii=0; ii < hd->ioc->req_depth; ii++)
+			hd->ScsiLookup[ii] = NULL;
 
 		/* 2. Chain Buffer initialization
 		 */
@@ -2549,6 +2549,16 @@ mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase)
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+/* work queue thread to clear the persitency table */
+static void
+mptscsih_sas_persist_clear_table(void * arg)
+{
+	MPT_ADAPTER *ioc = (MPT_ADAPTER *)arg;
+
+	mptbase_sas_persist_operation(ioc, MPI_SAS_OP_CLEAR_NOT_PRESENT);
+}
+
+/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 int
 mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 {
@@ -2558,18 +2568,18 @@ mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 	devtprintk((MYIOC_s_INFO_FMT "MPT event (=%02Xh) routed to SCSI host driver!\n",
 			ioc->name, event));
 
+	if (ioc->sh == NULL ||
+		((hd = (MPT_SCSI_HOST *)ioc->sh->hostdata) == NULL))
+		return 1;
+
 	switch (event) {
 	case MPI_EVENT_UNIT_ATTENTION:			/* 03 */
 		/* FIXME! */
 		break;
 	case MPI_EVENT_IOC_BUS_RESET:			/* 04 */
 	case MPI_EVENT_EXT_BUS_RESET:			/* 05 */
-		hd = NULL;
-		if (ioc->sh) {
-			hd = (MPT_SCSI_HOST *) ioc->sh->hostdata;
-			if (hd && (ioc->bus_type == SCSI) && (hd->soft_resets < -1))
-				hd->soft_resets++;
-		}
+		if (hd && (ioc->bus_type == SCSI) && (hd->soft_resets < -1))
+			hd->soft_resets++;
 		break;
 	case MPI_EVENT_LOGOUT:				/* 09 */
 		/* FIXME! */
@@ -2588,69 +2598,24 @@ mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply)
 		break;
 
 	case MPI_EVENT_INTEGRATED_RAID:			/* 0B */
+	{
+		pMpiEventDataRaid_t pRaidEventData =
+		    (pMpiEventDataRaid_t) pEvReply->Data;
 #ifdef MPTSCSIH_ENABLE_DOMAIN_VALIDATION
-		/* negoNvram set to 0 if DV enabled and to USE_NVRAM if
-		 * if DV disabled. Need to check for target mode.
-		 */
-		hd = NULL;
-		if (ioc->sh)
-			hd = (MPT_SCSI_HOST *) ioc->sh->hostdata;
-
-		if (hd && (ioc->bus_type == SCSI) && (hd->negoNvram == 0)) {
-			ScsiCfgData	*pSpi;
-			Ioc3PhysDisk_t	*pPDisk;
-			int		 numPDisk;
-			u8		 reason;
-			u8		 physDiskNum;
-
-			reason = (le32_to_cpu(pEvReply->Data[0]) & 0x00FF0000) >> 16;
-			if (reason == MPI_EVENT_RAID_RC_DOMAIN_VAL_NEEDED) {
-				/* New or replaced disk.
-				 * Set DV flag and schedule DV.
-				 */
-				pSpi = &ioc->spi_data;
-				physDiskNum = (le32_to_cpu(pEvReply->Data[0]) & 0xFF000000) >> 24;
-				ddvtprintk(("DV requested for phys disk id %d\n", physDiskNum));
-				if (pSpi->pIocPg3) {
-					pPDisk =  pSpi->pIocPg3->PhysDisk;
-					numPDisk =pSpi->pIocPg3->NumPhysDisks;
-
-					while (numPDisk) {
-						if (physDiskNum == pPDisk->PhysDiskNum) {
-							pSpi->dvStatus[pPDisk->PhysDiskID] = (MPT_SCSICFG_NEED_DV | MPT_SCSICFG_DV_NOT_DONE);
-							pSpi->forceDv = MPT_SCSICFG_NEED_DV;
-							ddvtprintk(("NEED_DV set for phys disk id %d\n", pPDisk->PhysDiskID));
-							break;
-						}
-						pPDisk++;
-						numPDisk--;
-					}
-
-					if (numPDisk == 0) {
-						/* The physical disk that needs DV was not found
-						 * in the stored IOC Page 3. The driver must reload
-						 * this page. DV routine will set the NEED_DV flag for
-						 * all phys disks that have DV_NOT_DONE set.
-						 */
-						pSpi->forceDv = MPT_SCSICFG_NEED_DV | MPT_SCSICFG_RELOAD_IOC_PG3;
-						ddvtprintk(("phys disk %d not found. Setting reload IOC Pg3 Flag\n", physDiskNum));
-					}
-				}
-			}
-		}
+		/* Domain Validation Needed */
+		if (ioc->bus_type == SCSI &&
+		    pRaidEventData->ReasonCode ==
+		    MPI_EVENT_RAID_RC_DOMAIN_VAL_NEEDED)
+			mptscsih_set_dvflags_raid(hd, pRaidEventData->PhysDiskNum);
 #endif
+		break;
+	}
 
-#if defined(MPT_DEBUG_DV) || defined(MPT_DEBUG_DV_TINY)
-		printk("Raid Event RF: ");
-		{
-			u32 *m = (u32 *)pEvReply;
-			int ii;
-			int n = (int)pEvReply->MsgLength;
-			for (ii=6; ii < n; ii++)
-				printk(" %08x", le32_to_cpu(m[ii]));
-			printk("\n");
-		}
-#endif
+	/* Persistent table is full. */
+	case MPI_EVENT_PERSISTENT_TABLE_FULL:
+		INIT_WORK(&mptscsih_persistTask,
+		    mptscsih_sas_persist_clear_table,(void *)ioc);
+		schedule_work(&mptscsih_persistTask);
 		break;
 
 	case MPI_EVENT_NONE:				/* 00 */
@@ -2687,7 +2652,7 @@ mptscsih_initTarget(MPT_SCSI_HOST *hd, int bus_id, int target_id, u8 lun, char *
 {
 	int		indexed_lun, lun_index;
 	VirtDevice	*vdev;
-	ScsiCfgData	*pSpi;
+	SpiCfgData	*pSpi;
 	char		data_56;
 
 	dinitprintk((MYIOC_s_INFO_FMT "initTarget bus=%d id=%d lun=%d hd=%p\n",
@@ -2794,7 +2759,7 @@ mptscsih_initTarget(MPT_SCSI_HOST *hd, int bus_id, int target_id, u8 lun, char *
 static void
 mptscsih_setTargetNegoParms(MPT_SCSI_HOST *hd, VirtDevice *target, char byte56)
 {
-	ScsiCfgData *pspi_data = &hd->ioc->spi_data;
+	SpiCfgData *pspi_data = &hd->ioc->spi_data;
 	int  id = (int) target->target_id;
 	int  nvram;
 	VirtDevice	*vdev;
@@ -2973,11 +2938,13 @@ mptscsih_setTargetNegoParms(MPT_SCSI_HOST *hd, VirtDevice *target, char byte56)
 static void
 mptscsih_set_dvflags(MPT_SCSI_HOST *hd, SCSIIORequest_t *pReq)
 {
+	MPT_ADAPTER	*ioc = hd->ioc;
 	u8 cmd;
-	ScsiCfgData *pSpi;
+	SpiCfgData	*pSpi;
 
-	ddvtprintk((" set_dvflags: id=%d lun=%d negoNvram=%x cmd=%x\n", 
-		pReq->TargetID, pReq->LUN[1], hd->negoNvram, pReq->CDB[0]));
+	ddvtprintk((MYIOC_s_NOTE_FMT
+		" set_dvflags: id=%d lun=%d negoNvram=%x cmd=%x\n",
+		hd->ioc->name, pReq->TargetID, pReq->LUN[1], hd->negoNvram, pReq->CDB[0]));
 
 	if ((pReq->LUN[1] != 0) || (hd->negoNvram != 0))
 		return;
@@ -2985,12 +2952,12 @@ mptscsih_set_dvflags(MPT_SCSI_HOST *hd, SCSIIORequest_t *pReq)
 	cmd = pReq->CDB[0];
 
 	if ((cmd == READ_CAPACITY) || (cmd == MODE_SENSE)) {
-		pSpi = &hd->ioc->spi_data;
-		if ((pSpi->isRaid & (1 << pReq->TargetID)) && pSpi->pIocPg3) {
+		pSpi = &ioc->spi_data;
+		if ((ioc->raid_data.isRaid & (1 << pReq->TargetID)) && ioc->raid_data.pIocPg3) {
 			/* Set NEED_DV for all hidden disks
 			 */
-			Ioc3PhysDisk_t *pPDisk =  pSpi->pIocPg3->PhysDisk;
-			int		numPDisk = pSpi->pIocPg3->NumPhysDisks;
+			Ioc3PhysDisk_t *pPDisk =  ioc->raid_data.pIocPg3->PhysDisk;
+			int		numPDisk = ioc->raid_data.pIocPg3->NumPhysDisks;
 
 			while (numPDisk) {
 				pSpi->dvStatus[pPDisk->PhysDiskID] |= MPT_SCSICFG_NEED_DV;
@@ -3004,6 +2971,50 @@ mptscsih_set_dvflags(MPT_SCSI_HOST *hd, SCSIIORequest_t *pReq)
 	}
 }
 
+/* mptscsih_raid_set_dv_flags()
+ *
+ * New or replaced disk. Set DV flag and schedule DV.
+ */
+static void
+mptscsih_set_dvflags_raid(MPT_SCSI_HOST *hd, int id)
+{
+	MPT_ADAPTER	*ioc = hd->ioc;
+	SpiCfgData	*pSpi = &ioc->spi_data;
+	Ioc3PhysDisk_t	*pPDisk;
+	int		 numPDisk;
+
+	if (hd->negoNvram != 0)
+		return;
+
+	ddvtprintk(("DV requested for phys disk id %d\n", id));
+	if (ioc->raid_data.pIocPg3) {
+		pPDisk =  ioc->raid_data.pIocPg3->PhysDisk;
+		numPDisk = ioc->raid_data.pIocPg3->NumPhysDisks;
+		while (numPDisk) {
+			if (id == pPDisk->PhysDiskNum) {
+				pSpi->dvStatus[pPDisk->PhysDiskID] =
+				    (MPT_SCSICFG_NEED_DV | MPT_SCSICFG_DV_NOT_DONE);
+				pSpi->forceDv = MPT_SCSICFG_NEED_DV;
+				ddvtprintk(("NEED_DV set for phys disk id %d\n",
+				    pPDisk->PhysDiskID));
+				break;
+			}
+			pPDisk++;
+			numPDisk--;
+		}
+
+		if (numPDisk == 0) {
+			/* The physical disk that needs DV was not found
+			 * in the stored IOC Page 3. The driver must reload
+			 * this page. DV routine will set the NEED_DV flag for
+			 * all phys disks that have DV_NOT_DONE set.
+			 */
+			pSpi->forceDv = MPT_SCSICFG_NEED_DV | MPT_SCSICFG_RELOAD_IOC_PG3;
+			ddvtprintk(("phys disk %d not found. Setting reload IOC Pg3 Flag\n",id));
+		}
+	}
+}
+
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /*
  * If no Target, bus reset on 1st I/O. Set the flag to
@@ -3091,7 +3102,7 @@ mptscsih_writeSDP1(MPT_SCSI_HOST *hd, int portnum, int target_id, int flags)
 	MPT_ADAPTER		*ioc = hd->ioc;
 	Config_t		*pReq;
 	SCSIDevicePage1_t	*pData;
-	VirtDevice		*pTarget;
+	VirtDevice		*pTarget=NULL;
 	MPT_FRAME_HDR		*mf;
 	dma_addr_t		 dataDma;
 	u16			 req_idx;
@@ -3190,7 +3201,7 @@ mptscsih_writeSDP1(MPT_SCSI_HOST *hd, int portnum, int target_id, int flags)
 #endif
 
 		if (flags & MPT_SCSICFG_BLK_NEGO)
-			negoFlags = MPT_TARGET_NO_NEGO_WIDE | MPT_TARGET_NO_NEGO_SYNC;
+			negoFlags |= MPT_TARGET_NO_NEGO_WIDE | MPT_TARGET_NO_NEGO_SYNC;
 
 		mptscsih_setDevicePage1Flags(width, factor, offset,
 					&requested, &configuration, negoFlags);
@@ -4011,7 +4022,7 @@ mptscsih_synchronize_cache(MPT_SCSI_HOST *hd, int portnum)
 
 		/* If target Ptr NULL or if this target is NOT a disk, skip.
 		 */
-		if ((pTarget) && (pTarget->tflags & MPT_TARGET_FLAGS_Q_YES)){
+		if ((pTarget) && (pTarget->inq_data[0] == TYPE_DISK)){
 			for (lun=0; lun <= MPT_LAST_LUN; lun++) {
 				/* If LUN present, issue the command
 				 */
@@ -4106,9 +4117,9 @@ mptscsih_domainValidation(void *arg)
 
 			if ((ioc->spi_data.forceDv & MPT_SCSICFG_RELOAD_IOC_PG3) != 0) {
 				mpt_read_ioc_pg_3(ioc);
-				if (ioc->spi_data.pIocPg3) {
-					Ioc3PhysDisk_t *pPDisk = ioc->spi_data.pIocPg3->PhysDisk;
-					int		numPDisk = ioc->spi_data.pIocPg3->NumPhysDisks;
+				if (ioc->raid_data.pIocPg3) {
+					Ioc3PhysDisk_t *pPDisk = ioc->raid_data.pIocPg3->PhysDisk;
+					int		numPDisk = ioc->raid_data.pIocPg3->NumPhysDisks;
 
 					while (numPDisk) {
 						if (ioc->spi_data.dvStatus[pPDisk->PhysDiskID] & MPT_SCSICFG_DV_NOT_DONE)
@@ -4147,7 +4158,7 @@ mptscsih_domainValidation(void *arg)
 					isPhysDisk = mptscsih_is_phys_disk(ioc, id);
 					if (isPhysDisk) {
 						for (ii=0; ii < MPT_MAX_SCSI_DEVICES; ii++) {
-							if (hd->ioc->spi_data.isRaid & (1 << ii)) {
+							if (hd->ioc->raid_data.isRaid & (1 << ii)) {
 								hd->ioc->spi_data.dvStatus[ii] |= MPT_SCSICFG_DV_PENDING;
 							}
 						}
@@ -4166,7 +4177,7 @@ mptscsih_domainValidation(void *arg)
 
 					if (isPhysDisk) {
 						for (ii=0; ii < MPT_MAX_SCSI_DEVICES; ii++) {
-							if (hd->ioc->spi_data.isRaid & (1 << ii)) {
+							if (hd->ioc->raid_data.isRaid & (1 << ii)) {
 								hd->ioc->spi_data.dvStatus[ii] &= ~MPT_SCSICFG_DV_PENDING;
 							}
 						}
@@ -4188,21 +4199,21 @@ mptscsih_domainValidation(void *arg)
 
 /* Search IOC page 3 to determine if this is hidden physical disk
  */
-static int 
+/* Search IOC page 3 to determine if this is hidden physical disk
+ */
+static int
 mptscsih_is_phys_disk(MPT_ADAPTER *ioc, int id)
 {
-	if (ioc->spi_data.pIocPg3) {
-		Ioc3PhysDisk_t *pPDisk =  ioc->spi_data.pIocPg3->PhysDisk;
-		int		numPDisk = ioc->spi_data.pIocPg3->NumPhysDisks;
+	int i;
 
-		while (numPDisk) {
-			if (pPDisk->PhysDiskID == id) {
-				return 1;
-			}
-			pPDisk++;
-			numPDisk--;
-		}
+	if (!ioc->raid_data.isRaid || !ioc->raid_data.pIocPg3)
+		return 0;
+
+	for (i = 0; i < ioc->raid_data.pIocPg3->NumPhysDisks; i++) {
+		if (id == ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskID)
+			return 1;
 	}
+
 	return 0;
 }
 
@@ -4408,7 +4419,7 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id)
 	/* Skip this ID? Set cfg.cfghdr.hdr to force config page write
 	 */
 	{
-		ScsiCfgData *pspi_data = &hd->ioc->spi_data;
+		SpiCfgData *pspi_data = &hd->ioc->spi_data;
 		if (pspi_data->nvram && (pspi_data->nvram[id] != MPT_HOST_NVRAM_INVALID)) {
 			/* Set the factor from nvram */
 			nfactor = (pspi_data->nvram[id] & MPT_NVRAM_SYNC_MASK) >> 8;
@@ -4438,11 +4449,11 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id)
 	}
 
 	/* Finish iocmd inititialization - hidden or visible disk? */
-	if (ioc->spi_data.pIocPg3) {
+	if (ioc->raid_data.pIocPg3) {
 		/* Search IOC page 3 for matching id
 		 */
-		Ioc3PhysDisk_t *pPDisk =  ioc->spi_data.pIocPg3->PhysDisk;
-		int		numPDisk = ioc->spi_data.pIocPg3->NumPhysDisks;
+		Ioc3PhysDisk_t *pPDisk =  ioc->raid_data.pIocPg3->PhysDisk;
+		int		numPDisk = ioc->raid_data.pIocPg3->NumPhysDisks;
 
 		while (numPDisk) {
 			if (pPDisk->PhysDiskID == id) {
@@ -4466,7 +4477,7 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id)
 	/* RAID Volume ID's may double for a physical device. If RAID but
 	 * not a physical ID as well, skip DV.
 	 */
-	if ((hd->ioc->spi_data.isRaid & (1 << id)) && !(iocmd.flags & MPT_ICFLAG_PHYS_DISK))
+	if ((hd->ioc->raid_data.isRaid & (1 << id)) && !(iocmd.flags & MPT_ICFLAG_PHYS_DISK))
 		goto target_done;
 
 
@@ -4815,6 +4826,8 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id)
 					notDone = 0;
 					if (iocmd.flags & MPT_ICFLAG_ECHO) {
 						bufsize =  ((pbuf1[2] & 0x1F) <<8) | pbuf1[3];
+						if (pbuf1[0] & 0x01)
+							iocmd.flags |= MPT_ICFLAG_EBOS;
 					} else {
 						bufsize =  pbuf1[1]<<16 | pbuf1[2]<<8 | pbuf1[3];
 					}
@@ -4911,6 +4924,9 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id)
 		}
 		iocmd.flags &= ~MPT_ICFLAG_DID_RESET;
 
+		if (iocmd.flags & MPT_ICFLAG_EBOS)
+			goto skip_Reserve;
+
 		repeat = 5;
 		while (repeat && (!(iocmd.flags & MPT_ICFLAG_RESERVED))) {
 			iocmd.cmd = RESERVE;
@@ -4954,6 +4970,7 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id)
 			}
 		}
 
+skip_Reserve:
 		mptscsih_fillbuf(pbuf1, sz, patt, 1);
 		iocmd.cmd = WRITE_BUFFER;
 		iocmd.data_dma = buf1_dma;
@@ -5198,11 +5215,12 @@ mptscsih_dv_parms(MPT_SCSI_HOST *hd, DVPARAMETERS *dv,void *pPage)
 		 * If not an LVD bus, the adapter minSyncFactor has been
 		 * already throttled back.
 		 */
+		negoFlags = hd->ioc->spi_data.noQas;
 		if ((hd->Targets)&&((pTarget = hd->Targets[(int)id]) != NULL) && !pTarget->raidVolume) {
 			width = pTarget->maxWidth;
 			offset = pTarget->maxOffset;
 			factor = pTarget->minSyncFactor;
-			negoFlags = pTarget->negoFlags;
+			negoFlags |= pTarget->negoFlags;
 		} else {
 			if (hd->ioc->spi_data.nvram && (hd->ioc->spi_data.nvram[id] != MPT_HOST_NVRAM_INVALID)) {
 				data = hd->ioc->spi_data.nvram[id];
@@ -5223,7 +5241,6 @@ mptscsih_dv_parms(MPT_SCSI_HOST *hd, DVPARAMETERS *dv,void *pPage)
 			}
 
 			/* Set the negotiation flags */
-			negoFlags = hd->ioc->spi_data.noQas;
 			if (!width)
 				negoFlags |= MPT_TARGET_NO_NEGO_WIDE;
 
diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h
index 51c0255ac16..971fda4b8b5 100644
--- a/drivers/message/fusion/mptscsih.h
+++ b/drivers/message/fusion/mptscsih.h
@@ -1,5 +1,5 @@
 /*
- *  linux/drivers/message/fusion/mptscsi.h
+ *  linux/drivers/message/fusion/mptscsih.h
  *      High performance SCSI / Fibre Channel SCSI Host device driver.
  *      For use with PCI chip/adapter(s):
  *          LSIFC9xx/LSI409xx Fibre Channel
@@ -53,8 +53,8 @@
  *	SCSI Public stuff...
  */
 
-#define MPT_SCSI_CMD_PER_DEV_HIGH	31
-#define MPT_SCSI_CMD_PER_DEV_LOW	7
+#define MPT_SCSI_CMD_PER_DEV_HIGH	64
+#define MPT_SCSI_CMD_PER_DEV_LOW	32
 
 #define MPT_SCSI_CMD_PER_LUN		7
 
@@ -77,6 +77,7 @@
 #define MPTSCSIH_MAX_WIDTH              1
 #define MPTSCSIH_MIN_SYNC               0x08
 #define MPTSCSIH_SAF_TE                 0
+#define MPTSCSIH_PT_CLEAR               0
 
 
 #endif
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 587d1274fd7..5c0e307d1d5 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -199,7 +199,7 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		printk(MYIOC_s_WARN_FMT
 			"Skipping ioc=%p because SCSI Initiator mode is NOT enabled!\n",
 			ioc->name, ioc);
-		return -ENODEV;
+		return 0;
 	}
 
 	sh = scsi_host_alloc(&mptspi_driver_template, sizeof(MPT_SCSI_HOST));
diff --git a/drivers/message/i2o/config-osm.c b/drivers/message/i2o/config-osm.c
index af32ab4e90c..10432f66520 100644
--- a/drivers/message/i2o/config-osm.c
+++ b/drivers/message/i2o/config-osm.c
@@ -56,8 +56,11 @@ static int __init i2o_config_init(void)
 		return -EBUSY;
 	}
 #ifdef CONFIG_I2O_CONFIG_OLD_IOCTL
-	if (i2o_config_old_init())
+	if (i2o_config_old_init()) {
+		osm_err("old config handler initialization failed\n");
 		i2o_driver_unregister(&i2o_config_driver);
+		return -EBUSY;
+	}
 #endif
 
 	return 0;
diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index a851d65c7cf..a260f83bcb0 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c
@@ -48,8 +48,8 @@ struct ucb1x00_ts {
 	u16			x_res;
 	u16			y_res;
 
-	int			restart:1;
-	int			adcsync:1;
+	unsigned int		restart:1;
+	unsigned int		adcsync:1;
 };
 
 static int adcsync;
diff --git a/drivers/mtd/devices/docecc.c b/drivers/mtd/devices/docecc.c
index 9a087c1fb0b..24f670b5a4f 100644
--- a/drivers/mtd/devices/docecc.c
+++ b/drivers/mtd/devices/docecc.c
@@ -40,7 +40,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/doc2000.h>
 
-#define DEBUG 0
+#define DEBUG_ECC 0
 /* need to undef it (from asm/termbits.h) */
 #undef B0
 
@@ -249,7 +249,7 @@ eras_dec_rs(dtype Alpha_to[NN + 1], dtype Index_of[NN + 1],
 	  lambda[j] ^= Alpha_to[modnn(u + tmp)];
       }
     }
-#if DEBUG >= 1
+#if DEBUG_ECC >= 1
     /* Test code that verifies the erasure locator polynomial just constructed
        Needed only for decoder debugging. */
     
@@ -276,7 +276,7 @@ eras_dec_rs(dtype Alpha_to[NN + 1], dtype Index_of[NN + 1],
       count = -1;
       goto finish;
     }
-#if DEBUG >= 2
+#if DEBUG_ECC >= 2
     printf("\n Erasure positions as determined by roots of Eras Loc Poly:\n");
     for (i = 0; i < count; i++)
       printf("%d ", loc[i]);
@@ -409,7 +409,7 @@ eras_dec_rs(dtype Alpha_to[NN + 1], dtype Index_of[NN + 1],
 	den ^= Alpha_to[modnn(lambda[i+1] + i * root[j])];
     }
     if (den == 0) {
-#if DEBUG >= 1
+#if DEBUG_ECC >= 1
       printf("\n ERROR: denominator = 0\n");
 #endif
       /* Convert to dual- basis */
diff --git a/drivers/net/8390.c b/drivers/net/8390.c
index 6d76f3a99b1..f8702742008 100644
--- a/drivers/net/8390.c
+++ b/drivers/net/8390.c
@@ -1094,7 +1094,7 @@ static void NS8390_trigger_send(struct net_device *dev, unsigned int length,
    
 	outb_p(E8390_NODMA+E8390_PAGE0, e8390_base+E8390_CMD);
     
-	if (inb_p(e8390_base) & E8390_TRANS) 
+	if (inb_p(e8390_base + E8390_CMD) & E8390_TRANS) 
 	{
 		printk(KERN_WARNING "%s: trigger_send() called with the transmitter busy.\n",
 			dev->name);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 90449a0f2a6..6d00c3de1a8 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1653,7 +1653,8 @@ static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_de
 	int old_features = bond_dev->features;
 	int res = 0;
 
-	if (slave_dev->do_ioctl == NULL) {
+	if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL &&
+		slave_dev->do_ioctl == NULL) {
 		printk(KERN_WARNING DRV_NAME
 		       ": Warning : no link monitoring support for %s\n",
 		       slave_dev->name);
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index f0471d102e3..f9223c1c5aa 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -100,11 +100,11 @@ VERSION 2.2LK	<2005/01/25>
 
 #ifdef CONFIG_R8169_NAPI
 #define rtl8169_rx_skb			netif_receive_skb
-#define rtl8169_rx_hwaccel_skb		vlan_hwaccel_rx
+#define rtl8169_rx_hwaccel_skb		vlan_hwaccel_receive_skb
 #define rtl8169_rx_quota(count, quota)	min(count, quota)
 #else
 #define rtl8169_rx_skb			netif_rx
-#define rtl8169_rx_hwaccel_skb		vlan_hwaccel_receive_skb
+#define rtl8169_rx_hwaccel_skb		vlan_hwaccel_rx
 #define rtl8169_rx_quota(count, quota)	count
 #endif
 
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 0208258e782..fd398da4993 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -42,7 +42,7 @@
 #include "skge.h"
 
 #define DRV_NAME		"skge"
-#define DRV_VERSION		"1.0"
+#define DRV_VERSION		"1.1"
 #define PFX			DRV_NAME " "
 
 #define DEFAULT_TX_RING_SIZE	128
@@ -105,41 +105,28 @@ static const u32 rxirqmask[] = { IS_R1_F, IS_R2_F };
 static const u32 txirqmask[] = { IS_XA1_F, IS_XA2_F };
 static const u32 portirqmask[] = { IS_PORT_1, IS_PORT_2 };
 
-/* Don't need to look at whole 16K.
- * last interesting register is descriptor poll timer.
- */
-#define SKGE_REGS_LEN	(29*128)
-
 static int skge_get_regs_len(struct net_device *dev)
 {
-	return SKGE_REGS_LEN;
+	return 0x4000;
 }
 
 /*
- * Returns copy of control register region
- * I/O region is divided into banks and certain regions are unreadable
+ * Returns copy of whole control register region
+ * Note: skip RAM address register because accessing it will
+ * 	 cause bus hangs!
  */
 static void skge_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 			  void *p)
 {
 	const struct skge_port *skge = netdev_priv(dev);
-	unsigned long offs;
 	const void __iomem *io = skge->hw->regs;
-	static const unsigned long bankmap
-		= (1<<0) | (1<<2) | (1<<8) | (1<<9)
-		  | (1<<12) | (1<<13) | (1<<14) | (1<<15) | (1<<16)
-		  | (1<<17) | (1<<20) | (1<<21) | (1<<22) | (1<<23)
-		  | (1<<24)  | (1<<25) | (1<<26) | (1<<27) | (1<<28);
 
 	regs->version = 1;
-	for (offs = 0; offs < regs->len; offs += 128) {
-		u32 len = min_t(u32, 128, regs->len - offs);
+	memset(p, 0, regs->len);
+	memcpy_fromio(p, io, B3_RAM_ADDR);
 
-		if (bankmap & (1<<(offs/128)))
-			memcpy_fromio(p + offs, io + offs, len);
-		else
-			memset(p + offs, 0, len);
-	}
+	memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1,
+		      regs->len - B3_RI_WTO_R1);
 }
 
 /* Wake on Lan only supported on Yukon chps with rev 1 or above */
@@ -775,17 +762,6 @@ static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u64 base)
 	return 0;
 }
 
-static struct sk_buff *skge_rx_alloc(struct net_device *dev, unsigned int size)
-{
-	struct sk_buff *skb = dev_alloc_skb(size);
-
-	if (likely(skb)) {
-		skb->dev = dev;
-		skb_reserve(skb, NET_IP_ALIGN);
-	}
-	return skb;
-}
-
 /* Allocate and setup a new buffer for receiving */
 static void skge_rx_setup(struct skge_port *skge, struct skge_element *e,
 			  struct sk_buff *skb, unsigned int bufsize)
@@ -858,16 +834,17 @@ static int skge_rx_fill(struct skge_port *skge)
 {
 	struct skge_ring *ring = &skge->rx_ring;
 	struct skge_element *e;
-	unsigned int bufsize = skge->rx_buf_size;
 
 	e = ring->start;
 	do {
-		struct sk_buff *skb = skge_rx_alloc(skge->netdev, bufsize);
+		struct sk_buff *skb;
 
+		skb = dev_alloc_skb(skge->rx_buf_size + NET_IP_ALIGN);
 		if (!skb)
 			return -ENOMEM;
 
-		skge_rx_setup(skge, e, skb, bufsize);
+		skb_reserve(skb, NET_IP_ALIGN);
+		skge_rx_setup(skge, e, skb, skge->rx_buf_size);
 	} while ( (e = e->next) != ring->start);
 
 	ring->to_clean = ring->start;
@@ -1666,6 +1643,22 @@ static void yukon_reset(struct skge_hw *hw, int port)
 			 | GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA);
 }
 
+/* Apparently, early versions of Yukon-Lite had wrong chip_id? */
+static int is_yukon_lite_a0(struct skge_hw *hw)
+{
+	u32 reg;
+	int ret;
+
+	if (hw->chip_id != CHIP_ID_YUKON)
+		return 0;
+
+	reg = skge_read32(hw, B2_FAR);
+	skge_write8(hw, B2_FAR + 3, 0xff);
+	ret = (skge_read8(hw, B2_FAR + 3) != 0);
+	skge_write32(hw, B2_FAR, reg);
+	return ret;
+}
+
 static void yukon_mac_init(struct skge_hw *hw, int port)
 {
 	struct skge_port *skge = netdev_priv(hw->dev[port]);
@@ -1781,9 +1774,11 @@ static void yukon_mac_init(struct skge_hw *hw, int port)
 	/* Configure Rx MAC FIFO */
 	skge_write16(hw, SK_REG(port, RX_GMF_FL_MSK), RX_FF_FL_DEF_MSK);
 	reg = GMF_OPER_ON | GMF_RX_F_FL_ON;
-	if (hw->chip_id == CHIP_ID_YUKON_LITE &&
-	    hw->chip_rev >= CHIP_REV_YU_LITE_A3)
+
+	/* disable Rx GMAC FIFO Flush for YUKON-Lite Rev. A0 only */
+	if (is_yukon_lite_a0(hw))
 		reg &= ~GMF_RX_F_FL_ON;
+
 	skge_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_CLR);
 	skge_write16(hw, SK_REG(port, RX_GMF_CTRL_T), reg);
 	/*
@@ -2442,6 +2437,14 @@ static void yukon_set_multicast(struct net_device *dev)
 	gma_write16(hw, port, GM_RX_CTRL, reg);
 }
 
+static inline u16 phy_length(const struct skge_hw *hw, u32 status)
+{
+	if (hw->chip_id == CHIP_ID_GENESIS)
+		return status >> XMR_FS_LEN_SHIFT;
+	else
+		return status >> GMR_FS_LEN_SHIFT;
+}
+
 static inline int bad_phy_status(const struct skge_hw *hw, u32 status)
 {
 	if (hw->chip_id == CHIP_ID_GENESIS)
@@ -2451,80 +2454,99 @@ static inline int bad_phy_status(const struct skge_hw *hw, u32 status)
 			(status & GMR_FS_RX_OK) == 0;
 }
 
-static void skge_rx_error(struct skge_port *skge, int slot,
-			  u32 control, u32 status)
-{
-	if (netif_msg_rx_err(skge))
-		printk(KERN_DEBUG PFX "%s: rx err, slot %d control 0x%x status 0x%x\n",
-		       skge->netdev->name, slot, control, status);
-
-	if ((control & (BMU_EOF|BMU_STF)) != (BMU_STF|BMU_EOF))
-		skge->net_stats.rx_length_errors++;
-	else if (skge->hw->chip_id == CHIP_ID_GENESIS) {
-		if (status & (XMR_FS_RUNT|XMR_FS_LNG_ERR))
-			skge->net_stats.rx_length_errors++;
-		if (status & XMR_FS_FRA_ERR)
-			skge->net_stats.rx_frame_errors++;
-		if (status & XMR_FS_FCS_ERR)
-			skge->net_stats.rx_crc_errors++;
-	} else {
-		if (status & (GMR_FS_LONG_ERR|GMR_FS_UN_SIZE))
-			skge->net_stats.rx_length_errors++;
-		if (status & GMR_FS_FRAGMENT)
-			skge->net_stats.rx_frame_errors++;
-		if (status & GMR_FS_CRC_ERR)
-			skge->net_stats.rx_crc_errors++;
-	}
-}
 
 /* Get receive buffer from descriptor.
  * Handles copy of small buffers and reallocation failures
  */
 static inline struct sk_buff *skge_rx_get(struct skge_port *skge,
 					  struct skge_element *e,
-					  unsigned int len)
+					  u32 control, u32 status, u16 csum)
 {
-	struct sk_buff *nskb, *skb;
+	struct sk_buff *skb;
+	u16 len = control & BMU_BBC;
+
+	if (unlikely(netif_msg_rx_status(skge)))
+		printk(KERN_DEBUG PFX "%s: rx slot %td status 0x%x len %d\n",
+		       skge->netdev->name, e - skge->rx_ring.start,
+		       status, len);
+
+	if (len > skge->rx_buf_size)
+		goto error;
+
+	if ((control & (BMU_EOF|BMU_STF)) != (BMU_STF|BMU_EOF))
+		goto error;
+
+	if (bad_phy_status(skge->hw, status))
+		goto error;
+
+	if (phy_length(skge->hw, status) != len)
+		goto error;
 
 	if (len < RX_COPY_THRESHOLD) {
-		nskb = skge_rx_alloc(skge->netdev, len + NET_IP_ALIGN);
-		if (unlikely(!nskb))
-			return NULL;
+		skb = dev_alloc_skb(len + 2);
+		if (!skb)
+			goto resubmit;
 
+		skb_reserve(skb, 2);
 		pci_dma_sync_single_for_cpu(skge->hw->pdev,
 					    pci_unmap_addr(e, mapaddr),
 					    len, PCI_DMA_FROMDEVICE);
-		memcpy(nskb->data, e->skb->data, len);
+		memcpy(skb->data, e->skb->data, len);
 		pci_dma_sync_single_for_device(skge->hw->pdev,
 					       pci_unmap_addr(e, mapaddr),
 					       len, PCI_DMA_FROMDEVICE);
-
-		if (skge->rx_csum) {
-			struct skge_rx_desc *rd = e->desc;
-			nskb->csum = le16_to_cpu(rd->csum2);
-			nskb->ip_summed = CHECKSUM_HW;
-		}
 		skge_rx_reuse(e, skge->rx_buf_size);
-		return nskb;
 	} else {
-		nskb = skge_rx_alloc(skge->netdev, skge->rx_buf_size);
-		if (unlikely(!nskb))
-			return NULL;
+		struct sk_buff *nskb;
+		nskb = dev_alloc_skb(skge->rx_buf_size + NET_IP_ALIGN);
+		if (!nskb)
+			goto resubmit;
 
 		pci_unmap_single(skge->hw->pdev,
 				 pci_unmap_addr(e, mapaddr),
 				 pci_unmap_len(e, maplen),
 				 PCI_DMA_FROMDEVICE);
 		skb = e->skb;
-		if (skge->rx_csum) {
-			struct skge_rx_desc *rd = e->desc;
-			skb->csum = le16_to_cpu(rd->csum2);
-			skb->ip_summed = CHECKSUM_HW;
-		}
-
+  		prefetch(skb->data);
 		skge_rx_setup(skge, e, nskb, skge->rx_buf_size);
-		return skb;
 	}
+
+	skb_put(skb, len);
+	skb->dev = skge->netdev;
+	if (skge->rx_csum) {
+		skb->csum = csum;
+		skb->ip_summed = CHECKSUM_HW;
+	}
+
+	skb->protocol = eth_type_trans(skb, skge->netdev);
+
+	return skb;
+error:
+
+	if (netif_msg_rx_err(skge))
+		printk(KERN_DEBUG PFX "%s: rx err, slot %td control 0x%x status 0x%x\n",
+		       skge->netdev->name, e - skge->rx_ring.start,
+		       control, status);
+
+	if (skge->hw->chip_id == CHIP_ID_GENESIS) {
+		if (status & (XMR_FS_RUNT|XMR_FS_LNG_ERR))
+			skge->net_stats.rx_length_errors++;
+		if (status & XMR_FS_FRA_ERR)
+			skge->net_stats.rx_frame_errors++;
+		if (status & XMR_FS_FCS_ERR)
+			skge->net_stats.rx_crc_errors++;
+	} else {
+		if (status & (GMR_FS_LONG_ERR|GMR_FS_UN_SIZE))
+			skge->net_stats.rx_length_errors++;
+		if (status & GMR_FS_FRAGMENT)
+			skge->net_stats.rx_frame_errors++;
+		if (status & GMR_FS_CRC_ERR)
+			skge->net_stats.rx_crc_errors++;
+	}
+
+resubmit:
+	skge_rx_reuse(e, skge->rx_buf_size);
+	return NULL;
 }
 
 
@@ -2540,32 +2562,16 @@ static int skge_poll(struct net_device *dev, int *budget)
 	for (e = ring->to_clean; work_done < to_do; e = e->next) {
 		struct skge_rx_desc *rd = e->desc;
 		struct sk_buff *skb;
-		u32 control, len, status;
+		u32 control;
 
 		rmb();
 		control = rd->control;
 		if (control & BMU_OWN)
 			break;
 
-		len = control & BMU_BBC;
-		status = rd->status;
-
-		if (unlikely((control & (BMU_EOF|BMU_STF)) != (BMU_STF|BMU_EOF)
-			     || bad_phy_status(hw, status))) {
-			skge_rx_error(skge, e - ring->start, control, status);
-			skge_rx_reuse(e, skge->rx_buf_size);
-			continue;
-		}
-
-		if (netif_msg_rx_status(skge))
-		    printk(KERN_DEBUG PFX "%s: rx slot %td status 0x%x len %d\n",
-			   dev->name, e - ring->start, rd->status, len);
-
-		skb = skge_rx_get(skge, e, len);
+ 		skb = skge_rx_get(skge, e, control, rd->status,
+ 				  le16_to_cpu(rd->csum2));
 		if (likely(skb)) {
-			skb_put(skb, len);
-			skb->protocol = eth_type_trans(skb, dev);
-
 			dev->last_rx = jiffies;
 			netif_receive_skb(skb);
 
diff --git a/drivers/net/skge.h b/drivers/net/skge.h
index efbf98c675d..72c175b87a5 100644
--- a/drivers/net/skge.h
+++ b/drivers/net/skge.h
@@ -953,6 +953,7 @@ enum {
  */
 enum {
 	XMR_FS_LEN	= 0x3fff<<18,	/* Bit 31..18:	Rx Frame Length */
+	XMR_FS_LEN_SHIFT = 18,
 	XMR_FS_2L_VLAN	= 1<<17, /* Bit 17:	tagged wh 2Lev VLAN ID*/
 	XMR_FS_1_VLAN	= 1<<16, /* Bit 16:	tagged wh 1ev VLAN ID*/
 	XMR_FS_BC	= 1<<15, /* Bit 15:	Broadcast Frame */
@@ -1868,6 +1869,7 @@ enum {
 /* Receive Frame Status Encoding */
 enum {
 	GMR_FS_LEN	= 0xffff<<16, /* Bit 31..16:	Rx Frame Length */
+	GMR_FS_LEN_SHIFT = 16,
 	GMR_FS_VLAN	= 1<<13, /* Bit 13:	VLAN Packet */
 	GMR_FS_JABBER	= 1<<12, /* Bit 12:	Jabber Packet */
 	GMR_FS_UN_SIZE	= 1<<11, /* Bit 11:	Undersize Packet */
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index 48c03c11cd9..a01efa6d5c6 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -72,7 +72,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
 	}
 	skb_reserve(skb, 4);
 	cisco_hard_header(skb, dev, CISCO_KEEPALIVE, NULL, NULL, 0);
-	data = (cisco_packet*)skb->data;
+	data = (cisco_packet*)(skb->data + 4);
 
 	data->type = htonl(type);
 	data->par1 = htonl(par1);
diff --git a/drivers/pci/hotplug.c b/drivers/pci/hotplug.c
index 10444988a10..e1743be3190 100644
--- a/drivers/pci/hotplug.c
+++ b/drivers/pci/hotplug.c
@@ -7,7 +7,6 @@ int pci_hotplug (struct device *dev, char **envp, int num_envp,
 		 char *buffer, int buffer_size)
 {
 	struct pci_dev *pdev;
-	char *scratch;
 	int i = 0;
 	int length = 0;
 
@@ -18,9 +17,6 @@ int pci_hotplug (struct device *dev, char **envp, int num_envp,
 	if (!pdev)
 		return -ENODEV;
 
-	scratch = buffer;
-
-
 	if (add_hotplug_env_var(envp, num_envp, &i,
 				buffer, buffer_size, &length,
 				"PCI_CLASS=%04X", pdev->class))
diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c
index 752e6513c44..db69be85b45 100644
--- a/drivers/pci/hotplug/rpadlpar_sysfs.c
+++ b/drivers/pci/hotplug/rpadlpar_sysfs.c
@@ -62,7 +62,7 @@ static ssize_t add_slot_store(struct dlpar_io_attr *dlpar_attr,
 	char drc_name[MAX_DRC_NAME_LEN];
 	char *end;
 
-	if (nbytes > MAX_DRC_NAME_LEN)
+	if (nbytes >= MAX_DRC_NAME_LEN)
 		return 0;
 
 	memcpy(drc_name, buf, nbytes);
@@ -83,7 +83,7 @@ static ssize_t remove_slot_store(struct dlpar_io_attr *dlpar_attr,
 	char drc_name[MAX_DRC_NAME_LEN];
 	char *end;
 
-	if (nbytes > MAX_DRC_NAME_LEN)
+	if (nbytes >= MAX_DRC_NAME_LEN)
 		return 0;
 
 	memcpy(drc_name, buf, nbytes);
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index b1409441c1c..a32ae82e592 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -159,7 +159,7 @@ static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot,
 
 	pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus);
 
-	slot = kcalloc(1, sizeof(*slot), GFP_KERNEL);
+	slot = kzalloc(sizeof(*slot), GFP_KERNEL);
 	if (!slot)
 		return -ENOMEM;
 	bss_hotplug_slot->private = slot;
@@ -491,7 +491,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
 		if (sn_pci_slot_valid(pci_bus, device) != 1)
 			continue;
 
-		bss_hotplug_slot = kcalloc(1, sizeof(*bss_hotplug_slot),
+		bss_hotplug_slot = kzalloc(sizeof(*bss_hotplug_slot),
 					   GFP_KERNEL);
 		if (!bss_hotplug_slot) {
 			rc = -ENOMEM;
@@ -499,7 +499,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
 		}
 
 		bss_hotplug_slot->info =
-			kcalloc(1, sizeof(struct hotplug_slot_info),
+			kzalloc(sizeof(struct hotplug_slot_info),
 				GFP_KERNEL);
 		if (!bss_hotplug_slot->info) {
 			rc = -ENOMEM;
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 56a3b397efe..2898830c496 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -360,7 +360,7 @@ pci_create_resource_files(struct pci_dev *pdev)
 			continue;
 
 		/* allocate attribute structure, piggyback attribute name */
-		res_attr = kcalloc(1, sizeof(*res_attr) + 10, GFP_ATOMIC);
+		res_attr = kzalloc(sizeof(*res_attr) + 10, GFP_ATOMIC);
 		if (res_attr) {
 			char *res_attr_name = (char *)(res_attr + 1);
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 26a55d08b50..005786416bb 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -165,7 +165,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
 		if (l == 0xffffffff)
 			l = 0;
 		if ((l & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY) {
-			sz = pci_size(l, sz, PCI_BASE_ADDRESS_MEM_MASK);
+			sz = pci_size(l, sz, (u32)PCI_BASE_ADDRESS_MEM_MASK);
 			if (!sz)
 				continue;
 			res->start = l & PCI_BASE_ADDRESS_MEM_MASK;
@@ -215,7 +215,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
 		if (l == 0xffffffff)
 			l = 0;
 		if (sz && sz != 0xffffffff) {
-			sz = pci_size(l, sz, PCI_ROM_ADDRESS_MASK);
+			sz = pci_size(l, sz, (u32)PCI_ROM_ADDRESS_MASK);
 			if (sz) {
 				res->flags = (l & IORESOURCE_ROM_ENABLE) |
 				  IORESOURCE_MEM | IORESOURCE_PREFETCH |
@@ -402,6 +402,12 @@ static void pci_enable_crs(struct pci_dev *dev)
 static void __devinit pci_fixup_parent_subordinate_busnr(struct pci_bus *child, int max)
 {
 	struct pci_bus *parent = child->parent;
+
+	/* Attempts to fix that up are really dangerous unless
+	   we're going to re-assign all bus numbers. */
+	if (!pcibios_assign_all_busses())
+		return;
+
 	while (parent->parent && parent->subordinate < max) {
 		parent->subordinate = max;
 		pci_write_config_byte(parent->self, PCI_SUBORDINATE_BUS, max);
@@ -478,8 +484,18 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max
 		 * We need to assign a number to this bus which we always
 		 * do in the second pass.
 		 */
-		if (!pass)
+		if (!pass) {
+			if (pcibios_assign_all_busses())
+				/* Temporarily disable forwarding of the
+				   configuration cycles on all bridges in
+				   this bus segment to avoid possible
+				   conflicts in the second pass between two
+				   bridges programmed with overlapping
+				   bus ranges. */
+				pci_write_config_dword(dev, PCI_PRIMARY_BUS,
+						       buses & ~0xffffff);
 			return max;
+		}
 
 		/* Clear errors */
 		pci_write_config_word(dev, PCI_STATUS, 0xffff);
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index 91ea8e4777f..dbb3eb0e330 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -437,7 +437,7 @@ __ccwgroup_get_gdev_by_cdev(struct ccw_device *cdev)
 	if (cdev->dev.driver_data) {
 		gdev = (struct ccwgroup_device *)cdev->dev.driver_data;
 		if (get_device(&gdev->dev)) {
-			if (klist_node_attached(&gdev->dev.knode_bus))
+			if (device_is_registered(&gdev->dev))
 				return gdev;
 			put_device(&gdev->dev);
 		}
diff --git a/drivers/s390/scsi/Makefile b/drivers/s390/scsi/Makefile
index fc145307a7d..d6a78f1a2f1 100644
--- a/drivers/s390/scsi/Makefile
+++ b/drivers/s390/scsi/Makefile
@@ -3,7 +3,7 @@
 #
 
 zfcp-objs := zfcp_aux.o zfcp_ccw.o zfcp_scsi.o zfcp_erp.o zfcp_qdio.o \
-	     zfcp_fsf.o zfcp_sysfs_adapter.o zfcp_sysfs_port.o \
+	     zfcp_fsf.o zfcp_dbf.o zfcp_sysfs_adapter.o zfcp_sysfs_port.o \
 	     zfcp_sysfs_unit.o zfcp_sysfs_driver.o
 
 obj-$(CONFIG_ZFCP) += zfcp.o
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index bfe3ba73bc0..0b5087f7cab 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -122,95 +122,6 @@ _zfcp_hex_dump(char *addr, int count)
 
 #define ZFCP_LOG_AREA			ZFCP_LOG_AREA_OTHER
 
-static inline int
-zfcp_fsf_req_is_scsi_cmnd(struct zfcp_fsf_req *fsf_req)
-{
-	return ((fsf_req->fsf_command == FSF_QTCB_FCP_CMND) &&
-		!(fsf_req->status & ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT));
-}
-
-void
-zfcp_cmd_dbf_event_fsf(const char *text, struct zfcp_fsf_req *fsf_req,
-		       void *add_data, int add_length)
-{
-	struct zfcp_adapter *adapter = fsf_req->adapter;
-	struct scsi_cmnd *scsi_cmnd;
-	int level = 3;
-	int i;
-	unsigned long flags;
-
-	spin_lock_irqsave(&adapter->dbf_lock, flags);
-	if (zfcp_fsf_req_is_scsi_cmnd(fsf_req)) {
-		scsi_cmnd = fsf_req->data.send_fcp_command_task.scsi_cmnd;
-		debug_text_event(adapter->cmd_dbf, level, "fsferror");
-		debug_text_event(adapter->cmd_dbf, level, text);
-		debug_event(adapter->cmd_dbf, level, &fsf_req,
-			    sizeof (unsigned long));
-		debug_event(adapter->cmd_dbf, level, &fsf_req->seq_no,
-			    sizeof (u32));
-		debug_event(adapter->cmd_dbf, level, &scsi_cmnd,
-			    sizeof (unsigned long));
-		debug_event(adapter->cmd_dbf, level, &scsi_cmnd->cmnd,
-			    min(ZFCP_CMD_DBF_LENGTH, (int)scsi_cmnd->cmd_len));
-		for (i = 0; i < add_length; i += ZFCP_CMD_DBF_LENGTH)
-			debug_event(adapter->cmd_dbf,
-				    level,
-				    (char *) add_data + i,
-				    min(ZFCP_CMD_DBF_LENGTH, add_length - i));
-	}
-	spin_unlock_irqrestore(&adapter->dbf_lock, flags);
-}
-
-/* XXX additionally log unit if available */
-/* ---> introduce new parameter for unit, see 2.4 code */
-void
-zfcp_cmd_dbf_event_scsi(const char *text, struct scsi_cmnd *scsi_cmnd)
-{
-	struct zfcp_adapter *adapter;
-	union zfcp_req_data *req_data;
-	struct zfcp_fsf_req *fsf_req;
-	int level = ((host_byte(scsi_cmnd->result) != 0) ? 1 : 5);
-	unsigned long flags;
-
-	adapter = (struct zfcp_adapter *) scsi_cmnd->device->host->hostdata[0];
-	req_data = (union zfcp_req_data *) scsi_cmnd->host_scribble;
-	fsf_req = (req_data ? req_data->send_fcp_command_task.fsf_req : NULL);
-	spin_lock_irqsave(&adapter->dbf_lock, flags);
-	debug_text_event(adapter->cmd_dbf, level, "hostbyte");
-	debug_text_event(adapter->cmd_dbf, level, text);
-	debug_event(adapter->cmd_dbf, level, &scsi_cmnd->result, sizeof (u32));
-	debug_event(adapter->cmd_dbf, level, &scsi_cmnd,
-		    sizeof (unsigned long));
-	debug_event(adapter->cmd_dbf, level, &scsi_cmnd->cmnd,
-		    min(ZFCP_CMD_DBF_LENGTH, (int)scsi_cmnd->cmd_len));
-	if (likely(fsf_req)) {
-		debug_event(adapter->cmd_dbf, level, &fsf_req,
-			    sizeof (unsigned long));
-		debug_event(adapter->cmd_dbf, level, &fsf_req->seq_no,
-			    sizeof (u32));
-	} else {
-		debug_text_event(adapter->cmd_dbf, level, "");
-		debug_text_event(adapter->cmd_dbf, level, "");
-	}
-	spin_unlock_irqrestore(&adapter->dbf_lock, flags);
-}
-
-void
-zfcp_in_els_dbf_event(struct zfcp_adapter *adapter, const char *text,
-		      struct fsf_status_read_buffer *status_buffer, int length)
-{
-	int level = 1;
-	int i;
-
-	debug_text_event(adapter->in_els_dbf, level, text);
-	debug_event(adapter->in_els_dbf, level, &status_buffer->d_id, 8);
-	for (i = 0; i < length; i += ZFCP_IN_ELS_DBF_LENGTH)
-		debug_event(adapter->in_els_dbf,
-			    level,
-			    (char *) status_buffer->payload + i,
-			    min(ZFCP_IN_ELS_DBF_LENGTH, length - i));
-}
-
 /**
  * zfcp_device_setup - setup function
  * @str: pointer to parameter string
@@ -1017,81 +928,6 @@ zfcp_free_low_mem_buffers(struct zfcp_adapter *adapter)
 		mempool_destroy(adapter->pool.data_gid_pn);
 }
 
-/**
- * zfcp_adapter_debug_register - registers debug feature for an adapter
- * @adapter: pointer to adapter for which debug features should be registered
- * return: -ENOMEM on error, 0 otherwise
- */
-int
-zfcp_adapter_debug_register(struct zfcp_adapter *adapter)
-{
-	char dbf_name[20];
-
-	/* debug feature area which records SCSI command failures (hostbyte) */
-	spin_lock_init(&adapter->dbf_lock);
-
-	sprintf(dbf_name, ZFCP_CMD_DBF_NAME "%s",
-		zfcp_get_busid_by_adapter(adapter));
-	adapter->cmd_dbf = debug_register(dbf_name, ZFCP_CMD_DBF_INDEX,
-					  ZFCP_CMD_DBF_AREAS,
-					  ZFCP_CMD_DBF_LENGTH);
-	debug_register_view(adapter->cmd_dbf, &debug_hex_ascii_view);
-	debug_set_level(adapter->cmd_dbf, ZFCP_CMD_DBF_LEVEL);
-
-	/* debug feature area which records SCSI command aborts */
-	sprintf(dbf_name, ZFCP_ABORT_DBF_NAME "%s",
-		zfcp_get_busid_by_adapter(adapter));
-	adapter->abort_dbf = debug_register(dbf_name, ZFCP_ABORT_DBF_INDEX,
-					    ZFCP_ABORT_DBF_AREAS,
-					    ZFCP_ABORT_DBF_LENGTH);
-	debug_register_view(adapter->abort_dbf, &debug_hex_ascii_view);
-	debug_set_level(adapter->abort_dbf, ZFCP_ABORT_DBF_LEVEL);
-
-	/* debug feature area which records incoming ELS commands */
-	sprintf(dbf_name, ZFCP_IN_ELS_DBF_NAME "%s",
-		zfcp_get_busid_by_adapter(adapter));
-	adapter->in_els_dbf = debug_register(dbf_name, ZFCP_IN_ELS_DBF_INDEX,
-					     ZFCP_IN_ELS_DBF_AREAS,
-					     ZFCP_IN_ELS_DBF_LENGTH);
-	debug_register_view(adapter->in_els_dbf, &debug_hex_ascii_view);
-	debug_set_level(adapter->in_els_dbf, ZFCP_IN_ELS_DBF_LEVEL);
-
-	/* debug feature area which records erp events */
-	sprintf(dbf_name, ZFCP_ERP_DBF_NAME "%s",
-		zfcp_get_busid_by_adapter(adapter));
-	adapter->erp_dbf = debug_register(dbf_name, ZFCP_ERP_DBF_INDEX,
-					  ZFCP_ERP_DBF_AREAS,
-					  ZFCP_ERP_DBF_LENGTH);
-	debug_register_view(adapter->erp_dbf, &debug_hex_ascii_view);
-	debug_set_level(adapter->erp_dbf, ZFCP_ERP_DBF_LEVEL);
-
-	if (!(adapter->cmd_dbf && adapter->abort_dbf &&
-	      adapter->in_els_dbf && adapter->erp_dbf)) {
-		zfcp_adapter_debug_unregister(adapter);
-		return -ENOMEM;
-	}
-
-	return 0;
-
-}
-
-/**
- * zfcp_adapter_debug_unregister - unregisters debug feature for an adapter
- * @adapter: pointer to adapter for which debug features should be unregistered
- */
-void
-zfcp_adapter_debug_unregister(struct zfcp_adapter *adapter)
-{
- 	debug_unregister(adapter->abort_dbf);
- 	debug_unregister(adapter->cmd_dbf);
- 	debug_unregister(adapter->erp_dbf);
- 	debug_unregister(adapter->in_els_dbf);
-	adapter->abort_dbf = NULL;
-	adapter->cmd_dbf = NULL;
-	adapter->erp_dbf = NULL;
-	adapter->in_els_dbf = NULL;
-}
-
 void
 zfcp_dummy_release(struct device *dev)
 {
@@ -1462,10 +1298,6 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
 	/* see FC-FS */
 	no_entries = (fcp_rscn_head->payload_len / 4);
 
-	zfcp_in_els_dbf_event(adapter, "##rscn", status_buffer,
-			      fcp_rscn_head->payload_len);
-
-	debug_text_event(adapter->erp_dbf, 1, "unsol_els_rscn:");
 	for (i = 1; i < no_entries; i++) {
 		/* skip head and start with 1st element */
 		fcp_rscn_element++;
@@ -1497,8 +1329,6 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
 			    (ZFCP_STATUS_PORT_DID_DID, &port->status)) {
 				ZFCP_LOG_INFO("incoming RSCN, trying to open "
 					      "port 0x%016Lx\n", port->wwpn);
-				debug_text_event(adapter->erp_dbf, 1,
-						 "unsol_els_rscnu:");
 				zfcp_erp_port_reopen(port,
 						     ZFCP_STATUS_COMMON_ERP_FAILED);
 				continue;
@@ -1524,8 +1354,6 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter,
 				 */
 				ZFCP_LOG_INFO("incoming RSCN, trying to open "
 					      "port 0x%016Lx\n", port->wwpn);
-				debug_text_event(adapter->erp_dbf, 1,
-						 "unsol_els_rscnk:");
 				zfcp_test_link(port);
 			}
 		}
@@ -1541,8 +1369,6 @@ zfcp_fsf_incoming_els_plogi(struct zfcp_adapter *adapter,
 	struct zfcp_port *port;
 	unsigned long flags;
 
-	zfcp_in_els_dbf_event(adapter, "##plogi", status_buffer, 28);
-
 	read_lock_irqsave(&zfcp_data.config_lock, flags);
 	list_for_each_entry(port, &adapter->port_list_head, list) {
 		if (port->wwpn == (*(wwn_t *) & els_logi->nport_wwn))
@@ -1556,8 +1382,6 @@ zfcp_fsf_incoming_els_plogi(struct zfcp_adapter *adapter,
 			       status_buffer->d_id,
 			       zfcp_get_busid_by_adapter(adapter));
 	} else {
-		debug_text_event(adapter->erp_dbf, 1, "unsol_els_plogi:");
-		debug_event(adapter->erp_dbf, 1, &els_logi->nport_wwn, 8);
 		zfcp_erp_port_forced_reopen(port, 0);
 	}
 }
@@ -1570,8 +1394,6 @@ zfcp_fsf_incoming_els_logo(struct zfcp_adapter *adapter,
 	struct zfcp_port *port;
 	unsigned long flags;
 
-	zfcp_in_els_dbf_event(adapter, "##logo", status_buffer, 16);
-
 	read_lock_irqsave(&zfcp_data.config_lock, flags);
 	list_for_each_entry(port, &adapter->port_list_head, list) {
 		if (port->wwpn == els_logo->nport_wwpn)
@@ -1585,8 +1407,6 @@ zfcp_fsf_incoming_els_logo(struct zfcp_adapter *adapter,
 			       status_buffer->d_id,
 			       zfcp_get_busid_by_adapter(adapter));
 	} else {
-		debug_text_event(adapter->erp_dbf, 1, "unsol_els_logo:");
-		debug_event(adapter->erp_dbf, 1, &els_logo->nport_wwpn, 8);
 		zfcp_erp_port_forced_reopen(port, 0);
 	}
 }
@@ -1595,7 +1415,6 @@ static void
 zfcp_fsf_incoming_els_unknown(struct zfcp_adapter *adapter,
 			      struct fsf_status_read_buffer *status_buffer)
 {
-	zfcp_in_els_dbf_event(adapter, "##undef", status_buffer, 24);
 	ZFCP_LOG_NORMAL("warning: unknown incoming ELS 0x%08x "
 			"for adapter %s\n", *(u32 *) (status_buffer->payload),
 			zfcp_get_busid_by_adapter(adapter));
@@ -1609,10 +1428,11 @@ zfcp_fsf_incoming_els(struct zfcp_fsf_req *fsf_req)
 	u32 els_type;
 	struct zfcp_adapter *adapter;
 
-	status_buffer = fsf_req->data.status_read.buffer;
+	status_buffer = (struct fsf_status_read_buffer *) fsf_req->data;
 	els_type = *(u32 *) (status_buffer->payload);
 	adapter = fsf_req->adapter;
 
+	zfcp_san_dbf_event_incoming_els(fsf_req);
 	if (els_type == LS_PLOGI)
 		zfcp_fsf_incoming_els_plogi(adapter, status_buffer);
 	else if (els_type == LS_LOGO)
diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c
index b30abab77da..0fc46381fc2 100644
--- a/drivers/s390/scsi/zfcp_ccw.c
+++ b/drivers/s390/scsi/zfcp_ccw.c
@@ -202,19 +202,9 @@ static int
 zfcp_ccw_set_offline(struct ccw_device *ccw_device)
 {
 	struct zfcp_adapter *adapter;
-	struct zfcp_port *port;
-	struct fc_rport *rport;
 
 	down(&zfcp_data.config_sema);
 	adapter = dev_get_drvdata(&ccw_device->dev);
-	/* might be racy, but we cannot take config_lock due to the fact that
-	   fc_remote_port_delete might sleep */
-	list_for_each_entry(port, &adapter->port_list_head, list)
-		if (port->rport) {
-			rport = port->rport;
-			port->rport = NULL;
-			fc_remote_port_delete(rport);
-		}
 	zfcp_erp_adapter_shutdown(adapter, 0);
 	zfcp_erp_wait(adapter);
 	zfcp_adapter_scsi_unregister(adapter);
diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c
new file mode 100644
index 00000000000..826fb3b0060
--- /dev/null
+++ b/drivers/s390/scsi/zfcp_dbf.c
@@ -0,0 +1,995 @@
+/*
+ *
+ * linux/drivers/s390/scsi/zfcp_dbf.c
+ *
+ * FCP adapter driver for IBM eServer zSeries
+ *
+ * Debugging facilities
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define ZFCP_DBF_REVISION "$Revision$"
+
+#include <asm/debug.h>
+#include <linux/ctype.h>
+#include "zfcp_ext.h"
+
+static u32 dbfsize = 4;
+
+module_param(dbfsize, uint, 0400);
+MODULE_PARM_DESC(dbfsize,
+		 "number of pages for each debug feature area (default 4)");
+
+#define ZFCP_LOG_AREA			ZFCP_LOG_AREA_OTHER
+
+static inline int
+zfcp_dbf_stck(char *out_buf, const char *label, unsigned long long stck)
+{
+	unsigned long long sec;
+	struct timespec xtime;
+	int len = 0;
+
+	stck -= 0x8126d60e46000000LL - (0x3c26700LL * 1000000 * 4096);
+	sec = stck >> 12;
+	do_div(sec, 1000000);
+	xtime.tv_sec = sec;
+	stck -= (sec * 1000000) << 12;
+	xtime.tv_nsec = ((stck * 1000) >> 12);
+	len += sprintf(out_buf + len, "%-24s%011lu:%06lu\n",
+		       label, xtime.tv_sec, xtime.tv_nsec);
+
+	return len;
+}
+
+static int zfcp_dbf_tag(char *out_buf, const char *label, const char *tag)
+{
+	int len = 0, i;
+
+	len += sprintf(out_buf + len, "%-24s", label);
+	for (i = 0; i < ZFCP_DBF_TAG_SIZE; i++)
+		len += sprintf(out_buf + len, "%c", tag[i]);
+	len += sprintf(out_buf + len, "\n");
+
+	return len;
+}
+
+static int
+zfcp_dbf_view(char *out_buf, const char *label, const char *format, ...)
+{
+	va_list arg;
+	int len = 0;
+
+	len += sprintf(out_buf + len, "%-24s", label);
+	va_start(arg, format);
+	len += vsprintf(out_buf + len, format, arg);
+	va_end(arg);
+	len += sprintf(out_buf + len, "\n");
+
+	return len;
+}
+
+static int
+zfcp_dbf_view_dump(char *out_buf, const char *label,
+		   char *buffer, int buflen, int offset, int total_size)
+{
+	int len = 0;
+
+	if (offset == 0)
+		len += sprintf(out_buf + len, "%-24s  ", label);
+
+	while (buflen--) {
+		if (offset > 0) {
+			if ((offset % 32) == 0)
+				len += sprintf(out_buf + len, "\n%-24c  ", ' ');
+			else if ((offset % 4) == 0)
+				len += sprintf(out_buf + len, " ");
+		}
+		len += sprintf(out_buf + len, "%02x", *buffer++);
+		if (++offset == total_size) {
+			len += sprintf(out_buf + len, "\n");
+			break;
+		}
+	}
+
+	if (total_size == 0)
+		len += sprintf(out_buf + len, "\n");
+
+	return len;
+}
+
+static inline int
+zfcp_dbf_view_header(debug_info_t * id, struct debug_view *view, int area,
+		     debug_entry_t * entry, char *out_buf)
+{
+	struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)DEBUG_DATA(entry);
+	int len = 0;
+
+	if (strncmp(dump->tag, "dump", ZFCP_DBF_TAG_SIZE) != 0) {
+		len += zfcp_dbf_stck(out_buf + len, "timestamp",
+				     entry->id.stck);
+		len += zfcp_dbf_view(out_buf + len, "cpu", "%02i",
+				     entry->id.fields.cpuid);
+	} else {
+		len += zfcp_dbf_view_dump(out_buf + len, NULL,
+					  dump->data,
+					  dump->size,
+					  dump->offset, dump->total_size);
+		if ((dump->offset + dump->size) == dump->total_size)
+			len += sprintf(out_buf + len, "\n");
+	}
+
+	return len;
+}
+
+inline void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *fsf_req)
+{
+	struct zfcp_adapter *adapter = fsf_req->adapter;
+	struct fsf_qtcb *qtcb = fsf_req->qtcb;
+	union fsf_prot_status_qual *prot_status_qual =
+	    &qtcb->prefix.prot_status_qual;
+	union fsf_status_qual *fsf_status_qual = &qtcb->header.fsf_status_qual;
+	struct scsi_cmnd *scsi_cmnd;
+	struct zfcp_port *port;
+	struct zfcp_unit *unit;
+	struct zfcp_send_els *send_els;
+	struct zfcp_hba_dbf_record *rec = &adapter->hba_dbf_buf;
+	struct zfcp_hba_dbf_record_response *response = &rec->type.response;
+	int level;
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->hba_dbf_lock, flags);
+	memset(rec, 0, sizeof(struct zfcp_hba_dbf_record));
+	strncpy(rec->tag, "resp", ZFCP_DBF_TAG_SIZE);
+
+	if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) &&
+	    (qtcb->prefix.prot_status != FSF_PROT_FSF_STATUS_PRESENTED)) {
+		strncpy(rec->tag2, "perr", ZFCP_DBF_TAG_SIZE);
+		level = 1;
+	} else if (qtcb->header.fsf_status != FSF_GOOD) {
+		strncpy(rec->tag2, "ferr", ZFCP_DBF_TAG_SIZE);
+		level = 1;
+	} else if ((fsf_req->fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) ||
+		   (fsf_req->fsf_command == FSF_QTCB_OPEN_LUN)) {
+		strncpy(rec->tag2, "open", ZFCP_DBF_TAG_SIZE);
+		level = 4;
+	} else if ((prot_status_qual->doubleword[0] != 0) ||
+		   (prot_status_qual->doubleword[1] != 0) ||
+		   (fsf_status_qual->doubleword[0] != 0) ||
+		   (fsf_status_qual->doubleword[1] != 0)) {
+		strncpy(rec->tag2, "qual", ZFCP_DBF_TAG_SIZE);
+		level = 3;
+	} else {
+		strncpy(rec->tag2, "norm", ZFCP_DBF_TAG_SIZE);
+		level = 6;
+	}
+
+	response->fsf_command = fsf_req->fsf_command;
+	response->fsf_reqid = (unsigned long)fsf_req;
+	response->fsf_seqno = fsf_req->seq_no;
+	response->fsf_issued = fsf_req->issued;
+	response->fsf_prot_status = qtcb->prefix.prot_status;
+	response->fsf_status = qtcb->header.fsf_status;
+	memcpy(response->fsf_prot_status_qual,
+	       prot_status_qual, FSF_PROT_STATUS_QUAL_SIZE);
+	memcpy(response->fsf_status_qual,
+	       fsf_status_qual, FSF_STATUS_QUALIFIER_SIZE);
+	response->fsf_req_status = fsf_req->status;
+	response->sbal_first = fsf_req->sbal_first;
+	response->sbal_curr = fsf_req->sbal_curr;
+	response->sbal_last = fsf_req->sbal_last;
+	response->pool = fsf_req->pool != NULL;
+	response->erp_action = (unsigned long)fsf_req->erp_action;
+
+	switch (fsf_req->fsf_command) {
+	case FSF_QTCB_FCP_CMND:
+		if (fsf_req->status & ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT)
+			break;
+		scsi_cmnd = (struct scsi_cmnd *)fsf_req->data;
+		if (scsi_cmnd != NULL) {
+			response->data.send_fcp.scsi_cmnd
+			    = (unsigned long)scsi_cmnd;
+			response->data.send_fcp.scsi_serial
+			    = scsi_cmnd->serial_number;
+		}
+		break;
+
+	case FSF_QTCB_OPEN_PORT_WITH_DID:
+	case FSF_QTCB_CLOSE_PORT:
+	case FSF_QTCB_CLOSE_PHYSICAL_PORT:
+		port = (struct zfcp_port *)fsf_req->data;
+		response->data.port.wwpn = port->wwpn;
+		response->data.port.d_id = port->d_id;
+		response->data.port.port_handle = qtcb->header.port_handle;
+		break;
+
+	case FSF_QTCB_OPEN_LUN:
+	case FSF_QTCB_CLOSE_LUN:
+		unit = (struct zfcp_unit *)fsf_req->data;
+		port = unit->port;
+		response->data.unit.wwpn = port->wwpn;
+		response->data.unit.fcp_lun = unit->fcp_lun;
+		response->data.unit.port_handle = qtcb->header.port_handle;
+		response->data.unit.lun_handle = qtcb->header.lun_handle;
+		break;
+
+	case FSF_QTCB_SEND_ELS:
+		send_els = (struct zfcp_send_els *)fsf_req->data;
+		response->data.send_els.d_id = qtcb->bottom.support.d_id;
+		response->data.send_els.ls_code = send_els->ls_code >> 24;
+		break;
+
+	case FSF_QTCB_ABORT_FCP_CMND:
+	case FSF_QTCB_SEND_GENERIC:
+	case FSF_QTCB_EXCHANGE_CONFIG_DATA:
+	case FSF_QTCB_EXCHANGE_PORT_DATA:
+	case FSF_QTCB_DOWNLOAD_CONTROL_FILE:
+	case FSF_QTCB_UPLOAD_CONTROL_FILE:
+		break;
+	}
+
+	debug_event(adapter->hba_dbf, level,
+		    rec, sizeof(struct zfcp_hba_dbf_record));
+	spin_unlock_irqrestore(&adapter->hba_dbf_lock, flags);
+}
+
+inline void
+zfcp_hba_dbf_event_fsf_unsol(const char *tag, struct zfcp_adapter *adapter,
+			     struct fsf_status_read_buffer *status_buffer)
+{
+	struct zfcp_hba_dbf_record *rec = &adapter->hba_dbf_buf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->hba_dbf_lock, flags);
+	memset(rec, 0, sizeof(struct zfcp_hba_dbf_record));
+	strncpy(rec->tag, "stat", ZFCP_DBF_TAG_SIZE);
+	strncpy(rec->tag2, tag, ZFCP_DBF_TAG_SIZE);
+
+	rec->type.status.failed = adapter->status_read_failed;
+	if (status_buffer != NULL) {
+		rec->type.status.status_type = status_buffer->status_type;
+		rec->type.status.status_subtype = status_buffer->status_subtype;
+		memcpy(&rec->type.status.queue_designator,
+		       &status_buffer->queue_designator,
+		       sizeof(struct fsf_queue_designator));
+
+		switch (status_buffer->status_type) {
+		case FSF_STATUS_READ_SENSE_DATA_AVAIL:
+			rec->type.status.payload_size =
+			    ZFCP_DBF_UNSOL_PAYLOAD_SENSE_DATA_AVAIL;
+			break;
+
+		case FSF_STATUS_READ_BIT_ERROR_THRESHOLD:
+			rec->type.status.payload_size =
+			    ZFCP_DBF_UNSOL_PAYLOAD_BIT_ERROR_THRESHOLD;
+			break;
+
+		case FSF_STATUS_READ_LINK_DOWN:
+			switch (status_buffer->status_subtype) {
+			case FSF_STATUS_READ_SUB_NO_PHYSICAL_LINK:
+			case FSF_STATUS_READ_SUB_FDISC_FAILED:
+				rec->type.status.payload_size =
+					sizeof(struct fsf_link_down_info);
+			}
+			break;
+
+		case FSF_STATUS_READ_FEATURE_UPDATE_ALERT:
+			rec->type.status.payload_size =
+			    ZFCP_DBF_UNSOL_PAYLOAD_FEATURE_UPDATE_ALERT;
+			break;
+		}
+		memcpy(&rec->type.status.payload,
+		       &status_buffer->payload, rec->type.status.payload_size);
+	}
+
+	debug_event(adapter->hba_dbf, 2,
+		    rec, sizeof(struct zfcp_hba_dbf_record));
+	spin_unlock_irqrestore(&adapter->hba_dbf_lock, flags);
+}
+
+inline void
+zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, unsigned int status,
+			unsigned int qdio_error, unsigned int siga_error,
+			int sbal_index, int sbal_count)
+{
+	struct zfcp_hba_dbf_record *rec = &adapter->hba_dbf_buf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->hba_dbf_lock, flags);
+	memset(rec, 0, sizeof(struct zfcp_hba_dbf_record));
+	strncpy(rec->tag, "qdio", ZFCP_DBF_TAG_SIZE);
+	rec->type.qdio.status = status;
+	rec->type.qdio.qdio_error = qdio_error;
+	rec->type.qdio.siga_error = siga_error;
+	rec->type.qdio.sbal_index = sbal_index;
+	rec->type.qdio.sbal_count = sbal_count;
+	debug_event(adapter->hba_dbf, 0,
+		    rec, sizeof(struct zfcp_hba_dbf_record));
+	spin_unlock_irqrestore(&adapter->hba_dbf_lock, flags);
+}
+
+static inline int
+zfcp_hba_dbf_view_response(char *out_buf,
+			   struct zfcp_hba_dbf_record_response *rec)
+{
+	int len = 0;
+
+	len += zfcp_dbf_view(out_buf + len, "fsf_command", "0x%08x",
+			     rec->fsf_command);
+	len += zfcp_dbf_view(out_buf + len, "fsf_reqid", "0x%0Lx",
+			     rec->fsf_reqid);
+	len += zfcp_dbf_view(out_buf + len, "fsf_seqno", "0x%08x",
+			     rec->fsf_seqno);
+	len += zfcp_dbf_stck(out_buf + len, "fsf_issued", rec->fsf_issued);
+	len += zfcp_dbf_view(out_buf + len, "fsf_prot_status", "0x%08x",
+			     rec->fsf_prot_status);
+	len += zfcp_dbf_view(out_buf + len, "fsf_status", "0x%08x",
+			     rec->fsf_status);
+	len += zfcp_dbf_view_dump(out_buf + len, "fsf_prot_status_qual",
+				  rec->fsf_prot_status_qual,
+				  FSF_PROT_STATUS_QUAL_SIZE,
+				  0, FSF_PROT_STATUS_QUAL_SIZE);
+	len += zfcp_dbf_view_dump(out_buf + len, "fsf_status_qual",
+				  rec->fsf_status_qual,
+				  FSF_STATUS_QUALIFIER_SIZE,
+				  0, FSF_STATUS_QUALIFIER_SIZE);
+	len += zfcp_dbf_view(out_buf + len, "fsf_req_status", "0x%08x",
+			     rec->fsf_req_status);
+	len += zfcp_dbf_view(out_buf + len, "sbal_first", "0x%02x",
+			     rec->sbal_first);
+	len += zfcp_dbf_view(out_buf + len, "sbal_curr", "0x%02x",
+			     rec->sbal_curr);
+	len += zfcp_dbf_view(out_buf + len, "sbal_last", "0x%02x",
+			     rec->sbal_last);
+	len += zfcp_dbf_view(out_buf + len, "pool", "0x%02x", rec->pool);
+
+	switch (rec->fsf_command) {
+	case FSF_QTCB_FCP_CMND:
+		if (rec->fsf_req_status & ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT)
+			break;
+		len += zfcp_dbf_view(out_buf + len, "scsi_cmnd", "0x%0Lx",
+				     rec->data.send_fcp.scsi_cmnd);
+		len += zfcp_dbf_view(out_buf + len, "scsi_serial", "0x%016Lx",
+				     rec->data.send_fcp.scsi_serial);
+		break;
+
+	case FSF_QTCB_OPEN_PORT_WITH_DID:
+	case FSF_QTCB_CLOSE_PORT:
+	case FSF_QTCB_CLOSE_PHYSICAL_PORT:
+		len += zfcp_dbf_view(out_buf + len, "wwpn", "0x%016Lx",
+				     rec->data.port.wwpn);
+		len += zfcp_dbf_view(out_buf + len, "d_id", "0x%06x",
+				     rec->data.port.d_id);
+		len += zfcp_dbf_view(out_buf + len, "port_handle", "0x%08x",
+				     rec->data.port.port_handle);
+		break;
+
+	case FSF_QTCB_OPEN_LUN:
+	case FSF_QTCB_CLOSE_LUN:
+		len += zfcp_dbf_view(out_buf + len, "wwpn", "0x%016Lx",
+				     rec->data.unit.wwpn);
+		len += zfcp_dbf_view(out_buf + len, "fcp_lun", "0x%016Lx",
+				     rec->data.unit.fcp_lun);
+		len += zfcp_dbf_view(out_buf + len, "port_handle", "0x%08x",
+				     rec->data.unit.port_handle);
+		len += zfcp_dbf_view(out_buf + len, "lun_handle", "0x%08x",
+				     rec->data.unit.lun_handle);
+		break;
+
+	case FSF_QTCB_SEND_ELS:
+		len += zfcp_dbf_view(out_buf + len, "d_id", "0x%06x",
+				     rec->data.send_els.d_id);
+		len += zfcp_dbf_view(out_buf + len, "ls_code", "0x%02x",
+				     rec->data.send_els.ls_code);
+		break;
+
+	case FSF_QTCB_ABORT_FCP_CMND:
+	case FSF_QTCB_SEND_GENERIC:
+	case FSF_QTCB_EXCHANGE_CONFIG_DATA:
+	case FSF_QTCB_EXCHANGE_PORT_DATA:
+	case FSF_QTCB_DOWNLOAD_CONTROL_FILE:
+	case FSF_QTCB_UPLOAD_CONTROL_FILE:
+		break;
+	}
+
+	return len;
+}
+
+static inline int
+zfcp_hba_dbf_view_status(char *out_buf, struct zfcp_hba_dbf_record_status *rec)
+{
+	int len = 0;
+
+	len += zfcp_dbf_view(out_buf + len, "failed", "0x%02x", rec->failed);
+	len += zfcp_dbf_view(out_buf + len, "status_type", "0x%08x",
+			     rec->status_type);
+	len += zfcp_dbf_view(out_buf + len, "status_subtype", "0x%08x",
+			     rec->status_subtype);
+	len += zfcp_dbf_view_dump(out_buf + len, "queue_designator",
+				  (char *)&rec->queue_designator,
+				  sizeof(struct fsf_queue_designator),
+				  0, sizeof(struct fsf_queue_designator));
+	len += zfcp_dbf_view_dump(out_buf + len, "payload",
+				  (char *)&rec->payload,
+				  rec->payload_size, 0, rec->payload_size);
+
+	return len;
+}
+
+static inline int
+zfcp_hba_dbf_view_qdio(char *out_buf, struct zfcp_hba_dbf_record_qdio *rec)
+{
+	int len = 0;
+
+	len += zfcp_dbf_view(out_buf + len, "status", "0x%08x", rec->status);
+	len += zfcp_dbf_view(out_buf + len, "qdio_error", "0x%08x",
+			     rec->qdio_error);
+	len += zfcp_dbf_view(out_buf + len, "siga_error", "0x%08x",
+			     rec->siga_error);
+	len += zfcp_dbf_view(out_buf + len, "sbal_index", "0x%02x",
+			     rec->sbal_index);
+	len += zfcp_dbf_view(out_buf + len, "sbal_count", "0x%02x",
+			     rec->sbal_count);
+
+	return len;
+}
+
+static int
+zfcp_hba_dbf_view_format(debug_info_t * id, struct debug_view *view,
+			 char *out_buf, const char *in_buf)
+{
+	struct zfcp_hba_dbf_record *rec = (struct zfcp_hba_dbf_record *)in_buf;
+	int len = 0;
+
+	if (strncmp(rec->tag, "dump", ZFCP_DBF_TAG_SIZE) == 0)
+		return 0;
+
+	len += zfcp_dbf_tag(out_buf + len, "tag", rec->tag);
+	if (isalpha(rec->tag2[0]))
+		len += zfcp_dbf_tag(out_buf + len, "tag2", rec->tag2);
+	if (strncmp(rec->tag, "resp", ZFCP_DBF_TAG_SIZE) == 0)
+		len += zfcp_hba_dbf_view_response(out_buf + len,
+						  &rec->type.response);
+	else if (strncmp(rec->tag, "stat", ZFCP_DBF_TAG_SIZE) == 0)
+		len += zfcp_hba_dbf_view_status(out_buf + len,
+						&rec->type.status);
+	else if (strncmp(rec->tag, "qdio", ZFCP_DBF_TAG_SIZE) == 0)
+		len += zfcp_hba_dbf_view_qdio(out_buf + len, &rec->type.qdio);
+
+	len += sprintf(out_buf + len, "\n");
+
+	return len;
+}
+
+struct debug_view zfcp_hba_dbf_view = {
+	"structured",
+	NULL,
+	&zfcp_dbf_view_header,
+	&zfcp_hba_dbf_view_format,
+	NULL,
+	NULL
+};
+
+inline void
+_zfcp_san_dbf_event_common_ct(const char *tag, struct zfcp_fsf_req *fsf_req,
+			      u32 s_id, u32 d_id, void *buffer, int buflen)
+{
+	struct zfcp_send_ct *send_ct = (struct zfcp_send_ct *)fsf_req->data;
+	struct zfcp_port *port = send_ct->port;
+	struct zfcp_adapter *adapter = port->adapter;
+	struct ct_hdr *header = (struct ct_hdr *)buffer;
+	struct zfcp_san_dbf_record *rec = &adapter->san_dbf_buf;
+	struct zfcp_san_dbf_record_ct *ct = &rec->type.ct;
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->san_dbf_lock, flags);
+	memset(rec, 0, sizeof(struct zfcp_san_dbf_record));
+	strncpy(rec->tag, tag, ZFCP_DBF_TAG_SIZE);
+	rec->fsf_reqid = (unsigned long)fsf_req;
+	rec->fsf_seqno = fsf_req->seq_no;
+	rec->s_id = s_id;
+	rec->d_id = d_id;
+	if (strncmp(tag, "octc", ZFCP_DBF_TAG_SIZE) == 0) {
+		ct->type.request.cmd_req_code = header->cmd_rsp_code;
+		ct->type.request.revision = header->revision;
+		ct->type.request.gs_type = header->gs_type;
+		ct->type.request.gs_subtype = header->gs_subtype;
+		ct->type.request.options = header->options;
+		ct->type.request.max_res_size = header->max_res_size;
+	} else if (strncmp(tag, "rctc", ZFCP_DBF_TAG_SIZE) == 0) {
+		ct->type.response.cmd_rsp_code = header->cmd_rsp_code;
+		ct->type.response.revision = header->revision;
+		ct->type.response.reason_code = header->reason_code;
+		ct->type.response.reason_code_expl = header->reason_code_expl;
+		ct->type.response.vendor_unique = header->vendor_unique;
+	}
+	ct->payload_size =
+	    min(buflen - (int)sizeof(struct ct_hdr), ZFCP_DBF_CT_PAYLOAD);
+	memcpy(ct->payload, buffer + sizeof(struct ct_hdr), ct->payload_size);
+	debug_event(adapter->san_dbf, 3,
+		    rec, sizeof(struct zfcp_san_dbf_record));
+	spin_unlock_irqrestore(&adapter->san_dbf_lock, flags);
+}
+
+inline void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *fsf_req)
+{
+	struct zfcp_send_ct *ct = (struct zfcp_send_ct *)fsf_req->data;
+	struct zfcp_port *port = ct->port;
+	struct zfcp_adapter *adapter = port->adapter;
+
+	_zfcp_san_dbf_event_common_ct("octc", fsf_req,
+				      fc_host_port_id(adapter->scsi_host),
+				      port->d_id, zfcp_sg_to_address(ct->req),
+				      ct->req->length);
+}
+
+inline void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *fsf_req)
+{
+	struct zfcp_send_ct *ct = (struct zfcp_send_ct *)fsf_req->data;
+	struct zfcp_port *port = ct->port;
+	struct zfcp_adapter *adapter = port->adapter;
+
+	_zfcp_san_dbf_event_common_ct("rctc", fsf_req, port->d_id,
+				      fc_host_port_id(adapter->scsi_host),
+				      zfcp_sg_to_address(ct->resp),
+				      ct->resp->length);
+}
+
+static inline void
+_zfcp_san_dbf_event_common_els(const char *tag, int level,
+			       struct zfcp_fsf_req *fsf_req, u32 s_id,
+			       u32 d_id, u8 ls_code, void *buffer, int buflen)
+{
+	struct zfcp_adapter *adapter = fsf_req->adapter;
+	struct zfcp_san_dbf_record *rec = &adapter->san_dbf_buf;
+	struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)rec;
+	unsigned long flags;
+	int offset = 0;
+
+	spin_lock_irqsave(&adapter->san_dbf_lock, flags);
+	do {
+		memset(rec, 0, sizeof(struct zfcp_san_dbf_record));
+		if (offset == 0) {
+			strncpy(rec->tag, tag, ZFCP_DBF_TAG_SIZE);
+			rec->fsf_reqid = (unsigned long)fsf_req;
+			rec->fsf_seqno = fsf_req->seq_no;
+			rec->s_id = s_id;
+			rec->d_id = d_id;
+			rec->type.els.ls_code = ls_code;
+			buflen = min(buflen, ZFCP_DBF_ELS_MAX_PAYLOAD);
+			rec->type.els.payload_size = buflen;
+			memcpy(rec->type.els.payload,
+			       buffer, min(buflen, ZFCP_DBF_ELS_PAYLOAD));
+			offset += min(buflen, ZFCP_DBF_ELS_PAYLOAD);
+		} else {
+			strncpy(dump->tag, "dump", ZFCP_DBF_TAG_SIZE);
+			dump->total_size = buflen;
+			dump->offset = offset;
+			dump->size = min(buflen - offset,
+					 (int)sizeof(struct zfcp_san_dbf_record)
+					 - (int)sizeof(struct zfcp_dbf_dump));
+			memcpy(dump->data, buffer + offset, dump->size);
+			offset += dump->size;
+		}
+		debug_event(adapter->san_dbf, level,
+			    rec, sizeof(struct zfcp_san_dbf_record));
+	} while (offset < buflen);
+	spin_unlock_irqrestore(&adapter->san_dbf_lock, flags);
+}
+
+inline void zfcp_san_dbf_event_els_request(struct zfcp_fsf_req *fsf_req)
+{
+	struct zfcp_send_els *els = (struct zfcp_send_els *)fsf_req->data;
+
+	_zfcp_san_dbf_event_common_els("oels", 2, fsf_req,
+				       fc_host_port_id(els->adapter->scsi_host),
+				       els->d_id,
+				       *(u8 *) zfcp_sg_to_address(els->req),
+				       zfcp_sg_to_address(els->req),
+				       els->req->length);
+}
+
+inline void zfcp_san_dbf_event_els_response(struct zfcp_fsf_req *fsf_req)
+{
+	struct zfcp_send_els *els = (struct zfcp_send_els *)fsf_req->data;
+
+	_zfcp_san_dbf_event_common_els("rels", 2, fsf_req, els->d_id,
+				       fc_host_port_id(els->adapter->scsi_host),
+				       *(u8 *) zfcp_sg_to_address(els->req),
+				       zfcp_sg_to_address(els->resp),
+				       els->resp->length);
+}
+
+inline void zfcp_san_dbf_event_incoming_els(struct zfcp_fsf_req *fsf_req)
+{
+	struct zfcp_adapter *adapter = fsf_req->adapter;
+	struct fsf_status_read_buffer *status_buffer =
+	    (struct fsf_status_read_buffer *)fsf_req->data;
+	int length = (int)status_buffer->length -
+	    (int)((void *)&status_buffer->payload - (void *)status_buffer);
+
+	_zfcp_san_dbf_event_common_els("iels", 1, fsf_req, status_buffer->d_id,
+				       fc_host_port_id(adapter->scsi_host),
+				       *(u8 *) status_buffer->payload,
+				       (void *)status_buffer->payload, length);
+}
+
+static int
+zfcp_san_dbf_view_format(debug_info_t * id, struct debug_view *view,
+			 char *out_buf, const char *in_buf)
+{
+	struct zfcp_san_dbf_record *rec = (struct zfcp_san_dbf_record *)in_buf;
+	char *buffer = NULL;
+	int buflen = 0, total = 0;
+	int len = 0;
+
+	if (strncmp(rec->tag, "dump", ZFCP_DBF_TAG_SIZE) == 0)
+		return 0;
+
+	len += zfcp_dbf_tag(out_buf + len, "tag", rec->tag);
+	len += zfcp_dbf_view(out_buf + len, "fsf_reqid", "0x%0Lx",
+			     rec->fsf_reqid);
+	len += zfcp_dbf_view(out_buf + len, "fsf_seqno", "0x%08x",
+			     rec->fsf_seqno);
+	len += zfcp_dbf_view(out_buf + len, "s_id", "0x%06x", rec->s_id);
+	len += zfcp_dbf_view(out_buf + len, "d_id", "0x%06x", rec->d_id);
+
+	if (strncmp(rec->tag, "octc", ZFCP_DBF_TAG_SIZE) == 0) {
+		len += zfcp_dbf_view(out_buf + len, "cmd_req_code", "0x%04x",
+				     rec->type.ct.type.request.cmd_req_code);
+		len += zfcp_dbf_view(out_buf + len, "revision", "0x%02x",
+				     rec->type.ct.type.request.revision);
+		len += zfcp_dbf_view(out_buf + len, "gs_type", "0x%02x",
+				     rec->type.ct.type.request.gs_type);
+		len += zfcp_dbf_view(out_buf + len, "gs_subtype", "0x%02x",
+				     rec->type.ct.type.request.gs_subtype);
+		len += zfcp_dbf_view(out_buf + len, "options", "0x%02x",
+				     rec->type.ct.type.request.options);
+		len += zfcp_dbf_view(out_buf + len, "max_res_size", "0x%04x",
+				     rec->type.ct.type.request.max_res_size);
+		total = rec->type.ct.payload_size;
+		buffer = rec->type.ct.payload;
+		buflen = min(total, ZFCP_DBF_CT_PAYLOAD);
+	} else if (strncmp(rec->tag, "rctc", ZFCP_DBF_TAG_SIZE) == 0) {
+		len += zfcp_dbf_view(out_buf + len, "cmd_rsp_code", "0x%04x",
+				     rec->type.ct.type.response.cmd_rsp_code);
+		len += zfcp_dbf_view(out_buf + len, "revision", "0x%02x",
+				     rec->type.ct.type.response.revision);
+		len += zfcp_dbf_view(out_buf + len, "reason_code", "0x%02x",
+				     rec->type.ct.type.response.reason_code);
+		len +=
+		    zfcp_dbf_view(out_buf + len, "reason_code_expl", "0x%02x",
+				  rec->type.ct.type.response.reason_code_expl);
+		len +=
+		    zfcp_dbf_view(out_buf + len, "vendor_unique", "0x%02x",
+				  rec->type.ct.type.response.vendor_unique);
+		total = rec->type.ct.payload_size;
+		buffer = rec->type.ct.payload;
+		buflen = min(total, ZFCP_DBF_CT_PAYLOAD);
+	} else if (strncmp(rec->tag, "oels", ZFCP_DBF_TAG_SIZE) == 0 ||
+		   strncmp(rec->tag, "rels", ZFCP_DBF_TAG_SIZE) == 0 ||
+		   strncmp(rec->tag, "iels", ZFCP_DBF_TAG_SIZE) == 0) {
+		len += zfcp_dbf_view(out_buf + len, "ls_code", "0x%02x",
+				     rec->type.els.ls_code);
+		total = rec->type.els.payload_size;
+		buffer = rec->type.els.payload;
+		buflen = min(total, ZFCP_DBF_ELS_PAYLOAD);
+	}
+
+	len += zfcp_dbf_view_dump(out_buf + len, "payload",
+				  buffer, buflen, 0, total);
+
+	if (buflen == total)
+		len += sprintf(out_buf + len, "\n");
+
+	return len;
+}
+
+struct debug_view zfcp_san_dbf_view = {
+	"structured",
+	NULL,
+	&zfcp_dbf_view_header,
+	&zfcp_san_dbf_view_format,
+	NULL,
+	NULL
+};
+
+static inline void
+_zfcp_scsi_dbf_event_common(const char *tag, const char *tag2, int level,
+			    struct zfcp_adapter *adapter,
+			    struct scsi_cmnd *scsi_cmnd,
+			    struct zfcp_fsf_req *new_fsf_req)
+{
+	struct zfcp_fsf_req *fsf_req =
+	    (struct zfcp_fsf_req *)scsi_cmnd->host_scribble;
+	struct zfcp_scsi_dbf_record *rec = &adapter->scsi_dbf_buf;
+	struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)rec;
+	unsigned long flags;
+	struct fcp_rsp_iu *fcp_rsp;
+	char *fcp_rsp_info = NULL, *fcp_sns_info = NULL;
+	int offset = 0, buflen = 0;
+
+	spin_lock_irqsave(&adapter->scsi_dbf_lock, flags);
+	do {
+		memset(rec, 0, sizeof(struct zfcp_scsi_dbf_record));
+		if (offset == 0) {
+			strncpy(rec->tag, tag, ZFCP_DBF_TAG_SIZE);
+			strncpy(rec->tag2, tag2, ZFCP_DBF_TAG_SIZE);
+			if (scsi_cmnd->device) {
+				rec->scsi_id = scsi_cmnd->device->id;
+				rec->scsi_lun = scsi_cmnd->device->lun;
+			}
+			rec->scsi_result = scsi_cmnd->result;
+			rec->scsi_cmnd = (unsigned long)scsi_cmnd;
+			rec->scsi_serial = scsi_cmnd->serial_number;
+			memcpy(rec->scsi_opcode,
+			       &scsi_cmnd->cmnd,
+			       min((int)scsi_cmnd->cmd_len,
+				   ZFCP_DBF_SCSI_OPCODE));
+			rec->scsi_retries = scsi_cmnd->retries;
+			rec->scsi_allowed = scsi_cmnd->allowed;
+			if (fsf_req != NULL) {
+				fcp_rsp = (struct fcp_rsp_iu *)
+				    &(fsf_req->qtcb->bottom.io.fcp_rsp);
+				fcp_rsp_info =
+				    zfcp_get_fcp_rsp_info_ptr(fcp_rsp);
+				fcp_sns_info =
+				    zfcp_get_fcp_sns_info_ptr(fcp_rsp);
+
+				rec->type.fcp.rsp_validity =
+				    fcp_rsp->validity.value;
+				rec->type.fcp.rsp_scsi_status =
+				    fcp_rsp->scsi_status;
+				rec->type.fcp.rsp_resid = fcp_rsp->fcp_resid;
+				if (fcp_rsp->validity.bits.fcp_rsp_len_valid)
+					rec->type.fcp.rsp_code =
+					    *(fcp_rsp_info + 3);
+				if (fcp_rsp->validity.bits.fcp_sns_len_valid) {
+					buflen = min((int)fcp_rsp->fcp_sns_len,
+						     ZFCP_DBF_SCSI_MAX_FCP_SNS_INFO);
+					rec->type.fcp.sns_info_len = buflen;
+					memcpy(rec->type.fcp.sns_info,
+					       fcp_sns_info,
+					       min(buflen,
+						   ZFCP_DBF_SCSI_FCP_SNS_INFO));
+					offset += min(buflen,
+						      ZFCP_DBF_SCSI_FCP_SNS_INFO);
+				}
+
+				rec->fsf_reqid = (unsigned long)fsf_req;
+				rec->fsf_seqno = fsf_req->seq_no;
+				rec->fsf_issued = fsf_req->issued;
+			}
+			if (new_fsf_req != NULL) {
+				rec->type.new_fsf_req.fsf_reqid =
+				    (unsigned long)
+				    new_fsf_req;
+				rec->type.new_fsf_req.fsf_seqno =
+				    new_fsf_req->seq_no;
+				rec->type.new_fsf_req.fsf_issued =
+				    new_fsf_req->issued;
+			}
+		} else {
+			strncpy(dump->tag, "dump", ZFCP_DBF_TAG_SIZE);
+			dump->total_size = buflen;
+			dump->offset = offset;
+			dump->size = min(buflen - offset,
+					 (int)sizeof(struct
+						     zfcp_scsi_dbf_record) -
+					 (int)sizeof(struct zfcp_dbf_dump));
+			memcpy(dump->data, fcp_sns_info + offset, dump->size);
+			offset += dump->size;
+		}
+		debug_event(adapter->scsi_dbf, level,
+			    rec, sizeof(struct zfcp_scsi_dbf_record));
+	} while (offset < buflen);
+	spin_unlock_irqrestore(&adapter->scsi_dbf_lock, flags);
+}
+
+inline void
+zfcp_scsi_dbf_event_result(const char *tag, int level,
+			   struct zfcp_adapter *adapter,
+			   struct scsi_cmnd *scsi_cmnd)
+{
+	_zfcp_scsi_dbf_event_common("rslt",
+				    tag, level, adapter, scsi_cmnd, NULL);
+}
+
+inline void
+zfcp_scsi_dbf_event_abort(const char *tag, struct zfcp_adapter *adapter,
+			  struct scsi_cmnd *scsi_cmnd,
+			  struct zfcp_fsf_req *new_fsf_req)
+{
+	_zfcp_scsi_dbf_event_common("abrt",
+				    tag, 1, adapter, scsi_cmnd, new_fsf_req);
+}
+
+inline void
+zfcp_scsi_dbf_event_devreset(const char *tag, u8 flag, struct zfcp_unit *unit,
+			     struct scsi_cmnd *scsi_cmnd)
+{
+	struct zfcp_adapter *adapter = unit->port->adapter;
+
+	_zfcp_scsi_dbf_event_common(flag == FCP_TARGET_RESET ? "trst" : "lrst",
+				    tag, 1, adapter, scsi_cmnd, NULL);
+}
+
+static int
+zfcp_scsi_dbf_view_format(debug_info_t * id, struct debug_view *view,
+			  char *out_buf, const char *in_buf)
+{
+	struct zfcp_scsi_dbf_record *rec =
+	    (struct zfcp_scsi_dbf_record *)in_buf;
+	int len = 0;
+
+	if (strncmp(rec->tag, "dump", ZFCP_DBF_TAG_SIZE) == 0)
+		return 0;
+
+	len += zfcp_dbf_tag(out_buf + len, "tag", rec->tag);
+	len += zfcp_dbf_tag(out_buf + len, "tag2", rec->tag2);
+	len += zfcp_dbf_view(out_buf + len, "scsi_id", "0x%08x", rec->scsi_id);
+	len += zfcp_dbf_view(out_buf + len, "scsi_lun", "0x%08x",
+			     rec->scsi_lun);
+	len += zfcp_dbf_view(out_buf + len, "scsi_result", "0x%08x",
+			     rec->scsi_result);
+	len += zfcp_dbf_view(out_buf + len, "scsi_cmnd", "0x%0Lx",
+			     rec->scsi_cmnd);
+	len += zfcp_dbf_view(out_buf + len, "scsi_serial", "0x%016Lx",
+			     rec->scsi_serial);
+	len += zfcp_dbf_view_dump(out_buf + len, "scsi_opcode",
+				  rec->scsi_opcode,
+				  ZFCP_DBF_SCSI_OPCODE,
+				  0, ZFCP_DBF_SCSI_OPCODE);
+	len += zfcp_dbf_view(out_buf + len, "scsi_retries", "0x%02x",
+			     rec->scsi_retries);
+	len += zfcp_dbf_view(out_buf + len, "scsi_allowed", "0x%02x",
+			     rec->scsi_allowed);
+	len += zfcp_dbf_view(out_buf + len, "fsf_reqid", "0x%0Lx",
+			     rec->fsf_reqid);
+	len += zfcp_dbf_view(out_buf + len, "fsf_seqno", "0x%08x",
+			     rec->fsf_seqno);
+	len += zfcp_dbf_stck(out_buf + len, "fsf_issued", rec->fsf_issued);
+	if (strncmp(rec->tag, "rslt", ZFCP_DBF_TAG_SIZE) == 0) {
+		len +=
+		    zfcp_dbf_view(out_buf + len, "fcp_rsp_validity", "0x%02x",
+				  rec->type.fcp.rsp_validity);
+		len +=
+		    zfcp_dbf_view(out_buf + len, "fcp_rsp_scsi_status",
+				  "0x%02x", rec->type.fcp.rsp_scsi_status);
+		len +=
+		    zfcp_dbf_view(out_buf + len, "fcp_rsp_resid", "0x%08x",
+				  rec->type.fcp.rsp_resid);
+		len +=
+		    zfcp_dbf_view(out_buf + len, "fcp_rsp_code", "0x%08x",
+				  rec->type.fcp.rsp_code);
+		len +=
+		    zfcp_dbf_view(out_buf + len, "fcp_sns_info_len", "0x%08x",
+				  rec->type.fcp.sns_info_len);
+		len +=
+		    zfcp_dbf_view_dump(out_buf + len, "fcp_sns_info",
+				       rec->type.fcp.sns_info,
+				       min((int)rec->type.fcp.sns_info_len,
+					   ZFCP_DBF_SCSI_FCP_SNS_INFO), 0,
+				       rec->type.fcp.sns_info_len);
+	} else if (strncmp(rec->tag, "abrt", ZFCP_DBF_TAG_SIZE) == 0) {
+		len += zfcp_dbf_view(out_buf + len, "fsf_reqid_abort", "0x%0Lx",
+				     rec->type.new_fsf_req.fsf_reqid);
+		len += zfcp_dbf_view(out_buf + len, "fsf_seqno_abort", "0x%08x",
+				     rec->type.new_fsf_req.fsf_seqno);
+		len += zfcp_dbf_stck(out_buf + len, "fsf_issued",
+				     rec->type.new_fsf_req.fsf_issued);
+	} else if ((strncmp(rec->tag, "trst", ZFCP_DBF_TAG_SIZE) == 0) ||
+		   (strncmp(rec->tag, "lrst", ZFCP_DBF_TAG_SIZE) == 0)) {
+		len += zfcp_dbf_view(out_buf + len, "fsf_reqid_reset", "0x%0Lx",
+				     rec->type.new_fsf_req.fsf_reqid);
+		len += zfcp_dbf_view(out_buf + len, "fsf_seqno_reset", "0x%08x",
+				     rec->type.new_fsf_req.fsf_seqno);
+		len += zfcp_dbf_stck(out_buf + len, "fsf_issued",
+				     rec->type.new_fsf_req.fsf_issued);
+	}
+
+	len += sprintf(out_buf + len, "\n");
+
+	return len;
+}
+
+struct debug_view zfcp_scsi_dbf_view = {
+	"structured",
+	NULL,
+	&zfcp_dbf_view_header,
+	&zfcp_scsi_dbf_view_format,
+	NULL,
+	NULL
+};
+
+/**
+ * zfcp_adapter_debug_register - registers debug feature for an adapter
+ * @adapter: pointer to adapter for which debug features should be registered
+ * return: -ENOMEM on error, 0 otherwise
+ */
+int zfcp_adapter_debug_register(struct zfcp_adapter *adapter)
+{
+	char dbf_name[DEBUG_MAX_NAME_LEN];
+
+	/* debug feature area which records recovery activity */
+	spin_lock_init(&adapter->erp_dbf_lock);
+	sprintf(dbf_name, "zfcp_%s_erp", zfcp_get_busid_by_adapter(adapter));
+	adapter->erp_dbf = debug_register(dbf_name, dbfsize, 2,
+					  sizeof(struct zfcp_erp_dbf_record));
+	if (!adapter->erp_dbf)
+		goto failed;
+	debug_register_view(adapter->erp_dbf, &debug_hex_ascii_view);
+	debug_set_level(adapter->erp_dbf, 3);
+
+	/* debug feature area which records HBA (FSF and QDIO) conditions */
+	spin_lock_init(&adapter->hba_dbf_lock);
+	sprintf(dbf_name, "zfcp_%s_hba", zfcp_get_busid_by_adapter(adapter));
+	adapter->hba_dbf = debug_register(dbf_name, dbfsize, 1,
+					  sizeof(struct zfcp_hba_dbf_record));
+	if (!adapter->hba_dbf)
+		goto failed;
+	debug_register_view(adapter->hba_dbf, &debug_hex_ascii_view);
+	debug_register_view(adapter->hba_dbf, &zfcp_hba_dbf_view);
+	debug_set_level(adapter->hba_dbf, 3);
+
+	/* debug feature area which records SAN command failures and recovery */
+	spin_lock_init(&adapter->san_dbf_lock);
+	sprintf(dbf_name, "zfcp_%s_san", zfcp_get_busid_by_adapter(adapter));
+	adapter->san_dbf = debug_register(dbf_name, dbfsize, 1,
+					  sizeof(struct zfcp_san_dbf_record));
+	if (!adapter->san_dbf)
+		goto failed;
+	debug_register_view(adapter->san_dbf, &debug_hex_ascii_view);
+	debug_register_view(adapter->san_dbf, &zfcp_san_dbf_view);
+	debug_set_level(adapter->san_dbf, 6);
+
+	/* debug feature area which records SCSI command failures and recovery */
+	spin_lock_init(&adapter->scsi_dbf_lock);
+	sprintf(dbf_name, "zfcp_%s_scsi", zfcp_get_busid_by_adapter(adapter));
+	adapter->scsi_dbf = debug_register(dbf_name, dbfsize, 1,
+					   sizeof(struct zfcp_scsi_dbf_record));
+	if (!adapter->scsi_dbf)
+		goto failed;
+	debug_register_view(adapter->scsi_dbf, &debug_hex_ascii_view);
+	debug_register_view(adapter->scsi_dbf, &zfcp_scsi_dbf_view);
+	debug_set_level(adapter->scsi_dbf, 3);
+
+	return 0;
+
+ failed:
+	zfcp_adapter_debug_unregister(adapter);
+
+	return -ENOMEM;
+}
+
+/**
+ * zfcp_adapter_debug_unregister - unregisters debug feature for an adapter
+ * @adapter: pointer to adapter for which debug features should be unregistered
+ */
+void zfcp_adapter_debug_unregister(struct zfcp_adapter *adapter)
+{
+	debug_unregister(adapter->scsi_dbf);
+	debug_unregister(adapter->san_dbf);
+	debug_unregister(adapter->hba_dbf);
+	debug_unregister(adapter->erp_dbf);
+	adapter->scsi_dbf = NULL;
+	adapter->san_dbf = NULL;
+	adapter->hba_dbf = NULL;
+	adapter->erp_dbf = NULL;
+}
+
+#undef ZFCP_LOG_AREA
diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h
index 455e902533a..d81b737d68c 100644
--- a/drivers/s390/scsi/zfcp_def.h
+++ b/drivers/s390/scsi/zfcp_def.h
@@ -66,7 +66,7 @@
 /********************* GENERAL DEFINES *********************************/
 
 /* zfcp version number, it consists of major, minor, and patch-level number */
-#define ZFCP_VERSION		"4.3.0"
+#define ZFCP_VERSION		"4.5.0"
 
 /**
  * zfcp_sg_to_address - determine kernel address from struct scatterlist
@@ -154,13 +154,17 @@ typedef u32 scsi_lun_t;
 #define ZFCP_EXCHANGE_CONFIG_DATA_FIRST_SLEEP	100
 #define ZFCP_EXCHANGE_CONFIG_DATA_RETRIES	7
 
+/* Retry 5 times every 2 second, then every minute */
+#define ZFCP_EXCHANGE_PORT_DATA_SHORT_RETRIES	5
+#define ZFCP_EXCHANGE_PORT_DATA_SHORT_SLEEP	200
+#define ZFCP_EXCHANGE_PORT_DATA_LONG_SLEEP	6000
+
 /* timeout value for "default timer" for fsf requests */
 #define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ);
 
 /*************** FIBRE CHANNEL PROTOCOL SPECIFIC DEFINES ********************/
 
 typedef unsigned long long wwn_t;
-typedef unsigned int       fc_id_t;
 typedef unsigned long long fcp_lun_t;
 /* data length field may be at variable position in FCP-2 FCP_CMND IU */
 typedef unsigned int       fcp_dl_t;
@@ -281,6 +285,171 @@ struct fcp_logo {
 } __attribute__((packed));
 
 /*
+ * DBF stuff
+ */
+#define ZFCP_DBF_TAG_SIZE      4
+
+struct zfcp_dbf_dump {
+	u8 tag[ZFCP_DBF_TAG_SIZE];
+	u32 total_size;		/* size of total dump data */
+	u32 offset;		/* how much data has being already dumped */
+	u32 size;		/* how much data comes with this record */
+	u8 data[];		/* dump data */
+} __attribute__ ((packed));
+
+/* FIXME: to be inflated when reworking the erp dbf */
+struct zfcp_erp_dbf_record {
+	u8 dummy[16];
+} __attribute__ ((packed));
+
+struct zfcp_hba_dbf_record_response {
+	u32 fsf_command;
+	u64 fsf_reqid;
+	u32 fsf_seqno;
+	u64 fsf_issued;
+	u32 fsf_prot_status;
+	u32 fsf_status;
+	u8 fsf_prot_status_qual[FSF_PROT_STATUS_QUAL_SIZE];
+	u8 fsf_status_qual[FSF_STATUS_QUALIFIER_SIZE];
+	u32 fsf_req_status;
+	u8 sbal_first;
+	u8 sbal_curr;
+	u8 sbal_last;
+	u8 pool;
+	u64 erp_action;
+	union {
+		struct {
+			u64 scsi_cmnd;
+			u64 scsi_serial;
+		} send_fcp;
+		struct {
+			u64 wwpn;
+			u32 d_id;
+			u32 port_handle;
+		} port;
+		struct {
+			u64 wwpn;
+			u64 fcp_lun;
+			u32 port_handle;
+			u32 lun_handle;
+		} unit;
+		struct {
+			u32 d_id;
+			u8 ls_code;
+		} send_els;
+	} data;
+} __attribute__ ((packed));
+
+struct zfcp_hba_dbf_record_status {
+	u8 failed;
+	u32 status_type;
+	u32 status_subtype;
+	struct fsf_queue_designator
+	 queue_designator;
+	u32 payload_size;
+#define ZFCP_DBF_UNSOL_PAYLOAD				80
+#define ZFCP_DBF_UNSOL_PAYLOAD_SENSE_DATA_AVAIL		32
+#define ZFCP_DBF_UNSOL_PAYLOAD_BIT_ERROR_THRESHOLD	56
+#define ZFCP_DBF_UNSOL_PAYLOAD_FEATURE_UPDATE_ALERT	2 * sizeof(u32)
+	u8 payload[ZFCP_DBF_UNSOL_PAYLOAD];
+} __attribute__ ((packed));
+
+struct zfcp_hba_dbf_record_qdio {
+	u32 status;
+	u32 qdio_error;
+	u32 siga_error;
+	u8 sbal_index;
+	u8 sbal_count;
+} __attribute__ ((packed));
+
+struct zfcp_hba_dbf_record {
+	u8 tag[ZFCP_DBF_TAG_SIZE];
+	u8 tag2[ZFCP_DBF_TAG_SIZE];
+	union {
+		struct zfcp_hba_dbf_record_response response;
+		struct zfcp_hba_dbf_record_status status;
+		struct zfcp_hba_dbf_record_qdio qdio;
+	} type;
+} __attribute__ ((packed));
+
+struct zfcp_san_dbf_record_ct {
+	union {
+		struct {
+			u16 cmd_req_code;
+			u8 revision;
+			u8 gs_type;
+			u8 gs_subtype;
+			u8 options;
+			u16 max_res_size;
+		} request;
+		struct {
+			u16 cmd_rsp_code;
+			u8 revision;
+			u8 reason_code;
+			u8 reason_code_expl;
+			u8 vendor_unique;
+		} response;
+	} type;
+	u32 payload_size;
+#define ZFCP_DBF_CT_PAYLOAD	24
+	u8 payload[ZFCP_DBF_CT_PAYLOAD];
+} __attribute__ ((packed));
+
+struct zfcp_san_dbf_record_els {
+	u8 ls_code;
+	u32 payload_size;
+#define ZFCP_DBF_ELS_PAYLOAD	32
+#define ZFCP_DBF_ELS_MAX_PAYLOAD 1024
+	u8 payload[ZFCP_DBF_ELS_PAYLOAD];
+} __attribute__ ((packed));
+
+struct zfcp_san_dbf_record {
+	u8 tag[ZFCP_DBF_TAG_SIZE];
+	u64 fsf_reqid;
+	u32 fsf_seqno;
+	u32 s_id;
+	u32 d_id;
+	union {
+		struct zfcp_san_dbf_record_ct ct;
+		struct zfcp_san_dbf_record_els els;
+	} type;
+} __attribute__ ((packed));
+
+struct zfcp_scsi_dbf_record {
+	u8 tag[ZFCP_DBF_TAG_SIZE];
+	u8 tag2[ZFCP_DBF_TAG_SIZE];
+	u32 scsi_id;
+	u32 scsi_lun;
+	u32 scsi_result;
+	u64 scsi_cmnd;
+	u64 scsi_serial;
+#define ZFCP_DBF_SCSI_OPCODE	16
+	u8 scsi_opcode[ZFCP_DBF_SCSI_OPCODE];
+	u8 scsi_retries;
+	u8 scsi_allowed;
+	u64 fsf_reqid;
+	u32 fsf_seqno;
+	u64 fsf_issued;
+	union {
+		struct {
+			u64 fsf_reqid;
+			u32 fsf_seqno;
+			u64 fsf_issued;
+		} new_fsf_req;
+		struct {
+			u8 rsp_validity;
+			u8 rsp_scsi_status;
+			u32 rsp_resid;
+			u8 rsp_code;
+#define ZFCP_DBF_SCSI_FCP_SNS_INFO	16
+#define ZFCP_DBF_SCSI_MAX_FCP_SNS_INFO	256
+			u32 sns_info_len;
+			u8 sns_info[ZFCP_DBF_SCSI_FCP_SNS_INFO];
+		} fcp;
+	} type;
+} __attribute__ ((packed));
+
+/*
  * FC-FS stuff
  */
 #define R_A_TOV				10 /* seconds */
@@ -339,34 +508,6 @@ struct zfcp_rc_entry {
  */
 #define ZFCP_CT_TIMEOUT			(3 * R_A_TOV)
 
-
-/***************** S390 DEBUG FEATURE SPECIFIC DEFINES ***********************/
-
-/* debug feature entries per adapter */
-#define ZFCP_ERP_DBF_INDEX     1 
-#define ZFCP_ERP_DBF_AREAS     2
-#define ZFCP_ERP_DBF_LENGTH    16
-#define ZFCP_ERP_DBF_LEVEL     3
-#define ZFCP_ERP_DBF_NAME      "zfcperp"
-
-#define ZFCP_CMD_DBF_INDEX     2
-#define ZFCP_CMD_DBF_AREAS     1
-#define ZFCP_CMD_DBF_LENGTH    8
-#define ZFCP_CMD_DBF_LEVEL     3
-#define ZFCP_CMD_DBF_NAME      "zfcpcmd"
-
-#define ZFCP_ABORT_DBF_INDEX   2
-#define ZFCP_ABORT_DBF_AREAS   1
-#define ZFCP_ABORT_DBF_LENGTH  8
-#define ZFCP_ABORT_DBF_LEVEL   6
-#define ZFCP_ABORT_DBF_NAME    "zfcpabt"
-
-#define ZFCP_IN_ELS_DBF_INDEX  2
-#define ZFCP_IN_ELS_DBF_AREAS  1
-#define ZFCP_IN_ELS_DBF_LENGTH 8
-#define ZFCP_IN_ELS_DBF_LEVEL  6
-#define ZFCP_IN_ELS_DBF_NAME   "zfcpels"
-
 /******************** LOGGING MACROS AND DEFINES *****************************/
 
 /*
@@ -501,6 +642,7 @@ do { \
 #define ZFCP_STATUS_ADAPTER_ERP_THREAD_KILL	0x00000080
 #define ZFCP_STATUS_ADAPTER_ERP_PENDING		0x00000100
 #define ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED	0x00000200
+#define ZFCP_STATUS_ADAPTER_XPORT_OK		0x00000800
 
 #define ZFCP_STATUS_ADAPTER_SCSI_UP			\
 		(ZFCP_STATUS_COMMON_UNBLOCKED |	\
@@ -635,45 +777,6 @@ struct zfcp_adapter_mempool {
 	mempool_t *data_gid_pn;
 };
 
-struct  zfcp_exchange_config_data{
-};
-
-struct zfcp_open_port {
-        struct zfcp_port *port;
-};
-
-struct zfcp_close_port {
-	struct zfcp_port *port;
-};
-
-struct zfcp_open_unit {
-	struct zfcp_unit *unit;
-};
-
-struct zfcp_close_unit {
-	struct zfcp_unit *unit;
-};
-
-struct zfcp_close_physical_port {
-        struct zfcp_port *port;
-};
-
-struct zfcp_send_fcp_command_task {
-	struct zfcp_fsf_req *fsf_req;
-	struct zfcp_unit *unit;
- 	struct scsi_cmnd *scsi_cmnd;
-	unsigned long start_jiffies;
-};
-
-struct zfcp_send_fcp_command_task_management {
-	struct zfcp_unit *unit;
-};
-
-struct zfcp_abort_fcp_command {
-	struct zfcp_fsf_req *fsf_req;
-	struct zfcp_unit *unit;
-};
-
 /*
  * header for CT_IU
  */
@@ -702,7 +805,7 @@ struct ct_iu_gid_pn_req {
 /* FS_ACC IU and data unit for GID_PN nameserver request */
 struct ct_iu_gid_pn_resp {
 	struct ct_hdr header;
-	fc_id_t d_id;
+	u32 d_id;
 } __attribute__ ((packed));
 
 typedef void (*zfcp_send_ct_handler_t)(unsigned long);
@@ -768,7 +871,7 @@ typedef void (*zfcp_send_els_handler_t)(unsigned long);
 struct zfcp_send_els {
 	struct zfcp_adapter *adapter;
 	struct zfcp_port *port;
-	fc_id_t d_id;
+	u32 d_id;
 	struct scatterlist *req;
 	struct scatterlist *resp;
 	unsigned int req_count;
@@ -781,33 +884,6 @@ struct zfcp_send_els {
 	int status;
 };
 
-struct zfcp_status_read {
-	struct fsf_status_read_buffer *buffer;
-};
-
-struct zfcp_fsf_done {
-	struct completion *complete;
-	int status;
-};
-
-/* request specific data */
-union zfcp_req_data {
-	struct zfcp_exchange_config_data exchange_config_data;
-	struct zfcp_open_port		  open_port;
-	struct zfcp_close_port		  close_port;
-	struct zfcp_open_unit		  open_unit;
-	struct zfcp_close_unit		  close_unit;
-	struct zfcp_close_physical_port	  close_physical_port;
-	struct zfcp_send_fcp_command_task send_fcp_command_task;
-        struct zfcp_send_fcp_command_task_management
-					  send_fcp_command_task_management;
-	struct zfcp_abort_fcp_command	  abort_fcp_command;
-	struct zfcp_send_ct *send_ct;
-	struct zfcp_send_els *send_els;
-	struct zfcp_status_read 	  status_read;
-	struct fsf_qtcb_bottom_port *port_data;
-};
-
 struct zfcp_qdio_queue {
 	struct qdio_buffer *buffer[QDIO_MAX_BUFFERS_PER_Q]; /* SBALs */
 	u8		   free_index;	      /* index of next free bfr
@@ -838,21 +914,19 @@ struct zfcp_adapter {
 	atomic_t                refcount;          /* reference count */
 	wait_queue_head_t	remove_wq;         /* can be used to wait for
 						      refcount drop to zero */
-	wwn_t			wwnn;	           /* WWNN */
-	wwn_t			wwpn;	           /* WWPN */
-	fc_id_t			s_id;	           /* N_Port ID */
 	wwn_t			peer_wwnn;	   /* P2P peer WWNN */
 	wwn_t			peer_wwpn;	   /* P2P peer WWPN */
-	fc_id_t			peer_d_id;	   /* P2P peer D_ID */
+	u32			peer_d_id;	   /* P2P peer D_ID */
+	wwn_t			physical_wwpn;     /* WWPN of physical port */
+	u32			physical_s_id;     /* local FC port ID */
 	struct ccw_device       *ccw_device;	   /* S/390 ccw device */
 	u8			fc_service_class;
 	u32			fc_topology;	   /* FC topology */
-	u32			fc_link_speed;	   /* FC interface speed */
 	u32			hydra_version;	   /* Hydra version */
 	u32			fsf_lic_version;
-        u32			supported_features;/* of FCP channel */
+	u32			adapter_features;  /* FCP channel features */
+	u32			connection_features; /* host connection features */
         u32			hardware_version;  /* of FCP channel */
-        u8			serial_number[32]; /* of hardware */
 	struct Scsi_Host	*scsi_host;	   /* Pointer to mid-layer */
 	unsigned short          scsi_host_no;      /* Assigned host number */
 	unsigned char		name[9];
@@ -889,11 +963,18 @@ struct zfcp_adapter {
 	u32			erp_low_mem_count; /* nr of erp actions waiting
 						      for memory */
 	struct zfcp_port	*nameserver_port;  /* adapter's nameserver */
-        debug_info_t            *erp_dbf;          /* S/390 debug features */
-	debug_info_t            *abort_dbf;
-	debug_info_t            *in_els_dbf;
-	debug_info_t            *cmd_dbf;
-	spinlock_t              dbf_lock;
+	debug_info_t		*erp_dbf;
+	debug_info_t		*hba_dbf;
+	debug_info_t		*san_dbf;          /* debug feature areas */
+	debug_info_t		*scsi_dbf;
+	spinlock_t		erp_dbf_lock;
+	spinlock_t		hba_dbf_lock;
+	spinlock_t		san_dbf_lock;
+	spinlock_t		scsi_dbf_lock;
+	struct zfcp_erp_dbf_record	erp_dbf_buf;
+	struct zfcp_hba_dbf_record	hba_dbf_buf;
+	struct zfcp_san_dbf_record	san_dbf_buf;
+	struct zfcp_scsi_dbf_record	scsi_dbf_buf;
 	struct zfcp_adapter_mempool	pool;      /* Adapter memory pools */
 	struct qdio_initialize  qdio_init_data;    /* for qdio_establish */
 	struct device           generic_services;  /* directory for WKA ports */
@@ -919,7 +1000,7 @@ struct zfcp_port {
 	atomic_t	       status;	       /* status of this remote port */
 	wwn_t		       wwnn;	       /* WWNN if known */
 	wwn_t		       wwpn;	       /* WWPN */
-	fc_id_t		       d_id;	       /* D_ID */
+	u32		       d_id;	       /* D_ID */
 	u32		       handle;	       /* handle assigned by FSF */
 	struct zfcp_erp_action erp_action;     /* pending error recovery */
         atomic_t               erp_counter;
@@ -963,11 +1044,13 @@ struct zfcp_fsf_req {
 	u32		       fsf_command;    /* FSF Command copy */
 	struct fsf_qtcb	       *qtcb;	       /* address of associated QTCB */
 	u32		       seq_no;         /* Sequence number of request */
-        union zfcp_req_data    data;           /* Info fields of request */ 
+        unsigned long          data;           /* private data of request */ 
 	struct zfcp_erp_action *erp_action;    /* used if this request is
 						  issued on behalf of erp */
 	mempool_t	       *pool;	       /* used if request was alloacted
 						  from emergency pool */
+	unsigned long long     issued;         /* request sent time (STCK) */
+	struct zfcp_unit       *unit;
 };
 
 typedef void zfcp_fsf_req_handler_t(struct zfcp_fsf_req*);
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index cb4f612550b..023f4e558ae 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -82,6 +82,7 @@ static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *);
+static int zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *);
 static int zfcp_erp_adapter_strategy_open_fsf_statusread(
 	struct zfcp_erp_action *);
 
@@ -345,13 +346,13 @@ zfcp_erp_adisc(struct zfcp_port *port)
 
 	/* acc. to FC-FS, hard_nport_id in ADISC should not be set for ports
 	   without FC-AL-2 capability, so we don't set it */
-	adisc->wwpn = adapter->wwpn;
-	adisc->wwnn = adapter->wwnn;
-	adisc->nport_id = adapter->s_id;
+	adisc->wwpn = fc_host_port_name(adapter->scsi_host);
+	adisc->wwnn = fc_host_node_name(adapter->scsi_host);
+	adisc->nport_id = fc_host_port_id(adapter->scsi_host);
 	ZFCP_LOG_INFO("ADISC request from s_id 0x%08x to d_id 0x%08x "
 		      "(wwpn=0x%016Lx, wwnn=0x%016Lx, "
 		      "hard_nport_id=0x%08x, nport_id=0x%08x)\n",
-		      adapter->s_id, send_els->d_id, (wwn_t) adisc->wwpn,
+		      adisc->nport_id, send_els->d_id, (wwn_t) adisc->wwpn,
 		      (wwn_t) adisc->wwnn, adisc->hard_nport_id,
 		      adisc->nport_id);
 
@@ -404,7 +405,7 @@ zfcp_erp_adisc_handler(unsigned long data)
 	struct zfcp_send_els *send_els;
 	struct zfcp_port *port;
 	struct zfcp_adapter *adapter;
-	fc_id_t d_id;
+	u32 d_id;
 	struct zfcp_ls_adisc_acc *adisc;
 
 	send_els = (struct zfcp_send_els *) data;
@@ -435,9 +436,9 @@ zfcp_erp_adisc_handler(unsigned long data)
 	ZFCP_LOG_INFO("ADISC response from d_id 0x%08x to s_id "
 		      "0x%08x (wwpn=0x%016Lx, wwnn=0x%016Lx, "
 		      "hard_nport_id=0x%08x, nport_id=0x%08x)\n",
-		      d_id, adapter->s_id, (wwn_t) adisc->wwpn,
-		      (wwn_t) adisc->wwnn, adisc->hard_nport_id,
-		      adisc->nport_id);
+		      d_id, fc_host_port_id(adapter->scsi_host),
+		      (wwn_t) adisc->wwpn, (wwn_t) adisc->wwnn,
+		      adisc->hard_nport_id, adisc->nport_id);
 
 	/* set wwnn for port */
 	if (port->wwnn == 0)
@@ -886,7 +887,7 @@ static int
 zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *erp_action)
 {
 	int retval = 0;
-	struct zfcp_fsf_req *fsf_req;
+	struct zfcp_fsf_req *fsf_req = NULL;
 	struct zfcp_adapter *adapter = erp_action->adapter;
 
 	if (erp_action->fsf_req) {
@@ -896,7 +897,7 @@ zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *erp_action)
 		list_for_each_entry(fsf_req, &adapter->fsf_req_list_head, list)
 		    if (fsf_req == erp_action->fsf_req)
 			break;
-		if (fsf_req == erp_action->fsf_req) {
+		if (fsf_req && (fsf_req->erp_action == erp_action)) {
 			/* fsf_req still exists */
 			debug_text_event(adapter->erp_dbf, 3, "a_ca_req");
 			debug_event(adapter->erp_dbf, 3, &fsf_req,
@@ -2258,16 +2259,21 @@ zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action)
 static int
 zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *erp_action)
 {
-	int retval;
+	int xconfig, xport;
+
+	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
+			     &erp_action->adapter->status)) {
+		zfcp_erp_adapter_strategy_open_fsf_xport(erp_action);
+		atomic_set(&erp_action->adapter->erp_counter, 0);
+		return ZFCP_ERP_FAILED;
+	}
 
-	/* do 'exchange configuration data' */
-	retval = zfcp_erp_adapter_strategy_open_fsf_xconfig(erp_action);
-	if (retval == ZFCP_ERP_FAILED)
-		return retval;
+	xconfig = zfcp_erp_adapter_strategy_open_fsf_xconfig(erp_action);
+	xport   = zfcp_erp_adapter_strategy_open_fsf_xport(erp_action);
+	if ((xconfig == ZFCP_ERP_FAILED) || (xport == ZFCP_ERP_FAILED))
+		return ZFCP_ERP_FAILED;
 
-	/* start the desired number of Status Reads */
-	retval = zfcp_erp_adapter_strategy_open_fsf_statusread(erp_action);
-	return retval;
+	return zfcp_erp_adapter_strategy_open_fsf_statusread(erp_action);
 }
 
 /*
@@ -2291,7 +2297,9 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action)
 		atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 				  &adapter->status);
 		ZFCP_LOG_DEBUG("Doing exchange config data\n");
+		write_lock(&adapter->erp_lock);
 		zfcp_erp_action_to_running(erp_action);
+		write_unlock(&adapter->erp_lock);
 		zfcp_erp_timeout_init(erp_action);
 		if (zfcp_fsf_exchange_config_data(erp_action)) {
 			retval = ZFCP_ERP_FAILED;
@@ -2348,6 +2356,76 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action)
 	return retval;
 }
 
+static int
+zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action)
+{
+	int retval = ZFCP_ERP_SUCCEEDED;
+	int retries;
+	int sleep;
+	struct zfcp_adapter *adapter = erp_action->adapter;
+
+	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
+
+	for (retries = 0; ; retries++) {
+		ZFCP_LOG_DEBUG("Doing exchange port data\n");
+		zfcp_erp_action_to_running(erp_action);
+		zfcp_erp_timeout_init(erp_action);
+		if (zfcp_fsf_exchange_port_data(erp_action, adapter, NULL)) {
+			retval = ZFCP_ERP_FAILED;
+			debug_text_event(adapter->erp_dbf, 5, "a_fstx_xf");
+			ZFCP_LOG_INFO("error: initiation of exchange of "
+				      "port data failed for adapter %s\n",
+				      zfcp_get_busid_by_adapter(adapter));
+			break;
+		}
+		debug_text_event(adapter->erp_dbf, 6, "a_fstx_xok");
+		ZFCP_LOG_DEBUG("Xchange underway\n");
+
+		/*
+		 * Why this works:
+		 * Both the normal completion handler as well as the timeout
+		 * handler will do an 'up' when the 'exchange port data'
+		 * request completes or times out. Thus, the signal to go on
+		 * won't be lost utilizing this semaphore.
+		 * Furthermore, this 'adapter_reopen' action is
+		 * guaranteed to be the only action being there (highest action
+		 * which prevents other actions from being created).
+		 * Resulting from that, the wake signal recognized here
+		 * _must_ be the one belonging to the 'exchange port
+		 * data' request.
+		 */
+		down(&adapter->erp_ready_sem);
+		if (erp_action->status & ZFCP_STATUS_ERP_TIMEDOUT) {
+			ZFCP_LOG_INFO("error: exchange of port data "
+				      "for adapter %s timed out\n",
+				      zfcp_get_busid_by_adapter(adapter));
+			break;
+		}
+
+		if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
+				      &adapter->status))
+			break;
+
+		ZFCP_LOG_DEBUG("host connection still initialising... "
+			       "waiting and retrying...\n");
+		/* sleep a little bit before retry */
+		sleep = retries < ZFCP_EXCHANGE_PORT_DATA_SHORT_RETRIES ?
+				ZFCP_EXCHANGE_PORT_DATA_SHORT_SLEEP :
+				ZFCP_EXCHANGE_PORT_DATA_LONG_SLEEP;
+		msleep(jiffies_to_msecs(sleep));
+	}
+
+	if (atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
+			     &adapter->status)) {
+		ZFCP_LOG_INFO("error: exchange of port data for "
+			      "adapter %s failed\n",
+			      zfcp_get_busid_by_adapter(adapter));
+		retval = ZFCP_ERP_FAILED;
+	}
+
+	return retval;
+}
+
 /*
  * function:	
  *
@@ -3194,11 +3272,19 @@ zfcp_erp_action_enqueue(int action,
 		/* fall through !!! */
 
 	case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED:
-		if (atomic_test_mask
-		    (ZFCP_STATUS_COMMON_ERP_INUSE, &port->status)
-		    && port->erp_action.action ==
-		    ZFCP_ERP_ACTION_REOPEN_PORT_FORCED) {
-			debug_text_event(adapter->erp_dbf, 4, "pf_actenq_drp");
+		if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_INUSE,
+				     &port->status)) {
+			if (port->erp_action.action !=
+			    ZFCP_ERP_ACTION_REOPEN_PORT_FORCED) {
+				ZFCP_LOG_INFO("dropped erp action %i (port "
+					      "0x%016Lx, action in use: %i)\n",
+					      action, port->wwpn,
+					      port->erp_action.action);
+				debug_text_event(adapter->erp_dbf, 4,
+						 "pf_actenq_drp");
+			} else 
+				debug_text_event(adapter->erp_dbf, 4,
+						 "pf_actenq_drpcp");
 			debug_event(adapter->erp_dbf, 4, &port->wwpn,
 				    sizeof (wwn_t));
 			goto out;
@@ -3589,6 +3675,9 @@ zfcp_erp_adapter_access_changed(struct zfcp_adapter *adapter)
 	struct zfcp_port *port;
 	unsigned long flags;
 
+	if (adapter->connection_features & FSF_FEATURE_NPIV_MODE)
+		return;
+
 	debug_text_event(adapter->erp_dbf, 3, "a_access_recover");
 	debug_event(adapter->erp_dbf, 3, &adapter->name, 8);
 
diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h
index cd98a2de9f8..c3782261cb5 100644
--- a/drivers/s390/scsi/zfcp_ext.h
+++ b/drivers/s390/scsi/zfcp_ext.h
@@ -96,7 +96,8 @@ extern int  zfcp_fsf_open_unit(struct zfcp_erp_action *);
 extern int  zfcp_fsf_close_unit(struct zfcp_erp_action *);
 
 extern int  zfcp_fsf_exchange_config_data(struct zfcp_erp_action *);
-extern int  zfcp_fsf_exchange_port_data(struct zfcp_adapter *,
+extern int  zfcp_fsf_exchange_port_data(struct zfcp_erp_action *,
+					struct zfcp_adapter *,
 					struct fsf_qtcb_bottom_port *);
 extern int  zfcp_fsf_control_file(struct zfcp_adapter *, struct zfcp_fsf_req **,
 				  u32, u32, struct zfcp_sg_list *);
@@ -109,7 +110,6 @@ extern int zfcp_fsf_req_create(struct zfcp_adapter *, u32, int, mempool_t *,
 extern int zfcp_fsf_send_ct(struct zfcp_send_ct *, mempool_t *,
 			    struct zfcp_erp_action *);
 extern int zfcp_fsf_send_els(struct zfcp_send_els *);
-extern int  zfcp_fsf_req_wait_and_cleanup(struct zfcp_fsf_req *, int, u32 *);
 extern int  zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *,
 					   struct zfcp_unit *,
 					   struct scsi_cmnd *,
@@ -182,9 +182,25 @@ extern void zfcp_erp_port_access_changed(struct zfcp_port *);
 extern void zfcp_erp_unit_access_changed(struct zfcp_unit *);
 
 /******************************** AUX ****************************************/
-extern void zfcp_cmd_dbf_event_fsf(const char *, struct zfcp_fsf_req *,
-				   void *, int);
-extern void zfcp_cmd_dbf_event_scsi(const char *, struct scsi_cmnd *);
-extern void zfcp_in_els_dbf_event(struct zfcp_adapter *, const char *,
-				  struct fsf_status_read_buffer *, int);
+extern void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *);
+extern void zfcp_hba_dbf_event_fsf_unsol(const char *, struct zfcp_adapter *,
+					 struct fsf_status_read_buffer *);
+extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *,
+				    unsigned int, unsigned int, unsigned int,
+				    int, int);
+
+extern void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *);
+extern void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *);
+extern void zfcp_san_dbf_event_els_request(struct zfcp_fsf_req *);
+extern void zfcp_san_dbf_event_els_response(struct zfcp_fsf_req *);
+extern void zfcp_san_dbf_event_incoming_els(struct zfcp_fsf_req *);
+
+extern void zfcp_scsi_dbf_event_result(const char *, int, struct zfcp_adapter *,
+				       struct scsi_cmnd *);
+extern void zfcp_scsi_dbf_event_abort(const char *, struct zfcp_adapter *,
+				      struct scsi_cmnd *,
+				      struct zfcp_fsf_req *);
+extern void zfcp_scsi_dbf_event_devreset(const char *, u8, struct zfcp_unit *,
+					 struct scsi_cmnd *);
+
 #endif	/* ZFCP_EXT_H */
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index c007b6424e7..3b0fc1163f5 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -59,6 +59,8 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *, struct timer_list *);
 static int zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *);
 static int zfcp_fsf_fsfstatus_eval(struct zfcp_fsf_req *);
 static int zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *);
+static void zfcp_fsf_link_down_info_eval(struct zfcp_adapter *,
+	struct fsf_link_down_info *);
 static int zfcp_fsf_req_dispatch(struct zfcp_fsf_req *);
 static void zfcp_fsf_req_dismiss(struct zfcp_fsf_req *);
 
@@ -285,51 +287,51 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req)
 {
 	int retval = 0;
 	struct zfcp_adapter *adapter = fsf_req->adapter;
+	struct fsf_qtcb *qtcb = fsf_req->qtcb;
+	union fsf_prot_status_qual *prot_status_qual =
+		&qtcb->prefix.prot_status_qual;
 
-	ZFCP_LOG_DEBUG("QTCB is at %p\n", fsf_req->qtcb);
+	zfcp_hba_dbf_event_fsf_response(fsf_req);
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_DISMISSED) {
 		ZFCP_LOG_DEBUG("fsf_req 0x%lx has been dismissed\n",
 			       (unsigned long) fsf_req);
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR |
 			ZFCP_STATUS_FSFREQ_RETRY; /* only for SCSI cmnds. */
-		zfcp_cmd_dbf_event_fsf("dismiss", fsf_req, NULL, 0);
 		goto skip_protstatus;
 	}
 
 	/* log additional information provided by FSF (if any) */
-	if (unlikely(fsf_req->qtcb->header.log_length)) {
+	if (unlikely(qtcb->header.log_length)) {
 		/* do not trust them ;-) */
-		if (fsf_req->qtcb->header.log_start > sizeof(struct fsf_qtcb)) {
+		if (qtcb->header.log_start > sizeof(struct fsf_qtcb)) {
 			ZFCP_LOG_NORMAL
 			    ("bug: ULP (FSF logging) log data starts "
 			     "beyond end of packet header. Ignored. "
 			     "(start=%i, size=%li)\n",
-			     fsf_req->qtcb->header.log_start,
+			     qtcb->header.log_start,
 			     sizeof(struct fsf_qtcb));
 			goto forget_log;
 		}
-		if ((size_t) (fsf_req->qtcb->header.log_start +
-		     fsf_req->qtcb->header.log_length)
+		if ((size_t) (qtcb->header.log_start + qtcb->header.log_length)
 		    > sizeof(struct fsf_qtcb)) {
 			ZFCP_LOG_NORMAL("bug: ULP (FSF logging) log data ends "
 					"beyond end of packet header. Ignored. "
 					"(start=%i, length=%i, size=%li)\n",
-					fsf_req->qtcb->header.log_start,
-					fsf_req->qtcb->header.log_length,
+					qtcb->header.log_start,
+					qtcb->header.log_length,
 					sizeof(struct fsf_qtcb));
 			goto forget_log;
 		}
 		ZFCP_LOG_TRACE("ULP log data: \n");
 		ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_TRACE,
-			      (char *) fsf_req->qtcb +
-			      fsf_req->qtcb->header.log_start,
-			      fsf_req->qtcb->header.log_length);
+			      (char *) qtcb + qtcb->header.log_start,
+			      qtcb->header.log_length);
 	}
  forget_log:
 
 	/* evaluate FSF Protocol Status */
-	switch (fsf_req->qtcb->prefix.prot_status) {
+	switch (qtcb->prefix.prot_status) {
 
 	case FSF_PROT_GOOD:
 	case FSF_PROT_FSF_STATUS_PRESENTED:
@@ -340,14 +342,9 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req)
 				"microcode of version 0x%x, the device driver "
 				"only supports 0x%x. Aborting.\n",
 				zfcp_get_busid_by_adapter(adapter),
-				fsf_req->qtcb->prefix.prot_status_qual.
-				version_error.fsf_version, ZFCP_QTCB_VERSION);
-		/* stop operation for this adapter */
-		debug_text_exception(adapter->erp_dbf, 0, "prot_ver_err");
+				prot_status_qual->version_error.fsf_version,
+				ZFCP_QTCB_VERSION);
 		zfcp_erp_adapter_shutdown(adapter, 0);
-		zfcp_cmd_dbf_event_fsf("qverserr", fsf_req,
-				       &fsf_req->qtcb->prefix.prot_status_qual,
-				       sizeof (union fsf_prot_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -355,16 +352,10 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req)
 		ZFCP_LOG_NORMAL("bug: Sequence number mismatch between "
 				"driver (0x%x) and adapter %s (0x%x). "
 				"Restarting all operations on this adapter.\n",
-				fsf_req->qtcb->prefix.req_seq_no,
+				qtcb->prefix.req_seq_no,
 				zfcp_get_busid_by_adapter(adapter),
-				fsf_req->qtcb->prefix.prot_status_qual.
-				sequence_error.exp_req_seq_no);
-		debug_text_exception(adapter->erp_dbf, 0, "prot_seq_err");
-		/* restart operation on this adapter */
+				prot_status_qual->sequence_error.exp_req_seq_no);
 		zfcp_erp_adapter_reopen(adapter, 0);
-		zfcp_cmd_dbf_event_fsf("seqnoerr", fsf_req,
-				       &fsf_req->qtcb->prefix.prot_status_qual,
-				       sizeof (union fsf_prot_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_RETRY;
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
@@ -375,116 +366,35 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req)
 				"that used on adapter %s. "
 				"Stopping all operations on this adapter.\n",
 				zfcp_get_busid_by_adapter(adapter));
-		debug_text_exception(adapter->erp_dbf, 0, "prot_unsup_qtcb");
 		zfcp_erp_adapter_shutdown(adapter, 0);
-		zfcp_cmd_dbf_event_fsf("unsqtcbt", fsf_req,
-				       &fsf_req->qtcb->prefix.prot_status_qual,
-				       sizeof (union fsf_prot_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
 	case FSF_PROT_HOST_CONNECTION_INITIALIZING:
-		zfcp_cmd_dbf_event_fsf("hconinit", fsf_req,
-				       &fsf_req->qtcb->prefix.prot_status_qual,
-				       sizeof (union fsf_prot_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		atomic_set_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
 				&(adapter->status));
-		debug_text_event(adapter->erp_dbf, 3, "prot_con_init");
 		break;
 
 	case FSF_PROT_DUPLICATE_REQUEST_ID:
-		if (fsf_req->qtcb) {
 			ZFCP_LOG_NORMAL("bug: The request identifier 0x%Lx "
 					"to the adapter %s is ambiguous. "
-					"Stopping all operations on this "
-					"adapter.\n",
-					*(unsigned long long *)
-					(&fsf_req->qtcb->bottom.support.
-					 req_handle),
-					zfcp_get_busid_by_adapter(adapter));
-		} else {
-			ZFCP_LOG_NORMAL("bug: The request identifier %p "
-					"to the adapter %s is ambiguous. "
-					"Stopping all operations on this "
-					"adapter. "
-					"(bug: got this for an unsolicited "
-					"status read request)\n",
-					fsf_req,
+				"Stopping all operations on this adapter.\n",
+				*(unsigned long long*)
+				(&qtcb->bottom.support.req_handle),
 					zfcp_get_busid_by_adapter(adapter));
-		}
-		debug_text_exception(adapter->erp_dbf, 0, "prot_dup_id");
 		zfcp_erp_adapter_shutdown(adapter, 0);
-		zfcp_cmd_dbf_event_fsf("dupreqid", fsf_req,
-				       &fsf_req->qtcb->prefix.prot_status_qual,
-				       sizeof (union fsf_prot_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
 	case FSF_PROT_LINK_DOWN:
-		/*
-		 * 'test and set' is not atomic here -
-		 * it's ok as long as calls to our response queue handler
-		 * (and thus execution of this code here) are serialized
-		 * by the qdio module
-		 */
-		if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
-				      &adapter->status)) {
-			switch (fsf_req->qtcb->prefix.prot_status_qual.
-				locallink_error.code) {
-			case FSF_PSQ_LINK_NOLIGHT:
-				ZFCP_LOG_INFO("The local link to adapter %s "
-					      "is down (no light detected).\n",
-					      zfcp_get_busid_by_adapter(
-						      adapter));
-				break;
-			case FSF_PSQ_LINK_WRAPPLUG:
-				ZFCP_LOG_INFO("The local link to adapter %s "
-					      "is down (wrap plug detected).\n",
-					      zfcp_get_busid_by_adapter(
-						      adapter));
-				break;
-			case FSF_PSQ_LINK_NOFCP:
-				ZFCP_LOG_INFO("The local link to adapter %s "
-					      "is down (adjacent node on "
-					      "link does not support FCP).\n",
-					      zfcp_get_busid_by_adapter(
-						      adapter));
-				break;
-			default:
-				ZFCP_LOG_INFO("The local link to adapter %s "
-					      "is down "
-					      "(warning: unknown reason "
-					      "code).\n",
-					      zfcp_get_busid_by_adapter(
-						      adapter));
-				break;
-
-			}
-			/*
-			 * Due to the 'erp failed' flag the adapter won't
-			 * be recovered but will be just set to 'blocked'
-			 * state. All subordinary devices will have state
-			 * 'blocked' and 'erp failed', too.
-			 * Thus the adapter is still able to provide
-			 * 'link up' status without being flooded with
-			 * requests.
-			 * (note: even 'close port' is not permitted)
-			 */
-			ZFCP_LOG_INFO("Stopping all operations for adapter "
-				      "%s.\n",
-				      zfcp_get_busid_by_adapter(adapter));
-			atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
-					ZFCP_STATUS_COMMON_ERP_FAILED,
-					&adapter->status);
-			zfcp_erp_adapter_reopen(adapter, 0);
-		}
+		zfcp_fsf_link_down_info_eval(adapter,
+					     &prot_status_qual->link_down_info);
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
 	case FSF_PROT_REEST_QUEUE:
-		debug_text_event(adapter->erp_dbf, 1, "prot_reest_queue");
-		ZFCP_LOG_INFO("The local link to adapter with "
+		ZFCP_LOG_NORMAL("The local link to adapter with "
 			      "%s was re-plugged. "
 			      "Re-starting operations on this adapter.\n",
 			      zfcp_get_busid_by_adapter(adapter));
@@ -495,9 +405,6 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req)
 		zfcp_erp_adapter_reopen(adapter,
 					ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED
 					| ZFCP_STATUS_COMMON_ERP_FAILED);
-		zfcp_cmd_dbf_event_fsf("reestque", fsf_req,
-				       &fsf_req->qtcb->prefix.prot_status_qual,
-				       sizeof (union fsf_prot_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -507,12 +414,7 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req)
 				"Restarting all operations on this "
 				"adapter.\n",
 				zfcp_get_busid_by_adapter(adapter));
-		debug_text_event(adapter->erp_dbf, 0, "prot_err_sta");
-		/* restart operation on this adapter */
 		zfcp_erp_adapter_reopen(adapter, 0);
-		zfcp_cmd_dbf_event_fsf("proterrs", fsf_req,
-				       &fsf_req->qtcb->prefix.prot_status_qual,
-				       sizeof (union fsf_prot_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_RETRY;
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
@@ -524,11 +426,7 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req)
 				"Stopping all operations on this adapter. "
 				"(debug info 0x%x).\n",
 				zfcp_get_busid_by_adapter(adapter),
-				fsf_req->qtcb->prefix.prot_status);
-		debug_text_event(adapter->erp_dbf, 0, "prot_inval:");
-		debug_exception(adapter->erp_dbf, 0,
-				&fsf_req->qtcb->prefix.prot_status,
-				sizeof (u32));
+				qtcb->prefix.prot_status);
 		zfcp_erp_adapter_shutdown(adapter, 0);
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 	}
@@ -568,28 +466,18 @@ zfcp_fsf_fsfstatus_eval(struct zfcp_fsf_req *fsf_req)
 				"(debug info 0x%x).\n",
 				zfcp_get_busid_by_adapter(fsf_req->adapter),
 				fsf_req->qtcb->header.fsf_command);
-		debug_text_exception(fsf_req->adapter->erp_dbf, 0,
-				     "fsf_s_unknown");
 		zfcp_erp_adapter_shutdown(fsf_req->adapter, 0);
-		zfcp_cmd_dbf_event_fsf("unknownc", fsf_req,
-				       &fsf_req->qtcb->header.fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
 	case FSF_FCP_RSP_AVAILABLE:
 		ZFCP_LOG_DEBUG("FCP Sense data will be presented to the "
 			       "SCSI stack.\n");
-		debug_text_event(fsf_req->adapter->erp_dbf, 3, "fsf_s_rsp");
 		break;
 
 	case FSF_ADAPTER_STATUS_AVAILABLE:
-		debug_text_event(fsf_req->adapter->erp_dbf, 2, "fsf_s_astatus");
 		zfcp_fsf_fsfstatus_qual_eval(fsf_req);
 		break;
-
-	default:
-		break;
 	}
 
  skip_fsfstatus:
@@ -617,44 +505,28 @@ zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *fsf_req)
 
 	switch (fsf_req->qtcb->header.fsf_status_qual.word[0]) {
 	case FSF_SQ_FCP_RSP_AVAILABLE:
-		debug_text_event(fsf_req->adapter->erp_dbf, 4, "fsf_sq_rsp");
 		break;
 	case FSF_SQ_RETRY_IF_POSSIBLE:
 		/* The SCSI-stack may now issue retries or escalate */
-		debug_text_event(fsf_req->adapter->erp_dbf, 2, "fsf_sq_retry");
-		zfcp_cmd_dbf_event_fsf("sqretry", fsf_req,
-				       &fsf_req->qtcb->header.fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	case FSF_SQ_COMMAND_ABORTED:
 		/* Carry the aborted state on to upper layer */
-		debug_text_event(fsf_req->adapter->erp_dbf, 2, "fsf_sq_abort");
-		zfcp_cmd_dbf_event_fsf("sqabort", fsf_req,
-				       &fsf_req->qtcb->header.fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTED;
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	case FSF_SQ_NO_RECOM:
-		debug_text_exception(fsf_req->adapter->erp_dbf, 0,
-				     "fsf_sq_no_rec");
 		ZFCP_LOG_NORMAL("bug: No recommendation could be given for a"
 				"problem on the adapter %s "
 				"Stopping all operations on this adapter. ",
 				zfcp_get_busid_by_adapter(fsf_req->adapter));
 		zfcp_erp_adapter_shutdown(fsf_req->adapter, 0);
-		zfcp_cmd_dbf_event_fsf("sqnrecom", fsf_req,
-				       &fsf_req->qtcb->header.fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	case FSF_SQ_ULP_PROGRAMMING_ERROR:
 		ZFCP_LOG_NORMAL("error: not enough SBALs for data transfer "
 				"(adapter %s)\n",
 				zfcp_get_busid_by_adapter(fsf_req->adapter));
-		debug_text_exception(fsf_req->adapter->erp_dbf, 0,
-				     "fsf_sq_ulp_err");
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE:
@@ -668,13 +540,6 @@ zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *fsf_req)
 		ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL,
 			      (char *) &fsf_req->qtcb->header.fsf_status_qual,
 			      sizeof (union fsf_status_qual));
-		debug_text_event(fsf_req->adapter->erp_dbf, 0, "fsf_sq_inval:");
-		debug_exception(fsf_req->adapter->erp_dbf, 0,
-				&fsf_req->qtcb->header.fsf_status_qual.word[0],
-				sizeof (u32));
-		zfcp_cmd_dbf_event_fsf("squndef", fsf_req,
-				       &fsf_req->qtcb->header.fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 	}
@@ -682,6 +547,110 @@ zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *fsf_req)
 	return retval;
 }
 
+/**
+ * zfcp_fsf_link_down_info_eval - evaluate link down information block
+ */
+static void
+zfcp_fsf_link_down_info_eval(struct zfcp_adapter *adapter,
+			     struct fsf_link_down_info *link_down)
+{
+	switch (link_down->error_code) {
+	case FSF_PSQ_LINK_NO_LIGHT:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(no light detected)\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_WRAP_PLUG:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(wrap plug detected)\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_NO_FCP:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(adjacent node on link does not support FCP)\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_FIRMWARE_UPDATE:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(firmware update in progress)\n",
+				zfcp_get_busid_by_adapter(adapter));
+			break;
+	case FSF_PSQ_LINK_INVALID_WWPN:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(duplicate or invalid WWPN detected)\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_NO_NPIV_SUPPORT:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(no support for NPIV by Fabric)\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_NO_FCP_RESOURCES:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(out of resource in FCP daughtercard)\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_NO_FABRIC_RESOURCES:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(out of resource in Fabric)\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_FABRIC_LOGIN_UNABLE:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(unable to Fabric login)\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_WWPN_ASSIGNMENT_CORRUPTED:
+		ZFCP_LOG_NORMAL("WWPN assignment file corrupted on adapter %s\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_MODE_TABLE_CURRUPTED:
+		ZFCP_LOG_NORMAL("Mode table corrupted on adapter %s\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	case FSF_PSQ_LINK_NO_WWPN_ASSIGNMENT:
+		ZFCP_LOG_NORMAL("No WWPN for assignment table on adapter %s\n",
+				zfcp_get_busid_by_adapter(adapter));
+		break;
+	default:
+		ZFCP_LOG_NORMAL("The local link to adapter %s is down "
+				"(warning: unknown reason code %d)\n",
+				zfcp_get_busid_by_adapter(adapter),
+				link_down->error_code);
+	}
+
+	if (adapter->connection_features & FSF_FEATURE_NPIV_MODE)
+		ZFCP_LOG_DEBUG("Debug information to link down: "
+		               "primary_status=0x%02x "
+		               "ioerr_code=0x%02x "
+		               "action_code=0x%02x "
+		               "reason_code=0x%02x "
+		               "explanation_code=0x%02x "
+		               "vendor_specific_code=0x%02x\n",
+				link_down->primary_status,
+				link_down->ioerr_code,
+				link_down->action_code,
+				link_down->reason_code,
+				link_down->explanation_code,
+				link_down->vendor_specific_code);
+
+	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
+			      &adapter->status)) {
+		atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
+				&adapter->status);
+		switch (link_down->error_code) {
+		case FSF_PSQ_LINK_NO_LIGHT:
+		case FSF_PSQ_LINK_WRAP_PLUG:
+		case FSF_PSQ_LINK_NO_FCP:
+		case FSF_PSQ_LINK_FIRMWARE_UPDATE:
+			zfcp_erp_adapter_reopen(adapter, 0);
+			break;
+		default:
+			zfcp_erp_adapter_failed(adapter);
+		}
+	}
+}
+
 /*
  * function:	zfcp_fsf_req_dispatch
  *
@@ -696,11 +665,6 @@ zfcp_fsf_req_dispatch(struct zfcp_fsf_req *fsf_req)
 	struct zfcp_adapter *adapter = fsf_req->adapter;
 	int retval = 0;
 
-	if (unlikely(fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)) {
-		ZFCP_LOG_TRACE("fsf_req=%p, QTCB=%p\n", fsf_req, fsf_req->qtcb);
-		ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_TRACE,
-			      (char *) fsf_req->qtcb, sizeof(struct fsf_qtcb));
-	}
 
 	switch (fsf_req->fsf_command) {
 
@@ -760,13 +724,13 @@ zfcp_fsf_req_dispatch(struct zfcp_fsf_req *fsf_req)
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		ZFCP_LOG_NORMAL("bug: Command issued by the device driver is "
 				"not supported by the adapter %s\n",
-				zfcp_get_busid_by_adapter(fsf_req->adapter));
+				zfcp_get_busid_by_adapter(adapter));
 		if (fsf_req->fsf_command != fsf_req->qtcb->header.fsf_command)
 			ZFCP_LOG_NORMAL
 			    ("bug: Command issued by the device driver differs "
 			     "from the command returned by the adapter %s "
 			     "(debug info 0x%x, 0x%x).\n",
-			     zfcp_get_busid_by_adapter(fsf_req->adapter),
+			     zfcp_get_busid_by_adapter(adapter),
 			     fsf_req->fsf_command,
 			     fsf_req->qtcb->header.fsf_command);
 	}
@@ -774,8 +738,6 @@ zfcp_fsf_req_dispatch(struct zfcp_fsf_req *fsf_req)
 	if (!erp_action)
 		return retval;
 
-	debug_text_event(adapter->erp_dbf, 3, "a_frh");
-	debug_event(adapter->erp_dbf, 3, &erp_action->action, sizeof (int));
 	zfcp_erp_async_handler(erp_action, 0);
 
 	return retval;
@@ -821,7 +783,7 @@ zfcp_fsf_status_read(struct zfcp_adapter *adapter, int req_flags)
 		goto failed_buf;
 	}
 	memset(status_buffer, 0, sizeof (struct fsf_status_read_buffer));
-	fsf_req->data.status_read.buffer = status_buffer;
+	fsf_req->data = (unsigned long) status_buffer;
 
 	/* insert pointer to respective buffer */
 	sbale = zfcp_qdio_sbale_curr(fsf_req);
@@ -846,6 +808,7 @@ zfcp_fsf_status_read(struct zfcp_adapter *adapter, int req_flags)
  failed_buf:
 	zfcp_fsf_req_free(fsf_req);
  failed_req_create:
+	zfcp_hba_dbf_event_fsf_unsol("fail", adapter, NULL);
  out:
 	write_unlock_irqrestore(&adapter->request_queue.queue_lock, lock_flags);
 	return retval;
@@ -859,7 +822,7 @@ zfcp_fsf_status_read_port_closed(struct zfcp_fsf_req *fsf_req)
 	struct zfcp_port *port;
 	unsigned long flags;
 
-	status_buffer = fsf_req->data.status_read.buffer;
+	status_buffer = (struct fsf_status_read_buffer *) fsf_req->data;
 	adapter = fsf_req->adapter;
 
 	read_lock_irqsave(&zfcp_data.config_lock, flags);
@@ -918,38 +881,33 @@ zfcp_fsf_status_read_handler(struct zfcp_fsf_req *fsf_req)
 	int retval = 0;
 	struct zfcp_adapter *adapter = fsf_req->adapter;
 	struct fsf_status_read_buffer *status_buffer =
-	    fsf_req->data.status_read.buffer;
+		(struct fsf_status_read_buffer *) fsf_req->data;
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_DISMISSED) {
+		zfcp_hba_dbf_event_fsf_unsol("dism", adapter, status_buffer);
 		mempool_free(status_buffer, adapter->pool.data_status_read);
 		zfcp_fsf_req_free(fsf_req);
 		goto out;
 	}
 
+	zfcp_hba_dbf_event_fsf_unsol("read", adapter, status_buffer);
+
 	switch (status_buffer->status_type) {
 
 	case FSF_STATUS_READ_PORT_CLOSED:
-		debug_text_event(adapter->erp_dbf, 3, "unsol_pclosed:");
-		debug_event(adapter->erp_dbf, 3,
-			    &status_buffer->d_id, sizeof (u32));
 		zfcp_fsf_status_read_port_closed(fsf_req);
 		break;
 
 	case FSF_STATUS_READ_INCOMING_ELS:
-		debug_text_event(adapter->erp_dbf, 3, "unsol_els:");
 		zfcp_fsf_incoming_els(fsf_req);
 		break;
 
 	case FSF_STATUS_READ_SENSE_DATA_AVAIL:
-		debug_text_event(adapter->erp_dbf, 3, "unsol_sense:");
 		ZFCP_LOG_INFO("unsolicited sense data received (adapter %s)\n",
 			      zfcp_get_busid_by_adapter(adapter));
-                ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL, (char *) status_buffer,
-                              sizeof(struct fsf_status_read_buffer));
 		break;
 
 	case FSF_STATUS_READ_BIT_ERROR_THRESHOLD:
-		debug_text_event(adapter->erp_dbf, 3, "unsol_bit_err:");
 		ZFCP_LOG_NORMAL("Bit error threshold data received:\n");
 		ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL,
 			      (char *) status_buffer,
@@ -957,17 +915,32 @@ zfcp_fsf_status_read_handler(struct zfcp_fsf_req *fsf_req)
 		break;
 
 	case FSF_STATUS_READ_LINK_DOWN:
-		debug_text_event(adapter->erp_dbf, 0, "unsol_link_down:");
-		ZFCP_LOG_INFO("Local link to adapter %s is down\n",
+		switch (status_buffer->status_subtype) {
+		case FSF_STATUS_READ_SUB_NO_PHYSICAL_LINK:
+			ZFCP_LOG_INFO("Physical link to adapter %s is down\n",
+				      zfcp_get_busid_by_adapter(adapter));
+			break;
+		case FSF_STATUS_READ_SUB_FDISC_FAILED:
+			ZFCP_LOG_INFO("Local link to adapter %s is down "
+				      "due to failed FDISC login\n",
 			      zfcp_get_busid_by_adapter(adapter));
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
-				&adapter->status);
-		zfcp_erp_adapter_failed(adapter);
+			break;
+		case FSF_STATUS_READ_SUB_FIRMWARE_UPDATE:
+			ZFCP_LOG_INFO("Local link to adapter %s is down "
+				      "due to firmware update on adapter\n",
+				      zfcp_get_busid_by_adapter(adapter));
+			break;
+		default:
+			ZFCP_LOG_INFO("Local link to adapter %s is down "
+				      "due to unknown reason\n",
+				      zfcp_get_busid_by_adapter(adapter));
+		};
+		zfcp_fsf_link_down_info_eval(adapter,
+			(struct fsf_link_down_info *) &status_buffer->payload);
 		break;
 
 	case FSF_STATUS_READ_LINK_UP:
-		debug_text_event(adapter->erp_dbf, 2, "unsol_link_up:");
-		ZFCP_LOG_INFO("Local link to adapter %s was replugged. "
+		ZFCP_LOG_NORMAL("Local link to adapter %s was replugged. "
 			      "Restarting operations on this adapter\n",
 			      zfcp_get_busid_by_adapter(adapter));
 		/* All ports should be marked as ready to run again */
@@ -980,35 +953,40 @@ zfcp_fsf_status_read_handler(struct zfcp_fsf_req *fsf_req)
 		break;
 
 	case FSF_STATUS_READ_CFDC_UPDATED:
-		debug_text_event(adapter->erp_dbf, 2, "unsol_cfdc_update:");
-		ZFCP_LOG_INFO("CFDC has been updated on the adapter %s\n",
+		ZFCP_LOG_NORMAL("CFDC has been updated on the adapter %s\n",
 			      zfcp_get_busid_by_adapter(adapter));
 		zfcp_erp_adapter_access_changed(adapter);
 		break;
 
 	case FSF_STATUS_READ_CFDC_HARDENED:
-		debug_text_event(adapter->erp_dbf, 2, "unsol_cfdc_harden:");
 		switch (status_buffer->status_subtype) {
 		case FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE:
-			ZFCP_LOG_INFO("CFDC of adapter %s saved on SE\n",
+			ZFCP_LOG_NORMAL("CFDC of adapter %s saved on SE\n",
 				      zfcp_get_busid_by_adapter(adapter));
 			break;
 		case FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE2:
-			ZFCP_LOG_INFO("CFDC of adapter %s has been copied "
+			ZFCP_LOG_NORMAL("CFDC of adapter %s has been copied "
 				      "to the secondary SE\n",
 				zfcp_get_busid_by_adapter(adapter));
 			break;
 		default:
-			ZFCP_LOG_INFO("CFDC of adapter %s has been hardened\n",
+			ZFCP_LOG_NORMAL("CFDC of adapter %s has been hardened\n",
 				      zfcp_get_busid_by_adapter(adapter));
 		}
 		break;
 
+	case FSF_STATUS_READ_FEATURE_UPDATE_ALERT:
+		debug_text_event(adapter->erp_dbf, 2, "unsol_features:");
+		ZFCP_LOG_INFO("List of supported features on adapter %s has "
+			      "been changed from 0x%08X to 0x%08X\n",
+			      zfcp_get_busid_by_adapter(adapter),
+			      *(u32*) (status_buffer->payload + 4),
+			      *(u32*) (status_buffer->payload));
+		adapter->adapter_features = *(u32*) status_buffer->payload;
+		break;
+
 	default:
-		debug_text_event(adapter->erp_dbf, 0, "unsol_unknown:");
-		debug_exception(adapter->erp_dbf, 0,
-				&status_buffer->status_type, sizeof (u32));
-		ZFCP_LOG_NORMAL("bug: An unsolicited status packet of unknown "
+		ZFCP_LOG_NORMAL("warning: An unsolicited status packet of unknown "
 				"type was received (debug info 0x%x)\n",
 				status_buffer->status_type);
 		ZFCP_LOG_DEBUG("Dump of status_read_buffer %p:\n",
@@ -1093,7 +1071,7 @@ zfcp_fsf_abort_fcp_command(unsigned long old_req_id,
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-	fsf_req->data.abort_fcp_command.unit = unit;
+	fsf_req->data = (unsigned long) unit;
 
 	/* set handles of unit and its parent port in QTCB */
 	fsf_req->qtcb->header.lun_handle = unit->handle;
@@ -1139,7 +1117,7 @@ static int
 zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *new_fsf_req)
 {
 	int retval = -EINVAL;
-	struct zfcp_unit *unit = new_fsf_req->data.abort_fcp_command.unit;
+	struct zfcp_unit *unit;
 	unsigned char status_qual =
 	    new_fsf_req->qtcb->header.fsf_status_qual.word[0];
 
@@ -1150,6 +1128,8 @@ zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *new_fsf_req)
 		goto skip_fsfstatus;
 	}
 
+	unit = (struct zfcp_unit *) new_fsf_req->data;
+
 	/* evaluate FSF status in QTCB */
 	switch (new_fsf_req->qtcb->header.fsf_status) {
 
@@ -1364,7 +1344,7 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool,
                 sbale[3].addr = zfcp_sg_to_address(&ct->resp[0]);
                 sbale[3].length = ct->resp[0].length;
                 sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY;
-        } else if (adapter->supported_features &
+	} else if (adapter->adapter_features &
                    FSF_FEATURE_ELS_CT_CHAINED_SBALS) {
                 /* try to use chained SBALs */
                 bytes = zfcp_qdio_sbals_from_sg(fsf_req,
@@ -1414,7 +1394,9 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool,
 	fsf_req->qtcb->header.port_handle = port->handle;
 	fsf_req->qtcb->bottom.support.service_class = adapter->fc_service_class;
 	fsf_req->qtcb->bottom.support.timeout = ct->timeout;
-        fsf_req->data.send_ct = ct;
+        fsf_req->data = (unsigned long) ct;
+
+	zfcp_san_dbf_event_ct_request(fsf_req);
 
 	/* start QDIO request for this FSF request */
 	ret = zfcp_fsf_req_send(fsf_req, ct->timer);
@@ -1445,10 +1427,10 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool,
  * zfcp_fsf_send_ct_handler - handler for Generic Service requests
  * @fsf_req: pointer to struct zfcp_fsf_req
  *
- * Data specific for the Generic Service request is passed by
- * fsf_req->data.send_ct
- * Usually a specific handler for the request is called via
- * fsf_req->data.send_ct->handler at end of this function.
+ * Data specific for the Generic Service request is passed using
+ * fsf_req->data. There we find the pointer to struct zfcp_send_ct.
+ * Usually a specific handler for the CT request is called which is
+ * found in this structure.
  */
 static int
 zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *fsf_req)
@@ -1462,7 +1444,7 @@ zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *fsf_req)
 	u16 subtable, rule, counter;
 
 	adapter = fsf_req->adapter;
-	send_ct = fsf_req->data.send_ct;
+	send_ct = (struct zfcp_send_ct *) fsf_req->data;
 	port = send_ct->port;
 	header = &fsf_req->qtcb->header;
 	bottom = &fsf_req->qtcb->bottom.support;
@@ -1474,6 +1456,7 @@ zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *fsf_req)
 	switch (header->fsf_status) {
 
         case FSF_GOOD:
+		zfcp_san_dbf_event_ct_response(fsf_req);
                 retval = 0;
 		break;
 
@@ -1634,7 +1617,7 @@ zfcp_fsf_send_els(struct zfcp_send_els *els)
 {
 	volatile struct qdio_buffer_element *sbale;
 	struct zfcp_fsf_req *fsf_req;
-	fc_id_t d_id;
+	u32 d_id;
 	struct zfcp_adapter *adapter;
 	unsigned long lock_flags;
         int bytes;
@@ -1664,7 +1647,7 @@ zfcp_fsf_send_els(struct zfcp_send_els *els)
                 sbale[3].addr = zfcp_sg_to_address(&els->resp[0]);
                 sbale[3].length = els->resp[0].length;
                 sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY;
-        } else if (adapter->supported_features &
+	} else if (adapter->adapter_features &
                    FSF_FEATURE_ELS_CT_CHAINED_SBALS) {
                 /* try to use chained SBALs */
                 bytes = zfcp_qdio_sbals_from_sg(fsf_req,
@@ -1714,10 +1697,12 @@ zfcp_fsf_send_els(struct zfcp_send_els *els)
 	fsf_req->qtcb->bottom.support.d_id = d_id;
 	fsf_req->qtcb->bottom.support.service_class = adapter->fc_service_class;
 	fsf_req->qtcb->bottom.support.timeout = ZFCP_ELS_TIMEOUT;
-	fsf_req->data.send_els = els;
+	fsf_req->data = (unsigned long) els;
 
 	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
 
+	zfcp_san_dbf_event_els_request(fsf_req);
+
 	/* start QDIO request for this FSF request */
 	ret = zfcp_fsf_req_send(fsf_req, els->timer);
 	if (ret) {
@@ -1746,23 +1731,23 @@ zfcp_fsf_send_els(struct zfcp_send_els *els)
  * zfcp_fsf_send_els_handler - handler for ELS commands
  * @fsf_req: pointer to struct zfcp_fsf_req
  *
- * Data specific for the ELS command is passed by
- * fsf_req->data.send_els
- * Usually a specific handler for the command is called via
- * fsf_req->data.send_els->handler at end of this function.
+ * Data specific for the ELS command is passed using
+ * fsf_req->data. There we find the pointer to struct zfcp_send_els.
+ * Usually a specific handler for the ELS command is called which is
+ * found in this structure.
  */
 static int zfcp_fsf_send_els_handler(struct zfcp_fsf_req *fsf_req)
 {
 	struct zfcp_adapter *adapter;
 	struct zfcp_port *port;
-	fc_id_t d_id;
+	u32 d_id;
 	struct fsf_qtcb_header *header;
 	struct fsf_qtcb_bottom_support *bottom;
 	struct zfcp_send_els *send_els;
 	int retval = -EINVAL;
 	u16 subtable, rule, counter;
 
-	send_els = fsf_req->data.send_els;
+	send_els = (struct zfcp_send_els *) fsf_req->data;
 	adapter = send_els->adapter;
 	port = send_els->port;
 	d_id = send_els->d_id;
@@ -1775,6 +1760,7 @@ static int zfcp_fsf_send_els_handler(struct zfcp_fsf_req *fsf_req)
 	switch (header->fsf_status) {
 
 	case FSF_GOOD:
+		zfcp_san_dbf_event_els_response(fsf_req);
 		retval = 0;
 		break;
 
@@ -1954,7 +1940,9 @@ zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action)
 
 	erp_action->fsf_req->erp_action = erp_action;
 	erp_action->fsf_req->qtcb->bottom.config.feature_selection =
-		(FSF_FEATURE_CFDC | FSF_FEATURE_LUN_SHARING);
+			FSF_FEATURE_CFDC |
+			FSF_FEATURE_LUN_SHARING |
+			FSF_FEATURE_UPDATE_ALERT;
 
 	/* start QDIO request for this FSF request */
 	retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer);
@@ -1990,29 +1978,36 @@ zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *fsf_req, int xchg_ok)
 {
 	struct fsf_qtcb_bottom_config *bottom;
 	struct zfcp_adapter *adapter = fsf_req->adapter;
+	struct Scsi_Host *shost = adapter->scsi_host;
 
 	bottom = &fsf_req->qtcb->bottom.config;
 	ZFCP_LOG_DEBUG("low/high QTCB version 0x%x/0x%x of FSF\n",
 		       bottom->low_qtcb_version, bottom->high_qtcb_version);
 	adapter->fsf_lic_version = bottom->lic_version;
-	adapter->supported_features = bottom->supported_features;
+	adapter->adapter_features = bottom->adapter_features;
+	adapter->connection_features = bottom->connection_features;
 	adapter->peer_wwpn = 0;
 	adapter->peer_wwnn = 0;
 	adapter->peer_d_id = 0;
 
 	if (xchg_ok) {
-		adapter->wwnn = bottom->nport_serv_param.wwnn;
-		adapter->wwpn = bottom->nport_serv_param.wwpn;
-		adapter->s_id = bottom->s_id & ZFCP_DID_MASK;
+		fc_host_node_name(shost) = bottom->nport_serv_param.wwnn;
+		fc_host_port_name(shost) = bottom->nport_serv_param.wwpn;
+		fc_host_port_id(shost) = bottom->s_id & ZFCP_DID_MASK;
+		fc_host_speed(shost) = bottom->fc_link_speed;
+		fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3;
 		adapter->fc_topology = bottom->fc_topology;
-		adapter->fc_link_speed = bottom->fc_link_speed;
 		adapter->hydra_version = bottom->adapter_type;
+		if (adapter->physical_wwpn == 0)
+			adapter->physical_wwpn = fc_host_port_name(shost);
+		if (adapter->physical_s_id == 0)
+			adapter->physical_s_id = fc_host_port_id(shost);
 	} else {
-		adapter->wwnn = 0;
-		adapter->wwpn = 0;
-		adapter->s_id = 0;
+		fc_host_node_name(shost) = 0;
+		fc_host_port_name(shost) = 0;
+		fc_host_port_id(shost) = 0;
+		fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
 		adapter->fc_topology = 0;
-		adapter->fc_link_speed = 0;
 		adapter->hydra_version = 0;
 	}
 
@@ -2022,26 +2017,28 @@ zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *fsf_req, int xchg_ok)
 		adapter->peer_wwnn = bottom->plogi_payload.wwnn;
 	}
 
-	if(adapter->supported_features & FSF_FEATURE_HBAAPI_MANAGEMENT){
+	if (adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT) {
 		adapter->hardware_version = bottom->hardware_version;
-		memcpy(adapter->serial_number, bottom->serial_number, 17);
-		EBCASC(adapter->serial_number, sizeof(adapter->serial_number));
+		memcpy(fc_host_serial_number(shost), bottom->serial_number,
+		       min(FC_SERIAL_NUMBER_SIZE, 17));
+		EBCASC(fc_host_serial_number(shost),
+		       min(FC_SERIAL_NUMBER_SIZE, 17));
 	}
 
 	ZFCP_LOG_NORMAL("The adapter %s reported the following characteristics:\n"
-		      "WWNN 0x%016Lx, "
-		      "WWPN 0x%016Lx, "
-		      "S_ID 0x%08x,\n"
-		      "adapter version 0x%x, "
-		      "LIC version 0x%x, "
-		      "FC link speed %d Gb/s\n",
-		      zfcp_get_busid_by_adapter(adapter),
-		      adapter->wwnn,
-		      adapter->wwpn,
-		      (unsigned int) adapter->s_id,
-		      adapter->hydra_version,
-		      adapter->fsf_lic_version,
-		      adapter->fc_link_speed);
+			"WWNN 0x%016Lx, "
+			"WWPN 0x%016Lx, "
+			"S_ID 0x%08x,\n"
+			"adapter version 0x%x, "
+			"LIC version 0x%x, "
+			"FC link speed %d Gb/s\n",
+			zfcp_get_busid_by_adapter(adapter),
+			(wwn_t) fc_host_node_name(shost),
+			(wwn_t) fc_host_port_name(shost),
+			fc_host_port_id(shost),
+			adapter->hydra_version,
+			adapter->fsf_lic_version,
+			fc_host_speed(shost));
 	if (ZFCP_QTCB_VERSION < bottom->low_qtcb_version) {
 		ZFCP_LOG_NORMAL("error: the adapter %s "
 				"only supports newer control block "
@@ -2062,7 +2059,6 @@ zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *fsf_req, int xchg_ok)
 		zfcp_erp_adapter_shutdown(adapter, 0);
 		return -EIO;
 	}
-	zfcp_set_fc_host_attrs(adapter);
 	return 0;
 }
 
@@ -2078,11 +2074,12 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req)
 {
 	struct fsf_qtcb_bottom_config *bottom;
 	struct zfcp_adapter *adapter = fsf_req->adapter;
+	struct fsf_qtcb *qtcb = fsf_req->qtcb;
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)
 		return -EIO;
 
-	switch (fsf_req->qtcb->header.fsf_status) {
+	switch (qtcb->header.fsf_status) {
 
 	case FSF_GOOD:
 		if (zfcp_fsf_exchange_config_evaluate(fsf_req, 1))
@@ -2112,7 +2109,7 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req)
 			zfcp_erp_adapter_shutdown(adapter, 0);
 			return -EIO;
 		case FSF_TOPO_FABRIC:
-			ZFCP_LOG_INFO("Switched fabric fibrechannel "
+			ZFCP_LOG_NORMAL("Switched fabric fibrechannel "
 				      "network detected at adapter %s.\n",
 				      zfcp_get_busid_by_adapter(adapter));
 			break;
@@ -2130,7 +2127,7 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req)
 			zfcp_erp_adapter_shutdown(adapter, 0);
 			return -EIO;
 		}
-		bottom = &fsf_req->qtcb->bottom.config;
+		bottom = &qtcb->bottom.config;
 		if (bottom->max_qtcb_size < sizeof(struct fsf_qtcb)) {
 			ZFCP_LOG_NORMAL("bug: Maximum QTCB size (%d bytes) "
 					"allowed by the adapter %s "
@@ -2155,12 +2152,10 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req)
 		if (zfcp_fsf_exchange_config_evaluate(fsf_req, 0))
 			return -EIO;
 
-		ZFCP_LOG_INFO("Local link to adapter %s is down\n",
-			      zfcp_get_busid_by_adapter(adapter));
-		atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
-				ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED,
-				&adapter->status);
-		zfcp_erp_adapter_failed(adapter);
+		atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, &adapter->status);
+
+		zfcp_fsf_link_down_info_eval(adapter,
+			&qtcb->header.fsf_status_qual.link_down_info);
 		break;
 	default:
 		debug_text_event(fsf_req->adapter->erp_dbf, 0, "fsf-stat-ng");
@@ -2174,11 +2169,13 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req)
 
 /**
  * zfcp_fsf_exchange_port_data - request information about local port
+ * @erp_action: ERP action for the adapter for which port data is requested
  * @adapter: for which port data is requested
  * @data: response to exchange port data request
  */
 int
-zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter,
+zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action,
+			    struct zfcp_adapter *adapter,
 			    struct fsf_qtcb_bottom_port *data)
 {
 	volatile struct qdio_buffer_element *sbale;
@@ -2187,7 +2184,7 @@ zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter,
         struct zfcp_fsf_req *fsf_req;
 	struct timer_list *timer;
 
-        if(!(adapter->supported_features & FSF_FEATURE_HBAAPI_MANAGEMENT)){
+	if (!(adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT)) {
 		ZFCP_LOG_INFO("error: exchange port data "
                               "command not supported by adapter %s\n",
 			      zfcp_get_busid_by_adapter(adapter));
@@ -2211,12 +2208,18 @@ zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter,
 		goto out;
 	}
 
+	if (erp_action) {
+		erp_action->fsf_req = fsf_req;
+		fsf_req->erp_action = erp_action;
+	}
+
+	if (data)
+	fsf_req->data = (unsigned long) data;
+
 	sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0);
         sbale[0].flags |= SBAL_FLAGS0_TYPE_READ;
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
-        fsf_req->data.port_data = data;
-
 	init_timer(timer);
 	timer->function = zfcp_fsf_request_timeout_handler;
 	timer->data = (unsigned long) adapter;
@@ -2228,6 +2231,8 @@ zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter,
                               "command on the adapter %s\n",
 			      zfcp_get_busid_by_adapter(adapter));
 		zfcp_fsf_req_free(fsf_req);
+		if (erp_action)
+			erp_action->fsf_req = NULL;
 		write_unlock_irqrestore(&adapter->request_queue.queue_lock,
 					lock_flags);
 		goto out;
@@ -2256,21 +2261,42 @@ zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter,
 static void
 zfcp_fsf_exchange_port_data_handler(struct zfcp_fsf_req *fsf_req)
 {
-	struct fsf_qtcb_bottom_port *bottom;
-	struct fsf_qtcb_bottom_port *data = fsf_req->data.port_data;
+	struct zfcp_adapter *adapter = fsf_req->adapter;
+	struct Scsi_Host *shost = adapter->scsi_host;
+	struct fsf_qtcb *qtcb = fsf_req->qtcb;
+	struct fsf_qtcb_bottom_port *bottom, *data;
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)
 		return;
 
-	switch (fsf_req->qtcb->header.fsf_status) {
+	switch (qtcb->header.fsf_status) {
         case FSF_GOOD:
-                bottom = &fsf_req->qtcb->bottom.port;
-                memcpy(data, bottom, sizeof(*data));
+		atomic_set_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
+
+		bottom = &qtcb->bottom.port;
+		data = (struct fsf_qtcb_bottom_port*) fsf_req->data;
+		if (data)
+			memcpy(data, bottom, sizeof(struct fsf_qtcb_bottom_port));
+		if (adapter->connection_features & FSF_FEATURE_NPIV_MODE) {
+			adapter->physical_wwpn = bottom->wwpn;
+			adapter->physical_s_id = bottom->fc_port_id;
+		} else {
+			adapter->physical_wwpn = fc_host_port_name(shost);
+			adapter->physical_s_id = fc_host_port_id(shost);
+		}
+		fc_host_maxframe_size(shost) = bottom->maximum_frame_size;
+		break;
+
+	case FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE:
+		atomic_set_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status);
+
+		zfcp_fsf_link_down_info_eval(adapter,
+			&qtcb->header.fsf_status_qual.link_down_info);
                 break;
 
         default:
-		debug_text_event(fsf_req->adapter->erp_dbf, 0, "xchg-port-ng");
-                debug_event(fsf_req->adapter->erp_dbf, 0,
+		debug_text_event(adapter->erp_dbf, 0, "xchg-port-ng");
+		debug_event(adapter->erp_dbf, 0,
 			    &fsf_req->qtcb->header.fsf_status, sizeof(u32));
 	}
 }
@@ -2312,7 +2338,7 @@ zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
 
 	erp_action->fsf_req->qtcb->bottom.support.d_id = erp_action->port->d_id;
 	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->port->status);
-	erp_action->fsf_req->data.open_port.port = erp_action->port;
+	erp_action->fsf_req->data = (unsigned long) erp_action->port;
 	erp_action->fsf_req->erp_action = erp_action;
 
 	/* start QDIO request for this FSF request */
@@ -2353,7 +2379,7 @@ zfcp_fsf_open_port_handler(struct zfcp_fsf_req *fsf_req)
 	struct fsf_qtcb_header *header;
 	u16 subtable, rule, counter;
 
-	port = fsf_req->data.open_port.port;
+	port = (struct zfcp_port *) fsf_req->data;
 	header = &fsf_req->qtcb->header;
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
@@ -2566,7 +2592,7 @@ zfcp_fsf_close_port(struct zfcp_erp_action *erp_action)
         sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY;
 
 	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->port->status);
-	erp_action->fsf_req->data.close_port.port = erp_action->port;
+	erp_action->fsf_req->data = (unsigned long) erp_action->port;
 	erp_action->fsf_req->erp_action = erp_action;
 	erp_action->fsf_req->qtcb->header.port_handle =
 	    erp_action->port->handle;
@@ -2606,7 +2632,7 @@ zfcp_fsf_close_port_handler(struct zfcp_fsf_req *fsf_req)
 	int retval = -EINVAL;
 	struct zfcp_port *port;
 
-	port = fsf_req->data.close_port.port;
+	port = (struct zfcp_port *) fsf_req->data;
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
 		/* don't change port status in our bookkeeping */
@@ -2703,8 +2729,8 @@ zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action)
 	atomic_set_mask(ZFCP_STATUS_PORT_PHYS_CLOSING,
 			&erp_action->port->status);
 	/* save a pointer to this port */
-	erp_action->fsf_req->data.close_physical_port.port = erp_action->port;
-	/* port to be closeed */
+	erp_action->fsf_req->data = (unsigned long) erp_action->port;
+	/* port to be closed */
 	erp_action->fsf_req->qtcb->header.port_handle =
 	    erp_action->port->handle;
 	erp_action->fsf_req->erp_action = erp_action;
@@ -2747,7 +2773,7 @@ zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *fsf_req)
 	struct fsf_qtcb_header *header;
 	u16 subtable, rule, counter;
 
-	port = fsf_req->data.close_physical_port.port;
+	port = (struct zfcp_port *) fsf_req->data;
 	header = &fsf_req->qtcb->header;
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
@@ -2908,10 +2934,11 @@ zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action)
 		erp_action->port->handle;
 	erp_action->fsf_req->qtcb->bottom.support.fcp_lun =
 		erp_action->unit->fcp_lun;
+	if (!(erp_action->adapter->connection_features & FSF_FEATURE_NPIV_MODE))
 	erp_action->fsf_req->qtcb->bottom.support.option =
 		FSF_OPEN_LUN_SUPPRESS_BOXING;
 	atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->unit->status);
-	erp_action->fsf_req->data.open_unit.unit = erp_action->unit;
+	erp_action->fsf_req->data = (unsigned long) erp_action->unit;
 	erp_action->fsf_req->erp_action = erp_action;
 
 	/* start QDIO request for this FSF request */
@@ -2955,9 +2982,9 @@ zfcp_fsf_open_unit_handler(struct zfcp_fsf_req *fsf_req)
 	struct fsf_qtcb_bottom_support *bottom;
 	struct fsf_queue_designator *queue_designator;
 	u16 subtable, rule, counter;
-	u32 allowed, exclusive, readwrite;
+	int exclusive, readwrite;
 
-	unit = fsf_req->data.open_unit.unit;
+	unit = (struct zfcp_unit *) fsf_req->data;
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
 		/* don't change unit status in our bookkeeping */
@@ -2969,10 +2996,6 @@ zfcp_fsf_open_unit_handler(struct zfcp_fsf_req *fsf_req)
 	bottom = &fsf_req->qtcb->bottom.support;
 	queue_designator = &header->fsf_status_qual.fsf_queue_designator;
 
-	allowed   = bottom->lun_access_info & FSF_UNIT_ACCESS_OPEN_LUN_ALLOWED;
-	exclusive = bottom->lun_access_info & FSF_UNIT_ACCESS_EXCLUSIVE;
-	readwrite = bottom->lun_access_info & FSF_UNIT_ACCESS_OUTBOUND_TRANSFER;
-
 	atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED |
 			  ZFCP_STATUS_UNIT_SHARED |
 			  ZFCP_STATUS_UNIT_READONLY,
@@ -3146,10 +3169,15 @@ zfcp_fsf_open_unit_handler(struct zfcp_fsf_req *fsf_req)
 			       unit->handle);
 		/* mark unit as open */
 		atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &unit->status);
-		atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED |
-		                  ZFCP_STATUS_COMMON_ACCESS_BOXED,
-		                  &unit->status);
-		if (adapter->supported_features & FSF_FEATURE_LUN_SHARING){
+
+		if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE) &&
+		    (adapter->adapter_features & FSF_FEATURE_LUN_SHARING) &&
+		    (adapter->ccw_device->id.dev_model != ZFCP_DEVICE_MODEL_PRIV)) {
+			exclusive = (bottom->lun_access_info &
+					FSF_UNIT_ACCESS_EXCLUSIVE);
+			readwrite = (bottom->lun_access_info &
+					FSF_UNIT_ACCESS_OUTBOUND_TRANSFER);
+
 			if (!exclusive)
 		                atomic_set_mask(ZFCP_STATUS_UNIT_SHARED,
 						&unit->status);
@@ -3242,7 +3270,7 @@ zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action)
 	    erp_action->port->handle;
 	erp_action->fsf_req->qtcb->header.lun_handle = erp_action->unit->handle;
 	atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->unit->status);
-	erp_action->fsf_req->data.close_unit.unit = erp_action->unit;
+	erp_action->fsf_req->data = (unsigned long) erp_action->unit;
 	erp_action->fsf_req->erp_action = erp_action;
 
 	/* start QDIO request for this FSF request */
@@ -3281,7 +3309,7 @@ zfcp_fsf_close_unit_handler(struct zfcp_fsf_req *fsf_req)
 	int retval = -EINVAL;
 	struct zfcp_unit *unit;
 
-	unit = fsf_req->data.close_unit.unit;	/* restore unit */
+	unit = (struct zfcp_unit *) fsf_req->data;
 
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
 		/* don't change unit status in our bookkeeping */
@@ -3305,9 +3333,6 @@ zfcp_fsf_close_unit_handler(struct zfcp_fsf_req *fsf_req)
 		debug_text_event(fsf_req->adapter->erp_dbf, 1,
 				 "fsf_s_phand_nv");
 		zfcp_erp_adapter_reopen(unit->port->adapter, 0);
-		zfcp_cmd_dbf_event_fsf("porthinv", fsf_req,
-				       &fsf_req->qtcb->header.fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -3326,9 +3351,6 @@ zfcp_fsf_close_unit_handler(struct zfcp_fsf_req *fsf_req)
 		debug_text_event(fsf_req->adapter->erp_dbf, 1,
 				 "fsf_s_lhand_nv");
 		zfcp_erp_port_reopen(unit->port, 0);
-		zfcp_cmd_dbf_event_fsf("lunhinv", fsf_req,
-				       &fsf_req->qtcb->header.fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -3436,21 +3458,14 @@ zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter,
 		goto failed_req_create;
 	}
 
-	/*
-	 * associate FSF request with SCSI request
-	 * (need this for look up on abort)
-	 */
-	fsf_req->data.send_fcp_command_task.fsf_req = fsf_req;
-	scsi_cmnd->host_scribble = (char *) &(fsf_req->data);
+	zfcp_unit_get(unit);
+	fsf_req->unit = unit;
 
-	/*
-	 * associate SCSI command with FSF request
-	 * (need this for look up on normal command completion)
-	 */
-	fsf_req->data.send_fcp_command_task.scsi_cmnd = scsi_cmnd;
-	fsf_req->data.send_fcp_command_task.start_jiffies = jiffies;
-	fsf_req->data.send_fcp_command_task.unit = unit;
-	ZFCP_LOG_DEBUG("unit=%p, fcp_lun=0x%016Lx\n", unit, unit->fcp_lun);
+	/* associate FSF request with SCSI request (for look up on abort) */
+	scsi_cmnd->host_scribble = (char *) fsf_req;
+
+	/* associate SCSI command with FSF request */
+	fsf_req->data = (unsigned long) scsi_cmnd;
 
 	/* set handles of unit and its parent port in QTCB */
 	fsf_req->qtcb->header.lun_handle = unit->handle;
@@ -3584,6 +3599,7 @@ zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter,
  send_failed:
  no_fit:
  failed_scsi_cmnd:
+	zfcp_unit_put(unit);
 	zfcp_fsf_req_free(fsf_req);
 	fsf_req = NULL;
 	scsi_cmnd->host_scribble = NULL;
@@ -3640,7 +3656,7 @@ zfcp_fsf_send_fcp_command_task_management(struct zfcp_adapter *adapter,
 	 * hold a pointer to the unit being target of this
 	 * task management request
 	 */
-	fsf_req->data.send_fcp_command_task_management.unit = unit;
+	fsf_req->data = (unsigned long) unit;
 
 	/* set FSF related fields in QTCB */
 	fsf_req->qtcb->header.lun_handle = unit->handle;
@@ -3706,9 +3722,9 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req)
 	header = &fsf_req->qtcb->header;
 
 	if (unlikely(fsf_req->status & ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT))
-		unit = fsf_req->data.send_fcp_command_task_management.unit;
+		unit = (struct zfcp_unit *) fsf_req->data;
 	else
-		unit = fsf_req->data.send_fcp_command_task.unit;
+		unit = fsf_req->unit;
 
 	if (unlikely(fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)) {
 		/* go directly to calls of special handlers */
@@ -3765,10 +3781,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req)
 		debug_text_event(fsf_req->adapter->erp_dbf, 1,
 				 "fsf_s_hand_mis");
 		zfcp_erp_adapter_reopen(unit->port->adapter, 0);
-		zfcp_cmd_dbf_event_fsf("handmism",
-				       fsf_req,
-				       &header->fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -3789,10 +3801,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req)
 		debug_text_exception(fsf_req->adapter->erp_dbf, 0,
 				     "fsf_s_class_nsup");
 		zfcp_erp_adapter_shutdown(unit->port->adapter, 0);
-		zfcp_cmd_dbf_event_fsf("unsclass",
-				       fsf_req,
-				       &header->fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -3811,10 +3819,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req)
 		debug_text_event(fsf_req->adapter->erp_dbf, 1,
 				 "fsf_s_fcp_lun_nv");
 		zfcp_erp_port_reopen(unit->port, 0);
-		zfcp_cmd_dbf_event_fsf("fluninv",
-				       fsf_req,
-				       &header->fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -3853,10 +3857,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req)
 		debug_text_event(fsf_req->adapter->erp_dbf, 0,
 				 "fsf_s_dir_ind_nv");
 		zfcp_erp_adapter_shutdown(unit->port->adapter, 0);
-		zfcp_cmd_dbf_event_fsf("dirinv",
-				       fsf_req,
-				       &header->fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -3872,10 +3872,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req)
 		debug_text_event(fsf_req->adapter->erp_dbf, 0,
 				 "fsf_s_cmd_len_nv");
 		zfcp_erp_adapter_shutdown(unit->port->adapter, 0);
-		zfcp_cmd_dbf_event_fsf("cleninv",
-				       fsf_req,
-				       &header->fsf_status_qual,
-				       sizeof (union fsf_status_qual));
 		fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR;
 		break;
 
@@ -3947,6 +3943,8 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req)
 		    zfcp_fsf_send_fcp_command_task_management_handler(fsf_req);
 	} else {
 		retval = zfcp_fsf_send_fcp_command_task_handler(fsf_req);
+		fsf_req->unit = NULL;
+		zfcp_unit_put(unit);
 	}
 	return retval;
 }
@@ -3970,10 +3968,10 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req)
 	u32 sns_len;
 	char *fcp_rsp_info = zfcp_get_fcp_rsp_info_ptr(fcp_rsp_iu);
 	unsigned long flags;
-	struct zfcp_unit *unit = fsf_req->data.send_fcp_command_task.unit;
+	struct zfcp_unit *unit = fsf_req->unit;
 
 	read_lock_irqsave(&fsf_req->adapter->abort_lock, flags);
-	scpnt = fsf_req->data.send_fcp_command_task.scsi_cmnd;
+	scpnt = (struct scsi_cmnd *) fsf_req->data;
 	if (unlikely(!scpnt)) {
 		ZFCP_LOG_DEBUG
 		    ("Command with fsf_req %p is not associated to "
@@ -4043,7 +4041,6 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req)
 			ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG,
 				      (char *) &fsf_req->qtcb->
 				      bottom.io.fcp_cmnd, FSF_FCP_CMND_SIZE);
-			zfcp_cmd_dbf_event_fsf("clenmis", fsf_req, NULL, 0);
 			set_host_byte(&scpnt->result, DID_ERROR);
 			goto skip_fsfstatus;
 		case RSP_CODE_FIELD_INVALID:
@@ -4062,7 +4059,6 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req)
 				      (char *) &fsf_req->qtcb->
 				      bottom.io.fcp_cmnd, FSF_FCP_CMND_SIZE);
 			set_host_byte(&scpnt->result, DID_ERROR);
-			zfcp_cmd_dbf_event_fsf("codeinv", fsf_req, NULL, 0);
 			goto skip_fsfstatus;
 		case RSP_CODE_RO_MISMATCH:
 			/* hardware bug */
@@ -4079,7 +4075,6 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req)
 			ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG,
 				      (char *) &fsf_req->qtcb->
 				      bottom.io.fcp_cmnd, FSF_FCP_CMND_SIZE);
-			zfcp_cmd_dbf_event_fsf("codemism", fsf_req, NULL, 0);
 			set_host_byte(&scpnt->result, DID_ERROR);
 			goto skip_fsfstatus;
 		default:
@@ -4096,7 +4091,6 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req)
 			ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG,
 				      (char *) &fsf_req->qtcb->
 				      bottom.io.fcp_cmnd, FSF_FCP_CMND_SIZE);
-			zfcp_cmd_dbf_event_fsf("undeffcp", fsf_req, NULL, 0);
 			set_host_byte(&scpnt->result, DID_ERROR);
 			goto skip_fsfstatus;
 		}
@@ -4158,19 +4152,17 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req)
  skip_fsfstatus:
 	ZFCP_LOG_DEBUG("scpnt->result =0x%x\n", scpnt->result);
 
-	zfcp_cmd_dbf_event_scsi("response", scpnt);
+	if (scpnt->result != 0)
+		zfcp_scsi_dbf_event_result("erro", 3, fsf_req->adapter, scpnt);
+	else if (scpnt->retries > 0)
+		zfcp_scsi_dbf_event_result("retr", 4, fsf_req->adapter, scpnt);
+	else
+		zfcp_scsi_dbf_event_result("norm", 6, fsf_req->adapter, scpnt);
 
 	/* cleanup pointer (need this especially for abort) */
 	scpnt->host_scribble = NULL;
 
-	/*
-	 * NOTE:
-	 * according to the outcome of a discussion on linux-scsi we
-	 * don't need to grab the io_request_lock here since we use
-	 * the new eh
-	 */
 	/* always call back */
-
 	(scpnt->scsi_done) (scpnt);
 
 	/*
@@ -4198,8 +4190,7 @@ zfcp_fsf_send_fcp_command_task_management_handler(struct zfcp_fsf_req *fsf_req)
 	struct fcp_rsp_iu *fcp_rsp_iu = (struct fcp_rsp_iu *)
 	    &(fsf_req->qtcb->bottom.io.fcp_rsp);
 	char *fcp_rsp_info = zfcp_get_fcp_rsp_info_ptr(fcp_rsp_iu);
-	struct zfcp_unit *unit =
-	    fsf_req->data.send_fcp_command_task_management.unit;
+	struct zfcp_unit *unit = (struct zfcp_unit *) fsf_req->data;
 
 	del_timer(&fsf_req->adapter->scsi_er_timer);
 	if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) {
@@ -4276,7 +4267,7 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter,
 	int direction;
 	int retval = 0;
 
-	if (!(adapter->supported_features & FSF_FEATURE_CFDC)) {
+	if (!(adapter->adapter_features & FSF_FEATURE_CFDC)) {
 		ZFCP_LOG_INFO("cfdc not supported (adapter %s)\n",
 			      zfcp_get_busid_by_adapter(adapter));
 		retval = -EOPNOTSUPP;
@@ -4549,52 +4540,6 @@ skip_fsfstatus:
 	return retval;
 }
 
-
-/*
- * function:    zfcp_fsf_req_wait_and_cleanup
- *
- * purpose:
- *
- * FIXME(design): signal seems to be <0 !!!
- * returns:	0	- request completed (*status is valid), cleanup succ.
- *		<0	- request completed (*status is valid), cleanup failed
- *		>0	- signal which interrupted waiting (*status invalid),
- *			  request not completed, no cleanup
- *
- *		*status is a copy of status of completed fsf_req
- */
-int
-zfcp_fsf_req_wait_and_cleanup(struct zfcp_fsf_req *fsf_req,
-			      int interruptible, u32 * status)
-{
-	int retval = 0;
-	int signal = 0;
-
-	if (interruptible) {
-		__wait_event_interruptible(fsf_req->completion_wq,
-					   fsf_req->status &
-					   ZFCP_STATUS_FSFREQ_COMPLETED,
-					   signal);
-		if (signal) {
-			ZFCP_LOG_DEBUG("Caught signal %i while waiting for the "
-				       "completion of the request at %p\n",
-				       signal, fsf_req);
-			retval = signal;
-			goto out;
-		}
-	} else {
-		__wait_event(fsf_req->completion_wq,
-			     fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
-	}
-
-	*status = fsf_req->status;
-
-	/* cleanup request */
-	zfcp_fsf_req_free(fsf_req);
- out:
-	return retval;
-}
-
 static inline int
 zfcp_fsf_req_sbal_check(unsigned long *flags,
 			struct zfcp_qdio_queue *queue, int needed)
@@ -4610,15 +4555,16 @@ zfcp_fsf_req_sbal_check(unsigned long *flags,
  * set qtcb pointer in fsf_req and initialize QTCB
  */
 static inline void
-zfcp_fsf_req_qtcb_init(struct zfcp_fsf_req *fsf_req, u32 fsf_cmd)
+zfcp_fsf_req_qtcb_init(struct zfcp_fsf_req *fsf_req)
 {
 	if (likely(fsf_req->qtcb != NULL)) {
+		fsf_req->qtcb->prefix.req_seq_no = fsf_req->adapter->fsf_req_seq_no;
 		fsf_req->qtcb->prefix.req_id = (unsigned long)fsf_req;
 		fsf_req->qtcb->prefix.ulp_info = ZFCP_ULP_INFO_VERSION;
-		fsf_req->qtcb->prefix.qtcb_type = fsf_qtcb_type[fsf_cmd];
+		fsf_req->qtcb->prefix.qtcb_type = fsf_qtcb_type[fsf_req->fsf_command];
 		fsf_req->qtcb->prefix.qtcb_version = ZFCP_QTCB_VERSION;
 		fsf_req->qtcb->header.req_handle = (unsigned long)fsf_req;
-		fsf_req->qtcb->header.fsf_command = fsf_cmd;
+		fsf_req->qtcb->header.fsf_command = fsf_req->fsf_command;
 	}
 }
 
@@ -4686,7 +4632,10 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
 		goto failed_fsf_req;
 	}
 
-        zfcp_fsf_req_qtcb_init(fsf_req, fsf_cmd);
+	fsf_req->adapter = adapter;
+	fsf_req->fsf_command = fsf_cmd;
+
+        zfcp_fsf_req_qtcb_init(fsf_req);
 
 	/* initialize waitqueue which may be used to wait on 
 	   this request completion */
@@ -4708,8 +4657,10 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags,
 		goto failed_sbals;
 	}
 
-	fsf_req->adapter = adapter;	/* pointer to "parent" adapter */
-	fsf_req->fsf_command = fsf_cmd;
+	if (fsf_req->qtcb) {
+		fsf_req->seq_no = adapter->fsf_req_seq_no;
+		fsf_req->qtcb->prefix.req_seq_no = adapter->fsf_req_seq_no;
+	}
 	fsf_req->sbal_number = 1;
 	fsf_req->sbal_first = req_queue->free_index;
 	fsf_req->sbal_curr = req_queue->free_index;
@@ -4760,9 +4711,9 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 	struct zfcp_adapter *adapter;
 	struct zfcp_qdio_queue *req_queue;
 	volatile struct qdio_buffer_element *sbale;
+	int inc_seq_no;
 	int new_distance_from_int;
 	unsigned long flags;
-	int inc_seq_no = 1;
 	int retval = 0;
 
 	adapter = fsf_req->adapter;
@@ -4776,23 +4727,13 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 	ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_TRACE, (char *) sbale[1].addr,
 		      sbale[1].length);
 
-	/* set sequence counter in QTCB */
-	if (likely(fsf_req->qtcb)) {
-		fsf_req->qtcb->prefix.req_seq_no = adapter->fsf_req_seq_no;
-		fsf_req->seq_no = adapter->fsf_req_seq_no;
-		ZFCP_LOG_TRACE("FSF request %p of adapter %s gets "
-			       "FSF sequence counter value of %i\n",
-			       fsf_req,
-			       zfcp_get_busid_by_adapter(adapter),
-			       fsf_req->qtcb->prefix.req_seq_no);
-	} else
-		inc_seq_no = 0;
-
 	/* put allocated FSF request at list tail */
 	spin_lock_irqsave(&adapter->fsf_req_list_lock, flags);
 	list_add_tail(&fsf_req->list, &adapter->fsf_req_list_head);
 	spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags);
 
+	inc_seq_no = (fsf_req->qtcb != NULL);
+
 	/* figure out expiration time of timeout and start timeout */
 	if (unlikely(timer)) {
 		timer->expires += jiffies;
@@ -4822,6 +4763,8 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 	req_queue->free_index %= QDIO_MAX_BUFFERS_PER_Q;  /* wrap if needed */
 	new_distance_from_int = zfcp_qdio_determine_pci(req_queue, fsf_req);
 
+	fsf_req->issued = get_clock();
+
 	retval = do_QDIO(adapter->ccw_device,
 			 QDIO_FLAG_SYNC_OUTPUT,
 			 0, fsf_req->sbal_first, fsf_req->sbal_number, NULL);
@@ -4860,15 +4803,11 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer)
 		 * routines  resulting in missing sequence counter values
 		 * otherwise,
 		 */
+
 		/* Don't increase for unsolicited status */
-		if (likely(inc_seq_no)) {
+		if (inc_seq_no)
 			adapter->fsf_req_seq_no++;
-			ZFCP_LOG_TRACE
-			    ("FSF sequence counter value of adapter %s "
-			     "increased to %i\n",
-			     zfcp_get_busid_by_adapter(adapter),
-			     adapter->fsf_req_seq_no);
-		}
+
 		/* count FSF requests pending */
 		atomic_inc(&adapter->fsf_reqs_active);
 	}
diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h
index 07140dfda2a..48719f05595 100644
--- a/drivers/s390/scsi/zfcp_fsf.h
+++ b/drivers/s390/scsi/zfcp_fsf.h
@@ -116,6 +116,7 @@
 #define FSF_INVALID_COMMAND_OPTION              0x000000E5
 /* #define FSF_ERROR                             0x000000FF  */
 
+#define FSF_PROT_STATUS_QUAL_SIZE		16
 #define FSF_STATUS_QUALIFIER_SIZE		16
 
 /* FSF status qualifier, recommendations */
@@ -139,9 +140,18 @@
 #define FSF_SQ_CFDC_SUBTABLE_LUN		0x0004
 
 /* FSF status qualifier (most significant 4 bytes), local link down */
-#define FSF_PSQ_LINK_NOLIGHT			0x00000004
-#define FSF_PSQ_LINK_WRAPPLUG			0x00000008
-#define FSF_PSQ_LINK_NOFCP			0x00000010
+#define FSF_PSQ_LINK_NO_LIGHT			0x00000004
+#define FSF_PSQ_LINK_WRAP_PLUG			0x00000008
+#define FSF_PSQ_LINK_NO_FCP			0x00000010
+#define FSF_PSQ_LINK_FIRMWARE_UPDATE		0x00000020
+#define FSF_PSQ_LINK_INVALID_WWPN		0x00000100
+#define FSF_PSQ_LINK_NO_NPIV_SUPPORT		0x00000200
+#define FSF_PSQ_LINK_NO_FCP_RESOURCES		0x00000400
+#define FSF_PSQ_LINK_NO_FABRIC_RESOURCES	0x00000800
+#define FSF_PSQ_LINK_FABRIC_LOGIN_UNABLE	0x00001000
+#define FSF_PSQ_LINK_WWPN_ASSIGNMENT_CORRUPTED	0x00002000
+#define FSF_PSQ_LINK_MODE_TABLE_CURRUPTED	0x00004000
+#define FSF_PSQ_LINK_NO_WWPN_ASSIGNMENT		0x00008000
 
 /* payload size in status read buffer */
 #define FSF_STATUS_READ_PAYLOAD_SIZE		4032
@@ -154,15 +164,21 @@
 #define FSF_STATUS_READ_INCOMING_ELS		0x00000002
 #define FSF_STATUS_READ_SENSE_DATA_AVAIL        0x00000003
 #define FSF_STATUS_READ_BIT_ERROR_THRESHOLD	0x00000004
-#define FSF_STATUS_READ_LINK_DOWN		0x00000005 /* FIXME: really? */
+#define FSF_STATUS_READ_LINK_DOWN		0x00000005
 #define FSF_STATUS_READ_LINK_UP          	0x00000006
 #define FSF_STATUS_READ_CFDC_UPDATED		0x0000000A
 #define FSF_STATUS_READ_CFDC_HARDENED		0x0000000B
+#define FSF_STATUS_READ_FEATURE_UPDATE_ALERT	0x0000000C
 
 /* status subtypes in status read buffer */
 #define FSF_STATUS_READ_SUB_CLOSE_PHYS_PORT	0x00000001
 #define FSF_STATUS_READ_SUB_ERROR_PORT		0x00000002
 
+/* status subtypes for link down */
+#define FSF_STATUS_READ_SUB_NO_PHYSICAL_LINK	0x00000000
+#define FSF_STATUS_READ_SUB_FDISC_FAILED	0x00000001
+#define FSF_STATUS_READ_SUB_FIRMWARE_UPDATE	0x00000002
+
 /* status subtypes for CFDC */
 #define FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE	0x00000002
 #define FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE2 0x0000000F
@@ -193,11 +209,15 @@
 #define FSF_QTCB_LOG_SIZE			1024
 
 /* channel features */
-#define FSF_FEATURE_QTCB_SUPPRESSION            0x00000001
 #define FSF_FEATURE_CFDC			0x00000002
 #define FSF_FEATURE_LUN_SHARING			0x00000004
 #define FSF_FEATURE_HBAAPI_MANAGEMENT           0x00000010
 #define FSF_FEATURE_ELS_CT_CHAINED_SBALS        0x00000020
+#define FSF_FEATURE_UPDATE_ALERT		0x00000100
+
+/* host connection features */
+#define FSF_FEATURE_NPIV_MODE			0x00000001
+#define FSF_FEATURE_VM_ASSIGNED_WWPN		0x00000002
 
 /* option */
 #define FSF_OPEN_LUN_SUPPRESS_BOXING		0x00000001
@@ -305,15 +325,23 @@ struct fsf_qual_sequence_error {
 	u32 res1[3];
 } __attribute__ ((packed));
 
-struct fsf_qual_locallink_error {
-	u32 code;
-	u32 res1[3];
+struct fsf_link_down_info {
+	u32 error_code;
+	u32 res1;
+	u8 res2[2];
+	u8 primary_status;
+	u8 ioerr_code;
+	u8 action_code;
+	u8 reason_code;
+	u8 explanation_code;
+	u8 vendor_specific_code;
 } __attribute__ ((packed));
 
 union fsf_prot_status_qual {
+	u64 doubleword[FSF_PROT_STATUS_QUAL_SIZE / sizeof(u64)];
 	struct fsf_qual_version_error   version_error;
 	struct fsf_qual_sequence_error  sequence_error;
-	struct fsf_qual_locallink_error locallink_error;
+	struct fsf_link_down_info link_down_info;
 } __attribute__ ((packed));
 
 struct fsf_qtcb_prefix {
@@ -331,7 +359,9 @@ union fsf_status_qual {
 	u8  byte[FSF_STATUS_QUALIFIER_SIZE];
 	u16 halfword[FSF_STATUS_QUALIFIER_SIZE / sizeof (u16)];
 	u32 word[FSF_STATUS_QUALIFIER_SIZE / sizeof (u32)];
+	u64 doubleword[FSF_STATUS_QUALIFIER_SIZE / sizeof(u64)];
 	struct fsf_queue_designator fsf_queue_designator;
+	struct fsf_link_down_info link_down_info;
 } __attribute__ ((packed));
 
 struct fsf_qtcb_header {
@@ -406,8 +436,8 @@ struct fsf_qtcb_bottom_config {
 	u32 low_qtcb_version;
 	u32 max_qtcb_size;
 	u32 max_data_transfer_size;
-	u32 supported_features;
-	u8  res1[4];
+	u32 adapter_features;
+	u32 connection_features;
 	u32 fc_topology;
 	u32 fc_link_speed;
 	u32 adapter_type;
@@ -425,7 +455,7 @@ struct fsf_qtcb_bottom_config {
 } __attribute__ ((packed));
 
 struct fsf_qtcb_bottom_port {
-	u8 res1[8];
+	u64 wwpn;
 	u32 fc_port_id;
 	u32 port_type;
 	u32 port_state;
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 24e16ec331d..d719f66a29a 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -54,8 +54,7 @@ static inline int zfcp_qdio_sbals_from_buffer
 static qdio_handler_t zfcp_qdio_request_handler;
 static qdio_handler_t zfcp_qdio_response_handler;
 static int zfcp_qdio_handler_error_check(struct zfcp_adapter *,
-					 unsigned int,
-					 unsigned int, unsigned int);
+	unsigned int, unsigned int, unsigned int, int, int);
 
 #define ZFCP_LOG_AREA                   ZFCP_LOG_AREA_QDIO
 
@@ -214,22 +213,12 @@ zfcp_qdio_allocate(struct zfcp_adapter *adapter)
  *
  */
 static inline int
-zfcp_qdio_handler_error_check(struct zfcp_adapter *adapter,
-			      unsigned int status,
-			      unsigned int qdio_error, unsigned int siga_error)
+zfcp_qdio_handler_error_check(struct zfcp_adapter *adapter, unsigned int status,
+			      unsigned int qdio_error, unsigned int siga_error,
+			      int first_element, int elements_processed)
 {
 	int retval = 0;
 
-	if (ZFCP_LOG_CHECK(ZFCP_LOG_LEVEL_TRACE)) {
-		if (status & QDIO_STATUS_INBOUND_INT) {
-			ZFCP_LOG_TRACE("status is"
-				       " QDIO_STATUS_INBOUND_INT \n");
-		}
-		if (status & QDIO_STATUS_OUTBOUND_INT) {
-			ZFCP_LOG_TRACE("status is"
-				       " QDIO_STATUS_OUTBOUND_INT \n");
-		}
-	}
 	if (unlikely(status & QDIO_STATUS_LOOK_FOR_ERROR)) {
 		retval = -EIO;
 
@@ -237,9 +226,10 @@ zfcp_qdio_handler_error_check(struct zfcp_adapter *adapter,
 			      "qdio_error=0x%x, siga_error=0x%x)\n",
 			      status, qdio_error, siga_error);
 
-		/* Restarting IO on the failed adapter from scratch */
-		debug_text_event(adapter->erp_dbf, 1, "qdio_err");
+		zfcp_hba_dbf_event_qdio(adapter, status, qdio_error, siga_error,
+				first_element, elements_processed);
                /*
+               	* Restarting IO on the failed adapter from scratch.
                 * Since we have been using this adapter, it is save to assume
                 * that it is not failed but recoverable. The card seems to
                 * report link-up events by self-initiated queue shutdown.
@@ -282,7 +272,8 @@ zfcp_qdio_request_handler(struct ccw_device *ccw_device,
 		       first_element, elements_processed);
 
 	if (unlikely(zfcp_qdio_handler_error_check(adapter, status, qdio_error,
-					           siga_error)))
+						   siga_error, first_element,
+						   elements_processed)))
 		goto out;
 	/*
 	 * we stored address of struct zfcp_adapter  data structure
@@ -334,7 +325,8 @@ zfcp_qdio_response_handler(struct ccw_device *ccw_device,
 	queue = &adapter->response_queue;
 
 	if (unlikely(zfcp_qdio_handler_error_check(adapter, status, qdio_error,
-					           siga_error)))
+						   siga_error, first_element,
+						   elements_processed)))
 		goto out;
 
 	/*
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 31a76065cf2..3dcd1bfba3b 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -44,7 +44,8 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *);
 static int zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *);
 static int zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *);
 static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *);
-static int zfcp_task_management_function(struct zfcp_unit *, u8);
+static int zfcp_task_management_function(struct zfcp_unit *, u8,
+					 struct scsi_cmnd *);
 
 static struct zfcp_unit *zfcp_unit_lookup(struct zfcp_adapter *, int, scsi_id_t,
 					  scsi_lun_t);
@@ -242,7 +243,10 @@ static void
 zfcp_scsi_command_fail(struct scsi_cmnd *scpnt, int result)
 {
 	set_host_byte(&scpnt->result, result);
-	zfcp_cmd_dbf_event_scsi("failing", scpnt);
+	if ((scpnt->device != NULL) && (scpnt->device->host != NULL))
+		zfcp_scsi_dbf_event_result("fail", 4,
+			(struct zfcp_adapter*) scpnt->device->host->hostdata[0],
+			scpnt);
 	/* return directly */
 	scpnt->scsi_done(scpnt);
 }
@@ -414,67 +418,38 @@ zfcp_port_lookup(struct zfcp_adapter *adapter, int channel, scsi_id_t id)
 	return (struct zfcp_port *) NULL;
 }
 
-/*
- * function:	zfcp_scsi_eh_abort_handler
- *
- * purpose:	tries to abort the specified (timed out) SCSI command
- *
- * note: 	We do not need to care for a SCSI command which completes
- *		normally but late during this abort routine runs.
- *		We are allowed to return late commands to the SCSI stack.
- *		It tracks the state of commands and will handle late commands.
- *		(Usually, the normal completion of late commands is ignored with
- *		respect to the running abort operation. Grep for 'done_late'
- *		in the SCSI stacks sources.)
+/**
+ * zfcp_scsi_eh_abort_handler - abort the specified SCSI command
+ * @scpnt: pointer to scsi_cmnd to be aborted 
+ * Return: SUCCESS - command has been aborted and cleaned up in internal
+ *          bookkeeping, SCSI stack won't be called for aborted command
+ *         FAILED - otherwise
  *
- * returns:	SUCCESS	- command has been aborted and cleaned up in internal
- *			  bookkeeping,
- *			  SCSI stack won't be called for aborted command
- *		FAILED	- otherwise
+ * We do not need to care for a SCSI command which completes normally
+ * but late during this abort routine runs.  We are allowed to return
+ * late commands to the SCSI stack.  It tracks the state of commands and
+ * will handle late commands.  (Usually, the normal completion of late
+ * commands is ignored with respect to the running abort operation.)
  */
 int
-__zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
+zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
 {
+ 	struct Scsi_Host *scsi_host;
+ 	struct zfcp_adapter *adapter;
+	struct zfcp_unit *unit;
 	int retval = SUCCESS;
-	struct zfcp_fsf_req *new_fsf_req, *old_fsf_req;
-	struct zfcp_adapter *adapter = (struct zfcp_adapter *) scpnt->device->host->hostdata[0];
-	struct zfcp_unit *unit = (struct zfcp_unit *) scpnt->device->hostdata;
-	struct zfcp_port *port = unit->port;
-	struct Scsi_Host *scsi_host = scpnt->device->host;
-	union zfcp_req_data *req_data = NULL;
+	struct zfcp_fsf_req *new_fsf_req = NULL;
+	struct zfcp_fsf_req *old_fsf_req;
 	unsigned long flags;
-	u32 status = 0;
-
-	/* the components of a abort_dbf record (fixed size record) */
-	u64 dbf_scsi_cmnd = (unsigned long) scpnt;
-	char dbf_opcode[ZFCP_ABORT_DBF_LENGTH];
-	wwn_t dbf_wwn = port->wwpn;
-	fcp_lun_t dbf_fcp_lun = unit->fcp_lun;
-	u64 dbf_retries = scpnt->retries;
-	u64 dbf_allowed = scpnt->allowed;
-	u64 dbf_timeout = 0;
-	u64 dbf_fsf_req = 0;
-	u64 dbf_fsf_status = 0;
-	u64 dbf_fsf_qual[2] = { 0, 0 };
-	char dbf_result[ZFCP_ABORT_DBF_LENGTH] = "##undef";
-
-	memset(dbf_opcode, 0, ZFCP_ABORT_DBF_LENGTH);
-	memcpy(dbf_opcode,
-	       scpnt->cmnd,
-	       min(scpnt->cmd_len, (unsigned char) ZFCP_ABORT_DBF_LENGTH));
+
+	scsi_host = scpnt->device->host;
+	adapter = (struct zfcp_adapter *) scsi_host->hostdata[0];
+	unit = (struct zfcp_unit *) scpnt->device->hostdata;
 
 	ZFCP_LOG_INFO("aborting scsi_cmnd=%p on adapter %s\n",
 		      scpnt, zfcp_get_busid_by_adapter(adapter));
 
-	spin_unlock_irq(scsi_host->host_lock);
-
-	/*
-	 * Race condition between normal (late) completion and abort has
-	 * to be avoided.
-	 * The entirity of all accesses to scsi_req have to be atomic.
-	 * scsi_req is usually part of the fsf_req and thus we block the
-	 * release of fsf_req as long as we need to access scsi_req.
-	 */
+	/* avoid race condition between late normal completion and abort */
 	write_lock_irqsave(&adapter->abort_lock, flags);
 
 	/*
@@ -484,144 +459,47 @@ __zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
 	 * this routine returns. (scpnt is parameter passed to this routine
 	 * and must not disappear during abort even on late completion.)
 	 */
-	req_data = (union zfcp_req_data *) scpnt->host_scribble;
-	/* DEBUG */
-	ZFCP_LOG_DEBUG("req_data=%p\n", req_data);
-	if (!req_data) {
-		ZFCP_LOG_DEBUG("late command completion overtook abort\n");
-		/*
-		 * That's it.
-		 * Do not initiate abort but return SUCCESS.
-		 */
-		write_unlock_irqrestore(&adapter->abort_lock, flags);
-		retval = SUCCESS;
-		strncpy(dbf_result, "##late1", ZFCP_ABORT_DBF_LENGTH);
-		goto out;
-	}
-
-	/* Figure out which fsf_req needs to be aborted. */
-	old_fsf_req = req_data->send_fcp_command_task.fsf_req;
-
-	dbf_fsf_req = (unsigned long) old_fsf_req;
-	dbf_timeout =
-	    (jiffies - req_data->send_fcp_command_task.start_jiffies) / HZ;
-
-	ZFCP_LOG_DEBUG("old_fsf_req=%p\n", old_fsf_req);
+	old_fsf_req = (struct zfcp_fsf_req *) scpnt->host_scribble;
 	if (!old_fsf_req) {
 		write_unlock_irqrestore(&adapter->abort_lock, flags);
-		ZFCP_LOG_NORMAL("bug: no old fsf request found\n");
-		ZFCP_LOG_NORMAL("req_data:\n");
-		ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL,
-			      (char *) req_data, sizeof (union zfcp_req_data));
-		ZFCP_LOG_NORMAL("scsi_cmnd:\n");
-		ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL,
-			      (char *) scpnt, sizeof (struct scsi_cmnd));
-		retval = FAILED;
-		strncpy(dbf_result, "##bug:r", ZFCP_ABORT_DBF_LENGTH);
+		zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, new_fsf_req);
+		retval = SUCCESS;
 		goto out;
 	}
-	old_fsf_req->data.send_fcp_command_task.scsi_cmnd = NULL;
-	/* mark old request as being aborted */
+	old_fsf_req->data = 0;
 	old_fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING;
-	/*
-	 * We have to collect all information (e.g. unit) needed by 
-	 * zfcp_fsf_abort_fcp_command before calling that routine
-	 * since that routine is not allowed to access
-	 * fsf_req which it is going to abort.
-	 * This is because of we need to release fsf_req_list_lock
-	 * before calling zfcp_fsf_abort_fcp_command.
-	 * Since this lock will not be held, fsf_req may complete
-	 * late and may be released meanwhile.
-	 */
-	ZFCP_LOG_DEBUG("unit 0x%016Lx (%p)\n", unit->fcp_lun, unit);
 
-	/*
-	 * We block (call schedule)
-	 * That's why we must release the lock and enable the
-	 * interrupts before.
-	 * On the other hand we do not need the lock anymore since
-	 * all critical accesses to scsi_req are done.
-	 */
+	/* don't access old_fsf_req after releasing the abort_lock */
 	write_unlock_irqrestore(&adapter->abort_lock, flags);
 	/* call FSF routine which does the abort */
 	new_fsf_req = zfcp_fsf_abort_fcp_command((unsigned long) old_fsf_req,
 						 adapter, unit, 0);
-	ZFCP_LOG_DEBUG("new_fsf_req=%p\n", new_fsf_req);
 	if (!new_fsf_req) {
+		ZFCP_LOG_INFO("error: initiation of Abort FCP Cmnd failed\n");
 		retval = FAILED;
-		ZFCP_LOG_NORMAL("error: initiation of Abort FCP Cmnd "
-				"failed\n");
-		strncpy(dbf_result, "##nores", ZFCP_ABORT_DBF_LENGTH);
 		goto out;
 	}
 
 	/* wait for completion of abort */
-	ZFCP_LOG_DEBUG("waiting for cleanup...\n");
-#if 1
-	/*
-	 * FIXME:
-	 * copying zfcp_fsf_req_wait_and_cleanup code is not really nice
-	 */
 	__wait_event(new_fsf_req->completion_wq,
 		     new_fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
-	status = new_fsf_req->status;
-	dbf_fsf_status = new_fsf_req->qtcb->header.fsf_status;
-	/*
-	 * Ralphs special debug load provides timestamps in the FSF
-	 * status qualifier. This might be specified later if being
-	 * useful for debugging aborts.
-	 */
-	dbf_fsf_qual[0] =
-	    *(u64 *) & new_fsf_req->qtcb->header.fsf_status_qual.word[0];
-	dbf_fsf_qual[1] =
-	    *(u64 *) & new_fsf_req->qtcb->header.fsf_status_qual.word[2];
-	zfcp_fsf_req_free(new_fsf_req);
-#else
-	retval = zfcp_fsf_req_wait_and_cleanup(new_fsf_req,
-					       ZFCP_UNINTERRUPTIBLE, &status);
-#endif
-	ZFCP_LOG_DEBUG("Waiting for cleanup complete, status=0x%x\n", status);
+
 	/* status should be valid since signals were not permitted */
-	if (status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) {
+	if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) {
+		zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, new_fsf_req);
 		retval = SUCCESS;
-		strncpy(dbf_result, "##succ", ZFCP_ABORT_DBF_LENGTH);
-	} else if (status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) {
+	} else if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) {
+		zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, new_fsf_req);
 		retval = SUCCESS;
-		strncpy(dbf_result, "##late2", ZFCP_ABORT_DBF_LENGTH);
 	} else {
+		zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, new_fsf_req);
 		retval = FAILED;
-		strncpy(dbf_result, "##fail", ZFCP_ABORT_DBF_LENGTH);
 	}
-
+	zfcp_fsf_req_free(new_fsf_req);
  out:
-	debug_event(adapter->abort_dbf, 1, &dbf_scsi_cmnd, sizeof (u64));
-	debug_event(adapter->abort_dbf, 1, &dbf_opcode, ZFCP_ABORT_DBF_LENGTH);
-	debug_event(adapter->abort_dbf, 1, &dbf_wwn, sizeof (wwn_t));
-	debug_event(adapter->abort_dbf, 1, &dbf_fcp_lun, sizeof (fcp_lun_t));
-	debug_event(adapter->abort_dbf, 1, &dbf_retries, sizeof (u64));
-	debug_event(adapter->abort_dbf, 1, &dbf_allowed, sizeof (u64));
-	debug_event(adapter->abort_dbf, 1, &dbf_timeout, sizeof (u64));
-	debug_event(adapter->abort_dbf, 1, &dbf_fsf_req, sizeof (u64));
-	debug_event(adapter->abort_dbf, 1, &dbf_fsf_status, sizeof (u64));
-	debug_event(adapter->abort_dbf, 1, &dbf_fsf_qual[0], sizeof (u64));
-	debug_event(adapter->abort_dbf, 1, &dbf_fsf_qual[1], sizeof (u64));
-	debug_text_event(adapter->abort_dbf, 1, dbf_result);
-
-	spin_lock_irq(scsi_host->host_lock);
 	return retval;
 }
 
-int
-zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt)
-{
-	int rc;
-	struct Scsi_Host *scsi_host = scpnt->device->host;
-	spin_lock_irq(scsi_host->host_lock);
-	rc = __zfcp_scsi_eh_abort_handler(scpnt);
-	spin_unlock_irq(scsi_host->host_lock);
-	return rc;
-}
-
 /*
  * function:	zfcp_scsi_eh_device_reset_handler
  *
@@ -651,8 +529,9 @@ zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt)
 	 */
 	if (!atomic_test_mask(ZFCP_STATUS_UNIT_NOTSUPPUNITRESET,
 			      &unit->status)) {
-		retval =
-		    zfcp_task_management_function(unit, FCP_LOGICAL_UNIT_RESET);
+		retval = zfcp_task_management_function(unit,
+						       FCP_LOGICAL_UNIT_RESET,
+						       scpnt);
 		if (retval) {
 			ZFCP_LOG_DEBUG("unit reset failed (unit=%p)\n", unit);
 			if (retval == -ENOTSUPP)
@@ -668,7 +547,7 @@ zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt)
 			goto out;
 		}
 	}
-	retval = zfcp_task_management_function(unit, FCP_TARGET_RESET);
+	retval = zfcp_task_management_function(unit, FCP_TARGET_RESET, scpnt);
 	if (retval) {
 		ZFCP_LOG_DEBUG("target reset failed (unit=%p)\n", unit);
 		retval = FAILED;
@@ -681,12 +560,12 @@ zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt)
 }
 
 static int
-zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags)
+zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags,
+			      struct scsi_cmnd *scpnt)
 {
 	struct zfcp_adapter *adapter = unit->port->adapter;
-	int retval;
-	int status;
 	struct zfcp_fsf_req *fsf_req;
+	int retval = 0;
 
 	/* issue task management function */
 	fsf_req = zfcp_fsf_send_fcp_command_task_management
@@ -696,70 +575,63 @@ zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags)
 			      "failed for unit 0x%016Lx on port 0x%016Lx on  "
 			      "adapter %s\n", unit->fcp_lun, unit->port->wwpn,
 			      zfcp_get_busid_by_adapter(adapter));
+		zfcp_scsi_dbf_event_devreset("nres", tm_flags, unit, scpnt);
 		retval = -ENOMEM;
 		goto out;
 	}
 
-	retval = zfcp_fsf_req_wait_and_cleanup(fsf_req,
-					       ZFCP_UNINTERRUPTIBLE, &status);
+	__wait_event(fsf_req->completion_wq,
+		     fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED);
+
 	/*
 	 * check completion status of task management function
-	 * (status should always be valid since no signals permitted)
 	 */
-	if (status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED)
+	if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) {
+		zfcp_scsi_dbf_event_devreset("fail", tm_flags, unit, scpnt);
 		retval = -EIO;
-	else if (status & ZFCP_STATUS_FSFREQ_TMFUNCNOTSUPP)
+	} else if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCNOTSUPP) {
+		zfcp_scsi_dbf_event_devreset("nsup", tm_flags, unit, scpnt);
 		retval = -ENOTSUPP;
-	else
-		retval = 0;
+	} else
+		zfcp_scsi_dbf_event_devreset("okay", tm_flags, unit, scpnt);
+
+	zfcp_fsf_req_free(fsf_req);
  out:
 	return retval;
 }
 
-/*
- * function:	zfcp_scsi_eh_bus_reset_handler
- *
- * purpose:
- *
- * returns:
+/**
+ * zfcp_scsi_eh_bus_reset_handler - reset bus (reopen adapter)
  */
 int
 zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *scpnt)
 {
-	int retval = 0;
-	struct zfcp_unit *unit;
+	struct zfcp_unit *unit = (struct zfcp_unit*) scpnt->device->hostdata;
+	struct zfcp_adapter *adapter = unit->port->adapter;
 
-	unit = (struct zfcp_unit *) scpnt->device->hostdata;
 	ZFCP_LOG_NORMAL("bus reset because of problems with "
 			"unit 0x%016Lx\n", unit->fcp_lun);
-	zfcp_erp_adapter_reopen(unit->port->adapter, 0);
-	zfcp_erp_wait(unit->port->adapter);
-	retval = SUCCESS;
+	zfcp_erp_adapter_reopen(adapter, 0);
+	zfcp_erp_wait(adapter);
 
-	return retval;
+	return SUCCESS;
 }
 
-/*
- * function:	zfcp_scsi_eh_host_reset_handler
- *
- * purpose:
- *
- * returns:
+/**
+ * zfcp_scsi_eh_host_reset_handler - reset host (reopen adapter)
  */
 int
 zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt)
 {
-	int retval = 0;
-	struct zfcp_unit *unit;
+	struct zfcp_unit *unit = (struct zfcp_unit*) scpnt->device->hostdata;
+	struct zfcp_adapter *adapter = unit->port->adapter;
 
-	unit = (struct zfcp_unit *) scpnt->device->hostdata;
 	ZFCP_LOG_NORMAL("host reset because of problems with "
 			"unit 0x%016Lx\n", unit->fcp_lun);
-	zfcp_erp_adapter_reopen(unit->port->adapter, 0);
-	zfcp_erp_wait(unit->port->adapter);
-	retval = SUCCESS;
+	zfcp_erp_adapter_reopen(adapter, 0);
+	zfcp_erp_wait(adapter);
 
-	return retval;
+	return SUCCESS;
 }
 
 /*
@@ -826,10 +698,16 @@ void
 zfcp_adapter_scsi_unregister(struct zfcp_adapter *adapter)
 {
 	struct Scsi_Host *shost;
+	struct zfcp_port *port;
 
 	shost = adapter->scsi_host;
 	if (!shost)
 		return;
+	read_lock_irq(&zfcp_data.config_lock);
+	list_for_each_entry(port, &adapter->port_list_head, list)
+		if (port->rport)
+			port->rport = NULL;
+	read_unlock_irq(&zfcp_data.config_lock);
 	fc_remove_host(shost);
 	scsi_remove_host(shost);
 	scsi_host_put(shost);
@@ -904,18 +782,6 @@ zfcp_get_node_name(struct scsi_target *starget)
 	read_unlock_irqrestore(&zfcp_data.config_lock, flags);
 }
 
-void
-zfcp_set_fc_host_attrs(struct zfcp_adapter *adapter)
-{
-	struct Scsi_Host *shost = adapter->scsi_host;
-
-	fc_host_node_name(shost) = adapter->wwnn;
-	fc_host_port_name(shost) = adapter->wwpn;
-	strncpy(fc_host_serial_number(shost), adapter->serial_number,
-                min(FC_SERIAL_NUMBER_SIZE, 32));
-	fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3;
-}
-
 struct fc_function_template zfcp_transport_functions = {
 	.get_starget_port_id = zfcp_get_port_id,
 	.get_starget_port_name = zfcp_get_port_name,
@@ -927,7 +793,10 @@ struct fc_function_template zfcp_transport_functions = {
 	.show_host_node_name = 1,
 	.show_host_port_name = 1,
 	.show_host_supported_classes = 1,
+	.show_host_maxframe_size = 1,
 	.show_host_serial_number = 1,
+	.show_host_speed = 1,
+	.show_host_port_id = 1,
 };
 
 /**
diff --git a/drivers/s390/scsi/zfcp_sysfs_adapter.c b/drivers/s390/scsi/zfcp_sysfs_adapter.c
index e7345a74800..0cd435280e7 100644
--- a/drivers/s390/scsi/zfcp_sysfs_adapter.c
+++ b/drivers/s390/scsi/zfcp_sysfs_adapter.c
@@ -62,21 +62,18 @@ static ssize_t zfcp_sysfs_adapter_##_name##_show(struct device *dev, struct devi
 static DEVICE_ATTR(_name, S_IRUGO, zfcp_sysfs_adapter_##_name##_show, NULL);
 
 ZFCP_DEFINE_ADAPTER_ATTR(status, "0x%08x\n", atomic_read(&adapter->status));
-ZFCP_DEFINE_ADAPTER_ATTR(wwnn, "0x%016llx\n", adapter->wwnn);
-ZFCP_DEFINE_ADAPTER_ATTR(wwpn, "0x%016llx\n", adapter->wwpn);
-ZFCP_DEFINE_ADAPTER_ATTR(s_id, "0x%06x\n", adapter->s_id);
 ZFCP_DEFINE_ADAPTER_ATTR(peer_wwnn, "0x%016llx\n", adapter->peer_wwnn);
 ZFCP_DEFINE_ADAPTER_ATTR(peer_wwpn, "0x%016llx\n", adapter->peer_wwpn);
 ZFCP_DEFINE_ADAPTER_ATTR(peer_d_id, "0x%06x\n", adapter->peer_d_id);
+ZFCP_DEFINE_ADAPTER_ATTR(physical_wwpn, "0x%016llx\n", adapter->physical_wwpn);
+ZFCP_DEFINE_ADAPTER_ATTR(physical_s_id, "0x%06x\n", adapter->physical_s_id);
 ZFCP_DEFINE_ADAPTER_ATTR(card_version, "0x%04x\n", adapter->hydra_version);
 ZFCP_DEFINE_ADAPTER_ATTR(lic_version, "0x%08x\n", adapter->fsf_lic_version);
-ZFCP_DEFINE_ADAPTER_ATTR(fc_link_speed, "%d Gb/s\n", adapter->fc_link_speed);
 ZFCP_DEFINE_ADAPTER_ATTR(fc_service_class, "%d\n", adapter->fc_service_class);
 ZFCP_DEFINE_ADAPTER_ATTR(fc_topology, "%s\n",
 			 fc_topologies[adapter->fc_topology]);
 ZFCP_DEFINE_ADAPTER_ATTR(hardware_version, "0x%08x\n",
 			 adapter->hardware_version);
-ZFCP_DEFINE_ADAPTER_ATTR(serial_number, "%17s\n", adapter->serial_number);
 ZFCP_DEFINE_ADAPTER_ATTR(scsi_host_no, "0x%x\n", adapter->scsi_host_no);
 ZFCP_DEFINE_ADAPTER_ATTR(in_recovery, "%d\n", atomic_test_mask
 			 (ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status));
@@ -255,21 +252,18 @@ static struct attribute *zfcp_adapter_attrs[] = {
 	&dev_attr_in_recovery.attr,
 	&dev_attr_port_remove.attr,
 	&dev_attr_port_add.attr,
-	&dev_attr_wwnn.attr,
-	&dev_attr_wwpn.attr,
-	&dev_attr_s_id.attr,
 	&dev_attr_peer_wwnn.attr,
 	&dev_attr_peer_wwpn.attr,
 	&dev_attr_peer_d_id.attr,
+	&dev_attr_physical_wwpn.attr,
+	&dev_attr_physical_s_id.attr,
 	&dev_attr_card_version.attr,
 	&dev_attr_lic_version.attr,
-	&dev_attr_fc_link_speed.attr,
 	&dev_attr_fc_service_class.attr,
 	&dev_attr_fc_topology.attr,
 	&dev_attr_scsi_host_no.attr,
 	&dev_attr_status.attr,
 	&dev_attr_hardware_version.attr,
-	&dev_attr_serial_number.attr,
 	NULL
 };
 
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c
index c932b3b9449..876d1de8480 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c
@@ -1109,15 +1109,6 @@ ahc_linux_register_host(struct ahc_softc *ahc, struct scsi_host_template *templa
 	return (0);
 }
 
-uint64_t
-ahc_linux_get_memsize(void)
-{
-	struct sysinfo si;
-
-	si_meminfo(&si);
-	return ((uint64_t)si.totalram << PAGE_SHIFT);
-}
-
 /*
  * Place the SCSI bus into a known state by either resetting it,
  * or forcing transfer negotiations on the next command to any
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.h b/drivers/scsi/aic7xxx/aic7xxx_osm.h
index c5299626924..be9edbe26db 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm.h
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm.h
@@ -494,8 +494,6 @@ ahc_insb(struct ahc_softc * ahc, long port, uint8_t *array, int count)
 int		ahc_linux_register_host(struct ahc_softc *,
 					struct scsi_host_template *);
 
-uint64_t	ahc_linux_get_memsize(void);
-
 /*************************** Pretty Printing **********************************/
 struct info_str {
 	char *buffer;
diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c
index 0d44a6907dd..3ce77ddc889 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c
@@ -180,6 +180,7 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct		 ahc_pci_identity *entry;
 	char		*name;
 	int		 error;
+	struct device	*dev = &pdev->dev;
 
 	pci = pdev;
 	entry = ahc_find_pci_device(pci);
@@ -209,11 +210,12 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pci_set_master(pdev);
 
 	if (sizeof(dma_addr_t) > 4
-	 && ahc_linux_get_memsize() > 0x80000000
-	 && pci_set_dma_mask(pdev, mask_39bit) == 0) {
+	    && ahc->features & AHC_LARGE_SCBS
+	    && dma_set_mask(dev, mask_39bit) == 0
+	    && dma_get_required_mask(dev) > DMA_32BIT_MASK) {
 		ahc->flags |= AHC_39BIT_ADDRESSING;
 	} else {
-		if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
+		if (dma_set_mask(dev, DMA_32BIT_MASK)) {
 			printk(KERN_WARNING "aic7xxx: No suitable DMA available.\n");
                 	return (-ENODEV);
 		}
diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c
index 87e0c36f155..d71cef767ce 100644
--- a/drivers/scsi/ata_piix.c
+++ b/drivers/scsi/ata_piix.c
@@ -442,7 +442,6 @@ static void piix_sata_phy_reset(struct ata_port *ap)
  *	piix_set_piomode - Initialize host controller PATA PIO timings
  *	@ap: Port whose timings we are configuring
  *	@adev: um
- *	@pio: PIO mode, 0 - 4
  *
  *	Set PIO mode for device, in host controller PCI config space.
  *
diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c
index e6153fe5842..a8cfbef304b 100644
--- a/drivers/scsi/atp870u.c
+++ b/drivers/scsi/atp870u.c
@@ -996,6 +996,7 @@ oktosend:
 #ifdef ED_DBGP		
 	printk("send_s870: prdaddr_2 0x%8x tmpcip %x target_id %d\n", dev->id[c][target_id].prdaddr,tmpcip,target_id);
 #endif	
+	dev->id[c][target_id].prdaddr = dev->id[c][target_id].prd_bus;
 	outl(dev->id[c][target_id].prdaddr, tmpcip);
 	tmpcip = tmpcip - 2;
 	outb(0x06, tmpcip);
@@ -2572,7 +2573,7 @@ static void atp870u_free_tables(struct Scsi_Host *host)
 		for (k = 0; k < 16; k++) {
 			if (!atp_dev->id[j][k].prd_table)
 				continue;
-			pci_free_consistent(atp_dev->pdev, 1024, atp_dev->id[j][k].prd_table, atp_dev->id[j][k].prdaddr);
+			pci_free_consistent(atp_dev->pdev, 1024, atp_dev->id[j][k].prd_table, atp_dev->id[j][k].prd_bus);
 			atp_dev->id[j][k].prd_table = NULL;
 		}
 	}
@@ -2584,12 +2585,13 @@ static int atp870u_init_tables(struct Scsi_Host *host)
 	int c,k;
 	for(c=0;c < 2;c++) {
 	   	for(k=0;k<16;k++) {
-	   			atp_dev->id[c][k].prd_table = pci_alloc_consistent(atp_dev->pdev, 1024, &(atp_dev->id[c][k].prdaddr));
+	   			atp_dev->id[c][k].prd_table = pci_alloc_consistent(atp_dev->pdev, 1024, &(atp_dev->id[c][k].prd_bus));
 	   			if (!atp_dev->id[c][k].prd_table) {
 	   				printk("atp870u_init_tables fail\n");
 				atp870u_free_tables(host);
 				return -ENOMEM;
 			}
+			atp_dev->id[c][k].prdaddr = atp_dev->id[c][k].prd_bus;
 			atp_dev->id[c][k].devsp=0x20;
 			atp_dev->id[c][k].devtype = 0x7f;
 			atp_dev->id[c][k].curr_req = NULL;			   
diff --git a/drivers/scsi/atp870u.h b/drivers/scsi/atp870u.h
index 89f43af39cf..62bae64a01c 100644
--- a/drivers/scsi/atp870u.h
+++ b/drivers/scsi/atp870u.h
@@ -54,8 +54,9 @@ struct atp_unit
 		unsigned long tran_len;
 		unsigned long last_len;
 		unsigned char *prd_pos;
-		unsigned char *prd_table;
-		dma_addr_t prdaddr;
+		unsigned char *prd_table;	/* Kernel address of PRD table */
+		dma_addr_t prd_bus;		/* Bus address of PRD */
+		dma_addr_t prdaddr;		/* Dynamically updated in driver */
 		struct scsi_cmnd *curr_req;
 	} id[2][16];
     	struct Scsi_Host *host;
diff --git a/drivers/scsi/fd_mcs.c b/drivers/scsi/fd_mcs.c
index fa652f8aa64..d59d449a9e4 100644
--- a/drivers/scsi/fd_mcs.c
+++ b/drivers/scsi/fd_mcs.c
@@ -1360,3 +1360,5 @@ static Scsi_Host_Template driver_template = {
 	.use_clustering 		= DISABLE_CLUSTERING,
 };
 #include "scsi_module.c"
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 85503fad789..f2a72d33132 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -98,6 +98,7 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state)
 		switch (oldstate) {
 		case SHOST_CREATED:
 		case SHOST_RUNNING:
+		case SHOST_CANCEL_RECOVERY:
 			break;
 		default:
 			goto illegal;
@@ -107,12 +108,31 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state)
 	case SHOST_DEL:
 		switch (oldstate) {
 		case SHOST_CANCEL:
+		case SHOST_DEL_RECOVERY:
 			break;
 		default:
 			goto illegal;
 		}
 		break;
 
+	case SHOST_CANCEL_RECOVERY:
+		switch (oldstate) {
+		case SHOST_CANCEL:
+		case SHOST_RECOVERY:
+			break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	case SHOST_DEL_RECOVERY:
+		switch (oldstate) {
+		case SHOST_CANCEL_RECOVERY:
+			break;
+		default:
+			goto illegal;
+		}
+		break;
 	}
 	shost->shost_state = state;
 	return 0;
@@ -134,13 +154,24 @@ EXPORT_SYMBOL(scsi_host_set_state);
  **/
 void scsi_remove_host(struct Scsi_Host *shost)
 {
+	unsigned long flags;
 	down(&shost->scan_mutex);
-	scsi_host_set_state(shost, SHOST_CANCEL);
+	spin_lock_irqsave(shost->host_lock, flags);
+	if (scsi_host_set_state(shost, SHOST_CANCEL))
+		if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) {
+			spin_unlock_irqrestore(shost->host_lock, flags);
+			up(&shost->scan_mutex);
+			return;
+		}
+	spin_unlock_irqrestore(shost->host_lock, flags);
 	up(&shost->scan_mutex);
 	scsi_forget_host(shost);
 	scsi_proc_host_rm(shost);
 
-	scsi_host_set_state(shost, SHOST_DEL);
+	spin_lock_irqsave(shost->host_lock, flags);
+	if (scsi_host_set_state(shost, SHOST_DEL))
+		BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY));
+	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	transport_unregister_device(&shost->shost_gendev);
 	class_device_unregister(&shost->shost_classdev);
diff --git a/drivers/scsi/ibmmca.c b/drivers/scsi/ibmmca.c
index 6e54c7d9b33..19392f65127 100644
--- a/drivers/scsi/ibmmca.c
+++ b/drivers/scsi/ibmmca.c
@@ -460,6 +460,8 @@ MODULE_PARM(adisplay, "1i");
 MODULE_PARM(normal, "1i");
 MODULE_PARM(ansi, "1i");
 #endif
+
+MODULE_LICENSE("GPL");
 #endif
 /*counter of concurrent disk read/writes, to turn on/off disk led */
 static int disk_rw_in_progress = 0;
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 5b14934ba86..ff25210b00b 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -727,6 +727,16 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct)
 		if (hostdata->madapter_info.port_max_txu[0]) 
 			hostdata->host->max_sectors = 
 				hostdata->madapter_info.port_max_txu[0] >> 9;
+		
+		if (hostdata->madapter_info.os_type == 3 &&
+		    strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) {
+			printk("ibmvscsi: host (Ver. %s) doesn't support large"
+			       "transfers\n",
+			       hostdata->madapter_info.srp_version);
+			printk("ibmvscsi: limiting scatterlists to %d\n",
+			       MAX_INDIRECT_BUFS);
+			hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
+		}
 	}
 }
 
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index d92273cbe0d..e5b01997117 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -4132,6 +4132,53 @@ err_out:
 }
 
 /**
+ *	ata_host_set_remove - PCI layer callback for device removal
+ *	@host_set: ATA host set that was removed
+ *
+ *	Unregister all objects associated with this host set. Free those 
+ *	objects.
+ *
+ *	LOCKING:
+ *	Inherited from calling layer (may sleep).
+ */
+
+
+void ata_host_set_remove(struct ata_host_set *host_set)
+{
+	struct ata_port *ap;
+	unsigned int i;
+
+	for (i = 0; i < host_set->n_ports; i++) {
+		ap = host_set->ports[i];
+		scsi_remove_host(ap->host);
+	}
+
+	free_irq(host_set->irq, host_set);
+
+	for (i = 0; i < host_set->n_ports; i++) {
+		ap = host_set->ports[i];
+
+		ata_scsi_release(ap->host);
+
+		if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) {
+			struct ata_ioports *ioaddr = &ap->ioaddr;
+
+			if (ioaddr->cmd_addr == 0x1f0)
+				release_region(0x1f0, 8);
+			else if (ioaddr->cmd_addr == 0x170)
+				release_region(0x170, 8);
+		}
+
+		scsi_host_put(ap->host);
+	}
+
+	if (host_set->ops->host_stop)
+		host_set->ops->host_stop(host_set);
+
+	kfree(host_set);
+}
+
+/**
  *	ata_scsi_release - SCSI layer callback hook for host unload
  *	@host: libata host to be unloaded
  *
@@ -4471,39 +4518,8 @@ void ata_pci_remove_one (struct pci_dev *pdev)
 {
 	struct device *dev = pci_dev_to_dev(pdev);
 	struct ata_host_set *host_set = dev_get_drvdata(dev);
-	struct ata_port *ap;
-	unsigned int i;
-
-	for (i = 0; i < host_set->n_ports; i++) {
-		ap = host_set->ports[i];
-
-		scsi_remove_host(ap->host);
-	}
-
-	free_irq(host_set->irq, host_set);
-
-	for (i = 0; i < host_set->n_ports; i++) {
-		ap = host_set->ports[i];
-
-		ata_scsi_release(ap->host);
-
-		if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) {
-			struct ata_ioports *ioaddr = &ap->ioaddr;
-
-			if (ioaddr->cmd_addr == 0x1f0)
-				release_region(0x1f0, 8);
-			else if (ioaddr->cmd_addr == 0x170)
-				release_region(0x170, 8);
-		}
-
-		scsi_host_put(ap->host);
-	}
-
-	if (host_set->ops->host_stop)
-		host_set->ops->host_stop(host_set);
-
-	kfree(host_set);
 
+	ata_host_set_remove(host_set);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
 	dev_set_drvdata(dev, NULL);
@@ -4573,6 +4589,7 @@ module_exit(ata_exit);
 EXPORT_SYMBOL_GPL(ata_std_bios_param);
 EXPORT_SYMBOL_GPL(ata_std_ports);
 EXPORT_SYMBOL_GPL(ata_device_add);
+EXPORT_SYMBOL_GPL(ata_host_set_remove);
 EXPORT_SYMBOL_GPL(ata_sg_init);
 EXPORT_SYMBOL_GPL(ata_sg_init_one);
 EXPORT_SYMBOL_GPL(ata_qc_complete);
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index a4857db4f9b..b235556b7b6 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1959,22 +1959,35 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
 	/* Set it up */
        	mesh_init(ms);
 
-	/* XXX FIXME: error should be fatal */
-       	if (request_irq(ms->meshintr, do_mesh_interrupt, 0, "MESH", ms))
+	/* Request interrupt */
+       	if (request_irq(ms->meshintr, do_mesh_interrupt, 0, "MESH", ms)) {
 	       	printk(KERN_ERR "MESH: can't get irq %d\n", ms->meshintr);
+		goto out_shutdown;
+	}
 
-	/* XXX FIXME: handle failure */
-	scsi_add_host(mesh_host, &mdev->ofdev.dev);
+	/* Add scsi host & scan */
+	if (scsi_add_host(mesh_host, &mdev->ofdev.dev))
+		goto out_release_irq;
 	scsi_scan_host(mesh_host);
 
 	return 0;
 
-out_unmap:
+ out_release_irq:
+	free_irq(ms->meshintr, ms);
+ out_shutdown:
+	/* shutdown & reset bus in case of error or macos can be confused
+	 * at reboot if the bus was set to synchronous mode already
+	 */
+	mesh_shutdown(mdev);
+	set_mesh_power(ms, 0);
+	pci_free_consistent(macio_get_pci_dev(mdev), ms->dma_cmd_size,
+			    ms->dma_cmd_space, ms->dma_cmd_bus);
+ out_unmap:
 	iounmap(ms->dma);
 	iounmap(ms->mesh);
-out_free:
+ out_free:
 	scsi_host_put(mesh_host);
-out_release:
+ out_release:
 	macio_release_resources(mdev);
 
 	return -ENODEV;
@@ -2001,7 +2014,7 @@ static int mesh_remove(struct macio_dev *mdev)
 
 	/* Free DMA commands memory */
 	pci_free_consistent(macio_get_pci_dev(mdev), ms->dma_cmd_size,
-			  ms->dma_cmd_space, ms->dma_cmd_bus);
+			    ms->dma_cmd_space, ms->dma_cmd_bus);
 
 	/* Release memory resources */
 	macio_release_resources(mdev);
diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c
index a1d62dee3be..c05653c7779 100644
--- a/drivers/scsi/sata_nv.c
+++ b/drivers/scsi/sata_nv.c
@@ -158,6 +158,8 @@ static struct pci_device_id nv_pci_tbl[] = {
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 },
+	{ PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 },
 	{ PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
 		PCI_ANY_ID, PCI_ANY_ID,
 		PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC },
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index a780546eda9..1f0ebabf6d4 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1265,9 +1265,8 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery)
 		list_for_each_safe(lh, lh_sf, &active_list) {
 			scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
 			list_del_init(lh);
-			if (recovery) {
-				scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD);
-			} else {
+			if (recovery &&
+			    !scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD)) {
 				scmd->result = (DID_ABORT << 16);
 				scsi_finish_command(scmd);
 			}
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 07b554affcf..64fc9e21f35 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -110,6 +110,7 @@ static struct {
 	{"RELISYS", "Scorpio", NULL, BLIST_NOLUN},	/* responds to all lun */
 	{"SANKYO", "CP525", "6.64", BLIST_NOLUN},	/* causes failed REQ SENSE, extra reset */
 	{"TEXEL", "CD-ROM", "1.06", BLIST_NOLUN},
+	{"transtec", "T5008", "0001", BLIST_NOREPORTLUN },
 	{"YAMAHA", "CDR100", "1.00", BLIST_NOLUN},	/* locks up */
 	{"YAMAHA", "CDR102", "1.00", BLIST_NOLUN},	/* locks up */
 	{"YAMAHA", "CRW8424S", "1.0", BLIST_NOLUN},	/* locks up */
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 895c9452be4..ad534216507 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -50,7 +50,7 @@
 void scsi_eh_wakeup(struct Scsi_Host *shost)
 {
 	if (shost->host_busy == shost->host_failed) {
-		up(shost->eh_wait);
+		wake_up_process(shost->ehandler);
 		SCSI_LOG_ERROR_RECOVERY(5,
 				printk("Waking error handler thread\n"));
 	}
@@ -68,19 +68,24 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
 {
 	struct Scsi_Host *shost = scmd->device->host;
 	unsigned long flags;
+	int ret = 0;
 
-	if (shost->eh_wait == NULL)
+	if (!shost->ehandler)
 		return 0;
 
 	spin_lock_irqsave(shost->host_lock, flags);
+	if (scsi_host_set_state(shost, SHOST_RECOVERY))
+		if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
+			goto out_unlock;
 
+	ret = 1;
 	scmd->eh_eflags |= eh_flag;
 	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
-	scsi_host_set_state(shost, SHOST_RECOVERY);
 	shost->host_failed++;
 	scsi_eh_wakeup(shost);
+ out_unlock:
 	spin_unlock_irqrestore(shost->host_lock, flags);
-	return 1;
+	return ret;
 }
 
 /**
@@ -176,8 +181,8 @@ void scsi_times_out(struct scsi_cmnd *scmd)
 		}
 
 	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
-		panic("Error handler thread not present at %p %p %s %d",
-		      scmd, scmd->device->host, __FILE__, __LINE__);
+		scmd->result |= DID_TIME_OUT << 16;
+		__scsi_done(scmd);
 	}
 }
 
@@ -196,8 +201,7 @@ int scsi_block_when_processing_errors(struct scsi_device *sdev)
 {
 	int online;
 
-	wait_event(sdev->host->host_wait, (sdev->host->shost_state !=
-					   SHOST_RECOVERY));
+	wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
 
 	online = scsi_device_online(sdev);
 
@@ -1441,6 +1445,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 static void scsi_restart_operations(struct Scsi_Host *shost)
 {
 	struct scsi_device *sdev;
+	unsigned long flags;
 
 	/*
 	 * If the door was locked, we need to insert a door lock request
@@ -1460,7 +1465,11 @@ static void scsi_restart_operations(struct Scsi_Host *shost)
 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
 					  __FUNCTION__));
 
-	scsi_host_set_state(shost, SHOST_RUNNING);
+	spin_lock_irqsave(shost->host_lock, flags);
+	if (scsi_host_set_state(shost, SHOST_RUNNING))
+		if (scsi_host_set_state(shost, SHOST_CANCEL))
+			BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
+	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	wake_up(&shost->host_wait);
 
@@ -1582,40 +1591,31 @@ int scsi_error_handler(void *data)
 {
 	struct Scsi_Host *shost = (struct Scsi_Host *) data;
 	int rtn;
-	DECLARE_MUTEX_LOCKED(sem);
 
 	current->flags |= PF_NOFREEZE;
-	shost->eh_wait = &sem;
 
+	
 	/*
-	 * Wake up the thread that created us.
+	 * Note - we always use TASK_INTERRUPTIBLE even if the module
+	 * was loaded as part of the kernel.  The reason is that
+	 * UNINTERRUPTIBLE would cause this thread to be counted in
+	 * the load average as a running process, and an interruptible
+	 * wait doesn't.
 	 */
-	SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"
-					  " scsi_eh_%d\n",shost->host_no));
-
-	while (1) {
-		/*
-		 * If we get a signal, it means we are supposed to go
-		 * away and die.  This typically happens if the user is
-		 * trying to unload a module.
-		 */
-		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
-						  " scsi_eh_%d"
-						  " sleeping\n",shost->host_no));
-
-		/*
-		 * Note - we always use down_interruptible with the semaphore
-		 * even if the module was loaded as part of the kernel.  The
-		 * reason is that down() will cause this thread to be counted
-		 * in the load average as a running process, and down
-		 * interruptible doesn't.  Given that we need to allow this
-		 * thread to die if the driver was loaded as a module, using
-		 * semaphores isn't unreasonable.
-		 */
-		down_interruptible(&sem);
-		if (kthread_should_stop())
-			break;
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		if (shost->host_failed == 0 ||
+		    shost->host_failed != shost->host_busy) {
+			SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
+							  " scsi_eh_%d"
+							  " sleeping\n",
+							  shost->host_no));
+			schedule();
+			set_current_state(TASK_INTERRUPTIBLE);
+			continue;
+		}
 
+		__set_current_state(TASK_RUNNING);
 		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
 						  " scsi_eh_%d waking"
 						  " up\n",shost->host_no));
@@ -1642,7 +1642,7 @@ int scsi_error_handler(void *data)
 		 * which are still online.
 		 */
 		scsi_restart_operations(shost);
-
+		set_current_state(TASK_INTERRUPTIBLE);
 	}
 
 	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
@@ -1651,7 +1651,7 @@ int scsi_error_handler(void *data)
 	/*
 	 * Make sure that nobody tries to wake us up again.
 	 */
-	shost->eh_wait = NULL;
+	shost->ehandler = NULL;
 	return 0;
 }
 
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index b7fddac8134..de7f98cc38f 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -458,7 +458,7 @@ int scsi_nonblockable_ioctl(struct scsi_device *sdev, int cmd,
 	 * error processing, as long as the device was opened
 	 * non-blocking */
 	if (filp && filp->f_flags & O_NONBLOCK) {
-		if (sdev->host->shost_state == SHOST_RECOVERY)
+		if (scsi_host_in_recovery(sdev->host))
 			return -ENODEV;
 	} else if (!scsi_block_when_processing_errors(sdev))
 		return -ENODEV;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 863bb6495da..dc9c772bc87 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -118,7 +118,6 @@ static void scsi_unprep_request(struct request *req)
 	req->flags &= ~REQ_DONTPREP;
 	req->special = (req->flags & REQ_SPECIAL) ? cmd->sc_request : NULL;
 
-	scsi_release_buffers(cmd);
 	scsi_put_command(cmd);
 }
 
@@ -140,14 +139,12 @@ static void scsi_unprep_request(struct request *req)
  *              commands.
  * Notes:       This could be called either from an interrupt context or a
  *              normal process context.
- * Notes:	Upon return, cmd is a stale pointer.
  */
 int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
 {
 	struct Scsi_Host *host = cmd->device->host;
 	struct scsi_device *device = cmd->device;
 	struct request_queue *q = device->request_queue;
-	struct request *req = cmd->request;
 	unsigned long flags;
 
 	SCSI_LOG_MLQUEUE(1,
@@ -188,9 +185,8 @@ int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
 	 * function.  The SCSI request function detects the blocked condition
 	 * and plugs the queue appropriately.
          */
-	scsi_unprep_request(req);
 	spin_lock_irqsave(q->queue_lock, flags);
-	blk_requeue_request(q, req);
+	blk_requeue_request(q, cmd->request);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	scsi_run_queue(q);
@@ -451,7 +447,7 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	shost->host_busy--;
-	if (unlikely((shost->shost_state == SHOST_RECOVERY) &&
+	if (unlikely(scsi_host_in_recovery(shost) &&
 		     shost->host_failed))
 		scsi_eh_wakeup(shost);
 	spin_unlock(shost->host_lock);
@@ -1268,6 +1264,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 			}
 		} else {
 			memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
+			cmd->cmd_len = req->cmd_len;
 			if (rq_data_dir(req) == WRITE)
 				cmd->sc_data_direction = DMA_TO_DEVICE;
 			else if (req->data_len)
@@ -1342,7 +1339,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 				   struct Scsi_Host *shost,
 				   struct scsi_device *sdev)
 {
-	if (shost->shost_state == SHOST_RECOVERY)
+	if (scsi_host_in_recovery(shost))
 		return 0;
 	if (shost->host_busy == 0 && shost->host_blocked) {
 		/*
@@ -1514,7 +1511,6 @@ static void scsi_request_fn(struct request_queue *q)
 	 * cases (host limits or settings) should run the queue at some
 	 * later time.
 	 */
-	scsi_unprep_request(req);
 	spin_lock_irq(q->queue_lock);
 	blk_requeue_request(q, req);
 	sdev->device_busy--;
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index b86f170fa8e..fcf9f6cbb14 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1466,23 +1466,17 @@ EXPORT_SYMBOL(scsi_scan_single_target);
 
 void scsi_forget_host(struct Scsi_Host *shost)
 {
-	struct scsi_target *starget, *tmp;
+	struct scsi_device *sdev;
 	unsigned long flags;
 
-	/*
-	 * Ok, this look a bit strange.  We always look for the first device
-	 * on the list as scsi_remove_device removes them from it - thus we
-	 * also have to release the lock.
-	 * We don't need to get another reference to the device before
-	 * releasing the lock as we already own the reference from
-	 * scsi_register_device that's release in scsi_remove_device.  And
-	 * after that we don't look at sdev anymore.
-	 */
+ restart:
 	spin_lock_irqsave(shost->host_lock, flags);
-	list_for_each_entry_safe(starget, tmp, &shost->__targets, siblings) {
+	list_for_each_entry(sdev, &shost->__devices, siblings) {
+		if (sdev->sdev_state == SDEV_DEL)
+			continue;
 		spin_unlock_irqrestore(shost->host_lock, flags);
-		scsi_remove_target(&starget->dev);
-		spin_lock_irqsave(shost->host_lock, flags);
+		__scsi_remove_device(sdev);
+		goto restart;
 	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
 }
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index b8052d5206c..72a6550a056 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -57,6 +57,8 @@ static struct {
 	{ SHOST_CANCEL, "cancel" },
 	{ SHOST_DEL, "deleted" },
 	{ SHOST_RECOVERY, "recovery" },
+	{ SHOST_CANCEL_RECOVERY, "cancel/recovery" },
+	{ SHOST_DEL_RECOVERY, "deleted/recovery", },
 };
 const char *scsi_host_state_name(enum scsi_host_state state)
 {
@@ -707,9 +709,11 @@ void __scsi_remove_device(struct scsi_device *sdev)
  **/
 void scsi_remove_device(struct scsi_device *sdev)
 {
-	down(&sdev->host->scan_mutex);
+	struct Scsi_Host *shost = sdev->host;
+
+	down(&shost->scan_mutex);
 	__scsi_remove_device(sdev);
-	up(&sdev->host->scan_mutex);
+	up(&shost->scan_mutex);
 }
 EXPORT_SYMBOL(scsi_remove_device);
 
@@ -717,17 +721,20 @@ void __scsi_remove_target(struct scsi_target *starget)
 {
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 	unsigned long flags;
-	struct scsi_device *sdev, *tmp;
+	struct scsi_device *sdev;
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	starget->reap_ref++;
-	list_for_each_entry_safe(sdev, tmp, &shost->__devices, siblings) {
+ restart:
+	list_for_each_entry(sdev, &shost->__devices, siblings) {
 		if (sdev->channel != starget->channel ||
-		    sdev->id != starget->id)
+		    sdev->id != starget->id ||
+		    sdev->sdev_state == SDEV_DEL)
 			continue;
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		scsi_remove_device(sdev);
 		spin_lock_irqsave(shost->host_lock, flags);
+		goto restart;
 	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	scsi_target_reap(starget);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index de564b38605..9a1dc0cea03 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -235,6 +235,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
 			return 0;
 
 		memcpy(SCpnt->cmnd, rq->cmd, sizeof(SCpnt->cmnd));
+		SCpnt->cmd_len = rq->cmd_len;
 		if (rq_data_dir(rq) == WRITE)
 			SCpnt->sc_data_direction = DMA_TO_DEVICE;
 		else if (rq->data_len)
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 9ea4765d1d1..4d09a6e4dd2 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1027,7 +1027,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
 		if (sdp->detached)
 			return -ENODEV;
 		if (filp->f_flags & O_NONBLOCK) {
-			if (sdp->device->host->shost_state == SHOST_RECOVERY)
+			if (scsi_host_in_recovery(sdp->device->host))
 				return -EBUSY;
 		} else if (!scsi_block_when_processing_errors(sdp->device))
 			return -EBUSY;
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index ce63fc8312d..561901b1cf1 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -326,6 +326,7 @@ static int sr_init_command(struct scsi_cmnd * SCpnt)
 			return 0;
 
 		memcpy(SCpnt->cmnd, rq->cmd, sizeof(SCpnt->cmnd));
+		SCpnt->cmd_len = rq->cmd_len;
 		if (!rq->data_len)
 			SCpnt->sc_data_direction = DMA_NONE;
 		else if (rq_data_dir(rq) == WRITE)
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index a93308ae973..d001c046551 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4206,6 +4206,7 @@ static int st_init_command(struct scsi_cmnd *SCpnt)
 		return 0;
 
 	memcpy(SCpnt->cmnd, rq->cmd, sizeof(SCpnt->cmnd));
+	SCpnt->cmd_len = rq->cmd_len;
 
 	if (rq_data_dir(rq) == WRITE)
 		SCpnt->sc_data_direction = DMA_TO_DEVICE;
diff --git a/drivers/serial/clps711x.c b/drivers/serial/clps711x.c
index 78c1f36ad9b..87ef368384f 100644
--- a/drivers/serial/clps711x.c
+++ b/drivers/serial/clps711x.c
@@ -98,7 +98,7 @@ static irqreturn_t clps711xuart_int_rx(int irq, void *dev_id, struct pt_regs *re
 {
 	struct uart_port *port = dev_id;
 	struct tty_struct *tty = port->info->tty;
-	unsigned int status, ch, flg, ignored = 0;
+	unsigned int status, ch, flg;
 
 	status = clps_readl(SYSFLG(port));
 	while (!(status & SYSFLG_URXFE)) {
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index c47c8052b48..f1fb67fe22a 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -987,7 +987,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0)
 
 			/* remove this interface if it has been registered */
 			interface = dev->actconfig->interface[i];
-			if (!klist_node_attached(&interface->dev.knode_bus))
+			if (!device_is_registered(&interface->dev))
 				continue;
 			dev_dbg (&dev->dev, "unregistering interface %s\n",
 				interface->dev.bus_id);
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 087af73a59d..7d131509e41 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -303,7 +303,7 @@ int usb_driver_claim_interface(struct usb_driver *driver,
 	/* if interface was already added, bind now; else let
 	 * the future device_add() bind it, bypassing probe()
 	 */
-	if (klist_node_attached(&dev->knode_bus))
+	if (device_is_registered(dev))
 		device_bind_driver(dev);
 
 	return 0;
@@ -336,8 +336,8 @@ void usb_driver_release_interface(struct usb_driver *driver,
 	if (iface->condition != USB_INTERFACE_BOUND)
 		return;
 
-	/* release only after device_add() */
-	if (klist_node_attached(&dev->knode_bus)) {
+	/* don't release if the interface hasn't been added yet */
+	if (device_is_registered(dev)) {
 		iface->condition = USB_INTERFACE_UNBINDING;
 		device_release_driver(dev);
 	}
diff --git a/drivers/usb/gadget/pxa2xx_udc.c b/drivers/usb/gadget/pxa2xx_udc.c
index 1507738337c..73f8c940415 100644
--- a/drivers/usb/gadget/pxa2xx_udc.c
+++ b/drivers/usb/gadget/pxa2xx_udc.c
@@ -422,7 +422,7 @@ static inline void ep0_idle (struct pxa2xx_udc *dev)
 }
 
 static int
-write_packet(volatile u32 *uddr, struct pxa2xx_request *req, unsigned max)
+write_packet(volatile unsigned long *uddr, struct pxa2xx_request *req, unsigned max)
 {
 	u8		*buf;
 	unsigned	length, count;
@@ -2602,7 +2602,7 @@ static int __exit pxa2xx_udc_remove(struct device *_dev)
  * VBUS IRQs should probably be ignored so that the PXA device just acts
  * "dead" to USB hosts until system resume.
  */
-static int pxa2xx_udc_suspend(struct device *dev, u32 state, u32 level)
+static int pxa2xx_udc_suspend(struct device *dev, pm_message_t state, u32 level)
 {
 	struct pxa2xx_udc	*udc = dev_get_drvdata(dev);
 
diff --git a/drivers/usb/gadget/pxa2xx_udc.h b/drivers/usb/gadget/pxa2xx_udc.h
index d0bc396a85d..a58f3e6e71f 100644
--- a/drivers/usb/gadget/pxa2xx_udc.h
+++ b/drivers/usb/gadget/pxa2xx_udc.h
@@ -69,11 +69,11 @@ struct pxa2xx_ep {
 	 * UDDR = UDC Endpoint Data Register (the fifo)
 	 * DRCM = DMA Request Channel Map
 	 */
-	volatile u32				*reg_udccs;
-	volatile u32				*reg_ubcr;
-	volatile u32				*reg_uddr;
+	volatile unsigned long			*reg_udccs;
+	volatile unsigned long			*reg_ubcr;
+	volatile unsigned long			*reg_uddr;
 #ifdef USE_DMA
-	volatile u32				*reg_drcmr;
+	volatile unsigned long			*reg_drcmr;
 #define	drcmr(n)  .reg_drcmr = & DRCMR ## n ,
 #else
 #define	drcmr(n)  
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index d2a1fd40dfc..d42a15d10a4 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -782,6 +782,9 @@ retry:
 /* usb 1.1 says max 90% of a frame is available for periodic transfers.
  * this driver doesn't promise that much since it's got to handle an
  * IRQ per packet; irq handling latencies also use up that time.
+ *
+ * NOTE:  the periodic schedule is a sparse tree, with the load for
+ * each branch minimized.  see fig 3.5 in the OHCI spec for example.
  */
 #define	MAX_PERIODIC_LOAD	500	/* out of 1000 usec */
 
@@ -843,6 +846,7 @@ static int sl811h_urb_enqueue(
 	if (!(sl811->port1 & (1 << USB_PORT_FEAT_ENABLE))
 			|| !HC_IS_RUNNING(hcd->state)) {
 		retval = -ENODEV;
+		kfree(ep);
 		goto fail;
 	}
 
@@ -911,8 +915,16 @@ static int sl811h_urb_enqueue(
 	case PIPE_ISOCHRONOUS:
 	case PIPE_INTERRUPT:
 		urb->interval = ep->period;
-		if (ep->branch < PERIODIC_SIZE)
+		if (ep->branch < PERIODIC_SIZE) {
+			/* NOTE:  the phase is correct here, but the value
+			 * needs offsetting by the transfer queue depth.
+			 * All current drivers ignore start_frame, so this
+			 * is unlikely to ever matter...
+			 */
+			urb->start_frame = (sl811->frame & (PERIODIC_SIZE - 1))
+						+ ep->branch;
 			break;
+		}
 
 		retval = balance(sl811, ep->period, ep->load);
 		if (retval < 0)
@@ -1122,7 +1134,7 @@ sl811h_hub_descriptor (
 	desc->wHubCharacteristics = (__force __u16)cpu_to_le16(temp);
 
 	/* two bitmaps:  ports removable, and legacy PortPwrCtrlMask */
-	desc->bitmap[0] = 1 << 1;
+	desc->bitmap[0] = 0 << 1;
 	desc->bitmap[1] = ~0;
 }
 
diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c
index 7484d34780f..6a4ffe6c397 100644
--- a/drivers/usb/net/pegasus.c
+++ b/drivers/usb/net/pegasus.c
@@ -648,6 +648,13 @@ static void read_bulk_callback(struct urb *urb, struct pt_regs *regs)
 	}
 
 	/*
+	 * If the packet is unreasonably long, quietly drop it rather than
+	 * kernel panicing by calling skb_put.
+	 */
+	if (pkt_len > PEGASUS_MTU)
+		goto goon;
+
+	/*
 	 * at this point we are sure pegasus->rx_skb != NULL
 	 * so we go ahead and pass up the packet.
 	 */
@@ -886,15 +893,17 @@ static inline void get_interrupt_interval(pegasus_t * pegasus)
 	__u8 data[2];
 
 	read_eprom_word(pegasus, 4, (__u16 *) data);
-	if (data[1] < 0x80) {
-		if (netif_msg_timer(pegasus))
-			dev_info(&pegasus->intf->dev,
-				"intr interval changed from %ums to %ums\n",
-				data[1], 0x80);
-		data[1] = 0x80;
-#ifdef	PEGASUS_WRITE_EEPROM
-		write_eprom_word(pegasus, 4, *(__u16 *) data);
+	if (pegasus->usb->speed != USB_SPEED_HIGH) {
+		if (data[1] < 0x80) {
+			if (netif_msg_timer(pegasus))
+				dev_info(&pegasus->intf->dev, "intr interval "
+					"changed from %ums to %ums\n",
+					data[1], 0x80);
+			data[1] = 0x80;
+#ifdef PEGASUS_WRITE_EEPROM
+			write_eprom_word(pegasus, 4, *(__u16 *) data);
 #endif
+		}
 	}
 	pegasus->intr_interval = data[1];
 }
@@ -904,8 +913,9 @@ static void set_carrier(struct net_device *net)
 	pegasus_t *pegasus = netdev_priv(net);
 	u16 tmp;
 
-	if (read_mii_word(pegasus, pegasus->phy, MII_BMSR, &tmp))
+	if (!read_mii_word(pegasus, pegasus->phy, MII_BMSR, &tmp))
 		return;
+
 	if (tmp & BMSR_LSTATUS)
 		netif_carrier_on(net);
 	else
@@ -1355,6 +1365,7 @@ static void pegasus_disconnect(struct usb_interface *intf)
 	cancel_delayed_work(&pegasus->carrier_check);
 	unregister_netdev(pegasus->net);
 	usb_put_dev(interface_to_usbdev(intf));
+	unlink_all_urbs(pegasus);
 	free_all_urbs(pegasus);
 	free_skb_pool(pegasus);
 	if (pegasus->rx_skb)
diff --git a/drivers/usb/serial/airprime.c b/drivers/usb/serial/airprime.c
index a4ce0008d69..926d4c2c160 100644
--- a/drivers/usb/serial/airprime.c
+++ b/drivers/usb/serial/airprime.c
@@ -16,7 +16,8 @@
 #include "usb-serial.h"
 
 static struct usb_device_id id_table [] = {
-	{ USB_DEVICE(0xf3d, 0x0112) },
+	{ USB_DEVICE(0xf3d, 0x0112) },  /* AirPrime CDMA Wireless PC Card */
+	{ USB_DEVICE(0x1410, 0x1110) }, /* Novatel Wireless Merlin CDMA */
 	{ },
 };
 MODULE_DEVICE_TABLE(usb, id_table);
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 4e434cb10bb..5a8631c8a4a 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1846,10 +1846,12 @@ static void ftdi_set_termios (struct usb_serial_port *port, struct termios *old_
 	} else {
 		/* set the baudrate determined before */
 		if (change_speed(port)) {
-			err("%s urb failed to set baurdrate", __FUNCTION__);
+			err("%s urb failed to set baudrate", __FUNCTION__);
+		}
+		/* Ensure RTS and DTR are raised when baudrate changed from 0 */
+		if ((old_termios->c_cflag & CBAUD) == B0) {
+			set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
 		}
-		/* Ensure  RTS and DTR are raised */
-		set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
 	}
 
 	/* Set flow control */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 92d0f925d05..4989e5740d1 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -25,6 +25,9 @@
   2005-06-20  v0.4.1 add missing braces :-/
                      killed end-of-line whitespace
   2005-07-15  v0.4.2 rename WLAN product to FUSION, add FUSION2
+  2005-09-10  v0.4.3 added HUAWEI E600 card and Audiovox AirCard
+  2005-09-20  v0.4.4 increased recv buffer size: the card sometimes
+                     wants to send >2000 bytes.
 
   Work sponsored by: Sigos GmbH, Germany <info@sigos.de>
 
@@ -71,15 +74,21 @@ static int  option_send_setup(struct usb_serial_port *port);
 
 /* Vendor and product IDs */
 #define OPTION_VENDOR_ID			0x0AF0
+#define HUAWEI_VENDOR_ID			0x12D1
+#define AUDIOVOX_VENDOR_ID			0x0F3D
 
 #define OPTION_PRODUCT_OLD		0x5000
 #define OPTION_PRODUCT_FUSION	0x6000
 #define OPTION_PRODUCT_FUSION2	0x6300
+#define HUAWEI_PRODUCT_E600     0x1001
+#define AUDIOVOX_PRODUCT_AIRCARD 0x0112
 
 static struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_OLD) },
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUSION) },
 	{ USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUSION2) },
+	{ USB_DEVICE(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E600) },
+	{ USB_DEVICE(AUDIOVOX_VENDOR_ID, AUDIOVOX_PRODUCT_AIRCARD) },
 	{ } /* Terminating entry */
 };
 
@@ -132,7 +141,7 @@ static int debug;
 
 #define N_IN_URB 4
 #define N_OUT_URB 1
-#define IN_BUFLEN 1024
+#define IN_BUFLEN 4096
 #define OUT_BUFLEN 128
 
 struct option_port_private {
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 31ee13eef7a..773ae11b4a1 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -650,6 +650,7 @@ config FB_NVIDIA
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
+	select FB_SOFT_CURSOR
 	help
 	  This driver supports graphics boards with the nVidia chips, TNT
 	  and newer. For very old chipsets, such as the RIVA128, then use
diff --git a/drivers/video/aty/xlinit.c b/drivers/video/aty/xlinit.c
index 92643af1258..a085cbf74ec 100644
--- a/drivers/video/aty/xlinit.c
+++ b/drivers/video/aty/xlinit.c
@@ -174,7 +174,7 @@ int atyfb_xl_init(struct fb_info *info)
 	const struct xl_card_cfg_t * card = &card_cfg[xl_card];
 	struct atyfb_par *par = (struct atyfb_par *) info->par;
 	union aty_pll pll;
-	int i, err;
+	int err;
 	u32 temp;
 	
 	aty_st_8(CONFIG_STAT0, 0x85, par);
@@ -252,9 +252,14 @@ int atyfb_xl_init(struct fb_info *info)
 	aty_st_le32(0xEC, 0x00000000, par);
 	aty_st_le32(0xFC, 0x00000000, par);
 
-	for (i=0; i<sizeof(lcd_tbl)/sizeof(lcd_tbl_t); i++) {
-		aty_st_lcd(lcd_tbl[i].lcd_reg, lcd_tbl[i].val, par);
+#if defined (CONFIG_FB_ATY_GENERIC_LCD)
+	{
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(lcd_tbl); i++)
+			aty_st_lcd(lcd_tbl[i].lcd_reg, lcd_tbl[i].val, par);
 	}
+#endif
 
 	aty_st_le16(CONFIG_STAT0, 0x00A4, par);
 	mdelay(10);
diff --git a/drivers/video/fbcvt.c b/drivers/video/fbcvt.c
index cfa61b512de..0b6af00d197 100644
--- a/drivers/video/fbcvt.c
+++ b/drivers/video/fbcvt.c
@@ -272,11 +272,11 @@ static void fb_cvt_convert_to_mode(struct fb_cvt_data *cvt,
 {
 	mode->refresh = cvt->f_refresh;
 	mode->pixclock = KHZ2PICOS(cvt->pixclock/1000);
-	mode->left_margin = cvt->h_front_porch;
-	mode->right_margin = cvt->h_back_porch;
+	mode->left_margin = cvt->h_back_porch;
+	mode->right_margin = cvt->h_front_porch;
 	mode->hsync_len = cvt->hsync;
-	mode->upper_margin = cvt->v_front_porch;
-	mode->lower_margin = cvt->v_back_porch;
+	mode->upper_margin = cvt->v_back_porch;
+	mode->lower_margin = cvt->v_front_porch;
 	mode->vsync_len = cvt->vsync;
 
 	mode->sync &= ~(FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT);
diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index 3620de0f252..a7f020ada63 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -893,7 +893,7 @@ static int nvidiafb_cursor(struct fb_info *info, struct fb_cursor *cursor)
 	int i, set = cursor->set;
 	u16 fg, bg;
 
-	if (!hwcur || cursor->image.width > MAX_CURS || cursor->image.height > MAX_CURS)
+	if (cursor->image.width > MAX_CURS || cursor->image.height > MAX_CURS)
 		return -ENXIO;
 
 	NVShowHideCursor(par, 0);
@@ -1356,6 +1356,9 @@ static int __devinit nvidia_set_fbinfo(struct fb_info *info)
 	info->pixmap.size = 8 * 1024;
 	info->pixmap.flags = FB_PIXMAP_SYSTEM;
 
+	if (!hwcur)
+	    info->fbops->fb_cursor = soft_cursor;
+
 	info->var.accel_flags = (!noaccel);
 
 	switch (par->Architecture) {
diff --git a/fs/9p/conv.c b/fs/9p/conv.c
index 1554731bd65..18121af99d3 100644
--- a/fs/9p/conv.c
+++ b/fs/9p/conv.c
@@ -3,6 +3,7 @@
  *
  * 9P protocol conversion functions
  *
+ *  Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
@@ -55,66 +56,70 @@ static inline int buf_check_overflow(struct cbuf *buf)
 	return buf->p > buf->ep;
 }
 
-static inline void buf_check_size(struct cbuf *buf, int len)
+static inline int buf_check_size(struct cbuf *buf, int len)
 {
 	if (buf->p+len > buf->ep) {
 		if (buf->p < buf->ep) {
 			eprintk(KERN_ERR, "buffer overflow\n");
 			buf->p = buf->ep + 1;
+			return 0;
 		}
 	}
+
+	return 1;
 }
 
 static inline void *buf_alloc(struct cbuf *buf, int len)
 {
 	void *ret = NULL;
 
-	buf_check_size(buf, len);
-	ret = buf->p;
-	buf->p += len;
+	if (buf_check_size(buf, len)) {
+		ret = buf->p;
+		buf->p += len;
+	}
 
 	return ret;
 }
 
 static inline void buf_put_int8(struct cbuf *buf, u8 val)
 {
-	buf_check_size(buf, 1);
-
-	buf->p[0] = val;
-	buf->p++;
+	if (buf_check_size(buf, 1)) {
+		buf->p[0] = val;
+		buf->p++;
+	}
 }
 
 static inline void buf_put_int16(struct cbuf *buf, u16 val)
 {
-	buf_check_size(buf, 2);
-
-	*(__le16 *) buf->p = cpu_to_le16(val);
-	buf->p += 2;
+	if (buf_check_size(buf, 2)) {
+		*(__le16 *) buf->p = cpu_to_le16(val);
+		buf->p += 2;
+	}
 }
 
 static inline void buf_put_int32(struct cbuf *buf, u32 val)
 {
-	buf_check_size(buf, 4);
-
-	*(__le32 *)buf->p = cpu_to_le32(val);
-	buf->p += 4;
+	if (buf_check_size(buf, 4)) {
+		*(__le32 *)buf->p = cpu_to_le32(val);
+		buf->p += 4;
+	}
 }
 
 static inline void buf_put_int64(struct cbuf *buf, u64 val)
 {
-	buf_check_size(buf, 8);
-
-	*(__le64 *)buf->p = cpu_to_le64(val);
-	buf->p += 8;
+	if (buf_check_size(buf, 8)) {
+		*(__le64 *)buf->p = cpu_to_le64(val);
+		buf->p += 8;
+	}
 }
 
 static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen)
 {
-	buf_check_size(buf, slen + 2);
-
-	buf_put_int16(buf, slen);
-	memcpy(buf->p, s, slen);
-	buf->p += slen;
+	if (buf_check_size(buf, slen + 2)) {
+		buf_put_int16(buf, slen);
+		memcpy(buf->p, s, slen);
+		buf->p += slen;
+	}
 }
 
 static inline void buf_put_string(struct cbuf *buf, const char *s)
@@ -124,20 +129,20 @@ static inline void buf_put_string(struct cbuf *buf, const char *s)
 
 static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
 {
-	buf_check_size(buf, datalen);
-
-	memcpy(buf->p, data, datalen);
-	buf->p += datalen;
+	if (buf_check_size(buf, datalen)) {
+		memcpy(buf->p, data, datalen);
+		buf->p += datalen;
+	}
 }
 
 static inline u8 buf_get_int8(struct cbuf *buf)
 {
 	u8 ret = 0;
 
-	buf_check_size(buf, 1);
-	ret = buf->p[0];
-
-	buf->p++;
+	if (buf_check_size(buf, 1)) {
+		ret = buf->p[0];
+		buf->p++;
+	}
 
 	return ret;
 }
@@ -146,10 +151,10 @@ static inline u16 buf_get_int16(struct cbuf *buf)
 {
 	u16 ret = 0;
 
-	buf_check_size(buf, 2);
-	ret = le16_to_cpu(*(__le16 *)buf->p);
-
-	buf->p += 2;
+	if (buf_check_size(buf, 2)) {
+		ret = le16_to_cpu(*(__le16 *)buf->p);
+		buf->p += 2;
+	}
 
 	return ret;
 }
@@ -158,10 +163,10 @@ static inline u32 buf_get_int32(struct cbuf *buf)
 {
 	u32 ret = 0;
 
-	buf_check_size(buf, 4);
-	ret = le32_to_cpu(*(__le32 *)buf->p);
-
-	buf->p += 4;
+	if (buf_check_size(buf, 4)) {
+		ret = le32_to_cpu(*(__le32 *)buf->p);
+		buf->p += 4;
+	}
 
 	return ret;
 }
@@ -170,10 +175,10 @@ static inline u64 buf_get_int64(struct cbuf *buf)
 {
 	u64 ret = 0;
 
-	buf_check_size(buf, 8);
-	ret = le64_to_cpu(*(__le64 *)buf->p);
-
-	buf->p += 8;
+	if (buf_check_size(buf, 8)) {
+		ret = le64_to_cpu(*(__le64 *)buf->p);
+		buf->p += 8;
+	}
 
 	return ret;
 }
@@ -181,27 +186,35 @@ static inline u64 buf_get_int64(struct cbuf *buf)
 static inline int
 buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
 {
+	u16 len = 0;
+
+	len = buf_get_int16(buf);
+	if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) {
+		memcpy(data, buf->p, len);
+		data[len] = 0;
+		buf->p += len;
+		len++;
+	}
 
-	u16 len = buf_get_int16(buf);
-	buf_check_size(buf, len);
-	if (len + 1 > datalen)
-		return 0;
-
-	memcpy(data, buf->p, len);
-	data[len] = 0;
-	buf->p += len;
-
-	return len + 1;
+	return len;
 }
 
 static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
 {
-	char *ret = NULL;
-	int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p);
+	char *ret;
+	u16 len;
+
+	ret = NULL;
+	len = buf_get_int16(buf);
 
-	if (n > 0) {
+	if (!buf_check_overflow(buf) && buf_check_size(buf, len) &&
+		buf_check_size(sbuf, len+1)) {
+
+		memcpy(sbuf->p, buf->p, len);
+		sbuf->p[len] = 0;
 		ret = sbuf->p;
-		sbuf->p += n;
+		buf->p += len;
+		sbuf->p += len + 1;
 	}
 
 	return ret;
@@ -209,12 +222,15 @@ static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
 
 static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
 {
-	buf_check_size(buf, datalen);
+	int ret = 0;
 
-	memcpy(data, buf->p, datalen);
-	buf->p += datalen;
+	if (buf_check_size(buf, datalen)) {
+		memcpy(data, buf->p, datalen);
+		buf->p += datalen;
+		ret = datalen;
+	}
 
-	return datalen;
+	return ret;
 }
 
 static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
@@ -223,13 +239,12 @@ static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
 	char *ret = NULL;
 	int n = 0;
 
-	buf_check_size(dbuf, datalen);
-
-	n = buf_get_data(buf, dbuf->p, datalen);
-
-	if (n > 0) {
-		ret = dbuf->p;
-		dbuf->p += n;
+	if (buf_check_size(dbuf, datalen)) {
+		n = buf_get_data(buf, dbuf->p, datalen);
+		if (n > 0) {
+			ret = dbuf->p;
+			dbuf->p += n;
+		}
 	}
 
 	return ret;
@@ -636,7 +651,7 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
 		break;
 	case RWALK:
 		rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-		rcall->params.rwalk.wqids = buf_alloc(bufp,
+		rcall->params.rwalk.wqids = buf_alloc(dbufp,
 		      rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
 		if (rcall->params.rwalk.wqids)
 			for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 13bdbbab438..82303f3bf76 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -303,7 +303,13 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 		goto SessCleanUp;
 	};
 
-	v9ses->transport = trans_proto;
+	v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL);
+	if (!v9ses->transport) {
+		retval = -ENOMEM;
+		goto SessCleanUp;
+	}
+
+	memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport));
 
 	if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) {
 		eprintk(KERN_ERR, "problem initializing transport\n");
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 0c13fc60004..b16322db5ce 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1063,8 +1063,8 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
 	int ret;
 	char *link = __getname();
 
-	if (strlen(link) < buflen)
-		buflen = strlen(link);
+	if (buflen > PATH_MAX)
+		buflen = PATH_MAX;
 
 	dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
 
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 868f350b2c5..1e2b2b54d30 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -129,8 +129,8 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 
 	if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
 		dprintk(DEBUG_ERROR, "problem initiating session\n");
-		retval = newfid;
-		goto free_session;
+		kfree(v9ses);
+		return ERR_PTR(newfid);
 	}
 
 	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
@@ -150,7 +150,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 
 	if (!root) {
 		retval = -ENOMEM;
-		goto release_inode;
+		goto put_back_sb;
 	}
 
 	sb->s_root = root;
@@ -159,7 +159,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 	root_fid = v9fs_fid_create(root);
 	if (root_fid == NULL) {
 		retval = -ENOMEM;
-		goto release_dentry;
+		goto put_back_sb;
 	}
 
 	root_fid->fidopen = 0;
@@ -182,25 +182,15 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 
 	if (stat_result < 0) {
 		retval = stat_result;
-		goto release_dentry;
+		goto put_back_sb;
 	}
 
 	return sb;
 
-      release_dentry:
-	dput(sb->s_root);
-
-      release_inode:
-	iput(inode);
-
-      put_back_sb:
+put_back_sb:
+	/* deactivate_super calls v9fs_kill_super which will frees the rest */
 	up_write(&sb->s_umount);
 	deactivate_super(sb);
-	v9fs_session_close(v9ses);
-
-      free_session:
-	kfree(v9ses);
-
 	return ERR_PTR(retval);
 }
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8cc23e7d0d5..1ebf7dafc1d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -781,6 +781,8 @@ static int cifs_oplock_thread(void * dummyarg)
 
 	oplockThread = current;
 	do {
+		if (try_to_freeze()) 
+			continue;
 		set_current_state(TASK_INTERRUPTIBLE);
 		
 		schedule_timeout(1*HZ);  
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2335f14a158..47360156cc5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -344,6 +344,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
 	}
 
 	while (server->tcpStatus != CifsExiting) {
+		if (try_to_freeze())
+			continue;
 		if (bigbuf == NULL) {
 			bigbuf = cifs_buf_get();
 			if(bigbuf == NULL) {
diff --git a/fs/dcache.c b/fs/dcache.c
index 7376b61269f..fb10386c59b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -102,7 +102,8 @@ static inline void dentry_iput(struct dentry * dentry)
 		list_del_init(&dentry->d_alias);
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&dcache_lock);
-		fsnotify_inoderemove(inode);
+		if (!inode->i_nlink)
+			fsnotify_inoderemove(inode);
 		if (dentry->d_op && dentry->d_op->d_iput)
 			dentry->d_op->d_iput(dentry, inode);
 		else
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index e463dca008e..0213db4911a 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1410,7 +1410,7 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
 	unsigned long desc_count;
 	struct ext3_group_desc *gdp;
 	int i;
-	unsigned long ngroups;
+	unsigned long ngroups = EXT3_SB(sb)->s_groups_count;
 #ifdef EXT3FS_DEBUG
 	struct ext3_super_block *es;
 	unsigned long bitmap_count, x;
@@ -1421,7 +1421,8 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
 	desc_count = 0;
 	bitmap_count = 0;
 	gdp = NULL;
-	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+
+	for (i = 0; i < ngroups; i++) {
 		gdp = ext3_get_group_desc(sb, i, NULL);
 		if (!gdp)
 			continue;
@@ -1443,7 +1444,6 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
 	return bitmap_count;
 #else
 	desc_count = 0;
-	ngroups = EXT3_SB(sb)->s_groups_count;
 	smp_rmb();
 	for (i = 0; i < ngroups; i++) {
 		gdp = ext3_get_group_desc(sb, i, NULL);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 2c9f81278d5..57f79106267 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -242,7 +242,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	     i < sbi->s_itb_per_group; i++, bit++, block++) {
 		struct buffer_head *it;
 
-		ext3_debug("clear inode block %#04x (+%ld)\n", block, bit);
+		ext3_debug("clear inode block %#04lx (+%d)\n", block, bit);
 		if (IS_ERR(it = bclean(handle, sb, block))) {
 			err = PTR_ERR(it);
 			goto exit_bh;
@@ -643,8 +643,8 @@ static void update_backups(struct super_block *sb,
 			break;
 
 		bh = sb_getblk(sb, group * bpg + blk_off);
-		ext3_debug(sb, __FUNCTION__, "update metadata backup %#04lx\n",
-			   bh->b_blocknr);
+		ext3_debug("update metadata backup %#04lx\n",
+			  (unsigned long)bh->b_blocknr);
 		if ((err = ext3_journal_get_write_access(handle, bh)))
 			break;
 		lock_buffer(bh);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index a93c3609025..9e24ceb019f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -512,15 +512,14 @@ static void ext3_clear_inode(struct inode *inode)
 
 static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
-	struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb);
+	struct super_block *sb = vfs->mnt_sb;
+	struct ext3_sb_info *sbi = EXT3_SB(sb);
 
-	if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA)
+	if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
 		seq_puts(seq, ",data=journal");
-
-	if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA)
+	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
 		seq_puts(seq, ",data=ordered");
-
-	if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA)
+	else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
 		seq_puts(seq, ",data=writeback");
 
 #if defined(CONFIG_QUOTA)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 51b1d15d9d5..e2effe2dc9b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -300,9 +300,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 	inode->i_blksize = sbi->cluster_size;
 	inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
 			   & ~((loff_t)sbi->cluster_size - 1)) >> 9;
-	inode->i_mtime.tv_sec = inode->i_atime.tv_sec =
+	inode->i_mtime.tv_sec =
 		date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
-	inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0;
+	inode->i_mtime.tv_nsec = 0;
 	if (sbi->options.isvfat) {
 		int secs = de->ctime_cs / 100;
 		int csecs = de->ctime_cs % 100;
@@ -310,8 +310,11 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
 			date_dos2unix(le16_to_cpu(de->ctime),
 				      le16_to_cpu(de->cdate)) + secs;
 		inode->i_ctime.tv_nsec = csecs * 10000000;
+		inode->i_atime.tv_sec =
+			date_dos2unix(le16_to_cpu(0), le16_to_cpu(de->adate));
+		inode->i_atime.tv_nsec = 0;
 	} else
-		inode->i_ctime = inode->i_mtime;
+		inode->i_ctime = inode->i_atime = inode->i_mtime;
 
 	return 0;
 }
@@ -513,7 +516,9 @@ retry:
 	raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
 	fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
 	if (sbi->options.isvfat) {
+		__le16 atime;
 		fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
+		fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
 		raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
 			inode->i_ctime.tv_nsec / 10000000;
 	}
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 0ec62d5310d..9f942ca8e4e 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -129,8 +129,7 @@ void jfs_delete_inode(struct inode *inode)
 	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
 
 	if (!is_bad_inode(inode) &&
-	    (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) {
-
+	    (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
 		truncate_inode_pages(&inode->i_data, 0);
 
 		if (test_cflag(COMMIT_Freewmap, inode))
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index c739626f5bf..eadf319bee2 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -3055,7 +3055,7 @@ static int cntlz(u32 value)
  * RETURN VALUES:
  *      log2 number of blocks
  */
-int blkstol2(s64 nb)
+static int blkstol2(s64 nb)
 {
 	int l2nb;
 	s64 mask;		/* meant to be signed */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index c7a92f9deb2..9b71ed2674f 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -725,6 +725,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	else
 		tlck->flag = tlckINODELOCK;
 
+	if (S_ISDIR(ip->i_mode))
+		tlck->flag |= tlckDIRECTORY;
+
 	tlck->type = 0;
 
 	/* bind the tlock and the page */
@@ -1009,6 +1012,8 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 
 	/* bind the tlock and the object */
 	tlck->flag = tlckINODELOCK;
+	if (S_ISDIR(ip->i_mode))
+		tlck->flag |= tlckDIRECTORY;
 	tlck->ip = ip;
 	tlck->mp = NULL;
 
@@ -1077,6 +1082,8 @@ struct linelock *txLinelock(struct linelock * tlock)
 	linelock->flag = tlckLINELOCK;
 	linelock->maxcnt = TLOCKLONG;
 	linelock->index = 0;
+	if (tlck->flag & tlckDIRECTORY)
+		linelock->flag |= tlckDIRECTORY;
 
 	/* append linelock after tlock */
 	linelock->next = tlock->next;
@@ -2070,8 +2077,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
  *
  * function:    log from maplock of freed data extents;
  */
-void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
-	    struct tlock * tlck)
+static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
+		   struct tlock * tlck)
 {
 	struct pxd_lock *pxdlock;
 	int i, nlock;
@@ -2209,7 +2216,7 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
  * function: synchronously write pages locked by transaction
  *              after txLog() but before txUpdateMap();
  */
-void txForce(struct tblock * tblk)
+static void txForce(struct tblock * tblk)
 {
 	struct tlock *tlck;
 	lid_t lid, next;
@@ -2358,7 +2365,7 @@ static void txUpdateMap(struct tblock * tblk)
 			 */
 			else {	/* (maplock->flag & mlckFREE) */
 
-				if (S_ISDIR(tlck->ip->i_mode))
+				if (tlck->flag & tlckDIRECTORY)
 					txFreeMap(ipimap, maplock,
 						  tblk, COMMIT_PWMAP);
 				else
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 59ad0f6b723..0e4dc4514c4 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -122,6 +122,7 @@ extern struct tlock *TxLock;	/* transaction lock table */
 #define tlckLOG			0x0800
 /* updateMap state */
 #define	tlckUPDATEMAP		0x0080
+#define	tlckDIRECTORY		0x0040
 /* freeLock state */
 #define tlckFREELOCK		0x0008
 #define tlckWRITEPAGE		0x0004
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6ceb1d471f2..9758ebd4990 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -184,14 +184,13 @@ static void nfs_readpage_release(struct nfs_page *req)
 {
 	unlock_page(req->wb_page);
 
-	nfs_clear_request(req);
-	nfs_release_request(req);
-
 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
 			req->wb_context->dentry->d_inode->i_sb->s_id,
 			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
+	nfs_clear_request(req);
+	nfs_release_request(req);
 }
 
 /*
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 49eafbdb15c..c7e9237379c 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -92,6 +92,8 @@ ToDo/Notes:
 	  an octal number to conform to how chmod(1) works, too.  Thanks to
 	  Giuseppe Bilotta and Horst von Brand for pointing out the errors of
 	  my ways.
+	- Fix various bugs in the runlist merging code.  (Based on libntfs
+	  changes by Richard Russon.)
 
 2.1.23 - Implement extension of resident files and make writing safe as well as
 	 many bug fixes, cleanups, and enhancements...
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index b6cc8cf2462..5e80c07c6a4 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -59,39 +59,49 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 	unsigned long flags;
 	struct buffer_head *first, *tmp;
 	struct page *page;
+	struct inode *vi;
 	ntfs_inode *ni;
 	int page_uptodate = 1;
 
 	page = bh->b_page;
-	ni = NTFS_I(page->mapping->host);
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
 
 	if (likely(uptodate)) {
-		s64 file_ofs, initialized_size;
+		loff_t i_size;
+		s64 file_ofs, init_size;
 
 		set_buffer_uptodate(bh);
 
 		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
 				bh_offset(bh);
 		read_lock_irqsave(&ni->size_lock, flags);
-		initialized_size = ni->initialized_size;
+		init_size = ni->initialized_size;
+		i_size = i_size_read(vi);
 		read_unlock_irqrestore(&ni->size_lock, flags);
+		if (unlikely(init_size > i_size)) {
+			/* Race with shrinking truncate. */
+			init_size = i_size;
+		}
 		/* Check for the current buffer head overflowing. */
-		if (file_ofs + bh->b_size > initialized_size) {
-			char *addr;
-			int ofs = 0;
-
-			if (file_ofs < initialized_size)
-				ofs = initialized_size - file_ofs;
-			addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
-			memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
+		if (unlikely(file_ofs + bh->b_size > init_size)) {
+			u8 *kaddr;
+			int ofs;
+
+			ofs = 0;
+			if (file_ofs < init_size)
+				ofs = init_size - file_ofs;
+			kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+			memset(kaddr + bh_offset(bh) + ofs, 0,
+					bh->b_size - ofs);
+			kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
 			flush_dcache_page(page);
-			kunmap_atomic(addr, KM_BIO_SRC_IRQ);
 		}
 	} else {
 		clear_buffer_uptodate(bh);
 		SetPageError(page);
-		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
-				(unsigned long long)bh->b_blocknr);
+		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
+				"0x%llx.", (unsigned long long)bh->b_blocknr);
 	}
 	first = page_buffers(page);
 	local_irq_save(flags);
@@ -124,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		if (likely(page_uptodate && !PageError(page)))
 			SetPageUptodate(page);
 	} else {
-		char *addr;
+		u8 *kaddr;
 		unsigned int i, recs;
 		u32 rec_size;
 
@@ -132,12 +142,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		recs = PAGE_CACHE_SIZE / rec_size;
 		/* Should have been verified before we got here... */
 		BUG_ON(!recs);
-		addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+		kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
 		for (i = 0; i < recs; i++)
-			post_read_mst_fixup((NTFS_RECORD*)(addr +
+			post_read_mst_fixup((NTFS_RECORD*)(kaddr +
 					i * rec_size), rec_size);
+		kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
 		flush_dcache_page(page);
-		kunmap_atomic(addr, KM_BIO_SRC_IRQ);
 		if (likely(page_uptodate && !PageError(page)))
 			SetPageUptodate(page);
 	}
@@ -168,8 +178,11 @@ still_busy:
  */
 static int ntfs_read_block(struct page *page)
 {
+	loff_t i_size;
 	VCN vcn;
 	LCN lcn;
+	s64 init_size;
+	struct inode *vi;
 	ntfs_inode *ni;
 	ntfs_volume *vol;
 	runlist_element *rl;
@@ -180,7 +193,8 @@ static int ntfs_read_block(struct page *page)
 	int i, nr;
 	unsigned char blocksize_bits;
 
-	ni = NTFS_I(page->mapping->host);
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
 	vol = ni->vol;
 
 	/* $MFT/$DATA must have its complete runlist in memory at all times. */
@@ -199,11 +213,28 @@ static int ntfs_read_block(struct page *page)
 	bh = head = page_buffers(page);
 	BUG_ON(!bh);
 
+	/*
+	 * We may be racing with truncate.  To avoid some of the problems we
+	 * now take a snapshot of the various sizes and use those for the whole
+	 * of the function.  In case of an extending truncate it just means we
+	 * may leave some buffers unmapped which are now allocated.  This is
+	 * not a problem since these buffers will just get mapped when a write
+	 * occurs.  In case of a shrinking truncate, we will detect this later
+	 * on due to the runlist being incomplete and if the page is being
+	 * fully truncated, truncate will throw it away as soon as we unlock
+	 * it so no need to worry what we do with it.
+	 */
 	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
 	read_lock_irqsave(&ni->size_lock, flags);
 	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
-	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
+	init_size = ni->initialized_size;
+	i_size = i_size_read(vi);
 	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (unlikely(init_size > i_size)) {
+		/* Race with shrinking truncate. */
+		init_size = i_size;
+	}
+	zblock = (init_size + blocksize - 1) >> blocksize_bits;
 
 	/* Loop through all the buffers in the page. */
 	rl = NULL;
@@ -366,6 +397,8 @@ handle_zblock:
  */
 static int ntfs_readpage(struct file *file, struct page *page)
 {
+	loff_t i_size;
+	struct inode *vi;
 	ntfs_inode *ni, *base_ni;
 	u8 *kaddr;
 	ntfs_attr_search_ctx *ctx;
@@ -384,14 +417,17 @@ retry_readpage:
 		unlock_page(page);
 		return 0;
 	}
-	ni = NTFS_I(page->mapping->host);
+	vi = page->mapping->host;
+	ni = NTFS_I(vi);
 	/*
 	 * Only $DATA attributes can be encrypted and only unnamed $DATA
 	 * attributes can be compressed.  Index root can have the flags set but
 	 * this means to create compressed/encrypted files, not that the
-	 * attribute is compressed/encrypted.
+	 * attribute is compressed/encrypted.  Note we need to check for
+	 * AT_INDEX_ALLOCATION since this is the type of both directory and
+	 * index inodes.
 	 */
-	if (ni->type != AT_INDEX_ROOT) {
+	if (ni->type != AT_INDEX_ALLOCATION) {
 		/* If attribute is encrypted, deny access, just like NT4. */
 		if (NInoEncrypted(ni)) {
 			BUG_ON(ni->type != AT_DATA);
@@ -456,7 +492,12 @@ retry_readpage:
 	read_lock_irqsave(&ni->size_lock, flags);
 	if (unlikely(attr_len > ni->initialized_size))
 		attr_len = ni->initialized_size;
+	i_size = i_size_read(vi);
 	read_unlock_irqrestore(&ni->size_lock, flags);
+	if (unlikely(attr_len > i_size)) {
+		/* Race with shrinking truncate. */
+		attr_len = i_size;
+	}
 	kaddr = kmap_atomic(page, KM_USER0);
 	/* Copy the data to the page. */
 	memcpy(kaddr, (u8*)ctx->attr +
@@ -1341,9 +1382,11 @@ retry_writepage:
 	 * Only $DATA attributes can be encrypted and only unnamed $DATA
 	 * attributes can be compressed.  Index root can have the flags set but
 	 * this means to create compressed/encrypted files, not that the
-	 * attribute is compressed/encrypted.
+	 * attribute is compressed/encrypted.  Note we need to check for
+	 * AT_INDEX_ALLOCATION since this is the type of both directory and
+	 * index inodes.
 	 */
-	if (ni->type != AT_INDEX_ROOT) {
+	if (ni->type != AT_INDEX_ALLOCATION) {
 		/* If file is encrypted, deny access, just like NT4. */
 		if (NInoEncrypted(ni)) {
 			unlock_page(page);
@@ -1379,8 +1422,8 @@ retry_writepage:
 			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
 			kaddr = kmap_atomic(page, KM_USER0);
 			memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
-			flush_dcache_page(page);
 			kunmap_atomic(kaddr, KM_USER0);
+			flush_dcache_page(page);
 		}
 		/* Handle mst protected attributes. */
 		if (NInoMstProtected(ni))
@@ -1443,34 +1486,33 @@ retry_writepage:
 	BUG_ON(PageWriteback(page));
 	set_page_writeback(page);
 	unlock_page(page);
-	/*
-	 * Here, we do not need to zero the out of bounds area everytime
-	 * because the below memcpy() already takes care of the
-	 * mmap-at-end-of-file requirements.  If the file is converted to a
-	 * non-resident one, then the code path use is switched to the
-	 * non-resident one where the zeroing happens on each ntfs_writepage()
-	 * invocation.
-	 */
 	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
 	i_size = i_size_read(vi);
 	if (unlikely(attr_len > i_size)) {
+		/* Race with shrinking truncate or a failed truncate. */
 		attr_len = i_size;
-		ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
+		/*
+		 * If the truncate failed, fix it up now.  If a concurrent
+		 * truncate, we do its job, so it does not have to do anything.
+		 */
+		err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
+				attr_len);
+		/* Shrinking cannot fail. */
+		BUG_ON(err);
 	}
 	kaddr = kmap_atomic(page, KM_USER0);
 	/* Copy the data from the page to the mft record. */
 	memcpy((u8*)ctx->attr +
 			le16_to_cpu(ctx->attr->data.resident.value_offset),
 			kaddr, attr_len);
-	flush_dcache_mft_record_page(ctx->ntfs_ino);
 	/* Zero out of bounds area in the page cache page. */
 	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
-	flush_dcache_page(page);
 	kunmap_atomic(kaddr, KM_USER0);
-
+	flush_dcache_mft_record_page(ctx->ntfs_ino);
+	flush_dcache_page(page);
+	/* We are done with the page. */
 	end_page_writeback(page);
-
-	/* Mark the mft record dirty, so it gets written back. */
+	/* Finally, mark the mft record dirty, so it gets written back. */
 	mark_mft_record_dirty(ctx->ntfs_ino);
 	ntfs_attr_put_search_ctx(ctx);
 	unmap_mft_record(base_ni);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index dc4bbe3acf5..7ec04513180 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1166,6 +1166,8 @@ err_out:
  *
  * Return 0 on success and -errno on error.  In the error case, the inode will
  * have had make_bad_inode() executed on it.
+ *
+ * Note this cannot be called for AT_INDEX_ALLOCATION.
  */
 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 {
@@ -1242,8 +1244,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 			}
 		}
 		/*
-		 * The encryption flag set in an index root just means to
-		 * compress all files.
+		 * The compressed/sparse flag set in an index root just means
+		 * to compress all files.
 		 */
 		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
 			ntfs_error(vi->i_sb, "Found mst protected attribute "
@@ -1319,8 +1321,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 					"the mapping pairs array.");
 			goto unm_err_out;
 		}
-		if ((NInoCompressed(ni) || NInoSparse(ni)) &&
-				ni->type != AT_INDEX_ROOT) {
+		if (NInoCompressed(ni) || NInoSparse(ni)) {
 			if (a->data.non_resident.compression_unit != 4) {
 				ntfs_error(vi->i_sb, "Found nonstandard "
 						"compression unit (%u instead "
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index 3288bcc2c4a..006946efca8 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -1,7 +1,7 @@
 /*
  * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
  *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index f5b2ac92908..061b5ff6b73 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -2,7 +2,7 @@
  * runlist.c - NTFS runlist handling code.  Part of the Linux-NTFS project.
  *
  * Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
+ * Copyright (c) 2002-2005 Richard Russon
  *
  * This program/include file is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as published
@@ -158,17 +158,21 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst,
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
-	if ((dst->lcn < 0) || (src->lcn < 0)) {   /* Are we merging holes? */
-		if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE)
-			return TRUE;
+	/* We can merge unmapped regions even if they are misaligned. */
+	if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED))
+		return TRUE;
+	/* If the runs are misaligned, we cannot merge them. */
+	if ((dst->vcn + dst->length) != src->vcn)
 		return FALSE;
-	}
-	if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
-		return FALSE;
-	if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */
-		return FALSE;
-
-	return TRUE;
+	/* If both runs are non-sparse and contiguous, we can merge them. */
+	if ((dst->lcn >= 0) && (src->lcn >= 0) &&
+			((dst->lcn + dst->length) == src->lcn))
+		return TRUE;
+	/* If we are merging two holes, we can merge them. */
+	if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE))
+		return TRUE;
+	/* Cannot merge. */
+	return FALSE;
 }
 
 /**
@@ -214,14 +218,15 @@ static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src)
 static inline runlist_element *ntfs_rl_append(runlist_element *dst,
 		int dsize, runlist_element *src, int ssize, int loc)
 {
-	BOOL right;
-	int magic;
+	BOOL right = FALSE;	/* Right end of @src needs merging. */
+	int marker;		/* End of the inserted runs. */
 
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
 	/* First, check if the right hand end needs merging. */
-	right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+	if ((loc + 1) < dsize)
+		right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
 
 	/* Space required: @dst size + @src size, less one if we merged. */
 	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right);
@@ -236,18 +241,19 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
 	if (right)
 		__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
 
-	magic = loc + ssize;
+	/* First run after the @src runs that have been inserted. */
+	marker = loc + ssize + 1;
 
 	/* Move the tail of @dst out of the way, then copy in @src. */
-	ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right);
+	ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right));
 	ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
 
 	/* Adjust the size of the preceding hole. */
 	dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
 
 	/* We may have changed the length of the file, so fix the end marker */
-	if (dst[magic + 1].lcn == LCN_ENOENT)
-		dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length;
+	if (dst[marker].lcn == LCN_ENOENT)
+		dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
 
 	return dst;
 }
@@ -279,18 +285,17 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
 static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
 		int dsize, runlist_element *src, int ssize, int loc)
 {
-	BOOL left = FALSE;
-	BOOL disc = FALSE;	/* Discontinuity */
-	BOOL hole = FALSE;	/* Following a hole */
-	int magic;
+	BOOL left = FALSE;	/* Left end of @src needs merging. */
+	BOOL disc = FALSE;	/* Discontinuity between @dst and @src. */
+	int marker;		/* End of the inserted runs. */
 
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
-	/* disc => Discontinuity between the end of @dst and the start of @src.
-	 *	   This means we might need to insert a hole.
-	 * hole => @dst ends with a hole or an unmapped region which we can
-	 *	   extend to match the discontinuity. */
+	/*
+	 * disc => Discontinuity between the end of @dst and the start of @src.
+	 *	   This means we might need to insert a "not mapped" run.
+	 */
 	if (loc == 0)
 		disc = (src[0].vcn > 0);
 	else {
@@ -303,58 +308,49 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
 			merged_length += src->length;
 
 		disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
-		if (disc)
-			hole = (dst[loc - 1].lcn == LCN_HOLE);
 	}
-
-	/* Space required: @dst size + @src size, less one if we merged, plus
-	 * one if there was a discontinuity, less one for a trailing hole. */
-	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole);
+	/*
+	 * Space required: @dst size + @src size, less one if we merged, plus
+	 * one if there was a discontinuity.
+	 */
+	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc);
 	if (IS_ERR(dst))
 		return dst;
 	/*
 	 * We are guaranteed to succeed from here so can start modifying the
 	 * original runlist.
 	 */
-
 	if (left)
 		__ntfs_rl_merge(dst + loc - 1, src);
-
-	magic = loc + ssize - left + disc - hole;
+	/*
+	 * First run after the @src runs that have been inserted.
+	 * Nominally,  @marker equals @loc + @ssize, i.e. location + number of
+	 * runs in @src.  However, if @left, then the first run in @src has
+	 * been merged with one in @dst.  And if @disc, then @dst and @src do
+	 * not meet and we need an extra run to fill the gap.
+	 */
+	marker = loc + ssize - left + disc;
 
 	/* Move the tail of @dst out of the way, then copy in @src. */
-	ntfs_rl_mm(dst, magic, loc, dsize - loc);
-	ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left);
+	ntfs_rl_mm(dst, marker, loc, dsize - loc);
+	ntfs_rl_mc(dst, loc + disc, src, left, ssize - left);
 
-	/* Adjust the VCN of the last run ... */
-	if (dst[magic].lcn <= LCN_HOLE)
-		dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+	/* Adjust the VCN of the first run after the insertion... */
+	dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
 	/* ... and the length. */
-	if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED)
-		dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn;
+	if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED)
+		dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn;
 
-	/* Writing beyond the end of the file and there's a discontinuity. */
+	/* Writing beyond the end of the file and there is a discontinuity. */
 	if (disc) {
-		if (hole)
-			dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn;
-		else {
-			if (loc > 0) {
-				dst[loc].vcn = dst[loc - 1].vcn +
-						dst[loc - 1].length;
-				dst[loc].length = dst[loc + 1].vcn -
-						dst[loc].vcn;
-			} else {
-				dst[loc].vcn = 0;
-				dst[loc].length = dst[loc + 1].vcn;
-			}
-			dst[loc].lcn = LCN_RL_NOT_MAPPED;
+		if (loc > 0) {
+			dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length;
+			dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
+		} else {
+			dst[loc].vcn = 0;
+			dst[loc].length = dst[loc + 1].vcn;
 		}
-
-		magic += hole;
-
-		if (dst[magic].lcn == LCN_ENOENT)
-			dst[magic].vcn = dst[magic - 1].vcn +
-					dst[magic - 1].length;
+		dst[loc].lcn = LCN_RL_NOT_MAPPED;
 	}
 	return dst;
 }
@@ -385,20 +381,23 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
 static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
 		int dsize, runlist_element *src, int ssize, int loc)
 {
-	BOOL left = FALSE;
-	BOOL right;
-	int magic;
+	BOOL left = FALSE;	/* Left end of @src needs merging. */
+	BOOL right = FALSE;	/* Right end of @src needs merging. */
+	int tail;		/* Start of tail of @dst. */
+	int marker;		/* End of the inserted runs. */
 
 	BUG_ON(!dst);
 	BUG_ON(!src);
 
-	/* First, merge the left and right ends, if necessary. */
-	right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+	/* First, see if the left and right ends need merging. */
+	if ((loc + 1) < dsize)
+		right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
 	if (loc > 0)
 		left = ntfs_are_rl_mergeable(dst + loc - 1, src);
-
-	/* Allocate some space. We'll need less if the left, right, or both
-	 * ends were merged. */
+	/*
+	 * Allocate some space.  We will need less if the left, right, or both
+	 * ends get merged.
+	 */
 	dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right);
 	if (IS_ERR(dst))
 		return dst;
@@ -406,21 +405,37 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
 	 * We are guaranteed to succeed from here so can start modifying the
 	 * original runlists.
 	 */
+
+	/* First, merge the left and right ends, if necessary. */
 	if (right)
 		__ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
 	if (left)
 		__ntfs_rl_merge(dst + loc - 1, src);
-
-	/* FIXME: What does this mean? (AIA) */
-	magic = loc + ssize - left;
+	/*
+	 * Offset of the tail of @dst.  This needs to be moved out of the way
+	 * to make space for the runs to be copied from @src, i.e. the first
+	 * run of the tail of @dst.
+	 * Nominally, @tail equals @loc + 1, i.e. location, skipping the
+	 * replaced run.  However, if @right, then one of @dst's runs is
+	 * already merged into @src.
+	 */
+	tail = loc + right + 1;
+	/*
+	 * First run after the @src runs that have been inserted, i.e. where
+	 * the tail of @dst needs to be moved to.
+	 * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+	 * runs in @src.  However, if @left, then the first run in @src has
+	 * been merged with one in @dst.
+	 */
+	marker = loc + ssize - left;
 
 	/* Move the tail of @dst out of the way, then copy in @src. */
-	ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1);
+	ntfs_rl_mm(dst, marker, tail, dsize - tail);
 	ntfs_rl_mc(dst, loc, src, left, ssize - left);
 
-	/* We may have changed the length of the file, so fix the end marker */
-	if (dst[magic].lcn == LCN_ENOENT)
-		dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+	/* We may have changed the length of the file, so fix the end marker. */
+	if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT)
+		dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
 	return dst;
 }
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 23db452ab42..3b33f94020d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -340,6 +340,54 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
 	return result;
 }
 
+
+/* Same as proc_root_link, but this addionally tries to get fs from other
+ * threads in the group */
+static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
+				struct vfsmount **mnt)
+{
+	struct fs_struct *fs;
+	int result = -ENOENT;
+	struct task_struct *leader = proc_task(inode);
+
+	task_lock(leader);
+	fs = leader->fs;
+	if (fs) {
+		atomic_inc(&fs->count);
+		task_unlock(leader);
+	} else {
+		/* Try to get fs from other threads */
+		task_unlock(leader);
+		read_lock(&tasklist_lock);
+		if (pid_alive(leader)) {
+			struct task_struct *task = leader;
+
+			while ((task = next_thread(task)) != leader) {
+				task_lock(task);
+				fs = task->fs;
+				if (fs) {
+					atomic_inc(&fs->count);
+					task_unlock(task);
+					break;
+				}
+				task_unlock(task);
+			}
+		}
+		read_unlock(&tasklist_lock);
+	}
+
+	if (fs) {
+		read_lock(&fs->lock);
+		*mnt = mntget(fs->rootmnt);
+		*dentry = dget(fs->root);
+		read_unlock(&fs->lock);
+		result = 0;
+		put_fs_struct(fs);
+	}
+	return result;
+}
+
+
 #define MAY_PTRACE(task) \
 	(task == current || \
 	(task->parent == current && \
@@ -471,14 +519,14 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 
 /* permission checks */
 
-static int proc_check_root(struct inode *inode)
+/* If the process being read is separated by chroot from the reading process,
+ * don't let the reader access the threads.
+ */
+static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
 {
-	struct dentry *de, *base, *root;
-	struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
+	struct dentry *de, *base;
+	struct vfsmount *our_vfsmnt, *mnt;
 	int res = 0;
-
-	if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
-		return -ENOENT;
 	read_lock(&current->fs->lock);
 	our_vfsmnt = mntget(current->fs->rootmnt);
 	base = dget(current->fs->root);
@@ -511,6 +559,16 @@ out:
 	goto exit;
 }
 
+static int proc_check_root(struct inode *inode)
+{
+	struct dentry *root;
+	struct vfsmount *vfsmnt;
+
+	if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
+		return -ENOENT;
+	return proc_check_chroot(root, vfsmnt);
+}
+
 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
 	if (generic_permission(inode, mask, NULL) != 0)
@@ -518,6 +576,20 @@ static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
 	return proc_check_root(inode);
 }
 
+static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+	struct dentry *root;
+	struct vfsmount *vfsmnt;
+
+	if (generic_permission(inode, mask, NULL) != 0)
+		return -EACCES;
+
+	if (proc_task_root_link(inode, &root, &vfsmnt))
+		return -ENOENT;
+
+	return proc_check_chroot(root, vfsmnt);
+}
+
 extern struct seq_operations proc_pid_maps_op;
 static int maps_open(struct inode *inode, struct file *file)
 {
@@ -1419,7 +1491,7 @@ static struct inode_operations proc_fd_inode_operations = {
 
 static struct inode_operations proc_task_inode_operations = {
 	.lookup		= proc_task_lookup,
-	.permission	= proc_permission,
+	.permission	= proc_task_permission,
 };
 
 #ifdef CONFIG_SECURITY
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index a3453555a94..5b6b0b6038a 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -629,12 +629,4 @@ void __init proc_misc_init(void)
 	if (entry)
 		entry->proc_fops = &proc_sysrq_trigger_operations;
 #endif
-#ifdef CONFIG_PPC32
-	{
-		extern struct file_operations ppc_htab_operations;
-		entry = create_proc_entry("ppc_htab", S_IRUGO|S_IWUSR, NULL);
-		if (entry)
-			entry->proc_fops = &ppc_htab_operations;
-	}
-#endif
 }
diff --git a/include/asm-alpha/compiler.h b/include/asm-alpha/compiler.h
index 399c33b7be5..0a4a8b40dfc 100644
--- a/include/asm-alpha/compiler.h
+++ b/include/asm-alpha/compiler.h
@@ -98,6 +98,9 @@
 #undef inline
 #undef __inline__
 #undef __inline
-
+#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3
+#undef __always_inline
+#define __always_inline		inline __attribute__((always_inline))
+#endif
 
 #endif /* __ALPHA_COMPILER_H */
diff --git a/include/asm-alpha/futex.h b/include/asm-alpha/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-alpha/futex.h
+++ b/include/asm-alpha/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-arm/futex.h b/include/asm-arm/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-arm/futex.h
+++ b/include/asm-arm/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-arm/io.h b/include/asm-arm/io.h
index cfa71a0dffb..5c4ae8f5dbb 100644
--- a/include/asm-arm/io.h
+++ b/include/asm-arm/io.h
@@ -136,9 +136,9 @@ extern void __readwrite_bug(const char *fn);
 /*
  * String version of IO memory access ops:
  */
-extern void _memcpy_fromio(void *, void __iomem *, size_t);
-extern void _memcpy_toio(void __iomem *, const void *, size_t);
-extern void _memset_io(void __iomem *, int, size_t);
+extern void _memcpy_fromio(void *, const volatile void __iomem *, size_t);
+extern void _memcpy_toio(volatile void __iomem *, const void *, size_t);
+extern void _memset_io(volatile void __iomem *, int, size_t);
 
 #define mmiowb()
 
diff --git a/include/asm-arm/mach/arch.h b/include/asm-arm/mach/arch.h
index 56c6bf4ab0c..4fa95084a8c 100644
--- a/include/asm-arm/mach/arch.h
+++ b/include/asm-arm/mach/arch.h
@@ -50,7 +50,7 @@ struct machine_desc {
  */
 #define MACHINE_START(_type,_name)		\
 const struct machine_desc __mach_desc_##_type	\
- __attribute__((__section__(".arch.info"))) = {	\
+ __attribute__((__section__(".arch.info.init"))) = {	\
 	.nr		= MACH_TYPE_##_type,	\
 	.name		= _name,
 
diff --git a/include/asm-arm/setup.h b/include/asm-arm/setup.h
index adcbd79762b..ea3ed246523 100644
--- a/include/asm-arm/setup.h
+++ b/include/asm-arm/setup.h
@@ -171,7 +171,7 @@ struct tagtable {
 	int (*parse)(const struct tag *);
 };
 
-#define __tag __attribute_used__ __attribute__((__section__(".taglist")))
+#define __tag __attribute_used__ __attribute__((__section__(".taglist.init")))
 #define __tagtable(tag, fn) \
 static struct tagtable __tagtable_##fn __tag = { tag, fn }
 
@@ -213,6 +213,6 @@ struct early_params {
 
 #define __early_param(name,fn)					\
 static struct early_params __early_##fn __attribute_used__	\
-__attribute__((__section__("__early_param"))) = { name, fn }
+__attribute__((__section__(".early_param.init"))) = { name, fn }
 
 #endif
diff --git a/include/asm-arm26/futex.h b/include/asm-arm26/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-arm26/futex.h
+++ b/include/asm-arm26/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-cris/futex.h b/include/asm-cris/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-cris/futex.h
+++ b/include/asm-cris/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-frv/futex.h b/include/asm-frv/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-frv/futex.h
+++ b/include/asm-frv/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-h8300/futex.h b/include/asm-h8300/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-h8300/futex.h
+++ b/include/asm-h8300/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h
index 44b9db80647..e7a271d3930 100644
--- a/include/asm-i386/futex.h
+++ b/include/asm-i386/futex.h
@@ -61,7 +61,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	if (op == FUTEX_OP_SET)
 		__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
 	else {
-#ifndef CONFIG_X86_BSWAP
+#if !defined(CONFIG_X86_BSWAP) && !defined(CONFIG_UML)
 		if (boot_cpu_data.x86 == 3)
 			ret = -ENOSYS;
 		else
diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-ia64/futex.h
+++ b/include/asm-ia64/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h
index 97a28b8b2dd..c7d9c9ed38b 100644
--- a/include/asm-ia64/mca.h
+++ b/include/asm-ia64/mca.h
@@ -80,7 +80,12 @@ struct ia64_sal_os_state {
 	u64			sal_ra;			/* Return address in SAL, physical */
 	u64			sal_gp;			/* GP of the SAL - physical */
 	pal_min_state_area_t	*pal_min_state;		/* from R17.  physical in asm, virtual in C */
+	/* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK).
+	 * Note: if the MCA/INIT recovery code wants to resume to a new context
+	 * then it must change these values to reflect the new kernel stack.
+	 */
 	u64			prev_IA64_KR_CURRENT;	/* previous value of IA64_KR(CURRENT) */
+	u64			prev_IA64_KR_CURRENT_STACK;
 	struct task_struct	*prev_task;		/* previous task, NULL if it is not useful */
 	/* Some interrupt registers are not saved in minstate, pt_regs or
 	 * switch_stack.  Because MCA/INIT can occur when interrupts are
diff --git a/include/asm-m32r/futex.h b/include/asm-m32r/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-m32r/futex.h
+++ b/include/asm-m32r/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-m68k/futex.h b/include/asm-m68k/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-m68k/futex.h
+++ b/include/asm-m68k/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-m68knommu/futex.h b/include/asm-m68knommu/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-m68knommu/futex.h
+++ b/include/asm-m68knommu/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-parisc/futex.h
+++ b/include/asm-parisc/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-ppc/atomic.h b/include/asm-powerpc/atomic.h
index eeafd505836..ed4b345ed75 100644
--- a/include/asm-ppc/atomic.h
+++ b/include/asm-powerpc/atomic.h
@@ -1,29 +1,20 @@
+#ifndef _ASM_POWERPC_ATOMIC_H_
+#define _ASM_POWERPC_ATOMIC_H_
+
 /*
  * PowerPC atomic operations
  */
 
-#ifndef _ASM_PPC_ATOMIC_H_
-#define _ASM_PPC_ATOMIC_H_
-
 typedef struct { volatile int counter; } atomic_t;
 
 #ifdef __KERNEL__
+#include <asm/synch.h>
 
-#define ATOMIC_INIT(i)	{ (i) }
+#define ATOMIC_INIT(i)		{ (i) }
 
 #define atomic_read(v)		((v)->counter)
 #define atomic_set(v,i)		(((v)->counter) = (i))
 
-extern void atomic_clear_mask(unsigned long mask, unsigned long *addr);
-
-#ifdef CONFIG_SMP
-#define SMP_SYNC	"sync"
-#define SMP_ISYNC	"\n\tisync"
-#else
-#define SMP_SYNC	""
-#define SMP_ISYNC
-#endif
-
 /* Erratum #77 on the 405 means we need a sync or dcbt before every stwcx.
  * The old ATOMIC_SYNC_FIX covered some but not all of this.
  */
@@ -53,12 +44,13 @@ static __inline__ int atomic_add_return(int a, atomic_t *v)
 	int t;
 
 	__asm__ __volatile__(
+	EIEIO_ON_SMP
 "1:	lwarx	%0,0,%2		# atomic_add_return\n\
 	add	%0,%1,%0\n"
 	PPC405_ERR77(0,%2)
 "	stwcx.	%0,0,%2 \n\
 	bne-	1b"
-	SMP_ISYNC
+	ISYNC_ON_SMP
 	: "=&r" (t)
 	: "r" (a), "r" (&v->counter)
 	: "cc", "memory");
@@ -88,12 +80,13 @@ static __inline__ int atomic_sub_return(int a, atomic_t *v)
 	int t;
 
 	__asm__ __volatile__(
+	EIEIO_ON_SMP
 "1:	lwarx	%0,0,%2		# atomic_sub_return\n\
 	subf	%0,%1,%0\n"
 	PPC405_ERR77(0,%2)
 "	stwcx.	%0,0,%2 \n\
 	bne-	1b"
-	SMP_ISYNC
+	ISYNC_ON_SMP
 	: "=&r" (t)
 	: "r" (a), "r" (&v->counter)
 	: "cc", "memory");
@@ -121,12 +114,13 @@ static __inline__ int atomic_inc_return(atomic_t *v)
 	int t;
 
 	__asm__ __volatile__(
+	EIEIO_ON_SMP
 "1:	lwarx	%0,0,%1		# atomic_inc_return\n\
 	addic	%0,%0,1\n"
 	PPC405_ERR77(0,%1)
 "	stwcx.	%0,0,%1 \n\
 	bne-	1b"
-	SMP_ISYNC
+	ISYNC_ON_SMP
 	: "=&r" (t)
 	: "r" (&v->counter)
 	: "cc", "memory");
@@ -164,12 +158,13 @@ static __inline__ int atomic_dec_return(atomic_t *v)
 	int t;
 
 	__asm__ __volatile__(
+	EIEIO_ON_SMP
 "1:	lwarx	%0,0,%1		# atomic_dec_return\n\
 	addic	%0,%0,-1\n"
 	PPC405_ERR77(0,%1)
 "	stwcx.	%0,0,%1\n\
 	bne-	1b"
-	SMP_ISYNC
+	ISYNC_ON_SMP
 	: "=&r" (t)
 	: "r" (&v->counter)
 	: "cc", "memory");
@@ -189,13 +184,14 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 	int t;
 
 	__asm__ __volatile__(
+	EIEIO_ON_SMP
 "1:	lwarx	%0,0,%1		# atomic_dec_if_positive\n\
 	addic.	%0,%0,-1\n\
 	blt-	2f\n"
 	PPC405_ERR77(0,%1)
 "	stwcx.	%0,0,%1\n\
 	bne-	1b"
-	SMP_ISYNC
+	ISYNC_ON_SMP
 	"\n\
 2:"	: "=&r" (t)
 	: "r" (&v->counter)
@@ -204,11 +200,10 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 	return t;
 }
 
-#define __MB	__asm__ __volatile__ (SMP_SYNC : : : "memory")
-#define smp_mb__before_atomic_dec()	__MB
-#define smp_mb__after_atomic_dec()	__MB
-#define smp_mb__before_atomic_inc()	__MB
-#define smp_mb__after_atomic_inc()	__MB
+#define smp_mb__before_atomic_dec()     smp_mb()
+#define smp_mb__after_atomic_dec()      smp_mb()
+#define smp_mb__before_atomic_inc()     smp_mb()
+#define smp_mb__after_atomic_inc()      smp_mb()
 
 #endif /* __KERNEL__ */
-#endif /* _ASM_PPC_ATOMIC_H_ */
+#endif /* _ASM_POWERPC_ATOMIC_H_ */
diff --git a/include/asm-powerpc/auxvec.h b/include/asm-powerpc/auxvec.h
index 19a099b62cd..79d8c473230 100644
--- a/include/asm-powerpc/auxvec.h
+++ b/include/asm-powerpc/auxvec.h
@@ -14,6 +14,8 @@
 /* The vDSO location. We have to use the same value as x86 for glibc's
  * sake :-)
  */
+#ifdef __powerpc64__
 #define AT_SYSINFO_EHDR		33
+#endif
 
 #endif
diff --git a/include/asm-ppc/dma.h b/include/asm-powerpc/dma.h
index cc8e5cd8c9d..926378d2cd9 100644
--- a/include/asm-ppc/dma.h
+++ b/include/asm-powerpc/dma.h
@@ -1,18 +1,14 @@
+#ifndef _ASM_POWERPC_DMA_H
+#define _ASM_POWERPC_DMA_H
+
 /*
- * include/asm-ppc/dma.h: Defines for using and allocating dma channels.
+ * Defines for using and allocating dma channels.
  * Written by Hennus Bergman, 1992.
  * High DMA channel support & info by Hannu Savolainen
  * and John Boyd, Nov. 1992.
  * Changes for ppc sound by Christoph Nadig
  */
 
-#ifdef __KERNEL__
-
-#include <linux/config.h>
-#include <asm/io.h>
-#include <linux/spinlock.h>
-#include <asm/system.h>
-
 /*
  * Note: Adapted for PowerPC by Gary Thomas
  * Modified by Cort Dougan <cort@cs.nmt.edu>
@@ -25,8 +21,10 @@
  * with a grain of salt.
  */
 
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
+#include <linux/config.h>
+#include <asm/io.h>
+#include <linux/spinlock.h>
+#include <asm/system.h>
 
 #ifndef MAX_DMA_CHANNELS
 #define MAX_DMA_CHANNELS	8
@@ -34,11 +32,9 @@
 
 /* The maximum address that we can perform a DMA transfer to on this platform */
 /* Doesn't really apply... */
-#define MAX_DMA_ADDRESS		0xFFFFFFFF
+#define MAX_DMA_ADDRESS		(~0UL)
 
-/* in arch/ppc/kernel/setup.c -- Cort */
-extern unsigned long DMA_MODE_WRITE, DMA_MODE_READ;
-extern unsigned long ISA_DMA_THRESHOLD;
+#if !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI)
 
 #ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
 #define dma_outb	outb_p
@@ -171,7 +167,18 @@ extern long ppc_cs4232_dma, ppc_cs4232_dma2;
 #define DMA1_EXT_REG		0x40B
 #define DMA2_EXT_REG		0x4D6
 
+#ifndef __powerpc64__
+    /* in arch/ppc/kernel/setup.c -- Cort */
+    extern unsigned int DMA_MODE_WRITE;
+    extern unsigned int DMA_MODE_READ;
+    extern unsigned long ISA_DMA_THRESHOLD;
+#else
+    #define DMA_MODE_READ	0x44	/* I/O to memory, no autoinit, increment, single mode */
+    #define DMA_MODE_WRITE	0x48	/* memory to I/O, no autoinit, increment, single mode */
+#endif
+
 #define DMA_MODE_CASCADE	0xC0	/* pass thru DREQ->HRQ, DACK<-HLDA only */
+
 #define DMA_AUTOINIT		0x10
 
 extern spinlock_t dma_spin_lock;
@@ -200,8 +207,9 @@ static __inline__ void enable_dma(unsigned int dmanr)
 	if (dmanr <= 3) {
 		dma_outb(dmanr, DMA1_MASK_REG);
 		dma_outb(ucDmaCmd, DMA1_CMD_REG);	/* Enable group */
-	} else
+	} else {
 		dma_outb(dmanr & 3, DMA2_MASK_REG);
+	}
 }
 
 static __inline__ void disable_dma(unsigned int dmanr)
@@ -290,19 +298,26 @@ static __inline__ void set_dma_page(unsigned int dmanr, int pagenr)
 static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int phys)
 {
 	if (dmanr <= 3) {
-		dma_outb(phys & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE);
-		dma_outb((phys >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE);
+		dma_outb(phys & 0xff,
+			 ((dmanr & 3) << 1) + IO_DMA1_BASE);
+		dma_outb((phys >> 8) & 0xff,
+			 ((dmanr & 3) << 1) + IO_DMA1_BASE);
 	} else if (dmanr == SND_DMA1 || dmanr == SND_DMA2) {
-		dma_outb(phys & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE);
-		dma_outb((phys >> 8) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE);
+		dma_outb(phys & 0xff,
+			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
+		dma_outb((phys >> 8) & 0xff,
+			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
 		dma_outb((dmanr & 3), DMA2_EXT_REG);
 	} else {
-		dma_outb((phys >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE);
-		dma_outb((phys >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE);
+		dma_outb((phys >> 1) & 0xff,
+			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
+		dma_outb((phys >> 9) & 0xff,
+			 ((dmanr & 3) << 2) + IO_DMA2_BASE);
 	}
 	set_dma_page(dmanr, phys >> 16);
 }
 
+
 /* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
  * a specific DMA channel.
  * You must ensure the parameters are valid.
@@ -315,21 +330,24 @@ static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
 {
 	count--;
 	if (dmanr <= 3) {
-		dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
-		dma_outb((count >> 8) & 0xff, ((dmanr & 3) << 1) + 1 +
-			 IO_DMA1_BASE);
+		dma_outb(count & 0xff,
+			 ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
+		dma_outb((count >> 8) & 0xff,
+			 ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE);
 	} else if (dmanr == SND_DMA1 || dmanr == SND_DMA2) {
-		dma_outb(count & 0xff, ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
-		dma_outb((count >> 8) & 0xff, ((dmanr & 3) << 2) + 2 +
-			 IO_DMA2_BASE);
+		dma_outb(count & 0xff,
+			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
+		dma_outb((count >> 8) & 0xff,
+			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
 	} else {
-		dma_outb((count >> 1) & 0xff, ((dmanr & 3) << 2) + 2 +
-			 IO_DMA2_BASE);
-		dma_outb((count >> 9) & 0xff, ((dmanr & 3) << 2) + 2 +
-			 IO_DMA2_BASE);
+		dma_outb((count >> 1) & 0xff,
+			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
+		dma_outb((count >> 9) & 0xff,
+			 ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE);
 	}
 }
 
+
 /* Get DMA residue count. After a DMA transfer, this
  * should return zero. Reading this while a DMA transfer is
  * still in progress will return unpredictable results.
@@ -340,8 +358,8 @@ static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
  */
 static __inline__ int get_dma_residue(unsigned int dmanr)
 {
-	unsigned int io_port = (dmanr <= 3) ?
-	    ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE
+	unsigned int io_port = (dmanr <= 3)
+	    ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE
 	    : ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE;
 
 	/* using short to get 16-bit wrap around */
@@ -352,7 +370,6 @@ static __inline__ int get_dma_residue(unsigned int dmanr)
 
 	return (dmanr <= 3 || dmanr == SND_DMA1 || dmanr == SND_DMA2)
 	    ? count : (count << 1);
-
 }
 
 /* These are in kernel/dma.c: */
@@ -367,5 +384,7 @@ extern int isa_dma_bridge_buggy;
 #else
 #define isa_dma_bridge_buggy	(0)
 #endif
-#endif				/* _ASM_DMA_H */
-#endif				/* __KERNEL__ */
+
+#endif	/* !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI) */
+
+#endif	/* _ASM_POWERPC_DMA_H */
diff --git a/include/asm-ppc64/hw_irq.h b/include/asm-powerpc/hw_irq.h
index c483897b875..605a65e4206 100644
--- a/include/asm-ppc64/hw_irq.h
+++ b/include/asm-powerpc/hw_irq.h
@@ -1,19 +1,15 @@
 /*
  * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- *
- * Use inline IRQs where possible - Anton Blanchard <anton@au.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
+#ifndef _ASM_POWERPC_HW_IRQ_H
+#define _ASM_POWERPC_HW_IRQ_H
+
 #ifdef __KERNEL__
-#ifndef _PPC64_HW_IRQ_H
-#define _PPC64_HW_IRQ_H
 
 #include <linux/config.h>
 #include <linux/errno.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
 #include <asm/irq.h>
 
 extern void timer_interrupt(struct pt_regs *);
@@ -33,45 +29,60 @@ extern void local_irq_restore(unsigned long);
 
 #else
 
-#define local_save_flags(flags)	((flags) = mfmsr())
+#if defined(CONFIG_BOOKE)
+#define SET_MSR_EE(x)	mtmsr(x)
+#define local_irq_restore(flags)	__asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory")
+#elif defined(__powerpc64__)
+#define SET_MSR_EE(x)	__mtmsrd(x, 1)
 #define local_irq_restore(flags) do { \
 	__asm__ __volatile__("": : :"memory"); \
 	__mtmsrd((flags), 1); \
 } while(0)
+#else
+#define SET_MSR_EE(x)	mtmsr(x)
+#define local_irq_restore(flags)	mtmsr(flags)
+#endif
 
 static inline void local_irq_disable(void)
 {
+#ifdef CONFIG_BOOKE
+	__asm__ __volatile__("wrteei 0": : :"memory");
+#else
 	unsigned long msr;
-	msr = mfmsr();
-	__mtmsrd(msr & ~MSR_EE, 1);
 	__asm__ __volatile__("": : :"memory");
+	msr = mfmsr();
+	SET_MSR_EE(msr & ~MSR_EE);
+#endif
 }
 
 static inline void local_irq_enable(void)
 {
+#ifdef CONFIG_BOOKE
+	__asm__ __volatile__("wrteei 1": : :"memory");
+#else
 	unsigned long msr;
 	__asm__ __volatile__("": : :"memory");
 	msr = mfmsr();
-	__mtmsrd(msr | MSR_EE, 1);
+	SET_MSR_EE(msr | MSR_EE);
+#endif
 }
 
-static inline void __do_save_and_cli(unsigned long *flags)
+static inline void local_irq_save_ptr(unsigned long *flags)
 {
 	unsigned long msr;
 	msr = mfmsr();
 	*flags = msr;
-	__mtmsrd(msr & ~MSR_EE, 1);
+#ifdef CONFIG_BOOKE
+	__asm__ __volatile__("wrteei 0": : :"memory");
+#else
+	SET_MSR_EE(msr & ~MSR_EE);
+#endif
 	__asm__ __volatile__("": : :"memory");
 }
 
-#define local_irq_save(flags)          __do_save_and_cli(&flags)
-
-#define irqs_disabled()				\
-({						\
-	unsigned long flags;			\
-	local_save_flags(flags);		\
-	!(flags & MSR_EE);			\
-})
+#define local_save_flags(flags)	((flags) = mfmsr())
+#define local_irq_save(flags)	local_irq_save_ptr(&flags)
+#define irqs_disabled()		((mfmsr() & MSR_EE) == 0)
 
 #endif /* CONFIG_PPC_ISERIES */
 
@@ -99,6 +110,6 @@ static inline void __do_save_and_cli(unsigned long *flags)
  */
 struct hw_interrupt_type;
 static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
- 
-#endif /* _PPC64_HW_IRQ_H */
-#endif /* __KERNEL__ */
+
+#endif	/* __KERNEL__ */
+#endif	/* _ASM_POWERPC_HW_IRQ_H */
diff --git a/include/asm-powerpc/kdebug.h b/include/asm-powerpc/kdebug.h
new file mode 100644
index 00000000000..7c55abf597f
--- /dev/null
+++ b/include/asm-powerpc/kdebug.h
@@ -0,0 +1,42 @@
+#ifndef _POWERPC_KDEBUG_H
+#define _POWERPC_KDEBUG_H 1
+
+/* nearly identical to x86_64/i386 code */
+
+#include <linux/notifier.h>
+
+struct pt_regs;
+
+struct die_args {
+	struct pt_regs *regs;
+	const char *str;
+	long err;
+	int trapnr;
+	int signr;
+};
+
+/*
+   Note - you should never unregister because that can race with NMIs.
+   If you really want to do it first unregister - then synchronize_sched -
+   then free.
+ */
+int register_die_notifier(struct notifier_block *nb);
+extern struct notifier_block *powerpc_die_chain;
+
+/* Grossly misnamed. */
+enum die_val {
+	DIE_OOPS = 1,
+	DIE_IABR_MATCH,
+	DIE_DABR_MATCH,
+	DIE_BPT,
+	DIE_SSTEP,
+	DIE_PAGE_FAULT,
+};
+
+static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig)
+{
+	struct die_args args = { .regs=regs, .str=str, .err=err, .trapnr=trap,.signr=sig };
+	return notifier_call_chain(&powerpc_die_chain, val, &args);
+}
+
+#endif
diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h
new file mode 100644
index 00000000000..d9129d2b038
--- /dev/null
+++ b/include/asm-powerpc/kprobes.h
@@ -0,0 +1,67 @@
+#ifndef _ASM_KPROBES_H
+#define _ASM_KPROBES_H
+/*
+ *  Kernel Probes (KProbes)
+ *  include/asm-ppc64/kprobes.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation ( includes suggestions from
+ *		Rusty Russell).
+ * 2004-Nov	Modified for PPC64 by Ananth N Mavinakayanahalli
+ *		<ananth@in.ibm.com>
+ */
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct pt_regs;
+
+typedef unsigned int kprobe_opcode_t;
+#define BREAKPOINT_INSTRUCTION	0x7fe00008	/* trap */
+#define MAX_INSN_SIZE 1
+
+#define IS_TW(instr)		(((instr) & 0xfc0007fe) == 0x7c000008)
+#define IS_TD(instr)		(((instr) & 0xfc0007fe) == 0x7c000088)
+#define IS_TDI(instr)		(((instr) & 0xfc000000) == 0x08000000)
+#define IS_TWI(instr)		(((instr) & 0xfc000000) == 0x0c000000)
+
+#define JPROBE_ENTRY(pentry)	(kprobe_opcode_t *)((func_descr_t *)pentry)
+
+#define is_trap(instr)	(IS_TW(instr) || IS_TD(instr) || \
+			IS_TWI(instr) || IS_TDI(instr))
+
+#define ARCH_SUPPORTS_KRETPROBES
+void kretprobe_trampoline(void);
+
+/* Architecture specific copy of original instruction */
+struct arch_specific_insn {
+	/* copy of original instruction */
+	kprobe_opcode_t *insn;
+};
+
+#ifdef CONFIG_KPROBES
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+				    unsigned long val, void *data);
+#else				/* !CONFIG_KPROBES */
+static inline int kprobe_exceptions_notify(struct notifier_block *self,
+					   unsigned long val, void *data)
+{
+	return 0;
+}
+#endif
+#endif				/* _ASM_KPROBES_H */
diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h
new file mode 100644
index 00000000000..f1e24f4b2d1
--- /dev/null
+++ b/include/asm-powerpc/mpic.h
@@ -0,0 +1,279 @@
+#include <linux/irq.h>
+
+/*
+ * Global registers
+ */
+
+#define MPIC_GREG_BASE			0x01000
+
+#define MPIC_GREG_FEATURE_0		0x00000
+#define		MPIC_GREG_FEATURE_LAST_SRC_MASK		0x07ff0000
+#define		MPIC_GREG_FEATURE_LAST_SRC_SHIFT	16
+#define		MPIC_GREG_FEATURE_LAST_CPU_MASK		0x00001f00
+#define		MPIC_GREG_FEATURE_LAST_CPU_SHIFT	8
+#define		MPIC_GREG_FEATURE_VERSION_MASK		0xff
+#define MPIC_GREG_FEATURE_1		0x00010
+#define MPIC_GREG_GLOBAL_CONF_0		0x00020
+#define		MPIC_GREG_GCONF_RESET			0x80000000
+#define		MPIC_GREG_GCONF_8259_PTHROU_DIS		0x20000000
+#define		MPIC_GREG_GCONF_BASE_MASK		0x000fffff
+#define MPIC_GREG_GLOBAL_CONF_1		0x00030
+#define MPIC_GREG_VENDOR_0		0x00040
+#define MPIC_GREG_VENDOR_1		0x00050
+#define MPIC_GREG_VENDOR_2		0x00060
+#define MPIC_GREG_VENDOR_3		0x00070
+#define MPIC_GREG_VENDOR_ID		0x00080
+#define 	MPIC_GREG_VENDOR_ID_STEPPING_MASK	0x00ff0000
+#define 	MPIC_GREG_VENDOR_ID_STEPPING_SHIFT	16
+#define 	MPIC_GREG_VENDOR_ID_DEVICE_ID_MASK	0x0000ff00
+#define 	MPIC_GREG_VENDOR_ID_DEVICE_ID_SHIFT	8
+#define 	MPIC_GREG_VENDOR_ID_VENDOR_ID_MASK	0x000000ff
+#define MPIC_GREG_PROCESSOR_INIT	0x00090
+#define MPIC_GREG_IPI_VECTOR_PRI_0	0x000a0
+#define MPIC_GREG_IPI_VECTOR_PRI_1	0x000b0
+#define MPIC_GREG_IPI_VECTOR_PRI_2	0x000c0
+#define MPIC_GREG_IPI_VECTOR_PRI_3	0x000d0
+#define MPIC_GREG_SPURIOUS		0x000e0
+#define MPIC_GREG_TIMER_FREQ		0x000f0
+
+/*
+ *
+ * Timer registers
+ */
+#define MPIC_TIMER_BASE			0x01100
+#define MPIC_TIMER_STRIDE		0x40
+
+#define MPIC_TIMER_CURRENT_CNT		0x00000
+#define MPIC_TIMER_BASE_CNT		0x00010
+#define MPIC_TIMER_VECTOR_PRI		0x00020
+#define MPIC_TIMER_DESTINATION		0x00030
+
+/*
+ * Per-Processor registers
+ */
+
+#define MPIC_CPU_THISBASE		0x00000
+#define MPIC_CPU_BASE			0x20000
+#define MPIC_CPU_STRIDE			0x01000
+
+#define MPIC_CPU_IPI_DISPATCH_0		0x00040
+#define MPIC_CPU_IPI_DISPATCH_1		0x00050
+#define MPIC_CPU_IPI_DISPATCH_2		0x00060
+#define MPIC_CPU_IPI_DISPATCH_3		0x00070
+#define MPIC_CPU_CURRENT_TASK_PRI	0x00080
+#define 	MPIC_CPU_TASKPRI_MASK			0x0000000f
+#define MPIC_CPU_WHOAMI			0x00090
+#define 	MPIC_CPU_WHOAMI_MASK			0x0000001f
+#define MPIC_CPU_INTACK			0x000a0
+#define MPIC_CPU_EOI			0x000b0
+
+/*
+ * Per-source registers
+ */
+
+#define MPIC_IRQ_BASE			0x10000
+#define MPIC_IRQ_STRIDE			0x00020
+#define MPIC_IRQ_VECTOR_PRI		0x00000
+#define 	MPIC_VECPRI_MASK			0x80000000
+#define 	MPIC_VECPRI_ACTIVITY			0x40000000	/* Read Only */
+#define 	MPIC_VECPRI_PRIORITY_MASK		0x000f0000
+#define 	MPIC_VECPRI_PRIORITY_SHIFT		16
+#define 	MPIC_VECPRI_VECTOR_MASK			0x000007ff
+#define 	MPIC_VECPRI_POLARITY_POSITIVE		0x00800000
+#define 	MPIC_VECPRI_POLARITY_NEGATIVE		0x00000000
+#define 	MPIC_VECPRI_POLARITY_MASK		0x00800000
+#define 	MPIC_VECPRI_SENSE_LEVEL			0x00400000
+#define 	MPIC_VECPRI_SENSE_EDGE			0x00000000
+#define 	MPIC_VECPRI_SENSE_MASK			0x00400000
+#define MPIC_IRQ_DESTINATION		0x00010
+
+#define MPIC_MAX_IRQ_SOURCES	2048
+#define MPIC_MAX_CPUS		32
+#define MPIC_MAX_ISU		32
+
+/*
+ * Special vector numbers (internal use only)
+ */
+#define MPIC_VEC_SPURRIOUS	255
+#define MPIC_VEC_IPI_3		254
+#define MPIC_VEC_IPI_2		253
+#define MPIC_VEC_IPI_1		252
+#define MPIC_VEC_IPI_0		251
+
+/* unused */
+#define MPIC_VEC_TIMER_3	250
+#define MPIC_VEC_TIMER_2	249
+#define MPIC_VEC_TIMER_1	248
+#define MPIC_VEC_TIMER_0	247
+
+/* Type definition of the cascade handler */
+typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data);
+
+#ifdef CONFIG_MPIC_BROKEN_U3
+/* Fixup table entry */
+struct mpic_irq_fixup
+{
+	u8 __iomem	*base;
+	unsigned int   irq;
+};
+#endif /* CONFIG_MPIC_BROKEN_U3 */
+
+
+/* The instance data of a given MPIC */
+struct mpic
+{
+	/* The "linux" controller struct */
+	hw_irq_controller	hc_irq;
+#ifdef CONFIG_SMP
+	hw_irq_controller	hc_ipi;
+#endif
+	const char		*name;
+	/* Flags */
+	unsigned int		flags;
+	/* How many irq sources in a given ISU */
+	unsigned int		isu_size;
+	unsigned int		isu_shift;
+	unsigned int		isu_mask;
+	/* Offset of irq vector numbers */
+	unsigned int		irq_offset;	
+	unsigned int		irq_count;
+	/* Offset of ipi vector numbers */
+	unsigned int		ipi_offset;
+	/* Number of sources */
+	unsigned int		num_sources;
+	/* Number of CPUs */
+	unsigned int		num_cpus;
+	/* cascade handler */
+	mpic_cascade_t		cascade;
+	void			*cascade_data;
+	unsigned int		cascade_vec;
+	/* senses array */
+	unsigned char		*senses;
+	unsigned int		senses_count;
+
+#ifdef CONFIG_MPIC_BROKEN_U3
+	/* The fixup table */
+	struct mpic_irq_fixup	*fixups;
+	spinlock_t		fixup_lock;
+#endif
+
+	/* The various ioremap'ed bases */
+	volatile u32 __iomem	*gregs;
+	volatile u32 __iomem	*tmregs;
+	volatile u32 __iomem	*cpuregs[MPIC_MAX_CPUS];
+	volatile u32 __iomem	*isus[MPIC_MAX_ISU];
+
+	/* link */
+	struct mpic		*next;
+};
+
+/* This is the primary controller, only that one has IPIs and
+ * has afinity control. A non-primary MPIC always uses CPU0
+ * registers only
+ */
+#define MPIC_PRIMARY			0x00000001
+/* Set this for a big-endian MPIC */
+#define MPIC_BIG_ENDIAN			0x00000002
+/* Broken U3 MPIC */
+#define MPIC_BROKEN_U3			0x00000004
+/* Broken IPI registers (autodetected) */
+#define MPIC_BROKEN_IPI			0x00000008
+/* MPIC wants a reset */
+#define MPIC_WANTS_RESET		0x00000010
+
+/* Allocate the controller structure and setup the linux irq descs
+ * for the range if interrupts passed in. No HW initialization is
+ * actually performed.
+ * 
+ * @phys_addr:	physial base address of the MPIC
+ * @flags:	flags, see constants above
+ * @isu_size:	number of interrupts in an ISU. Use 0 to use a
+ *              standard ISU-less setup (aka powermac)
+ * @irq_offset: first irq number to assign to this mpic
+ * @irq_count:  number of irqs to use with this mpic IRQ sources. Pass 0
+ *	        to match the number of sources
+ * @ipi_offset: first irq number to assign to this mpic IPI sources,
+ *		used only on primary mpic
+ * @senses:	array of sense values
+ * @senses_num: number of entries in the array
+ *
+ * Note about the sense array. If none is passed, all interrupts are
+ * setup to be level negative unless MPIC_BROKEN_U3 is set in which
+ * case they are edge positive (and the array is ignored anyway).
+ * The values in the array start at the first source of the MPIC,
+ * that is senses[0] correspond to linux irq "irq_offset".
+ */
+extern struct mpic *mpic_alloc(unsigned long phys_addr,
+			       unsigned int flags,
+			       unsigned int isu_size,
+			       unsigned int irq_offset,
+			       unsigned int irq_count,
+			       unsigned int ipi_offset,
+			       unsigned char *senses,
+			       unsigned int senses_num,
+			       const char *name);
+
+/* Assign ISUs, to call before mpic_init()
+ *
+ * @mpic:	controller structure as returned by mpic_alloc()
+ * @isu_num:	ISU number
+ * @phys_addr:	physical address of the ISU
+ */
+extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num,
+			    unsigned long phys_addr);
+
+/* Initialize the controller. After this has been called, none of the above
+ * should be called again for this mpic
+ */
+extern void mpic_init(struct mpic *mpic);
+
+/* Setup a cascade. Currently, only one cascade is supported this
+ * way, though you can always do a normal request_irq() and add
+ * other cascades this way. You should call this _after_ having
+ * added all the ISUs
+ *
+ * @irq_no:	"linux" irq number of the cascade (that is offset'ed vector)
+ * @handler:	cascade handler function
+ */
+extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t hanlder,
+			       void *data);
+
+/*
+ * All of the following functions must only be used after the
+ * ISUs have been assigned and the controller fully initialized
+ * with mpic_init()
+ */
+
+
+/* Change/Read the priority of an interrupt. Default is 8 for irqs and
+ * 10 for IPIs. You can call this on both IPIs and IRQ numbers, but the
+ * IPI number is then the offset'ed (linux irq number mapped to the IPI)
+ */
+extern void mpic_irq_set_priority(unsigned int irq, unsigned int pri);
+extern unsigned int mpic_irq_get_priority(unsigned int irq);
+
+/* Setup a non-boot CPU */
+extern void mpic_setup_this_cpu(void);
+
+/* Clean up for kexec (or cpu offline or ...) */
+extern void mpic_teardown_this_cpu(int secondary);
+
+/* Get the current cpu priority for this cpu (0..15) */
+extern int mpic_cpu_get_priority(void);
+
+/* Set the current cpu priority for this cpu */
+extern void mpic_cpu_set_priority(int prio);
+
+/* Request IPIs on primary mpic */
+extern void mpic_request_ipis(void);
+
+/* Send an IPI (non offseted number 0..3) */
+extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask);
+
+/* Fetch interrupt from a given mpic */
+extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs);
+/* This one gets to the primary mpic */
+extern int mpic_get_irq(struct pt_regs *regs);
+
+/* global mpic for pSeries */
+extern struct mpic *pSeries_mpic;
diff --git a/include/asm-powerpc/ppc_asm.h b/include/asm-powerpc/ppc_asm.h
index 553035cda00..4efa71878fa 100644
--- a/include/asm-powerpc/ppc_asm.h
+++ b/include/asm-powerpc/ppc_asm.h
@@ -75,8 +75,11 @@
 #define REST_32EVRS(n,s,base)	REST_16EVRS(n,s,base); REST_16EVRS(n+16,s,base)
 
 /* Macros to adjust thread priority for Iseries hardware multithreading */
+#define HMT_VERY_LOW    or   31,31,31	# very low priority\n"
 #define HMT_LOW		or 1,1,1
+#define HMT_MEDIUM_LOW  or   6,6,6	# medium low priority\n"
 #define HMT_MEDIUM	or 2,2,2
+#define HMT_MEDIUM_HIGH or   5,5,5	# medium high priority\n"
 #define HMT_HIGH	or 3,3,3
 
 /* handle instructions that older assemblers may not know */
diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h
new file mode 100644
index 00000000000..f97a5f1761b
--- /dev/null
+++ b/include/asm-powerpc/reg.h
@@ -0,0 +1,446 @@
+/*
+ * Contains the definition of registers common to all PowerPC variants.
+ * If a register definition has been changed in a different PowerPC
+ * variant, we will case it in #ifndef XXX ... #endif, and have the
+ * number used in the Programming Environments Manual For 32-Bit
+ * Implementations of the PowerPC Architecture (a.k.a. Green Book) here.
+ */
+
+#ifdef __KERNEL__
+#ifndef __ASM_PPC_REGS_H__
+#define __ASM_PPC_REGS_H__
+
+#include <linux/stringify.h>
+
+/* Pickup Book E specific registers. */
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#include <asm/reg_booke.h>
+#endif
+
+/* Machine State Register (MSR) Fields */
+#define MSR_SF		(1<<63)
+#define MSR_ISF		(1<<61)
+#define MSR_VEC		(1<<25)		/* Enable AltiVec */
+#define MSR_POW		(1<<18)		/* Enable Power Management */
+#define MSR_WE		(1<<18)		/* Wait State Enable */
+#define MSR_TGPR	(1<<17)		/* TLB Update registers in use */
+#define MSR_CE		(1<<17)		/* Critical Interrupt Enable */
+#define MSR_ILE		(1<<16)		/* Interrupt Little Endian */
+#define MSR_EE		(1<<15)		/* External Interrupt Enable */
+#define MSR_PR		(1<<14)		/* Problem State / Privilege Level */
+#define MSR_FP		(1<<13)		/* Floating Point enable */
+#define MSR_ME		(1<<12)		/* Machine Check Enable */
+#define MSR_FE0		(1<<11)		/* Floating Exception mode 0 */
+#define MSR_SE		(1<<10)		/* Single Step */
+#define MSR_BE		(1<<9)		/* Branch Trace */
+#define MSR_DE		(1<<9)		/* Debug Exception Enable */
+#define MSR_FE1		(1<<8)		/* Floating Exception mode 1 */
+#define MSR_IP		(1<<6)		/* Exception prefix 0x000/0xFFF */
+#define MSR_IR		(1<<5)		/* Instruction Relocate */
+#define MSR_DR		(1<<4)		/* Data Relocate */
+#define MSR_PE		(1<<3)		/* Protection Enable */
+#define MSR_PX		(1<<2)		/* Protection Exclusive Mode */
+#define MSR_RI		(1<<1)		/* Recoverable Exception */
+#define MSR_LE		(1<<0)		/* Little Endian */
+
+/* Default MSR for kernel mode. */
+#ifdef CONFIG_APUS_FAST_EXCEPT
+#define MSR_KERNEL	(MSR_ME|MSR_IP|MSR_RI|MSR_IR|MSR_DR)
+#endif
+
+#ifndef MSR_KERNEL
+#define MSR_KERNEL	(MSR_ME|MSR_RI|MSR_IR|MSR_DR)
+#endif
+
+#define MSR_USER	(MSR_KERNEL|MSR_PR|MSR_EE)
+
+/* Floating Point Status and Control Register (FPSCR) Fields */
+#define FPSCR_FX	0x80000000	/* FPU exception summary */
+#define FPSCR_FEX	0x40000000	/* FPU enabled exception summary */
+#define FPSCR_VX	0x20000000	/* Invalid operation summary */
+#define FPSCR_OX	0x10000000	/* Overflow exception summary */
+#define FPSCR_UX	0x08000000	/* Underflow exception summary */
+#define FPSCR_ZX	0x04000000	/* Zero-divide exception summary */
+#define FPSCR_XX	0x02000000	/* Inexact exception summary */
+#define FPSCR_VXSNAN	0x01000000	/* Invalid op for SNaN */
+#define FPSCR_VXISI	0x00800000	/* Invalid op for Inv - Inv */
+#define FPSCR_VXIDI	0x00400000	/* Invalid op for Inv / Inv */
+#define FPSCR_VXZDZ	0x00200000	/* Invalid op for Zero / Zero */
+#define FPSCR_VXIMZ	0x00100000	/* Invalid op for Inv * Zero */
+#define FPSCR_VXVC	0x00080000	/* Invalid op for Compare */
+#define FPSCR_FR	0x00040000	/* Fraction rounded */
+#define FPSCR_FI	0x00020000	/* Fraction inexact */
+#define FPSCR_FPRF	0x0001f000	/* FPU Result Flags */
+#define FPSCR_FPCC	0x0000f000	/* FPU Condition Codes */
+#define FPSCR_VXSOFT	0x00000400	/* Invalid op for software request */
+#define FPSCR_VXSQRT	0x00000200	/* Invalid op for square root */
+#define FPSCR_VXCVI	0x00000100	/* Invalid op for integer convert */
+#define FPSCR_VE	0x00000080	/* Invalid op exception enable */
+#define FPSCR_OE	0x00000040	/* IEEE overflow exception enable */
+#define FPSCR_UE	0x00000020	/* IEEE underflow exception enable */
+#define FPSCR_ZE	0x00000010	/* IEEE zero divide exception enable */
+#define FPSCR_XE	0x00000008	/* FP inexact exception enable */
+#define FPSCR_NI	0x00000004	/* FPU non IEEE-Mode */
+#define FPSCR_RN	0x00000003	/* FPU rounding control */
+
+/* Special Purpose Registers (SPRNs)*/
+#define SPRN_CTR	0x009	/* Count Register */
+#define SPRN_DABR	0x3F5	/* Data Address Breakpoint Register */
+#define   DABR_TRANSLATION	(1UL << 2)
+#define SPRN_DAR	0x013	/* Data Address Register */
+#define	SPRN_DSISR	0x012	/* Data Storage Interrupt Status Register */
+#define   DSISR_NOHPTE		0x40000000	/* no translation found */
+#define   DSISR_PROTFAULT	0x08000000	/* protection fault */
+#define   DSISR_ISSTORE		0x02000000	/* access was a store */
+#define   DSISR_DABRMATCH	0x00400000	/* hit data breakpoint */
+#define   DSISR_NOSEGMENT	0x00200000	/* STAB/SLB miss */
+#define SPRN_TBRL	0x10C	/* Time Base Read Lower Register (user, R/O) */
+#define SPRN_TBRU	0x10D	/* Time Base Read Upper Register (user, R/O) */
+#define SPRN_TBWL	0x11C	/* Time Base Lower Register (super, R/W) */
+#define SPRN_TBWU	0x11D	/* Time Base Upper Register (super, R/W) */
+#define SPRN_HIOR	0x137	/* 970 Hypervisor interrupt offset */
+#define SPRN_DBAT0L	0x219	/* Data BAT 0 Lower Register */
+#define SPRN_DBAT0U	0x218	/* Data BAT 0 Upper Register */
+#define SPRN_DBAT1L	0x21B	/* Data BAT 1 Lower Register */
+#define SPRN_DBAT1U	0x21A	/* Data BAT 1 Upper Register */
+#define SPRN_DBAT2L	0x21D	/* Data BAT 2 Lower Register */
+#define SPRN_DBAT2U	0x21C	/* Data BAT 2 Upper Register */
+#define SPRN_DBAT3L	0x21F	/* Data BAT 3 Lower Register */
+#define SPRN_DBAT3U	0x21E	/* Data BAT 3 Upper Register */
+#define SPRN_DBAT4L	0x239	/* Data BAT 4 Lower Register */
+#define SPRN_DBAT4U	0x238	/* Data BAT 4 Upper Register */
+#define SPRN_DBAT5L	0x23B	/* Data BAT 5 Lower Register */
+#define SPRN_DBAT5U	0x23A	/* Data BAT 5 Upper Register */
+#define SPRN_DBAT6L	0x23D	/* Data BAT 6 Lower Register */
+#define SPRN_DBAT6U	0x23C	/* Data BAT 6 Upper Register */
+#define SPRN_DBAT7L	0x23F	/* Data BAT 7 Lower Register */
+#define SPRN_DBAT7U	0x23E	/* Data BAT 7 Upper Register */
+
+#define SPRN_DEC	0x016		/* Decrement Register */
+#define SPRN_DER	0x095		/* Debug Enable Regsiter */
+#define DER_RSTE	0x40000000	/* Reset Interrupt */
+#define DER_CHSTPE	0x20000000	/* Check Stop */
+#define DER_MCIE	0x10000000	/* Machine Check Interrupt */
+#define DER_EXTIE	0x02000000	/* External Interrupt */
+#define DER_ALIE	0x01000000	/* Alignment Interrupt */
+#define DER_PRIE	0x00800000	/* Program Interrupt */
+#define DER_FPUVIE	0x00400000	/* FP Unavailable Interrupt */
+#define DER_DECIE	0x00200000	/* Decrementer Interrupt */
+#define DER_SYSIE	0x00040000	/* System Call Interrupt */
+#define DER_TRE		0x00020000	/* Trace Interrupt */
+#define DER_SEIE	0x00004000	/* FP SW Emulation Interrupt */
+#define DER_ITLBMSE	0x00002000	/* Imp. Spec. Instruction TLB Miss */
+#define DER_ITLBERE	0x00001000	/* Imp. Spec. Instruction TLB Error */
+#define DER_DTLBMSE	0x00000800	/* Imp. Spec. Data TLB Miss */
+#define DER_DTLBERE	0x00000400	/* Imp. Spec. Data TLB Error */
+#define DER_LBRKE	0x00000008	/* Load/Store Breakpoint Interrupt */
+#define DER_IBRKE	0x00000004	/* Instruction Breakpoint Interrupt */
+#define DER_EBRKE	0x00000002	/* External Breakpoint Interrupt */
+#define DER_DPIE	0x00000001	/* Dev. Port Nonmaskable Request */
+#define SPRN_DMISS	0x3D0		/* Data TLB Miss Register */
+#define SPRN_EAR	0x11A		/* External Address Register */
+#define SPRN_HASH1	0x3D2		/* Primary Hash Address Register */
+#define SPRN_HASH2	0x3D3		/* Secondary Hash Address Resgister */
+#define SPRN_HID0	0x3F0		/* Hardware Implementation Register 0 */
+#define HID0_EMCP	(1<<31)		/* Enable Machine Check pin */
+#define HID0_EBA	(1<<29)		/* Enable Bus Address Parity */
+#define HID0_EBD	(1<<28)		/* Enable Bus Data Parity */
+#define HID0_SBCLK	(1<<27)
+#define HID0_EICE	(1<<26)
+#define HID0_TBEN	(1<<26)		/* Timebase enable - 745x */
+#define HID0_ECLK	(1<<25)
+#define HID0_PAR	(1<<24)
+#define HID0_STEN	(1<<24)		/* Software table search enable - 745x */
+#define HID0_HIGH_BAT	(1<<23)		/* Enable high BATs - 7455 */
+#define HID0_DOZE	(1<<23)
+#define HID0_NAP	(1<<22)
+#define HID0_SLEEP	(1<<21)
+#define HID0_DPM	(1<<20)
+#define HID0_BHTCLR	(1<<18)		/* Clear branch history table - 7450 */
+#define HID0_XAEN	(1<<17)		/* Extended addressing enable - 7450 */
+#define HID0_NHR	(1<<16)		/* Not hard reset (software bit-7450)*/
+#define HID0_ICE	(1<<15)		/* Instruction Cache Enable */
+#define HID0_DCE	(1<<14)		/* Data Cache Enable */
+#define HID0_ILOCK	(1<<13)		/* Instruction Cache Lock */
+#define HID0_DLOCK	(1<<12)		/* Data Cache Lock */
+#define HID0_ICFI	(1<<11)		/* Instr. Cache Flash Invalidate */
+#define HID0_DCI	(1<<10)		/* Data Cache Invalidate */
+#define HID0_SPD	(1<<9)		/* Speculative disable */
+#define HID0_DAPUEN	(1<<8)		/* Debug APU enable */
+#define HID0_SGE	(1<<7)		/* Store Gathering Enable */
+#define HID0_SIED	(1<<7)		/* Serial Instr. Execution [Disable] */
+#define HID0_DFCA	(1<<6)		/* Data Cache Flush Assist */
+#define HID0_LRSTK	(1<<4)		/* Link register stack - 745x */
+#define HID0_BTIC	(1<<5)		/* Branch Target Instr Cache Enable */
+#define HID0_ABE	(1<<3)		/* Address Broadcast Enable */
+#define HID0_FOLD	(1<<3)		/* Branch Folding enable - 745x */
+#define HID0_BHTE	(1<<2)		/* Branch History Table Enable */
+#define HID0_BTCD	(1<<1)		/* Branch target cache disable */
+#define HID0_NOPDST	(1<<1)		/* No-op dst, dstt, etc. instr. */
+#define HID0_NOPTI	(1<<0)		/* No-op dcbt and dcbst instr. */
+
+#define SPRN_HID1	0x3F1		/* Hardware Implementation Register 1 */
+#define HID1_EMCP	(1<<31)		/* 7450 Machine Check Pin Enable */
+#define HID1_DFS	(1<<22)		/* 7447A Dynamic Frequency Scaling */
+#define HID1_PC0	(1<<16)		/* 7450 PLL_CFG[0] */
+#define HID1_PC1	(1<<15)		/* 7450 PLL_CFG[1] */
+#define HID1_PC2	(1<<14)		/* 7450 PLL_CFG[2] */
+#define HID1_PC3	(1<<13)		/* 7450 PLL_CFG[3] */
+#define HID1_SYNCBE	(1<<11)		/* 7450 ABE for sync, eieio */
+#define HID1_ABE	(1<<10)		/* 7450 Address Broadcast Enable */
+#define HID1_PS		(1<<16)		/* 750FX PLL selection */
+#define SPRN_HID2	0x3F8		/* Hardware Implementation Register 2 */
+#define SPRN_IABR	0x3F2	/* Instruction Address Breakpoint Register */
+#define SPRN_HID4	0x3F4		/* 970 HID4 */
+#define SPRN_HID5	0x3F6		/* 970 HID5 */
+#if !defined(SPRN_IAC1) && !defined(SPRN_IAC2)
+#define SPRN_IAC1	0x3F4		/* Instruction Address Compare 1 */
+#define SPRN_IAC2	0x3F5		/* Instruction Address Compare 2 */
+#endif
+#define SPRN_IBAT0L	0x211		/* Instruction BAT 0 Lower Register */
+#define SPRN_IBAT0U	0x210		/* Instruction BAT 0 Upper Register */
+#define SPRN_IBAT1L	0x213		/* Instruction BAT 1 Lower Register */
+#define SPRN_IBAT1U	0x212		/* Instruction BAT 1 Upper Register */
+#define SPRN_IBAT2L	0x215		/* Instruction BAT 2 Lower Register */
+#define SPRN_IBAT2U	0x214		/* Instruction BAT 2 Upper Register */
+#define SPRN_IBAT3L	0x217		/* Instruction BAT 3 Lower Register */
+#define SPRN_IBAT3U	0x216		/* Instruction BAT 3 Upper Register */
+#define SPRN_IBAT4L	0x231		/* Instruction BAT 4 Lower Register */
+#define SPRN_IBAT4U	0x230		/* Instruction BAT 4 Upper Register */
+#define SPRN_IBAT5L	0x233		/* Instruction BAT 5 Lower Register */
+#define SPRN_IBAT5U	0x232		/* Instruction BAT 5 Upper Register */
+#define SPRN_IBAT6L	0x235		/* Instruction BAT 6 Lower Register */
+#define SPRN_IBAT6U	0x234		/* Instruction BAT 6 Upper Register */
+#define SPRN_IBAT7L	0x237		/* Instruction BAT 7 Lower Register */
+#define SPRN_IBAT7U	0x236		/* Instruction BAT 7 Upper Register */
+#define SPRN_ICMP	0x3D5		/* Instruction TLB Compare Register */
+#define SPRN_ICTC	0x3FB	/* Instruction Cache Throttling Control Reg */
+#define SPRN_ICTRL	0x3F3	/* 1011 7450 icache and interrupt ctrl */
+#define ICTRL_EICE	0x08000000	/* enable icache parity errs */
+#define ICTRL_EDC	0x04000000	/* enable dcache parity errs */
+#define ICTRL_EICP	0x00000100	/* enable icache par. check */
+#define SPRN_IMISS	0x3D4		/* Instruction TLB Miss Register */
+#define SPRN_IMMR	0x27E		/* Internal Memory Map Register */
+#define SPRN_L2CR	0x3F9		/* Level 2 Cache Control Regsiter */
+#define SPRN_L2CR2	0x3f8
+#define L2CR_L2E		0x80000000	/* L2 enable */
+#define L2CR_L2PE		0x40000000	/* L2 parity enable */
+#define L2CR_L2SIZ_MASK		0x30000000	/* L2 size mask */
+#define L2CR_L2SIZ_256KB	0x10000000	/* L2 size 256KB */
+#define L2CR_L2SIZ_512KB	0x20000000	/* L2 size 512KB */
+#define L2CR_L2SIZ_1MB		0x30000000	/* L2 size 1MB */
+#define L2CR_L2CLK_MASK		0x0e000000	/* L2 clock mask */
+#define L2CR_L2CLK_DISABLED	0x00000000	/* L2 clock disabled */
+#define L2CR_L2CLK_DIV1		0x02000000	/* L2 clock / 1 */
+#define L2CR_L2CLK_DIV1_5	0x04000000	/* L2 clock / 1.5 */
+#define L2CR_L2CLK_DIV2		0x08000000	/* L2 clock / 2 */
+#define L2CR_L2CLK_DIV2_5	0x0a000000	/* L2 clock / 2.5 */
+#define L2CR_L2CLK_DIV3		0x0c000000	/* L2 clock / 3 */
+#define L2CR_L2RAM_MASK		0x01800000	/* L2 RAM type mask */
+#define L2CR_L2RAM_FLOW		0x00000000	/* L2 RAM flow through */
+#define L2CR_L2RAM_PIPE		0x01000000	/* L2 RAM pipelined */
+#define L2CR_L2RAM_PIPE_LW	0x01800000	/* L2 RAM pipelined latewr */
+#define L2CR_L2DO		0x00400000	/* L2 data only */
+#define L2CR_L2I		0x00200000	/* L2 global invalidate */
+#define L2CR_L2CTL		0x00100000	/* L2 RAM control */
+#define L2CR_L2WT		0x00080000	/* L2 write-through */
+#define L2CR_L2TS		0x00040000	/* L2 test support */
+#define L2CR_L2OH_MASK		0x00030000	/* L2 output hold mask */
+#define L2CR_L2OH_0_5		0x00000000	/* L2 output hold 0.5 ns */
+#define L2CR_L2OH_1_0		0x00010000	/* L2 output hold 1.0 ns */
+#define L2CR_L2SL		0x00008000	/* L2 DLL slow */
+#define L2CR_L2DF		0x00004000	/* L2 differential clock */
+#define L2CR_L2BYP		0x00002000	/* L2 DLL bypass */
+#define L2CR_L2IP		0x00000001	/* L2 GI in progress */
+#define L2CR_L2IO_745x		0x00100000	/* L2 instr. only (745x) */
+#define L2CR_L2DO_745x		0x00010000	/* L2 data only (745x) */
+#define L2CR_L2REP_745x		0x00001000	/* L2 repl. algorithm (745x) */
+#define L2CR_L2HWF_745x		0x00000800	/* L2 hardware flush (745x) */
+#define SPRN_L3CR		0x3FA	/* Level 3 Cache Control Regsiter */
+#define L3CR_L3E		0x80000000	/* L3 enable */
+#define L3CR_L3PE		0x40000000	/* L3 data parity enable */
+#define L3CR_L3APE		0x20000000	/* L3 addr parity enable */
+#define L3CR_L3SIZ		0x10000000	/* L3 size */
+#define L3CR_L3CLKEN		0x08000000	/* L3 clock enable */
+#define L3CR_L3RES		0x04000000	/* L3 special reserved bit */
+#define L3CR_L3CLKDIV		0x03800000	/* L3 clock divisor */
+#define L3CR_L3IO		0x00400000	/* L3 instruction only */
+#define L3CR_L3SPO		0x00040000	/* L3 sample point override */
+#define L3CR_L3CKSP		0x00030000	/* L3 clock sample point */
+#define L3CR_L3PSP		0x0000e000	/* L3 P-clock sample point */
+#define L3CR_L3REP		0x00001000	/* L3 replacement algorithm */
+#define L3CR_L3HWF		0x00000800	/* L3 hardware flush */
+#define L3CR_L3I		0x00000400	/* L3 global invalidate */
+#define L3CR_L3RT		0x00000300	/* L3 SRAM type */
+#define L3CR_L3NIRCA		0x00000080	/* L3 non-integer ratio clock adj. */
+#define L3CR_L3DO		0x00000040	/* L3 data only mode */
+#define L3CR_PMEN		0x00000004	/* L3 private memory enable */
+#define L3CR_PMSIZ		0x00000001	/* L3 private memory size */
+#define SPRN_MSSCR0	0x3f6	/* Memory Subsystem Control Register 0 */
+#define SPRN_MSSSR0	0x3f7	/* Memory Subsystem Status Register 1 */
+#define SPRN_LDSTCR	0x3f8	/* Load/Store control register */
+#define SPRN_LDSTDB	0x3f4	/* */
+#define SPRN_LR		0x008	/* Link Register */
+#define SPRN_MMCR0	0x3B8	/* Monitor Mode Control Register 0 */
+#define SPRN_MMCR1	0x3BC	/* Monitor Mode Control Register 1 */
+#ifndef SPRN_PIR
+#define SPRN_PIR	0x3FF	/* Processor Identification Register */
+#endif
+#define SPRN_PMC1	0x3B9	/* Performance Counter Register 1 */
+#define SPRN_PMC2	0x3BA	/* Performance Counter Register 2 */
+#define SPRN_PMC3	0x3BD	/* Performance Counter Register 3 */
+#define SPRN_PMC4	0x3BE	/* Performance Counter Register 4 */
+#define SPRN_PTEHI	0x3D5	/* 981 7450 PTE HI word (S/W TLB load) */
+#define SPRN_PTELO	0x3D6	/* 982 7450 PTE LO word (S/W TLB load) */
+#define SPRN_PVR	0x11F	/* Processor Version Register */
+#define SPRN_RPA	0x3D6	/* Required Physical Address Register */
+#define SPRN_SDA	0x3BF	/* Sampled Data Address Register */
+#define SPRN_SDR1	0x019	/* MMU Hash Base Register */
+#define SPRN_SIA	0x3BB	/* Sampled Instruction Address Register */
+#define SPRN_SPRG0	0x110	/* Special Purpose Register General 0 */
+#define SPRN_SPRG1	0x111	/* Special Purpose Register General 1 */
+#define SPRN_SPRG2	0x112	/* Special Purpose Register General 2 */
+#define SPRN_SPRG3	0x113	/* Special Purpose Register General 3 */
+#define SPRN_SPRG4	0x114	/* Special Purpose Register General 4 */
+#define SPRN_SPRG5	0x115	/* Special Purpose Register General 5 */
+#define SPRN_SPRG6	0x116	/* Special Purpose Register General 6 */
+#define SPRN_SPRG7	0x117	/* Special Purpose Register General 7 */
+#define SPRN_SRR0	0x01A	/* Save/Restore Register 0 */
+#define SPRN_SRR1	0x01B	/* Save/Restore Register 1 */
+#ifndef SPRN_SVR
+#define SPRN_SVR	0x11E	/* System Version Register */
+#endif
+#define SPRN_THRM1	0x3FC		/* Thermal Management Register 1 */
+/* these bits were defined in inverted endian sense originally, ugh, confusing */
+#define THRM1_TIN	(1 << 31)
+#define THRM1_TIV	(1 << 30)
+#define THRM1_THRES(x)	((x&0x7f)<<23)
+#define THRM3_SITV(x)	((x&0x3fff)<<1)
+#define THRM1_TID	(1<<2)
+#define THRM1_TIE	(1<<1)
+#define THRM1_V		(1<<0)
+#define SPRN_THRM2	0x3FD		/* Thermal Management Register 2 */
+#define SPRN_THRM3	0x3FE		/* Thermal Management Register 3 */
+#define THRM3_E		(1<<0)
+#define SPRN_TLBMISS	0x3D4		/* 980 7450 TLB Miss Register */
+#define SPRN_UMMCR0	0x3A8	/* User Monitor Mode Control Register 0 */
+#define SPRN_UMMCR1	0x3AC	/* User Monitor Mode Control Register 0 */
+#define SPRN_UPMC1	0x3A9	/* User Performance Counter Register 1 */
+#define SPRN_UPMC2	0x3AA	/* User Performance Counter Register 2 */
+#define SPRN_UPMC3	0x3AD	/* User Performance Counter Register 3 */
+#define SPRN_UPMC4	0x3AE	/* User Performance Counter Register 4 */
+#define SPRN_USIA	0x3AB	/* User Sampled Instruction Address Register */
+#define SPRN_VRSAVE	0x100	/* Vector Register Save Register */
+#define SPRN_XER	0x001	/* Fixed Point Exception Register */
+
+/* Bit definitions for MMCR0 and PMC1 / PMC2. */
+#define MMCR0_PMC1_CYCLES	(1 << 7)
+#define MMCR0_PMC1_ICACHEMISS	(5 << 7)
+#define MMCR0_PMC1_DTLB		(6 << 7)
+#define MMCR0_PMC2_DCACHEMISS	0x6
+#define MMCR0_PMC2_CYCLES	0x1
+#define MMCR0_PMC2_ITLB		0x7
+#define MMCR0_PMC2_LOADMISSTIME	0x5
+#define MMCR0_PMXE	(1 << 26)
+
+/* Processor Version Register */
+
+/* Processor Version Register (PVR) field extraction */
+
+#define PVR_VER(pvr)	(((pvr) >>  16) & 0xFFFF)	/* Version field */
+#define PVR_REV(pvr)	(((pvr) >>   0) & 0xFFFF)	/* Revison field */
+
+/*
+ * IBM has further subdivided the standard PowerPC 16-bit version and
+ * revision subfields of the PVR for the PowerPC 403s into the following:
+ */
+
+#define PVR_FAM(pvr)	(((pvr) >> 20) & 0xFFF)	/* Family field */
+#define PVR_MEM(pvr)	(((pvr) >> 16) & 0xF)	/* Member field */
+#define PVR_CORE(pvr)	(((pvr) >> 12) & 0xF)	/* Core field */
+#define PVR_CFG(pvr)	(((pvr) >>  8) & 0xF)	/* Configuration field */
+#define PVR_MAJ(pvr)	(((pvr) >>  4) & 0xF)	/* Major revision field */
+#define PVR_MIN(pvr)	(((pvr) >>  0) & 0xF)	/* Minor revision field */
+
+/* Processor Version Numbers */
+
+#define PVR_403GA	0x00200000
+#define PVR_403GB	0x00200100
+#define PVR_403GC	0x00200200
+#define PVR_403GCX	0x00201400
+#define PVR_405GP	0x40110000
+#define PVR_STB03XXX	0x40310000
+#define PVR_NP405H	0x41410000
+#define PVR_NP405L	0x41610000
+#define PVR_601		0x00010000
+#define PVR_602		0x00050000
+#define PVR_603		0x00030000
+#define PVR_603e	0x00060000
+#define PVR_603ev	0x00070000
+#define PVR_603r	0x00071000
+#define PVR_604		0x00040000
+#define PVR_604e	0x00090000
+#define PVR_604r	0x000A0000
+#define PVR_620		0x00140000
+#define PVR_740		0x00080000
+#define PVR_750		PVR_740
+#define PVR_740P	0x10080000
+#define PVR_750P	PVR_740P
+#define PVR_7400	0x000C0000
+#define PVR_7410	0x800C0000
+#define PVR_7450	0x80000000
+#define PVR_8540	0x80200000
+#define PVR_8560	0x80200000
+/*
+ * For the 8xx processors, all of them report the same PVR family for
+ * the PowerPC core. The various versions of these processors must be
+ * differentiated by the version number in the Communication Processor
+ * Module (CPM).
+ */
+#define PVR_821		0x00500000
+#define PVR_823		PVR_821
+#define PVR_850		PVR_821
+#define PVR_860		PVR_821
+#define PVR_8240	0x00810100
+#define PVR_8245	0x80811014
+#define PVR_8260	PVR_8240
+
+#if 0
+/* Segment Registers */
+#define SR0	0
+#define SR1	1
+#define SR2	2
+#define SR3	3
+#define SR4	4
+#define SR5	5
+#define SR6	6
+#define SR7	7
+#define SR8	8
+#define SR9	9
+#define SR10	10
+#define SR11	11
+#define SR12	12
+#define SR13	13
+#define SR14	14
+#define SR15	15
+#endif
+
+/* Macros for setting and retrieving special purpose registers */
+#ifndef __ASSEMBLY__
+#define mfmsr()		({unsigned int rval; \
+			asm volatile("mfmsr %0" : "=r" (rval)); rval;})
+#define mtmsr(v)	asm volatile("mtmsr %0" : : "r" (v))
+
+#define mfspr(rn)	({unsigned int rval; \
+			asm volatile("mfspr %0," __stringify(rn) \
+				: "=r" (rval)); rval;})
+#define mtspr(rn, v)	asm volatile("mtspr " __stringify(rn) ",%0" : : "r" (v))
+
+#define mfsrin(v)	({unsigned int rval; \
+			asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \
+					rval;})
+
+#define proc_trap()	asm volatile("trap")
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_PPC_REGS_H__ */
+#endif /* __KERNEL__ */
diff --git a/include/asm-ppc64/rwsem.h b/include/asm-powerpc/rwsem.h
index bd5c2f09357..0a5b83a3c94 100644
--- a/include/asm-ppc64/rwsem.h
+++ b/include/asm-powerpc/rwsem.h
@@ -1,18 +1,14 @@
+#ifndef _ASM_POWERPC_RWSEM_H
+#define _ASM_POWERPC_RWSEM_H
+
+#ifdef __KERNEL__
+
 /*
  * include/asm-ppc64/rwsem.h: R/W semaphores for PPC using the stuff
  * in lib/rwsem.c.  Adapted largely from include/asm-i386/rwsem.h
  * by Paul Mackerras <paulus@samba.org>.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
-#ifndef _PPC64_RWSEM_H
-#define _PPC64_RWSEM_H
-
-#ifdef __KERNEL__
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
@@ -163,5 +159,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
 	return atomic_add_return(delta, (atomic_t *)(&sem->count));
 }
 
-#endif /* __KERNEL__ */
-#endif /* _PPC_RWSEM_XADD_H */
+#endif	/* __KERNEL__ */
+#endif	/* _ASM_POWERPC_RWSEM_H */
diff --git a/include/asm-ppc64/seccomp.h b/include/asm-powerpc/seccomp.h
index c130c334bda..1e1cfe12882 100644
--- a/include/asm-ppc64/seccomp.h
+++ b/include/asm-powerpc/seccomp.h
@@ -1,11 +1,6 @@
-#ifndef _ASM_SECCOMP_H
-
-#include <linux/thread_info.h> /* already defines TIF_32BIT */
-
-#ifndef TIF_32BIT
-#error "unexpected TIF_32BIT on ppc64"
-#endif
+#ifndef _ASM_POWERPC_SECCOMP_H
 
+#include <linux/thread_info.h>
 #include <linux/unistd.h>
 
 #define __NR_seccomp_read __NR_read
@@ -18,4 +13,4 @@
 #define __NR_seccomp_exit_32 __NR_exit
 #define __NR_seccomp_sigreturn_32 __NR_sigreturn
 
-#endif /* _ASM_SECCOMP_H */
+#endif	/* _ASM_POWERPC_SECCOMP_H */
diff --git a/include/asm-ppc64/semaphore.h b/include/asm-powerpc/semaphore.h
index aefe7753ea4..fd42fe97158 100644
--- a/include/asm-ppc64/semaphore.h
+++ b/include/asm-powerpc/semaphore.h
@@ -1,5 +1,5 @@
-#ifndef _PPC64_SEMAPHORE_H
-#define _PPC64_SEMAPHORE_H
+#ifndef _ASM_POWERPC_SEMAPHORE_H
+#define _ASM_POWERPC_SEMAPHORE_H
 
 /*
  * Remove spinlock-based RW semaphores; RW semaphore definitions are
@@ -95,4 +95,4 @@ static inline void up(struct semaphore * sem)
 
 #endif /* __KERNEL__ */
 
-#endif /* !(_PPC64_SEMAPHORE_H) */
+#endif /* _ASM_POWERPC_SEMAPHORE_H */
diff --git a/include/asm-powerpc/synch.h b/include/asm-powerpc/synch.h
new file mode 100644
index 00000000000..4660c0394a7
--- /dev/null
+++ b/include/asm-powerpc/synch.h
@@ -0,0 +1,51 @@
+#ifndef _ASM_POWERPC_SYNCH_H 
+#define _ASM_POWERPC_SYNCH_H 
+
+#include <linux/config.h>
+
+#ifdef __powerpc64__
+#define __SUBARCH_HAS_LWSYNC
+#endif
+
+#ifdef __SUBARCH_HAS_LWSYNC
+#    define LWSYNC	lwsync
+#else
+#    define LWSYNC	sync
+#endif
+
+
+/*
+ * Arguably the bitops and *xchg operations don't imply any memory barrier
+ * or SMP ordering, but in fact a lot of drivers expect them to imply
+ * both, since they do on x86 cpus.
+ */
+#ifdef CONFIG_SMP
+#define EIEIO_ON_SMP	"eieio\n"
+#define ISYNC_ON_SMP	"\n\tisync"
+#define SYNC_ON_SMP	__stringify(LWSYNC) "\n"
+#else
+#define EIEIO_ON_SMP
+#define ISYNC_ON_SMP
+#define SYNC_ON_SMP
+#endif
+
+static inline void eieio(void)
+{
+	__asm__ __volatile__ ("eieio" : : : "memory");
+}
+
+static inline void isync(void)
+{
+	__asm__ __volatile__ ("isync" : : : "memory");
+}
+
+#ifdef CONFIG_SMP
+#define eieio_on_smp()	eieio()
+#define isync_on_smp()	isync()
+#else
+#define eieio_on_smp()	__asm__ __volatile__("": : :"memory")
+#define isync_on_smp()	__asm__ __volatile__("": : :"memory")
+#endif
+
+#endif	/* _ASM_POWERPC_SYNCH_H */
+
diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h
new file mode 100644
index 00000000000..be542efb32d
--- /dev/null
+++ b/include/asm-powerpc/system.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef __PPC_SYSTEM_H
+#define __PPC_SYSTEM_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+
+#include <asm/hw_irq.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * Memory barrier.
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory).  The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * mb() prevents loads and stores being reordered across this point.
+ * rmb() prevents loads being reordered across this point.
+ * wmb() prevents stores being reordered across this point.
+ * read_barrier_depends() prevents data-dependent loads being reordered
+ *	across this point (nop on PPC).
+ *
+ * We have to use the sync instructions for mb(), since lwsync doesn't
+ * order loads with respect to previous stores.  Lwsync is fine for
+ * rmb(), though.  Note that lwsync is interpreted as sync by
+ * 32-bit and older 64-bit CPUs.
+ *
+ * For wmb(), we use sync since wmb is used in drivers to order
+ * stores to system memory with respect to writes to the device.
+ * However, smp_wmb() can be a lighter-weight eieio barrier on
+ * SMP since it is only used to order updates to system memory.
+ */
+#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
+#define rmb()  __asm__ __volatile__ ("lwsync" : : : "memory")
+#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define read_barrier_depends()  do { } while(0)
+
+#define set_mb(var, value)	do { var = value; mb(); } while (0)
+#define set_wmb(var, value)	do { var = value; wmb(); } while (0)
+
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#define smp_rmb()	rmb()
+#define smp_wmb()	__asm__ __volatile__ ("eieio" : : : "memory")
+#define smp_read_barrier_depends()	read_barrier_depends()
+#else
+#define smp_mb()	barrier()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
+#define smp_read_barrier_depends()	do { } while(0)
+#endif /* CONFIG_SMP */
+
+#ifdef __KERNEL__
+struct task_struct;
+struct pt_regs;
+
+#ifdef CONFIG_DEBUGGER
+
+extern int (*__debugger)(struct pt_regs *regs);
+extern int (*__debugger_ipi)(struct pt_regs *regs);
+extern int (*__debugger_bpt)(struct pt_regs *regs);
+extern int (*__debugger_sstep)(struct pt_regs *regs);
+extern int (*__debugger_iabr_match)(struct pt_regs *regs);
+extern int (*__debugger_dabr_match)(struct pt_regs *regs);
+extern int (*__debugger_fault_handler)(struct pt_regs *regs);
+
+#define DEBUGGER_BOILERPLATE(__NAME) \
+static inline int __NAME(struct pt_regs *regs) \
+{ \
+	if (unlikely(__ ## __NAME)) \
+		return __ ## __NAME(regs); \
+	return 0; \
+}
+
+DEBUGGER_BOILERPLATE(debugger)
+DEBUGGER_BOILERPLATE(debugger_ipi)
+DEBUGGER_BOILERPLATE(debugger_bpt)
+DEBUGGER_BOILERPLATE(debugger_sstep)
+DEBUGGER_BOILERPLATE(debugger_iabr_match)
+DEBUGGER_BOILERPLATE(debugger_dabr_match)
+DEBUGGER_BOILERPLATE(debugger_fault_handler)
+
+#ifdef CONFIG_XMON
+extern void xmon_init(int enable);
+#endif
+
+#else
+static inline int debugger(struct pt_regs *regs) { return 0; }
+static inline int debugger_ipi(struct pt_regs *regs) { return 0; }
+static inline int debugger_bpt(struct pt_regs *regs) { return 0; }
+static inline int debugger_sstep(struct pt_regs *regs) { return 0; }
+static inline int debugger_iabr_match(struct pt_regs *regs) { return 0; }
+static inline int debugger_dabr_match(struct pt_regs *regs) { return 0; }
+static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
+#endif
+
+extern int set_dabr(unsigned long dabr);
+extern void print_backtrace(unsigned long *);
+extern void show_regs(struct pt_regs * regs);
+extern void flush_instruction_cache(void);
+extern void hard_reset_now(void);
+extern void poweroff_now(void);
+
+#ifdef CONFIG_6xx
+extern long _get_L2CR(void);
+extern long _get_L3CR(void);
+extern void _set_L2CR(unsigned long);
+extern void _set_L3CR(unsigned long);
+#else
+#define _get_L2CR()	0L
+#define _get_L3CR()	0L
+#define _set_L2CR(val)	do { } while(0)
+#define _set_L3CR(val)	do { } while(0)
+#endif
+
+extern void via_cuda_init(void);
+extern void pmac_nvram_init(void);
+extern void read_rtc_time(void);
+extern void pmac_find_display(void);
+extern void giveup_fpu(struct task_struct *);
+extern void enable_kernel_fp(void);
+extern void flush_fp_to_thread(struct task_struct *);
+extern void enable_kernel_altivec(void);
+extern void giveup_altivec(struct task_struct *);
+extern void load_up_altivec(struct task_struct *);
+extern void giveup_spe(struct task_struct *);
+extern void load_up_spe(struct task_struct *);
+extern int fix_alignment(struct pt_regs *);
+extern void cvt_fd(float *from, double *to, unsigned long *fpscr);
+extern void cvt_df(double *from, float *to, unsigned long *fpscr);
+
+#ifdef CONFIG_ALTIVEC
+extern void flush_altivec_to_thread(struct task_struct *);
+#else
+static inline void flush_altivec_to_thread(struct task_struct *t)
+{
+}
+#endif
+
+#ifdef CONFIG_SPE
+extern void flush_spe_to_thread(struct task_struct *);
+#else
+static inline void flush_spe_to_thread(struct task_struct *t)
+{
+}
+#endif
+
+extern int call_rtas(const char *, int, int, unsigned long *, ...);
+extern void cacheable_memzero(void *p, unsigned int nb);
+extern void *cacheable_memcpy(void *, const void *, unsigned int);
+extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
+extern void bad_page_fault(struct pt_regs *, unsigned long, int);
+extern int die(const char *, struct pt_regs *, long);
+extern void _exception(int, struct pt_regs *, int, unsigned long);
+#ifdef CONFIG_BOOKE_WDT
+extern u32 booke_wdt_enabled;
+extern u32 booke_wdt_period;
+#endif /* CONFIG_BOOKE_WDT */
+
+/* EBCDIC -> ASCII conversion for [0-9A-Z] on iSeries */
+extern unsigned char e2a(unsigned char);
+
+struct device_node;
+extern void note_scsi_host(struct device_node *, void *);
+
+extern struct task_struct *__switch_to(struct task_struct *,
+	struct task_struct *);
+#define switch_to(prev, next, last)	((last) = __switch_to((prev), (next)))
+
+struct thread_struct;
+extern struct task_struct *_switch(struct thread_struct *prev,
+				   struct thread_struct *next);
+
+extern unsigned int rtas_data;
+
+/*
+ * Atomic exchange
+ *
+ * Changes the memory location '*ptr' to be val and returns
+ * the previous value stored there.
+ */
+static __inline__ unsigned long
+__xchg_u32(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+	EIEIO_ON_SMP
+"1:	lwarx	%0,0,%2 \n"
+	PPC405_ERR77(0,%2)
+"	stwcx.	%3,0,%2 \n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	: "=&r" (prev), "=m" (*(volatile unsigned int *)p)
+	: "r" (p), "r" (val), "m" (*(volatile unsigned int *)p)
+	: "cc", "memory");
+
+	return prev;
+}
+
+#ifdef CONFIG_PPC64
+static __inline__ unsigned long
+__xchg_u64(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+	EIEIO_ON_SMP
+"1:	ldarx	%0,0,%2 \n"
+	PPC405_ERR77(0,%2)
+"	stdcx.	%3,0,%2 \n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	: "=&r" (prev), "=m" (*(volatile unsigned long *)p)
+	: "r" (p), "r" (val), "m" (*(volatile unsigned long *)p)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid xchg().
+ */
+extern void __xchg_called_with_bad_pointer(void);
+
+static __inline__ unsigned long
+__xchg(volatile void *ptr, unsigned long x, unsigned int size)
+{
+	switch (size) {
+	case 4:
+		return __xchg_u32(ptr, x);
+#ifdef CONFIG_PPC64
+	case 8:
+		return __xchg_u64(ptr, x);
+#endif
+	}
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+
+#define xchg(ptr,x)							     \
+  ({									     \
+     __typeof__(*(ptr)) _x_ = (x);					     \
+     (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
+  })
+
+#define tas(ptr) (xchg((ptr),1))
+
+/*
+ * Compare and exchange - if *p == old, set it to new,
+ * and return the old value of *p.
+ */
+#define __HAVE_ARCH_CMPXCHG	1
+
+static __inline__ unsigned long
+__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	EIEIO_ON_SMP
+"1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+	PPC405_ERR77(0,%2)
+"	stwcx.	%4,0,%2\n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	"\n\
+2:"
+	: "=&r" (prev), "=m" (*p)
+	: "r" (p), "r" (old), "r" (new), "m" (*p)
+	: "cc", "memory");
+
+	return prev;
+}
+
+#ifdef CONFIG_PPC64
+static __inline__ unsigned long
+__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+	EIEIO_ON_SMP
+"1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
+	cmpd	0,%0,%3\n\
+	bne-	2f\n\
+	stdcx.	%4,0,%2\n\
+	bne-	1b"
+	ISYNC_ON_SMP
+	"\n\
+2:"
+	: "=&r" (prev), "=m" (*p)
+	: "r" (p), "r" (old), "r" (new), "m" (*p)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
+
+/* This function doesn't exist, so you'll get a linker error
+   if something tries to do an invalid cmpxchg().  */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static __inline__ unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
+	  unsigned int size)
+{
+	switch (size) {
+	case 4:
+		return __cmpxchg_u32(ptr, old, new);
+#ifdef CONFIG_PPC64
+	case 8:
+		return __cmpxchg_u64(ptr, old, new);
+#endif
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define cmpxchg(ptr,o,n)						 \
+  ({									 \
+     __typeof__(*(ptr)) _o_ = (o);					 \
+     __typeof__(*(ptr)) _n_ = (n);					 \
+     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
+				    (unsigned long)_n_, sizeof(*(ptr))); \
+  })
+
+#ifdef CONFIG_PPC64
+/*
+ * We handle most unaligned accesses in hardware. On the other hand 
+ * unaligned DMA can be very expensive on some ppc64 IO chips (it does
+ * powers of 2 writes until it reaches sufficient alignment).
+ *
+ * Based on this we disable the IP header alignment in network drivers.
+ */
+#define NET_IP_ALIGN   0
+#endif
+
+#define arch_align_stack(x) (x)
+
+#endif /* __KERNEL__ */
+#endif /* __PPC_SYSTEM_H */
diff --git a/include/asm-ppc/futex.h b/include/asm-ppc/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-ppc/futex.h
+++ b/include/asm-ppc/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-ppc/hw_irq.h b/include/asm-ppc/hw_irq.h
deleted file mode 100644
index da0fa940adb..00000000000
--- a/include/asm-ppc/hw_irq.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- */
-#ifdef __KERNEL__
-#ifndef _PPC_HW_IRQ_H
-#define _PPC_HW_IRQ_H
-
-#include <asm/ptrace.h>
-#include <asm/reg.h>
-#include <asm/irq.h>
-
-extern void timer_interrupt(struct pt_regs *);
-
-#define irqs_disabled()	((mfmsr() & MSR_EE) == 0)
-
-static inline void local_irq_disable(void)
-{
-	unsigned long msr;
-	msr = mfmsr();
-	mtmsr(msr & ~MSR_EE);
-	__asm__ __volatile__("": : :"memory");
-}
-
-static inline void local_irq_enable(void)
-{
-	unsigned long msr;
-	__asm__ __volatile__("": : :"memory");
-	msr = mfmsr();
-	mtmsr(msr | MSR_EE);
-}
-
-static inline void local_irq_save_ptr(unsigned long *flags)
-{
-	unsigned long msr;
-	msr = mfmsr();
-	*flags = msr;
-	mtmsr(msr & ~MSR_EE);
-	__asm__ __volatile__("": : :"memory");
-}
-
-#define local_save_flags(flags)		((flags) = mfmsr())
-#define local_irq_save(flags)		local_irq_save_ptr(&flags)
-#define local_irq_restore(flags)	mtmsr(flags)
-
-extern void do_lost_interrupts(unsigned long);
-
-#define mask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->disable) irq_desc[irq].handler->disable(irq);})
-#define unmask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->enable) irq_desc[irq].handler->enable(irq);})
-#define ack_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->ack) irq_desc[irq].handler->ack(irq);})
-
-/* Should we handle this via lost interrupts and IPIs or should we don't care like
- * we do now ? --BenH.
- */
-struct hw_interrupt_type;
-static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
-
-
-#endif /* _PPC_HW_IRQ_H */
-#endif /* __KERNEL__ */
diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h
index 7eb7cf6360b..39caf067a31 100644
--- a/include/asm-ppc/io.h
+++ b/include/asm-ppc/io.h
@@ -8,6 +8,7 @@
 
 #include <asm/page.h>
 #include <asm/byteorder.h>
+#include <asm/synch.h>
 #include <asm/mmu.h>
 
 #define SIO_CONFIG_RA	0x398
@@ -440,16 +441,6 @@ extern inline void * phys_to_virt(unsigned long address)
 #define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
 #define page_to_bus(page)	(page_to_phys(page) + PCI_DRAM_OFFSET)
 
-/*
- * Enforce In-order Execution of I/O:
- * Acts as a barrier to ensure all previous I/O accesses have
- * completed before any further ones are issued.
- */
-extern inline void eieio(void)
-{
-	__asm__ __volatile__ ("eieio" : : : "memory");
-}
-
 /* Enforce in-order execution of data I/O.
  * No distinction between read/write on PPC; use eieio for all three.
  */
diff --git a/include/asm-ppc/irq.h b/include/asm-ppc/irq.h
index bd9674807f0..137ea0cf34d 100644
--- a/include/asm-ppc/irq.h
+++ b/include/asm-ppc/irq.h
@@ -24,6 +24,12 @@
  */
 #define ARCH_HAS_IRQ_PER_CPU
 
+#define get_irq_desc(irq) (&irq_desc[(irq)])
+
+/* Define a way to iterate across irqs. */
+#define for_each_irq(i) \
+	for ((i) = 0; (i) < NR_IRQS; ++(i))
+
 #if defined(CONFIG_40x)
 #include <asm/ibm4xx.h>
 
diff --git a/include/asm-ppc/macio.h b/include/asm-ppc/macio.h
index a481b772d15..b553dd4b139 100644
--- a/include/asm-ppc/macio.h
+++ b/include/asm-ppc/macio.h
@@ -1,7 +1,6 @@
 #ifndef __MACIO_ASIC_H__
 #define __MACIO_ASIC_H__
 
-#include <linux/mod_devicetable.h>
 #include <asm/of_device.h>
 
 extern struct bus_type macio_bus_type;
diff --git a/include/asm-ppc/of_device.h b/include/asm-ppc/of_device.h
index 4b264cfd399..575bce418f8 100644
--- a/include/asm-ppc/of_device.h
+++ b/include/asm-ppc/of_device.h
@@ -2,6 +2,7 @@
 #define __OF_DEVICE_H__
 
 #include <linux/device.h>
+#include <linux/mod_devicetable.h>
 #include <asm/prom.h>
 
 /*
@@ -55,7 +56,9 @@ extern int of_register_driver(struct of_platform_driver *drv);
 extern void of_unregister_driver(struct of_platform_driver *drv);
 extern int of_device_register(struct of_device *ofdev);
 extern void of_device_unregister(struct of_device *ofdev);
-extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id);
+extern struct of_device *of_platform_device_create(struct device_node *np,
+						   const char *bus_id,
+						   struct device *parent);
 extern void of_release_dev(struct device *dev);
 
 #endif /* __OF_DEVICE_H__ */
diff --git a/include/asm-ppc/rwsem.h b/include/asm-ppc/rwsem.h
deleted file mode 100644
index 3e738f483c1..00000000000
--- a/include/asm-ppc/rwsem.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * include/asm-ppc/rwsem.h: R/W semaphores for PPC using the stuff
- * in lib/rwsem.c.  Adapted largely from include/asm-i386/rwsem.h
- * by Paul Mackerras <paulus@samba.org>.
- */
-
-#ifndef _PPC_RWSEM_H
-#define _PPC_RWSEM_H
-
-#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
-	/* XXX this should be able to be an atomic_t  -- paulus */
-	signed long		count;
-#define RWSEM_UNLOCKED_VALUE		0x00000000
-#define RWSEM_ACTIVE_BIAS		0x00000001
-#define RWSEM_ACTIVE_MASK		0x0000ffff
-#define RWSEM_WAITING_BIAS		(-0x00010000)
-#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-	spinlock_t		wait_lock;
-	struct list_head	wait_list;
-#if RWSEM_DEBUG
-	int			debug;
-#endif
-};
-
-/*
- * initialisation
- */
-#if RWSEM_DEBUG
-#define __RWSEM_DEBUG_INIT      , 0
-#else
-#define __RWSEM_DEBUG_INIT	/* */
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-	{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
-	  LIST_HEAD_INIT((name).wait_list) \
-	  __RWSEM_DEBUG_INIT }
-
-#define DECLARE_RWSEM(name)		\
-	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
-	sem->count = RWSEM_UNLOCKED_VALUE;
-	spin_lock_init(&sem->wait_lock);
-	INIT_LIST_HEAD(&sem->wait_list);
-#if RWSEM_DEBUG
-	sem->debug = 0;
-#endif
-}
-
-/*
- * lock for reading
- */
-static inline void __down_read(struct rw_semaphore *sem)
-{
-	if (atomic_inc_return((atomic_t *)(&sem->count)) > 0)
-		smp_wmb();
-	else
-		rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	while ((tmp = sem->count) >= 0) {
-		if (tmp == cmpxchg(&sem->count, tmp,
-				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
-			smp_wmb();
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * lock for writing
- */
-static inline void __down_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS,
-				(atomic_t *)(&sem->count));
-	if (tmp == RWSEM_ACTIVE_WRITE_BIAS)
-		smp_wmb();
-	else
-		rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
-		      RWSEM_ACTIVE_WRITE_BIAS);
-	smp_wmb();
-	return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_dec_return((atomic_t *)(&sem->count));
-	if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)
-		rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
-	smp_wmb();
-	if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
-			      (atomic_t *)(&sem->count)) < 0)
-		rwsem_wake(sem);
-}
-
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
-{
-	atomic_add(delta, (atomic_t *)(&sem->count));
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
-	int tmp;
-
-	smp_wmb();
-	tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count));
-	if (tmp < 0)
-		rwsem_downgrade_wake(sem);
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
-{
-	smp_mb();
-	return atomic_add_return(delta, (atomic_t *)(&sem->count));
-}
-
-#endif /* __KERNEL__ */
-#endif /* _PPC_RWSEM_XADD_H */
diff --git a/include/asm-ppc/seccomp.h b/include/asm-ppc/seccomp.h
deleted file mode 100644
index 666c4da96d8..00000000000
--- a/include/asm-ppc/seccomp.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_SECCOMP_H
-
-#include <linux/unistd.h>
-
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_rt_sigreturn
-
-#endif /* _ASM_SECCOMP_H */
diff --git a/include/asm-ppc/semaphore.h b/include/asm-ppc/semaphore.h
deleted file mode 100644
index 89e6e73be08..00000000000
--- a/include/asm-ppc/semaphore.h
+++ /dev/null
@@ -1,111 +0,0 @@
-#ifndef _PPC_SEMAPHORE_H
-#define _PPC_SEMAPHORE_H
-
-/*
- * Swiped from asm-sparc/semaphore.h and modified
- * -- Cort (cort@cs.nmt.edu)
- *
- * Stole some rw spinlock-based semaphore stuff from asm-alpha/semaphore.h
- * -- Ani Joshi (ajoshi@unixbox.com)
- *
- * Remove spinlock-based RW semaphores; RW semaphore definitions are
- * now in rwsem.h and we use the generic lib/rwsem.c implementation.
- * Rework semaphores to use atomic_dec_if_positive.
- * -- Paul Mackerras (paulus@samba.org)
- */
-
-#ifdef __KERNEL__
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <linux/wait.h>
-#include <linux/rwsem.h>
-
-struct semaphore {
-	/*
-	 * Note that any negative value of count is equivalent to 0,
-	 * but additionally indicates that some process(es) might be
-	 * sleeping on `wait'.
-	 */
-	atomic_t count;
-	wait_queue_head_t wait;
-};
-
-#define __SEMAPHORE_INITIALIZER(name, n)				\
-{									\
-	.count		= ATOMIC_INIT(n),				\
-	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
-}
-
-#define __MUTEX_INITIALIZER(name) \
-	__SEMAPHORE_INITIALIZER(name, 1)
-
-#define __DECLARE_SEMAPHORE_GENERIC(name, count) \
-	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
-
-#define DECLARE_MUTEX(name)		__DECLARE_SEMAPHORE_GENERIC(name, 1)
-#define DECLARE_MUTEX_LOCKED(name)	__DECLARE_SEMAPHORE_GENERIC(name, 0)
-
-static inline void sema_init (struct semaphore *sem, int val)
-{
-	atomic_set(&sem->count, val);
-	init_waitqueue_head(&sem->wait);
-}
-
-static inline void init_MUTEX (struct semaphore *sem)
-{
-	sema_init(sem, 1);
-}
-
-static inline void init_MUTEX_LOCKED (struct semaphore *sem)
-{
-	sema_init(sem, 0);
-}
-
-extern void __down(struct semaphore * sem);
-extern int  __down_interruptible(struct semaphore * sem);
-extern void __up(struct semaphore * sem);
-
-extern inline void down(struct semaphore * sem)
-{
-	might_sleep();
-
-	/*
-	 * Try to get the semaphore, take the slow path if we fail.
-	 */
-	if (atomic_dec_return(&sem->count) < 0)
-		__down(sem);
-	smp_wmb();
-}
-
-extern inline int down_interruptible(struct semaphore * sem)
-{
-	int ret = 0;
-
-	might_sleep();
-
-	if (atomic_dec_return(&sem->count) < 0)
-		ret = __down_interruptible(sem);
-	smp_wmb();
-	return ret;
-}
-
-extern inline int down_trylock(struct semaphore * sem)
-{
-	int ret;
-
-	ret = atomic_dec_if_positive(&sem->count) < 0;
-	smp_wmb();
-	return ret;
-}
-
-extern inline void up(struct semaphore * sem)
-{
-	smp_wmb();
-	if (atomic_inc_return(&sem->count) <= 0)
-		__up(sem);
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* !(_PPC_SEMAPHORE_H) */
diff --git a/include/asm-ppc/smp.h b/include/asm-ppc/smp.h
index 829481c0a9d..79c1be3dfe6 100644
--- a/include/asm-ppc/smp.h
+++ b/include/asm-ppc/smp.h
@@ -45,30 +45,21 @@ extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
 extern void cpu_die(void) __attribute__((noreturn));
 
-#define NO_PROC_ID		0xFF            /* No processor magic marker */
-#define PROC_CHANGE_PENALTY	20
-
 #define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern int __cpu_up(unsigned int cpu);
 
 extern int smp_hw_index[];
-#define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
-
-struct klock_info_struct {
-	unsigned long kernel_flag;
-	unsigned char akp;
-};
-
-extern struct klock_info_struct klock_info;
-#define KLOCK_HELD       0xffffffff
-#define KLOCK_CLEAR      0x0
+#define hard_smp_processor_id() 	(smp_hw_index[smp_processor_id()])
+#define get_hard_smp_processor_id(cpu)	(smp_hw_index[(cpu)])
 
 #endif /* __ASSEMBLY__ */
 
 #else /* !(CONFIG_SMP) */
 
 static inline void cpu_die(void) { }
+#define get_hard_smp_processor_id(cpu) 0
+#define hard_smp_processor_id() 0
 
 #endif /* !(CONFIG_SMP) */
 
diff --git a/include/asm-ppc/spinlock.h b/include/asm-ppc/spinlock.h
index 20edcf2a6e0..5c64b75f029 100644
--- a/include/asm-ppc/spinlock.h
+++ b/include/asm-ppc/spinlock.h
@@ -9,7 +9,7 @@
  * (the type definitions are in asm/raw_spinlock_types.h)
  */
 
-#define __raw_spin_is_locked(x)		((x)->lock != 0)
+#define __raw_spin_is_locked(x)		((x)->slock != 0)
 #define __raw_spin_unlock_wait(lock) \
 	do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
@@ -31,17 +31,17 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock)
 	bne-	2b\n\
 	isync"
 	: "=&r"(tmp)
-	: "r"(&lock->lock), "r"(1)
+	: "r"(&lock->slock), "r"(1)
 	: "cr0", "memory");
 }
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
 	__asm__ __volatile__("eieio	# __raw_spin_unlock": : :"memory");
-	lock->lock = 0;
+	lock->slock = 0;
 }
 
-#define __raw_spin_trylock(l) (!test_and_set_bit(0,&(l)->lock))
+#define __raw_spin_trylock(l) (!test_and_set_bit(0,(volatile unsigned long *)(&(l)->slock)))
 
 /*
  * Read-write spinlocks, allowing multiple readers
diff --git a/include/asm-ppc64/atomic.h b/include/asm-ppc64/atomic.h
deleted file mode 100644
index 0e5f25e83bc..00000000000
--- a/include/asm-ppc64/atomic.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * PowerPC64 atomic operations
- *
- * Copyright (C) 2001 Paul Mackerras <paulus@au.ibm.com>, IBM
- * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ASM_PPC64_ATOMIC_H_ 
-#define _ASM_PPC64_ATOMIC_H_
-
-#include <asm/memory.h>
-
-typedef struct { volatile int counter; } atomic_t;
-
-#define ATOMIC_INIT(i)	{ (i) }
-
-#define atomic_read(v)		((v)->counter)
-#define atomic_set(v,i)		(((v)->counter) = (i))
-
-static __inline__ void atomic_add(int a, atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%3		# atomic_add\n\
-	add	%0,%2,%0\n\
-	stwcx.	%0,0,%3\n\
-	bne-	1b"
-	: "=&r" (t), "=m" (v->counter)
-	: "r" (a), "r" (&v->counter), "m" (v->counter)
-	: "cc");
-}
-
-static __inline__ int atomic_add_return(int a, atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	lwarx	%0,0,%2		# atomic_add_return\n\
-	add	%0,%1,%0\n\
-	stwcx.	%0,0,%2\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (t)
-	: "r" (a), "r" (&v->counter)
-	: "cc", "memory");
-
-	return t;
-}
-
-#define atomic_add_negative(a, v)	(atomic_add_return((a), (v)) < 0)
-
-static __inline__ void atomic_sub(int a, atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%3		# atomic_sub\n\
-	subf	%0,%2,%0\n\
-	stwcx.	%0,0,%3\n\
-	bne-	1b"
-	: "=&r" (t), "=m" (v->counter)
-	: "r" (a), "r" (&v->counter), "m" (v->counter)
-	: "cc");
-}
-
-static __inline__ int atomic_sub_return(int a, atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	lwarx	%0,0,%2		# atomic_sub_return\n\
-	subf	%0,%1,%0\n\
-	stwcx.	%0,0,%2\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (t)
-	: "r" (a), "r" (&v->counter)
-	: "cc", "memory");
-
-	return t;
-}
-
-static __inline__ void atomic_inc(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2		# atomic_inc\n\
-	addic	%0,%0,1\n\
-	stwcx.	%0,0,%2\n\
-	bne-	1b"
-	: "=&r" (t), "=m" (v->counter)
-	: "r" (&v->counter), "m" (v->counter)
-	: "cc");
-}
-
-static __inline__ int atomic_inc_return(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	lwarx	%0,0,%1		# atomic_inc_return\n\
-	addic	%0,%0,1\n\
-	stwcx.	%0,0,%1\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (t)
-	: "r" (&v->counter)
-	: "cc", "memory");
-
-	return t;
-}
-
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
-
-static __inline__ void atomic_dec(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-"1:	lwarx	%0,0,%2		# atomic_dec\n\
-	addic	%0,%0,-1\n\
-	stwcx.	%0,0,%2\n\
-	bne-	1b"
-	: "=&r" (t), "=m" (v->counter)
-	: "r" (&v->counter), "m" (v->counter)
-	: "cc");
-}
-
-static __inline__ int atomic_dec_return(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	lwarx	%0,0,%1		# atomic_dec_return\n\
-	addic	%0,%0,-1\n\
-	stwcx.	%0,0,%1\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	: "=&r" (t)
-	: "r" (&v->counter)
-	: "cc", "memory");
-
-	return t;
-}
-
-#define atomic_sub_and_test(a, v)	(atomic_sub_return((a), (v)) == 0)
-#define atomic_dec_and_test(v)		(atomic_dec_return((v)) == 0)
-
-/*
- * Atomically test *v and decrement if it is greater than 0.
- * The function returns the old value of *v minus 1.
- */
-static __inline__ int atomic_dec_if_positive(atomic_t *v)
-{
-	int t;
-
-	__asm__ __volatile__(
-	EIEIO_ON_SMP
-"1:	lwarx	%0,0,%1		# atomic_dec_if_positive\n\
-	addic.	%0,%0,-1\n\
-	blt-	2f\n\
-	stwcx.	%0,0,%1\n\
-	bne-	1b"
-	ISYNC_ON_SMP
-	"\n\
-2:"	: "=&r" (t)
-	: "r" (&v->counter)
-	: "cc", "memory");
-
-	return t;
-}
-
-#define smp_mb__before_atomic_dec()     smp_mb()
-#define smp_mb__after_atomic_dec()      smp_mb()
-#define smp_mb__before_atomic_inc()     smp_mb()
-#define smp_mb__after_atomic_inc()      smp_mb()
-
-#endif /* _ASM_PPC64_ATOMIC_H_ */
diff --git a/include/asm-ppc64/bitops.h b/include/asm-ppc64/bitops.h
index a0f831224f9..dbfa42ef4a9 100644
--- a/include/asm-ppc64/bitops.h
+++ b/include/asm-ppc64/bitops.h
@@ -42,7 +42,7 @@
 
 #ifdef __KERNEL__
 
-#include <asm/memory.h>
+#include <asm/synch.h>
 
 /*
  * clear_bit doesn't imply a memory barrier
diff --git a/include/asm-ppc64/dma.h b/include/asm-ppc64/dma.h
deleted file mode 100644
index dfd1f69059b..00000000000
--- a/include/asm-ppc64/dma.h
+++ /dev/null
@@ -1,329 +0,0 @@
-/* 
- * linux/include/asm/dma.h: Defines for using and allocating dma channels.
- * Written by Hennus Bergman, 1992.
- * High DMA channel support & info by Hannu Savolainen
- * and John Boyd, Nov. 1992.
- * Changes for ppc sound by Christoph Nadig
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ASM_DMA_H
-#define _ASM_DMA_H
-
-#include <linux/config.h>
-#include <asm/io.h>
-#include <linux/spinlock.h>
-#include <asm/system.h>
-
-#ifndef MAX_DMA_CHANNELS
-#define MAX_DMA_CHANNELS	8
-#endif
-
-/* The maximum address that we can perform a DMA transfer to on this platform */
-/* Doesn't really apply... */
-#define MAX_DMA_ADDRESS  (~0UL)
-
-#if !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI)
-
-#define dma_outb	outb
-#define dma_inb		inb
-
-/*
- * NOTES about DMA transfers:
- *
- *  controller 1: channels 0-3, byte operations, ports 00-1F
- *  controller 2: channels 4-7, word operations, ports C0-DF
- *
- *  - ALL registers are 8 bits only, regardless of transfer size
- *  - channel 4 is not used - cascades 1 into 2.
- *  - channels 0-3 are byte - addresses/counts are for physical bytes
- *  - channels 5-7 are word - addresses/counts are for physical words
- *  - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
- *  - transfer count loaded to registers is 1 less than actual count
- *  - controller 2 offsets are all even (2x offsets for controller 1)
- *  - page registers for 5-7 don't use data bit 0, represent 128K pages
- *  - page registers for 0-3 use bit 0, represent 64K pages
- *
- * On PReP, DMA transfers are limited to the lower 16MB of _physical_ memory.  
- * On CHRP, the W83C553F (and VLSI Tollgate?) support full 32 bit addressing.
- * Note that addresses loaded into registers must be _physical_ addresses,
- * not logical addresses (which may differ if paging is active).
- *
- *  Address mapping for channels 0-3:
- *
- *   A23 ... A16 A15 ... A8  A7 ... A0    (Physical addresses)
- *    |  ...  |   |  ... |   |  ... |
- *    |  ...  |   |  ... |   |  ... |
- *    |  ...  |   |  ... |   |  ... |
- *   P7  ...  P0  A7 ... A0  A7 ... A0   
- * |    Page    | Addr MSB | Addr LSB |   (DMA registers)
- *
- *  Address mapping for channels 5-7:
- *
- *   A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0    (Physical addresses)
- *    |  ...  |   \   \   ... \  \  \  ... \  \
- *    |  ...  |    \   \   ... \  \  \  ... \  (not used)
- *    |  ...  |     \   \   ... \  \  \  ... \
- *   P7  ...  P1 (0) A7 A6  ... A0 A7 A6 ... A0   
- * |      Page      |  Addr MSB   |  Addr LSB  |   (DMA registers)
- *
- * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
- * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
- * the hardware level, so odd-byte transfers aren't possible).
- *
- * Transfer count (_not # bytes_) is limited to 64K, represented as actual
- * count - 1 : 64K => 0xFFFF, 1 => 0x0000.  Thus, count is always 1 or more,
- * and up to 128K bytes may be transferred on channels 5-7 in one operation. 
- *
- */
-
-/* 8237 DMA controllers */
-#define IO_DMA1_BASE	0x00	/* 8 bit slave DMA, channels 0..3 */
-#define IO_DMA2_BASE	0xC0	/* 16 bit master DMA, ch 4(=slave input)..7 */
-
-/* DMA controller registers */
-#define DMA1_CMD_REG		0x08	/* command register (w) */
-#define DMA1_STAT_REG		0x08	/* status register (r) */
-#define DMA1_REQ_REG            0x09    /* request register (w) */
-#define DMA1_MASK_REG		0x0A	/* single-channel mask (w) */
-#define DMA1_MODE_REG		0x0B	/* mode register (w) */
-#define DMA1_CLEAR_FF_REG	0x0C	/* clear pointer flip-flop (w) */
-#define DMA1_TEMP_REG           0x0D    /* Temporary Register (r) */
-#define DMA1_RESET_REG		0x0D	/* Master Clear (w) */
-#define DMA1_CLR_MASK_REG       0x0E    /* Clear Mask */
-#define DMA1_MASK_ALL_REG       0x0F    /* all-channels mask (w) */
-
-#define DMA2_CMD_REG		0xD0	/* command register (w) */
-#define DMA2_STAT_REG		0xD0	/* status register (r) */
-#define DMA2_REQ_REG            0xD2    /* request register (w) */
-#define DMA2_MASK_REG		0xD4	/* single-channel mask (w) */
-#define DMA2_MODE_REG		0xD6	/* mode register (w) */
-#define DMA2_CLEAR_FF_REG	0xD8	/* clear pointer flip-flop (w) */
-#define DMA2_TEMP_REG           0xDA    /* Temporary Register (r) */
-#define DMA2_RESET_REG		0xDA	/* Master Clear (w) */
-#define DMA2_CLR_MASK_REG       0xDC    /* Clear Mask */
-#define DMA2_MASK_ALL_REG       0xDE    /* all-channels mask (w) */
-
-#define DMA_ADDR_0              0x00    /* DMA address registers */
-#define DMA_ADDR_1              0x02
-#define DMA_ADDR_2              0x04
-#define DMA_ADDR_3              0x06
-#define DMA_ADDR_4              0xC0
-#define DMA_ADDR_5              0xC4
-#define DMA_ADDR_6              0xC8
-#define DMA_ADDR_7              0xCC
-
-#define DMA_CNT_0               0x01    /* DMA count registers */
-#define DMA_CNT_1               0x03
-#define DMA_CNT_2               0x05
-#define DMA_CNT_3               0x07
-#define DMA_CNT_4               0xC2
-#define DMA_CNT_5               0xC6
-#define DMA_CNT_6               0xCA
-#define DMA_CNT_7               0xCE
-
-#define DMA_LO_PAGE_0              0x87    /* DMA page registers */
-#define DMA_LO_PAGE_1              0x83
-#define DMA_LO_PAGE_2              0x81
-#define DMA_LO_PAGE_3              0x82
-#define DMA_LO_PAGE_5              0x8B
-#define DMA_LO_PAGE_6              0x89
-#define DMA_LO_PAGE_7              0x8A
-
-#define DMA_HI_PAGE_0              0x487    /* DMA page registers */
-#define DMA_HI_PAGE_1              0x483
-#define DMA_HI_PAGE_2              0x481
-#define DMA_HI_PAGE_3              0x482
-#define DMA_HI_PAGE_5              0x48B
-#define DMA_HI_PAGE_6              0x489
-#define DMA_HI_PAGE_7              0x48A
-
-#define DMA1_EXT_REG               0x40B
-#define DMA2_EXT_REG               0x4D6
-
-#define DMA_MODE_READ	0x44	/* I/O to memory, no autoinit, increment, single mode */
-#define DMA_MODE_WRITE	0x48	/* memory to I/O, no autoinit, increment, single mode */
-#define DMA_MODE_CASCADE 0xC0   /* pass thru DREQ->HRQ, DACK<-HLDA only */
-
-#define DMA_AUTOINIT   	 0x10
-
-extern spinlock_t  dma_spin_lock;
-
-static __inline__ unsigned long claim_dma_lock(void)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&dma_spin_lock, flags);
-	return flags;
-}
-
-static __inline__ void release_dma_lock(unsigned long flags)
-{
-	spin_unlock_irqrestore(&dma_spin_lock, flags);
-}
-
-/* enable/disable a specific DMA channel */
-static __inline__ void enable_dma(unsigned int dmanr)
-{
-	unsigned char ucDmaCmd=0x00;
-
-	if (dmanr != 4)
-	{
-		dma_outb(0, DMA2_MASK_REG);  /* This may not be enabled */
-		dma_outb(ucDmaCmd, DMA2_CMD_REG);  /* Enable group */
-	}
-	if (dmanr<=3)
-	{
-		dma_outb(dmanr,  DMA1_MASK_REG);
-		dma_outb(ucDmaCmd, DMA1_CMD_REG);  /* Enable group */
-	} else
-	{
-		dma_outb(dmanr & 3,  DMA2_MASK_REG);
-	}
-}
-
-static __inline__ void disable_dma(unsigned int dmanr)
-{
-	if (dmanr<=3)
-		dma_outb(dmanr | 4,  DMA1_MASK_REG);
-	else
-		dma_outb((dmanr & 3) | 4,  DMA2_MASK_REG);
-}
-
-/* Clear the 'DMA Pointer Flip Flop'.
- * Write 0 for LSB/MSB, 1 for MSB/LSB access.
- * Use this once to initialize the FF to a known state.
- * After that, keep track of it. :-)
- * --- In order to do that, the DMA routines below should ---
- * --- only be used while interrupts are disabled! ---
- */
-static __inline__ void clear_dma_ff(unsigned int dmanr)
-{
-	if (dmanr<=3)
-		dma_outb(0,  DMA1_CLEAR_FF_REG);
-	else
-		dma_outb(0,  DMA2_CLEAR_FF_REG);
-}
-
-/* set mode (above) for a specific DMA channel */
-static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
-{
-	if (dmanr<=3)
-		dma_outb(mode | dmanr,  DMA1_MODE_REG);
-	else
-		dma_outb(mode | (dmanr&3),  DMA2_MODE_REG);
-}
-
-/* Set only the page register bits of the transfer address.
- * This is used for successive transfers when we know the contents of
- * the lower 16 bits of the DMA current address register, but a 64k boundary
- * may have been crossed.
- */
-static __inline__ void set_dma_page(unsigned int dmanr, int pagenr)
-{
-	switch(dmanr) {
-		case 0:
-			dma_outb(pagenr, DMA_LO_PAGE_0);
-                        dma_outb(pagenr>>8, DMA_HI_PAGE_0);
-			break;
-		case 1:
-			dma_outb(pagenr, DMA_LO_PAGE_1);
-                        dma_outb(pagenr>>8, DMA_HI_PAGE_1);
-			break;
-		case 2:
-			dma_outb(pagenr, DMA_LO_PAGE_2);
-			dma_outb(pagenr>>8, DMA_HI_PAGE_2); 
-			break;
-		case 3:
-			dma_outb(pagenr, DMA_LO_PAGE_3);
-			dma_outb(pagenr>>8, DMA_HI_PAGE_3); 
-			break;
-	        case 5:
-		        dma_outb(pagenr & 0xfe, DMA_LO_PAGE_5);
-                        dma_outb(pagenr>>8, DMA_HI_PAGE_5);
-			break;
-		case 6:
-		        dma_outb(pagenr & 0xfe, DMA_LO_PAGE_6);
-			dma_outb(pagenr>>8, DMA_HI_PAGE_6);
-			break;
-		case 7:
-		        dma_outb(pagenr & 0xfe, DMA_LO_PAGE_7);
-			dma_outb(pagenr>>8, DMA_HI_PAGE_7);
-		  break;
-	}
-}
-
-
-/* Set transfer address & page bits for specific DMA channel.
- * Assumes dma flipflop is clear.
- */
-static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int phys)
-{
-	if (dmanr <= 3)  {
-	    dma_outb( phys & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
-            dma_outb( (phys>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
-	}  else  {
-	    dma_outb( (phys>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
-	    dma_outb( (phys>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
-	}
-	set_dma_page(dmanr, phys>>16);
-}
-
-
-/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
- * a specific DMA channel.
- * You must ensure the parameters are valid.
- * NOTE: from a manual: "the number of transfers is one more
- * than the initial word count"! This is taken into account.
- * Assumes dma flip-flop is clear.
- * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
- */
-static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
-{
-        count--;
-	if (dmanr <= 3)  {
-	    dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
-	    dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
-        } else {
-	    dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
-	    dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
-        }
-}
-
-
-/* Get DMA residue count. After a DMA transfer, this
- * should return zero. Reading this while a DMA transfer is
- * still in progress will return unpredictable results.
- * If called before the channel has been used, it may return 1.
- * Otherwise, it returns the number of _bytes_ left to transfer.
- *
- * Assumes DMA flip-flop is clear.
- */
-static __inline__ int get_dma_residue(unsigned int dmanr)
-{
-	unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
-					 : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
-
-	/* using short to get 16-bit wrap around */
-	unsigned short count;
-
-	count = 1 + dma_inb(io_port);
-	count += dma_inb(io_port) << 8;
-	
-	return (dmanr <= 3)? count : (count<<1);
-}
-
-/* These are in kernel/dma.c: */
-extern int request_dma(unsigned int dmanr, const char * device_id);	/* reserve a DMA channel */
-extern void free_dma(unsigned int dmanr);	/* release it again */
-
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;                                        
-#else                                                         
-#define isa_dma_bridge_buggy   (0)
-#endif
-#endif	/* !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI) */
-#endif /* _ASM_DMA_H */
diff --git a/include/asm-ppc64/futex.h b/include/asm-ppc64/futex.h
index cb2640b3a40..266b460de44 100644
--- a/include/asm-ppc64/futex.h
+++ b/include/asm-ppc64/futex.h
@@ -5,7 +5,7 @@
 
 #include <linux/futex.h>
 #include <asm/errno.h>
-#include <asm/memory.h>
+#include <asm/synch.h>
 #include <asm/uaccess.h>
 
 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
diff --git a/include/asm-ppc64/io.h b/include/asm-ppc64/io.h
index 59c958aea4d..bd7c9532d77 100644
--- a/include/asm-ppc64/io.h
+++ b/include/asm-ppc64/io.h
@@ -15,7 +15,7 @@
 #ifdef CONFIG_PPC_ISERIES 
 #include <asm/iSeries/iSeries_io.h>
 #endif  
-#include <asm/memory.h>
+#include <asm/synch.h>
 #include <asm/delay.h>
 
 #include <asm-generic/iomap.h>
diff --git a/include/asm-ppc64/memory.h b/include/asm-ppc64/memory.h
deleted file mode 100644
index af53ffb5572..00000000000
--- a/include/asm-ppc64/memory.h
+++ /dev/null
@@ -1,61 +0,0 @@
-#ifndef _ASM_PPC64_MEMORY_H_ 
-#define _ASM_PPC64_MEMORY_H_ 
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/config.h>
-
-/*
- * Arguably the bitops and *xchg operations don't imply any memory barrier
- * or SMP ordering, but in fact a lot of drivers expect them to imply
- * both, since they do on x86 cpus.
- */
-#ifdef CONFIG_SMP
-#define EIEIO_ON_SMP	"eieio\n"
-#define ISYNC_ON_SMP	"\n\tisync"
-#define SYNC_ON_SMP	"lwsync\n\t"
-#else
-#define EIEIO_ON_SMP
-#define ISYNC_ON_SMP
-#define SYNC_ON_SMP
-#endif
-
-static inline void eieio(void)
-{
-	__asm__ __volatile__ ("eieio" : : : "memory");
-}
-
-static inline void isync(void)
-{
-	__asm__ __volatile__ ("isync" : : : "memory");
-}
-
-#ifdef CONFIG_SMP
-#define eieio_on_smp()	eieio()
-#define isync_on_smp()	isync()
-#else
-#define eieio_on_smp()	__asm__ __volatile__("": : :"memory")
-#define isync_on_smp()	__asm__ __volatile__("": : :"memory")
-#endif
-
-/* Macros for adjusting thread priority (hardware multi-threading) */
-#define HMT_very_low()    asm volatile("or 31,31,31   # very low priority")
-#define HMT_low()	asm volatile("or 1,1,1		# low priority")
-#define HMT_medium_low()  asm volatile("or 6,6,6      # medium low priority")
-#define HMT_medium()	asm volatile("or 2,2,2		# medium priority")
-#define HMT_medium_high() asm volatile("or 5,5,5      # medium high priority")
-#define HMT_high()	asm volatile("or 3,3,3		# high priority")
-
-#define HMT_VERY_LOW    "\tor   31,31,31        # very low priority\n"
-#define HMT_LOW		"\tor	1,1,1		# low priority\n"
-#define HMT_MEDIUM_LOW  "\tor   6,6,6           # medium low priority\n"
-#define HMT_MEDIUM	"\tor	2,2,2		# medium priority\n"
-#define HMT_MEDIUM_HIGH "\tor   5,5,5           # medium high priority\n"
-#define HMT_HIGH	"\tor	3,3,3		# high priority\n"
-
-#endif
diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h
index fe5cd2f5868..6447fbee7d6 100644
--- a/include/asm-ppc64/processor.h
+++ b/include/asm-ppc64/processor.h
@@ -369,6 +369,14 @@ GLUE(.,name):
 #define mfasr()		({unsigned long rval; \
 			asm volatile("mfasr %0" : "=r" (rval)); rval;})
 
+/* Macros for adjusting thread priority (hardware multi-threading) */
+#define HMT_very_low()    asm volatile("or 31,31,31   # very low priority")
+#define HMT_low()	asm volatile("or 1,1,1		# low priority")
+#define HMT_medium_low()  asm volatile("or 6,6,6      # medium low priority")
+#define HMT_medium()	asm volatile("or 2,2,2		# medium priority")
+#define HMT_medium_high() asm volatile("or 5,5,5      # medium high priority")
+#define HMT_high()	asm volatile("or 3,3,3		# high priority")
+
 static inline void set_tb(unsigned int upper, unsigned int lower)
 {
 	mttbl(0);
diff --git a/include/asm-ppc64/smu.h b/include/asm-ppc64/smu.h
index 10b4397af9a..dee8eefe47b 100644
--- a/include/asm-ppc64/smu.h
+++ b/include/asm-ppc64/smu.h
@@ -1,22 +1,379 @@
+#ifndef _SMU_H
+#define _SMU_H
+
 /*
  * Definitions for talking to the SMU chip in newer G5 PowerMacs
  */
 
 #include <linux/config.h>
+#include <linux/list.h>
+
+/*
+ * Known SMU commands
+ *
+ * Most of what is below comes from looking at the Open Firmware driver,
+ * though this is still incomplete and could use better documentation here
+ * or there...
+ */
+
+
+/*
+ * Partition info commands
+ *
+ * I do not know what those are for at this point
+ */
+#define SMU_CMD_PARTITION_COMMAND		0x3e
+
+
+/*
+ * Fan control
+ *
+ * This is a "mux" for fan control commands, first byte is the
+ * "sub" command.
+ */
+#define SMU_CMD_FAN_COMMAND			0x4a
+
+
+/*
+ * Battery access
+ *
+ * Same command number as the PMU, could it be same syntax ?
+ */
+#define SMU_CMD_BATTERY_COMMAND			0x6f
+#define   SMU_CMD_GET_BATTERY_INFO		0x00
+
+/*
+ * Real time clock control
+ *
+ * This is a "mux", first data byte contains the "sub" command.
+ * The "RTC" part of the SMU controls the date, time, powerup
+ * timer, but also a PRAM
+ *
+ * Dates are in BCD format on 7 bytes:
+ * [sec] [min] [hour] [weekday] [month day] [month] [year]
+ * with month being 1 based and year minus 100
+ */
+#define SMU_CMD_RTC_COMMAND			0x8e
+#define   SMU_CMD_RTC_SET_PWRUP_TIMER		0x00 /* i: 7 bytes date */
+#define   SMU_CMD_RTC_GET_PWRUP_TIMER		0x01 /* o: 7 bytes date */
+#define   SMU_CMD_RTC_STOP_PWRUP_TIMER		0x02
+#define   SMU_CMD_RTC_SET_PRAM_BYTE_ACC		0x20 /* i: 1 byte (address?) */
+#define   SMU_CMD_RTC_SET_PRAM_AUTOINC		0x21 /* i: 1 byte (data?) */
+#define   SMU_CMD_RTC_SET_PRAM_LO_BYTES 	0x22 /* i: 10 bytes */
+#define   SMU_CMD_RTC_SET_PRAM_HI_BYTES 	0x23 /* i: 10 bytes */
+#define   SMU_CMD_RTC_GET_PRAM_BYTE		0x28 /* i: 1 bytes (address?) */
+#define   SMU_CMD_RTC_GET_PRAM_LO_BYTES 	0x29 /* o: 10 bytes */
+#define   SMU_CMD_RTC_GET_PRAM_HI_BYTES 	0x2a /* o: 10 bytes */
+#define	  SMU_CMD_RTC_SET_DATETIME		0x80 /* i: 7 bytes date */
+#define   SMU_CMD_RTC_GET_DATETIME		0x81 /* o: 7 bytes date */
+
+ /*
+  * i2c commands
+  *
+  * To issue an i2c command, first is to send a parameter block to the
+  * the SMU. This is a command of type 0x9a with 9 bytes of header
+  * eventually followed by data for a write:
+  *
+  * 0: bus number (from device-tree usually, SMU has lots of busses !)
+  * 1: transfer type/format (see below)
+  * 2: device address. For combined and combined4 type transfers, this
+  *    is the "write" version of the address (bit 0x01 cleared)
+  * 3: subaddress length (0..3)
+  * 4: subaddress byte 0 (or only byte for subaddress length 1)
+  * 5: subaddress byte 1
+  * 6: subaddress byte 2
+  * 7: combined address (device address for combined mode data phase)
+  * 8: data length
+  *
+  * The transfer types are the same good old Apple ones it seems,
+  * that is:
+  *   - 0x00: Simple transfer
+  *   - 0x01: Subaddress transfer (addr write + data tx, no restart)
+  *   - 0x02: Combined transfer (addr write + restart + data tx)
+  *
+  * This is then followed by actual data for a write.
+  *
+  * At this point, the OF driver seems to have a limitation on transfer
+  * sizes of 0xd bytes on reads and 0x5 bytes on writes. I do not know
+  * wether this is just an OF limit due to some temporary buffer size
+  * or if this is an SMU imposed limit. This driver has the same limitation
+  * for now as I use a 0x10 bytes temporary buffer as well
+  *
+  * Once that is completed, a response is expected from the SMU. This is
+  * obtained via a command of type 0x9a with a length of 1 byte containing
+  * 0 as the data byte. OF also fills the rest of the data buffer with 0xff's
+  * though I can't tell yet if this is actually necessary. Once this command
+  * is complete, at this point, all I can tell is what OF does. OF tests
+  * byte 0 of the reply:
+  *   - on read, 0xfe or 0xfc : bus is busy, wait (see below) or nak ?
+  *   - on read, 0x00 or 0x01 : reply is in buffer (after the byte 0)
+  *   - on write, < 0 -> failure (immediate exit)
+  *   - else, OF just exists (without error, weird)
+  *
+  * So on read, there is this wait-for-busy thing when getting a 0xfc or
+  * 0xfe result. OF does a loop of up to 64 retries, waiting 20ms and
+  * doing the above again until either the retries expire or the result
+  * is no longer 0xfe or 0xfc
+  *
+  * The Darwin I2C driver is less subtle though. On any non-success status
+  * from the response command, it waits 5ms and tries again up to 20 times,
+  * it doesn't differenciate between fatal errors or "busy" status.
+  *
+  * This driver provides an asynchronous paramblock based i2c command
+  * interface to be used either directly by low level code or by a higher
+  * level driver interfacing to the linux i2c layer. The current
+  * implementation of this relies on working timers & timer interrupts
+  * though, so be careful of calling context for now. This may be "fixed"
+  * in the future by adding a polling facility.
+  */
+#define SMU_CMD_I2C_COMMAND			0x9a
+          /* transfer types */
+#define   SMU_I2C_TRANSFER_SIMPLE	0x00
+#define   SMU_I2C_TRANSFER_STDSUB	0x01
+#define   SMU_I2C_TRANSFER_COMBINED	0x02
+
+/*
+ * Power supply control
+ *
+ * The "sub" command is an ASCII string in the data, the
+ * data lenght is that of the string.
+ *
+ * The VSLEW command can be used to get or set the voltage slewing.
+ *  - lenght 5 (only "VSLEW") : it returns "DONE" and 3 bytes of
+ *    reply at data offset 6, 7 and 8.
+ *  - lenght 8 ("VSLEWxyz") has 3 additional bytes appended, and is
+ *    used to set the voltage slewing point. The SMU replies with "DONE"
+ * I yet have to figure out their exact meaning of those 3 bytes in
+ * both cases.
+ *
+ */
+#define SMU_CMD_POWER_COMMAND			0xaa
+#define   SMU_CMD_POWER_RESTART		       	"RESTART"
+#define   SMU_CMD_POWER_SHUTDOWN		"SHUTDOWN"
+#define   SMU_CMD_POWER_VOLTAGE_SLEW		"VSLEW"
+
+/* Misc commands
+ *
+ * This command seem to be a grab bag of various things
+ */
+#define SMU_CMD_MISC_df_COMMAND			0xdf
+#define   SMU_CMD_MISC_df_SET_DISPLAY_LIT	0x02 /* i: 1 byte */
+#define   SMU_CMD_MISC_df_NMI_OPTION		0x04
+
+/*
+ * Version info commands
+ *
+ * I haven't quite tried to figure out how these work
+ */
+#define SMU_CMD_VERSION_COMMAND			0xea
+
+
+/*
+ * Misc commands
+ *
+ * This command seem to be a grab bag of various things
+ */
+#define SMU_CMD_MISC_ee_COMMAND			0xee
+#define   SMU_CMD_MISC_ee_GET_DATABLOCK_REC	0x02
+#define	  SMU_CMD_MISC_ee_LEDS_CTRL		0x04 /* i: 00 (00,01) [00] */
+#define   SMU_CMD_MISC_ee_GET_DATA		0x05 /* i: 00 , o: ?? */
+
+
+
+/*
+ * - Kernel side interface -
+ */
+
+#ifdef __KERNEL__
+
+/*
+ * Asynchronous SMU commands
+ *
+ * Fill up this structure and submit it via smu_queue_command(),
+ * and get notified by the optional done() callback, or because
+ * status becomes != 1
+ */
+
+struct smu_cmd;
+
+struct smu_cmd
+{
+	/* public */
+	u8			cmd;		/* command */
+	int			data_len;	/* data len */
+	int			reply_len;	/* reply len */
+	void			*data_buf;	/* data buffer */
+	void			*reply_buf;	/* reply buffer */
+	int			status;		/* command status */
+	void			(*done)(struct smu_cmd *cmd, void *misc);
+	void			*misc;
+
+	/* private */
+	struct list_head	link;
+};
+
+/*
+ * Queues an SMU command, all fields have to be initialized
+ */
+extern int smu_queue_cmd(struct smu_cmd *cmd);
+
+/*
+ * Simple command wrapper. This structure embeds a small buffer
+ * to ease sending simple SMU commands from the stack
+ */
+struct smu_simple_cmd
+{
+	struct smu_cmd	cmd;
+	u8	       	buffer[16];
+};
+
+/*
+ * Queues a simple command. All fields will be initialized by that
+ * function
+ */
+extern int smu_queue_simple(struct smu_simple_cmd *scmd, u8 command,
+			    unsigned int data_len,
+			    void (*done)(struct smu_cmd *cmd, void *misc),
+			    void *misc,
+			    ...);
+
+/*
+ * Completion helper. Pass it to smu_queue_simple or as 'done'
+ * member to smu_queue_cmd, it will call complete() on the struct
+ * completion passed in the "misc" argument
+ */
+extern void smu_done_complete(struct smu_cmd *cmd, void *misc);
 
 /*
- * Basic routines for use by architecture. To be extended as
- * we understand more of the chip
+ * Synchronous helpers. Will spin-wait for completion of a command
+ */
+extern void smu_spinwait_cmd(struct smu_cmd *cmd);
+
+static inline void smu_spinwait_simple(struct smu_simple_cmd *scmd)
+{
+	smu_spinwait_cmd(&scmd->cmd);
+}
+
+/*
+ * Poll routine to call if blocked with irqs off
+ */
+extern void smu_poll(void);
+
+
+/*
+ * Init routine, presence check....
  */
 extern int smu_init(void);
 extern int smu_present(void);
+struct of_device;
+extern struct of_device *smu_get_ofdev(void);
+
+
+/*
+ * Common command wrappers
+ */
 extern void smu_shutdown(void);
 extern void smu_restart(void);
-extern int smu_get_rtc_time(struct rtc_time *time);
-extern int smu_set_rtc_time(struct rtc_time *time);
+struct rtc_time;
+extern int smu_get_rtc_time(struct rtc_time *time, int spinwait);
+extern int smu_set_rtc_time(struct rtc_time *time, int spinwait);
 
 /*
  * SMU command buffer absolute address, exported by pmac_setup,
  * this is allocated very early during boot.
  */
 extern unsigned long smu_cmdbuf_abs;
+
+
+/*
+ * Kenrel asynchronous i2c interface
+ */
+
+/* SMU i2c header, exactly matches i2c header on wire */
+struct smu_i2c_param
+{
+	u8	bus;		/* SMU bus ID (from device tree) */
+	u8	type;		/* i2c transfer type */
+	u8	devaddr;	/* device address (includes direction) */
+	u8	sublen;		/* subaddress length */
+	u8	subaddr[3];	/* subaddress */
+	u8	caddr;		/* combined address, filled by SMU driver */
+	u8	datalen;	/* length of transfer */
+	u8	data[7];	/* data */
+};
+
+#define SMU_I2C_READ_MAX	0x0d
+#define SMU_I2C_WRITE_MAX	0x05
+
+struct smu_i2c_cmd
+{
+	/* public */
+	struct smu_i2c_param	info;
+	void			(*done)(struct smu_i2c_cmd *cmd, void *misc);
+	void			*misc;
+	int			status; /* 1 = pending, 0 = ok, <0 = fail */
+
+	/* private */
+	struct smu_cmd		scmd;
+	int			read;
+	int			stage;
+	int			retries;
+	u8			pdata[0x10];
+	struct list_head	link;
+};
+
+/*
+ * Call this to queue an i2c command to the SMU. You must fill info,
+ * including info.data for a write, done and misc.
+ * For now, no polling interface is provided so you have to use completion
+ * callback.
+ */
+extern int smu_queue_i2c(struct smu_i2c_cmd *cmd);
+
+
+#endif /* __KERNEL__ */
+
+/*
+ * - Userland interface -
+ */
+
+/*
+ * A given instance of the device can be configured for 2 different
+ * things at the moment:
+ *
+ *  - sending SMU commands (default at open() time)
+ *  - receiving SMU events (not yet implemented)
+ *
+ * Commands are written with write() of a command block. They can be
+ * "driver" commands (for example to switch to event reception mode)
+ * or real SMU commands. They are made of a header followed by command
+ * data if any.
+ *
+ * For SMU commands (not for driver commands), you can then read() back
+ * a reply. The reader will be blocked or not depending on how the device
+ * file is opened. poll() isn't implemented yet. The reply will consist
+ * of a header as well, followed by the reply data if any. You should
+ * always provide a buffer large enough for the maximum reply data, I
+ * recommand one page.
+ *
+ * It is illegal to send SMU commands through a file descriptor configured
+ * for events reception
+ *
+ */
+struct smu_user_cmd_hdr
+{
+	__u32		cmdtype;
+#define SMU_CMDTYPE_SMU			0	/* SMU command */
+#define SMU_CMDTYPE_WANTS_EVENTS	1	/* switch fd to events mode */
+
+	__u8		cmd;			/* SMU command byte */
+	__u32		data_len;		/* Lenght of data following */
+};
+
+struct smu_user_reply_hdr
+{
+	__u32		status;			/* Command status */
+	__u32		reply_len;		/* Lenght of data follwing */
+};
+
+#endif /*  _SMU_H */
diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h
index 375015c62f2..1fbdc9f0590 100644
--- a/include/asm-ppc64/system.h
+++ b/include/asm-ppc64/system.h
@@ -13,7 +13,7 @@
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/hw_irq.h>
-#include <asm/memory.h>
+#include <asm/synch.h>
 
 /*
  * Memory barrier.
@@ -48,7 +48,7 @@
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
 #define smp_rmb()	rmb()
-#define smp_wmb()	__asm__ __volatile__ ("eieio" : : : "memory")
+#define smp_wmb()	eieio()
 #define smp_read_barrier_depends()  read_barrier_depends()
 #else
 #define smp_mb()	__asm__ __volatile__("": : :"memory")
diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-s390/futex.h
+++ b/include/asm-s390/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-sh/futex.h b/include/asm-sh/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-sh/futex.h
+++ b/include/asm-sh/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-sh64/futex.h b/include/asm-sh64/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-sh64/futex.h
+++ b/include/asm-sh64/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-sparc/futex.h b/include/asm-sparc/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-sparc/futex.h
+++ b/include/asm-sparc/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h
index 51b26e81d82..ededd2659ea 100644
--- a/include/asm-sparc64/cacheflush.h
+++ b/include/asm-sparc64/cacheflush.h
@@ -4,13 +4,6 @@
 #include <linux/config.h>
 #include <asm/page.h>
 
-/* Flushing for D-cache alias handling is only needed if
- * the page size is smaller than 16K.
- */
-#if PAGE_SHIFT < 14
-#define DCACHE_ALIASING_POSSIBLE
-#endif
-
 #ifndef __ASSEMBLY__
 
 #include <linux/mm.h>
diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-sparc64/futex.h
+++ b/include/asm-sparc64/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-sparc64/ide.h b/include/asm-sparc64/ide.h
index 4c1098474c7..c393f815b0b 100644
--- a/include/asm-sparc64/ide.h
+++ b/include/asm-sparc64/ide.h
@@ -15,6 +15,7 @@
 #include <asm/io.h>
 #include <asm/spitfire.h>
 #include <asm/cacheflush.h>
+#include <asm/page.h>
 
 #ifndef MAX_HWIFS
 # ifdef CONFIG_BLK_DEV_IDEPCI
diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h
index c9f8ef208ea..7f8d764abc4 100644
--- a/include/asm-sparc64/page.h
+++ b/include/asm-sparc64/page.h
@@ -21,6 +21,13 @@
 #define PAGE_SIZE    (_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK    (~(PAGE_SIZE-1))
 
+/* Flushing for D-cache alias handling is only needed if
+ * the page size is smaller than 16K.
+ */
+#if PAGE_SHIFT < 14
+#define DCACHE_ALIASING_POSSIBLE
+#endif
+
 #ifdef __KERNEL__
 
 #ifndef __ASSEMBLY__
diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
index b9b1914aae6..a96067cca96 100644
--- a/include/asm-sparc64/pgalloc.h
+++ b/include/asm-sparc64/pgalloc.h
@@ -10,6 +10,7 @@
 #include <asm/spitfire.h>
 #include <asm/cpudata.h>
 #include <asm/cacheflush.h>
+#include <asm/page.h>
 
 /* Page table allocation/freeing. */
 #ifdef CONFIG_SMP
diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
index a2b4f5ed462..a297f6144f0 100644
--- a/include/asm-sparc64/pgtable.h
+++ b/include/asm-sparc64/pgtable.h
@@ -24,21 +24,23 @@
 #include <asm/processor.h>
 #include <asm/const.h>
 
-/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 16MB).
- * The page copy blockops use 0x1000000 to 0x18000000 (16MB --> 24MB).
+/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB).
+ * The page copy blockops can use 0x2000000 to 0x10000000.
  * The PROM resides in an area spanning 0xf0000000 to 0x100000000.
- * The vmalloc area spans 0x140000000 to 0x200000000.
+ * The vmalloc area spans 0x100000000 to 0x200000000.
+ * Since modules need to be in the lowest 32-bits of the address space,
+ * we place them right before the OBP area from 0x10000000 to 0xf0000000.
  * There is a single static kernel PMD which maps from 0x0 to address
  * 0x400000000.
  */
-#define	TLBTEMP_BASE		_AC(0x0000000001000000,UL)
-#define MODULES_VADDR		_AC(0x0000000002000000,UL)
-#define MODULES_LEN		_AC(0x000000007e000000,UL)
-#define MODULES_END		_AC(0x0000000080000000,UL)
-#define VMALLOC_START		_AC(0x0000000140000000,UL)
-#define VMALLOC_END		_AC(0x0000000200000000,UL)
+#define	TLBTEMP_BASE		_AC(0x0000000002000000,UL)
+#define MODULES_VADDR		_AC(0x0000000010000000,UL)
+#define MODULES_LEN		_AC(0x00000000e0000000,UL)
+#define MODULES_END		_AC(0x00000000f0000000,UL)
 #define LOW_OBP_ADDRESS		_AC(0x00000000f0000000,UL)
 #define HI_OBP_ADDRESS		_AC(0x0000000100000000,UL)
+#define VMALLOC_START		_AC(0x0000000100000000,UL)
+#define VMALLOC_END		_AC(0x0000000200000000,UL)
 
 /* XXX All of this needs to be rethought so we can take advantage
  * XXX cheetah's full 64-bit virtual address space, ie. no more hole
diff --git a/include/asm-um/futex.h b/include/asm-um/futex.h
index 2cac5ecd9d0..142ee2d8e0f 100644
--- a/include/asm-um/futex.h
+++ b/include/asm-um/futex.h
@@ -1,53 +1,12 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#ifdef __KERNEL__
+#ifndef __UM_FUTEX_H
+#define __UM_FUTEX_H
 
 #include <linux/futex.h>
 #include <asm/errno.h>
+#include <asm/system.h>
+#include <asm/processor.h>
 #include <asm/uaccess.h>
 
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-	int op = (encoded_op >> 28) & 7;
-	int cmp = (encoded_op >> 24) & 15;
-	int oparg = (encoded_op << 8) >> 20;
-	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
-	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-		oparg = 1 << oparg;
-
-	if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-		return -EFAULT;
-
-	inc_preempt_count();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-	case FUTEX_OP_ADD:
-	case FUTEX_OP_OR:
-	case FUTEX_OP_ANDN:
-	case FUTEX_OP_XOR:
-	default:
-		ret = -ENOSYS;
-	}
+#include "asm/arch/futex.h"
 
-	dec_preempt_count();
-
-	if (!ret) {
-		switch (cmp) {
-		case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-		case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-		case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-		case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-		case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-		case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-		default: ret = -ENOSYS;
-		}
-	}
-	return ret;
-}
-
-#endif
 #endif
diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
index ed06170e0ed..616d02b57ea 100644
--- a/include/asm-um/pgtable.h
+++ b/include/asm-um/pgtable.h
@@ -346,7 +346,6 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	pte_set_val(pte, (pte_val(pte) & _PAGE_CHG_MASK), newprot);
-	if(pte_present(pte)) pte = pte_mknewpage(pte_mknewprot(pte));
 	return pte; 
 }
 
diff --git a/include/asm-v850/futex.h b/include/asm-v850/futex.h
index 2cac5ecd9d0..9feff4ce142 100644
--- a/include/asm-v850/futex.h
+++ b/include/asm-v850/futex.h
@@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 	int cmp = (encoded_op >> 24) & 15;
 	int oparg = (encoded_op << 8) >> 20;
 	int cmparg = (encoded_op << 20) >> 20;
-	int oldval = 0, ret, tem;
+	int oldval = 0, ret;
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index 24f86f0e43c..12b5732dc6e 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -22,7 +22,7 @@ typedef struct { volatile int counter; } atomic_t;
 #include <asm/processor.h>
 #include <asm/system.h>
 
-#define ATOMIC_INIT(i)	( (atomic_t) { (i) } )
+#define ATOMIC_INIT(i)	{ (i) }
 
 /*
  * This Xtensa implementation assumes that the right mechanism
diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h
index d395ef226c3..e76ee889e21 100644
--- a/include/asm-xtensa/bitops.h
+++ b/include/asm-xtensa/bitops.h
@@ -174,7 +174,7 @@ static __inline__ int test_bit(int nr, const volatile void *addr)
 	return 1UL & (((const volatile unsigned int *)addr)[nr>>5] >> (nr&31));
 }
 
-#if XCHAL_HAVE_NSAU
+#if XCHAL_HAVE_NSA
 
 static __inline__ int __cntlz (unsigned long x)
 {
diff --git a/include/asm-xtensa/hardirq.h b/include/asm-xtensa/hardirq.h
index e07c76c36b9..aa9c1adf68d 100644
--- a/include/asm-xtensa/hardirq.h
+++ b/include/asm-xtensa/hardirq.h
@@ -23,6 +23,7 @@ typedef struct {
 	unsigned int __nmi_count;	       /* arch dependent */
 } ____cacheline_aligned irq_cpustat_t;
 
+void ack_bad_irq(unsigned int irq);
 #include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
 
 #endif	/* _XTENSA_HARDIRQ_H */
diff --git a/include/asm-xtensa/semaphore.h b/include/asm-xtensa/semaphore.h
index db740b8bc6f..09e89ab3eb6 100644
--- a/include/asm-xtensa/semaphore.h
+++ b/include/asm-xtensa/semaphore.h
@@ -20,28 +20,19 @@ struct semaphore {
 	atomic_t count;
 	int sleepers;
 	wait_queue_head_t wait;
-#if WAITQUEUE_DEBUG
-	long __magic;
-#endif
 };
 
-#if WAITQUEUE_DEBUG
-# define __SEM_DEBUG_INIT(name) \
-		, (int)&(name).__magic
-#else
-# define __SEM_DEBUG_INIT(name)
-#endif
-
-#define __SEMAPHORE_INITIALIZER(name,count)			\
-	{ ATOMIC_INIT(count), 					\
-	  0,							\
-	  __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)		\
-	__SEM_DEBUG_INIT(name) }
+#define __SEMAPHORE_INITIALIZER(name,n)					\
+{									\
+	.count		= ATOMIC_INIT(n),				\
+	.sleepers	= 0,						\
+	.wait		= __WAIT_QUEUE_HEAD_INITIALIZER((name).wait)	\
+}
 
-#define __MUTEX_INITIALIZER(name) \
+#define __MUTEX_INITIALIZER(name) 					\
 	__SEMAPHORE_INITIALIZER(name, 1)
 
-#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
+#define __DECLARE_SEMAPHORE_GENERIC(name,count) 			\
 	struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
 
 #define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
@@ -49,17 +40,8 @@ struct semaphore {
 
 static inline void sema_init (struct semaphore *sem, int val)
 {
-/*
- *	*sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
- *
- * i'd rather use the more flexible initialization above, but sadly
- * GCC 2.7.2.3 emits a bogus warning. EGCS doesnt. Oh well.
- */
 	atomic_set(&sem->count, val);
 	init_waitqueue_head(&sem->wait);
-#if WAITQUEUE_DEBUG
-	sem->__magic = (int)&sem->__magic;
-#endif
 }
 
 static inline void init_MUTEX (struct semaphore *sem)
@@ -81,9 +63,7 @@ extern spinlock_t semaphore_wake_lock;
 
 static inline void down(struct semaphore * sem)
 {
-#if WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
+	might_sleep();
 
 	if (atomic_sub_return(1, &sem->count) < 0)
 		__down(sem);
@@ -92,9 +72,8 @@ static inline void down(struct semaphore * sem)
 static inline int down_interruptible(struct semaphore * sem)
 {
 	int ret = 0;
-#if WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
+
+	might_sleep();
 
 	if (atomic_sub_return(1, &sem->count) < 0)
 		ret = __down_interruptible(sem);
@@ -104,9 +83,6 @@ static inline int down_interruptible(struct semaphore * sem)
 static inline int down_trylock(struct semaphore * sem)
 {
 	int ret = 0;
-#if WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
 
 	if (atomic_sub_return(1, &sem->count) < 0)
 		ret = __down_trylock(sem);
@@ -119,9 +95,6 @@ static inline int down_trylock(struct semaphore * sem)
  */
 static inline void up(struct semaphore * sem)
 {
-#if WAITQUEUE_DEBUG
-	CHECK_MAGIC(sem->__magic);
-#endif
 	if (atomic_add_return(1, &sem->count) <= 0)
 		__up(sem);
 }
diff --git a/include/asm-xtensa/system.h b/include/asm-xtensa/system.h
index f09393232e5..9284867f1cb 100644
--- a/include/asm-xtensa/system.h
+++ b/include/asm-xtensa/system.h
@@ -189,20 +189,6 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 
 #define tas(ptr) (xchg((ptr),1))
 
-#if ( __XCC__ == 1 )
-
-/* xt-xcc processes __inline__ differently than xt-gcc and decides to
- * insert an out-of-line copy of function __xchg.  This presents the
- * unresolved symbol at link time of __xchg_called_with_bad_pointer,
- * even though such a function would never be called at run-time.
- * xt-gcc always inlines __xchg, and optimizes away the undefined
- * bad_pointer function.
- */
-
-#define xchg(ptr,x) xchg_u32(ptr,x)
-
-#else  /* assume xt-gcc */
-
 #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
 
 /*
@@ -224,8 +210,6 @@ __xchg(unsigned long x, volatile void * ptr, int size)
 	return x;
 }
 
-#endif
-
 extern void set_except_vector(int n, void *addr);
 
 static inline void spill_registers(void)
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 5fde6f4d6c1..04bd756efc6 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -5,6 +5,10 @@
  * linux/byteorder_generic.h
  * Generic Byte-reordering support
  *
+ * The "... p" macros, like le64_to_cpup, can be used with pointers
+ * to unaligned data, but there will be a performance penalty on 
+ * some architectures.  Use get_unaligned for unaligned data.
+ *
  * Francois-Rene Rideau <fare@tunes.org> 19970707
  *    gathered all the good ideas from all asm-foo/byteorder.h into one file,
  *    cleaned them up.
diff --git a/include/linux/device.h b/include/linux/device.h
index 06e5d42f2c7..95d607a48f0 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -317,6 +317,11 @@ dev_set_drvdata (struct device *dev, void *data)
 	dev->driver_data = data;
 }
 
+static inline int device_is_registered(struct device *dev)
+{
+	return klist_node_attached(&dev->knode_bus);
+}
+
 /*
  * High level routines for use by the bus drivers
  */
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 17d0c0d40b0..eef0876d830 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -42,8 +42,8 @@ struct hlist_node;
 struct vlan_ethhdr {
    unsigned char	h_dest[ETH_ALEN];	   /* destination eth addr	*/
    unsigned char	h_source[ETH_ALEN];	   /* source ether addr	*/
-   unsigned short       h_vlan_proto;              /* Should always be 0x8100 */
-   unsigned short       h_vlan_TCI;                /* Encapsulates priority and VLAN ID */
+   __be16               h_vlan_proto;              /* Should always be 0x8100 */
+   __be16               h_vlan_TCI;                /* Encapsulates priority and VLAN ID */
    unsigned short	h_vlan_encapsulated_proto; /* packet type ID field (or len) */
 };
 
@@ -55,8 +55,8 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 }
 
 struct vlan_hdr {
-   unsigned short       h_vlan_TCI;                /* Encapsulates priority and VLAN ID */
-   unsigned short       h_vlan_encapsulated_proto; /* packet type ID field (or len) */
+   __be16               h_vlan_TCI;                /* Encapsulates priority and VLAN ID */
+   __be16               h_vlan_encapsulated_proto; /* packet type ID field (or len) */
 };
 
 #define VLAN_VID_MASK	0xfff
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 022105c745f..ceee1fc42c6 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -393,6 +393,7 @@ extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_i
 extern void ata_pci_remove_one (struct pci_dev *pdev);
 #endif /* CONFIG_PCI */
 extern int ata_device_add(struct ata_probe_ent *ent);
+extern void ata_host_set_remove(struct ata_host_set *host_set);
 extern int ata_scsi_detect(Scsi_Host_Template *sht);
 extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg);
 extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 82d7024f076..097b3a3c693 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -136,6 +136,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_EXEC		0x00000004
 #define VM_SHARED	0x00000008
 
+/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
 #define VM_MAYREAD	0x00000010	/* limits for mprotect() etc */
 #define VM_MAYWRITE	0x00000020
 #define VM_MAYEXEC	0x00000040
@@ -350,7 +351,8 @@ static inline void put_page(struct page *page)
  * only one copy in memory, at most, normally.
  *
  * For the non-reserved pages, page_count(page) denotes a reference count.
- *   page_count() == 0 means the page is free.
+ *   page_count() == 0 means the page is free. page->lru is then used for
+ *   freelist management in the buddy allocator.
  *   page_count() == 1 means the page is used for exactly one purpose
  *   (e.g. a private data page of one process).
  *
@@ -376,10 +378,8 @@ static inline void put_page(struct page *page)
  * attaches, plus 1 if `private' contains something, plus one for
  * the page cache itself.
  *
- * All pages belonging to an inode are in these doubly linked lists:
- * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages;
- * using the page->list list_head. These fields are also used for
- * freelist managemet (when page_count()==0).
+ * Instead of keeping dirty/clean pages in per address-space lists, we instead
+ * now tag pages as dirty/under writeback in the radix tree.
  *
  * There is also a per-mapping radix tree mapping index to the page
  * in memory if present. The tree is rooted at mapping->root.  
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index 7e033e9271a..4ced3873681 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -133,11 +133,13 @@ enum ip_conntrack_expect_events {
 
 #include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_icmp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
 #include <linux/netfilter_ipv4/ip_conntrack_sctp.h>
 
 /* per conntrack: protocol private data */
 union ip_conntrack_proto {
 	/* insert conntrack proto private data here */
+	struct ip_ct_gre gre;
 	struct ip_ct_sctp sctp;
 	struct ip_ct_tcp tcp;
 	struct ip_ct_icmp icmp;
@@ -148,6 +150,7 @@ union ip_conntrack_expect_proto {
 };
 
 /* Add protocol helper include file here */
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
 #include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
 #include <linux/netfilter_ipv4/ip_conntrack_irc.h>
@@ -155,12 +158,20 @@ union ip_conntrack_expect_proto {
 /* per conntrack: application helper private data */
 union ip_conntrack_help {
 	/* insert conntrack helper private data (master) here */
+	struct ip_ct_pptp_master ct_pptp_info;
 	struct ip_ct_ftp_master ct_ftp_info;
 	struct ip_ct_irc_master ct_irc_info;
 };
 
 #ifdef CONFIG_IP_NF_NAT_NEEDED
 #include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_pptp.h>
+
+/* per conntrack: nat application helper private data */
+union ip_conntrack_nat_help {
+	/* insert nat helper private data here */
+	struct ip_nat_pptp nat_pptp_info;
+};
 #endif
 
 #include <linux/types.h>
@@ -223,6 +234,7 @@ struct ip_conntrack
 #ifdef CONFIG_IP_NF_NAT_NEEDED
 	struct {
 		struct ip_nat_info info;
+		union ip_conntrack_nat_help help;
 #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
 	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
 		int masq_index;
@@ -320,11 +332,28 @@ extern void need_ip_conntrack(void);
 extern int invert_tuplepr(struct ip_conntrack_tuple *inverse,
 			  const struct ip_conntrack_tuple *orig);
 
+extern void __ip_ct_refresh_acct(struct ip_conntrack *ct,
+			         enum ip_conntrack_info ctinfo,
+			         const struct sk_buff *skb,
+			         unsigned long extra_jiffies,
+				 int do_acct);
+
+/* Refresh conntrack for this many jiffies and do accounting */
+static inline void ip_ct_refresh_acct(struct ip_conntrack *ct, 
+				      enum ip_conntrack_info ctinfo,
+				      const struct sk_buff *skb,
+				      unsigned long extra_jiffies)
+{
+	__ip_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1);
+}
+
 /* Refresh conntrack for this many jiffies */
-extern void ip_ct_refresh_acct(struct ip_conntrack *ct,
-			       enum ip_conntrack_info ctinfo,
-			       const struct sk_buff *skb,
-			       unsigned long extra_jiffies);
+static inline void ip_ct_refresh(struct ip_conntrack *ct,
+				 const struct sk_buff *skb,
+				 unsigned long extra_jiffies)
+{
+	__ip_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
+}
 
 /* These are for NAT.  Icky. */
 /* Update TCP window tracking data when NAT mangles the packet */
@@ -372,7 +401,7 @@ extern struct ip_conntrack_expect *
 __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
 
 extern struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
+ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
 
 extern struct ip_conntrack_tuple_hash *
 __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
new file mode 100644
index 00000000000..816144c75de
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h
@@ -0,0 +1,325 @@
+/* PPTP constants and structs */
+#ifndef _CONNTRACK_PPTP_H
+#define _CONNTRACK_PPTP_H
+
+/* state of the control session */
+enum pptp_ctrlsess_state {
+	PPTP_SESSION_NONE,			/* no session present */
+	PPTP_SESSION_ERROR,			/* some session error */
+	PPTP_SESSION_STOPREQ,			/* stop_sess request seen */
+	PPTP_SESSION_REQUESTED,			/* start_sess request seen */
+	PPTP_SESSION_CONFIRMED,			/* session established */
+};
+
+/* state of the call inside the control session */
+enum pptp_ctrlcall_state {
+	PPTP_CALL_NONE,
+	PPTP_CALL_ERROR,
+	PPTP_CALL_OUT_REQ,
+	PPTP_CALL_OUT_CONF,
+	PPTP_CALL_IN_REQ,
+	PPTP_CALL_IN_REP,
+	PPTP_CALL_IN_CONF,
+	PPTP_CALL_CLEAR_REQ,
+};
+
+
+/* conntrack private data */
+struct ip_ct_pptp_master {
+	enum pptp_ctrlsess_state sstate;	/* session state */
+
+	/* everything below is going to be per-expectation in newnat,
+	 * since there could be more than one call within one session */
+	enum pptp_ctrlcall_state cstate;	/* call state */
+	u_int16_t pac_call_id;			/* call id of PAC, host byte order */
+	u_int16_t pns_call_id;			/* call id of PNS, host byte order */
+
+	/* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack
+	 * and therefore imposes a fixed limit on the number of maps */
+	struct ip_ct_gre_keymap *keymap_orig, *keymap_reply;
+};
+
+/* conntrack_expect private member */
+struct ip_ct_pptp_expect {
+	enum pptp_ctrlcall_state cstate; 	/* call state */
+	u_int16_t pac_call_id;			/* call id of PAC */
+	u_int16_t pns_call_id;			/* call id of PNS */
+};
+
+
+#ifdef __KERNEL__
+
+#define IP_CONNTR_PPTP		PPTP_CONTROL_PORT
+
+#define PPTP_CONTROL_PORT	1723
+
+#define PPTP_PACKET_CONTROL	1
+#define PPTP_PACKET_MGMT	2
+
+#define PPTP_MAGIC_COOKIE	0x1a2b3c4d
+
+struct pptp_pkt_hdr {
+	__u16	packetLength;
+	__be16	packetType;
+	__be32	magicCookie;
+};
+
+/* PptpControlMessageType values */
+#define PPTP_START_SESSION_REQUEST	1
+#define PPTP_START_SESSION_REPLY	2
+#define PPTP_STOP_SESSION_REQUEST	3
+#define PPTP_STOP_SESSION_REPLY		4
+#define PPTP_ECHO_REQUEST		5
+#define PPTP_ECHO_REPLY			6
+#define PPTP_OUT_CALL_REQUEST		7
+#define PPTP_OUT_CALL_REPLY		8
+#define PPTP_IN_CALL_REQUEST		9
+#define PPTP_IN_CALL_REPLY		10
+#define PPTP_IN_CALL_CONNECT		11
+#define PPTP_CALL_CLEAR_REQUEST		12
+#define PPTP_CALL_DISCONNECT_NOTIFY	13
+#define PPTP_WAN_ERROR_NOTIFY		14
+#define PPTP_SET_LINK_INFO		15
+
+#define PPTP_MSG_MAX			15
+
+/* PptpGeneralError values */
+#define PPTP_ERROR_CODE_NONE		0
+#define PPTP_NOT_CONNECTED		1
+#define PPTP_BAD_FORMAT			2
+#define PPTP_BAD_VALUE			3
+#define PPTP_NO_RESOURCE		4
+#define PPTP_BAD_CALLID			5
+#define PPTP_REMOVE_DEVICE_ERROR	6
+
+struct PptpControlHeader {
+	__be16	messageType;
+	__u16	reserved;
+};
+
+/* FramingCapability Bitmap Values */
+#define PPTP_FRAME_CAP_ASYNC		0x1
+#define PPTP_FRAME_CAP_SYNC		0x2
+
+/* BearerCapability Bitmap Values */
+#define PPTP_BEARER_CAP_ANALOG		0x1
+#define PPTP_BEARER_CAP_DIGITAL		0x2
+
+struct PptpStartSessionRequest {
+	__be16	protocolVersion;
+	__u8	reserved1;
+	__u8	reserved2;
+	__be32	framingCapability;
+	__be32	bearerCapability;
+	__be16	maxChannels;
+	__be16	firmwareRevision;
+	__u8	hostName[64];
+	__u8	vendorString[64];
+};
+
+/* PptpStartSessionResultCode Values */
+#define PPTP_START_OK			1
+#define PPTP_START_GENERAL_ERROR	2
+#define PPTP_START_ALREADY_CONNECTED	3
+#define PPTP_START_NOT_AUTHORIZED	4
+#define PPTP_START_UNKNOWN_PROTOCOL	5
+
+struct PptpStartSessionReply {
+	__be16	protocolVersion;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__be32	framingCapability;
+	__be32	bearerCapability;
+	__be16	maxChannels;
+	__be16	firmwareRevision;
+	__u8	hostName[64];
+	__u8	vendorString[64];
+};
+
+/* PptpStopReasons */
+#define PPTP_STOP_NONE			1
+#define PPTP_STOP_PROTOCOL		2
+#define PPTP_STOP_LOCAL_SHUTDOWN	3
+
+struct PptpStopSessionRequest {
+	__u8	reason;
+};
+
+/* PptpStopSessionResultCode */
+#define PPTP_STOP_OK			1
+#define PPTP_STOP_GENERAL_ERROR		2
+
+struct PptpStopSessionReply {
+	__u8	resultCode;
+	__u8	generalErrorCode;
+};
+
+struct PptpEchoRequest {
+	__be32 identNumber;
+};
+
+/* PptpEchoReplyResultCode */
+#define PPTP_ECHO_OK			1
+#define PPTP_ECHO_GENERAL_ERROR		2
+
+struct PptpEchoReply {
+	__be32	identNumber;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__u16	reserved;
+};
+
+/* PptpFramingType */
+#define PPTP_ASYNC_FRAMING		1
+#define PPTP_SYNC_FRAMING		2
+#define PPTP_DONT_CARE_FRAMING		3
+
+/* PptpCallBearerType */
+#define PPTP_ANALOG_TYPE		1
+#define PPTP_DIGITAL_TYPE		2
+#define PPTP_DONT_CARE_BEARER_TYPE	3
+
+struct PptpOutCallRequest {
+	__be16	callID;
+	__be16	callSerialNumber;
+	__be32	minBPS;
+	__be32	maxBPS;
+	__be32	bearerType;
+	__be32	framingType;
+	__be16	packetWindow;
+	__be16	packetProcDelay;
+	__u16	reserved1;
+	__be16	phoneNumberLength;
+	__u16	reserved2;
+	__u8	phoneNumber[64];
+	__u8	subAddress[64];
+};
+
+/* PptpCallResultCode */
+#define PPTP_OUTCALL_CONNECT		1
+#define PPTP_OUTCALL_GENERAL_ERROR	2
+#define PPTP_OUTCALL_NO_CARRIER		3
+#define PPTP_OUTCALL_BUSY		4
+#define PPTP_OUTCALL_NO_DIAL_TONE	5
+#define PPTP_OUTCALL_TIMEOUT		6
+#define PPTP_OUTCALL_DONT_ACCEPT	7
+
+struct PptpOutCallReply {
+	__be16	callID;
+	__be16	peersCallID;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__be16	causeCode;
+	__be32	connectSpeed;
+	__be16	packetWindow;
+	__be16	packetProcDelay;
+	__be32	physChannelID;
+};
+
+struct PptpInCallRequest {
+	__be16	callID;
+	__be16	callSerialNumber;
+	__be32	callBearerType;
+	__be32	physChannelID;
+	__be16	dialedNumberLength;
+	__be16	dialingNumberLength;
+	__u8	dialedNumber[64];
+	__u8	dialingNumber[64];
+	__u8	subAddress[64];
+};
+
+/* PptpInCallResultCode */
+#define PPTP_INCALL_ACCEPT		1
+#define PPTP_INCALL_GENERAL_ERROR	2
+#define PPTP_INCALL_DONT_ACCEPT		3
+
+struct PptpInCallReply {
+	__be16	callID;
+	__be16	peersCallID;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__be16	packetWindow;
+	__be16	packetProcDelay;
+	__u16	reserved;
+};
+
+struct PptpInCallConnected {
+	__be16	peersCallID;
+	__u16	reserved;
+	__be32	connectSpeed;
+	__be16	packetWindow;
+	__be16	packetProcDelay;
+	__be32	callFramingType;
+};
+
+struct PptpClearCallRequest {
+	__be16	callID;
+	__u16	reserved;
+};
+
+struct PptpCallDisconnectNotify {
+	__be16	callID;
+	__u8	resultCode;
+	__u8	generalErrorCode;
+	__be16	causeCode;
+	__u16	reserved;
+	__u8	callStatistics[128];
+};
+
+struct PptpWanErrorNotify {
+	__be16	peersCallID;
+	__u16	reserved;
+	__be32	crcErrors;
+	__be32	framingErrors;
+	__be32	hardwareOverRuns;
+	__be32	bufferOverRuns;
+	__be32	timeoutErrors;
+	__be32	alignmentErrors;
+};
+
+struct PptpSetLinkInfo {
+	__be16	peersCallID;
+	__u16	reserved;
+	__be32	sendAccm;
+	__be32	recvAccm;
+};
+
+union pptp_ctrl_union {
+		struct PptpStartSessionRequest	sreq;
+		struct PptpStartSessionReply	srep;
+		struct PptpStopSessionRequest	streq;
+		struct PptpStopSessionReply	strep;
+                struct PptpOutCallRequest       ocreq;
+                struct PptpOutCallReply         ocack;
+                struct PptpInCallRequest        icreq;
+                struct PptpInCallReply          icack;
+                struct PptpInCallConnected      iccon;
+		struct PptpClearCallRequest	clrreq;
+                struct PptpCallDisconnectNotify disc;
+                struct PptpWanErrorNotify       wanerr;
+                struct PptpSetLinkInfo          setlink;
+};
+
+extern int
+(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+			  struct ip_conntrack *ct,
+			  enum ip_conntrack_info ctinfo,
+			  struct PptpControlHeader *ctlh,
+			  union pptp_ctrl_union *pptpReq);
+
+extern int
+(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+			  struct ip_conntrack *ct,
+			  enum ip_conntrack_info ctinfo,
+			  struct PptpControlHeader *ctlh,
+			  union pptp_ctrl_union *pptpReq);
+
+extern int
+(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig,
+			    struct ip_conntrack_expect *exp_reply);
+
+extern void
+(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
+			     struct ip_conntrack_expect *exp);
+#endif /* __KERNEL__ */
+#endif /* _CONNTRACK_PPTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
new file mode 100644
index 00000000000..8d090ef82f5
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h
@@ -0,0 +1,114 @@
+#ifndef _CONNTRACK_PROTO_GRE_H
+#define _CONNTRACK_PROTO_GRE_H
+#include <asm/byteorder.h>
+
+/* GRE PROTOCOL HEADER */
+
+/* GRE Version field */
+#define GRE_VERSION_1701	0x0
+#define GRE_VERSION_PPTP	0x1
+
+/* GRE Protocol field */
+#define GRE_PROTOCOL_PPTP	0x880B
+
+/* GRE Flags */
+#define GRE_FLAG_C		0x80
+#define GRE_FLAG_R		0x40
+#define GRE_FLAG_K		0x20
+#define GRE_FLAG_S		0x10
+#define GRE_FLAG_A		0x80
+
+#define GRE_IS_C(f)	((f)&GRE_FLAG_C)
+#define GRE_IS_R(f)	((f)&GRE_FLAG_R)
+#define GRE_IS_K(f)	((f)&GRE_FLAG_K)
+#define GRE_IS_S(f)	((f)&GRE_FLAG_S)
+#define GRE_IS_A(f)	((f)&GRE_FLAG_A)
+
+/* GRE is a mess: Four different standards */
+struct gre_hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u16	rec:3,
+		srr:1,
+		seq:1,
+		key:1,
+		routing:1,
+		csum:1,
+		version:3,
+		reserved:4,
+		ack:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u16	csum:1,
+		routing:1,
+		key:1,
+		seq:1,
+		srr:1,
+		rec:3,
+		ack:1,
+		reserved:4,
+		version:3;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+	__u16	protocol;
+};
+
+/* modified GRE header for PPTP */
+struct gre_hdr_pptp {
+	__u8  flags;		/* bitfield */
+	__u8  version;		/* should be GRE_VERSION_PPTP */
+	__u16 protocol;		/* should be GRE_PROTOCOL_PPTP */
+	__u16 payload_len;	/* size of ppp payload, not inc. gre header */
+	__u16 call_id;		/* peer's call_id for this session */
+	__u32 seq;		/* sequence number.  Present if S==1 */
+	__u32 ack;		/* seq number of highest packet recieved by */
+				/*  sender in this session */
+};
+
+
+/* this is part of ip_conntrack */
+struct ip_ct_gre {
+	unsigned int stream_timeout;
+	unsigned int timeout;
+};
+
+#ifdef __KERNEL__
+struct ip_conntrack_expect;
+struct ip_conntrack;
+
+/* structure for original <-> reply keymap */
+struct ip_ct_gre_keymap {
+	struct list_head list;
+
+	struct ip_conntrack_tuple tuple;
+};
+
+/* add new tuple->key_reply pair to keymap */
+int ip_ct_gre_keymap_add(struct ip_conntrack *ct,
+			 struct ip_conntrack_tuple *t,
+			 int reply);
+
+/* delete keymap entries */
+void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct);
+
+
+/* get pointer to gre key, if present */
+static inline u_int32_t *gre_key(struct gre_hdr *greh)
+{
+	if (!greh->key)
+		return NULL;
+	if (greh->csum || greh->routing)
+		return (u_int32_t *) (greh+sizeof(*greh)+4);
+	return (u_int32_t *) (greh+sizeof(*greh));
+}
+
+/* get pointer ot gre csum, if present */
+static inline u_int16_t *gre_csum(struct gre_hdr *greh)
+{
+	if (!greh->csum)
+		return NULL;
+	return (u_int16_t *) (greh+sizeof(*greh));
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _CONNTRACK_PROTO_GRE_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
index c33f0b5e0d0..20e43f018b7 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
@@ -17,7 +17,7 @@ union ip_conntrack_manip_proto
 	u_int16_t all;
 
 	struct {
-		u_int16_t port;
+		__be16 port;
 	} tcp;
 	struct {
 		u_int16_t port;
@@ -28,6 +28,9 @@ union ip_conntrack_manip_proto
 	struct {
 		u_int16_t port;
 	} sctp;
+	struct {
+		__be16 key;	/* key is 32bit, pptp only uses 16 */
+	} gre;
 };
 
 /* The manipulable part of the tuple. */
@@ -61,6 +64,10 @@ struct ip_conntrack_tuple
 			struct {
 				u_int16_t port;
 			} sctp;
+			struct {
+				__be16 key;	/* key is 32bit, 
+						 * pptp only uses 16 */
+			} gre;
 		} u;
 
 		/* The protocol. */
diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h
new file mode 100644
index 00000000000..eaf66c2e8f9
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h
@@ -0,0 +1,11 @@
+/* PPTP constants and structs */
+#ifndef _NAT_PPTP_H
+#define _NAT_PPTP_H
+
+/* conntrack private data */
+struct ip_nat_pptp {
+	u_int16_t pns_call_id;		/* NAT'ed PNS call id */
+	u_int16_t pac_call_id;		/* NAT'ed PAC call id */
+};
+
+#endif /* _NAT_PPTP_H */
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 58c72a52dc6..59f70b34e02 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -455,6 +455,9 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb,
 
 /* Check for an extension */
 extern int ip6t_ext_hdr(u8 nexthdr);
+/* find specified header and get offset to it */
+extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+			 u8 target);
 
 #define IP6T_ALIGN(s) (((s) + (__alignof__(struct ip6t_entry)-1)) & ~(__alignof__(struct ip6t_entry)-1))
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c49d28eca56..b86a4b77007 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1268,7 +1268,8 @@
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA	0x0266
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2	0x0267
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE	0x036E
-#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA	0x036F
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA	0x037E
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2	0x037F
 #define PCI_DEVICE_ID_NVIDIA_NVENET_12		0x0268
 #define PCI_DEVICE_ID_NVIDIA_NVENET_13		0x0269
 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO	0x026B
diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 3b3266ff1a9..7ab2cdb83ef 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -59,6 +59,10 @@ extern void machine_crash_shutdown(struct pt_regs *);
  * Architecture independent implemenations of sys_reboot commands.
  */
 
+extern void kernel_restart_prepare(char *cmd);
+extern void kernel_halt_prepare(void);
+extern void kernel_power_off_prepare(void);
+
 extern void kernel_restart(char *cmd);
 extern void kernel_halt(void);
 extern void kernel_power_off(void);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 425f58c8ea4..a6f03e47373 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -508,5 +508,7 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3,
 
 asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
 asmlinkage long sys_ioprio_get(int which, int who);
+asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
+					unsigned long maxnode);
 
 #endif
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 53184a38fdf..0e293fe733b 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -108,6 +108,13 @@
 #define IB_QP1_QKEY	0x80010000
 #define IB_QP_SET_QKEY	0x80000000
 
+enum {
+	IB_MGMT_MAD_DATA = 232,
+	IB_MGMT_RMPP_DATA = 220,
+	IB_MGMT_VENDOR_DATA = 216,
+	IB_MGMT_SA_DATA = 200
+};
+
 struct ib_mad_hdr {
 	u8	base_version;
 	u8	mgmt_class;
@@ -149,20 +156,20 @@ struct ib_sa_hdr {
 
 struct ib_mad {
 	struct ib_mad_hdr	mad_hdr;
-	u8			data[232];
+	u8			data[IB_MGMT_MAD_DATA];
 };
 
 struct ib_rmpp_mad {
 	struct ib_mad_hdr	mad_hdr;
 	struct ib_rmpp_hdr	rmpp_hdr;
-	u8			data[220];
+	u8			data[IB_MGMT_RMPP_DATA];
 };
 
 struct ib_sa_mad {
 	struct ib_mad_hdr	mad_hdr;
 	struct ib_rmpp_hdr	rmpp_hdr;
 	struct ib_sa_hdr	sa_hdr;
-	u8			data[200];
+	u8			data[IB_MGMT_SA_DATA];
 } __attribute__ ((packed));
 
 struct ib_vendor_mad {
@@ -170,7 +177,7 @@ struct ib_vendor_mad {
 	struct ib_rmpp_hdr	rmpp_hdr;
 	u8			reserved;
 	u8			oui[3];
-	u8			data[216];
+	u8			data[IB_MGMT_VENDOR_DATA];
 };
 
 struct ib_class_port_info
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 916144be208..69313ba7505 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -439,6 +439,8 @@ enum scsi_host_state {
 	SHOST_CANCEL,
 	SHOST_DEL,
 	SHOST_RECOVERY,
+	SHOST_CANCEL_RECOVERY,
+	SHOST_DEL_RECOVERY,
 };
 
 struct Scsi_Host {
@@ -465,8 +467,6 @@ struct Scsi_Host {
 
 	struct list_head	eh_cmd_q;
 	struct task_struct    * ehandler;  /* Error recovery thread. */
-	struct semaphore      * eh_wait;   /* The error recovery thread waits
-					      on this. */
 	struct semaphore      * eh_action; /* Wait for specific actions on the
                                           host. */
 	unsigned int            eh_active:1; /* Indicates the eh thread is awake and active if
@@ -621,6 +621,13 @@ static inline struct Scsi_Host *dev_to_shost(struct device *dev)
 	return container_of(dev, struct Scsi_Host, shost_gendev);
 }
 
+static inline int scsi_host_in_recovery(struct Scsi_Host *shost)
+{
+	return shost->shost_state == SHOST_RECOVERY ||
+		shost->shost_state == SHOST_CANCEL_RECOVERY ||
+		shost->shost_state == SHOST_DEL_RECOVERY;
+}
+
 extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
 extern void scsi_flush_work(struct Scsi_Host *);
 
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 115db056dc6..b0d44543737 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -103,8 +103,8 @@ enum fc_port_state {
 					     incapable of reporting */
 #define FC_PORTSPEED_1GBIT		1
 #define FC_PORTSPEED_2GBIT		2
-#define FC_PORTSPEED_10GBIT		4
-#define FC_PORTSPEED_4GBIT		8
+#define FC_PORTSPEED_4GBIT		4
+#define FC_PORTSPEED_10GBIT		8
 #define FC_PORTSPEED_NOT_NEGOTIATED	(1 << 15) /* Speed not established */
 
 /*
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 396c7873e80..46a5e5acff9 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -29,7 +29,7 @@ config PM_DEBUG
 
 config SOFTWARE_SUSPEND
 	bool "Software Suspend"
-	depends on PM && SWAP && (X86 || ((FVR || PPC32) && !SMP))
+	depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FVR || PPC32) && !SMP)
 	---help---
 	  Enable the possibility of suspending the machine.
 	  It doesn't need APM.
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 2d8bf054d03..761956e813f 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -17,12 +17,12 @@
 #include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
+#include <linux/pm.h>
 
 #include "power.h"
 
 
 extern suspend_disk_method_t pm_disk_mode;
-extern struct pm_ops * pm_ops;
 
 extern int swsusp_suspend(void);
 extern int swsusp_write(void);
@@ -49,13 +49,11 @@ dev_t swsusp_resume_device;
 
 static void power_down(suspend_disk_method_t mode)
 {
-	unsigned long flags;
 	int error = 0;
 
-	local_irq_save(flags);
 	switch(mode) {
 	case PM_DISK_PLATFORM:
- 		device_shutdown();
+		kernel_power_off_prepare();
 		error = pm_ops->enter(PM_SUSPEND_DISK);
 		break;
 	case PM_DISK_SHUTDOWN:
diff --git a/kernel/power/power.h b/kernel/power/power.h
index cd6a3493cc0..9c9167d910d 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -1,7 +1,7 @@
 #include <linux/suspend.h>
 #include <linux/utsname.h>
 
-/* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but
+/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but
    we probably do not take enough locks for switching consoles, etc,
    so bad things might happen.
 */
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index d967e875ee8..1cc9ff25e47 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -363,7 +363,7 @@ static void lock_swapdevices(void)
 }
 
 /**
- *	write_swap_page - Write one page to a fresh swap location.
+ *	write_page - Write one page to a fresh swap location.
  *	@addr:	Address we're writing.
  *	@loc:	Place to store the entry we used.
  *
@@ -863,6 +863,9 @@ static int alloc_image_pages(void)
 	return 0;
 }
 
+/* Free pages we allocated for suspend. Suspend pages are alocated
+ * before atomic copy, so we need to free them after resume.
+ */
 void swsusp_free(void)
 {
 	BUG_ON(PageNosave(virt_to_page(pagedir_save)));
@@ -918,6 +921,7 @@ static int swsusp_alloc(void)
 
 	pagedir_nosave = NULL;
 	nr_copy_pages = calc_nr(nr_copy_pages);
+	nr_copy_pages_check = nr_copy_pages;
 
 	pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
 		 nr_copy_pages, PAGES_FOR_IO, nr_free_pages());
@@ -940,7 +944,6 @@ static int swsusp_alloc(void)
 		return error;
 	}
 
-	nr_copy_pages_check = nr_copy_pages;
 	return 0;
 }
 
@@ -1213,8 +1216,9 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
 		free_pagedir(pblist);
 		free_eaten_memory();
 		pblist = NULL;
-	}
-	else
+		/* Is this even worth handling? It should never ever happen, and we
+		   have just lost user's state, anyway... */
+	} else
 		printk("swsusp: Relocated %d pages\n", rel);
 
 	return pblist;
diff --git a/kernel/printk.c b/kernel/printk.c
index a967605bc2e..4b8f0f9230a 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -488,6 +488,11 @@ static int __init printk_time_setup(char *str)
 
 __setup("time", printk_time_setup);
 
+__attribute__((weak)) unsigned long long printk_clock(void)
+{
+	return sched_clock();
+}
+
 /*
  * This is printk.  It can be called from any context.  We want it to work.
  * 
@@ -565,7 +570,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 					loglev_char = default_message_loglevel
 						+ '0';
 				}
-				t = sched_clock();
+				t = printk_clock();
 				nanosec_rem = do_div(t, 1000000000);
 				tlen = sprintf(tbuf,
 						"<%c>[%5lu.%06lu] ",
diff --git a/kernel/signal.c b/kernel/signal.c
index b92c3c9f8b9..5a274705ba1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -936,34 +936,31 @@ force_sig_specific(int sig, struct task_struct *t)
  * as soon as they're available, so putting the signal on the shared queue
  * will be equivalent to sending it to one such thread.
  */
-#define wants_signal(sig, p, mask) 			\
-	(!sigismember(&(p)->blocked, sig)		\
-	 && !((p)->state & mask)			\
-	 && !((p)->flags & PF_EXITING)			\
-	 && (task_curr(p) || !signal_pending(p)))
-
+static inline int wants_signal(int sig, struct task_struct *p)
+{
+	if (sigismember(&p->blocked, sig))
+		return 0;
+	if (p->flags & PF_EXITING)
+		return 0;
+	if (sig == SIGKILL)
+		return 1;
+	if (p->state & (TASK_STOPPED | TASK_TRACED))
+		return 0;
+	return task_curr(p) || !signal_pending(p);
+}
 
 static void
 __group_complete_signal(int sig, struct task_struct *p)
 {
-	unsigned int mask;
 	struct task_struct *t;
 
 	/*
-	 * Don't bother traced and stopped tasks (but
-	 * SIGKILL will punch through that).
-	 */
-	mask = TASK_STOPPED | TASK_TRACED;
-	if (sig == SIGKILL)
-		mask = 0;
-
-	/*
 	 * Now find a thread we can wake up to take the signal off the queue.
 	 *
 	 * If the main thread wants the signal, it gets first crack.
 	 * Probably the least surprising to the average bear.
 	 */
-	if (wants_signal(sig, p, mask))
+	if (wants_signal(sig, p))
 		t = p;
 	else if (thread_group_empty(p))
 		/*
@@ -981,7 +978,7 @@ __group_complete_signal(int sig, struct task_struct *p)
 			t = p->signal->curr_target = p;
 		BUG_ON(t->tgid != p->tgid);
 
-		while (!wants_signal(sig, t, mask)) {
+		while (!wants_signal(sig, t)) {
 			t = next_thread(t);
 			if (t == p->signal->curr_target)
 				/*
diff --git a/kernel/sys.c b/kernel/sys.c
index f723522e698..2fa1ed18123 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -361,17 +361,35 @@ out_unlock:
 	return retval;
 }
 
+/**
+ *	emergency_restart - reboot the system
+ *
+ *	Without shutting down any hardware or taking any locks
+ *	reboot the system.  This is called when we know we are in
+ *	trouble so this is our best effort to reboot.  This is
+ *	safe to call in interrupt context.
+ */
 void emergency_restart(void)
 {
 	machine_emergency_restart();
 }
 EXPORT_SYMBOL_GPL(emergency_restart);
 
-void kernel_restart(char *cmd)
+/**
+ *	kernel_restart - reboot the system
+ *
+ *	Shutdown everything and perform a clean reboot.
+ *	This is not safe to call in interrupt context.
+ */
+void kernel_restart_prepare(char *cmd)
 {
 	notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
 	system_state = SYSTEM_RESTART;
 	device_shutdown();
+}
+void kernel_restart(char *cmd)
+{
+	kernel_restart_prepare(cmd);
 	if (!cmd) {
 		printk(KERN_EMERG "Restarting system.\n");
 	} else {
@@ -382,6 +400,12 @@ void kernel_restart(char *cmd)
 }
 EXPORT_SYMBOL_GPL(kernel_restart);
 
+/**
+ *	kernel_kexec - reboot the system
+ *
+ *	Move into place and start executing a preloaded standalone
+ *	executable.  If nothing was preloaded return an error.
+ */
 void kernel_kexec(void)
 {
 #ifdef CONFIG_KEXEC
@@ -390,9 +414,7 @@ void kernel_kexec(void)
 	if (!image) {
 		return;
 	}
-	notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
-	system_state = SYSTEM_RESTART;
-	device_shutdown();
+	kernel_restart_prepare(NULL);
 	printk(KERN_EMERG "Starting new kernel\n");
 	machine_shutdown();
 	machine_kexec(image);
@@ -400,21 +422,39 @@ void kernel_kexec(void)
 }
 EXPORT_SYMBOL_GPL(kernel_kexec);
 
-void kernel_halt(void)
+/**
+ *	kernel_halt - halt the system
+ *
+ *	Shutdown everything and perform a clean system halt.
+ */
+void kernel_halt_prepare(void)
 {
 	notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
 	system_state = SYSTEM_HALT;
 	device_shutdown();
+}
+void kernel_halt(void)
+{
+	kernel_halt_prepare();
 	printk(KERN_EMERG "System halted.\n");
 	machine_halt();
 }
 EXPORT_SYMBOL_GPL(kernel_halt);
 
-void kernel_power_off(void)
+/**
+ *	kernel_power_off - power_off the system
+ *
+ *	Shutdown everything and perform a clean system power_off.
+ */
+void kernel_power_off_prepare(void)
 {
 	notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
 	system_state = SYSTEM_POWER_OFF;
 	device_shutdown();
+}
+void kernel_power_off(void)
+{
+	kernel_power_off_prepare();
 	printk(KERN_EMERG "Power down.\n");
 	machine_power_off();
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 8b8e05f07cd..fa11d91242e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1640,7 +1640,7 @@ static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 /*
  * Get rid of page table information in the indicated region.
  *
- * Called with the page table lock held.
+ * Called with the mm semaphore held.
  */
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e9fbd013ad9..57577f63b30 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -248,7 +248,8 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
 
 		newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
 
-		if ((newflags & ~(newflags >> 4)) & 0xf) {
+		/* newflags >> 4 shift VM_MAY% in place of VM_% */
+		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
 			error = -EACCES;
 			goto out;
 		}
diff --git a/mm/slab.c b/mm/slab.c
index 437d3388054..c9adfce0040 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -308,12 +308,12 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
 #define	SIZE_L3 (1 + MAX_NUMNODES)
 
 /*
- * This function may be completely optimized away if
+ * This function must be completely optimized away if
  * a constant is passed to it. Mostly the same as
  * what is in linux/slab.h except it returns an
  * index.
  */
-static inline int index_of(const size_t size)
+static __always_inline int index_of(const size_t size)
 {
 	if (__builtin_constant_p(size)) {
 		int i = 0;
@@ -329,7 +329,8 @@ static inline int index_of(const size_t size)
 			extern void __bad_size(void);
 			__bad_size();
 		}
-	}
+	} else
+		BUG();
 	return 0;
 }
 
@@ -639,7 +640,7 @@ static enum {
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(kmem_cache_t* cachep, void** objpp, int len);
+static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node);
 static void enable_cpucache (kmem_cache_t *cachep);
 static void cache_reap (void *unused);
 static int __node_shrink(kmem_cache_t *cachep, int node);
@@ -804,7 +805,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache
 
 	if (ac->avail) {
 		spin_lock(&rl3->list_lock);
-		free_block(cachep, ac->entry, ac->avail);
+		free_block(cachep, ac->entry, ac->avail, node);
 		ac->avail = 0;
 		spin_unlock(&rl3->list_lock);
 	}
@@ -925,7 +926,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			/* Free limit for this kmem_list3 */
 			l3->free_limit -= cachep->batchcount;
 			if (nc)
-				free_block(cachep, nc->entry, nc->avail);
+				free_block(cachep, nc->entry, nc->avail, node);
 
 			if (!cpus_empty(mask)) {
                                 spin_unlock(&l3->list_lock);
@@ -934,7 +935,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 
 			if (l3->shared) {
 				free_block(cachep, l3->shared->entry,
-						l3->shared->avail);
+						l3->shared->avail, node);
 				kfree(l3->shared);
 				l3->shared = NULL;
 			}
@@ -1882,12 +1883,13 @@ static void do_drain(void *arg)
 {
 	kmem_cache_t *cachep = (kmem_cache_t*)arg;
 	struct array_cache *ac;
+	int node = numa_node_id();
 
 	check_irq_off();
 	ac = ac_data(cachep);
-	spin_lock(&cachep->nodelists[numa_node_id()]->list_lock);
-	free_block(cachep, ac->entry, ac->avail);
-	spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock);
+	spin_lock(&cachep->nodelists[node]->list_lock);
+	free_block(cachep, ac->entry, ac->avail, node);
+	spin_unlock(&cachep->nodelists[node]->list_lock);
 	ac->avail = 0;
 }
 
@@ -2608,7 +2610,7 @@ done:
 /*
  * Caller needs to acquire correct kmem_list's list_lock
  */
-static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node)
 {
 	int i;
 	struct kmem_list3 *l3;
@@ -2617,14 +2619,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects)
 		void *objp = objpp[i];
 		struct slab *slabp;
 		unsigned int objnr;
-		int nodeid = 0;
 
 		slabp = GET_PAGE_SLAB(virt_to_page(objp));
-		nodeid = slabp->nodeid;
-		l3 = cachep->nodelists[nodeid];
+		l3 = cachep->nodelists[node];
 		list_del(&slabp->list);
 		objnr = (objp - slabp->s_mem) / cachep->objsize;
-		check_spinlock_acquired_node(cachep, nodeid);
+		check_spinlock_acquired_node(cachep, node);
 		check_slabp(cachep, slabp);
 
 
@@ -2664,13 +2664,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
+	int node = numa_node_id();
 
 	batchcount = ac->batchcount;
 #if DEBUG
 	BUG_ON(!batchcount || batchcount > ac->avail);
 #endif
 	check_irq_off();
-	l3 = cachep->nodelists[numa_node_id()];
+	l3 = cachep->nodelists[node];
 	spin_lock(&l3->list_lock);
 	if (l3->shared) {
 		struct array_cache *shared_array = l3->shared;
@@ -2686,7 +2687,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
 		}
 	}
 
-	free_block(cachep, ac->entry, batchcount);
+	free_block(cachep, ac->entry, batchcount, node);
 free_done:
 #if STATS
 	{
@@ -2751,7 +2752,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
 			} else {
 				spin_lock(&(cachep->nodelists[nodeid])->
 						list_lock);
-				free_block(cachep, &objp, 1);
+				free_block(cachep, &objp, 1, nodeid);
 				spin_unlock(&(cachep->nodelists[nodeid])->
 						list_lock);
 			}
@@ -2844,7 +2845,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i
 	unsigned long save_flags;
 	void *ptr;
 
-	if (nodeid == numa_node_id() || nodeid == -1)
+	if (nodeid == -1)
 		return __cache_alloc(cachep, flags);
 
 	if (unlikely(!cachep->nodelists[nodeid])) {
@@ -3079,7 +3080,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep)
 
 			if ((nc = cachep->nodelists[node]->shared))
 				free_block(cachep, nc->entry,
-							nc->avail);
+							nc->avail, node);
 
 			l3->shared = new;
 			if (!cachep->nodelists[node]->alien) {
@@ -3160,7 +3161,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
 		if (!ccold)
 			continue;
 		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
-		free_block(cachep, ccold->entry, ccold->avail);
+		free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i));
 		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
 		kfree(ccold);
 	}
@@ -3240,7 +3241,7 @@ static void drain_array_locked(kmem_cache_t *cachep,
 		if (tofree > ac->avail) {
 			tofree = (ac->avail+1)/2;
 		}
-		free_block(cachep, ac->entry, tofree);
+		free_block(cachep, ac->entry, tofree, node);
 		ac->avail -= tofree;
 		memmove(ac->entry, &(ac->entry[tofree]),
 					sizeof(void*)*ac->avail);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0184f510aac..1dcaeda039f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1381,6 +1381,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 		error = bd_claim(bdev, sys_swapon);
 		if (error < 0) {
 			bdev = NULL;
+			error = -EINVAL;
 			goto bad_swap;
 		}
 		p->old_block_size = block_size(bdev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 145f5cde96c..b7486488967 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	unsigned short vid;
 	struct net_device_stats *stats;
 	unsigned short vlan_TCI;
-	unsigned short proto;
+	__be16 proto;
 
 	/* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */
 	vlan_TCI = ntohs(vhdr->h_vlan_TCI);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 069253f830c..2d24fb400e0 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -31,7 +31,8 @@ static inline int should_deliver(const struct net_bridge_port *p,
 
 int br_dev_queue_push_xmit(struct sk_buff *skb)
 {
-	if (skb->len > skb->dev->mtu) 
+	/* drop mtu oversized packets except tso */
+	if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size)
 		kfree_skb(skb);
 	else {
 #ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 1b63b482416..50c0519cd70 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-#define VERSION "0.403"
+#define VERSION "0.404"
 
 #include <linux/config.h>
 #include <asm/uaccess.h>
@@ -224,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b)
   Consider a node 'n' and its parent 'tp'.
 
   If n is a leaf, every bit in its key is significant. Its presence is 
-  necessitaded by path compression, since during a tree traversal (when 
+  necessitated by path compression, since during a tree traversal (when 
   searching for a leaf - unless we are doing an insertion) we will completely 
   ignore all skipped bits we encounter. Thus we need to verify, at the end of 
   a potentially successful search, that we have indeed been walking the 
@@ -836,11 +836,12 @@ static void trie_init(struct trie *t)
 #endif
 }
 
-/* readside most use rcu_read_lock currently dump routines
+/* readside must use rcu_read_lock currently dump routines
  via get_fa_head and dump */
 
-static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
+static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
 {
+	struct hlist_head *head = &l->list;
 	struct hlist_node *node;
 	struct leaf_info *li;
 
@@ -853,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
 
 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
 {
-	struct leaf_info *li = find_leaf_info(&l->list, plen);
+	struct leaf_info *li = find_leaf_info(l, plen);
 
 	if (!li)
 		return NULL;
@@ -1085,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen)
 	}
 
 	if (tp && tp->pos + tp->bits > 32)
-		printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
+		printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
 		       tp, tp->pos, tp->bits, key, plen);
 
 	/* Rebalance the trie */
@@ -1248,7 +1249,7 @@ err:
 }
 
 
-/* should be clalled with rcu_read_lock */
+/* should be called with rcu_read_lock */
 static inline int check_leaf(struct trie *t, struct leaf *l,
 			     t_key key, int *plen, const struct flowi *flp,
 			     struct fib_result *res)
@@ -1590,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
 
 	l = fib_find_node(t, key);
-	li = find_leaf_info(&l->list, plen);
+	li = find_leaf_info(l, plen);
 
 	list_del_rcu(&fa->fa_list);
 
@@ -1714,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb)
 
 	t->revision++;
 
-	rcu_read_lock();
 	for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
 		found += trie_flush_leaf(t, l);
 
@@ -1722,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb)
 			trie_leaf_remove(t, ll->key);
 		ll = l;
 	}
-	rcu_read_unlock();  
 
 	if (ll && hlist_empty(&ll->list))
 		trie_leaf_remove(t, ll->key);
@@ -1833,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi
 			i++;
 			continue;
 		}
-		if (fa->fa_info->fib_nh == NULL) {
-			printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
-			i++;
-			continue;
-		}
-		if (fa->fa_info == NULL) {
-			printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
-			i++;
-			continue;
-		}
+		BUG_ON(!fa->fa_info);
 
 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
 				  cb->nlh->nlmsg_seq,
@@ -1965,7 +1955,7 @@ struct fib_table * __init fib_hash_init(int id)
 		trie_main = t;
 
 	if (id == RT_TABLE_LOCAL)
-		printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
+		printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
 
 	return tb;
 }
@@ -2029,7 +2019,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
 		iter->tnode = (struct tnode *) n;
 		iter->trie = t;
 		iter->index = 0;
-		iter->depth = 0;
+		iter->depth = 1;
 		return n;
 	}
 	return NULL;
@@ -2274,11 +2264,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 				seq_puts(seq, "<local>:\n");
 			else
 				seq_puts(seq, "<main>:\n");
-		} else {
-			seq_indent(seq, iter->depth-1);
-			seq_printf(seq, "  +-- %d.%d.%d.%d/%d\n",
-				   NIPQUAD(prf), tn->pos);
-		}
+		} 
+		seq_indent(seq, iter->depth-1);
+		seq_printf(seq, "  +-- %d.%d.%d.%d/%d %d %d %d\n",
+			   NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, 
+			   tn->empty_children);
+		
 	} else {
 		struct leaf *l = (struct leaf *) n;
 		int i;
@@ -2287,7 +2278,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v)
 		seq_indent(seq, iter->depth);
 		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
 		for (i = 32; i >= 0; i--) {
-			struct leaf_info *li = find_leaf_info(&l->list, i);
+			struct leaf_info *li = find_leaf_info(l, i);
 			if (li) {
 				struct fib_alias *fa;
 				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
@@ -2383,7 +2374,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 		return 0;
 
 	for (i=32; i>=0; i--) {
-		struct leaf_info *li = find_leaf_info(&l->list, i);
+		struct leaf_info *li = find_leaf_info(l, i);
 		struct fib_alias *fa;
 		u32 mask, prefix;
 
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index e2162d27007..3cf9b451675 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -137,6 +137,22 @@ config IP_NF_AMANDA
 
 	  To compile it as a module, choose M here.  If unsure, say Y.
 
+config IP_NF_PPTP
+	tristate  'PPTP protocol support'
+	help
+	  This module adds support for PPTP (Point to Point Tunnelling
+	  Protocol, RFC2637) conncection tracking and NAT. 
+	
+	  If you are running PPTP sessions over a stateful firewall or NAT
+	  box, you may want to enable this feature.  
+	
+	  Please note that not all PPTP modes of operation are supported yet.
+	  For more info, read top of the file
+	  net/ipv4/netfilter/ip_conntrack_pptp.c
+	
+	  If you want to compile it as a module, say M here and read
+	  Documentation/modules.txt.  If unsure, say `N'.
+
 config IP_NF_QUEUE
 	tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
 	help
@@ -621,6 +637,12 @@ config IP_NF_NAT_AMANDA
 	default IP_NF_NAT if IP_NF_AMANDA=y
 	default m if IP_NF_AMANDA=m
 
+config IP_NF_NAT_PPTP
+	tristate
+	depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
+	default IP_NF_NAT if IP_NF_PPTP=y
+	default m if IP_NF_PPTP=m
+
 # mangle + specific targets
 config IP_NF_MANGLE
 	tristate "Packet mangling"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 1ba0db74681..3d45d3c0283 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -6,6 +6,9 @@
 ip_conntrack-objs	:= ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
 iptable_nat-objs	:= ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
 
+ip_conntrack_pptp-objs	:= ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
+ip_nat_pptp-objs	:= ip_nat_helper_pptp.o ip_nat_proto_gre.o
+
 # connection tracking
 obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
 
@@ -17,6 +20,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
 obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
 
 # connection tracking helpers
+obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
 obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
 obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
 obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
@@ -24,6 +28,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
 obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
 
 # NAT helpers 
+obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
 obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
 obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
 obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index dc20881004b..fa3f914117e 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb,
 
 	/* increase the UDP timeout of the master connection as replies from
 	 * Amanda clients to the server can be quite delayed */
-	ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
+	ip_ct_refresh(ct, *pskb, master_timeout * HZ);
 
 	/* No data? */
 	dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index f8cd8e42961..ea65dd3e517 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
 
 /* Just find a expectation corresponding to a tuple. */
 struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
+ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
 {
 	struct ip_conntrack_expect *i;
 	
@@ -1112,45 +1112,46 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
 	synchronize_net();
 }
 
-static inline void ct_add_counters(struct ip_conntrack *ct,
-				   enum ip_conntrack_info ctinfo,
-				   const struct sk_buff *skb)
-{
-#ifdef CONFIG_IP_NF_CT_ACCT
-	if (skb) {
-		ct->counters[CTINFO2DIR(ctinfo)].packets++;
-		ct->counters[CTINFO2DIR(ctinfo)].bytes += 
-					ntohs(skb->nh.iph->tot_len);
-	}
-#endif
-}
-
-/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */
-void ip_ct_refresh_acct(struct ip_conntrack *ct, 
+/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
+void __ip_ct_refresh_acct(struct ip_conntrack *ct, 
 		        enum ip_conntrack_info ctinfo,
 			const struct sk_buff *skb,
-			unsigned long extra_jiffies)
+			unsigned long extra_jiffies,
+			int do_acct)
 {
+	int do_event = 0;
+
 	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
+	IP_NF_ASSERT(skb);
+
+	write_lock_bh(&ip_conntrack_lock);
 
 	/* If not in hash table, timer will not be active yet */
 	if (!is_confirmed(ct)) {
 		ct->timeout.expires = extra_jiffies;
-		ct_add_counters(ct, ctinfo, skb);
+		do_event = 1;
 	} else {
-		write_lock_bh(&ip_conntrack_lock);
 		/* Need del_timer for race avoidance (may already be dying). */
 		if (del_timer(&ct->timeout)) {
 			ct->timeout.expires = jiffies + extra_jiffies;
 			add_timer(&ct->timeout);
-			/* FIXME: We loose some REFRESH events if this function
-			 * is called without an skb.  I'll fix this later -HW */
-			if (skb)
-				ip_conntrack_event_cache(IPCT_REFRESH, skb);
+			do_event = 1;
 		}
-		ct_add_counters(ct, ctinfo, skb);
-		write_unlock_bh(&ip_conntrack_lock);
 	}
+
+#ifdef CONFIG_IP_NF_CT_ACCT
+	if (do_acct) {
+		ct->counters[CTINFO2DIR(ctinfo)].packets++;
+		ct->counters[CTINFO2DIR(ctinfo)].bytes += 
+						ntohs(skb->nh.iph->tot_len);
+	}
+#endif
+
+	write_unlock_bh(&ip_conntrack_lock);
+
+	/* must be unlocked when calling event cache */
+	if (do_event)
+		ip_conntrack_event_cache(IPCT_REFRESH, skb);
 }
 
 #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
new file mode 100644
index 00000000000..926a6684643
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
@@ -0,0 +1,806 @@
+/*
+ * ip_conntrack_pptp.c	- Version 3.0
+ *
+ * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft.  PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702.  Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * Limitations:
+ * 	 - We blindly assume that control connections are always
+ * 	   established in PNS->PAC direction.  This is a violation
+ * 	   of RFFC2673
+ * 	 - We can only support one single call within each session
+ *
+ * TODO:
+ *	 - testing of incoming PPTP calls 
+ *
+ * Changes: 
+ * 	2002-02-05 - Version 1.3
+ * 	  - Call ip_conntrack_unexpect_related() from 
+ * 	    pptp_destroy_siblings() to destroy expectations in case
+ * 	    CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
+ * 	    (Philip Craig <philipc@snapgear.com>)
+ * 	  - Add Version information at module loadtime
+ * 	2002-02-10 - Version 1.6
+ * 	  - move to C99 style initializers
+ * 	  - remove second expectation if first arrives
+ * 	2004-10-22 - Version 2.0
+ * 	  - merge Mandrake's 2.6.x port with recent 2.6.x API changes
+ * 	  - fix lots of linear skb assumptions from Mandrake's port
+ * 	2005-06-10 - Version 2.1
+ * 	  - use ip_conntrack_expect_free() instead of kfree() on the
+ * 	    expect's (which are from the slab for quite some time)
+ * 	2005-06-10 - Version 3.0
+ * 	  - port helper to post-2.6.11 API changes,
+ * 	    funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
+ * 	2005-07-30 - Version 3.1
+ * 	  - port helper to 2.6.13 API changes
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+#define IP_CT_PPTP_VERSION "3.1"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
+
+static DEFINE_SPINLOCK(ip_pptp_lock);
+
+int
+(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
+			  struct ip_conntrack *ct,
+			  enum ip_conntrack_info ctinfo,
+			  struct PptpControlHeader *ctlh,
+			  union pptp_ctrl_union *pptpReq);
+
+int
+(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
+			  struct ip_conntrack *ct,
+			  enum ip_conntrack_info ctinfo,
+			  struct PptpControlHeader *ctlh,
+			  union pptp_ctrl_union *pptpReq);
+
+int
+(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
+			    struct ip_conntrack_expect *expect_reply);
+
+void
+(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
+			     struct ip_conntrack_expect *exp);
+
+#if 0
+/* PptpControlMessageType names */
+const char *pptp_msg_name[] = {
+	"UNKNOWN_MESSAGE",
+	"START_SESSION_REQUEST",
+	"START_SESSION_REPLY",
+	"STOP_SESSION_REQUEST",
+	"STOP_SESSION_REPLY",
+	"ECHO_REQUEST",
+	"ECHO_REPLY",
+	"OUT_CALL_REQUEST",
+	"OUT_CALL_REPLY",
+	"IN_CALL_REQUEST",
+	"IN_CALL_REPLY",
+	"IN_CALL_CONNECT",
+	"CALL_CLEAR_REQUEST",
+	"CALL_DISCONNECT_NOTIFY",
+	"WAN_ERROR_NOTIFY",
+	"SET_LINK_INFO"
+};
+EXPORT_SYMBOL(pptp_msg_name);
+#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+#define SECS *HZ
+#define MINS * 60 SECS
+#define HOURS * 60 MINS
+
+#define PPTP_GRE_TIMEOUT 		(10 MINS)
+#define PPTP_GRE_STREAM_TIMEOUT 	(5 HOURS)
+
+static void pptp_expectfn(struct ip_conntrack *ct,
+			 struct ip_conntrack_expect *exp)
+{
+	DEBUGP("increasing timeouts\n");
+
+	/* increase timeout of GRE data channel conntrack entry */
+	ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
+	ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
+
+	/* Can you see how rusty this code is, compared with the pre-2.6.11
+	 * one? That's what happened to my shiny newnat of 2002 ;( -HW */
+
+	if (!ip_nat_pptp_hook_expectfn) {
+		struct ip_conntrack_tuple inv_t;
+		struct ip_conntrack_expect *exp_other;
+
+		/* obviously this tuple inversion only works until you do NAT */
+		invert_tuplepr(&inv_t, &exp->tuple);
+		DEBUGP("trying to unexpect other dir: ");
+		DUMP_TUPLE(&inv_t);
+	
+		exp_other = ip_conntrack_expect_find(&inv_t);
+		if (exp_other) {
+			/* delete other expectation.  */
+			DEBUGP("found\n");
+			ip_conntrack_unexpect_related(exp_other);
+			ip_conntrack_expect_put(exp_other);
+		} else {
+			DEBUGP("not found\n");
+		}
+	} else {
+		/* we need more than simple inversion */
+		ip_nat_pptp_hook_expectfn(ct, exp);
+	}
+}
+
+static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
+{
+	struct ip_conntrack_tuple_hash *h;
+	struct ip_conntrack_expect *exp;
+
+	DEBUGP("trying to timeout ct or exp for tuple ");
+	DUMP_TUPLE(t);
+
+	h = ip_conntrack_find_get(t, NULL);
+	if (h)  {
+		struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
+		DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
+		sibling->proto.gre.timeout = 0;
+		sibling->proto.gre.stream_timeout = 0;
+		if (del_timer(&sibling->timeout))
+			sibling->timeout.function((unsigned long)sibling);
+		ip_conntrack_put(sibling);
+		return 1;
+	} else {
+		exp = ip_conntrack_expect_find(t);
+		if (exp) {
+			DEBUGP("unexpect_related of expect %p\n", exp);
+			ip_conntrack_unexpect_related(exp);
+			ip_conntrack_expect_put(exp);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+
+/* timeout GRE data connections */
+static void pptp_destroy_siblings(struct ip_conntrack *ct)
+{
+	struct ip_conntrack_tuple t;
+
+	/* Since ct->sibling_list has literally rusted away in 2.6.11, 
+	 * we now need another way to find out about our sibling
+	 * contrack and expects... -HW */
+
+	/* try original (pns->pac) tuple */
+	memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
+	t.dst.protonum = IPPROTO_GRE;
+	t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+	t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+
+	if (!destroy_sibling_or_exp(&t))
+		DEBUGP("failed to timeout original pns->pac ct/exp\n");
+
+	/* try reply (pac->pns) tuple */
+	memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
+	t.dst.protonum = IPPROTO_GRE;
+	t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
+	t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
+
+	if (!destroy_sibling_or_exp(&t))
+		DEBUGP("failed to timeout reply pac->pns ct/exp\n");
+}
+
+/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
+static inline int
+exp_gre(struct ip_conntrack *master,
+	u_int32_t seq,
+	__be16 callid,
+	__be16 peer_callid)
+{
+	struct ip_conntrack_tuple inv_tuple;
+	struct ip_conntrack_tuple exp_tuples[] = {
+		/* tuple in original direction, PNS->PAC */
+		{ .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
+			   .u = { .gre = { .key = peer_callid } }
+			 },
+		  .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
+			   .u = { .gre = { .key = callid } },
+			   .protonum = IPPROTO_GRE
+			 },
+		 },
+		/* tuple in reply direction, PAC->PNS */
+		{ .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
+			   .u = { .gre = { .key = callid } }
+			 },
+		  .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
+			   .u = { .gre = { .key = peer_callid } },
+			   .protonum = IPPROTO_GRE
+			 },
+		 }
+	};
+	struct ip_conntrack_expect *exp_orig, *exp_reply;
+	int ret = 1;
+
+	exp_orig = ip_conntrack_expect_alloc(master);
+	if (exp_orig == NULL)
+		goto out;
+
+	exp_reply = ip_conntrack_expect_alloc(master);
+	if (exp_reply == NULL)
+		goto out_put_orig;
+
+	memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
+
+	exp_orig->mask.src.ip = 0xffffffff;
+	exp_orig->mask.src.u.all = 0;
+	exp_orig->mask.dst.u.all = 0;
+	exp_orig->mask.dst.u.gre.key = htons(0xffff);
+	exp_orig->mask.dst.ip = 0xffffffff;
+	exp_orig->mask.dst.protonum = 0xff;
+		
+	exp_orig->master = master;
+	exp_orig->expectfn = pptp_expectfn;
+	exp_orig->flags = 0;
+
+	exp_orig->dir = IP_CT_DIR_ORIGINAL;
+
+	/* both expectations are identical apart from tuple */
+	memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
+	memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
+
+	exp_reply->dir = !exp_orig->dir;
+
+	if (ip_nat_pptp_hook_exp_gre)
+		ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
+	else {
+
+		DEBUGP("calling expect_related PNS->PAC");
+		DUMP_TUPLE(&exp_orig->tuple);
+
+		if (ip_conntrack_expect_related(exp_orig) != 0) {
+			DEBUGP("cannot expect_related()\n");
+			goto out_put_both;
+		}
+
+		DEBUGP("calling expect_related PAC->PNS");
+		DUMP_TUPLE(&exp_reply->tuple);
+
+		if (ip_conntrack_expect_related(exp_reply) != 0) {
+			DEBUGP("cannot expect_related()\n");
+			goto out_unexpect_orig;
+		}
+
+		/* Add GRE keymap entries */
+		if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
+			DEBUGP("cannot keymap_add() exp\n");
+			goto out_unexpect_both;
+		}
+
+		invert_tuplepr(&inv_tuple, &exp_reply->tuple);
+		if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
+			ip_ct_gre_keymap_destroy(master);
+			DEBUGP("cannot keymap_add() exp_inv\n");
+			goto out_unexpect_both;
+		}
+		ret = 0;
+	}
+
+out_put_both:
+	ip_conntrack_expect_put(exp_reply);
+out_put_orig:
+	ip_conntrack_expect_put(exp_orig);
+out:
+	return ret;
+
+out_unexpect_both:
+	ip_conntrack_unexpect_related(exp_reply);
+out_unexpect_orig:
+	ip_conntrack_unexpect_related(exp_orig);
+	goto out_put_both;
+}
+
+static inline int 
+pptp_inbound_pkt(struct sk_buff **pskb,
+		 struct tcphdr *tcph,
+		 unsigned int nexthdr_off,
+		 unsigned int datalen,
+		 struct ip_conntrack *ct,
+		 enum ip_conntrack_info ctinfo)
+{
+	struct PptpControlHeader _ctlh, *ctlh;
+	unsigned int reqlen;
+	union pptp_ctrl_union _pptpReq, *pptpReq;
+	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+	u_int16_t msg;
+	__be16 *cid, *pcid;
+	u_int32_t seq;	
+
+	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+	if (!ctlh) {
+		DEBUGP("error during skb_header_pointer\n");
+		return NF_ACCEPT;
+	}
+	nexthdr_off += sizeof(_ctlh);
+	datalen -= sizeof(_ctlh);
+
+	reqlen = datalen;
+	if (reqlen > sizeof(*pptpReq))
+		reqlen = sizeof(*pptpReq);
+	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+	if (!pptpReq) {
+		DEBUGP("error during skb_header_pointer\n");
+		return NF_ACCEPT;
+	}
+
+	msg = ntohs(ctlh->messageType);
+	DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
+
+	switch (msg) {
+	case PPTP_START_SESSION_REPLY:
+		if (reqlen < sizeof(_pptpReq.srep)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server confirms new control session */
+		if (info->sstate < PPTP_SESSION_REQUESTED) {
+			DEBUGP("%s without START_SESS_REQUEST\n",
+				pptp_msg_name[msg]);
+			break;
+		}
+		if (pptpReq->srep.resultCode == PPTP_START_OK)
+			info->sstate = PPTP_SESSION_CONFIRMED;
+		else 
+			info->sstate = PPTP_SESSION_ERROR;
+		break;
+
+	case PPTP_STOP_SESSION_REPLY:
+		if (reqlen < sizeof(_pptpReq.strep)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server confirms end of control session */
+		if (info->sstate > PPTP_SESSION_STOPREQ) {
+			DEBUGP("%s without STOP_SESS_REQUEST\n",
+				pptp_msg_name[msg]);
+			break;
+		}
+		if (pptpReq->strep.resultCode == PPTP_STOP_OK)
+			info->sstate = PPTP_SESSION_NONE;
+		else
+			info->sstate = PPTP_SESSION_ERROR;
+		break;
+
+	case PPTP_OUT_CALL_REPLY:
+		if (reqlen < sizeof(_pptpReq.ocack)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server accepted call, we now expect GRE frames */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+			break;
+		}
+		if (info->cstate != PPTP_CALL_OUT_REQ &&
+		    info->cstate != PPTP_CALL_OUT_CONF) {
+			DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
+			break;
+		}
+		if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
+			info->cstate = PPTP_CALL_NONE;
+			break;
+		}
+
+		cid = &pptpReq->ocack.callID;
+		pcid = &pptpReq->ocack.peersCallID;
+
+		info->pac_call_id = ntohs(*cid);
+		
+		if (htons(info->pns_call_id) != *pcid) {
+			DEBUGP("%s for unknown callid %u\n",
+				pptp_msg_name[msg], ntohs(*pcid));
+			break;
+		}
+
+		DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], 
+			ntohs(*cid), ntohs(*pcid));
+		
+		info->cstate = PPTP_CALL_OUT_CONF;
+
+		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
+				       + sizeof(struct PptpControlHeader)
+				       + ((void *)pcid - (void *)pptpReq);
+			
+		if (exp_gre(ct, seq, *cid, *pcid) != 0)
+			printk("ip_conntrack_pptp: error during exp_gre\n");
+		break;
+
+	case PPTP_IN_CALL_REQUEST:
+		if (reqlen < sizeof(_pptpReq.icack)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server tells us about incoming call request */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+			break;
+		}
+		pcid = &pptpReq->icack.peersCallID;
+		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		info->cstate = PPTP_CALL_IN_REQ;
+		info->pac_call_id = ntohs(*pcid);
+		break;
+
+	case PPTP_IN_CALL_CONNECT:
+		if (reqlen < sizeof(_pptpReq.iccon)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server tells us about incoming call established */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("%s but no session\n", pptp_msg_name[msg]);
+			break;
+		}
+		if (info->sstate != PPTP_CALL_IN_REP
+		    && info->sstate != PPTP_CALL_IN_CONF) {
+			DEBUGP("%s but never sent IN_CALL_REPLY\n",
+				pptp_msg_name[msg]);
+			break;
+		}
+
+		pcid = &pptpReq->iccon.peersCallID;
+		cid = &info->pac_call_id;
+
+		if (info->pns_call_id != ntohs(*pcid)) {
+			DEBUGP("%s for unknown CallID %u\n", 
+				pptp_msg_name[msg], ntohs(*pcid));
+			break;
+		}
+
+		DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		info->cstate = PPTP_CALL_IN_CONF;
+
+		/* we expect a GRE connection from PAC to PNS */
+		seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
+				       + sizeof(struct PptpControlHeader)
+				       + ((void *)pcid - (void *)pptpReq);
+			
+		if (exp_gre(ct, seq, *cid, *pcid) != 0)
+			printk("ip_conntrack_pptp: error during exp_gre\n");
+
+		break;
+
+	case PPTP_CALL_DISCONNECT_NOTIFY:
+		if (reqlen < sizeof(_pptpReq.disc)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* server confirms disconnect */
+		cid = &pptpReq->disc.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+		info->cstate = PPTP_CALL_NONE;
+
+		/* untrack this call id, unexpect GRE packets */
+		pptp_destroy_siblings(ct);
+		break;
+
+	case PPTP_WAN_ERROR_NOTIFY:
+		break;
+
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* I don't have to explain these ;) */
+		break;
+	default:
+		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
+			? pptp_msg_name[msg]:pptp_msg_name[0], msg);
+		break;
+	}
+
+
+	if (ip_nat_pptp_hook_inbound)
+		return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
+						pptpReq);
+
+	return NF_ACCEPT;
+
+}
+
+static inline int
+pptp_outbound_pkt(struct sk_buff **pskb,
+		  struct tcphdr *tcph,
+		  unsigned int nexthdr_off,
+		  unsigned int datalen,
+		  struct ip_conntrack *ct,
+		  enum ip_conntrack_info ctinfo)
+{
+	struct PptpControlHeader _ctlh, *ctlh;
+	unsigned int reqlen;
+	union pptp_ctrl_union _pptpReq, *pptpReq;
+	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+	u_int16_t msg;
+	__be16 *cid, *pcid;
+
+	ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
+	if (!ctlh)
+		return NF_ACCEPT;
+	nexthdr_off += sizeof(_ctlh);
+	datalen -= sizeof(_ctlh);
+	
+	reqlen = datalen;
+	if (reqlen > sizeof(*pptpReq))
+		reqlen = sizeof(*pptpReq);
+	pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
+	if (!pptpReq)
+		return NF_ACCEPT;
+
+	msg = ntohs(ctlh->messageType);
+	DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
+
+	switch (msg) {
+	case PPTP_START_SESSION_REQUEST:
+		/* client requests for new control session */
+		if (info->sstate != PPTP_SESSION_NONE) {
+			DEBUGP("%s but we already have one",
+				pptp_msg_name[msg]);
+		}
+		info->sstate = PPTP_SESSION_REQUESTED;
+		break;
+	case PPTP_STOP_SESSION_REQUEST:
+		/* client requests end of control session */
+		info->sstate = PPTP_SESSION_STOPREQ;
+		break;
+
+	case PPTP_OUT_CALL_REQUEST:
+		if (reqlen < sizeof(_pptpReq.ocreq)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			/* FIXME: break; */
+		}
+
+		/* client initiating connection to server */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("%s but no session\n",
+				pptp_msg_name[msg]);
+			break;
+		}
+		info->cstate = PPTP_CALL_OUT_REQ;
+		/* track PNS call id */
+		cid = &pptpReq->ocreq.callID;
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
+		info->pns_call_id = ntohs(*cid);
+		break;
+	case PPTP_IN_CALL_REPLY:
+		if (reqlen < sizeof(_pptpReq.icack)) {
+			DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
+			break;
+		}
+
+		/* client answers incoming call */
+		if (info->cstate != PPTP_CALL_IN_REQ
+		    && info->cstate != PPTP_CALL_IN_REP) {
+			DEBUGP("%s without incall_req\n", 
+				pptp_msg_name[msg]);
+			break;
+		}
+		if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
+			info->cstate = PPTP_CALL_NONE;
+			break;
+		}
+		pcid = &pptpReq->icack.peersCallID;
+		if (info->pac_call_id != ntohs(*pcid)) {
+			DEBUGP("%s for unknown call %u\n", 
+				pptp_msg_name[msg], ntohs(*pcid));
+			break;
+		}
+		DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
+		/* part two of the three-way handshake */
+		info->cstate = PPTP_CALL_IN_REP;
+		info->pns_call_id = ntohs(pptpReq->icack.callID);
+		break;
+
+	case PPTP_CALL_CLEAR_REQUEST:
+		/* client requests hangup of call */
+		if (info->sstate != PPTP_SESSION_CONFIRMED) {
+			DEBUGP("CLEAR_CALL but no session\n");
+			break;
+		}
+		/* FUTURE: iterate over all calls and check if
+		 * call ID is valid.  We don't do this without newnat,
+		 * because we only know about last call */
+		info->cstate = PPTP_CALL_CLEAR_REQ;
+		break;
+	case PPTP_SET_LINK_INFO:
+		break;
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* I don't have to explain these ;) */
+		break;
+	default:
+		DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? 
+			pptp_msg_name[msg]:pptp_msg_name[0], msg);
+		/* unknown: no need to create GRE masq table entry */
+		break;
+	}
+	
+	if (ip_nat_pptp_hook_outbound)
+		return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
+						 pptpReq);
+
+	return NF_ACCEPT;
+}
+
+
+/* track caller id inside control connection, call expect_related */
+static int 
+conntrack_pptp_help(struct sk_buff **pskb,
+		    struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
+
+{
+	struct pptp_pkt_hdr _pptph, *pptph;
+	struct tcphdr _tcph, *tcph;
+	u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
+	u_int32_t datalen;
+	int dir = CTINFO2DIR(ctinfo);
+	struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
+	unsigned int nexthdr_off;
+
+	int oldsstate, oldcstate;
+	int ret;
+
+	/* don't do any tracking before tcp handshake complete */
+	if (ctinfo != IP_CT_ESTABLISHED 
+	    && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
+		DEBUGP("ctinfo = %u, skipping\n", ctinfo);
+		return NF_ACCEPT;
+	}
+	
+	nexthdr_off = (*pskb)->nh.iph->ihl*4;
+	tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
+	BUG_ON(!tcph);
+	nexthdr_off += tcph->doff * 4;
+ 	datalen = tcplen - tcph->doff * 4;
+
+	if (tcph->fin || tcph->rst) {
+		DEBUGP("RST/FIN received, timeouting GRE\n");
+		/* can't do this after real newnat */
+		info->cstate = PPTP_CALL_NONE;
+
+		/* untrack this call id, unexpect GRE packets */
+		pptp_destroy_siblings(ct);
+	}
+
+	pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
+	if (!pptph) {
+		DEBUGP("no full PPTP header, can't track\n");
+		return NF_ACCEPT;
+	}
+	nexthdr_off += sizeof(_pptph);
+	datalen -= sizeof(_pptph);
+
+	/* if it's not a control message we can't do anything with it */
+	if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
+	    ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
+		DEBUGP("not a control packet\n");
+		return NF_ACCEPT;
+	}
+
+	oldsstate = info->sstate;
+	oldcstate = info->cstate;
+
+	spin_lock_bh(&ip_pptp_lock);
+
+	/* FIXME: We just blindly assume that the control connection is always
+	 * established from PNS->PAC.  However, RFC makes no guarantee */
+	if (dir == IP_CT_DIR_ORIGINAL)
+		/* client -> server (PNS -> PAC) */
+		ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+					ctinfo);
+	else
+		/* server -> client (PAC -> PNS) */
+		ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
+				       ctinfo);
+	DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
+		oldsstate, info->sstate, oldcstate, info->cstate);
+	spin_unlock_bh(&ip_pptp_lock);
+
+	return ret;
+}
+
+/* control protocol helper */
+static struct ip_conntrack_helper pptp = { 
+	.list = { NULL, NULL },
+	.name = "pptp", 
+	.me = THIS_MODULE,
+	.max_expected = 2,
+	.timeout = 5 * 60,
+	.tuple = { .src = { .ip = 0, 
+		 	    .u = { .tcp = { .port =  
+				    __constant_htons(PPTP_CONTROL_PORT) } } 
+			  }, 
+		   .dst = { .ip = 0, 
+			    .u = { .all = 0 },
+			    .protonum = IPPROTO_TCP
+			  } 
+		 },
+	.mask = { .src = { .ip = 0, 
+			   .u = { .tcp = { .port = __constant_htons(0xffff) } } 
+			 }, 
+		  .dst = { .ip = 0, 
+			   .u = { .all = 0 },
+			   .protonum = 0xff 
+		 	 } 
+		},
+	.help = conntrack_pptp_help
+};
+
+extern void __exit ip_ct_proto_gre_fini(void);
+extern int __init ip_ct_proto_gre_init(void);
+
+/* ip_conntrack_pptp initialization */
+static int __init init(void)
+{
+	int retcode;
+ 
+	retcode = ip_ct_proto_gre_init();
+	if (retcode < 0)
+		return retcode;
+
+	DEBUGP(" registering helper\n");
+	if ((retcode = ip_conntrack_helper_register(&pptp))) {
+		printk(KERN_ERR "Unable to register conntrack application "
+				"helper for pptp: %d\n", retcode);
+		ip_ct_proto_gre_fini();
+		return retcode;
+	}
+
+	printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	ip_conntrack_helper_unregister(&pptp);
+	ip_ct_proto_gre_fini();
+	printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
+}
+
+module_init(init);
+module_exit(fini);
+
+EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
+EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
+EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
+EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
index 71ef19d126d..577bac22dcc 100644
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
@@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb,
 	ip_conntrack_expect_related(exp);
 	ip_conntrack_expect_put(exp);
 
-	ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ);
+	ip_ct_refresh(ct, *pskb, timeout * HZ);
 out:
 	return NF_ACCEPT;
 }
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 15aef356474..b08a432efcf 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
 	if (err < 0)
 		return err;
 
-	exp = ip_conntrack_expect_find_get(&tuple);
+	exp = ip_conntrack_expect_find(&tuple);
 	if (!exp)
 		return -ENOENT;
 
@@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
 			return err;
 
 		/* bump usage count to 2 */
-		exp = ip_conntrack_expect_find_get(&tuple);
+		exp = ip_conntrack_expect_find(&tuple);
 		if (!exp)
 			return -ENOENT;
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
new file mode 100644
index 00000000000..de3cb9db6f8
--- /dev/null
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -0,0 +1,327 @@
+/*
+ * ip_conntrack_proto_gre.c - Version 3.0 
+ *
+ * Connection tracking protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two 
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/netfilter.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/list.h>
+
+static DEFINE_RWLOCK(ip_ct_gre_lock);
+#define ASSERT_READ_LOCK(x)
+#define ASSERT_WRITE_LOCK(x)
+
+#include <linux/netfilter_ipv4/listhelp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
+
+/* shamelessly stolen from ip_conntrack_proto_udp.c */
+#define GRE_TIMEOUT		(30*HZ)
+#define GRE_STREAM_TIMEOUT	(180*HZ)
+
+#if 0
+#define DEBUGP(format, args...)	printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
+#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
+			NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
+			NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
+#else
+#define DEBUGP(x, args...)
+#define DUMP_TUPLE_GRE(x)
+#endif
+				
+/* GRE KEYMAP HANDLING FUNCTIONS */
+static LIST_HEAD(gre_keymap_list);
+
+static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
+				const struct ip_conntrack_tuple *t)
+{
+	return ((km->tuple.src.ip == t->src.ip) &&
+		(km->tuple.dst.ip == t->dst.ip) &&
+		(km->tuple.dst.protonum == t->dst.protonum) &&
+		(km->tuple.dst.u.all == t->dst.u.all));
+}
+
+/* look up the source key for a given tuple */
+static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t)
+{
+	struct ip_ct_gre_keymap *km;
+	u_int32_t key = 0;
+
+	read_lock_bh(&ip_ct_gre_lock);
+	km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
+			struct ip_ct_gre_keymap *, t);
+	if (km)
+		key = km->tuple.src.u.gre.key;
+	read_unlock_bh(&ip_ct_gre_lock);
+	
+	DEBUGP("lookup src key 0x%x up key for ", key);
+	DUMP_TUPLE_GRE(t);
+
+	return key;
+}
+
+/* add a single keymap entry, associate with specified master ct */
+int
+ip_ct_gre_keymap_add(struct ip_conntrack *ct,
+		     struct ip_conntrack_tuple *t, int reply)
+{
+	struct ip_ct_gre_keymap **exist_km, *km, *old;
+
+	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
+		DEBUGP("refusing to add GRE keymap to non-pptp session\n");
+		return -1;
+	}
+
+	if (!reply) 
+		exist_km = &ct->help.ct_pptp_info.keymap_orig;
+	else
+		exist_km = &ct->help.ct_pptp_info.keymap_reply;
+
+	if (*exist_km) {
+		/* check whether it's a retransmission */
+		old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
+				struct ip_ct_gre_keymap *, t);
+		if (old == *exist_km) {
+			DEBUGP("retransmission\n");
+			return 0;
+		}
+
+		DEBUGP("trying to override keymap_%s for ct %p\n", 
+			reply? "reply":"orig", ct);
+		return -EEXIST;
+	}
+
+	km = kmalloc(sizeof(*km), GFP_ATOMIC);
+	if (!km)
+		return -ENOMEM;
+
+	memcpy(&km->tuple, t, sizeof(*t));
+	*exist_km = km;
+
+	DEBUGP("adding new entry %p: ", km);
+	DUMP_TUPLE_GRE(&km->tuple);
+
+	write_lock_bh(&ip_ct_gre_lock);
+	list_append(&gre_keymap_list, km);
+	write_unlock_bh(&ip_ct_gre_lock);
+
+	return 0;
+}
+
+/* destroy the keymap entries associated with specified master ct */
+void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
+{
+	DEBUGP("entering for ct %p\n", ct);
+
+	if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
+		DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
+		return;
+	}
+
+	write_lock_bh(&ip_ct_gre_lock);
+	if (ct->help.ct_pptp_info.keymap_orig) {
+		DEBUGP("removing %p from list\n", 
+			ct->help.ct_pptp_info.keymap_orig);
+		list_del(&ct->help.ct_pptp_info.keymap_orig->list);
+		kfree(ct->help.ct_pptp_info.keymap_orig);
+		ct->help.ct_pptp_info.keymap_orig = NULL;
+	}
+	if (ct->help.ct_pptp_info.keymap_reply) {
+		DEBUGP("removing %p from list\n",
+			ct->help.ct_pptp_info.keymap_reply);
+		list_del(&ct->help.ct_pptp_info.keymap_reply->list);
+		kfree(ct->help.ct_pptp_info.keymap_reply);
+		ct->help.ct_pptp_info.keymap_reply = NULL;
+	}
+	write_unlock_bh(&ip_ct_gre_lock);
+}
+
+
+/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
+
+/* invert gre part of tuple */
+static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
+			    const struct ip_conntrack_tuple *orig)
+{
+	tuple->dst.u.gre.key = orig->src.u.gre.key;
+	tuple->src.u.gre.key = orig->dst.u.gre.key;
+
+	return 1;
+}
+
+/* gre hdr info to tuple */
+static int gre_pkt_to_tuple(const struct sk_buff *skb,
+			   unsigned int dataoff,
+			   struct ip_conntrack_tuple *tuple)
+{
+	struct gre_hdr_pptp _pgrehdr, *pgrehdr;
+	u_int32_t srckey;
+	struct gre_hdr _grehdr, *grehdr;
+
+	/* first only delinearize old RFC1701 GRE header */
+	grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
+	if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+		/* try to behave like "ip_conntrack_proto_generic" */
+		tuple->src.u.all = 0;
+		tuple->dst.u.all = 0;
+		return 1;
+	}
+
+	/* PPTP header is variable length, only need up to the call_id field */
+	pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
+	if (!pgrehdr)
+		return 1;
+
+	if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
+		DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
+		return 0;
+	}
+
+	tuple->dst.u.gre.key = pgrehdr->call_id;
+	srckey = gre_keymap_lookup(tuple);
+	tuple->src.u.gre.key = srckey;
+
+	return 1;
+}
+
+/* print gre part of tuple */
+static int gre_print_tuple(struct seq_file *s,
+			   const struct ip_conntrack_tuple *tuple)
+{
+	return seq_printf(s, "srckey=0x%x dstkey=0x%x ", 
+			  ntohs(tuple->src.u.gre.key),
+			  ntohs(tuple->dst.u.gre.key));
+}
+
+/* print private data for conntrack */
+static int gre_print_conntrack(struct seq_file *s,
+			       const struct ip_conntrack *ct)
+{
+	return seq_printf(s, "timeout=%u, stream_timeout=%u ",
+			  (ct->proto.gre.timeout / HZ),
+			  (ct->proto.gre.stream_timeout / HZ));
+}
+
+/* Returns verdict for packet, and may modify conntrack */
+static int gre_packet(struct ip_conntrack *ct,
+		      const struct sk_buff *skb,
+		      enum ip_conntrack_info conntrackinfo)
+{
+	/* If we've seen traffic both ways, this is a GRE connection.
+	 * Extend timeout. */
+	if (ct->status & IPS_SEEN_REPLY) {
+		ip_ct_refresh_acct(ct, conntrackinfo, skb,
+				   ct->proto.gre.stream_timeout);
+		/* Also, more likely to be important, and not a probe. */
+		set_bit(IPS_ASSURED_BIT, &ct->status);
+	} else
+		ip_ct_refresh_acct(ct, conntrackinfo, skb,
+				   ct->proto.gre.timeout);
+	
+	return NF_ACCEPT;
+}
+
+/* Called when a new connection for this protocol found. */
+static int gre_new(struct ip_conntrack *ct,
+		   const struct sk_buff *skb)
+{ 
+	DEBUGP(": ");
+	DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+
+	/* initialize to sane value.  Ideally a conntrack helper
+	 * (e.g. in case of pptp) is increasing them */
+	ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
+	ct->proto.gre.timeout = GRE_TIMEOUT;
+
+	return 1;
+}
+
+/* Called when a conntrack entry has already been removed from the hashes
+ * and is about to be deleted from memory */
+static void gre_destroy(struct ip_conntrack *ct)
+{
+	struct ip_conntrack *master = ct->master;
+	DEBUGP(" entering\n");
+
+	if (!master)
+		DEBUGP("no master !?!\n");
+	else
+		ip_ct_gre_keymap_destroy(master);
+}
+
+/* protocol helper struct */
+static struct ip_conntrack_protocol gre = { 
+	.proto		 = IPPROTO_GRE,
+	.name		 = "gre", 
+	.pkt_to_tuple	 = gre_pkt_to_tuple,
+	.invert_tuple	 = gre_invert_tuple,
+	.print_tuple	 = gre_print_tuple,
+	.print_conntrack = gre_print_conntrack,
+	.packet		 = gre_packet,
+	.new		 = gre_new,
+	.destroy	 = gre_destroy,
+	.me 		 = THIS_MODULE,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
+#endif
+};
+
+/* ip_conntrack_proto_gre initialization */
+int __init ip_ct_proto_gre_init(void)
+{
+	return ip_conntrack_protocol_register(&gre);
+}
+
+void __exit ip_ct_proto_gre_fini(void)
+{
+	struct list_head *pos, *n;
+
+	/* delete all keymap entries */
+	write_lock_bh(&ip_ct_gre_lock);
+	list_for_each_safe(pos, n, &gre_keymap_list) {
+		DEBUGP("deleting keymap %p at module unload time\n", pos);
+		list_del(pos);
+		kfree(pos);
+	}
+	write_unlock_bh(&ip_ct_gre_lock);
+
+	ip_conntrack_protocol_unregister(&gre); 
+}
+
+EXPORT_SYMBOL(ip_ct_gre_keymap_add);
+EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index ae3e3e655db..dd476b191f4 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -989,15 +989,15 @@ EXPORT_SYMBOL(need_ip_conntrack);
 EXPORT_SYMBOL(ip_conntrack_helper_register);
 EXPORT_SYMBOL(ip_conntrack_helper_unregister);
 EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(ip_ct_refresh_acct);
+EXPORT_SYMBOL(__ip_ct_refresh_acct);
 
 EXPORT_SYMBOL(ip_conntrack_expect_alloc);
 EXPORT_SYMBOL(ip_conntrack_expect_put);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
+EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
 EXPORT_SYMBOL(ip_conntrack_expect_related);
 EXPORT_SYMBOL(ip_conntrack_unexpect_related);
 EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
 EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
 
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 1adedb743f6..c3ea891d38e 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
+EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
 #endif
 
 int __init ip_nat_init(void)
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
new file mode 100644
index 00000000000..3cdd0684d30
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c
@@ -0,0 +1,401 @@
+/*
+ * ip_nat_pptp.c	- Version 3.0
+ *
+ * NAT support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft.  PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702.  Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * TODO: - NAT to a unique tuple, not to TCP source port
+ * 	   (needs netfilter tuple reservation)
+ *
+ * Changes:
+ *     2002-02-10 - Version 1.3
+ *       - Use ip_nat_mangle_tcp_packet() because of cloned skb's
+ *	   in local connections (Philip Craig <philipc@snapgear.com>)
+ *       - add checks for magicCookie and pptp version
+ *       - make argument list of pptp_{out,in}bound_packet() shorter
+ *       - move to C99 style initializers
+ *       - print version number at module loadtime
+ *     2003-09-22 - Version 1.5
+ *       - use SNATed tcp sourceport as callid, since we get called before
+ *	   TCP header is mangled (Philip Craig <philipc@snapgear.com>)
+ *     2004-10-22 - Version 2.0
+ *       - kernel 2.6.x version
+ *     2005-06-10 - Version 3.0
+ *       - kernel >= 2.6.11 version,
+ *	   funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
+ * 
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/netfilter_ipv4/ip_nat_pptp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
+
+#define IP_NAT_PPTP_VERSION "3.0"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
+
+
+#if 0
+extern const char *pptp_msg_name[];
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
+				       __FUNCTION__, ## args)
+#else
+#define DEBUGP(format, args...)
+#endif
+
+static void pptp_nat_expected(struct ip_conntrack *ct,
+			      struct ip_conntrack_expect *exp)
+{
+	struct ip_conntrack *master = ct->master;
+	struct ip_conntrack_expect *other_exp;
+	struct ip_conntrack_tuple t;
+	struct ip_ct_pptp_master *ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info;
+
+	ct_pptp_info = &master->help.ct_pptp_info;
+	nat_pptp_info = &master->nat.help.nat_pptp_info;
+
+	/* And here goes the grand finale of corrosion... */
+
+	if (exp->dir == IP_CT_DIR_ORIGINAL) {
+		DEBUGP("we are PNS->PAC\n");
+		/* therefore, build tuple for PAC->PNS */
+		t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+		t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
+		t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+		t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
+		t.dst.protonum = IPPROTO_GRE;
+	} else {
+		DEBUGP("we are PAC->PNS\n");
+		/* build tuple for PNS->PAC */
+		t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+		t.src.u.gre.key = 
+			htons(master->nat.help.nat_pptp_info.pns_call_id);
+		t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
+		t.dst.u.gre.key = 
+			htons(master->nat.help.nat_pptp_info.pac_call_id);
+		t.dst.protonum = IPPROTO_GRE;
+	}
+
+	DEBUGP("trying to unexpect other dir: ");
+	DUMP_TUPLE(&t);
+	other_exp = ip_conntrack_expect_find(&t);
+	if (other_exp) {
+		ip_conntrack_unexpect_related(other_exp);
+		ip_conntrack_expect_put(other_exp);
+		DEBUGP("success\n");
+	} else {
+		DEBUGP("not found!\n");
+	}
+
+	ip_nat_follow_master(ct, exp);
+}
+
+/* outbound packets == from PNS to PAC */
+static int
+pptp_outbound_pkt(struct sk_buff **pskb,
+		  struct ip_conntrack *ct,
+		  enum ip_conntrack_info ctinfo,
+		  struct PptpControlHeader *ctlh,
+		  union pptp_ctrl_union *pptpReq)
+
+{
+	struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
+
+	u_int16_t msg, *cid = NULL, new_callid;
+
+	new_callid = htons(ct_pptp_info->pns_call_id);
+	
+	switch (msg = ntohs(ctlh->messageType)) {
+		case PPTP_OUT_CALL_REQUEST:
+			cid = &pptpReq->ocreq.callID;
+			/* FIXME: ideally we would want to reserve a call ID
+			 * here.  current netfilter NAT core is not able to do
+			 * this :( For now we use TCP source port. This breaks
+			 * multiple calls within one control session */
+
+			/* save original call ID in nat_info */
+			nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+			/* don't use tcph->source since we are at a DSTmanip
+			 * hook (e.g. PREROUTING) and pkt is not mangled yet */
+			new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+			/* save new call ID in ct info */
+			ct_pptp_info->pns_call_id = ntohs(new_callid);
+			break;
+		case PPTP_IN_CALL_REPLY:
+			cid = &pptpReq->icreq.callID;
+			break;
+		case PPTP_CALL_CLEAR_REQUEST:
+			cid = &pptpReq->clrreq.callID;
+			break;
+		default:
+			DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
+			      (msg <= PPTP_MSG_MAX)? 
+			      pptp_msg_name[msg]:pptp_msg_name[0]);
+			/* fall through */
+
+		case PPTP_SET_LINK_INFO:
+			/* only need to NAT in case PAC is behind NAT box */
+		case PPTP_START_SESSION_REQUEST:
+		case PPTP_START_SESSION_REPLY:
+		case PPTP_STOP_SESSION_REQUEST:
+		case PPTP_STOP_SESSION_REPLY:
+		case PPTP_ECHO_REQUEST:
+		case PPTP_ECHO_REPLY:
+			/* no need to alter packet */
+			return NF_ACCEPT;
+	}
+
+	/* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
+	 * down to here */
+
+	IP_NF_ASSERT(cid);
+
+	DEBUGP("altering call id from 0x%04x to 0x%04x\n",
+		ntohs(*cid), ntohs(new_callid));
+
+	/* mangle packet */
+	if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
+		(void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
+				 	sizeof(new_callid), 
+					(char *)&new_callid,
+				 	sizeof(new_callid)) == 0)
+		return NF_DROP;
+
+	return NF_ACCEPT;
+}
+
+static int
+pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
+	     struct ip_conntrack_expect *expect_reply)
+{
+	struct ip_ct_pptp_master *ct_pptp_info = 
+				&expect_orig->master->help.ct_pptp_info;
+	struct ip_nat_pptp *nat_pptp_info = 
+				&expect_orig->master->nat.help.nat_pptp_info;
+
+	struct ip_conntrack *ct = expect_orig->master;
+
+	struct ip_conntrack_tuple inv_t;
+	struct ip_conntrack_tuple *orig_t, *reply_t;
+
+	/* save original PAC call ID in nat_info */
+	nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
+
+	/* alter expectation */
+	orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	/* alter expectation for PNS->PAC direction */
+	invert_tuplepr(&inv_t, &expect_orig->tuple);
+	expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id);
+	expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
+	expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+	inv_t.src.ip = reply_t->src.ip;
+	inv_t.dst.ip = reply_t->dst.ip;
+	inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
+	inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+
+	if (!ip_conntrack_expect_related(expect_orig)) {
+		DEBUGP("successfully registered expect\n");
+	} else {
+		DEBUGP("can't expect_related(expect_orig)\n");
+		return 1;
+	}
+
+	/* alter expectation for PAC->PNS direction */
+	invert_tuplepr(&inv_t, &expect_reply->tuple);
+	expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
+	expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
+	expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
+	inv_t.src.ip = orig_t->src.ip;
+	inv_t.dst.ip = orig_t->dst.ip;
+	inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
+	inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
+
+	if (!ip_conntrack_expect_related(expect_reply)) {
+		DEBUGP("successfully registered expect\n");
+	} else {
+		DEBUGP("can't expect_related(expect_reply)\n");
+		ip_conntrack_unexpect_related(expect_orig);
+		return 1;
+	}
+
+	if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
+		DEBUGP("can't register original keymap\n");
+		ip_conntrack_unexpect_related(expect_orig);
+		ip_conntrack_unexpect_related(expect_reply);
+		return 1;
+	}
+
+	if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
+		DEBUGP("can't register reply keymap\n");
+		ip_conntrack_unexpect_related(expect_orig);
+		ip_conntrack_unexpect_related(expect_reply);
+		ip_ct_gre_keymap_destroy(ct);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* inbound packets == from PAC to PNS */
+static int
+pptp_inbound_pkt(struct sk_buff **pskb,
+		 struct ip_conntrack *ct,
+		 enum ip_conntrack_info ctinfo,
+		 struct PptpControlHeader *ctlh,
+		 union pptp_ctrl_union *pptpReq)
+{
+	struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
+	u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL;
+
+	int ret = NF_ACCEPT, rv;
+
+	new_pcid = htons(nat_pptp_info->pns_call_id);
+
+	switch (msg = ntohs(ctlh->messageType)) {
+	case PPTP_OUT_CALL_REPLY:
+		pcid = &pptpReq->ocack.peersCallID;	
+		cid = &pptpReq->ocack.callID;
+		break;
+	case PPTP_IN_CALL_CONNECT:
+		pcid = &pptpReq->iccon.peersCallID;
+		break;
+	case PPTP_IN_CALL_REQUEST:
+		/* only need to nat in case PAC is behind NAT box */
+		break;
+	case PPTP_WAN_ERROR_NOTIFY:
+		pcid = &pptpReq->wanerr.peersCallID;
+		break;
+	case PPTP_CALL_DISCONNECT_NOTIFY:
+		pcid = &pptpReq->disc.callID;
+		break;
+	case PPTP_SET_LINK_INFO:
+		pcid = &pptpReq->setlink.peersCallID;
+		break;
+
+	default:
+		DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? 
+			pptp_msg_name[msg]:pptp_msg_name[0]);
+		/* fall through */
+
+	case PPTP_START_SESSION_REQUEST:
+	case PPTP_START_SESSION_REPLY:
+	case PPTP_STOP_SESSION_REQUEST:
+	case PPTP_STOP_SESSION_REPLY:
+	case PPTP_ECHO_REQUEST:
+	case PPTP_ECHO_REPLY:
+		/* no need to alter packet */
+		return NF_ACCEPT;
+	}
+
+	/* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
+	 * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
+
+	/* mangle packet */
+	IP_NF_ASSERT(pcid);
+	DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
+		ntohs(*pcid), ntohs(new_pcid));
+	
+	rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 
+				      (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)),
+				      sizeof(new_pcid), (char *)&new_pcid, 
+				      sizeof(new_pcid));
+	if (rv != NF_ACCEPT) 
+		return rv;
+
+	if (new_cid) {
+		IP_NF_ASSERT(cid);
+		DEBUGP("altering call id from 0x%04x to 0x%04x\n",
+			ntohs(*cid), ntohs(new_cid));
+		rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, 
+					      (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), 
+					      sizeof(new_cid),
+					      (char *)&new_cid, 
+					      sizeof(new_cid));
+		if (rv != NF_ACCEPT)
+			return rv;
+	}
+
+	/* check for earlier return value of 'switch' above */
+	if (ret != NF_ACCEPT)
+		return ret;
+
+	/* great, at least we don't need to resize packets */
+	return NF_ACCEPT;
+}
+
+
+extern int __init ip_nat_proto_gre_init(void);
+extern void __exit ip_nat_proto_gre_fini(void);
+
+static int __init init(void)
+{
+	int ret;
+
+	DEBUGP("%s: registering NAT helper\n", __FILE__);
+
+	ret = ip_nat_proto_gre_init();
+	if (ret < 0)
+		return ret;
+
+	BUG_ON(ip_nat_pptp_hook_outbound);
+	ip_nat_pptp_hook_outbound = &pptp_outbound_pkt;
+
+	BUG_ON(ip_nat_pptp_hook_inbound);
+	ip_nat_pptp_hook_inbound = &pptp_inbound_pkt;
+
+	BUG_ON(ip_nat_pptp_hook_exp_gre);
+	ip_nat_pptp_hook_exp_gre = &pptp_exp_gre;
+
+	BUG_ON(ip_nat_pptp_hook_expectfn);
+	ip_nat_pptp_hook_expectfn = &pptp_nat_expected;
+
+	printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
+	return 0;
+}
+
+static void __exit fini(void)
+{
+	DEBUGP("cleanup_module\n" );
+
+	ip_nat_pptp_hook_expectfn = NULL;
+	ip_nat_pptp_hook_exp_gre = NULL;
+	ip_nat_pptp_hook_inbound = NULL;
+	ip_nat_pptp_hook_outbound = NULL;
+
+	ip_nat_proto_gre_fini();
+	/* Make sure noone calls it, meanwhile */
+	synchronize_net();
+
+	printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
+}
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
new file mode 100644
index 00000000000..7c128540167
--- /dev/null
+++ b/net/ipv4/netfilter/ip_nat_proto_gre.c
@@ -0,0 +1,214 @@
+/*
+ * ip_nat_proto_gre.c - Version 2.0
+ *
+ * NAT protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two 
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_nat_rule.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
+
+#if 0
+#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
+				       __FUNCTION__, ## args)
+#else
+#define DEBUGP(x, args...)
+#endif
+
+/* is key in given range between min and max */
+static int
+gre_in_range(const struct ip_conntrack_tuple *tuple,
+	     enum ip_nat_manip_type maniptype,
+	     const union ip_conntrack_manip_proto *min,
+	     const union ip_conntrack_manip_proto *max)
+{
+	u_int32_t key;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		key = tuple->src.u.gre.key;
+	else
+		key = tuple->dst.u.gre.key;
+
+	return ntohl(key) >= ntohl(min->gre.key)
+		&& ntohl(key) <= ntohl(max->gre.key);
+}
+
+/* generate unique tuple ... */
+static int 
+gre_unique_tuple(struct ip_conntrack_tuple *tuple,
+		 const struct ip_nat_range *range,
+		 enum ip_nat_manip_type maniptype,
+		 const struct ip_conntrack *conntrack)
+{
+	static u_int16_t key;
+	u_int16_t *keyptr;
+	unsigned int min, i, range_size;
+
+	if (maniptype == IP_NAT_MANIP_SRC)
+		keyptr = &tuple->src.u.gre.key;
+	else
+		keyptr = &tuple->dst.u.gre.key;
+
+	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+		DEBUGP("%p: NATing GRE PPTP\n", conntrack);
+		min = 1;
+		range_size = 0xffff;
+	} else {
+		min = ntohl(range->min.gre.key);
+		range_size = ntohl(range->max.gre.key) - min + 1;
+	}
+
+	DEBUGP("min = %u, range_size = %u\n", min, range_size); 
+
+	for (i = 0; i < range_size; i++, key++) {
+		*keyptr = htonl(min + key % range_size);
+		if (!ip_nat_used_tuple(tuple, conntrack))
+			return 1;
+	}
+
+	DEBUGP("%p: no NAT mapping\n", conntrack);
+
+	return 0;
+}
+
+/* manipulate a GRE packet according to maniptype */
+static int
+gre_manip_pkt(struct sk_buff **pskb,
+	      unsigned int iphdroff,
+	      const struct ip_conntrack_tuple *tuple,
+	      enum ip_nat_manip_type maniptype)
+{
+	struct gre_hdr *greh;
+	struct gre_hdr_pptp *pgreh;
+	struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
+	unsigned int hdroff = iphdroff + iph->ihl*4;
+
+	/* pgreh includes two optional 32bit fields which are not required
+	 * to be there.  That's where the magic '8' comes from */
+	if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
+		return 0;
+
+	greh = (void *)(*pskb)->data + hdroff;
+	pgreh = (struct gre_hdr_pptp *) greh;
+
+	/* we only have destination manip of a packet, since 'source key' 
+	 * is not present in the packet itself */
+	if (maniptype == IP_NAT_MANIP_DST) {
+		/* key manipulation is always dest */
+		switch (greh->version) {
+		case 0:
+			if (!greh->key) {
+				DEBUGP("can't nat GRE w/o key\n");
+				break;
+			}
+			if (greh->csum) {
+				/* FIXME: Never tested this code... */
+				*(gre_csum(greh)) = 
+					ip_nat_cheat_check(~*(gre_key(greh)),
+							tuple->dst.u.gre.key,
+							*(gre_csum(greh)));
+			}
+			*(gre_key(greh)) = tuple->dst.u.gre.key;
+			break;
+		case GRE_VERSION_PPTP:
+			DEBUGP("call_id -> 0x%04x\n", 
+				ntohl(tuple->dst.u.gre.key));
+			pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key));
+			break;
+		default:
+			DEBUGP("can't nat unknown GRE version\n");
+			return 0;
+			break;
+		}
+	}
+	return 1;
+}
+
+/* print out a nat tuple */
+static unsigned int 
+gre_print(char *buffer, 
+	  const struct ip_conntrack_tuple *match,
+	  const struct ip_conntrack_tuple *mask)
+{
+	unsigned int len = 0;
+
+	if (mask->src.u.gre.key)
+		len += sprintf(buffer + len, "srckey=0x%x ", 
+				ntohl(match->src.u.gre.key));
+
+	if (mask->dst.u.gre.key)
+		len += sprintf(buffer + len, "dstkey=0x%x ",
+				ntohl(match->src.u.gre.key));
+
+	return len;
+}
+
+/* print a range of keys */
+static unsigned int 
+gre_print_range(char *buffer, const struct ip_nat_range *range)
+{
+	if (range->min.gre.key != 0 
+	    || range->max.gre.key != 0xFFFF) {
+		if (range->min.gre.key == range->max.gre.key)
+			return sprintf(buffer, "key 0x%x ",
+					ntohl(range->min.gre.key));
+		else
+			return sprintf(buffer, "keys 0x%u-0x%u ",
+					ntohl(range->min.gre.key),
+					ntohl(range->max.gre.key));
+	} else
+		return 0;
+}
+
+/* nat helper struct */
+static struct ip_nat_protocol gre = { 
+	.name		= "GRE", 
+	.protonum	= IPPROTO_GRE,
+	.manip_pkt	= gre_manip_pkt,
+	.in_range	= gre_in_range,
+	.unique_tuple	= gre_unique_tuple,
+	.print		= gre_print,
+	.print_range	= gre_print_range,
+#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
+    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
+	.range_to_nfattr	= ip_nat_port_range_to_nfattr,
+	.nfattr_to_range	= ip_nat_port_nfattr_to_range,
+#endif
+};
+				  
+int __init ip_nat_proto_gre_init(void)
+{
+	return ip_nat_protocol_register(&gre);
+}
+
+void __exit ip_nat_proto_gre_fini(void)
+{
+	ip_nat_protocol_unregister(&gre);
+}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 304bb0a1d4f..4b0d7e4d626 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 
 			if (type && code) {
 				get_user(fl->fl_icmp_type, type);
-				__get_user(fl->fl_icmp_code, code);
+			        get_user(fl->fl_icmp_code, code);
 				probed = 1;
 			}
 			break;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a88db28b0af..b1a63b2c6b4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->frto_counter = 0;
 		newtp->frto_highmark = 0;
 
-		newicsk->icsk_ca_ops = &tcp_reno;
+		newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
 
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b018e31b653..d6e3d269e90 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -461,9 +461,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	flags = TCP_SKB_CB(skb)->flags;
 	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
-	TCP_SKB_CB(buff)->sacked =
-		(TCP_SKB_CB(skb)->sacked &
-		 (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
+	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
 	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) {
@@ -501,11 +499,26 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 			tcp_skb_pcount(buff);
 
 		tp->packets_out -= diff;
+
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+			tp->sacked_out -= diff;
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
+			tp->retrans_out -= diff;
+
 		if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
 			tp->lost_out -= diff;
 			tp->left_out -= diff;
 		}
+
 		if (diff > 0) {
+			/* Adjust Reno SACK estimate. */
+			if (!tp->rx_opt.sack_ok) {
+				tp->sacked_out -= diff;
+				if ((int)tp->sacked_out < 0)
+					tp->sacked_out = 0;
+				tcp_sync_left_out(tp);
+			}
+
 			tp->fackets_out -= diff;
 			if ((int)tp->fackets_out < 0)
 				tp->fackets_out = 0;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1cb8adb2787..2da514b16d9 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1955,6 +1955,57 @@ static void __exit fini(void)
 #endif
 }
 
+/*
+ * find specified header up to transport protocol header.
+ * If found target header, the offset to the header is set to *offset
+ * and return 0. otherwise, return -1.
+ *
+ * Notes: - non-1st Fragment Header isn't skipped.
+ *	  - ESP header isn't skipped.
+ *	  - The target header may be trancated.
+ */
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
+{
+	unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
+	u8 nexthdr = skb->nh.ipv6h->nexthdr;
+	unsigned int len = skb->len - start;
+
+	while (nexthdr != target) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		unsigned int hdrlen;
+
+		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
+			return -1;
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		if (hp == NULL)
+			return -1;
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			unsigned short _frag_off, *fp;
+			fp = skb_header_pointer(skb,
+						start+offsetof(struct frag_hdr,
+							       frag_off),
+						sizeof(_frag_off),
+						&_frag_off);
+			if (fp == NULL)
+				return -1;
+
+			if (ntohs(*fp) & ~0x7)
+				return -1;
+			hdrlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH)
+			hdrlen = (hp->hdrlen + 2) << 2; 
+		else
+			hdrlen = ipv6_optlen(hp); 
+
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		start += hdrlen;
+	}
+
+	*offset = start;
+	return 0;
+}
+
 EXPORT_SYMBOL(ip6t_register_table);
 EXPORT_SYMBOL(ip6t_unregister_table);
 EXPORT_SYMBOL(ip6t_do_table);
@@ -1963,6 +2014,7 @@ EXPORT_SYMBOL(ip6t_unregister_match);
 EXPORT_SYMBOL(ip6t_register_target);
 EXPORT_SYMBOL(ip6t_unregister_target);
 EXPORT_SYMBOL(ip6t_ext_hdr);
+EXPORT_SYMBOL(ipv6_find_hdr);
 
 module_init(init);
 module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index d5b94f142bb..dde37793d20 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -48,92 +48,21 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-	struct ip_auth_hdr *ah = NULL, _ah;
+	struct ip_auth_hdr *ah, _ah;
 	const struct ip6t_ah *ahinfo = matchinfo;
-	unsigned int temp;
-	int len;
-	u8 nexthdr;
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 
-	/*DEBUGP("IPv6 AH entered\n");*/
-	/* if (opt->auth == 0) return 0;
-	* It does not filled on output */
-
-	/* type of the 1st exthdr */
-	nexthdr = skb->nh.ipv6h->nexthdr;
-	/* pointer to the 1st exthdr */
-	ptr = sizeof(struct ipv6hdr);
-	/* available length */
-	len = skb->len - ptr;
-	temp = 0;
-
-	while (ip6t_ext_hdr(nexthdr)) {
-		struct ipv6_opt_hdr _hdr, *hp;
-
-		DEBUGP("ipv6_ah header iteration \n");
-
-		/* Is there enough space for the next ext header? */
-		if (len < sizeof(struct ipv6_opt_hdr))
-			return 0;
-		/* No more exthdr -> evaluate */
-		if (nexthdr == NEXTHDR_NONE)
-			break;
-		/* ESP -> evaluate */
-		if (nexthdr == NEXTHDR_ESP)
-			break;
-
-		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-		BUG_ON(hp == NULL);
-
-		/* Calculate the header length */
-		if (nexthdr == NEXTHDR_FRAGMENT)
-			hdrlen = 8;
-		else if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hp->hdrlen+2)<<2;
-		else
-			hdrlen = ipv6_optlen(hp);
-
-		/* AH -> evaluate */
-		if (nexthdr == NEXTHDR_AUTH) {
-			temp |= MASK_AH;
-			break;
-		}
-
-		
-		/* set the flag */
-		switch (nexthdr) {
-		case NEXTHDR_HOP:
-		case NEXTHDR_ROUTING:
-		case NEXTHDR_FRAGMENT:
-		case NEXTHDR_AUTH:
-		case NEXTHDR_DEST:
-			break;
-		default:
-			DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr);
-			return 0;
-		}
-
-		nexthdr = hp->nexthdr;
-		len -= hdrlen;
-		ptr += hdrlen;
-		if (ptr > skb->len) {
-			DEBUGP("ipv6_ah: new pointer too large! \n");
-			break;
-		}
-	}
-
-	/* AH header not found */
-	if (temp != MASK_AH)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0)
 		return 0;
 
-	if (len < sizeof(struct ip_auth_hdr)){
+	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
+	if (ah == NULL) {
 		*hotdrop = 1;
 		return 0;
 	}
 
-	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
-	BUG_ON(ah == NULL);
+	hdrlen = (ah->hdrlen + 2) << 2;
 
 	DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen);
 	DEBUGP("RES %04X ", ah->reserved);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index 540925e4a7a..c450a635e54 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -63,8 +63,6 @@ match(const struct sk_buff *skb,
        struct ipv6_opt_hdr _optsh, *oh;
        const struct ip6t_opts *optinfo = matchinfo;
        unsigned int temp;
-       unsigned int len;
-       u8 nexthdr;
        unsigned int ptr;
        unsigned int hdrlen = 0;
        unsigned int ret = 0;
@@ -72,97 +70,25 @@ match(const struct sk_buff *skb,
        u8 _optlen, *lp = NULL;
        unsigned int optlen;
        
-       /* type of the 1st exthdr */
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       /* pointer to the 1st exthdr */
-       ptr = sizeof(struct ipv6hdr);
-       /* available length */
-       len = skb->len - ptr;
-       temp = 0;
-
-        while (ip6t_ext_hdr(nexthdr)) {
-               struct ipv6_opt_hdr _hdr, *hp;
-
-              DEBUGP("ipv6_opts header iteration \n");
-
-              /* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
-              /* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
-                     break;
-              }
-              /* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
-                     break;
-              }
-
-	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-	      BUG_ON(hp == NULL);
-
-              /* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
-
-              /* OPTS -> evaluate */
 #if HOPBYHOP
-                if (nexthdr == NEXTHDR_HOP) {
-                     temp |= MASK_HOPOPTS;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
 #else
-                if (nexthdr == NEXTHDR_DEST) {
-                     temp |= MASK_DSTOPTS;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
 #endif
-                     break;
-              }
-
+		return 0;
 
-              /* set the flag */
-              switch (nexthdr){
-                     case NEXTHDR_HOP:
-                     case NEXTHDR_ROUTING:
-                     case NEXTHDR_FRAGMENT:
-                     case NEXTHDR_AUTH:
-                     case NEXTHDR_DEST:
-                            break;
-                     default:
-                            DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
-                            return 0;
-                            break;
-              }
-
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
-		if ( ptr > skb->len ) {
-			DEBUGP("ipv6_opts: new pointer is too large! \n");
-			break;
-		}
-        }
-
-       /* OPTIONS header not found */
-#if HOPBYHOP
-       if ( temp != MASK_HOPOPTS ) return 0;
-#else
-       if ( temp != MASK_DSTOPTS ) return 0;
-#endif
-
-       if (len < (int)sizeof(struct ipv6_opt_hdr)){
+       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+       if (oh == NULL){
 	       *hotdrop = 1;
        		return 0;
        }
 
-       if (len < hdrlen){
+       hdrlen = ipv6_optlen(oh);
+       if (skb->len - ptr < hdrlen){
 	       /* Packet smaller than it's length field */
        		return 0;
        }
 
-       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-       BUG_ON(oh == NULL);
-
        DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
 
        DEBUGP("len %02X %04X %02X ",
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index e39dd236fd8..24bc0cde43a 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -48,87 +48,22 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-	struct ip_esp_hdr _esp, *eh = NULL;
+	struct ip_esp_hdr _esp, *eh;
 	const struct ip6t_esp *espinfo = matchinfo;
-	unsigned int temp;
-	int len;
-	u8 nexthdr;
 	unsigned int ptr;
 
 	/* Make sure this isn't an evil packet */
 	/*DEBUGP("ipv6_esp entered \n");*/
 
-	/* type of the 1st exthdr */
-	nexthdr = skb->nh.ipv6h->nexthdr;
-	/* pointer to the 1st exthdr */
-	ptr = sizeof(struct ipv6hdr);
-	/* available length */
-	len = skb->len - ptr;
-	temp = 0;
-
-	while (ip6t_ext_hdr(nexthdr)) {
-		struct ipv6_opt_hdr _hdr, *hp;
-		int hdrlen;
-
-		DEBUGP("ipv6_esp header iteration \n");
-
-		/* Is there enough space for the next ext header? */
-		if (len < sizeof(struct ipv6_opt_hdr))
-			return 0;
-		/* No more exthdr -> evaluate */
-		if (nexthdr == NEXTHDR_NONE)
-			break;
-		/* ESP -> evaluate */
-		if (nexthdr == NEXTHDR_ESP) {
-			temp |= MASK_ESP;
-			break;
-		}
-
-		hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-		BUG_ON(hp == NULL);
-
-		/* Calculate the header length */
-		if (nexthdr == NEXTHDR_FRAGMENT)
-			hdrlen = 8;
-		else if (nexthdr == NEXTHDR_AUTH)
-			hdrlen = (hp->hdrlen+2)<<2;
-		else
-			hdrlen = ipv6_optlen(hp);
-
-		/* set the flag */
-		switch (nexthdr) {
-		case NEXTHDR_HOP:
-		case NEXTHDR_ROUTING:
-		case NEXTHDR_FRAGMENT:
-		case NEXTHDR_AUTH:
-		case NEXTHDR_DEST:
-			break;
-		default:
-			DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr);
-			return 0;
-		}
-
-		nexthdr = hp->nexthdr;
-		len -= hdrlen;
-		ptr += hdrlen;
-		if (ptr > skb->len) {
-			DEBUGP("ipv6_esp: new pointer too large! \n");
-			break;
-		}
-	}
-
-	/* ESP header not found */
-	if (temp != MASK_ESP)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0)
 		return 0;
 
-	if (len < sizeof(struct ip_esp_hdr)) {
+	eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
+	if (eh == NULL) {
 		*hotdrop = 1;
 		return 0;
 	}
 
-	eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
-	BUG_ON(eh == NULL);
-
 	DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi));
 
 	return (eh != NULL)
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 4bfa30a9bc8..085d5f8eea2 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -48,90 +48,18 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-       struct frag_hdr _frag, *fh = NULL;
+       struct frag_hdr _frag, *fh;
        const struct ip6t_frag *fraginfo = matchinfo;
-       unsigned int temp;
-       int len;
-       u8 nexthdr;
        unsigned int ptr;
-       unsigned int hdrlen = 0;
-
-       /* type of the 1st exthdr */
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       /* pointer to the 1st exthdr */
-       ptr = sizeof(struct ipv6hdr);
-       /* available length */
-       len = skb->len - ptr;
-       temp = 0;
-
-        while (ip6t_ext_hdr(nexthdr)) {
-               struct ipv6_opt_hdr _hdr, *hp;
-
-              DEBUGP("ipv6_frag header iteration \n");
-
-              /* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
-              /* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
-                     break;
-              }
-              /* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
-                     break;
-              }
-
-	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-	      BUG_ON(hp == NULL);
-
-              /* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
-
-              /* FRAG -> evaluate */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                     temp |= MASK_FRAGMENT;
-                     break;
-              }
-
-
-              /* set the flag */
-              switch (nexthdr){
-                     case NEXTHDR_HOP:
-                     case NEXTHDR_ROUTING:
-                     case NEXTHDR_FRAGMENT:
-                     case NEXTHDR_AUTH:
-                     case NEXTHDR_DEST:
-                            break;
-                     default:
-                            DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr);
-                            return 0;
-                            break;
-              }
-
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
-		if ( ptr > skb->len ) {
-			DEBUGP("ipv6_frag: new pointer too large! \n");
-			break;
-		}
-        }
-
-       /* FRAG header not found */
-       if ( temp != MASK_FRAGMENT ) return 0;
-
-       if (len < sizeof(struct frag_hdr)){
-	       *hotdrop = 1;
-       		return 0;
-       }
 
-       fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
-       BUG_ON(fh == NULL);
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0)
+		return 0;
+
+	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
+	if (fh == NULL){
+		*hotdrop = 1;
+		return 0;
+	}
 
        DEBUGP("INFO %04X ", fh->frag_off);
        DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 27f3650d127..1d09485111d 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -63,8 +63,6 @@ match(const struct sk_buff *skb,
        struct ipv6_opt_hdr _optsh, *oh;
        const struct ip6t_opts *optinfo = matchinfo;
        unsigned int temp;
-       unsigned int len;
-       u8 nexthdr;
        unsigned int ptr;
        unsigned int hdrlen = 0;
        unsigned int ret = 0;
@@ -72,97 +70,25 @@ match(const struct sk_buff *skb,
        u8 _optlen, *lp = NULL;
        unsigned int optlen;
        
-       /* type of the 1st exthdr */
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       /* pointer to the 1st exthdr */
-       ptr = sizeof(struct ipv6hdr);
-       /* available length */
-       len = skb->len - ptr;
-       temp = 0;
-
-        while (ip6t_ext_hdr(nexthdr)) {
-               struct ipv6_opt_hdr _hdr, *hp;
-
-              DEBUGP("ipv6_opts header iteration \n");
-
-              /* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
-              /* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
-                     break;
-              }
-              /* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
-                     break;
-              }
-
-	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-	      BUG_ON(hp == NULL);
-
-              /* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
-
-              /* OPTS -> evaluate */
 #if HOPBYHOP
-                if (nexthdr == NEXTHDR_HOP) {
-                     temp |= MASK_HOPOPTS;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
 #else
-                if (nexthdr == NEXTHDR_DEST) {
-                     temp |= MASK_DSTOPTS;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
 #endif
-                     break;
-              }
-
+		return 0;
 
-              /* set the flag */
-              switch (nexthdr){
-                     case NEXTHDR_HOP:
-                     case NEXTHDR_ROUTING:
-                     case NEXTHDR_FRAGMENT:
-                     case NEXTHDR_AUTH:
-                     case NEXTHDR_DEST:
-                            break;
-                     default:
-                            DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr);
-                            return 0;
-                            break;
-              }
-
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
-		if ( ptr > skb->len ) {
-			DEBUGP("ipv6_opts: new pointer is too large! \n");
-			break;
-		}
-        }
-
-       /* OPTIONS header not found */
-#if HOPBYHOP
-       if ( temp != MASK_HOPOPTS ) return 0;
-#else
-       if ( temp != MASK_DSTOPTS ) return 0;
-#endif
-
-       if (len < (int)sizeof(struct ipv6_opt_hdr)){
+       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
+       if (oh == NULL){
 	       *hotdrop = 1;
        		return 0;
        }
 
-       if (len < hdrlen){
+       hdrlen = ipv6_optlen(oh);
+       if (skb->len - ptr < hdrlen){
 	       /* Packet smaller than it's length field */
        		return 0;
        }
 
-       oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
-       BUG_ON(oh == NULL);
-
        DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen);
 
        DEBUGP("len %02X %04X %02X ",
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 2bb670037df..beb2fd5cebb 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -50,98 +50,29 @@ match(const struct sk_buff *skb,
       unsigned int protoff,
       int *hotdrop)
 {
-       struct ipv6_rt_hdr _route, *rh = NULL;
+       struct ipv6_rt_hdr _route, *rh;
        const struct ip6t_rt *rtinfo = matchinfo;
        unsigned int temp;
-       unsigned int len;
-       u8 nexthdr;
        unsigned int ptr;
        unsigned int hdrlen = 0;
        unsigned int ret = 0;
        struct in6_addr *ap, _addr;
 
-       /* type of the 1st exthdr */
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       /* pointer to the 1st exthdr */
-       ptr = sizeof(struct ipv6hdr);
-       /* available length */
-       len = skb->len - ptr;
-       temp = 0;
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0)
+		return 0;
 
-        while (ip6t_ext_hdr(nexthdr)) {
-               struct ipv6_opt_hdr _hdr, *hp;
-
-              DEBUGP("ipv6_rt header iteration \n");
-
-              /* Is there enough space for the next ext header? */
-                if (len < (int)sizeof(struct ipv6_opt_hdr))
-                        return 0;
-              /* No more exthdr -> evaluate */
-                if (nexthdr == NEXTHDR_NONE) {
-                     break;
-              }
-              /* ESP -> evaluate */
-                if (nexthdr == NEXTHDR_ESP) {
-                     break;
-              }
-
-	      hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-	      BUG_ON(hp == NULL);
-
-              /* Calculate the header length */
-                if (nexthdr == NEXTHDR_FRAGMENT) {
-                        hdrlen = 8;
-                } else if (nexthdr == NEXTHDR_AUTH)
-                        hdrlen = (hp->hdrlen+2)<<2;
-                else
-                        hdrlen = ipv6_optlen(hp);
-
-              /* ROUTING -> evaluate */
-                if (nexthdr == NEXTHDR_ROUTING) {
-                     temp |= MASK_ROUTING;
-                     break;
-              }
-
-
-              /* set the flag */
-              switch (nexthdr){
-                     case NEXTHDR_HOP:
-                     case NEXTHDR_ROUTING:
-                     case NEXTHDR_FRAGMENT:
-                     case NEXTHDR_AUTH:
-                     case NEXTHDR_DEST:
-                            break;
-                     default:
-                            DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr);
-                            return 0;
-                            break;
-              }
-
-                nexthdr = hp->nexthdr;
-                len -= hdrlen;
-                ptr += hdrlen;
-		if ( ptr > skb->len ) {
-			DEBUGP("ipv6_rt: new pointer is too large! \n");
-			break;
-		}
-        }
-
-       /* ROUTING header not found */
-       if ( temp != MASK_ROUTING ) return 0;
-
-       if (len < (int)sizeof(struct ipv6_rt_hdr)){
+       rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
+       if (rh == NULL){
 	       *hotdrop = 1;
        		return 0;
        }
 
-       if (len < hdrlen){
+       hdrlen = ipv6_optlen(rh);
+       if (skb->len - ptr < hdrlen){
 	       /* Pcket smaller than its length field */
        		return 0;
        }
 
-       rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
-       BUG_ON(rh == NULL);
-
        DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
        DEBUGP("TYPE %04X ", rh->type);
        DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 5aa3691c578..a1265a320b1 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -627,7 +627,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
 
 			if (type && code) {
 				get_user(fl->fl_icmp_type, type);
-				__get_user(fl->fl_icmp_code, code);
+				get_user(fl->fl_icmp_code, code);
 				probed = 1;
 			}
 			break;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8690f171c1e..ee865d88183 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -36,6 +36,11 @@
  *	Michal Ostrowski        :       Module initialization cleanup.
  *         Ulises Alonso        :       Frame number limit removal and 
  *                                      packet_set_ring memory leak.
+ *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
+ *					The convention is that longer addresses
+ *					will simply extend the hardware address
+ *					byte arrays at the end of sockaddr_ll 
+ *					and packet_mreq.
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -161,7 +166,17 @@ struct packet_mclist
 	int			count;
 	unsigned short		type;
 	unsigned short		alen;
-	unsigned char		addr[8];
+	unsigned char		addr[MAX_ADDR_LEN];
+};
+/* identical to struct packet_mreq except it has
+ * a longer address field.
+ */
+struct packet_mreq_max
+{
+	int		mr_ifindex;
+	unsigned short	mr_type;
+	unsigned short	mr_alen;
+	unsigned char	mr_address[MAX_ADDR_LEN];
 };
 #endif
 #ifdef CONFIG_PACKET_MMAP
@@ -716,6 +731,8 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 		err = -EINVAL;
 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
 			goto out;
+		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
+			goto out;
 		ifindex	= saddr->sll_ifindex;
 		proto	= saddr->sll_protocol;
 		addr	= saddr->sll_addr;
@@ -744,6 +761,12 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
 	if (dev->hard_header) {
 		int res;
 		err = -EINVAL;
+		if (saddr) {
+			if (saddr->sll_halen != dev->addr_len)
+				goto out_free;
+			if (saddr->sll_hatype != dev->type)
+				goto out_free;
+		}
 		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
 		if (sock->type != SOCK_DGRAM) {
 			skb->tail = skb->data;
@@ -1045,6 +1068,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	int copied, err;
+	struct sockaddr_ll *sll;
 
 	err = -EINVAL;
 	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
@@ -1057,16 +1081,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 #endif
 
 	/*
-	 *	If the address length field is there to be filled in, we fill
-	 *	it in now.
-	 */
-
-	if (sock->type == SOCK_PACKET)
-		msg->msg_namelen = sizeof(struct sockaddr_pkt);
-	else
-		msg->msg_namelen = sizeof(struct sockaddr_ll);
-
-	/*
 	 *	Call the generic datagram receiver. This handles all sorts
 	 *	of horrible races and re-entrancy so we can forget about it
 	 *	in the protocol layers.
@@ -1087,6 +1101,17 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 
 	/*
+	 *	If the address length field is there to be filled in, we fill
+	 *	it in now.
+	 */
+
+	sll = (struct sockaddr_ll*)skb->cb;
+	if (sock->type == SOCK_PACKET)
+		msg->msg_namelen = sizeof(struct sockaddr_pkt);
+	else
+		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
+
+	/*
 	 *	You lose any data beyond the buffer you gave. If it worries a
 	 *	user program they can ask the device for its MTU anyway.
 	 */
@@ -1166,7 +1191,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
 		sll->sll_halen = 0;
 	}
-	*uaddr_len = sizeof(*sll);
+	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
 
 	return 0;
 }
@@ -1199,7 +1224,7 @@ static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, i
 	}
 }
 
-static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
+static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
 {
 	struct packet_sock *po = pkt_sk(sk);
 	struct packet_mclist *ml, *i;
@@ -1249,7 +1274,7 @@ done:
 	return err;
 }
 
-static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
+static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
 {
 	struct packet_mclist *ml, **mlp;
 
@@ -1315,11 +1340,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 	case PACKET_ADD_MEMBERSHIP:	
 	case PACKET_DROP_MEMBERSHIP:
 	{
-		struct packet_mreq mreq;
-		if (optlen<sizeof(mreq))
+		struct packet_mreq_max mreq;
+		int len = optlen;
+		memset(&mreq, 0, sizeof(mreq));
+		if (len < sizeof(struct packet_mreq))
 			return -EINVAL;
-		if (copy_from_user(&mreq,optval,sizeof(mreq)))
+		if (len > sizeof(mreq))
+			len = sizeof(mreq);
+		if (copy_from_user(&mreq,optval,len))
 			return -EFAULT;
+		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
+			return -EINVAL;
 		if (optname == PACKET_ADD_MEMBERSHIP)
 			ret = packet_mc_add(sk, &mreq);
 		else
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 86073df418f..505c7de10c5 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2414,6 +2414,17 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
 	skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t));
 	chunk->subh.shutdown_hdr = sdh;
 
+	/* API 5.3.1.5 SCTP_SHUTDOWN_EVENT
+	 * When a peer sends a SHUTDOWN, SCTP delivers this notification to
+	 * inform the application that it should cease sending data.
+	 */
+	ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC);
+	if (!ev) {
+		disposition = SCTP_DISPOSITION_NOMEM;
+		goto out;	
+	}
+	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
+
 	/* Upon the reception of the SHUTDOWN, the peer endpoint shall
 	 *  - enter the SHUTDOWN-RECEIVED state,
 	 *  - stop accepting new data from its SCTP user
@@ -2439,17 +2450,6 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep,
 	sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN,
 			SCTP_U32(chunk->subh.shutdown_hdr->cum_tsn_ack));
 
-	/* API 5.3.1.5 SCTP_SHUTDOWN_EVENT
-	 * When a peer sends a SHUTDOWN, SCTP delivers this notification to
-	 * inform the application that it should cease sending data.
-	 */
-	ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC);
-	if (!ev) {
-		disposition = SCTP_DISPOSITION_NOMEM;
-		goto out;	
-	}
-	sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
-
 out:
 	return disposition;
 }
diff --git a/sound/oss/au1000.c b/sound/oss/au1000.c
index 4491733c9e4..2c2ae2ee01a 100644
--- a/sound/oss/au1000.c
+++ b/sound/oss/au1000.c
@@ -1295,7 +1295,7 @@ static int au1000_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long   size;
 	int ret = 0;
 
-	dbg(__FUNCTION__);
+	dbg("%s", __FUNCTION__);
     
 	lock_kernel();
 	down(&s->sem);
diff --git a/sound/oss/ite8172.c b/sound/oss/ite8172.c
index 58f879fda97..26e5944b6ba 100644
--- a/sound/oss/ite8172.c
+++ b/sound/oss/ite8172.c
@@ -1859,7 +1859,7 @@ static int it8172_release(struct inode *inode, struct file *file)
 	struct it8172_state *s = (struct it8172_state *)file->private_data;
 
 #ifdef IT8172_VERBOSE_DEBUG
-	dbg(__FUNCTION__);
+	dbg("%s", __FUNCTION__);
 #endif
 	lock_kernel();
 	if (file->f_mode & FMODE_WRITE)
diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c
index 8a59598167f..c1a239a4dac 100644
--- a/sound/pci/atiixp_modem.c
+++ b/sound/pci/atiixp_modem.c
@@ -405,7 +405,7 @@ static int snd_atiixp_acquire_codec(atiixp_t *chip)
 
 	while (atiixp_read(chip, PHYS_OUT_ADDR) & ATI_REG_PHYS_OUT_ADDR_EN) {
 		if (! timeout--) {
-			snd_printk(KERN_WARNING "atiixp: codec acquire timeout\n");
+			snd_printk(KERN_WARNING "atiixp-modem: codec acquire timeout\n");
 			return -EBUSY;
 		}
 		udelay(1);
@@ -436,7 +436,7 @@ static unsigned short snd_atiixp_codec_read(atiixp_t *chip, unsigned short codec
 	} while (--timeout);
 	/* time out may happen during reset */
 	if (reg < 0x7c)
-		snd_printk(KERN_WARNING "atiixp: codec read timeout (reg %x)\n", reg);
+		snd_printk(KERN_WARNING "atiixp-modem: codec read timeout (reg %x)\n", reg);
 	return 0xffff;
 }
 
@@ -498,7 +498,7 @@ static int snd_atiixp_aclink_reset(atiixp_t *chip)
 		do_delay();
 		atiixp_update(chip, CMD, ATI_REG_CMD_AC_RESET, ATI_REG_CMD_AC_RESET);
 		if (--timeout) {
-			snd_printk(KERN_ERR "atiixp: codec reset timeout\n");
+			snd_printk(KERN_ERR "atiixp-modem: codec reset timeout\n");
 			break;
 		}
 	}
@@ -552,7 +552,7 @@ static int snd_atiixp_codec_detect(atiixp_t *chip)
 	atiixp_write(chip, IER, 0); /* disable irqs */
 
 	if ((chip->codec_not_ready_bits & ALL_CODEC_NOT_READY) == ALL_CODEC_NOT_READY) {
-		snd_printk(KERN_ERR "atiixp: no codec detected!\n");
+		snd_printk(KERN_ERR "atiixp-modem: no codec detected!\n");
 		return -ENXIO;
 	}
 	return 0;
@@ -635,7 +635,7 @@ static void snd_atiixp_xrun_dma(atiixp_t *chip, atiixp_dma_t *dma)
 {
 	if (! dma->substream || ! dma->running)
 		return;
-	snd_printdd("atiixp: XRUN detected (DMA %d)\n", dma->ops->type);
+	snd_printdd("atiixp-modem: XRUN detected (DMA %d)\n", dma->ops->type);
 	snd_pcm_stop(dma->substream, SNDRV_PCM_STATE_XRUN);
 }
 
@@ -1081,14 +1081,14 @@ static int __devinit snd_atiixp_mixer_new(atiixp_t *chip, int clock)
 		ac97.scaps = AC97_SCAP_SKIP_AUDIO;
 		if ((err = snd_ac97_mixer(pbus, &ac97, &chip->ac97[i])) < 0) {
 			chip->ac97[i] = NULL; /* to be sure */
-			snd_printdd("atiixp: codec %d not available for modem\n", i);
+			snd_printdd("atiixp-modem: codec %d not available for modem\n", i);
 			continue;
 		}
 		codec_count++;
 	}
 
 	if (! codec_count) {
-		snd_printk(KERN_ERR "atiixp: no codec available\n");
+		snd_printk(KERN_ERR "atiixp-modem: no codec available\n");
 		return -ENODEV;
 	}
 
@@ -1159,7 +1159,7 @@ static void __devinit snd_atiixp_proc_init(atiixp_t *chip)
 {
 	snd_info_entry_t *entry;
 
-	if (! snd_card_proc_new(chip->card, "atiixp", &entry))
+	if (! snd_card_proc_new(chip->card, "atiixp-modem", &entry))
 		snd_info_set_text_ops(entry, chip, 1024, snd_atiixp_proc_read);
 }
 
diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
index 2fb27c4e951..f4361c518e4 100644
--- a/sound/sparc/cs4231.c
+++ b/sound/sparc/cs4231.c
@@ -173,7 +173,7 @@ static cs4231_t *cs4231_list;
 
 #define CS4231_GLOBALIRQ	0x01	/* IRQ is active */
 
-/* definitions for codec irq status */
+/* definitions for codec irq status - CS4231_IRQ_STATUS	*/
 
 #define CS4231_PLAYBACK_IRQ	0x10
 #define CS4231_RECORD_IRQ	0x20
@@ -402,7 +402,7 @@ static void snd_cs4231_outm(cs4231_t *chip, unsigned char reg,
 	     	udelay(100);
 #ifdef CONFIG_SND_DEBUG
 	if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT)
-		snd_printk("outm: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value);
+		snd_printdd("outm: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value);
 #endif
 	if (chip->calibrate_mute) {
 		chip->image[reg] &= mask;
@@ -425,6 +425,10 @@ static void snd_cs4231_dout(cs4231_t *chip, unsigned char reg, unsigned char val
 	     timeout > 0 && (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT);
 	     timeout--)
 	     	udelay(100);
+#ifdef CONFIG_SND_DEBUG
+	if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT)
+		snd_printdd("out: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value);
+#endif
 	__cs4231_writeb(chip, chip->mce_bit | reg, CS4231P(chip, REGSEL));
 	__cs4231_writeb(chip, value, CS4231P(chip, REG));
 	mb();
@@ -440,15 +444,12 @@ static void snd_cs4231_out(cs4231_t *chip, unsigned char reg, unsigned char valu
 	     	udelay(100);
 #ifdef CONFIG_SND_DEBUG
 	if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT)
-		snd_printk("out: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value);
+		snd_printdd("out: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value);
 #endif
 	__cs4231_writeb(chip, chip->mce_bit | reg, CS4231P(chip, REGSEL));
 	__cs4231_writeb(chip, value, CS4231P(chip, REG));
 	chip->image[reg] = value;
 	mb();
-#if 0
-	printk("codec out - reg 0x%x = 0x%x\n", chip->mce_bit | reg, value);
-#endif
 }
 
 static unsigned char snd_cs4231_in(cs4231_t *chip, unsigned char reg)
@@ -462,61 +463,14 @@ static unsigned char snd_cs4231_in(cs4231_t *chip, unsigned char reg)
 	     	udelay(100);
 #ifdef CONFIG_SND_DEBUG
 	if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT)
-		snd_printk("in: auto calibration time out - reg = 0x%x\n", reg);
+		snd_printdd("in: auto calibration time out - reg = 0x%x\n", reg);
 #endif
 	__cs4231_writeb(chip, chip->mce_bit | reg, CS4231P(chip, REGSEL));
 	mb();
 	ret = __cs4231_readb(chip, CS4231P(chip, REG));
-#if 0
-	printk("codec in - reg 0x%x = 0x%x\n", chip->mce_bit | reg, ret);
-#endif
 	return ret;
 }
 
-#if 0
-
-static void snd_cs4231_debug(cs4231_t *chip)
-{
-	printk("CS4231 REGS:      INDEX = 0x%02x  ",
-	       __cs4231_readb(chip, CS4231P(chip, REGSEL)));
-	printk("                 STATUS = 0x%02x\n",
-	       __cs4231_readb(chip, CS4231P(chip, STATUS)));
-	printk("  0x00: left input      = 0x%02x  ", snd_cs4231_in(chip, 0x00));
-	printk("  0x10: alt 1 (CFIG 2)  = 0x%02x\n", snd_cs4231_in(chip, 0x10));
-	printk("  0x01: right input     = 0x%02x  ", snd_cs4231_in(chip, 0x01));
-	printk("  0x11: alt 2 (CFIG 3)  = 0x%02x\n", snd_cs4231_in(chip, 0x11));
-	printk("  0x02: GF1 left input  = 0x%02x  ", snd_cs4231_in(chip, 0x02));
-	printk("  0x12: left line in    = 0x%02x\n", snd_cs4231_in(chip, 0x12));
-	printk("  0x03: GF1 right input = 0x%02x  ", snd_cs4231_in(chip, 0x03));
-	printk("  0x13: right line in   = 0x%02x\n", snd_cs4231_in(chip, 0x13));
-	printk("  0x04: CD left input   = 0x%02x  ", snd_cs4231_in(chip, 0x04));
-	printk("  0x14: timer low       = 0x%02x\n", snd_cs4231_in(chip, 0x14));
-	printk("  0x05: CD right input  = 0x%02x  ", snd_cs4231_in(chip, 0x05));
-	printk("  0x15: timer high      = 0x%02x\n", snd_cs4231_in(chip, 0x15));
-	printk("  0x06: left output     = 0x%02x  ", snd_cs4231_in(chip, 0x06));
-	printk("  0x16: left MIC (PnP)  = 0x%02x\n", snd_cs4231_in(chip, 0x16));
-	printk("  0x07: right output    = 0x%02x  ", snd_cs4231_in(chip, 0x07));
-	printk("  0x17: right MIC (PnP) = 0x%02x\n", snd_cs4231_in(chip, 0x17));
-	printk("  0x08: playback format = 0x%02x  ", snd_cs4231_in(chip, 0x08));
-	printk("  0x18: IRQ status      = 0x%02x\n", snd_cs4231_in(chip, 0x18));
-	printk("  0x09: iface (CFIG 1)  = 0x%02x  ", snd_cs4231_in(chip, 0x09));
-	printk("  0x19: left line out   = 0x%02x\n", snd_cs4231_in(chip, 0x19));
-	printk("  0x0a: pin control     = 0x%02x  ", snd_cs4231_in(chip, 0x0a));
-	printk("  0x1a: mono control    = 0x%02x\n", snd_cs4231_in(chip, 0x1a));
-	printk("  0x0b: init & status   = 0x%02x  ", snd_cs4231_in(chip, 0x0b));
-	printk("  0x1b: right line out  = 0x%02x\n", snd_cs4231_in(chip, 0x1b));
-	printk("  0x0c: revision & mode = 0x%02x  ", snd_cs4231_in(chip, 0x0c));
-	printk("  0x1c: record format   = 0x%02x\n", snd_cs4231_in(chip, 0x1c));
-	printk("  0x0d: loopback        = 0x%02x  ", snd_cs4231_in(chip, 0x0d));
-	printk("  0x1d: var freq (PnP)  = 0x%02x\n", snd_cs4231_in(chip, 0x1d));
-	printk("  0x0e: ply upr count   = 0x%02x  ", snd_cs4231_in(chip, 0x0e));
-	printk("  0x1e: rec upr count   = 0x%02x\n", snd_cs4231_in(chip, 0x1e));
-	printk("  0x0f: ply lwr count   = 0x%02x  ", snd_cs4231_in(chip, 0x0f));
-	printk("  0x1f: rec lwr count   = 0x%02x\n", snd_cs4231_in(chip, 0x1f));
-}
-
-#endif
-
 /*
  *  CS4231 detection / MCE routines
  */
@@ -528,11 +482,12 @@ static void snd_cs4231_busy_wait(cs4231_t *chip)
 	/* huh.. looks like this sequence is proper for CS4231A chip (GUS MAX) */
 	for (timeout = 5; timeout > 0; timeout--)
 		__cs4231_readb(chip, CS4231P(chip, REGSEL));
+
 	/* end of cleanup sequence */
-	for (timeout = 250;
+	for (timeout = 500;
 	     timeout > 0 && (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT);
 	     timeout--)
-	     	udelay(100);
+	     	udelay(1000);
 }
 
 static void snd_cs4231_mce_up(cs4231_t *chip)
@@ -545,12 +500,12 @@ static void snd_cs4231_mce_up(cs4231_t *chip)
 		udelay(100);
 #ifdef CONFIG_SND_DEBUG
 	if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT)
-		snd_printk("mce_up - auto calibration time out (0)\n");
+		snd_printdd("mce_up - auto calibration time out (0)\n");
 #endif
 	chip->mce_bit |= CS4231_MCE;
 	timeout = __cs4231_readb(chip, CS4231P(chip, REGSEL));
 	if (timeout == 0x80)
-		snd_printk("mce_up [%p]: serious init problem - codec still busy\n", chip->port);
+		snd_printdd("mce_up [%p]: serious init problem - codec still busy\n", chip->port);
 	if (!(timeout & CS4231_MCE))
 		__cs4231_writeb(chip, chip->mce_bit | (timeout & 0x1f), CS4231P(chip, REGSEL));
 	spin_unlock_irqrestore(&chip->lock, flags);
@@ -563,18 +518,15 @@ static void snd_cs4231_mce_down(cs4231_t *chip)
 
 	spin_lock_irqsave(&chip->lock, flags);
 	snd_cs4231_busy_wait(chip);
-#if 0
-	printk("(1) timeout = %i\n", timeout);
-#endif
 #ifdef CONFIG_SND_DEBUG
 	if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT)
-		snd_printk("mce_down [%p] - auto calibration time out (0)\n", CS4231P(chip, REGSEL));
+		snd_printdd("mce_down [%p] - auto calibration time out (0)\n", CS4231P(chip, REGSEL));
 #endif
 	chip->mce_bit &= ~CS4231_MCE;
 	timeout = __cs4231_readb(chip, CS4231P(chip, REGSEL));
 	__cs4231_writeb(chip, chip->mce_bit | (timeout & 0x1f), CS4231P(chip, REGSEL));
 	if (timeout == 0x80)
-		snd_printk("mce_down [%p]: serious init problem - codec still busy\n", chip->port);
+		snd_printdd("mce_down [%p]: serious init problem - codec still busy\n", chip->port);
 	if ((timeout & CS4231_MCE) == 0) {
 		spin_unlock_irqrestore(&chip->lock, flags);
 		return;
@@ -590,9 +542,7 @@ static void snd_cs4231_mce_down(cs4231_t *chip)
 		spin_unlock_irqrestore(&chip->lock, flags);
 		return;
 	}
-#if 0
-	printk("(2) timeout = %i, jiffies = %li\n", timeout, jiffies);
-#endif
+
 	/* in 10ms increments, check condition, up to 250ms */
 	timeout = 25;
 	while (snd_cs4231_in(chip, CS4231_TEST_INIT) & CS4231_CALIB_IN_PROGRESS) {
@@ -604,9 +554,7 @@ static void snd_cs4231_mce_down(cs4231_t *chip)
 		msleep(10);
 		spin_lock_irqsave(&chip->lock, flags);
 	}
-#if 0
-	printk("(3) jiffies = %li\n", jiffies);
-#endif
+
 	/* in 10ms increments, check condition, up to 100ms */
 	timeout = 10;
 	while (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT) {
@@ -619,54 +567,58 @@ static void snd_cs4231_mce_down(cs4231_t *chip)
 		spin_lock_irqsave(&chip->lock, flags);
 	}
 	spin_unlock_irqrestore(&chip->lock, flags);
-#if 0
-	printk("(4) jiffies = %li\n", jiffies);
-	snd_printk("mce_down - exit = 0x%x\n", __cs4231_readb(chip, CS4231P(chip, REGSEL)));
-#endif
 }
 
-#if 0 /* Unused for now... */
-static unsigned int snd_cs4231_get_count(unsigned char format, unsigned int size)
-{
-	switch (format & 0xe0) {
-	case CS4231_LINEAR_16:
-	case CS4231_LINEAR_16_BIG:
-		size >>= 1;
-		break;
-	case CS4231_ADPCM_16:
-		return size >> 2;
-	}
-	if (format & CS4231_STEREO)
-		size >>= 1;
-	return size;
-}
-#endif
-
 #ifdef EBUS_SUPPORT
 static void snd_cs4231_ebus_advance_dma(struct ebus_dma_info *p, snd_pcm_substream_t *substream, unsigned int *periods_sent)
 {
 	snd_pcm_runtime_t *runtime = substream->runtime;
 
 	while (1) {
-		unsigned int dma_size = snd_pcm_lib_period_bytes(substream);
-		unsigned int offset = dma_size * (*periods_sent);
+		unsigned int period_size = snd_pcm_lib_period_bytes(substream);
+		unsigned int offset = period_size * (*periods_sent);
 
-		if (dma_size >= (1 << 24))
+		if (period_size >= (1 << 24))
 			BUG();
 
-		if (ebus_dma_request(p, runtime->dma_addr + offset, dma_size))
+		if (ebus_dma_request(p, runtime->dma_addr + offset, period_size))
 			return;
-#if 0
-		printk("ebus_advance: Sent period %u (size[%x] offset[%x])\n",
-		       (*periods_sent), dma_size, offset);
-#endif
 		(*periods_sent) = ((*periods_sent) + 1) % runtime->periods;
 	}
 }
 #endif
 
-static void cs4231_dma_trigger(cs4231_t *chip, unsigned int what, int on)
+#ifdef SBUS_SUPPORT
+static void snd_cs4231_sbus_advance_dma(snd_pcm_substream_t *substream, unsigned int *periods_sent)
+{
+	cs4231_t *chip = snd_pcm_substream_chip(substream);
+	snd_pcm_runtime_t *runtime = substream->runtime;
+
+	unsigned int period_size = snd_pcm_lib_period_bytes(substream);
+	unsigned int offset = period_size * (*periods_sent % runtime->periods);
+
+	if (runtime->period_size > 0xffff + 1)
+		BUG();
+
+	switch (substream->stream) {
+	case SNDRV_PCM_STREAM_PLAYBACK:
+		sbus_writel(runtime->dma_addr + offset, chip->port + APCPNVA);
+		sbus_writel(period_size, chip->port + APCPNC);
+		break;
+	case SNDRV_PCM_STREAM_CAPTURE:
+		sbus_writel(runtime->dma_addr + offset, chip->port + APCCNVA);
+		sbus_writel(period_size, chip->port + APCCNC);
+		break;
+	}
+
+	(*periods_sent) = (*periods_sent + 1) % runtime->periods;
+}
+#endif
+
+static void cs4231_dma_trigger(snd_pcm_substream_t *substream, unsigned int what, int on)
 {
+	cs4231_t *chip = snd_pcm_substream_chip(substream);
+
 #ifdef EBUS_SUPPORT
 	if (chip->flags & CS4231_FLAG_EBUS) {
 		if (what & CS4231_PLAYBACK_ENABLE) {
@@ -694,6 +646,60 @@ static void cs4231_dma_trigger(cs4231_t *chip, unsigned int what, int on)
 	} else {
 #endif
 #ifdef SBUS_SUPPORT
+	u32 csr = sbus_readl(chip->port + APCCSR);
+	/* I don't know why, but on sbus the period counter must
+	 * only start counting after the first period is sent.
+	 * Therefore this dummy thing.
+	 */
+	unsigned int dummy = 0;
+
+	switch (what) {
+	case CS4231_PLAYBACK_ENABLE:
+		if (on) {
+			csr &= ~APC_XINT_PLAY;
+			sbus_writel(csr, chip->port + APCCSR);
+
+			csr &= ~APC_PPAUSE;
+			sbus_writel(csr, chip->port + APCCSR);
+
+			snd_cs4231_sbus_advance_dma(substream, &dummy);
+
+			csr |=  APC_GENL_INT | APC_PLAY_INT | APC_XINT_ENA |
+				APC_XINT_PLAY | APC_XINT_EMPT | APC_XINT_GENL |
+				APC_XINT_PENA | APC_PDMA_READY;
+			sbus_writel(csr, chip->port + APCCSR);
+		} else {
+			csr |= APC_PPAUSE;
+			sbus_writel(csr, chip->port + APCCSR);
+
+			csr &= ~APC_PDMA_READY;
+			sbus_writel(csr, chip->port + APCCSR);
+		}
+		break;
+	case CS4231_RECORD_ENABLE:
+		if (on) {
+			csr &= ~APC_XINT_CAPT;
+			sbus_writel(csr, chip->port + APCCSR);
+
+			csr &= ~APC_CPAUSE;
+			sbus_writel(csr, chip->port + APCCSR);
+
+			snd_cs4231_sbus_advance_dma(substream, &dummy);
+
+			csr |=  APC_GENL_INT | APC_CAPT_INT | APC_XINT_ENA |
+				APC_XINT_CAPT | APC_XINT_CEMP | APC_XINT_GENL |
+				APC_CDMA_READY;
+
+			sbus_writel(csr, chip->port + APCCSR);
+		} else {
+			csr |= APC_CPAUSE;
+			sbus_writel(csr, chip->port + APCCSR);
+
+			csr &= ~APC_CDMA_READY;
+			sbus_writel(csr, chip->port + APCCSR);
+		}
+		break;
+	}
 #endif
 #ifdef EBUS_SUPPORT
 	}
@@ -725,25 +731,12 @@ static int snd_cs4231_trigger(snd_pcm_substream_t *substream, int cmd)
 			}
 		}
 
-#if 0
-		printk("TRIGGER: what[%x] on(%d)\n",
-		       what, (cmd == SNDRV_PCM_TRIGGER_START));
-#endif
-
 		spin_lock_irqsave(&chip->lock, flags);
 		if (cmd == SNDRV_PCM_TRIGGER_START) {
-			cs4231_dma_trigger(chip, what, 1);
+			cs4231_dma_trigger(substream, what, 1);
 			chip->image[CS4231_IFACE_CTRL] |= what;
-			if (what & CS4231_PLAYBACK_ENABLE) {
-				snd_cs4231_out(chip, CS4231_PLY_LWR_CNT, 0xff);
-				snd_cs4231_out(chip, CS4231_PLY_UPR_CNT, 0xff);
-			}
-			if (what & CS4231_RECORD_ENABLE) {
-				snd_cs4231_out(chip, CS4231_REC_LWR_CNT, 0xff);
-				snd_cs4231_out(chip, CS4231_REC_UPR_CNT, 0xff);
-			}
 		} else {
-			cs4231_dma_trigger(chip, what, 0);
+			cs4231_dma_trigger(substream, what, 0);
 			chip->image[CS4231_IFACE_CTRL] &= ~what;
 		}
 		snd_cs4231_out(chip, CS4231_IFACE_CTRL,
@@ -755,9 +748,7 @@ static int snd_cs4231_trigger(snd_pcm_substream_t *substream, int cmd)
 		result = -EINVAL;
 		break;
 	}
-#if 0
-	snd_cs4231_debug(chip);
-#endif
+
 	return result;
 }
 
@@ -790,9 +781,6 @@ static unsigned char snd_cs4231_get_format(cs4231_t *chip, int format, int chann
 	}
 	if (channels > 1)
 		rformat |= CS4231_STEREO;
-#if 0
-	snd_printk("get_format: 0x%x (mode=0x%x)\n", format, mode);
-#endif
 	return rformat;
 }
 
@@ -944,7 +932,7 @@ static void snd_cs4231_init(cs4231_t *chip)
 	snd_cs4231_mce_down(chip);
 
 #ifdef SNDRV_DEBUG_MCE
-	snd_printk("init: (1)\n");
+	snd_printdd("init: (1)\n");
 #endif
 	snd_cs4231_mce_up(chip);
 	spin_lock_irqsave(&chip->lock, flags);
@@ -957,7 +945,7 @@ static void snd_cs4231_init(cs4231_t *chip)
 	snd_cs4231_mce_down(chip);
 
 #ifdef SNDRV_DEBUG_MCE
-	snd_printk("init: (2)\n");
+	snd_printdd("init: (2)\n");
 #endif
 
 	snd_cs4231_mce_up(chip);
@@ -967,7 +955,7 @@ static void snd_cs4231_init(cs4231_t *chip)
 	snd_cs4231_mce_down(chip);
 
 #ifdef SNDRV_DEBUG_MCE
-	snd_printk("init: (3) - afei = 0x%x\n", chip->image[CS4231_ALT_FEATURE_1]);
+	snd_printdd("init: (3) - afei = 0x%x\n", chip->image[CS4231_ALT_FEATURE_1]);
 #endif
 
 	spin_lock_irqsave(&chip->lock, flags);
@@ -981,7 +969,7 @@ static void snd_cs4231_init(cs4231_t *chip)
 	snd_cs4231_mce_down(chip);
 
 #ifdef SNDRV_DEBUG_MCE
-	snd_printk("init: (4)\n");
+	snd_printdd("init: (4)\n");
 #endif
 
 	snd_cs4231_mce_up(chip);
@@ -991,7 +979,7 @@ static void snd_cs4231_init(cs4231_t *chip)
 	snd_cs4231_mce_down(chip);
 
 #ifdef SNDRV_DEBUG_MCE
-	snd_printk("init: (5)\n");
+	snd_printdd("init: (5)\n");
 #endif
 }
 
@@ -1022,6 +1010,7 @@ static int snd_cs4231_open(cs4231_t *chip, unsigned int mode)
 		       CS4231_RECORD_IRQ |
 		       CS4231_TIMER_IRQ);
 	snd_cs4231_out(chip, CS4231_IRQ_STATUS, 0);
+
 	spin_unlock_irqrestore(&chip->lock, flags);
 
 	chip->mode = mode;
@@ -1136,11 +1125,21 @@ static int snd_cs4231_playback_hw_free(snd_pcm_substream_t *substream)
 static int snd_cs4231_playback_prepare(snd_pcm_substream_t *substream)
 {
 	cs4231_t *chip = snd_pcm_substream_chip(substream);
+	snd_pcm_runtime_t *runtime = substream->runtime;
 	unsigned long flags;
 
 	spin_lock_irqsave(&chip->lock, flags);
+
 	chip->image[CS4231_IFACE_CTRL] &= ~(CS4231_PLAYBACK_ENABLE |
 					    CS4231_PLAYBACK_PIO);
+
+	if (runtime->period_size > 0xffff + 1)
+		BUG();
+
+	snd_cs4231_out(chip, CS4231_PLY_LWR_CNT, (runtime->period_size - 1) & 0x00ff);
+	snd_cs4231_out(chip, CS4231_PLY_UPR_CNT, (runtime->period_size - 1) >> 8 & 0x00ff);
+	chip->p_periods_sent = 0;
+
 	spin_unlock_irqrestore(&chip->lock, flags);
 
 	return 0;
@@ -1172,12 +1171,16 @@ static int snd_cs4231_capture_hw_free(snd_pcm_substream_t *substream)
 static int snd_cs4231_capture_prepare(snd_pcm_substream_t *substream)
 {
 	cs4231_t *chip = snd_pcm_substream_chip(substream);
+	snd_pcm_runtime_t *runtime = substream->runtime;
 	unsigned long flags;
 
 	spin_lock_irqsave(&chip->lock, flags);
 	chip->image[CS4231_IFACE_CTRL] &= ~(CS4231_RECORD_ENABLE |
 					    CS4231_RECORD_PIO);
 
+	snd_cs4231_out(chip, CS4231_REC_LWR_CNT, (runtime->period_size - 1) & 0x00ff);
+	snd_cs4231_out(chip, CS4231_REC_LWR_CNT, (runtime->period_size - 1) >> 8 & 0x00ff);
+
 	spin_unlock_irqrestore(&chip->lock, flags);
 
 	return 0;
@@ -1196,53 +1199,61 @@ static void snd_cs4231_overrange(cs4231_t *chip)
 		chip->capture_substream->runtime->overrange++;
 }
 
-static void snd_cs4231_generic_interrupt(cs4231_t *chip)
+static irqreturn_t snd_cs4231_generic_interrupt(cs4231_t *chip)
 {
 	unsigned long flags;
 	unsigned char status;
 
+	/*This is IRQ is not raised by the cs4231*/
+	if (!(__cs4231_readb(chip, CS4231P(chip, STATUS)) & CS4231_GLOBALIRQ))
+		return IRQ_NONE;
+
 	status = snd_cs4231_in(chip, CS4231_IRQ_STATUS);
-	if (!status)
-		return;
 
 	if (status & CS4231_TIMER_IRQ) {
 		if (chip->timer)
 			snd_timer_interrupt(chip->timer, chip->timer->sticks);
 	}		
-	if (status & CS4231_PLAYBACK_IRQ)
-		snd_pcm_period_elapsed(chip->playback_substream);
-	if (status & CS4231_RECORD_IRQ) {
+
+	if (status & CS4231_RECORD_IRQ)
 		snd_cs4231_overrange(chip);
-		snd_pcm_period_elapsed(chip->capture_substream);
-	}
 
 	/* ACK the CS4231 interrupt. */
 	spin_lock_irqsave(&chip->lock, flags);
 	snd_cs4231_outm(chip, CS4231_IRQ_STATUS, ~CS4231_ALL_IRQS | ~status, 0);
 	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return 0;
 }
 
 #ifdef SBUS_SUPPORT
 static irqreturn_t snd_cs4231_sbus_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 {
 	cs4231_t *chip = dev_id;
-	u32 csr;
-
-	csr = sbus_readl(chip->port + APCCSR);
-	if (!(csr & (APC_INT_PENDING |
-		     APC_PLAY_INT |
-		     APC_CAPT_INT |
-		     APC_GENL_INT |
-		     APC_XINT_PEMP |
-		     APC_XINT_CEMP)))
-		return IRQ_NONE;
 
 	/* ACK the APC interrupt. */
+	u32 csr = sbus_readl(chip->port + APCCSR);
+
 	sbus_writel(csr, chip->port + APCCSR);
 
-	snd_cs4231_generic_interrupt(chip);
+	if ((chip->image[CS4231_IFACE_CTRL] & CS4231_PLAYBACK_ENABLE) &&
+	    (csr & APC_PLAY_INT) &&
+	    (csr & APC_XINT_PNVA) &&
+	    !(csr & APC_XINT_EMPT)) {
+		snd_cs4231_sbus_advance_dma(chip->playback_substream,
+					    &chip->p_periods_sent);
+		snd_pcm_period_elapsed(chip->playback_substream);
+	}
 
-	return IRQ_HANDLED;
+	if ((chip->image[CS4231_IFACE_CTRL] & CS4231_RECORD_ENABLE) &&
+	    (csr & APC_CAPT_INT) &&
+	    (csr & APC_XINT_CNVA)) {
+		snd_cs4231_sbus_advance_dma(chip->capture_substream,
+					    &chip->c_periods_sent);
+		snd_pcm_period_elapsed(chip->capture_substream);
+	}
+
+	return snd_cs4231_generic_interrupt(chip);
 }
 #endif
 
@@ -1290,7 +1301,8 @@ static snd_pcm_uframes_t snd_cs4231_playback_pointer(snd_pcm_substream_t *substr
 #ifdef EBUS_SUPPORT
 	}
 #endif
-	ptr += (period_bytes - residue);
+	ptr += period_bytes - residue;
+
 	return bytes_to_frames(substream->runtime, ptr);
 }
 
@@ -1314,7 +1326,7 @@ static snd_pcm_uframes_t snd_cs4231_capture_pointer(snd_pcm_substream_t * substr
 #ifdef EBUS_SUPPORT
 	}
 #endif
-	ptr += (period_bytes - residue);
+	ptr += period_bytes - residue;
 	return bytes_to_frames(substream->runtime, ptr);
 }
 
@@ -1328,9 +1340,6 @@ static int snd_cs4231_probe(cs4231_t *chip)
 	int i, id, vers;
 	unsigned char *ptr;
 
-#if 0
-	snd_cs4231_debug(chip);
-#endif
 	id = vers = 0;
 	for (i = 0; i < 50; i++) {
 		mb();
@@ -1985,13 +1994,13 @@ static int __init snd_cs4231_sbus_create(snd_card_t *card,
 	chip->port = sbus_ioremap(&sdev->resource[0], 0,
 				  chip->regs_size, "cs4231");
 	if (!chip->port) {
-		snd_printk("cs4231-%d: Unable to map chip registers.\n", dev);
+		snd_printdd("cs4231-%d: Unable to map chip registers.\n", dev);
 		return -EIO;
 	}
 
 	if (request_irq(sdev->irqs[0], snd_cs4231_sbus_interrupt,
 			SA_SHIRQ, "cs4231", chip)) {
-		snd_printk("cs4231-%d: Unable to grab SBUS IRQ %s\n",
+		snd_printdd("cs4231-%d: Unable to grab SBUS IRQ %s\n",
 			   dev,
 			   __irq_itoa(sdev->irqs[0]));
 		snd_cs4231_sbus_free(chip);
@@ -2113,29 +2122,29 @@ static int __init snd_cs4231_ebus_create(snd_card_t *card,
 	chip->eb2c.regs = ioremap(edev->resource[2].start, 0x10);
 	if (!chip->port || !chip->eb2p.regs || !chip->eb2c.regs) {
 		snd_cs4231_ebus_free(chip);
-		snd_printk("cs4231-%d: Unable to map chip registers.\n", dev);
+		snd_printdd("cs4231-%d: Unable to map chip registers.\n", dev);
 		return -EIO;
 	}
 
 	if (ebus_dma_register(&chip->eb2c)) {
 		snd_cs4231_ebus_free(chip);
-		snd_printk("cs4231-%d: Unable to register EBUS capture DMA\n", dev);
+		snd_printdd("cs4231-%d: Unable to register EBUS capture DMA\n", dev);
 		return -EBUSY;
 	}
 	if (ebus_dma_irq_enable(&chip->eb2c, 1)) {
 		snd_cs4231_ebus_free(chip);
-		snd_printk("cs4231-%d: Unable to enable EBUS capture IRQ\n", dev);
+		snd_printdd("cs4231-%d: Unable to enable EBUS capture IRQ\n", dev);
 		return -EBUSY;
 	}
 
 	if (ebus_dma_register(&chip->eb2p)) {
 		snd_cs4231_ebus_free(chip);
-		snd_printk("cs4231-%d: Unable to register EBUS play DMA\n", dev);
+		snd_printdd("cs4231-%d: Unable to register EBUS play DMA\n", dev);
 		return -EBUSY;
 	}
 	if (ebus_dma_irq_enable(&chip->eb2p, 1)) {
 		snd_cs4231_ebus_free(chip);
-		snd_printk("cs4231-%d: Unable to enable EBUS play IRQ\n", dev);
+		snd_printdd("cs4231-%d: Unable to enable EBUS play IRQ\n", dev);
 		return -EBUSY;
 	}