Diffstat (limited to 'drivers')
-rw-r--r--  drivers/base/memory.c | 7
-rw-r--r--  drivers/block/Kconfig | 2
-rw-r--r--  drivers/block/nbd.c | 122
-rw-r--r--  drivers/block/paride/Kconfig | 5
-rw-r--r--  drivers/char/Kconfig | 2
-rw-r--r--  drivers/char/hangcheck-timer.c | 2
-rw-r--r--  drivers/char/hw_random.c | 70
-rw-r--r--  drivers/char/ipmi/ipmi_msghandler.c | 4
-rw-r--r--  drivers/char/watchdog/Kconfig | 2
-rw-r--r--  drivers/ieee1394/ieee1394_core.c | 4
-rw-r--r--  drivers/input/evdev.c | 2
-rw-r--r--  drivers/macintosh/therm_adt746x.c | 39
-rw-r--r--  drivers/macintosh/therm_pm72.c | 7
-rw-r--r--  drivers/macintosh/windfarm_lm75_sensor.c | 7
-rw-r--r--  drivers/md/bitmap.c | 114
-rw-r--r--  drivers/md/dm-crypt.c | 5
-rw-r--r--  drivers/md/dm-io.h | 3
-rw-r--r--  drivers/md/dm-ioctl.c | 21
-rw-r--r--  drivers/md/dm-log.c | 2
-rw-r--r--  drivers/md/dm-raid1.c | 13
-rw-r--r--  drivers/md/dm-snap.c | 25
-rw-r--r--  drivers/md/dm.c | 95
-rw-r--r--  drivers/md/dm.h | 5
-rw-r--r--  drivers/md/faulty.c | 9
-rw-r--r--  drivers/md/kcopyd.c | 3
-rw-r--r--  drivers/md/linear.c | 14
-rw-r--r--  drivers/md/md.c | 893
-rw-r--r--  drivers/md/multipath.c | 22
-rw-r--r--  drivers/md/raid0.c | 26
-rw-r--r--  drivers/md/raid1.c | 726
-rw-r--r--  drivers/md/raid10.c | 544
-rw-r--r--  drivers/md/raid5.c | 174
-rw-r--r--  drivers/md/raid6main.c | 348
-rw-r--r--  drivers/media/video/cpia_pp.c | 30
-rw-r--r--  drivers/message/i2o/Kconfig | 12
-rw-r--r--  drivers/message/i2o/bus-osm.c | 23
-rw-r--r--  drivers/message/i2o/config-osm.c | 2
-rw-r--r--  drivers/message/i2o/core.h | 20
-rw-r--r--  drivers/message/i2o/device.c | 339
-rw-r--r--  drivers/message/i2o/driver.c | 12
-rw-r--r--  drivers/message/i2o/exec-osm.c | 114
-rw-r--r--  drivers/message/i2o/i2o_block.c | 188
-rw-r--r--  drivers/message/i2o/i2o_config.c | 196
-rw-r--r--  drivers/message/i2o/i2o_lan.h | 38
-rw-r--r--  drivers/message/i2o/i2o_proc.c | 2
-rw-r--r--  drivers/message/i2o/i2o_scsi.c | 89
-rw-r--r--  drivers/message/i2o/iop.c | 356
-rw-r--r--  drivers/message/i2o/pci.c | 7
-rw-r--r--  drivers/net/phy/Kconfig | 2
-rw-r--r--  drivers/net/plip.c | 2
-rw-r--r--  drivers/parport/Kconfig | 2
-rw-r--r--  drivers/parport/daisy.c | 51
-rw-r--r--  drivers/parport/ieee1284_ops.c | 62
-rw-r--r--  drivers/parport/parport_pc.c | 30
-rw-r--r--  drivers/parport/probe.c | 199
-rw-r--r--  drivers/parport/share.c | 1
-rw-r--r--  drivers/pnp/pnpbios/bioscalls.c | 45
-rw-r--r--  drivers/s390/Makefile | 2
-rw-r--r--  drivers/s390/block/Kconfig | 8
-rw-r--r--  drivers/s390/block/dasd.c | 32
-rw-r--r--  drivers/s390/block/dasd_diag.c | 11
-rw-r--r--  drivers/s390/block/dasd_diag.h | 31
-rw-r--r--  drivers/s390/block/dasd_eckd.c | 9
-rw-r--r--  drivers/s390/block/dasd_fba.c | 6
-rw-r--r--  drivers/s390/block/dasd_int.h | 3
-rw-r--r--  drivers/s390/block/dasd_ioctl.c | 5
-rw-r--r--  drivers/s390/block/dcssblk.c | 2
-rw-r--r--  drivers/s390/block/xpram.c | 4
-rw-r--r--  drivers/s390/char/sclp_cpi.c | 2
-rw-r--r--  drivers/s390/char/sclp_quiesce.c | 2
-rw-r--r--  drivers/s390/char/tape_block.c | 2
-rw-r--r--  drivers/s390/char/vmwatchdog.c | 2
-rw-r--r--  drivers/s390/cio/blacklist.c | 234
-rw-r--r--  drivers/s390/cio/blacklist.h | 2
-rw-r--r--  drivers/s390/cio/ccwgroup.c | 6
-rw-r--r--  drivers/s390/cio/chsc.c | 473
-rw-r--r--  drivers/s390/cio/chsc.h | 13
-rw-r--r--  drivers/s390/cio/cio.c | 168
-rw-r--r--  drivers/s390/cio/cio.h | 11
-rw-r--r--  drivers/s390/cio/cmf.c | 8
-rw-r--r--  drivers/s390/cio/css.c | 297
-rw-r--r--  drivers/s390/cio/css.h | 43
-rw-r--r--  drivers/s390/cio/device.c | 47
-rw-r--r--  drivers/s390/cio/device.h | 1
-rw-r--r--  drivers/s390/cio/device_fsm.c | 29
-rw-r--r--  drivers/s390/cio/device_id.c | 26
-rw-r--r--  drivers/s390/cio/device_ops.c | 4
-rw-r--r--  drivers/s390/cio/device_pgid.c | 56
-rw-r--r--  drivers/s390/cio/device_status.c | 14
-rw-r--r--  drivers/s390/cio/ioasm.h | 86
-rw-r--r--  drivers/s390/cio/qdio.c | 713
-rw-r--r--  drivers/s390/cio/qdio.h | 144
-rw-r--r--  drivers/s390/cio/schid.h | 26
-rw-r--r--  drivers/s390/crypto/z90common.h | 9
-rw-r--r--  drivers/s390/crypto/z90crypt.h | 13
-rw-r--r--  drivers/s390/crypto/z90hardware.c | 309
-rw-r--r--  drivers/s390/crypto/z90main.c | 111
-rw-r--r--  drivers/s390/net/Kconfig | 2
-rw-r--r--  drivers/s390/net/claw.c | 6
-rw-r--r--  drivers/s390/net/cu3088.c | 3
-rw-r--r--  drivers/s390/net/iucv.c | 10
-rw-r--r--  drivers/s390/net/qeth_main.c | 21
-rw-r--r--  drivers/s390/s390_rdev.c | 53
-rw-r--r--  drivers/s390/s390mach.c | 66
-rw-r--r--  drivers/s390/sysinfo.c | 2
-rw-r--r--  drivers/scsi/Kconfig | 10
-rw-r--r--  drivers/scsi/ata_piix.c | 4
-rw-r--r--  drivers/scsi/libata-core.c | 114
-rw-r--r--  drivers/scsi/libata-scsi.c | 16
-rw-r--r--  drivers/scsi/scsi_sysfs.c | 31
-rw-r--r--  drivers/serial/mpc52xx_uart.c | 28
111 files changed, 5557 insertions, 2912 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 7e1d077874d..58801d718cc 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -49,12 +49,12 @@ static struct kset_uevent_ops memory_uevent_ops = {
static struct notifier_block *memory_chain;
-static int register_memory_notifier(struct notifier_block *nb)
+int register_memory_notifier(struct notifier_block *nb)
{
return notifier_chain_register(&memory_chain, nb);
}
-static void unregister_memory_notifier(struct notifier_block *nb)
+void unregister_memory_notifier(struct notifier_block *nb)
{
notifier_chain_unregister(&memory_chain, nb);
}
@@ -62,8 +62,7 @@ static void unregister_memory_notifier(struct notifier_block *nb)
/*
* register_memory - Setup a sysfs device for a memory block
*/
-static int
-register_memory(struct memory_block *memory, struct mem_section *section,
+int register_memory(struct memory_block *memory, struct mem_section *section,
struct node *root)
{
int error;
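The hunk above drops the static qualifiers so code outside memory.c can hook the memory hotplug notifier chain. A minimal sketch of such a client, assuming only the generic notifier_block convention (the callback and variable names here are hypothetical):

#include <linux/notifier.h>

/* hypothetical callback; signature follows the generic notifier convention */
static int my_memory_event(struct notifier_block *nb,
                           unsigned long action, void *data)
{
        /* react to a memory block coming or going */
        return NOTIFY_OK;
}

static struct notifier_block my_memory_nb = {
        .notifier_call = my_memory_event,
};

/* in some init path:
 *      register_memory_notifier(&my_memory_nb);
 * and on teardown:
 *      unregister_memory_notifier(&my_memory_nb);
 */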
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index c4b9d2adfc0..139cbba7618 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -117,7 +117,7 @@ config BLK_DEV_XD
config PARIDE
tristate "Parallel port IDE device support"
- depends on PARPORT
+ depends on PARPORT_PC
---help---
There are many external CD-ROM and disk devices that connect through
your computer's parallel port. Most of them are actually IDE devices
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 485345c8e63..33d6f237b2e 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -54,11 +54,15 @@
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
#include <net/sock.h>
#include <linux/devfs_fs_kernel.h>
#include <asm/uaccess.h>
+#include <asm/system.h>
#include <asm/types.h>
#include <linux/nbd.h>
@@ -230,14 +234,6 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
request.len = htonl(size);
memcpy(request.handle, &req, sizeof(req));
- down(&lo->tx_lock);
-
- if (!sock || !lo->sock) {
- printk(KERN_ERR "%s: Attempted send on closed socket\n",
- lo->disk->disk_name);
- goto error_out;
- }
-
dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
lo->disk->disk_name, req,
nbdcmd_to_ascii(nbd_cmd(req)),
@@ -276,11 +272,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
}
}
}
- up(&lo->tx_lock);
return 0;
error_out:
- up(&lo->tx_lock);
return 1;
}
@@ -289,9 +283,14 @@ static struct request *nbd_find_request(struct nbd_device *lo, char *handle)
struct request *req;
struct list_head *tmp;
struct request *xreq;
+ int err;
memcpy(&xreq, handle, sizeof(xreq));
+ err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq);
+ if (unlikely(err))
+ goto out;
+
spin_lock(&lo->queue_lock);
list_for_each(tmp, &lo->queue_head) {
req = list_entry(tmp, struct request, queuelist);
@@ -302,7 +301,11 @@ static struct request *nbd_find_request(struct nbd_device *lo, char *handle)
return req;
}
spin_unlock(&lo->queue_lock);
- return NULL;
+
+ err = -ENOENT;
+
+out:
+ return ERR_PTR(err);
}
static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec)
@@ -331,7 +334,11 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
goto harderror;
}
req = nbd_find_request(lo, reply.handle);
- if (req == NULL) {
+ if (unlikely(IS_ERR(req))) {
+ result = PTR_ERR(req);
+ if (result != -ENOENT)
+ goto harderror;
+
printk(KERN_ERR "%s: Unexpected reply (%p)\n",
lo->disk->disk_name, reply.handle);
result = -EBADR;
@@ -395,19 +402,24 @@ static void nbd_clear_que(struct nbd_device *lo)
BUG_ON(lo->magic != LO_MAGIC);
- do {
- req = NULL;
- spin_lock(&lo->queue_lock);
- if (!list_empty(&lo->queue_head)) {
- req = list_entry(lo->queue_head.next, struct request, queuelist);
- list_del_init(&req->queuelist);
- }
- spin_unlock(&lo->queue_lock);
- if (req) {
- req->errors++;
- nbd_end_request(req);
- }
- } while (req);
+ /*
+ * Because we have set lo->sock to NULL under the tx_lock, all
+ * modifications to the list must have completed by now. For
+ * the same reason, the active_req must be NULL.
+ *
+ * As a consequence, we don't need to take the spin lock while
+ * purging the list here.
+ */
+ BUG_ON(lo->sock);
+ BUG_ON(lo->active_req);
+
+ while (!list_empty(&lo->queue_head)) {
+ req = list_entry(lo->queue_head.next, struct request,
+ queuelist);
+ list_del_init(&req->queuelist);
+ req->errors++;
+ nbd_end_request(req);
+ }
}
/*
@@ -435,11 +447,6 @@ static void do_nbd_request(request_queue_t * q)
BUG_ON(lo->magic != LO_MAGIC);
- if (!lo->file) {
- printk(KERN_ERR "%s: Request when not-ready\n",
- lo->disk->disk_name);
- goto error_out;
- }
nbd_cmd(req) = NBD_CMD_READ;
if (rq_data_dir(req) == WRITE) {
nbd_cmd(req) = NBD_CMD_WRITE;
@@ -453,32 +460,34 @@ static void do_nbd_request(request_queue_t * q)
req->errors = 0;
spin_unlock_irq(q->queue_lock);
- spin_lock(&lo->queue_lock);
-
- if (!lo->file) {
- spin_unlock(&lo->queue_lock);
- printk(KERN_ERR "%s: failed between accept and semaphore, file lost\n",
- lo->disk->disk_name);
+ down(&lo->tx_lock);
+ if (unlikely(!lo->sock)) {
+ up(&lo->tx_lock);
+ printk(KERN_ERR "%s: Attempted send on closed socket\n",
+ lo->disk->disk_name);
req->errors++;
nbd_end_request(req);
spin_lock_irq(q->queue_lock);
continue;
}
- list_add(&req->queuelist, &lo->queue_head);
- spin_unlock(&lo->queue_lock);
+ lo->active_req = req;
if (nbd_send_req(lo, req) != 0) {
printk(KERN_ERR "%s: Request send failed\n",
lo->disk->disk_name);
- if (nbd_find_request(lo, (char *)&req) != NULL) {
- /* we still own req */
- req->errors++;
- nbd_end_request(req);
- } else /* we're racing with nbd_clear_que */
- printk(KERN_DEBUG "nbd: can't find req\n");
+ req->errors++;
+ nbd_end_request(req);
+ } else {
+ spin_lock(&lo->queue_lock);
+ list_add(&req->queuelist, &lo->queue_head);
+ spin_unlock(&lo->queue_lock);
}
+ lo->active_req = NULL;
+ up(&lo->tx_lock);
+ wake_up_all(&lo->active_wq);
+
spin_lock_irq(q->queue_lock);
continue;
@@ -529,17 +538,10 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
down(&lo->tx_lock);
lo->sock = NULL;
up(&lo->tx_lock);
- spin_lock(&lo->queue_lock);
file = lo->file;
lo->file = NULL;
- spin_unlock(&lo->queue_lock);
nbd_clear_que(lo);
- spin_lock(&lo->queue_lock);
- if (!list_empty(&lo->queue_head)) {
- printk(KERN_ERR "nbd: disconnect: some requests are in progress -> please try again.\n");
- error = -EBUSY;
- }
- spin_unlock(&lo->queue_lock);
+ BUG_ON(!list_empty(&lo->queue_head));
if (file)
fput(file);
return error;
@@ -598,24 +600,19 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
lo->sock = NULL;
}
up(&lo->tx_lock);
- spin_lock(&lo->queue_lock);
file = lo->file;
lo->file = NULL;
- spin_unlock(&lo->queue_lock);
nbd_clear_que(lo);
printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
if (file)
fput(file);
return lo->harderror;
case NBD_CLEAR_QUE:
- down(&lo->tx_lock);
- if (lo->sock) {
- up(&lo->tx_lock);
- return 0; /* probably should be error, but that would
- * break "nbd-client -d", so just return 0 */
- }
- up(&lo->tx_lock);
- nbd_clear_que(lo);
+ /*
+ * This is for compatibility only. The queue is always cleared
+ * by NBD_DO_IT or NBD_CLEAR_SOCK.
+ */
+ BUG_ON(!lo->sock && !list_empty(&lo->queue_head));
return 0;
case NBD_PRINT_DEBUG:
printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
@@ -688,6 +685,7 @@ static int __init nbd_init(void)
spin_lock_init(&nbd_dev[i].queue_lock);
INIT_LIST_HEAD(&nbd_dev[i].queue_head);
init_MUTEX(&nbd_dev[i].tx_lock);
+ init_waitqueue_head(&nbd_dev[i].active_wq);
nbd_dev[i].blksize = 1024;
nbd_dev[i].bytesize = 0x7ffffc00ULL << 10; /* 2TB */
disk->major = NBD_MAJOR;
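Taken together, the nbd.c hunks replace queue_lock-based state checks with a sender-side handshake: tx_lock serialises transmitters, active_req names the request currently on the wire, and active_wq lets the reply path wait until that request has either been queued or aborted. A condensed sketch of the protocol, assuming simplified types (not the full driver):

#include <linux/blkdev.h>
#include <linux/wait.h>
#include <asm/semaphore.h>

struct nbd_sketch {
        struct semaphore tx_lock;       /* serialises senders */
        struct request *active_req;     /* request currently being sent */
        wait_queue_head_t active_wq;    /* reply path waits here */
};

static void sender_side(struct nbd_sketch *lo, struct request *req)
{
        down(&lo->tx_lock);
        lo->active_req = req;
        /* ... transmit req, then link it on queue_head under queue_lock ... */
        lo->active_req = NULL;
        up(&lo->tx_lock);
        wake_up_all(&lo->active_wq);
}

static int reply_side_wait(struct nbd_sketch *lo, struct request *xreq)
{
        /* 0 once xreq is no longer in flight; -ERESTARTSYS if interrupted,
         * which nbd_find_request() turns into an ERR_PTR() return */
        return wait_event_interruptible(lo->active_wq,
                                        lo->active_req != xreq);
}

This is also why nbd_clear_que() can now walk queue_head without the spin lock: it only runs after lo->sock was cleared under tx_lock, so no sender can still be adding entries.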
diff --git a/drivers/block/paride/Kconfig b/drivers/block/paride/Kconfig
index 17ff4056125..c0d2854dd09 100644
--- a/drivers/block/paride/Kconfig
+++ b/drivers/block/paride/Kconfig
@@ -4,11 +4,12 @@
# PARIDE doesn't need PARPORT, but if PARPORT is configured as a module,
# PARIDE must also be a module. The bogus CONFIG_PARIDE_PARPORT option
# controls the choices given to the user ...
+# PARIDE only supports PC style parports. Tough for USB or other parports...
config PARIDE_PARPORT
tristate
depends on PARIDE!=n
- default m if PARPORT=m
- default y if PARPORT!=m
+ default m if PARPORT_PC=m
+ default y if PARPORT_PC!=m
comment "Parallel IDE high-level drivers"
depends on PARIDE
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 84e68cdd451..5ebd06b1b4c 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -985,7 +985,7 @@ config HPET_MMAP
config HANGCHECK_TIMER
tristate "Hangcheck timer"
- depends on X86 || IA64 || PPC64 || ARCH_S390
+ depends on X86 || IA64 || PPC64 || S390
help
The hangcheck-timer module detects when the system has gone
out to lunch past a certain margin. It can reboot the system
diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
index 66e53dd450f..40a67c86420 100644
--- a/drivers/char/hangcheck-timer.c
+++ b/drivers/char/hangcheck-timer.c
@@ -120,7 +120,7 @@ __setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks);
#if defined(CONFIG_X86)
# define HAVE_MONOTONIC
# define TIMER_FREQ 1000000000ULL
-#elif defined(CONFIG_ARCH_S390)
+#elif defined(CONFIG_S390)
/* FA240000 is 1 second in the IBM time universe (Page 4-38, Principles of Op for zSeries) */
# define TIMER_FREQ 0xFA240000ULL
#elif defined(CONFIG_IA64)
diff --git a/drivers/char/hw_random.c b/drivers/char/hw_random.c
index 6f673d2de0b..49769f59ea1 100644
--- a/drivers/char/hw_random.c
+++ b/drivers/char/hw_random.c
@@ -1,4 +1,9 @@
/*
+ Added support for the AMD Geode LX RNG
+ (c) Copyright 2004-2005 Advanced Micro Devices, Inc.
+
+ derived from
+
Hardware driver for the Intel/AMD/VIA Random Number Generators (RNG)
(c) Copyright 2003 Red Hat Inc <jgarzik@redhat.com>
@@ -95,6 +100,11 @@ static unsigned int via_data_present (void);
static u32 via_data_read (void);
#endif
+static int __init geode_init(struct pci_dev *dev);
+static void geode_cleanup(void);
+static unsigned int geode_data_present (void);
+static u32 geode_data_read (void);
+
struct rng_operations {
int (*init) (struct pci_dev *dev);
void (*cleanup) (void);
@@ -122,6 +132,7 @@ enum {
rng_hw_intel,
rng_hw_amd,
rng_hw_via,
+ rng_hw_geode,
};
static struct rng_operations rng_vendor_ops[] = {
@@ -139,6 +150,9 @@ static struct rng_operations rng_vendor_ops[] = {
/* rng_hw_via */
{ via_init, via_cleanup, via_data_present, via_data_read, 1 },
#endif
+
+ /* rng_hw_geode */
+ { geode_init, geode_cleanup, geode_data_present, geode_data_read, 4 }
};
/*
@@ -159,6 +173,9 @@ static struct pci_device_id rng_pci_tbl[] = {
{ 0x8086, 0x244e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
{ 0x8086, 0x245e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
+ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LX_AES,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_geode },
+
{ 0, }, /* terminate list */
};
MODULE_DEVICE_TABLE (pci, rng_pci_tbl);
@@ -460,6 +477,57 @@ static void via_cleanup(void)
}
#endif
+/***********************************************************************
+ *
+ * AMD Geode RNG operations
+ *
+ */
+
+static void __iomem *geode_rng_base = NULL;
+
+#define GEODE_RNG_DATA_REG 0x50
+#define GEODE_RNG_STATUS_REG 0x54
+
+static u32 geode_data_read(void)
+{
+ u32 val;
+
+ assert(geode_rng_base != NULL);
+ val = readl(geode_rng_base + GEODE_RNG_DATA_REG);
+ return val;
+}
+
+static unsigned int geode_data_present(void)
+{
+ u32 val;
+
+ assert(geode_rng_base != NULL);
+ val = readl(geode_rng_base + GEODE_RNG_STATUS_REG);
+ return val;
+}
+
+static void geode_cleanup(void)
+{
+ iounmap(geode_rng_base);
+ geode_rng_base = NULL;
+}
+
+static int geode_init(struct pci_dev *dev)
+{
+ unsigned long rng_base = pci_resource_start(dev, 0);
+
+ if (rng_base == 0)
+ return 1;
+
+ geode_rng_base = ioremap(rng_base, 0x58);
+
+ if (geode_rng_base == NULL) {
+ printk(KERN_ERR PFX "Cannot ioremap RNG memory\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
/***********************************************************************
*
@@ -574,7 +642,7 @@ static int __init rng_init (void)
DPRINTK ("ENTER\n");
- /* Probe for Intel, AMD RNGs */
+ /* Probe for Intel, AMD, Geode RNGs */
for_each_pci_dev(pdev) {
ent = pci_match_id(rng_pci_tbl, pdev);
if (ent) {
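The Geode backend slots into the driver's existing dispatch table: each rng_vendor_ops entry supplies init/cleanup/data_present/data_read plus how many bytes one data_read delivers (4 for the Geode, 1 for VIA), and the PCI table's driver_data field indexes that array. A sketch of how a consumer could drain such an entry, assuming the per-read byte count is the last field shown in the table above (called n_bytes here); the loop is simplified and busy-waits where the real driver sleeps:

#include <linux/string.h>
#include <linux/types.h>

/* `ops` would be &rng_vendor_ops[ent->driver_data] after the
 * pci_match_id() loop in rng_init() */
static size_t drain_rng(struct rng_operations *ops, u8 *buf, size_t want)
{
        size_t have = 0;

        while (have + ops->n_bytes <= want) {
                u32 data;

                if (!ops->data_present())
                        continue;       /* real code schedules/times out */
                data = ops->data_read();
                memcpy(buf + have, &data, ops->n_bytes);
                have += ops->n_bytes;
        }
        return have;
}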
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 1f56b4cf0f5..561430ed94a 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -787,7 +787,6 @@ int ipmi_destroy_user(ipmi_user_t user)
int i;
unsigned long flags;
struct cmd_rcvr *rcvr;
- struct list_head *entry1, *entry2;
struct cmd_rcvr *rcvrs = NULL;
user->valid = 1;
@@ -812,8 +811,7 @@ int ipmi_destroy_user(ipmi_user_t user)
* synchronize_rcu()) then free everything in that list.
*/
down(&intf->cmd_rcvrs_lock);
- list_for_each_safe_rcu(entry1, entry2, &intf->cmd_rcvrs) {
- rcvr = list_entry(entry1, struct cmd_rcvr, link);
+ list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) {
if (rcvr->user == user) {
list_del_rcu(&rcvr->link);
rcvr->next = rcvrs;
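The conversion above works because list_del_rcu() leaves the removed entry's forward pointer intact, so iteration can continue from it and the _safe variant is unnecessary; removed entries are chained privately and freed only after a grace period. The generic pattern, as a sketch with assumed names:

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct rcvr {
        struct list_head link;
        void *user;
        struct rcvr *next;      /* private collection chain */
};

static void remove_user(struct list_head *head, void *user)
{
        struct rcvr *r, *victims = NULL;

        /* caller holds the list's update-side lock */
        list_for_each_entry_rcu(r, head, link) {
                if (r->user == user) {
                        list_del_rcu(&r->link); /* next pointer stays valid */
                        r->next = victims;
                        victims = r;
                }
        }
        synchronize_rcu();      /* wait out readers still walking the list */
        while (victims) {
                r = victims;
                victims = victims->next;
                kfree(r);
        }
}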
diff --git a/drivers/char/watchdog/Kconfig b/drivers/char/watchdog/Kconfig
index 344001b45af..a6544790af6 100644
--- a/drivers/char/watchdog/Kconfig
+++ b/drivers/char/watchdog/Kconfig
@@ -438,7 +438,7 @@ config INDYDOG
config ZVM_WATCHDOG
tristate "z/VM Watchdog Timer"
- depends on WATCHDOG && ARCH_S390
+ depends on WATCHDOG && S390
help
IBM s/390 and zSeries machines running under z/VM 5.1 or later
provide a virtual watchdog timer to their guest that cause a
diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c
index 64fbbb01d52..25ef5a86f5f 100644
--- a/drivers/ieee1394/ieee1394_core.c
+++ b/drivers/ieee1394/ieee1394_core.c
@@ -1027,10 +1027,10 @@ static int hpsbpkt_thread(void *__hi)
daemonize("khpsbpkt");
+ current->flags |= PF_NOFREEZE;
+
while (1) {
if (down_interruptible(&khpsbpkt_sig)) {
- if (try_to_freeze())
- continue;
printk("khpsbpkt: received unexpected signal?!\n" );
break;
}
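Rather than fielding the freezer's fake signal with try_to_freeze(), the thread now opts out of freezing entirely. The boilerplate in isolation, as an assumed minimal sketch:

#include <linux/sched.h>

static int my_kthread(void *arg)
{
        daemonize("mythread");
        current->flags |= PF_NOFREEZE;  /* never frozen at suspend time */

        /* main loop: sleep interruptibly and do work; with PF_NOFREEZE
         * set, a wakeup by signal really is unexpected */
        return 0;
}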
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 9f2352bd834..a1e660e3531 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -157,7 +157,7 @@ struct input_event_compat {
# define COMPAT_TEST test_thread_flag(TIF_IA32)
#elif defined(CONFIG_IA64)
# define COMPAT_TEST IS_IA32_PROCESS(ia64_task_regs(current))
-#elif defined(CONFIG_ARCH_S390)
+#elif defined(CONFIG_S390)
# define COMPAT_TEST test_thread_flag(TIF_31BIT)
#elif defined(CONFIG_MIPS)
# define COMPAT_TEST (current->thread.mflags & MF_32BIT_ADDR)
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index f38696622eb..5e1f5e9653c 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -52,6 +52,7 @@ static char *sensor_location[3] = {NULL, NULL, NULL};
static int limit_adjust = 0;
static int fan_speed = -1;
+static int verbose = 0;
MODULE_AUTHOR("Colin Leroy <colin@colino.net>");
MODULE_DESCRIPTION("Driver for ADT746x thermostat in iBook G4 and "
@@ -66,6 +67,10 @@ module_param(fan_speed, int, 0644);
MODULE_PARM_DESC(fan_speed,"Specify starting fan speed (0-255) "
"(default 64)");
+module_param(verbose, bool, 0);
+MODULE_PARM_DESC(verbose,"Verbose log operations "
+ "(default 0)");
+
struct thermostat {
struct i2c_client clt;
u8 temps[3];
@@ -149,13 +154,13 @@ detach_thermostat(struct i2c_adapter *adapter)
if (thread_therm != NULL) {
kthread_stop(thread_therm);
}
-
+
printk(KERN_INFO "adt746x: Putting max temperatures back from "
"%d, %d, %d to %d, %d, %d\n",
th->limits[0], th->limits[1], th->limits[2],
th->initial_limits[0], th->initial_limits[1],
th->initial_limits[2]);
-
+
for (i = 0; i < 3; i++)
write_reg(th, LIMIT_REG[i], th->initial_limits[i]);
@@ -212,12 +217,14 @@ static void write_fan_speed(struct thermostat *th, int speed, int fan)
return;
if (th->last_speed[fan] != speed) {
- if (speed == -1)
- printk(KERN_DEBUG "adt746x: Setting speed to automatic "
- "for %s fan.\n", sensor_location[fan+1]);
- else
- printk(KERN_DEBUG "adt746x: Setting speed to %d "
- "for %s fan.\n", speed, sensor_location[fan+1]);
+ if (verbose) {
+ if (speed == -1)
+ printk(KERN_DEBUG "adt746x: Setting speed to automatic "
+ "for %s fan.\n", sensor_location[fan+1]);
+ else
+ printk(KERN_DEBUG "adt746x: Setting speed to %d "
+ "for %s fan.\n", speed, sensor_location[fan+1]);
+ }
} else
return;
@@ -298,10 +305,11 @@ static void update_fans_speed (struct thermostat *th)
if (new_speed > 255)
new_speed = 255;
- printk(KERN_DEBUG "adt746x: setting fans speed to %d "
- "(limit exceeded by %d on %s) \n",
- new_speed, var,
- sensor_location[fan_number+1]);
+ if (verbose)
+ printk(KERN_DEBUG "adt746x: Setting fans speed to %d "
+ "(limit exceeded by %d on %s) \n",
+ new_speed, var,
+ sensor_location[fan_number+1]);
write_both_fan_speed(th, new_speed);
th->last_var[fan_number] = var;
} else if (var < -2) {
@@ -309,8 +317,9 @@ static void update_fans_speed (struct thermostat *th)
* so cold (lastvar >= -1) */
if (i == 2 && lastvar < -1) {
if (th->last_speed[fan_number] != 0)
- printk(KERN_DEBUG "adt746x: Stopping "
- "fans.\n");
+ if (verbose)
+ printk(KERN_DEBUG "adt746x: Stopping "
+ "fans.\n");
write_both_fan_speed(th, 0);
}
}
@@ -406,7 +415,7 @@ static int attach_one_thermostat(struct i2c_adapter *adapter, int addr,
th->initial_limits[i] = read_reg(th, LIMIT_REG[i]);
set_limit(th, i);
}
-
+
printk(KERN_INFO "adt746x: Lowering max temperatures from %d, %d, %d"
" to %d, %d, %d\n",
th->initial_limits[0], th->initial_limits[1],
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index 190878eef99..435427daed7 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -1988,18 +1988,13 @@ static void fcu_lookup_fans(struct device_node *fcu_node)
static int fcu_of_probe(struct of_device* dev, const struct of_device_id *match)
{
- int rc;
-
state = state_detached;
/* Lookup the fans in the device tree */
fcu_lookup_fans(dev->node);
/* Add the driver */
- rc = i2c_add_driver(&therm_pm72_driver);
- if (rc < 0)
- return rc;
- return 0;
+ return i2c_add_driver(&therm_pm72_driver);
}
static int fcu_of_remove(struct of_device* dev)
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index a0a41ad0f2b..c62ed68a313 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -240,12 +240,7 @@ static int wf_lm75_detach(struct i2c_client *client)
static int __init wf_lm75_sensor_init(void)
{
- int rc;
-
- rc = i2c_add_driver(&wf_lm75_driver);
- if (rc < 0)
- return rc;
- return 0;
+ return i2c_add_driver(&wf_lm75_driver);
}
static void __exit wf_lm75_sensor_exit(void)
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 252d55df964..76a189ceb52 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -315,6 +315,8 @@ static int write_page(struct bitmap *bitmap, struct page *page, int wait)
if (bitmap->file == NULL)
return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);
+ flush_dcache_page(page); /* make sure visible to anyone reading the file */
+
if (wait)
lock_page(page);
else {
@@ -341,7 +343,7 @@ static int write_page(struct bitmap *bitmap, struct page *page, int wait)
/* add to list to be waited for by daemon */
struct page_list *item = mempool_alloc(bitmap->write_pool, GFP_NOIO);
item->page = page;
- page_cache_get(page);
+ get_page(page);
spin_lock(&bitmap->write_lock);
list_add(&item->list, &bitmap->complete_pages);
spin_unlock(&bitmap->write_lock);
@@ -357,10 +359,10 @@ static struct page *read_page(struct file *file, unsigned long index,
struct inode *inode = file->f_mapping->host;
struct page *page = NULL;
loff_t isize = i_size_read(inode);
- unsigned long end_index = isize >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = isize >> PAGE_SHIFT;
- PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_CACHE_SIZE,
- (unsigned long long)index << PAGE_CACHE_SHIFT);
+ PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE,
+ (unsigned long long)index << PAGE_SHIFT);
page = read_cache_page(inode->i_mapping, index,
(filler_t *)inode->i_mapping->a_ops->readpage, file);
@@ -368,7 +370,7 @@ static struct page *read_page(struct file *file, unsigned long index,
goto out;
wait_on_page_locked(page);
if (!PageUptodate(page) || PageError(page)) {
- page_cache_release(page);
+ put_page(page);
page = ERR_PTR(-EIO);
goto out;
}
@@ -376,14 +378,14 @@ static struct page *read_page(struct file *file, unsigned long index,
if (index > end_index) /* we have read beyond EOF */
*bytes_read = 0;
else if (index == end_index) /* possible short read */
- *bytes_read = isize & ~PAGE_CACHE_MASK;
+ *bytes_read = isize & ~PAGE_MASK;
else
- *bytes_read = PAGE_CACHE_SIZE; /* got a full page */
+ *bytes_read = PAGE_SIZE; /* got a full page */
out:
if (IS_ERR(page))
printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n",
- (int)PAGE_CACHE_SIZE,
- (unsigned long long)index << PAGE_CACHE_SHIFT,
+ (int)PAGE_SIZE,
+ (unsigned long long)index << PAGE_SHIFT,
PTR_ERR(page));
return page;
}
@@ -406,11 +408,11 @@ int bitmap_update_sb(struct bitmap *bitmap)
return 0;
}
spin_unlock_irqrestore(&bitmap->lock, flags);
- sb = (bitmap_super_t *)kmap(bitmap->sb_page);
+ sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
sb->events = cpu_to_le64(bitmap->mddev->events);
if (!bitmap->mddev->degraded)
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
- kunmap(bitmap->sb_page);
+ kunmap_atomic(sb, KM_USER0);
return write_page(bitmap, bitmap->sb_page, 1);
}
@@ -421,7 +423,7 @@ void bitmap_print_sb(struct bitmap *bitmap)
if (!bitmap || !bitmap->sb_page)
return;
- sb = (bitmap_super_t *)kmap(bitmap->sb_page);
+ sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic));
printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version));
@@ -440,7 +442,7 @@ void bitmap_print_sb(struct bitmap *bitmap)
printk(KERN_DEBUG " sync size: %llu KB\n",
(unsigned long long)le64_to_cpu(sb->sync_size)/2);
printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
- kunmap(bitmap->sb_page);
+ kunmap_atomic(sb, KM_USER0);
}
/* read the superblock from the bitmap file and initialize some bitmap fields */
@@ -466,7 +468,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
return err;
}
- sb = (bitmap_super_t *)kmap(bitmap->sb_page);
+ sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
if (bytes_read < sizeof(*sb)) { /* short read */
printk(KERN_INFO "%s: bitmap file superblock truncated\n",
@@ -485,12 +487,12 @@ static int bitmap_read_sb(struct bitmap *bitmap)
else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
reason = "unrecognized superblock version";
- else if (chunksize < 512 || chunksize > (1024 * 1024 * 4))
- reason = "bitmap chunksize out of range (512B - 4MB)";
+ else if (chunksize < PAGE_SIZE)
+ reason = "bitmap chunksize too small";
else if ((1 << ffz(~chunksize)) != chunksize)
reason = "bitmap chunksize not a power of 2";
- else if (daemon_sleep < 1 || daemon_sleep > 15)
- reason = "daemon sleep period out of range (1-15s)";
+ else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ)
+ reason = "daemon sleep period out of range";
else if (write_behind > COUNTER_MAX)
reason = "write-behind limit out of range (0 - 16383)";
if (reason) {
@@ -535,7 +537,7 @@ success:
bitmap->events_cleared = bitmap->mddev->events;
err = 0;
out:
- kunmap(bitmap->sb_page);
+ kunmap_atomic(sb, KM_USER0);
if (err)
bitmap_print_sb(bitmap);
return err;
@@ -558,9 +560,9 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
spin_unlock_irqrestore(&bitmap->lock, flags);
return;
}
- page_cache_get(bitmap->sb_page);
+ get_page(bitmap->sb_page);
spin_unlock_irqrestore(&bitmap->lock, flags);
- sb = (bitmap_super_t *)kmap(bitmap->sb_page);
+ sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
switch (op) {
case MASK_SET: sb->state |= bits;
break;
@@ -568,8 +570,8 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
break;
default: BUG();
}
- kunmap(bitmap->sb_page);
- page_cache_release(bitmap->sb_page);
+ kunmap_atomic(sb, KM_USER0);
+ put_page(bitmap->sb_page);
}
/*
@@ -622,12 +624,11 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
while (pages--)
if (map[pages]->index != 0) /* 0 is sb_page, release it below */
- page_cache_release(map[pages]);
+ put_page(map[pages]);
kfree(map);
kfree(attr);
- if (sb_page)
- page_cache_release(sb_page);
+ safe_put_page(sb_page);
}
static void bitmap_stop_daemon(struct bitmap *bitmap);
@@ -654,7 +655,7 @@ static void drain_write_queues(struct bitmap *bitmap)
while ((item = dequeue_page(bitmap))) {
/* don't bother to wait */
- page_cache_release(item->page);
+ put_page(item->page);
mempool_free(item, bitmap->write_pool);
}
@@ -763,7 +764,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
/* make sure the page stays cached until it gets written out */
if (! (get_page_attr(bitmap, page) & BITMAP_PAGE_DIRTY))
- page_cache_get(page);
+ get_page(page);
/* set the bit */
kaddr = kmap_atomic(page, KM_USER0);
@@ -854,6 +855,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
unsigned long bytes, offset, dummy;
int outofdate;
int ret = -ENOSPC;
+ void *paddr;
chunks = bitmap->chunks;
file = bitmap->file;
@@ -887,12 +889,10 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
if (!bitmap->filemap)
goto out;
- bitmap->filemap_attr = kmalloc(sizeof(long) * num_pages, GFP_KERNEL);
+ bitmap->filemap_attr = kzalloc(sizeof(long) * num_pages, GFP_KERNEL);
if (!bitmap->filemap_attr)
goto out;
- memset(bitmap->filemap_attr, 0, sizeof(long) * num_pages);
-
oldindex = ~0L;
for (i = 0; i < chunks; i++) {
@@ -901,8 +901,6 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
bit = file_page_offset(i);
if (index != oldindex) { /* this is a new page, read it in */
/* unmap the old page, we're done with it */
- if (oldpage != NULL)
- kunmap(oldpage);
if (index == 0) {
/*
* if we're here then the superblock page
@@ -925,30 +923,32 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
oldindex = index;
oldpage = page;
- kmap(page);
if (outofdate) {
/*
* if bitmap is out of date, dirty the
* whole page and write it out
*/
- memset(page_address(page) + offset, 0xff,
+ paddr = kmap_atomic(page, KM_USER0);
+ memset(paddr + offset, 0xff,
PAGE_SIZE - offset);
+ kunmap_atomic(paddr, KM_USER0);
ret = write_page(bitmap, page, 1);
if (ret) {
- kunmap(page);
/* release, page not in filemap yet */
- page_cache_release(page);
+ put_page(page);
goto out;
}
}
bitmap->filemap[bitmap->file_pages++] = page;
}
+ paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
- b = test_bit(bit, page_address(page));
+ b = test_bit(bit, paddr);
else
- b = ext2_test_bit(bit, page_address(page));
+ b = ext2_test_bit(bit, paddr);
+ kunmap_atomic(paddr, KM_USER0);
if (b) {
/* if the disk bit is set, set the memory bit */
bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap),
@@ -963,9 +963,6 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
ret = 0;
bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);
- if (page) /* unmap the last page */
- kunmap(page);
-
if (bit_cnt) { /* Kick recovery if any bits were set */
set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
md_wakeup_thread(bitmap->mddev->thread);
@@ -1021,6 +1018,7 @@ int bitmap_daemon_work(struct bitmap *bitmap)
int err = 0;
int blocks;
int attr;
+ void *paddr;
if (bitmap == NULL)
return 0;
@@ -1043,7 +1041,7 @@ int bitmap_daemon_work(struct bitmap *bitmap)
/* skip this page unless it's marked as needing cleaning */
if (!((attr=get_page_attr(bitmap, page)) & BITMAP_PAGE_CLEAN)) {
if (attr & BITMAP_PAGE_NEEDWRITE) {
- page_cache_get(page);
+ get_page(page);
clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
}
spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -1057,13 +1055,13 @@ int bitmap_daemon_work(struct bitmap *bitmap)
default:
bitmap_file_kick(bitmap);
}
- page_cache_release(page);
+ put_page(page);
}
continue;
}
/* grab the new page, sync and release the old */
- page_cache_get(page);
+ get_page(page);
if (lastpage != NULL) {
if (get_page_attr(bitmap, lastpage) & BITMAP_PAGE_NEEDWRITE) {
clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
@@ -1077,14 +1075,12 @@ int bitmap_daemon_work(struct bitmap *bitmap)
set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
}
- kunmap(lastpage);
- page_cache_release(lastpage);
+ put_page(lastpage);
if (err)
bitmap_file_kick(bitmap);
} else
spin_unlock_irqrestore(&bitmap->lock, flags);
lastpage = page;
- kmap(page);
/*
printk("bitmap clean at page %lu\n", j);
*/
@@ -1107,10 +1103,12 @@ int bitmap_daemon_work(struct bitmap *bitmap)
-1);
/* clear the bit */
+ paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
- clear_bit(file_page_offset(j), page_address(page));
+ clear_bit(file_page_offset(j), paddr);
else
- ext2_clear_bit(file_page_offset(j), page_address(page));
+ ext2_clear_bit(file_page_offset(j), paddr);
+ kunmap_atomic(paddr, KM_USER0);
}
}
spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -1118,7 +1116,6 @@ int bitmap_daemon_work(struct bitmap *bitmap)
/* now sync the final page */
if (lastpage != NULL) {
- kunmap(lastpage);
spin_lock_irqsave(&bitmap->lock, flags);
if (get_page_attr(bitmap, lastpage) &BITMAP_PAGE_NEEDWRITE) {
clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
@@ -1133,7 +1130,7 @@ int bitmap_daemon_work(struct bitmap *bitmap)
spin_unlock_irqrestore(&bitmap->lock, flags);
}
- page_cache_release(lastpage);
+ put_page(lastpage);
}
return err;
@@ -1184,7 +1181,7 @@ static void bitmap_writeback_daemon(mddev_t *mddev)
PRINTK("finished page writeback: %p\n", page);
err = PageError(page);
- page_cache_release(page);
+ put_page(page);
if (err) {
printk(KERN_WARNING "%s: bitmap file writeback "
"failed (page %lu): %d\n",
@@ -1530,6 +1527,8 @@ void bitmap_destroy(mddev_t *mddev)
return;
mddev->bitmap = NULL; /* disconnect from the md device */
+ if (mddev->thread)
+ mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
bitmap_free(bitmap);
}
@@ -1555,12 +1554,10 @@ int bitmap_create(mddev_t *mddev)
BUG_ON(file && mddev->bitmap_offset);
- bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL);
+ bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
if (!bitmap)
return -ENOMEM;
- memset(bitmap, 0, sizeof(*bitmap));
-
spin_lock_init(&bitmap->lock);
bitmap->mddev = mddev;
@@ -1601,12 +1598,11 @@ int bitmap_create(mddev_t *mddev)
#ifdef INJECT_FATAL_FAULT_1
bitmap->bp = NULL;
#else
- bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
+ bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
#endif
err = -ENOMEM;
if (!bitmap->bp)
goto error;
- memset(bitmap->bp, 0, pages * sizeof(*bitmap->bp));
bitmap->flags |= BITMAP_ACTIVE;
@@ -1636,6 +1632,8 @@ int bitmap_create(mddev_t *mddev)
if (IS_ERR(bitmap->writeback_daemon))
return PTR_ERR(bitmap->writeback_daemon);
+ mddev->thread->timeout = bitmap->daemon_sleep * HZ;
+
return bitmap_update_sb(bitmap);
error:
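Most of the bitmap.c churn is one discipline change: long-lived kmap() mappings become short kmap_atomic() sections that map, touch, and unmap immediately, since an atomic mapping must be dropped before anything that can sleep (such as write_page()). The pattern in isolation, a sketch mirroring the init-from-disk hunk:

#include <linux/highmem.h>
#include <linux/bitops.h>

static int test_one_bit(struct page *page, unsigned long bit, int hostendian)
{
        void *paddr = kmap_atomic(page, KM_USER0);
        int b;

        b = hostendian ? test_bit(bit, paddr)
                       : ext2_test_bit(bit, paddr);
        kunmap_atomic(paddr, KM_USER0); /* no sleeping between map/unmap */
        return b;
}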
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index cf663105668..a601a427885 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -690,6 +690,8 @@ bad3:
bad2:
crypto_free_tfm(tfm);
bad1:
+ /* Must zero key material before freeing */
+ memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
kfree(cc);
return -EINVAL;
}
@@ -706,6 +708,9 @@ static void crypt_dtr(struct dm_target *ti)
cc->iv_gen_ops->dtr(cc);
crypto_free_tfm(cc->tfm);
dm_put_device(ti, cc->dev);
+
+ /* Must zero key material before freeing */
+ memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
kfree(cc);
}
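Both teardown paths now scrub the whole allocation because the key bytes are stored inline after struct crypt_config and must not survive into the allocator's free lists. A minimal sketch of the rule, assuming that layout:

#include <linux/slab.h>
#include <linux/string.h>

static void scrub_and_free(struct crypt_config *cc)
{
        /* compute the length before wiping the struct that holds it */
        size_t len = sizeof(*cc) + cc->key_size * sizeof(u8);

        memset(cc, 0, len);     /* zero key material before freeing */
        kfree(cc);
}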
diff --git a/drivers/md/dm-io.h b/drivers/md/dm-io.h
index 1a77f326570..f9035bfd1a9 100644
--- a/drivers/md/dm-io.h
+++ b/drivers/md/dm-io.h
@@ -9,9 +9,6 @@
#include "dm.h"
-/* FIXME make this configurable */
-#define DM_MAX_IO_REGIONS 8
-
struct io_region {
struct block_device *bdev;
sector_t sector;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 07d44e19536..561bda5011e 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -270,6 +270,7 @@ static int dm_hash_rename(const char *old, const char *new)
{
char *new_name, *old_name;
struct hash_cell *hc;
+ struct dm_table *table;
/*
* duplicate new.
@@ -317,6 +318,15 @@ static int dm_hash_rename(const char *old, const char *new)
/* rename the device node in devfs */
register_with_devfs(hc);
+ /*
+ * Wake up any dm event waiters.
+ */
+ table = dm_get_table(hc->md);
+ if (table) {
+ dm_table_event(table);
+ dm_table_put(table);
+ }
+
up_write(&_hash_lock);
kfree(old_name);
return 0;
@@ -683,14 +693,18 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
static int do_suspend(struct dm_ioctl *param)
{
int r = 0;
+ int do_lockfs = 1;
struct mapped_device *md;
md = find_device(param);
if (!md)
return -ENXIO;
+ if (param->flags & DM_SKIP_LOCKFS_FLAG)
+ do_lockfs = 0;
+
if (!dm_suspended(md))
- r = dm_suspend(md);
+ r = dm_suspend(md, do_lockfs);
if (!r)
r = __dev_status(md, param);
@@ -702,6 +716,7 @@ static int do_suspend(struct dm_ioctl *param)
static int do_resume(struct dm_ioctl *param)
{
int r = 0;
+ int do_lockfs = 1;
struct hash_cell *hc;
struct mapped_device *md;
struct dm_table *new_map;
@@ -727,8 +742,10 @@ static int do_resume(struct dm_ioctl *param)
/* Do we need to load a new map ? */
if (new_map) {
/* Suspend if it isn't already suspended */
+ if (param->flags & DM_SKIP_LOCKFS_FLAG)
+ do_lockfs = 0;
if (!dm_suspended(md))
- dm_suspend(md);
+ dm_suspend(md, do_lockfs);
r = dm_swap_table(md, new_map);
if (r) {
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index a76349cb10a..efe4adf7853 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -573,7 +573,7 @@ static int core_get_resync_work(struct dirty_log *log, region_t *region)
lc->sync_search);
lc->sync_search = *region + 1;
- if (*region == lc->region_count)
+ if (*region >= lc->region_count)
return 0;
} while (log_test_bit(lc->recovering_bits, *region));
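The ">=" matters because lc->sync_search advances one past each region returned; once it has run beyond region_count, the find-next-zero-bit helper is not guaranteed to hand back exactly region_count, so an equality test can miss the termination condition (assumed rationale). In sketch form:

/* was: if (*region == lc->region_count) -- misses returns beyond the end */
if (*region >= lc->region_count)
        return 0;       /* no resync work left */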
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 6b0fc167092..6cfa8d435d5 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -562,6 +562,8 @@ struct mirror_set {
region_t nr_regions;
int in_sync;
+ struct mirror *default_mirror; /* Default mirror */
+
unsigned int nr_mirrors;
struct mirror mirror[0];
};
@@ -611,7 +613,7 @@ static int recover(struct mirror_set *ms, struct region *reg)
unsigned long flags = 0;
/* fill in the source */
- m = ms->mirror + DEFAULT_MIRROR;
+ m = ms->default_mirror;
from.bdev = m->dev->bdev;
from.sector = m->offset + region_to_sector(reg->rh, reg->key);
if (reg->key == (ms->nr_regions - 1)) {
@@ -627,7 +629,7 @@ static int recover(struct mirror_set *ms, struct region *reg)
/* fill in the destinations */
for (i = 0, dest = to; i < ms->nr_mirrors; i++) {
- if (i == DEFAULT_MIRROR)
+ if (&ms->mirror[i] == ms->default_mirror)
continue;
m = ms->mirror + i;
@@ -682,7 +684,7 @@ static void do_recovery(struct mirror_set *ms)
static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
{
/* FIXME: add read balancing */
- return ms->mirror + DEFAULT_MIRROR;
+ return ms->default_mirror;
}
/*
@@ -709,7 +711,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads)
if (rh_in_sync(&ms->rh, region, 0))
m = choose_mirror(ms, bio->bi_sector);
else
- m = ms->mirror + DEFAULT_MIRROR;
+ m = ms->default_mirror;
map_bio(ms, m, bio);
generic_make_request(bio);
@@ -833,7 +835,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
rh_delay(&ms->rh, bio);
while ((bio = bio_list_pop(&nosync))) {
- map_bio(ms, ms->mirror + DEFAULT_MIRROR, bio);
+ map_bio(ms, ms->default_mirror, bio);
generic_make_request(bio);
}
}
@@ -900,6 +902,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
ms->nr_mirrors = nr_mirrors;
ms->nr_regions = dm_sector_div_up(ti->len, region_size);
ms->in_sync = 0;
+ ms->default_mirror = &ms->mirror[DEFAULT_MIRROR];
if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
ti->error = "dm-mirror: Error creating dirty region hash";
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ab54f99b7c3..4b9dd8fb1e5 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -371,6 +371,20 @@ static inline ulong round_up(ulong n, ulong size)
return (n + size) & ~size;
}
+static void read_snapshot_metadata(struct dm_snapshot *s)
+{
+ if (s->have_metadata)
+ return;
+
+ if (s->store.read_metadata(&s->store)) {
+ down_write(&s->lock);
+ s->valid = 0;
+ up_write(&s->lock);
+ }
+
+ s->have_metadata = 1;
+}
+
/*
* Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
*/
@@ -848,16 +862,7 @@ static void snapshot_resume(struct dm_target *ti)
{
struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
- if (s->have_metadata)
- return;
-
- if (s->store.read_metadata(&s->store)) {
- down_write(&s->lock);
- s->valid = 0;
- up_write(&s->lock);
- }
-
- s->have_metadata = 1;
+ read_snapshot_metadata(s);
}
static int snapshot_status(struct dm_target *ti, status_type_t type,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 930b9fc2795..0e481512f91 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -55,6 +55,7 @@ union map_info *dm_get_mapinfo(struct bio *bio)
*/
#define DMF_BLOCK_IO 0
#define DMF_SUSPENDED 1
+#define DMF_FROZEN 2
struct mapped_device {
struct rw_semaphore io_lock;
@@ -97,7 +98,7 @@ struct mapped_device {
* freeze/thaw support require holding onto a super block
*/
struct super_block *frozen_sb;
- struct block_device *frozen_bdev;
+ struct block_device *suspended_bdev;
};
#define MIN_IOS 256
@@ -836,9 +837,9 @@ static void __set_size(struct mapped_device *md, sector_t size)
{
set_capacity(md->disk, size);
- down(&md->frozen_bdev->bd_inode->i_sem);
- i_size_write(md->frozen_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
- up(&md->frozen_bdev->bd_inode->i_sem);
+ down(&md->suspended_bdev->bd_inode->i_sem);
+ i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
+ up(&md->suspended_bdev->bd_inode->i_sem);
}
static int __bind(struct mapped_device *md, struct dm_table *t)
@@ -902,10 +903,9 @@ int dm_create_with_minor(unsigned int minor, struct mapped_device **result)
return create_aux(minor, 1, result);
}
-void *dm_get_mdptr(dev_t dev)
+static struct mapped_device *dm_find_md(dev_t dev)
{
struct mapped_device *md;
- void *mdptr = NULL;
unsigned minor = MINOR(dev);
if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
@@ -914,12 +914,32 @@ void *dm_get_mdptr(dev_t dev)
down(&_minor_lock);
md = idr_find(&_minor_idr, minor);
-
- if (md && (dm_disk(md)->first_minor == minor))
- mdptr = md->interface_ptr;
+ if (!md || (dm_disk(md)->first_minor != minor))
+ md = NULL;
up(&_minor_lock);
+ return md;
+}
+
+struct mapped_device *dm_get_md(dev_t dev)
+{
+ struct mapped_device *md = dm_find_md(dev);
+
+ if (md)
+ dm_get(md);
+
+ return md;
+}
+
+void *dm_get_mdptr(dev_t dev)
+{
+ struct mapped_device *md;
+ void *mdptr = NULL;
+
+ md = dm_find_md(dev);
+ if (md)
+ mdptr = md->interface_ptr;
return mdptr;
}
@@ -991,43 +1011,33 @@ out:
*/
static int lock_fs(struct mapped_device *md)
{
- int r = -ENOMEM;
-
- md->frozen_bdev = bdget_disk(md->disk, 0);
- if (!md->frozen_bdev) {
- DMWARN("bdget failed in lock_fs");
- goto out;
- }
+ int r;
WARN_ON(md->frozen_sb);
- md->frozen_sb = freeze_bdev(md->frozen_bdev);
+ md->frozen_sb = freeze_bdev(md->suspended_bdev);
if (IS_ERR(md->frozen_sb)) {
r = PTR_ERR(md->frozen_sb);
- goto out_bdput;
+ md->frozen_sb = NULL;
+ return r;
}
+ set_bit(DMF_FROZEN, &md->flags);
+
/* don't bdput right now, we don't want the bdev
- * to go away while it is locked. We'll bdput
- * in unlock_fs
+ * to go away while it is locked.
*/
return 0;
-
-out_bdput:
- bdput(md->frozen_bdev);
- md->frozen_sb = NULL;
- md->frozen_bdev = NULL;
-out:
- return r;
}
static void unlock_fs(struct mapped_device *md)
{
- thaw_bdev(md->frozen_bdev, md->frozen_sb);
- bdput(md->frozen_bdev);
+ if (!test_bit(DMF_FROZEN, &md->flags))
+ return;
+ thaw_bdev(md->suspended_bdev, md->frozen_sb);
md->frozen_sb = NULL;
- md->frozen_bdev = NULL;
+ clear_bit(DMF_FROZEN, &md->flags);
}
/*
@@ -1037,7 +1047,7 @@ static void unlock_fs(struct mapped_device *md)
* dm_bind_table, dm_suspend must be called to flush any in
* flight bios and ensure that any further io gets deferred.
*/
-int dm_suspend(struct mapped_device *md)
+int dm_suspend(struct mapped_device *md, int do_lockfs)
{
struct dm_table *map = NULL;
DECLARE_WAITQUEUE(wait, current);
@@ -1053,10 +1063,19 @@ int dm_suspend(struct mapped_device *md)
/* This does not get reverted if there's an error later. */
dm_table_presuspend_targets(map);
- /* Flush I/O to the device. */
- r = lock_fs(md);
- if (r)
+ md->suspended_bdev = bdget_disk(md->disk, 0);
+ if (!md->suspended_bdev) {
+ DMWARN("bdget failed in dm_suspend");
+ r = -ENOMEM;
goto out;
+ }
+
+ /* Flush I/O to the device. */
+ if (do_lockfs) {
+ r = lock_fs(md);
+ if (r)
+ goto out;
+ }
/*
* First we set the BLOCK_IO flag so no more ios will be mapped.
@@ -1105,6 +1124,11 @@ int dm_suspend(struct mapped_device *md)
r = 0;
out:
+ if (r && md->suspended_bdev) {
+ bdput(md->suspended_bdev);
+ md->suspended_bdev = NULL;
+ }
+
dm_table_put(map);
up(&md->suspend_lock);
return r;
@@ -1135,6 +1159,9 @@ int dm_resume(struct mapped_device *md)
unlock_fs(md);
+ bdput(md->suspended_bdev);
+ md->suspended_bdev = NULL;
+
clear_bit(DMF_SUSPENDED, &md->flags);
dm_table_unplug_all(map);
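With this change dm_suspend() always pins the block device in md->suspended_bdev, but the freeze_bdev()/thaw_bdev() pair becomes optional, gated by the new do_lockfs argument that the dm-ioctl.c hunk earlier derives from DM_SKIP_LOCKFS_FLAG. A condensed sketch of the caller side, assuming only the interfaces shown in this diff:

static int suspend_from_ioctl(struct mapped_device *md, __u32 flags)
{
        int do_lockfs = !(flags & DM_SKIP_LOCKFS_FLAG);

        if (dm_suspended(md))
                return 0;               /* already quiesced */
        /* freezes the filesystem first only when do_lockfs is set */
        return dm_suspend(md, do_lockfs);
}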
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index e38c3fc1a1d..4eaf075da21 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -28,7 +28,7 @@
* in types.h.
*/
#ifdef CONFIG_LBD
-#define SECTOR_FORMAT "%Lu"
+#define SECTOR_FORMAT "%llu"
#else
#define SECTOR_FORMAT "%lu"
#endif
@@ -58,6 +58,7 @@ int dm_create(struct mapped_device **md);
int dm_create_with_minor(unsigned int minor, struct mapped_device **md);
void dm_set_mdptr(struct mapped_device *md, void *ptr);
void *dm_get_mdptr(dev_t dev);
+struct mapped_device *dm_get_md(dev_t dev);
/*
* Reference counting for md.
@@ -68,7 +69,7 @@ void dm_put(struct mapped_device *md);
/*
* A device can still be used while suspended, but I/O is deferred.
*/
-int dm_suspend(struct mapped_device *md);
+int dm_suspend(struct mapped_device *md, int with_lockfs);
int dm_resume(struct mapped_device *md);
/*
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 0248f8e7eac..a7a5ab55433 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -316,9 +316,10 @@ static int stop(mddev_t *mddev)
return 0;
}
-static mdk_personality_t faulty_personality =
+static struct mdk_personality faulty_personality =
{
.name = "faulty",
+ .level = LEVEL_FAULTY,
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
@@ -329,15 +330,17 @@ static mdk_personality_t faulty_personality =
static int __init raid_init(void)
{
- return register_md_personality(FAULTY, &faulty_personality);
+ return register_md_personality(&faulty_personality);
}
static void raid_exit(void)
{
- unregister_md_personality(FAULTY);
+ unregister_md_personality(&faulty_personality);
}
module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-10"); /* faulty */
+MODULE_ALIAS("md-faulty");
+MODULE_ALIAS("md-level--5");
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index eb703648597..ca99979c868 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -561,11 +561,13 @@ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
* Cancels a kcopyd job, eg. someone might be deactivating a
* mirror.
*/
+#if 0
int kcopyd_cancel(struct kcopyd_job *job, int block)
{
/* FIXME: finish */
return -1;
}
+#endif /* 0 */
/*-----------------------------------------------------------------
* Unit setup
@@ -684,4 +686,3 @@ void kcopyd_client_destroy(struct kcopyd_client *kc)
EXPORT_SYMBOL(kcopyd_client_create);
EXPORT_SYMBOL(kcopyd_client_destroy);
EXPORT_SYMBOL(kcopyd_copy);
-EXPORT_SYMBOL(kcopyd_cancel);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 946efef3a8f..777585458c8 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -121,11 +121,10 @@ static int linear_run (mddev_t *mddev)
sector_t curr_offset;
struct list_head *tmp;
- conf = kmalloc (sizeof (*conf) + mddev->raid_disks*sizeof(dev_info_t),
+ conf = kzalloc (sizeof (*conf) + mddev->raid_disks*sizeof(dev_info_t),
GFP_KERNEL);
if (!conf)
goto out;
- memset(conf, 0, sizeof(*conf) + mddev->raid_disks*sizeof(dev_info_t));
mddev->private = conf;
cnt = 0;
@@ -352,9 +351,10 @@ static void linear_status (struct seq_file *seq, mddev_t *mddev)
}
-static mdk_personality_t linear_personality=
+static struct mdk_personality linear_personality =
{
.name = "linear",
+ .level = LEVEL_LINEAR,
.owner = THIS_MODULE,
.make_request = linear_make_request,
.run = linear_run,
@@ -364,16 +364,18 @@ static mdk_personality_t linear_personality=
static int __init linear_init (void)
{
- return register_md_personality (LINEAR, &linear_personality);
+ return register_md_personality (&linear_personality);
}
static void linear_exit (void)
{
- unregister_md_personality (LINEAR);
+ unregister_md_personality (&linear_personality);
}
module_init(linear_init);
module_exit(linear_exit);
MODULE_LICENSE("GPL");
-MODULE_ALIAS("md-personality-1"); /* LINEAR */
+MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
+MODULE_ALIAS("md-linear");
+MODULE_ALIAS("md-level--1");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8175a2a222d..1b76fb29fb7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -42,6 +42,7 @@
#include <linux/devfs_fs_kernel.h>
#include <linux/buffer_head.h> /* for invalidate_bdev */
#include <linux/suspend.h>
+#include <linux/poll.h>
#include <linux/init.h>
@@ -67,7 +68,7 @@
static void autostart_arrays (int part);
#endif
-static mdk_personality_t *pers[MAX_PERSONALITY];
+static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
/*
@@ -80,10 +81,22 @@ static DEFINE_SPINLOCK(pers_lock);
* idle IO detection.
*
* you can change it via /proc/sys/dev/raid/speed_limit_min and _max.
+ * or /sys/block/mdX/md/sync_speed_{min,max}
*/
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
+static inline int speed_min(mddev_t *mddev)
+{
+ return mddev->sync_speed_min ?
+ mddev->sync_speed_min : sysctl_speed_limit_min;
+}
+
+static inline int speed_max(mddev_t *mddev)
+{
+ return mddev->sync_speed_max ?
+ mddev->sync_speed_max : sysctl_speed_limit_max;
+}
static struct ctl_table_header *raid_table_header;
@@ -134,6 +147,24 @@ static struct block_device_operations md_fops;
static int start_readonly;
/*
+ * We have a system wide 'event count' that is incremented
+ * on any 'interesting' event, and readers of /proc/mdstat
+ * can use 'poll' or 'select' to find out when the event
+ * count increases.
+ *
+ * Events are:
+ * start array, stop array, error, add device, remove device,
+ * start build, activate spare
+ */
+static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
+static atomic_t md_event_count;
+static void md_new_event(mddev_t *mddev)
+{
+ atomic_inc(&md_event_count);
+ wake_up(&md_event_waiters);
+}
+
+/*
* Enables to iterate over all existing md arrays
* all_mddevs_lock protects this list.
*/
@@ -209,12 +240,10 @@ static mddev_t * mddev_find(dev_t unit)
}
spin_unlock(&all_mddevs_lock);
- new = (mddev_t *) kmalloc(sizeof(*new), GFP_KERNEL);
+ new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return NULL;
- memset(new, 0, sizeof(*new));
-
new->unit = unit;
if (MAJOR(unit) == MD_MAJOR)
new->md_minor = MINOR(unit);
@@ -262,7 +291,7 @@ static inline void mddev_unlock(mddev_t * mddev)
md_wakeup_thread(mddev->thread);
}
-mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
+static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
{
mdk_rdev_t * rdev;
struct list_head *tmp;
@@ -286,6 +315,18 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
return NULL;
}
+static struct mdk_personality *find_pers(int level, char *clevel)
+{
+ struct mdk_personality *pers;
+ list_for_each_entry(pers, &pers_list, list) {
+ if (level != LEVEL_NONE && pers->level == level)
+ return pers;
+ if (strcmp(pers->name, clevel)==0)
+ return pers;
+ }
+ return NULL;
+}
+
static inline sector_t calc_dev_sboffset(struct block_device *bdev)
{
sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
@@ -320,7 +361,7 @@ static int alloc_disk_sb(mdk_rdev_t * rdev)
static void free_disk_sb(mdk_rdev_t * rdev)
{
if (rdev->sb_page) {
- page_cache_release(rdev->sb_page);
+ put_page(rdev->sb_page);
rdev->sb_loaded = 0;
rdev->sb_page = NULL;
rdev->sb_offset = 0;
@@ -461,6 +502,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size,
bio_put(bio);
return ret;
}
+EXPORT_SYMBOL_GPL(sync_page_io);
static int read_disk_sb(mdk_rdev_t * rdev, int size)
{
@@ -665,6 +707,10 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
}
rdev->size = calc_dev_size(rdev, sb->chunk_size);
+ if (rdev->size < sb->size && sb->level > 1)
+ /* "this cannot possibly happen" ... */
+ ret = -EINVAL;
+
abort:
return ret;
}
@@ -688,6 +734,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->ctime = sb->ctime;
mddev->utime = sb->utime;
mddev->level = sb->level;
+ mddev->clevel[0] = 0;
mddev->layout = sb->layout;
mddev->raid_disks = sb->raid_disks;
mddev->size = sb->size;
@@ -714,9 +761,10 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
mddev->bitmap_file == NULL) {
- if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6) {
+ if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6
+ && mddev->level != 10) {
/* FIXME use a better test */
- printk(KERN_WARNING "md: bitmaps only support for raid1\n");
+ printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
return -EINVAL;
}
mddev->bitmap_offset = mddev->default_bitmap_offset;
@@ -968,6 +1016,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
}
rdev->preferred_minor = 0xffff;
rdev->data_offset = le64_to_cpu(sb->data_offset);
+ atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
@@ -1006,6 +1055,9 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
rdev->size = le64_to_cpu(sb->data_size)/2;
if (le32_to_cpu(sb->chunksize))
rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
+
+ if (le32_to_cpu(sb->size) > rdev->size*2)
+ return -EINVAL;
return 0;
}
@@ -1023,6 +1075,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
mddev->level = le32_to_cpu(sb->level);
+ mddev->clevel[0] = 0;
mddev->layout = le32_to_cpu(sb->layout);
mddev->raid_disks = le32_to_cpu(sb->raid_disks);
mddev->size = le64_to_cpu(sb->size)/2;
@@ -1037,8 +1090,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
mddev->bitmap_file == NULL ) {
- if (mddev->level != 1) {
- printk(KERN_WARNING "md: bitmaps only supported for raid1\n");
+ if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6
+ && mddev->level != 10) {
+ printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
return -EINVAL;
}
mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
@@ -1105,6 +1159,8 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
else
sb->resync_offset = cpu_to_le64(0);
+ sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);
+
if (mddev->bitmap && mddev->bitmap_file == NULL) {
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -1187,6 +1243,14 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
MD_BUG();
return -EINVAL;
}
+ /* make sure rdev->size exceeds mddev->size */
+ if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) {
+ if (mddev->pers)
+ /* Cannot change size, so fail */
+ return -ENOSPC;
+ else
+ mddev->size = rdev->size;
+ }
same_pdev = match_dev_unit(mddev, rdev);
if (same_pdev)
printk(KERN_WARNING
@@ -1496,6 +1560,26 @@ repeat:
}
+/* Words written to sysfs files may, or may not, be \n terminated.
+ * We want to accept either case. For this we use cmd_match.
+ */
+static int cmd_match(const char *cmd, const char *str)
+{
+ /* See if cmd, written into a sysfs file, matches
+ * str. They must either be the same, or cmd can
+ * have a trailing newline
+ */
+ while (*cmd && *str && *cmd == *str) {
+ cmd++;
+ str++;
+ }
+ if (*cmd == '\n')
+ cmd++;
+ if (*str || *cmd)
+ return 0;
+ return 1;
+}
+
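
A minimal sketch, not part of the patch, of what cmd_match() accepts; the wrapper function and BUG_ON checks exist only for illustration:

static void cmd_match_examples(void)
{
	BUG_ON(!cmd_match("check\n", "check")); /* trailing '\n' tolerated */
	BUG_ON(!cmd_match("check", "check"));   /* exact match */
	BUG_ON(cmd_match("checker", "check"));  /* trailing junk rejected */
	BUG_ON(cmd_match("chec", "check"));     /* str not consumed: rejected */
}
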
struct rdev_sysfs_entry {
struct attribute attr;
ssize_t (*show)(mdk_rdev_t *, char *);
@@ -1538,9 +1622,113 @@ super_show(mdk_rdev_t *rdev, char *page)
}
static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super);
+static ssize_t
+errors_show(mdk_rdev_t *rdev, char *page)
+{
+ return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
+}
+
+static ssize_t
+errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long n = simple_strtoul(buf, &e, 10);
+ if (*buf && (*e == 0 || *e == '\n')) {
+ atomic_set(&rdev->corrected_errors, n);
+ return len;
+ }
+ return -EINVAL;
+}
+static struct rdev_sysfs_entry rdev_errors =
+__ATTR(errors, 0644, errors_show, errors_store);
+
+static ssize_t
+slot_show(mdk_rdev_t *rdev, char *page)
+{
+ if (rdev->raid_disk < 0)
+ return sprintf(page, "none\n");
+ else
+ return sprintf(page, "%d\n", rdev->raid_disk);
+}
+
+static ssize_t
+slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ char *e;
+ int slot = simple_strtoul(buf, &e, 10);
+ if (strncmp(buf, "none", 4)==0)
+ slot = -1;
+ else if (e==buf || (*e && *e!= '\n'))
+ return -EINVAL;
+ if (rdev->mddev->pers)
+ /* Cannot set slot in active array (yet) */
+ return -EBUSY;
+ if (slot >= rdev->mddev->raid_disks)
+ return -ENOSPC;
+ rdev->raid_disk = slot;
+ /* assume it is working */
+ rdev->flags = 0;
+ set_bit(In_sync, &rdev->flags);
+ return len;
+}
+
+
+static struct rdev_sysfs_entry rdev_slot =
+__ATTR(slot, 0644, slot_show, slot_store);
+
+static ssize_t
+offset_show(mdk_rdev_t *rdev, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
+}
+
+static ssize_t
+offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long long offset = simple_strtoull(buf, &e, 10);
+ if (e==buf || (*e && *e != '\n'))
+ return -EINVAL;
+ if (rdev->mddev->pers)
+ return -EBUSY;
+ rdev->data_offset = offset;
+ return len;
+}
+
+static struct rdev_sysfs_entry rdev_offset =
+__ATTR(offset, 0644, offset_show, offset_store);
+
+static ssize_t
+rdev_size_show(mdk_rdev_t *rdev, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)rdev->size);
+}
+
+static ssize_t
+rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long long size = simple_strtoull(buf, &e, 10);
+ if (e==buf || (*e && *e != '\n'))
+ return -EINVAL;
+ if (rdev->mddev->pers)
+ return -EBUSY;
+ rdev->size = size;
+ if (size < rdev->mddev->size || rdev->mddev->size == 0)
+ rdev->mddev->size = size;
+ return len;
+}
+
+static struct rdev_sysfs_entry rdev_size =
+__ATTR(size, 0644, rdev_size_show, rdev_size_store);
+
static struct attribute *rdev_default_attrs[] = {
&rdev_state.attr,
&rdev_super.attr,
+ &rdev_errors.attr,
+ &rdev_slot.attr,
+ &rdev_offset.attr,
+ &rdev_size.attr,
NULL,
};
static ssize_t
@@ -1598,12 +1786,11 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
mdk_rdev_t *rdev;
sector_t size;
- rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL);
+ rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
if (!rdev) {
printk(KERN_ERR "md: could not alloc mem for new device!\n");
return ERR_PTR(-ENOMEM);
}
- memset(rdev, 0, sizeof(*rdev));
if ((err = alloc_disk_sb(rdev)))
goto abort_free;
@@ -1621,6 +1808,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
rdev->data_offset = 0;
atomic_set(&rdev->nr_pending, 0);
atomic_set(&rdev->read_errors, 0);
+ atomic_set(&rdev->corrected_errors, 0);
size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
if (!size) {
@@ -1725,16 +1913,37 @@ static void analyze_sbs(mddev_t * mddev)
static ssize_t
level_show(mddev_t *mddev, char *page)
{
- mdk_personality_t *p = mddev->pers;
- if (p == NULL && mddev->raid_disks == 0)
- return 0;
- if (mddev->level >= 0)
- return sprintf(page, "raid%d\n", mddev->level);
- else
+ struct mdk_personality *p = mddev->pers;
+ if (p)
return sprintf(page, "%s\n", p->name);
+ else if (mddev->clevel[0])
+ return sprintf(page, "%s\n", mddev->clevel);
+ else if (mddev->level != LEVEL_NONE)
+ return sprintf(page, "%d\n", mddev->level);
+ else
+ return 0;
+}
+
+static ssize_t
+level_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ int rv = len;
+ if (mddev->pers)
+ return -EBUSY;
+ if (len == 0)
+ return 0;
+ if (len >= sizeof(mddev->clevel))
+ return -ENOSPC;
+ strncpy(mddev->clevel, buf, len);
+ if (mddev->clevel[len-1] == '\n')
+ len--;
+ mddev->clevel[len] = 0;
+ mddev->level = LEVEL_NONE;
+ return rv;
}
-static struct md_sysfs_entry md_level = __ATTR_RO(level);
+static struct md_sysfs_entry md_level =
+__ATTR(level, 0644, level_show, level_store);
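
As a hedged usage sketch (the sysfs path and helper name are assumptions, not from this patch), userspace could select a personality by name for a not-yet-active array; level_store() tolerates a trailing newline and records the name in mddev->clevel:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: set the md level of an inactive array by name. */
static int set_md_level(const char *name)
{
	int fd = open("/sys/block/md0/md/level", O_WRONLY); /* assumed path */
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, name, strlen(name));	/* e.g. "raid10\n" */
	close(fd);
	return n == (ssize_t)strlen(name) ? 0 : -1;
}
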
static ssize_t
raid_disks_show(mddev_t *mddev, char *page)
@@ -1744,7 +1953,197 @@ raid_disks_show(mddev_t *mddev, char *page)
return sprintf(page, "%d\n", mddev->raid_disks);
}
-static struct md_sysfs_entry md_raid_disks = __ATTR_RO(raid_disks);
+static int update_raid_disks(mddev_t *mddev, int raid_disks);
+
+static ssize_t
+raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ /* can only set raid_disks if array is not yet active */
+ char *e;
+ int rv = 0;
+ unsigned long n = simple_strtoul(buf, &e, 10);
+
+ if (!*buf || (*e && *e != '\n'))
+ return -EINVAL;
+
+ if (mddev->pers)
+ rv = update_raid_disks(mddev, n);
+ else
+ mddev->raid_disks = n;
+ return rv ? rv : len;
+}
+static struct md_sysfs_entry md_raid_disks =
+__ATTR(raid_disks, 0644, raid_disks_show, raid_disks_store);
+
+static ssize_t
+chunk_size_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->chunk_size);
+}
+
+static ssize_t
+chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ /* can only set chunk_size if array is not yet active */
+ char *e;
+ unsigned long n = simple_strtoul(buf, &e, 10);
+
+ if (mddev->pers)
+ return -EBUSY;
+ if (!*buf || (*e && *e != '\n'))
+ return -EINVAL;
+
+ mddev->chunk_size = n;
+ return len;
+}
+static struct md_sysfs_entry md_chunk_size =
+__ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store);
+
+static ssize_t
+null_show(mddev_t *mddev, char *page)
+{
+ return -EINVAL;
+}
+
+static ssize_t
+new_dev_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ /* buf must be of the form %d:%d (optionally '\n'-terminated), giving major and minor numbers */
+ /* The new device is added to the array.
+ * If the array has a persistent superblock, we read the
+ * superblock to initialise info and check validity.
+ * Otherwise, the only checking done is that in bind_rdev_to_array,
+ * which mainly checks size.
+ */
+ char *e;
+ int major = simple_strtoul(buf, &e, 10);
+ int minor;
+ dev_t dev;
+ mdk_rdev_t *rdev;
+ int err;
+
+ if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
+ return -EINVAL;
+ minor = simple_strtoul(e+1, &e, 10);
+ if (*e && *e != '\n')
+ return -EINVAL;
+ dev = MKDEV(major, minor);
+ if (major != MAJOR(dev) ||
+ minor != MINOR(dev))
+ return -EOVERFLOW;
+
+
+ if (mddev->persistent) {
+ rdev = md_import_device(dev, mddev->major_version,
+ mddev->minor_version);
+ if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
+ mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
+ mdk_rdev_t, same_set);
+ err = super_types[mddev->major_version]
+ .load_super(rdev, rdev0, mddev->minor_version);
+ if (err < 0)
+ goto out;
+ }
+ } else
+ rdev = md_import_device(dev, -1, -1);
+
+ if (IS_ERR(rdev))
+ return PTR_ERR(rdev);
+ err = bind_rdev_to_array(rdev, mddev);
+ out:
+ if (err)
+ export_rdev(rdev);
+ return err ? err : len;
+}
+
+static struct md_sysfs_entry md_new_device =
+__ATTR(new_dev, 0200, null_show, new_dev_store);
+
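
For illustration only (device path and numbers are assumptions), handing a component to the array through new_dev means writing "major:minor":

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical helper: add the block device major:minor to md0. */
static int md_add_component(int major, int minor)
{
	char buf[32];
	int fd, n, ret = 0;

	fd = open("/sys/block/md0/md/new_dev", O_WRONLY); /* assumed path */
	if (fd < 0)
		return -1;
	n = snprintf(buf, sizeof(buf), "%d:%d\n", major, minor);
	if (write(fd, buf, n) != n)
		ret = -1;
	close(fd);
	return ret;
}
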
+static ssize_t
+size_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)mddev->size);
+}
+
+static int update_size(mddev_t *mddev, unsigned long size);
+
+static ssize_t
+size_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ /* If array is inactive, we can reduce the component size, but
+ * not increase it (except from 0).
+ * If array is active, we can try an on-line resize
+ */
+ char *e;
+ int err = 0;
+ unsigned long long size = simple_strtoull(buf, &e, 10);
+ if (!*buf || *buf == '\n' ||
+ (*e && *e != '\n'))
+ return -EINVAL;
+
+ if (mddev->pers) {
+ err = update_size(mddev, size);
+ md_update_sb(mddev);
+ } else {
+ if (mddev->size == 0 ||
+ mddev->size > size)
+ mddev->size = size;
+ else
+ err = -ENOSPC;
+ }
+ return err ? err : len;
+}
+
+static struct md_sysfs_entry md_size =
+__ATTR(component_size, 0644, size_show, size_store);
+
+
+/* Metadata version.
+ * This is either 'none' for arrays with externally managed metadata,
+ * or N.M for internally known formats
+ */
+static ssize_t
+metadata_show(mddev_t *mddev, char *page)
+{
+ if (mddev->persistent)
+ return sprintf(page, "%d.%d\n",
+ mddev->major_version, mddev->minor_version);
+ else
+ return sprintf(page, "none\n");
+}
+
+static ssize_t
+metadata_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ int major, minor;
+ char *e;
+ if (!list_empty(&mddev->disks))
+ return -EBUSY;
+
+ if (cmd_match(buf, "none")) {
+ mddev->persistent = 0;
+ mddev->major_version = 0;
+ mddev->minor_version = 90;
+ return len;
+ }
+ major = simple_strtoul(buf, &e, 10);
+ if (e==buf || *e != '.')
+ return -EINVAL;
+ buf = e+1;
+ minor = simple_strtoul(buf, &e, 10);
+ if (e==buf || *e != '\n')
+ return -EINVAL;
+ if (major >= sizeof(super_types)/sizeof(super_types[0]) ||
+ super_types[major].name == NULL)
+ return -ENOENT;
+ mddev->major_version = major;
+ mddev->minor_version = minor;
+ mddev->persistent = 1;
+ return len;
+}
+
+static struct md_sysfs_entry md_metadata =
+__ATTR(metadata_version, 0644, metadata_show, metadata_store);
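
Inferred from the parsing above (a sketch, not normative documentation), the accepted inputs are:

/* Accepted inputs for metadata_version, as parsed above:
 *   "none\n" (or "none") -> non-persistent; defaults reset to 0.90
 *   "0.90\n"             -> persistent v0.90 superblocks
 *   "1.0\n", "1.1\n" ... -> persistent v1.x (minor is stored unchecked)
 * The numeric form must end with '\n' or it returns -EINVAL; an
 * unknown major version returns -ENOENT.
 */
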
static ssize_t
action_show(mddev_t *mddev, char *page)
@@ -1771,31 +2170,27 @@ action_store(mddev_t *mddev, const char *page, size_t len)
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
- if (strcmp(page, "idle")==0 || strcmp(page, "idle\n")==0) {
+ if (cmd_match(page, "idle")) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_unregister_thread(mddev->sync_thread);
mddev->sync_thread = NULL;
mddev->recovery = 0;
}
- return len;
- }
-
- if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
- test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+ } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return -EBUSY;
- if (strcmp(page, "resync")==0 || strcmp(page, "resync\n")==0 ||
- strcmp(page, "recover")==0 || strcmp(page, "recover\n")==0)
+ else if (cmd_match(page, "resync") || cmd_match(page, "recover"))
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
else {
- if (strcmp(page, "check")==0 || strcmp(page, "check\n")==0)
+ if (cmd_match(page, "check"))
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
- else if (strcmp(page, "repair")!=0 && strcmp(page, "repair\n")!=0)
+ else if (!cmd_match(page, "repair"))
return -EINVAL;
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
return len;
}
@@ -1814,15 +2209,107 @@ md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
static struct md_sysfs_entry
md_mismatches = __ATTR_RO(mismatch_cnt);
+static ssize_t
+sync_min_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d (%s)\n", speed_min(mddev),
+ mddev->sync_speed_min ? "local": "system");
+}
+
+static ssize_t
+sync_min_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ int min;
+ char *e;
+ if (strncmp(buf, "system", 6)==0) {
+ mddev->sync_speed_min = 0;
+ return len;
+ }
+ min = simple_strtoul(buf, &e, 10);
+ if (buf == e || (*e && *e != '\n') || min <= 0)
+ return -EINVAL;
+ mddev->sync_speed_min = min;
+ return len;
+}
+
+static struct md_sysfs_entry md_sync_min =
+__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
+
+static ssize_t
+sync_max_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d (%s)\n", speed_max(mddev),
+ mddev->sync_speed_max ? "local": "system");
+}
+
+static ssize_t
+sync_max_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ int max;
+ char *e;
+ if (strncmp(buf, "system", 6)==0) {
+ mddev->sync_speed_max = 0;
+ return len;
+ }
+ max = simple_strtoul(buf, &e, 10);
+ if (buf == e || (*e && *e != '\n') || max <= 0)
+ return -EINVAL;
+ mddev->sync_speed_max = max;
+ return len;
+}
+
+static struct md_sysfs_entry md_sync_max =
+__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
+
+
+static ssize_t
+sync_speed_show(mddev_t *mddev, char *page)
+{
+ unsigned long resync, dt, db;
+ resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+ dt = ((jiffies - mddev->resync_mark) / HZ);
+ if (!dt) dt++;
+ db = resync - (mddev->resync_mark_cnt);
+ return sprintf(page, "%ld\n", db/dt/2); /* K/sec */
+}
+
+static struct md_sysfs_entry
+md_sync_speed = __ATTR_RO(sync_speed);
+
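
A worked example of the rate computed above (numbers invented): the counters are in 512-byte sectors, so the final /2 converts to KB:

/* If db = 204800 sectors were resynced over dt = 50 seconds:
 *   204800 / 50 / 2 = 2048 K/sec
 * which is what sync_speed_show() prints.
 */
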
+static ssize_t
+sync_completed_show(mddev_t *mddev, char *page)
+{
+ unsigned long max_blocks, resync;
+
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+ max_blocks = mddev->resync_max_sectors;
+ else
+ max_blocks = mddev->size << 1;
+
+ resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+ return sprintf(page, "%lu / %lu\n", resync, max_blocks);
+}
+
+static struct md_sysfs_entry
+md_sync_completed = __ATTR_RO(sync_completed);
+
static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_raid_disks.attr,
+ &md_chunk_size.attr,
+ &md_size.attr,
+ &md_metadata.attr,
+ &md_new_device.attr,
NULL,
};
static struct attribute *md_redundancy_attrs[] = {
&md_scan_mode.attr,
&md_mismatches.attr,
+ &md_sync_min.attr,
+ &md_sync_max.attr,
+ &md_sync_speed.attr,
+ &md_sync_completed.attr,
NULL,
};
static struct attribute_group md_redundancy_group = {
@@ -1937,14 +2424,16 @@ static void md_safemode_timeout(unsigned long data)
md_wakeup_thread(mddev->thread);
}
+static int start_dirty_degraded;
static int do_md_run(mddev_t * mddev)
{
- int pnum, err;
+ int err;
int chunk_size;
struct list_head *tmp;
mdk_rdev_t *rdev;
struct gendisk *disk;
+ struct mdk_personality *pers;
char b[BDEVNAME_SIZE];
if (list_empty(&mddev->disks))
@@ -1961,20 +2450,8 @@ static int do_md_run(mddev_t * mddev)
analyze_sbs(mddev);
chunk_size = mddev->chunk_size;
- pnum = level_to_pers(mddev->level);
- if ((pnum != MULTIPATH) && (pnum != RAID1)) {
- if (!chunk_size) {
- /*
- * 'default chunksize' in the old md code used to
- * be PAGE_SIZE, baaad.
- * we abort here to be on the safe side. We don't
- * want to continue the bad practice.
- */
- printk(KERN_ERR
- "no chunksize specified, see 'man raidtab'\n");
- return -EINVAL;
- }
+ if (chunk_size) {
if (chunk_size > MAX_CHUNK_SIZE) {
printk(KERN_ERR "too big chunk_size: %d > %d\n",
chunk_size, MAX_CHUNK_SIZE);
@@ -2010,10 +2487,10 @@ static int do_md_run(mddev_t * mddev)
}
#ifdef CONFIG_KMOD
- if (!pers[pnum])
- {
- request_module("md-personality-%d", pnum);
- }
+ if (mddev->level != LEVEL_NONE)
+ request_module("md-level-%d", mddev->level);
+ else if (mddev->clevel[0])
+ request_module("md-%s", mddev->clevel);
#endif
/*
@@ -2035,30 +2512,39 @@ static int do_md_run(mddev_t * mddev)
return -ENOMEM;
spin_lock(&pers_lock);
- if (!pers[pnum] || !try_module_get(pers[pnum]->owner)) {
+ pers = find_pers(mddev->level, mddev->clevel);
+ if (!pers || !try_module_get(pers->owner)) {
spin_unlock(&pers_lock);
- printk(KERN_WARNING "md: personality %d is not loaded!\n",
- pnum);
+ if (mddev->level != LEVEL_NONE)
+ printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
+ mddev->level);
+ else
+ printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
+ mddev->clevel);
return -EINVAL;
}
-
- mddev->pers = pers[pnum];
+ mddev->pers = pers;
spin_unlock(&pers_lock);
+ mddev->level = pers->level;
+ strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
mddev->recovery = 0;
mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
mddev->barriers_work = 1;
+ mddev->ok_start_degraded = start_dirty_degraded;
if (start_readonly)
mddev->ro = 2; /* read-only, but switch on first write */
- /* before we start the array running, initialise the bitmap */
- err = bitmap_create(mddev);
- if (err)
- printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
- mdname(mddev), err);
- else
- err = mddev->pers->run(mddev);
+ err = mddev->pers->run(mddev);
+ if (!err && mddev->pers->sync_request) {
+ err = bitmap_create(mddev);
+ if (err) {
+ printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
+ mdname(mddev), err);
+ mddev->pers->stop(mddev);
+ }
+ }
if (err) {
printk(KERN_ERR "md: pers->run() failed ...\n");
module_put(mddev->pers->owner);
@@ -2104,6 +2590,7 @@ static int do_md_run(mddev_t * mddev)
mddev->queue->make_request_fn = mddev->pers->make_request;
mddev->changed = 1;
+ md_new_event(mddev);
return 0;
}
@@ -2231,6 +2718,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
mdname(mddev));
err = 0;
+ md_new_event(mddev);
out:
return err;
}
@@ -2668,12 +3156,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
if (info->state & (1<<MD_DISK_WRITEMOSTLY))
set_bit(WriteMostly, &rdev->flags);
- err = bind_rdev_to_array(rdev, mddev);
- if (err) {
- export_rdev(rdev);
- return err;
- }
-
if (!mddev->persistent) {
printk(KERN_INFO "md: nonpersistent superblock ...\n");
rdev->sb_offset = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
@@ -2681,8 +3163,11 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
rdev->size = calc_dev_size(rdev, mddev->chunk_size);
- if (!mddev->size || (mddev->size > rdev->size))
- mddev->size = rdev->size;
+ err = bind_rdev_to_array(rdev, mddev);
+ if (err) {
+ export_rdev(rdev);
+ return err;
+ }
}
return 0;
@@ -2705,6 +3190,7 @@ static int hot_remove_disk(mddev_t * mddev, dev_t dev)
kick_rdev_from_array(rdev);
md_update_sb(mddev);
+ md_new_event(mddev);
return 0;
busy:
@@ -2753,15 +3239,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
size = calc_dev_size(rdev, mddev->chunk_size);
rdev->size = size;
- if (size < mddev->size) {
- printk(KERN_WARNING
- "%s: disk size %llu blocks < array size %llu\n",
- mdname(mddev), (unsigned long long)size,
- (unsigned long long)mddev->size);
- err = -ENOSPC;
- goto abort_export;
- }
-
if (test_bit(Faulty, &rdev->flags)) {
printk(KERN_WARNING
"md: can not hot-add faulty %s disk to %s!\n",
@@ -2771,7 +3248,9 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
}
clear_bit(In_sync, &rdev->flags);
rdev->desc_nr = -1;
- bind_rdev_to_array(rdev, mddev);
+ err = bind_rdev_to_array(rdev, mddev);
+ if (err)
+ goto abort_export;
/*
* The rest should better be atomic, we can have disk failures
@@ -2795,7 +3274,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
*/
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
-
+ md_new_event(mddev);
return 0;
abort_unbind_export:
@@ -2942,6 +3421,81 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
return 0;
}
+static int update_size(mddev_t *mddev, unsigned long size)
+{
+ mdk_rdev_t * rdev;
+ int rv;
+ struct list_head *tmp;
+
+ if (mddev->pers->resize == NULL)
+ return -EINVAL;
+ /* The "size" is the amount of each device that is used.
+ * This can only make sense for arrays with redundancy.
+ * linear and raid0 always use whatever space is available
+ * We can only consider changing the size if no resync
+ * or reconstruction is happening, and if the new size
+ * is acceptable. It must fit before the sb_offset or,
+ * if that is <data_offset, it must fit before the
+ * size of each device.
+ * If size is zero, we find the largest size that fits.
+ */
+ if (mddev->sync_thread)
+ return -EBUSY;
+ ITERATE_RDEV(mddev,rdev,tmp) {
+ sector_t avail;
+ int fit = (size == 0);
+ if (rdev->sb_offset > rdev->data_offset)
+ avail = (rdev->sb_offset*2) - rdev->data_offset;
+ else
+ avail = get_capacity(rdev->bdev->bd_disk)
+ - rdev->data_offset;
+ if (fit && (size == 0 || size > avail/2))
+ size = avail/2;
+ if (avail < ((sector_t)size << 1))
+ return -ENOSPC;
+ }
+ rv = mddev->pers->resize(mddev, (sector_t)size *2);
+ if (!rv) {
+ struct block_device *bdev;
+
+ bdev = bdget_disk(mddev->gendisk, 0);
+ if (bdev) {
+ down(&bdev->bd_inode->i_sem);
+ i_size_write(bdev->bd_inode, mddev->array_size << 10);
+ up(&bdev->bd_inode->i_sem);
+ bdput(bdev);
+ }
+ }
+ return rv;
+}
+
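
The units above are mixed and easy to misread; as a sketch with made-up numbers, 'size' and sb_offset are in 1 KB blocks while data_offset and avail are in 512-byte sectors:

/* Example: v0.90 superblock near the end of the device, data first:
 *   sb_offset   = 104857472 KB, data_offset = 0 sectors
 *   avail = sb_offset*2 - data_offset = 209714944 sectors
 * A request of size = 104857472 KB just fits: (size << 1) == avail,
 * so the -ENOSPC test (avail < size<<1) passes.
 */
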
+static int update_raid_disks(mddev_t *mddev, int raid_disks)
+{
+ int rv;
+ /* change the number of raid disks */
+ if (mddev->pers->reshape == NULL)
+ return -EINVAL;
+ if (raid_disks <= 0 ||
+ raid_disks >= mddev->max_disks)
+ return -EINVAL;
+ if (mddev->sync_thread)
+ return -EBUSY;
+ rv = mddev->pers->reshape(mddev, raid_disks);
+ if (!rv) {
+ struct block_device *bdev;
+
+ bdev = bdget_disk(mddev->gendisk, 0);
+ if (bdev) {
+ down(&bdev->bd_inode->i_sem);
+ i_size_write(bdev->bd_inode, mddev->array_size << 10);
+ up(&bdev->bd_inode->i_sem);
+ bdput(bdev);
+ }
+ }
+ return rv;
+}
+
+
/*
* update_array_info is used to change the configuration of an
* on-line array.
@@ -2990,71 +3544,12 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
else
return mddev->pers->reconfig(mddev, info->layout, -1);
}
- if (mddev->size != info->size) {
- mdk_rdev_t * rdev;
- struct list_head *tmp;
- if (mddev->pers->resize == NULL)
- return -EINVAL;
- /* The "size" is the amount of each device that is used.
- * This can only make sense for arrays with redundancy.
- * linear and raid0 always use whatever space is available
- * We can only consider changing the size if no resync
- * or reconstruction is happening, and if the new size
- * is acceptable. It must fit before the sb_offset or,
- * if that is <data_offset, it must fit before the
- * size of each device.
- * If size is zero, we find the largest size that fits.
- */
- if (mddev->sync_thread)
- return -EBUSY;
- ITERATE_RDEV(mddev,rdev,tmp) {
- sector_t avail;
- int fit = (info->size == 0);
- if (rdev->sb_offset > rdev->data_offset)
- avail = (rdev->sb_offset*2) - rdev->data_offset;
- else
- avail = get_capacity(rdev->bdev->bd_disk)
- - rdev->data_offset;
- if (fit && (info->size == 0 || info->size > avail/2))
- info->size = avail/2;
- if (avail < ((sector_t)info->size << 1))
- return -ENOSPC;
- }
- rv = mddev->pers->resize(mddev, (sector_t)info->size *2);
- if (!rv) {
- struct block_device *bdev;
-
- bdev = bdget_disk(mddev->gendisk, 0);
- if (bdev) {
- down(&bdev->bd_inode->i_sem);
- i_size_write(bdev->bd_inode, mddev->array_size << 10);
- up(&bdev->bd_inode->i_sem);
- bdput(bdev);
- }
- }
- }
- if (mddev->raid_disks != info->raid_disks) {
- /* change the number of raid disks */
- if (mddev->pers->reshape == NULL)
- return -EINVAL;
- if (info->raid_disks <= 0 ||
- info->raid_disks >= mddev->max_disks)
- return -EINVAL;
- if (mddev->sync_thread)
- return -EBUSY;
- rv = mddev->pers->reshape(mddev, info->raid_disks);
- if (!rv) {
- struct block_device *bdev;
-
- bdev = bdget_disk(mddev->gendisk, 0);
- if (bdev) {
- down(&bdev->bd_inode->i_sem);
- i_size_write(bdev->bd_inode, mddev->array_size << 10);
- up(&bdev->bd_inode->i_sem);
- bdput(bdev);
- }
- }
- }
+ if (mddev->size != info->size)
+ rv = update_size(mddev, info->size);
+
+ if (mddev->raid_disks != info->raid_disks)
+ rv = update_raid_disks(mddev, info->raid_disks);
+
if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
if (mddev->pers->quiesce == NULL)
return -EINVAL;
@@ -3476,11 +3971,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
{
mdk_thread_t *thread;
- thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL);
+ thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL);
if (!thread)
return NULL;
- memset(thread, 0, sizeof(mdk_thread_t));
init_waitqueue_head(&thread->wqueue);
thread->run = run;
@@ -3524,6 +4018,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
+ md_new_event(mddev);
}
/* seq_file implementation /proc/mdstat */
@@ -3664,24 +4159,29 @@ static void md_seq_stop(struct seq_file *seq, void *v)
mddev_put(mddev);
}
+struct mdstat_info {
+ int event;
+};
+
static int md_seq_show(struct seq_file *seq, void *v)
{
mddev_t *mddev = v;
sector_t size;
struct list_head *tmp2;
mdk_rdev_t *rdev;
- int i;
+ struct mdstat_info *mi = seq->private;
struct bitmap *bitmap;
if (v == (void*)1) {
+ struct mdk_personality *pers;
seq_printf(seq, "Personalities : ");
spin_lock(&pers_lock);
- for (i = 0; i < MAX_PERSONALITY; i++)
- if (pers[i])
- seq_printf(seq, "[%s] ", pers[i]->name);
+ list_for_each_entry(pers, &pers_list, list)
+ seq_printf(seq, "[%s] ", pers->name);
spin_unlock(&pers_lock);
seq_printf(seq, "\n");
+ mi->event = atomic_read(&md_event_count);
return 0;
}
if (v == (void*)2) {
@@ -3790,47 +4290,68 @@ static struct seq_operations md_seq_ops = {
static int md_seq_open(struct inode *inode, struct file *file)
{
int error;
+ struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
+ if (mi == NULL)
+ return -ENOMEM;
error = seq_open(file, &md_seq_ops);
+ if (error)
+ kfree(mi);
+ else {
+ struct seq_file *p = file->private_data;
+ p->private = mi;
+ mi->event = atomic_read(&md_event_count);
+ }
return error;
}
+static int md_seq_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct mdstat_info *mi = m->private;
+ m->private = NULL;
+ kfree(mi);
+ return seq_release(inode, file);
+}
+
+static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
+{
+ struct seq_file *m = filp->private_data;
+ struct mdstat_info *mi = m->private;
+ int mask;
+
+ poll_wait(filp, &md_event_waiters, wait);
+
+ /* always allow read */
+ mask = POLLIN | POLLRDNORM;
+
+ if (mi->event != atomic_read(&md_event_count))
+ mask |= POLLERR | POLLPRI;
+ return mask;
+}
+
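
This is what lets a monitor sleep in poll() instead of re-reading /proc/mdstat in a loop. A hedged userspace sketch (error handling trimmed):

#include <fcntl.h>
#include <poll.h>
#include <unistd.h>

/* Hypothetical monitor loop: block until md_new_event() fires. */
static void watch_mdstat(void)
{
	char buf[4096];
	struct pollfd pfd;

	pfd.fd = open("/proc/mdstat", O_RDONLY);
	pfd.events = POLLPRI;

	for (;;) {
		lseek(pfd.fd, 0, SEEK_SET);
		read(pfd.fd, buf, sizeof(buf)); /* latches the event count */
		poll(&pfd, 1, -1);	/* wakes on POLLERR|POLLPRI */
		/* ... re-read and react to the new array state ... */
	}
}
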
static struct file_operations md_seq_fops = {
.open = md_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = md_seq_release,
+ .poll = mdstat_poll,
};
-int register_md_personality(int pnum, mdk_personality_t *p)
+int register_md_personality(struct mdk_personality *p)
{
- if (pnum >= MAX_PERSONALITY) {
- printk(KERN_ERR
- "md: tried to install personality %s as nr %d, but max is %lu\n",
- p->name, pnum, MAX_PERSONALITY-1);
- return -EINVAL;
- }
-
spin_lock(&pers_lock);
- if (pers[pnum]) {
- spin_unlock(&pers_lock);
- return -EBUSY;
- }
-
- pers[pnum] = p;
- printk(KERN_INFO "md: %s personality registered as nr %d\n", p->name, pnum);
+ list_add_tail(&p->list, &pers_list);
+ printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
spin_unlock(&pers_lock);
return 0;
}
-int unregister_md_personality(int pnum)
+int unregister_md_personality(struct mdk_personality *p)
{
- if (pnum >= MAX_PERSONALITY)
- return -EINVAL;
-
- printk(KERN_INFO "md: %s personality unregistered\n", pers[pnum]->name);
+ printk(KERN_INFO "md: %s personality unregistered\n", p->name);
spin_lock(&pers_lock);
- pers[pnum] = NULL;
+ list_del_init(&p->list);
spin_unlock(&pers_lock);
return 0;
}
@@ -4012,10 +4533,10 @@ static void md_do_sync(mddev_t *mddev)
printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
- " %d KB/sec/disc.\n", sysctl_speed_limit_min);
+ " %d KB/sec/disc.\n", speed_min(mddev));
printk(KERN_INFO "md: using maximum available idle IO bandwidth "
"(but not more than %d KB/sec) for reconstruction.\n",
- sysctl_speed_limit_max);
+ speed_max(mddev));
is_mddev_idle(mddev); /* this also initializes IO event counters */
/* we don't use the checkpoint if there's a bitmap */
@@ -4056,7 +4577,7 @@ static void md_do_sync(mddev_t *mddev)
skipped = 0;
sectors = mddev->pers->sync_request(mddev, j, &skipped,
- currspeed < sysctl_speed_limit_min);
+ currspeed < speed_min(mddev));
if (sectors == 0) {
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
goto out;
@@ -4069,7 +4590,11 @@ static void md_do_sync(mddev_t *mddev)
j += sectors;
if (j>1) mddev->curr_resync = j;
-
+ if (last_check == 0)
+ /* this is the earliest that rebuild will be
+ * visible in /proc/mdstat
+ */
+ md_new_event(mddev);
if (last_check + window > io_sectors || j == max_sectors)
continue;
@@ -4117,8 +4642,8 @@ static void md_do_sync(mddev_t *mddev)
currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
/((jiffies-mddev->resync_mark)/HZ +1) +1;
- if (currspeed > sysctl_speed_limit_min) {
- if ((currspeed > sysctl_speed_limit_max) ||
+ if (currspeed > speed_min(mddev)) {
+ if ((currspeed > speed_max(mddev)) ||
!is_mddev_idle(mddev)) {
msleep(500);
goto repeat;
@@ -4255,6 +4780,7 @@ void md_check_recovery(mddev_t *mddev)
mddev->recovery = 0;
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_new_event(mddev);
goto unlock;
}
/* Clear some bits that don't mean anything, but
@@ -4292,6 +4818,7 @@ void md_check_recovery(mddev_t *mddev)
sprintf(nm, "rd%d", rdev->raid_disk);
sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
spares++;
+ md_new_event(mddev);
} else
break;
}
@@ -4324,9 +4851,9 @@ void md_check_recovery(mddev_t *mddev)
mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */
mddev->recovery = 0;
- } else {
+ } else
md_wakeup_thread(mddev->sync_thread);
- }
+ md_new_event(mddev);
}
unlock:
mddev_unlock(mddev);
@@ -4503,12 +5030,14 @@ static int set_ro(const char *val, struct kernel_param *kp)
int num = simple_strtoul(val, &e, 10);
if (*val && (*e == '\0' || *e == '\n')) {
start_readonly = num;
- return 0;;
+ return 0;
}
return -EINVAL;
}
module_param_call(start_ro, set_ro, get_ro, NULL, 0600);
+module_param(start_dirty_degraded, int, 0644);
+
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 145cdc5ad00..e6aa309a66d 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -35,15 +35,10 @@
#define NR_RESERVED_BUFS 32
-static mdk_personality_t multipath_personality;
-
-
static void *mp_pool_alloc(gfp_t gfp_flags, void *data)
{
struct multipath_bh *mpb;
- mpb = kmalloc(sizeof(*mpb), gfp_flags);
- if (mpb)
- memset(mpb, 0, sizeof(*mpb));
+ mpb = kzalloc(sizeof(*mpb), gfp_flags);
return mpb;
}
@@ -444,7 +439,7 @@ static int multipath_run (mddev_t *mddev)
* should be freed in multipath_stop()]
*/
- conf = kmalloc(sizeof(multipath_conf_t), GFP_KERNEL);
+ conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
mddev->private = conf;
if (!conf) {
printk(KERN_ERR
@@ -452,9 +447,8 @@ static int multipath_run (mddev_t *mddev)
mdname(mddev));
goto out;
}
- memset(conf, 0, sizeof(*conf));
- conf->multipaths = kmalloc(sizeof(struct multipath_info)*mddev->raid_disks,
+ conf->multipaths = kzalloc(sizeof(struct multipath_info)*mddev->raid_disks,
GFP_KERNEL);
if (!conf->multipaths) {
printk(KERN_ERR
@@ -462,7 +456,6 @@ static int multipath_run (mddev_t *mddev)
mdname(mddev));
goto out_free_conf;
}
- memset(conf->multipaths, 0, sizeof(struct multipath_info)*mddev->raid_disks);
conf->working_disks = 0;
ITERATE_RDEV(mddev,rdev,tmp) {
@@ -557,9 +550,10 @@ static int multipath_stop (mddev_t *mddev)
return 0;
}
-static mdk_personality_t multipath_personality=
+static struct mdk_personality multipath_personality =
{
.name = "multipath",
+ .level = LEVEL_MULTIPATH,
.owner = THIS_MODULE,
.make_request = multipath_make_request,
.run = multipath_run,
@@ -572,15 +566,17 @@ static mdk_personality_t multipath_personality=
static int __init multipath_init (void)
{
- return register_md_personality (MULTIPATH, &multipath_personality);
+ return register_md_personality (&multipath_personality);
}
static void __exit multipath_exit (void)
{
- unregister_md_personality (MULTIPATH);
+ unregister_md_personality (&multipath_personality);
}
module_init(multipath_init);
module_exit(multipath_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
+MODULE_ALIAS("md-multipath");
+MODULE_ALIAS("md-level--4");
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index fece3277c2a..abbca150202 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -113,21 +113,16 @@ static int create_strip_zones (mddev_t *mddev)
}
printk("raid0: FINAL %d zones\n", conf->nr_strip_zones);
- conf->strip_zone = kmalloc(sizeof(struct strip_zone)*
+ conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
conf->nr_strip_zones, GFP_KERNEL);
if (!conf->strip_zone)
return 1;
- conf->devlist = kmalloc(sizeof(mdk_rdev_t*)*
+ conf->devlist = kzalloc(sizeof(mdk_rdev_t*)*
conf->nr_strip_zones*mddev->raid_disks,
GFP_KERNEL);
if (!conf->devlist)
return 1;
- memset(conf->strip_zone, 0,sizeof(struct strip_zone)*
- conf->nr_strip_zones);
- memset(conf->devlist, 0,
- sizeof(mdk_rdev_t*) * conf->nr_strip_zones * mddev->raid_disks);
-
/* The first zone must contain all devices, so here we check that
* there is a proper alignment of slots to devices and find them all
*/
@@ -280,7 +275,11 @@ static int raid0_run (mddev_t *mddev)
mdk_rdev_t *rdev;
struct list_head *tmp;
- printk("%s: setting max_sectors to %d, segment boundary to %d\n",
+ if (mddev->chunk_size == 0) {
+ printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
+ return -EINVAL;
+ }
+ printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n",
mdname(mddev),
mddev->chunk_size >> 9,
(mddev->chunk_size>>1)-1);
@@ -361,7 +360,7 @@ static int raid0_run (mddev_t *mddev)
* chunksize should be used in that case.
*/
{
- int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_CACHE_SIZE;
+ int stripe = mddev->raid_disks * mddev->chunk_size / PAGE_SIZE;
if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
}
@@ -512,9 +511,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev)
return;
}
-static mdk_personality_t raid0_personality=
+static struct mdk_personality raid0_personality=
{
.name = "raid0",
+ .level = 0,
.owner = THIS_MODULE,
.make_request = raid0_make_request,
.run = raid0_run,
@@ -524,15 +524,17 @@ static mdk_personality_t raid0_personality=
static int __init raid0_init (void)
{
- return register_md_personality (RAID0, &raid0_personality);
+ return register_md_personality (&raid0_personality);
}
static void raid0_exit (void)
{
- unregister_md_personality (RAID0);
+ unregister_md_personality (&raid0_personality);
}
module_init(raid0_init);
module_exit(raid0_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-2"); /* RAID0 */
+MODULE_ALIAS("md-raid0");
+MODULE_ALIAS("md-level-0");
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 229d7b20429..a06ff91f27e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -47,10 +47,11 @@
*/
#define NR_RAID1_BIOS 256
-static mdk_personality_t raid1_personality;
static void unplug_slaves(mddev_t *mddev);
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
{
@@ -59,10 +60,8 @@ static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
int size = offsetof(r1bio_t, bios[pi->raid_disks]);
/* allocate a r1bio with room for raid_disks entries in the bios array */
- r1_bio = kmalloc(size, gfp_flags);
- if (r1_bio)
- memset(r1_bio, 0, size);
- else
+ r1_bio = kzalloc(size, gfp_flags);
+ if (!r1_bio)
unplug_slaves(pi->mddev);
return r1_bio;
@@ -104,15 +103,30 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
}
/*
* Allocate RESYNC_PAGES data pages and attach them to
- * the first bio;
+ * the first bio.
+ * If this is a user-requested check/repair, allocate
+ * RESYNC_PAGES for each bio.
*/
- bio = r1_bio->bios[0];
- for (i = 0; i < RESYNC_PAGES; i++) {
- page = alloc_page(gfp_flags);
- if (unlikely(!page))
- goto out_free_pages;
-
- bio->bi_io_vec[i].bv_page = page;
+ if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery))
+ j = pi->raid_disks;
+ else
+ j = 1;
+ while(j--) {
+ bio = r1_bio->bios[j];
+ for (i = 0; i < RESYNC_PAGES; i++) {
+ page = alloc_page(gfp_flags);
+ if (unlikely(!page))
+ goto out_free_pages;
+
+ bio->bi_io_vec[i].bv_page = page;
+ }
+ }
+ /* If not user-requested, copy the page pointers to all bios */
+ if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
+ for (i=0; i<RESYNC_PAGES ; i++)
+ for (j=1; j<pi->raid_disks; j++)
+ r1_bio->bios[j]->bi_io_vec[i].bv_page =
+ r1_bio->bios[0]->bi_io_vec[i].bv_page;
}
r1_bio->master_bio = NULL;
@@ -120,8 +134,10 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
return r1_bio;
out_free_pages:
- for ( ; i > 0 ; i--)
- __free_page(bio->bi_io_vec[i-1].bv_page);
+ for (i=0; i < RESYNC_PAGES ; i++)
+ for (j=0 ; j < pi->raid_disks; j++)
+ safe_put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
+ j = -1;
out_free_bio:
while ( ++j < pi->raid_disks )
bio_put(r1_bio->bios[j]);
@@ -132,14 +148,16 @@ out_free_bio:
static void r1buf_pool_free(void *__r1_bio, void *data)
{
struct pool_info *pi = data;
- int i;
+ int i,j;
r1bio_t *r1bio = __r1_bio;
- struct bio *bio = r1bio->bios[0];
- for (i = 0; i < RESYNC_PAGES; i++) {
- __free_page(bio->bi_io_vec[i].bv_page);
- bio->bi_io_vec[i].bv_page = NULL;
- }
+ for (i = 0; i < RESYNC_PAGES; i++)
+ for (j = pi->raid_disks; j-- ;) {
+ if (j == 0 ||
+ r1bio->bios[j]->bi_io_vec[i].bv_page !=
+ r1bio->bios[0]->bi_io_vec[i].bv_page)
+ safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
+ }
for (i=0 ; i < pi->raid_disks; i++)
bio_put(r1bio->bios[i]);
@@ -152,7 +170,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
for (i = 0; i < conf->raid_disks; i++) {
struct bio **bio = r1_bio->bios + i;
- if (*bio)
+ if (*bio && *bio != IO_BLOCKED)
bio_put(*bio);
*bio = NULL;
}
@@ -160,20 +178,13 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
static inline void free_r1bio(r1bio_t *r1_bio)
{
- unsigned long flags;
-
conf_t *conf = mddev_to_conf(r1_bio->mddev);
/*
* Wake up any possible resync thread that waits for the device
* to go idle.
*/
- spin_lock_irqsave(&conf->resync_lock, flags);
- if (!--conf->nr_pending) {
- wake_up(&conf->wait_idle);
- wake_up(&conf->wait_resume);
- }
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+ allow_barrier(conf);
put_all_bios(conf, r1_bio);
mempool_free(r1_bio, conf->r1bio_pool);
@@ -182,22 +193,17 @@ static inline void free_r1bio(r1bio_t *r1_bio)
static inline void put_buf(r1bio_t *r1_bio)
{
conf_t *conf = mddev_to_conf(r1_bio->mddev);
- unsigned long flags;
+ int i;
- mempool_free(r1_bio, conf->r1buf_pool);
+ for (i=0; i<conf->raid_disks; i++) {
+ struct bio *bio = r1_bio->bios[i];
+ if (bio->bi_end_io)
+ rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
+ }
- spin_lock_irqsave(&conf->resync_lock, flags);
- if (!conf->barrier)
- BUG();
- --conf->barrier;
- wake_up(&conf->wait_resume);
- wake_up(&conf->wait_idle);
+ mempool_free(r1_bio, conf->r1buf_pool);
- if (!--conf->nr_pending) {
- wake_up(&conf->wait_idle);
- wake_up(&conf->wait_resume);
- }
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+ lower_barrier(conf);
}
static void reschedule_retry(r1bio_t *r1_bio)
@@ -208,8 +214,10 @@ static void reschedule_retry(r1bio_t *r1_bio)
spin_lock_irqsave(&conf->device_lock, flags);
list_add(&r1_bio->retry_list, &conf->retry_list);
+ conf->nr_queued ++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ wake_up(&conf->wait_barrier);
md_wakeup_thread(mddev->thread);
}
@@ -261,9 +269,9 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (!uptodate)
- md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
- else
+ update_head_pos(mirror, r1_bio);
+
+ if (uptodate || conf->working_disks <= 1) {
/*
* Set R1BIO_Uptodate in our master bio, so that
* we will return a good error code to the higher
@@ -273,16 +281,11 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
* user-side. So if something waits for IO, then it will
* wait for the 'master' bio.
*/
- set_bit(R1BIO_Uptodate, &r1_bio->state);
-
- update_head_pos(mirror, r1_bio);
+ if (uptodate)
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
- /*
- * we have only one bio on the read side
- */
- if (uptodate)
raid_end_bio_io(r1_bio);
- else {
+ } else {
/*
* oops, read error:
*/
@@ -378,7 +381,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
/* free extra copy of the data pages */
int i = bio->bi_vcnt;
while (i--)
- __free_page(bio->bi_io_vec[i].bv_page);
+ safe_put_page(bio->bi_io_vec[i].bv_page);
}
/* clear the bitmap if all writes complete successfully */
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
@@ -433,11 +436,13 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
new_disk = 0;
for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+ r1_bio->bios[new_disk] == IO_BLOCKED ||
!rdev || !test_bit(In_sync, &rdev->flags)
|| test_bit(WriteMostly, &rdev->flags);
rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
- if (rdev && test_bit(In_sync, &rdev->flags))
+ if (rdev && test_bit(In_sync, &rdev->flags) &&
+ r1_bio->bios[new_disk] != IO_BLOCKED)
wonly_disk = new_disk;
if (new_disk == conf->raid_disks - 1) {
@@ -451,11 +456,13 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
/* make sure the disk is operational */
for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+ r1_bio->bios[new_disk] == IO_BLOCKED ||
!rdev || !test_bit(In_sync, &rdev->flags) ||
test_bit(WriteMostly, &rdev->flags);
rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {
- if (rdev && test_bit(In_sync, &rdev->flags))
+ if (rdev && test_bit(In_sync, &rdev->flags) &&
+ r1_bio->bios[new_disk] != IO_BLOCKED)
wonly_disk = new_disk;
if (new_disk <= 0)
@@ -492,7 +499,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
rdev = rcu_dereference(conf->mirrors[disk].rdev);
- if (!rdev ||
+ if (!rdev || r1_bio->bios[disk] == IO_BLOCKED ||
!test_bit(In_sync, &rdev->flags) ||
test_bit(WriteMostly, &rdev->flags))
continue;
@@ -520,7 +527,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
/* cannot risk returning a device that failed
* before we inc'ed nr_pending
*/
- atomic_dec(&rdev->nr_pending);
+ rdev_dec_pending(rdev, conf->mddev);
goto retry;
}
conf->next_seq_sect = this_sector + sectors;
@@ -593,42 +600,119 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
return ret;
}
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down. This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'. When that returns there
+ * is no background IO happening. It must arrange to call
+ * allow_barrier when it has finished its IO.
+ * Background IO calls must call raise_barrier. Once that returns
+ * there is no normal IO happening. It must arrange to call
+ * lower_barrier when the particular background IO completes.
*/
#define RESYNC_DEPTH 32
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf)
{
spin_lock_irq(&conf->resync_lock);
- wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
- conf->resync_lock, raid1_unplug(conf->mddev->queue));
-
- if (!conf->barrier++) {
- wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
- conf->resync_lock, raid1_unplug(conf->mddev->queue));
- if (conf->nr_pending)
- BUG();
+
+ /* Wait until no block IO is waiting */
+ wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+ conf->resync_lock,
+ raid1_unplug(conf->mddev->queue));
+
+ /* block any new IO from starting */
+ conf->barrier++;
+
+ /* Now wait for all pending IO to complete */
+ wait_event_lock_irq(conf->wait_barrier,
+ !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+ conf->resync_lock,
+ raid1_unplug(conf->mddev->queue));
+
+ spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&conf->resync_lock, flags);
+ conf->barrier--;
+ spin_unlock_irqrestore(&conf->resync_lock, flags);
+ wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
+{
+ spin_lock_irq(&conf->resync_lock);
+ if (conf->barrier) {
+ conf->nr_waiting++;
+ wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+ conf->resync_lock,
+ raid1_unplug(conf->mddev->queue));
+ conf->nr_waiting--;
}
- wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
- conf->resync_lock, raid1_unplug(conf->mddev->queue));
- conf->next_resync = sect;
+ conf->nr_pending++;
+ spin_unlock_irq(&conf->resync_lock);
+}
+
+static void allow_barrier(conf_t *conf)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&conf->resync_lock, flags);
+ conf->nr_pending--;
+ spin_unlock_irqrestore(&conf->resync_lock, flags);
+ wake_up(&conf->wait_barrier);
+}
+
+static void freeze_array(conf_t *conf)
+{
+ /* stop sync IO and normal IO and wait for everything to
+ * go quiet.
+ * We increment barrier and nr_waiting, and then
+ * wait until barrier+nr_pending matches nr_queued+2
+ */
+ spin_lock_irq(&conf->resync_lock);
+ conf->barrier++;
+ conf->nr_waiting++;
+ wait_event_lock_irq(conf->wait_barrier,
+ conf->barrier+conf->nr_pending == conf->nr_queued+2,
+ conf->resync_lock,
+ raid1_unplug(conf->mddev->queue));
+ spin_unlock_irq(&conf->resync_lock);
+}
+static void unfreeze_array(conf_t *conf)
+{
+ /* reverse the effect of the freeze */
+ spin_lock_irq(&conf->resync_lock);
+ conf->barrier--;
+ conf->nr_waiting--;
+ wake_up(&conf->wait_barrier);
spin_unlock_irq(&conf->resync_lock);
}
+
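
To make the pairing contract above concrete, a sketch (illustrative bodies, not from this patch) of how the two sides bracket their IO:

/* Regular IO side: free_r1bio() ends up calling allow_barrier(). */
static void normal_io_sketch(conf_t *conf)
{
	wait_barrier(conf);	/* blocks while resync holds the barrier */
	/* ... submit the r1bio ... */
	allow_barrier(conf);	/* actually run at IO completion */
}

/* Resync side: put_buf() ends up calling lower_barrier(). */
static void resync_io_sketch(conf_t *conf)
{
	raise_barrier(conf);	/* waits for nr_pending to drain */
	/* ... issue one resync window of requests ... */
	lower_barrier(conf);	/* actually run when the r1buf completes */
}
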
/* duplicate the data pages for behind I/O */
static struct page **alloc_behind_pages(struct bio *bio)
{
int i;
struct bio_vec *bvec;
- struct page **pages = kmalloc(bio->bi_vcnt * sizeof(struct page *),
+ struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page *),
GFP_NOIO);
if (unlikely(!pages))
goto do_sync_io;
- memset(pages, 0, bio->bi_vcnt * sizeof(struct page *));
-
bio_for_each_segment(bvec, bio, i) {
pages[i] = alloc_page(GFP_NOIO);
if (unlikely(!pages[i]))
@@ -644,7 +728,7 @@ static struct page **alloc_behind_pages(struct bio *bio)
do_sync_io:
if (pages)
for (i = 0; i < bio->bi_vcnt && pages[i]; i++)
- __free_page(pages[i]);
+ put_page(pages[i]);
kfree(pages);
PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
return NULL;
@@ -678,10 +762,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
*/
md_write_start(mddev, bio); /* wait on superblock update early */
- spin_lock_irq(&conf->resync_lock);
- wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
- conf->nr_pending++;
- spin_unlock_irq(&conf->resync_lock);
+ wait_barrier(conf);
disk_stat_inc(mddev->gendisk, ios[rw]);
disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -749,7 +830,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
!test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
if (test_bit(Faulty, &rdev->flags)) {
- atomic_dec(&rdev->nr_pending);
+ rdev_dec_pending(rdev, mddev);
r1_bio->bios[i] = NULL;
} else
r1_bio->bios[i] = bio;
@@ -909,13 +990,8 @@ static void print_conf(conf_t *conf)
static void close_sync(conf_t *conf)
{
- spin_lock_irq(&conf->resync_lock);
- wait_event_lock_irq(conf->wait_resume, !conf->barrier,
- conf->resync_lock, raid1_unplug(conf->mddev->queue));
- spin_unlock_irq(&conf->resync_lock);
-
- if (conf->barrier) BUG();
- if (waitqueue_active(&conf->wait_idle)) BUG();
+ wait_barrier(conf);
+ allow_barrier(conf);
mempool_destroy(conf->r1buf_pool);
conf->r1buf_pool = NULL;
@@ -1015,28 +1091,27 @@ abort:
static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
- conf_t *conf = mddev_to_conf(r1_bio->mddev);
+ int i;
if (bio->bi_size)
return 1;
- if (r1_bio->bios[r1_bio->read_disk] != bio)
- BUG();
- update_head_pos(r1_bio->read_disk, r1_bio);
+ for (i=r1_bio->mddev->raid_disks; i--; )
+ if (r1_bio->bios[i] == bio)
+ break;
+ BUG_ON(i < 0);
+ update_head_pos(i, r1_bio);
/*
* we have read a block, now it needs to be re-written,
* or re-read if the read failed.
* We don't do much here, just schedule handling by raid1d
*/
- if (!uptodate) {
- md_error(r1_bio->mddev,
- conf->mirrors[r1_bio->read_disk].rdev);
- } else
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R1BIO_Uptodate, &r1_bio->state);
- rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
- reschedule_retry(r1_bio);
+
+ if (atomic_dec_and_test(&r1_bio->remaining))
+ reschedule_retry(r1_bio);
return 0;
}
@@ -1066,7 +1141,6 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
md_done_sync(mddev, r1_bio->sectors, uptodate);
put_buf(r1_bio);
}
- rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
return 0;
}
@@ -1079,34 +1153,173 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
bio = r1_bio->bios[r1_bio->read_disk];
-/*
- if (r1_bio->sector == 0) printk("First sync write startss\n");
-*/
- /*
- * schedule writes
- */
+
+ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ /* We have read all readable devices. If we haven't
+ * got the block, then there is no hope left.
+ * If we have, then we want to do a comparison
+ * and skip the write if everything is the same.
+ * If any blocks failed to read, then we need to
+ * attempt an over-write
+ */
+ int primary;
+ if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+ for (i=0; i<mddev->raid_disks; i++)
+ if (r1_bio->bios[i]->bi_end_io == end_sync_read)
+ md_error(mddev, conf->mirrors[i].rdev);
+
+ md_done_sync(mddev, r1_bio->sectors, 1);
+ put_buf(r1_bio);
+ return;
+ }
+ for (primary=0; primary<mddev->raid_disks; primary++)
+ if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
+ test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+ r1_bio->bios[primary]->bi_end_io = NULL;
+ rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
+ break;
+ }
+ r1_bio->read_disk = primary;
+ for (i=0; i<mddev->raid_disks; i++)
+ if (r1_bio->bios[i]->bi_end_io == end_sync_read &&
+ test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) {
+ int j;
+ int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
+ struct bio *pbio = r1_bio->bios[primary];
+ struct bio *sbio = r1_bio->bios[i];
+ for (j = vcnt; j-- ; )
+ if (memcmp(page_address(pbio->bi_io_vec[j].bv_page),
+ page_address(sbio->bi_io_vec[j].bv_page),
+ PAGE_SIZE))
+ break;
+ if (j >= 0)
+ mddev->resync_mismatches += r1_bio->sectors;
+ if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
+ sbio->bi_end_io = NULL;
+ rdev_dec_pending(conf->mirrors[i].rdev, mddev);
+ } else {
+ /* fixup the bio for reuse */
+ sbio->bi_vcnt = vcnt;
+ sbio->bi_size = r1_bio->sectors << 9;
+ sbio->bi_idx = 0;
+ sbio->bi_phys_segments = 0;
+ sbio->bi_hw_segments = 0;
+ sbio->bi_hw_front_size = 0;
+ sbio->bi_hw_back_size = 0;
+ sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
+ sbio->bi_flags |= 1 << BIO_UPTODATE;
+ sbio->bi_next = NULL;
+ sbio->bi_sector = r1_bio->sector +
+ conf->mirrors[i].rdev->data_offset;
+ sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ }
+ }
+ }
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
- /*
- * There is no point trying a read-for-reconstruct as
- * reconstruct is about to be aborted
+ /* ouch - failed to read all of that.
+ * Try some synchronous reads of other devices to get
+ * good data, much like with normal read errors. Only
+ * read into the pages we already have so that we don't
+ * need to re-issue the read request.
+ * We don't need to freeze the array, because being in an
+ * active sync request, there is no normal IO, and
+ * no overlapping syncs.
*/
- char b[BDEVNAME_SIZE];
- printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error"
- " for block %llu\n",
- bdevname(bio->bi_bdev,b),
- (unsigned long long)r1_bio->sector);
- md_done_sync(mddev, r1_bio->sectors, 0);
- put_buf(r1_bio);
- return;
+ sector_t sect = r1_bio->sector;
+ int sectors = r1_bio->sectors;
+ int idx = 0;
+
+ while(sectors) {
+ int s = sectors;
+ int d = r1_bio->read_disk;
+ int success = 0;
+ mdk_rdev_t *rdev;
+
+ if (s > (PAGE_SIZE>>9))
+ s = PAGE_SIZE >> 9;
+ do {
+ if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
+ rdev = conf->mirrors[d].rdev;
+ if (sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9,
+ bio->bi_io_vec[idx].bv_page,
+ READ)) {
+ success = 1;
+ break;
+ }
+ }
+ d++;
+ if (d == conf->raid_disks)
+ d = 0;
+ } while (!success && d != r1_bio->read_disk);
+
+ if (success) {
+ int start = d;
+ /* write it back and re-read */
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
+ while (d != r1_bio->read_disk) {
+ if (d == 0)
+ d = conf->raid_disks;
+ d--;
+ if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+ continue;
+ rdev = conf->mirrors[d].rdev;
+ atomic_add(s, &rdev->corrected_errors);
+ if (sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9,
+ bio->bi_io_vec[idx].bv_page,
+ WRITE) == 0)
+ md_error(mddev, rdev);
+ }
+ d = start;
+ while (d != r1_bio->read_disk) {
+ if (d == 0)
+ d = conf->raid_disks;
+ d--;
+ if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+ continue;
+ rdev = conf->mirrors[d].rdev;
+ if (sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9,
+ bio->bi_io_vec[idx].bv_page,
+ READ) == 0)
+ md_error(mddev, rdev);
+ }
+ } else {
+ char b[BDEVNAME_SIZE];
+ /* Cannot read from anywhere, array is toast */
+ md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+ printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error"
+ " for block %llu\n",
+ bdevname(bio->bi_bdev,b),
+ (unsigned long long)r1_bio->sector);
+ md_done_sync(mddev, r1_bio->sectors, 0);
+ put_buf(r1_bio);
+ return;
+ }
+ sectors -= s;
+ sect += s;
+ idx ++;
+ }
}
+ /*
+ * schedule writes
+ */
atomic_set(&r1_bio->remaining, 1);
for (i = 0; i < disks ; i++) {
wbio = r1_bio->bios[i];
- if (wbio->bi_end_io != end_sync_write)
+ if (wbio->bi_end_io == NULL ||
+ (wbio->bi_end_io == end_sync_read &&
+ (i == r1_bio->read_disk ||
+ !test_bit(MD_RECOVERY_SYNC, &mddev->recovery))))
continue;
- atomic_inc(&conf->mirrors[i].rdev->nr_pending);
+ wbio->bi_rw = WRITE;
+ wbio->bi_end_io = end_sync_write;
atomic_inc(&r1_bio->remaining);
md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
@@ -1167,6 +1380,7 @@ static void raid1d(mddev_t *mddev)
break;
r1_bio = list_entry(head->prev, r1bio_t, retry_list);
list_del(head->prev);
+ conf->nr_queued--;
spin_unlock_irqrestore(&conf->device_lock, flags);
mddev = r1_bio->mddev;
@@ -1206,6 +1420,86 @@ static void raid1d(mddev_t *mddev)
}
} else {
int disk;
+
+ /* we got a read error. Maybe the drive is bad. Maybe just
+ * the block and we can fix it.
+ * We freeze all other IO, and try reading the block from
+ * other devices. When we find one, we re-write
+ * and check whether that fixes the read error.
+ * This is all done synchronously while the array is
+ * frozen
+ */
+ sector_t sect = r1_bio->sector;
+ int sectors = r1_bio->sectors;
+ freeze_array(conf);
+ if (mddev->ro == 0) while(sectors) {
+ int s = sectors;
+ int d = r1_bio->read_disk;
+ int success = 0;
+
+ if (s > (PAGE_SIZE>>9))
+ s = PAGE_SIZE >> 9;
+
+ do {
+ rdev = conf->mirrors[d].rdev;
+ if (rdev &&
+ test_bit(In_sync, &rdev->flags) &&
+ sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9,
+ conf->tmppage, READ))
+ success = 1;
+ else {
+ d++;
+ if (d == conf->raid_disks)
+ d = 0;
+ }
+ } while (!success && d != r1_bio->read_disk);
+
+ if (success) {
+ /* write it back and re-read */
+ int start = d;
+ while (d != r1_bio->read_disk) {
+ if (d==0)
+ d = conf->raid_disks;
+ d--;
+ rdev = conf->mirrors[d].rdev;
+ if (rdev &&
+ test_bit(In_sync, &rdev->flags)) {
+ atomic_add(s, &rdev->corrected_errors);
+ if (sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9, conf->tmppage, WRITE) == 0)
+ /* Well, this device is dead */
+ md_error(mddev, rdev);
+ }
+ }
+ d = start;
+ while (d != r1_bio->read_disk) {
+ if (d==0)
+ d = conf->raid_disks;
+ d--;
+ rdev = conf->mirrors[d].rdev;
+ if (rdev &&
+ test_bit(In_sync, &rdev->flags)) {
+ if (sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9, conf->tmppage, READ) == 0)
+ /* Well, this device is dead */
+ md_error(mddev, rdev);
+ }
+ }
+ } else {
+ /* Cannot read from anywhere -- bye bye array */
+ md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+ break;
+ }
+ sectors -= s;
+ sect += s;
+ }
+
+ unfreeze_array(conf);
+
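
The do/while above walks the mirrors in a ring, starting at the device the failed read came from and wrapping at raid_disks, until some copy of the block reads cleanly. A minimal user-space sketch of that walk; try_read() is a stand-in for sync_page_io(), and the conf/r1_bio state is reduced to plain ints:

    #include <stdio.h>

    /* stand-in for sync_page_io(): pretend only disk 2 reads cleanly */
    static int try_read(int disk)
    {
        return disk == 2;
    }

    /* walk the ring of mirrors; return the disk that read OK, or -1 */
    static int find_readable(int read_disk, int raid_disks)
    {
        int d = read_disk;
        do {
            if (try_read(d))
                return d;
            d++;
            if (d == raid_disks)    /* wrap, exactly as the kernel loop does */
                d = 0;
        } while (d != read_disk);
        return -1;                  /* nothing readable: array is toast */
    }

    int main(void)
    {
        printf("readable copy: %d\n", find_readable(0, 4));
        return 0;
    }
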
bio = r1_bio->bios[r1_bio->read_disk];
if ((disk=read_balance(conf, r1_bio)) == -1) {
printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
@@ -1214,7 +1508,8 @@ static void raid1d(mddev_t *mddev)
(unsigned long long)r1_bio->sector);
raid_end_bio_io(r1_bio);
} else {
- r1_bio->bios[r1_bio->read_disk] = NULL;
+ r1_bio->bios[r1_bio->read_disk] =
+ mddev->ro ? IO_BLOCKED : NULL;
r1_bio->read_disk = disk;
bio_put(bio);
bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
@@ -1269,14 +1564,13 @@ static int init_resync(conf_t *conf)
static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
{
conf_t *conf = mddev_to_conf(mddev);
- mirror_info_t *mirror;
r1bio_t *r1_bio;
struct bio *bio;
sector_t max_sector, nr_sectors;
- int disk;
+ int disk = -1;
int i;
- int wonly;
- int write_targets = 0;
+ int wonly = -1;
+ int write_targets = 0, read_targets = 0;
int sync_blocks;
int still_degraded = 0;
@@ -1317,55 +1611,35 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return sync_blocks;
}
/*
- * If there is non-resync activity waiting for us then
- * put in a delay to throttle resync.
+ * If there is non-resync activity waiting for a turn,
+ * and resync is going fast enough,
+	 * then let it through before starting on this new sync request.
*/
- if (!go_faster && waitqueue_active(&conf->wait_resume))
+ if (!go_faster && conf->nr_waiting)
msleep_interruptible(1000);
- device_barrier(conf, sector_nr + RESYNC_SECTORS);
-
- /*
- * If reconstructing, and >1 working disc,
- * could dedicate one to rebuild and others to
- * service read requests ..
- */
- disk = conf->last_used;
- /* make sure disk is operational */
- wonly = disk;
- while (conf->mirrors[disk].rdev == NULL ||
- !test_bit(In_sync, &conf->mirrors[disk].rdev->flags) ||
- test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags)
- ) {
- if (conf->mirrors[disk].rdev &&
- test_bit(In_sync, &conf->mirrors[disk].rdev->flags))
- wonly = disk;
- if (disk <= 0)
- disk = conf->raid_disks;
- disk--;
- if (disk == conf->last_used) {
- disk = wonly;
- break;
- }
- }
- conf->last_used = disk;
- atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
+ raise_barrier(conf);
- mirror = conf->mirrors + disk;
+ conf->next_resync = sector_nr;
r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
-
- spin_lock_irq(&conf->resync_lock);
- conf->nr_pending++;
- spin_unlock_irq(&conf->resync_lock);
+ rcu_read_lock();
+ /*
+ * If we get a correctably read error during resync or recovery,
+ * we might want to read from a different device. So we
+ * flag all drives that could conceivably be read from for READ,
+ * and any others (which will be non-In_sync devices) for WRITE.
+ * If a read fails, we try reading from something else for which READ
+ * is OK.
+ */
r1_bio->mddev = mddev;
r1_bio->sector = sector_nr;
r1_bio->state = 0;
set_bit(R1BIO_IsSync, &r1_bio->state);
- r1_bio->read_disk = disk;
for (i=0; i < conf->raid_disks; i++) {
+ mdk_rdev_t *rdev;
bio = r1_bio->bios[i];
/* take from bio_init */
@@ -1380,35 +1654,49 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
bio->bi_end_io = NULL;
bio->bi_private = NULL;
- if (i == disk) {
- bio->bi_rw = READ;
- bio->bi_end_io = end_sync_read;
- } else if (conf->mirrors[i].rdev == NULL ||
- test_bit(Faulty, &conf->mirrors[i].rdev->flags)) {
+ rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev == NULL ||
+ test_bit(Faulty, &rdev->flags)) {
still_degraded = 1;
continue;
- } else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
- sector_nr + RESYNC_SECTORS > mddev->recovery_cp ||
- test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ } else if (!test_bit(In_sync, &rdev->flags)) {
bio->bi_rw = WRITE;
bio->bi_end_io = end_sync_write;
write_targets ++;
- } else
- /* no need to read or write here */
- continue;
- bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset;
- bio->bi_bdev = conf->mirrors[i].rdev->bdev;
+ } else {
+ /* may need to read from here */
+ bio->bi_rw = READ;
+ bio->bi_end_io = end_sync_read;
+ if (test_bit(WriteMostly, &rdev->flags)) {
+ if (wonly < 0)
+ wonly = i;
+ } else {
+ if (disk < 0)
+ disk = i;
+ }
+ read_targets++;
+ }
+ atomic_inc(&rdev->nr_pending);
+ bio->bi_sector = sector_nr + rdev->data_offset;
+ bio->bi_bdev = rdev->bdev;
bio->bi_private = r1_bio;
}
+ rcu_read_unlock();
+ if (disk < 0)
+ disk = wonly;
+ r1_bio->read_disk = disk;
+
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0)
+ /* extra read targets are also write targets */
+ write_targets += read_targets-1;
- if (write_targets == 0) {
+ if (write_targets == 0 || read_targets == 0) {
		/* There is nowhere to write, or nothing to read from,
		 * so all non-sync drives must be failed - so we are finished
		 */
sector_t rv = max_sector - sector_nr;
*skipped = 1;
put_buf(r1_bio);
- rdev_dec_pending(conf->mirrors[disk].rdev, mddev);
return rv;
}
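
The loop above sorts every device into one of three roles before the sync I/O is built: absent or faulty devices leave the array degraded, out-of-sync devices become write targets, and in-sync devices become read candidates (write-mostly ones only if nothing else is available). The decision in miniature, with the rdev pointer and flag bits modeled as 0/1 values:

    #include <assert.h>

    enum sync_role { ROLE_NONE, ROLE_WRITE_TARGET, ROLE_READ_SOURCE };

    /* mirrors the per-device branch in sync_request(); 'present',
     * 'faulty' and 'in_sync' model rdev != NULL and the flag bits */
    static enum sync_role classify(int present, int faulty, int in_sync)
    {
        if (!present || faulty)
            return ROLE_NONE;           /* still_degraded = 1 */
        if (!in_sync)
            return ROLE_WRITE_TARGET;   /* gets end_sync_write */
        return ROLE_READ_SOURCE;        /* gets end_sync_read */
    }

    int main(void)
    {
        assert(classify(0, 0, 0) == ROLE_NONE);         /* missing device */
        assert(classify(1, 1, 0) == ROLE_NONE);         /* faulty device */
        assert(classify(1, 0, 0) == ROLE_WRITE_TARGET); /* recovery target */
        assert(classify(1, 0, 1) == ROLE_READ_SOURCE);  /* sync source */
        return 0;
    }
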
@@ -1436,10 +1724,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
for (i=0 ; i < conf->raid_disks; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io) {
- page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page;
+ page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
if (bio_add_page(bio, page, len, 0) == 0) {
/* stop here */
- r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page;
+ bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
while (i > 0) {
i--;
bio = r1_bio->bios[i];
@@ -1459,12 +1747,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
sync_blocks -= (len>>9);
} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
bio_full:
- bio = r1_bio->bios[disk];
r1_bio->sectors = nr_sectors;
- md_sync_acct(mirror->rdev->bdev, nr_sectors);
+ /* For a user-requested sync, we read all readable devices and do a
+ * compare
+ */
+ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ atomic_set(&r1_bio->remaining, read_targets);
+ for (i=0; i<conf->raid_disks; i++) {
+ bio = r1_bio->bios[i];
+ if (bio->bi_end_io == end_sync_read) {
+ md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors);
+ generic_make_request(bio);
+ }
+ }
+ } else {
+ atomic_set(&r1_bio->remaining, 1);
+ bio = r1_bio->bios[r1_bio->read_disk];
+ md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev,
+ nr_sectors);
+ generic_make_request(bio);
- generic_make_request(bio);
+ }
return nr_sectors;
}
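
For a user-requested check, every readable copy is submitted and the copies are later compared page by page in the sync write-out path, with any difference counted in resync_mismatches. A toy model of that compare, with pages shrunk to small byte buffers:

    #include <stdio.h>
    #include <string.h>

    #define NCOPIES   3
    #define PAGE_LEN  8

    int main(void)
    {
        /* three copies of one "page"; copy 2 has a flipped byte */
        unsigned char copy[NCOPIES][PAGE_LEN] = {
            "raid-ok", "raid-ok", "raid-ok"
        };
        copy[2][0] ^= 1;

        int mismatches = 0;
        for (int i = 1; i < NCOPIES; i++)   /* compare against copy 0 */
            if (memcmp(copy[0], copy[i], PAGE_LEN))
                mismatches++;
        printf("%d of %d copies differ from the first\n",
               mismatches, NCOPIES - 1);
        return 0;
    }
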
@@ -1487,18 +1791,19 @@ static int run(mddev_t *mddev)
* bookkeeping area. [whatever we allocate in run(),
* should be freed in stop()]
*/
- conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
+ conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
mddev->private = conf;
if (!conf)
goto out_no_mem;
- memset(conf, 0, sizeof(*conf));
- conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
+ conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
GFP_KERNEL);
if (!conf->mirrors)
goto out_no_mem;
- memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
+ conf->tmppage = alloc_page(GFP_KERNEL);
+ if (!conf->tmppage)
+ goto out_no_mem;
conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
if (!conf->poolinfo)
@@ -1542,8 +1847,7 @@ static int run(mddev_t *mddev)
mddev->recovery_cp = MaxSector;
spin_lock_init(&conf->resync_lock);
- init_waitqueue_head(&conf->wait_idle);
- init_waitqueue_head(&conf->wait_resume);
+ init_waitqueue_head(&conf->wait_barrier);
bio_list_init(&conf->pending_bio_list);
bio_list_init(&conf->flushing_bio_list);
@@ -1583,7 +1887,6 @@ static int run(mddev_t *mddev)
mdname(mddev));
goto out_free_conf;
}
- if (mddev->bitmap) mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
printk(KERN_INFO
"raid1: raid set %s active with %d out of %d mirrors\n",
@@ -1608,6 +1911,7 @@ out_free_conf:
if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
+ safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
kfree(conf);
mddev->private = NULL;
@@ -1706,19 +2010,14 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
kfree(newpoolinfo);
return -ENOMEM;
}
- newmirrors = kmalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL);
+ newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL);
if (!newmirrors) {
kfree(newpoolinfo);
mempool_destroy(newpool);
return -ENOMEM;
}
- memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks);
- spin_lock_irq(&conf->resync_lock);
- conf->barrier++;
- wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
- conf->resync_lock, raid1_unplug(mddev->queue));
- spin_unlock_irq(&conf->resync_lock);
+ raise_barrier(conf);
/* ok, everything is stopped */
oldpool = conf->r1bio_pool;
@@ -1738,12 +2037,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
conf->raid_disks = mddev->raid_disks = raid_disks;
conf->last_used = 0; /* just make sure it is in-range */
- spin_lock_irq(&conf->resync_lock);
- conf->barrier--;
- spin_unlock_irq(&conf->resync_lock);
- wake_up(&conf->wait_resume);
- wake_up(&conf->wait_idle);
-
+ lower_barrier(conf);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
@@ -1758,33 +2052,19 @@ static void raid1_quiesce(mddev_t *mddev, int state)
switch(state) {
case 1:
- spin_lock_irq(&conf->resync_lock);
- conf->barrier++;
- wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
- conf->resync_lock, raid1_unplug(mddev->queue));
- spin_unlock_irq(&conf->resync_lock);
+ raise_barrier(conf);
break;
case 0:
- spin_lock_irq(&conf->resync_lock);
- conf->barrier--;
- spin_unlock_irq(&conf->resync_lock);
- wake_up(&conf->wait_resume);
- wake_up(&conf->wait_idle);
+ lower_barrier(conf);
break;
}
- if (mddev->thread) {
- if (mddev->bitmap)
- mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
- else
- mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
- md_wakeup_thread(mddev->thread);
- }
}
-static mdk_personality_t raid1_personality =
+static struct mdk_personality raid1_personality =
{
.name = "raid1",
+ .level = 1,
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
@@ -1802,15 +2082,17 @@ static mdk_personality_t raid1_personality =
static int __init raid_init(void)
{
- return register_md_personality(RAID1, &raid1_personality);
+ return register_md_personality(&raid1_personality);
}
static void raid_exit(void)
{
- unregister_md_personality(RAID1);
+ unregister_md_personality(&raid1_personality);
}
module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-3"); /* RAID1 */
+MODULE_ALIAS("md-raid1");
+MODULE_ALIAS("md-level-1");
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 713dc9c2c73..9e658e519a2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -18,7 +18,9 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include "dm-bio-list.h"
#include <linux/raid/raid10.h>
+#include <linux/raid/bitmap.h>
/*
* RAID10 provides a combination of RAID0 and RAID1 functionality.
@@ -47,6 +49,9 @@
static void unplug_slaves(mddev_t *mddev);
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
+
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{
conf_t *conf = data;
@@ -54,10 +59,8 @@ static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
int size = offsetof(struct r10bio_s, devs[conf->copies]);
/* allocate a r10bio with room for raid_disks entries in the bios array */
- r10_bio = kmalloc(size, gfp_flags);
- if (r10_bio)
- memset(r10_bio, 0, size);
- else
+ r10_bio = kzalloc(size, gfp_flags);
+ if (!r10_bio)
unplug_slaves(conf->mddev);
return r10_bio;
@@ -129,10 +132,10 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
out_free_pages:
for ( ; i > 0 ; i--)
- __free_page(bio->bi_io_vec[i-1].bv_page);
+ safe_put_page(bio->bi_io_vec[i-1].bv_page);
while (j--)
for (i = 0; i < RESYNC_PAGES ; i++)
- __free_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
+ safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
j = -1;
out_free_bio:
while ( ++j < nalloc )
@@ -152,7 +155,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
struct bio *bio = r10bio->devs[j].bio;
if (bio) {
for (i = 0; i < RESYNC_PAGES; i++) {
- __free_page(bio->bi_io_vec[i].bv_page);
+ safe_put_page(bio->bi_io_vec[i].bv_page);
bio->bi_io_vec[i].bv_page = NULL;
}
bio_put(bio);
@@ -167,7 +170,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
for (i = 0; i < conf->copies; i++) {
struct bio **bio = & r10_bio->devs[i].bio;
- if (*bio)
+ if (*bio && *bio != IO_BLOCKED)
bio_put(*bio);
*bio = NULL;
}
@@ -175,20 +178,13 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
static inline void free_r10bio(r10bio_t *r10_bio)
{
- unsigned long flags;
-
conf_t *conf = mddev_to_conf(r10_bio->mddev);
/*
* Wake up any possible resync thread that waits for the device
* to go idle.
*/
- spin_lock_irqsave(&conf->resync_lock, flags);
- if (!--conf->nr_pending) {
- wake_up(&conf->wait_idle);
- wake_up(&conf->wait_resume);
- }
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+ allow_barrier(conf);
put_all_bios(conf, r10_bio);
mempool_free(r10_bio, conf->r10bio_pool);
@@ -197,22 +193,10 @@ static inline void free_r10bio(r10bio_t *r10_bio)
static inline void put_buf(r10bio_t *r10_bio)
{
conf_t *conf = mddev_to_conf(r10_bio->mddev);
- unsigned long flags;
mempool_free(r10_bio, conf->r10buf_pool);
- spin_lock_irqsave(&conf->resync_lock, flags);
- if (!conf->barrier)
- BUG();
- --conf->barrier;
- wake_up(&conf->wait_resume);
- wake_up(&conf->wait_idle);
-
- if (!--conf->nr_pending) {
- wake_up(&conf->wait_idle);
- wake_up(&conf->wait_resume);
- }
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+ lower_barrier(conf);
}
static void reschedule_retry(r10bio_t *r10_bio)
@@ -223,6 +207,7 @@ static void reschedule_retry(r10bio_t *r10_bio)
spin_lock_irqsave(&conf->device_lock, flags);
list_add(&r10_bio->retry_list, &conf->retry_list);
+ conf->nr_queued ++;
spin_unlock_irqrestore(&conf->device_lock, flags);
md_wakeup_thread(mddev->thread);
@@ -268,9 +253,9 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (!uptodate)
- md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
- else
+ update_head_pos(slot, r10_bio);
+
+ if (uptodate) {
/*
* Set R10BIO_Uptodate in our master bio, so that
* we will return a good error code to the higher
@@ -281,15 +266,8 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
* wait for the 'master' bio.
*/
set_bit(R10BIO_Uptodate, &r10_bio->state);
-
- update_head_pos(slot, r10_bio);
-
- /*
- * we have only one bio on the read side
- */
- if (uptodate)
raid_end_bio_io(r10_bio);
- else {
+ } else {
/*
* oops, read error:
*/
@@ -322,9 +300,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
- if (!uptodate)
+ if (!uptodate) {
md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
- else
+ /* an I/O failed, we can't clear the bitmap */
+ set_bit(R10BIO_Degraded, &r10_bio->state);
+ } else
/*
* Set R10BIO_Uptodate in our master bio, so that
	 * we will return a good error code to the higher
@@ -344,6 +324,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
* already.
*/
if (atomic_dec_and_test(&r10_bio->remaining)) {
+ /* clear the bitmap if all writes complete successfully */
+ bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+ r10_bio->sectors,
+ !test_bit(R10BIO_Degraded, &r10_bio->state),
+ 0);
md_write_end(r10_bio->mddev);
raid_end_bio_io(r10_bio);
}
@@ -502,8 +487,9 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
rcu_read_lock();
/*
* Check if we can balance. We can balance on the whole
- * device if no resync is going on, or below the resync window.
- * We take the first readable disk when above the resync window.
+ * device if no resync is going on (recovery is ok), or below
+ * the resync window. We take the first readable disk when
+ * above the resync window.
*/
if (conf->mddev->recovery_cp < MaxSector
&& (this_sector + sectors >= conf->next_resync)) {
@@ -512,6 +498,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
disk = r10_bio->devs[slot].devnum;
while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+ r10_bio->devs[slot].bio == IO_BLOCKED ||
!test_bit(In_sync, &rdev->flags)) {
slot++;
if (slot == conf->copies) {
@@ -529,6 +516,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
slot = 0;
disk = r10_bio->devs[slot].devnum;
while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+ r10_bio->devs[slot].bio == IO_BLOCKED ||
!test_bit(In_sync, &rdev->flags)) {
slot ++;
if (slot == conf->copies) {
@@ -549,6 +537,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+ r10_bio->devs[nslot].bio == IO_BLOCKED ||
!test_bit(In_sync, &rdev->flags))
continue;
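
Each of these three hunks teaches read_balance() to skip a slot holding IO_BLOCKED. That constant is added to the raid1/raid10 headers elsewhere in this patch as a small non-NULL sentinel, so a slot can be marked "do not read here" without looking free; the value 1 below is an assumption matching that usual definition, not copied from this hunk:

    #include <stdio.h>

    struct bio { int dummy; };

    #define IO_BLOCKED ((struct bio *)1)  /* never dereferenced, only compared */

    static const char *slot_state(struct bio *b)
    {
        if (b == NULL)
            return "free";
        if (b == IO_BLOCKED)
            return "blocked (do not read here)";
        return "in use";
    }

    int main(void)
    {
        struct bio real;
        printf("%s\n", slot_state(NULL));
        printf("%s\n", slot_state(IO_BLOCKED));
        printf("%s\n", slot_state(&real));
        return 0;
    }
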
@@ -607,7 +596,10 @@ static void unplug_slaves(mddev_t *mddev)
static void raid10_unplug(request_queue_t *q)
{
+ mddev_t *mddev = q->queuedata;
+
unplug_slaves(q->queuedata);
+ md_wakeup_thread(mddev->thread);
}
static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
@@ -640,27 +632,107 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
return ret;
}
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down. This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'. When that returns there
+ * is no backgroup IO happening, It must arrange to call
+ * is no background IO happening.  It must arrange to call
+ * allow_barrier when it has finished its IO.
+ * background IO calls must call raise_barrier.  Once that returns
+ * there is no normal IO happening.  It must arrange to call
*/
#define RESYNC_DEPTH 32
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf, int force)
+{
+ BUG_ON(force && !conf->barrier);
+ spin_lock_irq(&conf->resync_lock);
+
+ /* Wait until no block IO is waiting (unless 'force') */
+ wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
+ conf->resync_lock,
+ raid10_unplug(conf->mddev->queue));
+
+ /* block any new IO from starting */
+ conf->barrier++;
+
+	/* Now wait for all pending IO to complete */
+ wait_event_lock_irq(conf->wait_barrier,
+ !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+ conf->resync_lock,
+ raid10_unplug(conf->mddev->queue));
+
+ spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&conf->resync_lock, flags);
+ conf->barrier--;
+ spin_unlock_irqrestore(&conf->resync_lock, flags);
+ wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
{
spin_lock_irq(&conf->resync_lock);
- wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
- conf->resync_lock, unplug_slaves(conf->mddev));
-
- if (!conf->barrier++) {
- wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
- conf->resync_lock, unplug_slaves(conf->mddev));
- if (conf->nr_pending)
- BUG();
+ if (conf->barrier) {
+ conf->nr_waiting++;
+ wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+ conf->resync_lock,
+ raid10_unplug(conf->mddev->queue));
+ conf->nr_waiting--;
}
- wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
- conf->resync_lock, unplug_slaves(conf->mddev));
- conf->next_resync = sect;
+ conf->nr_pending++;
+ spin_unlock_irq(&conf->resync_lock);
+}
+
+static void allow_barrier(conf_t *conf)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&conf->resync_lock, flags);
+ conf->nr_pending--;
+ spin_unlock_irqrestore(&conf->resync_lock, flags);
+ wake_up(&conf->wait_barrier);
+}
+
+static void freeze_array(conf_t *conf)
+{
+ /* stop syncio and normal IO and wait for everything to
+ * go quiet.
+ * We increment barrier and nr_waiting, and then
+ * wait until barrier+nr_pending match nr_queued+2
+ */
+ spin_lock_irq(&conf->resync_lock);
+ conf->barrier++;
+ conf->nr_waiting++;
+ wait_event_lock_irq(conf->wait_barrier,
+ conf->barrier+conf->nr_pending == conf->nr_queued+2,
+ conf->resync_lock,
+ raid10_unplug(conf->mddev->queue));
+ spin_unlock_irq(&conf->resync_lock);
+}
+
+static void unfreeze_array(conf_t *conf)
+{
+ /* reverse the effect of the freeze */
+ spin_lock_irq(&conf->resync_lock);
+ conf->barrier--;
+ conf->nr_waiting--;
+ wake_up(&conf->wait_barrier);
spin_unlock_irq(&conf->resync_lock);
}
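
The helpers above implement the protocol described in the comment block with one spinlock, one waitqueue and three counters; freeze_array() builds on the same counters, its "+2" covering the barrier it raised plus the one failed request being retried, which still counts in nr_pending but no longer in nr_queued. Below is a user-space model of the core state machine, assuming POSIX threads (a condition variable plays wait_barrier, a mutex plays resync_lock; the 'force' argument and the RESYNC_DEPTH bound are omitted for brevity):

    #include <pthread.h>

    struct barrier_ctl {
        pthread_mutex_t lock;   /* plays conf->resync_lock */
        pthread_cond_t  wake;   /* plays conf->wait_barrier */
        int barrier;            /* background activities in progress */
        int nr_pending;         /* normal IO in flight */
        int nr_waiting;         /* normal IO parked behind the barrier */
    };

    /* background IO (resync/recovery/reshape) brackets its work
     * with raise_barrier()/lower_barrier() */
    static void raise_barrier(struct barrier_ctl *c)
    {
        pthread_mutex_lock(&c->lock);
        while (c->nr_waiting)           /* let parked normal IO go first */
            pthread_cond_wait(&c->wake, &c->lock);
        c->barrier++;                   /* from here on, new IO blocks */
        while (c->nr_pending)           /* drain IO already in flight */
            pthread_cond_wait(&c->wake, &c->lock);
        pthread_mutex_unlock(&c->lock);
    }

    static void lower_barrier(struct barrier_ctl *c)
    {
        pthread_mutex_lock(&c->lock);
        c->barrier--;
        pthread_cond_broadcast(&c->wake);
        pthread_mutex_unlock(&c->lock);
    }

    /* normal IO brackets its work with wait_barrier()/allow_barrier() */
    static void wait_barrier(struct barrier_ctl *c)
    {
        pthread_mutex_lock(&c->lock);
        if (c->barrier) {
            c->nr_waiting++;
            while (c->barrier)
                pthread_cond_wait(&c->wake, &c->lock);
            c->nr_waiting--;
            pthread_cond_broadcast(&c->wake); /* raise_barrier may wait on us */
        }
        c->nr_pending++;
        pthread_mutex_unlock(&c->lock);
    }

    static void allow_barrier(struct barrier_ctl *c)
    {
        pthread_mutex_lock(&c->lock);
        c->nr_pending--;
        pthread_cond_broadcast(&c->wake);
        pthread_mutex_unlock(&c->lock);
    }
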
@@ -674,6 +746,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
int i;
int chunk_sects = conf->chunk_mask + 1;
const int rw = bio_data_dir(bio);
+ struct bio_list bl;
+ unsigned long flags;
if (unlikely(bio_barrier(bio))) {
bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
@@ -719,10 +793,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
- spin_lock_irq(&conf->resync_lock);
- wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
- conf->nr_pending++;
- spin_unlock_irq(&conf->resync_lock);
+ wait_barrier(conf);
disk_stat_inc(mddev->gendisk, ios[rw]);
disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -734,6 +805,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_sector;
+ r10_bio->state = 0;
if (rw == READ) {
/*
@@ -778,13 +850,16 @@ static int make_request(request_queue_t *q, struct bio * bio)
!test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
- } else
+ } else {
r10_bio->devs[i].bio = NULL;
+ set_bit(R10BIO_Degraded, &r10_bio->state);
+ }
}
rcu_read_unlock();
- atomic_set(&r10_bio->remaining, 1);
+ atomic_set(&r10_bio->remaining, 0);
+ bio_list_init(&bl);
for (i = 0; i < conf->copies; i++) {
struct bio *mbio;
int d = r10_bio->devs[i].devnum;
@@ -802,13 +877,14 @@ static int make_request(request_queue_t *q, struct bio * bio)
mbio->bi_private = r10_bio;
atomic_inc(&r10_bio->remaining);
- generic_make_request(mbio);
+ bio_list_add(&bl, mbio);
}
- if (atomic_dec_and_test(&r10_bio->remaining)) {
- md_write_end(mddev);
- raid_end_bio_io(r10_bio);
- }
+ bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
+ spin_lock_irqsave(&conf->device_lock, flags);
+ bio_list_merge(&conf->pending_bio_list, &bl);
+ blk_plug_device(mddev->queue);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
return 0;
}
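
With this hunk, RAID10 writes are no longer submitted directly: the mirrored bios are parked on conf->pending_bio_list behind a queue plug, and raid10d (further down) flushes the bitmap to disk before releasing them, so a dirty-bitmap bit always reaches stable storage before the data write it covers. A reduced, single-threaded model of that ordering, with the bitmap and bio machinery stubbed out:

    #include <stdio.h>

    struct bio { int sector; struct bio *next; };

    static struct bio *pending;             /* models conf->pending_bio_list */

    static void queue_write(struct bio *b)  /* make_request() side */
    {
        b->next = pending;
        pending = b;
        printf("queued write for sector %d\n", b->sector);
    }

    static void bitmap_unplug(void)         /* must hit disk before the data */
    {
        printf("bitmap blocks flushed to disk\n");
    }

    static void flush_pending(void)         /* raid10d side */
    {
        struct bio *b = pending;
        pending = NULL;
        bitmap_unplug();
        while (b) {
            struct bio *next = b->next;
            printf("submitting write for sector %d\n", b->sector);
            b = next;
        }
    }

    int main(void)
    {
        struct bio a = { 8, 0 }, b = { 16, 0 };
        queue_write(&a);
        queue_write(&b);
        flush_pending();
        return 0;
    }
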
@@ -897,13 +973,8 @@ static void print_conf(conf_t *conf)
static void close_sync(conf_t *conf)
{
- spin_lock_irq(&conf->resync_lock);
- wait_event_lock_irq(conf->wait_resume, !conf->barrier,
- conf->resync_lock, unplug_slaves(conf->mddev));
- spin_unlock_irq(&conf->resync_lock);
-
- if (conf->barrier) BUG();
- if (waitqueue_active(&conf->wait_idle)) BUG();
+ wait_barrier(conf);
+ allow_barrier(conf);
mempool_destroy(conf->r10buf_pool);
conf->r10buf_pool = NULL;
@@ -971,7 +1042,12 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
if (!enough(conf))
return 0;
- for (mirror=0; mirror < mddev->raid_disks; mirror++)
+ if (rdev->saved_raid_disk >= 0 &&
+ conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
+ mirror = rdev->saved_raid_disk;
+ else
+ mirror = 0;
+ for ( ; mirror < mddev->raid_disks; mirror++)
if ( !(p=conf->mirrors+mirror)->rdev) {
blk_queue_stack_limits(mddev->queue,
@@ -987,6 +1063,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
p->head_position = 0;
rdev->raid_disk = mirror;
found = 1;
+ if (rdev->saved_raid_disk != mirror)
+ conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev);
break;
}
@@ -1027,7 +1105,6 @@ abort:
static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
conf_t *conf = mddev_to_conf(r10_bio->mddev);
int i,d;
@@ -1042,9 +1119,16 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
BUG();
update_head_pos(i, r10_bio);
d = r10_bio->devs[i].devnum;
- if (!uptodate)
- md_error(r10_bio->mddev,
- conf->mirrors[d].rdev);
+
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ set_bit(R10BIO_Uptodate, &r10_bio->state);
+ else {
+ atomic_add(r10_bio->sectors,
+ &conf->mirrors[d].rdev->corrected_errors);
+ if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
+ md_error(r10_bio->mddev,
+ conf->mirrors[d].rdev);
+ }
/* for reconstruct, we always reschedule after a read.
* for resync, only after all reads
@@ -1132,23 +1216,32 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
fbio = r10_bio->devs[i].bio;
/* now find blocks with errors */
- for (i=first+1 ; i < conf->copies ; i++) {
- int vcnt, j, d;
+ for (i=0 ; i < conf->copies ; i++) {
+ int j, d;
+ int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
- if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
- continue;
- /* We know that the bi_io_vec layout is the same for
- * both 'first' and 'i', so we just compare them.
- * All vec entries are PAGE_SIZE;
- */
tbio = r10_bio->devs[i].bio;
- vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
- for (j = 0; j < vcnt; j++)
- if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
- page_address(tbio->bi_io_vec[j].bv_page),
- PAGE_SIZE))
- break;
- if (j == vcnt)
+
+ if (tbio->bi_end_io != end_sync_read)
+ continue;
+ if (i == first)
+ continue;
+ if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+ /* We know that the bi_io_vec layout is the same for
+ * both 'first' and 'i', so we just compare them.
+ * All vec entries are PAGE_SIZE;
+ */
+ for (j = 0; j < vcnt; j++)
+ if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
+ page_address(tbio->bi_io_vec[j].bv_page),
+ PAGE_SIZE))
+ break;
+ if (j == vcnt)
+ continue;
+ mddev->resync_mismatches += r10_bio->sectors;
+ }
+ if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+ /* Don't fix anything. */
continue;
/* Ok, we need to write this bio
* First we need to fixup bv_offset, bv_len and
@@ -1227,7 +1320,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
- generic_make_request(wbio);
+ if (test_bit(R10BIO_Uptodate, &r10_bio->state))
+ generic_make_request(wbio);
+ else
+ bio_endio(wbio, wbio->bi_size, -EIO);
}
@@ -1254,10 +1350,31 @@ static void raid10d(mddev_t *mddev)
for (;;) {
char b[BDEVNAME_SIZE];
spin_lock_irqsave(&conf->device_lock, flags);
+
+ if (conf->pending_bio_list.head) {
+ bio = bio_list_get(&conf->pending_bio_list);
+ blk_remove_plug(mddev->queue);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ /* flush any pending bitmap writes to disk before proceeding w/ I/O */
+ if (bitmap_unplug(mddev->bitmap) != 0)
+ printk("%s: bitmap file write failed!\n", mdname(mddev));
+
+ while (bio) { /* submit pending writes */
+ struct bio *next = bio->bi_next;
+ bio->bi_next = NULL;
+ generic_make_request(bio);
+ bio = next;
+ }
+ unplug = 1;
+
+ continue;
+ }
+
if (list_empty(head))
break;
r10_bio = list_entry(head->prev, r10bio_t, retry_list);
list_del(head->prev);
+ conf->nr_queued--;
spin_unlock_irqrestore(&conf->device_lock, flags);
mddev = r10_bio->mddev;
@@ -1270,8 +1387,96 @@ static void raid10d(mddev_t *mddev)
unplug = 1;
} else {
int mirror;
+			/* we got a read error. Maybe the drive is bad.  Maybe just
+			 * this block is bad, and we may be able to fix it.
+			 * We freeze all other IO, and try reading the block from
+			 * other devices.  When we find one that works, we re-write
+			 * the block and re-read to check that this fixes the error.
+			 * This is all done synchronously while the array is
+			 * frozen.
+			 */
+ int sect = 0; /* Offset from r10_bio->sector */
+ int sectors = r10_bio->sectors;
+ freeze_array(conf);
+ if (mddev->ro == 0) while(sectors) {
+ int s = sectors;
+ int sl = r10_bio->read_slot;
+ int success = 0;
+
+ if (s > (PAGE_SIZE>>9))
+ s = PAGE_SIZE >> 9;
+
+ do {
+ int d = r10_bio->devs[sl].devnum;
+ rdev = conf->mirrors[d].rdev;
+ if (rdev &&
+ test_bit(In_sync, &rdev->flags) &&
+ sync_page_io(rdev->bdev,
+ r10_bio->devs[sl].addr +
+ sect + rdev->data_offset,
+ s<<9,
+ conf->tmppage, READ))
+ success = 1;
+ else {
+ sl++;
+ if (sl == conf->copies)
+ sl = 0;
+ }
+ } while (!success && sl != r10_bio->read_slot);
+
+ if (success) {
+ int start = sl;
+ /* write it back and re-read */
+ while (sl != r10_bio->read_slot) {
+ int d;
+ if (sl==0)
+ sl = conf->copies;
+ sl--;
+ d = r10_bio->devs[sl].devnum;
+					rdev = conf->mirrors[d].rdev;
+					if (rdev &&
+					    test_bit(In_sync, &rdev->flags)) {
+						atomic_add(s, &rdev->corrected_errors);
+ if (sync_page_io(rdev->bdev,
+ r10_bio->devs[sl].addr +
+ sect + rdev->data_offset,
+ s<<9, conf->tmppage, WRITE) == 0)
+ /* Well, this device is dead */
+ md_error(mddev, rdev);
+ }
+ }
+ sl = start;
+ while (sl != r10_bio->read_slot) {
+ int d;
+ if (sl==0)
+ sl = conf->copies;
+ sl--;
+ d = r10_bio->devs[sl].devnum;
+ rdev = conf->mirrors[d].rdev;
+ if (rdev &&
+ test_bit(In_sync, &rdev->flags)) {
+ if (sync_page_io(rdev->bdev,
+ r10_bio->devs[sl].addr +
+ sect + rdev->data_offset,
+ s<<9, conf->tmppage, READ) == 0)
+ /* Well, this device is dead */
+ md_error(mddev, rdev);
+ }
+ }
+ } else {
+ /* Cannot read from anywhere -- bye bye array */
+ md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
+ break;
+ }
+ sectors -= s;
+ sect += s;
+ }
+
+ unfreeze_array(conf);
+
bio = r10_bio->devs[r10_bio->read_slot].bio;
- r10_bio->devs[r10_bio->read_slot].bio = NULL;
+ r10_bio->devs[r10_bio->read_slot].bio =
+ mddev->ro ? IO_BLOCKED : NULL;
bio_put(bio);
mirror = read_balance(conf, r10_bio);
if (mirror == -1) {
@@ -1360,6 +1565,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
sector_t max_sector, nr_sectors;
int disk;
int i;
+ int max_sync;
+ int sync_blocks;
sector_t sectors_skipped = 0;
int chunks_skipped = 0;
@@ -1373,6 +1580,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_sector = mddev->resync_max_sectors;
if (sector_nr >= max_sector) {
+ /* If we aborted, we need to abort the
+		 * sync on the 'current' bitmap chunks (there can
+		 * be several when recovering multiple devices),
+		 * as we may have started syncing them but not finished.
+ * We can find the current address in
+ * mddev->curr_resync, but for recovery,
+ * we need to convert that to several
+ * virtual addresses.
+ */
+ if (mddev->curr_resync < max_sector) { /* aborted */
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+ bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
+ &sync_blocks, 1);
+ else for (i=0; i<conf->raid_disks; i++) {
+ sector_t sect =
+ raid10_find_virt(conf, mddev->curr_resync, i);
+ bitmap_end_sync(mddev->bitmap, sect,
+ &sync_blocks, 1);
+ }
+ } else /* completed sync */
+ conf->fullsync = 0;
+
+ bitmap_close_sync(mddev->bitmap);
close_sync(conf);
*skipped = 1;
return sectors_skipped;
@@ -1395,9 +1625,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
* If there is non-resync activity waiting for us then
* put in a delay to throttle resync.
*/
- if (!go_faster && waitqueue_active(&conf->wait_resume))
+ if (!go_faster && conf->nr_waiting)
msleep_interruptible(1000);
- device_barrier(conf, sector_nr + RESYNC_SECTORS);
/* Again, very different code for resync and recovery.
* Both must result in an r10bio with a list of bios that
@@ -1414,6 +1643,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
* end_sync_write if we will want to write.
*/
+ max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
/* recovery... the complicated one */
int i, j, k;
@@ -1422,14 +1652,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
for (i=0 ; i<conf->raid_disks; i++)
if (conf->mirrors[i].rdev &&
!test_bit(In_sync, &conf->mirrors[i].rdev->flags)) {
+ int still_degraded = 0;
/* want to reconstruct this device */
r10bio_t *rb2 = r10_bio;
+ sector_t sect = raid10_find_virt(conf, sector_nr, i);
+ int must_sync;
+ /* Unless we are doing a full sync, we only need
+ * to recover the block if it is set in the bitmap
+ */
+ must_sync = bitmap_start_sync(mddev->bitmap, sect,
+ &sync_blocks, 1);
+ if (sync_blocks < max_sync)
+ max_sync = sync_blocks;
+ if (!must_sync &&
+ !conf->fullsync) {
+ /* yep, skip the sync_blocks here, but don't assume
+ * that there will never be anything to do here
+ */
+ chunks_skipped = -1;
+ continue;
+ }
r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
- spin_lock_irq(&conf->resync_lock);
- conf->nr_pending++;
- if (rb2) conf->barrier++;
- spin_unlock_irq(&conf->resync_lock);
+ raise_barrier(conf, rb2 != NULL);
atomic_set(&r10_bio->remaining, 0);
r10_bio->master_bio = (struct bio*)rb2;
@@ -1437,8 +1682,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
atomic_inc(&rb2->remaining);
r10_bio->mddev = mddev;
set_bit(R10BIO_IsRecover, &r10_bio->state);
- r10_bio->sector = raid10_find_virt(conf, sector_nr, i);
+ r10_bio->sector = sect;
+
raid10_find_phys(conf, r10_bio);
+ /* Need to check if this section will still be
+ * degraded
+ */
+ for (j=0; j<conf->copies;j++) {
+ int d = r10_bio->devs[j].devnum;
+ if (conf->mirrors[d].rdev == NULL ||
+ test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
+ still_degraded = 1;
+ break;
+ }
+ }
+ must_sync = bitmap_start_sync(mddev->bitmap, sect,
+ &sync_blocks, still_degraded);
+
for (j=0; j<conf->copies;j++) {
int d = r10_bio->devs[j].devnum;
if (conf->mirrors[d].rdev &&
@@ -1498,14 +1758,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
} else {
/* resync. Schedule a read for every block at this virt offset */
int count = 0;
- r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
- spin_lock_irq(&conf->resync_lock);
- conf->nr_pending++;
- spin_unlock_irq(&conf->resync_lock);
+ if (!bitmap_start_sync(mddev->bitmap, sector_nr,
+ &sync_blocks, mddev->degraded) &&
+ !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+ /* We can skip this block */
+ *skipped = 1;
+ return sync_blocks + sectors_skipped;
+ }
+ if (sync_blocks < max_sync)
+ max_sync = sync_blocks;
+ r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
r10_bio->mddev = mddev;
atomic_set(&r10_bio->remaining, 0);
+ raise_barrier(conf, 0);
+ conf->next_resync = sector_nr;
r10_bio->master_bio = NULL;
r10_bio->sector = sector_nr;
@@ -1558,6 +1826,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
}
nr_sectors = 0;
+ if (sector_nr + max_sync < max_sector)
+ max_sector = sector_nr + max_sync;
do {
struct page *page;
int len = PAGE_SIZE;
@@ -1632,11 +1902,11 @@ static int run(mddev_t *mddev)
int nc, fc;
sector_t stride, size;
- if (mddev->level != 10) {
- printk(KERN_ERR "raid10: %s: raid level not set correctly... (%d)\n",
- mdname(mddev), mddev->level);
- goto out;
+ if (mddev->chunk_size == 0) {
+ printk(KERN_ERR "md/raid10: non-zero chunk size required.\n");
+ return -EINVAL;
}
+
nc = mddev->layout & 255;
fc = (mddev->layout >> 8) & 255;
if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
@@ -1650,22 +1920,24 @@ static int run(mddev_t *mddev)
* bookkeeping area. [whatever we allocate in run(),
* should be freed in stop()]
*/
- conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
+ conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
mddev->private = conf;
if (!conf) {
printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
mdname(mddev));
goto out;
}
- memset(conf, 0, sizeof(*conf));
- conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
+ conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
GFP_KERNEL);
if (!conf->mirrors) {
printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
mdname(mddev));
goto out_free_conf;
}
- memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
+
+ conf->tmppage = alloc_page(GFP_KERNEL);
+ if (!conf->tmppage)
+ goto out_free_conf;
conf->near_copies = nc;
conf->far_copies = fc;
@@ -1713,8 +1985,7 @@ static int run(mddev_t *mddev)
INIT_LIST_HEAD(&conf->retry_list);
spin_lock_init(&conf->resync_lock);
- init_waitqueue_head(&conf->wait_idle);
- init_waitqueue_head(&conf->wait_resume);
+ init_waitqueue_head(&conf->wait_barrier);
/* need to check that every block has at least one working mirror */
if (!enough(conf)) {
@@ -1763,7 +2034,7 @@ static int run(mddev_t *mddev)
* maybe...
*/
{
- int stripe = conf->raid_disks * mddev->chunk_size / PAGE_CACHE_SIZE;
+ int stripe = conf->raid_disks * mddev->chunk_size / PAGE_SIZE;
stripe /= conf->near_copies;
if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
mddev->queue->backing_dev_info.ra_pages = 2* stripe;
@@ -1776,6 +2047,7 @@ static int run(mddev_t *mddev)
out_free_conf:
if (conf->r10bio_pool)
mempool_destroy(conf->r10bio_pool);
+ safe_put_page(conf->tmppage);
kfree(conf->mirrors);
kfree(conf);
mddev->private = NULL;
@@ -1798,10 +2070,31 @@ static int stop(mddev_t *mddev)
return 0;
}
+static void raid10_quiesce(mddev_t *mddev, int state)
+{
+ conf_t *conf = mddev_to_conf(mddev);
+
+ switch(state) {
+ case 1:
+ raise_barrier(conf, 0);
+ break;
+ case 0:
+ lower_barrier(conf);
+ break;
+ }
+ if (mddev->thread) {
+ if (mddev->bitmap)
+ mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
+ else
+ mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+ md_wakeup_thread(mddev->thread);
+ }
+}
-static mdk_personality_t raid10_personality =
+static struct mdk_personality raid10_personality =
{
.name = "raid10",
+ .level = 10,
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
@@ -1812,19 +2105,22 @@ static mdk_personality_t raid10_personality =
.hot_remove_disk= raid10_remove_disk,
.spare_active = raid10_spare_active,
.sync_request = sync_request,
+ .quiesce = raid10_quiesce,
};
static int __init raid_init(void)
{
- return register_md_personality(RAID10, &raid10_personality);
+ return register_md_personality(&raid10_personality);
}
static void raid_exit(void)
{
- unregister_md_personality(RAID10);
+ unregister_md_personality(&raid10_personality);
}
module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-9"); /* RAID10 */
+MODULE_ALIAS("md-raid10");
+MODULE_ALIAS("md-level-10");
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index fafc4bc045f..54f4a9847e3 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -35,12 +35,10 @@
#define STRIPE_SHIFT (PAGE_SHIFT - 9)
#define STRIPE_SECTORS (STRIPE_SIZE>>9)
#define IO_THRESHOLD 1
-#define HASH_PAGES 1
-#define HASH_PAGES_ORDER 0
-#define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *))
+#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head))
#define HASH_MASK (NR_HASH - 1)
-#define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])
+#define stripe_hash(conf, sect) (&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]))
/* bio's attached to a stripe+device for I/O are linked together in bi_sector
* order without overlap. There may be several bio's per stripe+device, and
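
This conversion swaps the open-coded hash_next/hash_pprev chain for the generic hlist, which has exactly the shape the old code built by hand: a single head pointer per bucket plus a pprev back-pointer, so an entry can unlink itself without re-walking the bucket. A user-space model of the two primitives the patch uses (types renamed; the real ones are in <linux/list.h>):

    #include <assert.h>
    #include <stddef.h>

    struct hnode { struct hnode *next, **pprev; };
    struct hhead { struct hnode *first; };

    static void hlist_add_head(struct hnode *n, struct hhead *h)
    {
        n->next = h->first;
        if (h->first)
            h->first->pprev = &n->next;
        h->first = n;
        n->pprev = &h->first;   /* points at whatever points at us */
    }

    static void hlist_del_init(struct hnode *n)
    {
        if (n->pprev) {                 /* hashed at all? */
            *n->pprev = n->next;
            if (n->next)
                n->next->pprev = n->pprev;
            n->next = NULL;
            n->pprev = NULL;            /* safe to delete twice */
        }
    }

    int main(void)
    {
        struct hhead bucket = { NULL };
        struct hnode a = { NULL, NULL }, b = { NULL, NULL };

        hlist_add_head(&a, &bucket);
        hlist_add_head(&b, &bucket);    /* bucket: b -> a */
        hlist_del_init(&a);
        assert(bucket.first == &b && b.next == NULL);
        hlist_del_init(&a);             /* second delete is a no-op */
        return 0;
    }
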
@@ -113,29 +111,21 @@ static void release_stripe(struct stripe_head *sh)
spin_unlock_irqrestore(&conf->device_lock, flags);
}
-static void remove_hash(struct stripe_head *sh)
+static inline void remove_hash(struct stripe_head *sh)
{
PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);
- if (sh->hash_pprev) {
- if (sh->hash_next)
- sh->hash_next->hash_pprev = sh->hash_pprev;
- *sh->hash_pprev = sh->hash_next;
- sh->hash_pprev = NULL;
- }
+ hlist_del_init(&sh->hash);
}
-static __inline__ void insert_hash(raid5_conf_t *conf, struct stripe_head *sh)
+static inline void insert_hash(raid5_conf_t *conf, struct stripe_head *sh)
{
- struct stripe_head **shp = &stripe_hash(conf, sh->sector);
+ struct hlist_head *hp = stripe_hash(conf, sh->sector);
PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector);
CHECK_DEVLOCK();
- if ((sh->hash_next = *shp) != NULL)
- (*shp)->hash_pprev = &sh->hash_next;
- *shp = sh;
- sh->hash_pprev = shp;
+ hlist_add_head(&sh->hash, hp);
}
@@ -167,7 +157,7 @@ static void shrink_buffers(struct stripe_head *sh, int num)
if (!p)
continue;
sh->dev[i].page = NULL;
- page_cache_release(p);
+ put_page(p);
}
}
@@ -228,10 +218,11 @@ static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_i
static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
{
struct stripe_head *sh;
+ struct hlist_node *hn;
CHECK_DEVLOCK();
PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
- for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next)
+ hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
if (sh->sector == sector)
return sh;
PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
@@ -417,7 +408,7 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
set_bit(R5_UPTODATE, &sh->dev[i].flags);
#endif
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
- printk("R5: read error corrected!!\n");
+ printk(KERN_INFO "raid5: read error corrected!!\n");
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
}
@@ -428,13 +419,14 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&conf->disks[i].rdev->read_errors);
if (conf->mddev->degraded)
- printk("R5: read error not correctable.\n");
+ printk(KERN_WARNING "raid5: read error not correctable.\n");
else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
/* Oh, no!!! */
- printk("R5: read error NOT corrected!!\n");
+ printk(KERN_WARNING "raid5: read error NOT corrected!!\n");
else if (atomic_read(&conf->disks[i].rdev->read_errors)
> conf->max_nr_stripes)
- printk("raid5: Too many read errors, failing device.\n");
+ printk(KERN_WARNING
+ "raid5: Too many read errors, failing device.\n");
else
retry = 1;
if (retry)
@@ -604,7 +596,7 @@ static sector_t raid5_compute_sector(sector_t r_sector, unsigned int raid_disks,
*dd_idx = (*pd_idx + 1 + *dd_idx) % raid_disks;
break;
default:
- printk("raid5: unsupported algorithm %d\n",
+ printk(KERN_ERR "raid5: unsupported algorithm %d\n",
conf->algorithm);
}
@@ -645,7 +637,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
i -= (sh->pd_idx + 1);
break;
default:
- printk("raid5: unsupported algorithm %d\n",
+ printk(KERN_ERR "raid5: unsupported algorithm %d\n",
conf->algorithm);
}
@@ -654,7 +646,7 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
check = raid5_compute_sector (r_sector, raid_disks, data_disks, &dummy1, &dummy2, conf);
if (check != sh->sector || dummy1 != dd_idx || dummy2 != sh->pd_idx) {
- printk("compute_blocknr: map not correct\n");
+ printk(KERN_ERR "compute_blocknr: map not correct\n");
return 0;
}
return r_sector;
@@ -737,7 +729,7 @@ static void compute_block(struct stripe_head *sh, int dd_idx)
if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
ptr[count++] = p;
else
- printk("compute_block() %d, stripe %llu, %d"
+ printk(KERN_ERR "compute_block() %d, stripe %llu, %d"
" not present\n", dd_idx,
(unsigned long long)sh->sector, i);
@@ -960,11 +952,11 @@ static void handle_stripe(struct stripe_head *sh)
syncing = test_bit(STRIPE_SYNCING, &sh->state);
/* Now to look around and see what can be done */
+ rcu_read_lock();
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
dev = &sh->dev[i];
clear_bit(R5_Insync, &dev->flags);
- clear_bit(R5_Syncio, &dev->flags);
PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags, dev->toread, dev->towrite, dev->written);
@@ -1003,9 +995,9 @@ static void handle_stripe(struct stripe_head *sh)
non_overwrite++;
}
if (dev->written) written++;
- rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
- /* The ReadError flag wil just be confusing now */
+ /* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
clear_bit(R5_ReWrite, &dev->flags);
}
@@ -1016,6 +1008,7 @@ static void handle_stripe(struct stripe_head *sh)
} else
set_bit(R5_Insync, &dev->flags);
}
+ rcu_read_unlock();
PRINTK("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d\n",
locked, uptodate, to_read, to_write, failed, failed_num);
@@ -1027,10 +1020,13 @@ static void handle_stripe(struct stripe_head *sh)
int bitmap_end = 0;
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
- mdk_rdev_t *rdev = conf->disks[i].rdev;
+ mdk_rdev_t *rdev;
+ rcu_read_lock();
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && test_bit(In_sync, &rdev->flags))
/* multiple read failures in one stripe */
md_error(conf->mddev, rdev);
+ rcu_read_unlock();
}
spin_lock_irq(&conf->device_lock);
@@ -1179,9 +1175,6 @@ static void handle_stripe(struct stripe_head *sh)
locked++;
PRINTK("Reading block %d (sync=%d)\n",
i, syncing);
- if (syncing)
- md_sync_acct(conf->disks[i].rdev->bdev,
- STRIPE_SECTORS);
}
}
}
@@ -1288,7 +1281,7 @@ static void handle_stripe(struct stripe_head *sh)
* is available
*/
if (syncing && locked == 0 &&
- !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 1) {
+ !test_bit(STRIPE_INSYNC, &sh->state)) {
set_bit(STRIPE_HANDLE, &sh->state);
if (failed == 0) {
char *pagea;
@@ -1306,27 +1299,25 @@ static void handle_stripe(struct stripe_head *sh)
if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
/* don't try to repair!! */
set_bit(STRIPE_INSYNC, &sh->state);
+ else {
+ compute_block(sh, sh->pd_idx);
+ uptodate++;
+ }
}
}
if (!test_bit(STRIPE_INSYNC, &sh->state)) {
+ /* either failed parity check, or recovery is happening */
if (failed==0)
failed_num = sh->pd_idx;
- /* should be able to compute the missing block and write it to spare */
- if (!test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)) {
- if (uptodate+1 != disks)
- BUG();
- compute_block(sh, failed_num);
- uptodate++;
- }
- if (uptodate != disks)
- BUG();
dev = &sh->dev[failed_num];
+ BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+ BUG_ON(uptodate != disks);
+
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantwrite, &dev->flags);
clear_bit(STRIPE_DEGRADED, &sh->state);
locked++;
set_bit(STRIPE_INSYNC, &sh->state);
- set_bit(R5_Syncio, &dev->flags);
}
}
if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
@@ -1392,7 +1383,7 @@ static void handle_stripe(struct stripe_head *sh)
rcu_read_unlock();
if (rdev) {
- if (test_bit(R5_Syncio, &sh->dev[i].flags))
+ if (syncing)
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev;
@@ -1409,6 +1400,9 @@ static void handle_stripe(struct stripe_head *sh)
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
+ if (rw == WRITE &&
+ test_bit(R5_ReWrite, &sh->dev[i].flags))
+ atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw == 1)
@@ -1822,21 +1816,21 @@ static int run(mddev_t *mddev)
struct list_head *tmp;
if (mddev->level != 5 && mddev->level != 4) {
- printk("raid5: %s: raid level not set to 4/5 (%d)\n", mdname(mddev), mddev->level);
+ printk(KERN_ERR "raid5: %s: raid level not set to 4/5 (%d)\n",
+ mdname(mddev), mddev->level);
return -EIO;
}
- mddev->private = kmalloc (sizeof (raid5_conf_t)
- + mddev->raid_disks * sizeof(struct disk_info),
- GFP_KERNEL);
+ mddev->private = kzalloc(sizeof (raid5_conf_t)
+ + mddev->raid_disks * sizeof(struct disk_info),
+ GFP_KERNEL);
if ((conf = mddev->private) == NULL)
goto abort;
- memset (conf, 0, sizeof (*conf) + mddev->raid_disks * sizeof(struct disk_info) );
+
conf->mddev = mddev;
- if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL)
+ if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
goto abort;
- memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE);
spin_lock_init(&conf->device_lock);
init_waitqueue_head(&conf->wait_for_stripe);
@@ -1903,10 +1897,17 @@ static int run(mddev_t *mddev)
if (mddev->degraded == 1 &&
mddev->recovery_cp != MaxSector) {
- printk(KERN_ERR
- "raid5: cannot start dirty degraded array for %s\n",
- mdname(mddev));
- goto abort;
+ if (mddev->ok_start_degraded)
+ printk(KERN_WARNING
+ "raid5: starting dirty degraded array: %s"
+			       " - data corruption possible.\n",
+ mdname(mddev));
+ else {
+ printk(KERN_ERR
+ "raid5: cannot start dirty degraded array for %s\n",
+ mdname(mddev));
+ goto abort;
+ }
}
{
@@ -1948,7 +1949,7 @@ static int run(mddev_t *mddev)
*/
{
int stripe = (mddev->raid_disks-1) * mddev->chunk_size
- / PAGE_CACHE_SIZE;
+ / PAGE_SIZE;
if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
}
@@ -1956,9 +1957,6 @@ static int run(mddev_t *mddev)
/* Ok, everything is just fine now */
sysfs_create_group(&mddev->kobj, &raid5_attrs_group);
- if (mddev->bitmap)
- mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
-
mddev->queue->unplug_fn = raid5_unplug_device;
mddev->queue->issue_flush_fn = raid5_issue_flush;
@@ -1967,9 +1965,7 @@ static int run(mddev_t *mddev)
abort:
if (conf) {
print_raid5_conf(conf);
- if (conf->stripe_hashtbl)
- free_pages((unsigned long) conf->stripe_hashtbl,
- HASH_PAGES_ORDER);
+ kfree(conf->stripe_hashtbl);
kfree(conf);
}
mddev->private = NULL;
@@ -1986,7 +1982,7 @@ static int stop(mddev_t *mddev)
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
shrink_stripes(conf);
- free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
+ kfree(conf->stripe_hashtbl);
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
kfree(conf);
@@ -2014,12 +2010,12 @@ static void print_sh (struct stripe_head *sh)
static void printall (raid5_conf_t *conf)
{
struct stripe_head *sh;
+ struct hlist_node *hn;
int i;
spin_lock_irq(&conf->device_lock);
for (i = 0; i < NR_HASH; i++) {
- sh = conf->stripe_hashtbl[i];
- for (; sh; sh = sh->hash_next) {
+ hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) {
if (sh->raid_conf != conf)
continue;
print_sh(sh);
@@ -2192,17 +2188,12 @@ static void raid5_quiesce(mddev_t *mddev, int state)
spin_unlock_irq(&conf->device_lock);
break;
}
- if (mddev->thread) {
- if (mddev->bitmap)
- mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
- else
- mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
- md_wakeup_thread(mddev->thread);
- }
}
-static mdk_personality_t raid5_personality=
+
+static struct mdk_personality raid5_personality =
{
.name = "raid5",
+ .level = 5,
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
@@ -2217,17 +2208,42 @@ static mdk_personality_t raid5_personality=
.quiesce = raid5_quiesce,
};
-static int __init raid5_init (void)
+static struct mdk_personality raid4_personality =
{
- return register_md_personality (RAID5, &raid5_personality);
+ .name = "raid4",
+ .level = 4,
+ .owner = THIS_MODULE,
+ .make_request = make_request,
+ .run = run,
+ .stop = stop,
+ .status = status,
+ .error_handler = error,
+ .hot_add_disk = raid5_add_disk,
+ .hot_remove_disk= raid5_remove_disk,
+ .spare_active = raid5_spare_active,
+ .sync_request = sync_request,
+ .resize = raid5_resize,
+ .quiesce = raid5_quiesce,
+};
+
+static int __init raid5_init(void)
+{
+ register_md_personality(&raid5_personality);
+ register_md_personality(&raid4_personality);
+ return 0;
}
-static void raid5_exit (void)
+static void raid5_exit(void)
{
- unregister_md_personality (RAID5);
+ unregister_md_personality(&raid5_personality);
+ unregister_md_personality(&raid4_personality);
}
module_init(raid5_init);
module_exit(raid5_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-4"); /* RAID5 */
+MODULE_ALIAS("md-raid5");
+MODULE_ALIAS("md-raid4");
+MODULE_ALIAS("md-level-5");
+MODULE_ALIAS("md-level-4");
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 0000d162d19..8c823d686a6 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -40,12 +40,10 @@
#define STRIPE_SHIFT (PAGE_SHIFT - 9)
#define STRIPE_SECTORS (STRIPE_SIZE>>9)
#define IO_THRESHOLD 1
-#define HASH_PAGES 1
-#define HASH_PAGES_ORDER 0
-#define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *))
+#define NR_HASH (PAGE_SIZE / sizeof(struct hlist_head))
#define HASH_MASK (NR_HASH - 1)
-#define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])
+#define stripe_hash(conf, sect) (&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]))
/* bio's attached to a stripe+device for I/O are linked together in bi_sector
* order without overlap. There may be several bio's per stripe+device, and
@@ -132,29 +130,21 @@ static void release_stripe(struct stripe_head *sh)
spin_unlock_irqrestore(&conf->device_lock, flags);
}
-static void remove_hash(struct stripe_head *sh)
+static inline void remove_hash(struct stripe_head *sh)
{
PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);
- if (sh->hash_pprev) {
- if (sh->hash_next)
- sh->hash_next->hash_pprev = sh->hash_pprev;
- *sh->hash_pprev = sh->hash_next;
- sh->hash_pprev = NULL;
- }
+ hlist_del_init(&sh->hash);
}
-static __inline__ void insert_hash(raid6_conf_t *conf, struct stripe_head *sh)
+static inline void insert_hash(raid6_conf_t *conf, struct stripe_head *sh)
{
- struct stripe_head **shp = &stripe_hash(conf, sh->sector);
+ struct hlist_head *hp = stripe_hash(conf, sh->sector);
PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector);
CHECK_DEVLOCK();
- if ((sh->hash_next = *shp) != NULL)
- (*shp)->hash_pprev = &sh->hash_next;
- *shp = sh;
- sh->hash_pprev = shp;
+ hlist_add_head(&sh->hash, hp);
}
@@ -186,7 +176,7 @@ static void shrink_buffers(struct stripe_head *sh, int num)
if (!p)
continue;
sh->dev[i].page = NULL;
- page_cache_release(p);
+ put_page(p);
}
}
@@ -247,10 +237,11 @@ static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_i
static struct stripe_head *__find_stripe(raid6_conf_t *conf, sector_t sector)
{
struct stripe_head *sh;
+ struct hlist_node *hn;
CHECK_DEVLOCK();
PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
- for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next)
+ hlist_for_each_entry (sh, hn, stripe_hash(conf, sector), hash)
if (sh->sector == sector)
return sh;
PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
@@ -367,8 +358,8 @@ static void shrink_stripes(raid6_conf_t *conf)
conf->slab_cache = NULL;
}
-static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done,
- int error)
+static int raid6_end_read_request(struct bio * bi, unsigned int bytes_done,
+ int error)
{
struct stripe_head *sh = bi->bi_private;
raid6_conf_t *conf = sh->raid_conf;
@@ -420,9 +411,35 @@ static int raid6_end_read_request (struct bio * bi, unsigned int bytes_done,
#else
set_bit(R5_UPTODATE, &sh->dev[i].flags);
#endif
+ if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+ printk(KERN_INFO "raid6: read error corrected!!\n");
+ clear_bit(R5_ReadError, &sh->dev[i].flags);
+ clear_bit(R5_ReWrite, &sh->dev[i].flags);
+ }
+ if (atomic_read(&conf->disks[i].rdev->read_errors))
+ atomic_set(&conf->disks[i].rdev->read_errors, 0);
} else {
- md_error(conf->mddev, conf->disks[i].rdev);
+ int retry = 0;
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
+ atomic_inc(&conf->disks[i].rdev->read_errors);
+ if (conf->mddev->degraded)
+ printk(KERN_WARNING "raid6: read error not correctable.\n");
+ else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+ /* Oh, no!!! */
+ printk(KERN_WARNING "raid6: read error NOT corrected!!\n");
+ else if (atomic_read(&conf->disks[i].rdev->read_errors)
+ > conf->max_nr_stripes)
+ printk(KERN_WARNING
+ "raid6: Too many read errors, failing device.\n");
+ else
+ retry = 1;
+ if (retry)
+ set_bit(R5_ReadError, &sh->dev[i].flags);
+ else {
+ clear_bit(R5_ReadError, &sh->dev[i].flags);
+ clear_bit(R5_ReWrite, &sh->dev[i].flags);
+ md_error(conf->mddev, conf->disks[i].rdev);
+ }
}
rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
#if 0
@@ -805,7 +822,7 @@ static void compute_parity(struct stripe_head *sh, int method)
}
/* Compute one missing block */
-static void compute_block_1(struct stripe_head *sh, int dd_idx)
+static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
{
raid6_conf_t *conf = sh->raid_conf;
int i, count, disks = conf->raid_disks;
@@ -821,7 +838,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx)
compute_parity(sh, UPDATE_PARITY);
} else {
ptr[0] = page_address(sh->dev[dd_idx].page);
- memset(ptr[0], 0, STRIPE_SIZE);
+ if (!nozero) memset(ptr[0], 0, STRIPE_SIZE);
count = 1;
for (i = disks ; i--; ) {
if (i == dd_idx || i == qd_idx)
@@ -838,7 +855,8 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx)
}
if (count != 1)
xor_block(count, STRIPE_SIZE, ptr);
- set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+ if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+ else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
}
}
@@ -871,7 +889,7 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
return;
} else {
/* We're missing D+Q; recompute D from P */
- compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1);
+ compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0);
compute_parity(sh, UPDATE_PARITY); /* Is this necessary? */
return;
}
@@ -982,6 +1000,12 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
}
+static int page_is_zero(struct page *p)
+{
+ char *a = page_address(p);
+ return ((*(u32*)a) == 0 &&
+ memcmp(a, a+4, STRIPE_SIZE-4)==0);
+}
/*
* handle_stripe - do things to a stripe.
*
@@ -1000,7 +1024,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
*
*/
-static void handle_stripe(struct stripe_head *sh)
+static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
{
raid6_conf_t *conf = sh->raid_conf;
int disks = conf->raid_disks;
@@ -1027,11 +1051,11 @@ static void handle_stripe(struct stripe_head *sh)
syncing = test_bit(STRIPE_SYNCING, &sh->state);
/* Now to look around and see what can be done */
+ rcu_read_lock();
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
dev = &sh->dev[i];
clear_bit(R5_Insync, &dev->flags);
- clear_bit(R5_Syncio, &dev->flags);
PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags, dev->toread, dev->towrite, dev->written);
@@ -1070,14 +1094,21 @@ static void handle_stripe(struct stripe_head *sh)
non_overwrite++;
}
if (dev->written) written++;
- rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
+ rdev = rcu_dereference(conf->disks[i].rdev);
if (!rdev || !test_bit(In_sync, &rdev->flags)) {
+ /* The ReadError flag will just be confusing now */
+ clear_bit(R5_ReadError, &dev->flags);
+ clear_bit(R5_ReWrite, &dev->flags);
+ }
+ if (!rdev || !test_bit(In_sync, &rdev->flags)
+ || test_bit(R5_ReadError, &dev->flags)) {
if ( failed < 2 )
failed_num[failed] = i;
failed++;
} else
set_bit(R5_Insync, &dev->flags);
}
+ rcu_read_unlock();
PRINTK("locked=%d uptodate=%d to_read=%d"
" to_write=%d failed=%d failed_num=%d,%d\n",
locked, uptodate, to_read, to_write, failed,
@@ -1088,6 +1119,17 @@ static void handle_stripe(struct stripe_head *sh)
if (failed > 2 && to_read+to_write+written) {
for (i=disks; i--; ) {
int bitmap_end = 0;
+
+ if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+ mdk_rdev_t *rdev;
+ rcu_read_lock();
+ rdev = rcu_dereference(conf->disks[i].rdev);
+ if (rdev && test_bit(In_sync, &rdev->flags))
+ /* multiple read failures in one stripe */
+ md_error(conf->mddev, rdev);
+ rcu_read_unlock();
+ }
+
spin_lock_irq(&conf->device_lock);
/* fail all writes first */
bi = sh->dev[i].towrite;
@@ -1123,7 +1165,8 @@ static void handle_stripe(struct stripe_head *sh)
}
/* fail any reads if this device is non-operational */
- if (!test_bit(R5_Insync, &sh->dev[i].flags)) {
+ if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
+ test_bit(R5_ReadError, &sh->dev[i].flags)) {
bi = sh->dev[i].toread;
sh->dev[i].toread = NULL;
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
@@ -1228,7 +1271,7 @@ static void handle_stripe(struct stripe_head *sh)
if (uptodate == disks-1) {
PRINTK("Computing stripe %llu block %d\n",
(unsigned long long)sh->sector, i);
- compute_block_1(sh, i);
+ compute_block_1(sh, i, 0);
uptodate++;
} else if ( uptodate == disks-2 && failed >= 2 ) {
/* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
@@ -1259,9 +1302,6 @@ static void handle_stripe(struct stripe_head *sh)
locked++;
PRINTK("Reading block %d (sync=%d)\n",
i, syncing);
- if (syncing)
- md_sync_acct(conf->disks[i].rdev->bdev,
- STRIPE_SECTORS);
}
}
}
@@ -1323,7 +1363,7 @@ static void handle_stripe(struct stripe_head *sh)
/* We have failed blocks and need to compute them */
switch ( failed ) {
case 0: BUG();
- case 1: compute_block_1(sh, failed_num[0]); break;
+ case 1: compute_block_1(sh, failed_num[0], 0); break;
case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
default: BUG(); /* This request should have been failed? */
}
@@ -1338,12 +1378,10 @@ static void handle_stripe(struct stripe_head *sh)
(unsigned long long)sh->sector, i);
locked++;
set_bit(R5_Wantwrite, &sh->dev[i].flags);
-#if 0 /**** FIX: I don't understand the logic here... ****/
- if (!test_bit(R5_Insync, &sh->dev[i].flags)
- || ((i==pd_idx || i==qd_idx) && failed == 0)) /* FIX? */
- set_bit(STRIPE_INSYNC, &sh->state);
-#endif
}
+ /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
+ set_bit(STRIPE_INSYNC, &sh->state);
+
if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
atomic_dec(&conf->preread_active_stripes);
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -1356,84 +1394,119 @@ static void handle_stripe(struct stripe_head *sh)
* Any reads will already have been scheduled, so we just see if enough data
* is available
*/
- if (syncing && locked == 0 &&
- !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 2) {
- set_bit(STRIPE_HANDLE, &sh->state);
-#if 0 /* RAID-6: Don't support CHECK PARITY yet */
- if (failed == 0) {
- char *pagea;
- if (uptodate != disks)
- BUG();
- compute_parity(sh, CHECK_PARITY);
- uptodate--;
- pagea = page_address(sh->dev[pd_idx].page);
- if ((*(u32*)pagea) == 0 &&
- !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) {
- /* parity is correct (on disc, not in buffer any more) */
- set_bit(STRIPE_INSYNC, &sh->state);
- }
- }
-#endif
- if (!test_bit(STRIPE_INSYNC, &sh->state)) {
- int failed_needupdate[2];
- struct r5dev *adev, *bdev;
-
- if ( failed < 1 )
- failed_num[0] = pd_idx;
- if ( failed < 2 )
- failed_num[1] = (failed_num[0] == qd_idx) ? pd_idx : qd_idx;
+ if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
+ int update_p = 0, update_q = 0;
+ struct r5dev *dev;
- failed_needupdate[0] = !test_bit(R5_UPTODATE, &sh->dev[failed_num[0]].flags);
- failed_needupdate[1] = !test_bit(R5_UPTODATE, &sh->dev[failed_num[1]].flags);
+ set_bit(STRIPE_HANDLE, &sh->state);
- PRINTK("sync: failed=%d num=%d,%d fnu=%u%u\n",
- failed, failed_num[0], failed_num[1], failed_needupdate[0], failed_needupdate[1]);
+ BUG_ON(failed>2);
+ BUG_ON(uptodate < disks);
+ /* Want to check and possibly repair P and Q.
+ * However there could be one 'failed' device, in which
+ * case we can only check one of them, possibly using the
+ * other to generate missing data
+ */
-#if 0 /* RAID-6: This code seems to require that CHECK_PARITY destroys the uptodateness of the parity */
- /* should be able to compute the missing block(s) and write to spare */
- if ( failed_needupdate[0] ^ failed_needupdate[1] ) {
- if (uptodate+1 != disks)
- BUG();
- compute_block_1(sh, failed_needupdate[0] ? failed_num[0] : failed_num[1]);
- uptodate++;
- } else if ( failed_needupdate[0] & failed_needupdate[1] ) {
- if (uptodate+2 != disks)
- BUG();
- compute_block_2(sh, failed_num[0], failed_num[1]);
- uptodate += 2;
+ /* If !tmp_page, we cannot do the calculations,
+ * but as we have set STRIPE_HANDLE, we will soon be called
+ * by handle_stripe with a tmp_page - just wait until then.
+ */
+ if (tmp_page) {
+ if (failed == q_failed) {
+ /* The only possible failed device holds 'Q', so it makes
+ * sense to check P (If anything else were failed, we would
+ * have used P to recreate it).
+ */
+ compute_block_1(sh, pd_idx, 1);
+ if (!page_is_zero(sh->dev[pd_idx].page)) {
+ compute_block_1(sh,pd_idx,0);
+ update_p = 1;
+ }
+ }
+ if (!q_failed && failed < 2) {
+ /* q is not failed, and we didn't use it to generate
+ * anything, so it makes sense to check it
+ */
+ memcpy(page_address(tmp_page),
+ page_address(sh->dev[qd_idx].page),
+ STRIPE_SIZE);
+ compute_parity(sh, UPDATE_PARITY);
+ if (memcmp(page_address(tmp_page),
+ page_address(sh->dev[qd_idx].page),
+ STRIPE_SIZE)!= 0) {
+ clear_bit(STRIPE_INSYNC, &sh->state);
+ update_q = 1;
+ }
+ }
+ if (update_p || update_q) {
+ conf->mddev->resync_mismatches += STRIPE_SECTORS;
+ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ /* don't try to repair!! */
+ update_p = update_q = 0;
}
-#else
- compute_block_2(sh, failed_num[0], failed_num[1]);
- uptodate += failed_needupdate[0] + failed_needupdate[1];
-#endif
- if (uptodate != disks)
- BUG();
+ /* now write out any block on a failed drive,
+ * or P or Q if they need it
+ */
- PRINTK("Marking for sync stripe %llu blocks %d,%d\n",
- (unsigned long long)sh->sector, failed_num[0], failed_num[1]);
+ if (failed == 2) {
+ dev = &sh->dev[failed_num[1]];
+ locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
+ if (failed >= 1) {
+ dev = &sh->dev[failed_num[0]];
+ locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
- /**** FIX: Should we really do both of these unconditionally? ****/
- adev = &sh->dev[failed_num[0]];
- locked += !test_bit(R5_LOCKED, &adev->flags);
- set_bit(R5_LOCKED, &adev->flags);
- set_bit(R5_Wantwrite, &adev->flags);
- bdev = &sh->dev[failed_num[1]];
- locked += !test_bit(R5_LOCKED, &bdev->flags);
- set_bit(R5_LOCKED, &bdev->flags);
+ if (update_p) {
+ dev = &sh->dev[pd_idx];
+ locked ++;
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
+ if (update_q) {
+ dev = &sh->dev[qd_idx];
+ locked++;
+ set_bit(R5_LOCKED, &dev->flags);
+ set_bit(R5_Wantwrite, &dev->flags);
+ }
clear_bit(STRIPE_DEGRADED, &sh->state);
- set_bit(R5_Wantwrite, &bdev->flags);
set_bit(STRIPE_INSYNC, &sh->state);
- set_bit(R5_Syncio, &adev->flags);
- set_bit(R5_Syncio, &bdev->flags);
}
}
+
if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
md_done_sync(conf->mddev, STRIPE_SECTORS,1);
clear_bit(STRIPE_SYNCING, &sh->state);
}
+ /* If the failed drives are just a ReadError, then we might need
+ * to progress the repair/check process
+ */
+ if (failed <= 2 && ! conf->mddev->ro)
+ for (i=0; i<failed;i++) {
+ dev = &sh->dev[failed_num[i]];
+ if (test_bit(R5_ReadError, &dev->flags)
+ && !test_bit(R5_LOCKED, &dev->flags)
+ && test_bit(R5_UPTODATE, &dev->flags)
+ ) {
+ if (!test_bit(R5_ReWrite, &dev->flags)) {
+ set_bit(R5_Wantwrite, &dev->flags);
+ set_bit(R5_ReWrite, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ } else {
+ /* let's read it back */
+ set_bit(R5_Wantread, &dev->flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ }
+ }
+ }
spin_unlock(&sh->lock);
while ((bi=return_bi)) {
@@ -1472,7 +1545,7 @@ static void handle_stripe(struct stripe_head *sh)
rcu_read_unlock();
if (rdev) {
- if (test_bit(R5_Syncio, &sh->dev[i].flags))
+ if (syncing)
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev;
@@ -1489,6 +1562,9 @@ static void handle_stripe(struct stripe_head *sh)
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
+ if (rw == WRITE &&
+ test_bit(R5_ReWrite, &sh->dev[i].flags))
+ atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw == 1)
@@ -1664,7 +1740,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
}
finish_wait(&conf->wait_for_overlap, &w);
raid6_plug_device(conf);
- handle_stripe(sh);
+ handle_stripe(sh, NULL);
release_stripe(sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
@@ -1728,6 +1804,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return rv;
}
if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
+ !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
!conf->fullsync && sync_blocks >= STRIPE_SECTORS) {
/* we can skip this block, and probably more */
sync_blocks /= STRIPE_SECTORS;
@@ -1765,7 +1842,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
clear_bit(STRIPE_INSYNC, &sh->state);
spin_unlock(&sh->lock);
- handle_stripe(sh);
+ handle_stripe(sh, NULL);
release_stripe(sh);
return STRIPE_SECTORS;
@@ -1821,7 +1898,7 @@ static void raid6d (mddev_t *mddev)
spin_unlock_irq(&conf->device_lock);
handled++;
- handle_stripe(sh);
+ handle_stripe(sh, conf->spare_page);
release_stripe(sh);
spin_lock_irq(&conf->device_lock);
@@ -1848,17 +1925,19 @@ static int run(mddev_t *mddev)
return -EIO;
}
- mddev->private = kmalloc (sizeof (raid6_conf_t)
- + mddev->raid_disks * sizeof(struct disk_info),
- GFP_KERNEL);
+ mddev->private = kzalloc(sizeof (raid6_conf_t)
+ + mddev->raid_disks * sizeof(struct disk_info),
+ GFP_KERNEL);
if ((conf = mddev->private) == NULL)
goto abort;
- memset (conf, 0, sizeof (*conf) + mddev->raid_disks * sizeof(struct disk_info) );
conf->mddev = mddev;
- if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL)
+ if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
+ goto abort;
+
+ conf->spare_page = alloc_page(GFP_KERNEL);
+ if (!conf->spare_page)
goto abort;
- memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE);
spin_lock_init(&conf->device_lock);
init_waitqueue_head(&conf->wait_for_stripe);
@@ -1929,13 +2008,18 @@ static int run(mddev_t *mddev)
goto abort;
}
-#if 0 /* FIX: For now */
if (mddev->degraded > 0 &&
mddev->recovery_cp != MaxSector) {
- printk(KERN_ERR "raid6: cannot start dirty degraded array for %s\n", mdname(mddev));
- goto abort;
+ if (mddev->ok_start_degraded)
+ printk(KERN_WARNING "raid6: starting dirty degraded array:%s"
+ "- data corruption possible.\n",
+ mdname(mddev));
+ else {
+ printk(KERN_ERR "raid6: cannot start dirty degraded array"
+ " for %s\n", mdname(mddev));
+ goto abort;
+ }
}
-#endif
{
mddev->thread = md_register_thread(raid6d, mddev, "%s_raid6");
@@ -1977,7 +2061,7 @@ static int run(mddev_t *mddev)
*/
{
int stripe = (mddev->raid_disks-2) * mddev->chunk_size
- / PAGE_CACHE_SIZE;
+ / PAGE_SIZE;
if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
}
@@ -1985,18 +2069,14 @@ static int run(mddev_t *mddev)
/* Ok, everything is just fine now */
mddev->array_size = mddev->size * (mddev->raid_disks - 2);
- if (mddev->bitmap)
- mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
-
mddev->queue->unplug_fn = raid6_unplug_device;
mddev->queue->issue_flush_fn = raid6_issue_flush;
return 0;
abort:
if (conf) {
print_raid6_conf(conf);
- if (conf->stripe_hashtbl)
- free_pages((unsigned long) conf->stripe_hashtbl,
- HASH_PAGES_ORDER);
+ safe_put_page(conf->spare_page);
+ kfree(conf->stripe_hashtbl);
kfree(conf);
}
mddev->private = NULL;
@@ -2013,7 +2093,7 @@ static int stop (mddev_t *mddev)
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
shrink_stripes(conf);
- free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
+ kfree(conf->stripe_hashtbl);
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
kfree(conf);
mddev->private = NULL;
@@ -2040,12 +2120,13 @@ static void print_sh (struct seq_file *seq, struct stripe_head *sh)
static void printall (struct seq_file *seq, raid6_conf_t *conf)
{
struct stripe_head *sh;
+ struct hlist_node *hn;
int i;
spin_lock_irq(&conf->device_lock);
for (i = 0; i < NR_HASH; i++) {
sh = conf->stripe_hashtbl[i];
- for (; sh; sh = sh->hash_next) {
+ hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) {
if (sh->raid_conf != conf)
continue;
print_sh(seq, sh);
@@ -2223,17 +2304,12 @@ static void raid6_quiesce(mddev_t *mddev, int state)
spin_unlock_irq(&conf->device_lock);
break;
}
- if (mddev->thread) {
- if (mddev->bitmap)
- mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
- else
- mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
- md_wakeup_thread(mddev->thread);
- }
}
-static mdk_personality_t raid6_personality=
+
+static struct mdk_personality raid6_personality =
{
.name = "raid6",
+ .level = 6,
.owner = THIS_MODULE,
.make_request = make_request,
.run = run,
@@ -2248,7 +2324,7 @@ static mdk_personality_t raid6_personality=
.quiesce = raid6_quiesce,
};
-static int __init raid6_init (void)
+static int __init raid6_init(void)
{
int e;
@@ -2256,15 +2332,17 @@ static int __init raid6_init (void)
if ( e )
return e;
- return register_md_personality (RAID6, &raid6_personality);
+ return register_md_personality(&raid6_personality);
}
static void raid6_exit (void)
{
- unregister_md_personality (RAID6);
+ unregister_md_personality(&raid6_personality);
}
module_init(raid6_init);
module_exit(raid6_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-8"); /* RAID6 */
+MODULE_ALIAS("md-raid6");
+MODULE_ALIAS("md-level-6");
diff --git a/drivers/media/video/cpia_pp.c b/drivers/media/video/cpia_pp.c
index ddf184f95d8..6861d408f1b 100644
--- a/drivers/media/video/cpia_pp.c
+++ b/drivers/media/video/cpia_pp.c
@@ -170,16 +170,9 @@ static size_t cpia_read_nibble (struct parport *port,
/* Does the error line indicate end of data? */
if (((i /*& 1*/) == 0) &&
(parport_read_status(port) & PARPORT_STATUS_ERROR)) {
- port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
- DBG("%s: No more nibble data (%d bytes)\n",
- port->name, i/2);
-
- /* Go to reverse idle phase. */
- parport_frob_control (port,
- PARPORT_CONTROL_AUTOFD,
- PARPORT_CONTROL_AUTOFD);
- port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
- break;
+ DBG("%s: No more nibble data (%d bytes)\n",
+ port->name, i/2);
+ goto end_of_data;
}
/* Event 7: Set nAutoFd low. */
@@ -227,18 +220,21 @@ static size_t cpia_read_nibble (struct parport *port,
byte = nibble;
}
- i /= 2; /* i is now in bytes */
-
if (i == len) {
/* Read the last nibble without checking data avail. */
- port = port->physport;
- if (parport_read_status (port) & PARPORT_STATUS_ERROR)
- port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
+ if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
+ end_of_data:
+ /* Go to reverse idle phase. */
+ parport_frob_control (port,
+ PARPORT_CONTROL_AUTOFD,
+ PARPORT_CONTROL_AUTOFD);
+ port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
+ }
else
- port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
+ port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
}
- return i;
+ return i/2;
}
/* CPiA nonstandard "Nibble Stream" mode (2 nibbles per cycle, instead of 1)
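In IEEE 1284 nibble mode each handshake transfers four bits, so cpia_read_nibble() counts nibbles in i and the rewritten exit path converts to bytes only once, at `return i/2`. A minimal sketch of the pairing, assuming the low nibble of each byte arrives first as is conventional in nibble mode (pack_nibbles() is a hypothetical helper, not part of the driver):

#include <stdio.h>

static unsigned int pack_nibbles(unsigned char lo, unsigned char hi)
{
	/* two 4-bit transfers become one byte, low nibble first */
	return (lo & 0x0f) | ((hi & 0x0f) << 4);
}

int main(void)
{
	/* nibble stream 0x4, 0x2, 0xd, 0xc -> bytes 0x24, 0xcd */
	unsigned char nib[] = { 0x4, 0x2, 0xd, 0xc };
	int i;

	for (i = 0; i + 1 < (int)sizeof(nib); i += 2)
		printf("byte %d = 0x%02x\n", i / 2,
		       pack_nibbles(nib[i], nib[i + 1]));
	return 0;
}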
diff --git a/drivers/message/i2o/Kconfig b/drivers/message/i2o/Kconfig
index 43a942a29c2..fef67710388 100644
--- a/drivers/message/i2o/Kconfig
+++ b/drivers/message/i2o/Kconfig
@@ -24,6 +24,18 @@ config I2O
If unsure, say N.
+config I2O_LCT_NOTIFY_ON_CHANGES
+ bool "Enable LCT notification"
+ depends on I2O
+ default y
+ ---help---
+ Only say N here if you have an I2O controller from SUN. The SUN
+ firmware doesn't support LCT notification on changes. If this
+ option is enabled on such a controller, the driver will hang up
+ in an endless loop. On all other controllers say Y.
+
+ If unsure, say Y.
+
config I2O_EXT_ADAPTEC
bool "Enable Adaptec extensions"
depends on I2O
diff --git a/drivers/message/i2o/bus-osm.c b/drivers/message/i2o/bus-osm.c
index 151b228e1cb..ac06f10c54e 100644
--- a/drivers/message/i2o/bus-osm.c
+++ b/drivers/message/i2o/bus-osm.c
@@ -17,7 +17,7 @@
#include <linux/i2o.h>
#define OSM_NAME "bus-osm"
-#define OSM_VERSION "$Rev$"
+#define OSM_VERSION "1.317"
#define OSM_DESCRIPTION "I2O Bus Adapter OSM"
static struct i2o_driver i2o_bus_driver;
@@ -39,18 +39,18 @@ static struct i2o_class_id i2o_bus_class_id[] = {
*/
static int i2o_bus_scan(struct i2o_device *dev)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
- m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
+ msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
return -ETIMEDOUT;
- writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_BUS_SCAN << 24 | HOST_TID << 12 | dev->lct_data.tid,
- &msg->u.head[1]);
+ msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_BUS_SCAN << 24 | HOST_TID << 12 | dev->lct_data.
+ tid);
- return i2o_msg_post_wait(dev->iop, m, 60);
+ return i2o_msg_post_wait(dev->iop, msg, 60);
};
/**
@@ -59,8 +59,9 @@ static int i2o_bus_scan(struct i2o_device *dev)
*
* Returns count.
*/
-static ssize_t i2o_bus_store_scan(struct device *d, struct device_attribute *attr, const char *buf,
- size_t count)
+static ssize_t i2o_bus_store_scan(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct i2o_device *i2o_dev = to_i2o_device(d);
int rc;
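Every i2o hunk that follows repeats the same conversion shown in bus-osm.c above: i2o_msg_get_wait() no longer returns a frame index plus an __iomem pointer through an out-parameter, but a message pointer that encodes errno values ERR_PTR-style, so callers test IS_ERR()/PTR_ERR() instead of comparing against I2O_QUEUE_EMPTY. Below is a stand-alone sketch of that idiom (a user-space reimplementation for illustration; the kernel's version lives in <linux/err.h>):

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

/* encode a negative errno in a pointer */
static inline void *ERR_PTR(long error) { return (void *)error; }
/* decode it again */
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
/* true if the pointer is really an encoded errno */
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct message { int id; };

static struct message *msg_get(int queue_empty)
{
	static struct message m = { 42 };

	if (queue_empty)
		return ERR_PTR(-ETIMEDOUT);	/* no free message frame */
	return &m;
}

int main(void)
{
	struct message *msg = msg_get(1);

	if (IS_ERR(msg))
		printf("msg_get failed: %ld\n", PTR_ERR(msg));

	msg = msg_get(0);
	if (!IS_ERR(msg))
		printf("got message %d\n", msg->id);
	return 0;
}

One payoff of the single return value is visible in i2o_block_transfer() further down: the caller can now propagate the real error with `rc = PTR_ERR(msg)` instead of the blanket -EBUSY it returned before.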
diff --git a/drivers/message/i2o/config-osm.c b/drivers/message/i2o/config-osm.c
index 10432f66520..3bba7aa82e5 100644
--- a/drivers/message/i2o/config-osm.c
+++ b/drivers/message/i2o/config-osm.c
@@ -22,7 +22,7 @@
#include <asm/uaccess.h>
#define OSM_NAME "config-osm"
-#define OSM_VERSION "1.248"
+#define OSM_VERSION "1.323"
#define OSM_DESCRIPTION "I2O Configuration OSM"
/* access mode user rw */
diff --git a/drivers/message/i2o/core.h b/drivers/message/i2o/core.h
index 9eefedb1621..90628562851 100644
--- a/drivers/message/i2o/core.h
+++ b/drivers/message/i2o/core.h
@@ -14,8 +14,6 @@
*/
/* Exec-OSM */
-extern struct bus_type i2o_bus_type;
-
extern struct i2o_driver i2o_exec_driver;
extern int i2o_exec_lct_get(struct i2o_controller *);
@@ -23,6 +21,8 @@ extern int __init i2o_exec_init(void);
extern void __exit i2o_exec_exit(void);
/* driver */
+extern struct bus_type i2o_bus_type;
+
extern int i2o_driver_dispatch(struct i2o_controller *, u32);
extern int __init i2o_driver_init(void);
@@ -33,19 +33,27 @@ extern int __init i2o_pci_init(void);
extern void __exit i2o_pci_exit(void);
/* device */
+extern struct device_attribute i2o_device_attrs[];
+
extern void i2o_device_remove(struct i2o_device *);
extern int i2o_device_parse_lct(struct i2o_controller *);
/* IOP */
extern struct i2o_controller *i2o_iop_alloc(void);
-extern void i2o_iop_free(struct i2o_controller *);
+
+/**
+ * i2o_iop_free - Free the i2o_controller struct
+ * @c: I2O controller to free
+ */
+static inline void i2o_iop_free(struct i2o_controller *c)
+{
+ i2o_pool_free(&c->in_msg);
+ kfree(c);
+}
extern int i2o_iop_add(struct i2o_controller *);
extern void i2o_iop_remove(struct i2o_controller *);
-/* config */
-extern int i2o_parm_issue(struct i2o_device *, int, void *, int, void *, int);
-
/* control registers relative to c->base */
#define I2O_IRQ_STATUS 0x30
#define I2O_IRQ_MASK 0x34
diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c
index 8eb50cdb8ae..ee183053fa2 100644
--- a/drivers/message/i2o/device.c
+++ b/drivers/message/i2o/device.c
@@ -35,18 +35,18 @@
static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd,
u32 type)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
- m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid, &msg->u.head[1]);
- writel(type, &msg->body[0]);
+ msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid);
+ msg->body[0] = cpu_to_le32(type);
- return i2o_msg_post_wait(dev->iop, m, 60);
+ return i2o_msg_post_wait(dev->iop, msg, 60);
}
/**
@@ -123,7 +123,6 @@ int i2o_device_claim_release(struct i2o_device *dev)
return rc;
}
-
/**
* i2o_device_release - release the memory for a I2O device
* @dev: I2O device which should be released
@@ -140,10 +139,10 @@ static void i2o_device_release(struct device *dev)
kfree(i2o_dev);
}
-
/**
- * i2o_device_class_show_class_id - Displays class id of I2O device
- * @cd: class device of which the class id should be displayed
+ * i2o_device_show_class_id - Displays class id of I2O device
+ * @dev: device of which the class id should be displayed
+ * @attr: pointer to device attribute
* @buf: buffer into which the class id should be printed
*
* Returns the number of bytes which are printed into the buffer.
@@ -159,15 +158,15 @@ static ssize_t i2o_device_show_class_id(struct device *dev,
}
/**
- * i2o_device_class_show_tid - Displays TID of I2O device
- * @cd: class device of which the TID should be displayed
- * @buf: buffer into which the class id should be printed
+ * i2o_device_show_tid - Displays TID of I2O device
+ * @dev: device of which the TID should be displayed
+ * @attr: pointer to device attribute
+ * @buf: buffer into which the TID should be printed
*
* Returns the number of bytes which are printed into the buffer.
*/
static ssize_t i2o_device_show_tid(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+ struct device_attribute *attr, char *buf)
{
struct i2o_device *i2o_dev = to_i2o_device(dev);
@@ -175,6 +174,7 @@ static ssize_t i2o_device_show_tid(struct device *dev,
return strlen(buf) + 1;
}
+/* I2O device attributes */
struct device_attribute i2o_device_attrs[] = {
__ATTR(class_id, S_IRUGO, i2o_device_show_class_id, NULL),
__ATTR(tid, S_IRUGO, i2o_device_show_tid, NULL),
@@ -193,12 +193,10 @@ static struct i2o_device *i2o_device_alloc(void)
{
struct i2o_device *dev;
- dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return ERR_PTR(-ENOMEM);
- memset(dev, 0, sizeof(*dev));
-
INIT_LIST_HEAD(&dev->list);
init_MUTEX(&dev->lock);
@@ -209,66 +207,6 @@ static struct i2o_device *i2o_device_alloc(void)
}
/**
- * i2o_setup_sysfs_links - Adds attributes to the I2O device
- * @cd: I2O class device which is added to the I2O device class
- *
- * This function get called when a I2O device is added to the class. It
- * creates the attributes for each device and creates user/parent symlink
- * if necessary.
- *
- * Returns 0 on success or negative error code on failure.
- */
-static void i2o_setup_sysfs_links(struct i2o_device *i2o_dev)
-{
- struct i2o_controller *c = i2o_dev->iop;
- struct i2o_device *tmp;
-
- /* create user entries for this device */
- tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid);
- if (tmp && tmp != i2o_dev)
- sysfs_create_link(&i2o_dev->device.kobj,
- &tmp->device.kobj, "user");
-
- /* create user entries refering to this device */
- list_for_each_entry(tmp, &c->devices, list)
- if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid &&
- tmp != i2o_dev)
- sysfs_create_link(&tmp->device.kobj,
- &i2o_dev->device.kobj, "user");
-
- /* create parent entries for this device */
- tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid);
- if (tmp && tmp != i2o_dev)
- sysfs_create_link(&i2o_dev->device.kobj,
- &tmp->device.kobj, "parent");
-
- /* create parent entries refering to this device */
- list_for_each_entry(tmp, &c->devices, list)
- if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid &&
- tmp != i2o_dev)
- sysfs_create_link(&tmp->device.kobj,
- &i2o_dev->device.kobj, "parent");
-}
-
-static void i2o_remove_sysfs_links(struct i2o_device *i2o_dev)
-{
- struct i2o_controller *c = i2o_dev->iop;
- struct i2o_device *tmp;
-
- sysfs_remove_link(&i2o_dev->device.kobj, "parent");
- sysfs_remove_link(&i2o_dev->device.kobj, "user");
-
- list_for_each_entry(tmp, &c->devices, list) {
- if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
- sysfs_remove_link(&tmp->device.kobj, "parent");
- if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
- sysfs_remove_link(&tmp->device.kobj, "user");
- }
-}
-
-
-
-/**
* i2o_device_add - allocate a new I2O device and add it to the IOP
* @iop: I2O controller where the device is on
* @entry: LCT entry of the I2O device
@@ -282,33 +220,57 @@ static void i2o_remove_sysfs_links(struct i2o_device *i2o_dev)
static struct i2o_device *i2o_device_add(struct i2o_controller *c,
i2o_lct_entry * entry)
{
- struct i2o_device *dev;
+ struct i2o_device *i2o_dev, *tmp;
- dev = i2o_device_alloc();
- if (IS_ERR(dev)) {
+ i2o_dev = i2o_device_alloc();
+ if (IS_ERR(i2o_dev)) {
printk(KERN_ERR "i2o: unable to allocate i2o device\n");
- return dev;
+ return i2o_dev;
}
- dev->lct_data = *entry;
- dev->iop = c;
+ i2o_dev->lct_data = *entry;
- snprintf(dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit,
- dev->lct_data.tid);
+ snprintf(i2o_dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit,
+ i2o_dev->lct_data.tid);
- dev->device.parent = &c->device;
+ i2o_dev->iop = c;
+ i2o_dev->device.parent = &c->device;
- device_register(&dev->device);
+ device_register(&i2o_dev->device);
- list_add_tail(&dev->list, &c->devices);
+ list_add_tail(&i2o_dev->list, &c->devices);
- i2o_setup_sysfs_links(dev);
+ /* create user entries for this device */
+ tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid);
+ if (tmp && (tmp != i2o_dev))
+ sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj,
+ "user");
- i2o_driver_notify_device_add_all(dev);
+ /* create user entries referring to this device */
+ list_for_each_entry(tmp, &c->devices, list)
+ if ((tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
+ && (tmp != i2o_dev))
+ sysfs_create_link(&tmp->device.kobj,
+ &i2o_dev->device.kobj, "user");
- pr_debug("i2o: device %s added\n", dev->device.bus_id);
+ /* create parent entries for this device */
+ tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid);
+ if (tmp && (tmp != i2o_dev))
+ sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj,
+ "parent");
- return dev;
+ /* create parent entries referring to this device */
+ list_for_each_entry(tmp, &c->devices, list)
+ if ((tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
+ && (tmp != i2o_dev))
+ sysfs_create_link(&tmp->device.kobj,
+ &i2o_dev->device.kobj, "parent");
+
+ i2o_driver_notify_device_add_all(i2o_dev);
+
+ pr_debug("i2o: device %s added\n", i2o_dev->device.bus_id);
+
+ return i2o_dev;
}
/**
@@ -321,9 +283,22 @@ static struct i2o_device *i2o_device_add(struct i2o_controller *c,
*/
void i2o_device_remove(struct i2o_device *i2o_dev)
{
+ struct i2o_device *tmp;
+ struct i2o_controller *c = i2o_dev->iop;
+
i2o_driver_notify_device_remove_all(i2o_dev);
- i2o_remove_sysfs_links(i2o_dev);
+
+ sysfs_remove_link(&i2o_dev->device.kobj, "parent");
+ sysfs_remove_link(&i2o_dev->device.kobj, "user");
+
+ list_for_each_entry(tmp, &c->devices, list) {
+ if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
+ sysfs_remove_link(&tmp->device.kobj, "parent");
+ if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
+ sysfs_remove_link(&tmp->device.kobj, "user");
+ }
list_del(&i2o_dev->list);
+
device_unregister(&i2o_dev->device);
}
@@ -341,56 +316,83 @@ int i2o_device_parse_lct(struct i2o_controller *c)
{
struct i2o_device *dev, *tmp;
i2o_lct *lct;
- int i;
- int max;
+ u32 *dlct = c->dlct.virt;
+ int max = 0, i = 0;
+ u16 table_size;
+ u32 buf;
down(&c->lct_lock);
kfree(c->lct);
- lct = c->dlct.virt;
+ buf = le32_to_cpu(*dlct++);
+ table_size = buf & 0xffff;
- c->lct = kmalloc(lct->table_size * 4, GFP_KERNEL);
- if (!c->lct) {
+ lct = c->lct = kmalloc(table_size * 4, GFP_KERNEL);
+ if (!lct) {
up(&c->lct_lock);
return -ENOMEM;
}
- if (lct->table_size * 4 > c->dlct.len) {
- memcpy(c->lct, c->dlct.virt, c->dlct.len);
- up(&c->lct_lock);
- return -EAGAIN;
- }
+ lct->lct_ver = buf >> 28;
+ lct->boot_tid = buf >> 16 & 0xfff;
+ lct->table_size = table_size;
+ lct->change_ind = le32_to_cpu(*dlct++);
+ lct->iop_flags = le32_to_cpu(*dlct++);
- memcpy(c->lct, c->dlct.virt, lct->table_size * 4);
-
- lct = c->lct;
-
- max = (lct->table_size - 3) / 9;
+ table_size -= 3;
pr_debug("%s: LCT has %d entries (LCT size: %d)\n", c->name, max,
lct->table_size);
- /* remove devices, which are not in the LCT anymore */
- list_for_each_entry_safe(dev, tmp, &c->devices, list) {
+ while (table_size > 0) {
+ i2o_lct_entry *entry = &lct->lct_entry[max];
int found = 0;
- for (i = 0; i < max; i++) {
- if (lct->lct_entry[i].tid == dev->lct_data.tid) {
+ buf = le32_to_cpu(*dlct++);
+ entry->entry_size = buf & 0xffff;
+ entry->tid = buf >> 16 & 0xfff;
+
+ entry->change_ind = le32_to_cpu(*dlct++);
+ entry->device_flags = le32_to_cpu(*dlct++);
+
+ buf = le32_to_cpu(*dlct++);
+ entry->class_id = buf & 0xfff;
+ entry->version = buf >> 12 & 0xf;
+ entry->vendor_id = buf >> 16;
+
+ entry->sub_class = le32_to_cpu(*dlct++);
+
+ buf = le32_to_cpu(*dlct++);
+ entry->user_tid = buf & 0xfff;
+ entry->parent_tid = buf >> 12 & 0xfff;
+ entry->bios_info = buf >> 24;
+
+ memcpy(&entry->identity_tag, dlct, 8);
+ dlct += 2;
+
+ entry->event_capabilities = le32_to_cpu(*dlct++);
+
+ /* add new devices, which are new in the LCT */
+ list_for_each_entry_safe(dev, tmp, &c->devices, list) {
+ if (entry->tid == dev->lct_data.tid) {
found = 1;
break;
}
}
if (!found)
- i2o_device_remove(dev);
+ i2o_device_add(c, entry);
+
+ table_size -= 9;
+ max++;
}
- /* add new devices, which are new in the LCT */
- for (i = 0; i < max; i++) {
+ /* remove devices, which are not in the LCT anymore */
+ list_for_each_entry_safe(dev, tmp, &c->devices, list) {
int found = 0;
- list_for_each_entry_safe(dev, tmp, &c->devices, list) {
+ for (i = 0; i < max; i++) {
if (lct->lct_entry[i].tid == dev->lct_data.tid) {
found = 1;
break;
@@ -398,14 +400,14 @@ int i2o_device_parse_lct(struct i2o_controller *c)
}
if (!found)
- i2o_device_add(c, &lct->lct_entry[i]);
+ i2o_device_remove(dev);
}
+
up(&c->lct_lock);
return 0;
}
-
/*
* Run time support routines
*/
@@ -419,13 +421,9 @@ int i2o_device_parse_lct(struct i2o_controller *c)
* ResultCount, ErrorInfoSize, BlockStatus and BlockSize.
*/
int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist,
- int oplen, void *reslist, int reslen)
+ int oplen, void *reslist, int reslen)
{
- struct i2o_message __iomem *msg;
- u32 m;
- u32 *res32 = (u32 *) reslist;
- u32 *restmp = (u32 *) reslist;
- int len = 0;
+ struct i2o_message *msg;
int i = 0;
int rc;
struct i2o_dma res;
@@ -437,26 +435,27 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist,
if (i2o_dma_alloc(dev, &res, reslen, GFP_KERNEL))
return -ENOMEM;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY) {
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg)) {
i2o_dma_free(dev, &res);
- return -ETIMEDOUT;
+ return PTR_ERR(msg);
}
i = 0;
- writel(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid,
- &msg->u.head[1]);
- writel(0, &msg->body[i++]);
- writel(0x4C000000 | oplen, &msg->body[i++]); /* OperationList */
- memcpy_toio(&msg->body[i], oplist, oplen);
+ msg->u.head[1] =
+ cpu_to_le32(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid);
+ msg->body[i++] = cpu_to_le32(0x00000000);
+ msg->body[i++] = cpu_to_le32(0x4C000000 | oplen); /* OperationList */
+ memcpy(&msg->body[i], oplist, oplen);
i += (oplen / 4 + (oplen % 4 ? 1 : 0));
- writel(0xD0000000 | res.len, &msg->body[i++]); /* ResultList */
- writel(res.phys, &msg->body[i++]);
+ msg->body[i++] = cpu_to_le32(0xD0000000 | res.len); /* ResultList */
+ msg->body[i++] = cpu_to_le32(res.phys);
- writel(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) |
- SGL_OFFSET_5, &msg->u.head[0]);
+ msg->u.head[0] =
+ cpu_to_le32(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) |
+ SGL_OFFSET_5);
- rc = i2o_msg_post_wait_mem(c, m, 10, &res);
+ rc = i2o_msg_post_wait_mem(c, msg, 10, &res);
/* This only looks like a memory leak - don't "fix" it. */
if (rc == -ETIMEDOUT)
@@ -465,36 +464,7 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist,
memcpy(reslist, res.virt, res.len);
i2o_dma_free(dev, &res);
- /* Query failed */
- if (rc)
- return rc;
- /*
- * Calculate number of bytes of Result LIST
- * We need to loop through each Result BLOCK and grab the length
- */
- restmp = res32 + 1;
- len = 1;
- for (i = 0; i < (res32[0] & 0X0000FFFF); i++) {
- if (restmp[0] & 0x00FF0000) { /* BlockStatus != SUCCESS */
- printk(KERN_WARNING
- "%s - Error:\n ErrorInfoSize = 0x%02x, "
- "BlockStatus = 0x%02x, BlockSize = 0x%04x\n",
- (cmd ==
- I2O_CMD_UTIL_PARAMS_SET) ? "PARAMS_SET" :
- "PARAMS_GET", res32[1] >> 24,
- (res32[1] >> 16) & 0xFF, res32[1] & 0xFFFF);
-
- /*
- * If this is the only request,than we return an error
- */
- if ((res32[0] & 0x0000FFFF) == 1) {
- return -((res32[1] >> 16) & 0xFF); /* -BlockStatus */
- }
- }
- len += restmp[0] & 0x0000FFFF; /* Length of res BLOCK */
- restmp += restmp[0] & 0x0000FFFF; /* Skip to next BLOCK */
- }
- return (len << 2); /* bytes used by result list */
+ return rc;
}
/*
@@ -503,28 +473,25 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist,
int i2o_parm_field_get(struct i2o_device *i2o_dev, int group, int field,
void *buf, int buflen)
{
- u16 opblk[] = { 1, 0, I2O_PARAMS_FIELD_GET, group, 1, field };
+ u32 opblk[] = { cpu_to_le32(0x00000001),
+ cpu_to_le32((u16) group << 16 | I2O_PARAMS_FIELD_GET),
+ cpu_to_le32((s16) field << 16 | 0x00000001)
+ };
u8 *resblk; /* 8 bytes for header */
- int size;
-
- if (field == -1) /* whole group */
- opblk[4] = -1;
+ int rc;
resblk = kmalloc(buflen + 8, GFP_KERNEL | GFP_ATOMIC);
if (!resblk)
return -ENOMEM;
- size = i2o_parm_issue(i2o_dev, I2O_CMD_UTIL_PARAMS_GET, opblk,
- sizeof(opblk), resblk, buflen + 8);
+ rc = i2o_parm_issue(i2o_dev, I2O_CMD_UTIL_PARAMS_GET, opblk,
+ sizeof(opblk), resblk, buflen + 8);
memcpy(buf, resblk + 8, buflen); /* cut off header */
kfree(resblk);
- if (size > buflen)
- return buflen;
-
- return size;
+ return rc;
}
/*
@@ -534,12 +501,12 @@ int i2o_parm_field_get(struct i2o_device *i2o_dev, int group, int field,
* else return specific fields
* ibuf contains fieldindexes
*
- * if oper == I2O_PARAMS_LIST_GET, get from specific rows
- * if fieldcount == -1 return all fields
+ * if oper == I2O_PARAMS_LIST_GET, get from specific rows
+ * if fieldcount == -1 return all fields
* ibuf contains rowcount, keyvalues
- * else return specific fields
+ * else return specific fields
* fieldcount is # of fieldindexes
- * ibuf contains fieldindexes, rowcount, keyvalues
+ * ibuf contains fieldindexes, rowcount, keyvalues
*
* You could also use directly function i2o_issue_params().
*/
diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c
index 0fb9c4e2ad4..64130227574 100644
--- a/drivers/message/i2o/driver.c
+++ b/drivers/message/i2o/driver.c
@@ -61,12 +61,10 @@ static int i2o_bus_match(struct device *dev, struct device_driver *drv)
};
/* I2O bus type */
-extern struct device_attribute i2o_device_attrs[];
-
struct bus_type i2o_bus_type = {
.name = "i2o",
.match = i2o_bus_match,
- .dev_attrs = i2o_device_attrs,
+ .dev_attrs = i2o_device_attrs
};
/**
@@ -219,14 +217,14 @@ int i2o_driver_dispatch(struct i2o_controller *c, u32 m)
/* cut of header from message size (in 32-bit words) */
size = (le32_to_cpu(msg->u.head[0]) >> 16) - 5;
- evt = kmalloc(size * 4 + sizeof(*evt), GFP_ATOMIC | __GFP_ZERO);
+ evt = kzalloc(size * 4 + sizeof(*evt), GFP_ATOMIC);
if (!evt)
return -ENOMEM;
evt->size = size;
evt->tcntxt = le32_to_cpu(msg->u.s.tcntxt);
evt->event_indicator = le32_to_cpu(msg->body[0]);
- memcpy(&evt->tcntxt, &msg->u.s.tcntxt, size * 4);
+ memcpy(&evt->data, &msg->body[1], size * 4);
list_for_each_entry_safe(dev, tmp, &c->devices, list)
if (dev->lct_data.tid == tid) {
@@ -349,12 +347,10 @@ int __init i2o_driver_init(void)
osm_info("max drivers = %d\n", i2o_max_drivers);
i2o_drivers =
- kmalloc(i2o_max_drivers * sizeof(*i2o_drivers), GFP_KERNEL);
+ kzalloc(i2o_max_drivers * sizeof(*i2o_drivers), GFP_KERNEL);
if (!i2o_drivers)
return -ENOMEM;
- memset(i2o_drivers, 0, i2o_max_drivers * sizeof(*i2o_drivers));
-
rc = bus_register(&i2o_bus_type);
if (rc < 0)
diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c
index 9c339a2505b..9bb9859f6df 100644
--- a/drivers/message/i2o/exec-osm.c
+++ b/drivers/message/i2o/exec-osm.c
@@ -33,7 +33,7 @@
#include <linux/workqueue.h>
#include <linux/string.h>
#include <linux/slab.h>
-#include <linux/sched.h> /* wait_event_interruptible_timeout() needs this */
+#include <linux/sched.h> /* wait_event_interruptible_timeout() needs this */
#include <asm/param.h> /* HZ */
#include "core.h"
@@ -75,11 +75,9 @@ static struct i2o_exec_wait *i2o_exec_wait_alloc(void)
{
struct i2o_exec_wait *wait;
- wait = kmalloc(sizeof(*wait), GFP_KERNEL);
+ wait = kzalloc(sizeof(*wait), GFP_KERNEL);
if (!wait)
- return ERR_PTR(-ENOMEM);
-
- memset(wait, 0, sizeof(*wait));
+ return NULL;
INIT_LIST_HEAD(&wait->list);
@@ -114,13 +112,12 @@ static void i2o_exec_wait_free(struct i2o_exec_wait *wait)
* Returns 0 on success, negative error code on timeout or positive error
* code from reply.
*/
-int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long
- timeout, struct i2o_dma *dma)
+int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
+ unsigned long timeout, struct i2o_dma *dma)
{
DECLARE_WAIT_QUEUE_HEAD(wq);
struct i2o_exec_wait *wait;
static u32 tcntxt = 0x80000000;
- struct i2o_message __iomem *msg = i2o_msg_in_to_virt(c, m);
int rc = 0;
wait = i2o_exec_wait_alloc();
@@ -138,15 +135,15 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long
* We will only use transaction contexts >= 0x80000000 for POST WAIT,
* so we could find a POST WAIT reply easier in the reply handler.
*/
- writel(i2o_exec_driver.context, &msg->u.s.icntxt);
+ msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
wait->tcntxt = tcntxt++;
- writel(wait->tcntxt, &msg->u.s.tcntxt);
+ msg->u.s.tcntxt = cpu_to_le32(wait->tcntxt);
/*
* Post the message to the controller. At some point later it will
* return. If we time out before it returns then complete will be zero.
*/
- i2o_msg_post(c, m);
+ i2o_msg_post(c, msg);
if (!wait->complete) {
wait->wq = &wq;
@@ -266,13 +263,14 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
*
* Returns number of bytes printed into buffer.
*/
-static ssize_t i2o_exec_show_vendor_id(struct device *d, struct device_attribute *attr, char *buf)
+static ssize_t i2o_exec_show_vendor_id(struct device *d,
+ struct device_attribute *attr, char *buf)
{
struct i2o_device *dev = to_i2o_device(d);
u16 id;
- if (i2o_parm_field_get(dev, 0x0000, 0, &id, 2)) {
- sprintf(buf, "0x%04x", id);
+ if (!i2o_parm_field_get(dev, 0x0000, 0, &id, 2)) {
+ sprintf(buf, "0x%04x", le16_to_cpu(id));
return strlen(buf) + 1;
}
@@ -286,13 +284,15 @@ static ssize_t i2o_exec_show_vendor_id(struct device *d, struct device_attribute
*
* Returns number of bytes printed into buffer.
*/
-static ssize_t i2o_exec_show_product_id(struct device *d, struct device_attribute *attr, char *buf)
+static ssize_t i2o_exec_show_product_id(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
{
struct i2o_device *dev = to_i2o_device(d);
u16 id;
- if (i2o_parm_field_get(dev, 0x0000, 1, &id, 2)) {
- sprintf(buf, "0x%04x", id);
+ if (!i2o_parm_field_get(dev, 0x0000, 1, &id, 2)) {
+ sprintf(buf, "0x%04x", le16_to_cpu(id));
return strlen(buf) + 1;
}
@@ -362,7 +362,9 @@ static void i2o_exec_lct_modified(struct i2o_controller *c)
if (i2o_device_parse_lct(c) != -EAGAIN)
change_ind = c->lct->change_ind + 1;
+#ifdef CONFIG_I2O_LCT_NOTIFY_ON_CHANGES
i2o_exec_lct_notify(c, change_ind);
+#endif
};
/**
@@ -385,23 +387,22 @@ static int i2o_exec_reply(struct i2o_controller *c, u32 m,
u32 context;
if (le32_to_cpu(msg->u.head[0]) & MSG_FAIL) {
+ struct i2o_message __iomem *pmsg;
+ u32 pm;
+
/*
* If Fail bit is set we must take the transaction context of
* the preserved message to find the right request again.
*/
- struct i2o_message __iomem *pmsg;
- u32 pm;
pm = le32_to_cpu(msg->body[3]);
-
pmsg = i2o_msg_in_to_virt(c, pm);
+ context = readl(&pmsg->u.s.tcntxt);
i2o_report_status(KERN_INFO, "i2o_core", msg);
- context = readl(&pmsg->u.s.tcntxt);
-
/* Release the preserved msg */
- i2o_msg_nop(c, pm);
+ i2o_msg_nop_mfa(c, pm);
} else
context = le32_to_cpu(msg->u.s.tcntxt);
@@ -462,25 +463,26 @@ static void i2o_exec_event(struct i2o_event *evt)
*/
int i2o_exec_lct_get(struct i2o_controller *c)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
int i = 0;
int rc = -EAGAIN;
for (i = 1; i <= I2O_LCT_GET_TRIES; i++) {
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
-
- writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]);
- writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(0xffffffff, &msg->body[0]);
- writel(0x00000000, &msg->body[1]);
- writel(0xd0000000 | c->dlct.len, &msg->body[2]);
- writel(c->dlct.phys, &msg->body[3]);
-
- rc = i2o_msg_post_wait(c, m, I2O_TIMEOUT_LCT_GET);
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+
+ msg->u.head[0] =
+ cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
+ msg->body[0] = cpu_to_le32(0xffffffff);
+ msg->body[1] = cpu_to_le32(0x00000000);
+ msg->body[2] = cpu_to_le32(0xd0000000 | c->dlct.len);
+ msg->body[3] = cpu_to_le32(c->dlct.phys);
+
+ rc = i2o_msg_post_wait(c, msg, I2O_TIMEOUT_LCT_GET);
if (rc < 0)
break;
@@ -506,29 +508,29 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind)
{
i2o_status_block *sb = c->status_block.virt;
struct device *dev;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
dev = &c->pdev->dev;
- if (i2o_dma_realloc(dev, &c->dlct, sb->expected_lct_size, GFP_KERNEL))
+ if (i2o_dma_realloc
+ (dev, &c->dlct, le32_to_cpu(sb->expected_lct_size), GFP_KERNEL))
return -ENOMEM;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
-
- writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]);
- writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(i2o_exec_driver.context, &msg->u.s.icntxt);
- writel(0, &msg->u.s.tcntxt); /* FIXME */
- writel(0xffffffff, &msg->body[0]);
- writel(change_ind, &msg->body[1]);
- writel(0xd0000000 | c->dlct.len, &msg->body[2]);
- writel(c->dlct.phys, &msg->body[3]);
-
- i2o_msg_post(c, m);
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+
+ msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6);
+ msg->u.head[1] = cpu_to_le32(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
+ msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
+ msg->u.s.tcntxt = cpu_to_le32(0x00000000);
+ msg->body[0] = cpu_to_le32(0xffffffff);
+ msg->body[1] = cpu_to_le32(change_ind);
+ msg->body[2] = cpu_to_le32(0xd0000000 | c->dlct.len);
+ msg->body[3] = cpu_to_le32(c->dlct.phys);
+
+ i2o_msg_post(c, msg);
return 0;
};
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index 4f522527b7e..5b1febed313 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -59,10 +59,12 @@
#include <linux/blkdev.h>
#include <linux/hdreg.h>
+#include <scsi/scsi.h>
+
#include "i2o_block.h"
#define OSM_NAME "block-osm"
-#define OSM_VERSION "1.287"
+#define OSM_VERSION "1.325"
#define OSM_DESCRIPTION "I2O Block Device OSM"
static struct i2o_driver i2o_block_driver;
@@ -130,20 +132,20 @@ static int i2o_block_remove(struct device *dev)
*/
static int i2o_block_device_flush(struct i2o_device *dev)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
- m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev->lct_data.tid,
- &msg->u.head[1]);
- writel(60 << 16, &msg->body[0]);
+ msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev->
+ lct_data.tid);
+ msg->body[0] = cpu_to_le32(60 << 16);
osm_debug("Flushing...\n");
- return i2o_msg_post_wait(dev->iop, m, 60);
+ return i2o_msg_post_wait(dev->iop, msg, 60);
};
/**
@@ -181,21 +183,21 @@ static int i2o_block_issue_flush(request_queue_t * queue, struct gendisk *disk,
*/
static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id)
{
- struct i2o_message __iomem *msg;
- u32 m;
-
- m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
-
- writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev->lct_data.tid,
- &msg->u.head[1]);
- writel(-1, &msg->body[0]);
- writel(0, &msg->body[1]);
+ struct i2o_message *msg;
+
+ msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+
+ msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev->
+ lct_data.tid);
+ msg->body[0] = cpu_to_le32(-1);
+ msg->body[1] = cpu_to_le32(0x00000000);
osm_debug("Mounting...\n");
- return i2o_msg_post_wait(dev->iop, m, 2);
+ return i2o_msg_post_wait(dev->iop, msg, 2);
};
/**
@@ -210,20 +212,20 @@ static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id)
*/
static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
- m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid,
- &msg->u.head[1]);
- writel(-1, &msg->body[0]);
+ msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev->
+ lct_data.tid);
+ msg->body[0] = cpu_to_le32(-1);
osm_debug("Locking...\n");
- return i2o_msg_post_wait(dev->iop, m, 2);
+ return i2o_msg_post_wait(dev->iop, msg, 2);
};
/**
@@ -238,20 +240,20 @@ static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id)
*/
static int i2o_block_device_unlock(struct i2o_device *dev, u32 media_id)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
- m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid,
- &msg->u.head[1]);
- writel(media_id, &msg->body[0]);
+ msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev->
+ lct_data.tid);
+ msg->body[0] = cpu_to_le32(media_id);
osm_debug("Unlocking...\n");
- return i2o_msg_post_wait(dev->iop, m, 2);
+ return i2o_msg_post_wait(dev->iop, msg, 2);
};
/**
@@ -267,21 +269,21 @@ static int i2o_block_device_power(struct i2o_block_device *dev, u8 op)
{
struct i2o_device *i2o_dev = dev->i2o_dev;
struct i2o_controller *c = i2o_dev->iop;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
int rc;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev->lct_data.
- tid, &msg->u.head[1]);
- writel(op << 24, &msg->body[0]);
+ msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev->
+ lct_data.tid);
+ msg->body[0] = cpu_to_le32(op << 24);
osm_debug("Power...\n");
- rc = i2o_msg_post_wait(c, m, 60);
+ rc = i2o_msg_post_wait(c, msg, 60);
if (!rc)
dev->power = op;
@@ -331,7 +333,7 @@ static inline void i2o_block_request_free(struct i2o_block_request *ireq)
*/
static inline int i2o_block_sglist_alloc(struct i2o_controller *c,
struct i2o_block_request *ireq,
- u32 __iomem ** mptr)
+ u32 ** mptr)
{
int nents;
enum dma_data_direction direction;
@@ -745,10 +747,9 @@ static int i2o_block_transfer(struct request *req)
struct i2o_block_device *dev = req->rq_disk->private_data;
struct i2o_controller *c;
int tid = dev->i2o_dev->lct_data.tid;
- struct i2o_message __iomem *msg;
- u32 __iomem *mptr;
+ struct i2o_message *msg;
+ u32 *mptr;
struct i2o_block_request *ireq = req->special;
- u32 m;
u32 tcntxt;
u32 sgl_offset = SGL_OFFSET_8;
u32 ctl_flags = 0x00000000;
@@ -763,9 +764,9 @@ static int i2o_block_transfer(struct request *req)
c = dev->i2o_dev->iop;
- m = i2o_msg_get(c, &msg);
- if (m == I2O_QUEUE_EMPTY) {
- rc = -EBUSY;
+ msg = i2o_msg_get(c);
+ if (IS_ERR(msg)) {
+ rc = PTR_ERR(msg);
goto exit;
}
@@ -775,8 +776,8 @@ static int i2o_block_transfer(struct request *req)
goto nop_msg;
}
- writel(i2o_block_driver.context, &msg->u.s.icntxt);
- writel(tcntxt, &msg->u.s.tcntxt);
+ msg->u.s.icntxt = cpu_to_le32(i2o_block_driver.context);
+ msg->u.s.tcntxt = cpu_to_le32(tcntxt);
mptr = &msg->body[0];
@@ -834,11 +835,11 @@ static int i2o_block_transfer(struct request *req)
sgl_offset = SGL_OFFSET_12;
- writel(I2O_CMD_PRIVATE << 24 | HOST_TID << 12 | tid,
- &msg->u.head[1]);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_PRIVATE << 24 | HOST_TID << 12 | tid);
- writel(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC, mptr++);
- writel(tid, mptr++);
+ *mptr++ = cpu_to_le32(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC);
+ *mptr++ = cpu_to_le32(tid);
/*
* ENABLE_DISCONNECT
@@ -846,29 +847,31 @@ static int i2o_block_transfer(struct request *req)
* RETURN_SENSE_DATA_IN_REPLY_MESSAGE_FRAME
*/
if (rq_data_dir(req) == READ) {
- cmd[0] = 0x28;
+ cmd[0] = READ_10;
scsi_flags = 0x60a0000a;
} else {
- cmd[0] = 0x2A;
+ cmd[0] = WRITE_10;
scsi_flags = 0xa0a0000a;
}
- writel(scsi_flags, mptr++);
+ *mptr++ = cpu_to_le32(scsi_flags);
*((u32 *) & cmd[2]) = cpu_to_be32(req->sector * hwsec);
*((u16 *) & cmd[7]) = cpu_to_be16(req->nr_sectors * hwsec);
- memcpy_toio(mptr, cmd, 10);
+ memcpy(mptr, cmd, 10);
mptr += 4;
- writel(req->nr_sectors << KERNEL_SECTOR_SHIFT, mptr++);
+ *mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
} else
#endif
{
- writel(cmd | HOST_TID << 12 | tid, &msg->u.head[1]);
- writel(ctl_flags, mptr++);
- writel(req->nr_sectors << KERNEL_SECTOR_SHIFT, mptr++);
- writel((u32) (req->sector << KERNEL_SECTOR_SHIFT), mptr++);
- writel(req->sector >> (32 - KERNEL_SECTOR_SHIFT), mptr++);
+ msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid);
+ *mptr++ = cpu_to_le32(ctl_flags);
+ *mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
+ *mptr++ =
+ cpu_to_le32((u32) (req->sector << KERNEL_SECTOR_SHIFT));
+ *mptr++ =
+ cpu_to_le32(req->sector >> (32 - KERNEL_SECTOR_SHIFT));
}
if (!i2o_block_sglist_alloc(c, ireq, &mptr)) {
@@ -876,13 +879,13 @@ static int i2o_block_transfer(struct request *req)
goto context_remove;
}
- writel(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) |
- sgl_offset, &msg->u.head[0]);
+ msg->u.head[0] =
+ cpu_to_le32(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset);
list_add_tail(&ireq->queue, &dev->open_queue);
dev->open_queue_depth++;
- i2o_msg_post(c, m);
+ i2o_msg_post(c, msg);
return 0;
@@ -890,7 +893,7 @@ static int i2o_block_transfer(struct request *req)
i2o_cntxt_list_remove(c, req);
nop_msg:
- i2o_msg_nop(c, m);
+ i2o_msg_nop(c, msg);
exit:
return rc;
@@ -978,13 +981,12 @@ static struct i2o_block_device *i2o_block_device_alloc(void)
struct request_queue *queue;
int rc;
- dev = kmalloc(sizeof(*dev), GFP_KERNEL);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev) {
osm_err("Insufficient memory to allocate I2O Block disk.\n");
rc = -ENOMEM;
goto exit;
}
- memset(dev, 0, sizeof(*dev));
INIT_LIST_HEAD(&dev->open_queue);
spin_lock_init(&dev->lock);
@@ -1049,8 +1051,8 @@ static int i2o_block_probe(struct device *dev)
int rc;
u64 size;
u32 blocksize;
- u32 flags, status;
u16 body_size = 4;
+ u16 power;
unsigned short max_sectors;
#ifdef CONFIG_I2O_EXT_ADAPTEC
@@ -1108,22 +1110,20 @@ static int i2o_block_probe(struct device *dev)
* Ask for the current media data. If that isn't supported
* then we ask for the device capacity data
*/
- if (i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) ||
- i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) {
- blk_queue_hardsect_size(queue, blocksize);
+ if (!i2o_parm_field_get(i2o_dev, 0x0004, 1, &blocksize, 4) ||
+ !i2o_parm_field_get(i2o_dev, 0x0000, 3, &blocksize, 4)) {
+ blk_queue_hardsect_size(queue, le32_to_cpu(blocksize));
} else
osm_warn("unable to get blocksize of %s\n", gd->disk_name);
- if (i2o_parm_field_get(i2o_dev, 0x0004, 0, &size, 8) ||
- i2o_parm_field_get(i2o_dev, 0x0000, 4, &size, 8)) {
- set_capacity(gd, size >> KERNEL_SECTOR_SHIFT);
+ if (!i2o_parm_field_get(i2o_dev, 0x0004, 0, &size, 8) ||
+ !i2o_parm_field_get(i2o_dev, 0x0000, 4, &size, 8)) {
+ set_capacity(gd, le64_to_cpu(size) >> KERNEL_SECTOR_SHIFT);
} else
osm_warn("could not get size of %s\n", gd->disk_name);
- if (!i2o_parm_field_get(i2o_dev, 0x0000, 2, &i2o_blk_dev->power, 2))
- i2o_blk_dev->power = 0;
- i2o_parm_field_get(i2o_dev, 0x0000, 5, &flags, 4);
- i2o_parm_field_get(i2o_dev, 0x0000, 6, &status, 4);
+ if (!i2o_parm_field_get(i2o_dev, 0x0000, 2, &power, 2))
+ i2o_blk_dev->power = power;
i2o_event_register(i2o_dev, &i2o_block_driver, 0, 0xffffffff);
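The i2o_block.c hunks above set the pattern repeated throughout this series: message frames are now built in ordinary, DMA-able host memory with plain cpu_to_le32() stores instead of writel()/memcpy_toio() into __iomem space, and "no frame available" is reported through the kernel's ERR_PTR machinery rather than the I2O_QUEUE_EMPTY sentinel. A minimal sketch of that idiom follows; the demo_* names are hypothetical stand-ins for the real driver helpers.

#include <asm/byteorder.h>
#include <linux/err.h>
#include <linux/slab.h>

struct demo_msg {
	__le32 head[4];
};

/* Stand-in for i2o_msg_get(): returns a frame or an ERR_PTR code. */
static struct demo_msg *demo_msg_get(int queue_empty)
{
	struct demo_msg *msg;

	if (queue_empty)
		return ERR_PTR(-EBUSY);

	msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
	if (!msg)
		return ERR_PTR(-ENOMEM);

	return msg;
}

static int demo_transfer(void)
{
	struct demo_msg *msg = demo_msg_get(0);

	if (IS_ERR(msg))	/* one test covers every failure mode */
		return PTR_ERR(msg);

	/* The frame lives in normal memory: plain stores, no writel(). */
	msg->head[0] = cpu_to_le32(0x00000000);

	kfree(msg);
	return 0;
}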
diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c
index 3c3a7abebb1..89daf67b764 100644
--- a/drivers/message/i2o/i2o_config.c
+++ b/drivers/message/i2o/i2o_config.c
@@ -36,12 +36,12 @@
#include <asm/uaccess.h>
-#include "core.h"
-
#define SG_TABLESIZE 30
-static int i2o_cfg_ioctl(struct inode *inode, struct file *fp, unsigned int cmd,
- unsigned long arg);
+extern int i2o_parm_issue(struct i2o_device *, int, void *, int, void *, int);
+
+static int i2o_cfg_ioctl(struct inode *, struct file *, unsigned int,
+ unsigned long);
static spinlock_t i2o_config_lock;
@@ -230,8 +230,7 @@ static int i2o_cfg_swdl(unsigned long arg)
struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
unsigned char maxfrag = 0, curfrag = 1;
struct i2o_dma buffer;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
unsigned int status = 0, swlen = 0, fragsize = 8192;
struct i2o_controller *c;
@@ -257,31 +256,34 @@ static int i2o_cfg_swdl(unsigned long arg)
if (!c)
return -ENXIO;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -EBUSY;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) {
- i2o_msg_nop(c, m);
+ i2o_msg_nop(c, msg);
return -ENOMEM;
}
__copy_from_user(buffer.virt, kxfer.buf, fragsize);
- writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]);
- writel(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(i2o_config_driver.context, &msg->u.head[2]);
- writel(0, &msg->u.head[3]);
- writel((((u32) kxfer.flags) << 24) | (((u32) kxfer.sw_type) << 16) |
- (((u32) maxfrag) << 8) | (((u32) curfrag)), &msg->body[0]);
- writel(swlen, &msg->body[1]);
- writel(kxfer.sw_id, &msg->body[2]);
- writel(0xD0000000 | fragsize, &msg->body[3]);
- writel(buffer.phys, &msg->body[4]);
+ msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
+ msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+ msg->u.head[3] = cpu_to_le32(0);
+ msg->body[0] =
+ cpu_to_le32((((u32) kxfer.flags) << 24) |
+ (((u32) kxfer.sw_type) << 16) |
+ (((u32) maxfrag) << 8) | (((u32) curfrag)));
+ msg->body[1] = cpu_to_le32(swlen);
+ msg->body[2] = cpu_to_le32(kxfer.sw_id);
+ msg->body[3] = cpu_to_le32(0xD0000000 | fragsize);
+ msg->body[4] = cpu_to_le32(buffer.phys);
osm_debug("swdl frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize);
- status = i2o_msg_post_wait_mem(c, m, 60, &buffer);
+ status = i2o_msg_post_wait_mem(c, msg, 60, &buffer);
if (status != -ETIMEDOUT)
i2o_dma_free(&c->pdev->dev, &buffer);
@@ -302,8 +304,7 @@ static int i2o_cfg_swul(unsigned long arg)
struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
unsigned char maxfrag = 0, curfrag = 1;
struct i2o_dma buffer;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
unsigned int status = 0, swlen = 0, fragsize = 8192;
struct i2o_controller *c;
int ret = 0;
@@ -330,30 +331,30 @@ static int i2o_cfg_swul(unsigned long arg)
if (!c)
return -ENXIO;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -EBUSY;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) {
- i2o_msg_nop(c, m);
+ i2o_msg_nop(c, msg);
return -ENOMEM;
}
- writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]);
- writel(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(i2o_config_driver.context, &msg->u.head[2]);
- writel(0, &msg->u.head[3]);
- writel((u32) kxfer.flags << 24 | (u32) kxfer.
- sw_type << 16 | (u32) maxfrag << 8 | (u32) curfrag,
- &msg->body[0]);
- writel(swlen, &msg->body[1]);
- writel(kxfer.sw_id, &msg->body[2]);
- writel(0xD0000000 | fragsize, &msg->body[3]);
- writel(buffer.phys, &msg->body[4]);
+ msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID);
+ msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+ msg->u.head[3] = cpu_to_le32(0);
+ msg->body[0] =
+ cpu_to_le32((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16 |
+ (u32) maxfrag << 8 | (u32) curfrag);
+ msg->body[1] = cpu_to_le32(swlen);
+ msg->body[2] = cpu_to_le32(kxfer.sw_id);
+ msg->body[3] = cpu_to_le32(0xD0000000 | fragsize);
+ msg->body[4] = cpu_to_le32(buffer.phys);
osm_debug("swul frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize);
- status = i2o_msg_post_wait_mem(c, m, 60, &buffer);
+ status = i2o_msg_post_wait_mem(c, msg, 60, &buffer);
if (status != I2O_POST_WAIT_OK) {
if (status != -ETIMEDOUT)
@@ -380,8 +381,7 @@ static int i2o_cfg_swdel(unsigned long arg)
struct i2o_controller *c;
struct i2o_sw_xfer kxfer;
struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
unsigned int swlen;
int token;
@@ -395,21 +395,21 @@ static int i2o_cfg_swdel(unsigned long arg)
if (!c)
return -ENXIO;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -EBUSY;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(i2o_config_driver.context, &msg->u.head[2]);
- writel(0, &msg->u.head[3]);
- writel((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16,
- &msg->body[0]);
- writel(swlen, &msg->body[1]);
- writel(kxfer.sw_id, &msg->body[2]);
+ msg->u.head[0] = cpu_to_le32(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID);
+ msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+ msg->u.head[3] = cpu_to_le32(0);
+ msg->body[0] =
+ cpu_to_le32((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16);
+ msg->body[1] = cpu_to_le32(swlen);
+ msg->body[2] = cpu_to_le32(kxfer.sw_id);
- token = i2o_msg_post_wait(c, m, 10);
+ token = i2o_msg_post_wait(c, msg, 10);
if (token != I2O_POST_WAIT_OK) {
osm_info("swdel failed, DetailedStatus = %d\n", token);
@@ -423,25 +423,24 @@ static int i2o_cfg_validate(unsigned long arg)
{
int token;
int iop = (int)arg;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
struct i2o_controller *c;
c = i2o_find_iop(iop);
if (!c)
return -ENXIO;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -EBUSY;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop,
- &msg->u.head[1]);
- writel(i2o_config_driver.context, &msg->u.head[2]);
- writel(0, &msg->u.head[3]);
+ msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop);
+ msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+ msg->u.head[3] = cpu_to_le32(0);
- token = i2o_msg_post_wait(c, m, 10);
+ token = i2o_msg_post_wait(c, msg, 10);
if (token != I2O_POST_WAIT_OK) {
osm_info("Can't validate configuration, ErrorStatus = %d\n",
@@ -454,8 +453,7 @@ static int i2o_cfg_validate(unsigned long arg)
static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
struct i2o_evt_id __user *pdesc = (struct i2o_evt_id __user *)arg;
struct i2o_evt_id kdesc;
struct i2o_controller *c;
@@ -474,18 +472,19 @@ static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp)
if (!d)
return -ENODEV;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -EBUSY;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | kdesc.tid,
- &msg->u.head[1]);
- writel(i2o_config_driver.context, &msg->u.head[2]);
- writel(i2o_cntxt_list_add(c, fp->private_data), &msg->u.head[3]);
- writel(kdesc.evt_mask, &msg->body[0]);
+ msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 |
+ kdesc.tid);
+ msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+ msg->u.head[3] = cpu_to_le32(i2o_cntxt_list_add(c, fp->private_data));
+ msg->body[0] = cpu_to_le32(kdesc.evt_mask);
- i2o_msg_post(c, m);
+ i2o_msg_post(c, msg);
return 0;
}
@@ -537,7 +536,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
u32 sg_index = 0;
i2o_status_block *sb;
struct i2o_message *msg;
- u32 m;
unsigned int iop;
cmd = (struct i2o_cmd_passthru32 __user *)arg;
@@ -553,7 +551,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
return -ENXIO;
}
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
sb = c->status_block.virt;
@@ -585,19 +583,15 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
reply_size >>= 16;
reply_size <<= 2;
- reply = kmalloc(reply_size, GFP_KERNEL);
+ reply = kzalloc(reply_size, GFP_KERNEL);
if (!reply) {
printk(KERN_WARNING "%s: Could not allocate reply buffer\n",
c->name);
return -ENOMEM;
}
- memset(reply, 0, reply_size);
sg_offset = (msg->u.head[0] >> 4) & 0x0f;
- writel(i2o_config_driver.context, &msg->u.s.icntxt);
- writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt);
-
memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
if (sg_offset) {
struct sg_simple_element *sg;
@@ -631,7 +625,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
goto cleanup;
}
sg_size = sg[i].flag_count & 0xffffff;
- p = &(sg_list[sg_index++]);
+ p = &(sg_list[sg_index]);
/* Allocate memory for the transfer */
if (i2o_dma_alloc
(&c->pdev->dev, p, sg_size,
@@ -642,6 +636,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
rcode = -ENOMEM;
goto sg_list_cleanup;
}
+ sg_index++;
/* Copy in the user's SG buffer if necessary */
if (sg[i].
flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR */ ) {
@@ -662,9 +657,11 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
}
}
- rcode = i2o_msg_post_wait(c, m, 60);
- if (rcode)
+ rcode = i2o_msg_post_wait(c, msg, 60);
+ if (rcode) {
+ reply[4] = ((u32) rcode) << 24;
goto sg_list_cleanup;
+ }
if (sg_offset) {
u32 msg[I2O_OUTBOUND_MSG_FRAME_SIZE];
@@ -714,6 +711,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
}
}
+ sg_list_cleanup:
/* Copy back the reply to user space */
if (reply_size) {
// we wrote our own values for context - now restore the user supplied ones
@@ -731,7 +729,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
}
}
- sg_list_cleanup:
for (i = 0; i < sg_index; i++)
i2o_dma_free(&c->pdev->dev, &sg_list[i]);
@@ -780,8 +777,7 @@ static int i2o_cfg_passthru(unsigned long arg)
u32 i = 0;
void *p = NULL;
i2o_status_block *sb;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
unsigned int iop;
if (get_user(iop, &cmd->iop) || get_user(user_msg, &cmd->msg))
@@ -793,7 +789,7 @@ static int i2o_cfg_passthru(unsigned long arg)
return -ENXIO;
}
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
sb = c->status_block.virt;
@@ -820,19 +816,15 @@ static int i2o_cfg_passthru(unsigned long arg)
reply_size >>= 16;
reply_size <<= 2;
- reply = kmalloc(reply_size, GFP_KERNEL);
+ reply = kzalloc(reply_size, GFP_KERNEL);
if (!reply) {
printk(KERN_WARNING "%s: Could not allocate reply buffer\n",
c->name);
return -ENOMEM;
}
- memset(reply, 0, reply_size);
sg_offset = (msg->u.head[0] >> 4) & 0x0f;
- writel(i2o_config_driver.context, &msg->u.s.icntxt);
- writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt);
-
memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
if (sg_offset) {
struct sg_simple_element *sg;
@@ -894,9 +886,11 @@ static int i2o_cfg_passthru(unsigned long arg)
}
}
- rcode = i2o_msg_post_wait(c, m, 60);
- if (rcode)
+ rcode = i2o_msg_post_wait(c, msg, 60);
+ if (rcode) {
+ reply[4] = ((u32) rcode) << 24;
goto sg_list_cleanup;
+ }
if (sg_offset) {
u32 msg[128];
@@ -946,6 +940,7 @@ static int i2o_cfg_passthru(unsigned long arg)
}
}
+ sg_list_cleanup:
/* Copy back the reply to user space */
if (reply_size) {
// we wrote our own values for context - now restore the user supplied ones
@@ -962,7 +957,6 @@ static int i2o_cfg_passthru(unsigned long arg)
}
}
- sg_list_cleanup:
for (i = 0; i < sg_index; i++)
kfree(sg_list[i]);
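Two behavioural fixes are buried in the passthru hunks: sg_index only advances after i2o_dma_alloc() succeeds, so the cleanup loop never touches an unallocated entry, and the sg_list_cleanup label moved above the reply copy-back, so user space still receives a reply — now carrying the post status in word 4 — when i2o_msg_post_wait() fails. A sketch of the reordered error path, again with hypothetical demo_* stand-ins:

#include <asm/uaccess.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/types.h>

/* Hypothetical stand-in for i2o_msg_post_wait(). */
static int demo_post_wait(void)
{
	return -ETIMEDOUT;
}

static int demo_passthru(u32 __user *ubuf, unsigned int reply_words)
{
	u32 *reply;
	int rcode;

	if (reply_words < 5)
		return -EINVAL;

	reply = kzalloc(reply_words * 4, GFP_KERNEL);
	if (!reply)
		return -ENOMEM;

	rcode = demo_post_wait();
	if (rcode)
		reply[4] = ((u32) rcode) << 24;	/* status reaches user space */

	/* copy-back now precedes the free, even on failure */
	if (copy_to_user(ubuf, reply, reply_words * 4))
		rcode = -EFAULT;

	kfree(reply);
	return rcode;
}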
diff --git a/drivers/message/i2o/i2o_lan.h b/drivers/message/i2o/i2o_lan.h
index 561d63304d7..6502b817df5 100644
--- a/drivers/message/i2o/i2o_lan.h
+++ b/drivers/message/i2o/i2o_lan.h
@@ -103,14 +103,14 @@
#define I2O_LAN_DSC_SUSPENDED 0x11
struct i2o_packet_info {
- u32 offset : 24;
- u32 flags : 8;
- u32 len : 24;
- u32 status : 8;
+ u32 offset:24;
+ u32 flags:8;
+ u32 len:24;
+ u32 status:8;
};
struct i2o_bucket_descriptor {
- u32 context; /* FIXME: 64bit support */
+ u32 context; /* FIXME: 64bit support */
struct i2o_packet_info packet_info[1];
};
@@ -127,14 +127,14 @@ struct i2o_lan_local {
u8 unit;
struct i2o_device *i2o_dev;
- struct fddi_statistics stats; /* see also struct net_device_stats */
- unsigned short (*type_trans)(struct sk_buff *, struct net_device *);
- atomic_t buckets_out; /* nbr of unused buckets on DDM */
- atomic_t tx_out; /* outstanding TXes */
- u8 tx_count; /* packets in one TX message frame */
- u16 tx_max_out; /* DDM's Tx queue len */
- u8 sgl_max; /* max SGLs in one message frame */
- u32 m; /* IOP address of the batch msg frame */
+ struct fddi_statistics stats; /* see also struct net_device_stats */
+ unsigned short (*type_trans) (struct sk_buff *, struct net_device *);
+ atomic_t buckets_out; /* nbr of unused buckets on DDM */
+ atomic_t tx_out; /* outstanding TXes */
+ u8 tx_count; /* packets in one TX message frame */
+ u16 tx_max_out; /* DDM's Tx queue len */
+ u8 sgl_max; /* max SGLs in one message frame */
+ u32 m; /* IOP address of the batch msg frame */
struct work_struct i2o_batch_send_task;
int send_active;
@@ -144,16 +144,16 @@ struct i2o_lan_local {
spinlock_t tx_lock;
- u32 max_size_mc_table; /* max number of multicast addresses */
+ u32 max_size_mc_table; /* max number of multicast addresses */
/* LAN OSM configurable parameters are here: */
- u16 max_buckets_out; /* max nbr of buckets to send to DDM */
- u16 bucket_thresh; /* send more when this many used */
+ u16 max_buckets_out; /* max nbr of buckets to send to DDM */
+ u16 bucket_thresh; /* send more when this many used */
u16 rx_copybreak;
- u8 tx_batch_mode; /* Set when using batch mode sends */
- u32 i2o_event_mask; /* To turn on interesting event flags */
+ u8 tx_batch_mode; /* Set when using batch mode sends */
+ u32 i2o_event_mask; /* To turn on interesting event flags */
};
-#endif /* _I2O_LAN_H */
+#endif /* _I2O_LAN_H */
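The i2o_lan.h changes are whitespace-only (Lindent output), but the structure they touch deserves a note: it packs a 24-bit offset/length and an 8-bit flags/status into each 32-bit word using C bitfields, whose exact layout is compiler- and endian-dependent. A mask-based equivalent — purely illustrative, assuming the low 24 bits hold the offset/length:

#define DEMO_PKT_OFFSET(w)	((w) & 0x00ffffffu)
#define DEMO_PKT_FLAGS(w)	(((w) >> 24) & 0xffu)
#define DEMO_PKT_LEN(w)		((w) & 0x00ffffffu)
#define DEMO_PKT_STATUS(w)	(((w) >> 24) & 0xffu)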
diff --git a/drivers/message/i2o/i2o_proc.c b/drivers/message/i2o/i2o_proc.c
index d559a175836..2a0c42b8cda 100644
--- a/drivers/message/i2o/i2o_proc.c
+++ b/drivers/message/i2o/i2o_proc.c
@@ -28,7 +28,7 @@
*/
#define OSM_NAME "proc-osm"
-#define OSM_VERSION "1.145"
+#define OSM_VERSION "1.316"
#define OSM_DESCRIPTION "I2O ProcFS OSM"
#define I2O_MAX_MODULES 4
diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c
index 9f1744c3933..f9e5a23697a 100644
--- a/drivers/message/i2o/i2o_scsi.c
+++ b/drivers/message/i2o/i2o_scsi.c
@@ -70,7 +70,7 @@
#include <scsi/sg_request.h>
#define OSM_NAME "scsi-osm"
-#define OSM_VERSION "1.282"
+#define OSM_VERSION "1.316"
#define OSM_DESCRIPTION "I2O SCSI Peripheral OSM"
static struct i2o_driver i2o_scsi_driver;
@@ -113,7 +113,7 @@ static struct i2o_scsi_host *i2o_scsi_host_alloc(struct i2o_controller *c)
list_for_each_entry(i2o_dev, &c->devices, list)
if (i2o_dev->lct_data.class_id == I2O_CLASS_BUS_ADAPTER) {
- if (i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1)
+ if (!i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1)
&& (type == 0x01)) /* SCSI bus */
max_channel++;
}
@@ -146,7 +146,7 @@ static struct i2o_scsi_host *i2o_scsi_host_alloc(struct i2o_controller *c)
i = 0;
list_for_each_entry(i2o_dev, &c->devices, list)
if (i2o_dev->lct_data.class_id == I2O_CLASS_BUS_ADAPTER) {
- if (i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1)
+ if (!i2o_parm_field_get(i2o_dev, 0x0000, 0, &type, 1)
&& (type == 0x01)) /* only SCSI bus */
i2o_shost->channel[i++] = i2o_dev;
@@ -238,13 +238,15 @@ static int i2o_scsi_probe(struct device *dev)
u8 type;
struct i2o_device *d = i2o_shost->channel[0];
- if (i2o_parm_field_get(d, 0x0000, 0, &type, 1)
+ if (!i2o_parm_field_get(d, 0x0000, 0, &type, 1)
&& (type == 0x01)) /* SCSI bus */
- if (i2o_parm_field_get(d, 0x0200, 4, &id, 4)) {
+ if (!i2o_parm_field_get(d, 0x0200, 4, &id, 4)) {
channel = 0;
if (i2o_dev->lct_data.class_id ==
I2O_CLASS_RANDOM_BLOCK_STORAGE)
- lun = i2o_shost->lun++;
+ lun = cpu_to_le64(i2o_shost->lun++);
else
lun = 0;
}
@@ -253,10 +255,10 @@ static int i2o_scsi_probe(struct device *dev)
break;
case I2O_CLASS_SCSI_PERIPHERAL:
- if (i2o_parm_field_get(i2o_dev, 0x0000, 3, &id, 4) < 0)
+ if (i2o_parm_field_get(i2o_dev, 0x0000, 3, &id, 4))
return -EFAULT;
- if (i2o_parm_field_get(i2o_dev, 0x0000, 4, &lun, 8) < 0)
+ if (i2o_parm_field_get(i2o_dev, 0x0000, 4, &lun, 8))
return -EFAULT;
parent = i2o_iop_find_device(c, i2o_dev->lct_data.parent_tid);
@@ -281,20 +283,22 @@ static int i2o_scsi_probe(struct device *dev)
return -EFAULT;
}
- if (id >= scsi_host->max_id) {
- osm_warn("SCSI device id (%d) >= max_id of I2O host (%d)", id,
- scsi_host->max_id);
+ if (le32_to_cpu(id) >= scsi_host->max_id) {
+ osm_warn("SCSI device id (%d) >= max_id of I2O host (%d)",
+ le32_to_cpu(id), scsi_host->max_id);
return -EFAULT;
}
- if (lun >= scsi_host->max_lun) {
- osm_warn("SCSI device id (%d) >= max_lun of I2O host (%d)",
- (unsigned int)lun, scsi_host->max_lun);
+ if (le64_to_cpu(lun) >= scsi_host->max_lun) {
+ osm_warn("SCSI device lun (%lu) >= max_lun of I2O host (%d)",
+ (long unsigned int)le64_to_cpu(lun),
+ scsi_host->max_lun);
return -EFAULT;
}
scsi_dev =
- __scsi_add_device(i2o_shost->scsi_host, channel, id, lun, i2o_dev);
+ __scsi_add_device(i2o_shost->scsi_host, channel, le32_to_cpu(id),
+ le64_to_cpu(lun), i2o_dev);
if (IS_ERR(scsi_dev)) {
osm_warn("can not add SCSI device %03x\n",
@@ -305,8 +309,9 @@ static int i2o_scsi_probe(struct device *dev)
sysfs_create_link(&i2o_dev->device.kobj, &scsi_dev->sdev_gendev.kobj,
"scsi");
- osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %d\n",
- i2o_dev->lct_data.tid, channel, id, (unsigned int)lun);
+ osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %ld\n",
+ i2o_dev->lct_data.tid, channel, le32_to_cpu(id),
+ (long unsigned int)le64_to_cpu(lun));
return 0;
};
@@ -510,8 +515,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
struct i2o_controller *c;
struct i2o_device *i2o_dev;
int tid;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
/*
* ENABLE_DISCONNECT
* SIMPLE_TAG
@@ -519,7 +523,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
*/
u32 scsi_flags = 0x20a00000;
u32 sgl_offset;
- u32 __iomem *mptr;
+ u32 *mptr;
u32 cmd = I2O_CMD_SCSI_EXEC << 24;
int rc = 0;
@@ -576,8 +580,8 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
* throw it back to the scsi layer
*/
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY) {
+ msg = i2o_msg_get(c);
+ if (IS_ERR(msg)) {
rc = SCSI_MLQUEUE_HOST_BUSY;
goto exit;
}
@@ -617,16 +621,16 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
if (sgl_offset == SGL_OFFSET_10)
sgl_offset = SGL_OFFSET_12;
cmd = I2O_CMD_PRIVATE << 24;
- writel(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC, mptr++);
- writel(adpt_flags | tid, mptr++);
+ *mptr++ = cpu_to_le32(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC);
+ *mptr++ = cpu_to_le32(adpt_flags | tid);
}
#endif
- writel(cmd | HOST_TID << 12 | tid, &msg->u.head[1]);
- writel(i2o_scsi_driver.context, &msg->u.s.icntxt);
+ msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid);
+ msg->u.s.icntxt = cpu_to_le32(i2o_scsi_driver.context);
/* We want the SCSI control block back */
- writel(i2o_cntxt_list_add(c, SCpnt), &msg->u.s.tcntxt);
+ msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, SCpnt));
/* LSI_920_PCI_QUIRK
*
@@ -649,15 +653,15 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
}
*/
- writel(scsi_flags | SCpnt->cmd_len, mptr++);
+ *mptr++ = cpu_to_le32(scsi_flags | SCpnt->cmd_len);
/* Write SCSI command into the message - always 16 byte block */
- memcpy_toio(mptr, SCpnt->cmnd, 16);
+ memcpy(mptr, SCpnt->cmnd, 16);
mptr += 4;
if (sgl_offset != SGL_OFFSET_0) {
/* write size of data addressed by SGL */
- writel(SCpnt->request_bufflen, mptr++);
+ *mptr++ = cpu_to_le32(SCpnt->request_bufflen);
/* Now fill in the SGList and command */
if (SCpnt->use_sg) {
@@ -676,11 +680,11 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
}
/* Stick the headers on */
- writel(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset,
- &msg->u.head[0]);
+ msg->u.head[0] =
+ cpu_to_le32(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset);
/* Queue the message */
- i2o_msg_post(c, m);
+ i2o_msg_post(c, msg);
osm_debug("Issued %ld\n", SCpnt->serial_number);
@@ -688,7 +692,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
nomem:
rc = -ENOMEM;
- i2o_msg_nop(c, m);
+ i2o_msg_nop(c, msg);
exit:
return rc;
@@ -709,8 +713,7 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt)
{
struct i2o_device *i2o_dev;
struct i2o_controller *c;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
int tid;
int status = FAILED;
@@ -720,16 +723,16 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt)
c = i2o_dev->iop;
tid = i2o_dev->lct_data.tid;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
return SCSI_MLQUEUE_HOST_BUSY;
- writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid,
- &msg->u.head[1]);
- writel(i2o_cntxt_list_get_ptr(c, SCpnt), &msg->body[0]);
+ msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid);
+ msg->body[0] = cpu_to_le32(i2o_cntxt_list_get_ptr(c, SCpnt));
- if (i2o_msg_post_wait(c, m, I2O_TIMEOUT_SCSI_SCB_ABORT))
+ if (i2o_msg_post_wait(c, msg, I2O_TIMEOUT_SCSI_SCB_ABORT))
status = SUCCESS;
return status;
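In i2o_scsi_queuecommand() the blocking i2o_msg_get_wait() becomes the non-blocking i2o_msg_get(): an empty inbound queue is no longer an error but back-pressure, translated into SCSI_MLQUEUE_HOST_BUSY so the midlayer requeues the command instead of the driver sleeping in the fast path. Sketched below, reusing struct demo_msg and demo_msg_get() from the earlier block:

#include <linux/err.h>
#include <linux/slab.h>
#include <scsi/scsi.h>

static int demo_queuecommand(void)
{
	struct demo_msg *msg = demo_msg_get(1);	/* simulate empty queue */

	if (IS_ERR(msg))
		return SCSI_MLQUEUE_HOST_BUSY;	/* midlayer retries later */

	/* ... build and post the frame, then return 0 ... */
	kfree(msg);
	return 0;
}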
diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c
index 4eb53258842..49216744693 100644
--- a/drivers/message/i2o/iop.c
+++ b/drivers/message/i2o/iop.c
@@ -32,7 +32,7 @@
#include "core.h"
#define OSM_NAME "i2o"
-#define OSM_VERSION "1.288"
+#define OSM_VERSION "1.325"
#define OSM_DESCRIPTION "I2O subsystem"
/* global I2O controller list */
@@ -47,27 +47,6 @@ static struct i2o_dma i2o_systab;
static int i2o_hrt_get(struct i2o_controller *c);
/**
- * i2o_msg_nop - Returns a message which is not used
- * @c: I2O controller from which the message was created
- * @m: message which should be returned
- *
- * If you fetch a message via i2o_msg_get, and can't use it, you must
- * return the message with this function. Otherwise the message frame
- * is lost.
- */
-void i2o_msg_nop(struct i2o_controller *c, u32 m)
-{
- struct i2o_message __iomem *msg = i2o_msg_in_to_virt(c, m);
-
- writel(THREE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_UTIL_NOP << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(0, &msg->u.head[2]);
- writel(0, &msg->u.head[3]);
- i2o_msg_post(c, m);
-};
-
-/**
* i2o_msg_get_wait - obtain an I2O message from the IOP
* @c: I2O controller
* @msg: pointer to a I2O message pointer
@@ -81,22 +60,21 @@ void i2o_msg_nop(struct i2o_controller *c, u32 m)
* address from the read port (see the i2o spec). If no message is
* available within the timeout, ERR_PTR(-ETIMEDOUT) is returned.
*/
-u32 i2o_msg_get_wait(struct i2o_controller *c,
- struct i2o_message __iomem ** msg, int wait)
+struct i2o_message *i2o_msg_get_wait(struct i2o_controller *c, int wait)
{
unsigned long timeout = jiffies + wait * HZ;
- u32 m;
+ struct i2o_message *msg;
- while ((m = i2o_msg_get(c, msg)) == I2O_QUEUE_EMPTY) {
+ while (IS_ERR(msg = i2o_msg_get(c))) {
if (time_after(jiffies, timeout)) {
osm_debug("%s: Timeout waiting for message frame.\n",
c->name);
- return I2O_QUEUE_EMPTY;
+ return ERR_PTR(-ETIMEDOUT);
}
schedule_timeout_uninterruptible(1);
}
- return m;
+ return msg;
};
#if BITS_PER_LONG == 64
@@ -301,8 +279,7 @@ struct i2o_device *i2o_iop_find_device(struct i2o_controller *c, u16 tid)
*/
static int i2o_iop_quiesce(struct i2o_controller *c)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
i2o_status_block *sb = c->status_block.virt;
int rc;
@@ -313,16 +290,17 @@ static int i2o_iop_quiesce(struct i2o_controller *c)
(sb->iop_state != ADAPTER_STATE_OPERATIONAL))
return 0;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
+ msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
/* Long timeout needed for quiesce if lots of devices */
- if ((rc = i2o_msg_post_wait(c, m, 240)))
+ if ((rc = i2o_msg_post_wait(c, msg, 240)))
osm_info("%s: Unable to quiesce (status=%#x).\n", c->name, -rc);
else
osm_debug("%s: Quiesced.\n", c->name);
@@ -342,8 +320,7 @@ static int i2o_iop_quiesce(struct i2o_controller *c)
*/
static int i2o_iop_enable(struct i2o_controller *c)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
i2o_status_block *sb = c->status_block.virt;
int rc;
@@ -353,16 +330,17 @@ static int i2o_iop_enable(struct i2o_controller *c)
if (sb->iop_state != ADAPTER_STATE_READY)
return -EINVAL;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
+ msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
/* How long of a timeout do we need? */
- if ((rc = i2o_msg_post_wait(c, m, 240)))
+ if ((rc = i2o_msg_post_wait(c, msg, 240)))
osm_err("%s: Could not enable (status=%#x).\n", c->name, -rc);
else
osm_debug("%s: Enabled.\n", c->name);
@@ -413,22 +391,22 @@ static inline void i2o_iop_enable_all(void)
*/
static int i2o_iop_clear(struct i2o_controller *c)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
int rc;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
/* Quiesce all IOPs first */
i2o_iop_quiesce_all();
- writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
+ msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
- if ((rc = i2o_msg_post_wait(c, m, 30)))
+ if ((rc = i2o_msg_post_wait(c, msg, 30)))
osm_info("%s: Unable to clear (status=%#x).\n", c->name, -rc);
else
osm_debug("%s: Cleared.\n", c->name);
@@ -446,13 +424,13 @@ static int i2o_iop_clear(struct i2o_controller *c)
* Clear and (re)initialize IOP's outbound queue and post the message
* frames to the IOP.
*
- * Returns 0 on success or a negative errno code on failure.
+ * Returns 0 on success or negative error code on failure.
*/
static int i2o_iop_init_outbound_queue(struct i2o_controller *c)
{
- volatile u8 *status = c->status.virt;
u32 m;
- struct i2o_message __iomem *msg;
+ volatile u8 *status = c->status.virt;
+ struct i2o_message *msg;
ulong timeout;
int i;
@@ -460,23 +438,24 @@ static int i2o_iop_init_outbound_queue(struct i2o_controller *c)
memset(c->status.virt, 0, 4);
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
-
- writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]);
- writel(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(i2o_exec_driver.context, &msg->u.s.icntxt);
- writel(0x00000000, &msg->u.s.tcntxt);
- writel(PAGE_SIZE, &msg->body[0]);
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
+
+ msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
+ msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
+ msg->u.s.tcntxt = cpu_to_le32(0x00000000);
+ msg->body[0] = cpu_to_le32(PAGE_SIZE);
/* Outbound msg frame size in words and Initcode */
- writel(I2O_OUTBOUND_MSG_FRAME_SIZE << 16 | 0x80, &msg->body[1]);
- writel(0xd0000004, &msg->body[2]);
- writel(i2o_dma_low(c->status.phys), &msg->body[3]);
- writel(i2o_dma_high(c->status.phys), &msg->body[4]);
+ msg->body[1] = cpu_to_le32(I2O_OUTBOUND_MSG_FRAME_SIZE << 16 | 0x80);
+ msg->body[2] = cpu_to_le32(0xd0000004);
+ msg->body[3] = cpu_to_le32(i2o_dma_low(c->status.phys));
+ msg->body[4] = cpu_to_le32(i2o_dma_high(c->status.phys));
- i2o_msg_post(c, m);
+ i2o_msg_post(c, msg);
timeout = jiffies + I2O_TIMEOUT_INIT_OUTBOUND_QUEUE * HZ;
while (*status <= I2O_CMD_IN_PROGRESS) {
@@ -511,34 +490,34 @@ static int i2o_iop_init_outbound_queue(struct i2o_controller *c)
static int i2o_iop_reset(struct i2o_controller *c)
{
volatile u8 *status = c->status.virt;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
unsigned long timeout;
i2o_status_block *sb = c->status_block.virt;
int rc = 0;
osm_debug("%s: Resetting controller\n", c->name);
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
memset(c->status_block.virt, 0, 8);
/* Quiesce all IOPs first */
i2o_iop_quiesce_all();
- writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(i2o_exec_driver.context, &msg->u.s.icntxt);
- writel(0, &msg->u.s.tcntxt); //FIXME: use reasonable transaction context
- writel(0, &msg->body[0]);
- writel(0, &msg->body[1]);
- writel(i2o_dma_low(c->status.phys), &msg->body[2]);
- writel(i2o_dma_high(c->status.phys), &msg->body[3]);
+ msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
+ msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
+ msg->u.s.tcntxt = cpu_to_le32(0x00000000);
+ msg->body[0] = cpu_to_le32(0x00000000);
+ msg->body[1] = cpu_to_le32(0x00000000);
+ msg->body[2] = cpu_to_le32(i2o_dma_low(c->status.phys));
+ msg->body[3] = cpu_to_le32(i2o_dma_high(c->status.phys));
- i2o_msg_post(c, m);
+ i2o_msg_post(c, msg);
/* Wait for a reply */
timeout = jiffies + I2O_TIMEOUT_RESET * HZ;
@@ -567,18 +546,15 @@ static int i2o_iop_reset(struct i2o_controller *c)
osm_debug("%s: Reset in progress, waiting for reboot...\n",
c->name);
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET);
- while (m == I2O_QUEUE_EMPTY) {
+ while (IS_ERR(msg = i2o_msg_get_wait(c, I2O_TIMEOUT_RESET))) {
if (time_after(jiffies, timeout)) {
osm_err("%s: IOP reset timeout.\n", c->name);
- rc = -ETIMEDOUT;
+ rc = PTR_ERR(msg);
goto exit;
}
schedule_timeout_uninterruptible(1);
-
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET);
}
- i2o_msg_nop(c, m);
+ i2o_msg_nop(c, msg);
/* from here all quiesce commands are safe */
c->no_quiesce = 0;
@@ -686,8 +662,7 @@ static int i2o_iop_activate(struct i2o_controller *c)
*/
static int i2o_iop_systab_set(struct i2o_controller *c)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
i2o_status_block *sb = c->status_block.virt;
struct device *dev = &c->pdev->dev;
struct resource *root;
@@ -735,41 +710,38 @@ static int i2o_iop_systab_set(struct i2o_controller *c)
}
}
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
i2o_systab.phys = dma_map_single(dev, i2o_systab.virt, i2o_systab.len,
PCI_DMA_TODEVICE);
if (!i2o_systab.phys) {
- i2o_msg_nop(c, m);
+ i2o_msg_nop(c, msg);
return -ENOMEM;
}
- writel(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6, &msg->u.head[0]);
- writel(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
+ msg->u.head[0] = cpu_to_le32(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
/*
* Provide three SGL-elements:
* System table (SysTab), Private memory space declaration and
* Private i/o space declaration
- *
- * FIXME: is this still true?
- * Nasty one here. We can't use dma_alloc_coherent to send the
- * same table to everyone. We have to go remap it for them all
*/
- writel(c->unit + 2, &msg->body[0]);
- writel(0, &msg->body[1]);
- writel(0x54000000 | i2o_systab.len, &msg->body[2]);
- writel(i2o_systab.phys, &msg->body[3]);
- writel(0x54000000 | sb->current_mem_size, &msg->body[4]);
- writel(sb->current_mem_base, &msg->body[5]);
- writel(0xd4000000 | sb->current_io_size, &msg->body[6]);
- writel(sb->current_io_base, &msg->body[6]);
+ msg->body[0] = cpu_to_le32(c->unit + 2);
+ msg->body[1] = cpu_to_le32(0x00000000);
+ msg->body[2] = cpu_to_le32(0x54000000 | i2o_systab.len);
+ msg->body[3] = cpu_to_le32(i2o_systab.phys);
+ msg->body[4] = cpu_to_le32(0x54000000 | sb->current_mem_size);
+ msg->body[5] = cpu_to_le32(sb->current_mem_base);
+ msg->body[6] = cpu_to_le32(0xd4000000 | sb->current_io_size);
+ msg->body[7] = cpu_to_le32(sb->current_io_base);
- rc = i2o_msg_post_wait(c, m, 120);
+ rc = i2o_msg_post_wait(c, msg, 120);
dma_unmap_single(dev, i2o_systab.phys, i2o_systab.len,
PCI_DMA_TODEVICE);
@@ -780,8 +752,6 @@ static int i2o_iop_systab_set(struct i2o_controller *c)
else
osm_debug("%s: SysTab set.\n", c->name);
- i2o_status_get(c); // Entered READY state
-
return rc;
}
@@ -791,7 +761,7 @@ static int i2o_iop_systab_set(struct i2o_controller *c)
*
* Send the system table and enable the I2O controller.
*
- * Returns 0 on success or negativer error code on failure.
+ * Returns 0 on success or negative error code on failure.
*/
static int i2o_iop_online(struct i2o_controller *c)
{
@@ -830,7 +800,6 @@ void i2o_iop_remove(struct i2o_controller *c)
list_for_each_entry_safe(dev, tmp, &c->devices, list)
i2o_device_remove(dev);
- class_device_unregister(c->classdev);
device_del(&c->device);
/* Ask the IOP to switch to RESET state */
@@ -869,12 +838,11 @@ static int i2o_systab_build(void)
i2o_systab.len = sizeof(struct i2o_sys_tbl) + num_controllers *
sizeof(struct i2o_sys_tbl_entry);
- systab = i2o_systab.virt = kmalloc(i2o_systab.len, GFP_KERNEL);
+ systab = i2o_systab.virt = kzalloc(i2o_systab.len, GFP_KERNEL);
if (!systab) {
osm_err("unable to allocate memory for System Table\n");
return -ENOMEM;
}
- memset(systab, 0, i2o_systab.len);
systab->version = I2OVERSION;
systab->change_ind = change_ind + 1;
@@ -952,30 +920,30 @@ static int i2o_parse_hrt(struct i2o_controller *c)
*/
int i2o_status_get(struct i2o_controller *c)
{
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
volatile u8 *status_block;
unsigned long timeout;
status_block = (u8 *) c->status_block.virt;
memset(c->status_block.virt, 0, sizeof(i2o_status_block));
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(i2o_exec_driver.context, &msg->u.s.icntxt);
- writel(0, &msg->u.s.tcntxt); // FIXME: use resonable transaction context
- writel(0, &msg->body[0]);
- writel(0, &msg->body[1]);
- writel(i2o_dma_low(c->status_block.phys), &msg->body[2]);
- writel(i2o_dma_high(c->status_block.phys), &msg->body[3]);
- writel(sizeof(i2o_status_block), &msg->body[4]); /* always 88 bytes */
+ msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
+ msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
+ msg->u.s.tcntxt = cpu_to_le32(0x00000000);
+ msg->body[0] = cpu_to_le32(0x00000000);
+ msg->body[1] = cpu_to_le32(0x00000000);
+ msg->body[2] = cpu_to_le32(i2o_dma_low(c->status_block.phys));
+ msg->body[3] = cpu_to_le32(i2o_dma_high(c->status_block.phys));
+ msg->body[4] = cpu_to_le32(sizeof(i2o_status_block)); /* always 88 bytes */
- i2o_msg_post(c, m);
+ i2o_msg_post(c, msg);
/* Wait for a reply */
timeout = jiffies + I2O_TIMEOUT_STATUS_GET * HZ;
@@ -1002,7 +970,7 @@ int i2o_status_get(struct i2o_controller *c)
* The HRT contains information about possible hidden devices but is
* mostly useless to us.
*
- * Returns 0 on success or negativer error code on failure.
+ * Returns 0 on success or negative error code on failure.
*/
static int i2o_hrt_get(struct i2o_controller *c)
{
@@ -1013,20 +981,20 @@ static int i2o_hrt_get(struct i2o_controller *c)
struct device *dev = &c->pdev->dev;
for (i = 0; i < I2O_HRT_GET_TRIES; i++) {
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(SIX_WORD_MSG_SIZE | SGL_OFFSET_4, &msg->u.head[0]);
- writel(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 | ADAPTER_TID,
- &msg->u.head[1]);
- writel(0xd0000000 | c->hrt.len, &msg->body[0]);
- writel(c->hrt.phys, &msg->body[1]);
+ msg->u.head[0] = cpu_to_le32(SIX_WORD_MSG_SIZE | SGL_OFFSET_4);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 |
+ ADAPTER_TID);
+ msg->body[0] = cpu_to_le32(0xd0000000 | c->hrt.len);
+ msg->body[1] = cpu_to_le32(c->hrt.phys);
- rc = i2o_msg_post_wait_mem(c, m, 20, &c->hrt);
+ rc = i2o_msg_post_wait_mem(c, msg, 20, &c->hrt);
if (rc < 0) {
osm_err("%s: Unable to get HRT (status=%#x)\n", c->name,
@@ -1051,15 +1019,6 @@ static int i2o_hrt_get(struct i2o_controller *c)
}
/**
- * i2o_iop_free - Free the i2o_controller struct
- * @c: I2O controller to free
- */
-void i2o_iop_free(struct i2o_controller *c)
-{
- kfree(c);
-};
-
-/**
* i2o_iop_release - release the memory for a I2O controller
* @dev: I2O controller which should be released
*
@@ -1073,14 +1032,11 @@ static void i2o_iop_release(struct device *dev)
i2o_iop_free(c);
};
-/* I2O controller class */
-static struct class *i2o_controller_class;
-
/**
* i2o_iop_alloc - Allocate and initialize a i2o_controller struct
*
* Allocate the necessary memory for a i2o_controller struct and
- * initialize the lists.
+ * initialize the lists and message mempool.
*
* Returns a pointer to the I2O controller or a negative error code on
* failure.
@@ -1089,20 +1045,29 @@ struct i2o_controller *i2o_iop_alloc(void)
{
static int unit = 0; /* 0 and 1 are NULL IOP and Local Host */
struct i2o_controller *c;
+ char poolname[32];
- c = kmalloc(sizeof(*c), GFP_KERNEL);
+ c = kzalloc(sizeof(*c), GFP_KERNEL);
if (!c) {
osm_err("i2o: Insufficient memory to allocate a I2O controller."
"\n");
return ERR_PTR(-ENOMEM);
}
- memset(c, 0, sizeof(*c));
+
+ c->unit = unit++;
+ sprintf(c->name, "iop%d", c->unit);
+
+ snprintf(poolname, sizeof(poolname), "i2o_%s_msg_inpool", c->name);
+ if (i2o_pool_alloc(&c->in_msg, poolname,
+ I2O_INBOUND_MSG_FRAME_SIZE * 4, I2O_MSG_INPOOL_MIN)) {
+ kfree(c);
+ return ERR_PTR(-ENOMEM);
+ }
INIT_LIST_HEAD(&c->devices);
spin_lock_init(&c->lock);
init_MUTEX(&c->lct_lock);
- c->unit = unit++;
- sprintf(c->name, "iop%d", c->unit);
device_initialize(&c->device);
@@ -1137,36 +1102,29 @@ int i2o_iop_add(struct i2o_controller *c)
goto iop_reset;
}
- c->classdev = class_device_create(i2o_controller_class, NULL, MKDEV(0,0),
- &c->device, "iop%d", c->unit);
- if (IS_ERR(c->classdev)) {
- osm_err("%s: could not add controller class\n", c->name);
- goto device_del;
- }
-
osm_info("%s: Activating I2O controller...\n", c->name);
osm_info("%s: This may take a few minutes if there are many devices\n",
c->name);
if ((rc = i2o_iop_activate(c))) {
osm_err("%s: could not activate controller\n", c->name);
- goto class_del;
+ goto device_del;
}
osm_debug("%s: building sys table...\n", c->name);
if ((rc = i2o_systab_build()))
- goto class_del;
+ goto device_del;
osm_debug("%s: online controller...\n", c->name);
if ((rc = i2o_iop_online(c)))
- goto class_del;
+ goto device_del;
osm_debug("%s: getting LCT...\n", c->name);
if ((rc = i2o_exec_lct_get(c)))
- goto class_del;
+ goto device_del;
list_add(&c->list, &i2o_controllers);
@@ -1176,9 +1134,6 @@ int i2o_iop_add(struct i2o_controller *c)
return 0;
- class_del:
- class_device_unregister(c->classdev);
-
device_del:
device_del(&c->device);
@@ -1199,28 +1154,27 @@ int i2o_iop_add(struct i2o_controller *c)
* is waited for, or expected. If you do not want further notifications,
* call the i2o_event_register again with a evt_mask of 0.
*
- * Returns 0 on success or -ETIMEDOUT if no message could be fetched for
- * sending the request.
+ * Returns 0 on success or negative error code on failure.
*/
int i2o_event_register(struct i2o_device *dev, struct i2o_driver *drv,
int tcntxt, u32 evt_mask)
{
struct i2o_controller *c = dev->iop;
- struct i2o_message __iomem *msg;
- u32 m;
+ struct i2o_message *msg;
- m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
- if (m == I2O_QUEUE_EMPTY)
- return -ETIMEDOUT;
+ msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+ if (IS_ERR(msg))
+ return PTR_ERR(msg);
- writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
- writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | dev->lct_data.
- tid, &msg->u.head[1]);
- writel(drv->context, &msg->u.s.icntxt);
- writel(tcntxt, &msg->u.s.tcntxt);
- writel(evt_mask, &msg->body[0]);
+ msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+ msg->u.head[1] =
+ cpu_to_le32(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 |
+ dev->lct_data.tid);
+ msg->u.s.icntxt = cpu_to_le32(drv->context);
+ msg->u.s.tcntxt = cpu_to_le32(tcntxt);
+ msg->body[0] = cpu_to_le32(evt_mask);
- i2o_msg_post(c, m);
+ i2o_msg_post(c, msg);
return 0;
};
@@ -1239,14 +1193,8 @@ static int __init i2o_iop_init(void)
printk(KERN_INFO OSM_DESCRIPTION " v" OSM_VERSION "\n");
- i2o_controller_class = class_create(THIS_MODULE, "i2o_controller");
- if (IS_ERR(i2o_controller_class)) {
- osm_err("can't register class i2o_controller\n");
- goto exit;
- }
-
if ((rc = i2o_driver_init()))
- goto class_exit;
+ goto exit;
if ((rc = i2o_exec_init()))
goto driver_exit;
@@ -1262,9 +1210,6 @@ static int __init i2o_iop_init(void)
driver_exit:
i2o_driver_exit();
- class_exit:
- class_destroy(i2o_controller_class);
-
exit:
return rc;
}
@@ -1279,7 +1224,6 @@ static void __exit i2o_iop_exit(void)
i2o_pci_exit();
i2o_exec_exit();
i2o_driver_exit();
- class_destroy(i2o_controller_class);
};
module_init(i2o_iop_init);
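i2o_iop_alloc() now sets up a per-controller pool for inbound message frames via i2o_pool_alloc(), so frames come from host memory rather than being carved out of mapped controller space. The helper itself is not shown in this patch; the following is a plausible sketch using a slab-backed mempool — the field names and the use of today's mempool/slab API are assumptions, not the driver's actual code.

#include <linux/mempool.h>
#include <linux/slab.h>

struct demo_msg_pool {
	struct kmem_cache *slab;	/* fixed-size frame allocator */
	mempool_t *pool;		/* guarantees min_nr frames */
};

static int demo_pool_alloc(struct demo_msg_pool *p, const char *name,
			   size_t frame_size, int min_nr)
{
	p->slab = kmem_cache_create(name, frame_size, 0,
				    SLAB_HWCACHE_ALIGN, NULL);
	if (!p->slab)
		return -ENOMEM;

	p->pool = mempool_create_slab_pool(min_nr, p->slab);
	if (!p->pool) {
		kmem_cache_destroy(p->slab);
		return -ENOMEM;
	}

	return 0;
}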
diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c
index ee7075fa1ec..c5b656cdea7 100644
--- a/drivers/message/i2o/pci.c
+++ b/drivers/message/i2o/pci.c
@@ -339,7 +339,7 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev,
pci_name(pdev));
c->pdev = pdev;
- c->device.parent = get_device(&pdev->dev);
+ c->device.parent = &pdev->dev;
/* Cards that fall apart if you hit them with large I/O loads... */
if (pdev->vendor == PCI_VENDOR_ID_NCR && pdev->device == 0x0630) {
@@ -410,8 +410,6 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev,
if ((rc = i2o_iop_add(c)))
goto uninstall;
- get_device(&c->device);
-
if (i960)
pci_write_config_word(i960, 0x42, 0x03ff);
@@ -424,7 +422,6 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev,
i2o_pci_free(c);
free_controller:
- put_device(c->device.parent);
i2o_iop_free(c);
disable:
@@ -454,7 +451,6 @@ static void __devexit i2o_pci_remove(struct pci_dev *pdev)
printk(KERN_INFO "%s: Controller removed.\n", c->name);
- put_device(c->device.parent);
put_device(&c->device);
};
@@ -483,4 +479,5 @@ void __exit i2o_pci_exit(void)
{
pci_unregister_driver(&i2o_pci_driver);
};
+
MODULE_DEVICE_TABLE(pci, i2o_pci_ids);
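The pci.c hunks drop the explicit get_device()/put_device() pair on the parent PCI device: once the child controller device is added, the driver core holds its own reference to the parent for the child's lifetime. A sketch of the rule being relied on (assumed behaviour of device_add(); the child must already be device_initialize()d, as i2o_iop_alloc() does):

#include <linux/device.h>

static int demo_register_child(struct device *child, struct device *parent)
{
	child->parent = parent;		/* no explicit get_device(parent) */

	/* device_add() pins the parent until the child is deleted. */
	return device_add(child);
}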
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index c782a632980..fa39b944bc4 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -6,7 +6,7 @@ menu "PHY device support"
config PHYLIB
tristate "PHY Device support and infrastructure"
- depends on NET_ETHERNET && (BROKEN || !ARCH_S390)
+ depends on NET_ETHERNET && (BROKEN || !S390)
help
Ethernet controllers are usually attached to PHY
devices. This option provides infrastructure for
diff --git a/drivers/net/plip.c b/drivers/net/plip.c
index 1bd22cd40c7..87ee3271b17 100644
--- a/drivers/net/plip.c
+++ b/drivers/net/plip.c
@@ -98,7 +98,6 @@ static const char version[] = "NET3 PLIP version 2.4-parport gniibe@mri.co.jp\n"
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/delay.h>
-#include <linux/lp.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -106,7 +105,6 @@ static const char version[] = "NET3 PLIP version 2.4-parport gniibe@mri.co.jp\n"
#include <linux/skbuff.h>
#include <linux/if_plip.h>
#include <linux/workqueue.h>
-#include <linux/ioport.h>
#include <linux/spinlock.h>
#include <linux/parport.h>
#include <linux/bitops.h>
diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig
index 725a14119f2..b8241561da4 100644
--- a/drivers/parport/Kconfig
+++ b/drivers/parport/Kconfig
@@ -77,7 +77,7 @@ config PARPORT_PC_SUPERIO
config PARPORT_PC_PCMCIA
tristate "Support for PCMCIA management for PC-style ports"
- depends on PARPORT!=n && (PCMCIA!=n && PARPORT_PC=m && PARPORT_PC || PARPORT_PC=y && PCMCIA)
+ depends on PCMCIA && PARPORT_PC
help
Say Y here if you need PCMCIA support for your PC-style parallel
ports. If unsure, say N.
diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c
index 075c7eb5c85..9ee67321b63 100644
--- a/drivers/parport/daisy.c
+++ b/drivers/parport/daisy.c
@@ -144,9 +144,9 @@ again:
add_dev (numdevs++, port, -1);
/* Find out the legacy device's IEEE 1284 device ID. */
- deviceid = kmalloc (1000, GFP_KERNEL);
+ deviceid = kmalloc (1024, GFP_KERNEL);
if (deviceid) {
- if (parport_device_id (numdevs - 1, deviceid, 1000) > 2)
+ if (parport_device_id (numdevs - 1, deviceid, 1024) > 2)
detected++;
kfree (deviceid);
@@ -252,7 +252,7 @@ struct pardevice *parport_open (int devnum, const char *name,
selected = port->daisy;
parport_release (dev);
- if (selected != port->daisy) {
+ if (selected != daisy) {
/* No corresponding device. */
parport_unregister_device (dev);
return NULL;
@@ -344,9 +344,9 @@ static int cpp_daisy (struct parport *port, int cmd)
PARPORT_CONTROL_STROBE,
PARPORT_CONTROL_STROBE);
udelay (1);
+ s = parport_read_status (port);
parport_frob_control (port, PARPORT_CONTROL_STROBE, 0);
udelay (1);
- s = parport_read_status (port);
parport_write_data (port, 0xff); udelay (2);
return s;
@@ -395,15 +395,15 @@ int parport_daisy_select (struct parport *port, int daisy, int mode)
case IEEE1284_MODE_EPP:
case IEEE1284_MODE_EPPSL:
case IEEE1284_MODE_EPPSWE:
- return (cpp_daisy (port, 0x20 + daisy) &
- PARPORT_STATUS_ERROR);
+ return !(cpp_daisy (port, 0x20 + daisy) &
+ PARPORT_STATUS_ERROR);
// For these modes we should switch to ECP mode:
case IEEE1284_MODE_ECP:
case IEEE1284_MODE_ECPRLE:
case IEEE1284_MODE_ECPSWE:
- return (cpp_daisy (port, 0xd0 + daisy) &
- PARPORT_STATUS_ERROR);
+ return !(cpp_daisy (port, 0xd0 + daisy) &
+ PARPORT_STATUS_ERROR);
// Nothing was told for BECP in Daisy chain specification.
// May be it's wise to use ECP?
@@ -413,8 +413,8 @@ int parport_daisy_select (struct parport *port, int daisy, int mode)
case IEEE1284_MODE_BYTE:
case IEEE1284_MODE_COMPAT:
default:
- return (cpp_daisy (port, 0xe0 + daisy) &
- PARPORT_STATUS_ERROR);
+ return !(cpp_daisy (port, 0xe0 + daisy) &
+ PARPORT_STATUS_ERROR);
}
}
@@ -436,7 +436,7 @@ static int select_port (struct parport *port)
static int assign_addrs (struct parport *port)
{
- unsigned char s, last_dev;
+ unsigned char s;
unsigned char daisy;
int thisdev = numdevs;
int detected;
@@ -472,10 +472,13 @@ static int assign_addrs (struct parport *port)
}
parport_write_data (port, 0x78); udelay (2);
- last_dev = 0; /* We've just been speaking to a device, so we
- know there must be at least _one_ out there. */
+ s = parport_read_status (port);
- for (daisy = 0; daisy < 4; daisy++) {
+ for (daisy = 0;
+ (s & (PARPORT_STATUS_PAPEROUT|PARPORT_STATUS_SELECT))
+ == (PARPORT_STATUS_PAPEROUT|PARPORT_STATUS_SELECT)
+ && daisy < 4;
+ ++daisy) {
parport_write_data (port, daisy);
udelay (2);
parport_frob_control (port,
@@ -485,14 +488,18 @@ static int assign_addrs (struct parport *port)
parport_frob_control (port, PARPORT_CONTROL_STROBE, 0);
udelay (1);
- if (last_dev)
- /* No more devices. */
- break;
+ add_dev (numdevs++, port, daisy);
- last_dev = !(parport_read_status (port)
- & PARPORT_STATUS_BUSY);
+ /* See if this device thought it was the last in the
+ * chain. */
+ if (!(s & PARPORT_STATUS_BUSY))
+ break;
- add_dev (numdevs++, port, daisy);
+ /* We are now seeing pass-through status: either last_dev
+ from the next device or, if that does not work, status
+ lines from some non-daisy-chain device. */
+ s = parport_read_status (port);
}
parport_write_data (port, 0xff); udelay (2);
@@ -501,11 +508,11 @@ static int assign_addrs (struct parport *port)
detected);
/* Ask the new devices to introduce themselves. */
- deviceid = kmalloc (1000, GFP_KERNEL);
+ deviceid = kmalloc (1024, GFP_KERNEL);
if (!deviceid) return 0;
for (daisy = 0; thisdev < numdevs; thisdev++, daisy++)
- parport_device_id (thisdev, deviceid, 1000);
+ parport_device_id (thisdev, deviceid, 1024);
kfree (deviceid);
return detected;
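The cpp_daisy() fix reorders the transaction so the status lines are sampled while nStrobe is still asserted — a device may release them once strobe drops. The corrected sequence, using the real parport accessors:

#include <linux/delay.h>
#include <linux/parport.h>

static unsigned char demo_cpp_strobe_status(struct parport *port)
{
	unsigned char s;

	parport_frob_control(port, PARPORT_CONTROL_STROBE,
			     PARPORT_CONTROL_STROBE);
	udelay(1);
	s = parport_read_status(port);	/* sample before dropping strobe */
	parport_frob_control(port, PARPORT_CONTROL_STROBE, 0);
	udelay(1);

	return s;
}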
diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c
index ce1e2aad8b1..d6c77658231 100644
--- a/drivers/parport/ieee1284_ops.c
+++ b/drivers/parport/ieee1284_ops.c
@@ -165,17 +165,7 @@ size_t parport_ieee1284_read_nibble (struct parport *port,
/* Does the error line indicate end of data? */
if (((i & 1) == 0) &&
(parport_read_status(port) & PARPORT_STATUS_ERROR)) {
- port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
- DPRINTK (KERN_DEBUG
- "%s: No more nibble data (%d bytes)\n",
- port->name, i/2);
-
- /* Go to reverse idle phase. */
- parport_frob_control (port,
- PARPORT_CONTROL_AUTOFD,
- PARPORT_CONTROL_AUTOFD);
- port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
- break;
+ goto end_of_data;
}
/* Event 7: Set nAutoFd low. */
@@ -225,18 +215,25 @@ size_t parport_ieee1284_read_nibble (struct parport *port,
byte = nibble;
}
- i /= 2; /* i is now in bytes */
-
if (i == len) {
/* Read the last nibble without checking data avail. */
- port = port->physport;
- if (parport_read_status (port) & PARPORT_STATUS_ERROR)
- port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
+ if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
+ end_of_data:
+ DPRINTK (KERN_DEBUG
+ "%s: No more nibble data (%d bytes)\n",
+ port->name, i/2);
+
+ /* Go to reverse idle phase. */
+ parport_frob_control (port,
+ PARPORT_CONTROL_AUTOFD,
+ PARPORT_CONTROL_AUTOFD);
+ port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
+ }
else
- port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
+ port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
}
- return i;
+ return i/2;
#endif /* IEEE1284 support */
}
@@ -256,17 +253,7 @@ size_t parport_ieee1284_read_byte (struct parport *port,
/* Data available? */
if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
- port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
- DPRINTK (KERN_DEBUG
- "%s: No more byte data (%Zd bytes)\n",
- port->name, count);
-
- /* Go to reverse idle phase. */
- parport_frob_control (port,
- PARPORT_CONTROL_AUTOFD,
- PARPORT_CONTROL_AUTOFD);
- port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
- break;
+ goto end_of_data;
}
/* Event 14: Place data bus in high impedance state. */
@@ -318,11 +305,20 @@ size_t parport_ieee1284_read_byte (struct parport *port,
if (count == len) {
/* Read the last byte without checking data avail. */
- port = port->physport;
- if (parport_read_status (port) & PARPORT_STATUS_ERROR)
- port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
+ if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
+ end_of_data:
+ DPRINTK (KERN_DEBUG
+ "%s: No more byte data (%Zd bytes)\n",
+ port->name, count);
+
+ /* Go to reverse idle phase. */
+ parport_frob_control (port,
+ PARPORT_CONTROL_AUTOFD,
+ PARPORT_CONTROL_AUTOFD);
+ port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
+ }
else
- port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
+ port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
}
return count;
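parport_ieee1284_read_nibble() now keeps its loop counter in nibbles throughout and converts to bytes once at return ("return i/2"), with both end-of-data exits funnelled through a single end_of_data label. The byte assembly itself — low nibble first, then high — looks like this sketch:

#include <linux/types.h>

/* Assemble n nibbles (low nibble first) into n/2 bytes. */
static size_t demo_assemble_nibbles(const unsigned char *nibbles, size_t n,
				    unsigned char *out)
{
	size_t i;

	for (i = 0; i + 1 < n; i += 2)
		out[i / 2] = (nibbles[i] & 0x0f) |
			     ((nibbles[i + 1] & 0x0f) << 4);

	return i / 2;	/* bytes produced, matching "return i/2;" */
}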
diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index c6493ad7c0c..18e85ccdae6 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c
@@ -1169,7 +1169,7 @@ dump_parport_state ("fwd idle", port);
/* GCC is not inlining extern inline function later overwriten to non-inline,
so we use outlined_ variants here. */
-static struct parport_operations parport_pc_ops =
+static const struct parport_operations parport_pc_ops =
{
.write_data = parport_pc_write_data,
.read_data = parport_pc_read_data,
@@ -1211,10 +1211,11 @@ static struct parport_operations parport_pc_ops =
static void __devinit show_parconfig_smsc37c669(int io, int key)
{
int cr1,cr4,cra,cr23,cr26,cr27,i=0;
- static const char *modes[]={ "SPP and Bidirectional (PS/2)",
- "EPP and SPP",
- "ECP",
- "ECP and EPP" };
+ static const char *const modes[]={
+ "SPP and Bidirectional (PS/2)",
+ "EPP and SPP",
+ "ECP",
+ "ECP and EPP" };
outb(key,io);
outb(key,io);
@@ -1288,7 +1289,7 @@ static void __devinit show_parconfig_smsc37c669(int io, int key)
static void __devinit show_parconfig_winbond(int io, int key)
{
int cr30,cr60,cr61,cr70,cr74,crf0,i=0;
- static const char *modes[] = {
+ static const char *const modes[] = {
"Standard (SPP) and Bidirectional(PS/2)", /* 0 */
"EPP-1.9 and SPP",
"ECP",
@@ -1297,7 +1298,9 @@ static void __devinit show_parconfig_winbond(int io, int key)
"EPP-1.7 and SPP", /* 5 */
"undefined!",
"ECP and EPP-1.7" };
- static char *irqtypes[] = { "pulsed low, high-Z", "follows nACK" };
+ static char *const irqtypes[] = {
+ "pulsed low, high-Z",
+ "follows nACK" };
/* The registers are called compatible-PnP because the
register layout is modelled after ISA-PnP, the access
@@ -2396,7 +2399,8 @@ EXPORT_SYMBOL (parport_pc_unregister_port);
/* ITE support maintained by Rich Liu <richliu@poorman.org> */
static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
- int autodma, struct parport_pc_via_data *via)
+ int autodma,
+ const struct parport_pc_via_data *via)
{
short inta_addr[6] = { 0x2A0, 0x2C0, 0x220, 0x240, 0x1E0 };
struct resource *base_res;
@@ -2524,7 +2528,8 @@ static struct parport_pc_via_data via_8231_data __devinitdata = {
};
static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
- int autodma, struct parport_pc_via_data *via)
+ int autodma,
+ const struct parport_pc_via_data *via)
{
u8 tmp, tmp2, siofunc;
u8 ppcontrol = 0;
@@ -2694,8 +2699,9 @@ enum parport_pc_sio_types {
/* each element directly indexed from enum list, above */
static struct parport_pc_superio {
- int (*probe) (struct pci_dev *pdev, int autoirq, int autodma, struct parport_pc_via_data *via);
- struct parport_pc_via_data *via;
+ int (*probe) (struct pci_dev *pdev, int autoirq, int autodma,
+ const struct parport_pc_via_data *via);
+ const struct parport_pc_via_data *via;
} parport_pc_superio_info[] __devinitdata = {
{ sio_via_probe, &via_686a_data, },
{ sio_via_probe, &via_8231_data, },
@@ -2828,7 +2834,7 @@ static struct parport_pc_pci {
/* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */
};
-static struct pci_device_id parport_pc_pci_tbl[] = {
+static const struct pci_device_id parport_pc_pci_tbl[] = {
/* Super-IO onboard chips */
{ 0x1106, 0x0686, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sio_via_686a },
{ 0x1106, 0x8231, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sio_via_8231 },
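
The modes[] and irqtypes[] changes above add a second const so that the pointer arrays themselves, not just the strings they point to, become immutable and can be placed in read-only data. The difference in one line each:

	static const char *m1[] = { "a", "b" };		/* strings const, array writable */
	static const char *const m2[] = { "a", "b" };	/* strings and array both const */

	m1[0] = "c";	/* compiles: only the characters are const */
	/* m2[0] = "c";	   error: the pointers are const as well */
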
diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c
index 4b48b31ec23..b62aee8de3c 100644
--- a/drivers/parport/probe.c
+++ b/drivers/parport/probe.c
@@ -11,9 +11,9 @@
#include <linux/string.h>
#include <asm/uaccess.h>
-static struct {
- char *token;
- char *descr;
+static const struct {
+ const char *token;
+ const char *descr;
} classes[] = {
{ "", "Legacy device" },
{ "PRINTER", "Printer" },
@@ -128,8 +128,131 @@ static void parse_data(struct parport *port, int device, char *str)
kfree(txt);
}
+/* Read up to count-1 bytes of device id. Terminate buffer with
+ * '\0'. Buffer begins with the two Device ID length bytes as given
+ * by the device. */
+static ssize_t parport_read_device_id (struct parport *port, char *buffer,
+ size_t count)
+{
+ unsigned char length[2];
+ unsigned lelen, belen;
+ size_t idlens[4];
+ unsigned numidlens;
+ unsigned current_idlen;
+ ssize_t retval;
+ size_t len;
+
+ /* First two bytes are MSB,LSB of inclusive length. */
+ retval = parport_read (port, length, 2);
+
+ if (retval < 0)
+ return retval;
+ if (retval != 2)
+ return -EIO;
+
+ if (count < 2)
+ return 0;
+ memcpy(buffer, length, 2);
+ len = 2;
+
+ /* Some devices wrongly send LE length, and some send it two
+ * bytes short. Construct a sorted array of lengths to try. */
+ belen = (length[0] << 8) + length[1];
+ lelen = (length[1] << 8) + length[0];
+ idlens[0] = min(belen, lelen);
+ idlens[1] = idlens[0]+2;
+ if (belen != lelen) {
+ int off = 2;
+ /* Don't try lengths of 0x100 and 0x200 as 1 and 2 */
+ if (idlens[0] <= 2)
+ off = 0;
+ idlens[off] = max(belen, lelen);
+ idlens[off+1] = idlens[off]+2;
+ numidlens = off+2;
+ }
+ else {
+ /* Some devices don't truly implement Device ID, but
+ * just return a constant nibble forever. This also
+ * catches those cases. */
+ if (idlens[0] == 0 || idlens[0] > 0xFFF) {
+ printk (KERN_DEBUG "%s: reported broken Device ID"
+ " length of %#zX bytes\n",
+ port->name, idlens[0]);
+ return -EIO;
+ }
+ numidlens = 2;
+ }
+
+ /* Try to respect the given ID length despite all the bugs in
+ * its reporting. Read according to the shortest possible ID
+ * first. */
+ for (current_idlen = 0; current_idlen < numidlens; ++current_idlen) {
+ size_t idlen = idlens[current_idlen];
+ if (idlen+1 >= count)
+ break;
+
+ retval = parport_read (port, buffer+len, idlen-len);
+
+ if (retval < 0)
+ return retval;
+ len += retval;
+
+ if (port->physport->ieee1284.phase != IEEE1284_PH_HBUSY_DAVAIL) {
+ if (belen != len) {
+ printk (KERN_DEBUG "%s: Device ID was %d bytes"
+ " while device told it would be %d"
+ " bytes\n",
+ port->name, len, belen);
+ }
+ goto done;
+ }
+
+ /* This might end the Device ID read too
+ * soon. Hopefully the needed fields were already in
+ * the first 256 bytes or so that we have read so
+ * far. */
+ if (buffer[len-1] == ';') {
+ printk (KERN_DEBUG "%s: Device ID reading stopped"
+ " before device told data not available. "
+ "Current idlen %d of %d, len bytes %02X %02X\n",
+ port->name, current_idlen, numidlens,
+ length[0], length[1]);
+ goto done;
+ }
+ }
+ if (current_idlen < numidlens) {
+ /* Buffer not large enough, read to end of buffer. */
+ size_t idlen, len2;
+ if (len+1 < count) {
+ retval = parport_read (port, buffer+len, count-len-1);
+ if (retval < 0)
+ return retval;
+ len += retval;
+ }
+ /* Read the whole ID since some devices would not
+ * otherwise give back the Device ID from the beginning
+ * the next time they are asked. */
+ idlen = idlens[current_idlen];
+ len2 = len;
+ while (len2 < idlen && retval > 0) {
+ char tmp[4];
+ retval = parport_read (port, tmp,
+ min(sizeof tmp, idlen-len2));
+ if (retval < 0)
+ return retval;
+ len2 += retval;
+ }
+ }
+ /* In addition, there are broken devices out there that don't
+ even finish off with a semi-colon. We do not need to care
+ about those at this time. */
+ done:
+ buffer[len] = '\0';
+ return len;
+}
+
/* Get Std 1284 Device ID. */
-ssize_t parport_device_id (int devnum, char *buffer, size_t len)
+ssize_t parport_device_id (int devnum, char *buffer, size_t count)
{
ssize_t retval = -ENXIO;
struct pardevice *dev = parport_open (devnum, "Device ID probe",
@@ -139,76 +262,20 @@ ssize_t parport_device_id (int devnum, char *buffer, size_t len)
parport_claim_or_block (dev);
- /* Negotiate to compatibility mode, and then to device ID mode.
- * (This is in case we are already in device ID mode.) */
+ /* Negotiate to compatibility mode, and then to device ID
+ * mode. (This is so that we start from the beginning of the
+ * device ID if we are already in device ID mode.) */
parport_negotiate (dev->port, IEEE1284_MODE_COMPAT);
retval = parport_negotiate (dev->port,
IEEE1284_MODE_NIBBLE | IEEE1284_DEVICEID);
if (!retval) {
- int idlen;
- unsigned char length[2];
-
- /* First two bytes are MSB,LSB of inclusive length. */
- retval = parport_read (dev->port, length, 2);
-
- if (retval != 2) goto end_id;
-
- idlen = (length[0] << 8) + length[1] - 2;
- /*
- * Check if the caller-allocated buffer is large enough
- * otherwise bail out or there will be an at least off by one.
- */
- if (idlen + 1 < len)
- len = idlen;
- else {
- retval = -EINVAL;
- goto out;
- }
- retval = parport_read (dev->port, buffer, len);
-
- if (retval != len)
- printk (KERN_DEBUG "%s: only read %Zd of %Zd ID bytes\n",
- dev->port->name, retval,
- len);
-
- /* Some printer manufacturers mistakenly believe that
- the length field is supposed to be _exclusive_.
- In addition, there are broken devices out there
- that don't even finish off with a semi-colon. */
- if (buffer[len - 1] != ';') {
- ssize_t diff;
- diff = parport_read (dev->port, buffer + len, 2);
- retval += diff;
-
- if (diff)
- printk (KERN_DEBUG
- "%s: device reported incorrect "
- "length field (%d, should be %Zd)\n",
- dev->port->name, idlen, retval);
- else {
- /* One semi-colon short of a device ID. */
- buffer[len++] = ';';
- printk (KERN_DEBUG "%s: faking semi-colon\n",
- dev->port->name);
-
- /* If we get here, I don't think we
- need to worry about the possible
- standard violation of having read
- more than we were told to. The
- device is non-compliant anyhow. */
- }
- }
-
- end_id:
- buffer[len] = '\0';
+ retval = parport_read_device_id (dev->port, buffer, count);
parport_negotiate (dev->port, IEEE1284_MODE_COMPAT);
+ if (retval > 2)
+ parse_data (dev->port, dev->daisy, buffer+2);
}
- if (retval > 2)
- parse_data (dev->port, dev->daisy, buffer);
-
-out:
parport_release (dev);
parport_close (dev);
return retval;
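
To restate the candidate-length logic in parport_read_device_id(): the two header bytes give an inclusive big-endian length, but buggy peripherals send it little-endian or two bytes short, so up to four sorted candidates are tried. A condensed sketch (omitting the special case that avoids reading 0x100/0x200 as 1/2):

	/* length[] holds the two header bytes already read. */
	unsigned belen = (length[0] << 8) | length[1];	/* spec-compliant */
	unsigned lelen = (length[1] << 8) | length[0];	/* byte-swap bug */
	size_t idlens[4];
	unsigned numidlens = 2;

	idlens[0] = min(belen, lelen);
	idlens[1] = idlens[0] + 2;	/* "exclusive length" bug */
	if (belen != lelen) {
		idlens[2] = max(belen, lelen);
		idlens[3] = idlens[2] + 2;
		numidlens = 4;
	}
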
diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index 9cb3ab156b0..ea62bed6bc8 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -1002,6 +1002,7 @@ EXPORT_SYMBOL(parport_register_driver);
EXPORT_SYMBOL(parport_unregister_driver);
EXPORT_SYMBOL(parport_register_device);
EXPORT_SYMBOL(parport_unregister_device);
+EXPORT_SYMBOL(parport_get_port);
EXPORT_SYMBOL(parport_put_port);
EXPORT_SYMBOL(parport_find_number);
EXPORT_SYMBOL(parport_find_base);
diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c
index 6b7583f497d..a1f0b0ba2bf 100644
--- a/drivers/pnp/pnpbios/bioscalls.c
+++ b/drivers/pnp/pnpbios/bioscalls.c
@@ -31,15 +31,6 @@ static struct {
} pnp_bios_callpoint;
-/* The PnP BIOS entries in the GDT */
-#define PNP_GDT (GDT_ENTRY_PNPBIOS_BASE * 8)
-
-#define PNP_CS32 (PNP_GDT+0x00) /* segment for calling fn */
-#define PNP_CS16 (PNP_GDT+0x08) /* code segment for BIOS */
-#define PNP_DS (PNP_GDT+0x10) /* data segment for BIOS */
-#define PNP_TS1 (PNP_GDT+0x18) /* transfer data segment */
-#define PNP_TS2 (PNP_GDT+0x20) /* another data segment */
-
/*
* These are some opcodes for a "static asmlinkage"
* As this code is *not* executed inside the linux kernel segment, but in a
@@ -67,16 +58,11 @@ __asm__(
".previous \n"
);
-#define Q_SET_SEL(cpu, selname, address, size) \
-do { \
-set_base(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], __va((u32)(address))); \
-set_limit(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], size); \
-} while(0)
-
#define Q2_SET_SEL(cpu, selname, address, size) \
do { \
-set_base(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], (u32)(address)); \
-set_limit(per_cpu(cpu_gdt_table,cpu)[(selname) >> 3], size); \
+struct desc_struct *gdt = get_cpu_gdt_table((cpu)); \
+set_base(gdt[(selname) >> 3], (u32)(address)); \
+set_limit(gdt[(selname) >> 3], size); \
} while(0)
static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
@@ -115,8 +101,8 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
return PNP_FUNCTION_NOT_SUPPORTED;
cpu = get_cpu();
- save_desc_40 = per_cpu(cpu_gdt_table,cpu)[0x40 / 8];
- per_cpu(cpu_gdt_table,cpu)[0x40 / 8] = bad_bios_desc;
+ save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
+ get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
/* On some boxes IRQ's during PnP BIOS calls are deadly. */
spin_lock_irqsave(&pnp_bios_lock, flags);
@@ -158,7 +144,7 @@ static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
);
spin_unlock_irqrestore(&pnp_bios_lock, flags);
- per_cpu(cpu_gdt_table,cpu)[0x40 / 8] = save_desc_40;
+ get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
put_cpu();
/* If we get here and this is set then the PnP BIOS faulted on us. */
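
The per_cpu(cpu_gdt_table, ...) accesses are replaced with get_cpu_gdt_table() throughout. Condensed, the 0x40 descriptor swap in call_pnp_bios() now follows this pattern (a sketch, not the full function):

	int cpu = get_cpu();				/* stay on this CPU */
	struct desc_struct *gdt = get_cpu_gdt_table(cpu);
	struct desc_struct save = gdt[0x40 / 8];	/* save the descriptor */

	gdt[0x40 / 8] = bad_bios_desc;			/* catch stray BIOS use */
	/* ... perform the 16-bit PnP BIOS call ... */
	gdt[0x40 / 8] = save;				/* restore */
	put_cpu();
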
@@ -290,12 +276,15 @@ int pnp_bios_dev_node_info(struct pnp_dev_node_info *data)
static int __pnp_bios_get_dev_node(u8 *nodenum, char boot, struct pnp_bios_node *data)
{
u16 status;
+ u16 tmp_nodenum;
if (!pnp_bios_present())
return PNP_FUNCTION_NOT_SUPPORTED;
if ( !boot && pnpbios_dont_use_current_config )
return PNP_FUNCTION_NOT_SUPPORTED;
+ tmp_nodenum = *nodenum;
status = call_pnp_bios(PNP_GET_SYS_DEV_NODE, 0, PNP_TS1, 0, PNP_TS2, boot ? 2 : 1, PNP_DS, 0,
- nodenum, sizeof(char), data, 65536);
+ &tmp_nodenum, sizeof(tmp_nodenum), data, 65536);
+ *nodenum = tmp_nodenum;
return status;
}
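
The tmp_nodenum bounce buffer matters because the BIOS writes a 16-bit node number: passing the caller's u8 directly with a one-byte size let the call clobber the adjacent byte. The shape of the fix:

	u8 node;	/* caller-visible cursor, one byte wide */
	u16 tmp = node;	/* properly sized bounce buffer for the BIOS */

	/* ... pass &tmp with sizeof(tmp) to call_pnp_bios() ... */
	node = tmp;	/* copy back what the BIOS wrote */
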
@@ -535,10 +524,12 @@ void pnpbios_calls_init(union pnp_bios_install_struct *header)
set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
_set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
- for(i=0; i < NR_CPUS; i++)
- {
- Q2_SET_SEL(i, PNP_CS32, &pnp_bios_callfunc, 64 * 1024);
- Q_SET_SEL(i, PNP_CS16, header->fields.pm16cseg, 64 * 1024);
- Q_SET_SEL(i, PNP_DS, header->fields.pm16dseg, 64 * 1024);
- }
+ for (i = 0; i < NR_CPUS; i++) {
+ struct desc_struct *gdt = get_cpu_gdt_table(i);
+ if (!gdt)
+ continue;
+ set_base(gdt[GDT_ENTRY_PNPBIOS_CS32], &pnp_bios_callfunc);
+ set_base(gdt[GDT_ENTRY_PNPBIOS_CS16], __va(header->fields.pm16cseg));
+ set_base(gdt[GDT_ENTRY_PNPBIOS_DS], __va(header->fields.pm16dseg));
+ }
}
diff --git a/drivers/s390/Makefile b/drivers/s390/Makefile
index c99a2fe92fb..9803c9352d7 100644
--- a/drivers/s390/Makefile
+++ b/drivers/s390/Makefile
@@ -2,7 +2,7 @@
# Makefile for the S/390 specific device drivers
#
-obj-y += s390mach.o sysinfo.o
+obj-y += s390mach.o sysinfo.o s390_rdev.o
obj-y += cio/ block/ char/ crypto/ net/ scsi/
drivers-y += drivers/s390/built-in.o
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 6e7d7b06421..6f50cc9323d 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -1,11 +1,11 @@
-if ARCH_S390
+if S390
comment "S/390 block device drivers"
- depends on ARCH_S390
+ depends on S390
config BLK_DEV_XPRAM
tristate "XPRAM disk support"
- depends on ARCH_S390
+ depends on S390
help
Select this option if you want to use your expanded storage on S/390
or zSeries as a disk. This is useful as a _fast_ swap device if you
@@ -49,7 +49,7 @@ config DASD_FBA
config DASD_DIAG
tristate "Support for DIAG access to Disks"
- depends on DASD && ( ARCH_S390X = 'n' || EXPERIMENTAL)
+ depends on DASD && ( 64BIT = 'n' || EXPERIMENTAL)
help
Select this option if you want to use the Diagnose250 command to access
disks under VM. If you are not running under VM or are unsure what it is,
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index fdb61380c52..f779f674dfa 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -7,7 +7,7 @@
* Bugreports.to..: <Linux390@de.ibm.com>
* (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999-2001
*
- * $Revision: 1.167 $
+ * $Revision: 1.172 $
*/
#include <linux/config.h>
@@ -604,7 +604,7 @@ dasd_smalloc_request(char *magic, int cplength, int datasize,
void
dasd_kfree_request(struct dasd_ccw_req * cqr, struct dasd_device * device)
{
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
struct ccw1 *ccw;
/* Clear any idals used for the request. */
@@ -1224,6 +1224,12 @@ __dasd_start_head(struct dasd_device * device)
if (list_empty(&device->ccw_queue))
return;
cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list);
+ /* check FAILFAST */
+ if (device->stopped & ~DASD_STOPPED_PENDING &&
+ test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags)) {
+ cqr->status = DASD_CQR_FAILED;
+ dasd_schedule_bh(device);
+ }
if ((cqr->status == DASD_CQR_QUEUED) &&
(!device->stopped)) {
/* try to start the first I/O that can be started */
@@ -1323,7 +1329,7 @@ void
dasd_schedule_bh(struct dasd_device * device)
{
/* Protect against rescheduling. */
- if (atomic_compare_and_swap (0, 1, &device->tasklet_scheduled))
+ if (atomic_cmpxchg (&device->tasklet_scheduled, 0, 1) != 0)
return;
dasd_get_device(device);
tasklet_hi_schedule(&device->tasklet);
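
This is one of several spots in the patch where the old s390 atomic_compare_and_swap(old, new, v) gives way to the generic atomic_cmpxchg(v, old, new). The return conventions differ, which explains the added comparisons: the old helper returned nonzero when the swap failed, while atomic_cmpxchg returns the value it found. Side by side:

	/* old: nonzero return means the swap did NOT happen */
	if (atomic_compare_and_swap(0, 1, &t->scheduled))
		return;

	/* new: returns the prior value; != 0 means someone beat us to it */
	if (atomic_cmpxchg(&t->scheduled, 0, 1) != 0)
		return;
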
@@ -1750,8 +1756,10 @@ dasd_exit(void)
* SECTION: common functions for ccw_driver use
*/
-/* initial attempt at a probe function. this can be simplified once
- * the other detection code is gone */
+/*
+ * Initial attempt at a probe function. This can be simplified once
+ * the other detection code is gone.
+ */
int
dasd_generic_probe (struct ccw_device *cdev,
struct dasd_discipline *discipline)
@@ -1770,8 +1778,10 @@ dasd_generic_probe (struct ccw_device *cdev,
return ret;
}
-/* this will one day be called from a global not_oper handler.
- * It is also used by driver_unregister during module unload */
+/*
+ * This will one day be called from a global not_oper handler.
+ * It is also used by driver_unregister during module unload.
+ */
void
dasd_generic_remove (struct ccw_device *cdev)
{
@@ -1798,9 +1808,11 @@ dasd_generic_remove (struct ccw_device *cdev)
dasd_delete_device(device);
}
-/* activate a device. This is called from dasd_{eckd,fba}_probe() when either
+/*
+ * Activate a device. This is called from dasd_{eckd,fba}_probe() when either
* the device is detected for the first time and is supposed to be used
- * or the user has started activation through sysfs */
+ * or the user has started activation through sysfs.
+ */
int
dasd_generic_set_online (struct ccw_device *cdev,
struct dasd_discipline *discipline)
@@ -1917,7 +1929,6 @@ dasd_generic_notify(struct ccw_device *cdev, int event)
if (cqr->status == DASD_CQR_IN_IO)
cqr->status = DASD_CQR_FAILED;
device->stopped |= DASD_STOPPED_DC_EIO;
- dasd_schedule_bh(device);
} else {
list_for_each_entry(cqr, &device->ccw_queue, list)
if (cqr->status == DASD_CQR_IN_IO) {
@@ -1927,6 +1938,7 @@ dasd_generic_notify(struct ccw_device *cdev, int event)
device->stopped |= DASD_STOPPED_DC_WAIT;
dasd_set_timer(device, 0);
}
+ dasd_schedule_bh(device);
ret = 1;
break;
case CIO_OPER:
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index ab8754e566b..ba80fdea7eb 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -6,7 +6,7 @@
* Bugreports.to..: <Linux390@de.ibm.com>
* (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
*
- * $Revision: 1.51 $
+ * $Revision: 1.53 $
*/
#include <linux/config.h>
@@ -25,6 +25,7 @@
#include <asm/io.h>
#include <asm/s390_ext.h>
#include <asm/todclk.h>
+#include <asm/vtoc.h>
#include "dasd_int.h"
#include "dasd_diag.h"
@@ -74,7 +75,7 @@ dia250(void *iob, int cmd)
int rc;
__asm__ __volatile__(
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
" lghi %0,3\n"
" lgr 0,%3\n"
" diag 0,%2,0x250\n"
@@ -329,7 +330,7 @@ dasd_diag_check_device(struct dasd_device *device)
struct dasd_diag_private *private;
struct dasd_diag_characteristics *rdc_data;
struct dasd_diag_bio bio;
- struct dasd_diag_cms_label *label;
+ struct vtoc_cms_label *label;
blocknum_t end_block;
unsigned int sb, bsize;
int rc;
@@ -380,7 +381,7 @@ dasd_diag_check_device(struct dasd_device *device)
mdsk_term_io(device);
/* figure out blocksize of device */
- label = (struct dasd_diag_cms_label *) get_zeroed_page(GFP_KERNEL);
+ label = (struct vtoc_cms_label *) get_zeroed_page(GFP_KERNEL);
if (label == NULL) {
DEV_MESSAGE(KERN_WARNING, device, "%s",
"No memory to allocate initialization request");
@@ -548,6 +549,8 @@ dasd_diag_build_cp(struct dasd_device * device, struct request *req)
}
cqr->retries = DIAG_MAX_RETRIES;
cqr->buildclk = get_clock();
+ if (req->flags & REQ_FAILFAST)
+ set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->device = device;
cqr->expires = DIAG_TIMEOUT;
cqr->status = DASD_CQR_FILLED;
diff --git a/drivers/s390/block/dasd_diag.h b/drivers/s390/block/dasd_diag.h
index df31484d73a..a4f80bd735f 100644
--- a/drivers/s390/block/dasd_diag.h
+++ b/drivers/s390/block/dasd_diag.h
@@ -6,7 +6,7 @@
* Bugreports.to..: <Linux390@de.ibm.com>
* (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
*
- * $Revision: 1.8 $
+ * $Revision: 1.9 $
*/
#define MDSK_WRITE_REQ 0x01
@@ -44,29 +44,8 @@ struct dasd_diag_characteristics {
u8 rdev_features;
} __attribute__ ((packed, aligned(4)));
-struct dasd_diag_cms_label {
- u8 label_id[4];
- u8 vol_id[6];
- u16 version_id;
- u32 block_size;
- u32 origin_ptr;
- u32 usable_count;
- u32 formatted_count;
- u32 block_count;
- u32 used_count;
- u32 fst_size;
- u32 fst_count;
- u8 format_date[6];
- u8 reserved1[2];
- u32 disk_offset;
- u32 map_block;
- u32 hblk_disp;
- u32 user_disp;
- u8 reserved2[4];
- u8 segment_name[8];
-} __attribute__ ((packed));
-
-#ifdef CONFIG_ARCH_S390X
+
+#ifdef CONFIG_64BIT
#define DASD_DIAG_FLAGA_DEFAULT DASD_DIAG_FLAGA_FORMAT_64BIT
typedef u64 blocknum_t;
@@ -107,7 +86,7 @@ struct dasd_diag_rw_io {
struct dasd_diag_bio *bio_list;
u8 spare4[8];
} __attribute__ ((packed, aligned(8)));
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
#define DASD_DIAG_FLAGA_DEFAULT 0x0
typedef u32 blocknum_t;
@@ -146,4 +125,4 @@ struct dasd_diag_rw_io {
u32 interrupt_params;
u8 spare3[20];
} __attribute__ ((packed, aligned(8)));
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 811060e10c0..96eb4825858 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -7,7 +7,7 @@
* Bugreports.to..: <Linux390@de.ibm.com>
* (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
*
- * $Revision: 1.71 $
+ * $Revision: 1.74 $
*/
#include <linux/config.h>
@@ -1041,7 +1041,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req)
/* Eckd can only do full blocks. */
return ERR_PTR(-EINVAL);
count += bv->bv_len >> (device->s2b_shift + 9);
-#if defined(CONFIG_ARCH_S390X)
+#if defined(CONFIG_64BIT)
if (idal_is_needed (page_address(bv->bv_page),
bv->bv_len))
cidaw += bv->bv_len >> (device->s2b_shift + 9);
@@ -1136,6 +1136,8 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req)
recid++;
}
}
+ if (req->flags & REQ_FAILFAST)
+ set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->device = device;
cqr->expires = 5 * 60 * HZ; /* 5 minutes */
cqr->lpm = private->path_data.ppm;
@@ -1252,6 +1254,7 @@ dasd_eckd_release(struct block_device *bdev, int no, long args)
cqr->cpaddr->cda = (__u32)(addr_t) cqr->data;
cqr->device = device;
clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
+ set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->retries = 0;
cqr->expires = 2 * HZ;
cqr->buildclk = get_clock();
@@ -1296,6 +1299,7 @@ dasd_eckd_reserve(struct block_device *bdev, int no, long args)
cqr->cpaddr->cda = (__u32)(addr_t) cqr->data;
cqr->device = device;
clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
+ set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->retries = 0;
cqr->expires = 2 * HZ;
cqr->buildclk = get_clock();
@@ -1339,6 +1343,7 @@ dasd_eckd_steal_lock(struct block_device *bdev, int no, long args)
cqr->cpaddr->cda = (__u32)(addr_t) cqr->data;
cqr->device = device;
clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
+ set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->retries = 0;
cqr->expires = 2 * HZ;
cqr->buildclk = get_clock();
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 28cb4613b7f..8ec75dc08e2 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -4,7 +4,7 @@
* Bugreports.to..: <Linux390@de.ibm.com>
* (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
*
- * $Revision: 1.40 $
+ * $Revision: 1.41 $
*/
#include <linux/config.h>
@@ -271,7 +271,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req)
/* Fba can only do full blocks. */
return ERR_PTR(-EINVAL);
count += bv->bv_len >> (device->s2b_shift + 9);
-#if defined(CONFIG_ARCH_S390X)
+#if defined(CONFIG_64BIT)
if (idal_is_needed (page_address(bv->bv_page),
bv->bv_len))
cidaw += bv->bv_len / blksize;
@@ -352,6 +352,8 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req)
recid++;
}
}
+ if (req->flags & REQ_FAILFAST)
+ set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);
cqr->device = device;
cqr->expires = 5 * 60 * HZ; /* 5 minutes */
cqr->retries = 32;
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 9fab04f3056..2fb05c4a528 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -6,7 +6,7 @@
* Bugreports.to..: <Linux390@de.ibm.com>
* (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
*
- * $Revision: 1.65 $
+ * $Revision: 1.68 $
*/
#ifndef DASD_INT_H
@@ -208,6 +208,7 @@ struct dasd_ccw_req {
/* per dasd_ccw_req flags */
#define DASD_CQR_FLAGS_USE_ERP 0 /* use ERP for this request */
+#define DASD_CQR_FLAGS_FAILFAST 1 /* FAILFAST */
/* Signature for error recovery functions. */
typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *);
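
The new DASD_CQR_FLAGS_FAILFAST bit carries the block layer's REQ_FAILFAST hint down to the ccw request, so that __dasd_start_head() can fail such requests immediately while the device is stopped for anything other than a pending state. The two halves, condensed from the hunks above:

	/* in the build_cp routines: propagate the hint */
	if (req->flags & REQ_FAILFAST)
		set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags);

	/* in __dasd_start_head(): honour it */
	if (device->stopped & ~DASD_STOPPED_PENDING &&
	    test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags))
		cqr->status = DASD_CQR_FAILED;
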
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 789595b3fa0..044b7537199 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -7,7 +7,7 @@
* Bugreports.to..: <Linux390@de.ibm.com>
* (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999-2001
*
- * $Revision: 1.47 $
+ * $Revision: 1.50 $
*
* i/o controls for the dasd driver.
*/
@@ -352,6 +352,9 @@ dasd_ioctl_read_profile(struct block_device *bdev, int no, long args)
if (device == NULL)
return -ENODEV;
+ if (dasd_profile_level == DASD_PROFILE_OFF)
+ return -EIO;
+
if (copy_to_user((long __user *) args, (long *) &device->profile,
sizeof (struct dasd_profile_info_t)))
return -EFAULT;
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 4fde4118899..2e727f49ad1 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -15,7 +15,7 @@
#include <asm/io.h>
#include <linux/completion.h>
#include <linux/interrupt.h>
-#include <asm/ccwdev.h> // for s390_root_dev_(un)register()
+#include <asm/s390_rdev.h>
//#define DCSSBLK_DEBUG /* Debug messages on/off */
#define DCSSBLK_NAME "dcssblk"
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index d428c909b8a..bf3a67c3cc5 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -160,7 +160,7 @@ static int xpram_page_in (unsigned long page_addr, unsigned int xpage_index)
"0: ipm %0\n"
" srl %0,28\n"
"1:\n"
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,1b\n"
@@ -208,7 +208,7 @@ static long xpram_page_out (unsigned long page_addr, unsigned int xpage_index)
"0: ipm %0\n"
" srl %0,28\n"
"1:\n"
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,1b\n"
diff --git a/drivers/s390/char/sclp_cpi.c b/drivers/s390/char/sclp_cpi.c
index 5a6cef2dfa1..80f7f31310e 100644
--- a/drivers/s390/char/sclp_cpi.c
+++ b/drivers/s390/char/sclp_cpi.c
@@ -204,7 +204,7 @@ cpi_module_init(void)
printk(KERN_WARNING "cpi: no control program identification "
"support\n");
sclp_unregister(&sclp_cpi_event);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
req = cpi_prepare_req();
diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c
index 83f75774df6..56fa6916889 100644
--- a/drivers/s390/char/sclp_quiesce.c
+++ b/drivers/s390/char/sclp_quiesce.c
@@ -32,7 +32,7 @@ do_load_quiesce_psw(void * __unused)
psw_t quiesce_psw;
int cpu;
- if (atomic_compare_and_swap(-1, smp_processor_id(), &cpuid))
+ if (atomic_cmpxchg(&cpuid, -1, smp_processor_id()) != -1)
signal_processor(smp_processor_id(), sigp_stop);
/* Wait for all other cpus to enter stopped state */
for_each_online_cpu(cpu) {
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 559d51490e2..5ced2725d6c 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -65,7 +65,7 @@ static void
tapeblock_trigger_requeue(struct tape_device *device)
{
/* Protect against rescheduling. */
- if (atomic_compare_and_swap(0, 1, &device->blk_data.requeue_scheduled))
+ if (atomic_cmpxchg(&device->blk_data.requeue_scheduled, 0, 1) != 0)
return;
schedule_work(&device->blk_data.requeue_task);
}
diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c
index 5473c23fcb5..5acc0ace3d7 100644
--- a/drivers/s390/char/vmwatchdog.c
+++ b/drivers/s390/char/vmwatchdog.c
@@ -66,7 +66,7 @@ static int __diag288(enum vmwdt_func func, unsigned int timeout,
__cmdl = len;
err = 0;
asm volatile (
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
"diag %2,%4,0x288\n"
"1: \n"
".section .fixup,\"ax\"\n"
diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c
index a1c52a68219..daf21e03b21 100644
--- a/drivers/s390/cio/blacklist.c
+++ b/drivers/s390/cio/blacklist.c
@@ -1,7 +1,7 @@
/*
* drivers/s390/cio/blacklist.c
* S/390 common I/O routines -- blacklisting of specific devices
- * $Revision: 1.35 $
+ * $Revision: 1.39 $
*
* Copyright (C) 1999-2002 IBM Deutschland Entwicklung GmbH,
* IBM Corporation
@@ -15,6 +15,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/ctype.h>
#include <linux/device.h>
@@ -34,10 +35,10 @@
* These can be single devices or ranges of devices
*/
-/* 65536 bits to indicate if a devno is blacklisted or not */
-#define __BL_DEV_WORDS ((__MAX_SUBCHANNELS + (8*sizeof(long) - 1)) / \
+/* 65536 bits for each set to indicate if a devno is blacklisted or not */
+#define __BL_DEV_WORDS ((__MAX_SUBCHANNEL + (8*sizeof(long) - 1)) / \
(8*sizeof(long)))
-static unsigned long bl_dev[__BL_DEV_WORDS];
+static unsigned long bl_dev[__MAX_SSID + 1][__BL_DEV_WORDS];
typedef enum {add, free} range_action;
/*
@@ -45,21 +46,23 @@ typedef enum {add, free} range_action;
* (Un-)blacklist the devices from-to
*/
static inline void
-blacklist_range (range_action action, unsigned int from, unsigned int to)
+blacklist_range (range_action action, unsigned int from, unsigned int to,
+ unsigned int ssid)
{
if (!to)
to = from;
- if (from > to || to > __MAX_SUBCHANNELS) {
+ if (from > to || to > __MAX_SUBCHANNEL || ssid > __MAX_SSID) {
printk (KERN_WARNING "Invalid blacklist range "
- "0x%04x to 0x%04x, skipping\n", from, to);
+ "0.%x.%04x to 0.%x.%04x, skipping\n",
+ ssid, from, ssid, to);
return;
}
for (; from <= to; from++) {
if (action == add)
- set_bit (from, bl_dev);
+ set_bit (from, bl_dev[ssid]);
else
- clear_bit (from, bl_dev);
+ clear_bit (from, bl_dev[ssid]);
}
}
@@ -69,7 +72,7 @@ blacklist_range (range_action action, unsigned int from, unsigned int to)
* Shamelessly grabbed from dasd_devmap.c.
*/
static inline int
-blacklist_busid(char **str, int *id0, int *id1, int *devno)
+blacklist_busid(char **str, int *id0, int *ssid, int *devno)
{
int val, old_style;
char *sav;
@@ -86,7 +89,7 @@ blacklist_busid(char **str, int *id0, int *id1, int *devno)
goto confused;
val = simple_strtoul(*str, str, 16);
if (old_style || (*str)[0] != '.') {
- *id0 = *id1 = 0;
+ *id0 = *ssid = 0;
if (val < 0 || val > 0xffff)
goto confused;
*devno = val;
@@ -105,7 +108,7 @@ blacklist_busid(char **str, int *id0, int *id1, int *devno)
val = simple_strtoul(*str, str, 16);
if (val < 0 || val > 0xff || (*str)++[0] != '.')
goto confused;
- *id1 = val;
+ *ssid = val;
if (!isxdigit((*str)[0])) /* We require at least one hex digit */
goto confused;
val = simple_strtoul(*str, str, 16);
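
blacklist_busid() accepts both the old devno-only notation and full bus ids; a few inputs and the values they should yield (illustrative):

	"0x13c0"	->  id0 = 0, ssid = 0, devno = 0x13c0	(old style)
	"abcd"		->  id0 = 0, ssid = 0, devno = 0xabcd
	"0.1.200f"	->  id0 = 0, ssid = 1, devno = 0x200f	(new style)
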
@@ -125,7 +128,7 @@ confused:
static inline int
blacklist_parse_parameters (char *str, range_action action)
{
- unsigned int from, to, from_id0, to_id0, from_id1, to_id1;
+ unsigned int from, to, from_id0, to_id0, from_ssid, to_ssid;
while (*str != 0 && *str != '\n') {
range_action ra = action;
@@ -142,23 +145,25 @@ blacklist_parse_parameters (char *str, range_action action)
*/
if (strncmp(str,"all,",4) == 0 || strcmp(str,"all") == 0 ||
strncmp(str,"all\n",4) == 0 || strncmp(str,"all ",4) == 0) {
- from = 0;
- to = __MAX_SUBCHANNELS;
+ int j;
+
str += 3;
+ for (j=0; j <= __MAX_SSID; j++)
+ blacklist_range(ra, 0, __MAX_SUBCHANNEL, j);
} else {
int rc;
rc = blacklist_busid(&str, &from_id0,
- &from_id1, &from);
+ &from_ssid, &from);
if (rc)
continue;
to = from;
to_id0 = from_id0;
- to_id1 = from_id1;
+ to_ssid = from_ssid;
if (*str == '-') {
str++;
rc = blacklist_busid(&str, &to_id0,
- &to_id1, &to);
+ &to_ssid, &to);
if (rc)
continue;
}
@@ -168,18 +173,19 @@ blacklist_parse_parameters (char *str, range_action action)
strsep(&str, ",\n"));
continue;
}
- if ((from_id0 != to_id0) || (from_id1 != to_id1)) {
+ if ((from_id0 != to_id0) ||
+ (from_ssid != to_ssid)) {
printk(KERN_WARNING "invalid cio_ignore range "
"%x.%x.%04x-%x.%x.%04x\n",
- from_id0, from_id1, from,
- to_id0, to_id1, to);
+ from_id0, from_ssid, from,
+ to_id0, to_ssid, to);
continue;
}
+ pr_debug("blacklist_setup: adding range "
+ "from %x.%x.%04x to %x.%x.%04x\n",
+ from_id0, from_ssid, from, to_id0, to_ssid, to);
+ blacklist_range (ra, from, to, to_ssid);
}
- /* FIXME: ignoring id0 and id1 here. */
- pr_debug("blacklist_setup: adding range "
- "from 0.0.%04x to 0.0.%04x\n", from, to);
- blacklist_range (ra, from, to);
}
return 1;
}
@@ -213,12 +219,33 @@ __setup ("cio_ignore=", blacklist_setup);
* Used by validate_subchannel()
*/
int
-is_blacklisted (int devno)
+is_blacklisted (int ssid, int devno)
{
- return test_bit (devno, bl_dev);
+ return test_bit (devno, bl_dev[ssid]);
}
#ifdef CONFIG_PROC_FS
+static int
+__s390_redo_validation(struct subchannel_id schid, void *data)
+{
+ int ret;
+ struct subchannel *sch;
+
+ sch = get_subchannel_by_schid(schid);
+ if (sch) {
+ /* Already known. */
+ put_device(&sch->dev);
+ return 0;
+ }
+ ret = css_probe_device(schid);
+ if (ret == -ENXIO)
+ return ret; /* We're through. */
+ if (ret == -ENOMEM)
+ /* Stop validation for now. Bad, but no need for a panic. */
+ return ret;
+ return 0;
+}
+
/*
* Function: s390_redo_validation
* Look for no longer blacklisted devices
@@ -226,29 +253,9 @@ is_blacklisted (int devno)
static inline void
s390_redo_validation (void)
{
- unsigned int irq;
-
CIO_TRACE_EVENT (0, "redoval");
- for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) {
- int ret;
- struct subchannel *sch;
-
- sch = get_subchannel_by_schid(irq);
- if (sch) {
- /* Already known. */
- put_device(&sch->dev);
- continue;
- }
- ret = css_probe_device(irq);
- if (ret == -ENXIO)
- break; /* We're through. */
- if (ret == -ENOMEM)
- /*
- * Stop validation for now. Bad, but no need for a
- * panic.
- */
- break;
- }
+
+ for_each_subchannel(__s390_redo_validation, NULL);
}
/*
@@ -278,41 +285,90 @@ blacklist_parse_proc_parameters (char *buf)
s390_redo_validation ();
}
-/* FIXME: These should be real bus ids and not home-grown ones! */
-static int cio_ignore_read (char *page, char **start, off_t off,
- int count, int *eof, void *data)
+/* Iterator struct for all devices. */
+struct ccwdev_iter {
+ int devno;
+ int ssid;
+ int in_range;
+};
+
+static void *
+cio_ignore_proc_seq_start(struct seq_file *s, loff_t *offset)
{
- const unsigned int entry_size = 18; /* "0.0.ABCD-0.0.EFGH\n" */
- long devno;
- int len;
-
- len = 0;
- for (devno = off; /* abuse the page variable
- * as counter, see fs/proc/generic.c */
- devno < __MAX_SUBCHANNELS && len + entry_size < count; devno++) {
- if (!test_bit(devno, bl_dev))
- continue;
- len += sprintf(page + len, "0.0.%04lx", devno);
- if (test_bit(devno + 1, bl_dev)) { /* print range */
- while (++devno < __MAX_SUBCHANNELS)
- if (!test_bit(devno, bl_dev))
- break;
- len += sprintf(page + len, "-0.0.%04lx", --devno);
- }
- len += sprintf(page + len, "\n");
- }
+ struct ccwdev_iter *iter;
+
+ if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1))
+ return NULL;
+ iter = kzalloc(sizeof(struct ccwdev_iter), GFP_KERNEL);
+ if (!iter)
+ return ERR_PTR(-ENOMEM);
+ iter->ssid = *offset / (__MAX_SUBCHANNEL + 1);
+ iter->devno = *offset % (__MAX_SUBCHANNEL + 1);
+ return iter;
+}
+
+static void
+cio_ignore_proc_seq_stop(struct seq_file *s, void *it)
+{
+ if (!IS_ERR(it))
+ kfree(it);
+}
+
+static void *
+cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset)
+{
+ struct ccwdev_iter *iter;
+
+ if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1))
+ return NULL;
+ iter = it;
+ if (iter->devno == __MAX_SUBCHANNEL) {
+ iter->devno = 0;
+ iter->ssid++;
+ if (iter->ssid > __MAX_SSID)
+ return NULL;
+ } else
+ iter->devno++;
+ (*offset)++;
+ return iter;
+}
- if (devno < __MAX_SUBCHANNELS)
- *eof = 1;
- *start = (char *) (devno - off); /* number of checked entries */
- return len;
+static int
+cio_ignore_proc_seq_show(struct seq_file *s, void *it)
+{
+ struct ccwdev_iter *iter;
+
+ iter = it;
+ if (!is_blacklisted(iter->ssid, iter->devno))
+ /* Not blacklisted, nothing to output. */
+ return 0;
+ if (!iter->in_range) {
+ /* First device in range. */
+ if ((iter->devno == __MAX_SUBCHANNEL) ||
+ !is_blacklisted(iter->ssid, iter->devno + 1))
+ /* Singular device. */
+ return seq_printf(s, "0.%x.%04x\n",
+ iter->ssid, iter->devno);
+ iter->in_range = 1;
+ return seq_printf(s, "0.%x.%04x-", iter->ssid, iter->devno);
+ }
+ if ((iter->devno == __MAX_SUBCHANNEL) ||
+ !is_blacklisted(iter->ssid, iter->devno + 1)) {
+ /* Last device in range. */
+ iter->in_range = 0;
+ return seq_printf(s, "0.%x.%04x\n", iter->ssid, iter->devno);
+ }
+ return 0;
}
-static int cio_ignore_write(struct file *file, const char __user *user_buf,
- unsigned long user_len, void *data)
+static ssize_t
+cio_ignore_write(struct file *file, const char __user *user_buf,
+ size_t user_len, loff_t *offset)
{
char *buf;
+ if (*offset)
+ return -EINVAL;
if (user_len > 65536)
user_len = 65536;
buf = vmalloc (user_len + 1); /* maybe better use the stack? */
@@ -330,6 +386,27 @@ static int cio_ignore_write(struct file *file, const char __user *user_buf,
return user_len;
}
+static struct seq_operations cio_ignore_proc_seq_ops = {
+ .start = cio_ignore_proc_seq_start,
+ .stop = cio_ignore_proc_seq_stop,
+ .next = cio_ignore_proc_seq_next,
+ .show = cio_ignore_proc_seq_show,
+};
+
+static int
+cio_ignore_proc_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &cio_ignore_proc_seq_ops);
+}
+
+static struct file_operations cio_ignore_proc_fops = {
+ .open = cio_ignore_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ .write = cio_ignore_write,
+};
+
static int
cio_ignore_proc_init (void)
{
@@ -340,8 +417,7 @@ cio_ignore_proc_init (void)
if (!entry)
return 0;
- entry->read_proc = cio_ignore_read;
- entry->write_proc = cio_ignore_write;
+ entry->proc_fops = &cio_ignore_proc_fops;
return 1;
}
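
The seq_file conversion flattens the (ssid, devno) pair into a single loff_t so reads can resume at any position; the mapping used by cio_ignore_proc_seq_start() and its inverse are:

	/* offset -> (ssid, devno) */
	iter->ssid  = *offset / (__MAX_SUBCHANNEL + 1);
	iter->devno = *offset % (__MAX_SUBCHANNEL + 1);

	/* (ssid, devno) -> offset */
	*offset = iter->ssid * (__MAX_SUBCHANNEL + 1) + iter->devno;
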
diff --git a/drivers/s390/cio/blacklist.h b/drivers/s390/cio/blacklist.h
index fb42cafbe57..95e25c1df92 100644
--- a/drivers/s390/cio/blacklist.h
+++ b/drivers/s390/cio/blacklist.h
@@ -1,6 +1,6 @@
#ifndef S390_BLACKLIST_H
#define S390_BLACKLIST_H
-extern int is_blacklisted (int devno);
+extern int is_blacklisted (int ssid, int devno);
#endif
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index be9d2d65c22..e849289d4f3 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -1,7 +1,7 @@
/*
* drivers/s390/cio/ccwgroup.c
* bus driver for ccwgroup
- * $Revision: 1.32 $
+ * $Revision: 1.33 $
*
* Copyright (C) 2002 IBM Deutschland Entwicklung GmbH,
* IBM Corporation
@@ -263,7 +263,7 @@ ccwgroup_set_online(struct ccwgroup_device *gdev)
struct ccwgroup_driver *gdrv;
int ret;
- if (atomic_compare_and_swap(0, 1, &gdev->onoff))
+ if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0)
return -EAGAIN;
if (gdev->state == CCWGROUP_ONLINE) {
ret = 0;
@@ -289,7 +289,7 @@ ccwgroup_set_offline(struct ccwgroup_device *gdev)
struct ccwgroup_driver *gdrv;
int ret;
- if (atomic_compare_and_swap(0, 1, &gdev->onoff))
+ if (atomic_cmpxchg(&gdev->onoff, 0, 1) != 0)
return -EAGAIN;
if (gdev->state == CCWGROUP_OFFLINE) {
ret = 0;
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index fa3c23b80e3..7270808c02d 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -1,7 +1,7 @@
/*
* drivers/s390/cio/chsc.c
* S/390 common I/O routines -- channel subsystem call
- * $Revision: 1.120 $
+ * $Revision: 1.126 $
*
* Copyright (C) 1999-2002 IBM Deutschland Entwicklung GmbH,
* IBM Corporation
@@ -24,8 +24,6 @@
#include "ioasm.h"
#include "chsc.h"
-static struct channel_path *chps[NR_CHPIDS];
-
static void *sei_page;
static int new_channel_path(int chpid);
@@ -33,13 +31,13 @@ static int new_channel_path(int chpid);
static inline void
set_chp_logically_online(int chp, int onoff)
{
- chps[chp]->state = onoff;
+ css[0]->chps[chp]->state = onoff;
}
static int
get_chp_status(int chp)
{
- return (chps[chp] ? chps[chp]->state : -ENODEV);
+ return (css[0]->chps[chp] ? css[0]->chps[chp]->state : -ENODEV);
}
void
@@ -77,7 +75,9 @@ chsc_get_sch_desc_irq(struct subchannel *sch, void *page)
struct {
struct chsc_header request;
- u16 reserved1;
+ u16 reserved1a:10;
+ u16 ssid:2;
+ u16 reserved1b:4;
u16 f_sch; /* first subchannel */
u16 reserved2;
u16 l_sch; /* last subchannel */
@@ -104,8 +104,9 @@ chsc_get_sch_desc_irq(struct subchannel *sch, void *page)
.code = 0x0004,
};
- ssd_area->f_sch = sch->irq;
- ssd_area->l_sch = sch->irq;
+ ssd_area->ssid = sch->schid.ssid;
+ ssd_area->f_sch = sch->schid.sch_no;
+ ssd_area->l_sch = sch->schid.sch_no;
ccode = chsc(ssd_area);
if (ccode > 0) {
@@ -147,7 +148,8 @@ chsc_get_sch_desc_irq(struct subchannel *sch, void *page)
*/
if (ssd_area->st > 3) { /* uhm, that looks strange... */
CIO_CRW_EVENT(0, "Strange subchannel type %d"
- " for sch %04x\n", ssd_area->st, sch->irq);
+ " for sch 0.%x.%04x\n", ssd_area->st,
+ sch->schid.ssid, sch->schid.sch_no);
/*
* There may have been a new subchannel type defined in the
* time since this code was written; since we don't know which
@@ -156,8 +158,9 @@ chsc_get_sch_desc_irq(struct subchannel *sch, void *page)
return 0;
} else {
const char *type[4] = {"I/O", "chsc", "message", "ADM"};
- CIO_CRW_EVENT(6, "ssd: sch %04x is %s subchannel\n",
- sch->irq, type[ssd_area->st]);
+ CIO_CRW_EVENT(6, "ssd: sch 0.%x.%04x is %s subchannel\n",
+ sch->schid.ssid, sch->schid.sch_no,
+ type[ssd_area->st]);
sch->ssd_info.valid = 1;
sch->ssd_info.type = ssd_area->st;
@@ -218,13 +221,13 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
int j;
int mask;
struct subchannel *sch;
- __u8 *chpid;
+ struct channel_path *chpid;
struct schib schib;
sch = to_subchannel(dev);
chpid = data;
for (j = 0; j < 8; j++)
- if (sch->schib.pmcw.chpid[j] == *chpid)
+ if (sch->schib.pmcw.chpid[j] == chpid->id)
break;
if (j >= 8)
return 0;
@@ -232,7 +235,7 @@ s390_subchannel_remove_chpid(struct device *dev, void *data)
mask = 0x80 >> j;
spin_lock(&sch->lock);
- stsch(sch->irq, &schib);
+ stsch(sch->schid, &schib);
if (!schib.pmcw.dnv)
goto out_unreg;
memcpy(&sch->schib, &schib, sizeof(struct schib));
@@ -284,7 +287,7 @@ out_unlock:
out_unreg:
spin_unlock(&sch->lock);
sch->lpm = 0;
- if (css_enqueue_subchannel_slow(sch->irq)) {
+ if (css_enqueue_subchannel_slow(sch->schid)) {
css_clear_subchannel_slow_list();
need_rescan = 1;
}
@@ -295,23 +298,30 @@ static inline void
s390_set_chpid_offline( __u8 chpid)
{
char dbf_txt[15];
+ struct device *dev;
sprintf(dbf_txt, "chpr%x", chpid);
CIO_TRACE_EVENT(2, dbf_txt);
if (get_chp_status(chpid) <= 0)
return;
-
- bus_for_each_dev(&css_bus_type, NULL, &chpid,
+ dev = get_device(&css[0]->chps[chpid]->dev);
+ bus_for_each_dev(&css_bus_type, NULL, to_channelpath(dev),
s390_subchannel_remove_chpid);
if (need_rescan || css_slow_subchannels_exist())
queue_work(slow_path_wq, &slow_path_work);
+ put_device(dev);
}
+struct res_acc_data {
+ struct channel_path *chp;
+ u32 fla_mask;
+ u16 fla;
+};
+
static int
-s390_process_res_acc_sch(u8 chpid, __u16 fla, u32 fla_mask,
- struct subchannel *sch)
+s390_process_res_acc_sch(struct res_acc_data *res_data, struct subchannel *sch)
{
int found;
int chp;
@@ -323,8 +333,9 @@ s390_process_res_acc_sch(u8 chpid, __u16 fla, u32 fla_mask,
* check if chpid is in information updated by ssd
*/
if (sch->ssd_info.valid &&
- sch->ssd_info.chpid[chp] == chpid &&
- (sch->ssd_info.fla[chp] & fla_mask) == fla) {
+ sch->ssd_info.chpid[chp] == res_data->chp->id &&
+ (sch->ssd_info.fla[chp] & res_data->fla_mask)
+ == res_data->fla) {
found = 1;
break;
}
@@ -337,24 +348,87 @@ s390_process_res_acc_sch(u8 chpid, __u16 fla, u32 fla_mask,
* new path information and eventually check for logically
* offline chpids.
*/
- ccode = stsch(sch->irq, &sch->schib);
+ ccode = stsch(sch->schid, &sch->schib);
if (ccode > 0)
return 0;
return 0x80 >> chp;
}
+static inline int
+s390_process_res_acc_new_sch(struct subchannel_id schid)
+{
+ struct schib schib;
+ int ret;
+ /*
+ * We don't know the device yet, but since a path
+ * may be available now to the device we'll have
+ * to do recognition again.
+ * Since we don't have any idea about which chpid
+ * that beast may be on, we'll have to do a stsch
+ * on all devices, grr...
+ */
+ if (stsch_err(schid, &schib))
+ /* We're through */
+ return need_rescan ? -EAGAIN : -ENXIO;
+
+ /* Put it on the slow path. */
+ ret = css_enqueue_subchannel_slow(schid);
+ if (ret) {
+ css_clear_subchannel_slow_list();
+ need_rescan = 1;
+ return -EAGAIN;
+ }
+ return 0;
+}
+
static int
-s390_process_res_acc (u8 chpid, __u16 fla, u32 fla_mask)
+__s390_process_res_acc(struct subchannel_id schid, void *data)
{
+ int chp_mask, old_lpm;
+ struct res_acc_data *res_data;
struct subchannel *sch;
- int irq, rc;
+
+ res_data = (struct res_acc_data *)data;
+ sch = get_subchannel_by_schid(schid);
+ if (!sch)
+ /* Check if a subchannel is newly available. */
+ return s390_process_res_acc_new_sch(schid);
+
+ spin_lock_irq(&sch->lock);
+
+ chp_mask = s390_process_res_acc_sch(res_data, sch);
+
+ if (chp_mask == 0) {
+ spin_unlock_irq(&sch->lock);
+ return 0;
+ }
+ old_lpm = sch->lpm;
+ sch->lpm = ((sch->schib.pmcw.pim &
+ sch->schib.pmcw.pam &
+ sch->schib.pmcw.pom)
+ | chp_mask) & sch->opm;
+ if (!old_lpm && sch->lpm)
+ device_trigger_reprobe(sch);
+ else if (sch->driver && sch->driver->verify)
+ sch->driver->verify(&sch->dev);
+
+ spin_unlock_irq(&sch->lock);
+ put_device(&sch->dev);
+ return (res_data->fla_mask == 0xffff) ? -ENODEV : 0;
+}
+
+
+static int
+s390_process_res_acc (struct res_acc_data *res_data)
+{
+ int rc;
char dbf_txt[15];
- sprintf(dbf_txt, "accpr%x", chpid);
+ sprintf(dbf_txt, "accpr%x", res_data->chp->id);
CIO_TRACE_EVENT( 2, dbf_txt);
- if (fla != 0) {
- sprintf(dbf_txt, "fla%x", fla);
+ if (res_data->fla != 0) {
+ sprintf(dbf_txt, "fla%x", res_data->fla);
CIO_TRACE_EVENT( 2, dbf_txt);
}
@@ -365,70 +439,11 @@ s390_process_res_acc (u8 chpid, __u16 fla, u32 fla_mask)
* The more information we have (info), the less scanning
* we will have to do.
*/
-
- if (!get_chp_status(chpid))
- return 0; /* no need to do the rest */
-
- rc = 0;
- for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) {
- int chp_mask, old_lpm;
-
- sch = get_subchannel_by_schid(irq);
- if (!sch) {
- struct schib schib;
- int ret;
- /*
- * We don't know the device yet, but since a path
- * may be available now to the device we'll have
- * to do recognition again.
- * Since we don't have any idea about which chpid
- * that beast may be on we'll have to do a stsch
- * on all devices, grr...
- */
- if (stsch(irq, &schib)) {
- /* We're through */
- if (need_rescan)
- rc = -EAGAIN;
- break;
- }
- if (need_rescan) {
- rc = -EAGAIN;
- continue;
- }
- /* Put it on the slow path. */
- ret = css_enqueue_subchannel_slow(irq);
- if (ret) {
- css_clear_subchannel_slow_list();
- need_rescan = 1;
- }
- rc = -EAGAIN;
- continue;
- }
-
- spin_lock_irq(&sch->lock);
-
- chp_mask = s390_process_res_acc_sch(chpid, fla, fla_mask, sch);
-
- if (chp_mask == 0) {
-
- spin_unlock_irq(&sch->lock);
- continue;
- }
- old_lpm = sch->lpm;
- sch->lpm = ((sch->schib.pmcw.pim &
- sch->schib.pmcw.pam &
- sch->schib.pmcw.pom)
- | chp_mask) & sch->opm;
- if (!old_lpm && sch->lpm)
- device_trigger_reprobe(sch);
- else if (sch->driver && sch->driver->verify)
- sch->driver->verify(&sch->dev);
-
- spin_unlock_irq(&sch->lock);
- put_device(&sch->dev);
- if (fla_mask == 0xffff)
- break;
- }
+ rc = for_each_subchannel(__s390_process_res_acc, res_data);
+ if (css_slow_subchannels_exist())
+ rc = -EAGAIN;
+ else if (rc != -EAGAIN)
+ rc = 0;
return rc;
}
@@ -466,6 +481,7 @@ int
chsc_process_crw(void)
{
int chpid, ret;
+ struct res_acc_data res_data;
struct {
struct chsc_header request;
u32 reserved1;
@@ -499,8 +515,9 @@ chsc_process_crw(void)
ret = 0;
do {
int ccode, status;
+ struct device *dev;
memset(sei_area, 0, sizeof(*sei_area));
-
+ memset(&res_data, 0, sizeof(struct res_acc_data));
sei_area->request = (struct chsc_header) {
.length = 0x0010,
.code = 0x000e,
@@ -573,26 +590,25 @@ chsc_process_crw(void)
if (status < 0)
new_channel_path(sei_area->rsid);
else if (!status)
- return 0;
- if ((sei_area->vf & 0x80) == 0) {
- pr_debug("chpid: %x\n", sei_area->rsid);
- ret = s390_process_res_acc(sei_area->rsid,
- 0, 0);
- } else if ((sei_area->vf & 0xc0) == 0x80) {
- pr_debug("chpid: %x link addr: %x\n",
- sei_area->rsid, sei_area->fla);
- ret = s390_process_res_acc(sei_area->rsid,
- sei_area->fla,
- 0xff00);
- } else if ((sei_area->vf & 0xc0) == 0xc0) {
- pr_debug("chpid: %x full link addr: %x\n",
- sei_area->rsid, sei_area->fla);
- ret = s390_process_res_acc(sei_area->rsid,
- sei_area->fla,
- 0xffff);
+ break;
+ dev = get_device(&css[0]->chps[sei_area->rsid]->dev);
+ res_data.chp = to_channelpath(dev);
+ pr_debug("chpid: %x", sei_area->rsid);
+ if ((sei_area->vf & 0xc0) != 0) {
+ res_data.fla = sei_area->fla;
+ if ((sei_area->vf & 0xc0) == 0xc0) {
+ pr_debug(" full link addr: %x",
+ sei_area->fla);
+ res_data.fla_mask = 0xffff;
+ } else {
+ pr_debug(" link addr: %x",
+ sei_area->fla);
+ res_data.fla_mask = 0xff00;
+ }
}
- pr_debug("\n");
-
+ ret = s390_process_res_acc(&res_data);
+ pr_debug("\n\n");
+ put_device(dev);
break;
default: /* other stuff */
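
For the resource-accessibility event, the validity flags in sei_area->vf decide how much of the reported link address is usable; the new res_acc_data encodes this as a mask. Condensed:

	if ((sei_area->vf & 0xc0) == 0xc0)
		res_data.fla_mask = 0xffff;	/* full link address */
	else if (sei_area->vf & 0xc0)
		res_data.fla_mask = 0xff00;	/* link address only */
	else
		res_data.fla_mask = 0;		/* chpid only */
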
@@ -604,12 +620,72 @@ chsc_process_crw(void)
return ret;
}
+static inline int
+__chp_add_new_sch(struct subchannel_id schid)
+{
+ struct schib schib;
+ int ret;
+
+ if (stsch(schid, &schib))
+ /* We're through */
+ return need_rescan ? -EAGAIN : -ENXIO;
+
+ /* Put it on the slow path. */
+ ret = css_enqueue_subchannel_slow(schid);
+ if (ret) {
+ css_clear_subchannel_slow_list();
+ need_rescan = 1;
+ return -EAGAIN;
+ }
+ return 0;
+}
+
+
static int
-chp_add(int chpid)
+__chp_add(struct subchannel_id schid, void *data)
{
+ int i;
+ struct channel_path *chp;
struct subchannel *sch;
- int irq, ret, rc;
+
+ chp = (struct channel_path *)data;
+ sch = get_subchannel_by_schid(schid);
+ if (!sch)
+ /* Check if the subchannel is now available. */
+ return __chp_add_new_sch(schid);
+ spin_lock(&sch->lock);
+ for (i=0; i<8; i++)
+ if (sch->schib.pmcw.chpid[i] == chp->id) {
+ if (stsch(sch->schid, &sch->schib) != 0) {
+ /* Endgame. */
+ spin_unlock(&sch->lock);
+ return -ENXIO;
+ }
+ break;
+ }
+ if (i==8) {
+ spin_unlock(&sch->lock);
+ return 0;
+ }
+ sch->lpm = ((sch->schib.pmcw.pim &
+ sch->schib.pmcw.pam &
+ sch->schib.pmcw.pom)
+ | 0x80 >> i) & sch->opm;
+
+ if (sch->driver && sch->driver->verify)
+ sch->driver->verify(&sch->dev);
+
+ spin_unlock(&sch->lock);
+ put_device(&sch->dev);
+ return 0;
+}
+
+static int
+chp_add(int chpid)
+{
+ int rc;
char dbf_txt[15];
+ struct device *dev;
if (!get_chp_status(chpid))
return 0; /* no need to do the rest */
@@ -617,59 +693,13 @@ chp_add(int chpid)
sprintf(dbf_txt, "cadd%x", chpid);
CIO_TRACE_EVENT(2, dbf_txt);
- rc = 0;
- for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) {
- int i;
-
- sch = get_subchannel_by_schid(irq);
- if (!sch) {
- struct schib schib;
-
- if (stsch(irq, &schib)) {
- /* We're through */
- if (need_rescan)
- rc = -EAGAIN;
- break;
- }
- if (need_rescan) {
- rc = -EAGAIN;
- continue;
- }
- /* Put it on the slow path. */
- ret = css_enqueue_subchannel_slow(irq);
- if (ret) {
- css_clear_subchannel_slow_list();
- need_rescan = 1;
- }
- rc = -EAGAIN;
- continue;
- }
-
- spin_lock(&sch->lock);
- for (i=0; i<8; i++)
- if (sch->schib.pmcw.chpid[i] == chpid) {
- if (stsch(sch->irq, &sch->schib) != 0) {
- /* Endgame. */
- spin_unlock(&sch->lock);
- return rc;
- }
- break;
- }
- if (i==8) {
- spin_unlock(&sch->lock);
- return rc;
- }
- sch->lpm = ((sch->schib.pmcw.pim &
- sch->schib.pmcw.pam &
- sch->schib.pmcw.pom)
- | 0x80 >> i) & sch->opm;
-
- if (sch->driver && sch->driver->verify)
- sch->driver->verify(&sch->dev);
-
- spin_unlock(&sch->lock);
- put_device(&sch->dev);
- }
+ dev = get_device(&css[0]->chps[chpid]->dev);
+ rc = for_each_subchannel(__chp_add, to_channelpath(dev));
+ if (css_slow_subchannels_exist())
+ rc = -EAGAIN;
+ if (rc != -EAGAIN)
+ rc = 0;
+ put_device(dev);
return rc;
}
@@ -702,7 +732,7 @@ __check_for_io_and_kill(struct subchannel *sch, int index)
if (!device_is_online(sch))
/* cio could be doing I/O. */
return 0;
- cc = stsch(sch->irq, &sch->schib);
+ cc = stsch(sch->schid, &sch->schib);
if (cc)
return 0;
if (sch->schib.scsw.actl && sch->schib.pmcw.lpum == (0x80 >> index)) {
@@ -743,7 +773,7 @@ __s390_subchannel_vary_chpid(struct subchannel *sch, __u8 chpid, int on)
* just varied off path. Then kill it.
*/
if (!__check_for_io_and_kill(sch, chp) && !sch->lpm) {
- if (css_enqueue_subchannel_slow(sch->irq)) {
+ if (css_enqueue_subchannel_slow(sch->schid)) {
css_clear_subchannel_slow_list();
need_rescan = 1;
}
@@ -781,6 +811,29 @@ s390_subchannel_vary_chpid_on(struct device *dev, void *data)
return 0;
}
+static int
+__s390_vary_chpid_on(struct subchannel_id schid, void *data)
+{
+ struct schib schib;
+ struct subchannel *sch;
+
+ sch = get_subchannel_by_schid(schid);
+ if (sch) {
+ put_device(&sch->dev);
+ return 0;
+ }
+ if (stsch_err(schid, &schib))
+ /* We're through */
+ return -ENXIO;
+ /* Put it on the slow path. */
+ if (css_enqueue_subchannel_slow(schid)) {
+ css_clear_subchannel_slow_list();
+ need_rescan = 1;
+ return -EAGAIN;
+ }
+ return 0;
+}
+
/*
* Function: s390_vary_chpid
* Varies the specified chpid online or offline
@@ -789,8 +842,7 @@ static int
s390_vary_chpid( __u8 chpid, int on)
{
char dbf_text[15];
- int status, irq, ret;
- struct subchannel *sch;
+ int status;
sprintf(dbf_text, on?"varyon%x":"varyoff%x", chpid);
CIO_TRACE_EVENT( 2, dbf_text);
@@ -815,30 +867,9 @@ s390_vary_chpid( __u8 chpid, int on)
bus_for_each_dev(&css_bus_type, NULL, &chpid, on ?
s390_subchannel_vary_chpid_on :
s390_subchannel_vary_chpid_off);
- if (!on)
- goto out;
- /* Scan for new devices on varied on path. */
- for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) {
- struct schib schib;
-
- if (need_rescan)
- break;
- sch = get_subchannel_by_schid(irq);
- if (sch) {
- put_device(&sch->dev);
- continue;
- }
- if (stsch(irq, &schib))
- /* We're through */
- break;
- /* Put it on the slow path. */
- ret = css_enqueue_subchannel_slow(irq);
- if (ret) {
- css_clear_subchannel_slow_list();
- need_rescan = 1;
- }
- }
-out:
+ if (on)
+ /* Scan for new devices on varied on path. */
+ for_each_subchannel(__s390_vary_chpid_on, NULL);
if (need_rescan || css_slow_subchannels_exist())
queue_work(slow_path_wq, &slow_path_work);
return 0;
@@ -995,7 +1026,7 @@ new_channel_path(int chpid)
chp->id = chpid;
chp->state = 1;
chp->dev = (struct device) {
- .parent = &css_bus_device,
+ .parent = &css[0]->device,
.release = chp_release,
};
snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp0.%x", chpid);
@@ -1017,7 +1048,7 @@ new_channel_path(int chpid)
device_unregister(&chp->dev);
goto out_free;
} else
- chps[chpid] = chp;
+ css[0]->chps[chpid] = chp;
return ret;
out_free:
kfree(chp);
@@ -1030,7 +1061,7 @@ chsc_get_chp_desc(struct subchannel *sch, int chp_no)
struct channel_path *chp;
struct channel_path_desc *desc;
- chp = chps[sch->schib.pmcw.chpid[chp_no]];
+ chp = css[0]->chps[sch->schib.pmcw.chpid[chp_no]];
if (!chp)
return NULL;
desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL);
@@ -1051,6 +1082,54 @@ chsc_alloc_sei_area(void)
return (sei_page ? 0 : -ENOMEM);
}
+int __init
+chsc_enable_facility(int operation_code)
+{
+ int ret;
+ struct {
+ struct chsc_header request;
+ u8 reserved1:4;
+ u8 format:4;
+ u8 reserved2;
+ u16 operation_code;
+ u32 reserved3;
+ u32 reserved4;
+ u32 operation_data_area[252];
+ struct chsc_header response;
+ u32 reserved5:4;
+ u32 format2:4;
+ u32 reserved6:24;
+ } *sda_area;
+
+ sda_area = (void *)get_zeroed_page(GFP_KERNEL|GFP_DMA);
+ if (!sda_area)
+ return -ENOMEM;
+ sda_area->request = (struct chsc_header) {
+ .length = 0x0400,
+ .code = 0x0031,
+ };
+ sda_area->operation_code = operation_code;
+
+ ret = chsc(sda_area);
+ if (ret > 0) {
+ ret = (ret == 3) ? -ENODEV : -EBUSY;
+ goto out;
+ }
+ switch (sda_area->response.code) {
+ case 0x0003: /* invalid request block */
+ case 0x0007:
+ ret = -EINVAL;
+ break;
+ case 0x0004: /* command not provided */
+ case 0x0101: /* facility not provided */
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ out:
+ free_page((unsigned long)sda_area);
+ return ret;
+}
+
subsys_initcall(chsc_alloc_sei_area);
struct css_general_char css_general_characteristics;
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index be20da49d14..44e4b4bb1c5 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -1,12 +1,12 @@
#ifndef S390_CHSC_H
#define S390_CHSC_H
-#define NR_CHPIDS 256
-
#define CHSC_SEI_ACC_CHPID 1
#define CHSC_SEI_ACC_LINKADDR 2
#define CHSC_SEI_ACC_FULLLINKADDR 3
+#define CHSC_SDA_OC_MSS 0x2
+
struct chsc_header {
u16 length;
u16 code;
@@ -43,7 +43,9 @@ struct css_general_char {
u32 ext_mb : 1; /* bit 48 */
u32 : 7;
u32 aif_tdd : 1; /* bit 56 */
- u32 : 10;
+ u32 : 1;
+ u32 qebsm : 1; /* bit 58 */
+ u32 : 8;
u32 aif_osa : 1; /* bit 67 */
u32 : 28;
}__attribute__((packed));
@@ -63,4 +65,9 @@ extern int chsc_determine_css_characteristics(void);
extern int css_characteristics_avail;
extern void *chsc_get_chp_desc(struct subchannel*, int);
+
+extern int chsc_enable_facility(int);
+
+#define to_channelpath(dev) container_of(dev, struct channel_path, dev)
+
#endif
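The new qebsm bit (bit 58 of the CSS general characteristics, per the comment in the hunk above) is what the qdio changes further down key off before using the SQBS/EQBS instructions. A hedged usage sketch, assuming chsc_determine_css_characteristics() has already run:

	/* Illustrative check: QEBSM may only be used when the facility
	 * bit is set in the general characteristics. */
	static inline int qebsm_available(void)
	{
		return css_characteristics_avail &&
		       css_general_characteristics.qebsm;
	}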
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 185bc73c3ec..7376bc87206 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -1,7 +1,7 @@
/*
* drivers/s390/cio/cio.c
* S/390 common I/O routines -- low level i/o calls
- * $Revision: 1.135 $
+ * $Revision: 1.138 $
*
* Copyright (C) 1999-2002 IBM Deutschland Entwicklung GmbH,
* IBM Corporation
@@ -135,7 +135,7 @@ cio_tpi(void)
return 0;
irb = (struct irb *) __LC_IRB;
/* Store interrupt response block to lowcore. */
- if (tsch (tpi_info->irq, irb) != 0)
+ if (tsch (tpi_info->schid, irb) != 0)
/* Not status pending or not operational. */
return 1;
sch = (struct subchannel *)(unsigned long)tpi_info->intparm;
@@ -163,10 +163,11 @@ cio_start_handle_notoper(struct subchannel *sch, __u8 lpm)
else
sch->lpm = 0;
- stsch (sch->irq, &sch->schib);
+ stsch (sch->schid, &sch->schib);
CIO_MSG_EVENT(0, "cio_start: 'not oper' status for "
- "subchannel %04x!\n", sch->irq);
+ "subchannel 0.%x.%04x!\n", sch->schid.ssid,
+ sch->schid.sch_no);
sprintf(dbf_text, "no%s", sch->dev.bus_id);
CIO_TRACE_EVENT(0, dbf_text);
CIO_HEX_EVENT(0, &sch->schib, sizeof (struct schib));
@@ -194,7 +195,7 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */
sch->orb.spnd = sch->options.suspend;
sch->orb.ssic = sch->options.suspend && sch->options.inter;
sch->orb.lpm = (lpm != 0) ? (lpm & sch->opm) : sch->lpm;
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
/*
* for 64 bit we always support 64 bit IDAWs with 4k page size only
*/
@@ -204,7 +205,7 @@ cio_start_key (struct subchannel *sch, /* subchannel structure */
sch->orb.key = key >> 4;
/* issue "Start Subchannel" */
sch->orb.cpa = (__u32) __pa (cpa);
- ccode = ssch (sch->irq, &sch->orb);
+ ccode = ssch (sch->schid, &sch->orb);
/* process condition code */
sprintf (dbf_txt, "ccode:%d", ccode);
@@ -243,7 +244,7 @@ cio_resume (struct subchannel *sch)
CIO_TRACE_EVENT (4, "resIO");
CIO_TRACE_EVENT (4, sch->dev.bus_id);
- ccode = rsch (sch->irq);
+ ccode = rsch (sch->schid);
sprintf (dbf_txt, "ccode:%d", ccode);
CIO_TRACE_EVENT (4, dbf_txt);
@@ -283,7 +284,7 @@ cio_halt(struct subchannel *sch)
/*
* Issue "Halt subchannel" and process condition code
*/
- ccode = hsch (sch->irq);
+ ccode = hsch (sch->schid);
sprintf (dbf_txt, "ccode:%d", ccode);
CIO_TRACE_EVENT (2, dbf_txt);
@@ -318,7 +319,7 @@ cio_clear(struct subchannel *sch)
/*
* Issue "Clear subchannel" and process condition code
*/
- ccode = csch (sch->irq);
+ ccode = csch (sch->schid);
sprintf (dbf_txt, "ccode:%d", ccode);
CIO_TRACE_EVENT (2, dbf_txt);
@@ -351,7 +352,7 @@ cio_cancel (struct subchannel *sch)
CIO_TRACE_EVENT (2, "cancelIO");
CIO_TRACE_EVENT (2, sch->dev.bus_id);
- ccode = xsch (sch->irq);
+ ccode = xsch (sch->schid);
sprintf (dbf_txt, "ccode:%d", ccode);
CIO_TRACE_EVENT (2, dbf_txt);
@@ -359,7 +360,7 @@ cio_cancel (struct subchannel *sch)
switch (ccode) {
case 0: /* success */
/* Update information in scsw. */
- stsch (sch->irq, &sch->schib);
+ stsch (sch->schid, &sch->schib);
return 0;
case 1: /* status pending */
return -EBUSY;
@@ -381,7 +382,7 @@ cio_modify (struct subchannel *sch)
ret = 0;
for (retry = 0; retry < 5; retry++) {
- ccode = msch_err (sch->irq, &sch->schib);
+ ccode = msch_err (sch->schid, &sch->schib);
if (ccode < 0) /* -EIO if msch gets a program check. */
return ccode;
switch (ccode) {
@@ -414,7 +415,7 @@ cio_enable_subchannel (struct subchannel *sch, unsigned int isc)
CIO_TRACE_EVENT (2, "ensch");
CIO_TRACE_EVENT (2, sch->dev.bus_id);
- ccode = stsch (sch->irq, &sch->schib);
+ ccode = stsch (sch->schid, &sch->schib);
if (ccode)
return -ENODEV;
@@ -432,13 +433,13 @@ cio_enable_subchannel (struct subchannel *sch, unsigned int isc)
*/
sch->schib.pmcw.csense = 0;
if (ret == 0) {
- stsch (sch->irq, &sch->schib);
+ stsch (sch->schid, &sch->schib);
if (sch->schib.pmcw.ena)
break;
}
if (ret == -EBUSY) {
struct irb irb;
- if (tsch(sch->irq, &irb) != 0)
+ if (tsch(sch->schid, &irb) != 0)
break;
}
}
@@ -461,7 +462,7 @@ cio_disable_subchannel (struct subchannel *sch)
CIO_TRACE_EVENT (2, "dissch");
CIO_TRACE_EVENT (2, sch->dev.bus_id);
- ccode = stsch (sch->irq, &sch->schib);
+ ccode = stsch (sch->schid, &sch->schib);
if (ccode == 3) /* Not operational. */
return -ENODEV;
@@ -485,7 +486,7 @@ cio_disable_subchannel (struct subchannel *sch)
*/
break;
if (ret == 0) {
- stsch (sch->irq, &sch->schib);
+ stsch (sch->schid, &sch->schib);
if (!sch->schib.pmcw.ena)
break;
}
@@ -508,12 +509,12 @@ cio_disable_subchannel (struct subchannel *sch)
* -ENODEV for subchannels with invalid device number or blacklisted devices
*/
int
-cio_validate_subchannel (struct subchannel *sch, unsigned int irq)
+cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid)
{
char dbf_txt[15];
int ccode;
- sprintf (dbf_txt, "valsch%x", irq);
+ sprintf (dbf_txt, "valsch%x", schid.sch_no);
CIO_TRACE_EVENT (4, dbf_txt);
/* Nuke all fields. */
@@ -522,17 +523,20 @@ cio_validate_subchannel (struct subchannel *sch, unsigned int irq)
spin_lock_init(&sch->lock);
/* Set a name for the subchannel */
- snprintf (sch->dev.bus_id, BUS_ID_SIZE, "0.0.%04x", irq);
+ snprintf (sch->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x", schid.ssid,
+ schid.sch_no);
/*
* The first subchannel that is not-operational (ccode==3)
* indicates that there aren't any more devices available.
+ * If stsch gets an exception, it means the current subchannel set
+ * is not valid.
*/
- sch->irq = irq;
- ccode = stsch (irq, &sch->schib);
+ ccode = stsch_err (schid, &sch->schib);
if (ccode)
- return -ENXIO;
+ return (ccode == 3) ? -ENXIO : ccode;
+ sch->schid = schid;
/* Copy subchannel type from path management control word. */
sch->st = sch->schib.pmcw.st;
@@ -541,9 +545,9 @@ cio_validate_subchannel (struct subchannel *sch, unsigned int irq)
*/
if (sch->st != 0) {
CIO_DEBUG(KERN_INFO, 0,
- "Subchannel %04X reports "
+ "Subchannel 0.%x.%04x reports "
"non-I/O subchannel type %04X\n",
- sch->irq, sch->st);
+ sch->schid.ssid, sch->schid.sch_no, sch->st);
/* We stop here for non-io subchannels. */
return sch->st;
}
@@ -554,26 +558,29 @@ cio_validate_subchannel (struct subchannel *sch, unsigned int irq)
return -ENODEV;
/* Devno is valid. */
- if (is_blacklisted (sch->schib.pmcw.dev)) {
+ if (is_blacklisted (sch->schid.ssid, sch->schib.pmcw.dev)) {
/*
* This device must not be known to Linux. So we simply
* say that there is no device and return ENODEV.
*/
CIO_MSG_EVENT(0, "Blacklisted device detected "
- "at devno %04X\n", sch->schib.pmcw.dev);
+ "at devno %04X, subchannel set %x\n",
+ sch->schib.pmcw.dev, sch->schid.ssid);
return -ENODEV;
}
sch->opm = 0xff;
- chsc_validate_chpids(sch);
+ if (!cio_is_console(sch->schid))
+ chsc_validate_chpids(sch);
sch->lpm = sch->schib.pmcw.pim &
sch->schib.pmcw.pam &
sch->schib.pmcw.pom &
sch->opm;
CIO_DEBUG(KERN_INFO, 0,
- "Detected device %04X on subchannel %04X"
+ "Detected device %04x on subchannel 0.%x.%04X"
" - PIM = %02X, PAM = %02X, POM = %02X\n",
- sch->schib.pmcw.dev, sch->irq, sch->schib.pmcw.pim,
+ sch->schib.pmcw.dev, sch->schid.ssid,
+ sch->schid.sch_no, sch->schib.pmcw.pim,
sch->schib.pmcw.pam, sch->schib.pmcw.pom);
/*
@@ -632,7 +639,7 @@ do_IRQ (struct pt_regs *regs)
if (sch)
spin_lock(&sch->lock);
/* Store interrupt response block to lowcore. */
- if (tsch (tpi_info->irq, irb) == 0 && sch) {
+ if (tsch (tpi_info->schid, irb) == 0 && sch) {
/* Keep subchannel information word up to date. */
memcpy (&sch->schib.scsw, &irb->scsw,
sizeof (irb->scsw));
@@ -691,28 +698,36 @@ wait_cons_dev (void)
}
static int
-cio_console_irq(void)
+cio_test_for_console(struct subchannel_id schid, void *data)
{
- int irq;
+ if (stsch_err(schid, &console_subchannel.schib) != 0)
+ return -ENXIO;
+ if (console_subchannel.schib.pmcw.dnv &&
+ console_subchannel.schib.pmcw.dev ==
+ console_devno) {
+ console_irq = schid.sch_no;
+ return 1; /* found */
+ }
+ return 0;
+}
+
+static int
+cio_get_console_sch_no(void)
+{
+ struct subchannel_id schid;
+ init_subchannel_id(&schid);
if (console_irq != -1) {
/* VM provided us with the irq number of the console. */
- if (stsch(console_irq, &console_subchannel.schib) != 0 ||
+ schid.sch_no = console_irq;
+ if (stsch(schid, &console_subchannel.schib) != 0 ||
!console_subchannel.schib.pmcw.dnv)
return -1;
console_devno = console_subchannel.schib.pmcw.dev;
} else if (console_devno != -1) {
/* At least the console device number is known. */
- for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) {
- if (stsch(irq, &console_subchannel.schib) != 0)
- break;
- if (console_subchannel.schib.pmcw.dnv &&
- console_subchannel.schib.pmcw.dev ==
- console_devno) {
- console_irq = irq;
- break;
- }
- }
+ for_each_subchannel(cio_test_for_console, NULL);
if (console_irq == -1)
return -1;
} else {
@@ -728,17 +743,20 @@ cio_console_irq(void)
struct subchannel *
cio_probe_console(void)
{
- int irq, ret;
+ int sch_no, ret;
+ struct subchannel_id schid;
if (xchg(&console_subchannel_in_use, 1) != 0)
return ERR_PTR(-EBUSY);
- irq = cio_console_irq();
- if (irq == -1) {
+ sch_no = cio_get_console_sch_no();
+ if (sch_no == -1) {
console_subchannel_in_use = 0;
return ERR_PTR(-ENODEV);
}
memset(&console_subchannel, 0, sizeof(struct subchannel));
- ret = cio_validate_subchannel(&console_subchannel, irq);
+ init_subchannel_id(&schid);
+ schid.sch_no = sch_no;
+ ret = cio_validate_subchannel(&console_subchannel, schid);
if (ret) {
console_subchannel_in_use = 0;
return ERR_PTR(-ENODEV);
@@ -770,11 +788,11 @@ cio_release_console(void)
/* Bah... hack to catch console special sausages. */
int
-cio_is_console(int irq)
+cio_is_console(struct subchannel_id schid)
{
if (!console_subchannel_in_use)
return 0;
- return (irq == console_subchannel.irq);
+ return schid_equal(&schid, &console_subchannel.schid);
}
struct subchannel *
@@ -787,7 +805,7 @@ cio_get_console_subchannel(void)
#endif
static inline int
-__disable_subchannel_easy(unsigned int schid, struct schib *schib)
+__disable_subchannel_easy(struct subchannel_id schid, struct schib *schib)
{
int retry, cc;
@@ -805,7 +823,7 @@ __disable_subchannel_easy(unsigned int schid, struct schib *schib)
}
static inline int
-__clear_subchannel_easy(unsigned int schid)
+__clear_subchannel_easy(struct subchannel_id schid)
{
int retry;
@@ -815,8 +833,8 @@ __clear_subchannel_easy(unsigned int schid)
struct tpi_info ti;
if (tpi(&ti)) {
- tsch(ti.irq, (struct irb *)__LC_IRB);
- if (ti.irq == schid)
+ tsch(ti.schid, (struct irb *)__LC_IRB);
+ if (schid_equal(&ti.schid, &schid))
return 0;
}
udelay(100);
@@ -825,31 +843,33 @@ __clear_subchannel_easy(unsigned int schid)
}
extern void do_reipl(unsigned long devno);
+static int
+__shutdown_subchannel_easy(struct subchannel_id schid, void *data)
+{
+ struct schib schib;
+
+ if (stsch_err(schid, &schib))
+ return -ENXIO;
+ if (!schib.pmcw.ena)
+ return 0;
+ switch(__disable_subchannel_easy(schid, &schib)) {
+ case 0:
+ case -ENODEV:
+ break;
+ default: /* -EBUSY */
+ if (__clear_subchannel_easy(schid))
+ break; /* give up... */
+ stsch(schid, &schib);
+ __disable_subchannel_easy(schid, &schib);
+ }
+ return 0;
+}
-/* Clear all subchannels. */
void
clear_all_subchannels(void)
{
- unsigned int schid;
-
local_irq_disable();
- for (schid=0;schid<=highest_subchannel;schid++) {
- struct schib schib;
- if (stsch(schid, &schib))
- break; /* break out of the loop */
- if (!schib.pmcw.ena)
- continue;
- switch(__disable_subchannel_easy(schid, &schib)) {
- case 0:
- case -ENODEV:
- break;
- default: /* -EBUSY */
- if (__clear_subchannel_easy(schid))
- break; /* give up... jump out of switch */
- stsch(schid, &schib);
- __disable_subchannel_easy(schid, &schib);
- }
- }
+ for_each_subchannel(__shutdown_subchannel_easy, NULL);
}
/* Make sure all subchannels are quiet before we re-ipl an lpar. */
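The two loops removed above both collapse into for_each_subchannel() callbacks. The contract, per the iterator added in css.c below: the callback runs for every schid; a non-zero return ends the walk of the current subchannel set, after which iteration continues with the next set up to max_ssid. An illustrative callback that counts enabled subchannels:

	/* Illustrative for_each_subchannel() callback. Returning -ENXIO
	 * when stsch_err fails skips to the next subchannel set -- the
	 * same convention __shutdown_subchannel_easy uses above. */
	static int count_enabled(struct subchannel_id schid, void *data)
	{
		struct schib schib;
		int *count = data;

		if (stsch_err(schid, &schib))
			return -ENXIO;	/* no more subchannels in this set */
		if (schib.pmcw.ena)
			(*count)++;
		return 0;		/* keep iterating */
	}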
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index c50a9da420a..0ca987344e0 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -1,6 +1,8 @@
#ifndef S390_CIO_H
#define S390_CIO_H
+#include "schid.h"
+
/*
* where we put the ssd info
*/
@@ -83,7 +85,7 @@ struct orb {
/* subchannel data structure used by I/O subroutines */
struct subchannel {
- unsigned int irq; /* aka. subchannel number */
+ struct subchannel_id schid;
spinlock_t lock; /* subchannel lock */
enum {
@@ -114,7 +116,7 @@ struct subchannel {
#define to_subchannel(n) container_of(n, struct subchannel, dev)
-extern int cio_validate_subchannel (struct subchannel *, unsigned int);
+extern int cio_validate_subchannel (struct subchannel *, struct subchannel_id);
extern int cio_enable_subchannel (struct subchannel *, unsigned int);
extern int cio_disable_subchannel (struct subchannel *);
extern int cio_cancel (struct subchannel *);
@@ -127,14 +129,15 @@ extern int cio_cancel (struct subchannel *);
extern int cio_set_options (struct subchannel *, int);
extern int cio_get_options (struct subchannel *);
extern int cio_modify (struct subchannel *);
+
/* Use with care. */
#ifdef CONFIG_CCW_CONSOLE
extern struct subchannel *cio_probe_console(void);
extern void cio_release_console(void);
-extern int cio_is_console(int irq);
+extern int cio_is_console(struct subchannel_id);
extern struct subchannel *cio_get_console_subchannel(void);
#else
-#define cio_is_console(irq) 0
+#define cio_is_console(schid) 0
#define cio_get_console_subchannel() NULL
#endif
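schid.h itself is not among the hunks shown, but the code above assumes roughly the following layout -- a packed 32-bit value, which is why a struct subchannel_id can be passed in a register where `irq | 0x10000` used to be (the `one` bit is that 0x10000):

	/* Assumed layout (see schid.h); illustrative, not part of this diff. */
	struct subchannel_id {
		__u32 reserved : 13;
		__u32 ssid     :  2;	/* subchannel set id */
		__u32 one      :  1;	/* always 1 for a valid schid */
		__u32 sch_no   : 16;	/* subchannel number */
	} __attribute__ ((packed, aligned(4)));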
diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
index b978f7fe832..0b03714e696 100644
--- a/drivers/s390/cio/cmf.c
+++ b/drivers/s390/cio/cmf.c
@@ -1,5 +1,5 @@
/*
- * linux/drivers/s390/cio/cmf.c ($Revision: 1.16 $)
+ * linux/drivers/s390/cio/cmf.c ($Revision: 1.19 $)
*
* Linux on zSeries Channel Measurement Facility support
*
@@ -178,7 +178,7 @@ set_schib(struct ccw_device *cdev, u32 mme, int mbfc, unsigned long address)
/* msch can silently fail, so do it again if necessary */
for (retry = 0; retry < 3; retry++) {
/* prepare schib */
- stsch(sch->irq, schib);
+ stsch(sch->schid, schib);
schib->pmcw.mme = mme;
schib->pmcw.mbfc = mbfc;
/* address can be either a block address or a block index */
@@ -188,7 +188,7 @@ set_schib(struct ccw_device *cdev, u32 mme, int mbfc, unsigned long address)
schib->pmcw.mbi = address;
/* try to submit it */
- switch(ret = msch_err(sch->irq, schib)) {
+ switch(ret = msch_err(sch->schid, schib)) {
case 0:
break;
case 1:
@@ -202,7 +202,7 @@ set_schib(struct ccw_device *cdev, u32 mme, int mbfc, unsigned long address)
ret = -EINVAL;
break;
}
- stsch(sch->irq, schib); /* restore the schib */
+ stsch(sch->schid, schib); /* restore the schib */
if (ret)
break;
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 555119cacc2..e565193650c 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -1,7 +1,7 @@
/*
* drivers/s390/cio/css.c
* driver for channel subsystem
- * $Revision: 1.85 $
+ * $Revision: 1.93 $
*
* Copyright (C) 2002 IBM Deutschland Entwicklung GmbH,
* IBM Corporation
@@ -21,19 +21,35 @@
#include "ioasm.h"
#include "chsc.h"
-unsigned int highest_subchannel;
int need_rescan = 0;
int css_init_done = 0;
+static int max_ssid = 0;
+
+struct channel_subsystem *css[__MAX_CSSID + 1];
-struct pgid global_pgid;
int css_characteristics_avail = 0;
-struct device css_bus_device = {
- .bus_id = "css0",
-};
+inline int
+for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *data)
+{
+ struct subchannel_id schid;
+ int ret;
+
+ init_subchannel_id(&schid);
+ ret = -ENODEV;
+ do {
+ do {
+ ret = fn(schid, data);
+ if (ret)
+ break;
+ } while (schid.sch_no++ < __MAX_SUBCHANNEL);
+ schid.sch_no = 0;
+ } while (schid.ssid++ < max_ssid);
+ return ret;
+}
static struct subchannel *
-css_alloc_subchannel(int irq)
+css_alloc_subchannel(struct subchannel_id schid)
{
struct subchannel *sch;
int ret;
@@ -41,13 +57,11 @@ css_alloc_subchannel(int irq)
sch = kmalloc (sizeof (*sch), GFP_KERNEL | GFP_DMA);
if (sch == NULL)
return ERR_PTR(-ENOMEM);
- ret = cio_validate_subchannel (sch, irq);
+ ret = cio_validate_subchannel (sch, schid);
if (ret < 0) {
kfree(sch);
return ERR_PTR(ret);
}
- if (irq > highest_subchannel)
- highest_subchannel = irq;
if (sch->st != SUBCHANNEL_TYPE_IO) {
/* For now we ignore all non-io subchannels. */
@@ -87,7 +101,7 @@ css_subchannel_release(struct device *dev)
struct subchannel *sch;
sch = to_subchannel(dev);
- if (!cio_is_console(sch->irq))
+ if (!cio_is_console(sch->schid))
kfree(sch);
}
@@ -99,7 +113,7 @@ css_register_subchannel(struct subchannel *sch)
int ret;
/* Initialize the subchannel structure */
- sch->dev.parent = &css_bus_device;
+ sch->dev.parent = &css[0]->device;
sch->dev.bus = &css_bus_type;
sch->dev.release = &css_subchannel_release;
@@ -114,12 +128,12 @@ css_register_subchannel(struct subchannel *sch)
}
int
-css_probe_device(int irq)
+css_probe_device(struct subchannel_id schid)
{
int ret;
struct subchannel *sch;
- sch = css_alloc_subchannel(irq);
+ sch = css_alloc_subchannel(schid);
if (IS_ERR(sch))
return PTR_ERR(sch);
ret = css_register_subchannel(sch);
@@ -132,26 +146,26 @@ static int
check_subchannel(struct device * dev, void * data)
{
struct subchannel *sch;
- int irq = (unsigned long)data;
+ struct subchannel_id *schid = data;
sch = to_subchannel(dev);
- return (sch->irq == irq);
+ return schid_equal(&sch->schid, schid);
}
struct subchannel *
-get_subchannel_by_schid(int irq)
+get_subchannel_by_schid(struct subchannel_id schid)
{
struct device *dev;
dev = bus_find_device(&css_bus_type, NULL,
- (void *)(unsigned long)irq, check_subchannel);
+ (void *)&schid, check_subchannel);
return dev ? to_subchannel(dev) : NULL;
}
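	/*
	 * Aside: get_subchannel_by_schid() is the stock bus_find_device()
	 * pattern -- the match callback compares each device against the
	 * opaque key, and a hit is returned with a reference held.
	 * Illustrative caller (the put_device is mandatory):
	 */
	static int probe_existing(struct subchannel_id schid)
	{
		struct subchannel *sch;

		sch = get_subchannel_by_schid(schid);
		if (!sch)
			return -ENODEV;
		/* ... inspect sch under sch->lock as needed ... */
		put_device(&sch->dev);	/* drop bus_find_device's reference */
		return 0;
	}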
static inline int
-css_get_subchannel_status(struct subchannel *sch, int schid)
+css_get_subchannel_status(struct subchannel *sch, struct subchannel_id schid)
{
struct schib schib;
int cc;
@@ -170,13 +184,13 @@ css_get_subchannel_status(struct subchannel *sch, int schid)
}
static int
-css_evaluate_subchannel(int irq, int slow)
+css_evaluate_subchannel(struct subchannel_id schid, int slow)
{
int event, ret, disc;
struct subchannel *sch;
unsigned long flags;
- sch = get_subchannel_by_schid(irq);
+ sch = get_subchannel_by_schid(schid);
disc = sch ? device_is_disconnected(sch) : 0;
if (disc && slow) {
if (sch)
@@ -194,9 +208,10 @@ css_evaluate_subchannel(int irq, int slow)
put_device(&sch->dev);
return -EAGAIN; /* Will be done on the slow path. */
}
- event = css_get_subchannel_status(sch, irq);
- CIO_MSG_EVENT(4, "Evaluating schid %04x, event %d, %s, %s path.\n",
- irq, event, sch?(disc?"disconnected":"normal"):"unknown",
+ event = css_get_subchannel_status(sch, schid);
+ CIO_MSG_EVENT(4, "Evaluating schid 0.%x.%04x, event %d, %s, %s path.\n",
+ schid.ssid, schid.sch_no, event,
+ sch?(disc?"disconnected":"normal"):"unknown",
slow?"slow":"fast");
switch (event) {
case CIO_NO_PATH:
@@ -253,7 +268,7 @@ css_evaluate_subchannel(int irq, int slow)
sch->schib.pmcw.intparm = 0;
cio_modify(sch);
put_device(&sch->dev);
- ret = css_probe_device(irq);
+ ret = css_probe_device(schid);
} else {
/*
* We can't immediately deregister the disconnected
@@ -272,7 +287,7 @@ css_evaluate_subchannel(int irq, int slow)
device_trigger_reprobe(sch);
spin_unlock_irqrestore(&sch->lock, flags);
}
- ret = sch ? 0 : css_probe_device(irq);
+ ret = sch ? 0 : css_probe_device(schid);
break;
default:
BUG();
@@ -281,28 +296,15 @@ css_evaluate_subchannel(int irq, int slow)
return ret;
}
-static void
-css_rescan_devices(void)
+static int
+css_rescan_devices(struct subchannel_id schid, void *data)
{
- int irq, ret;
-
- for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) {
- ret = css_evaluate_subchannel(irq, 1);
- /* No more memory. It doesn't make sense to continue. No
- * panic because this can happen in midflight and just
- * because we can't use a new device is no reason to crash
- * the system. */
- if (ret == -ENOMEM)
- break;
- /* -ENXIO indicates that there are no more subchannels. */
- if (ret == -ENXIO)
- break;
- }
+ return css_evaluate_subchannel(schid, 1);
}
struct slow_subchannel {
struct list_head slow_list;
- unsigned long schid;
+ struct subchannel_id schid;
};
static LIST_HEAD(slow_subchannels_head);
@@ -315,7 +317,7 @@ css_trigger_slow_path(void)
if (need_rescan) {
need_rescan = 0;
- css_rescan_devices();
+ for_each_subchannel(css_rescan_devices, NULL);
return;
}
@@ -354,23 +356,31 @@ css_reiterate_subchannels(void)
* Called from the machine check handler for subchannel report words.
*/
int
-css_process_crw(int irq)
+css_process_crw(int rsid1, int rsid2)
{
int ret;
+ struct subchannel_id mchk_schid;
- CIO_CRW_EVENT(2, "source is subchannel %04X\n", irq);
+ CIO_CRW_EVENT(2, "source is subchannel %04X, subsystem id %x\n",
+ rsid1, rsid2);
if (need_rescan)
/* We need to iterate all subchannels anyway. */
return -EAGAIN;
+
+ init_subchannel_id(&mchk_schid);
+ mchk_schid.sch_no = rsid1;
+ if (rsid2 != 0)
+ mchk_schid.ssid = (rsid2 >> 8) & 3;
+
/*
* Since we are always presented with IPI in the CRW, we have to
* use stsch() to find out if the subchannel in question has come
* or gone.
*/
- ret = css_evaluate_subchannel(irq, 0);
+ ret = css_evaluate_subchannel(mchk_schid, 0);
if (ret == -EAGAIN) {
- if (css_enqueue_subchannel_slow(irq)) {
+ if (css_enqueue_subchannel_slow(mchk_schid)) {
css_clear_subchannel_slow_list();
need_rescan = 1;
}
@@ -378,22 +388,83 @@ css_process_crw(int irq)
return ret;
}
-static void __init
-css_generate_pgid(void)
+static int __init
+__init_channel_subsystem(struct subchannel_id schid, void *data)
{
- /* Let's build our path group ID here. */
- if (css_characteristics_avail && css_general_characteristics.mcss)
- global_pgid.cpu_addr = 0x8000;
+ struct subchannel *sch;
+ int ret;
+
+ if (cio_is_console(schid))
+ sch = cio_get_console_subchannel();
else {
+ sch = css_alloc_subchannel(schid);
+ if (IS_ERR(sch))
+ ret = PTR_ERR(sch);
+ else
+ ret = 0;
+ switch (ret) {
+ case 0:
+ break;
+ case -ENOMEM:
+ panic("Out of memory in init_channel_subsystem\n");
+ /* -ENXIO: no more subchannels. */
+ case -ENXIO:
+ return ret;
+ default:
+ return 0;
+ }
+ }
+ /*
+ * We register ALL valid subchannels in ioinfo, even those
+ * that have been present before init_channel_subsystem.
+ * These subchannels can't have been registered yet (kmalloc
+ * not working) so we do it now. This is true e.g. for the
+ * console subchannel.
+ */
+ css_register_subchannel(sch);
+ return 0;
+}
+
+static void __init
+css_generate_pgid(struct channel_subsystem *css, u32 tod_high)
+{
+ if (css_characteristics_avail && css_general_characteristics.mcss) {
+ css->global_pgid.pgid_high.ext_cssid.version = 0x80;
+ css->global_pgid.pgid_high.ext_cssid.cssid = css->cssid;
+ } else {
#ifdef CONFIG_SMP
- global_pgid.cpu_addr = hard_smp_processor_id();
+ css->global_pgid.pgid_high.cpu_addr = hard_smp_processor_id();
#else
- global_pgid.cpu_addr = 0;
+ css->global_pgid.pgid_high.cpu_addr = 0;
#endif
}
- global_pgid.cpu_id = ((cpuid_t *) __LC_CPUID)->ident;
- global_pgid.cpu_model = ((cpuid_t *) __LC_CPUID)->machine;
- global_pgid.tod_high = (__u32) (get_clock() >> 32);
+ css->global_pgid.cpu_id = ((cpuid_t *) __LC_CPUID)->ident;
+ css->global_pgid.cpu_model = ((cpuid_t *) __LC_CPUID)->machine;
+ css->global_pgid.tod_high = tod_high;
+
+}
+
+static void
+channel_subsystem_release(struct device *dev)
+{
+ struct channel_subsystem *css;
+
+ css = to_css(dev);
+ kfree(css);
+}
+
+static inline void __init
+setup_css(int nr)
+{
+ u32 tod_high;
+
+ memset(css[nr], 0, sizeof(struct channel_subsystem));
+ css[nr]->valid = 1;
+ css[nr]->cssid = nr;
+ sprintf(css[nr]->device.bus_id, "css%x", nr);
+ css[nr]->device.release = channel_subsystem_release;
+ tod_high = (u32) (get_clock() >> 32);
+ css_generate_pgid(css[nr], tod_high);
}
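	/*
	 * Aside: pgid_high is a union (see the css.h hunk below). Writing
	 * version = 0x80 into ext_cssid sets the same high bit the old
	 * code set via cpu_addr = 0x8000, but the low byte now carries
	 * the cssid, keeping path-group IDs distinct per channel
	 * subsystem. Illustrative compile-time layout check:
	 */
	static inline void check_pgid_high_layout(void)
	{
		BUILD_BUG_ON(sizeof(struct extended_cssid) != 2);
	}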
/*
@@ -404,53 +475,50 @@ css_generate_pgid(void)
static int __init
init_channel_subsystem (void)
{
- int ret, irq;
+ int ret, i;
if (chsc_determine_css_characteristics() == 0)
css_characteristics_avail = 1;
- css_generate_pgid();
-
if ((ret = bus_register(&css_bus_type)))
goto out;
- if ((ret = device_register (&css_bus_device)))
- goto out_bus;
+ /* Try to enable MSS. */
+ ret = chsc_enable_facility(CHSC_SDA_OC_MSS);
+ switch (ret) {
+ case 0: /* Success. */
+ max_ssid = __MAX_SSID;
+ break;
+ case -ENOMEM:
+ goto out_bus;
+ default:
+ max_ssid = 0;
+ }
+ /* Setup css structure. */
+ for (i = 0; i <= __MAX_CSSID; i++) {
+ css[i] = kmalloc(sizeof(struct channel_subsystem), GFP_KERNEL);
+ if (!css[i]) {
+ ret = -ENOMEM;
+ goto out_unregister;
+ }
+ setup_css(i);
+ ret = device_register(&css[i]->device);
+ if (ret)
+ goto out_free;
+ }
css_init_done = 1;
ctl_set_bit(6, 28);
- for (irq = 0; irq < __MAX_SUBCHANNELS; irq++) {
- struct subchannel *sch;
-
- if (cio_is_console(irq))
- sch = cio_get_console_subchannel();
- else {
- sch = css_alloc_subchannel(irq);
- if (IS_ERR(sch))
- ret = PTR_ERR(sch);
- else
- ret = 0;
- if (ret == -ENOMEM)
- panic("Out of memory in "
- "init_channel_subsystem\n");
- /* -ENXIO: no more subchannels. */
- if (ret == -ENXIO)
- break;
- if (ret)
- continue;
- }
- /*
- * We register ALL valid subchannels in ioinfo, even those
- * that have been present before init_channel_subsystem.
- * These subchannels can't have been registered yet (kmalloc
- * not working) so we do it now. This is true e.g. for the
- * console subchannel.
- */
- css_register_subchannel(sch);
- }
+ for_each_subchannel(__init_channel_subsystem, NULL);
return 0;
-
+out_free:
+ kfree(css[i]);
+out_unregister:
+ while (i > 0) {
+ i--;
+ device_unregister(&css[i]->device);
+ }
out_bus:
bus_unregister(&css_bus_type);
out:
@@ -481,47 +549,8 @@ struct bus_type css_bus_type = {
subsys_initcall(init_channel_subsystem);
-/*
- * Register root devices for some drivers. The release function must not be
- * in the device drivers, so we do it here.
- */
-static void
-s390_root_dev_release(struct device *dev)
-{
- kfree(dev);
-}
-
-struct device *
-s390_root_dev_register(const char *name)
-{
- struct device *dev;
- int ret;
-
- if (!strlen(name))
- return ERR_PTR(-EINVAL);
- dev = kmalloc(sizeof(struct device), GFP_KERNEL);
- if (!dev)
- return ERR_PTR(-ENOMEM);
- memset(dev, 0, sizeof(struct device));
- strncpy(dev->bus_id, name, min(strlen(name), (size_t)BUS_ID_SIZE));
- dev->release = s390_root_dev_release;
- ret = device_register(dev);
- if (ret) {
- kfree(dev);
- return ERR_PTR(ret);
- }
- return dev;
-}
-
-void
-s390_root_dev_unregister(struct device *dev)
-{
- if (dev)
- device_unregister(dev);
-}
-
int
-css_enqueue_subchannel_slow(unsigned long schid)
+css_enqueue_subchannel_slow(struct subchannel_id schid)
{
struct slow_subchannel *new_slow_sch;
unsigned long flags;
@@ -564,6 +593,4 @@ css_slow_subchannels_exist(void)
MODULE_LICENSE("GPL");
EXPORT_SYMBOL(css_bus_type);
-EXPORT_SYMBOL(s390_root_dev_register);
-EXPORT_SYMBOL(s390_root_dev_unregister);
EXPORT_SYMBOL_GPL(css_characteristics_avail);
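The registration loop in init_channel_subsystem() above uses the classic partial-unwind idiom: out_free releases the one element that never got registered, out_unregister walks back over the ones that did. Condensed to its shape (illustrative):

	/* Illustrative shape of the unwind in init_channel_subsystem(). */
	static int register_all(struct device *devs[], int n)
	{
		int i, ret;

		for (i = 0; i < n; i++) {
			ret = device_register(devs[i]);
			if (ret)
				goto out_unregister;
		}
		return 0;
	out_unregister:
		while (i-- > 0)
			device_unregister(devs[i]);	/* release() frees them */
		return ret;
	}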
diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h
index 2004a6c4938..251ebd7a7d3 100644
--- a/drivers/s390/cio/css.h
+++ b/drivers/s390/cio/css.h
@@ -6,6 +6,8 @@
#include <asm/cio.h>
+#include "schid.h"
+
/*
* path grouping stuff
*/
@@ -33,19 +35,25 @@ struct path_state {
__u8 resvd : 3; /* reserved */
} __attribute__ ((packed));
+struct extended_cssid {
+ u8 version;
+ u8 cssid;
+} __attribute__ ((packed));
+
struct pgid {
union {
__u8 fc; /* SPID function code */
struct path_state ps; /* SNID path state */
} inf;
- __u32 cpu_addr : 16; /* CPU address */
+ union {
+ __u32 cpu_addr : 16; /* CPU address */
+ struct extended_cssid ext_cssid;
+ } pgid_high;
__u32 cpu_id : 24; /* CPU identification */
__u32 cpu_model : 16; /* CPU model */
__u32 tod_high; /* high word TOD clock */
} __attribute__ ((packed));
-extern struct pgid global_pgid;
-
#define MAX_CIWS 8
/*
@@ -68,7 +76,8 @@ struct ccw_device_private {
atomic_t onoff;
unsigned long registered;
__u16 devno; /* device number */
- __u16 irq; /* subchannel number */
+ __u16 sch_no; /* subchannel number */
+ __u8 ssid; /* subchannel set id */
__u8 imask; /* lpm mask for SNID/SID/SPGID */
int iretry; /* retry counter SNID/SID/SPGID */
struct {
@@ -121,15 +130,27 @@ struct css_driver {
extern struct bus_type css_bus_type;
extern struct css_driver io_subchannel_driver;
-int css_probe_device(int irq);
-extern struct subchannel * get_subchannel_by_schid(int irq);
-extern unsigned int highest_subchannel;
+extern int css_probe_device(struct subchannel_id);
+extern struct subchannel * get_subchannel_by_schid(struct subchannel_id);
extern int css_init_done;
-
-#define __MAX_SUBCHANNELS 65536
+extern int for_each_subchannel(int(*fn)(struct subchannel_id, void *), void *);
+
+#define __MAX_SUBCHANNEL 65535
+#define __MAX_SSID 3
+#define __MAX_CHPID 255
+#define __MAX_CSSID 0
+
+struct channel_subsystem {
+ u8 cssid;
+ int valid;
+ struct channel_path *chps[__MAX_CHPID + 1];
+ struct device device;
+ struct pgid global_pgid;
+};
+#define to_css(dev) container_of(dev, struct channel_subsystem, device)
extern struct bus_type css_bus_type;
-extern struct device css_bus_device;
+extern struct channel_subsystem *css[];
/* Some helper functions for disconnected state. */
int device_is_disconnected(struct subchannel *);
@@ -144,7 +165,7 @@ void device_set_waiting(struct subchannel *);
void device_kill_pending_timer(struct subchannel *);
/* Helper functions to build lists for the slow path. */
-int css_enqueue_subchannel_slow(unsigned long schid);
+extern int css_enqueue_subchannel_slow(struct subchannel_id schid);
void css_walk_subchannel_slow_list(void (*fn)(unsigned long));
void css_clear_subchannel_slow_list(void);
int css_slow_subchannels_exist(void);
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 85908cacc3b..fa3e4c0a253 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1,7 +1,7 @@
/*
* drivers/s390/cio/device.c
* bus driver for ccw devices
- * $Revision: 1.131 $
+ * $Revision: 1.137 $
*
* Copyright (C) 2002 IBM Deutschland Entwicklung GmbH,
* IBM Corporation
@@ -374,7 +374,7 @@ online_store (struct device *dev, struct device_attribute *attr, const char *buf
int i, force, ret;
char *tmp;
- if (atomic_compare_and_swap(0, 1, &cdev->private->onoff))
+ if (atomic_cmpxchg(&cdev->private->onoff, 0, 1) != 0)
return -EAGAIN;
if (cdev->drv && !try_module_get(cdev->drv->owner)) {
@@ -535,7 +535,8 @@ ccw_device_register(struct ccw_device *cdev)
}
struct match_data {
- unsigned int devno;
+ unsigned int devno;
+ unsigned int ssid;
struct ccw_device * sibling;
};
@@ -548,6 +549,7 @@ match_devno(struct device * dev, void * data)
cdev = to_ccwdev(dev);
if ((cdev->private->state == DEV_STATE_DISCONNECTED) &&
(cdev->private->devno == d->devno) &&
+ (cdev->private->ssid == d->ssid) &&
(cdev != d->sibling)) {
cdev->private->state = DEV_STATE_NOT_OPER;
return 1;
@@ -556,11 +558,13 @@ match_devno(struct device * dev, void * data)
}
static struct ccw_device *
-get_disc_ccwdev_by_devno(unsigned int devno, struct ccw_device *sibling)
+get_disc_ccwdev_by_devno(unsigned int devno, unsigned int ssid,
+ struct ccw_device *sibling)
{
struct device *dev;
struct match_data data = {
- .devno = devno,
+ .devno = devno,
+ .ssid = ssid,
.sibling = sibling,
};
@@ -616,13 +620,13 @@ ccw_device_do_unreg_rereg(void *data)
need_rename = 1;
other_cdev = get_disc_ccwdev_by_devno(sch->schib.pmcw.dev,
- cdev);
+ sch->schid.ssid, cdev);
if (other_cdev) {
struct subchannel *other_sch;
other_sch = to_subchannel(other_cdev->dev.parent);
if (get_device(&other_sch->dev)) {
- stsch(other_sch->irq, &other_sch->schib);
+ stsch(other_sch->schid, &other_sch->schib);
if (other_sch->schib.pmcw.dnv) {
other_sch->schib.pmcw.intparm = 0;
cio_modify(other_sch);
@@ -639,8 +643,8 @@ ccw_device_do_unreg_rereg(void *data)
if (test_and_clear_bit(1, &cdev->private->registered))
device_del(&cdev->dev);
if (need_rename)
- snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.0.%04x",
- sch->schib.pmcw.dev);
+ snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x",
+ sch->schid.ssid, sch->schib.pmcw.dev);
PREPARE_WORK(&cdev->private->kick_work,
ccw_device_add_changed, (void *)cdev);
queue_work(ccw_device_work, &cdev->private->kick_work);
@@ -769,18 +773,20 @@ io_subchannel_recog(struct ccw_device *cdev, struct subchannel *sch)
sch->dev.driver_data = cdev;
sch->driver = &io_subchannel_driver;
cdev->ccwlock = &sch->lock;
+
/* Init private data. */
priv = cdev->private;
priv->devno = sch->schib.pmcw.dev;
- priv->irq = sch->irq;
+ priv->ssid = sch->schid.ssid;
+ priv->sch_no = sch->schid.sch_no;
priv->state = DEV_STATE_NOT_OPER;
INIT_LIST_HEAD(&priv->cmb_list);
init_waitqueue_head(&priv->wait_q);
init_timer(&priv->timer);
/* Set an initial name for the device. */
- snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.0.%04x",
- sch->schib.pmcw.dev);
+ snprintf (cdev->dev.bus_id, BUS_ID_SIZE, "0.%x.%04x",
+ sch->schid.ssid, sch->schib.pmcw.dev);
/* Increase counter of devices currently in recognition. */
atomic_inc(&ccw_device_init_count);
@@ -951,7 +957,7 @@ io_subchannel_shutdown(struct device *dev)
sch = to_subchannel(dev);
cdev = dev->driver_data;
- if (cio_is_console(sch->irq))
+ if (cio_is_console(sch->schid))
return;
if (!sch->schib.pmcw.ena)
/* Nothing to do. */
@@ -986,10 +992,6 @@ ccw_device_console_enable (struct ccw_device *cdev, struct subchannel *sch)
cdev->dev = (struct device) {
.parent = &sch->dev,
};
- /* Initialize the subchannel structure */
- sch->dev.parent = &css_bus_device;
- sch->dev.bus = &css_bus_type;
-
rc = io_subchannel_recog(cdev, sch);
if (rc)
return rc;
@@ -1146,6 +1148,16 @@ ccw_driver_unregister (struct ccw_driver *cdriver)
driver_unregister(&cdriver->driver);
}
+/* Helper func for qdio. */
+struct subchannel_id
+ccw_device_get_subchannel_id(struct ccw_device *cdev)
+{
+ struct subchannel *sch;
+
+ sch = to_subchannel(cdev->dev.parent);
+ return sch->schid;
+}
+
MODULE_LICENSE("GPL");
EXPORT_SYMBOL(ccw_device_set_online);
EXPORT_SYMBOL(ccw_device_set_offline);
@@ -1155,3 +1167,4 @@ EXPORT_SYMBOL(get_ccwdev_by_busid);
EXPORT_SYMBOL(ccw_bus_type);
EXPORT_SYMBOL(ccw_device_work);
EXPORT_SYMBOL(ccw_device_notify_work);
+EXPORT_SYMBOL_GPL(ccw_device_get_subchannel_id);
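ccw_device_get_subchannel_id() gives qdio (and other css-internal users) a sanctioned way to reach the schid instead of dereferencing cdev->private. A hedged usage sketch:

	/* Illustrative use of the new helper by a CCW device driver. */
	static void log_schid(struct ccw_device *cdev)
	{
		struct subchannel_id schid = ccw_device_get_subchannel_id(cdev);

		printk(KERN_DEBUG "%s on subchannel 0.%x.%04x\n",
		       cdev->dev.bus_id, schid.ssid, schid.sch_no);
	}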
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index a3aa056d724..11587ebb728 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -110,6 +110,7 @@ int ccw_device_stlck(struct ccw_device *);
/* qdio needs this. */
void ccw_device_set_timeout(struct ccw_device *, int);
+extern struct subchannel_id ccw_device_get_subchannel_id(struct ccw_device *);
void retry_set_schib(struct ccw_device *cdev);
#endif
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index c1c89f4fd4e..23d12b65e5f 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -133,7 +133,7 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
int ret;
sch = to_subchannel(cdev->dev.parent);
- ret = stsch(sch->irq, &sch->schib);
+ ret = stsch(sch->schid, &sch->schib);
if (ret || !sch->schib.pmcw.dnv)
return -ENODEV;
if (!sch->schib.pmcw.ena || sch->schib.scsw.actl == 0)
@@ -231,7 +231,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
* through ssch() and the path information is up to date.
*/
old_lpm = sch->lpm;
- stsch(sch->irq, &sch->schib);
+ stsch(sch->schid, &sch->schib);
sch->lpm = sch->schib.pmcw.pim &
sch->schib.pmcw.pam &
sch->schib.pmcw.pom &
@@ -257,8 +257,9 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
switch (state) {
case DEV_STATE_NOT_OPER:
CIO_DEBUG(KERN_WARNING, 2,
- "SenseID : unknown device %04x on subchannel %04x\n",
- cdev->private->devno, sch->irq);
+ "SenseID : unknown device %04x on subchannel "
+ "0.%x.%04x\n", cdev->private->devno,
+ sch->schid.ssid, sch->schid.sch_no);
break;
case DEV_STATE_OFFLINE:
if (cdev->private->state == DEV_STATE_DISCONNECTED_SENSE_ID) {
@@ -282,16 +283,18 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
return;
}
/* Issue device info message. */
- CIO_DEBUG(KERN_INFO, 2, "SenseID : device %04x reports: "
+ CIO_DEBUG(KERN_INFO, 2, "SenseID : device 0.%x.%04x reports: "
"CU Type/Mod = %04X/%02X, Dev Type/Mod = "
- "%04X/%02X\n", cdev->private->devno,
+ "%04X/%02X\n",
+ cdev->private->ssid, cdev->private->devno,
cdev->id.cu_type, cdev->id.cu_model,
cdev->id.dev_type, cdev->id.dev_model);
break;
case DEV_STATE_BOXED:
CIO_DEBUG(KERN_WARNING, 2,
- "SenseID : boxed device %04x on subchannel %04x\n",
- cdev->private->devno, sch->irq);
+ "SenseID : boxed device %04x on subchannel "
+ "0.%x.%04x\n", cdev->private->devno,
+ sch->schid.ssid, sch->schid.sch_no);
break;
}
cdev->private->state = state;
@@ -359,7 +362,7 @@ ccw_device_done(struct ccw_device *cdev, int state)
if (state == DEV_STATE_BOXED)
CIO_DEBUG(KERN_WARNING, 2,
"Boxed device %04x on subchannel %04x\n",
- cdev->private->devno, sch->irq);
+ cdev->private->devno, sch->schid.sch_no);
if (cdev->private->flags.donotify) {
cdev->private->flags.donotify = 0;
@@ -592,7 +595,7 @@ ccw_device_offline(struct ccw_device *cdev)
struct subchannel *sch;
sch = to_subchannel(cdev->dev.parent);
- if (stsch(sch->irq, &sch->schib) || !sch->schib.pmcw.dnv)
+ if (stsch(sch->schid, &sch->schib) || !sch->schib.pmcw.dnv)
return -ENODEV;
if (cdev->private->state != DEV_STATE_ONLINE) {
if (sch->schib.scsw.actl != 0)
@@ -711,7 +714,7 @@ ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event)
* Since we might not just be coming from an interrupt from the
* subchannel we have to update the schib.
*/
- stsch(sch->irq, &sch->schib);
+ stsch(sch->schid, &sch->schib);
if (sch->schib.scsw.actl != 0 ||
(cdev->private->irb.scsw.stctl & SCSW_STCTL_STATUS_PEND)) {
@@ -923,7 +926,7 @@ ccw_device_wait4io_irq(struct ccw_device *cdev, enum dev_event dev_event)
/* Iff device is idle, reset timeout. */
sch = to_subchannel(cdev->dev.parent);
- if (!stsch(sch->irq, &sch->schib))
+ if (!stsch(sch->schid, &sch->schib))
if (sch->schib.scsw.actl == 0)
ccw_device_set_timeout(cdev, 0);
/* Call the handler. */
@@ -1035,7 +1038,7 @@ device_trigger_reprobe(struct subchannel *sch)
return;
/* Update some values. */
- if (stsch(sch->irq, &sch->schib))
+ if (stsch(sch->schid, &sch->schib))
return;
/*
diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
index 0e68fb511dc..04ceba343db 100644
--- a/drivers/s390/cio/device_id.c
+++ b/drivers/s390/cio/device_id.c
@@ -27,7 +27,7 @@
/*
* diag210 is used under VM to get information about a virtual device
*/
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
int
diag210(struct diag210 * addr)
{
@@ -256,16 +256,17 @@ ccw_device_check_sense_id(struct ccw_device *cdev)
* sense id information. So, for intervention required,
* we use the "whack it until it talks" strategy...
*/
- CIO_MSG_EVENT(2, "SenseID : device %04x on Subchannel %04x "
- "reports cmd reject\n",
- cdev->private->devno, sch->irq);
+ CIO_MSG_EVENT(2, "SenseID : device %04x on Subchannel "
+ "0.%x.%04x reports cmd reject\n",
+ cdev->private->devno, sch->schid.ssid,
+ sch->schid.sch_no);
return -EOPNOTSUPP;
}
if (irb->esw.esw0.erw.cons) {
- CIO_MSG_EVENT(2, "SenseID : UC on dev %04x, "
+ CIO_MSG_EVENT(2, "SenseID : UC on dev 0.%x.%04x, "
"lpum %02X, cnt %02d, sns :"
" %02X%02X%02X%02X %02X%02X%02X%02X ...\n",
- cdev->private->devno,
+ cdev->private->ssid, cdev->private->devno,
irb->esw.esw0.sublog.lpum,
irb->esw.esw0.erw.scnt,
irb->ecw[0], irb->ecw[1],
@@ -277,16 +278,17 @@ ccw_device_check_sense_id(struct ccw_device *cdev)
if (irb->scsw.cc == 3) {
if ((sch->orb.lpm &
sch->schib.pmcw.pim & sch->schib.pmcw.pam) != 0)
- CIO_MSG_EVENT(2, "SenseID : path %02X for device %04x on"
- " subchannel %04x is 'not operational'\n",
- sch->orb.lpm, cdev->private->devno,
- sch->irq);
+ CIO_MSG_EVENT(2, "SenseID : path %02X for device %04x "
+ "on subchannel 0.%x.%04x is "
+ "'not operational'\n", sch->orb.lpm,
+ cdev->private->devno, sch->schid.ssid,
+ sch->schid.sch_no);
return -EACCES;
}
/* Hmm, whatever happened, try again. */
CIO_MSG_EVENT(2, "SenseID : start_IO() for device %04x on "
- "subchannel %04x returns status %02X%02X\n",
- cdev->private->devno, sch->irq,
+ "subchannel 0.%x.%04x returns status %02X%02X\n",
+ cdev->private->devno, sch->schid.ssid, sch->schid.sch_no,
irb->scsw.dstat, irb->scsw.cstat);
return -EAGAIN;
}
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 85a3026e690..143b6c25a4e 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -1,7 +1,7 @@
/*
* drivers/s390/cio/device_ops.c
*
- * $Revision: 1.57 $
+ * $Revision: 1.58 $
*
* Copyright (C) 2002 IBM Deutschland Entwicklung GmbH,
* IBM Corporation
@@ -570,7 +570,7 @@ ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no)
int
_ccw_device_get_subchannel_number(struct ccw_device *cdev)
{
- return cdev->private->irq;
+ return cdev->private->sch_no;
}
int
diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
index 0adac8a6733..052832d03d3 100644
--- a/drivers/s390/cio/device_pgid.c
+++ b/drivers/s390/cio/device_pgid.c
@@ -22,6 +22,7 @@
#include "cio_debug.h"
#include "css.h"
#include "device.h"
+#include "ioasm.h"
/*
* Start Sense Path Group ID helper function. Used in ccw_device_recog
@@ -56,10 +57,10 @@ __ccw_device_sense_pgid_start(struct ccw_device *cdev)
if (ret != -EACCES)
return ret;
CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel "
- "%04x, lpm %02X, became 'not "
+ "0.%x.%04x, lpm %02X, became 'not "
"operational'\n",
- cdev->private->devno, sch->irq,
- cdev->private->imask);
+ cdev->private->devno, sch->schid.ssid,
+ sch->schid.sch_no, cdev->private->imask);
}
cdev->private->imask >>= 1;
@@ -105,10 +106,10 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev)
return -EOPNOTSUPP;
}
if (irb->esw.esw0.erw.cons) {
- CIO_MSG_EVENT(2, "SNID - device %04x, unit check, "
+ CIO_MSG_EVENT(2, "SNID - device 0.%x.%04x, unit check, "
"lpum %02X, cnt %02d, sns : "
"%02X%02X%02X%02X %02X%02X%02X%02X ...\n",
- cdev->private->devno,
+ cdev->private->ssid, cdev->private->devno,
irb->esw.esw0.sublog.lpum,
irb->esw.esw0.erw.scnt,
irb->ecw[0], irb->ecw[1],
@@ -118,15 +119,17 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev)
return -EAGAIN;
}
if (irb->scsw.cc == 3) {
- CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel "
- "%04x, lpm %02X, became 'not operational'\n",
- cdev->private->devno, sch->irq, sch->orb.lpm);
+ CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x,"
+ " lpm %02X, became 'not operational'\n",
+ cdev->private->devno, sch->schid.ssid,
+ sch->schid.sch_no, sch->orb.lpm);
return -EACCES;
}
if (cdev->private->pgid.inf.ps.state2 == SNID_STATE2_RESVD_ELSE) {
- CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel %04x "
+ CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x "
"is reserved by someone else\n",
- cdev->private->devno, sch->irq);
+ cdev->private->devno, sch->schid.ssid,
+ sch->schid.sch_no);
return -EUSERS;
}
return 0;
@@ -162,7 +165,7 @@ ccw_device_sense_pgid_irq(struct ccw_device *cdev, enum dev_event dev_event)
/* 0, -ETIME, -EOPNOTSUPP, -EAGAIN, -EACCES or -EUSERS */
case 0: /* Sense Path Group ID successful. */
if (cdev->private->pgid.inf.ps.state1 == SNID_STATE1_RESET)
- memcpy(&cdev->private->pgid, &global_pgid,
+ memcpy(&cdev->private->pgid, &css[0]->global_pgid,
sizeof(struct pgid));
ccw_device_sense_pgid_done(cdev, 0);
break;
@@ -235,8 +238,9 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func)
sch->lpm &= ~cdev->private->imask;
sch->vpm &= ~cdev->private->imask;
CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel "
- "%04x, lpm %02X, became 'not operational'\n",
- cdev->private->devno, sch->irq, cdev->private->imask);
+ "0.%x.%04x, lpm %02X, became 'not operational'\n",
+ cdev->private->devno, sch->schid.ssid,
+ sch->schid.sch_no, cdev->private->imask);
return ret;
}
@@ -258,8 +262,10 @@ __ccw_device_check_pgid(struct ccw_device *cdev)
if (irb->ecw[0] & SNS0_CMD_REJECT)
return -EOPNOTSUPP;
/* Hmm, whatever happened, try again. */
- CIO_MSG_EVENT(2, "SPID - device %04x, unit check, cnt %02d, "
+ CIO_MSG_EVENT(2, "SPID - device 0.%x.%04x, unit check, "
+ "cnt %02d, "
"sns : %02X%02X%02X%02X %02X%02X%02X%02X ...\n",
+ cdev->private->ssid,
cdev->private->devno, irb->esw.esw0.erw.scnt,
irb->ecw[0], irb->ecw[1],
irb->ecw[2], irb->ecw[3],
@@ -268,10 +274,10 @@ __ccw_device_check_pgid(struct ccw_device *cdev)
return -EAGAIN;
}
if (irb->scsw.cc == 3) {
- CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel "
- "%04x, lpm %02X, became 'not operational'\n",
- cdev->private->devno, sch->irq,
- cdev->private->imask);
+ CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel 0.%x.%04x,"
+ " lpm %02X, became 'not operational'\n",
+ cdev->private->devno, sch->schid.ssid,
+ sch->schid.sch_no, cdev->private->imask);
return -EACCES;
}
return 0;
@@ -364,8 +370,22 @@ ccw_device_verify_irq(struct ccw_device *cdev, enum dev_event dev_event)
void
ccw_device_verify_start(struct ccw_device *cdev)
{
+ struct subchannel *sch = to_subchannel(cdev->dev.parent);
+
cdev->private->flags.pgid_single = 0;
cdev->private->iretry = 5;
+ /*
+ * Update sch->lpm with current values to catch paths becoming
+ * available again.
+ */
+ if (stsch(sch->schid, &sch->schib)) {
+ ccw_device_verify_done(cdev, -ENODEV);
+ return;
+ }
+ sch->lpm = sch->schib.pmcw.pim &
+ sch->schib.pmcw.pam &
+ sch->schib.pmcw.pom &
+ sch->opm;
__ccw_device_verify_start(cdev);
}
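ccw_device_verify_start() now refreshes sch->lpm from a fresh stsch before verifying, so paths that came back since the last update are retried. The mask itself is the conjunction used throughout the patch; in isolation:

	/* The usable-path mask recomputed above (one bit per channel path). */
	static inline u8 usable_paths(struct schib *schib, u8 opm)
	{
		return schib->pmcw.pim &	/* installed paths */
		       schib->pmcw.pam &	/* available paths */
		       schib->pmcw.pom &	/* operational paths */
		       opm;			/* paths usable from our side */
	}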
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
index 12a24d4331a..db09c209098 100644
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -36,15 +36,16 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb)
CIO_MSG_EVENT(0, "Channel-Check or Interface-Control-Check "
"received"
- " ... device %04X on subchannel %04X, dev_stat "
+ " ... device %04x on subchannel 0.%x.%04x, dev_stat "
": %02X sch_stat : %02X\n",
- cdev->private->devno, cdev->private->irq,
+ cdev->private->devno, cdev->private->ssid,
+ cdev->private->sch_no,
irb->scsw.dstat, irb->scsw.cstat);
if (irb->scsw.cc != 3) {
char dbf_text[15];
- sprintf(dbf_text, "chk%x", cdev->private->irq);
+ sprintf(dbf_text, "chk%x", cdev->private->sch_no);
CIO_TRACE_EVENT(0, dbf_text);
CIO_HEX_EVENT(0, irb, sizeof (struct irb));
}
@@ -59,10 +60,11 @@ ccw_device_path_notoper(struct ccw_device *cdev)
struct subchannel *sch;
sch = to_subchannel(cdev->dev.parent);
- stsch (sch->irq, &sch->schib);
+ stsch (sch->schid, &sch->schib);
- CIO_MSG_EVENT(0, "%s(%04x) - path(s) %02x are "
- "not operational \n", __FUNCTION__, sch->irq,
+ CIO_MSG_EVENT(0, "%s(0.%x.%04x) - path(s) %02x are "
+ "not operational \n", __FUNCTION__,
+ sch->schid.ssid, sch->schid.sch_no,
sch->schib.pmcw.pnom);
sch->lpm &= ~sch->schib.pmcw.pnom;
diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h
index 45480a2bc4c..95a9462f9a9 100644
--- a/drivers/s390/cio/ioasm.h
+++ b/drivers/s390/cio/ioasm.h
@@ -1,12 +1,13 @@
#ifndef S390_CIO_IOASM_H
#define S390_CIO_IOASM_H
+#include "schid.h"
+
/*
* TPI info structure
*/
struct tpi_info {
- __u32 reserved1 : 16; /* reserved 0x00000001 */
- __u32 irq : 16; /* aka. subchannel number */
+ struct subchannel_id schid;
__u32 intparm; /* interruption parameter */
__u32 adapter_IO : 1;
__u32 reserved2 : 1;
@@ -21,7 +22,8 @@ struct tpi_info {
* Some S390 specific IO instructions as inline
*/
-static inline int stsch(int irq, volatile struct schib *addr)
+static inline int stsch(struct subchannel_id schid,
+ volatile struct schib *addr)
{
int ccode;
@@ -31,12 +33,42 @@ static inline int stsch(int irq, volatile struct schib *addr)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "d" (irq | 0x10000), "a" (addr)
+ : "d" (schid), "a" (addr), "m" (*addr)
+ : "cc", "1" );
+ return ccode;
+}
+
+static inline int stsch_err(struct subchannel_id schid,
+ volatile struct schib *addr)
+{
+ int ccode;
+
+ __asm__ __volatile__(
+ " lhi %0,%3\n"
+ " lr 1,%1\n"
+ " stsch 0(%2)\n"
+ "0: ipm %0\n"
+ " srl %0,28\n"
+ "1:\n"
+#ifdef CONFIG_64BIT
+ ".section __ex_table,\"a\"\n"
+ " .align 8\n"
+ " .quad 0b,1b\n"
+ ".previous"
+#else
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 0b,1b\n"
+ ".previous"
+#endif
+ : "=&d" (ccode)
+ : "d" (schid), "a" (addr), "K" (-EIO), "m" (*addr)
: "cc", "1" );
return ccode;
}
-static inline int msch(int irq, volatile struct schib *addr)
+static inline int msch(struct subchannel_id schid,
+ volatile struct schib *addr)
{
int ccode;
@@ -46,12 +78,13 @@ static inline int msch(int irq, volatile struct schib *addr)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "d" (irq | 0x10000L), "a" (addr)
+ : "d" (schid), "a" (addr), "m" (*addr)
: "cc", "1" );
return ccode;
}
-static inline int msch_err(int irq, volatile struct schib *addr)
+static inline int msch_err(struct subchannel_id schid,
+ volatile struct schib *addr)
{
int ccode;
@@ -62,7 +95,7 @@ static inline int msch_err(int irq, volatile struct schib *addr)
"0: ipm %0\n"
" srl %0,28\n"
"1:\n"
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
".section __ex_table,\"a\"\n"
" .align 8\n"
" .quad 0b,1b\n"
@@ -74,12 +107,13 @@ static inline int msch_err(int irq, volatile struct schib *addr)
".previous"
#endif
: "=&d" (ccode)
- : "d" (irq | 0x10000L), "a" (addr), "K" (-EIO)
+ : "d" (schid), "a" (addr), "K" (-EIO), "m" (*addr)
: "cc", "1" );
return ccode;
}
-static inline int tsch(int irq, volatile struct irb *addr)
+static inline int tsch(struct subchannel_id schid,
+ volatile struct irb *addr)
{
int ccode;
@@ -89,7 +123,7 @@ static inline int tsch(int irq, volatile struct irb *addr)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "d" (irq | 0x10000L), "a" (addr)
+ : "d" (schid), "a" (addr), "m" (*addr)
: "cc", "1" );
return ccode;
}
@@ -103,12 +137,13 @@ static inline int tpi( volatile struct tpi_info *addr)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "a" (addr)
+ : "a" (addr), "m" (*addr)
: "cc", "1" );
return ccode;
}
-static inline int ssch(int irq, volatile struct orb *addr)
+static inline int ssch(struct subchannel_id schid,
+ volatile struct orb *addr)
{
int ccode;
@@ -118,12 +153,12 @@ static inline int ssch(int irq, volatile struct orb *addr)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "d" (irq | 0x10000L), "a" (addr)
+ : "d" (schid), "a" (addr), "m" (*addr)
: "cc", "1" );
return ccode;
}
-static inline int rsch(int irq)
+static inline int rsch(struct subchannel_id schid)
{
int ccode;
@@ -133,12 +168,12 @@ static inline int rsch(int irq)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "d" (irq | 0x10000L)
+ : "d" (schid)
: "cc", "1" );
return ccode;
}
-static inline int csch(int irq)
+static inline int csch(struct subchannel_id schid)
{
int ccode;
@@ -148,12 +183,12 @@ static inline int csch(int irq)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "d" (irq | 0x10000L)
+ : "d" (schid)
: "cc", "1" );
return ccode;
}
-static inline int hsch(int irq)
+static inline int hsch(struct subchannel_id schid)
{
int ccode;
@@ -163,12 +198,12 @@ static inline int hsch(int irq)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "d" (irq | 0x10000L)
+ : "d" (schid)
: "cc", "1" );
return ccode;
}
-static inline int xsch(int irq)
+static inline int xsch(struct subchannel_id schid)
{
int ccode;
@@ -178,21 +213,22 @@ static inline int xsch(int irq)
" ipm %0\n"
" srl %0,28"
: "=d" (ccode)
- : "d" (irq | 0x10000L)
+ : "d" (schid)
: "cc", "1" );
return ccode;
}
static inline int chsc(void *chsc_area)
{
+ typedef struct { char _[4096]; } addr_type;
int cc;
__asm__ __volatile__ (
- ".insn rre,0xb25f0000,%1,0 \n\t"
+ ".insn rre,0xb25f0000,%2,0 \n\t"
"ipm %0 \n\t"
"srl %0,28 \n\t"
- : "=d" (cc)
- : "d" (chsc_area)
+ : "=d" (cc), "=m" (*(addr_type *) chsc_area)
+ : "d" (chsc_area), "m" (*(addr_type *) chsc_area)
: "cc" );
return cc;
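	/*
	 * Aside: the addr_type typedef above exposes the whole 4K CHSC
	 * block to gcc as a single memory operand. The bare "d" (chsc_area)
	 * operand only says the *pointer* is read; the added "m" operands
	 * say the pointed-to memory is read and written, so stores to the
	 * block are not cached or reordered across the asm. The same
	 * "m" (*addr) operands were added to the inlines above for the
	 * same reason. Condensed, architecture-neutral sketch:
	 */
	typedef struct { char _[4096]; } block_4k;

	static inline void asm_touches_block(void *buf)
	{
		/* illustrative no-op asm that claims to read+write *buf */
		asm volatile("" : "=m" (*(block_4k *) buf)
			        : "r" (buf), "m" (*(block_4k *) buf));
	}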
diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c
index eb39218b925..30a836ffc31 100644
--- a/drivers/s390/cio/qdio.c
+++ b/drivers/s390/cio/qdio.c
@@ -56,7 +56,7 @@
#include "ioasm.h"
#include "chsc.h"
-#define VERSION_QDIO_C "$Revision: 1.108 $"
+#define VERSION_QDIO_C "$Revision: 1.114 $"
/****************** MODULE PARAMETER VARIABLES ********************/
MODULE_AUTHOR("Utz Bacher <utz.bacher@de.ibm.com>");
@@ -76,6 +76,7 @@ static struct qdio_perf_stats perf_stats;
#endif /* QDIO_PERFORMANCE_STATS */
static int hydra_thinints;
+static int is_passthrough = 0;
static int omit_svs;
static int indicator_used[INDICATORS_PER_CACHELINE];
@@ -136,12 +137,126 @@ qdio_release_q(struct qdio_q *q)
atomic_dec(&q->use_count);
}
-static volatile inline void
-qdio_set_slsb(volatile char *slsb, unsigned char value)
+/* check the condition code qualifier (ccq) returned by EQBS/SQBS */
+static inline int
+qdio_check_ccq(struct qdio_q *q, unsigned int ccq)
+{
+ char dbf_text[15];
+
+ if (ccq == 0 || ccq == 32 || ccq == 96)
+ return 0;
+ if (ccq == 97)
+ return 1;
+ /* notify devices immediately */
+ sprintf(dbf_text,"%d", ccq);
+ QDIO_DBF_TEXT2(1,trace,dbf_text);
+ return -EIO;
+}
+/* EQBS: extract buffer states */
+static inline int
+qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
+ unsigned int *start, unsigned int *cnt)
+{
+ struct qdio_irq *irq;
+ unsigned int tmp_cnt, q_no, ccq;
+ int rc ;
+ char dbf_text[15];
+
+ ccq = 0;
+ tmp_cnt = *cnt;
+ irq = (struct qdio_irq*)q->irq_ptr;
+ q_no = q->q_no;
+ if(!q->is_input_q)
+ q_no += irq->no_input_qs;
+ ccq = do_eqbs(irq->sch_token, state, q_no, start, cnt);
+ rc = qdio_check_ccq(q, ccq);
+ if (rc < 0) {
+ QDIO_DBF_TEXT2(1,trace,"eqberr");
+ sprintf(dbf_text,"%2x,%2x,%d,%d",tmp_cnt, *cnt, ccq, q_no);
+ QDIO_DBF_TEXT2(1,trace,dbf_text);
+ q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION|
+ QDIO_STATUS_LOOK_FOR_ERROR,
+ 0, 0, 0, -1, -1, q->int_parm);
+ return 0;
+ }
+ return (tmp_cnt - *cnt);
+}
+
+/* SQBS: set buffer states */
+static inline int
+qdio_do_sqbs(struct qdio_q *q, unsigned char state,
+ unsigned int *start, unsigned int *cnt)
{
- xchg((char*)slsb,value);
+ struct qdio_irq *irq;
+ unsigned int tmp_cnt, q_no, ccq;
+ int rc;
+ char dbf_text[15];
+
+ ccq = 0;
+ tmp_cnt = *cnt;
+ irq = (struct qdio_irq*)q->irq_ptr;
+ q_no = q->q_no;
+ if(!q->is_input_q)
+ q_no += irq->no_input_qs;
+ ccq = do_sqbs(irq->sch_token, state, q_no, start, cnt);
+ rc = qdio_check_ccq(q, ccq);
+ if (rc < 0) {
+ QDIO_DBF_TEXT3(1,trace,"sqberr");
+ sprintf(dbf_text,"%2x,%2x,%d,%d",tmp_cnt,*cnt,ccq,q_no);
+ QDIO_DBF_TEXT3(1,trace,dbf_text);
+ q->handler(q->cdev,QDIO_STATUS_ACTIVATE_CHECK_CONDITION|
+ QDIO_STATUS_LOOK_FOR_ERROR,
+ 0, 0, 0, -1, -1, q->int_parm);
+ return 0;
+ }
+ return (tmp_cnt - *cnt);
}
+static inline int
+qdio_set_slsb(struct qdio_q *q, unsigned int *bufno,
+ unsigned char state, unsigned int *count)
+{
+ volatile char *slsb;
+ struct qdio_irq *irq;
+
+ irq = (struct qdio_irq*)q->irq_ptr;
+ if (!irq->is_qebsm) {
+ slsb = (char *)&q->slsb.acc.val[(*bufno)];
+ xchg(slsb, state);
+ return 1;
+ }
+ return qdio_do_sqbs(q, state, bufno, count);
+}
+
+#ifdef CONFIG_QDIO_DEBUG
+static inline void
+qdio_trace_slsb(struct qdio_q *q)
+{
+ if (q->queue_type==QDIO_TRACE_QTYPE) {
+ if (q->is_input_q)
+ QDIO_DBF_HEX2(0,slsb_in,&q->slsb,
+ QDIO_MAX_BUFFERS_PER_Q);
+ else
+ QDIO_DBF_HEX2(0,slsb_out,&q->slsb,
+ QDIO_MAX_BUFFERS_PER_Q);
+ }
+}
+#endif
+
+static inline int
+set_slsb(struct qdio_q *q, unsigned int *bufno,
+ unsigned char state, unsigned int *count)
+{
+ int rc;
+#ifdef CONFIG_QDIO_DEBUG
+ qdio_trace_slsb(q);
+#endif
+ rc = qdio_set_slsb(q, bufno, state, count);
+#ifdef CONFIG_QDIO_DEBUG
+ qdio_trace_slsb(q);
+#endif
+ return rc;
+}
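	/*
	 * Aside: with QEBSM the buffer states live in the hypervisor, so
	 * they must be changed via SQBS instead of an xchg() on the shadow
	 * SLSB; set_slsb() hides that split and returns how many states
	 * were actually set (always 1 in the non-QEBSM branch). Hedged
	 * caller sketch:
	 */
	static void mark_not_init(struct qdio_q *q, unsigned int bufno)
	{
		unsigned int count = 1;

		if (set_slsb(q, &bufno, SLSB_P_INPUT_NOT_INIT, &count) != 1)
			QDIO_DBF_TEXT2(1, trace, "slsberr");	/* illustrative */
	}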
static inline int
qdio_siga_sync(struct qdio_q *q, unsigned int gpr2,
unsigned int gpr3)
@@ -155,7 +270,7 @@ qdio_siga_sync(struct qdio_q *q, unsigned int gpr2,
perf_stats.siga_syncs++;
#endif /* QDIO_PERFORMANCE_STATS */
- cc = do_siga_sync(q->irq, gpr2, gpr3);
+ cc = do_siga_sync(q->schid, gpr2, gpr3);
if (cc)
QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*));
@@ -170,6 +285,23 @@ qdio_siga_sync_q(struct qdio_q *q)
return qdio_siga_sync(q, q->mask, 0);
}
+static int
+__do_siga_output(struct qdio_q *q, unsigned int *busy_bit)
+{
+ struct qdio_irq *irq;
+ unsigned int fc = 0;
+ unsigned long schid;
+
+ irq = (struct qdio_irq *) q->irq_ptr;
+ if (!irq->is_qebsm)
+ schid = *((u32 *)&q->schid);
+ else {
+ schid = irq->sch_token;
+ fc |= 0x80;
+ }
+ return do_siga_output(schid, q->mask, busy_bit, fc);
+}
+
/*
* returns QDIO_SIGA_ERROR_ACCESS_EXCEPTION as cc, when SIGA returns
* an access exception
@@ -189,7 +321,7 @@ qdio_siga_output(struct qdio_q *q)
QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
for (;;) {
- cc = do_siga_output(q->irq, q->mask, &busy_bit);
+ cc = __do_siga_output(q, &busy_bit);
//QDIO_PRINT_ERR("cc=%x, busy=%x\n",cc,busy_bit);
if ((cc==2) && (busy_bit) && (q->is_iqdio_q)) {
if (!start_time)
@@ -221,7 +353,7 @@ qdio_siga_input(struct qdio_q *q)
perf_stats.siga_ins++;
#endif /* QDIO_PERFORMANCE_STATS */
- cc = do_siga_input(q->irq, q->mask);
+ cc = do_siga_input(q->schid, q->mask);
if (cc)
QDIO_DBF_HEX3(0,trace,&cc,sizeof(int*));
@@ -230,7 +362,7 @@ qdio_siga_input(struct qdio_q *q)
}
/* locked by the locks in qdio_activate and qdio_cleanup */
-static __u32 volatile *
+static __u32 *
qdio_get_indicator(void)
{
int i;
@@ -258,7 +390,7 @@ qdio_put_indicator(__u32 *addr)
atomic_dec(&spare_indicator_usecount);
}
-static inline volatile void
+static inline void
tiqdio_clear_summary_bit(__u32 *location)
{
QDIO_DBF_TEXT5(0,trace,"clrsummb");
@@ -267,7 +399,7 @@ tiqdio_clear_summary_bit(__u32 *location)
xchg(location,0);
}
-static inline volatile void
+static inline void
tiqdio_set_summary_bit(__u32 *location)
{
QDIO_DBF_TEXT5(0,trace,"setsummb");
@@ -336,7 +468,9 @@ static inline int
qdio_stop_polling(struct qdio_q *q)
{
#ifdef QDIO_USE_PROCESSING_STATE
- int gsf;
+ unsigned int tmp, gsf, count = 1;
+ unsigned char state = 0;
+ struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
if (!atomic_swap(&q->polling,0))
return 1;
@@ -348,17 +482,22 @@ qdio_stop_polling(struct qdio_q *q)
if (!q->is_input_q)
return 1;
- gsf=GET_SAVED_FRONTIER(q);
- set_slsb(&q->slsb.acc.val[(gsf+QDIO_MAX_BUFFERS_PER_Q-1)&
- (QDIO_MAX_BUFFERS_PER_Q-1)],
- SLSB_P_INPUT_NOT_INIT);
+ tmp = gsf = GET_SAVED_FRONTIER(q);
+ tmp = ((tmp + QDIO_MAX_BUFFERS_PER_Q-1) & (QDIO_MAX_BUFFERS_PER_Q-1));
+ set_slsb(q, &tmp, SLSB_P_INPUT_NOT_INIT, &count);
+
/*
* we don't issue this SYNC_MEMORY, as we trust Rick T and
* moreover will not use the PROCESSING state under VM, so
* q->polling was 0 anyway
*/
/*SYNC_MEMORY;*/
- if (q->slsb.acc.val[gsf]!=SLSB_P_INPUT_PRIMED)
+ if (irq->is_qebsm) {
+ count = 1;
+ qdio_do_eqbs(q, &state, &gsf, &count);
+ } else
+ state = q->slsb.acc.val[gsf];
+ if (state != SLSB_P_INPUT_PRIMED)
return 1;
/*
* set our summary bit again, as otherwise there is a
@@ -431,18 +570,136 @@ tiqdio_clear_global_summary(void)
/************************* OUTBOUND ROUTINES *******************************/
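+/*
+ * QEBSM variant of the outbound frontier scan: a single EQBS yields
+ * the common state of a run of up to "count" buffers starting at
+ * first_to_check, which is then accounted as a whole.
+ */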
+static int
+qdio_qebsm_get_outbound_buffer_frontier(struct qdio_q *q)
+{
+ struct qdio_irq *irq;
+ unsigned char state;
+ unsigned int cnt, count, ftc;
+
+ irq = (struct qdio_irq *) q->irq_ptr;
+ if ((!q->is_iqdio_q) && (!q->hydra_gives_outbound_pcis))
+ SYNC_MEMORY;
+
+ ftc = q->first_to_check;
+ count = qdio_min(atomic_read(&q->number_of_buffers_used),
+ (QDIO_MAX_BUFFERS_PER_Q-1));
+ if (count == 0)
+ return q->first_to_check;
+ cnt = qdio_do_eqbs(q, &state, &ftc, &count);
+ if (cnt == 0)
+ return q->first_to_check;
+ switch (state) {
+ case SLSB_P_OUTPUT_ERROR:
+ QDIO_DBF_TEXT3(0,trace,"outperr");
+ atomic_sub(cnt, &q->number_of_buffers_used);
+ if (q->qdio_error)
+ q->error_status_flags |=
+ QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR;
+ q->qdio_error = SLSB_P_OUTPUT_ERROR;
+ q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR;
+ q->first_to_check = ftc;
+ break;
+ case SLSB_P_OUTPUT_EMPTY:
+ QDIO_DBF_TEXT5(0,trace,"outpempt");
+ atomic_sub(cnt, &q->number_of_buffers_used);
+ q->first_to_check = ftc;
+ break;
+ case SLSB_CU_OUTPUT_PRIMED:
+ /* all buffers primed */
+ QDIO_DBF_TEXT5(0,trace,"outpprim");
+ break;
+ default:
+ break;
+ }
+ QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int));
+ return q->first_to_check;
+}
+
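+/*
+ * QEBSM variant of the inbound frontier scan, the counterpart to the
+ * outbound version above; error and primed runs are consumed via
+ * EQBS instead of inspecting the SLSB byte by byte.
+ */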
+static int
+qdio_qebsm_get_inbound_buffer_frontier(struct qdio_q *q)
+{
+ struct qdio_irq *irq;
+ unsigned char state;
+ int tmp, ftc, count, cnt;
+ char dbf_text[15];
+
+ irq = (struct qdio_irq *) q->irq_ptr;
+ ftc = q->first_to_check;
+ count = qdio_min(atomic_read(&q->number_of_buffers_used),
+ (QDIO_MAX_BUFFERS_PER_Q-1));
+ if (count == 0)
+ return q->first_to_check;
+ cnt = qdio_do_eqbs(q, &state, &ftc, &count);
+ if (cnt == 0)
+ return q->first_to_check;
+ switch (state) {
+ case SLSB_P_INPUT_ERROR:
+#ifdef CONFIG_QDIO_DEBUG
+ QDIO_DBF_TEXT3(1,trace,"inperr");
+ sprintf(dbf_text,"%2x,%2x",ftc,count);
+ QDIO_DBF_TEXT3(1,trace,dbf_text);
+#endif /* CONFIG_QDIO_DEBUG */
+ if (q->qdio_error)
+ q->error_status_flags |=
+ QDIO_STATUS_MORE_THAN_ONE_QDIO_ERROR;
+ q->qdio_error = SLSB_P_INPUT_ERROR;
+ q->error_status_flags |= QDIO_STATUS_LOOK_FOR_ERROR;
+ atomic_sub(cnt, &q->number_of_buffers_used);
+ q->first_to_check = ftc;
+ break;
+ case SLSB_P_INPUT_PRIMED:
+ QDIO_DBF_TEXT3(0,trace,"inptprim");
+ sprintf(dbf_text,"%2x,%2x",ftc,count);
+ QDIO_DBF_TEXT3(1,trace,dbf_text);
+ tmp = 0;
+ ftc = q->first_to_check;
+#ifdef QDIO_USE_PROCESSING_STATE
+ if (cnt > 1) {
+ cnt -= 1;
+ tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt);
+ if (!tmp)
+ break;
+ }
+ cnt = 1;
+ tmp += set_slsb(q, &ftc,
+ SLSB_P_INPUT_PROCESSING, &cnt);
+ atomic_set(&q->polling, 1);
+#else
+ tmp = set_slsb(q, &ftc, SLSB_P_INPUT_NOT_INIT, &cnt);
+#endif
+ atomic_sub(tmp, &q->number_of_buffers_used);
+ q->first_to_check = ftc;
+ break;
+ case SLSB_CU_INPUT_EMPTY:
+ case SLSB_P_INPUT_NOT_INIT:
+ case SLSB_P_INPUT_PROCESSING:
+ QDIO_DBF_TEXT5(0,trace,"inpnipro");
+ break;
+ default:
+ break;
+ }
+ QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int));
+ return q->first_to_check;
+}
static inline int
qdio_get_outbound_buffer_frontier(struct qdio_q *q)
{
- int f,f_mod_no;
- volatile char *slsb;
- int first_not_to_check;
+ struct qdio_irq *irq;
+ volatile char *slsb;
+ unsigned int count = 1;
+ int first_not_to_check, f, f_mod_no;
char dbf_text[15];
QDIO_DBF_TEXT4(0,trace,"getobfro");
QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
+ irq = (struct qdio_irq *) q->irq_ptr;
+ if (irq->is_qebsm)
+ return qdio_qebsm_get_outbound_buffer_frontier(q);
+
slsb=&q->slsb.acc.val[0];
f_mod_no=f=q->first_to_check;
/*
@@ -484,7 +741,7 @@ check_next:
QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256);
/* kind of process the buffer */
- set_slsb(&q->slsb.acc.val[f_mod_no], SLSB_P_OUTPUT_NOT_INIT);
+ set_slsb(q, &f_mod_no, SLSB_P_OUTPUT_NOT_INIT, &count);
/*
* we increment the frontier, as this buffer
@@ -597,48 +854,48 @@ qdio_kick_outbound_q(struct qdio_q *q)
result=qdio_siga_output(q);
- switch (result) {
- case 0:
- /* went smooth this time, reset timestamp */
+ switch (result) {
+ case 0:
+ /* went smooth this time, reset timestamp */
#ifdef CONFIG_QDIO_DEBUG
- QDIO_DBF_TEXT3(0,trace,"cc2reslv");
- sprintf(dbf_text,"%4x%2x%2x",q->irq,q->q_no,
- atomic_read(&q->busy_siga_counter));
- QDIO_DBF_TEXT3(0,trace,dbf_text);
+ QDIO_DBF_TEXT3(0,trace,"cc2reslv");
+ sprintf(dbf_text,"%4x%2x%2x",q->schid.sch_no,q->q_no,
+ atomic_read(&q->busy_siga_counter));
+ QDIO_DBF_TEXT3(0,trace,dbf_text);
#endif /* CONFIG_QDIO_DEBUG */
- q->timing.busy_start=0;
+ q->timing.busy_start=0;
+ break;
+ case (2|QDIO_SIGA_ERROR_B_BIT_SET):
+ /* cc=2 and busy bit: */
+ atomic_inc(&q->busy_siga_counter);
+
+ /* if the last siga was successful, save
+ * timestamp here */
+ if (!q->timing.busy_start)
+ q->timing.busy_start=NOW;
+
+ /* if we're in time, don't touch error_status_flags
+ * and siga_error */
+ if (NOW-q->timing.busy_start<QDIO_BUSY_BIT_GIVE_UP) {
+ qdio_mark_q(q);
break;
- case (2|QDIO_SIGA_ERROR_B_BIT_SET):
- /* cc=2 and busy bit: */
- atomic_inc(&q->busy_siga_counter);
-
- /* if the last siga was successful, save
- * timestamp here */
- if (!q->timing.busy_start)
- q->timing.busy_start=NOW;
-
- /* if we're in time, don't touch error_status_flags
- * and siga_error */
- if (NOW-q->timing.busy_start<QDIO_BUSY_BIT_GIVE_UP) {
- qdio_mark_q(q);
- break;
- }
- QDIO_DBF_TEXT2(0,trace,"cc2REPRT");
+ }
+ QDIO_DBF_TEXT2(0,trace,"cc2REPRT");
#ifdef CONFIG_QDIO_DEBUG
- sprintf(dbf_text,"%4x%2x%2x",q->irq,q->q_no,
- atomic_read(&q->busy_siga_counter));
- QDIO_DBF_TEXT3(0,trace,dbf_text);
+ sprintf(dbf_text,"%4x%2x%2x",q->schid.sch_no,q->q_no,
+ atomic_read(&q->busy_siga_counter));
+ QDIO_DBF_TEXT3(0,trace,dbf_text);
#endif /* CONFIG_QDIO_DEBUG */
- /* else fallthrough and report error */
- default:
- /* for plain cc=1, 2 or 3: */
- if (q->siga_error)
- q->error_status_flags|=
- QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR;
+ /* else fallthrough and report error */
+ default:
+ /* for plain cc=1, 2 or 3: */
+ if (q->siga_error)
q->error_status_flags|=
- QDIO_STATUS_LOOK_FOR_ERROR;
- q->siga_error=result;
- }
+ QDIO_STATUS_MORE_THAN_ONE_SIGA_ERROR;
+ q->error_status_flags|=
+ QDIO_STATUS_LOOK_FOR_ERROR;
+ q->siga_error=result;
+ }
}
static inline void
@@ -743,8 +1000,10 @@ qdio_outbound_processing(struct qdio_q *q)
static inline int
qdio_get_inbound_buffer_frontier(struct qdio_q *q)
{
+ struct qdio_irq *irq;
int f,f_mod_no;
volatile char *slsb;
+ unsigned int count = 1;
int first_not_to_check;
#ifdef CONFIG_QDIO_DEBUG
char dbf_text[15];
@@ -756,6 +1015,10 @@ qdio_get_inbound_buffer_frontier(struct qdio_q *q)
QDIO_DBF_TEXT4(0,trace,"getibfro");
QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
+ irq = (struct qdio_irq *) q->irq_ptr;
+ if (irq->is_qebsm)
+ return qdio_qebsm_get_inbound_buffer_frontier(q);
+
slsb=&q->slsb.acc.val[0];
f_mod_no=f=q->first_to_check;
/*
@@ -792,19 +1055,19 @@ check_next:
* kill VM in terms of CP overhead
*/
if (q->siga_sync) {
- set_slsb(&slsb[f_mod_no],SLSB_P_INPUT_NOT_INIT);
+ set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count);
} else {
/* set the previous buffer to NOT_INIT. The current
* buffer will be set to PROCESSING at the end of
* this function to avoid further interrupts. */
if (last_position>=0)
- set_slsb(&slsb[last_position],
- SLSB_P_INPUT_NOT_INIT);
+ set_slsb(q, &last_position,
+ SLSB_P_INPUT_NOT_INIT, &count);
atomic_set(&q->polling,1);
last_position=f_mod_no;
}
#else /* QDIO_USE_PROCESSING_STATE */
- set_slsb(&slsb[f_mod_no],SLSB_P_INPUT_NOT_INIT);
+ set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count);
#endif /* QDIO_USE_PROCESSING_STATE */
/*
* not needed, as the inbound queue will be synced on the next
@@ -829,7 +1092,7 @@ check_next:
QDIO_DBF_HEX2(1,sbal,q->sbal[f_mod_no],256);
/* kind of process the buffer */
- set_slsb(&slsb[f_mod_no],SLSB_P_INPUT_NOT_INIT);
+ set_slsb(q, &f_mod_no, SLSB_P_INPUT_NOT_INIT, &count);
if (q->qdio_error)
q->error_status_flags|=
@@ -857,7 +1120,7 @@ out:
#ifdef QDIO_USE_PROCESSING_STATE
if (last_position>=0)
- set_slsb(&slsb[last_position],SLSB_P_INPUT_PROCESSING);
+ set_slsb(q, &last_position, SLSB_P_INPUT_PROCESSING, &count);
#endif /* QDIO_USE_PROCESSING_STATE */
QDIO_DBF_HEX4(0,trace,&q->first_to_check,sizeof(int));
@@ -902,6 +1165,10 @@ static inline int
tiqdio_is_inbound_q_done(struct qdio_q *q)
{
int no_used;
+ unsigned int start_buf, count;
+ unsigned char state = 0;
+ struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
+
#ifdef CONFIG_QDIO_DEBUG
char dbf_text[15];
#endif
@@ -927,8 +1194,13 @@ tiqdio_is_inbound_q_done(struct qdio_q *q)
if (!q->siga_sync)
/* we'll check for more primed buffers in qeth_stop_polling */
return 0;
-
- if (q->slsb.acc.val[q->first_to_check]!=SLSB_P_INPUT_PRIMED)
+ if (irq->is_qebsm) {
+ count = 1;
+ start_buf = q->first_to_check;
+ qdio_do_eqbs(q, &state, &start_buf, &count);
+ } else
+ state = q->slsb.acc.val[q->first_to_check];
+ if (state != SLSB_P_INPUT_PRIMED)
/*
* nothing more to do, if next buffer is not PRIMED.
* note that we did a SYNC_MEMORY before, that there
@@ -955,6 +1227,10 @@ static inline int
qdio_is_inbound_q_done(struct qdio_q *q)
{
int no_used;
+ unsigned int start_buf, count;
+ unsigned char state = 0;
+ struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
+
#ifdef CONFIG_QDIO_DEBUG
char dbf_text[15];
#endif
@@ -973,8 +1249,13 @@ qdio_is_inbound_q_done(struct qdio_q *q)
QDIO_DBF_TEXT4(0,trace,dbf_text);
return 1;
}
-
- if (q->slsb.acc.val[q->first_to_check]==SLSB_P_INPUT_PRIMED) {
+ if (irq->is_qebsm) {
+ count = 1;
+ start_buf = q->first_to_check;
+ qdio_do_eqbs(q, &state, &start_buf, &count);
+ } else
+ state = q->slsb.acc.val[q->first_to_check];
+ if (state == SLSB_P_INPUT_PRIMED) {
/* we got something to do */
QDIO_DBF_TEXT4(0,trace,"inqisntA");
QDIO_DBF_HEX4(0,trace,&q,sizeof(void*));
@@ -1456,7 +1737,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev,
void *ptr;
int available;
- sprintf(dbf_text,"qfqs%4x",cdev->private->irq);
+ sprintf(dbf_text,"qfqs%4x",cdev->private->sch_no);
QDIO_DBF_TEXT0(0,setup,dbf_text);
for (i=0;i<no_input_qs;i++) {
q=irq_ptr->input_qs[i];
@@ -1476,7 +1757,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev,
q->queue_type=q_format;
q->int_parm=int_parm;
- q->irq=irq_ptr->irq;
+ q->schid = irq_ptr->schid;
q->irq_ptr = irq_ptr;
q->cdev = cdev;
q->mask=1<<(31-i);
@@ -1523,11 +1804,11 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev,
QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*));
/* fill in slsb */
- for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++) {
- set_slsb(&q->slsb.acc.val[j],
- SLSB_P_INPUT_NOT_INIT);
-/* q->sbal[j]->element[1].sbalf.i1.key=QDIO_STORAGE_KEY;*/
- }
+ if (!irq_ptr->is_qebsm) {
+ unsigned int count = 1;
+ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
+ set_slsb(q, &j, SLSB_P_INPUT_NOT_INIT, &count);
+ }
}
for (i=0;i<no_output_qs;i++) {
@@ -1549,7 +1830,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev,
q->queue_type=q_format;
q->int_parm=int_parm;
q->is_input_q=0;
- q->irq=irq_ptr->irq;
+ q->schid = irq_ptr->schid;
q->cdev = cdev;
q->irq_ptr = irq_ptr;
q->mask=1<<(31-i);
@@ -1584,11 +1865,11 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev,
QDIO_DBF_HEX2(0,setup,&ptr,sizeof(void*));
/* fill in slsb */
- for (j=0;j<QDIO_MAX_BUFFERS_PER_Q;j++) {
- set_slsb(&q->slsb.acc.val[j],
- SLSB_P_OUTPUT_NOT_INIT);
-/* q->sbal[j]->element[1].sbalf.i1.key=QDIO_STORAGE_KEY;*/
- }
+ if (!irq_ptr->is_qebsm) {
+ unsigned int count = 1;
+ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++)
+ set_slsb(q, &j, SLSB_P_OUTPUT_NOT_INIT, &count);
+ }
}
}
@@ -1656,7 +1937,7 @@ qdio_set_state(struct qdio_irq *irq_ptr, enum qdio_irq_states state)
char dbf_text[15];
QDIO_DBF_TEXT5(0,trace,"newstate");
- sprintf(dbf_text,"%4x%4x",irq_ptr->irq,state);
+ sprintf(dbf_text,"%4x%4x",irq_ptr->schid.sch_no,state);
QDIO_DBF_TEXT5(0,trace,dbf_text);
#endif /* CONFIG_QDIO_DEBUG */
@@ -1669,12 +1950,12 @@ qdio_set_state(struct qdio_irq *irq_ptr, enum qdio_irq_states state)
}
static inline void
-qdio_irq_check_sense(int irq, struct irb *irb)
+qdio_irq_check_sense(struct subchannel_id schid, struct irb *irb)
{
char dbf_text[15];
if (irb->esw.esw0.erw.cons) {
- sprintf(dbf_text,"sens%4x",irq);
+ sprintf(dbf_text,"sens%4x",schid.sch_no);
QDIO_DBF_TEXT2(1,trace,dbf_text);
QDIO_DBF_HEX0(0,sense,irb,QDIO_DBF_SENSE_LEN);
@@ -1785,21 +2066,22 @@ qdio_timeout_handler(struct ccw_device *cdev)
switch (irq_ptr->state) {
case QDIO_IRQ_STATE_INACTIVE:
- QDIO_PRINT_ERR("establish queues on irq %04x: timed out\n",
- irq_ptr->irq);
+ QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: timed out\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
QDIO_DBF_TEXT2(1,setup,"eq:timeo");
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
break;
case QDIO_IRQ_STATE_CLEANUP:
- QDIO_PRINT_INFO("Did not get interrupt on cleanup, irq=0x%x.\n",
- irq_ptr->irq);
+ QDIO_PRINT_INFO("Did not get interrupt on cleanup, "
+ "irq=0.%x.%x.\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
break;
case QDIO_IRQ_STATE_ESTABLISHED:
case QDIO_IRQ_STATE_ACTIVE:
/* I/O has been terminated by common I/O layer. */
- QDIO_PRINT_INFO("Queues on irq %04x killed by cio.\n",
- irq_ptr->irq);
+ QDIO_PRINT_INFO("Queues on irq 0.%x.%04x killed by cio.\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
QDIO_DBF_TEXT2(1, trace, "cio:term");
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED);
if (get_device(&cdev->dev)) {
@@ -1862,7 +2144,7 @@ qdio_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb)
}
}
- qdio_irq_check_sense(irq_ptr->irq, irb);
+ qdio_irq_check_sense(irq_ptr->schid, irb);
#ifdef CONFIG_QDIO_DEBUG
sprintf(dbf_text, "state:%d", irq_ptr->state);
@@ -1905,7 +2187,7 @@ int
qdio_synchronize(struct ccw_device *cdev, unsigned int flags,
unsigned int queue_number)
{
- int cc;
+ int cc = 0;
struct qdio_q *q;
struct qdio_irq *irq_ptr;
void *ptr;
@@ -1918,7 +2200,7 @@ qdio_synchronize(struct ccw_device *cdev, unsigned int flags,
return -ENODEV;
#ifdef CONFIG_QDIO_DEBUG
- *((int*)(&dbf_text[4])) = irq_ptr->irq;
+ *((int*)(&dbf_text[4])) = irq_ptr->schid.sch_no;
QDIO_DBF_HEX4(0,trace,dbf_text,QDIO_DBF_TRACE_LEN);
*((int*)(&dbf_text[0]))=flags;
*((int*)(&dbf_text[4]))=queue_number;
@@ -1929,12 +2211,14 @@ qdio_synchronize(struct ccw_device *cdev, unsigned int flags,
q=irq_ptr->input_qs[queue_number];
if (!q)
return -EINVAL;
- cc = do_siga_sync(q->irq, 0, q->mask);
+ if (!(irq_ptr->is_qebsm))
+ cc = do_siga_sync(q->schid, 0, q->mask);
} else if (flags&QDIO_FLAG_SYNC_OUTPUT) {
q=irq_ptr->output_qs[queue_number];
if (!q)
return -EINVAL;
- cc = do_siga_sync(q->irq, q->mask, 0);
+ if (!(irq_ptr->is_qebsm))
+ cc = do_siga_sync(q->schid, q->mask, 0);
} else
return -EINVAL;
@@ -1945,15 +2229,54 @@ qdio_synchronize(struct ccw_device *cdev, unsigned int flags,
return cc;
}
-static unsigned char
-qdio_check_siga_needs(int sch)
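+/*
+ * Evaluate the QEBSM bit of the qdioac flags: if QEBSM cannot be
+ * used, clear sch_token and the QIB rflag again; otherwise store the
+ * subchannel token and initialize all SLSBs through the new
+ * interface.
+ */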
+static inline void
+qdio_check_subchannel_qebsm(struct qdio_irq *irq_ptr, unsigned char qdioac,
+ unsigned long token)
+{
+ struct qdio_q *q;
+ int i;
+ unsigned int count, start_buf;
+ char dbf_text[15];
+
+ /* check if QEBSM is disabled */
+ if (!(irq_ptr->is_qebsm) || !(qdioac & 0x01)) {
+ irq_ptr->is_qebsm = 0;
+ irq_ptr->sch_token = 0;
+ irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM;
+ QDIO_DBF_TEXT0(0,setup,"noV=V");
+ return;
+ }
+ irq_ptr->sch_token = token;
+ /* input queue */
+ for (i = 0; i < irq_ptr->no_input_qs;i++) {
+ q = irq_ptr->input_qs[i];
+ count = QDIO_MAX_BUFFERS_PER_Q;
+ start_buf = 0;
+ set_slsb(q, &start_buf, SLSB_P_INPUT_NOT_INIT, &count);
+ }
+ sprintf(dbf_text,"V=V:%2x",irq_ptr->is_qebsm);
+ QDIO_DBF_TEXT0(0,setup,dbf_text);
+ sprintf(dbf_text,"%8lx",irq_ptr->sch_token);
+ QDIO_DBF_TEXT0(0,setup,dbf_text);
+ /* output queue */
+ for (i = 0; i < irq_ptr->no_output_qs; i++) {
+ q = irq_ptr->output_qs[i];
+ count = QDIO_MAX_BUFFERS_PER_Q;
+ start_buf = 0;
+ set_slsb(q, &start_buf, SLSB_P_OUTPUT_NOT_INIT, &count);
+ }
+}
+
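+/*
+ * Fetch the subchannel's QDIO characteristics via CHSC (store
+ * subchannel QDIO data) and derive the qdioac SIGA flags; on any
+ * failure fall back to "all SIGAs required" and disable QEBSM.
+ */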
+static void
+qdio_get_ssqd_information(struct qdio_irq *irq_ptr)
{
int result;
unsigned char qdioac;
-
struct {
struct chsc_header request;
- u16 reserved1;
+ u16 reserved1:10;
+ u16 ssid:2;
+ u16 fmt:4;
u16 first_sch;
u16 reserved2;
u16 last_sch;
@@ -1964,67 +2287,83 @@ qdio_check_siga_needs(int sch)
u8 reserved5;
u16 sch;
u8 qfmt;
- u8 reserved6;
- u8 qdioac;
+ u8 parm;
+ u8 qdioac1;
u8 sch_class;
u8 reserved7;
u8 icnt;
u8 reserved8;
u8 ocnt;
+ u8 reserved9;
+ u8 mbccnt;
+ u16 qdioac2;
+ u64 sch_token;
} *ssqd_area;
+ QDIO_DBF_TEXT0(0,setup,"getssqd");
+ qdioac = 0;
ssqd_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!ssqd_area) {
QDIO_PRINT_WARN("Could not get memory for chsc. Using all " \
- "SIGAs for sch x%x.\n", sch);
- return CHSC_FLAG_SIGA_INPUT_NECESSARY ||
- CHSC_FLAG_SIGA_OUTPUT_NECESSARY ||
- CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */
+ "SIGAs for sch x%x.\n", irq_ptr->schid.sch_no);
+ irq_ptr->qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
+ CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
+ CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */
+ irq_ptr->is_qebsm = 0;
+ irq_ptr->sch_token = 0;
+ irq_ptr->qib.rflags &= ~QIB_RFLAGS_ENABLE_QEBSM;
+ return;
}
+
ssqd_area->request = (struct chsc_header) {
.length = 0x0010,
.code = 0x0024,
};
-
- ssqd_area->first_sch = sch;
- ssqd_area->last_sch = sch;
-
- result=chsc(ssqd_area);
+ ssqd_area->first_sch = irq_ptr->schid.sch_no;
+ ssqd_area->last_sch = irq_ptr->schid.sch_no;
+ ssqd_area->ssid = irq_ptr->schid.ssid;
+ result = chsc(ssqd_area);
if (result) {
QDIO_PRINT_WARN("CHSC returned cc %i. Using all " \
- "SIGAs for sch x%x.\n",
- result,sch);
+ "SIGAs for sch 0.%x.%x.\n", result,
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */
+ irq_ptr->is_qebsm = 0;
goto out;
}
if (ssqd_area->response.code != QDIO_CHSC_RESPONSE_CODE_OK) {
QDIO_PRINT_WARN("response upon checking SIGA needs " \
- "is 0x%x. Using all SIGAs for sch x%x.\n",
- ssqd_area->response.code, sch);
+ "is 0x%x. Using all SIGAs for sch 0.%x.%x.\n",
+ ssqd_area->response.code,
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
CHSC_FLAG_SIGA_SYNC_NECESSARY; /* all flags set */
+ irq_ptr->is_qebsm = 0;
goto out;
}
if (!(ssqd_area->flags & CHSC_FLAG_QDIO_CAPABILITY) ||
!(ssqd_area->flags & CHSC_FLAG_VALIDITY) ||
- (ssqd_area->sch != sch)) {
- QDIO_PRINT_WARN("huh? problems checking out sch x%x... " \
- "using all SIGAs.\n",sch);
+ (ssqd_area->sch != irq_ptr->schid.sch_no)) {
+ QDIO_PRINT_WARN("huh? problems checking out sch 0.%x.%x... " \
+ "using all SIGAs.\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
qdioac = CHSC_FLAG_SIGA_INPUT_NECESSARY |
CHSC_FLAG_SIGA_OUTPUT_NECESSARY |
CHSC_FLAG_SIGA_SYNC_NECESSARY; /* worst case */
+ irq_ptr->is_qebsm = 0;
goto out;
}
-
- qdioac = ssqd_area->qdioac;
+ qdioac = ssqd_area->qdioac1;
out:
+ qdio_check_subchannel_qebsm(irq_ptr, qdioac,
+ ssqd_area->sch_token);
free_page ((unsigned long) ssqd_area);
- return qdioac;
+ irq_ptr->qdioac = qdioac;
}
static unsigned int
@@ -2055,6 +2394,13 @@ tiqdio_check_chsc_availability(void)
sprintf(dbf_text,"hydrati%1x", hydra_thinints);
QDIO_DBF_TEXT0(0,setup,dbf_text);
+#ifdef CONFIG_64BIT
+ /* Check for QEBSM support in general (bit 58). */
+ is_passthrough = css_general_characteristics.qebsm;
+#endif
+ sprintf(dbf_text,"cssQBS:%1x", is_passthrough);
+ QDIO_DBF_TEXT0(0,setup,dbf_text);
+
/* Check for aif time delay disablement fac (bit 56). If installed,
* omit svs even under lpar (good point by rick again) */
omit_svs = css_general_characteristics.aif_tdd;
@@ -2091,7 +2437,7 @@ tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero)
/* set to 0x10000000 to enable
* time delay disablement facility */
u32 reserved5;
- u32 subsystem_id;
+ struct subchannel_id schid;
u32 reserved6[1004];
struct chsc_header response;
u32 reserved7;
@@ -2113,7 +2459,8 @@ tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero)
scssc_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!scssc_area) {
QDIO_PRINT_WARN("No memory for setting indicators on " \
- "subchannel x%x.\n", irq_ptr->irq);
+ "subchannel 0.%x.%x.\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
return -ENOMEM;
}
scssc_area->request = (struct chsc_header) {
@@ -2127,7 +2474,7 @@ tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero)
scssc_area->ks = QDIO_STORAGE_KEY;
scssc_area->kc = QDIO_STORAGE_KEY;
scssc_area->isc = TIQDIO_THININT_ISC;
- scssc_area->subsystem_id = (1<<16) + irq_ptr->irq;
+ scssc_area->schid = irq_ptr->schid;
/* enables the time delay disablement facility. Don't care
* whether it is really there (i.e. we haven't checked for
* it) */
@@ -2137,12 +2484,11 @@ tiqdio_set_subchannel_ind(struct qdio_irq *irq_ptr, int reset_to_zero)
QDIO_PRINT_WARN("Time delay disablement facility " \
"not available\n");
-
-
result = chsc(scssc_area);
if (result) {
- QDIO_PRINT_WARN("could not set indicators on irq x%x, " \
- "cc=%i.\n",irq_ptr->irq,result);
+ QDIO_PRINT_WARN("could not set indicators on irq 0.%x.%x, " \
+ "cc=%i.\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no,result);
result = -EIO;
goto out;
}
@@ -2198,7 +2544,8 @@ tiqdio_set_delay_target(struct qdio_irq *irq_ptr, unsigned long delay_target)
scsscf_area = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!scsscf_area) {
QDIO_PRINT_WARN("No memory for setting delay target on " \
- "subchannel x%x.\n", irq_ptr->irq);
+ "subchannel 0.%x.%x.\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
return -ENOMEM;
}
scsscf_area->request = (struct chsc_header) {
@@ -2210,8 +2557,10 @@ tiqdio_set_delay_target(struct qdio_irq *irq_ptr, unsigned long delay_target)
result=chsc(scsscf_area);
if (result) {
- QDIO_PRINT_WARN("could not set delay target on irq x%x, " \
- "cc=%i. Continuing.\n",irq_ptr->irq,result);
+ QDIO_PRINT_WARN("could not set delay target on irq 0.%x.%x, " \
+ "cc=%i. Continuing.\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
+ result);
result = -EIO;
goto out;
}
@@ -2245,7 +2594,7 @@ qdio_cleanup(struct ccw_device *cdev, int how)
if (!irq_ptr)
return -ENODEV;
- sprintf(dbf_text,"qcln%4x",irq_ptr->irq);
+ sprintf(dbf_text,"qcln%4x",irq_ptr->schid.sch_no);
QDIO_DBF_TEXT1(0,trace,dbf_text);
QDIO_DBF_TEXT0(0,setup,dbf_text);
@@ -2272,7 +2621,7 @@ qdio_shutdown(struct ccw_device *cdev, int how)
down(&irq_ptr->setting_up_sema);
- sprintf(dbf_text,"qsqs%4x",irq_ptr->irq);
+ sprintf(dbf_text,"qsqs%4x",irq_ptr->schid.sch_no);
QDIO_DBF_TEXT1(0,trace,dbf_text);
QDIO_DBF_TEXT0(0,setup,dbf_text);
@@ -2378,7 +2727,7 @@ qdio_free(struct ccw_device *cdev)
down(&irq_ptr->setting_up_sema);
- sprintf(dbf_text,"qfqs%4x",irq_ptr->irq);
+ sprintf(dbf_text,"qfqs%4x",irq_ptr->schid.sch_no);
QDIO_DBF_TEXT1(0,trace,dbf_text);
QDIO_DBF_TEXT0(0,setup,dbf_text);
@@ -2526,13 +2875,14 @@ qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat,
irq_ptr = cdev->private->qdio_data;
if (cstat || (dstat & ~(DEV_STAT_CHN_END|DEV_STAT_DEV_END))) {
- sprintf(dbf_text,"ick1%4x",irq_ptr->irq);
+ sprintf(dbf_text,"ick1%4x",irq_ptr->schid.sch_no);
QDIO_DBF_TEXT2(1,trace,dbf_text);
QDIO_DBF_HEX2(0,trace,&dstat,sizeof(int));
QDIO_DBF_HEX2(0,trace,&cstat,sizeof(int));
QDIO_PRINT_ERR("received check condition on establish " \
- "queues on irq 0x%x (cs=x%x, ds=x%x).\n",
- irq_ptr->irq,cstat,dstat);
+ "queues on irq 0.%x.%x (cs=x%x, ds=x%x).\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
+ cstat,dstat);
qdio_set_state(irq_ptr,QDIO_IRQ_STATE_ERR);
}
@@ -2540,9 +2890,10 @@ qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat,
QDIO_DBF_TEXT2(1,setup,"eq:no de");
QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat));
QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat));
- QDIO_PRINT_ERR("establish queues on irq %04x: didn't get "
+ QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: didn't get "
"device end: dstat=%02x, cstat=%02x\n",
- irq_ptr->irq, dstat, cstat);
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
+ dstat, cstat);
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
return 1;
}
@@ -2551,10 +2902,10 @@ qdio_establish_irq_check_for_errors(struct ccw_device *cdev, int cstat,
QDIO_DBF_TEXT2(1,setup,"eq:badio");
QDIO_DBF_HEX2(0,setup,&dstat, sizeof(dstat));
QDIO_DBF_HEX2(0,setup,&cstat, sizeof(cstat));
- QDIO_PRINT_ERR("establish queues on irq %04x: got "
+ QDIO_PRINT_ERR("establish queues on irq 0.%x.%04x: got "
"the following devstat: dstat=%02x, "
- "cstat=%02x\n",
- irq_ptr->irq, dstat, cstat);
+ "cstat=%02x\n", irq_ptr->schid.ssid,
+ irq_ptr->schid.sch_no, dstat, cstat);
qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR);
return 1;
}
@@ -2569,7 +2920,7 @@ qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat)
irq_ptr = cdev->private->qdio_data;
- sprintf(dbf_text,"qehi%4x",cdev->private->irq);
+ sprintf(dbf_text,"qehi%4x",cdev->private->sch_no);
QDIO_DBF_TEXT0(0,setup,dbf_text);
QDIO_DBF_TEXT0(0,trace,dbf_text);
@@ -2588,7 +2939,7 @@ qdio_initialize(struct qdio_initialize *init_data)
int rc;
char dbf_text[15];
- sprintf(dbf_text,"qini%4x",init_data->cdev->private->irq);
+ sprintf(dbf_text,"qini%4x",init_data->cdev->private->sch_no);
QDIO_DBF_TEXT0(0,setup,dbf_text);
QDIO_DBF_TEXT0(0,trace,dbf_text);
@@ -2609,7 +2960,7 @@ qdio_allocate(struct qdio_initialize *init_data)
struct qdio_irq *irq_ptr;
char dbf_text[15];
- sprintf(dbf_text,"qalc%4x",init_data->cdev->private->irq);
+ sprintf(dbf_text,"qalc%4x",init_data->cdev->private->sch_no);
QDIO_DBF_TEXT0(0,setup,dbf_text);
QDIO_DBF_TEXT0(0,trace,dbf_text);
if ( (init_data->no_input_qs>QDIO_MAX_QUEUES_PER_IRQ) ||
@@ -2682,7 +3033,7 @@ int qdio_fill_irq(struct qdio_initialize *init_data)
irq_ptr->int_parm=init_data->int_parm;
- irq_ptr->irq = init_data->cdev->private->irq;
+ irq_ptr->schid = ccw_device_get_subchannel_id(init_data->cdev);
irq_ptr->no_input_qs=init_data->no_input_qs;
irq_ptr->no_output_qs=init_data->no_output_qs;
@@ -2698,11 +3049,12 @@ int qdio_fill_irq(struct qdio_initialize *init_data)
QDIO_DBF_TEXT2(0,setup,dbf_text);
if (irq_ptr->is_thinint_irq) {
- irq_ptr->dev_st_chg_ind=qdio_get_indicator();
+ irq_ptr->dev_st_chg_ind = qdio_get_indicator();
QDIO_DBF_HEX1(0,setup,&irq_ptr->dev_st_chg_ind,sizeof(void*));
if (!irq_ptr->dev_st_chg_ind) {
QDIO_PRINT_WARN("no indicator location available " \
- "for irq 0x%x\n",irq_ptr->irq);
+ "for irq 0.%x.%x\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no);
qdio_release_irq_memory(irq_ptr);
return -ENOBUFS;
}
@@ -2747,6 +3099,10 @@ int qdio_fill_irq(struct qdio_initialize *init_data)
irq_ptr->qdr->qkey=QDIO_STORAGE_KEY;
/* fill in qib */
+ irq_ptr->is_qebsm = is_passthrough;
+ if (irq_ptr->is_qebsm)
+ irq_ptr->qib.rflags |= QIB_RFLAGS_ENABLE_QEBSM;
+
irq_ptr->qib.qfmt=init_data->q_format;
if (init_data->no_input_qs)
irq_ptr->qib.isliba=(unsigned long)(irq_ptr->input_qs[0]->slib);
@@ -2829,7 +3185,7 @@ qdio_establish(struct qdio_initialize *init_data)
tiqdio_set_delay_target(irq_ptr,TIQDIO_DELAY_TARGET);
}
- sprintf(dbf_text,"qest%4x",cdev->private->irq);
+ sprintf(dbf_text,"qest%4x",cdev->private->sch_no);
QDIO_DBF_TEXT0(0,setup,dbf_text);
QDIO_DBF_TEXT0(0,trace,dbf_text);
@@ -2855,9 +3211,10 @@ qdio_establish(struct qdio_initialize *init_data)
sprintf(dbf_text,"eq:io%4x",result);
QDIO_DBF_TEXT2(1,setup,dbf_text);
}
- QDIO_PRINT_WARN("establish queues on irq %04x: do_IO " \
- "returned %i, next try returned %i\n",
- irq_ptr->irq,result,result2);
+ QDIO_PRINT_WARN("establish queues on irq 0.%x.%04x: do_IO " \
+ "returned %i, next try returned %i\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
+ result, result2);
result=result2;
if (result)
ccw_device_set_timeout(cdev, 0);
@@ -2884,7 +3241,7 @@ qdio_establish(struct qdio_initialize *init_data)
return -EIO;
}
- irq_ptr->qdioac=qdio_check_siga_needs(irq_ptr->irq);
+ qdio_get_ssqd_information(irq_ptr);
/* if this gets set once, we're running under VM and can omit SVSes */
if (irq_ptr->qdioac&CHSC_FLAG_SIGA_SYNC_NECESSARY)
omit_svs=1;
@@ -2930,7 +3287,7 @@ qdio_activate(struct ccw_device *cdev, int flags)
goto out;
}
- sprintf(dbf_text,"qact%4x", irq_ptr->irq);
+ sprintf(dbf_text,"qact%4x", irq_ptr->schid.sch_no);
QDIO_DBF_TEXT2(0,setup,dbf_text);
QDIO_DBF_TEXT2(0,trace,dbf_text);
@@ -2955,9 +3312,10 @@ qdio_activate(struct ccw_device *cdev, int flags)
sprintf(dbf_text,"aq:io%4x",result);
QDIO_DBF_TEXT2(1,setup,dbf_text);
}
- QDIO_PRINT_WARN("activate queues on irq %04x: do_IO " \
- "returned %i, next try returned %i\n",
- irq_ptr->irq,result,result2);
+ QDIO_PRINT_WARN("activate queues on irq 0.%x.%04x: do_IO " \
+ "returned %i, next try returned %i\n",
+ irq_ptr->schid.ssid, irq_ptr->schid.sch_no,
+ result, result2);
result=result2;
}
@@ -3015,30 +3373,40 @@ static inline void
qdio_do_qdio_fill_input(struct qdio_q *q, unsigned int qidx,
unsigned int count, struct qdio_buffer *buffers)
{
+ struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
+
+ qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1);
+ if (irq->is_qebsm) {
+ while (count)
+ set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count);
+ return;
+ }
for (;;) {
- set_slsb(&q->slsb.acc.val[qidx],SLSB_CU_INPUT_EMPTY);
+ set_slsb(q, &qidx, SLSB_CU_INPUT_EMPTY, &count);
count--;
if (!count) break;
- qidx=(qidx+1)&(QDIO_MAX_BUFFERS_PER_Q-1);
+ qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1);
}
-
- /* not necessary, as the queues are synced during the SIGA read */
- /*SYNC_MEMORY;*/
}
static inline void
qdio_do_qdio_fill_output(struct qdio_q *q, unsigned int qidx,
unsigned int count, struct qdio_buffer *buffers)
{
+ struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
+
+ qidx &= (QDIO_MAX_BUFFERS_PER_Q - 1);
+ if (irq->is_qebsm) {
+ while (count)
+ set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count);
+ return;
+ }
+
for (;;) {
- set_slsb(&q->slsb.acc.val[qidx],SLSB_CU_OUTPUT_PRIMED);
+ set_slsb(q, &qidx, SLSB_CU_OUTPUT_PRIMED, &count);
count--;
if (!count) break;
- qidx=(qidx+1)&(QDIO_MAX_BUFFERS_PER_Q-1);
+ qidx = (qidx + 1) & (QDIO_MAX_BUFFERS_PER_Q - 1);
}
-
- /* SIGA write will sync the queues */
- /*SYNC_MEMORY;*/
}
static inline void
@@ -3083,6 +3451,9 @@ do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags,
struct qdio_buffer *buffers)
{
int used_elements;
+ unsigned int cnt, start_buf;
+ unsigned char state = 0;
+ struct qdio_irq *irq = (struct qdio_irq *) q->irq_ptr;
/* This is the outbound handling of queues */
#ifdef QDIO_PERFORMANCE_STATS
@@ -3115,9 +3486,15 @@ do_qdio_handle_outbound(struct qdio_q *q, unsigned int callflags,
* SYNC_MEMORY :-/ ), we try to
* fast-requeue buffers
*/
- if (q->slsb.acc.val[(qidx+QDIO_MAX_BUFFERS_PER_Q-1)
- &(QDIO_MAX_BUFFERS_PER_Q-1)]!=
- SLSB_CU_OUTPUT_PRIMED) {
+ if (irq->is_qebsm) {
+ cnt = 1;
+ start_buf = ((qidx+QDIO_MAX_BUFFERS_PER_Q-1) &
+ (QDIO_MAX_BUFFERS_PER_Q-1));
+ qdio_do_eqbs(q, &state, &start_buf, &cnt);
+ } else
+ state = q->slsb.acc.val[(qidx+QDIO_MAX_BUFFERS_PER_Q-1)
+ &(QDIO_MAX_BUFFERS_PER_Q-1) ];
+ if (state != SLSB_CU_OUTPUT_PRIMED) {
qdio_kick_outbound_q(q);
} else {
QDIO_DBF_TEXT3(0,trace, "fast-req");
@@ -3150,7 +3527,7 @@ do_QDIO(struct ccw_device *cdev,unsigned int callflags,
#ifdef CONFIG_QDIO_DEBUG
char dbf_text[20];
- sprintf(dbf_text,"doQD%04x",cdev->private->irq);
+ sprintf(dbf_text,"doQD%04x",cdev->private->sch_no);
QDIO_DBF_TEXT3(0,trace,dbf_text);
#endif /* CONFIG_QDIO_DEBUG */
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index 328e31cc685..fa385e761fe 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -3,14 +3,15 @@
#include <asm/page.h>
-#define VERSION_CIO_QDIO_H "$Revision: 1.33 $"
+#include "schid.h"
+
+#define VERSION_CIO_QDIO_H "$Revision: 1.40 $"
#ifdef CONFIG_QDIO_DEBUG
#define QDIO_VERBOSE_LEVEL 9
#else /* CONFIG_QDIO_DEBUG */
#define QDIO_VERBOSE_LEVEL 5
#endif /* CONFIG_QDIO_DEBUG */
-
#define QDIO_USE_PROCESSING_STATE
#ifdef CONFIG_QDIO_PERF_STATS
@@ -265,12 +266,64 @@ QDIO_PRINT_##importance(header "%02x %02x %02x %02x %02x %02x %02x %02x " \
/*
* Some instructions as assembly
*/
+
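+/*
+ * SQBS: set the state of *count buffers of the given queue, starting
+ * at *start, on the subchannel identified by its QEBSM token.  *count
+ * and *start are updated by the instruction and the ccq is returned;
+ * without CONFIG_64BIT the facility is unavailable and 0 is returned.
+ */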
+static inline int
+do_sqbs(unsigned long sch, unsigned char state, int queue,
+ unsigned int *start, unsigned int *count)
+{
+#ifdef CONFIG_64BIT
+ register unsigned long _ccq asm ("0") = *count;
+ register unsigned long _sch asm ("1") = sch;
+ unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
+
+ asm volatile (
+ " .insn rsy,0xeb000000008A,%1,0,0(%2)\n\t"
+ : "+d" (_ccq), "+d" (_queuestart)
+ : "d" ((unsigned long)state), "d" (_sch)
+ : "memory", "cc"
+ );
+ *count = _ccq & 0xff;
+ *start = _queuestart & 0xff;
+
+ return (_ccq >> 32) & 0xff;
+#else
+ return 0;
+#endif
+}
+
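+/*
+ * EQBS: extract the common state of up to *count buffers, starting
+ * at *start, into *state.  *count and *start are updated just as for
+ * do_sqbs above, and the ccq is returned.
+ */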
+static inline int
+do_eqbs(unsigned long sch, unsigned char *state, int queue,
+ unsigned int *start, unsigned int *count)
+{
+#ifdef CONFIG_64BIT
+ register unsigned long _ccq asm ("0") = *count;
+ register unsigned long _sch asm ("1") = sch;
+ unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
+ unsigned long _state = 0;
+
+ asm volatile (
+ " .insn rrf,0xB99c0000,%1,%2,0,0 \n\t"
+ : "+d" (_ccq), "+d" (_queuestart), "+d" (_state)
+ : "d" (_sch)
+ : "memory", "cc"
+ );
+ *count = _ccq & 0xff;
+ *start = _queuestart & 0xff;
+ *state = _state & 0xff;
+
+ return (_ccq >> 32) & 0xff;
+#else
+ return 0;
+#endif
+}
+
static inline int
-do_siga_sync(unsigned int irq, unsigned int mask1, unsigned int mask2)
+do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2)
{
int cc;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
asm volatile (
"lhi 0,2 \n\t"
"lr 1,%1 \n\t"
@@ -280,10 +333,10 @@ do_siga_sync(unsigned int irq, unsigned int mask1, unsigned int mask2)
"ipm %0 \n\t"
"srl %0,28 \n\t"
: "=d" (cc)
- : "d" (0x10000|irq), "d" (mask1), "d" (mask2)
+ : "d" (schid), "d" (mask1), "d" (mask2)
: "cc", "0", "1", "2", "3"
);
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
asm volatile (
"lghi 0,2 \n\t"
"llgfr 1,%1 \n\t"
@@ -293,19 +346,19 @@ do_siga_sync(unsigned int irq, unsigned int mask1, unsigned int mask2)
"ipm %0 \n\t"
"srl %0,28 \n\t"
: "=d" (cc)
- : "d" (0x10000|irq), "d" (mask1), "d" (mask2)
+ : "d" (schid), "d" (mask1), "d" (mask2)
: "cc", "0", "1", "2", "3"
);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
return cc;
}
static inline int
-do_siga_input(unsigned int irq, unsigned int mask)
+do_siga_input(struct subchannel_id schid, unsigned int mask)
{
int cc;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
asm volatile (
"lhi 0,1 \n\t"
"lr 1,%1 \n\t"
@@ -314,10 +367,10 @@ do_siga_input(unsigned int irq, unsigned int mask)
"ipm %0 \n\t"
"srl %0,28 \n\t"
: "=d" (cc)
- : "d" (0x10000|irq), "d" (mask)
+ : "d" (schid), "d" (mask)
: "cc", "0", "1", "2", "memory"
);
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
asm volatile (
"lghi 0,1 \n\t"
"llgfr 1,%1 \n\t"
@@ -326,21 +379,22 @@ do_siga_input(unsigned int irq, unsigned int mask)
"ipm %0 \n\t"
"srl %0,28 \n\t"
: "=d" (cc)
- : "d" (0x10000|irq), "d" (mask)
+ : "d" (schid), "d" (mask)
: "cc", "0", "1", "2", "memory"
);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
return cc;
}
static inline int
-do_siga_output(unsigned long irq, unsigned long mask, __u32 *bb)
+do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb,
+ unsigned int fc)
{
int cc;
__u32 busy_bit;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
asm volatile (
"lhi 0,0 \n\t"
"lr 1,%2 \n\t"
@@ -366,14 +420,14 @@ do_siga_output(unsigned long irq, unsigned long mask, __u32 *bb)
".long 0b,2b \n\t"
".previous \n\t"
: "=d" (cc), "=d" (busy_bit)
- : "d" (0x10000|irq), "d" (mask),
+ : "d" (schid), "d" (mask),
"i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION)
: "cc", "0", "1", "2", "memory"
);
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
asm volatile (
- "lghi 0,0 \n\t"
- "llgfr 1,%2 \n\t"
+ "llgfr 0,%5 \n\t"
+ "lgr 1,%2 \n\t"
"llgfr 2,%3 \n\t"
"siga 0 \n\t"
"0:"
@@ -391,11 +445,11 @@ do_siga_output(unsigned long irq, unsigned long mask, __u32 *bb)
".quad 0b,1b \n\t"
".previous \n\t"
: "=d" (cc), "=d" (busy_bit)
- : "d" (0x10000|irq), "d" (mask),
- "i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION)
+ : "d" (schid), "d" (mask),
+ "i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION), "d" (fc)
: "cc", "0", "1", "2", "memory"
);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
(*bb) = busy_bit;
return cc;
@@ -407,21 +461,21 @@ do_clear_global_summary(void)
unsigned long time;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
asm volatile (
"lhi 1,3 \n\t"
".insn rre,0xb2650000,2,0 \n\t"
"lr %0,3 \n\t"
: "=d" (time) : : "cc", "1", "2", "3"
);
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
asm volatile (
"lghi 1,3 \n\t"
".insn rre,0xb2650000,2,0 \n\t"
"lgr %0,3 \n\t"
: "=d" (time) : : "cc", "1", "2", "3"
);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
return time;
}
@@ -488,42 +542,21 @@ struct qdio_perf_stats {
#define MY_MODULE_STRING(x) #x
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
#define QDIO_GET_ADDR(x) ((__u32)(unsigned long)x)
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
#define QDIO_GET_ADDR(x) ((__u32)(long)x)
-#endif /* CONFIG_ARCH_S390X */
-
-#ifdef CONFIG_QDIO_DEBUG
-#define set_slsb(x,y) \
- if(q->queue_type==QDIO_TRACE_QTYPE) { \
- if(q->is_input_q) { \
- QDIO_DBF_HEX2(0,slsb_in,&q->slsb,QDIO_MAX_BUFFERS_PER_Q); \
- } else { \
- QDIO_DBF_HEX2(0,slsb_out,&q->slsb,QDIO_MAX_BUFFERS_PER_Q); \
- } \
- } \
- qdio_set_slsb(x,y); \
- if(q->queue_type==QDIO_TRACE_QTYPE) { \
- if(q->is_input_q) { \
- QDIO_DBF_HEX2(0,slsb_in,&q->slsb,QDIO_MAX_BUFFERS_PER_Q); \
- } else { \
- QDIO_DBF_HEX2(0,slsb_out,&q->slsb,QDIO_MAX_BUFFERS_PER_Q); \
- } \
- }
-#else /* CONFIG_QDIO_DEBUG */
-#define set_slsb(x,y) qdio_set_slsb(x,y)
-#endif /* CONFIG_QDIO_DEBUG */
+#endif /* CONFIG_64BIT */
struct qdio_q {
volatile struct slsb slsb;
char unused[QDIO_MAX_BUFFERS_PER_Q];
- __u32 * volatile dev_st_chg_ind;
+ __u32 * dev_st_chg_ind;
int is_input_q;
- int irq;
+ struct subchannel_id schid;
struct ccw_device *cdev;
unsigned int is_iqdio_q;
@@ -568,6 +601,7 @@ struct qdio_q {
struct tasklet_struct tasklet;
#endif /* QDIO_USE_TIMERS_FOR_POLLING */
+
enum qdio_irq_states state;
/* used to store the error condition during a data transfer */
@@ -617,13 +651,17 @@ struct qdio_irq {
__u32 * volatile dev_st_chg_ind;
unsigned long int_parm;
- int irq;
+ struct subchannel_id schid;
unsigned int is_iqdio_irq;
unsigned int is_thinint_irq;
unsigned int hydra_gives_outbound_pcis;
unsigned int sync_done_on_outb_pcis;
+ /* QEBSM facility */
+ unsigned int is_qebsm;
+ unsigned long sch_token;
+
enum qdio_irq_states state;
unsigned int no_input_qs;
diff --git a/drivers/s390/cio/schid.h b/drivers/s390/cio/schid.h
new file mode 100644
index 00000000000..54328fec5ad
--- /dev/null
+++ b/drivers/s390/cio/schid.h
@@ -0,0 +1,26 @@
+#ifndef S390_SCHID_H
+#define S390_SCHID_H
+
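+/*
+ * Identifies a subchannel: subsystem id plus subchannel number.  The
+ * "one" bit is always set in a properly initialized schid.
+ */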
+struct subchannel_id {
+ __u32 reserved:13;
+ __u32 ssid:2;
+ __u32 one:1;
+ __u32 sch_no:16;
+} __attribute__ ((packed,aligned(4)));
+
+/* Helper function for sane state of pre-allocated subchannel_id. */
+static inline void
+init_subchannel_id(struct subchannel_id *schid)
+{
+ memset(schid, 0, sizeof(struct subchannel_id));
+ schid->one = 1;
+}
+
+static inline int
+schid_equal(struct subchannel_id *schid1, struct subchannel_id *schid2)
+{
+ return !memcmp(schid1, schid2, sizeof(struct subchannel_id));
+}
+
+#endif /* S390_SCHID_H */
diff --git a/drivers/s390/crypto/z90common.h b/drivers/s390/crypto/z90common.h
index e319e78b5ea..f87c785f203 100644
--- a/drivers/s390/crypto/z90common.h
+++ b/drivers/s390/crypto/z90common.h
@@ -1,9 +1,9 @@
/*
* linux/drivers/s390/crypto/z90common.h
*
- * z90crypt 1.3.2
+ * z90crypt 1.3.3
*
- * Copyright (C) 2001, 2004 IBM Corporation
+ * Copyright (C) 2001, 2005 IBM Corporation
* Author(s): Robert Burroughs (burrough@us.ibm.com)
* Eric Rossman (edrossma@us.ibm.com)
*
@@ -91,12 +91,13 @@ enum hdstat {
#define TSQ_FATAL_ERROR 34
#define RSQ_FATAL_ERROR 35
-#define Z90CRYPT_NUM_TYPES 5
+#define Z90CRYPT_NUM_TYPES 6
#define PCICA 0
#define PCICC 1
#define PCIXCC_MCL2 2
#define PCIXCC_MCL3 3
#define CEX2C 4
+#define CEX2A 5
#define NILDEV -1
#define ANYDEV -1
#define PCIXCC_UNK -2
@@ -105,7 +106,7 @@ enum hdevice_type {
PCICC_HW = 3,
PCICA_HW = 4,
PCIXCC_HW = 5,
- OTHER_HW = 6,
+ CEX2A_HW = 6,
CEX2C_HW = 7
};
diff --git a/drivers/s390/crypto/z90crypt.h b/drivers/s390/crypto/z90crypt.h
index 0a3bb5a10dd..3a18443fdfa 100644
--- a/drivers/s390/crypto/z90crypt.h
+++ b/drivers/s390/crypto/z90crypt.h
@@ -1,9 +1,9 @@
/*
* linux/drivers/s390/crypto/z90crypt.h
*
- * z90crypt 1.3.2
+ * z90crypt 1.3.3
*
- * Copyright (C) 2001, 2004 IBM Corporation
+ * Copyright (C) 2001, 2005 IBM Corporation
* Author(s): Robert Burroughs (burrough@us.ibm.com)
* Eric Rossman (edrossma@us.ibm.com)
*
@@ -29,11 +29,11 @@
#include <linux/ioctl.h>
-#define VERSION_Z90CRYPT_H "$Revision: 1.11 $"
+#define VERSION_Z90CRYPT_H "$Revision: 1.2.2.4 $"
#define z90crypt_VERSION 1
#define z90crypt_RELEASE 3 // 2 = PCIXCC, 3 = rewrite for coding standards
-#define z90crypt_VARIANT 2 // 2 = added PCIXCC MCL3 and CEX2C support
+#define z90crypt_VARIANT 3 // 3 = CEX2A support
/**
* struct ica_rsa_modexpo
@@ -122,6 +122,9 @@ struct ica_rsa_modexpo_crt {
* Z90STAT_CEX2CCOUNT
* Return an integer count of all CEX2Cs.
*
+ * Z90STAT_CEX2ACOUNT
+ * Return an integer count of all CEX2As.
+ *
* Z90STAT_REQUESTQ_COUNT
* Return an integer count of the number of entries waiting to be
* sent to a device.
@@ -144,6 +147,7 @@ struct ica_rsa_modexpo_crt {
* 0x03: PCIXCC_MCL2
* 0x04: PCIXCC_MCL3
* 0x05: CEX2C
+ * 0x06: CEX2A
* 0x0d: device is disabled via the proc filesystem
*
* Z90STAT_QDEPTH_MASK
@@ -199,6 +203,7 @@ struct ica_rsa_modexpo_crt {
#define Z90STAT_PCIXCCMCL2COUNT _IOR(Z90_IOCTL_MAGIC, 0x4b, int)
#define Z90STAT_PCIXCCMCL3COUNT _IOR(Z90_IOCTL_MAGIC, 0x4c, int)
#define Z90STAT_CEX2CCOUNT _IOR(Z90_IOCTL_MAGIC, 0x4d, int)
+#define Z90STAT_CEX2ACOUNT _IOR(Z90_IOCTL_MAGIC, 0x4e, int)
#define Z90STAT_REQUESTQ_COUNT _IOR(Z90_IOCTL_MAGIC, 0x44, int)
#define Z90STAT_PENDINGQ_COUNT _IOR(Z90_IOCTL_MAGIC, 0x45, int)
#define Z90STAT_TOTALOPEN_COUNT _IOR(Z90_IOCTL_MAGIC, 0x46, int)
diff --git a/drivers/s390/crypto/z90hardware.c b/drivers/s390/crypto/z90hardware.c
index c215e088973..d7f7494a0cb 100644
--- a/drivers/s390/crypto/z90hardware.c
+++ b/drivers/s390/crypto/z90hardware.c
@@ -1,9 +1,9 @@
/*
* linux/drivers/s390/crypto/z90hardware.c
*
- * z90crypt 1.3.2
+ * z90crypt 1.3.3
*
- * Copyright (C) 2001, 2004 IBM Corporation
+ * Copyright (C) 2001, 2005 IBM Corporation
* Author(s): Robert Burroughs (burrough@us.ibm.com)
* Eric Rossman (edrossma@us.ibm.com)
*
@@ -648,6 +648,87 @@ static struct cca_public_sec static_cca_pub_sec = {
#define RESPONSE_CPRB_SIZE 0x000006B8
#define RESPONSE_CPRBX_SIZE 0x00000724
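+/*
+ * Type 50 request messages: the RSA request format sent to CEX2A
+ * devices, as modulus-exponent (MEB) or CRT (CRB) blocks in two key
+ * sizes each.
+ */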
+struct type50_hdr {
+ u8 reserved1;
+ u8 msg_type_code;
+ u16 msg_len;
+ u8 reserved2;
+ u8 ignored;
+ u16 reserved3;
+};
+
+#define TYPE50_TYPE_CODE 0x50
+
+#define TYPE50_MEB1_LEN (sizeof(struct type50_meb1_msg))
+#define TYPE50_MEB2_LEN (sizeof(struct type50_meb2_msg))
+#define TYPE50_CRB1_LEN (sizeof(struct type50_crb1_msg))
+#define TYPE50_CRB2_LEN (sizeof(struct type50_crb2_msg))
+
+#define TYPE50_MEB1_FMT 0x0001
+#define TYPE50_MEB2_FMT 0x0002
+#define TYPE50_CRB1_FMT 0x0011
+#define TYPE50_CRB2_FMT 0x0012
+
+struct type50_meb1_msg {
+ struct type50_hdr header;
+ u16 keyblock_type;
+ u8 reserved[6];
+ u8 exponent[128];
+ u8 modulus[128];
+ u8 message[128];
+};
+
+struct type50_meb2_msg {
+ struct type50_hdr header;
+ u16 keyblock_type;
+ u8 reserved[6];
+ u8 exponent[256];
+ u8 modulus[256];
+ u8 message[256];
+};
+
+struct type50_crb1_msg {
+ struct type50_hdr header;
+ u16 keyblock_type;
+ u8 reserved[6];
+ u8 p[64];
+ u8 q[64];
+ u8 dp[64];
+ u8 dq[64];
+ u8 u[64];
+ u8 message[128];
+};
+
+struct type50_crb2_msg {
+ struct type50_hdr header;
+ u16 keyblock_type;
+ u8 reserved[6];
+ u8 p[128];
+ u8 q[128];
+ u8 dp[128];
+ u8 dq[128];
+ u8 u[128];
+ u8 message[256];
+};
+
+union type50_msg {
+ struct type50_meb1_msg meb1;
+ struct type50_meb2_msg meb2;
+ struct type50_crb1_msg crb1;
+ struct type50_crb2_msg crb2;
+};
+
+struct type80_hdr {
+ u8 reserved1;
+ u8 type;
+ u16 len;
+ u8 code;
+ u8 reserved2[3];
+ u8 reserved3[8];
+};
+
+#define TYPE80_RSP_CODE 0x80
+
struct error_hdr {
unsigned char reserved1;
unsigned char type;
@@ -657,6 +738,7 @@ struct error_hdr {
};
#define TYPE82_RSP_CODE 0x82
+#define TYPE88_RSP_CODE 0x88
#define REP82_ERROR_MACHINE_FAILURE 0x10
#define REP82_ERROR_PREEMPT_FAILURE 0x12
@@ -679,6 +761,22 @@ struct error_hdr {
#define REP82_ERROR_PACKET_TRUNCATED 0xA0
#define REP82_ERROR_ZERO_BUFFER_LEN 0xB0
+#define REP88_ERROR_MODULE_FAILURE 0x10
+#define REP88_ERROR_MODULE_TIMEOUT 0x11
+#define REP88_ERROR_MODULE_NOTINIT 0x13
+#define REP88_ERROR_MODULE_NOTAVAIL 0x14
+#define REP88_ERROR_MODULE_DISABLED 0x15
+#define REP88_ERROR_MODULE_IN_DIAGN 0x17
+#define REP88_ERROR_FASTPATH_DISABLD 0x19
+#define REP88_ERROR_MESSAGE_TYPE 0x20
+#define REP88_ERROR_MESSAGE_MALFORMD 0x22
+#define REP88_ERROR_MESSAGE_LENGTH 0x23
+#define REP88_ERROR_RESERVED_FIELD 0x24
+#define REP88_ERROR_KEY_TYPE 0x34
+#define REP88_ERROR_INVALID_KEY 0x82
+#define REP88_ERROR_OPERAND 0x84
+#define REP88_ERROR_OPERAND_EVEN_MOD 0x85
+
#define CALLER_HEADER 12
static inline int
@@ -687,7 +785,7 @@ testq(int q_nr, int *q_depth, int *dev_type, struct ap_status_word *stat)
int ccode;
asm volatile
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
(" llgfr 0,%4 \n"
" slgr 1,1 \n"
" lgr 2,1 \n"
@@ -757,7 +855,7 @@ resetq(int q_nr, struct ap_status_word *stat_p)
int ccode;
asm volatile
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
(" llgfr 0,%2 \n"
" lghi 1,1 \n"
" sll 1,24 \n"
@@ -823,7 +921,7 @@ sen(int msg_len, unsigned char *msg_ext, struct ap_status_word *stat)
int ccode;
asm volatile
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
(" lgr 6,%3 \n"
" llgfr 7,%2 \n"
" llgt 0,0(6) \n"
@@ -902,7 +1000,7 @@ rec(int q_nr, int buff_l, unsigned char *rsp, unsigned char *id,
int ccode;
asm volatile
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
(" llgfr 0,%2 \n"
" lgr 3,%4 \n"
" lgr 6,%3 \n"
@@ -1029,10 +1127,6 @@ query_online(int deviceNr, int cdx, int resetNr, int *q_depth, int *dev_type)
stat = HD_ONLINE;
*q_depth = t_depth + 1;
switch (t_dev_type) {
- case OTHER_HW:
- stat = HD_NOT_THERE;
- *dev_type = NILDEV;
- break;
case PCICA_HW:
*dev_type = PCICA;
break;
@@ -1045,6 +1139,9 @@ query_online(int deviceNr, int cdx, int resetNr, int *q_depth, int *dev_type)
case CEX2C_HW:
*dev_type = CEX2C;
break;
+ case CEX2A_HW:
+ *dev_type = CEX2A;
+ break;
default:
*dev_type = NILDEV;
break;
@@ -2029,6 +2126,177 @@ ICACRT_msg_to_type6CRT_msgX(struct ica_rsa_modexpo_crt *icaMsg_p, int cdx,
return 0;
}
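+/*
+ * Convert an ica_rsa_modexpo request into a type 50 MEB1/MEB2
+ * message: operands are right-aligned into the fixed-size fields,
+ * copied in from user space and rejected if empty.
+ */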
+static int
+ICAMEX_msg_to_type50MEX_msg(struct ica_rsa_modexpo *icaMex_p, int *z90cMsg_l_p,
+ union type50_msg *z90cMsg_p)
+{
+ int mod_len, msg_size, mod_tgt_len, exp_tgt_len, inp_tgt_len;
+ unsigned char *mod_tgt, *exp_tgt, *inp_tgt;
+ union type50_msg *tmp_type50_msg;
+
+ mod_len = icaMex_p->inputdatalength;
+
+ msg_size = ((mod_len <= 128) ? TYPE50_MEB1_LEN : TYPE50_MEB2_LEN) +
+ CALLER_HEADER;
+
+ memset(z90cMsg_p, 0, msg_size);
+
+ tmp_type50_msg = (union type50_msg *)
+ ((unsigned char *) z90cMsg_p + CALLER_HEADER);
+
+ tmp_type50_msg->meb1.header.msg_type_code = TYPE50_TYPE_CODE;
+
+ if (mod_len <= 128) {
+ tmp_type50_msg->meb1.header.msg_len = TYPE50_MEB1_LEN;
+ tmp_type50_msg->meb1.keyblock_type = TYPE50_MEB1_FMT;
+ mod_tgt = tmp_type50_msg->meb1.modulus;
+ mod_tgt_len = sizeof(tmp_type50_msg->meb1.modulus);
+ exp_tgt = tmp_type50_msg->meb1.exponent;
+ exp_tgt_len = sizeof(tmp_type50_msg->meb1.exponent);
+ inp_tgt = tmp_type50_msg->meb1.message;
+ inp_tgt_len = sizeof(tmp_type50_msg->meb1.message);
+ } else {
+ tmp_type50_msg->meb2.header.msg_len = TYPE50_MEB2_LEN;
+ tmp_type50_msg->meb2.keyblock_type = TYPE50_MEB2_FMT;
+ mod_tgt = tmp_type50_msg->meb2.modulus;
+ mod_tgt_len = sizeof(tmp_type50_msg->meb2.modulus);
+ exp_tgt = tmp_type50_msg->meb2.exponent;
+ exp_tgt_len = sizeof(tmp_type50_msg->meb2.exponent);
+ inp_tgt = tmp_type50_msg->meb2.message;
+ inp_tgt_len = sizeof(tmp_type50_msg->meb2.message);
+ }
+
+ mod_tgt += (mod_tgt_len - mod_len);
+ if (copy_from_user(mod_tgt, icaMex_p->n_modulus, mod_len))
+ return SEN_RELEASED;
+ if (is_empty(mod_tgt, mod_len))
+ return SEN_USER_ERROR;
+ exp_tgt += (exp_tgt_len - mod_len);
+ if (copy_from_user(exp_tgt, icaMex_p->b_key, mod_len))
+ return SEN_RELEASED;
+ if (is_empty(exp_tgt, mod_len))
+ return SEN_USER_ERROR;
+ inp_tgt += (inp_tgt_len - mod_len);
+ if (copy_from_user(inp_tgt, icaMex_p->inputdata, mod_len))
+ return SEN_RELEASED;
+ if (is_empty(inp_tgt, mod_len))
+ return SEN_USER_ERROR;
+
+ *z90cMsg_l_p = msg_size - CALLER_HEADER;
+
+ return 0;
+}
+
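+/*
+ * CRT counterpart: p, dp and u carry 8 bytes more than q and dq;
+ * operands longer than the 128-byte fields are accepted only if
+ * their leading bytes are zero.
+ */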
+static int
+ICACRT_msg_to_type50CRT_msg(struct ica_rsa_modexpo_crt *icaMsg_p,
+ int *z90cMsg_l_p, union type50_msg *z90cMsg_p)
+{
+ int mod_len, short_len, long_len, tmp_size, p_tgt_len, q_tgt_len,
+ dp_tgt_len, dq_tgt_len, u_tgt_len, inp_tgt_len, long_offset;
+ unsigned char *p_tgt, *q_tgt, *dp_tgt, *dq_tgt, *u_tgt, *inp_tgt,
+ temp[8];
+ union type50_msg *tmp_type50_msg;
+
+ mod_len = icaMsg_p->inputdatalength;
+ short_len = mod_len / 2;
+ long_len = mod_len / 2 + 8;
+ long_offset = 0;
+
+ if (long_len > 128) {
+ memset(temp, 0x00, sizeof(temp));
+ if (copy_from_user(temp, icaMsg_p->np_prime, long_len-128))
+ return SEN_RELEASED;
+ if (!is_empty(temp, 8))
+ return SEN_NOT_AVAIL;
+ if (copy_from_user(temp, icaMsg_p->bp_key, long_len-128))
+ return SEN_RELEASED;
+ if (!is_empty(temp, 8))
+ return SEN_NOT_AVAIL;
+ if (copy_from_user(temp, icaMsg_p->u_mult_inv, long_len-128))
+ return SEN_RELEASED;
+ if (!is_empty(temp, 8))
+ return SEN_NOT_AVAIL;
+ long_offset = long_len - 128;
+ long_len = 128;
+ }
+
+ tmp_size = ((mod_len <= 128) ? TYPE50_CRB1_LEN : TYPE50_CRB2_LEN) +
+ CALLER_HEADER;
+
+ memset(z90cMsg_p, 0, tmp_size);
+
+ tmp_type50_msg = (union type50_msg *)
+ ((unsigned char *) z90cMsg_p + CALLER_HEADER);
+
+ tmp_type50_msg->crb1.header.msg_type_code = TYPE50_TYPE_CODE;
+ if (long_len <= 64) {
+ tmp_type50_msg->crb1.header.msg_len = TYPE50_CRB1_LEN;
+ tmp_type50_msg->crb1.keyblock_type = TYPE50_CRB1_FMT;
+ p_tgt = tmp_type50_msg->crb1.p;
+ p_tgt_len = sizeof(tmp_type50_msg->crb1.p);
+ q_tgt = tmp_type50_msg->crb1.q;
+ q_tgt_len = sizeof(tmp_type50_msg->crb1.q);
+ dp_tgt = tmp_type50_msg->crb1.dp;
+ dp_tgt_len = sizeof(tmp_type50_msg->crb1.dp);
+ dq_tgt = tmp_type50_msg->crb1.dq;
+ dq_tgt_len = sizeof(tmp_type50_msg->crb1.dq);
+ u_tgt = tmp_type50_msg->crb1.u;
+ u_tgt_len = sizeof(tmp_type50_msg->crb1.u);
+ inp_tgt = tmp_type50_msg->crb1.message;
+ inp_tgt_len = sizeof(tmp_type50_msg->crb1.message);
+ } else {
+ tmp_type50_msg->crb2.header.msg_len = TYPE50_CRB2_LEN;
+ tmp_type50_msg->crb2.keyblock_type = TYPE50_CRB2_FMT;
+ p_tgt = tmp_type50_msg->crb2.p;
+ p_tgt_len = sizeof(tmp_type50_msg->crb2.p);
+ q_tgt = tmp_type50_msg->crb2.q;
+ q_tgt_len = sizeof(tmp_type50_msg->crb2.q);
+ dp_tgt = tmp_type50_msg->crb2.dp;
+ dp_tgt_len = sizeof(tmp_type50_msg->crb2.dp);
+ dq_tgt = tmp_type50_msg->crb2.dq;
+ dq_tgt_len = sizeof(tmp_type50_msg->crb2.dq);
+ u_tgt = tmp_type50_msg->crb2.u;
+ u_tgt_len = sizeof(tmp_type50_msg->crb2.u);
+ inp_tgt = tmp_type50_msg->crb2.message;
+ inp_tgt_len = sizeof(tmp_type50_msg->crb2.message);
+ }
+
+ p_tgt += (p_tgt_len - long_len);
+ if (copy_from_user(p_tgt, icaMsg_p->np_prime + long_offset, long_len))
+ return SEN_RELEASED;
+ if (is_empty(p_tgt, long_len))
+ return SEN_USER_ERROR;
+ q_tgt += (q_tgt_len - short_len);
+ if (copy_from_user(q_tgt, icaMsg_p->nq_prime, short_len))
+ return SEN_RELEASED;
+ if (is_empty(q_tgt, short_len))
+ return SEN_USER_ERROR;
+ dp_tgt += (dp_tgt_len - long_len);
+ if (copy_from_user(dp_tgt, icaMsg_p->bp_key + long_offset, long_len))
+ return SEN_RELEASED;
+ if (is_empty(dp_tgt, long_len))
+ return SEN_USER_ERROR;
+ dq_tgt += (dq_tgt_len - short_len);
+ if (copy_from_user(dq_tgt, icaMsg_p->bq_key, short_len))
+ return SEN_RELEASED;
+ if (is_empty(dq_tgt, short_len))
+ return SEN_USER_ERROR;
+ u_tgt += (u_tgt_len - long_len);
+ if (copy_from_user(u_tgt, icaMsg_p->u_mult_inv + long_offset, long_len))
+ return SEN_RELEASED;
+ if (is_empty(u_tgt, long_len))
+ return SEN_USER_ERROR;
+ inp_tgt += (inp_tgt_len - mod_len);
+ if (copy_from_user(inp_tgt, icaMsg_p->inputdata, mod_len))
+ return SEN_RELEASED;
+ if (is_empty(inp_tgt, mod_len))
+ return SEN_USER_ERROR;
+
+ *z90cMsg_l_p = tmp_size - CALLER_HEADER;
+
+ return 0;
+}
+
int
convert_request(unsigned char *buffer, int func, unsigned short function,
int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p)
@@ -2071,6 +2339,16 @@ convert_request(unsigned char *buffer, int func, unsigned short function,
cdx, msg_l_p, (struct type6_msg *) msg_p,
dev_type);
}
+ if (dev_type == CEX2A) {
+ if (func == ICARSACRT)
+ return ICACRT_msg_to_type50CRT_msg(
+ (struct ica_rsa_modexpo_crt *) buffer,
+ msg_l_p, (union type50_msg *) msg_p);
+ else
+ return ICAMEX_msg_to_type50MEX_msg(
+ (struct ica_rsa_modexpo *) buffer,
+ msg_l_p, (union type50_msg *) msg_p);
+ }
return 0;
}
@@ -2081,8 +2359,8 @@ unset_ext_bitlens(void)
{
if (!ext_bitlens_msg_count) {
PRINTK("Unable to use coprocessors for extended bitlengths. "
- "Using PCICAs (if present) for extended bitlengths. "
- "This is not an error.\n");
+ "Using PCICAs/CEX2As (if present) for extended "
+ "bitlengths. This is not an error.\n");
ext_bitlens_msg_count++;
}
ext_bitlens = 0;
@@ -2094,6 +2372,7 @@ convert_response(unsigned char *response, unsigned char *buffer,
{
struct ica_rsa_modexpo *icaMsg_p = (struct ica_rsa_modexpo *) buffer;
struct error_hdr *errh_p = (struct error_hdr *) response;
+ struct type80_hdr *t80h_p = (struct type80_hdr *) response;
struct type84_hdr *t84h_p = (struct type84_hdr *) response;
struct type86_fmt2_msg *t86m_p = (struct type86_fmt2_msg *) response;
int reply_code, service_rc, service_rs, src_l;
@@ -2108,6 +2387,7 @@ convert_response(unsigned char *response, unsigned char *buffer,
src_l = 0;
switch (errh_p->type) {
case TYPE82_RSP_CODE:
+ case TYPE88_RSP_CODE:
reply_code = errh_p->reply_code;
src_p = (unsigned char *)errh_p;
PRINTK("Hardware error: Type %02X Message Header: "
@@ -2116,6 +2396,10 @@ convert_response(unsigned char *response, unsigned char *buffer,
src_p[0], src_p[1], src_p[2], src_p[3],
src_p[4], src_p[5], src_p[6], src_p[7]);
break;
+ case TYPE80_RSP_CODE:
+ src_l = icaMsg_p->outputdatalength;
+ src_p = response + (int)t80h_p->len - src_l;
+ break;
case TYPE84_RSP_CODE:
src_l = icaMsg_p->outputdatalength;
src_p = response + (int)t84h_p->len - src_l;
@@ -2202,6 +2486,7 @@ convert_response(unsigned char *response, unsigned char *buffer,
if (reply_code)
switch (reply_code) {
case REP82_ERROR_OPERAND_INVALID:
+ case REP88_ERROR_MESSAGE_MALFORMD:
return REC_OPERAND_INV;
case REP82_ERROR_OPERAND_SIZE:
return REC_OPERAND_SIZE;
diff --git a/drivers/s390/crypto/z90main.c b/drivers/s390/crypto/z90main.c
index 790fcbb74b4..135ae04e6e7 100644
--- a/drivers/s390/crypto/z90main.c
+++ b/drivers/s390/crypto/z90main.c
@@ -228,7 +228,7 @@ struct device_x {
*/
struct device {
int dev_type; // PCICA, PCICC, PCIXCC_MCL2,
- // PCIXCC_MCL3, CEX2C
+ // PCIXCC_MCL3, CEX2C, CEX2A
enum devstat dev_stat; // current device status
int dev_self_x; // Index in array
int disabled; // Set when device is in error
@@ -295,26 +295,30 @@ struct caller {
/**
* Function prototypes from z90hardware.c
*/
-enum hdstat query_online(int, int, int, int *, int *);
-enum devstat reset_device(int, int, int);
-enum devstat send_to_AP(int, int, int, unsigned char *);
-enum devstat receive_from_AP(int, int, int, unsigned char *, unsigned char *);
-int convert_request(unsigned char *, int, short, int, int, int *,
- unsigned char *);
-int convert_response(unsigned char *, unsigned char *, int *, unsigned char *);
+enum hdstat query_online(int deviceNr, int cdx, int resetNr, int *q_depth,
+ int *dev_type);
+enum devstat reset_device(int deviceNr, int cdx, int resetNr);
+enum devstat send_to_AP(int dev_nr, int cdx, int msg_len, unsigned char *msg_ext);
+enum devstat receive_from_AP(int dev_nr, int cdx, int resplen,
+ unsigned char *resp, unsigned char *psmid);
+int convert_request(unsigned char *buffer, int func, unsigned short function,
+ int cdx, int dev_type, int *msg_l_p, unsigned char *msg_p);
+int convert_response(unsigned char *response, unsigned char *buffer,
+ int *respbufflen_p, unsigned char *resp_buff);
/**
* Low level function prototypes
*/
-static int create_z90crypt(int *);
-static int refresh_z90crypt(int *);
-static int find_crypto_devices(struct status *);
-static int create_crypto_device(int);
-static int destroy_crypto_device(int);
+static int create_z90crypt(int *cdx_p);
+static int refresh_z90crypt(int *cdx_p);
+static int find_crypto_devices(struct status *deviceMask);
+static int create_crypto_device(int index);
+static int destroy_crypto_device(int index);
static void destroy_z90crypt(void);
-static int refresh_index_array(struct status *, struct device_x *);
-static int probe_device_type(struct device *);
-static int probe_PCIXCC_type(struct device *);
+static int refresh_index_array(struct status *status_str,
+ struct device_x *index_array);
+static int probe_device_type(struct device *devPtr);
+static int probe_PCIXCC_type(struct device *devPtr);
/**
* proc fs definitions
@@ -425,7 +429,7 @@ static struct miscdevice z90crypt_misc_device = {
MODULE_AUTHOR("zSeries Linux Crypto Team: Robert H. Burroughs, Eric D. Rossman"
"and Jochen Roehrig");
MODULE_DESCRIPTION("zSeries Linux Cryptographic Coprocessor device driver, "
- "Copyright 2001, 2004 IBM Corporation");
+ "Copyright 2001, 2005 IBM Corporation");
MODULE_LICENSE("GPL");
module_param(domain, int, 0);
MODULE_PARM_DESC(domain, "domain index for device");
@@ -860,6 +864,12 @@ get_status_CEX2Ccount(void)
}
static inline int
+get_status_CEX2Acount(void)
+{
+ return z90crypt.hdware_info->type_mask[CEX2A].st_count;
+}
+
+static inline int
get_status_requestq_count(void)
{
return requestq_count;
@@ -1008,11 +1018,13 @@ static inline int
select_device_type(int *dev_type_p, int bytelength)
{
static int count = 0;
- int PCICA_avail, PCIXCC_MCL3_avail, CEX2C_avail, index_to_use;
+ int PCICA_avail, PCIXCC_MCL3_avail, CEX2C_avail, CEX2A_avail,
+ index_to_use;
struct status *stat;
if ((*dev_type_p != PCICC) && (*dev_type_p != PCICA) &&
(*dev_type_p != PCIXCC_MCL2) && (*dev_type_p != PCIXCC_MCL3) &&
- (*dev_type_p != CEX2C) && (*dev_type_p != ANYDEV))
+ (*dev_type_p != CEX2C) && (*dev_type_p != CEX2A) &&
+ (*dev_type_p != ANYDEV))
return -1;
if (*dev_type_p != ANYDEV) {
stat = &z90crypt.hdware_info->type_mask[*dev_type_p];
@@ -1022,7 +1034,13 @@ select_device_type(int *dev_type_p, int bytelength)
return -1;
}
- /* Assumption: PCICA, PCIXCC_MCL3, and CEX2C are all similar in speed */
+ /**
+ * Assumption: PCICA, PCIXCC_MCL3, CEX2C, and CEX2A are all similar in
+ * speed.
+ *
+ * PCICA and CEX2A do NOT co-exist, so only one of the two will ever be
+ * present.
+ */
stat = &z90crypt.hdware_info->type_mask[PCICA];
PCICA_avail = stat->st_count -
(stat->disabled_count + stat->user_disabled_count);
@@ -1032,29 +1050,38 @@ select_device_type(int *dev_type_p, int bytelength)
stat = &z90crypt.hdware_info->type_mask[CEX2C];
CEX2C_avail = stat->st_count -
(stat->disabled_count + stat->user_disabled_count);
- if (PCICA_avail || PCIXCC_MCL3_avail || CEX2C_avail) {
+ stat = &z90crypt.hdware_info->type_mask[CEX2A];
+ CEX2A_avail = stat->st_count -
+ (stat->disabled_count + stat->user_disabled_count);
+ if (PCICA_avail || PCIXCC_MCL3_avail || CEX2C_avail || CEX2A_avail) {
/**
- * bitlength is a factor, PCICA is the most capable, even with
- * the new MCL for PCIXCC.
+ * bitlength is a factor; PCICA or CEX2A is the most capable,
+ * even with the new MCL for PCIXCC.
*/
if ((bytelength < PCIXCC_MIN_MOD_SIZE) ||
(!ext_bitlens && (bytelength < OLD_PCIXCC_MIN_MOD_SIZE))) {
- if (!PCICA_avail)
- return -1;
- else {
+ if (PCICA_avail) {
*dev_type_p = PCICA;
return 0;
}
+ if (CEX2A_avail) {
+ *dev_type_p = CEX2A;
+ return 0;
+ }
+ return -1;
}
index_to_use = count % (PCICA_avail + PCIXCC_MCL3_avail +
- CEX2C_avail);
+ CEX2C_avail + CEX2A_avail);
if (index_to_use < PCICA_avail)
*dev_type_p = PCICA;
else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail))
*dev_type_p = PCIXCC_MCL3;
- else
+ else if (index_to_use < (PCICA_avail + PCIXCC_MCL3_avail +
+ CEX2C_avail))
*dev_type_p = CEX2C;
+ else
+ *dev_type_p = CEX2A;
count++;
return 0;
}
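The selection above round-robins across whatever mix of the fast device types is online. With, say, two PCICAs, one PCIXCC MCL3, no CEX2C and one CEX2A, count % 4 yields PCICA, PCICA, PCIXCC_MCL3, CEX2A in turn. A standalone sketch of the same arithmetic, with made-up availability counts:

    /* Illustrative sketch with made-up device counts, not driver code. */
    #include <stdio.h>

    int main(void)
    {
        int PCICA_avail = 2, PCIXCC_MCL3_avail = 1;
        int CEX2C_avail = 0, CEX2A_avail = 1;
        int total = PCICA_avail + PCIXCC_MCL3_avail +
                    CEX2C_avail + CEX2A_avail;
        int count;

        for (count = 0; count < 2 * total; count++) {
            int index_to_use = count % total;
            const char *type;

            if (index_to_use < PCICA_avail)
                type = "PCICA";
            else if (index_to_use < PCICA_avail + PCIXCC_MCL3_avail)
                type = "PCIXCC_MCL3";
            else if (index_to_use < PCICA_avail + PCIXCC_MCL3_avail +
                                    CEX2C_avail)
                type = "CEX2C";
            else
                type = "CEX2A";
            printf("count=%d -> %s\n", count, type);
        }
        return 0;
    }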
@@ -1359,7 +1386,7 @@ build_caller(struct work_element *we_p, short function)
if ((we_p->devtype != PCICC) && (we_p->devtype != PCICA) &&
(we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) &&
- (we_p->devtype != CEX2C))
+ (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A))
return SEN_NOT_AVAIL;
memcpy(caller_p->caller_id, we_p->caller_id,
@@ -1428,7 +1455,8 @@ get_crypto_request_buffer(struct work_element *we_p)
if ((we_p->devtype != PCICA) && (we_p->devtype != PCICC) &&
(we_p->devtype != PCIXCC_MCL2) && (we_p->devtype != PCIXCC_MCL3) &&
- (we_p->devtype != CEX2C) && (we_p->devtype != ANYDEV)) {
+ (we_p->devtype != CEX2C) && (we_p->devtype != CEX2A) &&
+ (we_p->devtype != ANYDEV)) {
PRINTK("invalid device type\n");
return SEN_USER_ERROR;
}
@@ -1503,8 +1531,9 @@ get_crypto_request_buffer(struct work_element *we_p)
function = PCI_FUNC_KEY_ENCRYPT;
switch (we_p->devtype) {
- /* PCICA does everything with a simple RSA mod-expo operation */
+ /* PCICA and CEX2A do everything with a simple RSA mod-expo operation */
case PCICA:
+ case CEX2A:
function = PCI_FUNC_KEY_ENCRYPT;
break;
/**
@@ -1662,7 +1691,8 @@ z90crypt_rsa(struct priv_data *private_data_p, pid_t pid,
* trigger a fallback to software.
*/
case -EINVAL:
- if (we_p->devtype != PCICA)
+ if ((we_p->devtype != PCICA) &&
+ (we_p->devtype != CEX2A))
rv = -EGETBUFF;
break;
case -ETIMEOUT:
@@ -1779,6 +1809,12 @@ z90crypt_unlocked_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ret = -EFAULT;
break;
+ case Z90STAT_CEX2ACOUNT:
+ tempstat = get_status_CEX2Acount();
+ if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
+ ret = -EFAULT;
+ break;
+
case Z90STAT_REQUESTQ_COUNT:
tempstat = get_status_requestq_count();
if (copy_to_user((int __user *)arg, &tempstat, sizeof(int)) != 0)
@@ -2019,6 +2055,8 @@ z90crypt_status(char *resp_buff, char **start, off_t offset,
get_status_PCIXCCMCL3count());
len += sprintf(resp_buff+len, "CEX2C count: %d\n",
get_status_CEX2Ccount());
+ len += sprintf(resp_buff+len, "CEX2A count: %d\n",
+ get_status_CEX2Acount());
len += sprintf(resp_buff+len, "requestq count: %d\n",
get_status_requestq_count());
len += sprintf(resp_buff+len, "pendingq count: %d\n",
@@ -2026,8 +2064,8 @@ z90crypt_status(char *resp_buff, char **start, off_t offset,
len += sprintf(resp_buff+len, "Total open handles: %d\n\n",
get_status_totalopen_count());
len += sprinthx(
- "Online devices: 1: PCICA, 2: PCICC, 3: PCIXCC (MCL2), "
- "4: PCIXCC (MCL3), 5: CEX2C",
+ "Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) "
+ "4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A",
resp_buff+len,
get_status_status_mask(workarea),
Z90CRYPT_NUM_APS);
@@ -2140,6 +2178,7 @@ z90crypt_status_write(struct file *file, const char __user *buffer,
case '3': // PCIXCC_MCL2
case '4': // PCIXCC_MCL3
case '5': // CEX2C
+ case '6': // CEX2A
j++;
break;
case 'd':
@@ -3007,7 +3046,9 @@ create_crypto_device(int index)
z90crypt.hdware_info->device_type_array[index] = 4;
else if (deviceType == CEX2C)
z90crypt.hdware_info->device_type_array[index] = 5;
- else
+ else if (deviceType == CEX2A)
+ z90crypt.hdware_info->device_type_array[index] = 6;
+ else // No idea how this would happen.
z90crypt.hdware_info->device_type_array[index] = -1;
}
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index a7efc394515..54885475492 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -1,5 +1,5 @@
menu "S/390 network device drivers"
- depends on NETDEVICES && ARCH_S390
+ depends on NETDEVICES && S390
config LCS
tristate "Lan Channel Station Interface"
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 6b63d21612e..e70af7f3994 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -1603,7 +1603,7 @@ dumpit(char* buf, int len)
__u32 ct, sw, rm, dup;
char *ptr, *rptr;
char tbuf[82], tdup[82];
-#if (CONFIG_ARCH_S390X)
+#if (CONFIG_64BIT)
char addr[22];
#else
char addr[12];
@@ -1619,7 +1619,7 @@ dumpit(char* buf, int len)
dup = 0;
for ( ct=0; ct < len; ct++, ptr++, rptr++ ) {
if (sw == 0) {
-#if (CONFIG_ARCH_S390X)
+#if (CONFIG_64BIT)
sprintf(addr, "%16.16lX",(unsigned long)rptr);
#else
sprintf(addr, "%8.8X",(__u32)rptr);
@@ -1634,7 +1634,7 @@ dumpit(char* buf, int len)
if (sw == 8) {
strcat(bhex, " ");
}
-#if (CONFIG_ARCH_S390X)
+#if (CONFIG_64BIT)
sprintf(tbuf,"%2.2lX", (unsigned long)*ptr);
#else
sprintf(tbuf,"%2.2X", (__u32)*ptr);
diff --git a/drivers/s390/net/cu3088.c b/drivers/s390/net/cu3088.c
index 0075894c71d..77dacb46573 100644
--- a/drivers/s390/net/cu3088.c
+++ b/drivers/s390/net/cu3088.c
@@ -1,5 +1,5 @@
/*
- * $Id: cu3088.c,v 1.35 2005/03/30 19:28:52 richtera Exp $
+ * $Id: cu3088.c,v 1.36 2005/10/25 14:37:17 cohuck Exp $
*
* CTC / LCS ccw_device driver
*
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <linux/err.h>
+#include <asm/s390_rdev.h>
#include <asm/ccwdev.h>
#include <asm/ccwgroup.h>
diff --git a/drivers/s390/net/iucv.c b/drivers/s390/net/iucv.c
index df7647c3c10..ea817739256 100644
--- a/drivers/s390/net/iucv.c
+++ b/drivers/s390/net/iucv.c
@@ -1,5 +1,5 @@
/*
- * $Id: iucv.c,v 1.45 2005/04/26 22:59:06 braunu Exp $
+ * $Id: iucv.c,v 1.47 2005/11/21 11:35:22 mschwide Exp $
*
* IUCV network driver
*
@@ -29,7 +29,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
- * RELEASE-TAG: IUCV lowlevel driver $Revision: 1.45 $
+ * RELEASE-TAG: IUCV lowlevel driver $Revision: 1.47 $
*
*/
@@ -54,7 +54,7 @@
#include <asm/s390_ext.h>
#include <asm/ebcdic.h>
#include <asm/smp.h>
-#include <asm/ccwdev.h> //for root device stuff
+#include <asm/s390_rdev.h>
/* FLAGS:
* All flags are defined in the field IPFLAGS1 of each function
@@ -355,7 +355,7 @@ do { \
static void
iucv_banner(void)
{
- char vbuf[] = "$Revision: 1.45 $";
+ char vbuf[] = "$Revision: 1.47 $";
char *version = vbuf;
if ((version = strchr(version, ':'))) {
@@ -477,7 +477,7 @@ grab_param(void)
ptr++;
if (ptr >= iucv_param_pool + PARAM_POOL_SIZE)
ptr = iucv_param_pool;
- } while (atomic_compare_and_swap(0, 1, &ptr->in_use));
+ } while (atomic_cmpxchg(&ptr->in_use, 0, 1) != 0);
hint = ptr - iucv_param_pool;
memset(&ptr->param, 0, sizeof(ptr->param));
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index f8f55cc468b..97f927c01a8 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -65,6 +65,7 @@
#include <asm/timex.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
+#include <asm/s390_rdev.h>
#include "qeth.h"
#include "qeth_mpc.h"
@@ -1396,7 +1397,7 @@ qeth_idx_activate_get_answer(struct qeth_channel *channel,
channel->ccw.cda = (__u32) __pa(iob->data);
wait_event(card->wait_q,
- atomic_compare_and_swap(0,1,&channel->irq_pending) == 0);
+ atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0);
QETH_DBF_TEXT(setup, 6, "noirqpnd");
spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
rc = ccw_device_start(channel->ccwdev,
@@ -1463,7 +1464,7 @@ qeth_idx_activate_channel(struct qeth_channel *channel,
memcpy(QETH_IDX_ACT_QDIO_DEV_REALADDR(iob->data), &temp, 2);
wait_event(card->wait_q,
- atomic_compare_and_swap(0,1,&channel->irq_pending) == 0);
+ atomic_cmpxchg(&channel->irq_pending, 0, 1) == 0);
QETH_DBF_TEXT(setup, 6, "noirqpnd");
spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags);
rc = ccw_device_start(channel->ccwdev,
@@ -1616,7 +1617,7 @@ qeth_issue_next_read(struct qeth_card *card)
}
qeth_setup_ccw(&card->read, iob->data, QETH_BUFSIZE);
wait_event(card->wait_q,
- atomic_compare_and_swap(0,1,&card->read.irq_pending) == 0);
+ atomic_cmpxchg(&card->read.irq_pending, 0, 1) == 0);
QETH_DBF_TEXT(trace, 6, "noirqpnd");
rc = ccw_device_start(card->read.ccwdev, &card->read.ccw,
(addr_t) iob, 0, 0);
@@ -1882,7 +1883,7 @@ qeth_send_control_data(struct qeth_card *card, int len,
spin_unlock_irqrestore(&card->lock, flags);
QETH_DBF_HEX(control, 2, iob->data, QETH_DBF_CONTROL_LEN);
wait_event(card->wait_q,
- atomic_compare_and_swap(0,1,&card->write.irq_pending) == 0);
+ atomic_cmpxchg(&card->write.irq_pending, 0, 1) == 0);
qeth_prepare_control_data(card, len, iob);
if (IS_IPA(iob->data))
timer.expires = jiffies + QETH_IPA_TIMEOUT;
@@ -1924,7 +1925,7 @@ qeth_osn_send_control_data(struct qeth_card *card, int len,
QETH_DBF_TEXT(trace, 5, "osndctrd");
wait_event(card->wait_q,
- atomic_compare_and_swap(0,1,&card->write.irq_pending) == 0);
+ atomic_cmpxchg(&card->write.irq_pending, 0, 1) == 0);
qeth_prepare_control_data(card, len, iob);
QETH_DBF_TEXT(trace, 6, "osnoirqp");
spin_lock_irqsave(get_ccwdev_lock(card->write.ccwdev), flags);
@@ -4236,9 +4237,8 @@ qeth_do_send_packet_fast(struct qeth_card *card, struct qeth_qdio_out_q *queue,
QETH_DBF_TEXT(trace, 6, "dosndpfa");
/* spin until we get the queue ... */
- while (atomic_compare_and_swap(QETH_OUT_Q_UNLOCKED,
- QETH_OUT_Q_LOCKED,
- &queue->state));
+ while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
+ QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
/* ... now we've got the queue */
index = queue->next_buf_to_fill;
buffer = &queue->bufs[queue->next_buf_to_fill];
@@ -4292,9 +4292,8 @@ qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
QETH_DBF_TEXT(trace, 6, "dosndpkt");
/* spin until we get the queue ... */
- while (atomic_compare_and_swap(QETH_OUT_Q_UNLOCKED,
- QETH_OUT_Q_LOCKED,
- &queue->state));
+ while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
+ QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
start_index = queue->next_buf_to_fill;
buffer = &queue->bufs[queue->next_buf_to_fill];
/*
diff --git a/drivers/s390/s390_rdev.c b/drivers/s390/s390_rdev.c
new file mode 100644
index 00000000000..566cc3d185b
--- /dev/null
+++ b/drivers/s390/s390_rdev.c
@@ -0,0 +1,53 @@
+/*
+ * drivers/s390/s390_rdev.c
+ * s390 root device
+ * $Revision: 1.2 $
+ *
+ * Copyright (C) 2002, 2005 IBM Deutschland Entwicklung GmbH,
+ * IBM Corporation
+ * Author(s): Cornelia Huck (cohuck@de.ibm.com)
+ * Carsten Otte (cotte@de.ibm.com)
+ */
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <asm/s390_rdev.h>
+
+static void
+s390_root_dev_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+struct device *
+s390_root_dev_register(const char *name)
+{
+ struct device *dev;
+ int ret;
+
+ if (!strlen(name))
+ return ERR_PTR(-EINVAL);
+ dev = kmalloc(sizeof(struct device), GFP_KERNEL);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+ memset(dev, 0, sizeof(struct device));
+ strncpy(dev->bus_id, name, min(strlen(name), (size_t)BUS_ID_SIZE));
+ dev->release = s390_root_dev_release;
+ ret = device_register(dev);
+ if (ret) {
+ kfree(dev);
+ return ERR_PTR(ret);
+ }
+ return dev;
+}
+
+void
+s390_root_dev_unregister(struct device *dev)
+{
+ if (dev)
+ device_unregister(dev);
+}
+
+EXPORT_SYMBOL(s390_root_dev_register);
+EXPORT_SYMBOL(s390_root_dev_unregister);
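A hypothetical caller of the new interface would pair registration and unregistration across module init and exit; the subsystem name and function names below are illustrative, not taken from this patch:

    /* Illustrative only: register a root device at module init and
     * drop it at exit, following the ERR_PTR() convention above. */
    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/err.h>
    #include <asm/s390_rdev.h>

    static struct device *my_root_dev;

    static int __init my_subsys_init(void)
    {
        my_root_dev = s390_root_dev_register("mysubsys");
        if (IS_ERR(my_root_dev))
            return PTR_ERR(my_root_dev);
        return 0;
    }

    static void __exit my_subsys_exit(void)
    {
        s390_root_dev_unregister(my_root_dev);
        my_root_dev = NULL;
    }

    module_init(my_subsys_init);
    module_exit(my_subsys_exit);
    MODULE_LICENSE("GPL");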
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 4191fd9d4d1..3bf46660351 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -23,7 +23,7 @@
static struct semaphore m_sem;
-extern int css_process_crw(int);
+extern int css_process_crw(int, int);
extern int chsc_process_crw(void);
extern int chp_process_crw(int, int);
extern void css_reiterate_subchannels(void);
@@ -49,9 +49,10 @@ s390_handle_damage(char *msg)
static int
s390_collect_crw_info(void *param)
{
- struct crw crw;
+ struct crw crw[2];
int ccode, ret, slow;
struct semaphore *sem;
+ unsigned int chain;
sem = (struct semaphore *)param;
/* Set a nice name. */
@@ -59,25 +60,50 @@ s390_collect_crw_info(void *param)
repeat:
down_interruptible(sem);
slow = 0;
+ chain = 0;
while (1) {
- ccode = stcrw(&crw);
+ if (unlikely(chain > 1)) {
+ struct crw tmp_crw;
+
+ printk(KERN_WARNING"%s: Code does not support more "
+ "than two chained crws; please report to "
+ "linux390@de.ibm.com!\n", __FUNCTION__);
+ ccode = stcrw(&tmp_crw);
+ printk(KERN_WARNING"%s: crw reports slct=%d, oflw=%d, "
+ "chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
+ __FUNCTION__, tmp_crw.slct, tmp_crw.oflw,
+ tmp_crw.chn, tmp_crw.rsc, tmp_crw.anc,
+ tmp_crw.erc, tmp_crw.rsid);
+ printk(KERN_WARNING"%s: This was crw number %x in the "
+ "chain\n", __FUNCTION__, chain);
+ if (ccode != 0)
+ break;
+ chain = tmp_crw.chn ? chain + 1 : 0;
+ continue;
+ }
+ ccode = stcrw(&crw[chain]);
if (ccode != 0)
break;
DBG(KERN_DEBUG "crw_info : CRW reports slct=%d, oflw=%d, "
"chn=%d, rsc=%X, anc=%d, erc=%X, rsid=%X\n",
- crw.slct, crw.oflw, crw.chn, crw.rsc, crw.anc,
- crw.erc, crw.rsid);
+ crw[chain].slct, crw[chain].oflw, crw[chain].chn,
+ crw[chain].rsc, crw[chain].anc, crw[chain].erc,
+ crw[chain].rsid);
/* Check for overflows. */
- if (crw.oflw) {
+ if (crw[chain].oflw) {
pr_debug("%s: crw overflow detected!\n", __FUNCTION__);
css_reiterate_subchannels();
+ chain = 0;
slow = 1;
continue;
}
- switch (crw.rsc) {
+ switch (crw[chain].rsc) {
case CRW_RSC_SCH:
- pr_debug("source is subchannel %04X\n", crw.rsid);
- ret = css_process_crw (crw.rsid);
+ if (crw[0].chn && !chain)
+ break;
+ pr_debug("source is subchannel %04X\n", crw[0].rsid);
+ ret = css_process_crw (crw[0].rsid,
+ chain ? crw[1].rsid : 0);
if (ret == -EAGAIN)
slow = 1;
break;
@@ -85,18 +111,18 @@ repeat:
pr_debug("source is monitoring facility\n");
break;
case CRW_RSC_CPATH:
- pr_debug("source is channel path %02X\n", crw.rsid);
- switch (crw.erc) {
+ pr_debug("source is channel path %02X\n", crw[0].rsid);
+ switch (crw[0].erc) {
case CRW_ERC_IPARM: /* Path has come. */
- ret = chp_process_crw(crw.rsid, 1);
+ ret = chp_process_crw(crw[0].rsid, 1);
break;
case CRW_ERC_PERRI: /* Path has gone. */
case CRW_ERC_PERRN:
- ret = chp_process_crw(crw.rsid, 0);
+ ret = chp_process_crw(crw[0].rsid, 0);
break;
default:
pr_debug("Don't know how to handle erc=%x\n",
- crw.erc);
+ crw[0].erc);
ret = 0;
}
if (ret == -EAGAIN)
@@ -115,6 +141,8 @@ repeat:
pr_debug("unknown source\n");
break;
}
+ /* chain is always 0 or 1 here. */
+ chain = crw[chain].chn ? chain + 1 : 0;
}
if (slow)
queue_work(slow_path_wq, &slow_path_work);
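The chain counter amounts to a small state machine: it is 0 for an unchained CRW or the first CRW of a chain, 1 for the second, and any higher value falls into the warning path above, which drains and logs the remainder of the chain. A standalone sketch of the counter's behaviour against a made-up stcrw() sequence:

    /* Illustrative sketch: chain bookkeeping against made-up results. */
    #include <stdio.h>

    int main(void)
    {
        const int chn_bits[] = { 1, 1, 1, 0 };  /* "another CRW follows" */
        unsigned int chain = 0;
        int i;

        for (i = 0; i < 4; i++) {
            if (chain > 1)
                printf("crw %d: unsupported chain position %u, drained\n",
                       i, chain);
            else
                printf("crw %d: stored in crw[%u]\n", i, chain);
            chain = chn_bits[i] ? chain + 1 : 0;
        }
        return 0;
    }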
@@ -218,7 +246,7 @@ s390_revalidate_registers(struct mci *mci)
*/
kill_task = 1;
-#ifndef __s390x__
+#ifndef CONFIG_64BIT
asm volatile("ld 0,0(%0)\n"
"ld 2,8(%0)\n"
"ld 4,16(%0)\n"
@@ -227,7 +255,7 @@ s390_revalidate_registers(struct mci *mci)
#endif
if (MACHINE_HAS_IEEE) {
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
fpt_save_area = &S390_lowcore.floating_pt_save_area;
fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
#else
@@ -286,7 +314,7 @@ s390_revalidate_registers(struct mci *mci)
*/
s390_handle_damage("invalid control registers.");
else
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
asm volatile("lctlg 0,15,0(%0)"
: : "a" (&S390_lowcore.cregs_save_area));
#else
@@ -299,7 +327,7 @@ s390_revalidate_registers(struct mci *mci)
* can't write something sensible into that register.
*/
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
/*
* See if we can revalidate the TOD programmable register with its
* old contents (should be zero) otherwise set it to zero.
@@ -356,7 +384,7 @@ s390_do_machine_check(struct pt_regs *regs)
if (mci->b) {
/* Processing backup -> verify if we can survive this */
u64 z_mcic, o_mcic, t_mcic;
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c
index 87c2db1bd4f..66da840c931 100644
--- a/drivers/s390/sysinfo.c
+++ b/drivers/s390/sysinfo.c
@@ -106,7 +106,7 @@ static inline int stsi (void *sysinfo,
{
int cc, retv;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
__asm__ __volatile__ ( "lr\t0,%2\n"
"\tlr\t1,%3\n"
"\tstsi\t0(%4)\n"
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 4c42065dea8..3c606cf8c8c 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -914,7 +914,7 @@ config SCSI_INIA100
config SCSI_PPA
tristate "IOMEGA parallel port (ppa - older drives)"
- depends on SCSI && PARPORT
+ depends on SCSI && PARPORT_PC
---help---
This driver supports older versions of IOMEGA's parallel port ZIP
drive (a 100 MB removable media device).
@@ -941,7 +941,7 @@ config SCSI_PPA
config SCSI_IMM
tristate "IOMEGA parallel port (imm - newer drives)"
- depends on SCSI && PARPORT
+ depends on SCSI && PARPORT_PC
---help---
This driver supports newer versions of IOMEGA's parallel port ZIP
drive (a 100 MB removable media device).
@@ -968,7 +968,7 @@ config SCSI_IMM
config SCSI_IZIP_EPP16
bool "ppa/imm option - Use slow (but safe) EPP-16"
- depends on PARPORT && (SCSI_PPA || SCSI_IMM)
+ depends on SCSI_PPA || SCSI_IMM
---help---
EPP (Enhanced Parallel Port) is a standard for parallel ports which
allows them to act as expansion buses that can handle up to 64
@@ -983,7 +983,7 @@ config SCSI_IZIP_EPP16
config SCSI_IZIP_SLOW_CTR
bool "ppa/imm option - Assume slow parport control register"
- depends on PARPORT && (SCSI_PPA || SCSI_IMM)
+ depends on SCSI_PPA || SCSI_IMM
help
Some parallel ports are known to have excessive delays between
changing the parallel port control register and good data being
@@ -1815,7 +1815,7 @@ config SCSI_SUNESP
config ZFCP
tristate "FCP host bus adapter driver for IBM eServer zSeries"
- depends on ARCH_S390 && QDIO && SCSI
+ depends on S390 && QDIO && SCSI
select SCSI_FC_ATTRS
help
If you want to access SCSI devices attached to your IBM eServer
diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c
index 4b647eefc9a..557788ec4ee 100644
--- a/drivers/scsi/ata_piix.c
+++ b/drivers/scsi/ata_piix.c
@@ -166,6 +166,8 @@ static struct pci_driver piix_pci_driver = {
.id_table = piix_pci_tbl,
.probe = piix_init_one,
.remove = ata_pci_remove_one,
+ .suspend = ata_pci_device_suspend,
+ .resume = ata_pci_device_resume,
};
static struct scsi_host_template piix_sht = {
@@ -185,6 +187,8 @@ static struct scsi_host_template piix_sht = {
.dma_boundary = ATA_DMA_BOUNDARY,
.slave_configure = ata_scsi_slave_config,
.bios_param = ata_std_bios_param,
+ .resume = ata_scsi_device_resume,
+ .suspend = ata_scsi_device_suspend,
};
static const struct ata_port_operations piix_pata_ops = {
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index bdfb0a88cd6..f55b9b3f7b3 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -4173,6 +4173,96 @@ err_out:
* Inherited from caller.
*/
+/*
+ * Execute a 'simple' command that consists only of the opcode 'cmd' itself,
+ * without filling in any other taskfile registers.
+ */
+static int ata_do_simple_cmd(struct ata_port *ap, struct ata_device *dev,
+ u8 cmd)
+{
+ struct ata_taskfile tf;
+ int err;
+
+ ata_tf_init(ap, &tf, dev->devno);
+
+ tf.command = cmd;
+ tf.flags |= ATA_TFLAG_DEVICE;
+ tf.protocol = ATA_PROT_NODATA;
+
+ err = ata_exec_internal(ap, dev, &tf, DMA_NONE, NULL, 0);
+ if (err)
+ printk(KERN_ERR "%s: ata command failed: %d\n",
+ __FUNCTION__, err);
+
+ return err;
+}
+
+static int ata_flush_cache(struct ata_port *ap, struct ata_device *dev)
+{
+ u8 cmd;
+
+ if (!ata_try_flush_cache(dev))
+ return 0;
+
+ if (ata_id_has_flush_ext(dev->id))
+ cmd = ATA_CMD_FLUSH_EXT;
+ else
+ cmd = ATA_CMD_FLUSH;
+
+ return ata_do_simple_cmd(ap, dev, cmd);
+}
+
+static int ata_standby_drive(struct ata_port *ap, struct ata_device *dev)
+{
+ return ata_do_simple_cmd(ap, dev, ATA_CMD_STANDBYNOW1);
+}
+
+static int ata_start_drive(struct ata_port *ap, struct ata_device *dev)
+{
+ return ata_do_simple_cmd(ap, dev, ATA_CMD_IDLEIMMEDIATE);
+}
+
+/**
+ * ata_device_resume - wake up a previously suspended device
+ *
+ * Kick the drive back into action by sending it an idle immediate
+ * command and making sure its transfer mode matches between drive
+ * and host.
+ *
+ */
+int ata_device_resume(struct ata_port *ap, struct ata_device *dev)
+{
+ if (ap->flags & ATA_FLAG_SUSPENDED) {
+ ap->flags &= ~ATA_FLAG_SUSPENDED;
+ ata_set_mode(ap);
+ }
+ if (!ata_dev_present(dev))
+ return 0;
+ if (dev->class == ATA_DEV_ATA)
+ ata_start_drive(ap, dev);
+
+ return 0;
+}
+
+/**
+ * ata_device_suspend - prepare a device for suspend
+ *
+ * Flush the cache on the drive, if appropriate, then issue a
+ * standbynow command.
+ *
+ */
+int ata_device_suspend(struct ata_port *ap, struct ata_device *dev)
+{
+ if (!ata_dev_present(dev))
+ return 0;
+ if (dev->class == ATA_DEV_ATA)
+ ata_flush_cache(ap, dev);
+
+ ata_standby_drive(ap, dev);
+ ap->flags |= ATA_FLAG_SUSPENDED;
+ return 0;
+}
+
int ata_port_start (struct ata_port *ap)
{
struct device *dev = ap->host_set->dev;
@@ -4921,6 +5011,23 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
return (tmp == bits->val) ? 1 : 0;
}
+
+int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+ pci_set_power_state(pdev, PCI_D3hot);
+ return 0;
+}
+
+int ata_pci_device_resume(struct pci_dev *pdev)
+{
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ pci_enable_device(pdev);
+ pci_set_master(pdev);
+ return 0;
+}
#endif /* CONFIG_PCI */
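A driver opts into these helpers simply by filling the two pci_driver fields, exactly as the ata_piix hunk earlier in this patch does. A sketch with illustrative names (my_pci_tbl and my_init_one stand in for a real driver's ID table and probe routine):

    /* Sketch only: placeholder identifiers, cf. the ata_piix hunk above. */
    static struct pci_driver my_ata_pci_driver = {
        .name       = "my_ata",
        .id_table   = my_pci_tbl,
        .probe      = my_init_one,
        .remove     = ata_pci_remove_one,
        .suspend    = ata_pci_device_suspend,
        .resume     = ata_pci_device_resume,
    };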
@@ -5024,4 +5131,11 @@ EXPORT_SYMBOL_GPL(ata_pci_host_stop);
EXPORT_SYMBOL_GPL(ata_pci_init_native_mode);
EXPORT_SYMBOL_GPL(ata_pci_init_one);
EXPORT_SYMBOL_GPL(ata_pci_remove_one);
+EXPORT_SYMBOL_GPL(ata_pci_device_suspend);
+EXPORT_SYMBOL_GPL(ata_pci_device_resume);
#endif /* CONFIG_PCI */
+
+EXPORT_SYMBOL_GPL(ata_device_suspend);
+EXPORT_SYMBOL_GPL(ata_device_resume);
+EXPORT_SYMBOL_GPL(ata_scsi_device_suspend);
+EXPORT_SYMBOL_GPL(ata_scsi_device_resume);
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 2c644cbb6e9..cfbceb50471 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -396,6 +396,22 @@ void ata_dump_status(unsigned id, struct ata_taskfile *tf)
}
}
+int ata_scsi_device_resume(struct scsi_device *sdev)
+{
+ struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0];
+ struct ata_device *dev = &ap->device[sdev->id];
+
+ return ata_device_resume(ap, dev);
+}
+
+int ata_scsi_device_suspend(struct scsi_device *sdev)
+{
+ struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0];
+ struct ata_device *dev = &ap->device[sdev->id];
+
+ return ata_device_suspend(ap, dev);
+}
+
/**
* ata_to_sense_error - convert ATA error to SCSI error
* @id: ATA device number
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 15842b1f0f4..ea7f3a43357 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -263,9 +263,40 @@ static int scsi_bus_match(struct device *dev, struct device_driver *gendrv)
return (sdp->inq_periph_qual == SCSI_INQ_PQ_CON)? 1: 0;
}
+static int scsi_bus_suspend(struct device * dev, pm_message_t state)
+{
+ struct scsi_device *sdev = to_scsi_device(dev);
+ struct scsi_host_template *sht = sdev->host->hostt;
+ int err;
+
+ err = scsi_device_quiesce(sdev);
+ if (err)
+ return err;
+
+ if (sht->suspend)
+ err = sht->suspend(sdev);
+
+ return err;
+}
+
+static int scsi_bus_resume(struct device * dev)
+{
+ struct scsi_device *sdev = to_scsi_device(dev);
+ struct scsi_host_template *sht = sdev->host->hostt;
+ int err = 0;
+
+ if (sht->resume)
+ err = sht->resume(sdev);
+
+ scsi_device_resume(sdev);
+ return err;
+}
+
struct bus_type scsi_bus_type = {
.name = "scsi",
.match = scsi_bus_match,
+ .suspend = scsi_bus_suspend,
+ .resume = scsi_bus_resume,
};
int scsi_sysfs_register(void)
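Note the ordering: the device is quiesced before the host template's suspend hook runs, and the resume hook runs before scsi_device_resume() lifts the quiesce. A host template opts in by supplying the two hooks, as the piix_sht hunk above does via libata; a sketch with illustrative names:

    /* Sketch only: a host template providing the new suspend/resume
     * hooks (libata exports ata_scsi_device_suspend/resume for this). */
    static struct scsi_host_template my_sht = {
        .module     = THIS_MODULE,
        .name       = "my_host",
        .suspend    = ata_scsi_device_suspend,
        .resume     = ata_scsi_device_resume,
        /* remaining fields elided */
    };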
diff --git a/drivers/serial/mpc52xx_uart.c b/drivers/serial/mpc52xx_uart.c
index b8727d9bf69..1288d6203e9 100644
--- a/drivers/serial/mpc52xx_uart.c
+++ b/drivers/serial/mpc52xx_uart.c
@@ -37,11 +37,11 @@
* by the bootloader or in the platform init code.
*
* The idx field must be equal to the PSC index ( e.g. 0 for PSC1, 1 for PSC2,
- * and so on). So the PSC1 is mapped to /dev/ttyS0, PSC2 to /dev/ttyS1 and so
- * on. But be warned, it's an ABSOLUTE REQUIREMENT ! This is needed mainly for
- * the console code : without this 1:1 mapping, at early boot time, when we are
- * parsing the kernel args console=ttyS?, we wouldn't know wich PSC it will be
- * mapped to.
+ * and so on). So PSC1 is mapped to /dev/ttyPSC0, PSC2 to /dev/ttyPSC1 and
+ * so on. But be warned, it's an ABSOLUTE REQUIREMENT! This is needed mainly
+ * for the console code: without this 1:1 mapping, at early boot time, when we
+ * are parsing the kernel args console=ttyPSC?, we wouldn't know which PSC it
+ * will be mapped to.
*/
#include <linux/config.h>
@@ -65,6 +65,10 @@
#include <linux/serial_core.h>
+/* We've been assigned a range on the "Low-density serial ports" major */
+#define SERIAL_PSC_MAJOR 204
+#define SERIAL_PSC_MINOR 148
+
#define ISR_PASS_LIMIT 256 /* Max number of iteration in the interrupt */
@@ -668,15 +672,15 @@ mpc52xx_console_setup(struct console *co, char *options)
}
-extern struct uart_driver mpc52xx_uart_driver;
+static struct uart_driver mpc52xx_uart_driver;
static struct console mpc52xx_console = {
- .name = "ttyS",
+ .name = "ttyPSC",
.write = mpc52xx_console_write,
.device = uart_console_device,
.setup = mpc52xx_console_setup,
.flags = CON_PRINTBUFFER,
- .index = -1, /* Specified on the cmdline (e.g. console=ttyS0 ) */
+ .index = -1, /* Specified on the cmdline (e.g. console=ttyPSC0 ) */
.data = &mpc52xx_uart_driver,
};
@@ -703,10 +707,10 @@ console_initcall(mpc52xx_console_init);
static struct uart_driver mpc52xx_uart_driver = {
.owner = THIS_MODULE,
.driver_name = "mpc52xx_psc_uart",
- .dev_name = "ttyS",
- .devfs_name = "ttyS",
- .major = TTY_MAJOR,
- .minor = 64,
+ .dev_name = "ttyPSC",
+ .devfs_name = "ttyPSC",
+ .major = SERIAL_PSC_MAJOR,
+ .minor = SERIAL_PSC_MINOR,
.nr = MPC52xx_PSC_MAXNUM,
.cons = MPC52xx_PSC_CONSOLE,
};