Diffstat (limited to 'drivers')
54 files changed, 6992 insertions, 647 deletions
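The largest addition in this series is the Xen paravirtual block frontend, drivers/block/xen-blkfront.c. That driver tracks in-flight requests in a fixed shadow array whose free slots are threaded into a singly linked free list through each slot's own req.id field; see get_id_from_freelist() and add_id_to_freelist() in the diff below. Here is a minimal, self-contained sketch of that free-list pattern in plain userspace C. The names mirror the driver's helpers, but everything in this sketch is illustrative, not the kernel code itself.

/*
 * Sketch of the shadow free-list pattern used by xen-blkfront:
 * free slots of a fixed array are chained into a singly linked
 * list through the slots' own id field. Illustrative names only.
 */
#include <assert.h>
#include <stdio.h>

#define RING_SIZE 32

struct shadow_slot {
	unsigned long id;	/* next free slot while on the free list */
	unsigned long request;	/* opaque owner cookie while in use */
};

static struct shadow_slot shadow[RING_SIZE];
static unsigned long shadow_free;	/* index of the free-list head */

static void freelist_init(void)
{
	int i;

	/* Slot i links to slot i+1; the last slot gets a sentinel. */
	for (i = 0; i < RING_SIZE; i++)
		shadow[i].id = i + 1;
	shadow[RING_SIZE - 1].id = 0x0fffffff;
	shadow_free = 0;
}

static unsigned long get_id_from_freelist(void)
{
	unsigned long free = shadow_free;

	assert(free < RING_SIZE);	/* the ring must not be exhausted */
	shadow_free = shadow[free].id;
	shadow[free].id = 0x0fffffee;	/* poison value, as in the driver */
	return free;
}

static void add_id_to_freelist(unsigned long id)
{
	shadow[id].id = shadow_free;
	shadow[id].request = 0;
	shadow_free = id;
}

int main(void)
{
	freelist_init();
	unsigned long a = get_id_from_freelist();
	unsigned long b = get_id_from_freelist();
	add_id_to_freelist(a);
	printf("allocated %lu and %lu; %lu is free again\n", a, b, a);
	return 0;
}

The trick avoids any separate allocation bookkeeping: a slot is either owned by an outstanding request or it is a link in the free chain, never both, so allocation and release are O(1) with no extra memory.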
diff --git a/drivers/Makefile b/drivers/Makefile index 503d8256944..6d9d7fab77f 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -15,6 +15,8 @@ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_PNP) += pnp/ obj-$(CONFIG_ARM_AMBA) += amba/ +obj-$(CONFIG_XEN) += xen/ + # char/ comes before serial/ etc so that the VT console is the boot-time # default. obj-y += char/ diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 88a6fc7fd27..58f1338981b 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -40,6 +40,7 @@ #include <linux/jiffies.h> #include <linux/kmod.h> #include <linux/seq_file.h> +#include <linux/reboot.h> #include <asm/uaccess.h> #include <acpi/acpi_bus.h> @@ -59,7 +60,6 @@ #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0 #define ACPI_THERMAL_NOTIFY_HOT 0xF1 #define ACPI_THERMAL_MODE_ACTIVE 0x00 -#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff" #define ACPI_THERMAL_MAX_ACTIVE 10 #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65 @@ -419,26 +419,6 @@ static int acpi_thermal_get_devices(struct acpi_thermal *tz) return 0; } -static int acpi_thermal_call_usermode(char *path) -{ - char *argv[2] = { NULL, NULL }; - char *envp[3] = { NULL, NULL, NULL }; - - - if (!path) - return -EINVAL; - - argv[0] = path; - - /* minimal command environment */ - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - - call_usermodehelper(argv[0], argv, envp, 0); - - return 0; -} - static int acpi_thermal_critical(struct acpi_thermal *tz) { if (!tz || !tz->trips.critical.flags.valid) @@ -456,7 +436,7 @@ static int acpi_thermal_critical(struct acpi_thermal *tz) acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL, tz->trips.critical.flags.enabled); - acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF); + orderly_poweroff(true); return 0; } diff --git a/drivers/atm/Kconfig b/drivers/atm/Kconfig index bb4ae628149..bed9f58c2d5 100644 --- a/drivers/atm/Kconfig +++ b/drivers/atm/Kconfig @@ -172,7 +172,7 @@ config ATM_ZATM_DEBUG config ATM_NICSTAR tristate "IDT 77201 (NICStAR) (ForeRunnerLE)" - depends on PCI && !64BIT + depends on PCI && !64BIT && VIRT_TO_BUS help The NICStAR chipset family is used in a large number of ATM NICs for 25 and for 155 Mbps, including IDT cards and the Fore ForeRunnerLE diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 77637e780d4..41b2204ebc6 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1738,7 +1738,8 @@ static int __devinit eni_do_init(struct atm_dev *dev) printk(KERN_ERR KERN_ERR DEV_LABEL "(itf %d): bad " "magic - expected 0x%x, got 0x%x\n",dev->number, ENI155_MAGIC,(unsigned) readl(&eprom->magic)); - return -EINVAL; + error = -EINVAL; + goto unmap; } } eni_dev->phy = base+PHY_BASE; @@ -1765,17 +1766,27 @@ static int __devinit eni_do_init(struct atm_dev *dev) printk(")\n"); printk(KERN_ERR DEV_LABEL "(itf %d): ERROR - wrong id 0x%x\n", dev->number,(unsigned) eni_in(MID_RES_ID_MCON)); - return -EINVAL; + error = -EINVAL; + goto unmap; } error = eni_dev->asic ? get_esi_asic(dev) : get_esi_fpga(dev,base); - if (error) return error; + if (error) + goto unmap; for (i = 0; i < ESI_LEN; i++) printk("%s%02X",i ? "-" : "",dev->esi[i]); printk(")\n"); printk(KERN_NOTICE DEV_LABEL "(itf %d): %s,%s\n",dev->number, eni_in(MID_RES_ID_MCON) & 0x200 ? 
"ASIC" : "FPGA", media_name[eni_in(MID_RES_ID_MCON) & DAUGTHER_ID]); - return suni_init(dev); + + error = suni_init(dev); + if (error) + goto unmap; +out: + return error; +unmap: + iounmap(base); + goto out; } diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 38b688f9f6a..737cea49f87 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1710,7 +1710,7 @@ static int __devinit fs_init (struct fs_dev *dev) /* This bit is documented as "RESERVED" */ if (isr & ISR_INIT_ERR) { printk (KERN_ERR "Error initializing the FS... \n"); - return 1; + goto unmap; } if (isr & ISR_INIT) { fs_dprintk (FS_DEBUG_INIT, "Ha! Initialized OK!\n"); @@ -1723,7 +1723,7 @@ static int __devinit fs_init (struct fs_dev *dev) if (!to) { printk (KERN_ERR "timeout initializing the FS... \n"); - return 1; + goto unmap; } /* XXX fix for fs155 */ @@ -1803,7 +1803,7 @@ static int __devinit fs_init (struct fs_dev *dev) if (!dev->atm_vccs) { printk (KERN_WARNING "Couldn't allocate memory for VCC buffers. Woops!\n"); /* XXX Clean up..... */ - return 1; + goto unmap; } dev->tx_inuse = kzalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL); @@ -1813,7 +1813,7 @@ static int __devinit fs_init (struct fs_dev *dev) if (!dev->tx_inuse) { printk (KERN_WARNING "Couldn't allocate memory for tx_inuse bits!\n"); /* XXX Clean up..... */ - return 1; + goto unmap; } /* -- RAS1 : FS155 and 50 differ. Default (0) should be OK for both */ /* -- RAS2 : FS50 only: Default is OK. */ @@ -1840,7 +1840,7 @@ static int __devinit fs_init (struct fs_dev *dev) if (request_irq (dev->irq, fs_irq, IRQF_SHARED, "firestream", dev)) { printk (KERN_WARNING "couldn't get irq %d for firestream.\n", pci_dev->irq); /* XXX undo all previous stuff... */ - return 1; + goto unmap; } fs_dprintk (FS_DEBUG_INIT, "Grabbed irq %d for dev at %p.\n", dev->irq, dev); @@ -1890,6 +1890,9 @@ static int __devinit fs_init (struct fs_dev *dev) func_exit (); return 0; +unmap: + iounmap(dev->base); + return 1; } static int __devinit firestream_init_one (struct pci_dev *pci_dev, @@ -2012,6 +2015,7 @@ static void __devexit firestream_remove_one (struct pci_dev *pdev) for (i=0;i < FS_NR_RX_QUEUES;i++) free_queue (dev, &dev->rx_rq[i]); + iounmap(dev->base); fs_dprintk (FS_DEBUG_ALLOC, "Free fs-dev: %p\n", dev); nxtdev = dev->next; kfree (dev); diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 8f995ce8d73..f8b1700f4c1 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -65,7 +65,7 @@ static char const rcsid[] = static unsigned int vpibits = 1; -#define CONFIG_ATM_IDT77252_SEND_IDLE 1 +#define ATM_IDT77252_SEND_IDLE 1 /* @@ -3404,7 +3404,7 @@ init_card(struct atm_dev *dev) conf = SAR_CFG_TX_FIFO_SIZE_9 | /* Use maximum fifo size */ SAR_CFG_RXSTQ_SIZE_8k | /* Receive Status Queue is 8k */ SAR_CFG_IDLE_CLP | /* Set CLP on idle cells */ -#ifndef CONFIG_ATM_IDT77252_SEND_IDLE +#ifndef ATM_IDT77252_SEND_IDLE SAR_CFG_NO_IDLE | /* Do not send idle cells */ #endif 0; @@ -3541,7 +3541,7 @@ init_card(struct atm_dev *dev) printk("%s: Linkrate on ATM line : %u bit/s, %u cell/s.\n", card->name, linkrate, card->link_pcr); -#ifdef CONFIG_ATM_IDT77252_SEND_IDLE +#ifdef ATM_IDT77252_SEND_IDLE card->utopia_pcr = card->link_pcr; #else card->utopia_pcr = (160000000 / 8 / 54); diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index 0e2c1ae650e..55fd1b4543f 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -552,8 +552,8 @@ static inline void sram_write(const struct lanai_dev *lanai, writel(val, sram_addr(lanai, offset)); } 
-static int __init sram_test_word( - const struct lanai_dev *lanai, int offset, u32 pattern) +static int __devinit sram_test_word(const struct lanai_dev *lanai, + int offset, u32 pattern) { u32 readback; sram_write(lanai, pattern, offset); diff --git a/drivers/atm/nicstarmac.c b/drivers/atm/nicstarmac.c index 480947f4e01..842e26c4555 100644 --- a/drivers/atm/nicstarmac.c +++ b/drivers/atm/nicstarmac.c @@ -134,7 +134,7 @@ nicstar_read_eprom_status( virt_addr_t base ) /* Send read instruction */ val = NICSTAR_REG_READ( base, NICSTAR_REG_GENERAL_PURPOSE ) & 0xFFFFFFF0; - for (i=0; i<sizeof rdsrtab/sizeof rdsrtab[0]; i++) + for (i=0; i<ARRAY_SIZE(rdsrtab); i++) { NICSTAR_REG_WRITE( base, NICSTAR_REG_GENERAL_PURPOSE, (val | rdsrtab[i]) ); diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 8f65b88cf71..a4a31199240 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -427,4 +427,13 @@ config XILINX_SYSACE help Include support for the Xilinx SystemACE CompactFlash interface +config XEN_BLKDEV_FRONTEND + tristate "Xen virtual block device support" + depends on XEN + default y + help + This driver implements the front-end of the Xen virtual + block device driver. It communicates with a back-end driver + in another domain which drives the actual block device. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 9ee08ab4ffa..3e31532df0e 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -29,3 +29,4 @@ obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 0f5e3caf85d..2288b55d916 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -45,8 +45,6 @@ struct vdc_req_entry { struct vdc_port { struct vio_driver_state vio; - struct vdc *vp; - struct gendisk *disk; struct vdc_completion *cmp; @@ -72,8 +70,6 @@ struct vdc_port { struct vio_disk_geom geom; struct vio_disk_vtoc label; - - struct list_head list; }; static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) @@ -81,15 +77,6 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio) return container_of(vio, struct vdc_port, vio); } -struct vdc { - /* Protects prot_list. 
*/ - spinlock_t lock; - - struct vio_dev *dev; - - struct list_head port_list; -}; - /* Ordered from largest major to lowest */ static struct vio_version vdc_versions[] = { { .major = 1, .minor = 0 }, @@ -747,21 +734,23 @@ static struct vio_driver_ops vdc_vio_ops = { .handshake_complete = vdc_handshake_complete, }; +static void print_version(void) +{ + static int version_printed; + + if (version_printed++ == 0) + printk(KERN_INFO "%s", version); +} + static int __devinit vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id) { struct mdesc_handle *hp; struct vdc_port *port; - unsigned long flags; - struct vdc *vp; const u64 *port_id; int err; - vp = dev_get_drvdata(vdev->dev.parent); - if (!vp) { - printk(KERN_ERR PFX "Cannot find port parent vdc.\n"); - return -ENODEV; - } + print_version(); hp = mdesc_grab(); @@ -783,7 +772,6 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev, goto err_out_release_mdesc; } - port->vp = vp; port->dev_no = *port_id; if (port->dev_no >= 26) @@ -818,12 +806,6 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev, if (err) goto err_out_free_tx_ring; - INIT_LIST_HEAD(&port->list); - - spin_lock_irqsave(&vp->lock, flags); - list_add(&port->list, &vp->port_list); - spin_unlock_irqrestore(&vp->lock, flags); - dev_set_drvdata(&vdev->dev, port); mdesc_release(hp); @@ -879,58 +861,6 @@ static struct vio_driver vdc_port_driver = { } }; -static int __devinit vdc_probe(struct vio_dev *vdev, - const struct vio_device_id *id) -{ - static int vdc_version_printed; - struct vdc *vp; - - if (vdc_version_printed++ == 0) - printk(KERN_INFO "%s", version); - - vp = kzalloc(sizeof(struct vdc), GFP_KERNEL); - if (!vp) - return -ENOMEM; - - spin_lock_init(&vp->lock); - vp->dev = vdev; - INIT_LIST_HEAD(&vp->port_list); - - dev_set_drvdata(&vdev->dev, vp); - - return 0; -} - -static int vdc_remove(struct vio_dev *vdev) -{ - - struct vdc *vp = dev_get_drvdata(&vdev->dev); - - if (vp) { - kfree(vp); - dev_set_drvdata(&vdev->dev, NULL); - } - return 0; -} - -static struct vio_device_id vdc_match[] = { - { - .type = "block", - }, - {}, -}; -MODULE_DEVICE_TABLE(vio, vdc_match); - -static struct vio_driver vdc_driver = { - .id_table = vdc_match, - .probe = vdc_probe, - .remove = vdc_remove, - .driver = { - .name = "vdc", - .owner = THIS_MODULE, - } -}; - static int __init vdc_init(void) { int err; @@ -940,19 +870,13 @@ static int __init vdc_init(void) goto out_err; vdc_major = err; - err = vio_register_driver(&vdc_driver); - if (err) - goto out_unregister_blkdev; err = vio_register_driver(&vdc_port_driver); if (err) - goto out_unregister_vdc; + goto out_unregister_blkdev; return 0; -out_unregister_vdc: - vio_unregister_driver(&vdc_driver); - out_unregister_blkdev: unregister_blkdev(vdc_major, VDCBLK_NAME); vdc_major = 0; @@ -964,7 +888,6 @@ out_err: static void __exit vdc_exit(void) { vio_unregister_driver(&vdc_port_driver); - vio_unregister_driver(&vdc_driver); unregister_blkdev(vdc_major, VDCBLK_NAME); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c new file mode 100644 index 00000000000..6746c29181f --- /dev/null +++ b/drivers/block/xen-blkfront.c @@ -0,0 +1,988 @@ +/* + * blkfront.c + * + * XenLinux virtual block device driver. + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Modifications by Mark A. 
Williamson are (c) Intel Research Cambridge + * Copyright (c) 2004, Christian Limpach + * Copyright (c) 2004, Andrew Warfield + * Copyright (c) 2005, Christopher Clark + * Copyright (c) 2005, XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/interrupt.h> +#include <linux/blkdev.h> +#include <linux/module.h> + +#include <xen/xenbus.h> +#include <xen/grant_table.h> +#include <xen/events.h> +#include <xen/page.h> + +#include <xen/interface/grant_table.h> +#include <xen/interface/io/blkif.h> + +#include <asm/xen/hypervisor.h> + +enum blkif_state { + BLKIF_STATE_DISCONNECTED, + BLKIF_STATE_CONNECTED, + BLKIF_STATE_SUSPENDED, +}; + +struct blk_shadow { + struct blkif_request req; + unsigned long request; + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +static struct block_device_operations xlvbd_block_fops; + +#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) + +/* + * We have one of these per vbd, whether ide, scsi or 'other'. They + * hang in private_data off the gendisk structure. We may end up + * putting all kinds of interesting stuff here :-) + */ +struct blkfront_info +{ + struct xenbus_device *xbdev; + dev_t dev; + struct gendisk *gd; + int vdevice; + blkif_vdev_t handle; + enum blkif_state connected; + int ring_ref; + struct blkif_front_ring ring; + unsigned int evtchn, irq; + struct request_queue *rq; + struct work_struct work; + struct gnttab_free_callback callback; + struct blk_shadow shadow[BLK_RING_SIZE]; + unsigned long shadow_free; + int feature_barrier; + + /** + * The number of people holding this device open. We won't allow a + * hot-unplug unless this is 0. + */ + int users; +}; + +static DEFINE_SPINLOCK(blkif_io_lock); + +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) +#define GRANT_INVALID_REF 0 + +#define PARTS_PER_DISK 16 + +#define BLKIF_MAJOR(dev) ((dev)>>8) +#define BLKIF_MINOR(dev) ((dev) & 0xff) + +#define DEV_NAME "xvd" /* name in /dev */ + +/* Information about our VBDs. 
*/ +#define MAX_VBDS 64 +static LIST_HEAD(vbds_list); + +static int get_id_from_freelist(struct blkfront_info *info) +{ + unsigned long free = info->shadow_free; + BUG_ON(free > BLK_RING_SIZE); + info->shadow_free = info->shadow[free].req.id; + info->shadow[free].req.id = 0x0fffffee; /* debug */ + return free; +} + +static void add_id_to_freelist(struct blkfront_info *info, + unsigned long id) +{ + info->shadow[id].req.id = info->shadow_free; + info->shadow[id].request = 0; + info->shadow_free = id; +} + +static void blkif_restart_queue_callback(void *arg) +{ + struct blkfront_info *info = (struct blkfront_info *)arg; + schedule_work(&info->work); +} + +/* + * blkif_queue_request + * + * request block io + * + * id: for guest use only. + * operation: BLKIF_OP_{READ,WRITE,PROBE} + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + */ +static int blkif_queue_request(struct request *req) +{ + struct blkfront_info *info = req->rq_disk->private_data; + unsigned long buffer_mfn; + struct blkif_request *ring_req; + struct bio *bio; + struct bio_vec *bvec; + int idx; + unsigned long id; + unsigned int fsect, lsect; + int ref; + grant_ref_t gref_head; + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + return 1; + + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + + /* Fill out a communications ring structure. */ + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + id = get_id_from_freelist(info); + info->shadow[id].request = (unsigned long)req; + + ring_req->id = id; + ring_req->sector_number = (blkif_sector_t)req->sector; + ring_req->handle = info->handle; + + ring_req->operation = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + if (blk_barrier_rq(req)) + ring_req->operation = BLKIF_OP_WRITE_BARRIER; + + ring_req->nr_segments = 0; + rq_for_each_bio (bio, req) { + bio_for_each_segment (bvec, bio, idx) { + BUG_ON(ring_req->nr_segments + == BLKIF_MAX_SEGMENTS_PER_REQUEST); + buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page)); + fsect = bvec->bv_offset >> 9; + lsect = fsect + (bvec->bv_len >> 9) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + BUG_ON(ref == -ENOSPC); + + gnttab_grant_foreign_access_ref( + ref, + info->xbdev->otherend_id, + buffer_mfn, + rq_data_dir(req) ); + + info->shadow[id].frame[ring_req->nr_segments] = + mfn_to_pfn(buffer_mfn); + + ring_req->seg[ring_req->nr_segments] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + + ring_req->nr_segments++; + } + } + + info->ring.req_prod_pvt++; + + /* Keep a private copy so we can reissue requests when recovering. 
*/ + info->shadow[id].req = *ring_req; + + gnttab_free_grant_references(gref_head); + + return 0; +} + + +static inline void flush_requests(struct blkfront_info *info) +{ + int notify; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); + + if (notify) + notify_remote_via_irq(info->irq); +} + +/* + * do_blkif_request + * read a block; request is in a request queue + */ +static void do_blkif_request(request_queue_t *rq) +{ + struct blkfront_info *info = NULL; + struct request *req; + int queued; + + pr_debug("Entered do_blkif_request\n"); + + queued = 0; + + while ((req = elv_next_request(rq)) != NULL) { + info = req->rq_disk->private_data; + if (!blk_fs_request(req)) { + end_request(req, 0); + continue; + } + + if (RING_FULL(&info->ring)) + goto wait; + + pr_debug("do_blk_req %p: cmd %p, sec %lx, " + "(%u/%li) buffer:%p [%s]\n", + req, req->cmd, (unsigned long)req->sector, + req->current_nr_sectors, + req->nr_sectors, req->buffer, + rq_data_dir(req) ? "write" : "read"); + + + blkdev_dequeue_request(req); + if (blkif_queue_request(req)) { + blk_requeue_request(rq, req); +wait: + /* Avoid pointless unplugs. */ + blk_stop_queue(rq); + break; + } + + queued++; + } + + if (queued != 0) + flush_requests(info); +} + +static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) +{ + request_queue_t *rq; + + rq = blk_init_queue(do_blkif_request, &blkif_io_lock); + if (rq == NULL) + return -1; + + elevator_init(rq, "noop"); + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_hardsect_size(rq, sector_size); + blk_queue_max_sectors(rq, 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. */ + blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + gd->queue = rq; + + return 0; +} + + +static int xlvbd_barrier(struct blkfront_info *info) +{ + int err; + + err = blk_queue_ordered(info->rq, + info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, + NULL); + + if (err) + return err; + + printk(KERN_INFO "blkfront: %s: barriers %s\n", + info->gd->disk_name, + info->feature_barrier ? 
"enabled" : "disabled"); + return 0; +} + + +static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, + int vdevice, u16 vdisk_info, u16 sector_size, + struct blkfront_info *info) +{ + struct gendisk *gd; + int nr_minors = 1; + int err = -ENODEV; + + BUG_ON(info->gd != NULL); + BUG_ON(info->rq != NULL); + + if ((minor % PARTS_PER_DISK) == 0) + nr_minors = PARTS_PER_DISK; + + gd = alloc_disk(nr_minors); + if (gd == NULL) + goto out; + + if (nr_minors > 1) + sprintf(gd->disk_name, "%s%c", DEV_NAME, + 'a' + minor / PARTS_PER_DISK); + else + sprintf(gd->disk_name, "%s%c%d", DEV_NAME, + 'a' + minor / PARTS_PER_DISK, + minor % PARTS_PER_DISK); + + gd->major = XENVBD_MAJOR; + gd->first_minor = minor; + gd->fops = &xlvbd_block_fops; + gd->private_data = info; + gd->driverfs_dev = &(info->xbdev->dev); + set_capacity(gd, capacity); + + if (xlvbd_init_blk_queue(gd, sector_size)) { + del_gendisk(gd); + goto out; + } + + info->rq = gd->queue; + info->gd = gd; + + if (info->feature_barrier) + xlvbd_barrier(info); + + if (vdisk_info & VDISK_READONLY) + set_disk_ro(gd, 1); + + if (vdisk_info & VDISK_REMOVABLE) + gd->flags |= GENHD_FL_REMOVABLE; + + if (vdisk_info & VDISK_CDROM) + gd->flags |= GENHD_FL_CD; + + return 0; + + out: + return err; +} + +static void kick_pending_request_queues(struct blkfront_info *info) +{ + if (!RING_FULL(&info->ring)) { + /* Re-enable calldowns. */ + blk_start_queue(info->rq); + /* Kick things off immediately. */ + do_blkif_request(info->rq); + } +} + +static void blkif_restart_queue(struct work_struct *work) +{ + struct blkfront_info *info = container_of(work, struct blkfront_info, work); + + spin_lock_irq(&blkif_io_lock); + if (info->connected == BLKIF_STATE_CONNECTED) + kick_pending_request_queues(info); + spin_unlock_irq(&blkif_io_lock); +} + +static void blkif_free(struct blkfront_info *info, int suspend) +{ + /* Prevent new requests being issued until we fix things up. */ + spin_lock_irq(&blkif_io_lock); + info->connected = suspend ? + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; + /* No more blkif_request(). */ + if (info->rq) + blk_stop_queue(info->rq); + /* No more gnttab callback work. */ + gnttab_cancel_free_callback(&info->callback); + spin_unlock_irq(&blkif_io_lock); + + /* Flush gnttab callback work. Must be done with no locks held. */ + flush_scheduled_work(); + + /* Free resources associated with old device channel. */ + if (info->ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref, 0, + (unsigned long)info->ring.sring); + info->ring_ref = GRANT_INVALID_REF; + info->ring.sring = NULL; + } + if (info->irq) + unbind_from_irqhandler(info->irq, info); + info->evtchn = info->irq = 0; + +} + +static void blkif_completion(struct blk_shadow *s) +{ + int i; + for (i = 0; i < s->req.nr_segments; i++) + gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); +} + +static irqreturn_t blkif_interrupt(int irq, void *dev_id) +{ + struct request *req; + struct blkif_response *bret; + RING_IDX i, rp; + unsigned long flags; + struct blkfront_info *info = (struct blkfront_info *)dev_id; + int uptodate; + + spin_lock_irqsave(&blkif_io_lock, flags); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + spin_unlock_irqrestore(&blkif_io_lock, flags); + return IRQ_HANDLED; + } + + again: + rp = info->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ + + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; + int ret; + + bret = RING_GET_RESPONSE(&info->ring, i); + id = bret->id; + req = (struct request *)info->shadow[id].request; + + blkif_completion(&info->shadow[id]); + + add_id_to_freelist(info, id); + + uptodate = (bret->status == BLKIF_RSP_OKAY); + switch (bret->operation) { + case BLKIF_OP_WRITE_BARRIER: + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", + info->gd->disk_name); + uptodate = -EOPNOTSUPP; + info->feature_barrier = 0; + xlvbd_barrier(info); + } + /* fall through */ + case BLKIF_OP_READ: + case BLKIF_OP_WRITE: + if (unlikely(bret->status != BLKIF_RSP_OKAY)) + dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " + "request: %x\n", bret->status); + + ret = end_that_request_first(req, uptodate, + req->hard_nr_sectors); + BUG_ON(ret); + end_that_request_last(req, uptodate); + break; + default: + BUG(); + } + } + + info->ring.rsp_cons = i; + + if (i != info->ring.req_prod_pvt) { + int more_to_do; + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); + if (more_to_do) + goto again; + } else + info->ring.sring->rsp_event = i + 1; + + kick_pending_request_queues(info); + + spin_unlock_irqrestore(&blkif_io_lock, flags); + + return IRQ_HANDLED; +} + + +static int setup_blkring(struct xenbus_device *dev, + struct blkfront_info *info) +{ + struct blkif_sring *sring; + int err; + + info->ring_ref = GRANT_INVALID_REF; + + sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL); + if (!sring) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); + return -ENOMEM; + } + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); + if (err < 0) { + free_page((unsigned long)sring); + info->ring.sring = NULL; + goto fail; + } + info->ring_ref = err; + + err = xenbus_alloc_evtchn(dev, &info->evtchn); + if (err) + goto fail; + + err = bind_evtchn_to_irqhandler(info->evtchn, + blkif_interrupt, + IRQF_SAMPLE_RANDOM, "blkif", info); + if (err <= 0) { + xenbus_dev_fatal(dev, err, + "bind_evtchn_to_irqhandler failed"); + goto fail; + } + info->irq = err; + + return 0; +fail: + blkif_free(info, 0); + return err; +} + + +/* Common code used when first setting up, and when resuming. */ +static int talk_to_backend(struct xenbus_device *dev, + struct blkfront_info *info) +{ + const char *message = NULL; + struct xenbus_transaction xbt; + int err; + + /* Create shared ring, alloc event channel. 
*/ + err = setup_blkring(dev, info); + if (err) + goto out; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_blkring; + } + + err = xenbus_printf(xbt, dev->nodename, + "ring-ref", "%u", info->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "event-channel", "%u", info->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_blkring; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + if (message) + xenbus_dev_fatal(dev, err, "%s", message); + destroy_blkring: + blkif_free(info, 0); + out: + return err; +} + + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and the ring buffer for communication with the backend, and + * inform the backend of the appropriate details for those. Switch to + * Initialised state. + */ +static int blkfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err, vdevice, i; + struct blkfront_info *info; + + /* FIXME: Use dynamic device id if this is not set. */ + err = xenbus_scanf(XBT_NIL, dev->nodename, + "virtual-device", "%i", &vdevice); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading virtual-device"); + return err; + } + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); + return -ENOMEM; + } + + info->xbdev = dev; + info->vdevice = vdevice; + info->connected = BLKIF_STATE_DISCONNECTED; + INIT_WORK(&info->work, blkif_restart_queue); + + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Front end dir is a number, which is used as the id. */ + info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); + dev->dev.driver_data = info; + + err = talk_to_backend(dev, info); + if (err) { + kfree(info); + dev->dev.driver_data = NULL; + return err; + } + + return 0; +} + + +static int blkif_recover(struct blkfront_info *info) +{ + int i; + struct blkif_request *req; + struct blk_shadow *copy; + int j; + + /* Stage 1: Make a safe copy of the shadow state. */ + copy = kmalloc(sizeof(info->shadow), GFP_KERNEL); + if (!copy) + return -ENOMEM; + memcpy(copy, info->shadow, sizeof(info->shadow)); + + /* Stage 2: Set up free list. */ + memset(&info->shadow, 0, sizeof(info->shadow)); + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow_free = info->ring.req_prod_pvt; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Stage 3: Find pending requests and requeue them. */ + for (i = 0; i < BLK_RING_SIZE; i++) { + /* Not in use? */ + if (copy[i].request == 0) + continue; + + /* Grab a request slot and copy shadow state into it. */ + req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + *req = copy[i].req; + + /* We get a new request id, and must reset the shadow state. */ + req->id = get_id_from_freelist(info); + memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i])); + + /* Rewrite any grant references invalidated by susp/resume. 
*/ + for (j = 0; j < req->nr_segments; j++) + gnttab_grant_foreign_access_ref( + req->seg[j].gref, + info->xbdev->otherend_id, + pfn_to_mfn(info->shadow[req->id].frame[j]), + rq_data_dir( + (struct request *) + info->shadow[req->id].request)); + info->shadow[req->id].req = *req; + + info->ring.req_prod_pvt++; + } + + kfree(copy); + + xenbus_switch_state(info->xbdev, XenbusStateConnected); + + spin_lock_irq(&blkif_io_lock); + + /* Now safe for us to use the shared ring */ + info->connected = BLKIF_STATE_CONNECTED; + + /* Send off requeued requests */ + flush_requests(info); + + /* Kick any other new requests queued since we resumed */ + kick_pending_request_queues(info); + + spin_unlock_irq(&blkif_io_lock); + + return 0; +} + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our blkif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. + */ +static int blkfront_resume(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + int err; + + dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); + + blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); + + err = talk_to_backend(dev, info); + if (info->connected == BLKIF_STATE_SUSPENDED && !err) + err = blkif_recover(info); + + return err; +} + + +/* + * Invoked when the backend is finally 'ready' (and has produced + * the details about the physical device - #sectors, size, etc). + */ +static void blkfront_connect(struct blkfront_info *info) +{ + unsigned long long sectors; + unsigned long sector_size; + unsigned int binfo; + int err; + + if ((info->connected == BLKIF_STATE_CONNECTED) || + (info->connected == BLKIF_STATE_SUSPENDED) ) + return; + + dev_dbg(&info->xbdev->dev, "%s:%s.\n", + __func__, info->xbdev->otherend); + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "sectors", "%llu", &sectors, + "info", "%u", &binfo, + "sector-size", "%lu", &sector_size, + NULL); + if (err) { + xenbus_dev_fatal(info->xbdev, err, + "reading backend fields at %s", + info->xbdev->otherend); + return; + } + + err = xenbus_gather(XBT_NIL, info->xbdev->otherend, + "feature-barrier", "%lu", &info->feature_barrier, + NULL); + if (err) + info->feature_barrier = 0; + + err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), + sectors, info->vdevice, + binfo, sector_size, info); + if (err) { + xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", + info->xbdev->otherend); + return; + } + + xenbus_switch_state(info->xbdev, XenbusStateConnected); + + /* Kick pending requests. */ + spin_lock_irq(&blkif_io_lock); + info->connected = BLKIF_STATE_CONNECTED; + kick_pending_request_queues(info); + spin_unlock_irq(&blkif_io_lock); + + add_disk(info->gd); +} + +/** + * Handle the change of state of the backend to Closing. We must delete our + * device-layer structures now, to ensure that writes are flushed through to + * the backend. Once this is done, we can switch to Closed in + * acknowledgement. + */ +static void blkfront_closing(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + unsigned long flags; + + dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename); + + if (info->rq == NULL) + goto out; + + spin_lock_irqsave(&blkif_io_lock, flags); + + del_gendisk(info->gd); + + /* No more blkif_request(). */ + blk_stop_queue(info->rq); + + /* No more gnttab callback work. 
*/ + gnttab_cancel_free_callback(&info->callback); + spin_unlock_irqrestore(&blkif_io_lock, flags); + + /* Flush gnttab callback work. Must be done with no locks held. */ + flush_scheduled_work(); + + blk_cleanup_queue(info->rq); + info->rq = NULL; + + out: + xenbus_frontend_closed(dev); +} + +/** + * Callback received when the backend's state changes. + */ +static void backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct blkfront_info *info = dev->dev.driver_data; + struct block_device *bd; + + dev_dbg(&dev->dev, "blkfront:backend_changed.\n"); + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateConnected: + blkfront_connect(info); + break; + + case XenbusStateClosing: + bd = bdget(info->dev); + if (bd == NULL) + xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); + + mutex_lock(&bd->bd_mutex); + if (info->users > 0) + xenbus_dev_error(dev, -EBUSY, + "Device in use; refusing to close"); + else + blkfront_closing(dev); + mutex_unlock(&bd->bd_mutex); + bdput(bd); + break; + } +} + +static int blkfront_remove(struct xenbus_device *dev) +{ + struct blkfront_info *info = dev->dev.driver_data; + + dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename); + + blkif_free(info, 0); + + kfree(info); + + return 0; +} + +static int blkif_open(struct inode *inode, struct file *filep) +{ + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; + info->users++; + return 0; +} + +static int blkif_release(struct inode *inode, struct file *filep) +{ + struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; + info->users--; + if (info->users == 0) { + /* Check whether we have been instructed to close. We will + have ignored this request initially, as the device was + still mounted. 
*/ + struct xenbus_device *dev = info->xbdev; + enum xenbus_state state = xenbus_read_driver_state(dev->otherend); + + if (state == XenbusStateClosing) + blkfront_closing(dev); + } + return 0; +} + +static struct block_device_operations xlvbd_block_fops = +{ + .owner = THIS_MODULE, + .open = blkif_open, + .release = blkif_release, +}; + + +static struct xenbus_device_id blkfront_ids[] = { + { "vbd" }, + { "" } +}; + +static struct xenbus_driver blkfront = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkfront_ids, + .probe = blkfront_probe, + .remove = blkfront_remove, + .resume = blkfront_resume, + .otherend_changed = backend_changed, +}; + +static int __init xlblk_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { + printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", + XENVBD_MAJOR, DEV_NAME); + return -ENODEV; + } + + return xenbus_register_frontend(&blkfront); +} +module_init(xlblk_init); + + +static void xlblk_exit(void) +{ + return xenbus_unregister_driver(&blkfront); +} +module_exit(xlblk_exit); + +MODULE_DESCRIPTION("Xen virtual block device frontend"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 97bd71bc3ae..9e8f21410d2 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -604,6 +604,14 @@ config HVC_BEAT help Toshiba's Cell Reference Set Beat Console device driver +config HVC_XEN + bool "Xen Hypervisor Console support" + depends on XEN + select HVC_DRIVER + default y + help + Xen virtual console device driver + config HVCS tristate "IBM Hypervisor Virtual Console Server support" depends on PPC_PSERIES diff --git a/drivers/char/Makefile b/drivers/char/Makefile index f2996a95eb0..8852b8d643c 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o obj-$(CONFIG_HVC_BEAT) += hvc_beat.o obj-$(CONFIG_HVC_DRIVER) += hvc_console.o +obj-$(CONFIG_HVC_XEN) += hvc_xen.o obj-$(CONFIG_RAW_DRIVER) += raw.o obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o obj-$(CONFIG_MSPEC) += mspec.o diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c new file mode 100644 index 00000000000..dd68f8541c2 --- /dev/null +++ b/drivers/char/hvc_xen.c @@ -0,0 +1,159 @@ +/* + * xen console driver interface to hvc_console.c + * + * (c) 2007 Gerd Hoffmann <kraxel@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/types.h> + +#include <asm/xen/hypervisor.h> +#include <xen/page.h> +#include <xen/events.h> +#include <xen/interface/io/console.h> +#include <xen/hvc-console.h> + +#include "hvc_console.h" + +#define HVC_COOKIE 0x58656e /* "Xen" in hex */ + +static struct hvc_struct *hvc; +static int xencons_irq; + +/* ------------------------------------------------------------------ */ + +static inline struct xencons_interface *xencons_interface(void) +{ + return mfn_to_virt(xen_start_info->console.domU.mfn); +} + +static inline void notify_daemon(void) +{ + /* Use evtchn: this is called early, before irq is set up. */ + notify_remote_via_evtchn(xen_start_info->console.domU.evtchn); +} + +static int write_console(uint32_t vtermno, const char *data, int len) +{ + struct xencons_interface *intf = xencons_interface(); + XENCONS_RING_IDX cons, prod; + int sent = 0; + + cons = intf->out_cons; + prod = intf->out_prod; + mb(); /* update queue values before going on */ + BUG_ON((prod - cons) > sizeof(intf->out)); + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; + + wmb(); /* write ring before updating pointer */ + intf->out_prod = prod; + + notify_daemon(); + return sent; +} + +static int read_console(uint32_t vtermno, char *buf, int len) +{ + struct xencons_interface *intf = xencons_interface(); + XENCONS_RING_IDX cons, prod; + int recv = 0; + + cons = intf->in_cons; + prod = intf->in_prod; + mb(); /* get pointers before reading ring */ + BUG_ON((prod - cons) > sizeof(intf->in)); + + while (cons != prod && recv < len) + buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)]; + + mb(); /* read ring before consuming */ + intf->in_cons = cons; + + notify_daemon(); + return recv; +} + +static struct hv_ops hvc_ops = { + .get_chars = read_console, + .put_chars = write_console, +}; + +static int __init xen_init(void) +{ + struct hvc_struct *hp; + + if (!is_running_on_xen()) + return 0; + + xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); + if (xencons_irq < 0) + xencons_irq = 0 /* NO_IRQ */; + hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); + if (IS_ERR(hp)) + return PTR_ERR(hp); + + hvc = hp; + return 0; +} + +static void __exit xen_fini(void) +{ + if (hvc) + hvc_remove(hvc); +} + +static int xen_cons_init(void) +{ + if (!is_running_on_xen()) + return 0; + + hvc_instantiate(HVC_COOKIE, 0, &hvc_ops); + return 0; +} + +module_init(xen_init); +module_exit(xen_fini); +console_initcall(xen_cons_init); + +static void xenboot_write_console(struct console *console, const char *string, + unsigned len) +{ + unsigned int linelen, off = 0; + const char *pos; + + while (off < len && NULL != (pos = strchr(string+off, '\n'))) { + linelen = pos-string+off; + if (off + linelen > len) + break; + write_console(0, string+off, linelen); + write_console(0, "\r\n", 2); + off += linelen + 1; + } + if (off < len) + write_console(0, string+off, len-off); +} + +struct console xenboot_console = { + .name = "xenboot", + .write = xenboot_write_console, + .flags = CON_PRINTBUFFER | CON_BOOT, +}; diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index 
dbb22403979..3d90fc00209 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -1770,7 +1770,8 @@ static int call_critical_overtemp(void) "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - return call_usermodehelper(critical_overtemp_path, argv, envp, 0); + return call_usermodehelper(critical_overtemp_path, + argv, envp, UMH_WAIT_EXEC); } diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c index e18d265d5d3..516d943227e 100644 --- a/drivers/macintosh/windfarm_core.c +++ b/drivers/macintosh/windfarm_core.c @@ -80,7 +80,8 @@ int wf_critical_overtemp(void) "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - return call_usermodehelper(critical_overtemp_path, argv, envp, 0); + return call_usermodehelper(critical_overtemp_path, + argv, envp, UMH_WAIT_EXEC); } EXPORT_SYMBOL_GPL(wf_critical_overtemp); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 555d594d181..1cb22bfae75 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -33,6 +33,7 @@ #include <linux/moduleparam.h> #include <linux/stringify.h> #include <linux/stat.h> +#include <linux/log2.h> #include "ubi.h" /* Maximum length of the 'mtd=' parameter */ @@ -369,7 +370,7 @@ static int attach_by_scanning(struct ubi_device *ubi) out_wl: ubi_wl_close(ubi); out_vtbl: - kfree(ubi->vtbl); + vfree(ubi->vtbl); out_si: ubi_scan_destroy_si(si); return err; @@ -422,8 +423,7 @@ static int io_init(struct ubi_device *ubi) ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; /* Make sure minimal I/O unit is power of 2 */ - if (ubi->min_io_size == 0 || - (ubi->min_io_size & (ubi->min_io_size - 1))) { + if (!is_power_of_2(ubi->min_io_size)) { ubi_err("bad min. I/O unit"); return -EINVAL; } @@ -593,8 +593,6 @@ static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset, if (err) goto out_detach; - ubi_devices_cnt += 1; - ubi_msg("attached mtd%d to ubi%d", ubi->mtd->index, ubi_devices_cnt); ubi_msg("MTD device name: \"%s\"", ubi->mtd->name); ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); @@ -624,12 +622,13 @@ static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset, wake_up_process(ubi->bgt_thread); } + ubi_devices_cnt += 1; return 0; out_detach: ubi_eba_close(ubi); ubi_wl_close(ubi); - kfree(ubi->vtbl); + vfree(ubi->vtbl); out_free: kfree(ubi); out_mtd: @@ -650,7 +649,7 @@ static void detach_mtd_dev(struct ubi_device *ubi) uif_close(ubi); ubi_eba_close(ubi); ubi_wl_close(ubi); - kfree(ubi->vtbl); + vfree(ubi->vtbl); put_mtd_device(ubi->mtd); kfree(ubi_devices[ubi_num]); ubi_devices[ubi_num] = NULL; @@ -686,13 +685,6 @@ static int __init ubi_init(void) struct mtd_dev_param *p = &mtd_dev_param[i]; cond_resched(); - - if (!p->name) { - dbg_err("empty name"); - err = -EINVAL; - goto out_detach; - } - err = attach_mtd_dev(p->name, p->vid_hdr_offs, p->data_offs); if (err) goto out_detach; @@ -799,7 +791,7 @@ static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) /* Get rid of the final newline */ if (buf[len - 1] == '\n') - buf[len - 1] = 0; + buf[len - 1] = '\0'; for (i = 0; i < 3; i++) tokens[i] = strsep(&pbuf, ","); @@ -809,9 +801,6 @@ static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) return -EINVAL; } - if (tokens[0] == '\0') - return -EINVAL; - p = &mtd_dev_param[mtd_devs]; strcpy(&p->name[0], tokens[0]); diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 6612eb79bf1..fe4da1e96c5 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -64,6 +64,7 
@@ static struct ubi_device *major_to_device(int major) if (ubi_devices[i] && ubi_devices[i]->major == major) return ubi_devices[i]; BUG(); + return NULL; } /** @@ -153,7 +154,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file) ubi_warn("update of volume %d not finished, volume is damaged", vol->vol_id); vol->updating = 0; - kfree(vol->upd_buf); + vfree(vol->upd_buf); } ubi_close_volume(desc); @@ -232,7 +233,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, tbuf_size = vol->usable_leb_size; if (count < tbuf_size) tbuf_size = ALIGN(count, ubi->min_io_size); - tbuf = kmalloc(tbuf_size, GFP_KERNEL); + tbuf = vmalloc(tbuf_size); if (!tbuf) return -ENOMEM; @@ -271,7 +272,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, len = count > tbuf_size ? tbuf_size : count; } while (count); - kfree(tbuf); + vfree(tbuf); return err ? err : count_save - count; } @@ -320,7 +321,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, tbuf_size = vol->usable_leb_size; if (count < tbuf_size) tbuf_size = ALIGN(count, ubi->min_io_size); - tbuf = kmalloc(tbuf_size, GFP_KERNEL); + tbuf = vmalloc(tbuf_size); if (!tbuf) return -ENOMEM; @@ -355,7 +356,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, len = count > tbuf_size ? tbuf_size : count; } - kfree(tbuf); + vfree(tbuf); return err ? err : count_save - count; } @@ -397,6 +398,7 @@ static ssize_t vol_cdev_write(struct file *file, const char __user *buf, vol->corrupted = 1; } vol->checked = 1; + ubi_gluebi_updated(vol); revoke_exclusive(desc, UBI_READWRITE); } @@ -413,19 +415,7 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file, struct ubi_device *ubi = vol->ubi; void __user *argp = (void __user *)arg; - if (_IOC_NR(cmd) > VOL_CDEV_IOC_MAX_SEQ || - _IOC_TYPE(cmd) != UBI_VOL_IOC_MAGIC) - return -ENOTTY; - - if (_IOC_DIR(cmd) && _IOC_READ) - err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd)); - else if (_IOC_DIR(cmd) && _IOC_WRITE) - err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd)); - if (err) - return -EFAULT; - switch (cmd) { - /* Volume update command */ case UBI_IOCVOLUP: { @@ -471,7 +461,7 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file, { int32_t lnum; - err = __get_user(lnum, (__user int32_t *)argp); + err = get_user(lnum, (__user int32_t *)argp); if (err) { err = -EFAULT; break; @@ -587,17 +577,6 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, struct ubi_volume_desc *desc; void __user *argp = (void __user *)arg; - if (_IOC_NR(cmd) > UBI_CDEV_IOC_MAX_SEQ || - _IOC_TYPE(cmd) != UBI_IOC_MAGIC) - return -ENOTTY; - - if (_IOC_DIR(cmd) && _IOC_READ) - err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd)); - else if (_IOC_DIR(cmd) && _IOC_WRITE) - err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd)); - if (err) - return -EFAULT; - if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -612,7 +591,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, struct ubi_mkvol_req req; dbg_msg("create volume"); - err = __copy_from_user(&req, argp, + err = copy_from_user(&req, argp, sizeof(struct ubi_mkvol_req)); if (err) { err = -EFAULT; @@ -629,7 +608,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, if (err) break; - err = __put_user(req.vol_id, (__user int32_t *)argp); + err = put_user(req.vol_id, (__user int32_t *)argp); if (err) err = -EFAULT; @@ -642,7 +621,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file 
*file, int vol_id; dbg_msg("remove volume"); - err = __get_user(vol_id, (__user int32_t *)argp); + err = get_user(vol_id, (__user int32_t *)argp); if (err) { err = -EFAULT; break; @@ -669,7 +648,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, struct ubi_rsvol_req req; dbg_msg("re-size volume"); - err = __copy_from_user(&req, argp, + err = copy_from_user(&req, argp, sizeof(struct ubi_rsvol_req)); if (err) { err = -EFAULT; @@ -707,7 +686,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file, struct file_operations ubi_cdev_operations = { .owner = THIS_MODULE, .ioctl = ubi_cdev_ioctl, - .llseek = no_llseek + .llseek = no_llseek, }; /* UBI volume character device operations */ @@ -718,5 +697,5 @@ struct file_operations ubi_vol_cdev_operations = { .llseek = vol_cdev_llseek, .read = vol_cdev_read, .write = vol_cdev_write, - .ioctl = vol_cdev_ioctl + .ioctl = vol_cdev_ioctl, }; diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 86364221faf..310341e5cd4 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -35,12 +35,12 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { dbg_msg("erase counter header dump:"); - dbg_msg("magic %#08x", ubi32_to_cpu(ec_hdr->magic)); + dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic)); dbg_msg("version %d", (int)ec_hdr->version); - dbg_msg("ec %llu", (long long)ubi64_to_cpu(ec_hdr->ec)); - dbg_msg("vid_hdr_offset %d", ubi32_to_cpu(ec_hdr->vid_hdr_offset)); - dbg_msg("data_offset %d", ubi32_to_cpu(ec_hdr->data_offset)); - dbg_msg("hdr_crc %#08x", ubi32_to_cpu(ec_hdr->hdr_crc)); + dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec)); + dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset)); + dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset)); + dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc)); dbg_msg("erase counter header hexdump:"); ubi_dbg_hexdump(ec_hdr, UBI_EC_HDR_SIZE); } @@ -52,20 +52,20 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { dbg_msg("volume identifier header dump:"); - dbg_msg("magic %08x", ubi32_to_cpu(vid_hdr->magic)); + dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic)); dbg_msg("version %d", (int)vid_hdr->version); dbg_msg("vol_type %d", (int)vid_hdr->vol_type); dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); dbg_msg("compat %d", (int)vid_hdr->compat); - dbg_msg("vol_id %d", ubi32_to_cpu(vid_hdr->vol_id)); - dbg_msg("lnum %d", ubi32_to_cpu(vid_hdr->lnum)); - dbg_msg("leb_ver %u", ubi32_to_cpu(vid_hdr->leb_ver)); - dbg_msg("data_size %d", ubi32_to_cpu(vid_hdr->data_size)); - dbg_msg("used_ebs %d", ubi32_to_cpu(vid_hdr->used_ebs)); - dbg_msg("data_pad %d", ubi32_to_cpu(vid_hdr->data_pad)); + dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id)); + dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum)); + dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver)); + dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size)); + dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs)); + dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad)); dbg_msg("sqnum %llu", - (unsigned long long)ubi64_to_cpu(vid_hdr->sqnum)); - dbg_msg("hdr_crc %08x", ubi32_to_cpu(vid_hdr->hdr_crc)); + (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); + dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc)); dbg_msg("volume identifier header hexdump:"); } @@ -91,7 +91,7 @@ void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) if (vol->name_len <= UBI_VOL_NAME_MAX && strnlen(vol->name, vol->name_len + 1) == 
vol->name_len) { - dbg_msg("name %s", vol->name); + dbg_msg("name %s", vol->name); } else { dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", vol->name[0], vol->name[1], vol->name[2], @@ -106,30 +106,30 @@ void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) */ void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { - int name_len = ubi16_to_cpu(r->name_len); + int name_len = be16_to_cpu(r->name_len); dbg_msg("volume table record %d dump:", idx); - dbg_msg("reserved_pebs %d", ubi32_to_cpu(r->reserved_pebs)); - dbg_msg("alignment %d", ubi32_to_cpu(r->alignment)); - dbg_msg("data_pad %d", ubi32_to_cpu(r->data_pad)); + dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs)); + dbg_msg("alignment %d", be32_to_cpu(r->alignment)); + dbg_msg("data_pad %d", be32_to_cpu(r->data_pad)); dbg_msg("vol_type %d", (int)r->vol_type); dbg_msg("upd_marker %d", (int)r->upd_marker); dbg_msg("name_len %d", name_len); if (r->name[0] == '\0') { - dbg_msg("name NULL"); + dbg_msg("name NULL"); return; } if (name_len <= UBI_VOL_NAME_MAX && strnlen(&r->name[0], name_len + 1) == name_len) { - dbg_msg("name %s", &r->name[0]); + dbg_msg("name %s", &r->name[0]); } else { dbg_msg("1st 5 characters of the name: %c%c%c%c%c", r->name[0], r->name[1], r->name[2], r->name[3], r->name[4]); } - dbg_msg("crc %#08x", ubi32_to_cpu(r->crc)); + dbg_msg("crc %#08x", be32_to_cpu(r->crc)); } /** diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index f816ad9a36c..ff8f39548cd 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -52,7 +52,6 @@ struct ubi_scan_volume; struct ubi_scan_leb; struct ubi_mkvol_req; -void ubi_dbg_print(int type, const char *func, const char *fmt, ...); void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); void ubi_dbg_dump_vol_info(const struct ubi_volume *vol); @@ -66,7 +65,6 @@ void ubi_dbg_hexdump(const void *buf, int size); #define dbg_msg(fmt, ...) ({}) #define ubi_dbg_dump_stack() ({}) -#define ubi_dbg_print(func, fmt, ...) 
({}) #define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) #define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) #define ubi_dbg_dump_vol_info(vol) ({}) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 7c6b223b3f8..8aff9385613 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -425,10 +425,10 @@ retry: } else if (err == UBI_IO_BITFLIPS) scrub = 1; - ubi_assert(lnum < ubi32_to_cpu(vid_hdr->used_ebs)); - ubi_assert(len == ubi32_to_cpu(vid_hdr->data_size)); + ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs)); + ubi_assert(len == be32_to_cpu(vid_hdr->data_size)); - crc = ubi32_to_cpu(vid_hdr->data_crc); + crc = be32_to_cpu(vid_hdr->data_crc); ubi_free_vid_hdr(ubi, vid_hdr); } @@ -518,13 +518,13 @@ retry: goto out_put; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); if (err) goto write_error; data_size = offset + len; - new_buf = kmalloc(data_size, GFP_KERNEL); + new_buf = vmalloc(data_size); if (!new_buf) { err = -ENOMEM; goto out_put; @@ -535,7 +535,7 @@ retry: if (offset > 0) { err = ubi_io_read_data(ubi, new_buf, pnum, 0, offset); if (err && err != UBI_IO_BITFLIPS) { - kfree(new_buf); + vfree(new_buf); goto out_put; } } @@ -544,11 +544,11 @@ retry: err = ubi_io_write_data(ubi, new_buf, new_pnum, 0, data_size); if (err) { - kfree(new_buf); + vfree(new_buf); goto write_error; } - kfree(new_buf); + vfree(new_buf); ubi_free_vid_hdr(ubi, vid_hdr); vol->eba_tbl[lnum] = new_pnum; @@ -634,11 +634,11 @@ int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, } vid_hdr->vol_type = UBI_VID_DYNAMIC; - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); - vid_hdr->vol_id = cpu_to_ubi32(vol_id); - vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); - vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); retry: pnum = ubi_wl_get_peb(ubi, dtype); @@ -692,7 +692,7 @@ write_error: return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } @@ -748,17 +748,17 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); - vid_hdr->vol_id = cpu_to_ubi32(vol_id); - vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); - vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); crc = crc32(UBI_CRC32_INIT, buf, data_size); vid_hdr->vol_type = UBI_VID_STATIC; - vid_hdr->data_size = cpu_to_ubi32(data_size); - vid_hdr->used_ebs = cpu_to_ubi32(used_ebs); - vid_hdr->data_crc = cpu_to_ubi32(crc); + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->used_ebs = cpu_to_be32(used_ebs); + vid_hdr->data_crc = cpu_to_be32(crc); retry: pnum = ubi_wl_get_peb(ubi, dtype); @@ -813,7 +813,7 @@ write_error: return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } @@ -854,17 +854,17 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); - vid_hdr->vol_id = 
cpu_to_ubi32(vol_id); - vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_be32(vol_id); + vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); - vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + vid_hdr->data_pad = cpu_to_be32(vol->data_pad); crc = crc32(UBI_CRC32_INIT, buf, len); - vid_hdr->vol_type = UBI_VID_STATIC; - vid_hdr->data_size = cpu_to_ubi32(len); + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->data_size = cpu_to_be32(len); vid_hdr->copy_flag = 1; - vid_hdr->data_crc = cpu_to_ubi32(crc); + vid_hdr->data_crc = cpu_to_be32(crc); retry: pnum = ubi_wl_get_peb(ubi, dtype); @@ -891,11 +891,13 @@ retry: goto write_error; } - err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); - if (err) { - ubi_free_vid_hdr(ubi, vid_hdr); - leb_write_unlock(ubi, vol_id, lnum); - return err; + if (vol->eba_tbl[lnum] >= 0) { + err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return err; + } } vol->eba_tbl[lnum] = pnum; @@ -924,7 +926,7 @@ write_error: return err; } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); ubi_msg("try another PEB"); goto retry; } @@ -965,19 +967,19 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, uint32_t crc; void *buf, *buf1 = NULL; - vol_id = ubi32_to_cpu(vid_hdr->vol_id); - lnum = ubi32_to_cpu(vid_hdr->lnum); + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); if (vid_hdr->vol_type == UBI_VID_STATIC) { - data_size = ubi32_to_cpu(vid_hdr->data_size); + data_size = be32_to_cpu(vid_hdr->data_size); aldata_size = ALIGN(data_size, ubi->min_io_size); } else data_size = aldata_size = - ubi->leb_size - ubi32_to_cpu(vid_hdr->data_pad); + ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); - buf = kmalloc(aldata_size, GFP_KERNEL); + buf = vmalloc(aldata_size); if (!buf) return -ENOMEM; @@ -987,7 +989,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, */ err = leb_write_lock(ubi, vol_id, lnum); if (err) { - kfree(buf); + vfree(buf); return err; } @@ -1054,10 +1056,10 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, */ if (data_size > 0) { vid_hdr->copy_flag = 1; - vid_hdr->data_size = cpu_to_ubi32(data_size); - vid_hdr->data_crc = cpu_to_ubi32(crc); + vid_hdr->data_size = cpu_to_be32(data_size); + vid_hdr->data_crc = cpu_to_be32(crc); } - vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); if (err) @@ -1082,7 +1084,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, * We've written the data and are going to read it back to make * sure it was written correctly. 
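
ubi_eba_copy_leb() now takes this verification buffer from vmalloc() rather than kmalloc(), since a whole LEB can be too large for a physically contiguous allocation. The read-back check itself follows the usual write-then-verify shape; a minimal standalone sketch, with in-memory stubs standing in for the UBI flash I/O:

    #include <stdlib.h>
    #include <string.h>

    /* In-memory stand-ins for ubi_io_write_data()/ubi_io_read_data(). */
    static char fake_peb[4096];
    static int flash_write(const void *buf, int len)
    {
            memcpy(fake_peb, buf, len);
            return 0;
    }
    static int flash_read(void *buf, int len)
    {
            memcpy(buf, fake_peb, len);
            return 0;
    }

    /* Write a buffer, then read it back to make sure it hit the media. */
    static int write_and_verify(const void *buf, int len)
    {
            void *check = malloc(len);
            int err = -1;

            if (!check)
                    return -1;
            if (flash_write(buf, len) == 0 &&
                flash_read(check, len) == 0 &&
                memcmp(buf, check, len) == 0)
                    err = 0;        /* contents verified */
            free(check);
            return err;
    }
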
*/ - buf1 = kmalloc(aldata_size, GFP_KERNEL); + buf1 = vmalloc(aldata_size); if (!buf1) { err = -ENOMEM; goto out_unlock; @@ -1111,15 +1113,15 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, vol->eba_tbl[lnum] = to; leb_write_unlock(ubi, vol_id, lnum); - kfree(buf); - kfree(buf1); + vfree(buf); + vfree(buf1); return 0; out_unlock: leb_write_unlock(ubi, vol_id, lnum); - kfree(buf); - kfree(buf1); + vfree(buf); + vfree(buf1); return err; } diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c index fc9478d605f..41ff74c60e1 100644 --- a/drivers/mtd/ubi/gluebi.c +++ b/drivers/mtd/ubi/gluebi.c @@ -282,7 +282,6 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol) mtd->flags = MTD_WRITEABLE; mtd->writesize = ubi->min_io_size; mtd->owner = THIS_MODULE; - mtd->size = vol->usable_leb_size * vol->reserved_pebs; mtd->erasesize = vol->usable_leb_size; mtd->read = gluebi_read; mtd->write = gluebi_write; @@ -290,6 +289,15 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol) mtd->get_device = gluebi_get_device; mtd->put_device = gluebi_put_device; + /* + * In case of dynamic volume, MTD device size is just volume size. In + * case of a static volume the size is equivalent to the amount of data + * bytes, which is zero at this moment and will be changed after volume + * update. + */ + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + mtd->size = vol->usable_leb_size * vol->reserved_pebs; + if (add_mtd_device(mtd)) { ubi_err("cannot not add MTD device\n"); kfree(mtd->name); @@ -321,3 +329,20 @@ int ubi_destroy_gluebi(struct ubi_volume *vol) kfree(mtd->name); return 0; } + +/** + * ubi_gluebi_updated - UBI volume was updated notifier. + * @vol: volume description object + * + * This function is called every time an UBI volume is updated. This function + * does nothing if volume @vol is dynamic, and changes MTD device size if the + * volume is static. This is needed because static volumes cannot be read past + * data they contain. + */ +void ubi_gluebi_updated(struct ubi_volume *vol) +{ + struct mtd_info *mtd = &vol->gluebi_mtd; + + if (vol->vol_type == UBI_STATIC_VOLUME) + mtd->size = vol->used_bytes; +} diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 438914d0515..b0d8f4cede9 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -125,9 +125,9 @@ static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum, * o %UBI_IO_BITFLIPS if all the requested data were successfully read, but * correctable bit-flips were detected; this is harmless but may indicate * that this eraseblock may become bad soon (but do not have to); - * o %-EBADMSG if the MTD subsystem reported about data data integrity - * problems, for example it can me an ECC error in case of NAND; this most - * probably means that the data is corrupted; + * o %-EBADMSG if the MTD subsystem reported about data integrity problems, for + * example it can be an ECC error in case of NAND; this most probably means + * that the data is corrupted; * o %-EIO if some I/O error occurred; * o other negative error codes in case of other errors. 
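
Callers elsewhere in this patch act on exactly this contract: %UBI_IO_BITFLIPS means the data is usable but the eraseblock should be scrubbed, while %-EBADMSG means the contents must be treated as corrupted. A minimal sketch of that dispatch, using an illustrative (not the kernel's) value for the constant and a stubbed reader:

    #include <errno.h>

    #define UBI_IO_BITFLIPS 1       /* illustrative value only */

    /* Stub standing in for ubi_io_read(). */
    static int read_peb(int pnum)
    {
            (void)pnum;
            return UBI_IO_BITFLIPS;
    }

    static int handle_read(int pnum, int *scrub)
    {
            int err = read_peb(pnum);

            if (err == UBI_IO_BITFLIPS) {
                    *scrub = 1;     /* data OK, schedule scrubbing */
                    return 0;
            }
            if (err == -EBADMSG)
                    return -EBADMSG;        /* ECC failure, data corrupted */
            return err;     /* 0 on success, -EIO or other hard errors */
    }
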
*/ @@ -298,7 +298,7 @@ retry: memset(&ei, 0, sizeof(struct erase_info)); ei.mtd = ubi->mtd; - ei.addr = pnum * ubi->peb_size; + ei.addr = (loff_t)pnum * ubi->peb_size; ei.len = ubi->peb_size; ei.callback = erase_callback; ei.priv = (unsigned long)&wq; @@ -382,7 +382,7 @@ static int torture_peb(const struct ubi_device *ubi, int pnum) void *buf; int err, i, patt_count; - buf = kmalloc(ubi->peb_size, GFP_KERNEL); + buf = vmalloc(ubi->peb_size); if (!buf) return -ENOMEM; @@ -437,7 +437,7 @@ out: * physical eraseblock which means something is wrong with it. */ err = -EIO; - kfree(buf); + vfree(buf); return err; } @@ -557,9 +557,9 @@ static int validate_ec_hdr(const struct ubi_device *ubi, long long ec; int vid_hdr_offset, leb_start; - ec = ubi64_to_cpu(ec_hdr->ec); - vid_hdr_offset = ubi32_to_cpu(ec_hdr->vid_hdr_offset); - leb_start = ubi32_to_cpu(ec_hdr->data_offset); + ec = be64_to_cpu(ec_hdr->ec); + vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset); + leb_start = be32_to_cpu(ec_hdr->data_offset); if (ec_hdr->version != UBI_VERSION) { ubi_err("node with incompatible UBI version found: " @@ -640,7 +640,7 @@ int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum, read_err = err; } - magic = ubi32_to_cpu(ec_hdr->magic); + magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { /* * The magic field is wrong. Let's check if we have read all @@ -684,7 +684,7 @@ int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum, } crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); - hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc); + hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { if (verbose) { @@ -729,12 +729,12 @@ int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum, dbg_io("write EC header to PEB %d", pnum); ubi_assert(pnum >= 0 && pnum < ubi->peb_count); - ec_hdr->magic = cpu_to_ubi32(UBI_EC_HDR_MAGIC); + ec_hdr->magic = cpu_to_be32(UBI_EC_HDR_MAGIC); ec_hdr->version = UBI_VERSION; - ec_hdr->vid_hdr_offset = cpu_to_ubi32(ubi->vid_hdr_offset); - ec_hdr->data_offset = cpu_to_ubi32(ubi->leb_start); + ec_hdr->vid_hdr_offset = cpu_to_be32(ubi->vid_hdr_offset); + ec_hdr->data_offset = cpu_to_be32(ubi->leb_start); crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); - ec_hdr->hdr_crc = cpu_to_ubi32(crc); + ec_hdr->hdr_crc = cpu_to_be32(crc); err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr); if (err) @@ -757,13 +757,13 @@ static int validate_vid_hdr(const struct ubi_device *ubi, { int vol_type = vid_hdr->vol_type; int copy_flag = vid_hdr->copy_flag; - int vol_id = ubi32_to_cpu(vid_hdr->vol_id); - int lnum = ubi32_to_cpu(vid_hdr->lnum); + int vol_id = be32_to_cpu(vid_hdr->vol_id); + int lnum = be32_to_cpu(vid_hdr->lnum); int compat = vid_hdr->compat; - int data_size = ubi32_to_cpu(vid_hdr->data_size); - int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); - int data_pad = ubi32_to_cpu(vid_hdr->data_pad); - int data_crc = ubi32_to_cpu(vid_hdr->data_crc); + int data_size = be32_to_cpu(vid_hdr->data_size); + int used_ebs = be32_to_cpu(vid_hdr->used_ebs); + int data_pad = be32_to_cpu(vid_hdr->data_pad); + int data_crc = be32_to_cpu(vid_hdr->data_crc); int usable_leb_size = ubi->leb_size - data_pad; if (copy_flag != 0 && copy_flag != 1) { @@ -914,7 +914,7 @@ int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum, read_err = err; } - magic = ubi32_to_cpu(vid_hdr->magic); + magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { /* * If we have read all 0xFF bytes, the VID header probably does @@ -957,7 +957,7 @@ int ubi_io_read_vid_hdr(const struct 
ubi_device *ubi, int pnum, } crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); - hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc); + hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { if (verbose) { @@ -1007,10 +1007,10 @@ int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum, if (err) return err > 0 ? -EINVAL: err; - vid_hdr->magic = cpu_to_ubi32(UBI_VID_HDR_MAGIC); + vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); vid_hdr->version = UBI_VERSION; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); - vid_hdr->hdr_crc = cpu_to_ubi32(crc); + vid_hdr->hdr_crc = cpu_to_be32(crc); err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr); if (err) @@ -1060,7 +1060,7 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - magic = ubi32_to_cpu(ec_hdr->magic); + magic = be32_to_cpu(ec_hdr->magic); if (magic != UBI_EC_HDR_MAGIC) { ubi_err("bad magic %#08x, must be %#08x", magic, UBI_EC_HDR_MAGIC); @@ -1105,7 +1105,7 @@ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) goto exit; crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); - hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc); + hdr_crc = be32_to_cpu(ec_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err("bad CRC, calculated %#08x, read %#08x", crc, hdr_crc); ubi_err("paranoid check failed for PEB %d", pnum); @@ -1137,7 +1137,7 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, int err; uint32_t magic; - magic = ubi32_to_cpu(vid_hdr->magic); + magic = be32_to_cpu(vid_hdr->magic); if (magic != UBI_VID_HDR_MAGIC) { ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x", magic, pnum, UBI_VID_HDR_MAGIC); @@ -1187,7 +1187,7 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) goto exit; crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC); - hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc); + hdr_crc = be32_to_cpu(vid_hdr->hdr_crc); if (hdr_crc != crc) { ubi_err("bad VID header CRC at PEB %d, calculated %#08x, " "read %#08x", pnum, crc, hdr_crc); @@ -1224,9 +1224,10 @@ static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum, void *buf; loff_t addr = (loff_t)pnum * ubi->peb_size + offset; - buf = kzalloc(len, GFP_KERNEL); + buf = vmalloc(len); if (!buf) return -ENOMEM; + memset(buf, 0, len); err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); if (err && err != -EUCLEAN) { @@ -1242,7 +1243,7 @@ static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum, goto fail; } - kfree(buf); + vfree(buf); return 0; fail: @@ -1252,7 +1253,7 @@ fail: err = 1; error: ubi_dbg_dump_stack(); - kfree(buf); + vfree(buf); return err; } diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index d352c4575c3..4a458e83e4e 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -37,14 +37,9 @@ int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) { const struct ubi_device *ubi; - if (!try_module_get(THIS_MODULE)) - return -ENODEV; - if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || - !ubi_devices[ubi_num]) { - module_put(THIS_MODULE); + !ubi_devices[ubi_num]) return -ENODEV; - } ubi = ubi_devices[ubi_num]; di->ubi_num = ubi->ubi_num; @@ -52,7 +47,6 @@ int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) di->min_io_size = ubi->min_io_size; di->ro_mode = ubi->ro_mode; di->cdev = MKDEV(ubi->major, 0); - module_put(THIS_MODULE); return 0; } EXPORT_SYMBOL_GPL(ubi_get_device_info); @@ -319,9 +313,14 @@ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char 
*buf, int offset, offset + len > vol->usable_leb_size) return -EINVAL; - if (vol->vol_type == UBI_STATIC_VOLUME && lnum == vol->used_ebs - 1 && - offset + len > vol->last_eb_bytes) - return -EINVAL; + if (vol->vol_type == UBI_STATIC_VOLUME) { + if (vol->used_ebs == 0) + /* Empty static UBI volume */ + return 0; + if (lnum == vol->used_ebs - 1 && + offset + len > vol->last_eb_bytes) + return -EINVAL; + } if (vol->upd_marker) return -EBADF; diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c index 38d4e6757dc..9e2338c8e2c 100644 --- a/drivers/mtd/ubi/misc.c +++ b/drivers/mtd/ubi/misc.c @@ -67,7 +67,7 @@ int ubi_check_volume(struct ubi_device *ubi, int vol_id) if (vol->vol_type != UBI_STATIC_VOLUME) return 0; - buf = kmalloc(vol->usable_leb_size, GFP_KERNEL); + buf = vmalloc(vol->usable_leb_size); if (!buf) return -ENOMEM; @@ -87,7 +87,7 @@ int ubi_check_volume(struct ubi_device *ubi, int vol_id) } } - kfree(buf); + vfree(buf); return err; } diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c index 473f3200b86..94ee5493441 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -24,7 +24,7 @@ * This unit is responsible for scanning the flash media, checking UBI * headers and providing complete information about the UBI flash image. * - * The scanning information is reoresented by a &struct ubi_scan_info' object. + * The scanning information is represented by a &struct ubi_scan_info' object. * Information about found volumes is represented by &struct ubi_scan_volume * objects which are kept in volume RB-tree with root at the @volumes field. * The RB-tree is indexed by the volume ID. @@ -55,8 +55,19 @@ static int paranoid_check_si(const struct ubi_device *ubi, static struct ubi_ec_hdr *ech; static struct ubi_vid_hdr *vidh; -int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec, - struct list_head *list) +/** + * add_to_list - add physical eraseblock to a list. + * @si: scanning information + * @pnum: physical eraseblock number to add + * @ec: erase counter of the physical eraseblock + * @list: the list to add to + * + * This function adds physical eraseblock @pnum to free, erase, corrupted or + * alien lists. Returns zero in case of success and a negative error code in + * case of failure. 
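
A minimal standalone sketch of what this helper does, with simplified types; the kernel version embeds a struct list_head in struct ubi_scan_leb instead of an explicit next pointer:

    #include <stdlib.h>

    struct seb {
            int pnum;               /* physical eraseblock number */
            int ec;                 /* erase counter */
            struct seb *next;
    };

    struct peb_list {
            struct seb *head, *tail;
    };

    /* Queue a PEB on whichever scan list the caller chose. */
    static int add_to_list(struct peb_list *list, int pnum, int ec)
    {
            struct seb *seb = malloc(sizeof(*seb));

            if (!seb)
                    return -1;      /* the kernel code returns -ENOMEM */
            seb->pnum = pnum;
            seb->ec = ec;
            seb->next = NULL;
            if (list->tail)
                    list->tail->next = seb;
            else
                    list->head = seb;
            list->tail = seb;
            return 0;
    }
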
+ */ +static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, + struct list_head *list) { struct ubi_scan_leb *seb; @@ -121,9 +132,9 @@ static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr, const struct ubi_scan_volume *sv, int pnum) { int vol_type = vid_hdr->vol_type; - int vol_id = ubi32_to_cpu(vid_hdr->vol_id); - int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); - int data_pad = ubi32_to_cpu(vid_hdr->data_pad); + int vol_id = be32_to_cpu(vid_hdr->vol_id); + int used_ebs = be32_to_cpu(vid_hdr->used_ebs); + int data_pad = be32_to_cpu(vid_hdr->data_pad); if (sv->leb_count != 0) { int sv_vol_type; @@ -189,7 +200,7 @@ static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id, struct ubi_scan_volume *sv; struct rb_node **p = &si->volumes.rb_node, *parent = NULL; - ubi_assert(vol_id == ubi32_to_cpu(vid_hdr->vol_id)); + ubi_assert(vol_id == be32_to_cpu(vid_hdr->vol_id)); /* Walk the volume RB-tree to look if this volume is already present */ while (*p) { @@ -211,11 +222,10 @@ static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id, return ERR_PTR(-ENOMEM); sv->highest_lnum = sv->leb_count = 0; - si->max_sqnum = 0; sv->vol_id = vol_id; sv->root = RB_ROOT; - sv->used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); - sv->data_pad = ubi32_to_cpu(vid_hdr->data_pad); + sv->used_ebs = be32_to_cpu(vid_hdr->used_ebs); + sv->data_pad = be32_to_cpu(vid_hdr->data_pad); sv->compat = vid_hdr->compat; sv->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; @@ -257,10 +267,10 @@ static int compare_lebs(const struct ubi_device *ubi, int len, err, second_is_newer, bitflips = 0, corrupted = 0; uint32_t data_crc, crc; struct ubi_vid_hdr *vidh = NULL; - unsigned long long sqnum2 = ubi64_to_cpu(vid_hdr->sqnum); + unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum); if (seb->sqnum == 0 && sqnum2 == 0) { - long long abs, v1 = seb->leb_ver, v2 = ubi32_to_cpu(vid_hdr->leb_ver); + long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver); /* * UBI constantly increases the logical eraseblock version @@ -344,8 +354,8 @@ static int compare_lebs(const struct ubi_device *ubi, /* Read the data of the copy and check the CRC */ - len = ubi32_to_cpu(vid_hdr->data_size); - buf = kmalloc(len, GFP_KERNEL); + len = be32_to_cpu(vid_hdr->data_size); + buf = vmalloc(len); if (!buf) { err = -ENOMEM; goto out_free_vidh; @@ -355,7 +365,7 @@ static int compare_lebs(const struct ubi_device *ubi, if (err && err != UBI_IO_BITFLIPS) goto out_free_buf; - data_crc = ubi32_to_cpu(vid_hdr->data_crc); + data_crc = be32_to_cpu(vid_hdr->data_crc); crc = crc32(UBI_CRC32_INIT, buf, len); if (crc != data_crc) { dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x", @@ -368,7 +378,7 @@ static int compare_lebs(const struct ubi_device *ubi, bitflips = !!err; } - kfree(buf); + vfree(buf); ubi_free_vid_hdr(ubi, vidh); if (second_is_newer) @@ -379,7 +389,7 @@ static int compare_lebs(const struct ubi_device *ubi, return second_is_newer | (bitflips << 1) | (corrupted << 2); out_free_buf: - kfree(buf); + vfree(buf); out_free_vidh: ubi_free_vid_hdr(ubi, vidh); ubi_assert(err < 0); @@ -396,8 +406,12 @@ out_free_vidh: * @vid_hdr: the volume identifier header * @bitflips: if bit-flips were detected when this physical eraseblock was read * - * This function returns zero in case of success and a negative error code in - * case of failure. + * This function adds information about a used physical eraseblock to the + * 'used' tree of the corresponding volume. 
The function is rather complex + * because it has to handle cases when this is not the first physical + * eraseblock belonging to the same logical eraseblock, and the newer one has + * to be picked, while the older one has to be dropped. This function returns + * zero in case of success and a negative error code in case of failure. */ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, @@ -410,10 +424,10 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, struct ubi_scan_leb *seb; struct rb_node **p, *parent = NULL; - vol_id = ubi32_to_cpu(vid_hdr->vol_id); - lnum = ubi32_to_cpu(vid_hdr->lnum); - sqnum = ubi64_to_cpu(vid_hdr->sqnum); - leb_ver = ubi32_to_cpu(vid_hdr->leb_ver); + vol_id = be32_to_cpu(vid_hdr->vol_id); + lnum = be32_to_cpu(vid_hdr->lnum); + sqnum = be64_to_cpu(vid_hdr->sqnum); + leb_ver = be32_to_cpu(vid_hdr->leb_ver); dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); @@ -422,6 +436,9 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, if (IS_ERR(sv) < 0) return PTR_ERR(sv); + if (si->max_sqnum < sqnum) + si->max_sqnum = sqnum; + /* * Walk the RB-tree of logical eraseblocks of volume @vol_id to look * if this is the first instance of this logical eraseblock or not. @@ -492,11 +509,11 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, return err; if (cmp_res & 4) - err = ubi_scan_add_to_list(si, seb->pnum, - seb->ec, &si->corr); + err = add_to_list(si, seb->pnum, seb->ec, + &si->corr); else - err = ubi_scan_add_to_list(si, seb->pnum, - seb->ec, &si->erase); + err = add_to_list(si, seb->pnum, seb->ec, + &si->erase); if (err) return err; @@ -508,7 +525,7 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, if (sv->highest_lnum == lnum) sv->last_data_size = - ubi32_to_cpu(vid_hdr->data_size); + be32_to_cpu(vid_hdr->data_size); return 0; } else { @@ -517,11 +534,9 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, * previously. */ if (cmp_res & 4) - return ubi_scan_add_to_list(si, pnum, ec, - &si->corr); + return add_to_list(si, pnum, ec, &si->corr); else - return ubi_scan_add_to_list(si, pnum, ec, - &si->erase); + return add_to_list(si, pnum, ec, &si->erase); } } @@ -547,12 +562,9 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, if (sv->highest_lnum <= lnum) { sv->highest_lnum = lnum; - sv->last_data_size = ubi32_to_cpu(vid_hdr->data_size); + sv->last_data_size = be32_to_cpu(vid_hdr->data_size); } - if (si->max_sqnum < sqnum) - si->max_sqnum = sqnum; - sv->leb_count += 1; rb_link_node(&seb->u.rb, parent, p); rb_insert_color(&seb->u.rb, &sv->root); @@ -674,7 +686,7 @@ int ubi_scan_erase_peb(const struct ubi_device *ubi, return -EINVAL; } - ec_hdr->ec = cpu_to_ubi64(ec); + ec_hdr->ec = cpu_to_be64(ec); err = ubi_io_sync_erase(ubi, pnum, 0); if (err < 0) @@ -754,7 +766,7 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(const struct ubi_device *ubi, * @si: scanning information * @pnum: the physical eraseblock number * - * This function returns a zero if the physical eraseblock was succesfully + * This function returns a zero if the physical eraseblock was successfully * handled and a negative error code in case of failure. 
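
The "newer one has to be picked" rule referred to above is implemented by compare_lebs() earlier in this file: the copy with the larger sequence number wins, and only when both sequence numbers are zero does the leb_ver counter decide. Reduced to that core, and ignoring the version-wraparound and data-CRC handling the real function adds:

    /* Is the second copy of a LEB newer than the first? Simplified from
     * compare_lebs(); hypothetical standalone helper. */
    static int second_is_newer(unsigned long long sqnum1,
                               unsigned long long sqnum2,
                               long long ver1, long long ver2)
    {
            if (sqnum1 || sqnum2)
                    return sqnum2 > sqnum1;
            /* Both sqnums are zero: fall back to the LEB version. */
            return ver2 > ver1;
    }
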
*/ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) @@ -783,8 +795,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum else if (err == UBI_IO_BITFLIPS) bitflips = 1; else if (err == UBI_IO_PEB_EMPTY) - return ubi_scan_add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, - &si->erase); + return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); else if (err == UBI_IO_BAD_EC_HDR) { /* * We have to also look at the VID header, possibly it is not @@ -806,7 +817,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum return -EINVAL; } - ec = ubi64_to_cpu(ech->ec); + ec = be64_to_cpu(ech->ec); if (ec > UBI_MAX_ERASECOUNTER) { /* * Erase counter overflow. The EC headers have 64 bits @@ -832,28 +843,28 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum else if (err == UBI_IO_BAD_VID_HDR || (err == UBI_IO_PEB_FREE && ec_corr)) { /* VID header is corrupted */ - err = ubi_scan_add_to_list(si, pnum, ec, &si->corr); + err = add_to_list(si, pnum, ec, &si->corr); if (err) return err; goto adjust_mean_ec; } else if (err == UBI_IO_PEB_FREE) { /* No VID header - the physical eraseblock is free */ - err = ubi_scan_add_to_list(si, pnum, ec, &si->free); + err = add_to_list(si, pnum, ec, &si->free); if (err) return err; goto adjust_mean_ec; } - vol_id = ubi32_to_cpu(vidh->vol_id); + vol_id = be32_to_cpu(vidh->vol_id); if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOL_ID) { - int lnum = ubi32_to_cpu(vidh->lnum); + int lnum = be32_to_cpu(vidh->lnum); /* Unsupported internal volume */ switch (vidh->compat) { case UBI_COMPAT_DELETE: ubi_msg("\"delete\" compatible internal volume %d:%d" " found, remove it", vol_id, lnum); - err = ubi_scan_add_to_list(si, pnum, ec, &si->corr); + err = add_to_list(si, pnum, ec, &si->corr); if (err) return err; break; @@ -868,7 +879,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum case UBI_COMPAT_PRESERVE: ubi_msg("\"preserve\" compatible internal volume %d:%d" " found", vol_id, lnum); - err = ubi_scan_add_to_list(si, pnum, ec, &si->alien); + err = add_to_list(si, pnum, ec, &si->alien); if (err) return err; si->alien_peb_count += 1; @@ -1109,7 +1120,7 @@ static int paranoid_check_si(const struct ubi_device *ubi, uint8_t *buf; /* - * At first, check that scanning information is ok. + * At first, check that scanning information is OK. 
*/ ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { int leb_count = 0; @@ -1249,12 +1260,12 @@ static int paranoid_check_si(const struct ubi_device *ubi, goto bad_vid_hdr; } - if (seb->sqnum != ubi64_to_cpu(vidh->sqnum)) { + if (seb->sqnum != be64_to_cpu(vidh->sqnum)) { ubi_err("bad sqnum %llu", seb->sqnum); goto bad_vid_hdr; } - if (sv->vol_id != ubi32_to_cpu(vidh->vol_id)) { + if (sv->vol_id != be32_to_cpu(vidh->vol_id)) { ubi_err("bad vol_id %d", sv->vol_id); goto bad_vid_hdr; } @@ -1264,22 +1275,22 @@ static int paranoid_check_si(const struct ubi_device *ubi, goto bad_vid_hdr; } - if (seb->lnum != ubi32_to_cpu(vidh->lnum)) { + if (seb->lnum != be32_to_cpu(vidh->lnum)) { ubi_err("bad lnum %d", seb->lnum); goto bad_vid_hdr; } - if (sv->used_ebs != ubi32_to_cpu(vidh->used_ebs)) { + if (sv->used_ebs != be32_to_cpu(vidh->used_ebs)) { ubi_err("bad used_ebs %d", sv->used_ebs); goto bad_vid_hdr; } - if (sv->data_pad != ubi32_to_cpu(vidh->data_pad)) { + if (sv->data_pad != be32_to_cpu(vidh->data_pad)) { ubi_err("bad data_pad %d", sv->data_pad); goto bad_vid_hdr; } - if (seb->leb_ver != ubi32_to_cpu(vidh->leb_ver)) { + if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) { ubi_err("bad leb_ver %u", seb->leb_ver); goto bad_vid_hdr; } @@ -1288,12 +1299,12 @@ static int paranoid_check_si(const struct ubi_device *ubi, if (!last_seb) continue; - if (sv->highest_lnum != ubi32_to_cpu(vidh->lnum)) { + if (sv->highest_lnum != be32_to_cpu(vidh->lnum)) { ubi_err("bad highest_lnum %d", sv->highest_lnum); goto bad_vid_hdr; } - if (sv->last_data_size != ubi32_to_cpu(vidh->data_size)) { + if (sv->last_data_size != be32_to_cpu(vidh->data_size)) { ubi_err("bad last_data_size %d", sv->last_data_size); goto bad_vid_hdr; } @@ -1310,8 +1321,10 @@ static int paranoid_check_si(const struct ubi_device *ubi, memset(buf, 1, ubi->peb_count); for (pnum = 0; pnum < ubi->peb_count; pnum++) { err = ubi_io_is_bad(ubi, pnum); - if (err < 0) + if (err < 0) { + kfree(buf); return err; + } else if (err) buf[pnum] = 0; } diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h index 3949f6192c7..140e82e2653 100644 --- a/drivers/mtd/ubi/scan.h +++ b/drivers/mtd/ubi/scan.h @@ -147,8 +147,6 @@ static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv, list_add_tail(&seb->u.list, list); } -int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec, - struct list_head *list); int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, int bitflips); diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index feb647f108f..5959f91be24 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -35,6 +35,7 @@ #include <linux/cdev.h> #include <linux/device.h> #include <linux/string.h> +#include <linux/vmalloc.h> #include <linux/mtd/mtd.h> #include <mtd/ubi-header.h> @@ -374,9 +375,11 @@ void ubi_calculate_reserved(struct ubi_device *ubi); #ifdef CONFIG_MTD_UBI_GLUEBI int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol); int ubi_destroy_gluebi(struct ubi_volume *vol); +void ubi_gluebi_updated(struct ubi_volume *vol); #else #define ubi_create_gluebi(ubi, vol) 0 #define ubi_destroy_gluebi(vol) 0 +#define ubi_gluebi_updated(vol) #endif /* eba.c */ diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index 8925b977e3d..0efc586a832 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -150,7 +150,7 @@ int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes) vol->updating = 0; } - vol->upd_buf = 
kmalloc(ubi->leb_size, GFP_KERNEL); + vol->upd_buf = vmalloc(ubi->leb_size); if (!vol->upd_buf) return -ENOMEM; @@ -339,7 +339,7 @@ int ubi_more_update_data(struct ubi_device *ubi, int vol_id, err = ubi_wl_flush(ubi); if (err == 0) { err = to_write; - kfree(vol->upd_buf); + vfree(vol->upd_buf); vol->updating = 0; } } diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c index 622d0d18952..ea0d5c825ab 100644 --- a/drivers/mtd/ubi/vmt.c +++ b/drivers/mtd/ubi/vmt.c @@ -228,7 +228,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) for (i = 0; i < ubi->vtbl_slots; i++) if (ubi->volumes[i] && ubi->volumes[i]->name_len == req->name_len && - strcmp(ubi->volumes[i]->name, req->name) == 0) { + !strcmp(ubi->volumes[i]->name, req->name)) { dbg_err("volume \"%s\" exists (ID %d)", req->name, i); goto out_unlock; } @@ -243,7 +243,6 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) /* Reserve physical eraseblocks */ if (vol->reserved_pebs > ubi->avail_pebs) { dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); - spin_unlock(&ubi->volumes_lock); err = -ENOSPC; goto out_unlock; } @@ -281,7 +280,8 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) if (vol->vol_type == UBI_DYNAMIC_VOLUME) { vol->used_ebs = vol->reserved_pebs; vol->last_eb_bytes = vol->usable_leb_size; - vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; } else { bytes = vol->used_bytes; vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); @@ -320,10 +320,10 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) /* Fill volume table record */ memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record)); - vtbl_rec.reserved_pebs = cpu_to_ubi32(vol->reserved_pebs); - vtbl_rec.alignment = cpu_to_ubi32(vol->alignment); - vtbl_rec.data_pad = cpu_to_ubi32(vol->data_pad); - vtbl_rec.name_len = cpu_to_ubi16(vol->name_len); + vtbl_rec.reserved_pebs = cpu_to_be32(vol->reserved_pebs); + vtbl_rec.alignment = cpu_to_be32(vol->alignment); + vtbl_rec.data_pad = cpu_to_be32(vol->data_pad); + vtbl_rec.name_len = cpu_to_be16(vol->name_len); if (vol->vol_type == UBI_DYNAMIC_VOLUME) vtbl_rec.vol_type = UBI_VID_DYNAMIC; else @@ -352,6 +352,7 @@ out_acc: spin_lock(&ubi->volumes_lock); ubi->rsvd_pebs -= vol->reserved_pebs; ubi->avail_pebs += vol->reserved_pebs; + ubi->volumes[vol_id] = NULL; out_unlock: spin_unlock(&ubi->volumes_lock); kfree(vol); @@ -368,6 +369,7 @@ out_sysfs: spin_lock(&ubi->volumes_lock); ubi->rsvd_pebs -= vol->reserved_pebs; ubi->avail_pebs += vol->reserved_pebs; + ubi->volumes[vol_id] = NULL; spin_unlock(&ubi->volumes_lock); volume_sysfs_close(vol); return err; @@ -503,7 +505,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) /* Change volume table record */ memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); - vtbl_rec.reserved_pebs = cpu_to_ubi32(reserved_pebs); + vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); if (err) goto out_acc; @@ -537,7 +539,8 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) if (vol->vol_type == UBI_DYNAMIC_VOLUME) { vol->used_ebs = reserved_pebs; vol->last_eb_bytes = vol->usable_leb_size; - vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; } paranoid_check_volumes(ubi); @@ -643,21 +646,33 @@ void ubi_free_volume(struct ubi_device 
*ubi, int vol_id) * @ubi: UBI device description object * @vol_id: volume ID */ -static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id) +static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) { int idx = vol_id2idx(ubi, vol_id); int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; - const struct ubi_volume *vol = ubi->volumes[idx]; + const struct ubi_volume *vol; long long n; const char *name; - reserved_pebs = ubi32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); + spin_lock(&ubi->volumes_lock); + reserved_pebs = be32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); + vol = ubi->volumes[idx]; if (!vol) { if (reserved_pebs) { ubi_err("no volume info, but volume exists"); goto fail; } + spin_unlock(&ubi->volumes_lock); + return; + } + + if (vol->exclusive) { + /* + * The volume may be being created at the moment, do not check + * it (e.g., it may be in the middle of ubi_create_volume(). + */ + spin_unlock(&ubi->volumes_lock); return; } @@ -726,7 +741,7 @@ static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id) goto fail; } - n = vol->used_ebs * vol->usable_leb_size; + n = (long long)vol->used_ebs * vol->usable_leb_size; if (vol->vol_type == UBI_DYNAMIC_VOLUME) { if (vol->corrupted != 0) { ubi_err("corrupted dynamic volume"); @@ -765,9 +780,9 @@ static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id) } } - alignment = ubi32_to_cpu(ubi->vtbl[vol_id].alignment); - data_pad = ubi32_to_cpu(ubi->vtbl[vol_id].data_pad); - name_len = ubi16_to_cpu(ubi->vtbl[vol_id].name_len); + alignment = be32_to_cpu(ubi->vtbl[vol_id].alignment); + data_pad = be32_to_cpu(ubi->vtbl[vol_id].data_pad); + name_len = be16_to_cpu(ubi->vtbl[vol_id].name_len); upd_marker = ubi->vtbl[vol_id].upd_marker; name = &ubi->vtbl[vol_id].name[0]; if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC) @@ -782,12 +797,14 @@ static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id) goto fail; } + spin_unlock(&ubi->volumes_lock); return; fail: - ubi_err("paranoid check failed"); + ubi_err("paranoid check failed for volume %d", vol_id); ubi_dbg_dump_vol_info(vol); ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); + spin_unlock(&ubi->volumes_lock); BUG(); } @@ -800,10 +817,8 @@ static void paranoid_check_volumes(struct ubi_device *ubi) int i; mutex_lock(&ubi->vtbl_mutex); - spin_lock(&ubi->volumes_lock); for (i = 0; i < ubi->vtbl_slots; i++) paranoid_check_volume(ubi, i); - spin_unlock(&ubi->volumes_lock); mutex_unlock(&ubi->vtbl_mutex); } #endif diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index b6fd6bbd941..bc5df50813d 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -93,12 +93,9 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, vtbl_rec = &empty_vtbl_record; else { crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); - vtbl_rec->crc = cpu_to_ubi32(crc); + vtbl_rec->crc = cpu_to_be32(crc); } - dbg_msg("change record %d", idx); - ubi_dbg_dump_vtbl_record(vtbl_rec, idx); - mutex_lock(&ubi->vtbl_mutex); memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { @@ -141,18 +138,18 @@ static int vtbl_check(const struct ubi_device *ubi, for (i = 0; i < ubi->vtbl_slots; i++) { cond_resched(); - reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs); - alignment = ubi32_to_cpu(vtbl[i].alignment); - data_pad = ubi32_to_cpu(vtbl[i].data_pad); + reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + alignment = be32_to_cpu(vtbl[i].alignment); 
+ data_pad = be32_to_cpu(vtbl[i].data_pad); upd_marker = vtbl[i].upd_marker; vol_type = vtbl[i].vol_type; - name_len = ubi16_to_cpu(vtbl[i].name_len); + name_len = be16_to_cpu(vtbl[i].name_len); name = &vtbl[i].name[0]; crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); - if (ubi32_to_cpu(vtbl[i].crc) != crc) { + if (be32_to_cpu(vtbl[i].crc) != crc) { ubi_err("bad CRC at record %u: %#08x, not %#08x", - i, crc, ubi32_to_cpu(vtbl[i].crc)); + i, crc, be32_to_cpu(vtbl[i].crc)); ubi_dbg_dump_vtbl_record(&vtbl[i], i); return 1; } @@ -225,8 +222,8 @@ static int vtbl_check(const struct ubi_device *ubi, /* Checks that all names are unique */ for (i = 0; i < ubi->vtbl_slots - 1; i++) { for (n = i + 1; n < ubi->vtbl_slots; n++) { - int len1 = ubi16_to_cpu(vtbl[i].name_len); - int len2 = ubi16_to_cpu(vtbl[n].name_len); + int len1 = be16_to_cpu(vtbl[i].name_len); + int len2 = be16_to_cpu(vtbl[n].name_len); if (len1 > 0 && len1 == len2 && !strncmp(vtbl[i].name, vtbl[n].name, len1)) { @@ -288,13 +285,13 @@ retry: } vid_hdr->vol_type = UBI_VID_DYNAMIC; - vid_hdr->vol_id = cpu_to_ubi32(UBI_LAYOUT_VOL_ID); + vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOL_ID); vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; vid_hdr->data_size = vid_hdr->used_ebs = - vid_hdr->data_pad = cpu_to_ubi32(0); - vid_hdr->lnum = cpu_to_ubi32(copy); - vid_hdr->sqnum = cpu_to_ubi64(++si->max_sqnum); - vid_hdr->leb_ver = cpu_to_ubi32(old_seb ? old_seb->leb_ver + 1: 0); + vid_hdr->data_pad = cpu_to_be32(0); + vid_hdr->lnum = cpu_to_be32(copy); + vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); + vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0); /* The EC header is already there, write the VID header */ err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); @@ -317,14 +314,15 @@ retry: return err; write_error: - kfree(new_seb); - /* May be this physical eraseblock went bad, try to pick another one */ - if (++tries <= 5) { - err = ubi_scan_add_to_list(si, new_seb->pnum, new_seb->ec, - &si->corr); - if (!err) - goto retry; + if (err == -EIO && ++tries <= 5) { + /* + * Probably this physical eraseblock went bad, try to pick + * another one. 
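
The reworked error path above retries only on %-EIO, and at most five times, instead of retrying after every failure as the old code did. The shape of that bounded-retry pattern in isolation, with hypothetical stub helpers:

    #include <errno.h>

    /* Hypothetical stubs for the real write and PEB-selection calls. */
    static int write_block(int pnum)
    {
            (void)pnum;
            return -EIO;
    }
    static int pick_another_peb(void)
    {
            return 42;
    }

    static int write_with_retry(int pnum)
    {
            int err, tries = 0;

    retry:
            err = write_block(pnum);
            if (err == -EIO && ++tries <= 5) {
                    /* the eraseblock probably went bad, try another one */
                    pnum = pick_another_peb();
                    goto retry;
            }
            return err;     /* 0, or an error not worth retrying */
    }
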
+ */ + list_add_tail(&new_seb->u.list, &si->corr); + goto retry; } + kfree(new_seb); out_free: ubi_free_vid_hdr(ubi, vid_hdr); return err; @@ -380,11 +378,12 @@ static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi, /* Read both LEB 0 and LEB 1 into memory */ ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { - leb[seb->lnum] = kzalloc(ubi->vtbl_size, GFP_KERNEL); + leb[seb->lnum] = vmalloc(ubi->vtbl_size); if (!leb[seb->lnum]) { err = -ENOMEM; goto out_free; } + memset(leb[seb->lnum], 0, ubi->vtbl_size); err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, ubi->vtbl_size); @@ -415,7 +414,7 @@ static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi, } /* Both LEB 1 and LEB 2 are OK and consistent */ - kfree(leb[1]); + vfree(leb[1]); return leb[0]; } else { /* LEB 0 is corrupted or does not exist */ @@ -436,13 +435,13 @@ static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi, goto out_free; ubi_msg("volume table was restored"); - kfree(leb[0]); + vfree(leb[0]); return leb[1]; } out_free: - kfree(leb[0]); - kfree(leb[1]); + vfree(leb[0]); + vfree(leb[1]); return ERR_PTR(err); } @@ -460,9 +459,10 @@ static struct ubi_vtbl_record *create_empty_lvol(const struct ubi_device *ubi, int i; struct ubi_vtbl_record *vtbl; - vtbl = kzalloc(ubi->vtbl_size, GFP_KERNEL); + vtbl = vmalloc(ubi->vtbl_size); if (!vtbl) return ERR_PTR(-ENOMEM); + memset(vtbl, 0, ubi->vtbl_size); for (i = 0; i < ubi->vtbl_slots; i++) memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); @@ -472,7 +472,7 @@ static struct ubi_vtbl_record *create_empty_lvol(const struct ubi_device *ubi, err = create_vtbl(ubi, si, i, vtbl); if (err) { - kfree(vtbl); + vfree(vtbl); return ERR_PTR(err); } } @@ -500,19 +500,19 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, for (i = 0; i < ubi->vtbl_slots; i++) { cond_resched(); - if (ubi32_to_cpu(vtbl[i].reserved_pebs) == 0) + if (be32_to_cpu(vtbl[i].reserved_pebs) == 0) continue; /* Empty record */ vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); if (!vol) return -ENOMEM; - vol->reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs); - vol->alignment = ubi32_to_cpu(vtbl[i].alignment); - vol->data_pad = ubi32_to_cpu(vtbl[i].data_pad); + vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + vol->alignment = be32_to_cpu(vtbl[i].alignment); + vol->data_pad = be32_to_cpu(vtbl[i].data_pad); vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? 
UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; - vol->name_len = ubi16_to_cpu(vtbl[i].name_len); + vol->name_len = be16_to_cpu(vtbl[i].name_len); vol->usable_leb_size = ubi->leb_size - vol->data_pad; memcpy(vol->name, vtbl[i].name, vol->name_len); vol->name[vol->name_len] = '\0'; @@ -531,7 +531,8 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, if (vol->vol_type == UBI_DYNAMIC_VOLUME) { vol->used_ebs = vol->reserved_pebs; vol->last_eb_bytes = vol->usable_leb_size; - vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; continue; } @@ -561,7 +562,8 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, } vol->used_ebs = sv->used_ebs; - vol->used_bytes = (vol->used_ebs - 1) * vol->usable_leb_size; + vol->used_bytes = + (long long)(vol->used_ebs - 1) * vol->usable_leb_size; vol->used_bytes += sv->last_data_size; vol->last_eb_bytes = sv->last_data_size; } @@ -578,7 +580,8 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, vol->usable_leb_size = ubi->leb_size; vol->used_ebs = vol->reserved_pebs; vol->last_eb_bytes = vol->reserved_pebs; - vol->used_bytes = vol->used_ebs * (ubi->leb_size - vol->data_pad); + vol->used_bytes = + (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad); vol->vol_id = UBI_LAYOUT_VOL_ID; ubi_assert(!ubi->volumes[i]); @@ -718,7 +721,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) int i, err; struct ubi_scan_volume *sv; - empty_vtbl_record.crc = cpu_to_ubi32(0xf116c36b); + empty_vtbl_record.crc = cpu_to_be32(0xf116c36b); /* * The number of supported volumes is limited by the eraseblock size @@ -783,7 +786,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) return 0; out_free: - kfree(ubi->vtbl); + vfree(ubi->vtbl); for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) if (ubi->volumes[i]) { kfree(ubi->volumes[i]); diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index ab2174a56bc..9de95376209 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -667,7 +667,7 @@ static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int tortur dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); - ec_hdr->ec = cpu_to_ubi64(ec); + ec_hdr->ec = cpu_to_be64(ec); err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); if (err) @@ -1060,9 +1060,8 @@ out_unlock: static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, int cancel) { - int err; struct ubi_wl_entry *e = wl_wrk->e; - int pnum = e->pnum; + int pnum = e->pnum, err, need; if (cancel) { dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); @@ -1097,62 +1096,70 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, kfree(wl_wrk); kmem_cache_free(wl_entries_slab, e); - if (err != -EIO) { + if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || + err == -EBUSY) { + int err1; + + /* Re-schedule the LEB for erasure */ + err1 = schedule_erase(ubi, e, 0); + if (err1) { + err = err1; + goto out_ro; + } + return err; + } else if (err != -EIO) { /* * If this is not %-EIO, we have no idea what to do. Scheduling * this physical eraseblock for erasure again would cause * errors again and again. Well, lets switch to RO mode. 
*/ - ubi_ro_mode(ubi); - return err; + goto out_ro; } /* It is %-EIO, the PEB went bad */ if (!ubi->bad_allowed) { ubi_err("bad physical eraseblock %d detected", pnum); - ubi_ro_mode(ubi); - err = -EIO; - } else { - int need; - - spin_lock(&ubi->volumes_lock); - need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1; - if (need > 0) { - need = ubi->avail_pebs >= need ? need : ubi->avail_pebs; - ubi->avail_pebs -= need; - ubi->rsvd_pebs += need; - ubi->beb_rsvd_pebs += need; - if (need > 0) - ubi_msg("reserve more %d PEBs", need); - } + goto out_ro; + } - if (ubi->beb_rsvd_pebs == 0) { - spin_unlock(&ubi->volumes_lock); - ubi_err("no reserved physical eraseblocks"); - ubi_ro_mode(ubi); - return -EIO; - } + spin_lock(&ubi->volumes_lock); + need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1; + if (need > 0) { + need = ubi->avail_pebs >= need ? need : ubi->avail_pebs; + ubi->avail_pebs -= need; + ubi->rsvd_pebs += need; + ubi->beb_rsvd_pebs += need; + if (need > 0) + ubi_msg("reserve more %d PEBs", need); + } + if (ubi->beb_rsvd_pebs == 0) { spin_unlock(&ubi->volumes_lock); - ubi_msg("mark PEB %d as bad", pnum); + ubi_err("no reserved physical eraseblocks"); + goto out_ro; + } - err = ubi_io_mark_bad(ubi, pnum); - if (err) { - ubi_ro_mode(ubi); - return err; - } + spin_unlock(&ubi->volumes_lock); + ubi_msg("mark PEB %d as bad", pnum); - spin_lock(&ubi->volumes_lock); - ubi->beb_rsvd_pebs -= 1; - ubi->bad_peb_count += 1; - ubi->good_peb_count -= 1; - ubi_calculate_reserved(ubi); - if (ubi->beb_rsvd_pebs == 0) - ubi_warn("last PEB from the reserved pool was used"); - spin_unlock(&ubi->volumes_lock); - } + err = ubi_io_mark_bad(ubi, pnum); + if (err) + goto out_ro; + + spin_lock(&ubi->volumes_lock); + ubi->beb_rsvd_pebs -= 1; + ubi->bad_peb_count += 1; + ubi->good_peb_count -= 1; + ubi_calculate_reserved(ubi); + if (ubi->beb_rsvd_pebs == 0) + ubi_warn("last PEB from the reserved pool was used"); + spin_unlock(&ubi->volumes_lock); + + return err; +out_ro: + ubi_ro_mode(ubi); return err; } @@ -1634,7 +1641,7 @@ static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec) goto out_free; } - read_ec = ubi64_to_cpu(ec_hdr->ec); + read_ec = be64_to_cpu(ec_hdr->ec); if (ec != read_ec) { ubi_err("paranoid check failed for PEB %d", pnum); ubi_err("read EC is %lld, should be %d", read_ec, ec); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 43d03178064..5fb659f8b20 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2486,6 +2486,18 @@ source "drivers/atm/Kconfig" source "drivers/s390/net/Kconfig" +config XEN_NETDEV_FRONTEND + tristate "Xen network device frontend driver" + depends on XEN + default y + help + The network device frontend driver allows the kernel to + access network devices exported by a virtual + machine containing a physical network device driver. The + frontend driver is intended for unprivileged guest domains; + if you are compiling a kernel for a Xen guest, you almost + certainly want to enable this. 
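
The erase_worker() rework above ends with a three-way triage: transient errors (%-EINTR, %-ENOMEM, %-EAGAIN, %-EBUSY) re-queue the erasure, %-EIO retires the eraseblock through the bad-block reserve, and any other failure drops the device to read-only via the new out_ro label. A condensed sketch of that control flow, with hypothetical stubs in place of the UBI calls:

    #include <errno.h>

    /* Hypothetical stubs for the helpers erase_worker() really uses. */
    static int do_sync_erase(int pnum)        { (void)pnum; return 0; }
    static int schedule_erase_again(int pnum) { (void)pnum; return 0; }
    static int mark_peb_bad(int pnum)         { (void)pnum; return 0; }
    static void ro_mode(void)                 { }

    static int erase_one(int pnum)
    {
            int err = do_sync_erase(pnum);

            if (!err)
                    return 0;

            if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
                err == -EBUSY) {
                    /* Transient: re-queue, go read-only only if that fails. */
                    if (schedule_erase_again(pnum))
                            goto out_ro;
                    return err;
            }
            if (err == -EIO) {
                    /* The eraseblock really went bad: retire it. */
                    if (mark_peb_bad(pnum))
                            goto out_ro;
                    return err;
            }
            /* Unknown failure: retrying would fail forever. */
    out_ro:
            ro_mode();
            return err;
    }
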
+ config ISERIES_VETH tristate "iSeries Virtual Ethernet driver support" depends on PPC_ISERIES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index eb4167622a6..0e286ab8855 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -127,6 +127,8 @@ obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2tp.o obj-$(CONFIG_SLIP) += slip.o obj-$(CONFIG_SLHC) += slhc.o +obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o + obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_IFB) += ifb.o obj-$(CONFIG_MACVLAN) += macvlan.o diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index d23861c8658..a729da061bb 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -54,8 +54,8 @@ #define DRV_MODULE_NAME "bnx2" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.6.2" -#define DRV_MODULE_RELDATE "July 6, 2007" +#define DRV_MODULE_VERSION "1.6.3" +#define DRV_MODULE_RELDATE "July 16, 2007" #define RUN_AT(x) (jiffies + (x)) @@ -126,91 +126,102 @@ static struct pci_device_id bnx2_pci_tbl[] = { static struct flash_spec flash_table[] = { +#define BUFFERED_FLAGS (BNX2_NV_BUFFERED | BNX2_NV_TRANSLATE) +#define NONBUFFERED_FLAGS (BNX2_NV_WREN) /* Slow EEPROM */ {0x00000000, 0x40830380, 0x009f0081, 0xa184a053, 0xaf000400, - 1, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE, + BUFFERED_FLAGS, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE, SEEPROM_BYTE_ADDR_MASK, SEEPROM_TOTAL_SIZE, "EEPROM - slow"}, /* Expansion entry 0001 */ {0x08000002, 0x4b808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, 0, "Entry 0001"}, /* Saifun SA25F010 (non-buffered flash) */ /* strap, cfg1, & write1 need updates */ {0x04000001, 0x47808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE*2, "Non-buffered flash (128kB)"}, /* Saifun SA25F020 (non-buffered flash) */ /* strap, cfg1, & write1 need updates */ {0x0c000003, 0x4f808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE*4, "Non-buffered flash (256kB)"}, /* Expansion entry 0100 */ {0x11000000, 0x53808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, 0, "Entry 0100"}, /* Entry 0101: ST M45PE10 (non-buffered flash, TetonII B0) */ {0x19000002, 0x5b808201, 0x000500db, 0x03840253, 0xaf020406, - 0, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE, ST_MICRO_FLASH_BYTE_ADDR_MASK, ST_MICRO_FLASH_BASE_TOTAL_SIZE*2, "Entry 0101: ST M45PE10 (128kB non-bufferred)"}, /* Entry 0110: ST M45PE20 (non-buffered flash)*/ {0x15000001, 0x57808201, 0x000500db, 0x03840253, 0xaf020406, - 0, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE, ST_MICRO_FLASH_BYTE_ADDR_MASK, ST_MICRO_FLASH_BASE_TOTAL_SIZE*4, "Entry 0110: ST M45PE20 (256kB non-bufferred)"}, /* Saifun SA25F005 (non-buffered flash) */ /* strap, cfg1, & write1 need updates */ {0x1d000003, 0x5f808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + 
NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE, "Non-buffered flash (64kB)"}, /* Fast EEPROM */ {0x22000000, 0x62808380, 0x009f0081, 0xa184a053, 0xaf000400, - 1, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE, + BUFFERED_FLAGS, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE, SEEPROM_BYTE_ADDR_MASK, SEEPROM_TOTAL_SIZE, "EEPROM - fast"}, /* Expansion entry 1001 */ {0x2a000002, 0x6b808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, 0, "Entry 1001"}, /* Expansion entry 1010 */ {0x26000001, 0x67808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, 0, "Entry 1010"}, /* ATMEL AT45DB011B (buffered flash) */ {0x2e000003, 0x6e808273, 0x00570081, 0x68848353, 0xaf000400, - 1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE, + BUFFERED_FLAGS, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE, BUFFERED_FLASH_BYTE_ADDR_MASK, BUFFERED_FLASH_TOTAL_SIZE, "Buffered flash (128kB)"}, /* Expansion entry 1100 */ {0x33000000, 0x73808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, 0, "Entry 1100"}, /* Expansion entry 1101 */ {0x3b000002, 0x7b808201, 0x00050081, 0x03840253, 0xaf020406, - 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, + NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE, SAIFUN_FLASH_BYTE_ADDR_MASK, 0, "Entry 1101"}, /* Ateml Expansion entry 1110 */ {0x37000001, 0x76808273, 0x00570081, 0x68848353, 0xaf000400, - 1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE, + BUFFERED_FLAGS, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE, BUFFERED_FLASH_BYTE_ADDR_MASK, 0, "Entry 1110 (Atmel)"}, /* ATMEL AT45DB021B (buffered flash) */ {0x3f000003, 0x7e808273, 0x00570081, 0x68848353, 0xaf000400, - 1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE, + BUFFERED_FLAGS, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE, BUFFERED_FLASH_BYTE_ADDR_MASK, BUFFERED_FLASH_TOTAL_SIZE*2, "Buffered flash (256kB)"}, }; +static struct flash_spec flash_5709 = { + .flags = BNX2_NV_BUFFERED, + .page_bits = BCM5709_FLASH_PAGE_BITS, + .page_size = BCM5709_FLASH_PAGE_SIZE, + .addr_mask = BCM5709_FLASH_BYTE_ADDR_MASK, + .total_size = BUFFERED_FLASH_TOTAL_SIZE*2, + .name = "5709 Buffered flash (256kB)", +}; + MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl); static inline u32 bnx2_tx_avail(struct bnx2 *bp) @@ -3289,7 +3300,7 @@ bnx2_enable_nvram_write(struct bnx2 *bp) val = REG_RD(bp, BNX2_MISC_CFG); REG_WR(bp, BNX2_MISC_CFG, val | BNX2_MISC_CFG_NVM_WR_EN_PCI); - if (!bp->flash_info->buffered) { + if (bp->flash_info->flags & BNX2_NV_WREN) { int j; REG_WR(bp, BNX2_NVM_COMMAND, BNX2_NVM_COMMAND_DONE); @@ -3349,7 +3360,7 @@ bnx2_nvram_erase_page(struct bnx2 *bp, u32 offset) u32 cmd; int j; - if (bp->flash_info->buffered) + if (bp->flash_info->flags & BNX2_NV_BUFFERED) /* Buffered flash, no erase needed */ return 0; @@ -3392,8 +3403,8 @@ bnx2_nvram_read_dword(struct bnx2 *bp, u32 offset, u8 *ret_val, u32 cmd_flags) /* Build the command word. */ cmd = BNX2_NVM_COMMAND_DOIT | cmd_flags; - /* Calculate an offset of a buffered flash. */ - if (bp->flash_info->buffered) { + /* Calculate an offset of a buffered flash, not needed for 5709. 
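
The conversion from the old boolean `buffered` field to a flags word (the BNX2_NV_* bits defined in the bnx2.h hunk below) lets the three NVRAM properties vary independently, which the 5709's buffered-but-untranslated flash requires. In isolation the tests read:

    /* Flag values exactly as added to struct flash_spec in bnx2.h. */
    #define BNX2_NV_BUFFERED        0x00000001
    #define BNX2_NV_TRANSLATE       0x00000002
    #define BNX2_NV_WREN            0x00000004

    struct flash_spec {
            unsigned int flags;
    };

    /* Buffered parts need no erase cycle before rewriting a page. */
    static int needs_erase(const struct flash_spec *flash)
    {
            return !(flash->flags & BNX2_NV_BUFFERED);
    }

    /* Only page-translated parts rewrite the byte offset. */
    static int needs_translation(const struct flash_spec *flash)
    {
            return flash->flags & BNX2_NV_TRANSLATE;
    }

The flash_5709 entry above sets BNX2_NV_BUFFERED without BNX2_NV_TRANSLATE, so it skips both the erase cycle and the offset translation.
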
*/ + if (bp->flash_info->flags & BNX2_NV_TRANSLATE) { offset = ((offset / bp->flash_info->page_size) << bp->flash_info->page_bits) + (offset % bp->flash_info->page_size); @@ -3439,8 +3450,8 @@ bnx2_nvram_write_dword(struct bnx2 *bp, u32 offset, u8 *val, u32 cmd_flags) /* Build the command word. */ cmd = BNX2_NVM_COMMAND_DOIT | BNX2_NVM_COMMAND_WR | cmd_flags; - /* Calculate an offset of a buffered flash. */ - if (bp->flash_info->buffered) { + /* Calculate an offset of a buffered flash, not needed for 5709. */ + if (bp->flash_info->flags & BNX2_NV_TRANSLATE) { offset = ((offset / bp->flash_info->page_size) << bp->flash_info->page_bits) + (offset % bp->flash_info->page_size); @@ -3478,15 +3489,19 @@ static int bnx2_init_nvram(struct bnx2 *bp) { u32 val; - int j, entry_count, rc; + int j, entry_count, rc = 0; struct flash_spec *flash; + if (CHIP_NUM(bp) == CHIP_NUM_5709) { + bp->flash_info = &flash_5709; + goto get_flash_size; + } + /* Determine the selected interface. */ val = REG_RD(bp, BNX2_NVM_CFG1); entry_count = sizeof(flash_table) / sizeof(struct flash_spec); - rc = 0; if (val & 0x40000000) { /* Flash interface has been reconfigured */ @@ -3542,6 +3557,7 @@ bnx2_init_nvram(struct bnx2 *bp) return -ENODEV; } +get_flash_size: val = REG_RD_IND(bp, bp->shmem_base + BNX2_SHARED_HW_CFG_CONFIG2); val &= BNX2_SHARED_HW_CFG2_NVM_SIZE_MASK; if (val) @@ -3706,7 +3722,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf, buf = align_buf; } - if (bp->flash_info->buffered == 0) { + if (!(bp->flash_info->flags & BNX2_NV_BUFFERED)) { flash_buffer = kmalloc(264, GFP_KERNEL); if (flash_buffer == NULL) { rc = -ENOMEM; @@ -3739,7 +3755,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf, bnx2_enable_nvram_access(bp); cmd_flags = BNX2_NVM_COMMAND_FIRST; - if (bp->flash_info->buffered == 0) { + if (!(bp->flash_info->flags & BNX2_NV_BUFFERED)) { int j; /* Read the whole page into the buffer @@ -3767,7 +3783,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf, /* Loop to write back the buffer data from page_start to * data_start */ i = 0; - if (bp->flash_info->buffered == 0) { + if (!(bp->flash_info->flags & BNX2_NV_BUFFERED)) { /* Erase the page */ if ((rc = bnx2_nvram_erase_page(bp, page_start)) != 0) goto nvram_write_end; @@ -3791,7 +3807,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf, /* Loop to write the new data from data_start to data_end */ for (addr = data_start; addr < data_end; addr += 4, i += 4) { if ((addr == page_end - 4) || - ((bp->flash_info->buffered) && + ((bp->flash_info->flags & BNX2_NV_BUFFERED) && (addr == data_end - 4))) { cmd_flags |= BNX2_NVM_COMMAND_LAST; @@ -3808,7 +3824,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf, /* Loop to write back the buffer data from data_end * to page_end */ - if (bp->flash_info->buffered == 0) { + if (!(bp->flash_info->flags & BNX2_NV_BUFFERED)) { for (addr = data_end; addr < page_end; addr += 4, i += 4) { @@ -4107,7 +4123,7 @@ bnx2_init_chip(struct bnx2 *bp) if (CHIP_NUM(bp) == CHIP_NUM_5708) REG_WR(bp, BNX2_HC_STATS_TICKS, 0); else - REG_WR(bp, BNX2_HC_STATS_TICKS, bp->stats_ticks & 0xffff00); + REG_WR(bp, BNX2_HC_STATS_TICKS, bp->stats_ticks); REG_WR(bp, BNX2_HC_STAT_COLLECT_TICKS, 0xbb8); /* 3ms */ if (CHIP_ID(bp) == CHIP_ID_5706_A1) @@ -4127,10 +4143,6 @@ bnx2_init_chip(struct bnx2 *bp) REG_WR(bp, BNX2_HC_ATTN_BITS_ENABLE, STATUS_ATTN_EVENTS); - if (REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_FEATURE) & - BNX2_PORT_FEATURE_ASF_ENABLED) - bp->flags |= ASF_ENABLE_FLAG; - /* Initialize 
the receive filter. */ bnx2_set_rx_mode(bp->dev); @@ -5786,8 +5798,9 @@ bnx2_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) if (bp->stats_ticks != 0 && bp->stats_ticks != USEC_PER_SEC) bp->stats_ticks = USEC_PER_SEC; } - if (bp->stats_ticks > 0xffff00) bp->stats_ticks = 0xffff00; - bp->stats_ticks &= 0xffff00; + if (bp->stats_ticks > BNX2_HC_STATS_TICKS_HC_STAT_TICKS) + bp->stats_ticks = BNX2_HC_STATS_TICKS_HC_STAT_TICKS; + bp->stats_ticks &= BNX2_HC_STATS_TICKS_HC_STAT_TICKS; if (netif_running(bp->dev)) { bnx2_netif_stop(bp); @@ -6629,6 +6642,18 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) if (i != 2) bp->fw_version[j++] = '.'; } + if (REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_FEATURE) & + BNX2_PORT_FEATURE_ASF_ENABLED) { + bp->flags |= ASF_ENABLE_FLAG; + + for (i = 0; i < 30; i++) { + reg = REG_RD_IND(bp, bp->shmem_base + + BNX2_BC_STATE_CONDITION); + if (reg & BNX2_CONDITION_MFW_RUN_MASK) + break; + msleep(10); + } + } reg = REG_RD_IND(bp, bp->shmem_base + BNX2_BC_STATE_CONDITION); reg &= BNX2_CONDITION_MFW_RUN_MASK; if (reg != BNX2_CONDITION_MFW_RUN_UNKNOWN && @@ -6672,7 +6697,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) bp->rx_ticks_int = 18; bp->rx_ticks = 18; - bp->stats_ticks = 1000000 & 0xffff00; + bp->stats_ticks = USEC_PER_SEC & BNX2_HC_STATS_TICKS_HC_STAT_TICKS; bp->timer_interval = HZ; bp->current_interval = HZ; diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index d8cd1afeb23..102adfe1e92 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -6433,6 +6433,11 @@ struct sw_bd { #define ST_MICRO_FLASH_PAGE_SIZE 256 #define ST_MICRO_FLASH_BASE_TOTAL_SIZE 65536 +#define BCM5709_FLASH_PAGE_BITS 8 +#define BCM5709_FLASH_PHY_PAGE_SIZE (1 << BCM5709_FLASH_PAGE_BITS) +#define BCM5709_FLASH_BYTE_ADDR_MASK (BCM5709_FLASH_PHY_PAGE_SIZE-1) +#define BCM5709_FLASH_PAGE_SIZE 256 + #define NVRAM_TIMEOUT_COUNT 30000 @@ -6449,7 +6454,10 @@ struct flash_spec { u32 config2; u32 config3; u32 write1; - u32 buffered; + u32 flags; +#define BNX2_NV_BUFFERED 0x00000001 +#define BNX2_NV_TRANSLATE 0x00000002 +#define BNX2_NV_WREN 0x00000004 u32 page_bits; u32 page_size; u32 addr_mask; diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c index 84aa2117c0e..355c6cf3d11 100644 --- a/drivers/net/hamradio/baycom_epp.c +++ b/drivers/net/hamradio/baycom_epp.c @@ -320,7 +320,7 @@ static int eppconfig(struct baycom_state *bc) sprintf(portarg, "%ld", bc->pdev->port->base); printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg); - return call_usermodehelper(eppconfig_path, argv, envp, 1); + return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC); } /* ---------------------------------------------------------------------- */ diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 5891a0fbdc8..f87176055d0 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -824,6 +824,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh struct pppol2tp_session *session; struct pppol2tp_tunnel *tunnel; struct udphdr *uh; + unsigned int len; error = -ENOTCONN; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) @@ -912,14 +913,15 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh } /* Queue the packet to IP for output */ + len = skb->len; error = ip_queue_xmit(skb, 1); /* Update stats */ if (error >= 0) { tunnel->stats.tx_packets++; - tunnel->stats.tx_bytes += skb->len; + 
tunnel->stats.tx_bytes += len; session->stats.tx_packets++; - session->stats.tx_bytes += skb->len; + session->stats.tx_bytes += len; } else { tunnel->stats.tx_errors++; session->stats.tx_errors++; @@ -958,6 +960,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) __wsum csum = 0; struct sk_buff *skb2 = NULL; struct udphdr *uh; + unsigned int len; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; @@ -1046,18 +1049,25 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) printk("\n"); } + memset(&(IPCB(skb2)->opt), 0, sizeof(IPCB(skb2)->opt)); + IPCB(skb2)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); + nf_reset(skb2); + /* Get routing info from the tunnel socket */ + dst_release(skb2->dst); skb2->dst = sk_dst_get(sk_tun); /* Queue the packet to IP for output */ + len = skb2->len; rc = ip_queue_xmit(skb2, 1); /* Update stats */ if (rc >= 0) { tunnel->stats.tx_packets++; - tunnel->stats.tx_bytes += skb2->len; + tunnel->stats.tx_bytes += len; session->stats.tx_packets++; - session->stats.tx_bytes += skb2->len; + session->stats.tx_bytes += len; } else { tunnel->stats.tx_errors++; session->stats.tx_errors++; diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c index 8a667c13fae..b801e3b3a11 100644 --- a/drivers/net/sunvnet.c +++ b/drivers/net/sunvnet.c @@ -12,6 +12,7 @@ #include <linux/netdevice.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> +#include <linux/mutex.h> #include <asm/vio.h> #include <asm/ldc.h> @@ -497,6 +498,8 @@ static void vnet_event(void *arg, int event) vio_link_state_change(vio, event); spin_unlock_irqrestore(&vio->lock, flags); + if (event == LDC_EVENT_RESET) + vio_port_up(vio); return; } @@ -875,6 +878,115 @@ err_out: return err; } +static LIST_HEAD(vnet_list); +static DEFINE_MUTEX(vnet_list_mutex); + +static struct vnet * __devinit vnet_new(const u64 *local_mac) +{ + struct net_device *dev; + struct vnet *vp; + int err, i; + + dev = alloc_etherdev(sizeof(*vp)); + if (!dev) { + printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n"); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < ETH_ALEN; i++) + dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff; + + memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); + + vp = netdev_priv(dev); + + spin_lock_init(&vp->lock); + vp->dev = dev; + + INIT_LIST_HEAD(&vp->port_list); + for (i = 0; i < VNET_PORT_HASH_SIZE; i++) + INIT_HLIST_HEAD(&vp->port_hash[i]); + INIT_LIST_HEAD(&vp->list); + vp->local_mac = *local_mac; + + dev->open = vnet_open; + dev->stop = vnet_close; + dev->set_multicast_list = vnet_set_rx_mode; + dev->set_mac_address = vnet_set_mac_addr; + dev->tx_timeout = vnet_tx_timeout; + dev->ethtool_ops = &vnet_ethtool_ops; + dev->watchdog_timeo = VNET_TX_TIMEOUT; + dev->change_mtu = vnet_change_mtu; + dev->hard_start_xmit = vnet_start_xmit; + + err = register_netdev(dev); + if (err) { + printk(KERN_ERR PFX "Cannot register net device, " + "aborting.\n"); + goto err_out_free_dev; + } + + printk(KERN_INFO "%s: Sun LDOM vnet ", dev->name); + + for (i = 0; i < 6; i++) + printk("%2.2x%c", dev->dev_addr[i], i == 5 ? 
'\n' : ':'); + + list_add(&vp->list, &vnet_list); + + return vp; + +err_out_free_dev: + free_netdev(dev); + + return ERR_PTR(err); +} + +static struct vnet * __devinit vnet_find_or_create(const u64 *local_mac) +{ + struct vnet *iter, *vp; + + mutex_lock(&vnet_list_mutex); + vp = NULL; + list_for_each_entry(iter, &vnet_list, list) { + if (iter->local_mac == *local_mac) { + vp = iter; + break; + } + } + if (!vp) + vp = vnet_new(local_mac); + mutex_unlock(&vnet_list_mutex); + + return vp; +} + +static const char *local_mac_prop = "local-mac-address"; + +static struct vnet * __devinit vnet_find_parent(struct mdesc_handle *hp, + u64 port_node) +{ + const u64 *local_mac = NULL; + u64 a; + + mdesc_for_each_arc(a, hp, port_node, MDESC_ARC_TYPE_BACK) { + u64 target = mdesc_arc_target(hp, a); + const char *name; + + name = mdesc_get_property(hp, target, "name", NULL); + if (!name || strcmp(name, "network")) + continue; + + local_mac = mdesc_get_property(hp, target, + local_mac_prop, NULL); + if (local_mac) + break; + } + if (!local_mac) + return ERR_PTR(-ENODEV); + + return vnet_find_or_create(local_mac); +} + static struct ldc_channel_config vnet_ldc_cfg = { .event = vnet_event, .mtu = 64, @@ -887,6 +999,14 @@ static struct vio_driver_ops vnet_vio_ops = { .handshake_complete = vnet_handshake_complete, }; +static void print_version(void) +{ + static int version_printed; + + if (version_printed++ == 0) + printk(KERN_INFO "%s", version); +} + const char *remote_macaddr_prop = "remote-mac-address"; static int __devinit vnet_port_probe(struct vio_dev *vdev, @@ -899,14 +1019,17 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev, const u64 *rmac; int len, i, err, switch_port; - vp = dev_get_drvdata(vdev->dev.parent); - if (!vp) { - printk(KERN_ERR PFX "Cannot find port parent vnet.\n"); - return -ENODEV; - } + print_version(); hp = mdesc_grab(); + vp = vnet_find_parent(hp, vdev->mp); + if (IS_ERR(vp)) { + printk(KERN_ERR PFX "Cannot find port parent vnet.\n"); + err = PTR_ERR(vp); + goto err_out_put_mdesc; + } + rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len); err = -ENODEV; if (!rmac) { @@ -1025,139 +1148,14 @@ static struct vio_driver vnet_port_driver = { } }; -const char *local_mac_prop = "local-mac-address"; - -static int __devinit vnet_probe(struct vio_dev *vdev, - const struct vio_device_id *id) -{ - static int vnet_version_printed; - struct mdesc_handle *hp; - struct net_device *dev; - struct vnet *vp; - const u64 *mac; - int err, i, len; - - if (vnet_version_printed++ == 0) - printk(KERN_INFO "%s", version); - - hp = mdesc_grab(); - - mac = mdesc_get_property(hp, vdev->mp, local_mac_prop, &len); - if (!mac) { - printk(KERN_ERR PFX "vnet lacks %s property.\n", - local_mac_prop); - err = -ENODEV; - goto err_out; - } - - dev = alloc_etherdev(sizeof(*vp)); - if (!dev) { - printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n"); - err = -ENOMEM; - goto err_out; - } - - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = (*mac >> (5 - i) * 8) & 0xff; - - memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); - - SET_NETDEV_DEV(dev, &vdev->dev); - - vp = netdev_priv(dev); - - spin_lock_init(&vp->lock); - vp->dev = dev; - vp->vdev = vdev; - - INIT_LIST_HEAD(&vp->port_list); - for (i = 0; i < VNET_PORT_HASH_SIZE; i++) - INIT_HLIST_HEAD(&vp->port_hash[i]); - - dev->open = vnet_open; - dev->stop = vnet_close; - dev->set_multicast_list = vnet_set_rx_mode; - dev->set_mac_address = vnet_set_mac_addr; - dev->tx_timeout = vnet_tx_timeout; - dev->ethtool_ops = &vnet_ethtool_ops; - 
dev->watchdog_timeo = VNET_TX_TIMEOUT; - dev->change_mtu = vnet_change_mtu; - dev->hard_start_xmit = vnet_start_xmit; - - err = register_netdev(dev); - if (err) { - printk(KERN_ERR PFX "Cannot register net device, " - "aborting.\n"); - goto err_out_free_dev; - } - - printk(KERN_INFO "%s: Sun LDOM vnet ", dev->name); - - for (i = 0; i < 6; i++) - printk("%2.2x%c", dev->dev_addr[i], i == 5 ? '\n' : ':'); - - dev_set_drvdata(&vdev->dev, vp); - - mdesc_release(hp); - - return 0; - -err_out_free_dev: - free_netdev(dev); - -err_out: - mdesc_release(hp); - return err; -} - -static int vnet_remove(struct vio_dev *vdev) -{ - - struct vnet *vp = dev_get_drvdata(&vdev->dev); - - if (vp) { - /* XXX unregister port, or at least check XXX */ - unregister_netdevice(vp->dev); - dev_set_drvdata(&vdev->dev, NULL); - } - return 0; -} - -static struct vio_device_id vnet_match[] = { - { - .type = "network", - }, - {}, -}; -MODULE_DEVICE_TABLE(vio, vnet_match); - -static struct vio_driver vnet_driver = { - .id_table = vnet_match, - .probe = vnet_probe, - .remove = vnet_remove, - .driver = { - .name = "vnet", - .owner = THIS_MODULE, - } -}; - static int __init vnet_init(void) { - int err = vio_register_driver(&vnet_driver); - - if (!err) { - err = vio_register_driver(&vnet_port_driver); - if (err) - vio_unregister_driver(&vnet_driver); - } - - return err; + return vio_register_driver(&vnet_port_driver); } static void __exit vnet_exit(void) { vio_unregister_driver(&vnet_port_driver); - vio_unregister_driver(&vnet_driver); } module_init(vnet_init); diff --git a/drivers/net/sunvnet.h b/drivers/net/sunvnet.h index 1c887302d46..7d3a0cac727 100644 --- a/drivers/net/sunvnet.h +++ b/drivers/net/sunvnet.h @@ -60,11 +60,13 @@ struct vnet { struct net_device *dev; u32 msg_enable; - struct vio_dev *vdev; struct list_head port_list; struct hlist_head port_hash[VNET_PORT_HASH_SIZE]; + + struct list_head list; + u64 local_mac; }; #endif /* _SUNVNET_H */ diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c new file mode 100644 index 00000000000..489f69c5d6c --- /dev/null +++ b/drivers/net/xen-netfront.c @@ -0,0 +1,1863 @@ +/* + * Virtual network driver for conversing with remote driver backends. + * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005, XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/ethtool.h> +#include <linux/if_ether.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/moduleparam.h> +#include <linux/mm.h> +#include <net/ip.h> + +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/page.h> +#include <xen/grant_table.h> + +#include <xen/interface/io/netif.h> +#include <xen/interface/memory.h> +#include <xen/interface/grant_table.h> + +static struct ethtool_ops xennet_ethtool_ops; + +struct netfront_cb { + struct page *page; + unsigned offset; +}; + +#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb)) + +#define RX_COPY_THRESHOLD 256 + +#define GRANT_INVALID_REF 0 + +#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE) +#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + +struct netfront_info { + struct list_head list; + struct net_device *netdev; + + struct net_device_stats stats; + + struct xen_netif_tx_front_ring tx; + struct xen_netif_rx_front_ring rx; + + spinlock_t tx_lock; + spinlock_t rx_lock; + + unsigned int evtchn; + + /* Receive-ring batched refills. */ +#define RX_MIN_TARGET 8 +#define RX_DFL_MIN_TARGET 64 +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + unsigned rx_min_target, rx_max_target, rx_target; + struct sk_buff_head rx_batch; + + struct timer_list rx_refill_timer; + + /* + * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries + * are linked from tx_skb_freelist through skb_entry.link. + * + * NB. Freelist index entries are always going to be less than + * PAGE_OFFSET, whereas pointers to skbs will always be equal or + * greater than PAGE_OFFSET: we use this property to distinguish + * them. + */ + union skb_entry { + struct sk_buff *skb; + unsigned link; + } tx_skbs[NET_TX_RING_SIZE]; + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; + unsigned tx_skb_freelist; + + struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; + grant_ref_t gref_rx_head; + grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + + struct xenbus_device *xbdev; + int tx_ring_ref; + int rx_ring_ref; + + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; + struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; + struct mmu_update rx_mmu[NET_RX_RING_SIZE]; +}; + +struct netfront_rx_info { + struct xen_netif_rx_response rx; + struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; +}; + +/* + * Access macros for acquiring/freeing slots in tx_skbs[]. 
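*/

/*
 * A stand-alone sketch of the tx_skbs[] trick described above: free slots
 * store a small freelist index, in-use slots store an skb pointer, and the
 * two are told apart by comparing against PAGE_OFFSET (kernel pointers are
 * always at or above PAGE_OFFSET, ring indexes never are). Plain user-space
 * C with a made-up FAKE_PAGE_OFFSET, just to show the discrimination.
 */
#include <stdio.h>

#define FAKE_PAGE_OFFSET 0xC0000000UL
#define RING_SIZE 4

union entry {
        void *ptr;              /* in use: a pointer into high memory */
        unsigned long link;     /* free: index of the next free slot */
};

static int slot_in_use(union entry *e)
{
        /* Freelist links are small indexes; real pointers are high. */
        return (unsigned long)e->ptr >= FAKE_PAGE_OFFSET;
}

int main(void)
{
        union entry ring[RING_SIZE];
        unsigned long free_head = 0;
        unsigned i;

        /* Chain every slot onto the freelist, as xennet_create_dev() does. */
        for (i = 0; i < RING_SIZE; i++)
                ring[i].link = i + 1;

        /* Claim a slot from the head and store a "pointer" in it. */
        i = free_head;
        free_head = ring[i].link;
        ring[i].ptr = (void *)(FAKE_PAGE_OFFSET + 0x1000);

        printf("slot 0 in use: %d, slot 1 in use: %d\n",
               slot_in_use(&ring[0]), slot_in_use(&ring[1]));
        return 0;
}

/*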
+ */ + +static void add_id_to_freelist(unsigned *head, union skb_entry *list, + unsigned short id) +{ + list[id].link = *head; + *head = id; +} + +static unsigned short get_id_from_freelist(unsigned *head, + union skb_entry *list) +{ + unsigned int id = *head; + *head = list[id].link; + return id; +} + +static int xennet_rxidx(RING_IDX idx) +{ + return idx & (NET_RX_RING_SIZE - 1); +} + +static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + struct sk_buff *skb = np->rx_skbs[i]; + np->rx_skbs[i] = NULL; + return skb; +} + +static grant_ref_t xennet_get_rx_ref(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + grant_ref_t ref = np->grant_rx_ref[i]; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + return ref; +} + +#ifdef CONFIG_SYSFS +static int xennet_sysfs_addif(struct net_device *netdev); +static void xennet_sysfs_delif(struct net_device *netdev); +#else /* !CONFIG_SYSFS */ +#define xennet_sysfs_addif(dev) (0) +#define xennet_sysfs_delif(dev) do { } while (0) +#endif + +static int xennet_can_sg(struct net_device *dev) +{ + return dev->features & NETIF_F_SG; +} + + +static void rx_refill_timeout(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + netif_rx_schedule(dev); +} + +static int netfront_tx_slot_available(struct netfront_info *np) +{ + return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < + (TX_MAX_TARGET - MAX_SKB_FRAGS - 2)); +} + +static void xennet_maybe_wake_tx(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + + if (unlikely(netif_queue_stopped(dev)) && + netfront_tx_slot_available(np) && + likely(netif_running(dev))) + netif_wake_queue(dev); +} + +static void xennet_alloc_rx_buffers(struct net_device *dev) +{ + unsigned short id; + struct netfront_info *np = netdev_priv(dev); + struct sk_buff *skb; + struct page *page; + int i, batch_target, notify; + RING_IDX req_prod = np->rx.req_prod_pvt; + struct xen_memory_reservation reservation; + grant_ref_t ref; + unsigned long pfn; + void *vaddr; + int nr_flips; + struct xen_netif_rx_request *req; + + if (unlikely(!netif_carrier_ok(dev))) + return; + + /* + * Allocate skbuffs greedily, even though we batch updates to the + * receive ring. This creates a less bursty demand on the memory + * allocator, so should reduce the chance of failed allocation requests + * both for ourself and for other kernel subsystems. + */ + batch_target = np->rx_target - (req_prod - np->rx.rsp_cons); + for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) { + skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + goto no_skb; + + page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) { + kfree_skb(skb); +no_skb: + /* Any skbuffs queued for refill? Force them out. */ + if (i != 0) + goto refill; + /* Could not allocate any skbuffs. Try again later. */ + mod_timer(&np->rx_refill_timer, + jiffies + (HZ/10)); + break; + } + + skb_shinfo(skb)->frags[0].page = page; + skb_shinfo(skb)->nr_frags = 1; + __skb_queue_tail(&np->rx_batch, skb); + } + + /* Is the batch large enough to be worthwhile? */ + if (i < (np->rx_target/2)) { + if (req_prod > np->rx.sring->req_prod) + goto push; + return; + } + + /* Adjust our fill target if we risked running out of buffers. 
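*/

/*
 * xennet_rxidx() above relies on NET_RX_RING_SIZE being a power of two, so
 * a free-running RING_IDX can be mapped into the skb and grant-ref arrays
 * with a mask instead of a modulo. A tiny sketch with an assumed ring size
 * of 8 (any power of two works the same way):
 */
#include <stdio.h>

#define RING_SIZE 8     /* must be a power of two */

static unsigned rxidx(unsigned idx)
{
        return idx & (RING_SIZE - 1);   /* same result as idx % RING_SIZE */
}

int main(void)
{
        /* Indexes 9 and 17 wrap to slot 1 even though the counter never
         * resets. */
        printf("%u %u %u\n", rxidx(1), rxidx(9), rxidx(17));
        return 0;
}

/*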
*/ + if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) && + ((np->rx_target *= 2) > np->rx_max_target)) + np->rx_target = np->rx_max_target; + + refill: + for (nr_flips = i = 0; ; i++) { + skb = __skb_dequeue(&np->rx_batch); + if (skb == NULL) + break; + + skb->dev = dev; + + id = xennet_rxidx(req_prod + i); + + BUG_ON(np->rx_skbs[id]); + np->rx_skbs[id] = skb; + + ref = gnttab_claim_grant_reference(&np->gref_rx_head); + BUG_ON((signed short)ref < 0); + np->grant_rx_ref[id] = ref; + + pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page); + vaddr = page_address(skb_shinfo(skb)->frags[0].page); + + req = RING_GET_REQUEST(&np->rx, req_prod + i); + gnttab_grant_foreign_access_ref(ref, + np->xbdev->otherend_id, + pfn_to_mfn(pfn), + 0); + + req->id = id; + req->gref = ref; + } + + if (nr_flips != 0) { + reservation.extent_start = np->rx_pfn_array; + reservation.nr_extents = nr_flips; + reservation.extent_order = 0; + reservation.address_bits = 0; + reservation.domid = DOMID_SELF; + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* After all PTEs have been zapped, flush the TLB. */ + np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = + UVMF_TLB_FLUSH|UVMF_ALL; + + /* Give away a batch of pages. */ + np->rx_mcl[i].op = __HYPERVISOR_memory_op; + np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; + np->rx_mcl[i].args[1] = (unsigned long)&reservation; + + /* Zap PTEs and give away pages in one big + * multicall. */ + (void)HYPERVISOR_multicall(np->rx_mcl, i+1); + + /* Check return status of HYPERVISOR_memory_op(). */ + if (unlikely(np->rx_mcl[i].result != i)) + panic("Unable to reduce memory reservation\n"); + } else { + if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation) != i) + panic("Unable to reduce memory reservation\n"); + } + } else { + wmb(); /* barrier so backend sees requests */ + } + + /* Above is a suitable barrier to ensure backend will see requests. */ + np->rx.req_prod_pvt = req_prod + i; + push: + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); + if (notify) + notify_remote_via_irq(np->netdev->irq); +} + +static int xennet_open(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + + memset(&np->stats, 0, sizeof(np->stats)); + + spin_lock_bh(&np->rx_lock); + if (netif_carrier_ok(dev)) { + xennet_alloc_rx_buffers(dev); + np->rx.sring->rsp_event = np->rx.rsp_cons + 1; + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + netif_rx_schedule(dev); + } + spin_unlock_bh(&np->rx_lock); + + xennet_maybe_wake_tx(dev); + + return 0; +} + +static void xennet_tx_buf_gc(struct net_device *dev) +{ + RING_IDX cons, prod; + unsigned short id; + struct netfront_info *np = netdev_priv(dev); + struct sk_buff *skb; + + BUG_ON(!netif_carrier_ok(dev)); + + do { + prod = np->tx.sring->rsp_prod; + rmb(); /* Ensure we see responses up to 'rp'. 
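*/

/*
 * The rsp_prod/rsp_cons dance that follows is the classic single-producer,
 * single-consumer ring protocol: snapshot the producer index, issue a read
 * barrier so the response payloads are observed no older than that index,
 * then consume up to the snapshot. A schematic user-space consumer, with
 * rmb() approximated by a compiler builtin (the real barrier matters only
 * against the backend on another CPU):
 */
#include <stdio.h>

#define rmb() __sync_synchronize()      /* stand-in for the kernel's rmb() */

struct ring {
        volatile unsigned rsp_prod;     /* advanced by the producer */
        unsigned rsp_cons;              /* private to the consumer */
        int resp[8];
};

static void consume(struct ring *r)
{
        unsigned prod = r->rsp_prod;    /* snapshot the producer index */

        rmb();                          /* responses up to 'prod' are valid */
        while (r->rsp_cons != prod) {
                printf("response %d\n", r->resp[r->rsp_cons & 7]);
                r->rsp_cons++;
        }
}

int main(void)
{
        struct ring r = { .rsp_prod = 2, .rsp_cons = 0, .resp = { 10, 11 } };
        consume(&r);
        return 0;
}

/*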
*/ + + for (cons = np->tx.rsp_cons; cons != prod; cons++) { + struct xen_netif_tx_response *txrsp; + + txrsp = RING_GET_RESPONSE(&np->tx, cons); + if (txrsp->status == NETIF_RSP_NULL) + continue; + + id = txrsp->id; + skb = np->tx_skbs[id].skb; + if (unlikely(gnttab_query_foreign_access( + np->grant_tx_ref[id]) != 0)) { + printk(KERN_ALERT "xennet_tx_buf_gc: warning " + "-- grant still in use by backend " + "domain.\n"); + BUG(); + } + gnttab_end_foreign_access_ref( + np->grant_tx_ref[id], GNTMAP_readonly); + gnttab_release_grant_reference( + &np->gref_tx_head, np->grant_tx_ref[id]); + np->grant_tx_ref[id] = GRANT_INVALID_REF; + add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id); + dev_kfree_skb_irq(skb); + } + + np->tx.rsp_cons = prod; + + /* + * Set a new event, then check for race with update of tx_cons. + * Note that it is essential to schedule a callback, no matter + * how few buffers are pending. Even if there is space in the + * transmit ring, higher layers may be blocked because too much + * data is outstanding: in such cases notification from Xen is + * likely to be the only kick that we'll get. + */ + np->tx.sring->rsp_event = + prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; + mb(); /* update shared area */ + } while ((cons == prod) && (prod != np->tx.sring->rsp_prod)); + + xennet_maybe_wake_tx(dev); +} + +static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, + struct xen_netif_tx_request *tx) +{ + struct netfront_info *np = netdev_priv(dev); + char *data = skb->data; + unsigned long mfn; + RING_IDX prod = np->tx.req_prod_pvt; + int frags = skb_shinfo(skb)->nr_frags; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + unsigned int id; + grant_ref_t ref; + int i; + + /* While the header overlaps a page boundary (including being + larger than a page), split it into page-sized chunks. */ + while (len > PAGE_SIZE - offset) { + tx->size = PAGE_SIZE - offset; + tx->flags |= NETTXF_more_data; + len -= tx->size; + data += tx->size; + offset = 0; + + id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); + np->tx_skbs[id].skb = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = virt_to_mfn(data); + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; + tx->flags = 0; + } + + /* Grant backend access to each skb fragment page. 
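*/

/*
 * The while loop above chops skb_headlen() into page-sized pieces: only the
 * first chunk starts at a non-zero page offset, and every later chunk is
 * page aligned. The arithmetic in isolation, with an assumed 4096-byte page:
 */
#include <stdio.h>

#define PG 4096u

int main(void)
{
        unsigned offset = 3000, len = 6000; /* header crosses two page boundaries */

        while (len > PG - offset) {
                unsigned chunk = PG - offset;

                printf("chunk of %u at offset %u\n", chunk, offset);
                len -= chunk;
                offset = 0;     /* later chunks are page aligned */
        }
        /* Prints chunks of 1096, 4096 and finally 808 bytes (6000 total). */
        printf("final chunk of %u at offset %u\n", len, offset);
        return 0;
}

/*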
*/ + for (i = 0; i < frags; i++) { + skb_frag_t *frag = skb_shinfo(skb)->frags + i; + + tx->flags |= NETTXF_more_data; + + id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); + np->tx_skbs[id].skb = skb_get(skb); + tx = RING_GET_REQUEST(&np->tx, prod++); + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + + mfn = pfn_to_mfn(page_to_pfn(frag->page)); + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, + mfn, GNTMAP_readonly); + + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = frag->page_offset; + tx->size = frag->size; + tx->flags = 0; + } + + np->tx.req_prod_pvt = prod; +} + +static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + unsigned short id; + struct netfront_info *np = netdev_priv(dev); + struct xen_netif_tx_request *tx; + struct xen_netif_extra_info *extra; + char *data = skb->data; + RING_IDX i; + grant_ref_t ref; + unsigned long mfn; + int notify; + int frags = skb_shinfo(skb)->nr_frags; + unsigned int offset = offset_in_page(data); + unsigned int len = skb_headlen(skb); + + frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; + if (unlikely(frags > MAX_SKB_FRAGS + 1)) { + printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", + frags); + dump_stack(); + goto drop; + } + + spin_lock_irq(&np->tx_lock); + + if (unlikely(!netif_carrier_ok(dev) || + (frags > 1 && !xennet_can_sg(dev)) || + netif_needs_gso(dev, skb))) { + spin_unlock_irq(&np->tx_lock); + goto drop; + } + + i = np->tx.req_prod_pvt; + + id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); + np->tx_skbs[id].skb = skb; + + tx = RING_GET_REQUEST(&np->tx, i); + + tx->id = id; + ref = gnttab_claim_grant_reference(&np->gref_tx_head); + BUG_ON((signed short)ref < 0); + mfn = virt_to_mfn(data); + gnttab_grant_foreign_access_ref( + ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); + tx->gref = np->grant_tx_ref[id] = ref; + tx->offset = offset; + tx->size = len; + extra = NULL; + + tx->flags = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL) + /* local packet? */ + tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; + else if (skb->ip_summed == CHECKSUM_UNNECESSARY) + /* remote but checksummed. 
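*/

/*
 * A worked example for the slot estimate at the top of xennet_start_xmit():
 * the linear header needs ceil((offset + len) / PAGE_SIZE) tx ring slots on
 * top of one slot per page fragment, and the request is dropped if that
 * exceeds MAX_SKB_FRAGS + 1. With an assumed 4096-byte page:
 */
#include <stdio.h>

#define PG 4096u

int main(void)
{
        unsigned offset = 100, len = 8000, nr_frags = 2;
        /* 100 + 8000 = 8100 bytes span ceil(8100/4096) = 2 pages. */
        unsigned slots = nr_frags + (offset + len + PG - 1) / PG;

        printf("ring slots needed: %u\n", slots);       /* 2 + 2 = 4 */
        return 0;
}

/*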
*/ + tx->flags |= NETTXF_data_validated; + + if (skb_shinfo(skb)->gso_size) { + struct xen_netif_extra_info *gso; + + gso = (struct xen_netif_extra_info *) + RING_GET_REQUEST(&np->tx, ++i); + + if (extra) + extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; + else + tx->flags |= NETTXF_extra_info; + + gso->u.gso.size = skb_shinfo(skb)->gso_size; + gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; + gso->u.gso.pad = 0; + gso->u.gso.features = 0; + + gso->type = XEN_NETIF_EXTRA_TYPE_GSO; + gso->flags = 0; + extra = gso; + } + + np->tx.req_prod_pvt = i + 1; + + xennet_make_frags(skb, dev, tx); + tx->size = skb->len; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); + if (notify) + notify_remote_via_irq(np->netdev->irq); + + xennet_tx_buf_gc(dev); + + if (!netfront_tx_slot_available(np)) + netif_stop_queue(dev); + + spin_unlock_irq(&np->tx_lock); + + np->stats.tx_bytes += skb->len; + np->stats.tx_packets++; + + return 0; + + drop: + np->stats.tx_dropped++; + dev_kfree_skb(skb); + return 0; +} + +static int xennet_close(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + netif_stop_queue(np->netdev); + return 0; +} + +static struct net_device_stats *xennet_get_stats(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + return &np->stats; +} + +static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb, + grant_ref_t ref) +{ + int new = xennet_rxidx(np->rx.req_prod_pvt); + + BUG_ON(np->rx_skbs[new]); + np->rx_skbs[new] = skb; + np->grant_rx_ref[new] = ref; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; + np->rx.req_prod_pvt++; +} + +static int xennet_get_extras(struct netfront_info *np, + struct xen_netif_extra_info *extras, + RING_IDX rp) + +{ + struct xen_netif_extra_info *extra; + struct device *dev = &np->netdev->dev; + RING_IDX cons = np->rx.rsp_cons; + int err = 0; + + do { + struct sk_buff *skb; + grant_ref_t ref; + + if (unlikely(cons + 1 == rp)) { + if (net_ratelimit()) + dev_warn(dev, "Missing extra info\n"); + err = -EBADR; + break; + } + + extra = (struct xen_netif_extra_info *) + RING_GET_RESPONSE(&np->rx, ++cons); + + if (unlikely(!extra->type || + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + if (net_ratelimit()) + dev_warn(dev, "Invalid extra type: %d\n", + extra->type); + err = -EINVAL; + } else { + memcpy(&extras[extra->type - 1], extra, + sizeof(*extra)); + } + + skb = xennet_get_rx_skb(np, cons); + ref = xennet_get_rx_ref(np, cons); + xennet_move_rx_slot(np, skb, ref); + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + + np->rx.rsp_cons = cons; + return err; +} + +static int xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, + struct sk_buff_head *list) +{ + struct xen_netif_rx_response *rx = &rinfo->rx; + struct xen_netif_extra_info *extras = rinfo->extras; + struct device *dev = &np->netdev->dev; + RING_IDX cons = np->rx.rsp_cons; + struct sk_buff *skb = xennet_get_rx_skb(np, cons); + grant_ref_t ref = xennet_get_rx_ref(np, cons); + int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); + int frags = 1; + int err = 0; + unsigned long ret; + + if (rx->flags & NETRXF_extra_info) { + err = xennet_get_extras(np, extras, rp); + cons = np->rx.rsp_cons; + } + + for (;;) { + if (unlikely(rx->status < 0 || + rx->offset + rx->status > PAGE_SIZE)) { + if (net_ratelimit()) + dev_warn(dev, "rx->offset: %x, size: %u\n", + rx->offset, rx->status); + xennet_move_rx_slot(np, skb, ref); + err = -EINVAL; + 
goto next; + } + + /* + * This definitely indicates a bug, either in this driver or in + * the backend driver. In future this should flag the bad + * situation to the system controller to reboot the backend. + */ + if (ref == GRANT_INVALID_REF) { + if (net_ratelimit()) + dev_warn(dev, "Bad rx response id %d.\n", + rx->id); + err = -EINVAL; + goto next; + } + + ret = gnttab_end_foreign_access_ref(ref, 0); + BUG_ON(!ret); + + gnttab_release_grant_reference(&np->gref_rx_head, ref); + + __skb_queue_tail(list, skb); + +next: + if (!(rx->flags & NETRXF_more_data)) + break; + + if (cons + frags == rp) { + if (net_ratelimit()) + dev_warn(dev, "Need more frags\n"); + err = -ENOENT; + break; + } + + rx = RING_GET_RESPONSE(&np->rx, cons + frags); + skb = xennet_get_rx_skb(np, cons + frags); + ref = xennet_get_rx_ref(np, cons + frags); + frags++; + } + + if (unlikely(frags > max)) { + if (net_ratelimit()) + dev_warn(dev, "Too many frags\n"); + err = -E2BIG; + } + + if (unlikely(err)) + np->rx.rsp_cons = cons + frags; + + return err; +} + +static int xennet_set_skb_gso(struct sk_buff *skb, + struct xen_netif_extra_info *gso) +{ + if (!gso->u.gso.size) { + if (net_ratelimit()) + printk(KERN_WARNING "GSO size must not be zero.\n"); + return -EINVAL; + } + + /* Currently only TCPv4 S.O. is supported. */ + if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { + if (net_ratelimit()) + printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type); + return -EINVAL; + } + + skb_shinfo(skb)->gso_size = gso->u.gso.size; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + + return 0; +} + +static RING_IDX xennet_fill_frags(struct netfront_info *np, + struct sk_buff *skb, + struct sk_buff_head *list) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; + RING_IDX cons = np->rx.rsp_cons; + skb_frag_t *frag = shinfo->frags + nr_frags; + struct sk_buff *nskb; + + while ((nskb = __skb_dequeue(list))) { + struct xen_netif_rx_response *rx = + RING_GET_RESPONSE(&np->rx, ++cons); + + frag->page = skb_shinfo(nskb)->frags[0].page; + frag->page_offset = rx->offset; + frag->size = rx->status; + + skb->data_len += rx->status; + + skb_shinfo(nskb)->nr_frags = 0; + kfree_skb(nskb); + + frag++; + nr_frags++; + } + + shinfo->nr_frags = nr_frags; + return cons; +} + +static int skb_checksum_setup(struct sk_buff *skb) +{ + struct iphdr *iph; + unsigned char *th; + int err = -EPROTO; + + if (skb->protocol != htons(ETH_P_IP)) + goto out; + + iph = (void *)skb->data; + th = skb->data + 4 * iph->ihl; + if (th >= skb_tail_pointer(skb)) + goto out; + + skb->csum_start = th - skb->head; + switch (iph->protocol) { + case IPPROTO_TCP: + skb->csum_offset = offsetof(struct tcphdr, check); + break; + case IPPROTO_UDP: + skb->csum_offset = offsetof(struct udphdr, check); + break; + default: + if (net_ratelimit()) + printk(KERN_ERR "Attempting to checksum a non-" + "TCP/UDP packet, dropping a protocol" + " %d packet", iph->protocol); + goto out; + } + + if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) + goto out; + + err = 0; + +out: + return err; +} + +static int handle_incoming_queue(struct net_device *dev, + struct sk_buff_head *rxq) +{ + struct netfront_info *np = netdev_priv(dev); + int packets_dropped = 0; + struct sk_buff *skb; + + while ((skb = __skb_dequeue(rxq)) != NULL) { + struct page *page = NETFRONT_SKB_CB(skb)->page; + void *vaddr = page_address(page); + unsigned 
offset = NETFRONT_SKB_CB(skb)->offset; + + memcpy(skb->data, vaddr + offset, + skb_headlen(skb)); + + if (page != skb_shinfo(skb)->frags[0].page) + __free_page(page); + + /* Ethernet work: Delayed to here as it peeks the header. */ + skb->protocol = eth_type_trans(skb, dev); + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb_checksum_setup(skb)) { + kfree_skb(skb); + packets_dropped++; + np->stats.rx_errors++; + continue; + } + } + + np->stats.rx_packets++; + np->stats.rx_bytes += skb->len; + + /* Pass it up. */ + netif_receive_skb(skb); + dev->last_rx = jiffies; + } + + return packets_dropped; +} + +static int xennet_poll(struct net_device *dev, int *pbudget) +{ + struct netfront_info *np = netdev_priv(dev); + struct sk_buff *skb; + struct netfront_rx_info rinfo; + struct xen_netif_rx_response *rx = &rinfo.rx; + struct xen_netif_extra_info *extras = rinfo.extras; + RING_IDX i, rp; + int work_done, budget, more_to_do = 1; + struct sk_buff_head rxq; + struct sk_buff_head errq; + struct sk_buff_head tmpq; + unsigned long flags; + unsigned int len; + int err; + + spin_lock(&np->rx_lock); + + if (unlikely(!netif_carrier_ok(dev))) { + spin_unlock(&np->rx_lock); + return 0; + } + + skb_queue_head_init(&rxq); + skb_queue_head_init(&errq); + skb_queue_head_init(&tmpq); + + budget = *pbudget; + if (budget > dev->quota) + budget = dev->quota; + rp = np->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + i = np->rx.rsp_cons; + work_done = 0; + while ((i != rp) && (work_done < budget)) { + memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); + memset(extras, 0, sizeof(rinfo.extras)); + + err = xennet_get_responses(np, &rinfo, rp, &tmpq); + + if (unlikely(err)) { +err: + while ((skb = __skb_dequeue(&tmpq))) + __skb_queue_tail(&errq, skb); + np->stats.rx_errors++; + i = np->rx.rsp_cons; + continue; + } + + skb = __skb_dequeue(&tmpq); + + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { + struct xen_netif_extra_info *gso; + gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; + + if (unlikely(xennet_set_skb_gso(skb, gso))) { + __skb_queue_head(&tmpq, skb); + np->rx.rsp_cons += skb_queue_len(&tmpq); + goto err; + } + } + + NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page; + NETFRONT_SKB_CB(skb)->offset = rx->offset; + + len = rx->status; + if (len > RX_COPY_THRESHOLD) + len = RX_COPY_THRESHOLD; + skb_put(skb, len); + + if (rx->status > len) { + skb_shinfo(skb)->frags[0].page_offset = + rx->offset + len; + skb_shinfo(skb)->frags[0].size = rx->status - len; + skb->data_len = rx->status - len; + } else { + skb_shinfo(skb)->frags[0].page = NULL; + skb_shinfo(skb)->nr_frags = 0; + } + + i = xennet_fill_frags(np, skb, &tmpq); + + /* + * Truesize approximates the size of true data plus + * any supervisor overheads. Adding hypervisor + * overheads has been shown to significantly reduce + * achievable bandwidth with the default receive + * buffer size. It is therefore not wise to account + * for it here. + * + * After alloc_skb(RX_COPY_THRESHOLD), truesize is set + * to RX_COPY_THRESHOLD + the supervisor + * overheads. Here, we add the size of the data pulled + * in xennet_fill_frags(). + * + * We also adjust for any unused space in the main + * data area by subtracting (RX_COPY_THRESHOLD - + * len). This is especially important with drivers + * which split incoming packets into header and data, + * using only 66 bytes of the main data area (see the + * e1000 driver for example.) 
On such systems, + * without this last adjustment, our achievable + * receive throughput using the standard receive + * buffer size was cut by 25%(!!!). + */ + skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); + skb->len += skb->data_len; + + if (rx->flags & NETRXF_csum_blank) + skb->ip_summed = CHECKSUM_PARTIAL; + else if (rx->flags & NETRXF_data_validated) + skb->ip_summed = CHECKSUM_UNNECESSARY; + + __skb_queue_tail(&rxq, skb); + + np->rx.rsp_cons = ++i; + work_done++; + } + + while ((skb = __skb_dequeue(&errq))) + kfree_skb(skb); + + work_done -= handle_incoming_queue(dev, &rxq); + + /* If we get a callback with very few responses, reduce fill target. */ + /* NB. Note exponential increase, linear decrease. */ + if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > + ((3*np->rx_target) / 4)) && + (--np->rx_target < np->rx_min_target)) + np->rx_target = np->rx_min_target; + + xennet_alloc_rx_buffers(dev); + + *pbudget -= work_done; + dev->quota -= work_done; + + if (work_done < budget) { + local_irq_save(flags); + + RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do); + if (!more_to_do) + __netif_rx_complete(dev); + + local_irq_restore(flags); + } + + spin_unlock(&np->rx_lock); + + return more_to_do; +} + +static int xennet_change_mtu(struct net_device *dev, int mtu) +{ + int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; + + if (mtu > max) + return -EINVAL; + dev->mtu = mtu; + return 0; +} + +static void xennet_release_tx_bufs(struct netfront_info *np) +{ + struct sk_buff *skb; + int i; + + for (i = 0; i < NET_TX_RING_SIZE; i++) { + /* Skip over entries which are actually freelist references */ + if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET) + continue; + + skb = np->tx_skbs[i].skb; + gnttab_end_foreign_access_ref(np->grant_tx_ref[i], + GNTMAP_readonly); + gnttab_release_grant_reference(&np->gref_tx_head, + np->grant_tx_ref[i]); + np->grant_tx_ref[i] = GRANT_INVALID_REF; + add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i); + dev_kfree_skb_irq(skb); + } +} + +static void xennet_release_rx_bufs(struct netfront_info *np) +{ + struct mmu_update *mmu = np->rx_mmu; + struct multicall_entry *mcl = np->rx_mcl; + struct sk_buff_head free_list; + struct sk_buff *skb; + unsigned long mfn; + int xfer = 0, noxfer = 0, unused = 0; + int id, ref; + + dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n", + __func__); + return; + + skb_queue_head_init(&free_list); + + spin_lock_bh(&np->rx_lock); + + for (id = 0; id < NET_RX_RING_SIZE; id++) { + ref = np->grant_rx_ref[id]; + if (ref == GRANT_INVALID_REF) { + unused++; + continue; + } + + skb = np->rx_skbs[id]; + mfn = gnttab_end_foreign_transfer_ref(ref); + gnttab_release_grant_reference(&np->gref_rx_head, ref); + np->grant_rx_ref[id] = GRANT_INVALID_REF; + + if (0 == mfn) { + skb_shinfo(skb)->nr_frags = 0; + dev_kfree_skb(skb); + noxfer++; + continue; + } + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remap the page. 
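*/

/*
 * The two fill-target adjustments above (grow in xennet_alloc_rx_buffers(),
 * shrink at the end of xennet_poll()) implement exponential increase and
 * linear decrease. A compressed sketch of the policy, using the bounds this
 * patch defines (RX_MIN_TARGET 8, RX_DFL_MIN_TARGET 64, RX_MAX_TARGET 256):
 */
#include <stdio.h>

int main(void)
{
        unsigned target = 64, min = 8, max = 256;

        /* The ring nearly ran dry: double the target, clamped to max. */
        target *= 2;
        if (target > max)
                target = max;

        /* A poll saw very few responses: back off by one, clamped to min. */
        if (--target < min)
                target = min;

        printf("target now %u\n", target);      /* 64 -> 128 -> 127 */
        return 0;
}

/*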
*/ + struct page *page = skb_shinfo(skb)->frags[0].page; + unsigned long pfn = page_to_pfn(page); + void *vaddr = page_address(page); + + MULTI_update_va_mapping(mcl, (unsigned long)vaddr, + mfn_pte(mfn, PAGE_KERNEL), + 0); + mcl++; + mmu->ptr = ((u64)mfn << PAGE_SHIFT) + | MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + mmu++; + + set_phys_to_machine(pfn, mfn); + } + __skb_queue_tail(&free_list, skb); + xfer++; + } + + dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n", + __func__, xfer, noxfer, unused); + + if (xfer) { + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Do all the remapping work and M2P updates. */ + MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu, + 0, DOMID_SELF); + mcl++; + HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); + } + } + + while ((skb = __skb_dequeue(&free_list)) != NULL) + dev_kfree_skb(skb); + + spin_unlock_bh(&np->rx_lock); +} + +static void xennet_uninit(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + xennet_release_tx_bufs(np); + xennet_release_rx_bufs(np); + gnttab_free_grant_references(np->gref_tx_head); + gnttab_free_grant_references(np->gref_rx_head); +} + +static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev) +{ + int i, err; + struct net_device *netdev; + struct netfront_info *np; + + netdev = alloc_etherdev(sizeof(struct netfront_info)); + if (!netdev) { + printk(KERN_WARNING "%s> alloc_etherdev failed.\n", + __func__); + return ERR_PTR(-ENOMEM); + } + + np = netdev_priv(netdev); + np->xbdev = dev; + + spin_lock_init(&np->tx_lock); + spin_lock_init(&np->rx_lock); + + skb_queue_head_init(&np->rx_batch); + np->rx_target = RX_DFL_MIN_TARGET; + np->rx_min_target = RX_DFL_MIN_TARGET; + np->rx_max_target = RX_MAX_TARGET; + + init_timer(&np->rx_refill_timer); + np->rx_refill_timer.data = (unsigned long)netdev; + np->rx_refill_timer.function = rx_refill_timeout; + + /* Initialise tx_skbs as a free chain containing every entry. */ + np->tx_skb_freelist = 0; + for (i = 0; i < NET_TX_RING_SIZE; i++) { + np->tx_skbs[i].link = i+1; + np->grant_tx_ref[i] = GRANT_INVALID_REF; + } + + /* Clear out rx_skbs */ + for (i = 0; i < NET_RX_RING_SIZE; i++) { + np->rx_skbs[i] = NULL; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + } + + /* A grant for every tx ring slot */ + if (gnttab_alloc_grant_references(TX_MAX_TARGET, + &np->gref_tx_head) < 0) { + printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); + err = -ENOMEM; + goto exit; + } + /* A grant for every rx ring slot */ + if (gnttab_alloc_grant_references(RX_MAX_TARGET, + &np->gref_rx_head) < 0) { + printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); + err = -ENOMEM; + goto exit_free_tx; + } + + netdev->open = xennet_open; + netdev->hard_start_xmit = xennet_start_xmit; + netdev->stop = xennet_close; + netdev->get_stats = xennet_get_stats; + netdev->poll = xennet_poll; + netdev->uninit = xennet_uninit; + netdev->change_mtu = xennet_change_mtu; + netdev->weight = 64; + netdev->features = NETIF_F_IP_CSUM; + + SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &dev->dev); + + np->netdev = netdev; + + netif_carrier_off(netdev); + + return netdev; + + exit_free_tx: + gnttab_free_grant_references(np->gref_tx_head); + exit: + free_netdev(netdev); + return ERR_PTR(err); +} + +/** + * Entry point to this code when a new device is created. 
Allocate the basic + * structures and the ring buffers for communication with the backend, and + * inform the backend of the appropriate details for those. + */ +static int __devinit netfront_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct net_device *netdev; + struct netfront_info *info; + + netdev = xennet_create_dev(dev); + if (IS_ERR(netdev)) { + err = PTR_ERR(netdev); + xenbus_dev_fatal(dev, err, "creating netdev"); + return err; + } + + info = netdev_priv(netdev); + dev->dev.driver_data = info; + + err = register_netdev(info->netdev); + if (err) { + printk(KERN_WARNING "%s: register_netdev err=%d\n", + __func__, err); + goto fail; + } + + err = xennet_sysfs_addif(info->netdev); + if (err) { + unregister_netdev(info->netdev); + printk(KERN_WARNING "%s: add sysfs failed err=%d\n", + __func__, err); + goto fail; + } + + return 0; + + fail: + free_netdev(netdev); + dev->dev.driver_data = NULL; + return err; +} + +static void xennet_end_access(int ref, void *page) +{ + /* This frees the page as a side-effect */ + if (ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(ref, 0, (unsigned long)page); +} + +static void xennet_disconnect_backend(struct netfront_info *info) +{ + /* Stop old i/f to prevent errors whilst we rebuild the state. */ + spin_lock_bh(&info->rx_lock); + spin_lock_irq(&info->tx_lock); + netif_carrier_off(info->netdev); + spin_unlock_irq(&info->tx_lock); + spin_unlock_bh(&info->rx_lock); + + if (info->netdev->irq) + unbind_from_irqhandler(info->netdev->irq, info->netdev); + info->evtchn = info->netdev->irq = 0; + + /* End access and free the pages */ + xennet_end_access(info->tx_ring_ref, info->tx.sring); + xennet_end_access(info->rx_ring_ref, info->rx.sring); + + info->tx_ring_ref = GRANT_INVALID_REF; + info->rx_ring_ref = GRANT_INVALID_REF; + info->tx.sring = NULL; + info->rx.sring = NULL; +} + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our netif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. + */ +static int netfront_resume(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev.driver_data; + + dev_dbg(&dev->dev, "%s\n", dev->nodename); + + xennet_disconnect_backend(info); + return 0; +} + +static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) +{ + char *s, *e, *macstr; + int i; + + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); + if (IS_ERR(macstr)) + return PTR_ERR(macstr); + + for (i = 0; i < ETH_ALEN; i++) { + mac[i] = simple_strtoul(s, &e, 16); + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { + kfree(macstr); + return -ENOENT; + } + s = e+1; + } + + kfree(macstr); + return 0; +} + +static irqreturn_t xennet_interrupt(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct netfront_info *np = netdev_priv(dev); + unsigned long flags; + + spin_lock_irqsave(&np->tx_lock, flags); + + if (likely(netif_carrier_ok(dev))) { + xennet_tx_buf_gc(dev); + /* Under tx_lock: protects access to rx shared-ring indexes. 
*/ + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) + netif_rx_schedule(dev); + } + + spin_unlock_irqrestore(&np->tx_lock, flags); + + return IRQ_HANDLED; +} + +static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) +{ + struct xen_netif_tx_sring *txs; + struct xen_netif_rx_sring *rxs; + int err; + struct net_device *netdev = info->netdev; + + info->tx_ring_ref = GRANT_INVALID_REF; + info->rx_ring_ref = GRANT_INVALID_REF; + info->rx.sring = NULL; + info->tx.sring = NULL; + netdev->irq = 0; + + err = xen_net_read_mac(dev, netdev->dev_addr); + if (err) { + xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); + goto fail; + } + + txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL); + if (!txs) { + err = -ENOMEM; + xenbus_dev_fatal(dev, err, "allocating tx ring page"); + goto fail; + } + SHARED_RING_INIT(txs); + FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(txs)); + if (err < 0) { + free_page((unsigned long)txs); + goto fail; + } + + info->tx_ring_ref = err; + rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL); + if (!rxs) { + err = -ENOMEM; + xenbus_dev_fatal(dev, err, "allocating rx ring page"); + goto fail; + } + SHARED_RING_INIT(rxs); + FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); + + err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); + if (err < 0) { + free_page((unsigned long)rxs); + goto fail; + } + info->rx_ring_ref = err; + + err = xenbus_alloc_evtchn(dev, &info->evtchn); + if (err) + goto fail; + + err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt, + IRQF_SAMPLE_RANDOM, netdev->name, + netdev); + if (err < 0) + goto fail; + netdev->irq = err; + return 0; + + fail: + return err; +} + +/* Common code used when first setting up, and when resuming. */ +static int talk_to_backend(struct xenbus_device *dev, + struct netfront_info *info) +{ + const char *message; + struct xenbus_transaction xbt; + int err; + + /* Create shared ring, alloc event channel. 
*/ + err = setup_netfront(dev, info); + if (err) + goto out; + +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_ring; + } + + err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u", + info->tx_ring_ref); + if (err) { + message = "writing tx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u", + info->rx_ring_ref); + if (err) { + message = "writing rx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, dev->nodename, + "event-channel", "%u", info->evtchn); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", + 1); + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1); + if (err) { + message = "writing feature-rx-notify"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } + + err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1); + if (err) { + message = "writing feature-gso-tcpv4"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_ring; + } + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); + destroy_ring: + xennet_disconnect_backend(info); + out: + return err; +} + +static int xennet_set_sg(struct net_device *dev, u32 data) +{ + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", + "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } else if (dev->mtu > ETH_DATA_LEN) + dev->mtu = ETH_DATA_LEN; + + return ethtool_op_set_sg(dev, data); +} + +static int xennet_set_tso(struct net_device *dev, u32 data) +{ + if (data) { + struct netfront_info *np = netdev_priv(dev); + int val; + + if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-gso-tcpv4", "%d", &val) < 0) + val = 0; + if (!val) + return -ENOSYS; + } + + return ethtool_op_set_tso(dev, data); +} + +static void xennet_set_features(struct net_device *dev) +{ + /* Turn off all GSO bits except ROBUST. */ + dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; + dev->features |= NETIF_F_GSO_ROBUST; + xennet_set_sg(dev, 0); + + /* We need checksum offload to enable scatter/gather and TSO. */ + if (!(dev->features & NETIF_F_IP_CSUM)) + return; + + if (!xennet_set_sg(dev, 1)) + xennet_set_tso(dev, 1); +} + +static int xennet_connect(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + int i, requeue_idx, err; + struct sk_buff *skb; + grant_ref_t ref; + struct xen_netif_rx_request *req; + unsigned int feature_rx_copy; + + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, + "feature-rx-copy", "%u", &feature_rx_copy); + if (err != 1) + feature_rx_copy = 0; + + if (!feature_rx_copy) { + dev_info(&dev->dev, + "backend does not support copying receive path"); + return -ENODEV; + } + + err = talk_to_backend(np->xbdev, np); + if (err) + return err; + + xennet_set_features(dev); + + spin_lock_bh(&np->rx_lock); + spin_lock_irq(&np->tx_lock); + + /* Step 1: Discard all pending TX packet fragments. 
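*/

/*
 * talk_to_backend() above is the standard xenbus optimistic-transaction
 * shape: start a transaction, write all the keys, try to commit, and loop
 * back on -EAGAIN because another writer raced us. The same control flow,
 * reduced to hypothetical begin/write_keys/commit stubs (not xenbus calls):
 */
#include <stdio.h>
#include <errno.h>

static int begin(void) { return 0; }
static int write_keys(void) { return 0; }
static int commit(void)
{
        static int raced = 1;
        return raced-- > 0 ? -EAGAIN : 0;       /* fail once, then succeed */
}

int main(void)
{
        int err;

again:
        if ((err = begin()))
                return err;
        if ((err = write_keys()))
                return err;     /* the driver aborts the transaction here */
        err = commit();
        if (err == -EAGAIN)
                goto again;     /* somebody else committed first; redo */

        printf("committed: %d\n", err);
        return err;
}

/*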
*/ + xennet_release_tx_bufs(np); + + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { + if (!np->rx_skbs[i]) + continue; + + skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i); + ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); + req = RING_GET_REQUEST(&np->rx, requeue_idx); + + gnttab_grant_foreign_access_ref( + ref, np->xbdev->otherend_id, + pfn_to_mfn(page_to_pfn(skb_shinfo(skb)-> + frags->page)), + 0); + req->gref = ref; + req->id = requeue_idx; + + requeue_idx++; + } + + np->rx.req_prod_pvt = requeue_idx; + + /* + * Step 3: All public and private state should now be sane. Get + * ready to start sending and receiving packets and give the driver + * domain a kick because we've probably just requeued some + * packets. + */ + netif_carrier_on(np->netdev); + notify_remote_via_irq(np->netdev->irq); + xennet_tx_buf_gc(dev); + xennet_alloc_rx_buffers(dev); + + spin_unlock_irq(&np->tx_lock); + spin_unlock_bh(&np->rx_lock); + + return 0; +} + +/** + * Callback received when the backend's state changes. + */ +static void backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + struct netfront_info *np = dev->dev.driver_data; + struct net_device *netdev = np->netdev; + + dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state)); + + switch (backend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateConnected: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: + if (dev->state != XenbusStateInitialising) + break; + if (xennet_connect(netdev) != 0) + break; + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static struct ethtool_ops xennet_ethtool_ops = +{ + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = ethtool_op_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = xennet_set_sg, + .get_tso = ethtool_op_get_tso, + .set_tso = xennet_set_tso, + .get_link = ethtool_op_get_link, +}; + +#ifdef CONFIG_SYSFS +static ssize_t show_rxbuf_min(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *info = netdev_priv(netdev); + + return sprintf(buf, "%u\n", info->rx_min_target); +} + +static ssize_t store_rxbuf_min(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *np = netdev_priv(netdev); + char *endp; + unsigned long target; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + target = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EBADMSG; + + if (target < RX_MIN_TARGET) + target = RX_MIN_TARGET; + if (target > RX_MAX_TARGET) + target = RX_MAX_TARGET; + + spin_lock_bh(&np->rx_lock); + if (target > np->rx_max_target) + np->rx_max_target = target; + np->rx_min_target = target; + if (target > np->rx_target) + np->rx_target = target; + + xennet_alloc_rx_buffers(netdev); + + spin_unlock_bh(&np->rx_lock); + return len; +} + +static ssize_t show_rxbuf_max(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *info = netdev_priv(netdev); + + return sprintf(buf, "%u\n", info->rx_max_target); +} + +static ssize_t store_rxbuf_max(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct 
net_device *netdev = to_net_dev(dev); + struct netfront_info *np = netdev_priv(netdev); + char *endp; + unsigned long target; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + target = simple_strtoul(buf, &endp, 0); + if (endp == buf) + return -EBADMSG; + + if (target < RX_MIN_TARGET) + target = RX_MIN_TARGET; + if (target > RX_MAX_TARGET) + target = RX_MAX_TARGET; + + spin_lock_bh(&np->rx_lock); + if (target < np->rx_min_target) + np->rx_min_target = target; + np->rx_max_target = target; + if (target < np->rx_target) + np->rx_target = target; + + xennet_alloc_rx_buffers(netdev); + + spin_unlock_bh(&np->rx_lock); + return len; +} + +static ssize_t show_rxbuf_cur(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct net_device *netdev = to_net_dev(dev); + struct netfront_info *info = netdev_priv(netdev); + + return sprintf(buf, "%u\n", info->rx_target); +} + +static struct device_attribute xennet_attrs[] = { + __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min), + __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), + __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), +}; + +static int xennet_sysfs_addif(struct net_device *netdev) +{ + int i; + int err; + + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { + err = device_create_file(&netdev->dev, + &xennet_attrs[i]); + if (err) + goto fail; + } + return 0; + + fail: + while (--i >= 0) + device_remove_file(&netdev->dev, &xennet_attrs[i]); + return err; +} + +static void xennet_sysfs_delif(struct net_device *netdev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) + device_remove_file(&netdev->dev, &xennet_attrs[i]); +} + +#endif /* CONFIG_SYSFS */ + +static struct xenbus_device_id netfront_ids[] = { + { "vif" }, + { "" } +}; + + +static int __devexit xennet_remove(struct xenbus_device *dev) +{ + struct netfront_info *info = dev->dev.driver_data; + + dev_dbg(&dev->dev, "%s\n", dev->nodename); + + unregister_netdev(info->netdev); + + xennet_disconnect_backend(info); + + del_timer_sync(&info->rx_refill_timer); + + xennet_sysfs_delif(info->netdev); + + free_netdev(info->netdev); + + return 0; +} + +static struct xenbus_driver netfront = { + .name = "vif", + .owner = THIS_MODULE, + .ids = netfront_ids, + .probe = netfront_probe, + .remove = __devexit_p(xennet_remove), + .resume = netfront_resume, + .otherend_changed = backend_changed, +}; + +static int __init netif_init(void) +{ + if (!is_running_on_xen()) + return -ENODEV; + + if (is_initial_xendomain()) + return 0; + + printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); + + return xenbus_register_frontend(&netfront); +} +module_init(netif_init); + + +static void __exit netif_exit(void) +{ + if (is_initial_xendomain()) + return; + + return xenbus_unregister_driver(&netfront); +} +module_exit(netif_exit); + +MODULE_DESCRIPTION("Xen virtual network device frontend"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 03baf1c64a2..ed112ee1601 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -147,7 +147,7 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info) info->location_id, info->serial, info->capabilities); envp[i] = NULL; - value = call_usermodehelper (argv [0], argv, envp, 0); + value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC); kfree (buf); kfree (envp); return 0; diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c index a54e4140683..e821a155b65 100644 --- 
a/drivers/sbus/char/bbc_envctrl.c +++ b/drivers/sbus/char/bbc_envctrl.c @@ -7,6 +7,7 @@ #include <linux/kthread.h> #include <linux/delay.h> #include <linux/kmod.h> +#include <linux/reboot.h> #include <asm/oplib.h> #include <asm/ebus.h> @@ -170,8 +171,6 @@ static void get_current_temps(struct bbc_cpu_temperature *tp) static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) { static int shutting_down = 0; - static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; char *type = "???"; s8 val = -1; @@ -195,7 +194,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); shutting_down = 1; - if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0) + if (orderly_poweroff(true) < 0) printk(KERN_CRIT "envctrl: shutdown execution failed\n"); } diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c index 8328acab47f..dadabef116b 100644 --- a/drivers/sbus/char/envctrl.c +++ b/drivers/sbus/char/envctrl.c @@ -26,6 +26,7 @@ #include <linux/ioport.h> #include <linux/miscdevice.h> #include <linux/kmod.h> +#include <linux/reboot.h> #include <asm/ebus.h> #include <asm/uaccess.h> @@ -966,10 +967,6 @@ static struct i2c_child_t *envctrl_get_i2c_child(unsigned char mon_type) static void envctrl_do_shutdown(void) { static int inprog = 0; - static char *envp[] = { - "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; - char *argv[] = { - "/sbin/shutdown", "-h", "now", NULL }; int ret; if (inprog != 0) @@ -977,7 +974,7 @@ static void envctrl_do_shutdown(void) inprog = 1; printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n"); - ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0); + ret = orderly_poweroff(true); if (ret < 0) { printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); inprog = 0; /* unlikely to succeed, but we could try again */ diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile new file mode 100644 index 00000000000..56592f0d6ce --- /dev/null +++ b/drivers/xen/Makefile @@ -0,0 +1,2 @@ +obj-y += grant-table.o +obj-y += xenbus/ diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c new file mode 100644 index 00000000000..ea94dbabf9a --- /dev/null +++ b/drivers/xen/grant-table.c @@ -0,0 +1,582 @@ +/****************************************************************************** + * grant_table.c + * + * Granting foreign access to our memory reservation. + * + * Copyright (c) 2005-2006, Christopher Clark + * Copyright (c) 2004-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/uaccess.h> + +#include <xen/interface/xen.h> +#include <xen/page.h> +#include <xen/grant_table.h> + +#include <asm/pgtable.h> +#include <asm/sync_bitops.h> + + +/* External tools reserve first few grant table entries. */ +#define NR_RESERVED_ENTRIES 8 +#define GNTTAB_LIST_END 0xffffffff +#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) + +static grant_ref_t **gnttab_list; +static unsigned int nr_grant_frames; +static unsigned int boot_max_nr_grant_frames; +static int gnttab_free_count; +static grant_ref_t gnttab_free_head; +static DEFINE_SPINLOCK(gnttab_list_lock); + +static struct grant_entry *shared; + +static struct gnttab_free_callback *gnttab_free_callback_list; + +static int gnttab_expand(unsigned int req_entries); + +#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) + +static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) +{ + return &gnttab_list[(entry) / RPP][(entry) % RPP]; +} +/* This can be used as an l-value */ +#define gnttab_entry(entry) (*__gnttab_entry(entry)) + +static int get_free_entries(unsigned count) +{ + unsigned long flags; + int ref, rc; + grant_ref_t head; + + spin_lock_irqsave(&gnttab_list_lock, flags); + + if ((gnttab_free_count < count) && + ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) { + spin_unlock_irqrestore(&gnttab_list_lock, flags); + return rc; + } + + ref = head = gnttab_free_head; + gnttab_free_count -= count; + while (count-- > 1) + head = gnttab_entry(head); + gnttab_free_head = gnttab_entry(head); + gnttab_entry(head) = GNTTAB_LIST_END; + + spin_unlock_irqrestore(&gnttab_list_lock, flags); + + return ref; +} + +static void do_free_callbacks(void) +{ + struct gnttab_free_callback *callback, *next; + + callback = gnttab_free_callback_list; + gnttab_free_callback_list = NULL; + + while (callback != NULL) { + next = callback->next; + if (gnttab_free_count >= callback->count) { + callback->next = NULL; + callback->fn(callback->arg); + } else { + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + } + callback = next; + } +} + +static inline void check_free_callbacks(void) +{ + if (unlikely(gnttab_free_callback_list)) + do_free_callbacks(); +} + +static void put_free_entry(grant_ref_t ref) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = ref; + gnttab_free_count++; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} + +static void update_grant_entry(grant_ref_t ref, domid_t domid, + unsigned long frame, unsigned flags) +{ + /* + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. 
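+ * + * For example (illustrative values, not code from this patch): a + * frontend granting a backend read-only access to one of its pages + * effectively performs ent->domid = backend_domid; ent->frame = + * page_mfn; wmb(); ent->flags = GTF_permit_access | GTF_readonly.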
+ */ + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = flags; +} + +/* + * Public grant-issuing interface functions + */ +void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly) +{ + update_grant_entry(ref, domid, frame, + GTF_permit_access | (readonly ? GTF_readonly : 0)); +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); + +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, + int readonly) +{ + int ref; + + ref = get_free_entries(1); + if (unlikely(ref < 0)) + return -ENOSPC; + + gnttab_grant_foreign_access_ref(ref, domid, frame, readonly); + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); + +int gnttab_query_foreign_access(grant_ref_t ref) +{ + u16 nflags; + + nflags = shared[ref].flags; + + return (nflags & (GTF_reading|GTF_writing)); +} +EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); + +int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +{ + u16 flags, nflags; + + nflags = shared[ref].flags; + do { + flags = nflags; + if (flags & (GTF_reading|GTF_writing)) { + printk(KERN_ALERT "WARNING: g.e. still in use!\n"); + return 0; + } + } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); + + return 1; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); + +void gnttab_end_foreign_access(grant_ref_t ref, int readonly, + unsigned long page) +{ + if (gnttab_end_foreign_access_ref(ref, readonly)) { + put_free_entry(ref); + if (page != 0) + free_page(page); + } else { + /* XXX This needs to be fixed so that the ref and page are + placed on a list to be freed up later. */ + printk(KERN_WARNING + "WARNING: leaking g.e. and page still in use!\n"); + } +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_access); + +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn) +{ + int ref; + + ref = get_free_entries(1); + if (unlikely(ref < 0)) + return -ENOSPC; + gnttab_grant_foreign_transfer_ref(ref, domid, pfn); + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); + +void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, + unsigned long pfn) +{ + update_grant_entry(ref, domid, pfn, GTF_accept_transfer); +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); + +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) +{ + unsigned long frame; + u16 flags; + + /* + * If a transfer is not even yet started, try to reclaim the grant + * reference and return failure (== 0). + */ + while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { + if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) + return 0; + cpu_relax(); + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = shared[ref].flags; + cpu_relax(); + } + + rmb(); /* Read the frame number /after/ reading completion status. 
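The rmb() ensures the GTF_transfer_completed check above completes before shared[ref].frame is read below.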
*/ + frame = shared[ref].frame; + BUG_ON(frame == 0); + + return frame; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); + +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) +{ + unsigned long frame = gnttab_end_foreign_transfer_ref(ref); + put_free_entry(ref); + return frame; +} +EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer); + +void gnttab_free_grant_reference(grant_ref_t ref) +{ + put_free_entry(ref); +} +EXPORT_SYMBOL_GPL(gnttab_free_grant_reference); + +void gnttab_free_grant_references(grant_ref_t head) +{ + grant_ref_t ref; + unsigned long flags; + int count = 1; + if (head == GNTTAB_LIST_END) + return; + spin_lock_irqsave(&gnttab_list_lock, flags); + ref = head; + while (gnttab_entry(ref) != GNTTAB_LIST_END) { + ref = gnttab_entry(ref); + count++; + } + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = head; + gnttab_free_count += count; + check_free_callbacks(); + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_free_grant_references); + +int gnttab_alloc_grant_references(u16 count, grant_ref_t *head) +{ + int h = get_free_entries(count); + + if (h < 0) + return -ENOSPC; + + *head = h; + + return 0; +} +EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references); + +int gnttab_empty_grant_references(const grant_ref_t *private_head) +{ + return (*private_head == GNTTAB_LIST_END); +} +EXPORT_SYMBOL_GPL(gnttab_empty_grant_references); + +int gnttab_claim_grant_reference(grant_ref_t *private_head) +{ + grant_ref_t g = *private_head; + if (unlikely(g == GNTTAB_LIST_END)) + return -ENOSPC; + *private_head = gnttab_entry(g); + return g; +} +EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference); + +void gnttab_release_grant_reference(grant_ref_t *private_head, + grant_ref_t release) +{ + gnttab_entry(release) = *private_head; + *private_head = release; +} +EXPORT_SYMBOL_GPL(gnttab_release_grant_reference); + +void gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, u16 count) +{ + unsigned long flags; + spin_lock_irqsave(&gnttab_list_lock, flags); + if (callback->next) + goto out; + callback->fn = fn; + callback->arg = arg; + callback->count = count; + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + check_free_callbacks(); +out: + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_request_free_callback); + +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback) +{ + struct gnttab_free_callback **pcb; + unsigned long flags; + + spin_lock_irqsave(&gnttab_list_lock, flags); + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { + if (*pcb == callback) { + *pcb = callback->next; + break; + } + } + spin_unlock_irqrestore(&gnttab_list_lock, flags); +} +EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback); + +static int grow_gnttab_list(unsigned int more_frames) +{ + unsigned int new_nr_grant_frames, extra_entries, i; + + new_nr_grant_frames = nr_grant_frames + more_frames; + extra_entries = more_frames * GREFS_PER_GRANT_FRAME; + + for (i = nr_grant_frames; i < new_nr_grant_frames; i++) { + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC); + if (!gnttab_list[i]) + goto grow_nomem; + } + + + for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; + i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(i) = gnttab_free_head; + gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_count += extra_entries; + + nr_grant_frames = new_nr_grant_frames; + + 
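+ /* The freelist just grew; retry any callbacks queued via gnttab_request_free_callback(). */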
check_free_callbacks(); + + return 0; + +grow_nomem: + for ( ; i >= nr_grant_frames; i--) + free_page((unsigned long) gnttab_list[i]); + return -ENOMEM; +} + +static unsigned int __max_nr_grant_frames(void) +{ + struct gnttab_query_size query; + int rc; + + query.dom = DOMID_SELF; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); + if ((rc < 0) || (query.status != GNTST_okay)) + return 4; /* Legacy max supported number of frames */ + + return query.max_nr_frames; +} + +static inline unsigned int max_nr_grant_frames(void) +{ + unsigned int xen_max = __max_nr_grant_frames(); + + if (xen_max > boot_max_nr_grant_frames) + return boot_max_nr_grant_frames; + return xen_max; +} + +static int map_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + unsigned long **frames = (unsigned long **)data; + + set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + +static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + + set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; +} + +static int gnttab_map(unsigned int start_idx, unsigned int end_idx) +{ + struct gnttab_setup_table setup; + unsigned long *frames; + unsigned int nr_gframes = end_idx + 1; + int rc; + + frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); + if (!frames) + return -ENOMEM; + + setup.dom = DOMID_SELF; + setup.nr_frames = nr_gframes; + setup.frame_list = frames; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + if (rc == -ENOSYS) { + kfree(frames); + return -ENOSYS; + } + + BUG_ON(rc || setup.status); + + if (shared == NULL) { + struct vm_struct *area; + area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames()); + BUG_ON(area == NULL); + shared = area->addr; + } + rc = apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_gframes, + map_pte_fn, &frames); + BUG_ON(rc); + frames -= nr_gframes; /* adjust after map_pte_fn() */ + + kfree(frames); + + return 0; +} + +static int gnttab_resume(void) +{ + if (max_nr_grant_frames() < nr_grant_frames) + return -ENOSYS; + return gnttab_map(0, nr_grant_frames - 1); +} + +static int gnttab_suspend(void) +{ + apply_to_page_range(&init_mm, (unsigned long)shared, + PAGE_SIZE * nr_grant_frames, + unmap_pte_fn, NULL); + + return 0; +} + +static int gnttab_expand(unsigned int req_entries) +{ + int rc; + unsigned int cur, extra; + + cur = nr_grant_frames; + extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / + GREFS_PER_GRANT_FRAME); + if (cur + extra > max_nr_grant_frames()) + return -ENOSPC; + + rc = gnttab_map(cur, cur + extra - 1); + if (rc == 0) + rc = grow_gnttab_list(extra); + + return rc; +} + +static int __devinit gnttab_init(void) +{ + int i; + unsigned int max_nr_glist_frames; + unsigned int nr_init_grefs; + + if (!is_running_on_xen()) + return -ENODEV; + + nr_grant_frames = 1; + boot_max_nr_grant_frames = __max_nr_grant_frames(); + + /* Determine the maximum number of frames required for the + * grant reference free list on the current hypervisor. 
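+ * + * As a worked example: with 4 KiB pages, each grant frame holds 512 + * 8-byte grant entries while each freelist page holds 1024 + * grant_ref_t slots, so one freelist page covers two grant frames.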
+ */ + max_nr_glist_frames = (boot_max_nr_grant_frames * + GREFS_PER_GRANT_FRAME / + (PAGE_SIZE / sizeof(grant_ref_t))); + + gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), + GFP_KERNEL); + if (gnttab_list == NULL) + return -ENOMEM; + + for (i = 0; i < nr_grant_frames; i++) { + gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL); + if (gnttab_list[i] == NULL) + goto ini_nomem; + } + + if (gnttab_resume() < 0) + return -ENODEV; + + nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; + + for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; + gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; + gnttab_free_head = NR_RESERVED_ENTRIES; + + printk("Grant table initialized\n"); + return 0; + + ini_nomem: + for (i--; i >= 0; i--) + free_page((unsigned long)gnttab_list[i]); + kfree(gnttab_list); + return -ENOMEM; +} + +core_initcall(gnttab_init); diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile new file mode 100644 index 00000000000..5571f5b8422 --- /dev/null +++ b/drivers/xen/xenbus/Makefile @@ -0,0 +1,7 @@ +obj-y += xenbus.o + +xenbus-objs = +xenbus-objs += xenbus_client.o +xenbus-objs += xenbus_comms.o +xenbus-objs += xenbus_xs.o +xenbus-objs += xenbus_probe.o diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c new file mode 100644 index 00000000000..9fd2f70ab46 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_client.c @@ -0,0 +1,569 @@ +/****************************************************************************** + * Client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code, be it the + * frontend or the backend of that driver. + * + * Copyright (C) 2005 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <linux/types.h> +#include <linux/vmalloc.h> +#include <asm/xen/hypervisor.h> +#include <xen/interface/xen.h> +#include <xen/interface/event_channel.h> +#include <xen/events.h> +#include <xen/grant_table.h> +#include <xen/xenbus.h> + +const char *xenbus_strstate(enum xenbus_state state) +{ + static const char *const name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + }; + return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; +} +EXPORT_SYMBOL_GPL(xenbus_strstate); + +/** + * xenbus_watch_path - register a watch + * @dev: xenbus device + * @path: path to watch + * @watch: watch to register + * @callback: callback to register + * + * Register a @watch on the given path, using the given xenbus_watch structure + * for storage, and the given @callback function as the callback. Return 0 on + * success, or -errno on error. On success, the given @path will be saved as + * @watch->node, and remains the caller's to free. On error, @watch->node will + * be NULL, the device will switch to %XenbusStateClosing, and the error will + * be saved in the store. + */ +int xenbus_watch_path(struct xenbus_device *dev, const char *path, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) +{ + int err; + + watch->node = path; + watch->callback = callback; + + err = register_xenbus_watch(watch); + + if (err) { + watch->node = NULL; + watch->callback = NULL; + xenbus_dev_fatal(dev, err, "adding watch on %s", path); + } + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_path); + + +/** + * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path + * @dev: xenbus device + * @watch: watch to register + * @callback: callback to register + * @pathfmt: format of path to watch + * + * Register a watch on the path built from @pathfmt, using the given + * xenbus_watch structure for storage, and the given @callback function as the + * callback. Return 0 on success, or -errno on error. On success, the generated + * path will be saved as @watch->node, and becomes the caller's to kfree(). + * On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to %XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_watch_pathfmt(struct xenbus_device *dev, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int), + const char *pathfmt, ...) +{ + int err; + va_list ap; + char *path; + + va_start(ap, pathfmt); + path = kvasprintf(GFP_KERNEL, pathfmt, ap); + va_end(ap); + + if (!path) { + xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); + return -ENOMEM; + } + err = xenbus_watch_path(dev, path, watch, callback); + + if (err) + kfree(path); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); + + +/** + * xenbus_switch_state + * @dev: xenbus device + * @state: new state + * + * Advertise in the store a change of the given driver to the given @state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store.
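+ * + * A typical frontend, for instance, calls this to advertise + * XenbusStateConnected once its rings are set up; see backend_changed() + * in the netfront driver above.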
+ */ +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) +{ + /* We check whether the state is currently set to the given value, and + if not, then the state is set. We don't want to unconditionally + write the given state, because we don't want to fire watches + unnecessarily. Furthermore, if the node has gone, we don't write + to it, as the device will be tearing down, and we don't want to + resurrect that directory. + + Note that, because of this cached value of our state, this function + will not work inside a Xenstore transaction (something it + tried to do in the past) because dev->state would not get reset if + the transaction was aborted. + + */ + + int current_state; + int err; + + if (state == dev->state) + return 0; + + err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d", + &current_state); + if (err != 1) + return 0; + + err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state); + if (err) { + if (state != XenbusStateClosing) /* Avoid looping */ + xenbus_dev_fatal(dev, err, "writing new state"); + return err; + } + + dev->state = state; + + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_switch_state); + +int xenbus_frontend_closed(struct xenbus_device *dev) +{ + xenbus_switch_state(dev, XenbusStateClosed); + complete(&dev->down); + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_frontend_closed); + +/** + * Return the path to the error node for the given device, or NULL on failure. + * If the value returned is non-NULL, then it is the caller's to kfree. + */ +static char *error_path(struct xenbus_device *dev) +{ + return kasprintf(GFP_KERNEL, "error/%s", dev->nodename); +} + + +static void xenbus_va_dev_error(struct xenbus_device *dev, int err, + const char *fmt, va_list ap) +{ + int ret; + unsigned int len; + char *printf_buffer = NULL; + char *path_buffer = NULL; + +#define PRINTF_BUFFER_SIZE 4096 + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + goto fail; + + len = sprintf(printf_buffer, "%i ", -err); + ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); + + BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1); + + dev_err(&dev->dev, "%s\n", printf_buffer); + + path_buffer = error_path(dev); + + if (path_buffer == NULL) { + dev_err(&dev->dev, "failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + + if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { + dev_err(&dev->dev, "failed to write error node for %s (%s)\n", + dev->nodename, printf_buffer); + goto fail; + } + +fail: + kfree(printf_buffer); + kfree(path_buffer); +} + + +/** + * xenbus_dev_error + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Report the given negative errno into the store, along with the given + * formatted message. + */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); +} +EXPORT_SYMBOL_GPL(xenbus_dev_error); + +/** + * xenbus_dev_fatal + * @dev: xenbus device + * @err: error to report + * @fmt: error message format + * + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ + +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
+{ + va_list ap; + + va_start(ap, fmt); + xenbus_va_dev_error(dev, err, fmt, ap); + va_end(ap); + + xenbus_switch_state(dev, XenbusStateClosing); +} +EXPORT_SYMBOL_GPL(xenbus_dev_fatal); + +/** + * xenbus_grant_ring + * @dev: xenbus device + * @ring_mfn: mfn of ring to grant + + * Grant access to the given @ring_mfn to the peer of the given device. Return + * 0 on success, or -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) +{ + int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); + if (err < 0) + xenbus_dev_fatal(dev, err, "granting access to ring page"); + return err; +} +EXPORT_SYMBOL_GPL(xenbus_grant_ring); + + +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or -errno on error. On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = dev->otherend_id; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err) + xenbus_dev_fatal(dev, err, "allocating event channel"); + else + *port = alloc_unbound.port; + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); + + +/** + * Bind to an existing interdomain event channel in another domain. Returns 0 + * on success and stores the local port in *port. On error, returns -errno, + * switches the device to XenbusStateClosing, and saves the error in XenStore. + */ +int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port) +{ + struct evtchn_bind_interdomain bind_interdomain; + int err; + + bind_interdomain.remote_dom = dev->otherend_id; + bind_interdomain.remote_port = remote_port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (err) + xenbus_dev_fatal(dev, err, + "binding to event channel %d from domain %d", + remote_port, dev->otherend_id); + else + *port = bind_interdomain.local_port; + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); + + +/** + * Free an existing event channel. Returns 0 on success or -errno on error. + */ +int xenbus_free_evtchn(struct xenbus_device *dev, int port) +{ + struct evtchn_close close; + int err; + + close.port = port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if (err) + xenbus_dev_error(dev, err, "freeing event channel %d", port); + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_free_evtchn); + + +/** + * xenbus_map_ring_valloc + * @dev: xenbus device + * @gnt_ref: grant reference + * @vaddr: pointer to address to be filled out by mapping + * + * Based on Rusty Russell's skeleton driver's map_page. + * Map a page of memory into this domain from another domain's grant table. + * xenbus_map_ring_valloc allocates a page of virtual address space, maps the + * page to that address, and sets *vaddr to that address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. 
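+ * + * Sketch of typical use (names illustrative; gnt_ref would have been + * read from the other end's XenStore entries): + * + * void *ring; + * err = xenbus_map_ring_valloc(dev, gnt_ref, &ring); + * ... use ring ... + * xenbus_unmap_ring_vfree(dev, ring);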
+ */ +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +{ + struct gnttab_map_grant_ref op = { + .flags = GNTMAP_host_map, + .ref = gnt_ref, + .dom = dev->otherend_id, + }; + struct vm_struct *area; + + *vaddr = NULL; + + area = alloc_vm_area(PAGE_SIZE); + if (!area) + return -ENOMEM; + + op.host_addr = (unsigned long)area->addr; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) { + free_vm_area(area); + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + return op.status; + } + + /* Stuff the handle in an unused field */ + area->phys_addr = (unsigned long)op.handle; + + *vaddr = area->addr; + return 0; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + + +/** + * xenbus_map_ring + * @dev: xenbus device + * @gnt_ref: grant reference + * @handle: pointer to grant handle to be filled + * @vaddr: address to be mapped to + * + * Map a page of memory into this domain from another domain's grant table. + * xenbus_map_ring does not allocate the virtual address space (you must do + * this yourself!). It only maps in the page to the specified address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. + */ +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, + grant_handle_t *handle, void *vaddr) +{ + struct gnttab_map_grant_ref op = { + .host_addr = (unsigned long)vaddr, + .flags = GNTMAP_host_map, + .ref = gnt_ref, + .dom = dev->otherend_id, + }; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) { + xenbus_dev_fatal(dev, op.status, + "mapping in shared page %d from domain %d", + gnt_ref, dev->otherend_id); + } else + *handle = op.handle; + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_map_ring); + + +/** + * xenbus_unmap_ring_vfree + * @dev: xenbus device + * @vaddr: addr to unmap + * + * Based on Rusty Russell's skeleton driver's unmap_page. + * Unmap a page of memory in this domain that was imported from another domain. + * Use xenbus_unmap_ring_vfree if you mapped in your memory with + * xenbus_map_ring_valloc (it will free the virtual address space). + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) +{ + struct vm_struct *area; + struct gnttab_unmap_grant_ref op = { + .host_addr = (unsigned long)vaddr, + }; + + /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) + * method so that we don't have to muck with vmalloc internals here. + * We could force the user to hang on to their struct vm_struct from + * xenbus_map_ring_valloc, but these 6 lines considerably simplify + * this API. 
+ */ + read_lock(&vmlist_lock); + for (area = vmlist; area != NULL; area = area->next) { + if (area->addr == vaddr) + break; + } + read_unlock(&vmlist_lock); + + if (!area) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + op.handle = (grant_handle_t)area->phys_addr; + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); + + if (op.status == GNTST_okay) + free_vm_area(area); + else + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + (int16_t)area->phys_addr, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + + +/** + * xenbus_unmap_ring + * @dev: xenbus device + * @handle: grant handle + * @vaddr: addr to unmap + * + * Unmap a page of memory in this domain that was imported from another domain. + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring(struct xenbus_device *dev, + grant_handle_t handle, void *vaddr) +{ + struct gnttab_unmap_grant_ref op = { + .host_addr = (unsigned long)vaddr, + .handle = handle, + }; + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); + + if (op.status != GNTST_okay) + xenbus_dev_error(dev, op.status, + "unmapping page at handle %d error %d", + handle, op.status); + + return op.status; +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring); + + +/** + * xenbus_read_driver_state + * @path: path for driver + * + * Return the state of the driver rooted at the given store path, or + * XenbusStateUnknown if no state can be read. + */ +enum xenbus_state xenbus_read_driver_state(const char *path) +{ + enum xenbus_state result; + int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); + if (err) + result = XenbusStateUnknown; + + return result; +} +EXPORT_SYMBOL_GPL(xenbus_read_driver_state); diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c new file mode 100644 index 00000000000..6efbe3f29ca --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -0,0 +1,233 @@ +/****************************************************************************** + * xenbus_comms.c + * + * Low level code to talk to Xen Store: ringbuffer and event channel. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/err.h> +#include <xen/xenbus.h> +#include <asm/xen/hypervisor.h> +#include <xen/events.h> +#include <xen/page.h> +#include "xenbus_comms.h" + +static int xenbus_irq; + +static DECLARE_WORK(probe_work, xenbus_probe); + +static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); + +static irqreturn_t wake_waiting(int irq, void *unused) +{ + if (unlikely(xenstored_ready == 0)) { + xenstored_ready = 1; + schedule_work(&probe_work); + } + + wake_up(&xb_waitq); + return IRQ_HANDLED; +} + +static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) +{ + return ((prod - cons) <= XENSTORE_RING_SIZE); +} + +static void *get_output_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + char *buf, uint32_t *len) +{ + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); + if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) + *len = XENSTORE_RING_SIZE - (prod - cons); + return buf + MASK_XENSTORE_IDX(prod); +} + +static const void *get_input_chunk(XENSTORE_RING_IDX cons, + XENSTORE_RING_IDX prod, + const char *buf, uint32_t *len) +{ + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); + if ((prod - cons) < *len) + *len = prod - cons; + return buf + MASK_XENSTORE_IDX(cons); +} + +/** + * xb_write - low level write + * @data: buffer to send + * @len: length of buffer + * + * Returns 0 on success, error otherwise. + */ +int xb_write(const void *data, unsigned len) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + XENSTORE_RING_IDX cons, prod; + int rc; + + while (len != 0) { + void *dst; + unsigned int avail; + + rc = wait_event_interruptible( + xb_waitq, + (intf->req_prod - intf->req_cons) != + XENSTORE_RING_SIZE); + if (rc < 0) + return rc; + + /* Read indexes, then verify. */ + cons = intf->req_cons; + prod = intf->req_prod; + if (!check_indexes(cons, prod)) { + intf->req_cons = intf->req_prod = 0; + return -EIO; + } + + dst = get_output_chunk(cons, prod, intf->req, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + /* Must write data /after/ reading the consumer index. */ + mb(); + + memcpy(dst, data, avail); + data += avail; + len -= avail; + + /* Other side must not see new producer until data is there. */ + wmb(); + intf->req_prod += avail; + + /* Implies mb(): other side will see the updated producer. */ + notify_remote_via_evtchn(xen_store_evtchn); + } + + return 0; +} + +int xb_data_to_read(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + return (intf->rsp_cons != intf->rsp_prod); +} + +int xb_wait_for_data_to_read(void) +{ + return wait_event_interruptible(xb_waitq, xb_data_to_read()); +} + +int xb_read(void *data, unsigned len) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + XENSTORE_RING_IDX cons, prod; + int rc; + + while (len != 0) { + unsigned int avail; + const char *src; + + rc = xb_wait_for_data_to_read(); + if (rc < 0) + return rc; + + /* Read indexes, then verify. 
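A producer index more than XENSTORE_RING_SIZE ahead of the consumer can only mean a corrupt ring, in which case the indexes are reset and -EIO returned.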
*/ + cons = intf->rsp_cons; + prod = intf->rsp_prod; + if (!check_indexes(cons, prod)) { + intf->rsp_cons = intf->rsp_prod = 0; + return -EIO; + } + + src = get_input_chunk(cons, prod, intf->rsp, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + /* Must read data /after/ reading the producer index. */ + rmb(); + + memcpy(data, src, avail); + data += avail; + len -= avail; + + /* Other side must not see free space until we've copied out */ + mb(); + intf->rsp_cons += avail; + + pr_debug("Finished read of %i bytes (%i to go)\n", avail, len); + + /* Implies mb(): other side will see the updated consumer. */ + notify_remote_via_evtchn(xen_store_evtchn); + } + + return 0; +} + +/** + * xb_init_comms - Set up interrupt handler off store event channel. + */ +int xb_init_comms(void) +{ + struct xenstore_domain_interface *intf = xen_store_interface; + int err; + + if (intf->req_prod != intf->req_cons) + printk(KERN_ERR "XENBUS request ring is not quiescent " + "(%08x:%08x)!\n", intf->req_cons, intf->req_prod); + + if (intf->rsp_prod != intf->rsp_cons) { + printk(KERN_WARNING "XENBUS response ring is not quiescent " + "(%08x:%08x): fixing up\n", + intf->rsp_cons, intf->rsp_prod); + intf->rsp_cons = intf->rsp_prod; + } + + if (xenbus_irq) + unbind_from_irqhandler(xenbus_irq, &xb_waitq); + + err = bind_evtchn_to_irqhandler( + xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err <= 0) { + printk(KERN_ERR "XENBUS request irq failed %i\n", err); + return err; + } + + xenbus_irq = err; + + return 0; +} diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h new file mode 100644 index 00000000000..c21db751373 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_comms.h @@ -0,0 +1,46 @@ +/* + * Private include for xenbus communications. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XENBUS_COMMS_H +#define _XENBUS_COMMS_H + +int xs_init(void); +int xb_init_comms(void); + +/* Low level routines. 
*/ +int xb_write(const void *data, unsigned len); +int xb_read(void *data, unsigned len); +int xb_data_to_read(void); +int xb_wait_for_data_to_read(void); +int xs_input_avail(void); +extern struct xenstore_domain_interface *xen_store_interface; +extern int xen_store_evtchn; + +#endif /* _XENBUS_COMMS_H */ diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c new file mode 100644 index 00000000000..0b769f7c4a4 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -0,0 +1,935 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#define DPRINTK(fmt, args...) \ + pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ + __func__, __LINE__, ##args) + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/fcntl.h> +#include <linux/mm.h> +#include <linux/notifier.h> +#include <linux/kthread.h> +#include <linux/mutex.h> +#include <linux/io.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/xen/hypervisor.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/page.h> + +#include "xenbus_comms.h" +#include "xenbus_probe.h" + +int xen_store_evtchn; +struct xenstore_domain_interface *xen_store_interface; +static unsigned long xen_store_mfn; + +static BLOCKING_NOTIFIER_HEAD(xenstore_chain); + +static void wait_for_devices(struct xenbus_driver *xendrv); + +static int xenbus_probe_frontend(const char *type, const char *name); + +static void xenbus_dev_shutdown(struct device *_dev); + +/* If something in array of ids matches this device, return it. 
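The array is terminated by an entry whose devicetype is the empty string, e.g. { { "vif" }, { "" } } in the netfront driver above.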
*/ +static const struct xenbus_device_id * +match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) +{ + for (; *arr->devicetype != '\0'; arr++) { + if (!strcmp(arr->devicetype, dev->devicetype)) + return arr; + } + return NULL; +} + +int xenbus_match(struct device *_dev, struct device_driver *_drv) +{ + struct xenbus_driver *drv = to_xenbus_driver(_drv); + + if (!drv->ids) + return 0; + + return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; +} + +/* device/<type>/<id> => <type>-<id> */ +static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) +{ + nodename = strchr(nodename, '/'); + if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) { + printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); + return -EINVAL; + } + + strlcpy(bus_id, nodename + 1, BUS_ID_SIZE); + if (!strchr(bus_id, '/')) { + printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); + return -EINVAL; + } + *strchr(bus_id, '/') = '-'; + return 0; +} + + +static void free_otherend_details(struct xenbus_device *dev) +{ + kfree(dev->otherend); + dev->otherend = NULL; +} + + +static void free_otherend_watch(struct xenbus_device *dev) +{ + if (dev->otherend_watch.node) { + unregister_xenbus_watch(&dev->otherend_watch); + kfree(dev->otherend_watch.node); + dev->otherend_watch.node = NULL; + } +} + + +int read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node) +{ + int err = xenbus_gather(XBT_NIL, xendev->nodename, + id_node, "%i", &xendev->otherend_id, + path_node, NULL, &xendev->otherend, + NULL); + if (err) { + xenbus_dev_fatal(xendev, err, + "reading other end details from %s", + xendev->nodename); + return err; + } + if (strlen(xendev->otherend) == 0 || + !xenbus_exists(XBT_NIL, xendev->otherend, "")) { + xenbus_dev_fatal(xendev, -ENOENT, + "unable to read other end from %s. " + "missing or inaccessible.", + xendev->nodename); + free_otherend_details(xendev); + return -ENOENT; + } + + return 0; +} + + +static int read_backend_details(struct xenbus_device *xendev) +{ + return read_otherend_details(xendev, "backend-id", "backend"); +} + + +/* Bus type for frontend drivers. */ +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/<id> */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, + .bus = { + .name = "xen", + .match = xenbus_match, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + }, +}; + +static void otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xenbus_device *dev = + container_of(watch, struct xenbus_device, otherend_watch); + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + enum xenbus_state state; + + /* Protect us against watches firing on old details when the otherend + details change, say immediately after a resume. */ + if (!dev->otherend || + strncmp(dev->otherend, vec[XS_WATCH_PATH], + strlen(dev->otherend))) { + dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]); + return; + } + + state = xenbus_read_driver_state(dev->otherend); + + dev_dbg(&dev->dev, "state is %d, (%s), %s, %s", + state, xenbus_strstate(state), dev->otherend_watch.node, + vec[XS_WATCH_PATH]); + + /* + * Ignore xenbus transitions during shutdown. This prevents us doing + * work that can fail e.g., when the rootfs is gone. 
+ */ + if (system_state > SYSTEM_RUNNING) { + struct xen_bus_type *bus = bus; + bus = container_of(dev->dev.bus, struct xen_bus_type, bus); + /* If we're frontend, drive the state machine to Closed. */ + /* This should cause the backend to release our resources. */ + if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) + xenbus_frontend_closed(dev); + return; + } + + if (drv->otherend_changed) + drv->otherend_changed(dev, state); +} + + +static int talk_to_otherend(struct xenbus_device *dev) +{ + struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver); + + free_otherend_watch(dev); + free_otherend_details(dev); + + return drv->read_otherend_details(dev); +} + + +static int watch_otherend(struct xenbus_device *dev) +{ + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, + "%s/%s", dev->otherend, "state"); +} + + +int xenbus_dev_probe(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); + const struct xenbus_device_id *id; + int err; + + DPRINTK("%s", dev->nodename); + + if (!drv->probe) { + err = -ENODEV; + goto fail; + } + + id = match_device(drv->ids, dev); + if (!id) { + err = -ENODEV; + goto fail; + } + + err = talk_to_otherend(dev); + if (err) { + dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n", + dev->nodename); + return err; + } + + err = drv->probe(dev, id); + if (err) + goto fail; + + err = watch_otherend(dev); + if (err) { + dev_warn(&dev->dev, "watch_otherend on %s failed.\n", + dev->nodename); + return err; + } + + return 0; +fail: + xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); + xenbus_switch_state(dev, XenbusStateClosed); + return -ENODEV; +} + +int xenbus_dev_remove(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); + + DPRINTK("%s", dev->nodename); + + free_otherend_watch(dev); + free_otherend_details(dev); + + if (drv->remove) + drv->remove(dev); + + xenbus_switch_state(dev, XenbusStateClosed); + return 0; +} + +static void xenbus_dev_shutdown(struct device *_dev) +{ + struct xenbus_device *dev = to_xenbus_device(_dev); + unsigned long timeout = 5*HZ; + + DPRINTK("%s", dev->nodename); + + get_device(&dev->dev); + if (dev->state != XenbusStateConnected) { + printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__, + dev->nodename, xenbus_strstate(dev->state)); + goto out; + } + xenbus_switch_state(dev, XenbusStateClosing); + timeout = wait_for_completion_timeout(&dev->down, timeout); + if (!timeout) + printk(KERN_INFO "%s: %s timeout closing device\n", + __func__, dev->nodename); + out: + put_device(&dev->dev); +} + +int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name) +{ + drv->driver.name = drv->name; + drv->driver.bus = &bus->bus; + drv->driver.owner = owner; + drv->driver.mod_name = mod_name; + + return driver_register(&drv->driver); +} + +int __xenbus_register_frontend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) +{ + int ret; + + drv->read_otherend_details = read_backend_details; + + ret = xenbus_register_driver_common(drv, &xenbus_frontend, + owner, mod_name); + if (ret) + return ret; + + /* If this driver is loaded as a module wait for devices to attach. 
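(see wait_for_devices() below; it waits on the same sort of 10 second timeout described for boot-time device discovery at the end of this file).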
*/ + wait_for_devices(drv); + + return 0; +} +EXPORT_SYMBOL_GPL(__xenbus_register_frontend); + +void xenbus_unregister_driver(struct xenbus_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(xenbus_unregister_driver); + +struct xb_find_info +{ + struct xenbus_device *dev; + const char *nodename; +}; + +static int cmp_dev(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct xb_find_info *info = data; + + if (!strcmp(xendev->nodename, info->nodename)) { + info->dev = xendev; + get_device(dev); + return 1; + } + return 0; +} + +struct xenbus_device *xenbus_device_find(const char *nodename, + struct bus_type *bus) +{ + struct xb_find_info info = { .dev = NULL, .nodename = nodename }; + + bus_for_each_dev(bus, NULL, &info, cmp_dev); + return info.dev; +} + +static int cleanup_dev(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct xb_find_info *info = data; + int len = strlen(info->nodename); + + DPRINTK("%s", info->nodename); + + /* Match the info->nodename path, or any subdirectory of that path. */ + if (strncmp(xendev->nodename, info->nodename, len)) + return 0; + + /* If the node name is longer, ensure it really is a subdirectory. */ + if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/')) + return 0; + + info->dev = xendev; + get_device(dev); + return 1; +} + +static void xenbus_cleanup_devices(const char *path, struct bus_type *bus) +{ + struct xb_find_info info = { .nodename = path }; + + do { + info.dev = NULL; + bus_for_each_dev(bus, NULL, &info, cleanup_dev); + if (info.dev) { + device_unregister(&info.dev->dev); + put_device(&info.dev->dev); + } + } while (info.dev); +} + +static void xenbus_dev_release(struct device *dev) +{ + if (dev) + kfree(to_xenbus_device(dev)); +} + +static ssize_t xendev_show_nodename(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); +} +DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); + +static ssize_t xendev_show_devtype(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); +} +DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); + + +int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename) +{ + int err; + struct xenbus_device *xendev; + size_t stringlen; + char *tmpstring; + + enum xenbus_state state = xenbus_read_driver_state(nodename); + + if (state != XenbusStateInitialising) { + /* Device is not new, so ignore it. This can happen if a + device is going away after switching to Closed. */ + return 0; + } + + stringlen = strlen(nodename) + 1 + strlen(type) + 1; + xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL); + if (!xendev) + return -ENOMEM; + + xendev->state = XenbusStateInitialising; + + /* Copy the strings into the extra space. */ + + tmpstring = (char *)(xendev + 1); + strcpy(tmpstring, nodename); + xendev->nodename = tmpstring; + + tmpstring += strlen(tmpstring) + 1; + strcpy(tmpstring, type); + xendev->devicetype = tmpstring; + init_completion(&xendev->down); + + xendev->dev.bus = &bus->bus; + xendev->dev.release = xenbus_dev_release; + + err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename); + if (err) + goto fail; + + /* Register with generic device framework. 
*/ + err = device_register(&xendev->dev); + if (err) + goto fail; + + err = device_create_file(&xendev->dev, &dev_attr_nodename); + if (err) + goto fail_unregister; + + err = device_create_file(&xendev->dev, &dev_attr_devtype); + if (err) + goto fail_remove_file; + + return 0; +fail_remove_file: + device_remove_file(&xendev->dev, &dev_attr_nodename); +fail_unregister: + device_unregister(&xendev->dev); +fail: + kfree(xendev); + return err; +} + +/* device/<typename>/<name> */ +static int xenbus_probe_frontend(const char *type, const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", + xenbus_frontend.root, type, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s", nodename); + + err = xenbus_probe_node(&xenbus_frontend, type, nodename); + kfree(nodename); + return err; +} + +static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) +{ + int err = 0; + char **dir; + unsigned int dir_n = 0; + int i; + + dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = bus->probe(type, dir[i]); + if (err) + break; + } + kfree(dir); + return err; +} + +int xenbus_probe_devices(struct xen_bus_type *bus) +{ + int err = 0; + char **dir; + unsigned int i, dir_n; + + dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_device_type(bus, dir[i]); + if (err) + break; + } + kfree(dir); + return err; +} + +static unsigned int char_count(const char *str, char c) +{ + unsigned int i, ret = 0; + + for (i = 0; str[i]; i++) + if (str[i] == c) + ret++; + return ret; +} + +static int strsep_len(const char *str, char c, unsigned int len) +{ + unsigned int i; + + for (i = 0; str[i]; i++) + if (str[i] == c) { + if (len == 0) + return i; + len--; + } + return (len == 0) ? i : -ERANGE; +} + +void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) +{ + int exists, rootlen; + struct xenbus_device *dev; + char type[BUS_ID_SIZE]; + const char *p, *root; + + if (char_count(node, '/') < 2) + return; + + exists = xenbus_exists(XBT_NIL, node, ""); + if (!exists) { + xenbus_cleanup_devices(node, &bus->bus); + return; + } + + /* backend/<type>/... or device/<type>/... */ + p = strchr(node, '/') + 1; + snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p); + type[BUS_ID_SIZE-1] = '\0'; + + rootlen = strsep_len(node, '/', bus->levels); + if (rootlen < 0) + return; + root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node); + if (!root) + return; + + dev = xenbus_device_find(root, &bus->bus); + if (!dev) + xenbus_probe_node(bus, type, root); + else + put_device(&dev->dev); + + kfree(root); +} + +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); +} + +/* We watch for devices appearing and vanishing. 
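A single watch on the "device" subtree covers all frontend types; frontend_changed() forwards each event to xenbus_dev_changed(), which creates or cleans up the corresponding xenbus_device.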
*/ +static struct xenbus_watch fe_watch = { + .node = "device", + .callback = frontend_changed, +}; + +static int suspend_dev(struct device *dev, void *data) +{ + int err = 0; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + if (dev->driver == NULL) + return 0; + drv = to_xenbus_driver(dev->driver); + xdev = container_of(dev, struct xenbus_device, dev); + if (drv->suspend) + err = drv->suspend(xdev); + if (err) + printk(KERN_WARNING + "xenbus: suspend %s failed: %i\n", dev->bus_id, err); + return 0; +} + +static int suspend_cancel_dev(struct device *dev, void *data) +{ + int err = 0; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + if (dev->driver == NULL) + return 0; + drv = to_xenbus_driver(dev->driver); + xdev = container_of(dev, struct xenbus_device, dev); + if (drv->suspend_cancel) + err = drv->suspend_cancel(xdev); + if (err) + printk(KERN_WARNING + "xenbus: suspend_cancel %s failed: %i\n", + dev->bus_id, err); + return 0; +} + +static int resume_dev(struct device *dev, void *data) +{ + int err; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + if (dev->driver == NULL) + return 0; + + drv = to_xenbus_driver(dev->driver); + xdev = container_of(dev, struct xenbus_device, dev); + + err = talk_to_otherend(xdev); + if (err) { + printk(KERN_WARNING + "xenbus: resume (talk_to_otherend) %s failed: %i\n", + dev->bus_id, err); + return err; + } + + xdev->state = XenbusStateInitialising; + + if (drv->resume) { + err = drv->resume(xdev); + if (err) { + printk(KERN_WARNING + "xenbus: resume %s failed: %i\n", + dev->bus_id, err); + return err; + } + } + + err = watch_otherend(xdev); + if (err) { + printk(KERN_WARNING + "xenbus_probe: resume (watch_otherend) %s failed: " + "%d.\n", dev->bus_id, err); + return err; + } + + return 0; +} + +void xenbus_suspend(void) +{ + DPRINTK(""); + + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); + xenbus_backend_suspend(suspend_dev); + xs_suspend(); +} +EXPORT_SYMBOL_GPL(xenbus_suspend); + +void xenbus_resume(void) +{ + xb_init_comms(); + xs_resume(); + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); + xenbus_backend_resume(resume_dev); +} +EXPORT_SYMBOL_GPL(xenbus_resume); + +void xenbus_suspend_cancel(void) +{ + xs_suspend_cancel(); + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); + xenbus_backend_resume(suspend_cancel_dev); +} +EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); + +/* A flag to determine if xenstored is 'ready' (i.e. has started) */ +int xenstored_ready = 0; + + +int register_xenstore_notifier(struct notifier_block *nb) +{ + int ret = 0; + + if (xenstored_ready > 0) + ret = nb->notifier_call(nb, 0, NULL); + else + blocking_notifier_chain_register(&xenstore_chain, nb); + + return ret; +} +EXPORT_SYMBOL_GPL(register_xenstore_notifier); + +void unregister_xenstore_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&xenstore_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); + +void xenbus_probe(struct work_struct *unused) +{ + BUG_ON((xenstored_ready <= 0)); + + /* Enumerate devices in xenstore and watch for changes. 
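+ * Devices already present under "device" are probed first; fe_watch
+ * then catches anything added later. Finally the xenstore notifier
+ * chain fires, so clients registered via register_xenstore_notifier()
+ * above run as soon as the store is usable.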
*/ + xenbus_probe_devices(&xenbus_frontend); + register_xenbus_watch(&fe_watch); + xenbus_backend_probe_and_watch(); + + /* Notify others that xenstore is up */ + blocking_notifier_call_chain(&xenstore_chain, 0, NULL); +} + +static int __init xenbus_probe_init(void) +{ + int err = 0; + + DPRINTK(""); + + err = -ENODEV; + if (!is_running_on_xen()) + goto out_error; + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_frontend.bus); + if (err) + goto out_error; + + err = xenbus_backend_bus_register(); + if (err) + goto out_unreg_front; + + /* + * Domain0 doesn't have a store_evtchn or store_mfn yet. + */ + if (is_initial_xendomain()) { + /* dom0 not yet supported */ + } else { + xenstored_ready = 1; + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + } + xen_store_interface = mfn_to_virt(xen_store_mfn); + + /* Initialize the interface to xenstore. */ + err = xs_init(); + if (err) { + printk(KERN_WARNING + "XENBUS: Error initializing xenstore comms: %i\n", err); + goto out_unreg_back; + } + + if (!is_initial_xendomain()) + xenbus_probe(NULL); + + return 0; + + out_unreg_back: + xenbus_backend_bus_unregister(); + + out_unreg_front: + bus_unregister(&xenbus_frontend.bus); + + out_error: + return err; +} + +postcore_initcall(xenbus_probe_init); + +MODULE_LICENSE("GPL"); + +static int is_disconnected_device(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + + /* + * A device with no driver will never connect. We care only about + * devices which should currently be in the process of connecting. + */ + if (!dev->driver) + return 0; + + /* Is this search limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + return (xendev->state != XenbusStateConnected); +} + +static int exists_disconnected_device(struct device_driver *drv) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + is_disconnected_device); +} + +static int print_device_status(struct device *dev, void *data) +{ + struct xenbus_device *xendev = to_xenbus_device(dev); + struct device_driver *drv = data; + + /* Is this operation limited to a particular driver? */ + if (drv && (dev->driver != drv)) + return 0; + + if (!dev->driver) { + /* Information only: is this too noisy? */ + printk(KERN_INFO "XENBUS: Device with no driver: %s\n", + xendev->nodename); + } else if (xendev->state != XenbusStateConnected) { + printk(KERN_WARNING "XENBUS: Timeout connecting " + "to device: %s (state %d)\n", + xendev->nodename, xendev->state); + } + + return 0; +} + +/* We only wait for device setup after most initcalls have run. */ +static int ready_to_wait_for_devices; + +/* + * On a 10 second timeout, wait for all devices currently configured. We need + * to do this to guarantee that the filesystems and / or network devices + * needed for boot are available, before we can allow the boot to proceed. + * + * This needs to be on a late_initcall, to happen after the frontend device + * drivers have been initialised, but before the root fs is mounted. + * + * A possible improvement here would be to have the tools add a per-device + * flag to the store entry, indicating whether it is needed at boot time. + * This would allow people who knew what they were doing to accelerate their + * boot slightly, but of course needs tools or manual intervention to set up + * those flags correctly. 
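+ *
+ * Until such flags exist the policy below is deliberately simple: poll
+ * every 100ms (HZ/10) for up to 10 seconds, then report any device
+ * still not Connected via print_device_status() and let the boot
+ * proceed regardless.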
+ */ +static void wait_for_devices(struct xenbus_driver *xendrv) +{ + unsigned long timeout = jiffies + 10*HZ; + struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + + if (!ready_to_wait_for_devices || !is_running_on_xen()) + return; + + while (exists_disconnected_device(drv)) { + if (time_after(jiffies, timeout)) + break; + schedule_timeout_interruptible(HZ/10); + } + + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + print_device_status); +} + +#ifndef MODULE +static int __init boot_wait_for_devices(void) +{ + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + return 0; +} + +late_initcall(boot_wait_for_devices); +#endif diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h new file mode 100644 index 00000000000..e09b19415a4 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -0,0 +1,74 @@ +/****************************************************************************** + * xenbus_probe.h + * + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef _XENBUS_PROBE_H +#define _XENBUS_PROBE_H + +#ifdef CONFIG_XEN_BACKEND +extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); +extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); +extern void xenbus_backend_probe_and_watch(void); +extern int xenbus_backend_bus_register(void); +extern void xenbus_backend_bus_unregister(void); +#else +static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} +static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} +static inline void xenbus_backend_probe_and_watch(void) {} +static inline int xenbus_backend_bus_register(void) { return 0; } +static inline void xenbus_backend_bus_unregister(void) {} +#endif + +struct xen_bus_type +{ + char *root; + unsigned int levels; + int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename); + int (*probe)(const char *type, const char *dir); + struct bus_type bus; +}; + +extern int xenbus_match(struct device *_dev, struct device_driver *_drv); +extern int xenbus_dev_probe(struct device *_dev); +extern int xenbus_dev_remove(struct device *_dev); +extern int xenbus_register_driver_common(struct xenbus_driver *drv, + struct xen_bus_type *bus, + struct module *owner, + const char *mod_name); +extern int xenbus_probe_node(struct xen_bus_type *bus, + const char *type, + const char *nodename); +extern int xenbus_probe_devices(struct xen_bus_type *bus); + +extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); + +#endif diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c new file mode 100644 index 00000000000..9e943fbce81 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -0,0 +1,861 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <linux/unistd.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/uio.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <linux/fcntl.h> +#include <linux/kthread.h> +#include <linux/rwsem.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <xen/xenbus.h> +#include "xenbus_comms.h" + +struct xs_stored_msg { + struct list_head list; + + struct xsd_sockmsg hdr; + + union { + /* Queued replies. */ + struct { + char *body; + } reply; + + /* Queued watch events. */ + struct { + struct xenbus_watch *handle; + char **vec; + unsigned int vec_size; + } watch; + } u; +}; + +struct xs_handle { + /* A list of replies. Currently only one will ever be outstanding. */ + struct list_head reply_list; + spinlock_t reply_lock; + wait_queue_head_t reply_waitq; + + /* + * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. + * response_mutex is never taken simultaneously with the other three. + */ + + /* One request at a time. */ + struct mutex request_mutex; + + /* Protect xenbus reader thread against save/restore. */ + struct mutex response_mutex; + + /* Protect transactions against save/restore. */ + struct rw_semaphore transaction_mutex; + + /* Protect watch (de)register against save/restore. */ + struct rw_semaphore watch_mutex; +}; + +static struct xs_handle xs_state; + +/* List of registered watches, and a lock to protect it. */ +static LIST_HEAD(watches); +static DEFINE_SPINLOCK(watches_lock); + +/* List of pending watch callback events, and a lock to protect it. */ +static LIST_HEAD(watch_events); +static DEFINE_SPINLOCK(watch_events_lock); + +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. + */ +static pid_t xenwatch_pid; +static DEFINE_MUTEX(xenwatch_mutex); +static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); + +static int get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) { + if (i == ARRAY_SIZE(xsd_errors) - 1) { + printk(KERN_WARNING + "XENBUS xen store gave: unknown error %s", + errorstring); + return EINVAL; + } + } + return xsd_errors[i].errnum; +} + +static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len) +{ + struct xs_stored_msg *msg; + char *body; + + spin_lock(&xs_state.reply_lock); + + while (list_empty(&xs_state.reply_list)) { + spin_unlock(&xs_state.reply_lock); + /* XXX FIXME: Avoid synchronous wait for response here. 
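+ * The calling thread sleeps on reply_waitq until xenbus_thread (at the
+ * bottom of this file) has read the reply off the ring and queued it,
+ * so every request issued through xs_talkv() blocks for a full round
+ * trip to xenstored.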
*/ + wait_event(xs_state.reply_waitq, + !list_empty(&xs_state.reply_list)); + spin_lock(&xs_state.reply_lock); + } + + msg = list_entry(xs_state.reply_list.next, + struct xs_stored_msg, list); + list_del(&msg->list); + + spin_unlock(&xs_state.reply_lock); + + *type = msg->hdr.type; + if (len) + *len = msg->hdr.len; + body = msg->u.reply.body; + + kfree(msg); + + return body; +} + +void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) +{ + void *ret; + struct xsd_sockmsg req_msg = *msg; + int err; + + if (req_msg.type == XS_TRANSACTION_START) + down_read(&xs_state.transaction_mutex); + + mutex_lock(&xs_state.request_mutex); + + err = xb_write(msg, sizeof(*msg) + msg->len); + if (err) { + msg->type = XS_ERROR; + ret = ERR_PTR(err); + } else + ret = read_reply(&msg->type, &msg->len); + + mutex_unlock(&xs_state.request_mutex); + + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) + up_read(&xs_state.transaction_mutex); + + return ret; +} + +/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ +static void *xs_talkv(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const struct kvec *iovec, + unsigned int num_vecs, + unsigned int *len) +{ + struct xsd_sockmsg msg; + void *ret = NULL; + unsigned int i; + int err; + + msg.tx_id = t.id; + msg.req_id = 0; + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + mutex_lock(&xs_state.request_mutex); + + err = xb_write(&msg, sizeof(msg)); + if (err) { + mutex_unlock(&xs_state.request_mutex); + return ERR_PTR(err); + } + + for (i = 0; i < num_vecs; i++) { + err = xb_write(iovec[i].iov_base, iovec[i].iov_len); + if (err) { + mutex_unlock(&xs_state.request_mutex); + return ERR_PTR(err); + } + } + + ret = read_reply(&msg.type, len); + + mutex_unlock(&xs_state.request_mutex); + + if (IS_ERR(ret)) + return ret; + + if (msg.type == XS_ERROR) { + err = get_error(ret); + kfree(ret); + return ERR_PTR(-err); + } + + if (msg.type != type) { + if (printk_ratelimit()) + printk(KERN_WARNING + "XENBUS unexpected type [%d], expected [%d]\n", + msg.type, type); + kfree(ret); + return ERR_PTR(-EINVAL); + } + return ret; +} + +/* Simplified version of xs_talkv: single message. */ +static void *xs_single(struct xenbus_transaction t, + enum xsd_sockmsg_type type, + const char *string, + unsigned int *len) +{ + struct kvec iovec; + + iovec.iov_base = (void *)string; + iovec.iov_len = strlen(string) + 1; + return xs_talkv(t, type, &iovec, 1, len); +} + +/* Many commands only need an ack, don't care what it says. */ +static int xs_error(char *reply) +{ + if (IS_ERR(reply)) + return PTR_ERR(reply); + kfree(reply); + return 0; +} + +static unsigned int count_strings(const char *strings, unsigned int len) +{ + unsigned int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) + num++; + + return num; +} + +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ +static char *join(const char *dir, const char *name) +{ + char *buffer; + + if (strlen(name) == 0) + buffer = kasprintf(GFP_KERNEL, "%s", dir); + else + buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name); + return (!buffer) ? ERR_PTR(-ENOMEM) : buffer; +} + +static char **split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; + + /* Count the strings. */ + *num = count_strings(strings, len); + + /* Transfer to one big alloc for easy freeing. 
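+ * Layout: an array of *num char pointers followed immediately by the
+ * raw NUL-separated string bytes. For example, the 8-byte payload
+ * "a\0bb\0cc\0" becomes ret[0] -> "a", ret[1] -> "bb", ret[2] -> "cc",
+ * and the caller releases everything with a single kfree().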
*/
+ ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL);
+ if (!ret) {
+ kfree(strings);
+ return ERR_PTR(-ENOMEM);
+ }
+ memcpy(&ret[*num], strings, len);
+ kfree(strings);
+
+ strings = (char *)&ret[*num];
+ for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
+ ret[(*num)++] = p;
+
+ return ret;
+}
+
+char **xenbus_directory(struct xenbus_transaction t,
+ const char *dir, const char *node, unsigned int *num)
+{
+ char *strings, *path;
+ unsigned int len;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return (char **)path;
+
+ strings = xs_single(t, XS_DIRECTORY, path, &len);
+ kfree(path);
+ if (IS_ERR(strings))
+ return (char **)strings;
+
+ return split(strings, len, num);
+}
+EXPORT_SYMBOL_GPL(xenbus_directory);
+
+/* Check if a path exists. Return 1 if it does. */
+int xenbus_exists(struct xenbus_transaction t,
+ const char *dir, const char *node)
+{
+ char **d;
+ int dir_n;
+
+ d = xenbus_directory(t, dir, node, &dir_n);
+ if (IS_ERR(d))
+ return 0;
+ kfree(d);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(xenbus_exists);
+
+/* Get the value of a single file.
+ * Returns a kmalloced value: call kfree() on it after use.
+ * len indicates length in bytes.
+ */
+void *xenbus_read(struct xenbus_transaction t,
+ const char *dir, const char *node, unsigned int *len)
+{
+ char *path;
+ void *ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return (void *)path;
+
+ ret = xs_single(t, XS_READ, path, len);
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_read);
+
+/* Write the value of a single file.
+ * Returns -err on failure.
+ */
+int xenbus_write(struct xenbus_transaction t,
+ const char *dir, const char *node, const char *string)
+{
+ const char *path;
+ struct kvec iovec[2];
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ iovec[0].iov_base = (void *)path;
+ iovec[0].iov_len = strlen(path) + 1;
+ iovec[1].iov_base = (void *)string;
+ iovec[1].iov_len = strlen(string);
+
+ ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_write);
+
+/* Create a new directory. */
+int xenbus_mkdir(struct xenbus_transaction t,
+ const char *dir, const char *node)
+{
+ char *path;
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_mkdir);
+
+/* Destroy a file or directory (directories must be empty). */
+int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
+{
+ char *path;
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = xs_error(xs_single(t, XS_RM, path, NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_rm);
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ */
+int xenbus_transaction_start(struct xenbus_transaction *t)
+{
+ char *id_str;
+
+ down_read(&xs_state.transaction_mutex);
+
+ id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
+ if (IS_ERR(id_str)) {
+ up_read(&xs_state.transaction_mutex);
+ return PTR_ERR(id_str);
+ }
+
+ t->id = simple_strtoul(id_str, NULL, 0);
+ kfree(id_str);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_transaction_start);
+
+/* End a transaction.
+ * If abort is true, transaction is discarded instead of committed.
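+ * The request body is the single string "T" (commit) or "F" (discard);
+ * the transaction id itself travels in the message header (msg.tx_id in
+ * xs_talkv). A typical, purely illustrative caller pattern retries when
+ * the store reports a conflict:
+ *
+ * again:
+ *	xenbus_transaction_start(&t);
+ *	... reads and writes under t ...
+ *	err = xenbus_transaction_end(t, 0);
+ *	if (err == -EAGAIN)
+ *		goto again;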
+ */ +int xenbus_transaction_end(struct xenbus_transaction t, int abort) +{ + char abortstr[2]; + int err; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + + err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); + + up_read(&xs_state.transaction_mutex); + + return err; +} +EXPORT_SYMBOL_GPL(xenbus_transaction_end); + +/* Single read and scanf: returns -errno or num scanned. */ +int xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; + char *val; + + val = xenbus_read(t, dir, node, NULL); + if (IS_ERR(val)) + return PTR_ERR(val); + + va_start(ap, fmt); + ret = vsscanf(val, fmt, ap); + va_end(ap); + kfree(val); + /* Distinctive errno. */ + if (ret == 0) + return -ERANGE; + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_scanf); + +/* Single printf and write: returns -errno or 0. */ +int xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int ret; +#define PRINTF_BUFFER_SIZE 4096 + char *printf_buffer; + + printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); + if (printf_buffer == NULL) + return -ENOMEM; + + va_start(ap, fmt); + ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + va_end(ap); + + BUG_ON(ret > PRINTF_BUFFER_SIZE-1); + ret = xenbus_write(t, dir, node, printf_buffer); + + kfree(printf_buffer); + + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_printf); + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int xenbus_gather(struct xenbus_transaction t, const char *dir, ...) +{ + va_list ap; + const char *name; + int ret = 0; + + va_start(ap, dir); + while (ret == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + p = xenbus_read(t, dir, name, NULL); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + break; + } + if (fmt) { + if (sscanf(p, fmt, result) == 0) + ret = -EINVAL; + kfree(p); + } else + *(char **)result = p; + } + va_end(ap); + return ret; +} +EXPORT_SYMBOL_GPL(xenbus_gather); + +static int xs_watch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (void *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static int xs_unwatch(const char *path, const char *token) +{ + struct kvec iov[2]; + + iov[0].iov_base = (char *)path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (char *)token; + iov[1].iov_len = strlen(token) + 1; + + return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov, + ARRAY_SIZE(iov), NULL)); +} + +static struct xenbus_watch *find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)simple_strtoul(token, NULL, 16); + + list_for_each_entry(i, &watches, list) + if (i == cmp) + return i; + + return NULL; +} + +/* Register callback to watch this node. */ +int register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int err; + + sprintf(token, "%lX", (long)watch); + + down_read(&xs_state.watch_mutex); + + spin_lock(&watches_lock); + BUG_ON(find_watch(token)); + list_add(&watch->list, &watches); + spin_unlock(&watches_lock); + + err = xs_watch(watch->node, token); + + /* Ignore errors due to multiple registration. 
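+ * (-EEXIST means xenstored already holds a watch with this token on
+ * this node. The token is just the watch pointer rendered in hex by
+ * the sprintf above; find_watch() reverses the encoding with
+ * simple_strtoul() when an event comes back.)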
*/
+ if ((err != 0) && (err != -EEXIST)) {
+ spin_lock(&watches_lock);
+ list_del(&watch->list);
+ spin_unlock(&watches_lock);
+ }
+
+ up_read(&xs_state.watch_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(register_xenbus_watch);
+
+void unregister_xenbus_watch(struct xenbus_watch *watch)
+{
+ struct xs_stored_msg *msg, *tmp;
+ char token[sizeof(watch) * 2 + 1];
+ int err;
+
+ sprintf(token, "%lX", (long)watch);
+
+ down_read(&xs_state.watch_mutex);
+
+ spin_lock(&watches_lock);
+ BUG_ON(!find_watch(token));
+ list_del(&watch->list);
+ spin_unlock(&watches_lock);
+
+ err = xs_unwatch(watch->node, token);
+ if (err)
+ printk(KERN_WARNING
+ "XENBUS Failed to release watch %s: %i\n",
+ watch->node, err);
+
+ up_read(&xs_state.watch_mutex);
+
+ /* Make sure there are no callbacks running currently (unless
+ it's us) */
+ if (current->pid != xenwatch_pid)
+ mutex_lock(&xenwatch_mutex);
+
+ /* Cancel pending watch events. */
+ spin_lock(&watch_events_lock);
+ list_for_each_entry_safe(msg, tmp, &watch_events, list) {
+ if (msg->u.watch.handle != watch)
+ continue;
+ list_del(&msg->list);
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+ spin_unlock(&watch_events_lock);
+
+ if (current->pid != xenwatch_pid)
+ mutex_unlock(&xenwatch_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
+
+void xs_suspend(void)
+{
+ down_write(&xs_state.transaction_mutex);
+ down_write(&xs_state.watch_mutex);
+ mutex_lock(&xs_state.request_mutex);
+ mutex_lock(&xs_state.response_mutex);
+}
+
+void xs_resume(void)
+{
+ struct xenbus_watch *watch;
+ char token[sizeof(watch) * 2 + 1];
+
+ mutex_unlock(&xs_state.response_mutex);
+ mutex_unlock(&xs_state.request_mutex);
+ up_write(&xs_state.transaction_mutex);
+
+ /* No need for watches_lock: the watch_mutex is sufficient. */
+ list_for_each_entry(watch, &watches, list) {
+ sprintf(token, "%lX", (long)watch);
+ xs_watch(watch->node, token);
+ }
+
+ up_write(&xs_state.watch_mutex);
+}
+
+void xs_suspend_cancel(void)
+{
+ mutex_unlock(&xs_state.response_mutex);
+ mutex_unlock(&xs_state.request_mutex);
+ up_write(&xs_state.watch_mutex);
+ up_write(&xs_state.transaction_mutex);
+}
+
+static int xenwatch_thread(void *unused)
+{
+ struct list_head *ent;
+ struct xs_stored_msg *msg;
+
+ for (;;) {
+ wait_event_interruptible(watch_events_waitq,
+ !list_empty(&watch_events));
+
+ if (kthread_should_stop())
+ break;
+
+ mutex_lock(&xenwatch_mutex);
+
+ spin_lock(&watch_events_lock);
+ ent = watch_events.next;
+ if (ent != &watch_events)
+ list_del(ent);
+ spin_unlock(&watch_events_lock);
+
+ if (ent != &watch_events) {
+ msg = list_entry(ent, struct xs_stored_msg, list);
+ msg->u.watch.handle->callback(
+ msg->u.watch.handle,
+ (const char **)msg->u.watch.vec,
+ msg->u.watch.vec_size);
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+
+ mutex_unlock(&xenwatch_mutex);
+ }
+
+ return 0;
+}
+
+static int process_msg(void)
+{
+ struct xs_stored_msg *msg;
+ char *body;
+ int err;
+
+ /*
+ * We must disallow save/restore while reading a xenstore message.
+ * A partial read across s/r leaves us out of sync with xenstored.
+ */
+ for (;;) {
+ err = xb_wait_for_data_to_read();
+ if (err)
+ return err;
+ mutex_lock(&xs_state.response_mutex);
+ if (xb_data_to_read())
+ break;
+ /* We raced with save/restore: pending data 'disappeared'.
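+ * xb_wait_for_data_to_read() returned while we did not hold the
+ * mutex, and a suspend/resume cycle holding response_mutex may have
+ * reinitialised the ring in the meantime, so recheck
+ * xb_data_to_read() under the mutex and loop.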
*/
+ mutex_unlock(&xs_state.response_mutex);
+ }
+
+
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (msg == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xb_read(&msg->hdr, sizeof(msg->hdr));
+ if (err) {
+ kfree(msg);
+ goto out;
+ }
+
+ body = kmalloc(msg->hdr.len + 1, GFP_KERNEL);
+ if (body == NULL) {
+ kfree(msg);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xb_read(body, msg->hdr.len);
+ if (err) {
+ kfree(body);
+ kfree(msg);
+ goto out;
+ }
+ body[msg->hdr.len] = '\0';
+
+ if (msg->hdr.type == XS_WATCH_EVENT) {
+ msg->u.watch.vec = split(body, msg->hdr.len,
+ &msg->u.watch.vec_size);
+ if (IS_ERR(msg->u.watch.vec)) {
+ kfree(msg);
+ err = PTR_ERR(msg->u.watch.vec);
+ goto out;
+ }
+
+ spin_lock(&watches_lock);
+ msg->u.watch.handle = find_watch(
+ msg->u.watch.vec[XS_WATCH_TOKEN]);
+ if (msg->u.watch.handle != NULL) {
+ spin_lock(&watch_events_lock);
+ list_add_tail(&msg->list, &watch_events);
+ wake_up(&watch_events_waitq);
+ spin_unlock(&watch_events_lock);
+ } else {
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+ spin_unlock(&watches_lock);
+ } else {
+ msg->u.reply.body = body;
+ spin_lock(&xs_state.reply_lock);
+ list_add_tail(&msg->list, &xs_state.reply_list);
+ spin_unlock(&xs_state.reply_lock);
+ wake_up(&xs_state.reply_waitq);
+ }
+
+ out:
+ mutex_unlock(&xs_state.response_mutex);
+ return err;
+}
+
+static int xenbus_thread(void *unused)
+{
+ int err;
+
+ for (;;) {
+ err = process_msg();
+ if (err)
+ printk(KERN_WARNING "XENBUS error %d while reading "
+ "message\n", err);
+ if (kthread_should_stop())
+ break;
+ }
+
+ return 0;
+}
+
+int xs_init(void)
+{
+ int err;
+ struct task_struct *task;
+
+ INIT_LIST_HEAD(&xs_state.reply_list);
+ spin_lock_init(&xs_state.reply_lock);
+ init_waitqueue_head(&xs_state.reply_waitq);
+
+ mutex_init(&xs_state.request_mutex);
+ mutex_init(&xs_state.response_mutex);
+ init_rwsem(&xs_state.transaction_mutex);
+ init_rwsem(&xs_state.watch_mutex);
+
+ /* Initialize the shared memory rings to talk to xenstored */
+ err = xb_init_comms();
+ if (err)
+ return err;
+
+ task = kthread_run(xenwatch_thread, NULL, "xenwatch");
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ xenwatch_pid = task->pid;
+
+ task = kthread_run(xenbus_thread, NULL, "xenbus");
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ return 0;
+}
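Taken together, xenbus_probe.c and xenbus_xs.c give a frontend everything it needs: bus registration, xenstore reads and writes, and watches. As a purely illustrative sketch of a consumer (not part of this changeset: the "demo" device type, demo_probe() and the "some-key" node are hypothetical, while the xenbus_driver structure and the xenbus_register_frontend()/xenbus_read() interfaces are the ones added above):

#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <xen/xenbus.h>

/* Called when a "demo" node appears under device/ in xenstore. */
static int demo_probe(struct xenbus_device *dev,
                      const struct xenbus_device_id *id)
{
        char *val;

        /* Read a (hypothetical) property from this device's directory. */
        val = xenbus_read(XBT_NIL, dev->nodename, "some-key", NULL);
        if (IS_ERR(val))
                return PTR_ERR(val);
        kfree(val);

        /* A real driver would set up rings and event channels here and
         * move the device towards XenbusStateConnected. */
        return 0;
}

static const struct xenbus_device_id demo_ids[] = {
        { "demo" },
        { "" }
};

static struct xenbus_driver demo_driver = {
        .name  = "demo",
        .owner = THIS_MODULE,
        .ids   = demo_ids,
        .probe = demo_probe,
};

static int __init demo_init(void)
{
        /* Expands to __xenbus_register_frontend(drv, THIS_MODULE, ...),
         * which registers on xenbus_frontend.bus and then calls
         * wait_for_devices() as seen in xenbus_probe.c. */
        return xenbus_register_frontend(&demo_driver);
}
module_init(demo_init);
MODULE_LICENSE("GPL");

With such a driver bound, boot_wait_for_devices() above would hold the boot for up to ten seconds waiting for each "demo" device to reach XenbusStateConnected before letting the root filesystem mount proceed.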