aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/Makefile2
-rw-r--r--drivers/acpi/thermal.c24
-rw-r--r--drivers/atm/Kconfig2
-rw-r--r--drivers/atm/eni.c19
-rw-r--r--drivers/atm/firestream.c14
-rw-r--r--drivers/atm/idt77252.c6
-rw-r--r--drivers/atm/lanai.c4
-rw-r--r--drivers/atm/nicstarmac.c2
-rw-r--r--drivers/block/Kconfig9
-rw-r--r--drivers/block/Makefile1
-rw-r--r--drivers/block/sunvdc.c97
-rw-r--r--drivers/block/xen-blkfront.c988
-rw-r--r--drivers/char/Kconfig8
-rw-r--r--drivers/char/Makefile1
-rw-r--r--drivers/char/hvc_xen.c159
-rw-r--r--drivers/macintosh/therm_pm72.c3
-rw-r--r--drivers/macintosh/windfarm_core.c3
-rw-r--r--drivers/mtd/ubi/build.c25
-rw-r--r--drivers/mtd/ubi/cdev.c49
-rw-r--r--drivers/mtd/ubi/debug.c44
-rw-r--r--drivers/mtd/ubi/debug.h2
-rw-r--r--drivers/mtd/ubi/eba.c98
-rw-r--r--drivers/mtd/ubi/gluebi.c27
-rw-r--r--drivers/mtd/ubi/io.c65
-rw-r--r--drivers/mtd/ubi/kapi.c19
-rw-r--r--drivers/mtd/ubi/misc.c4
-rw-r--r--drivers/mtd/ubi/scan.c127
-rw-r--r--drivers/mtd/ubi/scan.h2
-rw-r--r--drivers/mtd/ubi/ubi.h3
-rw-r--r--drivers/mtd/ubi/upd.c4
-rw-r--r--drivers/mtd/ubi/vmt.c53
-rw-r--r--drivers/mtd/ubi/vtbl.c85
-rw-r--r--drivers/mtd/ubi/wl.c93
-rw-r--r--drivers/net/Kconfig12
-rw-r--r--drivers/net/Makefile2
-rw-r--r--drivers/net/bnx2.c103
-rw-r--r--drivers/net/bnx2.h10
-rw-r--r--drivers/net/hamradio/baycom_epp.c2
-rw-r--r--drivers/net/pppol2tp.c18
-rw-r--r--drivers/net/sunvnet.c260
-rw-r--r--drivers/net/sunvnet.h4
-rw-r--r--drivers/net/xen-netfront.c1863
-rw-r--r--drivers/pnp/pnpbios/core.c2
-rw-r--r--drivers/sbus/char/bbc_envctrl.c5
-rw-r--r--drivers/sbus/char/envctrl.c7
-rw-r--r--drivers/xen/Makefile2
-rw-r--r--drivers/xen/grant-table.c582
-rw-r--r--drivers/xen/xenbus/Makefile7
-rw-r--r--drivers/xen/xenbus/xenbus_client.c569
-rw-r--r--drivers/xen/xenbus/xenbus_comms.c233
-rw-r--r--drivers/xen/xenbus/xenbus_comms.h46
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c935
-rw-r--r--drivers/xen/xenbus/xenbus_probe.h74
-rw-r--r--drivers/xen/xenbus/xenbus_xs.c861
54 files changed, 6992 insertions, 647 deletions
diff --git a/drivers/Makefile b/drivers/Makefile
index 503d8256944..6d9d7fab77f 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -15,6 +15,8 @@ obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_PNP) += pnp/
obj-$(CONFIG_ARM_AMBA) += amba/
+obj-$(CONFIG_XEN) += xen/
+
# char/ comes before serial/ etc so that the VT console is the boot-time
# default.
obj-y += char/
diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
index 88a6fc7fd27..58f1338981b 100644
--- a/drivers/acpi/thermal.c
+++ b/drivers/acpi/thermal.c
@@ -40,6 +40,7 @@
#include <linux/jiffies.h>
#include <linux/kmod.h>
#include <linux/seq_file.h>
+#include <linux/reboot.h>
#include <asm/uaccess.h>
#include <acpi/acpi_bus.h>
@@ -59,7 +60,6 @@
#define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0
#define ACPI_THERMAL_NOTIFY_HOT 0xF1
#define ACPI_THERMAL_MODE_ACTIVE 0x00
-#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff"
#define ACPI_THERMAL_MAX_ACTIVE 10
#define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65
@@ -419,26 +419,6 @@ static int acpi_thermal_get_devices(struct acpi_thermal *tz)
return 0;
}
-static int acpi_thermal_call_usermode(char *path)
-{
- char *argv[2] = { NULL, NULL };
- char *envp[3] = { NULL, NULL, NULL };
-
-
- if (!path)
- return -EINVAL;
-
- argv[0] = path;
-
- /* minimal command environment */
- envp[0] = "HOME=/";
- envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
-
- call_usermodehelper(argv[0], argv, envp, 0);
-
- return 0;
-}
-
static int acpi_thermal_critical(struct acpi_thermal *tz)
{
if (!tz || !tz->trips.critical.flags.valid)
@@ -456,7 +436,7 @@ static int acpi_thermal_critical(struct acpi_thermal *tz)
acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL,
tz->trips.critical.flags.enabled);
- acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF);
+ orderly_poweroff(true);
return 0;
}
diff --git a/drivers/atm/Kconfig b/drivers/atm/Kconfig
index bb4ae628149..bed9f58c2d5 100644
--- a/drivers/atm/Kconfig
+++ b/drivers/atm/Kconfig
@@ -172,7 +172,7 @@ config ATM_ZATM_DEBUG
config ATM_NICSTAR
tristate "IDT 77201 (NICStAR) (ForeRunnerLE)"
- depends on PCI && !64BIT
+ depends on PCI && !64BIT && VIRT_TO_BUS
help
The NICStAR chipset family is used in a large number of ATM NICs for
25 and for 155 Mbps, including IDT cards and the Fore ForeRunnerLE
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 77637e780d4..41b2204ebc6 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -1738,7 +1738,8 @@ static int __devinit eni_do_init(struct atm_dev *dev)
printk(KERN_ERR KERN_ERR DEV_LABEL "(itf %d): bad "
"magic - expected 0x%x, got 0x%x\n",dev->number,
ENI155_MAGIC,(unsigned) readl(&eprom->magic));
- return -EINVAL;
+ error = -EINVAL;
+ goto unmap;
}
}
eni_dev->phy = base+PHY_BASE;
@@ -1765,17 +1766,27 @@ static int __devinit eni_do_init(struct atm_dev *dev)
printk(")\n");
printk(KERN_ERR DEV_LABEL "(itf %d): ERROR - wrong id 0x%x\n",
dev->number,(unsigned) eni_in(MID_RES_ID_MCON));
- return -EINVAL;
+ error = -EINVAL;
+ goto unmap;
}
error = eni_dev->asic ? get_esi_asic(dev) : get_esi_fpga(dev,base);
- if (error) return error;
+ if (error)
+ goto unmap;
for (i = 0; i < ESI_LEN; i++)
printk("%s%02X",i ? "-" : "",dev->esi[i]);
printk(")\n");
printk(KERN_NOTICE DEV_LABEL "(itf %d): %s,%s\n",dev->number,
eni_in(MID_RES_ID_MCON) & 0x200 ? "ASIC" : "FPGA",
media_name[eni_in(MID_RES_ID_MCON) & DAUGTHER_ID]);
- return suni_init(dev);
+
+ error = suni_init(dev);
+ if (error)
+ goto unmap;
+out:
+ return error;
+unmap:
+ iounmap(base);
+ goto out;
}
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 38b688f9f6a..737cea49f87 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -1710,7 +1710,7 @@ static int __devinit fs_init (struct fs_dev *dev)
/* This bit is documented as "RESERVED" */
if (isr & ISR_INIT_ERR) {
printk (KERN_ERR "Error initializing the FS... \n");
- return 1;
+ goto unmap;
}
if (isr & ISR_INIT) {
fs_dprintk (FS_DEBUG_INIT, "Ha! Initialized OK!\n");
@@ -1723,7 +1723,7 @@ static int __devinit fs_init (struct fs_dev *dev)
if (!to) {
printk (KERN_ERR "timeout initializing the FS... \n");
- return 1;
+ goto unmap;
}
/* XXX fix for fs155 */
@@ -1803,7 +1803,7 @@ static int __devinit fs_init (struct fs_dev *dev)
if (!dev->atm_vccs) {
printk (KERN_WARNING "Couldn't allocate memory for VCC buffers. Woops!\n");
/* XXX Clean up..... */
- return 1;
+ goto unmap;
}
dev->tx_inuse = kzalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL);
@@ -1813,7 +1813,7 @@ static int __devinit fs_init (struct fs_dev *dev)
if (!dev->tx_inuse) {
printk (KERN_WARNING "Couldn't allocate memory for tx_inuse bits!\n");
/* XXX Clean up..... */
- return 1;
+ goto unmap;
}
/* -- RAS1 : FS155 and 50 differ. Default (0) should be OK for both */
/* -- RAS2 : FS50 only: Default is OK. */
@@ -1840,7 +1840,7 @@ static int __devinit fs_init (struct fs_dev *dev)
if (request_irq (dev->irq, fs_irq, IRQF_SHARED, "firestream", dev)) {
printk (KERN_WARNING "couldn't get irq %d for firestream.\n", pci_dev->irq);
/* XXX undo all previous stuff... */
- return 1;
+ goto unmap;
}
fs_dprintk (FS_DEBUG_INIT, "Grabbed irq %d for dev at %p.\n", dev->irq, dev);
@@ -1890,6 +1890,9 @@ static int __devinit fs_init (struct fs_dev *dev)
func_exit ();
return 0;
+unmap:
+ iounmap(dev->base);
+ return 1;
}
static int __devinit firestream_init_one (struct pci_dev *pci_dev,
@@ -2012,6 +2015,7 @@ static void __devexit firestream_remove_one (struct pci_dev *pdev)
for (i=0;i < FS_NR_RX_QUEUES;i++)
free_queue (dev, &dev->rx_rq[i]);
+ iounmap(dev->base);
fs_dprintk (FS_DEBUG_ALLOC, "Free fs-dev: %p\n", dev);
nxtdev = dev->next;
kfree (dev);
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 8f995ce8d73..f8b1700f4c1 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -65,7 +65,7 @@ static char const rcsid[] =
static unsigned int vpibits = 1;
-#define CONFIG_ATM_IDT77252_SEND_IDLE 1
+#define ATM_IDT77252_SEND_IDLE 1
/*
@@ -3404,7 +3404,7 @@ init_card(struct atm_dev *dev)
conf = SAR_CFG_TX_FIFO_SIZE_9 | /* Use maximum fifo size */
SAR_CFG_RXSTQ_SIZE_8k | /* Receive Status Queue is 8k */
SAR_CFG_IDLE_CLP | /* Set CLP on idle cells */
-#ifndef CONFIG_ATM_IDT77252_SEND_IDLE
+#ifndef ATM_IDT77252_SEND_IDLE
SAR_CFG_NO_IDLE | /* Do not send idle cells */
#endif
0;
@@ -3541,7 +3541,7 @@ init_card(struct atm_dev *dev)
printk("%s: Linkrate on ATM line : %u bit/s, %u cell/s.\n",
card->name, linkrate, card->link_pcr);
-#ifdef CONFIG_ATM_IDT77252_SEND_IDLE
+#ifdef ATM_IDT77252_SEND_IDLE
card->utopia_pcr = card->link_pcr;
#else
card->utopia_pcr = (160000000 / 8 / 54);
diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c
index 0e2c1ae650e..55fd1b4543f 100644
--- a/drivers/atm/lanai.c
+++ b/drivers/atm/lanai.c
@@ -552,8 +552,8 @@ static inline void sram_write(const struct lanai_dev *lanai,
writel(val, sram_addr(lanai, offset));
}
-static int __init sram_test_word(
- const struct lanai_dev *lanai, int offset, u32 pattern)
+static int __devinit sram_test_word(const struct lanai_dev *lanai,
+ int offset, u32 pattern)
{
u32 readback;
sram_write(lanai, pattern, offset);
diff --git a/drivers/atm/nicstarmac.c b/drivers/atm/nicstarmac.c
index 480947f4e01..842e26c4555 100644
--- a/drivers/atm/nicstarmac.c
+++ b/drivers/atm/nicstarmac.c
@@ -134,7 +134,7 @@ nicstar_read_eprom_status( virt_addr_t base )
/* Send read instruction */
val = NICSTAR_REG_READ( base, NICSTAR_REG_GENERAL_PURPOSE ) & 0xFFFFFFF0;
- for (i=0; i<sizeof rdsrtab/sizeof rdsrtab[0]; i++)
+ for (i=0; i<ARRAY_SIZE(rdsrtab); i++)
{
NICSTAR_REG_WRITE( base, NICSTAR_REG_GENERAL_PURPOSE,
(val | rdsrtab[i]) );
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 8f65b88cf71..a4a31199240 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -427,4 +427,13 @@ config XILINX_SYSACE
help
Include support for the Xilinx SystemACE CompactFlash interface
+config XEN_BLKDEV_FRONTEND
+ tristate "Xen virtual block device support"
+ depends on XEN
+ default y
+ help
+ This driver implements the front-end of the Xen virtual
+ block device driver. It communicates with a back-end driver
+ in another domain which drives the actual block device.
+
endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 9ee08ab4ffa..3e31532df0e 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_VIODASD) += viodasd.o
obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
obj-$(CONFIG_BLK_DEV_UB) += ub.o
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 0f5e3caf85d..2288b55d916 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -45,8 +45,6 @@ struct vdc_req_entry {
struct vdc_port {
struct vio_driver_state vio;
- struct vdc *vp;
-
struct gendisk *disk;
struct vdc_completion *cmp;
@@ -72,8 +70,6 @@ struct vdc_port {
struct vio_disk_geom geom;
struct vio_disk_vtoc label;
-
- struct list_head list;
};
static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
@@ -81,15 +77,6 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
return container_of(vio, struct vdc_port, vio);
}
-struct vdc {
- /* Protects prot_list. */
- spinlock_t lock;
-
- struct vio_dev *dev;
-
- struct list_head port_list;
-};
-
/* Ordered from largest major to lowest */
static struct vio_version vdc_versions[] = {
{ .major = 1, .minor = 0 },
@@ -747,21 +734,23 @@ static struct vio_driver_ops vdc_vio_ops = {
.handshake_complete = vdc_handshake_complete,
};
+static void print_version(void)
+{
+ static int version_printed;
+
+ if (version_printed++ == 0)
+ printk(KERN_INFO "%s", version);
+}
+
static int __devinit vdc_port_probe(struct vio_dev *vdev,
const struct vio_device_id *id)
{
struct mdesc_handle *hp;
struct vdc_port *port;
- unsigned long flags;
- struct vdc *vp;
const u64 *port_id;
int err;
- vp = dev_get_drvdata(vdev->dev.parent);
- if (!vp) {
- printk(KERN_ERR PFX "Cannot find port parent vdc.\n");
- return -ENODEV;
- }
+ print_version();
hp = mdesc_grab();
@@ -783,7 +772,6 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev,
goto err_out_release_mdesc;
}
- port->vp = vp;
port->dev_no = *port_id;
if (port->dev_no >= 26)
@@ -818,12 +806,6 @@ static int __devinit vdc_port_probe(struct vio_dev *vdev,
if (err)
goto err_out_free_tx_ring;
- INIT_LIST_HEAD(&port->list);
-
- spin_lock_irqsave(&vp->lock, flags);
- list_add(&port->list, &vp->port_list);
- spin_unlock_irqrestore(&vp->lock, flags);
-
dev_set_drvdata(&vdev->dev, port);
mdesc_release(hp);
@@ -879,58 +861,6 @@ static struct vio_driver vdc_port_driver = {
}
};
-static int __devinit vdc_probe(struct vio_dev *vdev,
- const struct vio_device_id *id)
-{
- static int vdc_version_printed;
- struct vdc *vp;
-
- if (vdc_version_printed++ == 0)
- printk(KERN_INFO "%s", version);
-
- vp = kzalloc(sizeof(struct vdc), GFP_KERNEL);
- if (!vp)
- return -ENOMEM;
-
- spin_lock_init(&vp->lock);
- vp->dev = vdev;
- INIT_LIST_HEAD(&vp->port_list);
-
- dev_set_drvdata(&vdev->dev, vp);
-
- return 0;
-}
-
-static int vdc_remove(struct vio_dev *vdev)
-{
-
- struct vdc *vp = dev_get_drvdata(&vdev->dev);
-
- if (vp) {
- kfree(vp);
- dev_set_drvdata(&vdev->dev, NULL);
- }
- return 0;
-}
-
-static struct vio_device_id vdc_match[] = {
- {
- .type = "block",
- },
- {},
-};
-MODULE_DEVICE_TABLE(vio, vdc_match);
-
-static struct vio_driver vdc_driver = {
- .id_table = vdc_match,
- .probe = vdc_probe,
- .remove = vdc_remove,
- .driver = {
- .name = "vdc",
- .owner = THIS_MODULE,
- }
-};
-
static int __init vdc_init(void)
{
int err;
@@ -940,19 +870,13 @@ static int __init vdc_init(void)
goto out_err;
vdc_major = err;
- err = vio_register_driver(&vdc_driver);
- if (err)
- goto out_unregister_blkdev;
err = vio_register_driver(&vdc_port_driver);
if (err)
- goto out_unregister_vdc;
+ goto out_unregister_blkdev;
return 0;
-out_unregister_vdc:
- vio_unregister_driver(&vdc_driver);
-
out_unregister_blkdev:
unregister_blkdev(vdc_major, VDCBLK_NAME);
vdc_major = 0;
@@ -964,7 +888,6 @@ out_err:
static void __exit vdc_exit(void)
{
vio_unregister_driver(&vdc_port_driver);
- vio_unregister_driver(&vdc_driver);
unregister_blkdev(vdc_major, VDCBLK_NAME);
}
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
new file mode 100644
index 00000000000..6746c29181f
--- /dev/null
+++ b/drivers/block/xen-blkfront.c
@@ -0,0 +1,988 @@
+/*
+ * blkfront.c
+ *
+ * XenLinux virtual block device driver.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ * Copyright (c) 2004, Andrew Warfield
+ * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2005, XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/blkdev.h>
+#include <linux/module.h>
+
+#include <xen/xenbus.h>
+#include <xen/grant_table.h>
+#include <xen/events.h>
+#include <xen/page.h>
+
+#include <xen/interface/grant_table.h>
+#include <xen/interface/io/blkif.h>
+
+#include <asm/xen/hypervisor.h>
+
+enum blkif_state {
+ BLKIF_STATE_DISCONNECTED,
+ BLKIF_STATE_CONNECTED,
+ BLKIF_STATE_SUSPENDED,
+};
+
+struct blk_shadow {
+ struct blkif_request req;
+ unsigned long request;
+ unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+static struct block_device_operations xlvbd_block_fops;
+
+#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'. They
+ * hang in private_data off the gendisk structure. We may end up
+ * putting all kinds of interesting stuff here :-)
+ */
+struct blkfront_info
+{
+ struct xenbus_device *xbdev;
+ dev_t dev;
+ struct gendisk *gd;
+ int vdevice;
+ blkif_vdev_t handle;
+ enum blkif_state connected;
+ int ring_ref;
+ struct blkif_front_ring ring;
+ unsigned int evtchn, irq;
+ struct request_queue *rq;
+ struct work_struct work;
+ struct gnttab_free_callback callback;
+ struct blk_shadow shadow[BLK_RING_SIZE];
+ unsigned long shadow_free;
+ int feature_barrier;
+
+ /**
+ * The number of people holding this device open. We won't allow a
+ * hot-unplug unless this is 0.
+ */
+ int users;
+};
+
+static DEFINE_SPINLOCK(blkif_io_lock);
+
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
+#define GRANT_INVALID_REF 0
+
+#define PARTS_PER_DISK 16
+
+#define BLKIF_MAJOR(dev) ((dev)>>8)
+#define BLKIF_MINOR(dev) ((dev) & 0xff)
+
+#define DEV_NAME "xvd" /* name in /dev */
+
+/* Information about our VBDs. */
+#define MAX_VBDS 64
+static LIST_HEAD(vbds_list);
+
+static int get_id_from_freelist(struct blkfront_info *info)
+{
+ unsigned long free = info->shadow_free;
+ BUG_ON(free > BLK_RING_SIZE);
+ info->shadow_free = info->shadow[free].req.id;
+ info->shadow[free].req.id = 0x0fffffee; /* debug */
+ return free;
+}
+
+static void add_id_to_freelist(struct blkfront_info *info,
+ unsigned long id)
+{
+ info->shadow[id].req.id = info->shadow_free;
+ info->shadow[id].request = 0;
+ info->shadow_free = id;
+}
+
+static void blkif_restart_queue_callback(void *arg)
+{
+ struct blkfront_info *info = (struct blkfront_info *)arg;
+ schedule_work(&info->work);
+}
+
+/*
+ * blkif_queue_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: BLKIF_OP_{READ,WRITE,PROBE}
+ * buffer: buffer to read/write into. this should be a
+ * virtual address in the guest os.
+ */
+static int blkif_queue_request(struct request *req)
+{
+ struct blkfront_info *info = req->rq_disk->private_data;
+ unsigned long buffer_mfn;
+ struct blkif_request *ring_req;
+ struct bio *bio;
+ struct bio_vec *bvec;
+ int idx;
+ unsigned long id;
+ unsigned int fsect, lsect;
+ int ref;
+ grant_ref_t gref_head;
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+ return 1;
+
+ if (gnttab_alloc_grant_references(
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+ gnttab_request_free_callback(
+ &info->callback,
+ blkif_restart_queue_callback,
+ info,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ return 1;
+ }
+
+ /* Fill out a communications ring structure. */
+ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+ id = get_id_from_freelist(info);
+ info->shadow[id].request = (unsigned long)req;
+
+ ring_req->id = id;
+ ring_req->sector_number = (blkif_sector_t)req->sector;
+ ring_req->handle = info->handle;
+
+ ring_req->operation = rq_data_dir(req) ?
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
+ if (blk_barrier_rq(req))
+ ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+
+ ring_req->nr_segments = 0;
+ rq_for_each_bio (bio, req) {
+ bio_for_each_segment (bvec, bio, idx) {
+ BUG_ON(ring_req->nr_segments
+ == BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
+ fsect = bvec->bv_offset >> 9;
+ lsect = fsect + (bvec->bv_len >> 9) - 1;
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&gref_head);
+ BUG_ON(ref == -ENOSPC);
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ info->xbdev->otherend_id,
+ buffer_mfn,
+ rq_data_dir(req) );
+
+ info->shadow[id].frame[ring_req->nr_segments] =
+ mfn_to_pfn(buffer_mfn);
+
+ ring_req->seg[ring_req->nr_segments] =
+ (struct blkif_request_segment) {
+ .gref = ref,
+ .first_sect = fsect,
+ .last_sect = lsect };
+
+ ring_req->nr_segments++;
+ }
+ }
+
+ info->ring.req_prod_pvt++;
+
+ /* Keep a private copy so we can reissue requests when recovering. */
+ info->shadow[id].req = *ring_req;
+
+ gnttab_free_grant_references(gref_head);
+
+ return 0;
+}
+
+
+static inline void flush_requests(struct blkfront_info *info)
+{
+ int notify;
+
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+
+ if (notify)
+ notify_remote_via_irq(info->irq);
+}
+
+/*
+ * do_blkif_request
+ * read a block; request is in a request queue
+ */
+static void do_blkif_request(request_queue_t *rq)
+{
+ struct blkfront_info *info = NULL;
+ struct request *req;
+ int queued;
+
+ pr_debug("Entered do_blkif_request\n");
+
+ queued = 0;
+
+ while ((req = elv_next_request(rq)) != NULL) {
+ info = req->rq_disk->private_data;
+ if (!blk_fs_request(req)) {
+ end_request(req, 0);
+ continue;
+ }
+
+ if (RING_FULL(&info->ring))
+ goto wait;
+
+ pr_debug("do_blk_req %p: cmd %p, sec %lx, "
+ "(%u/%li) buffer:%p [%s]\n",
+ req, req->cmd, (unsigned long)req->sector,
+ req->current_nr_sectors,
+ req->nr_sectors, req->buffer,
+ rq_data_dir(req) ? "write" : "read");
+
+
+ blkdev_dequeue_request(req);
+ if (blkif_queue_request(req)) {
+ blk_requeue_request(rq, req);
+wait:
+ /* Avoid pointless unplugs. */
+ blk_stop_queue(rq);
+ break;
+ }
+
+ queued++;
+ }
+
+ if (queued != 0)
+ flush_requests(info);
+}
+
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+{
+ request_queue_t *rq;
+
+ rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
+ if (rq == NULL)
+ return -1;
+
+ elevator_init(rq, "noop");
+
+ /* Hard sector size and max sectors impersonate the equiv. hardware. */
+ blk_queue_hardsect_size(rq, sector_size);
+ blk_queue_max_sectors(rq, 512);
+
+ /* Each segment in a request is up to an aligned page in size. */
+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+ blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+ /* Ensure a merged request will fit in a single I/O ring slot. */
+ blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+ /* Make sure buffer addresses are sector-aligned. */
+ blk_queue_dma_alignment(rq, 511);
+
+ gd->queue = rq;
+
+ return 0;
+}
+
+
+static int xlvbd_barrier(struct blkfront_info *info)
+{
+ int err;
+
+ err = blk_queue_ordered(info->rq,
+ info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
+ NULL);
+
+ if (err)
+ return err;
+
+ printk(KERN_INFO "blkfront: %s: barriers %s\n",
+ info->gd->disk_name,
+ info->feature_barrier ? "enabled" : "disabled");
+ return 0;
+}
+
+
+static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
+ int vdevice, u16 vdisk_info, u16 sector_size,
+ struct blkfront_info *info)
+{
+ struct gendisk *gd;
+ int nr_minors = 1;
+ int err = -ENODEV;
+
+ BUG_ON(info->gd != NULL);
+ BUG_ON(info->rq != NULL);
+
+ if ((minor % PARTS_PER_DISK) == 0)
+ nr_minors = PARTS_PER_DISK;
+
+ gd = alloc_disk(nr_minors);
+ if (gd == NULL)
+ goto out;
+
+ if (nr_minors > 1)
+ sprintf(gd->disk_name, "%s%c", DEV_NAME,
+ 'a' + minor / PARTS_PER_DISK);
+ else
+ sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
+ 'a' + minor / PARTS_PER_DISK,
+ minor % PARTS_PER_DISK);
+
+ gd->major = XENVBD_MAJOR;
+ gd->first_minor = minor;
+ gd->fops = &xlvbd_block_fops;
+ gd->private_data = info;
+ gd->driverfs_dev = &(info->xbdev->dev);
+ set_capacity(gd, capacity);
+
+ if (xlvbd_init_blk_queue(gd, sector_size)) {
+ del_gendisk(gd);
+ goto out;
+ }
+
+ info->rq = gd->queue;
+ info->gd = gd;
+
+ if (info->feature_barrier)
+ xlvbd_barrier(info);
+
+ if (vdisk_info & VDISK_READONLY)
+ set_disk_ro(gd, 1);
+
+ if (vdisk_info & VDISK_REMOVABLE)
+ gd->flags |= GENHD_FL_REMOVABLE;
+
+ if (vdisk_info & VDISK_CDROM)
+ gd->flags |= GENHD_FL_CD;
+
+ return 0;
+
+ out:
+ return err;
+}
+
+static void kick_pending_request_queues(struct blkfront_info *info)
+{
+ if (!RING_FULL(&info->ring)) {
+ /* Re-enable calldowns. */
+ blk_start_queue(info->rq);
+ /* Kick things off immediately. */
+ do_blkif_request(info->rq);
+ }
+}
+
+static void blkif_restart_queue(struct work_struct *work)
+{
+ struct blkfront_info *info = container_of(work, struct blkfront_info, work);
+
+ spin_lock_irq(&blkif_io_lock);
+ if (info->connected == BLKIF_STATE_CONNECTED)
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
+}
+
+static void blkif_free(struct blkfront_info *info, int suspend)
+{
+ /* Prevent new requests being issued until we fix things up. */
+ spin_lock_irq(&blkif_io_lock);
+ info->connected = suspend ?
+ BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+ /* No more blkif_request(). */
+ if (info->rq)
+ blk_stop_queue(info->rq);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ spin_unlock_irq(&blkif_io_lock);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_scheduled_work();
+
+ /* Free resources associated with old device channel. */
+ if (info->ring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->ring_ref, 0,
+ (unsigned long)info->ring.sring);
+ info->ring_ref = GRANT_INVALID_REF;
+ info->ring.sring = NULL;
+ }
+ if (info->irq)
+ unbind_from_irqhandler(info->irq, info);
+ info->evtchn = info->irq = 0;
+
+}
+
+static void blkif_completion(struct blk_shadow *s)
+{
+ int i;
+ for (i = 0; i < s->req.nr_segments; i++)
+ gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
+}
+
+static irqreturn_t blkif_interrupt(int irq, void *dev_id)
+{
+ struct request *req;
+ struct blkif_response *bret;
+ RING_IDX i, rp;
+ unsigned long flags;
+ struct blkfront_info *info = (struct blkfront_info *)dev_id;
+ int uptodate;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ again:
+ rp = info->ring.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ for (i = info->ring.rsp_cons; i != rp; i++) {
+ unsigned long id;
+ int ret;
+
+ bret = RING_GET_RESPONSE(&info->ring, i);
+ id = bret->id;
+ req = (struct request *)info->shadow[id].request;
+
+ blkif_completion(&info->shadow[id]);
+
+ add_id_to_freelist(info, id);
+
+ uptodate = (bret->status == BLKIF_RSP_OKAY);
+ switch (bret->operation) {
+ case BLKIF_OP_WRITE_BARRIER:
+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
+ printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
+ info->gd->disk_name);
+ uptodate = -EOPNOTSUPP;
+ info->feature_barrier = 0;
+ xlvbd_barrier(info);
+ }
+ /* fall through */
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ if (unlikely(bret->status != BLKIF_RSP_OKAY))
+ dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
+ "request: %x\n", bret->status);
+
+ ret = end_that_request_first(req, uptodate,
+ req->hard_nr_sectors);
+ BUG_ON(ret);
+ end_that_request_last(req, uptodate);
+ break;
+ default:
+ BUG();
+ }
+ }
+
+ info->ring.rsp_cons = i;
+
+ if (i != info->ring.req_prod_pvt) {
+ int more_to_do;
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ if (more_to_do)
+ goto again;
+ } else
+ info->ring.sring->rsp_event = i + 1;
+
+ kick_pending_request_queues(info);
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+
+static int setup_blkring(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ struct blkif_sring *sring;
+ int err;
+
+ info->ring_ref = GRANT_INVALID_REF;
+
+ sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL);
+ if (!sring) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+ return -ENOMEM;
+ }
+ SHARED_RING_INIT(sring);
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+ if (err < 0) {
+ free_page((unsigned long)sring);
+ info->ring.sring = NULL;
+ goto fail;
+ }
+ info->ring_ref = err;
+
+ err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ if (err)
+ goto fail;
+
+ err = bind_evtchn_to_irqhandler(info->evtchn,
+ blkif_interrupt,
+ IRQF_SAMPLE_RANDOM, "blkif", info);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err,
+ "bind_evtchn_to_irqhandler failed");
+ goto fail;
+ }
+ info->irq = err;
+
+ return 0;
+fail:
+ blkif_free(info, 0);
+ return err;
+}
+
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+ struct blkfront_info *info)
+{
+ const char *message = NULL;
+ struct xenbus_transaction xbt;
+ int err;
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_blkring(dev, info);
+ if (err)
+ goto out;
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto destroy_blkring;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-ref", "%u", info->ring_ref);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "event-channel", "%u", info->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto destroy_blkring;
+ }
+
+ xenbus_switch_state(dev, XenbusStateInitialised);
+
+ return 0;
+
+ abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ if (message)
+ xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_blkring:
+ blkif_free(info, 0);
+ out:
+ return err;
+}
+
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffer for communication with the backend, and
+ * inform the backend of the appropriate details for those. Switch to
+ * Initialised state.
+ */
+static int blkfront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err, vdevice, i;
+ struct blkfront_info *info;
+
+ /* FIXME: Use dynamic device id if this is not set. */
+ err = xenbus_scanf(XBT_NIL, dev->nodename,
+ "virtual-device", "%i", &vdevice);
+ if (err != 1) {
+ xenbus_dev_fatal(dev, err, "reading virtual-device");
+ return err;
+ }
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
+ return -ENOMEM;
+ }
+
+ info->xbdev = dev;
+ info->vdevice = vdevice;
+ info->connected = BLKIF_STATE_DISCONNECTED;
+ INIT_WORK(&info->work, blkif_restart_queue);
+
+ for (i = 0; i < BLK_RING_SIZE; i++)
+ info->shadow[i].req.id = i+1;
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+ /* Front end dir is a number, which is used as the id. */
+ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
+ dev->dev.driver_data = info;
+
+ err = talk_to_backend(dev, info);
+ if (err) {
+ kfree(info);
+ dev->dev.driver_data = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+
+static int blkif_recover(struct blkfront_info *info)
+{
+ int i;
+ struct blkif_request *req;
+ struct blk_shadow *copy;
+ int j;
+
+ /* Stage 1: Make a safe copy of the shadow state. */
+ copy = kmalloc(sizeof(info->shadow), GFP_KERNEL);
+ if (!copy)
+ return -ENOMEM;
+ memcpy(copy, info->shadow, sizeof(info->shadow));
+
+ /* Stage 2: Set up free list. */
+ memset(&info->shadow, 0, sizeof(info->shadow));
+ for (i = 0; i < BLK_RING_SIZE; i++)
+ info->shadow[i].req.id = i+1;
+ info->shadow_free = info->ring.req_prod_pvt;
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+ /* Stage 3: Find pending requests and requeue them. */
+ for (i = 0; i < BLK_RING_SIZE; i++) {
+ /* Not in use? */
+ if (copy[i].request == 0)
+ continue;
+
+ /* Grab a request slot and copy shadow state into it. */
+ req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+ *req = copy[i].req;
+
+ /* We get a new request id, and must reset the shadow state. */
+ req->id = get_id_from_freelist(info);
+ memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
+
+ /* Rewrite any grant references invalidated by susp/resume. */
+ for (j = 0; j < req->nr_segments; j++)
+ gnttab_grant_foreign_access_ref(
+ req->seg[j].gref,
+ info->xbdev->otherend_id,
+ pfn_to_mfn(info->shadow[req->id].frame[j]),
+ rq_data_dir(
+ (struct request *)
+ info->shadow[req->id].request));
+ info->shadow[req->id].req = *req;
+
+ info->ring.req_prod_pvt++;
+ }
+
+ kfree(copy);
+
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ spin_lock_irq(&blkif_io_lock);
+
+ /* Now safe for us to use the shared ring */
+ info->connected = BLKIF_STATE_CONNECTED;
+
+ /* Send off requeued requests */
+ flush_requests(info);
+
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(info);
+
+ spin_unlock_irq(&blkif_io_lock);
+
+ return 0;
+}
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart. We tear down our blkif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int blkfront_resume(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev.driver_data;
+ int err;
+
+ dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
+
+ blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
+
+ err = talk_to_backend(dev, info);
+ if (info->connected == BLKIF_STATE_SUSPENDED && !err)
+ err = blkif_recover(info);
+
+ return err;
+}
+
+
+/*
+ * Invoked when the backend is finally 'ready' (and has told produced
+ * the details about the physical device - #sectors, size, etc).
+ */
+static void blkfront_connect(struct blkfront_info *info)
+{
+ unsigned long long sectors;
+ unsigned long sector_size;
+ unsigned int binfo;
+ int err;
+
+ if ((info->connected == BLKIF_STATE_CONNECTED) ||
+ (info->connected == BLKIF_STATE_SUSPENDED) )
+ return;
+
+ dev_dbg(&info->xbdev->dev, "%s:%s.\n",
+ __func__, info->xbdev->otherend);
+
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "sectors", "%llu", &sectors,
+ "info", "%u", &binfo,
+ "sector-size", "%lu", &sector_size,
+ NULL);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err,
+ "reading backend fields at %s",
+ info->xbdev->otherend);
+ return;
+ }
+
+ err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ "feature-barrier", "%lu", &info->feature_barrier,
+ NULL);
+ if (err)
+ info->feature_barrier = 0;
+
+ err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
+ sectors, info->vdevice,
+ binfo, sector_size, info);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
+ info->xbdev->otherend);
+ return;
+ }
+
+ xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+ /* Kick pending requests. */
+ spin_lock_irq(&blkif_io_lock);
+ info->connected = BLKIF_STATE_CONNECTED;
+ kick_pending_request_queues(info);
+ spin_unlock_irq(&blkif_io_lock);
+
+ add_disk(info->gd);
+}
+
+/**
+ * Handle the change of state of the backend to Closing. We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend. Once is this done, we can switch to Closed in
+ * acknowledgement.
+ */
+static void blkfront_closing(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev.driver_data;
+ unsigned long flags;
+
+ dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
+
+ if (info->rq == NULL)
+ goto out;
+
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ del_gendisk(info->gd);
+
+ /* No more blkif_request(). */
+ blk_stop_queue(info->rq);
+
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&info->callback);
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_scheduled_work();
+
+ blk_cleanup_queue(info->rq);
+ info->rq = NULL;
+
+ out:
+ xenbus_frontend_closed(dev);
+}
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void backend_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ struct blkfront_info *info = dev->dev.driver_data;
+ struct block_device *bd;
+
+ dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitWait:
+ case XenbusStateInitialised:
+ case XenbusStateUnknown:
+ case XenbusStateClosed:
+ break;
+
+ case XenbusStateConnected:
+ blkfront_connect(info);
+ break;
+
+ case XenbusStateClosing:
+ bd = bdget(info->dev);
+ if (bd == NULL)
+ xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
+
+ mutex_lock(&bd->bd_mutex);
+ if (info->users > 0)
+ xenbus_dev_error(dev, -EBUSY,
+ "Device in use; refusing to close");
+ else
+ blkfront_closing(dev);
+ mutex_unlock(&bd->bd_mutex);
+ bdput(bd);
+ break;
+ }
+}
+
+static int blkfront_remove(struct xenbus_device *dev)
+{
+ struct blkfront_info *info = dev->dev.driver_data;
+
+ dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
+
+ blkif_free(info, 0);
+
+ kfree(info);
+
+ return 0;
+}
+
+static int blkif_open(struct inode *inode, struct file *filep)
+{
+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+ info->users++;
+ return 0;
+}
+
+static int blkif_release(struct inode *inode, struct file *filep)
+{
+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+ info->users--;
+ if (info->users == 0) {
+ /* Check whether we have been instructed to close. We will
+ have ignored this request initially, as the device was
+ still mounted. */
+ struct xenbus_device *dev = info->xbdev;
+ enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
+
+ if (state == XenbusStateClosing)
+ blkfront_closing(dev);
+ }
+ return 0;
+}
+
+static struct block_device_operations xlvbd_block_fops =
+{
+ .owner = THIS_MODULE,
+ .open = blkif_open,
+ .release = blkif_release,
+};
+
+
+static struct xenbus_device_id blkfront_ids[] = {
+ { "vbd" },
+ { "" }
+};
+
+static struct xenbus_driver blkfront = {
+ .name = "vbd",
+ .owner = THIS_MODULE,
+ .ids = blkfront_ids,
+ .probe = blkfront_probe,
+ .remove = blkfront_remove,
+ .resume = blkfront_resume,
+ .otherend_changed = backend_changed,
+};
+
+static int __init xlblk_init(void)
+{
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
+ printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
+ XENVBD_MAJOR, DEV_NAME);
+ return -ENODEV;
+ }
+
+ return xenbus_register_frontend(&blkfront);
+}
+module_init(xlblk_init);
+
+
+static void xlblk_exit(void)
+{
+ return xenbus_unregister_driver(&blkfront);
+}
+module_exit(xlblk_exit);
+
+MODULE_DESCRIPTION("Xen virtual block device frontend");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 97bd71bc3ae..9e8f21410d2 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -604,6 +604,14 @@ config HVC_BEAT
help
Toshiba's Cell Reference Set Beat Console device driver
+config HVC_XEN
+ bool "Xen Hypervisor Console support"
+ depends on XEN
+ select HVC_DRIVER
+ default y
+ help
+ Xen virtual console device driver
+
config HVCS
tristate "IBM Hypervisor Virtual Console Server support"
depends on PPC_PSERIES
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index f2996a95eb0..8852b8d643c 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o
obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o
obj-$(CONFIG_HVC_BEAT) += hvc_beat.o
obj-$(CONFIG_HVC_DRIVER) += hvc_console.o
+obj-$(CONFIG_HVC_XEN) += hvc_xen.o
obj-$(CONFIG_RAW_DRIVER) += raw.o
obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o
obj-$(CONFIG_MSPEC) += mspec.o
diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c
new file mode 100644
index 00000000000..dd68f8541c2
--- /dev/null
+++ b/drivers/char/hvc_xen.c
@@ -0,0 +1,159 @@
+/*
+ * xen console driver interface to hvc_console.c
+ *
+ * (c) 2007 Gerd Hoffmann <kraxel@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include <asm/xen/hypervisor.h>
+#include <xen/page.h>
+#include <xen/events.h>
+#include <xen/interface/io/console.h>
+#include <xen/hvc-console.h>
+
+#include "hvc_console.h"
+
+#define HVC_COOKIE 0x58656e /* "Xen" in hex */
+
+static struct hvc_struct *hvc;
+static int xencons_irq;
+
+/* ------------------------------------------------------------------ */
+
+static inline struct xencons_interface *xencons_interface(void)
+{
+ return mfn_to_virt(xen_start_info->console.domU.mfn);
+}
+
+static inline void notify_daemon(void)
+{
+ /* Use evtchn: this is called early, before irq is set up. */
+ notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
+}
+
+static int write_console(uint32_t vtermno, const char *data, int len)
+{
+ struct xencons_interface *intf = xencons_interface();
+ XENCONS_RING_IDX cons, prod;
+ int sent = 0;
+
+ cons = intf->out_cons;
+ prod = intf->out_prod;
+ mb(); /* update queue values before going on */
+ BUG_ON((prod - cons) > sizeof(intf->out));
+
+ while ((sent < len) && ((prod - cons) < sizeof(intf->out)))
+ intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
+
+ wmb(); /* write ring before updating pointer */
+ intf->out_prod = prod;
+
+ notify_daemon();
+ return sent;
+}
+
+static int read_console(uint32_t vtermno, char *buf, int len)
+{
+ struct xencons_interface *intf = xencons_interface();
+ XENCONS_RING_IDX cons, prod;
+ int recv = 0;
+
+ cons = intf->in_cons;
+ prod = intf->in_prod;
+ mb(); /* get pointers before reading ring */
+ BUG_ON((prod - cons) > sizeof(intf->in));
+
+ while (cons != prod && recv < len)
+ buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)];
+
+ mb(); /* read ring before consuming */
+ intf->in_cons = cons;
+
+ notify_daemon();
+ return recv;
+}
+
+static struct hv_ops hvc_ops = {
+ .get_chars = read_console,
+ .put_chars = write_console,
+};
+
+static int __init xen_init(void)
+{
+ struct hvc_struct *hp;
+
+ if (!is_running_on_xen())
+ return 0;
+
+ xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn);
+ if (xencons_irq < 0)
+ xencons_irq = 0 /* NO_IRQ */;
+ hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256);
+ if (IS_ERR(hp))
+ return PTR_ERR(hp);
+
+ hvc = hp;
+ return 0;
+}
+
+static void __exit xen_fini(void)
+{
+ if (hvc)
+ hvc_remove(hvc);
+}
+
+static int xen_cons_init(void)
+{
+ if (!is_running_on_xen())
+ return 0;
+
+ hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);
+ return 0;
+}
+
+module_init(xen_init);
+module_exit(xen_fini);
+console_initcall(xen_cons_init);
+
+static void xenboot_write_console(struct console *console, const char *string,
+ unsigned len)
+{
+ unsigned int linelen, off = 0;
+ const char *pos;
+
+ while (off < len && NULL != (pos = strchr(string+off, '\n'))) {
+ linelen = pos-string+off;
+ if (off + linelen > len)
+ break;
+ write_console(0, string+off, linelen);
+ write_console(0, "\r\n", 2);
+ off += linelen + 1;
+ }
+ if (off < len)
+ write_console(0, string+off, len-off);
+}
+
+struct console xenboot_console = {
+ .name = "xenboot",
+ .write = xenboot_write_console,
+ .flags = CON_PRINTBUFFER | CON_BOOT,
+};
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index dbb22403979..3d90fc00209 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -1770,7 +1770,8 @@ static int call_critical_overtemp(void)
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL };
- return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
+ return call_usermodehelper(critical_overtemp_path,
+ argv, envp, UMH_WAIT_EXEC);
}
diff --git a/drivers/macintosh/windfarm_core.c b/drivers/macintosh/windfarm_core.c
index e18d265d5d3..516d943227e 100644
--- a/drivers/macintosh/windfarm_core.c
+++ b/drivers/macintosh/windfarm_core.c
@@ -80,7 +80,8 @@ int wf_critical_overtemp(void)
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL };
- return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
+ return call_usermodehelper(critical_overtemp_path,
+ argv, envp, UMH_WAIT_EXEC);
}
EXPORT_SYMBOL_GPL(wf_critical_overtemp);
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 555d594d181..1cb22bfae75 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -33,6 +33,7 @@
#include <linux/moduleparam.h>
#include <linux/stringify.h>
#include <linux/stat.h>
+#include <linux/log2.h>
#include "ubi.h"
/* Maximum length of the 'mtd=' parameter */
@@ -369,7 +370,7 @@ static int attach_by_scanning(struct ubi_device *ubi)
out_wl:
ubi_wl_close(ubi);
out_vtbl:
- kfree(ubi->vtbl);
+ vfree(ubi->vtbl);
out_si:
ubi_scan_destroy_si(si);
return err;
@@ -422,8 +423,7 @@ static int io_init(struct ubi_device *ubi)
ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
/* Make sure minimal I/O unit is power of 2 */
- if (ubi->min_io_size == 0 ||
- (ubi->min_io_size & (ubi->min_io_size - 1))) {
+ if (!is_power_of_2(ubi->min_io_size)) {
ubi_err("bad min. I/O unit");
return -EINVAL;
}
@@ -593,8 +593,6 @@ static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset,
if (err)
goto out_detach;
- ubi_devices_cnt += 1;
-
ubi_msg("attached mtd%d to ubi%d", ubi->mtd->index, ubi_devices_cnt);
ubi_msg("MTD device name: \"%s\"", ubi->mtd->name);
ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20);
@@ -624,12 +622,13 @@ static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset,
wake_up_process(ubi->bgt_thread);
}
+ ubi_devices_cnt += 1;
return 0;
out_detach:
ubi_eba_close(ubi);
ubi_wl_close(ubi);
- kfree(ubi->vtbl);
+ vfree(ubi->vtbl);
out_free:
kfree(ubi);
out_mtd:
@@ -650,7 +649,7 @@ static void detach_mtd_dev(struct ubi_device *ubi)
uif_close(ubi);
ubi_eba_close(ubi);
ubi_wl_close(ubi);
- kfree(ubi->vtbl);
+ vfree(ubi->vtbl);
put_mtd_device(ubi->mtd);
kfree(ubi_devices[ubi_num]);
ubi_devices[ubi_num] = NULL;
@@ -686,13 +685,6 @@ static int __init ubi_init(void)
struct mtd_dev_param *p = &mtd_dev_param[i];
cond_resched();
-
- if (!p->name) {
- dbg_err("empty name");
- err = -EINVAL;
- goto out_detach;
- }
-
err = attach_mtd_dev(p->name, p->vid_hdr_offs, p->data_offs);
if (err)
goto out_detach;
@@ -799,7 +791,7 @@ static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
/* Get rid of the final newline */
if (buf[len - 1] == '\n')
- buf[len - 1] = 0;
+ buf[len - 1] = '\0';
for (i = 0; i < 3; i++)
tokens[i] = strsep(&pbuf, ",");
@@ -809,9 +801,6 @@ static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp)
return -EINVAL;
}
- if (tokens[0] == '\0')
- return -EINVAL;
-
p = &mtd_dev_param[mtd_devs];
strcpy(&p->name[0], tokens[0]);
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 6612eb79bf1..fe4da1e96c5 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -64,6 +64,7 @@ static struct ubi_device *major_to_device(int major)
if (ubi_devices[i] && ubi_devices[i]->major == major)
return ubi_devices[i];
BUG();
+ return NULL;
}
/**
@@ -153,7 +154,7 @@ static int vol_cdev_release(struct inode *inode, struct file *file)
ubi_warn("update of volume %d not finished, volume is damaged",
vol->vol_id);
vol->updating = 0;
- kfree(vol->upd_buf);
+ vfree(vol->upd_buf);
}
ubi_close_volume(desc);
@@ -232,7 +233,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
tbuf_size = vol->usable_leb_size;
if (count < tbuf_size)
tbuf_size = ALIGN(count, ubi->min_io_size);
- tbuf = kmalloc(tbuf_size, GFP_KERNEL);
+ tbuf = vmalloc(tbuf_size);
if (!tbuf)
return -ENOMEM;
@@ -271,7 +272,7 @@ static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
len = count > tbuf_size ? tbuf_size : count;
} while (count);
- kfree(tbuf);
+ vfree(tbuf);
return err ? err : count_save - count;
}
@@ -320,7 +321,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
tbuf_size = vol->usable_leb_size;
if (count < tbuf_size)
tbuf_size = ALIGN(count, ubi->min_io_size);
- tbuf = kmalloc(tbuf_size, GFP_KERNEL);
+ tbuf = vmalloc(tbuf_size);
if (!tbuf)
return -ENOMEM;
@@ -355,7 +356,7 @@ static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
len = count > tbuf_size ? tbuf_size : count;
}
- kfree(tbuf);
+ vfree(tbuf);
return err ? err : count_save - count;
}
@@ -397,6 +398,7 @@ static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
vol->corrupted = 1;
}
vol->checked = 1;
+ ubi_gluebi_updated(vol);
revoke_exclusive(desc, UBI_READWRITE);
}
@@ -413,19 +415,7 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file,
struct ubi_device *ubi = vol->ubi;
void __user *argp = (void __user *)arg;
- if (_IOC_NR(cmd) > VOL_CDEV_IOC_MAX_SEQ ||
- _IOC_TYPE(cmd) != UBI_VOL_IOC_MAGIC)
- return -ENOTTY;
-
- if (_IOC_DIR(cmd) && _IOC_READ)
- err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd));
- else if (_IOC_DIR(cmd) && _IOC_WRITE)
- err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd));
- if (err)
- return -EFAULT;
-
switch (cmd) {
-
/* Volume update command */
case UBI_IOCVOLUP:
{
@@ -471,7 +461,7 @@ static int vol_cdev_ioctl(struct inode *inode, struct file *file,
{
int32_t lnum;
- err = __get_user(lnum, (__user int32_t *)argp);
+ err = get_user(lnum, (__user int32_t *)argp);
if (err) {
err = -EFAULT;
break;
@@ -587,17 +577,6 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
struct ubi_volume_desc *desc;
void __user *argp = (void __user *)arg;
- if (_IOC_NR(cmd) > UBI_CDEV_IOC_MAX_SEQ ||
- _IOC_TYPE(cmd) != UBI_IOC_MAGIC)
- return -ENOTTY;
-
- if (_IOC_DIR(cmd) && _IOC_READ)
- err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd));
- else if (_IOC_DIR(cmd) && _IOC_WRITE)
- err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd));
- if (err)
- return -EFAULT;
-
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@@ -612,7 +591,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
struct ubi_mkvol_req req;
dbg_msg("create volume");
- err = __copy_from_user(&req, argp,
+ err = copy_from_user(&req, argp,
sizeof(struct ubi_mkvol_req));
if (err) {
err = -EFAULT;
@@ -629,7 +608,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
if (err)
break;
- err = __put_user(req.vol_id, (__user int32_t *)argp);
+ err = put_user(req.vol_id, (__user int32_t *)argp);
if (err)
err = -EFAULT;
@@ -642,7 +621,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
int vol_id;
dbg_msg("remove volume");
- err = __get_user(vol_id, (__user int32_t *)argp);
+ err = get_user(vol_id, (__user int32_t *)argp);
if (err) {
err = -EFAULT;
break;
@@ -669,7 +648,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
struct ubi_rsvol_req req;
dbg_msg("re-size volume");
- err = __copy_from_user(&req, argp,
+ err = copy_from_user(&req, argp,
sizeof(struct ubi_rsvol_req));
if (err) {
err = -EFAULT;
@@ -707,7 +686,7 @@ static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
struct file_operations ubi_cdev_operations = {
.owner = THIS_MODULE,
.ioctl = ubi_cdev_ioctl,
- .llseek = no_llseek
+ .llseek = no_llseek,
};
/* UBI volume character device operations */
@@ -718,5 +697,5 @@ struct file_operations ubi_vol_cdev_operations = {
.llseek = vol_cdev_llseek,
.read = vol_cdev_read,
.write = vol_cdev_write,
- .ioctl = vol_cdev_ioctl
+ .ioctl = vol_cdev_ioctl,
};
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index 86364221faf..310341e5cd4 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -35,12 +35,12 @@
void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
{
dbg_msg("erase counter header dump:");
- dbg_msg("magic %#08x", ubi32_to_cpu(ec_hdr->magic));
+ dbg_msg("magic %#08x", be32_to_cpu(ec_hdr->magic));
dbg_msg("version %d", (int)ec_hdr->version);
- dbg_msg("ec %llu", (long long)ubi64_to_cpu(ec_hdr->ec));
- dbg_msg("vid_hdr_offset %d", ubi32_to_cpu(ec_hdr->vid_hdr_offset));
- dbg_msg("data_offset %d", ubi32_to_cpu(ec_hdr->data_offset));
- dbg_msg("hdr_crc %#08x", ubi32_to_cpu(ec_hdr->hdr_crc));
+ dbg_msg("ec %llu", (long long)be64_to_cpu(ec_hdr->ec));
+ dbg_msg("vid_hdr_offset %d", be32_to_cpu(ec_hdr->vid_hdr_offset));
+ dbg_msg("data_offset %d", be32_to_cpu(ec_hdr->data_offset));
+ dbg_msg("hdr_crc %#08x", be32_to_cpu(ec_hdr->hdr_crc));
dbg_msg("erase counter header hexdump:");
ubi_dbg_hexdump(ec_hdr, UBI_EC_HDR_SIZE);
}
@@ -52,20 +52,20 @@ void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr)
void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr)
{
dbg_msg("volume identifier header dump:");
- dbg_msg("magic %08x", ubi32_to_cpu(vid_hdr->magic));
+ dbg_msg("magic %08x", be32_to_cpu(vid_hdr->magic));
dbg_msg("version %d", (int)vid_hdr->version);
dbg_msg("vol_type %d", (int)vid_hdr->vol_type);
dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag);
dbg_msg("compat %d", (int)vid_hdr->compat);
- dbg_msg("vol_id %d", ubi32_to_cpu(vid_hdr->vol_id));
- dbg_msg("lnum %d", ubi32_to_cpu(vid_hdr->lnum));
- dbg_msg("leb_ver %u", ubi32_to_cpu(vid_hdr->leb_ver));
- dbg_msg("data_size %d", ubi32_to_cpu(vid_hdr->data_size));
- dbg_msg("used_ebs %d", ubi32_to_cpu(vid_hdr->used_ebs));
- dbg_msg("data_pad %d", ubi32_to_cpu(vid_hdr->data_pad));
+ dbg_msg("vol_id %d", be32_to_cpu(vid_hdr->vol_id));
+ dbg_msg("lnum %d", be32_to_cpu(vid_hdr->lnum));
+ dbg_msg("leb_ver %u", be32_to_cpu(vid_hdr->leb_ver));
+ dbg_msg("data_size %d", be32_to_cpu(vid_hdr->data_size));
+ dbg_msg("used_ebs %d", be32_to_cpu(vid_hdr->used_ebs));
+ dbg_msg("data_pad %d", be32_to_cpu(vid_hdr->data_pad));
dbg_msg("sqnum %llu",
- (unsigned long long)ubi64_to_cpu(vid_hdr->sqnum));
- dbg_msg("hdr_crc %08x", ubi32_to_cpu(vid_hdr->hdr_crc));
+ (unsigned long long)be64_to_cpu(vid_hdr->sqnum));
+ dbg_msg("hdr_crc %08x", be32_to_cpu(vid_hdr->hdr_crc));
dbg_msg("volume identifier header hexdump:");
}
@@ -91,7 +91,7 @@ void ubi_dbg_dump_vol_info(const struct ubi_volume *vol)
if (vol->name_len <= UBI_VOL_NAME_MAX &&
strnlen(vol->name, vol->name_len + 1) == vol->name_len) {
- dbg_msg("name %s", vol->name);
+ dbg_msg("name %s", vol->name);
} else {
dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c",
vol->name[0], vol->name[1], vol->name[2],
@@ -106,30 +106,30 @@ void ubi_dbg_dump_vol_info(const struct ubi_volume *vol)
*/
void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx)
{
- int name_len = ubi16_to_cpu(r->name_len);
+ int name_len = be16_to_cpu(r->name_len);
dbg_msg("volume table record %d dump:", idx);
- dbg_msg("reserved_pebs %d", ubi32_to_cpu(r->reserved_pebs));
- dbg_msg("alignment %d", ubi32_to_cpu(r->alignment));
- dbg_msg("data_pad %d", ubi32_to_cpu(r->data_pad));
+ dbg_msg("reserved_pebs %d", be32_to_cpu(r->reserved_pebs));
+ dbg_msg("alignment %d", be32_to_cpu(r->alignment));
+ dbg_msg("data_pad %d", be32_to_cpu(r->data_pad));
dbg_msg("vol_type %d", (int)r->vol_type);
dbg_msg("upd_marker %d", (int)r->upd_marker);
dbg_msg("name_len %d", name_len);
if (r->name[0] == '\0') {
- dbg_msg("name NULL");
+ dbg_msg("name NULL");
return;
}
if (name_len <= UBI_VOL_NAME_MAX &&
strnlen(&r->name[0], name_len + 1) == name_len) {
- dbg_msg("name %s", &r->name[0]);
+ dbg_msg("name %s", &r->name[0]);
} else {
dbg_msg("1st 5 characters of the name: %c%c%c%c%c",
r->name[0], r->name[1], r->name[2], r->name[3],
r->name[4]);
}
- dbg_msg("crc %#08x", ubi32_to_cpu(r->crc));
+ dbg_msg("crc %#08x", be32_to_cpu(r->crc));
}
/**
diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index f816ad9a36c..ff8f39548cd 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h
@@ -52,7 +52,6 @@ struct ubi_scan_volume;
struct ubi_scan_leb;
struct ubi_mkvol_req;
-void ubi_dbg_print(int type, const char *func, const char *fmt, ...);
void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr);
void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr);
void ubi_dbg_dump_vol_info(const struct ubi_volume *vol);
@@ -66,7 +65,6 @@ void ubi_dbg_hexdump(const void *buf, int size);
#define dbg_msg(fmt, ...) ({})
#define ubi_dbg_dump_stack() ({})
-#define ubi_dbg_print(func, fmt, ...) ({})
#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
#define ubi_dbg_dump_vol_info(vol) ({})
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 7c6b223b3f8..8aff9385613 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -425,10 +425,10 @@ retry:
} else if (err == UBI_IO_BITFLIPS)
scrub = 1;
- ubi_assert(lnum < ubi32_to_cpu(vid_hdr->used_ebs));
- ubi_assert(len == ubi32_to_cpu(vid_hdr->data_size));
+ ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs));
+ ubi_assert(len == be32_to_cpu(vid_hdr->data_size));
- crc = ubi32_to_cpu(vid_hdr->data_crc);
+ crc = be32_to_cpu(vid_hdr->data_crc);
ubi_free_vid_hdr(ubi, vid_hdr);
}
@@ -518,13 +518,13 @@ retry:
goto out_put;
}
- vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+ vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
if (err)
goto write_error;
data_size = offset + len;
- new_buf = kmalloc(data_size, GFP_KERNEL);
+ new_buf = vmalloc(data_size);
if (!new_buf) {
err = -ENOMEM;
goto out_put;
@@ -535,7 +535,7 @@ retry:
if (offset > 0) {
err = ubi_io_read_data(ubi, new_buf, pnum, 0, offset);
if (err && err != UBI_IO_BITFLIPS) {
- kfree(new_buf);
+ vfree(new_buf);
goto out_put;
}
}
@@ -544,11 +544,11 @@ retry:
err = ubi_io_write_data(ubi, new_buf, new_pnum, 0, data_size);
if (err) {
- kfree(new_buf);
+ vfree(new_buf);
goto write_error;
}
- kfree(new_buf);
+ vfree(new_buf);
ubi_free_vid_hdr(ubi, vid_hdr);
vol->eba_tbl[lnum] = new_pnum;
@@ -634,11 +634,11 @@ int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum,
}
vid_hdr->vol_type = UBI_VID_DYNAMIC;
- vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
- vid_hdr->vol_id = cpu_to_ubi32(vol_id);
- vid_hdr->lnum = cpu_to_ubi32(lnum);
+ vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
+ vid_hdr->vol_id = cpu_to_be32(vol_id);
+ vid_hdr->lnum = cpu_to_be32(lnum);
vid_hdr->compat = ubi_get_compat(ubi, vol_id);
- vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
+ vid_hdr->data_pad = cpu_to_be32(vol->data_pad);
retry:
pnum = ubi_wl_get_peb(ubi, dtype);
@@ -692,7 +692,7 @@ write_error:
return err;
}
- vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+ vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
ubi_msg("try another PEB");
goto retry;
}
@@ -748,17 +748,17 @@ int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum,
return err;
}
- vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
- vid_hdr->vol_id = cpu_to_ubi32(vol_id);
- vid_hdr->lnum = cpu_to_ubi32(lnum);
+ vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
+ vid_hdr->vol_id = cpu_to_be32(vol_id);
+ vid_hdr->lnum = cpu_to_be32(lnum);
vid_hdr->compat = ubi_get_compat(ubi, vol_id);
- vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
+ vid_hdr->data_pad = cpu_to_be32(vol->data_pad);
crc = crc32(UBI_CRC32_INIT, buf, data_size);
vid_hdr->vol_type = UBI_VID_STATIC;
- vid_hdr->data_size = cpu_to_ubi32(data_size);
- vid_hdr->used_ebs = cpu_to_ubi32(used_ebs);
- vid_hdr->data_crc = cpu_to_ubi32(crc);
+ vid_hdr->data_size = cpu_to_be32(data_size);
+ vid_hdr->used_ebs = cpu_to_be32(used_ebs);
+ vid_hdr->data_crc = cpu_to_be32(crc);
retry:
pnum = ubi_wl_get_peb(ubi, dtype);
@@ -813,7 +813,7 @@ write_error:
return err;
}
- vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+ vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
ubi_msg("try another PEB");
goto retry;
}
@@ -854,17 +854,17 @@ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum,
return err;
}
- vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
- vid_hdr->vol_id = cpu_to_ubi32(vol_id);
- vid_hdr->lnum = cpu_to_ubi32(lnum);
+ vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
+ vid_hdr->vol_id = cpu_to_be32(vol_id);
+ vid_hdr->lnum = cpu_to_be32(lnum);
vid_hdr->compat = ubi_get_compat(ubi, vol_id);
- vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad);
+ vid_hdr->data_pad = cpu_to_be32(vol->data_pad);
crc = crc32(UBI_CRC32_INIT, buf, len);
- vid_hdr->vol_type = UBI_VID_STATIC;
- vid_hdr->data_size = cpu_to_ubi32(len);
+ vid_hdr->vol_type = UBI_VID_DYNAMIC;
+ vid_hdr->data_size = cpu_to_be32(len);
vid_hdr->copy_flag = 1;
- vid_hdr->data_crc = cpu_to_ubi32(crc);
+ vid_hdr->data_crc = cpu_to_be32(crc);
retry:
pnum = ubi_wl_get_peb(ubi, dtype);
@@ -891,11 +891,13 @@ retry:
goto write_error;
}
- err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
- if (err) {
- ubi_free_vid_hdr(ubi, vid_hdr);
- leb_write_unlock(ubi, vol_id, lnum);
- return err;
+ if (vol->eba_tbl[lnum] >= 0) {
+ err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1);
+ if (err) {
+ ubi_free_vid_hdr(ubi, vid_hdr);
+ leb_write_unlock(ubi, vol_id, lnum);
+ return err;
+ }
}
vol->eba_tbl[lnum] = pnum;
@@ -924,7 +926,7 @@ write_error:
return err;
}
- vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+ vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
ubi_msg("try another PEB");
goto retry;
}
@@ -965,19 +967,19 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
uint32_t crc;
void *buf, *buf1 = NULL;
- vol_id = ubi32_to_cpu(vid_hdr->vol_id);
- lnum = ubi32_to_cpu(vid_hdr->lnum);
+ vol_id = be32_to_cpu(vid_hdr->vol_id);
+ lnum = be32_to_cpu(vid_hdr->lnum);
dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to);
if (vid_hdr->vol_type == UBI_VID_STATIC) {
- data_size = ubi32_to_cpu(vid_hdr->data_size);
+ data_size = be32_to_cpu(vid_hdr->data_size);
aldata_size = ALIGN(data_size, ubi->min_io_size);
} else
data_size = aldata_size =
- ubi->leb_size - ubi32_to_cpu(vid_hdr->data_pad);
+ ubi->leb_size - be32_to_cpu(vid_hdr->data_pad);
- buf = kmalloc(aldata_size, GFP_KERNEL);
+ buf = vmalloc(aldata_size);
if (!buf)
return -ENOMEM;
@@ -987,7 +989,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
*/
err = leb_write_lock(ubi, vol_id, lnum);
if (err) {
- kfree(buf);
+ vfree(buf);
return err;
}
@@ -1054,10 +1056,10 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
*/
if (data_size > 0) {
vid_hdr->copy_flag = 1;
- vid_hdr->data_size = cpu_to_ubi32(data_size);
- vid_hdr->data_crc = cpu_to_ubi32(crc);
+ vid_hdr->data_size = cpu_to_be32(data_size);
+ vid_hdr->data_crc = cpu_to_be32(crc);
}
- vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi));
+ vid_hdr->sqnum = cpu_to_be64(next_sqnum(ubi));
err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
if (err)
@@ -1082,7 +1084,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
* We've written the data and are going to read it back to make
* sure it was written correctly.
*/
- buf1 = kmalloc(aldata_size, GFP_KERNEL);
+ buf1 = vmalloc(aldata_size);
if (!buf1) {
err = -ENOMEM;
goto out_unlock;
@@ -1111,15 +1113,15 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
vol->eba_tbl[lnum] = to;
leb_write_unlock(ubi, vol_id, lnum);
- kfree(buf);
- kfree(buf1);
+ vfree(buf);
+ vfree(buf1);
return 0;
out_unlock:
leb_write_unlock(ubi, vol_id, lnum);
- kfree(buf);
- kfree(buf1);
+ vfree(buf);
+ vfree(buf1);
return err;
}
diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c
index fc9478d605f..41ff74c60e1 100644
--- a/drivers/mtd/ubi/gluebi.c
+++ b/drivers/mtd/ubi/gluebi.c
@@ -282,7 +282,6 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
mtd->flags = MTD_WRITEABLE;
mtd->writesize = ubi->min_io_size;
mtd->owner = THIS_MODULE;
- mtd->size = vol->usable_leb_size * vol->reserved_pebs;
mtd->erasesize = vol->usable_leb_size;
mtd->read = gluebi_read;
mtd->write = gluebi_write;
@@ -290,6 +289,15 @@ int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
mtd->get_device = gluebi_get_device;
mtd->put_device = gluebi_put_device;
+ /*
+ * In case of dynamic volume, MTD device size is just volume size. In
+ * case of a static volume the size is equivalent to the amount of data
+ * bytes, which is zero at this moment and will be changed after volume
+ * update.
+ */
+ if (vol->vol_type == UBI_DYNAMIC_VOLUME)
+ mtd->size = vol->usable_leb_size * vol->reserved_pebs;
+
if (add_mtd_device(mtd)) {
ubi_err("cannot not add MTD device\n");
kfree(mtd->name);
@@ -321,3 +329,20 @@ int ubi_destroy_gluebi(struct ubi_volume *vol)
kfree(mtd->name);
return 0;
}
+
+/**
+ * ubi_gluebi_updated - UBI volume was updated notifier.
+ * @vol: volume description object
+ *
+ * This function is called every time an UBI volume is updated. This function
+ * does nothing if volume @vol is dynamic, and changes MTD device size if the
+ * volume is static. This is needed because static volumes cannot be read past
+ * data they contain.
+ */
+void ubi_gluebi_updated(struct ubi_volume *vol)
+{
+ struct mtd_info *mtd = &vol->gluebi_mtd;
+
+ if (vol->vol_type == UBI_STATIC_VOLUME)
+ mtd->size = vol->used_bytes;
+}
diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c
index 438914d0515..b0d8f4cede9 100644
--- a/drivers/mtd/ubi/io.c
+++ b/drivers/mtd/ubi/io.c
@@ -125,9 +125,9 @@ static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum,
* o %UBI_IO_BITFLIPS if all the requested data were successfully read, but
* correctable bit-flips were detected; this is harmless but may indicate
* that this eraseblock may become bad soon (but do not have to);
- * o %-EBADMSG if the MTD subsystem reported about data data integrity
- * problems, for example it can me an ECC error in case of NAND; this most
- * probably means that the data is corrupted;
+ * o %-EBADMSG if the MTD subsystem reported about data integrity problems, for
+ * example it can be an ECC error in case of NAND; this most probably means
+ * that the data is corrupted;
* o %-EIO if some I/O error occurred;
* o other negative error codes in case of other errors.
*/
@@ -298,7 +298,7 @@ retry:
memset(&ei, 0, sizeof(struct erase_info));
ei.mtd = ubi->mtd;
- ei.addr = pnum * ubi->peb_size;
+ ei.addr = (loff_t)pnum * ubi->peb_size;
ei.len = ubi->peb_size;
ei.callback = erase_callback;
ei.priv = (unsigned long)&wq;
@@ -382,7 +382,7 @@ static int torture_peb(const struct ubi_device *ubi, int pnum)
void *buf;
int err, i, patt_count;
- buf = kmalloc(ubi->peb_size, GFP_KERNEL);
+ buf = vmalloc(ubi->peb_size);
if (!buf)
return -ENOMEM;
@@ -437,7 +437,7 @@ out:
* physical eraseblock which means something is wrong with it.
*/
err = -EIO;
- kfree(buf);
+ vfree(buf);
return err;
}
@@ -557,9 +557,9 @@ static int validate_ec_hdr(const struct ubi_device *ubi,
long long ec;
int vid_hdr_offset, leb_start;
- ec = ubi64_to_cpu(ec_hdr->ec);
- vid_hdr_offset = ubi32_to_cpu(ec_hdr->vid_hdr_offset);
- leb_start = ubi32_to_cpu(ec_hdr->data_offset);
+ ec = be64_to_cpu(ec_hdr->ec);
+ vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset);
+ leb_start = be32_to_cpu(ec_hdr->data_offset);
if (ec_hdr->version != UBI_VERSION) {
ubi_err("node with incompatible UBI version found: "
@@ -640,7 +640,7 @@ int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum,
read_err = err;
}
- magic = ubi32_to_cpu(ec_hdr->magic);
+ magic = be32_to_cpu(ec_hdr->magic);
if (magic != UBI_EC_HDR_MAGIC) {
/*
* The magic field is wrong. Let's check if we have read all
@@ -684,7 +684,7 @@ int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum,
}
crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
- hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc);
+ hdr_crc = be32_to_cpu(ec_hdr->hdr_crc);
if (hdr_crc != crc) {
if (verbose) {
@@ -729,12 +729,12 @@ int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum,
dbg_io("write EC header to PEB %d", pnum);
ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
- ec_hdr->magic = cpu_to_ubi32(UBI_EC_HDR_MAGIC);
+ ec_hdr->magic = cpu_to_be32(UBI_EC_HDR_MAGIC);
ec_hdr->version = UBI_VERSION;
- ec_hdr->vid_hdr_offset = cpu_to_ubi32(ubi->vid_hdr_offset);
- ec_hdr->data_offset = cpu_to_ubi32(ubi->leb_start);
+ ec_hdr->vid_hdr_offset = cpu_to_be32(ubi->vid_hdr_offset);
+ ec_hdr->data_offset = cpu_to_be32(ubi->leb_start);
crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
- ec_hdr->hdr_crc = cpu_to_ubi32(crc);
+ ec_hdr->hdr_crc = cpu_to_be32(crc);
err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr);
if (err)
@@ -757,13 +757,13 @@ static int validate_vid_hdr(const struct ubi_device *ubi,
{
int vol_type = vid_hdr->vol_type;
int copy_flag = vid_hdr->copy_flag;
- int vol_id = ubi32_to_cpu(vid_hdr->vol_id);
- int lnum = ubi32_to_cpu(vid_hdr->lnum);
+ int vol_id = be32_to_cpu(vid_hdr->vol_id);
+ int lnum = be32_to_cpu(vid_hdr->lnum);
int compat = vid_hdr->compat;
- int data_size = ubi32_to_cpu(vid_hdr->data_size);
- int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs);
- int data_pad = ubi32_to_cpu(vid_hdr->data_pad);
- int data_crc = ubi32_to_cpu(vid_hdr->data_crc);
+ int data_size = be32_to_cpu(vid_hdr->data_size);
+ int used_ebs = be32_to_cpu(vid_hdr->used_ebs);
+ int data_pad = be32_to_cpu(vid_hdr->data_pad);
+ int data_crc = be32_to_cpu(vid_hdr->data_crc);
int usable_leb_size = ubi->leb_size - data_pad;
if (copy_flag != 0 && copy_flag != 1) {
@@ -914,7 +914,7 @@ int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum,
read_err = err;
}
- magic = ubi32_to_cpu(vid_hdr->magic);
+ magic = be32_to_cpu(vid_hdr->magic);
if (magic != UBI_VID_HDR_MAGIC) {
/*
* If we have read all 0xFF bytes, the VID header probably does
@@ -957,7 +957,7 @@ int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum,
}
crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
- hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc);
+ hdr_crc = be32_to_cpu(vid_hdr->hdr_crc);
if (hdr_crc != crc) {
if (verbose) {
@@ -1007,10 +1007,10 @@ int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum,
if (err)
return err > 0 ? -EINVAL: err;
- vid_hdr->magic = cpu_to_ubi32(UBI_VID_HDR_MAGIC);
+ vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC);
vid_hdr->version = UBI_VERSION;
crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
- vid_hdr->hdr_crc = cpu_to_ubi32(crc);
+ vid_hdr->hdr_crc = cpu_to_be32(crc);
err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr);
if (err)
@@ -1060,7 +1060,7 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
int err;
uint32_t magic;
- magic = ubi32_to_cpu(ec_hdr->magic);
+ magic = be32_to_cpu(ec_hdr->magic);
if (magic != UBI_EC_HDR_MAGIC) {
ubi_err("bad magic %#08x, must be %#08x",
magic, UBI_EC_HDR_MAGIC);
@@ -1105,7 +1105,7 @@ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum)
goto exit;
crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
- hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc);
+ hdr_crc = be32_to_cpu(ec_hdr->hdr_crc);
if (hdr_crc != crc) {
ubi_err("bad CRC, calculated %#08x, read %#08x", crc, hdr_crc);
ubi_err("paranoid check failed for PEB %d", pnum);
@@ -1137,7 +1137,7 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
int err;
uint32_t magic;
- magic = ubi32_to_cpu(vid_hdr->magic);
+ magic = be32_to_cpu(vid_hdr->magic);
if (magic != UBI_VID_HDR_MAGIC) {
ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x",
magic, pnum, UBI_VID_HDR_MAGIC);
@@ -1187,7 +1187,7 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum)
goto exit;
crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC);
- hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc);
+ hdr_crc = be32_to_cpu(vid_hdr->hdr_crc);
if (hdr_crc != crc) {
ubi_err("bad VID header CRC at PEB %d, calculated %#08x, "
"read %#08x", pnum, crc, hdr_crc);
@@ -1224,9 +1224,10 @@ static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum,
void *buf;
loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
- buf = kzalloc(len, GFP_KERNEL);
+ buf = vmalloc(len);
if (!buf)
return -ENOMEM;
+ memset(buf, 0, len);
err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
if (err && err != -EUCLEAN) {
@@ -1242,7 +1243,7 @@ static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum,
goto fail;
}
- kfree(buf);
+ vfree(buf);
return 0;
fail:
@@ -1252,7 +1253,7 @@ fail:
err = 1;
error:
ubi_dbg_dump_stack();
- kfree(buf);
+ vfree(buf);
return err;
}
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index d352c4575c3..4a458e83e4e 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -37,14 +37,9 @@ int ubi_get_device_info(int ubi_num, struct ubi_device_info *di)
{
const struct ubi_device *ubi;
- if (!try_module_get(THIS_MODULE))
- return -ENODEV;
-
if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES ||
- !ubi_devices[ubi_num]) {
- module_put(THIS_MODULE);
+ !ubi_devices[ubi_num])
return -ENODEV;
- }
ubi = ubi_devices[ubi_num];
di->ubi_num = ubi->ubi_num;
@@ -52,7 +47,6 @@ int ubi_get_device_info(int ubi_num, struct ubi_device_info *di)
di->min_io_size = ubi->min_io_size;
di->ro_mode = ubi->ro_mode;
di->cdev = MKDEV(ubi->major, 0);
- module_put(THIS_MODULE);
return 0;
}
EXPORT_SYMBOL_GPL(ubi_get_device_info);
@@ -319,9 +313,14 @@ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
offset + len > vol->usable_leb_size)
return -EINVAL;
- if (vol->vol_type == UBI_STATIC_VOLUME && lnum == vol->used_ebs - 1 &&
- offset + len > vol->last_eb_bytes)
- return -EINVAL;
+ if (vol->vol_type == UBI_STATIC_VOLUME) {
+ if (vol->used_ebs == 0)
+ /* Empty static UBI volume */
+ return 0;
+ if (lnum == vol->used_ebs - 1 &&
+ offset + len > vol->last_eb_bytes)
+ return -EINVAL;
+ }
if (vol->upd_marker)
return -EBADF;
diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c
index 38d4e6757dc..9e2338c8e2c 100644
--- a/drivers/mtd/ubi/misc.c
+++ b/drivers/mtd/ubi/misc.c
@@ -67,7 +67,7 @@ int ubi_check_volume(struct ubi_device *ubi, int vol_id)
if (vol->vol_type != UBI_STATIC_VOLUME)
return 0;
- buf = kmalloc(vol->usable_leb_size, GFP_KERNEL);
+ buf = vmalloc(vol->usable_leb_size);
if (!buf)
return -ENOMEM;
@@ -87,7 +87,7 @@ int ubi_check_volume(struct ubi_device *ubi, int vol_id)
}
}
- kfree(buf);
+ vfree(buf);
return err;
}
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 473f3200b86..94ee5493441 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -24,7 +24,7 @@
* This unit is responsible for scanning the flash media, checking UBI
* headers and providing complete information about the UBI flash image.
*
- * The scanning information is reoresented by a &struct ubi_scan_info' object.
+ * The scanning information is represented by a &struct ubi_scan_info' object.
* Information about found volumes is represented by &struct ubi_scan_volume
* objects which are kept in volume RB-tree with root at the @volumes field.
* The RB-tree is indexed by the volume ID.
@@ -55,8 +55,19 @@ static int paranoid_check_si(const struct ubi_device *ubi,
static struct ubi_ec_hdr *ech;
static struct ubi_vid_hdr *vidh;
-int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec,
- struct list_head *list)
+/**
+ * add_to_list - add physical eraseblock to a list.
+ * @si: scanning information
+ * @pnum: physical eraseblock number to add
+ * @ec: erase counter of the physical eraseblock
+ * @list: the list to add to
+ *
+ * This function adds physical eraseblock @pnum to free, erase, corrupted or
+ * alien lists. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
+ struct list_head *list)
{
struct ubi_scan_leb *seb;
@@ -121,9 +132,9 @@ static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr,
const struct ubi_scan_volume *sv, int pnum)
{
int vol_type = vid_hdr->vol_type;
- int vol_id = ubi32_to_cpu(vid_hdr->vol_id);
- int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs);
- int data_pad = ubi32_to_cpu(vid_hdr->data_pad);
+ int vol_id = be32_to_cpu(vid_hdr->vol_id);
+ int used_ebs = be32_to_cpu(vid_hdr->used_ebs);
+ int data_pad = be32_to_cpu(vid_hdr->data_pad);
if (sv->leb_count != 0) {
int sv_vol_type;
@@ -189,7 +200,7 @@ static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id,
struct ubi_scan_volume *sv;
struct rb_node **p = &si->volumes.rb_node, *parent = NULL;
- ubi_assert(vol_id == ubi32_to_cpu(vid_hdr->vol_id));
+ ubi_assert(vol_id == be32_to_cpu(vid_hdr->vol_id));
/* Walk the volume RB-tree to look if this volume is already present */
while (*p) {
@@ -211,11 +222,10 @@ static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id,
return ERR_PTR(-ENOMEM);
sv->highest_lnum = sv->leb_count = 0;
- si->max_sqnum = 0;
sv->vol_id = vol_id;
sv->root = RB_ROOT;
- sv->used_ebs = ubi32_to_cpu(vid_hdr->used_ebs);
- sv->data_pad = ubi32_to_cpu(vid_hdr->data_pad);
+ sv->used_ebs = be32_to_cpu(vid_hdr->used_ebs);
+ sv->data_pad = be32_to_cpu(vid_hdr->data_pad);
sv->compat = vid_hdr->compat;
sv->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME
: UBI_STATIC_VOLUME;
@@ -257,10 +267,10 @@ static int compare_lebs(const struct ubi_device *ubi,
int len, err, second_is_newer, bitflips = 0, corrupted = 0;
uint32_t data_crc, crc;
struct ubi_vid_hdr *vidh = NULL;
- unsigned long long sqnum2 = ubi64_to_cpu(vid_hdr->sqnum);
+ unsigned long long sqnum2 = be64_to_cpu(vid_hdr->sqnum);
if (seb->sqnum == 0 && sqnum2 == 0) {
- long long abs, v1 = seb->leb_ver, v2 = ubi32_to_cpu(vid_hdr->leb_ver);
+ long long abs, v1 = seb->leb_ver, v2 = be32_to_cpu(vid_hdr->leb_ver);
/*
* UBI constantly increases the logical eraseblock version
@@ -344,8 +354,8 @@ static int compare_lebs(const struct ubi_device *ubi,
/* Read the data of the copy and check the CRC */
- len = ubi32_to_cpu(vid_hdr->data_size);
- buf = kmalloc(len, GFP_KERNEL);
+ len = be32_to_cpu(vid_hdr->data_size);
+ buf = vmalloc(len);
if (!buf) {
err = -ENOMEM;
goto out_free_vidh;
@@ -355,7 +365,7 @@ static int compare_lebs(const struct ubi_device *ubi,
if (err && err != UBI_IO_BITFLIPS)
goto out_free_buf;
- data_crc = ubi32_to_cpu(vid_hdr->data_crc);
+ data_crc = be32_to_cpu(vid_hdr->data_crc);
crc = crc32(UBI_CRC32_INIT, buf, len);
if (crc != data_crc) {
dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x",
@@ -368,7 +378,7 @@ static int compare_lebs(const struct ubi_device *ubi,
bitflips = !!err;
}
- kfree(buf);
+ vfree(buf);
ubi_free_vid_hdr(ubi, vidh);
if (second_is_newer)
@@ -379,7 +389,7 @@ static int compare_lebs(const struct ubi_device *ubi,
return second_is_newer | (bitflips << 1) | (corrupted << 2);
out_free_buf:
- kfree(buf);
+ vfree(buf);
out_free_vidh:
ubi_free_vid_hdr(ubi, vidh);
ubi_assert(err < 0);
@@ -396,8 +406,12 @@ out_free_vidh:
* @vid_hdr: the volume identifier header
* @bitflips: if bit-flips were detected when this physical eraseblock was read
*
- * This function returns zero in case of success and a negative error code in
- * case of failure.
+ * This function adds information about a used physical eraseblock to the
+ * 'used' tree of the corresponding volume. The function is rather complex
+ * because it has to handle cases when this is not the first physical
+ * eraseblock belonging to the same logical eraseblock, and the newer one has
+ * to be picked, while the older one has to be dropped. This function returns
+ * zero in case of success and a negative error code in case of failure.
*/
int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
int pnum, int ec, const struct ubi_vid_hdr *vid_hdr,
@@ -410,10 +424,10 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
struct ubi_scan_leb *seb;
struct rb_node **p, *parent = NULL;
- vol_id = ubi32_to_cpu(vid_hdr->vol_id);
- lnum = ubi32_to_cpu(vid_hdr->lnum);
- sqnum = ubi64_to_cpu(vid_hdr->sqnum);
- leb_ver = ubi32_to_cpu(vid_hdr->leb_ver);
+ vol_id = be32_to_cpu(vid_hdr->vol_id);
+ lnum = be32_to_cpu(vid_hdr->lnum);
+ sqnum = be64_to_cpu(vid_hdr->sqnum);
+ leb_ver = be32_to_cpu(vid_hdr->leb_ver);
dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d",
pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips);
@@ -422,6 +436,9 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
if (IS_ERR(sv) < 0)
return PTR_ERR(sv);
+ if (si->max_sqnum < sqnum)
+ si->max_sqnum = sqnum;
+
/*
* Walk the RB-tree of logical eraseblocks of volume @vol_id to look
* if this is the first instance of this logical eraseblock or not.
@@ -492,11 +509,11 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
return err;
if (cmp_res & 4)
- err = ubi_scan_add_to_list(si, seb->pnum,
- seb->ec, &si->corr);
+ err = add_to_list(si, seb->pnum, seb->ec,
+ &si->corr);
else
- err = ubi_scan_add_to_list(si, seb->pnum,
- seb->ec, &si->erase);
+ err = add_to_list(si, seb->pnum, seb->ec,
+ &si->erase);
if (err)
return err;
@@ -508,7 +525,7 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
if (sv->highest_lnum == lnum)
sv->last_data_size =
- ubi32_to_cpu(vid_hdr->data_size);
+ be32_to_cpu(vid_hdr->data_size);
return 0;
} else {
@@ -517,11 +534,9 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
* previously.
*/
if (cmp_res & 4)
- return ubi_scan_add_to_list(si, pnum, ec,
- &si->corr);
+ return add_to_list(si, pnum, ec, &si->corr);
else
- return ubi_scan_add_to_list(si, pnum, ec,
- &si->erase);
+ return add_to_list(si, pnum, ec, &si->erase);
}
}
@@ -547,12 +562,9 @@ int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
if (sv->highest_lnum <= lnum) {
sv->highest_lnum = lnum;
- sv->last_data_size = ubi32_to_cpu(vid_hdr->data_size);
+ sv->last_data_size = be32_to_cpu(vid_hdr->data_size);
}
- if (si->max_sqnum < sqnum)
- si->max_sqnum = sqnum;
-
sv->leb_count += 1;
rb_link_node(&seb->u.rb, parent, p);
rb_insert_color(&seb->u.rb, &sv->root);
@@ -674,7 +686,7 @@ int ubi_scan_erase_peb(const struct ubi_device *ubi,
return -EINVAL;
}
- ec_hdr->ec = cpu_to_ubi64(ec);
+ ec_hdr->ec = cpu_to_be64(ec);
err = ubi_io_sync_erase(ubi, pnum, 0);
if (err < 0)
@@ -754,7 +766,7 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(const struct ubi_device *ubi,
* @si: scanning information
* @pnum: the physical eraseblock number
*
- * This function returns a zero if the physical eraseblock was succesfully
+ * This function returns a zero if the physical eraseblock was successfully
* handled and a negative error code in case of failure.
*/
static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum)
@@ -783,8 +795,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum
else if (err == UBI_IO_BITFLIPS)
bitflips = 1;
else if (err == UBI_IO_PEB_EMPTY)
- return ubi_scan_add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC,
- &si->erase);
+ return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase);
else if (err == UBI_IO_BAD_EC_HDR) {
/*
* We have to also look at the VID header, possibly it is not
@@ -806,7 +817,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum
return -EINVAL;
}
- ec = ubi64_to_cpu(ech->ec);
+ ec = be64_to_cpu(ech->ec);
if (ec > UBI_MAX_ERASECOUNTER) {
/*
* Erase counter overflow. The EC headers have 64 bits
@@ -832,28 +843,28 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum
else if (err == UBI_IO_BAD_VID_HDR ||
(err == UBI_IO_PEB_FREE && ec_corr)) {
/* VID header is corrupted */
- err = ubi_scan_add_to_list(si, pnum, ec, &si->corr);
+ err = add_to_list(si, pnum, ec, &si->corr);
if (err)
return err;
goto adjust_mean_ec;
} else if (err == UBI_IO_PEB_FREE) {
/* No VID header - the physical eraseblock is free */
- err = ubi_scan_add_to_list(si, pnum, ec, &si->free);
+ err = add_to_list(si, pnum, ec, &si->free);
if (err)
return err;
goto adjust_mean_ec;
}
- vol_id = ubi32_to_cpu(vidh->vol_id);
+ vol_id = be32_to_cpu(vidh->vol_id);
if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOL_ID) {
- int lnum = ubi32_to_cpu(vidh->lnum);
+ int lnum = be32_to_cpu(vidh->lnum);
/* Unsupported internal volume */
switch (vidh->compat) {
case UBI_COMPAT_DELETE:
ubi_msg("\"delete\" compatible internal volume %d:%d"
" found, remove it", vol_id, lnum);
- err = ubi_scan_add_to_list(si, pnum, ec, &si->corr);
+ err = add_to_list(si, pnum, ec, &si->corr);
if (err)
return err;
break;
@@ -868,7 +879,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum
case UBI_COMPAT_PRESERVE:
ubi_msg("\"preserve\" compatible internal volume %d:%d"
" found", vol_id, lnum);
- err = ubi_scan_add_to_list(si, pnum, ec, &si->alien);
+ err = add_to_list(si, pnum, ec, &si->alien);
if (err)
return err;
si->alien_peb_count += 1;
@@ -1109,7 +1120,7 @@ static int paranoid_check_si(const struct ubi_device *ubi,
uint8_t *buf;
/*
- * At first, check that scanning information is ok.
+ * At first, check that scanning information is OK.
*/
ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
int leb_count = 0;
@@ -1249,12 +1260,12 @@ static int paranoid_check_si(const struct ubi_device *ubi,
goto bad_vid_hdr;
}
- if (seb->sqnum != ubi64_to_cpu(vidh->sqnum)) {
+ if (seb->sqnum != be64_to_cpu(vidh->sqnum)) {
ubi_err("bad sqnum %llu", seb->sqnum);
goto bad_vid_hdr;
}
- if (sv->vol_id != ubi32_to_cpu(vidh->vol_id)) {
+ if (sv->vol_id != be32_to_cpu(vidh->vol_id)) {
ubi_err("bad vol_id %d", sv->vol_id);
goto bad_vid_hdr;
}
@@ -1264,22 +1275,22 @@ static int paranoid_check_si(const struct ubi_device *ubi,
goto bad_vid_hdr;
}
- if (seb->lnum != ubi32_to_cpu(vidh->lnum)) {
+ if (seb->lnum != be32_to_cpu(vidh->lnum)) {
ubi_err("bad lnum %d", seb->lnum);
goto bad_vid_hdr;
}
- if (sv->used_ebs != ubi32_to_cpu(vidh->used_ebs)) {
+ if (sv->used_ebs != be32_to_cpu(vidh->used_ebs)) {
ubi_err("bad used_ebs %d", sv->used_ebs);
goto bad_vid_hdr;
}
- if (sv->data_pad != ubi32_to_cpu(vidh->data_pad)) {
+ if (sv->data_pad != be32_to_cpu(vidh->data_pad)) {
ubi_err("bad data_pad %d", sv->data_pad);
goto bad_vid_hdr;
}
- if (seb->leb_ver != ubi32_to_cpu(vidh->leb_ver)) {
+ if (seb->leb_ver != be32_to_cpu(vidh->leb_ver)) {
ubi_err("bad leb_ver %u", seb->leb_ver);
goto bad_vid_hdr;
}
@@ -1288,12 +1299,12 @@ static int paranoid_check_si(const struct ubi_device *ubi,
if (!last_seb)
continue;
- if (sv->highest_lnum != ubi32_to_cpu(vidh->lnum)) {
+ if (sv->highest_lnum != be32_to_cpu(vidh->lnum)) {
ubi_err("bad highest_lnum %d", sv->highest_lnum);
goto bad_vid_hdr;
}
- if (sv->last_data_size != ubi32_to_cpu(vidh->data_size)) {
+ if (sv->last_data_size != be32_to_cpu(vidh->data_size)) {
ubi_err("bad last_data_size %d", sv->last_data_size);
goto bad_vid_hdr;
}
@@ -1310,8 +1321,10 @@ static int paranoid_check_si(const struct ubi_device *ubi,
memset(buf, 1, ubi->peb_count);
for (pnum = 0; pnum < ubi->peb_count; pnum++) {
err = ubi_io_is_bad(ubi, pnum);
- if (err < 0)
+ if (err < 0) {
+ kfree(buf);
return err;
+ }
else if (err)
buf[pnum] = 0;
}
diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h
index 3949f6192c7..140e82e2653 100644
--- a/drivers/mtd/ubi/scan.h
+++ b/drivers/mtd/ubi/scan.h
@@ -147,8 +147,6 @@ static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv,
list_add_tail(&seb->u.list, list);
}
-int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec,
- struct list_head *list);
int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si,
int pnum, int ec, const struct ubi_vid_hdr *vid_hdr,
int bitflips);
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index feb647f108f..5959f91be24 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -35,6 +35,7 @@
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/string.h>
+#include <linux/vmalloc.h>
#include <linux/mtd/mtd.h>
#include <mtd/ubi-header.h>
@@ -374,9 +375,11 @@ void ubi_calculate_reserved(struct ubi_device *ubi);
#ifdef CONFIG_MTD_UBI_GLUEBI
int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol);
int ubi_destroy_gluebi(struct ubi_volume *vol);
+void ubi_gluebi_updated(struct ubi_volume *vol);
#else
#define ubi_create_gluebi(ubi, vol) 0
#define ubi_destroy_gluebi(vol) 0
+#define ubi_gluebi_updated(vol)
#endif
/* eba.c */
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 8925b977e3d..0efc586a832 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -150,7 +150,7 @@ int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes)
vol->updating = 0;
}
- vol->upd_buf = kmalloc(ubi->leb_size, GFP_KERNEL);
+ vol->upd_buf = vmalloc(ubi->leb_size);
if (!vol->upd_buf)
return -ENOMEM;
@@ -339,7 +339,7 @@ int ubi_more_update_data(struct ubi_device *ubi, int vol_id,
err = ubi_wl_flush(ubi);
if (err == 0) {
err = to_write;
- kfree(vol->upd_buf);
+ vfree(vol->upd_buf);
vol->updating = 0;
}
}
diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 622d0d18952..ea0d5c825ab 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -228,7 +228,7 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
for (i = 0; i < ubi->vtbl_slots; i++)
if (ubi->volumes[i] &&
ubi->volumes[i]->name_len == req->name_len &&
- strcmp(ubi->volumes[i]->name, req->name) == 0) {
+ !strcmp(ubi->volumes[i]->name, req->name)) {
dbg_err("volume \"%s\" exists (ID %d)", req->name, i);
goto out_unlock;
}
@@ -243,7 +243,6 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
/* Reserve physical eraseblocks */
if (vol->reserved_pebs > ubi->avail_pebs) {
dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);
- spin_unlock(&ubi->volumes_lock);
err = -ENOSPC;
goto out_unlock;
}
@@ -281,7 +280,8 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
vol->used_ebs = vol->reserved_pebs;
vol->last_eb_bytes = vol->usable_leb_size;
- vol->used_bytes = vol->used_ebs * vol->usable_leb_size;
+ vol->used_bytes =
+ (long long)vol->used_ebs * vol->usable_leb_size;
} else {
bytes = vol->used_bytes;
vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size);
@@ -320,10 +320,10 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
/* Fill volume table record */
memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record));
- vtbl_rec.reserved_pebs = cpu_to_ubi32(vol->reserved_pebs);
- vtbl_rec.alignment = cpu_to_ubi32(vol->alignment);
- vtbl_rec.data_pad = cpu_to_ubi32(vol->data_pad);
- vtbl_rec.name_len = cpu_to_ubi16(vol->name_len);
+ vtbl_rec.reserved_pebs = cpu_to_be32(vol->reserved_pebs);
+ vtbl_rec.alignment = cpu_to_be32(vol->alignment);
+ vtbl_rec.data_pad = cpu_to_be32(vol->data_pad);
+ vtbl_rec.name_len = cpu_to_be16(vol->name_len);
if (vol->vol_type == UBI_DYNAMIC_VOLUME)
vtbl_rec.vol_type = UBI_VID_DYNAMIC;
else
@@ -352,6 +352,7 @@ out_acc:
spin_lock(&ubi->volumes_lock);
ubi->rsvd_pebs -= vol->reserved_pebs;
ubi->avail_pebs += vol->reserved_pebs;
+ ubi->volumes[vol_id] = NULL;
out_unlock:
spin_unlock(&ubi->volumes_lock);
kfree(vol);
@@ -368,6 +369,7 @@ out_sysfs:
spin_lock(&ubi->volumes_lock);
ubi->rsvd_pebs -= vol->reserved_pebs;
ubi->avail_pebs += vol->reserved_pebs;
+ ubi->volumes[vol_id] = NULL;
spin_unlock(&ubi->volumes_lock);
volume_sysfs_close(vol);
return err;
@@ -503,7 +505,7 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
/* Change volume table record */
memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record));
- vtbl_rec.reserved_pebs = cpu_to_ubi32(reserved_pebs);
+ vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs);
err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec);
if (err)
goto out_acc;
@@ -537,7 +539,8 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
vol->used_ebs = reserved_pebs;
vol->last_eb_bytes = vol->usable_leb_size;
- vol->used_bytes = vol->used_ebs * vol->usable_leb_size;
+ vol->used_bytes =
+ (long long)vol->used_ebs * vol->usable_leb_size;
}
paranoid_check_volumes(ubi);
@@ -643,21 +646,33 @@ void ubi_free_volume(struct ubi_device *ubi, int vol_id)
* @ubi: UBI device description object
* @vol_id: volume ID
*/
-static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id)
+static void paranoid_check_volume(struct ubi_device *ubi, int vol_id)
{
int idx = vol_id2idx(ubi, vol_id);
int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker;
- const struct ubi_volume *vol = ubi->volumes[idx];
+ const struct ubi_volume *vol;
long long n;
const char *name;
- reserved_pebs = ubi32_to_cpu(ubi->vtbl[vol_id].reserved_pebs);
+ spin_lock(&ubi->volumes_lock);
+ reserved_pebs = be32_to_cpu(ubi->vtbl[vol_id].reserved_pebs);
+ vol = ubi->volumes[idx];
if (!vol) {
if (reserved_pebs) {
ubi_err("no volume info, but volume exists");
goto fail;
}
+ spin_unlock(&ubi->volumes_lock);
+ return;
+ }
+
+ if (vol->exclusive) {
+ /*
+ * The volume may be being created at the moment, do not check
+ * it (e.g., it may be in the middle of ubi_create_volume().
+ */
+ spin_unlock(&ubi->volumes_lock);
return;
}
@@ -726,7 +741,7 @@ static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id)
goto fail;
}
- n = vol->used_ebs * vol->usable_leb_size;
+ n = (long long)vol->used_ebs * vol->usable_leb_size;
if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
if (vol->corrupted != 0) {
ubi_err("corrupted dynamic volume");
@@ -765,9 +780,9 @@ static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id)
}
}
- alignment = ubi32_to_cpu(ubi->vtbl[vol_id].alignment);
- data_pad = ubi32_to_cpu(ubi->vtbl[vol_id].data_pad);
- name_len = ubi16_to_cpu(ubi->vtbl[vol_id].name_len);
+ alignment = be32_to_cpu(ubi->vtbl[vol_id].alignment);
+ data_pad = be32_to_cpu(ubi->vtbl[vol_id].data_pad);
+ name_len = be16_to_cpu(ubi->vtbl[vol_id].name_len);
upd_marker = ubi->vtbl[vol_id].upd_marker;
name = &ubi->vtbl[vol_id].name[0];
if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC)
@@ -782,12 +797,14 @@ static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id)
goto fail;
}
+ spin_unlock(&ubi->volumes_lock);
return;
fail:
- ubi_err("paranoid check failed");
+ ubi_err("paranoid check failed for volume %d", vol_id);
ubi_dbg_dump_vol_info(vol);
ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
+ spin_unlock(&ubi->volumes_lock);
BUG();
}
@@ -800,10 +817,8 @@ static void paranoid_check_volumes(struct ubi_device *ubi)
int i;
mutex_lock(&ubi->vtbl_mutex);
- spin_lock(&ubi->volumes_lock);
for (i = 0; i < ubi->vtbl_slots; i++)
paranoid_check_volume(ubi, i);
- spin_unlock(&ubi->volumes_lock);
mutex_unlock(&ubi->vtbl_mutex);
}
#endif
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index b6fd6bbd941..bc5df50813d 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -93,12 +93,9 @@ int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
vtbl_rec = &empty_vtbl_record;
else {
crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC);
- vtbl_rec->crc = cpu_to_ubi32(crc);
+ vtbl_rec->crc = cpu_to_be32(crc);
}
- dbg_msg("change record %d", idx);
- ubi_dbg_dump_vtbl_record(vtbl_rec, idx);
-
mutex_lock(&ubi->vtbl_mutex);
memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record));
for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
@@ -141,18 +138,18 @@ static int vtbl_check(const struct ubi_device *ubi,
for (i = 0; i < ubi->vtbl_slots; i++) {
cond_resched();
- reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs);
- alignment = ubi32_to_cpu(vtbl[i].alignment);
- data_pad = ubi32_to_cpu(vtbl[i].data_pad);
+ reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs);
+ alignment = be32_to_cpu(vtbl[i].alignment);
+ data_pad = be32_to_cpu(vtbl[i].data_pad);
upd_marker = vtbl[i].upd_marker;
vol_type = vtbl[i].vol_type;
- name_len = ubi16_to_cpu(vtbl[i].name_len);
+ name_len = be16_to_cpu(vtbl[i].name_len);
name = &vtbl[i].name[0];
crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC);
- if (ubi32_to_cpu(vtbl[i].crc) != crc) {
+ if (be32_to_cpu(vtbl[i].crc) != crc) {
ubi_err("bad CRC at record %u: %#08x, not %#08x",
- i, crc, ubi32_to_cpu(vtbl[i].crc));
+ i, crc, be32_to_cpu(vtbl[i].crc));
ubi_dbg_dump_vtbl_record(&vtbl[i], i);
return 1;
}
@@ -225,8 +222,8 @@ static int vtbl_check(const struct ubi_device *ubi,
/* Checks that all names are unique */
for (i = 0; i < ubi->vtbl_slots - 1; i++) {
for (n = i + 1; n < ubi->vtbl_slots; n++) {
- int len1 = ubi16_to_cpu(vtbl[i].name_len);
- int len2 = ubi16_to_cpu(vtbl[n].name_len);
+ int len1 = be16_to_cpu(vtbl[i].name_len);
+ int len2 = be16_to_cpu(vtbl[n].name_len);
if (len1 > 0 && len1 == len2 &&
!strncmp(vtbl[i].name, vtbl[n].name, len1)) {
@@ -288,13 +285,13 @@ retry:
}
vid_hdr->vol_type = UBI_VID_DYNAMIC;
- vid_hdr->vol_id = cpu_to_ubi32(UBI_LAYOUT_VOL_ID);
+ vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOL_ID);
vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT;
vid_hdr->data_size = vid_hdr->used_ebs =
- vid_hdr->data_pad = cpu_to_ubi32(0);
- vid_hdr->lnum = cpu_to_ubi32(copy);
- vid_hdr->sqnum = cpu_to_ubi64(++si->max_sqnum);
- vid_hdr->leb_ver = cpu_to_ubi32(old_seb ? old_seb->leb_ver + 1: 0);
+ vid_hdr->data_pad = cpu_to_be32(0);
+ vid_hdr->lnum = cpu_to_be32(copy);
+ vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum);
+ vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0);
/* The EC header is already there, write the VID header */
err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
@@ -317,14 +314,15 @@ retry:
return err;
write_error:
- kfree(new_seb);
- /* May be this physical eraseblock went bad, try to pick another one */
- if (++tries <= 5) {
- err = ubi_scan_add_to_list(si, new_seb->pnum, new_seb->ec,
- &si->corr);
- if (!err)
- goto retry;
+ if (err == -EIO && ++tries <= 5) {
+ /*
+ * Probably this physical eraseblock went bad, try to pick
+ * another one.
+ */
+ list_add_tail(&new_seb->u.list, &si->corr);
+ goto retry;
}
+ kfree(new_seb);
out_free:
ubi_free_vid_hdr(ubi, vid_hdr);
return err;
@@ -380,11 +378,12 @@ static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi,
/* Read both LEB 0 and LEB 1 into memory */
ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
- leb[seb->lnum] = kzalloc(ubi->vtbl_size, GFP_KERNEL);
+ leb[seb->lnum] = vmalloc(ubi->vtbl_size);
if (!leb[seb->lnum]) {
err = -ENOMEM;
goto out_free;
}
+ memset(leb[seb->lnum], 0, ubi->vtbl_size);
err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
ubi->vtbl_size);
@@ -415,7 +414,7 @@ static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi,
}
/* Both LEB 1 and LEB 2 are OK and consistent */
- kfree(leb[1]);
+ vfree(leb[1]);
return leb[0];
} else {
/* LEB 0 is corrupted or does not exist */
@@ -436,13 +435,13 @@ static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi,
goto out_free;
ubi_msg("volume table was restored");
- kfree(leb[0]);
+ vfree(leb[0]);
return leb[1];
}
out_free:
- kfree(leb[0]);
- kfree(leb[1]);
+ vfree(leb[0]);
+ vfree(leb[1]);
return ERR_PTR(err);
}
@@ -460,9 +459,10 @@ static struct ubi_vtbl_record *create_empty_lvol(const struct ubi_device *ubi,
int i;
struct ubi_vtbl_record *vtbl;
- vtbl = kzalloc(ubi->vtbl_size, GFP_KERNEL);
+ vtbl = vmalloc(ubi->vtbl_size);
if (!vtbl)
return ERR_PTR(-ENOMEM);
+ memset(vtbl, 0, ubi->vtbl_size);
for (i = 0; i < ubi->vtbl_slots; i++)
memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE);
@@ -472,7 +472,7 @@ static struct ubi_vtbl_record *create_empty_lvol(const struct ubi_device *ubi,
err = create_vtbl(ubi, si, i, vtbl);
if (err) {
- kfree(vtbl);
+ vfree(vtbl);
return ERR_PTR(err);
}
}
@@ -500,19 +500,19 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
for (i = 0; i < ubi->vtbl_slots; i++) {
cond_resched();
- if (ubi32_to_cpu(vtbl[i].reserved_pebs) == 0)
+ if (be32_to_cpu(vtbl[i].reserved_pebs) == 0)
continue; /* Empty record */
vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);
if (!vol)
return -ENOMEM;
- vol->reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs);
- vol->alignment = ubi32_to_cpu(vtbl[i].alignment);
- vol->data_pad = ubi32_to_cpu(vtbl[i].data_pad);
+ vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs);
+ vol->alignment = be32_to_cpu(vtbl[i].alignment);
+ vol->data_pad = be32_to_cpu(vtbl[i].data_pad);
vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ?
UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME;
- vol->name_len = ubi16_to_cpu(vtbl[i].name_len);
+ vol->name_len = be16_to_cpu(vtbl[i].name_len);
vol->usable_leb_size = ubi->leb_size - vol->data_pad;
memcpy(vol->name, vtbl[i].name, vol->name_len);
vol->name[vol->name_len] = '\0';
@@ -531,7 +531,8 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
vol->used_ebs = vol->reserved_pebs;
vol->last_eb_bytes = vol->usable_leb_size;
- vol->used_bytes = vol->used_ebs * vol->usable_leb_size;
+ vol->used_bytes =
+ (long long)vol->used_ebs * vol->usable_leb_size;
continue;
}
@@ -561,7 +562,8 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
}
vol->used_ebs = sv->used_ebs;
- vol->used_bytes = (vol->used_ebs - 1) * vol->usable_leb_size;
+ vol->used_bytes =
+ (long long)(vol->used_ebs - 1) * vol->usable_leb_size;
vol->used_bytes += sv->last_data_size;
vol->last_eb_bytes = sv->last_data_size;
}
@@ -578,7 +580,8 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
vol->usable_leb_size = ubi->leb_size;
vol->used_ebs = vol->reserved_pebs;
vol->last_eb_bytes = vol->reserved_pebs;
- vol->used_bytes = vol->used_ebs * (ubi->leb_size - vol->data_pad);
+ vol->used_bytes =
+ (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad);
vol->vol_id = UBI_LAYOUT_VOL_ID;
ubi_assert(!ubi->volumes[i]);
@@ -718,7 +721,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
int i, err;
struct ubi_scan_volume *sv;
- empty_vtbl_record.crc = cpu_to_ubi32(0xf116c36b);
+ empty_vtbl_record.crc = cpu_to_be32(0xf116c36b);
/*
* The number of supported volumes is limited by the eraseblock size
@@ -783,7 +786,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
return 0;
out_free:
- kfree(ubi->vtbl);
+ vfree(ubi->vtbl);
for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++)
if (ubi->volumes[i]) {
kfree(ubi->volumes[i]);
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index ab2174a56bc..9de95376209 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -667,7 +667,7 @@ static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int tortur
dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
- ec_hdr->ec = cpu_to_ubi64(ec);
+ ec_hdr->ec = cpu_to_be64(ec);
err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
if (err)
@@ -1060,9 +1060,8 @@ out_unlock:
static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
int cancel)
{
- int err;
struct ubi_wl_entry *e = wl_wrk->e;
- int pnum = e->pnum;
+ int pnum = e->pnum, err, need;
if (cancel) {
dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
@@ -1097,62 +1096,70 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
kfree(wl_wrk);
kmem_cache_free(wl_entries_slab, e);
- if (err != -EIO) {
+ if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
+ err == -EBUSY) {
+ int err1;
+
+ /* Re-schedule the LEB for erasure */
+ err1 = schedule_erase(ubi, e, 0);
+ if (err1) {
+ err = err1;
+ goto out_ro;
+ }
+ return err;
+ } else if (err != -EIO) {
/*
* If this is not %-EIO, we have no idea what to do. Scheduling
* this physical eraseblock for erasure again would cause
* errors again and again. Well, lets switch to RO mode.
*/
- ubi_ro_mode(ubi);
- return err;
+ goto out_ro;
}
/* It is %-EIO, the PEB went bad */
if (!ubi->bad_allowed) {
ubi_err("bad physical eraseblock %d detected", pnum);
- ubi_ro_mode(ubi);
- err = -EIO;
- } else {
- int need;
-
- spin_lock(&ubi->volumes_lock);
- need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1;
- if (need > 0) {
- need = ubi->avail_pebs >= need ? need : ubi->avail_pebs;
- ubi->avail_pebs -= need;
- ubi->rsvd_pebs += need;
- ubi->beb_rsvd_pebs += need;
- if (need > 0)
- ubi_msg("reserve more %d PEBs", need);
- }
+ goto out_ro;
+ }
- if (ubi->beb_rsvd_pebs == 0) {
- spin_unlock(&ubi->volumes_lock);
- ubi_err("no reserved physical eraseblocks");
- ubi_ro_mode(ubi);
- return -EIO;
- }
+ spin_lock(&ubi->volumes_lock);
+ need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1;
+ if (need > 0) {
+ need = ubi->avail_pebs >= need ? need : ubi->avail_pebs;
+ ubi->avail_pebs -= need;
+ ubi->rsvd_pebs += need;
+ ubi->beb_rsvd_pebs += need;
+ if (need > 0)
+ ubi_msg("reserve more %d PEBs", need);
+ }
+ if (ubi->beb_rsvd_pebs == 0) {
spin_unlock(&ubi->volumes_lock);
- ubi_msg("mark PEB %d as bad", pnum);
+ ubi_err("no reserved physical eraseblocks");
+ goto out_ro;
+ }
- err = ubi_io_mark_bad(ubi, pnum);
- if (err) {
- ubi_ro_mode(ubi);
- return err;
- }
+ spin_unlock(&ubi->volumes_lock);
+ ubi_msg("mark PEB %d as bad", pnum);
- spin_lock(&ubi->volumes_lock);
- ubi->beb_rsvd_pebs -= 1;
- ubi->bad_peb_count += 1;
- ubi->good_peb_count -= 1;
- ubi_calculate_reserved(ubi);
- if (ubi->beb_rsvd_pebs == 0)
- ubi_warn("last PEB from the reserved pool was used");
- spin_unlock(&ubi->volumes_lock);
- }
+ err = ubi_io_mark_bad(ubi, pnum);
+ if (err)
+ goto out_ro;
+
+ spin_lock(&ubi->volumes_lock);
+ ubi->beb_rsvd_pebs -= 1;
+ ubi->bad_peb_count += 1;
+ ubi->good_peb_count -= 1;
+ ubi_calculate_reserved(ubi);
+ if (ubi->beb_rsvd_pebs == 0)
+ ubi_warn("last PEB from the reserved pool was used");
+ spin_unlock(&ubi->volumes_lock);
+
+ return err;
+out_ro:
+ ubi_ro_mode(ubi);
return err;
}
@@ -1634,7 +1641,7 @@ static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec)
goto out_free;
}
- read_ec = ubi64_to_cpu(ec_hdr->ec);
+ read_ec = be64_to_cpu(ec_hdr->ec);
if (ec != read_ec) {
ubi_err("paranoid check failed for PEB %d", pnum);
ubi_err("read EC is %lld, should be %d", read_ec, ec);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 43d03178064..5fb659f8b20 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2486,6 +2486,18 @@ source "drivers/atm/Kconfig"
source "drivers/s390/net/Kconfig"
+config XEN_NETDEV_FRONTEND
+ tristate "Xen network device frontend driver"
+ depends on XEN
+ default y
+ help
+ The network device frontend driver allows the kernel to
+ access network devices exported exported by a virtual
+ machine containing a physical network device driver. The
+ frontend driver is intended for unprivileged guest domains;
+ if you are compiling a kernel for a Xen guest, you almost
+ certainly want to enable this.
+
config ISERIES_VETH
tristate "iSeries Virtual Ethernet driver support"
depends on PPC_ISERIES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index eb4167622a6..0e286ab8855 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -127,6 +127,8 @@ obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2tp.o
obj-$(CONFIG_SLIP) += slip.o
obj-$(CONFIG_SLHC) += slhc.o
+obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
+
obj-$(CONFIG_DUMMY) += dummy.o
obj-$(CONFIG_IFB) += ifb.o
obj-$(CONFIG_MACVLAN) += macvlan.o
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index d23861c8658..a729da061bb 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -54,8 +54,8 @@
#define DRV_MODULE_NAME "bnx2"
#define PFX DRV_MODULE_NAME ": "
-#define DRV_MODULE_VERSION "1.6.2"
-#define DRV_MODULE_RELDATE "July 6, 2007"
+#define DRV_MODULE_VERSION "1.6.3"
+#define DRV_MODULE_RELDATE "July 16, 2007"
#define RUN_AT(x) (jiffies + (x))
@@ -126,91 +126,102 @@ static struct pci_device_id bnx2_pci_tbl[] = {
static struct flash_spec flash_table[] =
{
+#define BUFFERED_FLAGS (BNX2_NV_BUFFERED | BNX2_NV_TRANSLATE)
+#define NONBUFFERED_FLAGS (BNX2_NV_WREN)
/* Slow EEPROM */
{0x00000000, 0x40830380, 0x009f0081, 0xa184a053, 0xaf000400,
- 1, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE,
+ BUFFERED_FLAGS, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE,
SEEPROM_BYTE_ADDR_MASK, SEEPROM_TOTAL_SIZE,
"EEPROM - slow"},
/* Expansion entry 0001 */
{0x08000002, 0x4b808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
"Entry 0001"},
/* Saifun SA25F010 (non-buffered flash) */
/* strap, cfg1, & write1 need updates */
{0x04000001, 0x47808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE*2,
"Non-buffered flash (128kB)"},
/* Saifun SA25F020 (non-buffered flash) */
/* strap, cfg1, & write1 need updates */
{0x0c000003, 0x4f808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE*4,
"Non-buffered flash (256kB)"},
/* Expansion entry 0100 */
{0x11000000, 0x53808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
"Entry 0100"},
/* Entry 0101: ST M45PE10 (non-buffered flash, TetonII B0) */
{0x19000002, 0x5b808201, 0x000500db, 0x03840253, 0xaf020406,
- 0, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE,
ST_MICRO_FLASH_BYTE_ADDR_MASK, ST_MICRO_FLASH_BASE_TOTAL_SIZE*2,
"Entry 0101: ST M45PE10 (128kB non-bufferred)"},
/* Entry 0110: ST M45PE20 (non-buffered flash)*/
{0x15000001, 0x57808201, 0x000500db, 0x03840253, 0xaf020406,
- 0, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE,
ST_MICRO_FLASH_BYTE_ADDR_MASK, ST_MICRO_FLASH_BASE_TOTAL_SIZE*4,
"Entry 0110: ST M45PE20 (256kB non-bufferred)"},
/* Saifun SA25F005 (non-buffered flash) */
/* strap, cfg1, & write1 need updates */
{0x1d000003, 0x5f808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE,
"Non-buffered flash (64kB)"},
/* Fast EEPROM */
{0x22000000, 0x62808380, 0x009f0081, 0xa184a053, 0xaf000400,
- 1, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE,
+ BUFFERED_FLAGS, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE,
SEEPROM_BYTE_ADDR_MASK, SEEPROM_TOTAL_SIZE,
"EEPROM - fast"},
/* Expansion entry 1001 */
{0x2a000002, 0x6b808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
"Entry 1001"},
/* Expansion entry 1010 */
{0x26000001, 0x67808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
"Entry 1010"},
/* ATMEL AT45DB011B (buffered flash) */
{0x2e000003, 0x6e808273, 0x00570081, 0x68848353, 0xaf000400,
- 1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
+ BUFFERED_FLAGS, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
BUFFERED_FLASH_BYTE_ADDR_MASK, BUFFERED_FLASH_TOTAL_SIZE,
"Buffered flash (128kB)"},
/* Expansion entry 1100 */
{0x33000000, 0x73808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
"Entry 1100"},
/* Expansion entry 1101 */
{0x3b000002, 0x7b808201, 0x00050081, 0x03840253, 0xaf020406,
- 0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+ NONBUFFERED_FLAGS, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
"Entry 1101"},
/* Ateml Expansion entry 1110 */
{0x37000001, 0x76808273, 0x00570081, 0x68848353, 0xaf000400,
- 1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
+ BUFFERED_FLAGS, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
BUFFERED_FLASH_BYTE_ADDR_MASK, 0,
"Entry 1110 (Atmel)"},
/* ATMEL AT45DB021B (buffered flash) */
{0x3f000003, 0x7e808273, 0x00570081, 0x68848353, 0xaf000400,
- 1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
+ BUFFERED_FLAGS, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
BUFFERED_FLASH_BYTE_ADDR_MASK, BUFFERED_FLASH_TOTAL_SIZE*2,
"Buffered flash (256kB)"},
};
+static struct flash_spec flash_5709 = {
+ .flags = BNX2_NV_BUFFERED,
+ .page_bits = BCM5709_FLASH_PAGE_BITS,
+ .page_size = BCM5709_FLASH_PAGE_SIZE,
+ .addr_mask = BCM5709_FLASH_BYTE_ADDR_MASK,
+ .total_size = BUFFERED_FLASH_TOTAL_SIZE*2,
+ .name = "5709 Buffered flash (256kB)",
+};
+
MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl);
static inline u32 bnx2_tx_avail(struct bnx2 *bp)
@@ -3289,7 +3300,7 @@ bnx2_enable_nvram_write(struct bnx2 *bp)
val = REG_RD(bp, BNX2_MISC_CFG);
REG_WR(bp, BNX2_MISC_CFG, val | BNX2_MISC_CFG_NVM_WR_EN_PCI);
- if (!bp->flash_info->buffered) {
+ if (bp->flash_info->flags & BNX2_NV_WREN) {
int j;
REG_WR(bp, BNX2_NVM_COMMAND, BNX2_NVM_COMMAND_DONE);
@@ -3349,7 +3360,7 @@ bnx2_nvram_erase_page(struct bnx2 *bp, u32 offset)
u32 cmd;
int j;
- if (bp->flash_info->buffered)
+ if (bp->flash_info->flags & BNX2_NV_BUFFERED)
/* Buffered flash, no erase needed */
return 0;
@@ -3392,8 +3403,8 @@ bnx2_nvram_read_dword(struct bnx2 *bp, u32 offset, u8 *ret_val, u32 cmd_flags)
/* Build the command word. */
cmd = BNX2_NVM_COMMAND_DOIT | cmd_flags;
- /* Calculate an offset of a buffered flash. */
- if (bp->flash_info->buffered) {
+ /* Calculate an offset of a buffered flash, not needed for 5709. */
+ if (bp->flash_info->flags & BNX2_NV_TRANSLATE) {
offset = ((offset / bp->flash_info->page_size) <<
bp->flash_info->page_bits) +
(offset % bp->flash_info->page_size);
@@ -3439,8 +3450,8 @@ bnx2_nvram_write_dword(struct bnx2 *bp, u32 offset, u8 *val, u32 cmd_flags)
/* Build the command word. */
cmd = BNX2_NVM_COMMAND_DOIT | BNX2_NVM_COMMAND_WR | cmd_flags;
- /* Calculate an offset of a buffered flash. */
- if (bp->flash_info->buffered) {
+ /* Calculate an offset of a buffered flash, not needed for 5709. */
+ if (bp->flash_info->flags & BNX2_NV_TRANSLATE) {
offset = ((offset / bp->flash_info->page_size) <<
bp->flash_info->page_bits) +
(offset % bp->flash_info->page_size);
@@ -3478,15 +3489,19 @@ static int
bnx2_init_nvram(struct bnx2 *bp)
{
u32 val;
- int j, entry_count, rc;
+ int j, entry_count, rc = 0;
struct flash_spec *flash;
+ if (CHIP_NUM(bp) == CHIP_NUM_5709) {
+ bp->flash_info = &flash_5709;
+ goto get_flash_size;
+ }
+
/* Determine the selected interface. */
val = REG_RD(bp, BNX2_NVM_CFG1);
entry_count = sizeof(flash_table) / sizeof(struct flash_spec);
- rc = 0;
if (val & 0x40000000) {
/* Flash interface has been reconfigured */
@@ -3542,6 +3557,7 @@ bnx2_init_nvram(struct bnx2 *bp)
return -ENODEV;
}
+get_flash_size:
val = REG_RD_IND(bp, bp->shmem_base + BNX2_SHARED_HW_CFG_CONFIG2);
val &= BNX2_SHARED_HW_CFG2_NVM_SIZE_MASK;
if (val)
@@ -3706,7 +3722,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
buf = align_buf;
}
- if (bp->flash_info->buffered == 0) {
+ if (!(bp->flash_info->flags & BNX2_NV_BUFFERED)) {
flash_buffer = kmalloc(264, GFP_KERNEL);
if (flash_buffer == NULL) {
rc = -ENOMEM;
@@ -3739,7 +3755,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
bnx2_enable_nvram_access(bp);
cmd_flags = BNX2_NVM_COMMAND_FIRST;
- if (bp->flash_info->buffered == 0) {
+ if (!(bp->flash_info->flags & BNX2_NV_BUFFERED)) {
int j;
/* Read the whole page into the buffer
@@ -3767,7 +3783,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
/* Loop to write back the buffer data from page_start to
* data_start */
i = 0;
- if (bp->flash_info->buffered == 0) {
+ if (!(bp->flash_info->flags & BNX2_NV_BUFFERED)) {
/* Erase the page */
if ((rc = bnx2_nvram_erase_page(bp, page_start)) != 0)
goto nvram_write_end;
@@ -3791,7 +3807,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
/* Loop to write the new data from data_start to data_end */
for (addr = data_start; addr < data_end; addr += 4, i += 4) {
if ((addr == page_end - 4) ||
- ((bp->flash_info->buffered) &&
+ ((bp->flash_info->flags & BNX2_NV_BUFFERED) &&
(addr == data_end - 4))) {
cmd_flags |= BNX2_NVM_COMMAND_LAST;
@@ -3808,7 +3824,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
/* Loop to write back the buffer data from data_end
* to page_end */
- if (bp->flash_info->buffered == 0) {
+ if (!(bp->flash_info->flags & BNX2_NV_BUFFERED)) {
for (addr = data_end; addr < page_end;
addr += 4, i += 4) {
@@ -4107,7 +4123,7 @@ bnx2_init_chip(struct bnx2 *bp)
if (CHIP_NUM(bp) == CHIP_NUM_5708)
REG_WR(bp, BNX2_HC_STATS_TICKS, 0);
else
- REG_WR(bp, BNX2_HC_STATS_TICKS, bp->stats_ticks & 0xffff00);
+ REG_WR(bp, BNX2_HC_STATS_TICKS, bp->stats_ticks);
REG_WR(bp, BNX2_HC_STAT_COLLECT_TICKS, 0xbb8); /* 3ms */
if (CHIP_ID(bp) == CHIP_ID_5706_A1)
@@ -4127,10 +4143,6 @@ bnx2_init_chip(struct bnx2 *bp)
REG_WR(bp, BNX2_HC_ATTN_BITS_ENABLE, STATUS_ATTN_EVENTS);
- if (REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_FEATURE) &
- BNX2_PORT_FEATURE_ASF_ENABLED)
- bp->flags |= ASF_ENABLE_FLAG;
-
/* Initialize the receive filter. */
bnx2_set_rx_mode(bp->dev);
@@ -5786,8 +5798,9 @@ bnx2_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
if (bp->stats_ticks != 0 && bp->stats_ticks != USEC_PER_SEC)
bp->stats_ticks = USEC_PER_SEC;
}
- if (bp->stats_ticks > 0xffff00) bp->stats_ticks = 0xffff00;
- bp->stats_ticks &= 0xffff00;
+ if (bp->stats_ticks > BNX2_HC_STATS_TICKS_HC_STAT_TICKS)
+ bp->stats_ticks = BNX2_HC_STATS_TICKS_HC_STAT_TICKS;
+ bp->stats_ticks &= BNX2_HC_STATS_TICKS_HC_STAT_TICKS;
if (netif_running(bp->dev)) {
bnx2_netif_stop(bp);
@@ -6629,6 +6642,18 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
if (i != 2)
bp->fw_version[j++] = '.';
}
+ if (REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_FEATURE) &
+ BNX2_PORT_FEATURE_ASF_ENABLED) {
+ bp->flags |= ASF_ENABLE_FLAG;
+
+ for (i = 0; i < 30; i++) {
+ reg = REG_RD_IND(bp, bp->shmem_base +
+ BNX2_BC_STATE_CONDITION);
+ if (reg & BNX2_CONDITION_MFW_RUN_MASK)
+ break;
+ msleep(10);
+ }
+ }
reg = REG_RD_IND(bp, bp->shmem_base + BNX2_BC_STATE_CONDITION);
reg &= BNX2_CONDITION_MFW_RUN_MASK;
if (reg != BNX2_CONDITION_MFW_RUN_UNKNOWN &&
@@ -6672,7 +6697,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
bp->rx_ticks_int = 18;
bp->rx_ticks = 18;
- bp->stats_ticks = 1000000 & 0xffff00;
+ bp->stats_ticks = USEC_PER_SEC & BNX2_HC_STATS_TICKS_HC_STAT_TICKS;
bp->timer_interval = HZ;
bp->current_interval = HZ;
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index d8cd1afeb23..102adfe1e92 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -6433,6 +6433,11 @@ struct sw_bd {
#define ST_MICRO_FLASH_PAGE_SIZE 256
#define ST_MICRO_FLASH_BASE_TOTAL_SIZE 65536
+#define BCM5709_FLASH_PAGE_BITS 8
+#define BCM5709_FLASH_PHY_PAGE_SIZE (1 << BCM5709_FLASH_PAGE_BITS)
+#define BCM5709_FLASH_BYTE_ADDR_MASK (BCM5709_FLASH_PHY_PAGE_SIZE-1)
+#define BCM5709_FLASH_PAGE_SIZE 256
+
#define NVRAM_TIMEOUT_COUNT 30000
@@ -6449,7 +6454,10 @@ struct flash_spec {
u32 config2;
u32 config3;
u32 write1;
- u32 buffered;
+ u32 flags;
+#define BNX2_NV_BUFFERED 0x00000001
+#define BNX2_NV_TRANSLATE 0x00000002
+#define BNX2_NV_WREN 0x00000004
u32 page_bits;
u32 page_size;
u32 addr_mask;
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 84aa2117c0e..355c6cf3d11 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -320,7 +320,7 @@ static int eppconfig(struct baycom_state *bc)
sprintf(portarg, "%ld", bc->pdev->port->base);
printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg);
- return call_usermodehelper(eppconfig_path, argv, envp, 1);
+ return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC);
}
/* ---------------------------------------------------------------------- */
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
index 5891a0fbdc8..f87176055d0 100644
--- a/drivers/net/pppol2tp.c
+++ b/drivers/net/pppol2tp.c
@@ -824,6 +824,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
struct pppol2tp_session *session;
struct pppol2tp_tunnel *tunnel;
struct udphdr *uh;
+ unsigned int len;
error = -ENOTCONN;
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -912,14 +913,15 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
}
/* Queue the packet to IP for output */
+ len = skb->len;
error = ip_queue_xmit(skb, 1);
/* Update stats */
if (error >= 0) {
tunnel->stats.tx_packets++;
- tunnel->stats.tx_bytes += skb->len;
+ tunnel->stats.tx_bytes += len;
session->stats.tx_packets++;
- session->stats.tx_bytes += skb->len;
+ session->stats.tx_bytes += len;
} else {
tunnel->stats.tx_errors++;
session->stats.tx_errors++;
@@ -958,6 +960,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
__wsum csum = 0;
struct sk_buff *skb2 = NULL;
struct udphdr *uh;
+ unsigned int len;
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
goto abort;
@@ -1046,18 +1049,25 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
printk("\n");
}
+ memset(&(IPCB(skb2)->opt), 0, sizeof(IPCB(skb2)->opt));
+ IPCB(skb2)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+ IPSKB_REROUTED);
+ nf_reset(skb2);
+
/* Get routing info from the tunnel socket */
+ dst_release(skb2->dst);
skb2->dst = sk_dst_get(sk_tun);
/* Queue the packet to IP for output */
+ len = skb2->len;
rc = ip_queue_xmit(skb2, 1);
/* Update stats */
if (rc >= 0) {
tunnel->stats.tx_packets++;
- tunnel->stats.tx_bytes += skb2->len;
+ tunnel->stats.tx_bytes += len;
session->stats.tx_packets++;
- session->stats.tx_bytes += skb2->len;
+ session->stats.tx_bytes += len;
} else {
tunnel->stats.tx_errors++;
session->stats.tx_errors++;
diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c
index 8a667c13fae..b801e3b3a11 100644
--- a/drivers/net/sunvnet.c
+++ b/drivers/net/sunvnet.c
@@ -12,6 +12,7 @@
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
+#include <linux/mutex.h>
#include <asm/vio.h>
#include <asm/ldc.h>
@@ -497,6 +498,8 @@ static void vnet_event(void *arg, int event)
vio_link_state_change(vio, event);
spin_unlock_irqrestore(&vio->lock, flags);
+ if (event == LDC_EVENT_RESET)
+ vio_port_up(vio);
return;
}
@@ -875,6 +878,115 @@ err_out:
return err;
}
+static LIST_HEAD(vnet_list);
+static DEFINE_MUTEX(vnet_list_mutex);
+
+static struct vnet * __devinit vnet_new(const u64 *local_mac)
+{
+ struct net_device *dev;
+ struct vnet *vp;
+ int err, i;
+
+ dev = alloc_etherdev(sizeof(*vp));
+ if (!dev) {
+ printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for (i = 0; i < ETH_ALEN; i++)
+ dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff;
+
+ memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
+
+ vp = netdev_priv(dev);
+
+ spin_lock_init(&vp->lock);
+ vp->dev = dev;
+
+ INIT_LIST_HEAD(&vp->port_list);
+ for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
+ INIT_HLIST_HEAD(&vp->port_hash[i]);
+ INIT_LIST_HEAD(&vp->list);
+ vp->local_mac = *local_mac;
+
+ dev->open = vnet_open;
+ dev->stop = vnet_close;
+ dev->set_multicast_list = vnet_set_rx_mode;
+ dev->set_mac_address = vnet_set_mac_addr;
+ dev->tx_timeout = vnet_tx_timeout;
+ dev->ethtool_ops = &vnet_ethtool_ops;
+ dev->watchdog_timeo = VNET_TX_TIMEOUT;
+ dev->change_mtu = vnet_change_mtu;
+ dev->hard_start_xmit = vnet_start_xmit;
+
+ err = register_netdev(dev);
+ if (err) {
+ printk(KERN_ERR PFX "Cannot register net device, "
+ "aborting.\n");
+ goto err_out_free_dev;
+ }
+
+ printk(KERN_INFO "%s: Sun LDOM vnet ", dev->name);
+
+ for (i = 0; i < 6; i++)
+ printk("%2.2x%c", dev->dev_addr[i], i == 5 ? '\n' : ':');
+
+ list_add(&vp->list, &vnet_list);
+
+ return vp;
+
+err_out_free_dev:
+ free_netdev(dev);
+
+ return ERR_PTR(err);
+}
+
+static struct vnet * __devinit vnet_find_or_create(const u64 *local_mac)
+{
+ struct vnet *iter, *vp;
+
+ mutex_lock(&vnet_list_mutex);
+ vp = NULL;
+ list_for_each_entry(iter, &vnet_list, list) {
+ if (iter->local_mac == *local_mac) {
+ vp = iter;
+ break;
+ }
+ }
+ if (!vp)
+ vp = vnet_new(local_mac);
+ mutex_unlock(&vnet_list_mutex);
+
+ return vp;
+}
+
+static const char *local_mac_prop = "local-mac-address";
+
+static struct vnet * __devinit vnet_find_parent(struct mdesc_handle *hp,
+ u64 port_node)
+{
+ const u64 *local_mac = NULL;
+ u64 a;
+
+ mdesc_for_each_arc(a, hp, port_node, MDESC_ARC_TYPE_BACK) {
+ u64 target = mdesc_arc_target(hp, a);
+ const char *name;
+
+ name = mdesc_get_property(hp, target, "name", NULL);
+ if (!name || strcmp(name, "network"))
+ continue;
+
+ local_mac = mdesc_get_property(hp, target,
+ local_mac_prop, NULL);
+ if (local_mac)
+ break;
+ }
+ if (!local_mac)
+ return ERR_PTR(-ENODEV);
+
+ return vnet_find_or_create(local_mac);
+}
+
static struct ldc_channel_config vnet_ldc_cfg = {
.event = vnet_event,
.mtu = 64,
@@ -887,6 +999,14 @@ static struct vio_driver_ops vnet_vio_ops = {
.handshake_complete = vnet_handshake_complete,
};
+static void print_version(void)
+{
+ static int version_printed;
+
+ if (version_printed++ == 0)
+ printk(KERN_INFO "%s", version);
+}
+
const char *remote_macaddr_prop = "remote-mac-address";
static int __devinit vnet_port_probe(struct vio_dev *vdev,
@@ -899,14 +1019,17 @@ static int __devinit vnet_port_probe(struct vio_dev *vdev,
const u64 *rmac;
int len, i, err, switch_port;
- vp = dev_get_drvdata(vdev->dev.parent);
- if (!vp) {
- printk(KERN_ERR PFX "Cannot find port parent vnet.\n");
- return -ENODEV;
- }
+ print_version();
hp = mdesc_grab();
+ vp = vnet_find_parent(hp, vdev->mp);
+ if (IS_ERR(vp)) {
+ printk(KERN_ERR PFX "Cannot find port parent vnet.\n");
+ err = PTR_ERR(vp);
+ goto err_out_put_mdesc;
+ }
+
rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
err = -ENODEV;
if (!rmac) {
@@ -1025,139 +1148,14 @@ static struct vio_driver vnet_port_driver = {
}
};
-const char *local_mac_prop = "local-mac-address";
-
-static int __devinit vnet_probe(struct vio_dev *vdev,
- const struct vio_device_id *id)
-{
- static int vnet_version_printed;
- struct mdesc_handle *hp;
- struct net_device *dev;
- struct vnet *vp;
- const u64 *mac;
- int err, i, len;
-
- if (vnet_version_printed++ == 0)
- printk(KERN_INFO "%s", version);
-
- hp = mdesc_grab();
-
- mac = mdesc_get_property(hp, vdev->mp, local_mac_prop, &len);
- if (!mac) {
- printk(KERN_ERR PFX "vnet lacks %s property.\n",
- local_mac_prop);
- err = -ENODEV;
- goto err_out;
- }
-
- dev = alloc_etherdev(sizeof(*vp));
- if (!dev) {
- printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n");
- err = -ENOMEM;
- goto err_out;
- }
-
- for (i = 0; i < ETH_ALEN; i++)
- dev->dev_addr[i] = (*mac >> (5 - i) * 8) & 0xff;
-
- memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
-
- SET_NETDEV_DEV(dev, &vdev->dev);
-
- vp = netdev_priv(dev);
-
- spin_lock_init(&vp->lock);
- vp->dev = dev;
- vp->vdev = vdev;
-
- INIT_LIST_HEAD(&vp->port_list);
- for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
- INIT_HLIST_HEAD(&vp->port_hash[i]);
-
- dev->open = vnet_open;
- dev->stop = vnet_close;
- dev->set_multicast_list = vnet_set_rx_mode;
- dev->set_mac_address = vnet_set_mac_addr;
- dev->tx_timeout = vnet_tx_timeout;
- dev->ethtool_ops = &vnet_ethtool_ops;
- dev->watchdog_timeo = VNET_TX_TIMEOUT;
- dev->change_mtu = vnet_change_mtu;
- dev->hard_start_xmit = vnet_start_xmit;
-
- err = register_netdev(dev);
- if (err) {
- printk(KERN_ERR PFX "Cannot register net device, "
- "aborting.\n");
- goto err_out_free_dev;
- }
-
- printk(KERN_INFO "%s: Sun LDOM vnet ", dev->name);
-
- for (i = 0; i < 6; i++)
- printk("%2.2x%c", dev->dev_addr[i], i == 5 ? '\n' : ':');
-
- dev_set_drvdata(&vdev->dev, vp);
-
- mdesc_release(hp);
-
- return 0;
-
-err_out_free_dev:
- free_netdev(dev);
-
-err_out:
- mdesc_release(hp);
- return err;
-}
-
-static int vnet_remove(struct vio_dev *vdev)
-{
-
- struct vnet *vp = dev_get_drvdata(&vdev->dev);
-
- if (vp) {
- /* XXX unregister port, or at least check XXX */
- unregister_netdevice(vp->dev);
- dev_set_drvdata(&vdev->dev, NULL);
- }
- return 0;
-}
-
-static struct vio_device_id vnet_match[] = {
- {
- .type = "network",
- },
- {},
-};
-MODULE_DEVICE_TABLE(vio, vnet_match);
-
-static struct vio_driver vnet_driver = {
- .id_table = vnet_match,
- .probe = vnet_probe,
- .remove = vnet_remove,
- .driver = {
- .name = "vnet",
- .owner = THIS_MODULE,
- }
-};
-
static int __init vnet_init(void)
{
- int err = vio_register_driver(&vnet_driver);
-
- if (!err) {
- err = vio_register_driver(&vnet_port_driver);
- if (err)
- vio_unregister_driver(&vnet_driver);
- }
-
- return err;
+ return vio_register_driver(&vnet_port_driver);
}
static void __exit vnet_exit(void)
{
vio_unregister_driver(&vnet_port_driver);
- vio_unregister_driver(&vnet_driver);
}
module_init(vnet_init);
diff --git a/drivers/net/sunvnet.h b/drivers/net/sunvnet.h
index 1c887302d46..7d3a0cac727 100644
--- a/drivers/net/sunvnet.h
+++ b/drivers/net/sunvnet.h
@@ -60,11 +60,13 @@ struct vnet {
struct net_device *dev;
u32 msg_enable;
- struct vio_dev *vdev;
struct list_head port_list;
struct hlist_head port_hash[VNET_PORT_HASH_SIZE];
+
+ struct list_head list;
+ u64 local_mac;
};
#endif /* _SUNVNET_H */
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
new file mode 100644
index 00000000000..489f69c5d6c
--- /dev/null
+++ b/drivers/net/xen-netfront.c
@@ -0,0 +1,1863 @@
+/*
+ * Virtual network driver for conversing with remote driver backends.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005, XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+#include <linux/if_ether.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/moduleparam.h>
+#include <linux/mm.h>
+#include <net/ip.h>
+
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+
+#include <xen/interface/io/netif.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/grant_table.h>
+
+static struct ethtool_ops xennet_ethtool_ops;
+
+struct netfront_cb {
+ struct page *page;
+ unsigned offset;
+};
+
+#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
+
+#define RX_COPY_THRESHOLD 256
+
+#define GRANT_INVALID_REF 0
+
+#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
+#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+
+struct netfront_info {
+ struct list_head list;
+ struct net_device *netdev;
+
+ struct net_device_stats stats;
+
+ struct xen_netif_tx_front_ring tx;
+ struct xen_netif_rx_front_ring rx;
+
+ spinlock_t tx_lock;
+ spinlock_t rx_lock;
+
+ unsigned int evtchn;
+
+ /* Receive-ring batched refills. */
+#define RX_MIN_TARGET 8
+#define RX_DFL_MIN_TARGET 64
+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+ unsigned rx_min_target, rx_max_target, rx_target;
+ struct sk_buff_head rx_batch;
+
+ struct timer_list rx_refill_timer;
+
+ /*
+ * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
+ * are linked from tx_skb_freelist through skb_entry.link.
+ *
+ * NB. Freelist index entries are always going to be less than
+ * PAGE_OFFSET, whereas pointers to skbs will always be equal or
+ * greater than PAGE_OFFSET: we use this property to distinguish
+ * them.
+ */
+ union skb_entry {
+ struct sk_buff *skb;
+ unsigned link;
+ } tx_skbs[NET_TX_RING_SIZE];
+ grant_ref_t gref_tx_head;
+ grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
+ unsigned tx_skb_freelist;
+
+ struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
+ grant_ref_t gref_rx_head;
+ grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
+
+ struct xenbus_device *xbdev;
+ int tx_ring_ref;
+ int rx_ring_ref;
+
+ unsigned long rx_pfn_array[NET_RX_RING_SIZE];
+ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
+ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+};
+
+struct netfront_rx_info {
+ struct xen_netif_rx_response rx;
+ struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+};
+
+/*
+ * Access macros for acquiring freeing slots in tx_skbs[].
+ */
+
+static void add_id_to_freelist(unsigned *head, union skb_entry *list,
+ unsigned short id)
+{
+ list[id].link = *head;
+ *head = id;
+}
+
+static unsigned short get_id_from_freelist(unsigned *head,
+ union skb_entry *list)
+{
+ unsigned int id = *head;
+ *head = list[id].link;
+ return id;
+}
+
+static int xennet_rxidx(RING_IDX idx)
+{
+ return idx & (NET_RX_RING_SIZE - 1);
+}
+
+static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
+ RING_IDX ri)
+{
+ int i = xennet_rxidx(ri);
+ struct sk_buff *skb = np->rx_skbs[i];
+ np->rx_skbs[i] = NULL;
+ return skb;
+}
+
+static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
+ RING_IDX ri)
+{
+ int i = xennet_rxidx(ri);
+ grant_ref_t ref = np->grant_rx_ref[i];
+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
+ return ref;
+}
+
+#ifdef CONFIG_SYSFS
+static int xennet_sysfs_addif(struct net_device *netdev);
+static void xennet_sysfs_delif(struct net_device *netdev);
+#else /* !CONFIG_SYSFS */
+#define xennet_sysfs_addif(dev) (0)
+#define xennet_sysfs_delif(dev) do { } while (0)
+#endif
+
+static int xennet_can_sg(struct net_device *dev)
+{
+ return dev->features & NETIF_F_SG;
+}
+
+
+static void rx_refill_timeout(unsigned long data)
+{
+ struct net_device *dev = (struct net_device *)data;
+ netif_rx_schedule(dev);
+}
+
+static int netfront_tx_slot_available(struct netfront_info *np)
+{
+ return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
+ (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
+}
+
+static void xennet_maybe_wake_tx(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+
+ if (unlikely(netif_queue_stopped(dev)) &&
+ netfront_tx_slot_available(np) &&
+ likely(netif_running(dev)))
+ netif_wake_queue(dev);
+}
+
+static void xennet_alloc_rx_buffers(struct net_device *dev)
+{
+ unsigned short id;
+ struct netfront_info *np = netdev_priv(dev);
+ struct sk_buff *skb;
+ struct page *page;
+ int i, batch_target, notify;
+ RING_IDX req_prod = np->rx.req_prod_pvt;
+ struct xen_memory_reservation reservation;
+ grant_ref_t ref;
+ unsigned long pfn;
+ void *vaddr;
+ int nr_flips;
+ struct xen_netif_rx_request *req;
+
+ if (unlikely(!netif_carrier_ok(dev)))
+ return;
+
+ /*
+ * Allocate skbuffs greedily, even though we batch updates to the
+ * receive ring. This creates a less bursty demand on the memory
+ * allocator, so should reduce the chance of failed allocation requests
+ * both for ourself and for other kernel subsystems.
+ */
+ batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
+ for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
+ skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb))
+ goto no_skb;
+
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+ if (!page) {
+ kfree_skb(skb);
+no_skb:
+ /* Any skbuffs queued for refill? Force them out. */
+ if (i != 0)
+ goto refill;
+ /* Could not allocate any skbuffs. Try again later. */
+ mod_timer(&np->rx_refill_timer,
+ jiffies + (HZ/10));
+ break;
+ }
+
+ skb_shinfo(skb)->frags[0].page = page;
+ skb_shinfo(skb)->nr_frags = 1;
+ __skb_queue_tail(&np->rx_batch, skb);
+ }
+
+ /* Is the batch large enough to be worthwhile? */
+ if (i < (np->rx_target/2)) {
+ if (req_prod > np->rx.sring->req_prod)
+ goto push;
+ return;
+ }
+
+ /* Adjust our fill target if we risked running out of buffers. */
+ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
+ ((np->rx_target *= 2) > np->rx_max_target))
+ np->rx_target = np->rx_max_target;
+
+ refill:
+ for (nr_flips = i = 0; ; i++) {
+ skb = __skb_dequeue(&np->rx_batch);
+ if (skb == NULL)
+ break;
+
+ skb->dev = dev;
+
+ id = xennet_rxidx(req_prod + i);
+
+ BUG_ON(np->rx_skbs[id]);
+ np->rx_skbs[id] = skb;
+
+ ref = gnttab_claim_grant_reference(&np->gref_rx_head);
+ BUG_ON((signed short)ref < 0);
+ np->grant_rx_ref[id] = ref;
+
+ pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
+ vaddr = page_address(skb_shinfo(skb)->frags[0].page);
+
+ req = RING_GET_REQUEST(&np->rx, req_prod + i);
+ gnttab_grant_foreign_access_ref(ref,
+ np->xbdev->otherend_id,
+ pfn_to_mfn(pfn),
+ 0);
+
+ req->id = id;
+ req->gref = ref;
+ }
+
+ if (nr_flips != 0) {
+ reservation.extent_start = np->rx_pfn_array;
+ reservation.nr_extents = nr_flips;
+ reservation.extent_order = 0;
+ reservation.address_bits = 0;
+ reservation.domid = DOMID_SELF;
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* After all PTEs have been zapped, flush the TLB. */
+ np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+ UVMF_TLB_FLUSH|UVMF_ALL;
+
+ /* Give away a batch of pages. */
+ np->rx_mcl[i].op = __HYPERVISOR_memory_op;
+ np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+ np->rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+ /* Zap PTEs and give away pages in one big
+ * multicall. */
+ (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
+
+ /* Check return status of HYPERVISOR_memory_op(). */
+ if (unlikely(np->rx_mcl[i].result != i))
+ panic("Unable to reduce memory reservation\n");
+ } else {
+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+ &reservation) != i)
+ panic("Unable to reduce memory reservation\n");
+ }
+ } else {
+ wmb(); /* barrier so backend seens requests */
+ }
+
+ /* Above is a suitable barrier to ensure backend will see requests. */
+ np->rx.req_prod_pvt = req_prod + i;
+ push:
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
+ if (notify)
+ notify_remote_via_irq(np->netdev->irq);
+}
+
+static int xennet_open(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+
+ memset(&np->stats, 0, sizeof(np->stats));
+
+ spin_lock_bh(&np->rx_lock);
+ if (netif_carrier_ok(dev)) {
+ xennet_alloc_rx_buffers(dev);
+ np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+ netif_rx_schedule(dev);
+ }
+ spin_unlock_bh(&np->rx_lock);
+
+ xennet_maybe_wake_tx(dev);
+
+ return 0;
+}
+
+static void xennet_tx_buf_gc(struct net_device *dev)
+{
+ RING_IDX cons, prod;
+ unsigned short id;
+ struct netfront_info *np = netdev_priv(dev);
+ struct sk_buff *skb;
+
+ BUG_ON(!netif_carrier_ok(dev));
+
+ do {
+ prod = np->tx.sring->rsp_prod;
+ rmb(); /* Ensure we see responses up to 'rp'. */
+
+ for (cons = np->tx.rsp_cons; cons != prod; cons++) {
+ struct xen_netif_tx_response *txrsp;
+
+ txrsp = RING_GET_RESPONSE(&np->tx, cons);
+ if (txrsp->status == NETIF_RSP_NULL)
+ continue;
+
+ id = txrsp->id;
+ skb = np->tx_skbs[id].skb;
+ if (unlikely(gnttab_query_foreign_access(
+ np->grant_tx_ref[id]) != 0)) {
+ printk(KERN_ALERT "xennet_tx_buf_gc: warning "
+ "-- grant still in use by backend "
+ "domain.\n");
+ BUG();
+ }
+ gnttab_end_foreign_access_ref(
+ np->grant_tx_ref[id], GNTMAP_readonly);
+ gnttab_release_grant_reference(
+ &np->gref_tx_head, np->grant_tx_ref[id]);
+ np->grant_tx_ref[id] = GRANT_INVALID_REF;
+ add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
+ dev_kfree_skb_irq(skb);
+ }
+
+ np->tx.rsp_cons = prod;
+
+ /*
+ * Set a new event, then check for race with update of tx_cons.
+ * Note that it is essential to schedule a callback, no matter
+ * how few buffers are pending. Even if there is space in the
+ * transmit ring, higher layers may be blocked because too much
+ * data is outstanding: in such cases notification from Xen is
+ * likely to be the only kick that we'll get.
+ */
+ np->tx.sring->rsp_event =
+ prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
+ mb(); /* update shared area */
+ } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
+
+ xennet_maybe_wake_tx(dev);
+}
+
+static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+ struct xen_netif_tx_request *tx)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ char *data = skb->data;
+ unsigned long mfn;
+ RING_IDX prod = np->tx.req_prod_pvt;
+ int frags = skb_shinfo(skb)->nr_frags;
+ unsigned int offset = offset_in_page(data);
+ unsigned int len = skb_headlen(skb);
+ unsigned int id;
+ grant_ref_t ref;
+ int i;
+
+ /* While the header overlaps a page boundary (including being
+ larger than a page), split it it into page-sized chunks. */
+ while (len > PAGE_SIZE - offset) {
+ tx->size = PAGE_SIZE - offset;
+ tx->flags |= NETTXF_more_data;
+ len -= tx->size;
+ data += tx->size;
+ offset = 0;
+
+ id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
+ np->tx_skbs[id].skb = skb_get(skb);
+ tx = RING_GET_REQUEST(&np->tx, prod++);
+ tx->id = id;
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+ BUG_ON((signed short)ref < 0);
+
+ mfn = virt_to_mfn(data);
+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+ mfn, GNTMAP_readonly);
+
+ tx->gref = np->grant_tx_ref[id] = ref;
+ tx->offset = offset;
+ tx->size = len;
+ tx->flags = 0;
+ }
+
+ /* Grant backend access to each skb fragment page. */
+ for (i = 0; i < frags; i++) {
+ skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+
+ tx->flags |= NETTXF_more_data;
+
+ id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
+ np->tx_skbs[id].skb = skb_get(skb);
+ tx = RING_GET_REQUEST(&np->tx, prod++);
+ tx->id = id;
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+ BUG_ON((signed short)ref < 0);
+
+ mfn = pfn_to_mfn(page_to_pfn(frag->page));
+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+ mfn, GNTMAP_readonly);
+
+ tx->gref = np->grant_tx_ref[id] = ref;
+ tx->offset = frag->page_offset;
+ tx->size = frag->size;
+ tx->flags = 0;
+ }
+
+ np->tx.req_prod_pvt = prod;
+}
+
+static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ unsigned short id;
+ struct netfront_info *np = netdev_priv(dev);
+ struct xen_netif_tx_request *tx;
+ struct xen_netif_extra_info *extra;
+ char *data = skb->data;
+ RING_IDX i;
+ grant_ref_t ref;
+ unsigned long mfn;
+ int notify;
+ int frags = skb_shinfo(skb)->nr_frags;
+ unsigned int offset = offset_in_page(data);
+ unsigned int len = skb_headlen(skb);
+
+ frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
+ if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
+ printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
+ frags);
+ dump_stack();
+ goto drop;
+ }
+
+ spin_lock_irq(&np->tx_lock);
+
+ if (unlikely(!netif_carrier_ok(dev) ||
+ (frags > 1 && !xennet_can_sg(dev)) ||
+ netif_needs_gso(dev, skb))) {
+ spin_unlock_irq(&np->tx_lock);
+ goto drop;
+ }
+
+ i = np->tx.req_prod_pvt;
+
+ id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
+ np->tx_skbs[id].skb = skb;
+
+ tx = RING_GET_REQUEST(&np->tx, i);
+
+ tx->id = id;
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+ BUG_ON((signed short)ref < 0);
+ mfn = virt_to_mfn(data);
+ gnttab_grant_foreign_access_ref(
+ ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
+ tx->gref = np->grant_tx_ref[id] = ref;
+ tx->offset = offset;
+ tx->size = len;
+ extra = NULL;
+
+ tx->flags = 0;
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ /* local packet? */
+ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+ else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+ /* remote but checksummed. */
+ tx->flags |= NETTXF_data_validated;
+
+ if (skb_shinfo(skb)->gso_size) {
+ struct xen_netif_extra_info *gso;
+
+ gso = (struct xen_netif_extra_info *)
+ RING_GET_REQUEST(&np->tx, ++i);
+
+ if (extra)
+ extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+ else
+ tx->flags |= NETTXF_extra_info;
+
+ gso->u.gso.size = skb_shinfo(skb)->gso_size;
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+ gso->u.gso.pad = 0;
+ gso->u.gso.features = 0;
+
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+ gso->flags = 0;
+ extra = gso;
+ }
+
+ np->tx.req_prod_pvt = i + 1;
+
+ xennet_make_frags(skb, dev, tx);
+ tx->size = skb->len;
+
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
+ if (notify)
+ notify_remote_via_irq(np->netdev->irq);
+
+ xennet_tx_buf_gc(dev);
+
+ if (!netfront_tx_slot_available(np))
+ netif_stop_queue(dev);
+
+ spin_unlock_irq(&np->tx_lock);
+
+ np->stats.tx_bytes += skb->len;
+ np->stats.tx_packets++;
+
+ return 0;
+
+ drop:
+ np->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+static int xennet_close(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ netif_stop_queue(np->netdev);
+ return 0;
+}
+
+static struct net_device_stats *xennet_get_stats(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ return &np->stats;
+}
+
+static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
+ grant_ref_t ref)
+{
+ int new = xennet_rxidx(np->rx.req_prod_pvt);
+
+ BUG_ON(np->rx_skbs[new]);
+ np->rx_skbs[new] = skb;
+ np->grant_rx_ref[new] = ref;
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
+ np->rx.req_prod_pvt++;
+}
+
+static int xennet_get_extras(struct netfront_info *np,
+ struct xen_netif_extra_info *extras,
+ RING_IDX rp)
+
+{
+ struct xen_netif_extra_info *extra;
+ struct device *dev = &np->netdev->dev;
+ RING_IDX cons = np->rx.rsp_cons;
+ int err = 0;
+
+ do {
+ struct sk_buff *skb;
+ grant_ref_t ref;
+
+ if (unlikely(cons + 1 == rp)) {
+ if (net_ratelimit())
+ dev_warn(dev, "Missing extra info\n");
+ err = -EBADR;
+ break;
+ }
+
+ extra = (struct xen_netif_extra_info *)
+ RING_GET_RESPONSE(&np->rx, ++cons);
+
+ if (unlikely(!extra->type ||
+ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+ if (net_ratelimit())
+ dev_warn(dev, "Invalid extra type: %d\n",
+ extra->type);
+ err = -EINVAL;
+ } else {
+ memcpy(&extras[extra->type - 1], extra,
+ sizeof(*extra));
+ }
+
+ skb = xennet_get_rx_skb(np, cons);
+ ref = xennet_get_rx_ref(np, cons);
+ xennet_move_rx_slot(np, skb, ref);
+ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+ np->rx.rsp_cons = cons;
+ return err;
+}
+
+static int xennet_get_responses(struct netfront_info *np,
+ struct netfront_rx_info *rinfo, RING_IDX rp,
+ struct sk_buff_head *list)
+{
+ struct xen_netif_rx_response *rx = &rinfo->rx;
+ struct xen_netif_extra_info *extras = rinfo->extras;
+ struct device *dev = &np->netdev->dev;
+ RING_IDX cons = np->rx.rsp_cons;
+ struct sk_buff *skb = xennet_get_rx_skb(np, cons);
+ grant_ref_t ref = xennet_get_rx_ref(np, cons);
+ int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
+ int frags = 1;
+ int err = 0;
+ unsigned long ret;
+
+ if (rx->flags & NETRXF_extra_info) {
+ err = xennet_get_extras(np, extras, rp);
+ cons = np->rx.rsp_cons;
+ }
+
+ for (;;) {
+ if (unlikely(rx->status < 0 ||
+ rx->offset + rx->status > PAGE_SIZE)) {
+ if (net_ratelimit())
+ dev_warn(dev, "rx->offset: %x, size: %u\n",
+ rx->offset, rx->status);
+ xennet_move_rx_slot(np, skb, ref);
+ err = -EINVAL;
+ goto next;
+ }
+
+ /*
+ * This definitely indicates a bug, either in this driver or in
+ * the backend driver. In future this should flag the bad
+ * situation to the system controller to reboot the backed.
+ */
+ if (ref == GRANT_INVALID_REF) {
+ if (net_ratelimit())
+ dev_warn(dev, "Bad rx response id %d.\n",
+ rx->id);
+ err = -EINVAL;
+ goto next;
+ }
+
+ ret = gnttab_end_foreign_access_ref(ref, 0);
+ BUG_ON(!ret);
+
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+
+ __skb_queue_tail(list, skb);
+
+next:
+ if (!(rx->flags & NETRXF_more_data))
+ break;
+
+ if (cons + frags == rp) {
+ if (net_ratelimit())
+ dev_warn(dev, "Need more frags\n");
+ err = -ENOENT;
+ break;
+ }
+
+ rx = RING_GET_RESPONSE(&np->rx, cons + frags);
+ skb = xennet_get_rx_skb(np, cons + frags);
+ ref = xennet_get_rx_ref(np, cons + frags);
+ frags++;
+ }
+
+ if (unlikely(frags > max)) {
+ if (net_ratelimit())
+ dev_warn(dev, "Too many frags\n");
+ err = -E2BIG;
+ }
+
+ if (unlikely(err))
+ np->rx.rsp_cons = cons + frags;
+
+ return err;
+}
+
+static int xennet_set_skb_gso(struct sk_buff *skb,
+ struct xen_netif_extra_info *gso)
+{
+ if (!gso->u.gso.size) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GSO size must not be zero.\n");
+ return -EINVAL;
+ }
+
+ /* Currently only TCPv4 S.O. is supported. */
+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type);
+ return -EINVAL;
+ }
+
+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ /* Header must be checked, and gso_segs computed. */
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+ skb_shinfo(skb)->gso_segs = 0;
+
+ return 0;
+}
+
+static RING_IDX xennet_fill_frags(struct netfront_info *np,
+ struct sk_buff *skb,
+ struct sk_buff_head *list)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ int nr_frags = shinfo->nr_frags;
+ RING_IDX cons = np->rx.rsp_cons;
+ skb_frag_t *frag = shinfo->frags + nr_frags;
+ struct sk_buff *nskb;
+
+ while ((nskb = __skb_dequeue(list))) {
+ struct xen_netif_rx_response *rx =
+ RING_GET_RESPONSE(&np->rx, ++cons);
+
+ frag->page = skb_shinfo(nskb)->frags[0].page;
+ frag->page_offset = rx->offset;
+ frag->size = rx->status;
+
+ skb->data_len += rx->status;
+
+ skb_shinfo(nskb)->nr_frags = 0;
+ kfree_skb(nskb);
+
+ frag++;
+ nr_frags++;
+ }
+
+ shinfo->nr_frags = nr_frags;
+ return cons;
+}
+
+static int skb_checksum_setup(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ unsigned char *th;
+ int err = -EPROTO;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ goto out;
+
+ iph = (void *)skb->data;
+ th = skb->data + 4 * iph->ihl;
+ if (th >= skb_tail_pointer(skb))
+ goto out;
+
+ skb->csum_start = th - skb->head;
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ skb->csum_offset = offsetof(struct tcphdr, check);
+ break;
+ case IPPROTO_UDP:
+ skb->csum_offset = offsetof(struct udphdr, check);
+ break;
+ default:
+ if (net_ratelimit())
+ printk(KERN_ERR "Attempting to checksum a non-"
+ "TCP/UDP packet, dropping a protocol"
+ " %d packet", iph->protocol);
+ goto out;
+ }
+
+ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
+ goto out;
+
+ err = 0;
+
+out:
+ return err;
+}
+
+static int handle_incoming_queue(struct net_device *dev,
+ struct sk_buff_head *rxq)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ int packets_dropped = 0;
+ struct sk_buff *skb;
+
+ while ((skb = __skb_dequeue(rxq)) != NULL) {
+ struct page *page = NETFRONT_SKB_CB(skb)->page;
+ void *vaddr = page_address(page);
+ unsigned offset = NETFRONT_SKB_CB(skb)->offset;
+
+ memcpy(skb->data, vaddr + offset,
+ skb_headlen(skb));
+
+ if (page != skb_shinfo(skb)->frags[0].page)
+ __free_page(page);
+
+ /* Ethernet work: Delayed to here as it peeks the header. */
+ skb->protocol = eth_type_trans(skb, dev);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb_checksum_setup(skb)) {
+ kfree_skb(skb);
+ packets_dropped++;
+ np->stats.rx_errors++;
+ continue;
+ }
+ }
+
+ np->stats.rx_packets++;
+ np->stats.rx_bytes += skb->len;
+
+ /* Pass it up. */
+ netif_receive_skb(skb);
+ dev->last_rx = jiffies;
+ }
+
+ return packets_dropped;
+}
+
+static int xennet_poll(struct net_device *dev, int *pbudget)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ struct sk_buff *skb;
+ struct netfront_rx_info rinfo;
+ struct xen_netif_rx_response *rx = &rinfo.rx;
+ struct xen_netif_extra_info *extras = rinfo.extras;
+ RING_IDX i, rp;
+ int work_done, budget, more_to_do = 1;
+ struct sk_buff_head rxq;
+ struct sk_buff_head errq;
+ struct sk_buff_head tmpq;
+ unsigned long flags;
+ unsigned int len;
+ int err;
+
+ spin_lock(&np->rx_lock);
+
+ if (unlikely(!netif_carrier_ok(dev))) {
+ spin_unlock(&np->rx_lock);
+ return 0;
+ }
+
+ skb_queue_head_init(&rxq);
+ skb_queue_head_init(&errq);
+ skb_queue_head_init(&tmpq);
+
+ budget = *pbudget;
+ if (budget > dev->quota)
+ budget = dev->quota;
+ rp = np->rx.sring->rsp_prod;
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+ i = np->rx.rsp_cons;
+ work_done = 0;
+ while ((i != rp) && (work_done < budget)) {
+ memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
+ memset(extras, 0, sizeof(rinfo.extras));
+
+ err = xennet_get_responses(np, &rinfo, rp, &tmpq);
+
+ if (unlikely(err)) {
+err:
+ while ((skb = __skb_dequeue(&tmpq)))
+ __skb_queue_tail(&errq, skb);
+ np->stats.rx_errors++;
+ i = np->rx.rsp_cons;
+ continue;
+ }
+
+ skb = __skb_dequeue(&tmpq);
+
+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+ struct xen_netif_extra_info *gso;
+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+ if (unlikely(xennet_set_skb_gso(skb, gso))) {
+ __skb_queue_head(&tmpq, skb);
+ np->rx.rsp_cons += skb_queue_len(&tmpq);
+ goto err;
+ }
+ }
+
+ NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
+ NETFRONT_SKB_CB(skb)->offset = rx->offset;
+
+ len = rx->status;
+ if (len > RX_COPY_THRESHOLD)
+ len = RX_COPY_THRESHOLD;
+ skb_put(skb, len);
+
+ if (rx->status > len) {
+ skb_shinfo(skb)->frags[0].page_offset =
+ rx->offset + len;
+ skb_shinfo(skb)->frags[0].size = rx->status - len;
+ skb->data_len = rx->status - len;
+ } else {
+ skb_shinfo(skb)->frags[0].page = NULL;
+ skb_shinfo(skb)->nr_frags = 0;
+ }
+
+ i = xennet_fill_frags(np, skb, &tmpq);
+
+ /*
+ * Truesize approximates the size of true data plus
+ * any supervisor overheads. Adding hypervisor
+ * overheads has been shown to significantly reduce
+ * achievable bandwidth with the default receive
+ * buffer size. It is therefore not wise to account
+ * for it here.
+ *
+ * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
+ * to RX_COPY_THRESHOLD + the supervisor
+ * overheads. Here, we add the size of the data pulled
+ * in xennet_fill_frags().
+ *
+ * We also adjust for any unused space in the main
+ * data area by subtracting (RX_COPY_THRESHOLD -
+ * len). This is especially important with drivers
+ * which split incoming packets into header and data,
+ * using only 66 bytes of the main data area (see the
+ * e1000 driver for example.) On such systems,
+ * without this last adjustement, our achievable
+ * receive throughout using the standard receive
+ * buffer size was cut by 25%(!!!).
+ */
+ skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
+ skb->len += skb->data_len;
+
+ if (rx->flags & NETRXF_csum_blank)
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ else if (rx->flags & NETRXF_data_validated)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ __skb_queue_tail(&rxq, skb);
+
+ np->rx.rsp_cons = ++i;
+ work_done++;
+ }
+
+ while ((skb = __skb_dequeue(&errq)))
+ kfree_skb(skb);
+
+ work_done -= handle_incoming_queue(dev, &rxq);
+
+ /* If we get a callback with very few responses, reduce fill target. */
+ /* NB. Note exponential increase, linear decrease. */
+ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
+ ((3*np->rx_target) / 4)) &&
+ (--np->rx_target < np->rx_min_target))
+ np->rx_target = np->rx_min_target;
+
+ xennet_alloc_rx_buffers(dev);
+
+ *pbudget -= work_done;
+ dev->quota -= work_done;
+
+ if (work_done < budget) {
+ local_irq_save(flags);
+
+ RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
+ if (!more_to_do)
+ __netif_rx_complete(dev);
+
+ local_irq_restore(flags);
+ }
+
+ spin_unlock(&np->rx_lock);
+
+ return more_to_do;
+}
+
+static int xennet_change_mtu(struct net_device *dev, int mtu)
+{
+ int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
+
+static void xennet_release_tx_bufs(struct netfront_info *np)
+{
+ struct sk_buff *skb;
+ int i;
+
+ for (i = 0; i < NET_TX_RING_SIZE; i++) {
+ /* Skip over entries which are actually freelist references */
+ if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET)
+ continue;
+
+ skb = np->tx_skbs[i].skb;
+ gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
+ GNTMAP_readonly);
+ gnttab_release_grant_reference(&np->gref_tx_head,
+ np->grant_tx_ref[i]);
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
+ add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
+ dev_kfree_skb_irq(skb);
+ }
+}
+
+static void xennet_release_rx_bufs(struct netfront_info *np)
+{
+ struct mmu_update *mmu = np->rx_mmu;
+ struct multicall_entry *mcl = np->rx_mcl;
+ struct sk_buff_head free_list;
+ struct sk_buff *skb;
+ unsigned long mfn;
+ int xfer = 0, noxfer = 0, unused = 0;
+ int id, ref;
+
+ dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
+ __func__);
+ return;
+
+ skb_queue_head_init(&free_list);
+
+ spin_lock_bh(&np->rx_lock);
+
+ for (id = 0; id < NET_RX_RING_SIZE; id++) {
+ ref = np->grant_rx_ref[id];
+ if (ref == GRANT_INVALID_REF) {
+ unused++;
+ continue;
+ }
+
+ skb = np->rx_skbs[id];
+ mfn = gnttab_end_foreign_transfer_ref(ref);
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
+ np->grant_rx_ref[id] = GRANT_INVALID_REF;
+
+ if (0 == mfn) {
+ skb_shinfo(skb)->nr_frags = 0;
+ dev_kfree_skb(skb);
+ noxfer++;
+ continue;
+ }
+
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Remap the page. */
+ struct page *page = skb_shinfo(skb)->frags[0].page;
+ unsigned long pfn = page_to_pfn(page);
+ void *vaddr = page_address(page);
+
+ MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
+ mfn_pte(mfn, PAGE_KERNEL),
+ 0);
+ mcl++;
+ mmu->ptr = ((u64)mfn << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE;
+ mmu->val = pfn;
+ mmu++;
+
+ set_phys_to_machine(pfn, mfn);
+ }
+ __skb_queue_tail(&free_list, skb);
+ xfer++;
+ }
+
+ dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
+ __func__, xfer, noxfer, unused);
+
+ if (xfer) {
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* Do all the remapping work and M2P updates. */
+ MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
+ 0, DOMID_SELF);
+ mcl++;
+ HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
+ }
+ }
+
+ while ((skb = __skb_dequeue(&free_list)) != NULL)
+ dev_kfree_skb(skb);
+
+ spin_unlock_bh(&np->rx_lock);
+}
+
+static void xennet_uninit(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ xennet_release_tx_bufs(np);
+ xennet_release_rx_bufs(np);
+ gnttab_free_grant_references(np->gref_tx_head);
+ gnttab_free_grant_references(np->gref_rx_head);
+}
+
+static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev)
+{
+ int i, err;
+ struct net_device *netdev;
+ struct netfront_info *np;
+
+ netdev = alloc_etherdev(sizeof(struct netfront_info));
+ if (!netdev) {
+ printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
+ __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ np = netdev_priv(netdev);
+ np->xbdev = dev;
+
+ spin_lock_init(&np->tx_lock);
+ spin_lock_init(&np->rx_lock);
+
+ skb_queue_head_init(&np->rx_batch);
+ np->rx_target = RX_DFL_MIN_TARGET;
+ np->rx_min_target = RX_DFL_MIN_TARGET;
+ np->rx_max_target = RX_MAX_TARGET;
+
+ init_timer(&np->rx_refill_timer);
+ np->rx_refill_timer.data = (unsigned long)netdev;
+ np->rx_refill_timer.function = rx_refill_timeout;
+
+ /* Initialise tx_skbs as a free chain containing every entry. */
+ np->tx_skb_freelist = 0;
+ for (i = 0; i < NET_TX_RING_SIZE; i++) {
+ np->tx_skbs[i].link = i+1;
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
+ }
+
+ /* Clear out rx_skbs */
+ for (i = 0; i < NET_RX_RING_SIZE; i++) {
+ np->rx_skbs[i] = NULL;
+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
+ }
+
+ /* A grant for every tx ring slot */
+ if (gnttab_alloc_grant_references(TX_MAX_TARGET,
+ &np->gref_tx_head) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+ err = -ENOMEM;
+ goto exit;
+ }
+ /* A grant for every rx ring slot */
+ if (gnttab_alloc_grant_references(RX_MAX_TARGET,
+ &np->gref_rx_head) < 0) {
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+ err = -ENOMEM;
+ goto exit_free_tx;
+ }
+
+ netdev->open = xennet_open;
+ netdev->hard_start_xmit = xennet_start_xmit;
+ netdev->stop = xennet_close;
+ netdev->get_stats = xennet_get_stats;
+ netdev->poll = xennet_poll;
+ netdev->uninit = xennet_uninit;
+ netdev->change_mtu = xennet_change_mtu;
+ netdev->weight = 64;
+ netdev->features = NETIF_F_IP_CSUM;
+
+ SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
+ SET_MODULE_OWNER(netdev);
+ SET_NETDEV_DEV(netdev, &dev->dev);
+
+ np->netdev = netdev;
+
+ netif_carrier_off(netdev);
+
+ return netdev;
+
+ exit_free_tx:
+ gnttab_free_grant_references(np->gref_tx_head);
+ exit:
+ free_netdev(netdev);
+ return ERR_PTR(err);
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and the ring buffers for communication with the backend, and
+ * inform the backend of the appropriate details for those.
+ */
+static int __devinit netfront_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err;
+ struct net_device *netdev;
+ struct netfront_info *info;
+
+ netdev = xennet_create_dev(dev);
+ if (IS_ERR(netdev)) {
+ err = PTR_ERR(netdev);
+ xenbus_dev_fatal(dev, err, "creating netdev");
+ return err;
+ }
+
+ info = netdev_priv(netdev);
+ dev->dev.driver_data = info;
+
+ err = register_netdev(info->netdev);
+ if (err) {
+ printk(KERN_WARNING "%s: register_netdev err=%d\n",
+ __func__, err);
+ goto fail;
+ }
+
+ err = xennet_sysfs_addif(info->netdev);
+ if (err) {
+ unregister_netdev(info->netdev);
+ printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
+ __func__, err);
+ goto fail;
+ }
+
+ return 0;
+
+ fail:
+ free_netdev(netdev);
+ dev->dev.driver_data = NULL;
+ return err;
+}
+
+static void xennet_end_access(int ref, void *page)
+{
+ /* This frees the page as a side-effect */
+ if (ref != GRANT_INVALID_REF)
+ gnttab_end_foreign_access(ref, 0, (unsigned long)page);
+}
+
+static void xennet_disconnect_backend(struct netfront_info *info)
+{
+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
+ spin_lock_bh(&info->rx_lock);
+ spin_lock_irq(&info->tx_lock);
+ netif_carrier_off(info->netdev);
+ spin_unlock_irq(&info->tx_lock);
+ spin_unlock_bh(&info->rx_lock);
+
+ if (info->netdev->irq)
+ unbind_from_irqhandler(info->netdev->irq, info->netdev);
+ info->evtchn = info->netdev->irq = 0;
+
+ /* End access and free the pages */
+ xennet_end_access(info->tx_ring_ref, info->tx.sring);
+ xennet_end_access(info->rx_ring_ref, info->rx.sring);
+
+ info->tx_ring_ref = GRANT_INVALID_REF;
+ info->rx_ring_ref = GRANT_INVALID_REF;
+ info->tx.sring = NULL;
+ info->rx.sring = NULL;
+}
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart. We tear down our netif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int netfront_resume(struct xenbus_device *dev)
+{
+ struct netfront_info *info = dev->dev.driver_data;
+
+ dev_dbg(&dev->dev, "%s\n", dev->nodename);
+
+ xennet_disconnect_backend(info);
+ return 0;
+}
+
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+ char *s, *e, *macstr;
+ int i;
+
+ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+ if (IS_ERR(macstr))
+ return PTR_ERR(macstr);
+
+ for (i = 0; i < ETH_ALEN; i++) {
+ mac[i] = simple_strtoul(s, &e, 16);
+ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+ kfree(macstr);
+ return -ENOENT;
+ }
+ s = e+1;
+ }
+
+ kfree(macstr);
+ return 0;
+}
+
+static irqreturn_t xennet_interrupt(int irq, void *dev_id)
+{
+ struct net_device *dev = dev_id;
+ struct netfront_info *np = netdev_priv(dev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&np->tx_lock, flags);
+
+ if (likely(netif_carrier_ok(dev))) {
+ xennet_tx_buf_gc(dev);
+ /* Under tx_lock: protects access to rx shared-ring indexes. */
+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+ netif_rx_schedule(dev);
+ }
+
+ spin_unlock_irqrestore(&np->tx_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
+{
+ struct xen_netif_tx_sring *txs;
+ struct xen_netif_rx_sring *rxs;
+ int err;
+ struct net_device *netdev = info->netdev;
+
+ info->tx_ring_ref = GRANT_INVALID_REF;
+ info->rx_ring_ref = GRANT_INVALID_REF;
+ info->rx.sring = NULL;
+ info->tx.sring = NULL;
+ netdev->irq = 0;
+
+ err = xen_net_read_mac(dev, netdev->dev_addr);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+ goto fail;
+ }
+
+ txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
+ if (!txs) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(dev, err, "allocating tx ring page");
+ goto fail;
+ }
+ SHARED_RING_INIT(txs);
+ FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+ if (err < 0) {
+ free_page((unsigned long)txs);
+ goto fail;
+ }
+
+ info->tx_ring_ref = err;
+ rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
+ if (!rxs) {
+ err = -ENOMEM;
+ xenbus_dev_fatal(dev, err, "allocating rx ring page");
+ goto fail;
+ }
+ SHARED_RING_INIT(rxs);
+ FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
+
+ err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+ if (err < 0) {
+ free_page((unsigned long)rxs);
+ goto fail;
+ }
+ info->rx_ring_ref = err;
+
+ err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ if (err)
+ goto fail;
+
+ err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
+ IRQF_SAMPLE_RANDOM, netdev->name,
+ netdev);
+ if (err < 0)
+ goto fail;
+ netdev->irq = err;
+ return 0;
+
+ fail:
+ return err;
+}
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+ struct netfront_info *info)
+{
+ const char *message;
+ struct xenbus_transaction xbt;
+ int err;
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_netfront(dev, info);
+ if (err)
+ goto out;
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto destroy_ring;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref", "%u",
+ info->tx_ring_ref);
+ if (err) {
+ message = "writing tx ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref", "%u",
+ info->rx_ring_ref);
+ if (err) {
+ message = "writing rx ring-ref";
+ goto abort_transaction;
+ }
+ err = xenbus_printf(xbt, dev->nodename,
+ "event-channel", "%u", info->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
+ 1);
+ if (err) {
+ message = "writing request-rx-copy";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
+ if (err) {
+ message = "writing feature-rx-notify";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
+ if (err) {
+ message = "writing feature-gso-tcpv4";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto destroy_ring;
+ }
+
+ return 0;
+
+ abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_ring:
+ xennet_disconnect_backend(info);
+ out:
+ return err;
+}
+
+static int xennet_set_sg(struct net_device *dev, u32 data)
+{
+ if (data) {
+ struct netfront_info *np = netdev_priv(dev);
+ int val;
+
+ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
+ "%d", &val) < 0)
+ val = 0;
+ if (!val)
+ return -ENOSYS;
+ } else if (dev->mtu > ETH_DATA_LEN)
+ dev->mtu = ETH_DATA_LEN;
+
+ return ethtool_op_set_sg(dev, data);
+}
+
+static int xennet_set_tso(struct net_device *dev, u32 data)
+{
+ if (data) {
+ struct netfront_info *np = netdev_priv(dev);
+ int val;
+
+ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-gso-tcpv4", "%d", &val) < 0)
+ val = 0;
+ if (!val)
+ return -ENOSYS;
+ }
+
+ return ethtool_op_set_tso(dev, data);
+}
+
+static void xennet_set_features(struct net_device *dev)
+{
+ /* Turn off all GSO bits except ROBUST. */
+ dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
+ dev->features |= NETIF_F_GSO_ROBUST;
+ xennet_set_sg(dev, 0);
+
+ /* We need checksum offload to enable scatter/gather and TSO. */
+ if (!(dev->features & NETIF_F_IP_CSUM))
+ return;
+
+ if (!xennet_set_sg(dev, 1))
+ xennet_set_tso(dev, 1);
+}
+
+static int xennet_connect(struct net_device *dev)
+{
+ struct netfront_info *np = netdev_priv(dev);
+ int i, requeue_idx, err;
+ struct sk_buff *skb;
+ grant_ref_t ref;
+ struct xen_netif_rx_request *req;
+ unsigned int feature_rx_copy;
+
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+ "feature-rx-copy", "%u", &feature_rx_copy);
+ if (err != 1)
+ feature_rx_copy = 0;
+
+ if (!feature_rx_copy) {
+ dev_info(&dev->dev,
+ "backend does not support copying recieve path");
+ return -ENODEV;
+ }
+
+ err = talk_to_backend(np->xbdev, np);
+ if (err)
+ return err;
+
+ xennet_set_features(dev);
+
+ spin_lock_bh(&np->rx_lock);
+ spin_lock_irq(&np->tx_lock);
+
+ /* Step 1: Discard all pending TX packet fragments. */
+ xennet_release_tx_bufs(np);
+
+ /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
+ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
+ if (!np->rx_skbs[i])
+ continue;
+
+ skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
+ ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
+ req = RING_GET_REQUEST(&np->rx, requeue_idx);
+
+ gnttab_grant_foreign_access_ref(
+ ref, np->xbdev->otherend_id,
+ pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
+ frags->page)),
+ 0);
+ req->gref = ref;
+ req->id = requeue_idx;
+
+ requeue_idx++;
+ }
+
+ np->rx.req_prod_pvt = requeue_idx;
+
+ /*
+ * Step 3: All public and private state should now be sane. Get
+ * ready to start sending and receiving packets and give the driver
+ * domain a kick because we've probably just requeued some
+ * packets.
+ */
+ netif_carrier_on(np->netdev);
+ notify_remote_via_irq(np->netdev->irq);
+ xennet_tx_buf_gc(dev);
+ xennet_alloc_rx_buffers(dev);
+
+ spin_unlock_irq(&np->tx_lock);
+ spin_unlock_bh(&np->rx_lock);
+
+ return 0;
+}
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void backend_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ struct netfront_info *np = dev->dev.driver_data;
+ struct net_device *netdev = np->netdev;
+
+ dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitialised:
+ case XenbusStateConnected:
+ case XenbusStateUnknown:
+ case XenbusStateClosed:
+ break;
+
+ case XenbusStateInitWait:
+ if (dev->state != XenbusStateInitialising)
+ break;
+ if (xennet_connect(netdev) != 0)
+ break;
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateClosing:
+ xenbus_frontend_closed(dev);
+ break;
+ }
+}
+
+static struct ethtool_ops xennet_ethtool_ops =
+{
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .set_tx_csum = ethtool_op_set_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .set_sg = xennet_set_sg,
+ .get_tso = ethtool_op_get_tso,
+ .set_tso = xennet_set_tso,
+ .get_link = ethtool_op_get_link,
+};
+
+#ifdef CONFIG_SYSFS
+static ssize_t show_rxbuf_min(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ struct netfront_info *info = netdev_priv(netdev);
+
+ return sprintf(buf, "%u\n", info->rx_min_target);
+}
+
+static ssize_t store_rxbuf_min(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ struct netfront_info *np = netdev_priv(netdev);
+ char *endp;
+ unsigned long target;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ target = simple_strtoul(buf, &endp, 0);
+ if (endp == buf)
+ return -EBADMSG;
+
+ if (target < RX_MIN_TARGET)
+ target = RX_MIN_TARGET;
+ if (target > RX_MAX_TARGET)
+ target = RX_MAX_TARGET;
+
+ spin_lock_bh(&np->rx_lock);
+ if (target > np->rx_max_target)
+ np->rx_max_target = target;
+ np->rx_min_target = target;
+ if (target > np->rx_target)
+ np->rx_target = target;
+
+ xennet_alloc_rx_buffers(netdev);
+
+ spin_unlock_bh(&np->rx_lock);
+ return len;
+}
+
+static ssize_t show_rxbuf_max(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ struct netfront_info *info = netdev_priv(netdev);
+
+ return sprintf(buf, "%u\n", info->rx_max_target);
+}
+
+static ssize_t store_rxbuf_max(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ struct netfront_info *np = netdev_priv(netdev);
+ char *endp;
+ unsigned long target;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ target = simple_strtoul(buf, &endp, 0);
+ if (endp == buf)
+ return -EBADMSG;
+
+ if (target < RX_MIN_TARGET)
+ target = RX_MIN_TARGET;
+ if (target > RX_MAX_TARGET)
+ target = RX_MAX_TARGET;
+
+ spin_lock_bh(&np->rx_lock);
+ if (target < np->rx_min_target)
+ np->rx_min_target = target;
+ np->rx_max_target = target;
+ if (target < np->rx_target)
+ np->rx_target = target;
+
+ xennet_alloc_rx_buffers(netdev);
+
+ spin_unlock_bh(&np->rx_lock);
+ return len;
+}
+
+static ssize_t show_rxbuf_cur(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+ struct netfront_info *info = netdev_priv(netdev);
+
+ return sprintf(buf, "%u\n", info->rx_target);
+}
+
+static struct device_attribute xennet_attrs[] = {
+ __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
+ __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
+ __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
+};
+
+static int xennet_sysfs_addif(struct net_device *netdev)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
+ err = device_create_file(&netdev->dev,
+ &xennet_attrs[i]);
+ if (err)
+ goto fail;
+ }
+ return 0;
+
+ fail:
+ while (--i >= 0)
+ device_remove_file(&netdev->dev, &xennet_attrs[i]);
+ return err;
+}
+
+static void xennet_sysfs_delif(struct net_device *netdev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
+ device_remove_file(&netdev->dev, &xennet_attrs[i]);
+}
+
+#endif /* CONFIG_SYSFS */
+
+static struct xenbus_device_id netfront_ids[] = {
+ { "vif" },
+ { "" }
+};
+
+
+static int __devexit xennet_remove(struct xenbus_device *dev)
+{
+ struct netfront_info *info = dev->dev.driver_data;
+
+ dev_dbg(&dev->dev, "%s\n", dev->nodename);
+
+ unregister_netdev(info->netdev);
+
+ xennet_disconnect_backend(info);
+
+ del_timer_sync(&info->rx_refill_timer);
+
+ xennet_sysfs_delif(info->netdev);
+
+ free_netdev(info->netdev);
+
+ return 0;
+}
+
+static struct xenbus_driver netfront = {
+ .name = "vif",
+ .owner = THIS_MODULE,
+ .ids = netfront_ids,
+ .probe = netfront_probe,
+ .remove = __devexit_p(xennet_remove),
+ .resume = netfront_resume,
+ .otherend_changed = backend_changed,
+};
+
+static int __init netif_init(void)
+{
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ if (is_initial_xendomain())
+ return 0;
+
+ printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
+
+ return xenbus_register_frontend(&netfront);
+}
+module_init(netif_init);
+
+
+static void __exit netif_exit(void)
+{
+ if (is_initial_xendomain())
+ return;
+
+ return xenbus_unregister_driver(&netfront);
+}
+module_exit(netif_exit);
+
+MODULE_DESCRIPTION("Xen virtual network device frontend");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c
index 03baf1c64a2..ed112ee1601 100644
--- a/drivers/pnp/pnpbios/core.c
+++ b/drivers/pnp/pnpbios/core.c
@@ -147,7 +147,7 @@ static int pnp_dock_event(int dock, struct pnp_docking_station_info *info)
info->location_id, info->serial, info->capabilities);
envp[i] = NULL;
- value = call_usermodehelper (argv [0], argv, envp, 0);
+ value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC);
kfree (buf);
kfree (envp);
return 0;
diff --git a/drivers/sbus/char/bbc_envctrl.c b/drivers/sbus/char/bbc_envctrl.c
index a54e4140683..e821a155b65 100644
--- a/drivers/sbus/char/bbc_envctrl.c
+++ b/drivers/sbus/char/bbc_envctrl.c
@@ -7,6 +7,7 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/kmod.h>
+#include <linux/reboot.h>
#include <asm/oplib.h>
#include <asm/ebus.h>
@@ -170,8 +171,6 @@ static void get_current_temps(struct bbc_cpu_temperature *tp)
static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
{
static int shutting_down = 0;
- static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
- char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
char *type = "???";
s8 val = -1;
@@ -195,7 +194,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
shutting_down = 1;
- if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0)
+ if (orderly_poweroff(true) < 0)
printk(KERN_CRIT "envctrl: shutdown execution failed\n");
}
diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 8328acab47f..dadabef116b 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -26,6 +26,7 @@
#include <linux/ioport.h>
#include <linux/miscdevice.h>
#include <linux/kmod.h>
+#include <linux/reboot.h>
#include <asm/ebus.h>
#include <asm/uaccess.h>
@@ -966,10 +967,6 @@ static struct i2c_child_t *envctrl_get_i2c_child(unsigned char mon_type)
static void envctrl_do_shutdown(void)
{
static int inprog = 0;
- static char *envp[] = {
- "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
- char *argv[] = {
- "/sbin/shutdown", "-h", "now", NULL };
int ret;
if (inprog != 0)
@@ -977,7 +974,7 @@ static void envctrl_do_shutdown(void)
inprog = 1;
printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n");
- ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0);
+ ret = orderly_poweroff(true);
if (ret < 0) {
printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n");
inprog = 0; /* unlikely to succeed, but we could try again */
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
new file mode 100644
index 00000000000..56592f0d6ce
--- /dev/null
+++ b/drivers/xen/Makefile
@@ -0,0 +1,2 @@
+obj-y += grant-table.o
+obj-y += xenbus/
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
new file mode 100644
index 00000000000..ea94dbabf9a
--- /dev/null
+++ b/drivers/xen/grant-table.c
@@ -0,0 +1,582 @@
+/******************************************************************************
+ * grant_table.c
+ *
+ * Granting foreign access to our memory reservation.
+ *
+ * Copyright (c) 2005-2006, Christopher Clark
+ * Copyright (c) 2004-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/uaccess.h>
+
+#include <xen/interface/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+
+#include <asm/pgtable.h>
+#include <asm/sync_bitops.h>
+
+
+/* External tools reserve first few grant table entries. */
+#define NR_RESERVED_ENTRIES 8
+#define GNTTAB_LIST_END 0xffffffff
+#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry))
+
+static grant_ref_t **gnttab_list;
+static unsigned int nr_grant_frames;
+static unsigned int boot_max_nr_grant_frames;
+static int gnttab_free_count;
+static grant_ref_t gnttab_free_head;
+static DEFINE_SPINLOCK(gnttab_list_lock);
+
+static struct grant_entry *shared;
+
+static struct gnttab_free_callback *gnttab_free_callback_list;
+
+static int gnttab_expand(unsigned int req_entries);
+
+#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+
+static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
+{
+ return &gnttab_list[(entry) / RPP][(entry) % RPP];
+}
+/* This can be used as an l-value */
+#define gnttab_entry(entry) (*__gnttab_entry(entry))
+
+static int get_free_entries(unsigned count)
+{
+ unsigned long flags;
+ int ref, rc;
+ grant_ref_t head;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+
+ if ((gnttab_free_count < count) &&
+ ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ return rc;
+ }
+
+ ref = head = gnttab_free_head;
+ gnttab_free_count -= count;
+ while (count-- > 1)
+ head = gnttab_entry(head);
+ gnttab_free_head = gnttab_entry(head);
+ gnttab_entry(head) = GNTTAB_LIST_END;
+
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+
+ return ref;
+}
+
+static void do_free_callbacks(void)
+{
+ struct gnttab_free_callback *callback, *next;
+
+ callback = gnttab_free_callback_list;
+ gnttab_free_callback_list = NULL;
+
+ while (callback != NULL) {
+ next = callback->next;
+ if (gnttab_free_count >= callback->count) {
+ callback->next = NULL;
+ callback->fn(callback->arg);
+ } else {
+ callback->next = gnttab_free_callback_list;
+ gnttab_free_callback_list = callback;
+ }
+ callback = next;
+ }
+}
+
+static inline void check_free_callbacks(void)
+{
+ if (unlikely(gnttab_free_callback_list))
+ do_free_callbacks();
+}
+
+static void put_free_entry(grant_ref_t ref)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ gnttab_entry(ref) = gnttab_free_head;
+ gnttab_free_head = ref;
+ gnttab_free_count++;
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+
+static void update_grant_entry(grant_ref_t ref, domid_t domid,
+ unsigned long frame, unsigned flags)
+{
+ /*
+ * Introducing a valid entry into the grant table:
+ * 1. Write ent->domid.
+ * 2. Write ent->frame:
+ * GTF_permit_access: Frame to which access is permitted.
+ * GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ * frame, or zero if none.
+ * 3. Write memory barrier (WMB).
+ * 4. Write ent->flags, inc. valid type.
+ */
+ shared[ref].frame = frame;
+ shared[ref].domid = domid;
+ wmb();
+ shared[ref].flags = flags;
+}
+
+/*
+ * Public grant-issuing interface functions
+ */
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+ unsigned long frame, int readonly)
+{
+ update_grant_entry(ref, domid, frame,
+ GTF_permit_access | (readonly ? GTF_readonly : 0));
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
+
+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+ int readonly)
+{
+ int ref;
+
+ ref = get_free_entries(1);
+ if (unlikely(ref < 0))
+ return -ENOSPC;
+
+ gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);
+
+ return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
+
+int gnttab_query_foreign_access(grant_ref_t ref)
+{
+ u16 nflags;
+
+ nflags = shared[ref].flags;
+
+ return (nflags & (GTF_reading|GTF_writing));
+}
+EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
+
+int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly)
+{
+ u16 flags, nflags;
+
+ nflags = shared[ref].flags;
+ do {
+ flags = nflags;
+ if (flags & (GTF_reading|GTF_writing)) {
+ printk(KERN_ALERT "WARNING: g.e. still in use!\n");
+ return 0;
+ }
+ } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags);
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
+
+void gnttab_end_foreign_access(grant_ref_t ref, int readonly,
+ unsigned long page)
+{
+ if (gnttab_end_foreign_access_ref(ref, readonly)) {
+ put_free_entry(ref);
+ if (page != 0)
+ free_page(page);
+ } else {
+ /* XXX This needs to be fixed so that the ref and page are
+ placed on a list to be freed up later. */
+ printk(KERN_WARNING
+ "WARNING: leaking g.e. and page still in use!\n");
+ }
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
+
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
+{
+ int ref;
+
+ ref = get_free_entries(1);
+ if (unlikely(ref < 0))
+ return -ENOSPC;
+ gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
+
+ return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
+
+void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+ unsigned long pfn)
+{
+ update_grant_entry(ref, domid, pfn, GTF_accept_transfer);
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
+
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+{
+ unsigned long frame;
+ u16 flags;
+
+ /*
+ * If a transfer is not even yet started, try to reclaim the grant
+ * reference and return failure (== 0).
+ */
+ while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
+ if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags)
+ return 0;
+ cpu_relax();
+ }
+
+ /* If a transfer is in progress then wait until it is completed. */
+ while (!(flags & GTF_transfer_completed)) {
+ flags = shared[ref].flags;
+ cpu_relax();
+ }
+
+ rmb(); /* Read the frame number /after/ reading completion status. */
+ frame = shared[ref].frame;
+ BUG_ON(frame == 0);
+
+ return frame;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
+
+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
+{
+ unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
+ put_free_entry(ref);
+ return frame;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
+
+void gnttab_free_grant_reference(grant_ref_t ref)
+{
+ put_free_entry(ref);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
+
+void gnttab_free_grant_references(grant_ref_t head)
+{
+ grant_ref_t ref;
+ unsigned long flags;
+ int count = 1;
+ if (head == GNTTAB_LIST_END)
+ return;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ ref = head;
+ while (gnttab_entry(ref) != GNTTAB_LIST_END) {
+ ref = gnttab_entry(ref);
+ count++;
+ }
+ gnttab_entry(ref) = gnttab_free_head;
+ gnttab_free_head = head;
+ gnttab_free_count += count;
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
+
+int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
+{
+ int h = get_free_entries(count);
+
+ if (h < 0)
+ return -ENOSPC;
+
+ *head = h;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
+
+int gnttab_empty_grant_references(const grant_ref_t *private_head)
+{
+ return (*private_head == GNTTAB_LIST_END);
+}
+EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
+
+int gnttab_claim_grant_reference(grant_ref_t *private_head)
+{
+ grant_ref_t g = *private_head;
+ if (unlikely(g == GNTTAB_LIST_END))
+ return -ENOSPC;
+ *private_head = gnttab_entry(g);
+ return g;
+}
+EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
+
+void gnttab_release_grant_reference(grant_ref_t *private_head,
+ grant_ref_t release)
+{
+ gnttab_entry(release) = *private_head;
+ *private_head = release;
+}
+EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
+
+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
+ void (*fn)(void *), void *arg, u16 count)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ if (callback->next)
+ goto out;
+ callback->fn = fn;
+ callback->arg = arg;
+ callback->count = count;
+ callback->next = gnttab_free_callback_list;
+ gnttab_free_callback_list = callback;
+ check_free_callbacks();
+out:
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
+
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
+{
+ struct gnttab_free_callback **pcb;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
+ if (*pcb == callback) {
+ *pcb = callback->next;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
+
+static int grow_gnttab_list(unsigned int more_frames)
+{
+ unsigned int new_nr_grant_frames, extra_entries, i;
+
+ new_nr_grant_frames = nr_grant_frames + more_frames;
+ extra_entries = more_frames * GREFS_PER_GRANT_FRAME;
+
+ for (i = nr_grant_frames; i < new_nr_grant_frames; i++) {
+ gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
+ if (!gnttab_list[i])
+ goto grow_nomem;
+ }
+
+
+ for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+ i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
+ gnttab_entry(i) = i + 1;
+
+ gnttab_entry(i) = gnttab_free_head;
+ gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+ gnttab_free_count += extra_entries;
+
+ nr_grant_frames = new_nr_grant_frames;
+
+ check_free_callbacks();
+
+ return 0;
+
+grow_nomem:
+ for ( ; i >= nr_grant_frames; i--)
+ free_page((unsigned long) gnttab_list[i]);
+ return -ENOMEM;
+}
+
+static unsigned int __max_nr_grant_frames(void)
+{
+ struct gnttab_query_size query;
+ int rc;
+
+ query.dom = DOMID_SELF;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
+ if ((rc < 0) || (query.status != GNTST_okay))
+ return 4; /* Legacy max supported number of frames */
+
+ return query.max_nr_frames;
+}
+
+static inline unsigned int max_nr_grant_frames(void)
+{
+ unsigned int xen_max = __max_nr_grant_frames();
+
+ if (xen_max > boot_max_nr_grant_frames)
+ return boot_max_nr_grant_frames;
+ return xen_max;
+}
+
+static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+ unsigned long addr, void *data)
+{
+ unsigned long **frames = (unsigned long **)data;
+
+ set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+ (*frames)++;
+ return 0;
+}
+
+static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+ unsigned long addr, void *data)
+{
+
+ set_pte_at(&init_mm, addr, pte, __pte(0));
+ return 0;
+}
+
+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+{
+ struct gnttab_setup_table setup;
+ unsigned long *frames;
+ unsigned int nr_gframes = end_idx + 1;
+ int rc;
+
+ frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
+ if (!frames)
+ return -ENOMEM;
+
+ setup.dom = DOMID_SELF;
+ setup.nr_frames = nr_gframes;
+ setup.frame_list = frames;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
+ if (rc == -ENOSYS) {
+ kfree(frames);
+ return -ENOSYS;
+ }
+
+ BUG_ON(rc || setup.status);
+
+ if (shared == NULL) {
+ struct vm_struct *area;
+ area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
+ BUG_ON(area == NULL);
+ shared = area->addr;
+ }
+ rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+ PAGE_SIZE * nr_gframes,
+ map_pte_fn, &frames);
+ BUG_ON(rc);
+ frames -= nr_gframes; /* adjust after map_pte_fn() */
+
+ kfree(frames);
+
+ return 0;
+}
+
+static int gnttab_resume(void)
+{
+ if (max_nr_grant_frames() < nr_grant_frames)
+ return -ENOSYS;
+ return gnttab_map(0, nr_grant_frames - 1);
+}
+
+static int gnttab_suspend(void)
+{
+ apply_to_page_range(&init_mm, (unsigned long)shared,
+ PAGE_SIZE * nr_grant_frames,
+ unmap_pte_fn, NULL);
+
+ return 0;
+}
+
+static int gnttab_expand(unsigned int req_entries)
+{
+ int rc;
+ unsigned int cur, extra;
+
+ cur = nr_grant_frames;
+ extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) /
+ GREFS_PER_GRANT_FRAME);
+ if (cur + extra > max_nr_grant_frames())
+ return -ENOSPC;
+
+ rc = gnttab_map(cur, cur + extra - 1);
+ if (rc == 0)
+ rc = grow_gnttab_list(extra);
+
+ return rc;
+}
+
+static int __devinit gnttab_init(void)
+{
+ int i;
+ unsigned int max_nr_glist_frames;
+ unsigned int nr_init_grefs;
+
+ if (!is_running_on_xen())
+ return -ENODEV;
+
+ nr_grant_frames = 1;
+ boot_max_nr_grant_frames = __max_nr_grant_frames();
+
+ /* Determine the maximum number of frames required for the
+ * grant reference free list on the current hypervisor.
+ */
+ max_nr_glist_frames = (boot_max_nr_grant_frames *
+ GREFS_PER_GRANT_FRAME /
+ (PAGE_SIZE / sizeof(grant_ref_t)));
+
+ gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
+ GFP_KERNEL);
+ if (gnttab_list == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_grant_frames; i++) {
+ gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
+ if (gnttab_list[i] == NULL)
+ goto ini_nomem;
+ }
+
+ if (gnttab_resume() < 0)
+ return -ENODEV;
+
+ nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
+
+ for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
+ gnttab_entry(i) = i + 1;
+
+ gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
+ gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
+ gnttab_free_head = NR_RESERVED_ENTRIES;
+
+ printk("Grant table initialized\n");
+ return 0;
+
+ ini_nomem:
+ for (i--; i >= 0; i--)
+ free_page((unsigned long)gnttab_list[i]);
+ kfree(gnttab_list);
+ return -ENOMEM;
+}
+
+core_initcall(gnttab_init);
diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile
new file mode 100644
index 00000000000..5571f5b8422
--- /dev/null
+++ b/drivers/xen/xenbus/Makefile
@@ -0,0 +1,7 @@
+obj-y += xenbus.o
+
+xenbus-objs =
+xenbus-objs += xenbus_client.o
+xenbus-objs += xenbus_comms.o
+xenbus-objs += xenbus_xs.o
+xenbus-objs += xenbus_probe.o
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
new file mode 100644
index 00000000000..9fd2f70ab46
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -0,0 +1,569 @@
+/******************************************************************************
+ * Client-facing interface for the Xenbus driver. In other words, the
+ * interface between the Xenbus and the device-specific code, be it the
+ * frontend or the backend of that driver.
+ *
+ * Copyright (C) 2005 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+#include <xen/events.h>
+#include <xen/grant_table.h>
+#include <xen/xenbus.h>
+
+const char *xenbus_strstate(enum xenbus_state state)
+{
+ static const char *const name[] = {
+ [ XenbusStateUnknown ] = "Unknown",
+ [ XenbusStateInitialising ] = "Initialising",
+ [ XenbusStateInitWait ] = "InitWait",
+ [ XenbusStateInitialised ] = "Initialised",
+ [ XenbusStateConnected ] = "Connected",
+ [ XenbusStateClosing ] = "Closing",
+ [ XenbusStateClosed ] = "Closed",
+ };
+ return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
+}
+EXPORT_SYMBOL_GPL(xenbus_strstate);
+
+/**
+ * xenbus_watch_path - register a watch
+ * @dev: xenbus device
+ * @path: path to watch
+ * @watch: watch to register
+ * @callback: callback to register
+ *
+ * Register a @watch on the given path, using the given xenbus_watch structure
+ * for storage, and the given @callback function as the callback. Return 0 on
+ * success, or -errno on error. On success, the given @path will be saved as
+ * @watch->node, and remains the caller's to free. On error, @watch->node will
+ * be NULL, the device will switch to %XenbusStateClosing, and the error will
+ * be saved in the store.
+ */
+int xenbus_watch_path(struct xenbus_device *dev, const char *path,
+ struct xenbus_watch *watch,
+ void (*callback)(struct xenbus_watch *,
+ const char **, unsigned int))
+{
+ int err;
+
+ watch->node = path;
+ watch->callback = callback;
+
+ err = register_xenbus_watch(watch);
+
+ if (err) {
+ watch->node = NULL;
+ watch->callback = NULL;
+ xenbus_dev_fatal(dev, err, "adding watch on %s", path);
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_watch_path);
+
+
+/**
+ * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
+ * @dev: xenbus device
+ * @watch: watch to register
+ * @callback: callback to register
+ * @pathfmt: format of path to watch
+ *
+ * Register a watch on the given @path, using the given xenbus_watch
+ * structure for storage, and the given @callback function as the callback.
+ * Return 0 on success, or -errno on error. On success, the watched path
+ * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
+ * kfree(). On error, watch->node will be NULL, so the caller has nothing to
+ * free, the device will switch to %XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_watch_pathfmt(struct xenbus_device *dev,
+ struct xenbus_watch *watch,
+ void (*callback)(struct xenbus_watch *,
+ const char **, unsigned int),
+ const char *pathfmt, ...)
+{
+ int err;
+ va_list ap;
+ char *path;
+
+ va_start(ap, pathfmt);
+ path = kvasprintf(GFP_KERNEL, pathfmt, ap);
+ va_end(ap);
+
+ if (!path) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
+ return -ENOMEM;
+ }
+ err = xenbus_watch_path(dev, path, watch, callback);
+
+ if (err)
+ kfree(path);
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
+
+
+/**
+ * xenbus_switch_state
+ * @dev: xenbus device
+ * @xbt: transaction handle
+ * @state: new state
+ *
+ * Advertise in the store a change of the given driver to the given new_state.
+ * Return 0 on success, or -errno on error. On error, the device will switch
+ * to XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
+{
+ /* We check whether the state is currently set to the given value, and
+ if not, then the state is set. We don't want to unconditionally
+ write the given state, because we don't want to fire watches
+ unnecessarily. Furthermore, if the node has gone, we don't write
+ to it, as the device will be tearing down, and we don't want to
+ resurrect that directory.
+
+ Note that, because of this cached value of our state, this function
+ will not work inside a Xenstore transaction (something it was
+ trying to in the past) because dev->state would not get reset if
+ the transaction was aborted.
+
+ */
+
+ int current_state;
+ int err;
+
+ if (state == dev->state)
+ return 0;
+
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
+ &current_state);
+ if (err != 1)
+ return 0;
+
+ err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
+ if (err) {
+ if (state != XenbusStateClosing) /* Avoid looping */
+ xenbus_dev_fatal(dev, err, "writing new state");
+ return err;
+ }
+
+ dev->state = state;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_switch_state);
+
+int xenbus_frontend_closed(struct xenbus_device *dev)
+{
+ xenbus_switch_state(dev, XenbusStateClosed);
+ complete(&dev->down);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_frontend_closed);
+
+/**
+ * Return the path to the error node for the given device, or NULL on failure.
+ * If the value returned is non-NULL, then it is the caller's to kfree.
+ */
+static char *error_path(struct xenbus_device *dev)
+{
+ return kasprintf(GFP_KERNEL, "error/%s", dev->nodename);
+}
+
+
+static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
+ const char *fmt, va_list ap)
+{
+ int ret;
+ unsigned int len;
+ char *printf_buffer = NULL;
+ char *path_buffer = NULL;
+
+#define PRINTF_BUFFER_SIZE 4096
+ printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+ if (printf_buffer == NULL)
+ goto fail;
+
+ len = sprintf(printf_buffer, "%i ", -err);
+ ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
+
+ BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
+
+ dev_err(&dev->dev, "%s\n", printf_buffer);
+
+ path_buffer = error_path(dev);
+
+ if (path_buffer == NULL) {
+ dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
+ dev->nodename, printf_buffer);
+ goto fail;
+ }
+
+ if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
+ dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
+ dev->nodename, printf_buffer);
+ goto fail;
+ }
+
+fail:
+ kfree(printf_buffer);
+ kfree(path_buffer);
+}
+
+
+/**
+ * xenbus_dev_error
+ * @dev: xenbus device
+ * @err: error to report
+ * @fmt: error message format
+ *
+ * Report the given negative errno into the store, along with the given
+ * formatted message.
+ */
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ xenbus_va_dev_error(dev, err, fmt, ap);
+ va_end(ap);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_error);
+
+/**
+ * xenbus_dev_fatal
+ * @dev: xenbus device
+ * @err: error to report
+ * @fmt: error message format
+ *
+ * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
+ * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
+ * closedown of this driver and its peer.
+ */
+
+void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ xenbus_va_dev_error(dev, err, fmt, ap);
+ va_end(ap);
+
+ xenbus_switch_state(dev, XenbusStateClosing);
+}
+EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
+
+/**
+ * xenbus_grant_ring
+ * @dev: xenbus device
+ * @ring_mfn: mfn of ring to grant
+
+ * Grant access to the given @ring_mfn to the peer of the given device. Return
+ * 0 on success, or -errno on error. On error, the device will switch to
+ * XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
+{
+ int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
+ if (err < 0)
+ xenbus_dev_fatal(dev, err, "granting access to ring page");
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_grant_ring);
+
+
+/**
+ * Allocate an event channel for the given xenbus_device, assigning the newly
+ * created local port to *port. Return 0 on success, or -errno on error. On
+ * error, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
+{
+ struct evtchn_alloc_unbound alloc_unbound;
+ int err;
+
+ alloc_unbound.dom = DOMID_SELF;
+ alloc_unbound.remote_dom = dev->otherend_id;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+ &alloc_unbound);
+ if (err)
+ xenbus_dev_fatal(dev, err, "allocating event channel");
+ else
+ *port = alloc_unbound.port;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
+
+
+/**
+ * Bind to an existing interdomain event channel in another domain. Returns 0
+ * on success and stores the local port in *port. On error, returns -errno,
+ * switches the device to XenbusStateClosing, and saves the error in XenStore.
+ */
+int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
+{
+ struct evtchn_bind_interdomain bind_interdomain;
+ int err;
+
+ bind_interdomain.remote_dom = dev->otherend_id;
+ bind_interdomain.remote_port = remote_port;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+ &bind_interdomain);
+ if (err)
+ xenbus_dev_fatal(dev, err,
+ "binding to event channel %d from domain %d",
+ remote_port, dev->otherend_id);
+ else
+ *port = bind_interdomain.local_port;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
+
+
+/**
+ * Free an existing event channel. Returns 0 on success or -errno on error.
+ */
+int xenbus_free_evtchn(struct xenbus_device *dev, int port)
+{
+ struct evtchn_close close;
+ int err;
+
+ close.port = port;
+
+ err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+ if (err)
+ xenbus_dev_error(dev, err, "freeing event channel %d", port);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
+
+
+/**
+ * xenbus_map_ring_valloc
+ * @dev: xenbus device
+ * @gnt_ref: grant reference
+ * @vaddr: pointer to address to be filled out by mapping
+ *
+ * Based on Rusty Russell's skeleton driver's map_page.
+ * Map a page of memory into this domain from another domain's grant table.
+ * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
+ * page to that address, and sets *vaddr to that address.
+ * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
+ * or -ENOMEM on error. If an error is returned, device will switch to
+ * XenbusStateClosing and the error message will be saved in XenStore.
+ */
+int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
+{
+ struct gnttab_map_grant_ref op = {
+ .flags = GNTMAP_host_map,
+ .ref = gnt_ref,
+ .dom = dev->otherend_id,
+ };
+ struct vm_struct *area;
+
+ *vaddr = NULL;
+
+ area = alloc_vm_area(PAGE_SIZE);
+ if (!area)
+ return -ENOMEM;
+
+ op.host_addr = (unsigned long)area->addr;
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status != GNTST_okay) {
+ free_vm_area(area);
+ xenbus_dev_fatal(dev, op.status,
+ "mapping in shared page %d from domain %d",
+ gnt_ref, dev->otherend_id);
+ return op.status;
+ }
+
+ /* Stuff the handle in an unused field */
+ area->phys_addr = (unsigned long)op.handle;
+
+ *vaddr = area->addr;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
+
+
+/**
+ * xenbus_map_ring
+ * @dev: xenbus device
+ * @gnt_ref: grant reference
+ * @handle: pointer to grant handle to be filled
+ * @vaddr: address to be mapped to
+ *
+ * Map a page of memory into this domain from another domain's grant table.
+ * xenbus_map_ring does not allocate the virtual address space (you must do
+ * this yourself!). It only maps in the page to the specified address.
+ * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
+ * or -ENOMEM on error. If an error is returned, device will switch to
+ * XenbusStateClosing and the error message will be saved in XenStore.
+ */
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
+ grant_handle_t *handle, void *vaddr)
+{
+ struct gnttab_map_grant_ref op = {
+ .host_addr = (unsigned long)vaddr,
+ .flags = GNTMAP_host_map,
+ .ref = gnt_ref,
+ .dom = dev->otherend_id,
+ };
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status != GNTST_okay) {
+ xenbus_dev_fatal(dev, op.status,
+ "mapping in shared page %d from domain %d",
+ gnt_ref, dev->otherend_id);
+ } else
+ *handle = op.handle;
+
+ return op.status;
+}
+EXPORT_SYMBOL_GPL(xenbus_map_ring);
+
+
+/**
+ * xenbus_unmap_ring_vfree
+ * @dev: xenbus device
+ * @vaddr: addr to unmap
+ *
+ * Based on Rusty Russell's skeleton driver's unmap_page.
+ * Unmap a page of memory in this domain that was imported from another domain.
+ * Use xenbus_unmap_ring_vfree if you mapped in your memory with
+ * xenbus_map_ring_valloc (it will free the virtual address space).
+ * Returns 0 on success and returns GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
+{
+ struct vm_struct *area;
+ struct gnttab_unmap_grant_ref op = {
+ .host_addr = (unsigned long)vaddr,
+ };
+
+ /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
+ * method so that we don't have to muck with vmalloc internals here.
+ * We could force the user to hang on to their struct vm_struct from
+ * xenbus_map_ring_valloc, but these 6 lines considerably simplify
+ * this API.
+ */
+ read_lock(&vmlist_lock);
+ for (area = vmlist; area != NULL; area = area->next) {
+ if (area->addr == vaddr)
+ break;
+ }
+ read_unlock(&vmlist_lock);
+
+ if (!area) {
+ xenbus_dev_error(dev, -ENOENT,
+ "can't find mapped virtual address %p", vaddr);
+ return GNTST_bad_virt_addr;
+ }
+
+ op.handle = (grant_handle_t)area->phys_addr;
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status == GNTST_okay)
+ free_vm_area(area);
+ else
+ xenbus_dev_error(dev, op.status,
+ "unmapping page at handle %d error %d",
+ (int16_t)area->phys_addr, op.status);
+
+ return op.status;
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
+
+
+/**
+ * xenbus_unmap_ring
+ * @dev: xenbus device
+ * @handle: grant handle
+ * @vaddr: addr to unmap
+ *
+ * Unmap a page of memory in this domain that was imported from another domain.
+ * Returns 0 on success and returns GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+int xenbus_unmap_ring(struct xenbus_device *dev,
+ grant_handle_t handle, void *vaddr)
+{
+ struct gnttab_unmap_grant_ref op = {
+ .host_addr = (unsigned long)vaddr,
+ .handle = handle,
+ };
+
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
+ BUG();
+
+ if (op.status != GNTST_okay)
+ xenbus_dev_error(dev, op.status,
+ "unmapping page at handle %d error %d",
+ handle, op.status);
+
+ return op.status;
+}
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
+
+
+/**
+ * xenbus_read_driver_state
+ * @path: path for driver
+ *
+ * Return the state of the driver rooted at the given store path, or
+ * XenbusStateUnknown if no state can be read.
+ */
+enum xenbus_state xenbus_read_driver_state(const char *path)
+{
+ enum xenbus_state result;
+ int err = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL);
+ if (err)
+ result = XenbusStateUnknown;
+
+ return result;
+}
+EXPORT_SYMBOL_GPL(xenbus_read_driver_state);
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
new file mode 100644
index 00000000000..6efbe3f29ca
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -0,0 +1,233 @@
+/******************************************************************************
+ * xenbus_comms.c
+ *
+ * Low level code to talks to Xen Store: ringbuffer and event channel.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <xen/xenbus.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/page.h>
+#include "xenbus_comms.h"
+
+static int xenbus_irq;
+
+static DECLARE_WORK(probe_work, xenbus_probe);
+
+static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+
+static irqreturn_t wake_waiting(int irq, void *unused)
+{
+ if (unlikely(xenstored_ready == 0)) {
+ xenstored_ready = 1;
+ schedule_work(&probe_work);
+ }
+
+ wake_up(&xb_waitq);
+ return IRQ_HANDLED;
+}
+
+static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
+{
+ return ((prod - cons) <= XENSTORE_RING_SIZE);
+}
+
+static void *get_output_chunk(XENSTORE_RING_IDX cons,
+ XENSTORE_RING_IDX prod,
+ char *buf, uint32_t *len)
+{
+ *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
+ if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
+ *len = XENSTORE_RING_SIZE - (prod - cons);
+ return buf + MASK_XENSTORE_IDX(prod);
+}
+
+static const void *get_input_chunk(XENSTORE_RING_IDX cons,
+ XENSTORE_RING_IDX prod,
+ const char *buf, uint32_t *len)
+{
+ *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
+ if ((prod - cons) < *len)
+ *len = prod - cons;
+ return buf + MASK_XENSTORE_IDX(cons);
+}
+
+/**
+ * xb_write - low level write
+ * @data: buffer to send
+ * @len: length of buffer
+ *
+ * Returns 0 on success, error otherwise.
+ */
+int xb_write(const void *data, unsigned len)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+ XENSTORE_RING_IDX cons, prod;
+ int rc;
+
+ while (len != 0) {
+ void *dst;
+ unsigned int avail;
+
+ rc = wait_event_interruptible(
+ xb_waitq,
+ (intf->req_prod - intf->req_cons) !=
+ XENSTORE_RING_SIZE);
+ if (rc < 0)
+ return rc;
+
+ /* Read indexes, then verify. */
+ cons = intf->req_cons;
+ prod = intf->req_prod;
+ if (!check_indexes(cons, prod)) {
+ intf->req_cons = intf->req_prod = 0;
+ return -EIO;
+ }
+
+ dst = get_output_chunk(cons, prod, intf->req, &avail);
+ if (avail == 0)
+ continue;
+ if (avail > len)
+ avail = len;
+
+ /* Must write data /after/ reading the consumer index. */
+ mb();
+
+ memcpy(dst, data, avail);
+ data += avail;
+ len -= avail;
+
+ /* Other side must not see new producer until data is there. */
+ wmb();
+ intf->req_prod += avail;
+
+ /* Implies mb(): other side will see the updated producer. */
+ notify_remote_via_evtchn(xen_store_evtchn);
+ }
+
+ return 0;
+}
+
+int xb_data_to_read(void)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+ return (intf->rsp_cons != intf->rsp_prod);
+}
+
+int xb_wait_for_data_to_read(void)
+{
+ return wait_event_interruptible(xb_waitq, xb_data_to_read());
+}
+
+int xb_read(void *data, unsigned len)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+ XENSTORE_RING_IDX cons, prod;
+ int rc;
+
+ while (len != 0) {
+ unsigned int avail;
+ const char *src;
+
+ rc = xb_wait_for_data_to_read();
+ if (rc < 0)
+ return rc;
+
+ /* Read indexes, then verify. */
+ cons = intf->rsp_cons;
+ prod = intf->rsp_prod;
+ if (!check_indexes(cons, prod)) {
+ intf->rsp_cons = intf->rsp_prod = 0;
+ return -EIO;
+ }
+
+ src = get_input_chunk(cons, prod, intf->rsp, &avail);
+ if (avail == 0)
+ continue;
+ if (avail > len)
+ avail = len;
+
+ /* Must read data /after/ reading the producer index. */
+ rmb();
+
+ memcpy(data, src, avail);
+ data += avail;
+ len -= avail;
+
+ /* Other side must not see free space until we've copied out */
+ mb();
+ intf->rsp_cons += avail;
+
+ pr_debug("Finished read of %i bytes (%i to go)\n", avail, len);
+
+ /* Implies mb(): other side will see the updated consumer. */
+ notify_remote_via_evtchn(xen_store_evtchn);
+ }
+
+ return 0;
+}
+
+/**
+ * xb_init_comms - Set up interrupt handler off store event channel.
+ */
+int xb_init_comms(void)
+{
+ struct xenstore_domain_interface *intf = xen_store_interface;
+ int err;
+
+ if (intf->req_prod != intf->req_cons)
+ printk(KERN_ERR "XENBUS request ring is not quiescent "
+ "(%08x:%08x)!\n", intf->req_cons, intf->req_prod);
+
+ if (intf->rsp_prod != intf->rsp_cons) {
+ printk(KERN_WARNING "XENBUS response ring is not quiescent "
+ "(%08x:%08x): fixing up\n",
+ intf->rsp_cons, intf->rsp_prod);
+ intf->rsp_cons = intf->rsp_prod;
+ }
+
+ if (xenbus_irq)
+ unbind_from_irqhandler(xenbus_irq, &xb_waitq);
+
+ err = bind_evtchn_to_irqhandler(
+ xen_store_evtchn, wake_waiting,
+ 0, "xenbus", &xb_waitq);
+ if (err <= 0) {
+ printk(KERN_ERR "XENBUS request irq failed %i\n", err);
+ return err;
+ }
+
+ xenbus_irq = err;
+
+ return 0;
+}
diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h
new file mode 100644
index 00000000000..c21db751373
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_comms.h
@@ -0,0 +1,46 @@
+/*
+ * Private include for xenbus communications.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_COMMS_H
+#define _XENBUS_COMMS_H
+
+int xs_init(void);
+int xb_init_comms(void);
+
+/* Low level routines. */
+int xb_write(const void *data, unsigned len);
+int xb_read(void *data, unsigned len);
+int xb_data_to_read(void);
+int xb_wait_for_data_to_read(void);
+int xs_input_avail(void);
+extern struct xenstore_domain_interface *xen_store_interface;
+extern int xen_store_evtchn;
+
+#endif /* _XENBUS_COMMS_H */
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
new file mode 100644
index 00000000000..0b769f7c4a4
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -0,0 +1,935 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define DPRINTK(fmt, args...) \
+ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
+ __func__, __LINE__, ##args)
+
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#include <xen/page.h>
+
+#include "xenbus_comms.h"
+#include "xenbus_probe.h"
+
+int xen_store_evtchn;
+struct xenstore_domain_interface *xen_store_interface;
+static unsigned long xen_store_mfn;
+
+static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
+
+static void wait_for_devices(struct xenbus_driver *xendrv);
+
+static int xenbus_probe_frontend(const char *type, const char *name);
+
+static void xenbus_dev_shutdown(struct device *_dev);
+
+/* If something in array of ids matches this device, return it. */
+static const struct xenbus_device_id *
+match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
+{
+ for (; *arr->devicetype != '\0'; arr++) {
+ if (!strcmp(arr->devicetype, dev->devicetype))
+ return arr;
+ }
+ return NULL;
+}
+
+int xenbus_match(struct device *_dev, struct device_driver *_drv)
+{
+ struct xenbus_driver *drv = to_xenbus_driver(_drv);
+
+ if (!drv->ids)
+ return 0;
+
+ return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
+}
+
+/* device/<type>/<id> => <type>-<id> */
+static int frontend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+ nodename = strchr(nodename, '/');
+ if (!nodename || strlen(nodename + 1) >= BUS_ID_SIZE) {
+ printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+ return -EINVAL;
+ }
+
+ strlcpy(bus_id, nodename + 1, BUS_ID_SIZE);
+ if (!strchr(bus_id, '/')) {
+ printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+ return -EINVAL;
+ }
+ *strchr(bus_id, '/') = '-';
+ return 0;
+}
+
+
+static void free_otherend_details(struct xenbus_device *dev)
+{
+ kfree(dev->otherend);
+ dev->otherend = NULL;
+}
+
+
+static void free_otherend_watch(struct xenbus_device *dev)
+{
+ if (dev->otherend_watch.node) {
+ unregister_xenbus_watch(&dev->otherend_watch);
+ kfree(dev->otherend_watch.node);
+ dev->otherend_watch.node = NULL;
+ }
+}
+
+
+int read_otherend_details(struct xenbus_device *xendev,
+ char *id_node, char *path_node)
+{
+ int err = xenbus_gather(XBT_NIL, xendev->nodename,
+ id_node, "%i", &xendev->otherend_id,
+ path_node, NULL, &xendev->otherend,
+ NULL);
+ if (err) {
+ xenbus_dev_fatal(xendev, err,
+ "reading other end details from %s",
+ xendev->nodename);
+ return err;
+ }
+ if (strlen(xendev->otherend) == 0 ||
+ !xenbus_exists(XBT_NIL, xendev->otherend, "")) {
+ xenbus_dev_fatal(xendev, -ENOENT,
+ "unable to read other end from %s. "
+ "missing or inaccessible.",
+ xendev->nodename);
+ free_otherend_details(xendev);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+
+static int read_backend_details(struct xenbus_device *xendev)
+{
+ return read_otherend_details(xendev, "backend-id", "backend");
+}
+
+
+/* Bus type for frontend drivers. */
+static struct xen_bus_type xenbus_frontend = {
+ .root = "device",
+ .levels = 2, /* device/type/<id> */
+ .get_bus_id = frontend_bus_id,
+ .probe = xenbus_probe_frontend,
+ .bus = {
+ .name = "xen",
+ .match = xenbus_match,
+ .probe = xenbus_dev_probe,
+ .remove = xenbus_dev_remove,
+ .shutdown = xenbus_dev_shutdown,
+ },
+};
+
+static void otherend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ struct xenbus_device *dev =
+ container_of(watch, struct xenbus_device, otherend_watch);
+ struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+ enum xenbus_state state;
+
+ /* Protect us against watches firing on old details when the otherend
+ details change, say immediately after a resume. */
+ if (!dev->otherend ||
+ strncmp(dev->otherend, vec[XS_WATCH_PATH],
+ strlen(dev->otherend))) {
+ dev_dbg(&dev->dev, "Ignoring watch at %s", vec[XS_WATCH_PATH]);
+ return;
+ }
+
+ state = xenbus_read_driver_state(dev->otherend);
+
+ dev_dbg(&dev->dev, "state is %d, (%s), %s, %s",
+ state, xenbus_strstate(state), dev->otherend_watch.node,
+ vec[XS_WATCH_PATH]);
+
+ /*
+ * Ignore xenbus transitions during shutdown. This prevents us doing
+ * work that can fail e.g., when the rootfs is gone.
+ */
+ if (system_state > SYSTEM_RUNNING) {
+ struct xen_bus_type *bus = bus;
+ bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
+ /* If we're frontend, drive the state machine to Closed. */
+ /* This should cause the backend to release our resources. */
+ if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
+ xenbus_frontend_closed(dev);
+ return;
+ }
+
+ if (drv->otherend_changed)
+ drv->otherend_changed(dev, state);
+}
+
+
+static int talk_to_otherend(struct xenbus_device *dev)
+{
+ struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+
+ free_otherend_watch(dev);
+ free_otherend_details(dev);
+
+ return drv->read_otherend_details(dev);
+}
+
+
+static int watch_otherend(struct xenbus_device *dev)
+{
+ return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
+ "%s/%s", dev->otherend, "state");
+}
+
+
+int xenbus_dev_probe(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
+ const struct xenbus_device_id *id;
+ int err;
+
+ DPRINTK("%s", dev->nodename);
+
+ if (!drv->probe) {
+ err = -ENODEV;
+ goto fail;
+ }
+
+ id = match_device(drv->ids, dev);
+ if (!id) {
+ err = -ENODEV;
+ goto fail;
+ }
+
+ err = talk_to_otherend(dev);
+ if (err) {
+ dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n",
+ dev->nodename);
+ return err;
+ }
+
+ err = drv->probe(dev, id);
+ if (err)
+ goto fail;
+
+ err = watch_otherend(dev);
+ if (err) {
+ dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
+ dev->nodename);
+ return err;
+ }
+
+ return 0;
+fail:
+ xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
+ xenbus_switch_state(dev, XenbusStateClosed);
+ return -ENODEV;
+}
+
+int xenbus_dev_remove(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ struct xenbus_driver *drv = to_xenbus_driver(_dev->driver);
+
+ DPRINTK("%s", dev->nodename);
+
+ free_otherend_watch(dev);
+ free_otherend_details(dev);
+
+ if (drv->remove)
+ drv->remove(dev);
+
+ xenbus_switch_state(dev, XenbusStateClosed);
+ return 0;
+}
+
+static void xenbus_dev_shutdown(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned long timeout = 5*HZ;
+
+ DPRINTK("%s", dev->nodename);
+
+ get_device(&dev->dev);
+ if (dev->state != XenbusStateConnected) {
+ printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__,
+ dev->nodename, xenbus_strstate(dev->state));
+ goto out;
+ }
+ xenbus_switch_state(dev, XenbusStateClosing);
+ timeout = wait_for_completion_timeout(&dev->down, timeout);
+ if (!timeout)
+ printk(KERN_INFO "%s: %s timeout closing device\n",
+ __func__, dev->nodename);
+ out:
+ put_device(&dev->dev);
+}
+
+int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus,
+ struct module *owner,
+ const char *mod_name)
+{
+ drv->driver.name = drv->name;
+ drv->driver.bus = &bus->bus;
+ drv->driver.owner = owner;
+ drv->driver.mod_name = mod_name;
+
+ return driver_register(&drv->driver);
+}
+
+int __xenbus_register_frontend(struct xenbus_driver *drv,
+ struct module *owner, const char *mod_name)
+{
+ int ret;
+
+ drv->read_otherend_details = read_backend_details;
+
+ ret = xenbus_register_driver_common(drv, &xenbus_frontend,
+ owner, mod_name);
+ if (ret)
+ return ret;
+
+ /* If this driver is loaded as a module wait for devices to attach. */
+ wait_for_devices(drv);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+
+void xenbus_unregister_driver(struct xenbus_driver *drv)
+{
+ driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(xenbus_unregister_driver);
+
+struct xb_find_info
+{
+ struct xenbus_device *dev;
+ const char *nodename;
+};
+
+static int cmp_dev(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct xb_find_info *info = data;
+
+ if (!strcmp(xendev->nodename, info->nodename)) {
+ info->dev = xendev;
+ get_device(dev);
+ return 1;
+ }
+ return 0;
+}
+
+struct xenbus_device *xenbus_device_find(const char *nodename,
+ struct bus_type *bus)
+{
+ struct xb_find_info info = { .dev = NULL, .nodename = nodename };
+
+ bus_for_each_dev(bus, NULL, &info, cmp_dev);
+ return info.dev;
+}
+
+static int cleanup_dev(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct xb_find_info *info = data;
+ int len = strlen(info->nodename);
+
+ DPRINTK("%s", info->nodename);
+
+ /* Match the info->nodename path, or any subdirectory of that path. */
+ if (strncmp(xendev->nodename, info->nodename, len))
+ return 0;
+
+ /* If the node name is longer, ensure it really is a subdirectory. */
+ if ((strlen(xendev->nodename) > len) && (xendev->nodename[len] != '/'))
+ return 0;
+
+ info->dev = xendev;
+ get_device(dev);
+ return 1;
+}
+
+static void xenbus_cleanup_devices(const char *path, struct bus_type *bus)
+{
+ struct xb_find_info info = { .nodename = path };
+
+ do {
+ info.dev = NULL;
+ bus_for_each_dev(bus, NULL, &info, cleanup_dev);
+ if (info.dev) {
+ device_unregister(&info.dev->dev);
+ put_device(&info.dev->dev);
+ }
+ } while (info.dev);
+}
+
+static void xenbus_dev_release(struct device *dev)
+{
+ if (dev)
+ kfree(to_xenbus_device(dev));
+}
+
+static ssize_t xendev_show_nodename(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
+}
+DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
+
+static ssize_t xendev_show_devtype(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
+}
+DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
+
+
+int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename)
+{
+ int err;
+ struct xenbus_device *xendev;
+ size_t stringlen;
+ char *tmpstring;
+
+ enum xenbus_state state = xenbus_read_driver_state(nodename);
+
+ if (state != XenbusStateInitialising) {
+ /* Device is not new, so ignore it. This can happen if a
+ device is going away after switching to Closed. */
+ return 0;
+ }
+
+ stringlen = strlen(nodename) + 1 + strlen(type) + 1;
+ xendev = kzalloc(sizeof(*xendev) + stringlen, GFP_KERNEL);
+ if (!xendev)
+ return -ENOMEM;
+
+ xendev->state = XenbusStateInitialising;
+
+ /* Copy the strings into the extra space. */
+
+ tmpstring = (char *)(xendev + 1);
+ strcpy(tmpstring, nodename);
+ xendev->nodename = tmpstring;
+
+ tmpstring += strlen(tmpstring) + 1;
+ strcpy(tmpstring, type);
+ xendev->devicetype = tmpstring;
+ init_completion(&xendev->down);
+
+ xendev->dev.bus = &bus->bus;
+ xendev->dev.release = xenbus_dev_release;
+
+ err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
+ if (err)
+ goto fail;
+
+ /* Register with generic device framework. */
+ err = device_register(&xendev->dev);
+ if (err)
+ goto fail;
+
+ err = device_create_file(&xendev->dev, &dev_attr_nodename);
+ if (err)
+ goto fail_unregister;
+
+ err = device_create_file(&xendev->dev, &dev_attr_devtype);
+ if (err)
+ goto fail_remove_file;
+
+ return 0;
+fail_remove_file:
+ device_remove_file(&xendev->dev, &dev_attr_nodename);
+fail_unregister:
+ device_unregister(&xendev->dev);
+fail:
+ kfree(xendev);
+ return err;
+}
+
+/* device/<typename>/<name> */
+static int xenbus_probe_frontend(const char *type, const char *name)
+{
+ char *nodename;
+ int err;
+
+ nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
+ xenbus_frontend.root, type, name);
+ if (!nodename)
+ return -ENOMEM;
+
+ DPRINTK("%s", nodename);
+
+ err = xenbus_probe_node(&xenbus_frontend, type, nodename);
+ kfree(nodename);
+ return err;
+}
+
+static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
+{
+ int err = 0;
+ char **dir;
+ unsigned int dir_n = 0;
+ int i;
+
+ dir = xenbus_directory(XBT_NIL, bus->root, type, &dir_n);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ for (i = 0; i < dir_n; i++) {
+ err = bus->probe(type, dir[i]);
+ if (err)
+ break;
+ }
+ kfree(dir);
+ return err;
+}
+
+int xenbus_probe_devices(struct xen_bus_type *bus)
+{
+ int err = 0;
+ char **dir;
+ unsigned int i, dir_n;
+
+ dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+
+ for (i = 0; i < dir_n; i++) {
+ err = xenbus_probe_device_type(bus, dir[i]);
+ if (err)
+ break;
+ }
+ kfree(dir);
+ return err;
+}
+
+static unsigned int char_count(const char *str, char c)
+{
+ unsigned int i, ret = 0;
+
+ for (i = 0; str[i]; i++)
+ if (str[i] == c)
+ ret++;
+ return ret;
+}
+
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; str[i]; i++)
+ if (str[i] == c) {
+ if (len == 0)
+ return i;
+ len--;
+ }
+ return (len == 0) ? i : -ERANGE;
+}
+
+void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
+{
+ int exists, rootlen;
+ struct xenbus_device *dev;
+ char type[BUS_ID_SIZE];
+ const char *p, *root;
+
+ if (char_count(node, '/') < 2)
+ return;
+
+ exists = xenbus_exists(XBT_NIL, node, "");
+ if (!exists) {
+ xenbus_cleanup_devices(node, &bus->bus);
+ return;
+ }
+
+ /* backend/<type>/... or device/<type>/... */
+ p = strchr(node, '/') + 1;
+ snprintf(type, BUS_ID_SIZE, "%.*s", (int)strcspn(p, "/"), p);
+ type[BUS_ID_SIZE-1] = '\0';
+
+ rootlen = strsep_len(node, '/', bus->levels);
+ if (rootlen < 0)
+ return;
+ root = kasprintf(GFP_KERNEL, "%.*s", rootlen, node);
+ if (!root)
+ return;
+
+ dev = xenbus_device_find(root, &bus->bus);
+ if (!dev)
+ xenbus_probe_node(bus, type, root);
+ else
+ put_device(&dev->dev);
+
+ kfree(root);
+}
+
+static void frontend_changed(struct xenbus_watch *watch,
+ const char **vec, unsigned int len)
+{
+ DPRINTK("");
+
+ xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
+}
+
+/* We watch for devices appearing and vanishing. */
+static struct xenbus_watch fe_watch = {
+ .node = "device",
+ .callback = frontend_changed,
+};
+
+static int suspend_dev(struct device *dev, void *data)
+{
+ int err = 0;
+ struct xenbus_driver *drv;
+ struct xenbus_device *xdev;
+
+ DPRINTK("");
+
+ if (dev->driver == NULL)
+ return 0;
+ drv = to_xenbus_driver(dev->driver);
+ xdev = container_of(dev, struct xenbus_device, dev);
+ if (drv->suspend)
+ err = drv->suspend(xdev);
+ if (err)
+ printk(KERN_WARNING
+ "xenbus: suspend %s failed: %i\n", dev->bus_id, err);
+ return 0;
+}
+
+static int suspend_cancel_dev(struct device *dev, void *data)
+{
+ int err = 0;
+ struct xenbus_driver *drv;
+ struct xenbus_device *xdev;
+
+ DPRINTK("");
+
+ if (dev->driver == NULL)
+ return 0;
+ drv = to_xenbus_driver(dev->driver);
+ xdev = container_of(dev, struct xenbus_device, dev);
+ if (drv->suspend_cancel)
+ err = drv->suspend_cancel(xdev);
+ if (err)
+ printk(KERN_WARNING
+ "xenbus: suspend_cancel %s failed: %i\n",
+ dev->bus_id, err);
+ return 0;
+}
+
+static int resume_dev(struct device *dev, void *data)
+{
+ int err;
+ struct xenbus_driver *drv;
+ struct xenbus_device *xdev;
+
+ DPRINTK("");
+
+ if (dev->driver == NULL)
+ return 0;
+
+ drv = to_xenbus_driver(dev->driver);
+ xdev = container_of(dev, struct xenbus_device, dev);
+
+ err = talk_to_otherend(xdev);
+ if (err) {
+ printk(KERN_WARNING
+ "xenbus: resume (talk_to_otherend) %s failed: %i\n",
+ dev->bus_id, err);
+ return err;
+ }
+
+ xdev->state = XenbusStateInitialising;
+
+ if (drv->resume) {
+ err = drv->resume(xdev);
+ if (err) {
+ printk(KERN_WARNING
+ "xenbus: resume %s failed: %i\n",
+ dev->bus_id, err);
+ return err;
+ }
+ }
+
+ err = watch_otherend(xdev);
+ if (err) {
+ printk(KERN_WARNING
+ "xenbus_probe: resume (watch_otherend) %s failed: "
+ "%d.\n", dev->bus_id, err);
+ return err;
+ }
+
+ return 0;
+}
+
+void xenbus_suspend(void)
+{
+ DPRINTK("");
+
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
+ xenbus_backend_suspend(suspend_dev);
+ xs_suspend();
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend);
+
+void xenbus_resume(void)
+{
+ xb_init_comms();
+ xs_resume();
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
+ xenbus_backend_resume(resume_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_resume);
+
+void xenbus_suspend_cancel(void)
+{
+ xs_suspend_cancel();
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
+ xenbus_backend_resume(suspend_cancel_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
+
+/* A flag to determine if xenstored is 'ready' (i.e. has started) */
+int xenstored_ready = 0;
+
+
+int register_xenstore_notifier(struct notifier_block *nb)
+{
+ int ret = 0;
+
+ if (xenstored_ready > 0)
+ ret = nb->notifier_call(nb, 0, NULL);
+ else
+ blocking_notifier_chain_register(&xenstore_chain, nb);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(register_xenstore_notifier);
+
+void unregister_xenstore_notifier(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&xenstore_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
+
+void xenbus_probe(struct work_struct *unused)
+{
+ BUG_ON((xenstored_ready <= 0));
+
+ /* Enumerate devices in xenstore and watch for changes. */
+ xenbus_probe_devices(&xenbus_frontend);
+ register_xenbus_watch(&fe_watch);
+ xenbus_backend_probe_and_watch();
+
+ /* Notify others that xenstore is up */
+ blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
+}
+
+static int __init xenbus_probe_init(void)
+{
+ int err = 0;
+
+ DPRINTK("");
+
+ err = -ENODEV;
+ if (!is_running_on_xen())
+ goto out_error;
+
+ /* Register ourselves with the kernel bus subsystem */
+ err = bus_register(&xenbus_frontend.bus);
+ if (err)
+ goto out_error;
+
+ err = xenbus_backend_bus_register();
+ if (err)
+ goto out_unreg_front;
+
+ /*
+ * Domain0 doesn't have a store_evtchn or store_mfn yet.
+ */
+ if (is_initial_xendomain()) {
+ /* dom0 not yet supported */
+ } else {
+ xenstored_ready = 1;
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ }
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
+
+ /* Initialize the interface to xenstore. */
+ err = xs_init();
+ if (err) {
+ printk(KERN_WARNING
+ "XENBUS: Error initializing xenstore comms: %i\n", err);
+ goto out_unreg_back;
+ }
+
+ if (!is_initial_xendomain())
+ xenbus_probe(NULL);
+
+ return 0;
+
+ out_unreg_back:
+ xenbus_backend_bus_unregister();
+
+ out_unreg_front:
+ bus_unregister(&xenbus_frontend.bus);
+
+ out_error:
+ return err;
+}
+
+postcore_initcall(xenbus_probe_init);
+
+MODULE_LICENSE("GPL");
+
+static int is_disconnected_device(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct device_driver *drv = data;
+
+ /*
+ * A device with no driver will never connect. We care only about
+ * devices which should currently be in the process of connecting.
+ */
+ if (!dev->driver)
+ return 0;
+
+ /* Is this search limited to a particular driver? */
+ if (drv && (dev->driver != drv))
+ return 0;
+
+ return (xendev->state != XenbusStateConnected);
+}
+
+static int exists_disconnected_device(struct device_driver *drv)
+{
+ return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+ is_disconnected_device);
+}
+
+static int print_device_status(struct device *dev, void *data)
+{
+ struct xenbus_device *xendev = to_xenbus_device(dev);
+ struct device_driver *drv = data;
+
+ /* Is this operation limited to a particular driver? */
+ if (drv && (dev->driver != drv))
+ return 0;
+
+ if (!dev->driver) {
+ /* Information only: is this too noisy? */
+ printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
+ xendev->nodename);
+ } else if (xendev->state != XenbusStateConnected) {
+ printk(KERN_WARNING "XENBUS: Timeout connecting "
+ "to device: %s (state %d)\n",
+ xendev->nodename, xendev->state);
+ }
+
+ return 0;
+}
+
+/* We only wait for device setup after most initcalls have run. */
+static int ready_to_wait_for_devices;
+
+/*
+ * On a 10 second timeout, wait for all devices currently configured. We need
+ * to do this to guarantee that the filesystems and / or network devices
+ * needed for boot are available, before we can allow the boot to proceed.
+ *
+ * This needs to be on a late_initcall, to happen after the frontend device
+ * drivers have been initialised, but before the root fs is mounted.
+ *
+ * A possible improvement here would be to have the tools add a per-device
+ * flag to the store entry, indicating whether it is needed at boot time.
+ * This would allow people who knew what they were doing to accelerate their
+ * boot slightly, but of course needs tools or manual intervention to set up
+ * those flags correctly.
+ */
+static void wait_for_devices(struct xenbus_driver *xendrv)
+{
+ unsigned long timeout = jiffies + 10*HZ;
+ struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
+
+ if (!ready_to_wait_for_devices || !is_running_on_xen())
+ return;
+
+ while (exists_disconnected_device(drv)) {
+ if (time_after(jiffies, timeout))
+ break;
+ schedule_timeout_interruptible(HZ/10);
+ }
+
+ bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+ print_device_status);
+}
+
+#ifndef MODULE
+static int __init boot_wait_for_devices(void)
+{
+ ready_to_wait_for_devices = 1;
+ wait_for_devices(NULL);
+ return 0;
+}
+
+late_initcall(boot_wait_for_devices);
+#endif
diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h
new file mode 100644
index 00000000000..e09b19415a4
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_probe.h
@@ -0,0 +1,74 @@
+/******************************************************************************
+ * xenbus_probe.h
+ *
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XENBUS_PROBE_H
+#define _XENBUS_PROBE_H
+
+#ifdef CONFIG_XEN_BACKEND
+extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
+extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
+extern void xenbus_backend_probe_and_watch(void);
+extern int xenbus_backend_bus_register(void);
+extern void xenbus_backend_bus_unregister(void);
+#else
+static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
+static inline void xenbus_backend_probe_and_watch(void) {}
+static inline int xenbus_backend_bus_register(void) { return 0; }
+static inline void xenbus_backend_bus_unregister(void) {}
+#endif
+
+struct xen_bus_type
+{
+ char *root;
+ unsigned int levels;
+ int (*get_bus_id)(char bus_id[BUS_ID_SIZE], const char *nodename);
+ int (*probe)(const char *type, const char *dir);
+ struct bus_type bus;
+};
+
+extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
+extern int xenbus_dev_probe(struct device *_dev);
+extern int xenbus_dev_remove(struct device *_dev);
+extern int xenbus_register_driver_common(struct xenbus_driver *drv,
+ struct xen_bus_type *bus,
+ struct module *owner,
+ const char *mod_name);
+extern int xenbus_probe_node(struct xen_bus_type *bus,
+ const char *type,
+ const char *nodename);
+extern int xenbus_probe_devices(struct xen_bus_type *bus);
+
+extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
+
+#endif
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
new file mode 100644
index 00000000000..9e943fbce81
--- /dev/null
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -0,0 +1,861 @@
+/******************************************************************************
+ * xenbus_xs.c
+ *
+ * This is the kernel equivalent of the "xs" library. We don't need everything
+ * and we use xenbus_comms for communication.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/unistd.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/uio.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/fcntl.h>
+#include <linux/kthread.h>
+#include <linux/rwsem.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <xen/xenbus.h>
+#include "xenbus_comms.h"
+
+struct xs_stored_msg {
+ struct list_head list;
+
+ struct xsd_sockmsg hdr;
+
+ union {
+ /* Queued replies. */
+ struct {
+ char *body;
+ } reply;
+
+ /* Queued watch events. */
+ struct {
+ struct xenbus_watch *handle;
+ char **vec;
+ unsigned int vec_size;
+ } watch;
+ } u;
+};
+
+struct xs_handle {
+ /* A list of replies. Currently only one will ever be outstanding. */
+ struct list_head reply_list;
+ spinlock_t reply_lock;
+ wait_queue_head_t reply_waitq;
+
+ /*
+ * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
+ * response_mutex is never taken simultaneously with the other three.
+ */
+
+ /* One request at a time. */
+ struct mutex request_mutex;
+
+ /* Protect xenbus reader thread against save/restore. */
+ struct mutex response_mutex;
+
+ /* Protect transactions against save/restore. */
+ struct rw_semaphore transaction_mutex;
+
+ /* Protect watch (de)register against save/restore. */
+ struct rw_semaphore watch_mutex;
+};
+
+static struct xs_handle xs_state;
+
+/* List of registered watches, and a lock to protect it. */
+static LIST_HEAD(watches);
+static DEFINE_SPINLOCK(watches_lock);
+
+/* List of pending watch callback events, and a lock to protect it. */
+static LIST_HEAD(watch_events);
+static DEFINE_SPINLOCK(watch_events_lock);
+
+/*
+ * Details of the xenwatch callback kernel thread. The thread waits on the
+ * watch_events_waitq for work to do (queued on watch_events list). When it
+ * wakes up it acquires the xenwatch_mutex before reading the list and
+ * carrying out work.
+ */
+static pid_t xenwatch_pid;
+static DEFINE_MUTEX(xenwatch_mutex);
+static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);
+
+static int get_error(const char *errorstring)
+{
+ unsigned int i;
+
+ for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) {
+ if (i == ARRAY_SIZE(xsd_errors) - 1) {
+ printk(KERN_WARNING
+ "XENBUS xen store gave: unknown error %s",
+ errorstring);
+ return EINVAL;
+ }
+ }
+ return xsd_errors[i].errnum;
+}
+
+static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
+{
+ struct xs_stored_msg *msg;
+ char *body;
+
+ spin_lock(&xs_state.reply_lock);
+
+ while (list_empty(&xs_state.reply_list)) {
+ spin_unlock(&xs_state.reply_lock);
+ /* XXX FIXME: Avoid synchronous wait for response here. */
+ wait_event(xs_state.reply_waitq,
+ !list_empty(&xs_state.reply_list));
+ spin_lock(&xs_state.reply_lock);
+ }
+
+ msg = list_entry(xs_state.reply_list.next,
+ struct xs_stored_msg, list);
+ list_del(&msg->list);
+
+ spin_unlock(&xs_state.reply_lock);
+
+ *type = msg->hdr.type;
+ if (len)
+ *len = msg->hdr.len;
+ body = msg->u.reply.body;
+
+ kfree(msg);
+
+ return body;
+}
+
+void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
+{
+ void *ret;
+ struct xsd_sockmsg req_msg = *msg;
+ int err;
+
+ if (req_msg.type == XS_TRANSACTION_START)
+ down_read(&xs_state.transaction_mutex);
+
+ mutex_lock(&xs_state.request_mutex);
+
+ err = xb_write(msg, sizeof(*msg) + msg->len);
+ if (err) {
+ msg->type = XS_ERROR;
+ ret = ERR_PTR(err);
+ } else
+ ret = read_reply(&msg->type, &msg->len);
+
+ mutex_unlock(&xs_state.request_mutex);
+
+ if ((msg->type == XS_TRANSACTION_END) ||
+ ((req_msg.type == XS_TRANSACTION_START) &&
+ (msg->type == XS_ERROR)))
+ up_read(&xs_state.transaction_mutex);
+
+ return ret;
+}
+
+/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
+static void *xs_talkv(struct xenbus_transaction t,
+ enum xsd_sockmsg_type type,
+ const struct kvec *iovec,
+ unsigned int num_vecs,
+ unsigned int *len)
+{
+ struct xsd_sockmsg msg;
+ void *ret = NULL;
+ unsigned int i;
+ int err;
+
+ msg.tx_id = t.id;
+ msg.req_id = 0;
+ msg.type = type;
+ msg.len = 0;
+ for (i = 0; i < num_vecs; i++)
+ msg.len += iovec[i].iov_len;
+
+ mutex_lock(&xs_state.request_mutex);
+
+ err = xb_write(&msg, sizeof(msg));
+ if (err) {
+ mutex_unlock(&xs_state.request_mutex);
+ return ERR_PTR(err);
+ }
+
+ for (i = 0; i < num_vecs; i++) {
+ err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
+ if (err) {
+ mutex_unlock(&xs_state.request_mutex);
+ return ERR_PTR(err);
+ }
+ }
+
+ ret = read_reply(&msg.type, len);
+
+ mutex_unlock(&xs_state.request_mutex);
+
+ if (IS_ERR(ret))
+ return ret;
+
+ if (msg.type == XS_ERROR) {
+ err = get_error(ret);
+ kfree(ret);
+ return ERR_PTR(-err);
+ }
+
+ if (msg.type != type) {
+ if (printk_ratelimit())
+ printk(KERN_WARNING
+ "XENBUS unexpected type [%d], expected [%d]\n",
+ msg.type, type);
+ kfree(ret);
+ return ERR_PTR(-EINVAL);
+ }
+ return ret;
+}
+
+/* Simplified version of xs_talkv: single message. */
+static void *xs_single(struct xenbus_transaction t,
+ enum xsd_sockmsg_type type,
+ const char *string,
+ unsigned int *len)
+{
+ struct kvec iovec;
+
+ iovec.iov_base = (void *)string;
+ iovec.iov_len = strlen(string) + 1;
+ return xs_talkv(t, type, &iovec, 1, len);
+}
+
+/* Many commands only need an ack, don't care what it says. */
+static int xs_error(char *reply)
+{
+ if (IS_ERR(reply))
+ return PTR_ERR(reply);
+ kfree(reply);
+ return 0;
+}
+
+static unsigned int count_strings(const char *strings, unsigned int len)
+{
+ unsigned int num;
+ const char *p;
+
+ for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1)
+ num++;
+
+ return num;
+}
+
+/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */
+static char *join(const char *dir, const char *name)
+{
+ char *buffer;
+
+ if (strlen(name) == 0)
+ buffer = kasprintf(GFP_KERNEL, "%s", dir);
+ else
+ buffer = kasprintf(GFP_KERNEL, "%s/%s", dir, name);
+ return (!buffer) ? ERR_PTR(-ENOMEM) : buffer;
+}
+
+static char **split(char *strings, unsigned int len, unsigned int *num)
+{
+ char *p, **ret;
+
+ /* Count the strings. */
+ *num = count_strings(strings, len);
+
+ /* Transfer to one big alloc for easy freeing. */
+ ret = kmalloc(*num * sizeof(char *) + len, GFP_KERNEL);
+ if (!ret) {
+ kfree(strings);
+ return ERR_PTR(-ENOMEM);
+ }
+ memcpy(&ret[*num], strings, len);
+ kfree(strings);
+
+ strings = (char *)&ret[*num];
+ for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
+ ret[(*num)++] = p;
+
+ return ret;
+}
+
+char **xenbus_directory(struct xenbus_transaction t,
+ const char *dir, const char *node, unsigned int *num)
+{
+ char *strings, *path;
+ unsigned int len;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return (char **)path;
+
+ strings = xs_single(t, XS_DIRECTORY, path, &len);
+ kfree(path);
+ if (IS_ERR(strings))
+ return (char **)strings;
+
+ return split(strings, len, num);
+}
+EXPORT_SYMBOL_GPL(xenbus_directory);
+
+/* Check if a path exists. Return 1 if it does. */
+int xenbus_exists(struct xenbus_transaction t,
+ const char *dir, const char *node)
+{
+ char **d;
+ int dir_n;
+
+ d = xenbus_directory(t, dir, node, &dir_n);
+ if (IS_ERR(d))
+ return 0;
+ kfree(d);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(xenbus_exists);
+
+/* Get the value of a single file.
+ * Returns a kmalloced value: call free() on it after use.
+ * len indicates length in bytes.
+ */
+void *xenbus_read(struct xenbus_transaction t,
+ const char *dir, const char *node, unsigned int *len)
+{
+ char *path;
+ void *ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return (void *)path;
+
+ ret = xs_single(t, XS_READ, path, len);
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_read);
+
+/* Write the value of a single file.
+ * Returns -err on failure.
+ */
+int xenbus_write(struct xenbus_transaction t,
+ const char *dir, const char *node, const char *string)
+{
+ const char *path;
+ struct kvec iovec[2];
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ iovec[0].iov_base = (void *)path;
+ iovec[0].iov_len = strlen(path) + 1;
+ iovec[1].iov_base = (void *)string;
+ iovec[1].iov_len = strlen(string);
+
+ ret = xs_error(xs_talkv(t, XS_WRITE, iovec, ARRAY_SIZE(iovec), NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_write);
+
+/* Create a new directory. */
+int xenbus_mkdir(struct xenbus_transaction t,
+ const char *dir, const char *node)
+{
+ char *path;
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = xs_error(xs_single(t, XS_MKDIR, path, NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_mkdir);
+
+/* Destroy a file or directory (directories must be empty). */
+int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
+{
+ char *path;
+ int ret;
+
+ path = join(dir, node);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ ret = xs_error(xs_single(t, XS_RM, path, NULL));
+ kfree(path);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_rm);
+
+/* Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ */
+int xenbus_transaction_start(struct xenbus_transaction *t)
+{
+ char *id_str;
+
+ down_read(&xs_state.transaction_mutex);
+
+ id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
+ if (IS_ERR(id_str)) {
+ up_read(&xs_state.transaction_mutex);
+ return PTR_ERR(id_str);
+ }
+
+ t->id = simple_strtoul(id_str, NULL, 0);
+ kfree(id_str);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xenbus_transaction_start);
+
+/* End a transaction.
+ * If abandon is true, transaction is discarded instead of committed.
+ */
+int xenbus_transaction_end(struct xenbus_transaction t, int abort)
+{
+ char abortstr[2];
+ int err;
+
+ if (abort)
+ strcpy(abortstr, "F");
+ else
+ strcpy(abortstr, "T");
+
+ err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));
+
+ up_read(&xs_state.transaction_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(xenbus_transaction_end);
+
+/* Single read and scanf: returns -errno or num scanned. */
+int xenbus_scanf(struct xenbus_transaction t,
+ const char *dir, const char *node, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+ char *val;
+
+ val = xenbus_read(t, dir, node, NULL);
+ if (IS_ERR(val))
+ return PTR_ERR(val);
+
+ va_start(ap, fmt);
+ ret = vsscanf(val, fmt, ap);
+ va_end(ap);
+ kfree(val);
+ /* Distinctive errno. */
+ if (ret == 0)
+ return -ERANGE;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_scanf);
+
+/* Single printf and write: returns -errno or 0. */
+int xenbus_printf(struct xenbus_transaction t,
+ const char *dir, const char *node, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+#define PRINTF_BUFFER_SIZE 4096
+ char *printf_buffer;
+
+ printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
+ if (printf_buffer == NULL)
+ return -ENOMEM;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
+ va_end(ap);
+
+ BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
+ ret = xenbus_write(t, dir, node, printf_buffer);
+
+ kfree(printf_buffer);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_printf);
+
+/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */
+int xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
+{
+ va_list ap;
+ const char *name;
+ int ret = 0;
+
+ va_start(ap, dir);
+ while (ret == 0 && (name = va_arg(ap, char *)) != NULL) {
+ const char *fmt = va_arg(ap, char *);
+ void *result = va_arg(ap, void *);
+ char *p;
+
+ p = xenbus_read(t, dir, name, NULL);
+ if (IS_ERR(p)) {
+ ret = PTR_ERR(p);
+ break;
+ }
+ if (fmt) {
+ if (sscanf(p, fmt, result) == 0)
+ ret = -EINVAL;
+ kfree(p);
+ } else
+ *(char **)result = p;
+ }
+ va_end(ap);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xenbus_gather);
+
+static int xs_watch(const char *path, const char *token)
+{
+ struct kvec iov[2];
+
+ iov[0].iov_base = (void *)path;
+ iov[0].iov_len = strlen(path) + 1;
+ iov[1].iov_base = (void *)token;
+ iov[1].iov_len = strlen(token) + 1;
+
+ return xs_error(xs_talkv(XBT_NIL, XS_WATCH, iov,
+ ARRAY_SIZE(iov), NULL));
+}
+
+static int xs_unwatch(const char *path, const char *token)
+{
+ struct kvec iov[2];
+
+ iov[0].iov_base = (char *)path;
+ iov[0].iov_len = strlen(path) + 1;
+ iov[1].iov_base = (char *)token;
+ iov[1].iov_len = strlen(token) + 1;
+
+ return xs_error(xs_talkv(XBT_NIL, XS_UNWATCH, iov,
+ ARRAY_SIZE(iov), NULL));
+}
+
+static struct xenbus_watch *find_watch(const char *token)
+{
+ struct xenbus_watch *i, *cmp;
+
+ cmp = (void *)simple_strtoul(token, NULL, 16);
+
+ list_for_each_entry(i, &watches, list)
+ if (i == cmp)
+ return i;
+
+ return NULL;
+}
+
+/* Register callback to watch this node. */
+int register_xenbus_watch(struct xenbus_watch *watch)
+{
+ /* Pointer in ascii is the token. */
+ char token[sizeof(watch) * 2 + 1];
+ int err;
+
+ sprintf(token, "%lX", (long)watch);
+
+ down_read(&xs_state.watch_mutex);
+
+ spin_lock(&watches_lock);
+ BUG_ON(find_watch(token));
+ list_add(&watch->list, &watches);
+ spin_unlock(&watches_lock);
+
+ err = xs_watch(watch->node, token);
+
+ /* Ignore errors due to multiple registration. */
+ if ((err != 0) && (err != -EEXIST)) {
+ spin_lock(&watches_lock);
+ list_del(&watch->list);
+ spin_unlock(&watches_lock);
+ }
+
+ up_read(&xs_state.watch_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(register_xenbus_watch);
+
+void unregister_xenbus_watch(struct xenbus_watch *watch)
+{
+ struct xs_stored_msg *msg, *tmp;
+ char token[sizeof(watch) * 2 + 1];
+ int err;
+
+ sprintf(token, "%lX", (long)watch);
+
+ down_read(&xs_state.watch_mutex);
+
+ spin_lock(&watches_lock);
+ BUG_ON(!find_watch(token));
+ list_del(&watch->list);
+ spin_unlock(&watches_lock);
+
+ err = xs_unwatch(watch->node, token);
+ if (err)
+ printk(KERN_WARNING
+ "XENBUS Failed to release watch %s: %i\n",
+ watch->node, err);
+
+ up_read(&xs_state.watch_mutex);
+
+ /* Make sure there are no callbacks running currently (unless
+ its us) */
+ if (current->pid != xenwatch_pid)
+ mutex_lock(&xenwatch_mutex);
+
+ /* Cancel pending watch events. */
+ spin_lock(&watch_events_lock);
+ list_for_each_entry_safe(msg, tmp, &watch_events, list) {
+ if (msg->u.watch.handle != watch)
+ continue;
+ list_del(&msg->list);
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+ spin_unlock(&watch_events_lock);
+
+ if (current->pid != xenwatch_pid)
+ mutex_unlock(&xenwatch_mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_xenbus_watch);
+
+void xs_suspend(void)
+{
+ down_write(&xs_state.transaction_mutex);
+ down_write(&xs_state.watch_mutex);
+ mutex_lock(&xs_state.request_mutex);
+ mutex_lock(&xs_state.response_mutex);
+}
+
+void xs_resume(void)
+{
+ struct xenbus_watch *watch;
+ char token[sizeof(watch) * 2 + 1];
+
+ mutex_unlock(&xs_state.response_mutex);
+ mutex_unlock(&xs_state.request_mutex);
+ up_write(&xs_state.transaction_mutex);
+
+ /* No need for watches_lock: the watch_mutex is sufficient. */
+ list_for_each_entry(watch, &watches, list) {
+ sprintf(token, "%lX", (long)watch);
+ xs_watch(watch->node, token);
+ }
+
+ up_write(&xs_state.watch_mutex);
+}
+
+void xs_suspend_cancel(void)
+{
+ mutex_unlock(&xs_state.response_mutex);
+ mutex_unlock(&xs_state.request_mutex);
+ up_write(&xs_state.watch_mutex);
+ up_write(&xs_state.transaction_mutex);
+}
+
+static int xenwatch_thread(void *unused)
+{
+ struct list_head *ent;
+ struct xs_stored_msg *msg;
+
+ for (;;) {
+ wait_event_interruptible(watch_events_waitq,
+ !list_empty(&watch_events));
+
+ if (kthread_should_stop())
+ break;
+
+ mutex_lock(&xenwatch_mutex);
+
+ spin_lock(&watch_events_lock);
+ ent = watch_events.next;
+ if (ent != &watch_events)
+ list_del(ent);
+ spin_unlock(&watch_events_lock);
+
+ if (ent != &watch_events) {
+ msg = list_entry(ent, struct xs_stored_msg, list);
+ msg->u.watch.handle->callback(
+ msg->u.watch.handle,
+ (const char **)msg->u.watch.vec,
+ msg->u.watch.vec_size);
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+
+ mutex_unlock(&xenwatch_mutex);
+ }
+
+ return 0;
+}
+
+static int process_msg(void)
+{
+ struct xs_stored_msg *msg;
+ char *body;
+ int err;
+
+ /*
+ * We must disallow save/restore while reading a xenstore message.
+ * A partial read across s/r leaves us out of sync with xenstored.
+ */
+ for (;;) {
+ err = xb_wait_for_data_to_read();
+ if (err)
+ return err;
+ mutex_lock(&xs_state.response_mutex);
+ if (xb_data_to_read())
+ break;
+ /* We raced with save/restore: pending data 'disappeared'. */
+ mutex_unlock(&xs_state.response_mutex);
+ }
+
+
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (msg == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xb_read(&msg->hdr, sizeof(msg->hdr));
+ if (err) {
+ kfree(msg);
+ goto out;
+ }
+
+ body = kmalloc(msg->hdr.len + 1, GFP_KERNEL);
+ if (body == NULL) {
+ kfree(msg);
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = xb_read(body, msg->hdr.len);
+ if (err) {
+ kfree(body);
+ kfree(msg);
+ goto out;
+ }
+ body[msg->hdr.len] = '\0';
+
+ if (msg->hdr.type == XS_WATCH_EVENT) {
+ msg->u.watch.vec = split(body, msg->hdr.len,
+ &msg->u.watch.vec_size);
+ if (IS_ERR(msg->u.watch.vec)) {
+ kfree(msg);
+ err = PTR_ERR(msg->u.watch.vec);
+ goto out;
+ }
+
+ spin_lock(&watches_lock);
+ msg->u.watch.handle = find_watch(
+ msg->u.watch.vec[XS_WATCH_TOKEN]);
+ if (msg->u.watch.handle != NULL) {
+ spin_lock(&watch_events_lock);
+ list_add_tail(&msg->list, &watch_events);
+ wake_up(&watch_events_waitq);
+ spin_unlock(&watch_events_lock);
+ } else {
+ kfree(msg->u.watch.vec);
+ kfree(msg);
+ }
+ spin_unlock(&watches_lock);
+ } else {
+ msg->u.reply.body = body;
+ spin_lock(&xs_state.reply_lock);
+ list_add_tail(&msg->list, &xs_state.reply_list);
+ spin_unlock(&xs_state.reply_lock);
+ wake_up(&xs_state.reply_waitq);
+ }
+
+ out:
+ mutex_unlock(&xs_state.response_mutex);
+ return err;
+}
+
+static int xenbus_thread(void *unused)
+{
+ int err;
+
+ for (;;) {
+ err = process_msg();
+ if (err)
+ printk(KERN_WARNING "XENBUS error %d while reading "
+ "message\n", err);
+ if (kthread_should_stop())
+ break;
+ }
+
+ return 0;
+}
+
+int xs_init(void)
+{
+ int err;
+ struct task_struct *task;
+
+ INIT_LIST_HEAD(&xs_state.reply_list);
+ spin_lock_init(&xs_state.reply_lock);
+ init_waitqueue_head(&xs_state.reply_waitq);
+
+ mutex_init(&xs_state.request_mutex);
+ mutex_init(&xs_state.response_mutex);
+ init_rwsem(&xs_state.transaction_mutex);
+ init_rwsem(&xs_state.watch_mutex);
+
+ /* Initialize the shared memory rings to talk to xenstored */
+ err = xb_init_comms();
+ if (err)
+ return err;
+
+ task = kthread_run(xenwatch_thread, NULL, "xenwatch");
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ xenwatch_pid = task->pid;
+
+ task = kthread_run(xenbus_thread, NULL, "xenbus");
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ return 0;
+}